1. xml
  2. /transformation
  3. /xquery

XQuery for Transformation

While XQuery is covered comprehensively in the Advanced section, this page focuses specifically on using XQuery for XML transformation tasks, emphasizing its functional programming approach and data manipulation capabilities.

XQuery vs XSLT for Transformations

AspectXQueryXSLT
ParadigmFunctional programmingTemplate-based
Data ExtractionExcellentGood
Complex QueriesExcellentModerate
GroupingNative supportComplex patterns
Output FormatsXML, TextXML, HTML, Text
Learning CurveSQL-like, easierSteeper

Data Extraction and Transformation

Basic Data Extraction

Transform product catalog with filtering and restructuring:

(: Input XML structure similar to XSLT example :)
let $catalog := doc("catalog.xml")

(: Extract electronics products with enhanced structure :)
<electronics-catalog>
  <summary>
    <total-products>{count($catalog//product[@category='electronics'])}</total-products>
    <average-price>{avg($catalog//product[@category='electronics']/price)}</average-price>
    <price-range>
      <min>{min($catalog//product[@category='electronics']/price)}</min>
      <max>{max($catalog//product[@category='electronics']/price)}</max>
    </price-range>
  </summary>
  
  <products>
    {for $product in $catalog//product[@category='electronics']
     let $specs := $product/specs
     order by $product/price descending
     return
       <product id="{$product/@id}">
         <basic-info>
           <name>{$product/name/text()}</name>
           <price currency="{$product/price/@currency}">
             {$product/price/text()}
           </price>
           <description>{$product/description/text()}</description>
         </basic-info>
         {if ($specs) then
           <specifications>
             {for $spec in $specs/*
              return element {local-name($spec)} {$spec/text()}}
           </specifications>
         else ()}
         <calculated-fields>
           <price-tier>
             {if ($product/price > 500) then "premium"
              else if ($product/price > 100) then "mid-range"
              else "budget"}
           </price-tier>
           <has-specs>{exists($specs)}</has-specs>
         </calculated-fields>
       </product>}
  </products>
</electronics-catalog>

Advanced Data Aggregation

(: Complex aggregation with multiple grouping levels :)
let $catalog := doc("catalog.xml")

<catalog-analysis>
  <by-category>
    {for $category in distinct-values($catalog//product/@category)
     let $category-products := $catalog//product[@category = $category]
     let $total-value := sum($category-products/price)
     let $product-count := count($category-products)
     return
       <category name="{$category}">
         <metrics>
           <count>{$product-count}</count>
           <total-value>{$total-value}</total-value>
           <average-price>{$total-value div $product-count}</average-price>
           <percentage-of-catalog>
             {($product-count div count($catalog//product)) * 100}
           </percentage-of-catalog>
         </metrics>
         
         <price-breakdown>
           {for $tier in ("budget", "mid-range", "premium")
            let $tier-products := 
              if ($tier = "budget") then $category-products[price <= 50]
              else if ($tier = "mid-range") then $category-products[price > 50 and price <= 200]
              else $category-products[price > 200]
            return
              <tier name="{$tier}">
                <count>{count($tier-products)}</count>
                <percentage>
                  {if ($product-count > 0) 
                   then (count($tier-products) div $product-count) * 100
                   else 0}
                </percentage>
              </tier>}
         </price-breakdown>
         
         <top-products>
           {for $product in $category-products
            order by $product/price descending
            return 
              if (position() <= 3) then
                <product rank="{position()}">
                  <name>{$product/name/text()}</name>
                  <price>{$product/price/text()}</price>
                </product>
              else ()}
         </top-products>
       </category>}
  </by-category>
</catalog-analysis>

Data Restructuring Patterns

Hierarchical to Flat Transformation

(: Transform nested order structure to flat records :)
let $orders := doc("orders.xml")

<flat-order-items>
  {for $order in $orders//order
   for $item in $order//item
   let $customer := $order/customer
   return
     <order-item>
       <order-info>
         <order-id>{$order/@id/string()}</order-id>
         <order-date>{$order/@date/string()}</order-date>
         <customer-name>{$customer/name/text()}</customer-name>
         <customer-email>{$customer/email/text()}</customer-email>
       </order-info>
       <item-info>
         <item-id>{$item/@id/string()}</item-id>
         <item-name>{$item/text()}</item-name>
         <quantity>{$item/@quantity/string()}</quantity>
         <unit-price>{$item/@price/string()}</unit-price>
         <line-total>{$item/@quantity * $item/@price}</line-total>
       </item-info>
       <derived-fields>
         <order-total>
           {sum($order//item/(@quantity * @price))}
         </order-total>
         <item-percentage>
           {($item/@quantity * $item/@price) div sum($order//item/(@quantity * @price)) * 100}
         </item-percentage>
       </derived-fields>
     </order-item>}
</flat-order-items>

Flat to Hierarchical Transformation

(: Group flat transaction data into hierarchical structure :)
let $transactions := doc("transactions.xml")

<customers>
  {for $customer-id in distinct-values($transactions//transaction/customer-id)
   let $customer-transactions := $transactions//transaction[customer-id = $customer-id]
   let $customer-name := $customer-transactions[1]/customer-name
   return
     <customer id="{$customer-id}">
       <name>{$customer-name/text()}</name>
       <summary>
         <transaction-count>{count($customer-transactions)}</transaction-count>
         <total-amount>{sum($customer-transactions/amount)}</total-amount>
         <average-transaction>{avg($customer-transactions/amount)}</average-transaction>
         <date-range>
           <first>{min($customer-transactions/date)}</first>
           <last>{max($customer-transactions/date)}</last>
         </date-range>
       </summary>
       
       <transactions-by-month>
         {for $month in distinct-values(
            for $date in $customer-transactions/date
            return substring($date, 1, 7)
          )
          let $month-transactions := $customer-transactions[starts-with(date, $month)]
          order by $month
          return
            <month value="{$month}">
              <count>{count($month-transactions)}</count>
              <total>{sum($month-transactions/amount)}</total>
              <transactions>
                {for $txn in $month-transactions
                 order by $txn/date
                 return
                   <transaction>
                     <date>{$txn/date/text()}</date>
                     <amount>{$txn/amount/text()}</amount>
                     <description>{$txn/description/text()}</description>
                   </transaction>}
              </transactions>
            </month>}
       </transactions-by-month>
     </customer>}
</customers>

Format Conversion with XQuery

XML to JSON Structure

(: Generate JSON-like structure using XQuery 3.1 :)
declare function local:to-json($products as element()*) as xs:string {
  let $json-products := 
    for $product in $products
    return concat(
      '{',
      '"id": "', $product/@id, '",',
      '"name": "', $product/name, '",',
      '"category": "', $product/@category, '",',
      '"price": {',
        '"amount": ', $product/price, ',',
        '"currency": "', $product/price/@currency, '"',
      '},',
      '"description": "', $product/description, '"',
      if ($product/specs) then
        concat(',',
        '"specs": {',
        string-join(
          for $spec in $product/specs/*
          return concat('"', local-name($spec), '": "', $spec, '"'),
          ','
        ),
        '}')
      else '',
      '}'
    )
  
  return concat(
    '{',
    '"catalog": {',
      '"products": [',
      string-join($json-products, ','),
      ']',
    '}',
    '}'
  )
};

(: Usage :)
local:to-json(doc("catalog.xml")//product)

XML to CSV Generation

(: Generate CSV output with XQuery :)
let $products := doc("catalog.xml")//product

let $header := "ID,Name,Category,Price,Currency,Description"

let $rows := 
  for $product in $products
  order by $product/@id
  return concat(
    $product/@id, ',',
    '"', replace($product/name, '"', '""'), '",',
    $product/@category, ',',
    $product/price, ',',
    $product/price/@currency, ',',
    '"', replace($product/description, '"', '""'), '"'
  )

return string-join(($header, $rows), "&#10;")

Advanced Transformation Techniques

Multi-Document Processing

(: Combine data from multiple XML sources :)
let $catalog := doc("catalog.xml")
let $inventory := doc("inventory.xml")
let $reviews := doc("reviews.xml")

<enriched-catalog>
  {for $product in $catalog//product
   let $product-id := $product/@id
   let $stock := $inventory//item[@product-id = $product-id]
   let $product-reviews := $reviews//review[@product-id = $product-id]
   
   return
     <product id="{$product-id}">
       {$product/*}
       
       <inventory-info>
         <in-stock>{exists($stock)}</in-stock>
         <quantity>{if ($stock) then $stock/quantity else 0}</quantity>
         <warehouse>{if ($stock) then $stock/warehouse else "none"}</warehouse>
       </inventory-info>
       
       <review-summary>
         <review-count>{count($product-reviews)}</review-count>
         {if ($product-reviews) then
           <ratings>
             <average>{avg($product-reviews/rating)}</average>
             <min>{min($product-reviews/rating)}</min>
             <max>{max($product-reviews/rating)}</max>
           </ratings>
         else ()}
         
         <recent-reviews>
           {for $review in $product-reviews
            order by $review/date descending
            return 
              if (position() <= 3) then
                <review>
                  <rating>{$review/rating/text()}</rating>
                  <comment>{$review/comment/text()}</comment>
                  <reviewer>{$review/reviewer/text()}</reviewer>
                  <date>{$review/date/text()}</date>
                </review>
              else ()}
         </recent-reviews>
       </review-summary>
       
       <computed-fields>
         <availability-status>
           {if (exists($stock) and $stock/quantity > 0) then "available"
            else if (exists($stock)) then "out-of-stock"
            else "discontinued"}
         </availability-status>
         
         <popularity-score>
           {let $review-factor := if ($product-reviews) 
                                 then count($product-reviews) * avg($product-reviews/rating)
                                 else 0
            let $stock-factor := if ($stock) then $stock/quantity else 0
            return $review-factor + $stock-factor}
         </popularity-score>
       </computed-fields>
     </product>}
</enriched-catalog>

Conditional Transformations

(: Transform based on parameters and conditions :)
declare variable $output-format external := "detailed";
declare variable $target-audience external := "general";
declare variable $price-filter external := 0;

let $catalog := doc("catalog.xml")
let $filtered-products := $catalog//product[price >= $price-filter]

return
  if ($output-format = "summary") then
    <product-summary audience="{$target-audience}">
      <count>{count($filtered-products)}</count>
      <items>
        {for $product in $filtered-products
         order by $product/name
         return
           <item>
             {$product/name/text()} - {$product/price/@currency}{$product/price/text()}
           </item>}
      </items>
    </product-summary>
    
  else if ($output-format = "detailed") then
    <detailed-catalog audience="{$target-audience}">
      {for $product in $filtered-products
       order by $product/@category, $product/price descending
       return
         <product>
           {$product/@*}
           {$product/*}
           
           {if ($target-audience = "technical") then
             <technical-metadata>
               <xml-depth>{count($product/ancestor-or-self::*)}</xml-depth>
               <element-count>{count($product/descendant-or-self::*)}</element-count>
               <attribute-count>{count($product//@*)}</attribute-count>
               <text-nodes>{count($product//text()[normalize-space()])}</text-nodes>
             </technical-metadata>
           else ()}
           
           {if ($target-audience = "business") then
             <business-metrics>
               <margin-estimate>{$product/price * 0.3}</margin-estimate>
               <category-rank>
                 {count($catalog//product[@category = $product/@category][price > $product/price]) + 1}
               </category-rank>
             </business-metrics>
           else ()}
         </product>}
    </detailed-catalog>
    
  else (: default full format :)
    <full-catalog audience="{$target-audience}" filter="price >= {$price-filter}">
      <metadata>
        <total-products>{count($filtered-products)}</total-products>
        <categories>{count(distinct-values($filtered-products/@category))}</categories>
        <price-range>
          <min>{min($filtered-products/price)}</min>
          <max>{max($filtered-products/price)}</max>
          <average>{avg($filtered-products/price)}</average>
        </price-range>
      </metadata>
      
      <products>
        {$filtered-products}
      </products>
    </full-catalog>

Performance Optimization for Transformations

Efficient Grouping with XQuery 3.0+

(: Use group by for efficient processing :)
let $orders := doc("orders.xml")//order

<sales-report>
  {for $order in $orders
   group by $month := substring($order/@date, 1, 7)
   order by $month
   return
     <monthly-sales month="{$month}">
       <order-count>{count($order)}</order-count>
       <total-revenue>{sum($order//item/(@quantity * @price))}</total-revenue>
       <average-order-value>
         {sum($order//item/(@quantity * @price)) div count($order)}
       </average-order-value>
       
       <top-customers>
         {for $customer-order in $order
          group by $customer := $customer-order/customer/name
          let $customer-total := sum($customer-order//item/(@quantity * @price))
          order by $customer-total descending
          return 
            if (position() <= 5) then
              <customer rank="{position()}">
                <name>{$customer}</name>
                <order-count>{count($customer-order)}</order-count>
                <total-spent>{$customer-total}</total-spent>
              </customer>
            else ()}
       </top-customers>
     </monthly-sales>}
</sales-report>

Memory-Efficient Processing

(: Process large datasets efficiently :)
declare function local:process-batch($items as element()*, $batch-size as xs:integer) {
  for $i in 1 to ceiling(count($items) div $batch-size)
  let $start := ($i - 1) * $batch-size + 1
  let $end := min(($i * $batch-size, count($items)))
  let $batch := subsequence($items, $start, $end - $start + 1)
  return
    <batch number="{$i}">
      <processed-count>{count($batch)}</processed-count>
      <batch-total>{sum($batch/price)}</batch-total>
      <items>
        {for $item in $batch
         return
           <item id="{$item/@id}">
             <name>{$item/name/text()}</name>
             <price>{$item/price/text()}</price>
           </item>}
      </items>
    </batch>
};

(: Usage for large product catalog :)
let $products := doc("large-catalog.xml")//product
return
  <batched-processing>
    {local:process-batch($products, 100)}
  </batched-processing>

Integration with External Systems

Parameterized Transformations

(: XQuery with external parameters for flexible transformations :)
declare variable $filter-category external;
declare variable $min-price external := 0;
declare variable $max-price external := 999999;
declare variable $sort-order external := "name";
declare variable $include-specs external := "true";

let $catalog := doc("catalog.xml")
let $filtered := $catalog//product[
  (empty($filter-category) or @category = $filter-category) and
  price >= $min-price and
  price <= $max-price
]

let $sorted := 
  if ($sort-order = "price-asc") then
    for $p in $filtered order by $p/price return $p
  else if ($sort-order = "price-desc") then
    for $p in $filtered order by $p/price descending return $p
  else if ($sort-order = "category") then
    for $p in $filtered order by $p/@category, $p/name return $p
  else
    for $p in $filtered order by $p/name return $p

return
  <filtered-catalog>
    <filter-criteria>
      <category>{if ($filter-category) then $filter-category else "all"}</category>
      <price-range min="{$min-price}" max="{$max-price}"/>
      <sort-order>{$sort-order}</sort-order>
      <include-specs>{$include-specs}</include-specs>
    </filter-criteria>
    
    <results count="{count($sorted)}">
      {for $product in $sorted
       return
         <product>
           {$product/@*}
           {$product/name}
           {$product/price}
           {$product/description}
           {if ($include-specs = "true") then $product/specs else ()}
         </product>}
    </results>
  </filtered-catalog>

Running XQuery Transformations

Command Line Processing

# Using Saxon
java -cp saxon.jar net.sf.saxon.Query -q:transform.xq -s:input.xml -o:output.xml

# With parameters
java -cp saxon.jar net.sf.saxon.Query -q:transform.xq -s:input.xml \
  output-format=summary target-audience=business

# Using BaseX
basex -i input.xml transform.xq > output.xml

Java Integration

public class XQueryTransformer {
    
    public void transform(String xmlPath, String xqueryPath, String outputPath,
                         Map<String, String> parameters) throws Exception {
        
        System.setProperty("javax.xml.xquery.datastore", ".");
        
        XQDataSource xqds = new SaxonXQDataSource();
        XQConnection xqconn = xqds.getConnection();
        
        // Load XQuery
        String xqueryString = Files.readString(Paths.get(xqueryPath));
        XQPreparedExpression xqpe = xqconn.prepareExpression(xqueryString);
        
        // Set parameters
        parameters.forEach((key, value) -> {
            try {
                xqpe.bindString(new QName(key), value, null);
            } catch (XQException e) {
                throw new RuntimeException(e);
            }
        });
        
        // Set context document
        xqpe.bindDocument(new QName("input-doc"), 
                         new FileInputStream(xmlPath), null);
        
        // Execute and write result
        XQResultSequence xqrs = xqpe.executeQuery();
        
        try (FileWriter writer = new FileWriter(outputPath)) {
            xqrs.writeSequence(writer, null);
        }
        
        xqconn.close();
    }
}

Best Practices for XQuery Transformations

Modular XQuery Design

(: Import utility modules :)
import module namespace utils = "http://example.com/utils" at "utils.xqm";
import module namespace formatters = "http://example.com/formatters" at "formatters.xqm";

(: Main transformation logic :)
let $data := doc("input.xml")
let $processed := utils:preprocess($data)
let $transformed := local:transform($processed)
return formatters:format-output($transformed)

declare function local:transform($data) {
  (: Transformation logic here :)
  $data
};

Error Handling

(: Robust error handling in transformations :)
try {
  let $catalog := doc("catalog.xml")
  return
    if (empty($catalog//product)) then
      <error>No products found in catalog</error>
    else
      <success>
        {for $product in $catalog//product
         return
           try {
             <product>
               <id>{$product/@id}</id>
               <name>{$product/name/text()}</name>
               <price>{number($product/price)}</price>
             </product>
           } catch * {
             <product-error>
               <id>{$product/@id}</id>
               <error>{$err:description}</error>
             </product-error>
           }}
      </success>
} catch * {
  <fatal-error>
    <code>{$err:code}</code>
    <description>{$err:description}</description>
  </fatal-error>
}

Conclusion

XQuery provides a powerful functional programming approach to XML transformation that excels at data extraction, complex queries, and analytical processing. Its SQL-like syntax and native support for grouping make it particularly effective for data-oriented transformation tasks.

Next Steps