XQuery for Transformation
While XQuery is covered comprehensively in the Advanced section, this page focuses specifically on using XQuery for XML transformation tasks, emphasizing its functional programming approach and data manipulation capabilities.
XQuery vs XSLT for Transformations
Aspect | XQuery | XSLT |
---|---|---|
Paradigm | Functional programming | Template-based |
Data Extraction | Excellent | Good |
Complex Queries | Excellent | Moderate |
Grouping | Native support | Complex patterns |
Output Formats | XML, Text | XML, HTML, Text |
Learning Curve | SQL-like, easier | Steeper |
Data Extraction and Transformation
Basic Data Extraction
Transform product catalog with filtering and restructuring:
(: Input XML structure similar to XSLT example :)
let $catalog := doc("catalog.xml")
(: Extract electronics products with enhanced structure :)
<electronics-catalog>
<summary>
<total-products>{count($catalog//product[@category='electronics'])}</total-products>
<average-price>{avg($catalog//product[@category='electronics']/price)}</average-price>
<price-range>
<min>{min($catalog//product[@category='electronics']/price)}</min>
<max>{max($catalog//product[@category='electronics']/price)}</max>
</price-range>
</summary>
<products>
{for $product in $catalog//product[@category='electronics']
let $specs := $product/specs
order by $product/price descending
return
<product id="{$product/@id}">
<basic-info>
<name>{$product/name/text()}</name>
<price currency="{$product/price/@currency}">
{$product/price/text()}
</price>
<description>{$product/description/text()}</description>
</basic-info>
{if ($specs) then
<specifications>
{for $spec in $specs/*
return element {local-name($spec)} {$spec/text()}}
</specifications>
else ()}
<calculated-fields>
<price-tier>
{if ($product/price > 500) then "premium"
else if ($product/price > 100) then "mid-range"
else "budget"}
</price-tier>
<has-specs>{exists($specs)}</has-specs>
</calculated-fields>
</product>}
</products>
</electronics-catalog>
Advanced Data Aggregation
(: Complex aggregation with multiple grouping levels :)
let $catalog := doc("catalog.xml")
<catalog-analysis>
<by-category>
{for $category in distinct-values($catalog//product/@category)
let $category-products := $catalog//product[@category = $category]
let $total-value := sum($category-products/price)
let $product-count := count($category-products)
return
<category name="{$category}">
<metrics>
<count>{$product-count}</count>
<total-value>{$total-value}</total-value>
<average-price>{$total-value div $product-count}</average-price>
<percentage-of-catalog>
{($product-count div count($catalog//product)) * 100}
</percentage-of-catalog>
</metrics>
<price-breakdown>
{for $tier in ("budget", "mid-range", "premium")
let $tier-products :=
if ($tier = "budget") then $category-products[price <= 50]
else if ($tier = "mid-range") then $category-products[price > 50 and price <= 200]
else $category-products[price > 200]
return
<tier name="{$tier}">
<count>{count($tier-products)}</count>
<percentage>
{if ($product-count > 0)
then (count($tier-products) div $product-count) * 100
else 0}
</percentage>
</tier>}
</price-breakdown>
<top-products>
{for $product in $category-products
order by $product/price descending
return
if (position() <= 3) then
<product rank="{position()}">
<name>{$product/name/text()}</name>
<price>{$product/price/text()}</price>
</product>
else ()}
</top-products>
</category>}
</by-category>
</catalog-analysis>
Data Restructuring Patterns
Hierarchical to Flat Transformation
(: Transform nested order structure to flat records :)
let $orders := doc("orders.xml")
<flat-order-items>
{for $order in $orders//order
for $item in $order//item
let $customer := $order/customer
return
<order-item>
<order-info>
<order-id>{$order/@id/string()}</order-id>
<order-date>{$order/@date/string()}</order-date>
<customer-name>{$customer/name/text()}</customer-name>
<customer-email>{$customer/email/text()}</customer-email>
</order-info>
<item-info>
<item-id>{$item/@id/string()}</item-id>
<item-name>{$item/text()}</item-name>
<quantity>{$item/@quantity/string()}</quantity>
<unit-price>{$item/@price/string()}</unit-price>
<line-total>{$item/@quantity * $item/@price}</line-total>
</item-info>
<derived-fields>
<order-total>
{sum($order//item/(@quantity * @price))}
</order-total>
<item-percentage>
{($item/@quantity * $item/@price) div sum($order//item/(@quantity * @price)) * 100}
</item-percentage>
</derived-fields>
</order-item>}
</flat-order-items>
Flat to Hierarchical Transformation
(: Group flat transaction data into hierarchical structure :)
let $transactions := doc("transactions.xml")
<customers>
{for $customer-id in distinct-values($transactions//transaction/customer-id)
let $customer-transactions := $transactions//transaction[customer-id = $customer-id]
let $customer-name := $customer-transactions[1]/customer-name
return
<customer id="{$customer-id}">
<name>{$customer-name/text()}</name>
<summary>
<transaction-count>{count($customer-transactions)}</transaction-count>
<total-amount>{sum($customer-transactions/amount)}</total-amount>
<average-transaction>{avg($customer-transactions/amount)}</average-transaction>
<date-range>
<first>{min($customer-transactions/date)}</first>
<last>{max($customer-transactions/date)}</last>
</date-range>
</summary>
<transactions-by-month>
{for $month in distinct-values(
for $date in $customer-transactions/date
return substring($date, 1, 7)
)
let $month-transactions := $customer-transactions[starts-with(date, $month)]
order by $month
return
<month value="{$month}">
<count>{count($month-transactions)}</count>
<total>{sum($month-transactions/amount)}</total>
<transactions>
{for $txn in $month-transactions
order by $txn/date
return
<transaction>
<date>{$txn/date/text()}</date>
<amount>{$txn/amount/text()}</amount>
<description>{$txn/description/text()}</description>
</transaction>}
</transactions>
</month>}
</transactions-by-month>
</customer>}
</customers>
Format Conversion with XQuery
XML to JSON Structure
(: Generate JSON-like structure using XQuery 3.1 :)
declare function local:to-json($products as element()*) as xs:string {
let $json-products :=
for $product in $products
return concat(
'{',
'"id": "', $product/@id, '",',
'"name": "', $product/name, '",',
'"category": "', $product/@category, '",',
'"price": {',
'"amount": ', $product/price, ',',
'"currency": "', $product/price/@currency, '"',
'},',
'"description": "', $product/description, '"',
if ($product/specs) then
concat(',',
'"specs": {',
string-join(
for $spec in $product/specs/*
return concat('"', local-name($spec), '": "', $spec, '"'),
','
),
'}')
else '',
'}'
)
return concat(
'{',
'"catalog": {',
'"products": [',
string-join($json-products, ','),
']',
'}',
'}'
)
};
(: Usage :)
local:to-json(doc("catalog.xml")//product)
XML to CSV Generation
(: Generate CSV output with XQuery :)
let $products := doc("catalog.xml")//product
let $header := "ID,Name,Category,Price,Currency,Description"
let $rows :=
for $product in $products
order by $product/@id
return concat(
$product/@id, ',',
'"', replace($product/name, '"', '""'), '",',
$product/@category, ',',
$product/price, ',',
$product/price/@currency, ',',
'"', replace($product/description, '"', '""'), '"'
)
return string-join(($header, $rows), " ")
Advanced Transformation Techniques
Multi-Document Processing
(: Combine data from multiple XML sources :)
let $catalog := doc("catalog.xml")
let $inventory := doc("inventory.xml")
let $reviews := doc("reviews.xml")
<enriched-catalog>
{for $product in $catalog//product
let $product-id := $product/@id
let $stock := $inventory//item[@product-id = $product-id]
let $product-reviews := $reviews//review[@product-id = $product-id]
return
<product id="{$product-id}">
{$product/*}
<inventory-info>
<in-stock>{exists($stock)}</in-stock>
<quantity>{if ($stock) then $stock/quantity else 0}</quantity>
<warehouse>{if ($stock) then $stock/warehouse else "none"}</warehouse>
</inventory-info>
<review-summary>
<review-count>{count($product-reviews)}</review-count>
{if ($product-reviews) then
<ratings>
<average>{avg($product-reviews/rating)}</average>
<min>{min($product-reviews/rating)}</min>
<max>{max($product-reviews/rating)}</max>
</ratings>
else ()}
<recent-reviews>
{for $review in $product-reviews
order by $review/date descending
return
if (position() <= 3) then
<review>
<rating>{$review/rating/text()}</rating>
<comment>{$review/comment/text()}</comment>
<reviewer>{$review/reviewer/text()}</reviewer>
<date>{$review/date/text()}</date>
</review>
else ()}
</recent-reviews>
</review-summary>
<computed-fields>
<availability-status>
{if (exists($stock) and $stock/quantity > 0) then "available"
else if (exists($stock)) then "out-of-stock"
else "discontinued"}
</availability-status>
<popularity-score>
{let $review-factor := if ($product-reviews)
then count($product-reviews) * avg($product-reviews/rating)
else 0
let $stock-factor := if ($stock) then $stock/quantity else 0
return $review-factor + $stock-factor}
</popularity-score>
</computed-fields>
</product>}
</enriched-catalog>
Conditional Transformations
(: Transform based on parameters and conditions :)
declare variable $output-format external := "detailed";
declare variable $target-audience external := "general";
declare variable $price-filter external := 0;
let $catalog := doc("catalog.xml")
let $filtered-products := $catalog//product[price >= $price-filter]
return
if ($output-format = "summary") then
<product-summary audience="{$target-audience}">
<count>{count($filtered-products)}</count>
<items>
{for $product in $filtered-products
order by $product/name
return
<item>
{$product/name/text()} - {$product/price/@currency}{$product/price/text()}
</item>}
</items>
</product-summary>
else if ($output-format = "detailed") then
<detailed-catalog audience="{$target-audience}">
{for $product in $filtered-products
order by $product/@category, $product/price descending
return
<product>
{$product/@*}
{$product/*}
{if ($target-audience = "technical") then
<technical-metadata>
<xml-depth>{count($product/ancestor-or-self::*)}</xml-depth>
<element-count>{count($product/descendant-or-self::*)}</element-count>
<attribute-count>{count($product//@*)}</attribute-count>
<text-nodes>{count($product//text()[normalize-space()])}</text-nodes>
</technical-metadata>
else ()}
{if ($target-audience = "business") then
<business-metrics>
<margin-estimate>{$product/price * 0.3}</margin-estimate>
<category-rank>
{count($catalog//product[@category = $product/@category][price > $product/price]) + 1}
</category-rank>
</business-metrics>
else ()}
</product>}
</detailed-catalog>
else (: default full format :)
<full-catalog audience="{$target-audience}" filter="price >= {$price-filter}">
<metadata>
<total-products>{count($filtered-products)}</total-products>
<categories>{count(distinct-values($filtered-products/@category))}</categories>
<price-range>
<min>{min($filtered-products/price)}</min>
<max>{max($filtered-products/price)}</max>
<average>{avg($filtered-products/price)}</average>
</price-range>
</metadata>
<products>
{$filtered-products}
</products>
</full-catalog>
Performance Optimization for Transformations
Efficient Grouping with XQuery 3.0+
(: Use group by for efficient processing :)
let $orders := doc("orders.xml")//order
<sales-report>
{for $order in $orders
group by $month := substring($order/@date, 1, 7)
order by $month
return
<monthly-sales month="{$month}">
<order-count>{count($order)}</order-count>
<total-revenue>{sum($order//item/(@quantity * @price))}</total-revenue>
<average-order-value>
{sum($order//item/(@quantity * @price)) div count($order)}
</average-order-value>
<top-customers>
{for $customer-order in $order
group by $customer := $customer-order/customer/name
let $customer-total := sum($customer-order//item/(@quantity * @price))
order by $customer-total descending
return
if (position() <= 5) then
<customer rank="{position()}">
<name>{$customer}</name>
<order-count>{count($customer-order)}</order-count>
<total-spent>{$customer-total}</total-spent>
</customer>
else ()}
</top-customers>
</monthly-sales>}
</sales-report>
Memory-Efficient Processing
(: Process large datasets efficiently :)
declare function local:process-batch($items as element()*, $batch-size as xs:integer) {
for $i in 1 to ceiling(count($items) div $batch-size)
let $start := ($i - 1) * $batch-size + 1
let $end := min(($i * $batch-size, count($items)))
let $batch := subsequence($items, $start, $end - $start + 1)
return
<batch number="{$i}">
<processed-count>{count($batch)}</processed-count>
<batch-total>{sum($batch/price)}</batch-total>
<items>
{for $item in $batch
return
<item id="{$item/@id}">
<name>{$item/name/text()}</name>
<price>{$item/price/text()}</price>
</item>}
</items>
</batch>
};
(: Usage for large product catalog :)
let $products := doc("large-catalog.xml")//product
return
<batched-processing>
{local:process-batch($products, 100)}
</batched-processing>
Integration with External Systems
Parameterized Transformations
(: XQuery with external parameters for flexible transformations :)
declare variable $filter-category external;
declare variable $min-price external := 0;
declare variable $max-price external := 999999;
declare variable $sort-order external := "name";
declare variable $include-specs external := "true";
let $catalog := doc("catalog.xml")
let $filtered := $catalog//product[
(empty($filter-category) or @category = $filter-category) and
price >= $min-price and
price <= $max-price
]
let $sorted :=
if ($sort-order = "price-asc") then
for $p in $filtered order by $p/price return $p
else if ($sort-order = "price-desc") then
for $p in $filtered order by $p/price descending return $p
else if ($sort-order = "category") then
for $p in $filtered order by $p/@category, $p/name return $p
else
for $p in $filtered order by $p/name return $p
return
<filtered-catalog>
<filter-criteria>
<category>{if ($filter-category) then $filter-category else "all"}</category>
<price-range min="{$min-price}" max="{$max-price}"/>
<sort-order>{$sort-order}</sort-order>
<include-specs>{$include-specs}</include-specs>
</filter-criteria>
<results count="{count($sorted)}">
{for $product in $sorted
return
<product>
{$product/@*}
{$product/name}
{$product/price}
{$product/description}
{if ($include-specs = "true") then $product/specs else ()}
</product>}
</results>
</filtered-catalog>
Running XQuery Transformations
Command Line Processing
# Using Saxon
java -cp saxon.jar net.sf.saxon.Query -q:transform.xq -s:input.xml -o:output.xml
# With parameters
java -cp saxon.jar net.sf.saxon.Query -q:transform.xq -s:input.xml \
output-format=summary target-audience=business
# Using BaseX
basex -i input.xml transform.xq > output.xml
Java Integration
public class XQueryTransformer {
public void transform(String xmlPath, String xqueryPath, String outputPath,
Map<String, String> parameters) throws Exception {
System.setProperty("javax.xml.xquery.datastore", ".");
XQDataSource xqds = new SaxonXQDataSource();
XQConnection xqconn = xqds.getConnection();
// Load XQuery
String xqueryString = Files.readString(Paths.get(xqueryPath));
XQPreparedExpression xqpe = xqconn.prepareExpression(xqueryString);
// Set parameters
parameters.forEach((key, value) -> {
try {
xqpe.bindString(new QName(key), value, null);
} catch (XQException e) {
throw new RuntimeException(e);
}
});
// Set context document
xqpe.bindDocument(new QName("input-doc"),
new FileInputStream(xmlPath), null);
// Execute and write result
XQResultSequence xqrs = xqpe.executeQuery();
try (FileWriter writer = new FileWriter(outputPath)) {
xqrs.writeSequence(writer, null);
}
xqconn.close();
}
}
Best Practices for XQuery Transformations
Modular XQuery Design
(: Import utility modules :)
import module namespace utils = "http://example.com/utils" at "utils.xqm";
import module namespace formatters = "http://example.com/formatters" at "formatters.xqm";
(: Main transformation logic :)
let $data := doc("input.xml")
let $processed := utils:preprocess($data)
let $transformed := local:transform($processed)
return formatters:format-output($transformed)
declare function local:transform($data) {
(: Transformation logic here :)
$data
};
Error Handling
(: Robust error handling in transformations :)
try {
let $catalog := doc("catalog.xml")
return
if (empty($catalog//product)) then
<error>No products found in catalog</error>
else
<success>
{for $product in $catalog//product
return
try {
<product>
<id>{$product/@id}</id>
<name>{$product/name/text()}</name>
<price>{number($product/price)}</price>
</product>
} catch * {
<product-error>
<id>{$product/@id}</id>
<error>{$err:description}</error>
</product-error>
}}
</success>
} catch * {
<fatal-error>
<code>{$err:code}</code>
<description>{$err:description}</description>
</fatal-error>
}
Conclusion
XQuery provides a powerful functional programming approach to XML transformation that excels at data extraction, complex queries, and analytical processing. Its SQL-like syntax and native support for grouping make it particularly effective for data-oriented transformation tasks.
Next Steps
- Learn Advanced Techniques for complex scenarios
- Explore Tools and Processors for implementation options
- Compare with XSLT for template-based approaches