Advanced Techniques
Advanced XML transformation techniques go beyond basic XSLT and XQuery to address complex scenarios involving large datasets, real-time processing, multiple data sources, and sophisticated business logic. These techniques are essential for enterprise-level XML processing systems.
Streaming Transformations
SAX-Based Streaming Transformation
For very large XML documents that do not fit in memory, a SAX handler can apply the transformation on the fly while writing the result through a StAX XMLStreamWriter:
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class StreamingTransformer extends DefaultHandler {
private XMLStreamWriter writer;
private StringBuilder currentText;
private String currentElement;
private Map<String, String> transformationRules;
private boolean inTransformableElement = false;
public StreamingTransformer(XMLStreamWriter writer) {
this.writer = writer;
this.currentText = new StringBuilder();
this.transformationRules = new HashMap<>();
initializeRules();
}
private void initializeRules() {
// Define transformation rules
transformationRules.put("old-price", "price");
transformationRules.put("product-name", "title");
transformationRules.put("description", "summary");
}
@Override
public void startDocument() throws SAXException {
try {
writer.writeStartDocument();
writer.writeStartElement("transformed-catalog");
} catch (XMLStreamException e) {
throw new SAXException(e);
}
}
@Override
public void startElement(String uri, String localName, String qName, Attributes attrs)
throws SAXException {
currentElement = qName;
currentText.setLength(0);
try {
String transformedName = transformationRules.getOrDefault(qName, qName);
if ("product".equals(qName)) {
// Transform product element with enhanced attributes
writer.writeStartElement("item");
// Copy and transform attributes
for (int i = 0; i < attrs.getLength(); i++) {
String attrName = attrs.getQName(i);
String attrValue = attrs.getValue(i);
// Apply attribute transformations
if ("id".equals(attrName)) {
writer.writeAttribute("item-id", "ITEM_" + attrValue);
} else if ("category".equals(attrName)) {
writer.writeAttribute("type", attrValue.toUpperCase());
} else {
writer.writeAttribute(attrName, attrValue);
}
}
inTransformableElement = true;
} else if (inTransformableElement) {
writer.writeStartElement(transformedName);
// Copy attributes
for (int i = 0; i < attrs.getLength(); i++) {
writer.writeAttribute(attrs.getQName(i), attrs.getValue(i));
}
}
} catch (XMLStreamException e) {
throw new SAXException(e);
}
}
@Override
public void characters(char[] chars, int start, int length) throws SAXException {
currentText.append(chars, start, length);
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
try {
if ("product".equals(qName)) {
// Add computed fields
writer.writeStartElement("computed-fields");
writer.writeStartElement("processed-date");
writer.writeCharacters(new Date().toString());
writer.writeEndElement();
writer.writeEndElement();
writer.writeEndElement(); // Close item element
inTransformableElement = false;
} else if (inTransformableElement) {
String text = currentText.toString().trim();
// Apply text transformations
if ("old-price".equals(qName) && !text.isEmpty()) {
try {
double price = Double.parseDouble(text);
double convertedPrice = price * 1.1; // Apply conversion
writer.writeCharacters(String.format("%.2f", convertedPrice));
} catch (NumberFormatException e) {
writer.writeCharacters(text);
}
} else if ("description".equals(qName)) {
// Truncate and add ellipsis if too long
if (text.length() > 100) {
writer.writeCharacters(text.substring(0, 100) + "...");
} else {
writer.writeCharacters(text);
}
} else if (!text.isEmpty()) {
writer.writeCharacters(text);
}
writer.writeEndElement(); // element name was already mapped in startElement
}
} catch (XMLStreamException e) {
throw new SAXException(e);
}
currentText.setLength(0);
}
@Override
public void endDocument() throws SAXException {
try {
writer.writeEndElement(); // Close transformed-catalog
writer.writeEndDocument();
writer.close();
} catch (XMLStreamException e) {
throw new SAXException(e);
}
}
}
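To run the streaming transformation, feed the handler to an ordinary SAX parser and point it at a StAX writer. The driver below is a minimal sketch; the file names are placeholders, and the handler itself closes the writer in endDocument().

import java.io.FileInputStream;
import java.io.FileOutputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;

public class StreamingTransformerExample {
    public static void main(String[] args) throws Exception {
        try (FileInputStream in = new FileInputStream("catalog.xml");          // placeholder input
             FileOutputStream out = new FileOutputStream("transformed.xml")) { // placeholder output
            XMLStreamWriter writer = XMLOutputFactory.newInstance()
                    .createXMLStreamWriter(out, "UTF-8");
            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
            parserFactory.setNamespaceAware(true);
            SAXParser parser = parserFactory.newSAXParser();
            // The handler emits the transformed document as events arrive
            // and closes the writer in endDocument().
            parser.parse(in, new StreamingTransformer(writer));
        }
    }
}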
StAX-Based Incremental Transformation
import java.io.InputStream;
import java.io.OutputStream;
import java.time.Instant;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;

public class IncrementalTransformer {
public void transformIncrementally(InputStream input, OutputStream output)
throws XMLStreamException {
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
XMLStreamReader reader = inputFactory.createXMLStreamReader(input);
XMLStreamWriter writer = outputFactory.createXMLStreamWriter(output);
TransformationContext context = new TransformationContext();
while (reader.hasNext()) {
int event = reader.next();
switch (event) {
case XMLStreamConstants.START_DOCUMENT:
handleStartDocument(writer, context);
break;
case XMLStreamConstants.START_ELEMENT:
handleStartElement(reader, writer, context);
break;
case XMLStreamConstants.CHARACTERS:
handleCharacters(reader, writer, context);
break;
case XMLStreamConstants.END_ELEMENT:
handleEndElement(reader, writer, context);
break;
case XMLStreamConstants.END_DOCUMENT:
handleEndDocument(writer, context);
break;
}
}
reader.close();
writer.close();
}
private void handleStartElement(XMLStreamReader reader, XMLStreamWriter writer,
TransformationContext context) throws XMLStreamException {
String elementName = reader.getLocalName();
context.pushElement(elementName);
if ("product".equals(elementName)) {
// Transform product to enhanced structure
writer.writeStartElement("enhanced-product");
// Copy attributes with transformations
for (int i = 0; i < reader.getAttributeCount(); i++) {
String attrName = reader.getAttributeLocalName(i);
String attrValue = reader.getAttributeValue(i);
if ("id".equals(attrName)) {
writer.writeAttribute("product-id", attrValue);
writer.writeAttribute("generated-key", "PROD_" + attrValue + "_" + System.currentTimeMillis());
} else {
writer.writeAttribute(attrName, attrValue);
}
}
// Add metadata
writer.writeStartElement("metadata");
writer.writeStartElement("transformation-timestamp");
writer.writeCharacters(Instant.now().toString());
writer.writeEndElement();
writer.writeStartElement("source");
writer.writeCharacters("legacy-system");
writer.writeEndElement();
writer.writeEndElement();
} else if (context.isInContext("product")) {
// Transform child elements
String transformedName = transformElementName(elementName);
writer.writeStartElement(transformedName);
// Copy attributes
for (int i = 0; i < reader.getAttributeCount(); i++) {
writer.writeAttribute(
reader.getAttributeLocalName(i),
reader.getAttributeValue(i)
);
}
} else {
// Pass through unchanged
writer.writeStartElement(elementName);
for (int i = 0; i < reader.getAttributeCount(); i++) {
writer.writeAttribute(
reader.getAttributeLocalName(i),
reader.getAttributeValue(i)
);
}
}
}
private String transformElementName(String elementName) {
switch (elementName) {
case "name": return "title";
case "desc": return "description";
case "cost": return "price";
default: return elementName;
}
}
// Additional handler methods...
}
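The IncrementalTransformer relies on a TransformationContext helper that is not shown above. A minimal sketch, assuming all it needs is an element-name stack to answer isInContext() queries:

import java.util.ArrayDeque;
import java.util.Deque;

// Hypothetical helper assumed by IncrementalTransformer: tracks the current
// element ancestry so handlers can ask questions such as isInContext("product").
public class TransformationContext {
    private final Deque<String> elementStack = new ArrayDeque<>();

    public void pushElement(String name) {
        elementStack.push(name);
    }

    public void popElement() {
        if (!elementStack.isEmpty()) {
            elementStack.pop();
        }
    }

    public boolean isInContext(String name) {
        return elementStack.contains(name);
    }

    public int depth() {
        return elementStack.size();
    }
}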
Pipeline Processing
Multi-Stage Transformation Pipeline
public class TransformationPipeline {
private final List<TransformationStage> stages;
public TransformationPipeline() {
this.stages = new ArrayList<>();
}
public TransformationPipeline addStage(TransformationStage stage) {
stages.add(stage);
return this;
}
public void execute(InputStream input, OutputStream output) throws Exception {
ByteArrayInputStream currentInput = new ByteArrayInputStream(
input.readAllBytes()
);
for (int i = 0; i < stages.size(); i++) {
TransformationStage stage = stages.get(i);
if (i == stages.size() - 1) {
// Last stage outputs to final destination
stage.transform(currentInput, output);
} else {
// Intermediate stage outputs to next stage input
ByteArrayOutputStream intermediateOutput = new ByteArrayOutputStream();
stage.transform(currentInput, intermediateOutput);
currentInput = new ByteArrayInputStream(intermediateOutput.toByteArray());
}
}
}
public interface TransformationStage {
void transform(InputStream input, OutputStream output) throws Exception;
String getName();
}
// Built-in transformation stages
public static class XSLTStage implements TransformationStage {
private final String xsltPath;
private final Map<String, String> parameters;
public XSLTStage(String xsltPath, Map<String, String> parameters) {
this.xsltPath = xsltPath;
this.parameters = parameters;
}
@Override
public void transform(InputStream input, OutputStream output) throws Exception {
TransformerFactory factory = TransformerFactory.newInstance();
Transformer transformer = factory.newTransformer(new StreamSource(xsltPath));
parameters.forEach(transformer::setParameter);
transformer.transform(
new StreamSource(input),
new StreamResult(output)
);
}
@Override
public String getName() {
return "XSLT: " + xsltPath;
}
}
public static class ValidationStage implements TransformationStage {
private final String schemaPath;
public ValidationStage(String schemaPath) {
this.schemaPath = schemaPath;
}
@Override
public void transform(InputStream input, OutputStream output) throws Exception {
// Validate and pass through
SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
Schema schema = schemaFactory.newSchema(new File(schemaPath));
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
dbFactory.setNamespaceAware(true);
dbFactory.setSchema(schema);
DocumentBuilder builder = dbFactory.newDocumentBuilder();
builder.setErrorHandler(new ValidationErrorHandler());
Document doc = builder.parse(input);
// Write validated document to output
TransformerFactory.newInstance().newTransformer()
.transform(new DOMSource(doc), new StreamResult(output));
}
@Override
public String getName() {
return "Validation: " + schemaPath;
}
}
public static class FilterStage implements TransformationStage {
private final Predicate<Element> filter;
public FilterStage(Predicate<Element> filter) {
this.filter = filter;
}
@Override
public void transform(InputStream input, OutputStream output) throws Exception {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document doc = builder.parse(input);
// Apply filtering
NodeList elements = doc.getElementsByTagName("*");
List<Node> toRemove = new ArrayList<>();
for (int i = 0; i < elements.getLength(); i++) {
Element element = (Element) elements.item(i);
if (!filter.test(element)) {
toRemove.add(element);
}
}
// Remove filtered elements
toRemove.forEach(node -> node.getParentNode().removeChild(node));
// Write filtered document
TransformerFactory.newInstance().newTransformer()
.transform(new DOMSource(doc), new StreamResult(output));
}
@Override
public String getName() {
return "Filter Stage";
}
}
}
// Usage example
public class PipelineExample {
public void processComplexTransformation() throws Exception {
TransformationPipeline pipeline = new TransformationPipeline()
.addStage(new TransformationPipeline.ValidationStage("input-schema.xsd"))
.addStage(new TransformationPipeline.XSLTStage("normalize.xsl", Map.of("version", "2.0")))
.addStage(new TransformationPipeline.FilterStage(element ->
!"deprecated".equals(element.getAttribute("status"))))
.addStage(new TransformationPipeline.XSLTStage("enrich.xsl", Map.of("timestamp", "true")))
.addStage(new TransformationPipeline.XSLTStage("format-output.xsl", Map.of()));
try (FileInputStream input = new FileInputStream("input.xml");
FileOutputStream output = new FileOutputStream("output.xml")) {
pipeline.execute(input, output);
}
}
}
Parallel and Concurrent Transformations
Parallel Processing for Large Datasets
public class ParallelTransformer {
private final ExecutorService executorService;
private final int batchSize;
public ParallelTransformer(int threadCount, int batchSize) {
this.executorService = Executors.newFixedThreadPool(threadCount);
this.batchSize = batchSize;
}
public void transformInParallel(List<File> xmlFiles,
String xsltPath,
File outputDirectory) throws Exception {
List<CompletableFuture<Void>> futures = new ArrayList<>();
// Process files in batches
for (int i = 0; i < xmlFiles.size(); i += batchSize) {
int endIndex = Math.min(i + batchSize, xmlFiles.size());
List<File> batch = xmlFiles.subList(i, endIndex);
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
try {
processBatch(batch, xsltPath, outputDirectory);
} catch (Exception e) {
throw new RuntimeException(e);
}
}, executorService);
futures.add(future);
}
// Wait for all batches to complete
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
}
private void processBatch(List<File> files, String xsltPath, File outputDirectory)
throws Exception {
TransformerFactory factory = TransformerFactory.newInstance();
Transformer transformer = factory.newTransformer(new StreamSource(xsltPath));
for (File file : files) {
String outputFileName = file.getName().replace(".xml", "_transformed.xml");
File outputFile = new File(outputDirectory, outputFileName);
transformer.transform(
new StreamSource(file),
new StreamResult(outputFile)
);
}
}
public void shutdown() {
executorService.shutdown();
}
}
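A typical invocation gathers the input files, runs the batches, and shuts the pool down in a finally block. The directory names, stylesheet path, thread count, and batch size below are illustrative:

import java.io.File;
import java.util.Arrays;
import java.util.List;

public class ParallelTransformerExample {
    public static void main(String[] args) throws Exception {
        File inputDir = new File("input-xml");    // placeholder directory
        File outputDir = new File("output-xml");  // placeholder directory
        outputDir.mkdirs();

        File[] files = inputDir.listFiles((dir, name) -> name.endsWith(".xml"));
        List<File> xmlFiles = Arrays.asList(files != null ? files : new File[0]);

        ParallelTransformer transformer = new ParallelTransformer(4, 25);
        try {
            transformer.transformInParallel(xmlFiles, "transform.xsl", outputDir);
        } finally {
            transformer.shutdown();
        }
    }
}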
Fork-Join Transformation
public class ForkJoinTransformation extends RecursiveTask<Document> {
private final Document sourceDoc;
private final int threshold;
private final String nodeName;
public ForkJoinTransformation(Document sourceDoc, String nodeName, int threshold) {
this.sourceDoc = sourceDoc;
this.nodeName = nodeName;
this.threshold = threshold;
}
@Override
protected Document compute() {
NodeList nodes = sourceDoc.getElementsByTagName(nodeName);
if (nodes.getLength() <= threshold) {
// Process directly
return transformDocument(sourceDoc);
} else {
// Split the work
try {
Document leftDoc = cloneDocument(sourceDoc);
Document rightDoc = cloneDocument(sourceDoc);
splitNodes(nodes, leftDoc, rightDoc);
ForkJoinTransformation leftTask = new ForkJoinTransformation(leftDoc, nodeName, threshold);
ForkJoinTransformation rightTask = new ForkJoinTransformation(rightDoc, nodeName, threshold);
leftTask.fork();
Document rightResult = rightTask.compute();
Document leftResult = leftTask.join();
return mergeDocuments(leftResult, rightResult);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
private Document transformDocument(Document doc) {
try {
// Apply transformation logic
NodeList products = doc.getElementsByTagName("product");
for (int i = 0; i < products.getLength(); i++) {
Element product = (Element) products.item(i);
// Add processing metadata
Element metadata = doc.createElement("processing-info");
metadata.setAttribute("thread", Thread.currentThread().getName());
metadata.setAttribute("timestamp", Instant.now().toString());
product.appendChild(metadata);
// Apply business logic transformations
transformProductElement(product);
}
return doc;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void transformProductElement(Element product) {
// Example transformation logic
Element priceElement = (Element) product.getElementsByTagName("price").item(0);
if (priceElement != null) {
double price = Double.parseDouble(priceElement.getTextContent());
// Add tax calculation
Element taxElement = product.getOwnerDocument().createElement("tax");
taxElement.setTextContent(String.valueOf(price * 0.1));
product.appendChild(taxElement);
// Add total with tax
Element totalElement = product.getOwnerDocument().createElement("total");
totalElement.setTextContent(String.valueOf(price * 1.1));
product.appendChild(totalElement);
}
}
}
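The cloneDocument, splitNodes, and mergeDocuments helpers are not shown above; splitting and merging are domain-specific, but cloning can be done with an identity transform. Below is a sketch of one possible cloneDocument body and of submitting the task to a ForkJoinPool; the input file and threshold are placeholders:

import java.util.concurrent.ForkJoinPool;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import org.w3c.dom.Document;

public class ForkJoinTransformationExample {

    // One possible body for the elided cloneDocument helper:
    // an identity transform that copies the source into a fresh DOM tree.
    static Document cloneDocument(Document source) throws Exception {
        Transformer identity = TransformerFactory.newInstance().newTransformer();
        DOMResult copy = new DOMResult();
        identity.transform(new DOMSource(source), copy);
        return (Document) copy.getNode();
    }

    public static void main(String[] args) throws Exception {
        Document catalog = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse("catalog.xml"); // placeholder input file

        // Split whenever a chunk still holds more than 500 product elements
        ForkJoinPool pool = ForkJoinPool.commonPool();
        Document transformed = pool.invoke(
                new ForkJoinTransformation(catalog, "product", 500));

        System.out.println("Products processed: "
                + transformed.getElementsByTagName("product").getLength());
    }
}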
Dynamic Transformation Generation
Rule-Based Transformation Engine
public class RuleBasedTransformer {
private final List<TransformationRule> rules;
private final Map<String, Object> context;
public RuleBasedTransformer() {
this.rules = new ArrayList<>();
this.context = new HashMap<>();
}
public RuleBasedTransformer addRule(TransformationRule rule) {
rules.add(rule);
return this;
}
public RuleBasedTransformer setContext(String key, Object value) {
context.put(key, value);
return this;
}
public Document transform(Document input) throws Exception {
Document result = cloneDocument(input);
// Apply rules in order
for (TransformationRule rule : rules) {
if (rule.isApplicable(result, context)) {
result = rule.apply(result, context);
}
}
return result;
}
public interface TransformationRule {
boolean isApplicable(Document document, Map<String, Object> context);
Document apply(Document document, Map<String, Object> context) throws Exception;
String getName();
int getPriority();
}
// Example rules
public static class PriceConversionRule implements TransformationRule {
private final String fromCurrency;
private final String toCurrency;
private final double exchangeRate;
public PriceConversionRule(String fromCurrency, String toCurrency, double exchangeRate) {
this.fromCurrency = fromCurrency;
this.toCurrency = toCurrency;
this.exchangeRate = exchangeRate;
}
@Override
public boolean isApplicable(Document document, Map<String, Object> context) {
NodeList prices = document.getElementsByTagName("price");
for (int i = 0; i < prices.getLength(); i++) {
Element price = (Element) prices.item(i);
if (fromCurrency.equals(price.getAttribute("currency"))) {
return true;
}
}
return false;
}
@Override
public Document apply(Document document, Map<String, Object> context) throws Exception {
NodeList prices = document.getElementsByTagName("price");
for (int i = 0; i < prices.getLength(); i++) {
Element price = (Element) prices.item(i);
if (fromCurrency.equals(price.getAttribute("currency"))) {
double originalPrice = Double.parseDouble(price.getTextContent());
double convertedPrice = originalPrice * exchangeRate;
price.setTextContent(String.format("%.2f", convertedPrice));
price.setAttribute("currency", toCurrency);
price.setAttribute("original-currency", fromCurrency);
price.setAttribute("exchange-rate", String.valueOf(exchangeRate));
}
}
return document;
}
@Override
public String getName() {
return "Currency Conversion: " + fromCurrency + " to " + toCurrency;
}
@Override
public int getPriority() {
return 100;
}
}
public static class FieldMappingRule implements TransformationRule {
private final Map<String, String> fieldMappings;
public FieldMappingRule(Map<String, String> fieldMappings) {
this.fieldMappings = fieldMappings;
}
@Override
public boolean isApplicable(Document document, Map<String, Object> context) {
return fieldMappings.keySet().stream()
.anyMatch(oldName -> document.getElementsByTagName(oldName).getLength() > 0);
}
@Override
public Document apply(Document document, Map<String, Object> context) throws Exception {
for (Map.Entry<String, String> mapping : fieldMappings.entrySet()) {
String oldName = mapping.getKey();
String newName = mapping.getValue();
NodeList oldElements = document.getElementsByTagName(oldName);
List<Element> elementsToRename = new ArrayList<>();
for (int i = 0; i < oldElements.getLength(); i++) {
elementsToRename.add((Element) oldElements.item(i));
}
for (Element oldElement : elementsToRename) {
Element newElement = document.createElement(newName);
// Copy attributes
NamedNodeMap attributes = oldElement.getAttributes();
for (int i = 0; i < attributes.getLength(); i++) {
Attr attr = (Attr) attributes.item(i);
newElement.setAttribute(attr.getName(), attr.getValue());
}
// Copy children
while (oldElement.hasChildNodes()) {
newElement.appendChild(oldElement.getFirstChild());
}
// Replace old element with new
oldElement.getParentNode().replaceChild(newElement, oldElement);
}
}
return document;
}
@Override
public String getName() {
return "Field Mapping: " + fieldMappings.size() + " mappings";
}
@Override
public int getPriority() {
return 50;
}
}
}
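Wiring the rules together might look like the following sketch. The cloneDocument call inside transform() is assumed to be an identity-transform copy (as in the fork-join example above), and the exchange rate, field mappings, and file name here are illustrative:

import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;

public class RuleBasedTransformerExample {
    public static void main(String[] args) throws Exception {
        Document input = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse("products.xml"); // placeholder input file

        RuleBasedTransformer transformer = new RuleBasedTransformer()
                .addRule(new RuleBasedTransformer.PriceConversionRule("USD", "EUR", 0.92))
                .addRule(new RuleBasedTransformer.FieldMappingRule(
                        Map.of("desc", "description", "cost", "price")))
                .setContext("source-system", "legacy-catalog");

        Document output = transformer.transform(input);
        System.out.println("Transformed root element: "
                + output.getDocumentElement().getTagName());
    }
}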
Data Enrichment and Augmentation
External Data Integration
public class DataEnrichmentTransformer {
private final Map<String, DataSource> dataSources;
private final Cache<String, Object> cache;
public DataEnrichmentTransformer() {
this.dataSources = new HashMap<>();
this.cache = CacheBuilder.newBuilder()
.maximumSize(1000)
.expireAfterWrite(1, TimeUnit.HOURS)
.build();
}
public void addDataSource(String name, DataSource dataSource) {
dataSources.put(name, dataSource);
}
public Document enrichDocument(Document document) throws Exception {
NodeList products = document.getElementsByTagName("product");
for (int i = 0; i < products.getLength(); i++) {
Element product = (Element) products.item(i);
enrichProduct(product);
}
return document;
}
private void enrichProduct(Element product) throws Exception {
String productId = product.getAttribute("id");
// Enrich with inventory data
InventoryInfo inventory = getInventoryInfo(productId);
if (inventory != null) {
Element inventoryElement = product.getOwnerDocument().createElement("inventory");
inventoryElement.setAttribute("quantity", String.valueOf(inventory.getQuantity()));
inventoryElement.setAttribute("warehouse", inventory.getWarehouse());
inventoryElement.setAttribute("last-updated", inventory.getLastUpdated().toString());
product.appendChild(inventoryElement);
}
// Enrich with pricing data
PricingInfo pricing = getPricingInfo(productId);
if (pricing != null) {
Element pricingElement = product.getOwnerDocument().createElement("pricing");
pricingElement.setAttribute("list-price", String.valueOf(pricing.getListPrice()));
pricingElement.setAttribute("discount", String.valueOf(pricing.getDiscount()));
pricingElement.setAttribute("effective-price", String.valueOf(pricing.getEffectivePrice()));
product.appendChild(pricingElement);
}
// Enrich with reviews
ReviewSummary reviews = getReviewSummary(productId);
if (reviews != null) {
Element reviewsElement = product.getOwnerDocument().createElement("reviews");
reviewsElement.setAttribute("count", String.valueOf(reviews.getCount()));
reviewsElement.setAttribute("average-rating", String.valueOf(reviews.getAverageRating()));
reviewsElement.setAttribute("recommendation-percentage",
String.valueOf(reviews.getRecommendationPercentage()));
product.appendChild(reviewsElement);
}
}
private InventoryInfo getInventoryInfo(String productId) throws Exception {
String cacheKey = "inventory:" + productId;
InventoryInfo cached = (InventoryInfo) cache.getIfPresent(cacheKey);
if (cached != null) {
return cached;
}
DataSource inventoryDS = dataSources.get("inventory");
if (inventoryDS == null) {
return null;
}
InventoryInfo info = (InventoryInfo) inventoryDS.lookup(productId);
if (info != null) {
cache.put(cacheKey, info); // Guava caches reject null values, so only cache actual hits
}
return info;
}
public interface DataSource {
Object lookup(String key) throws Exception;
String getName();
}
public static class DatabaseDataSource implements DataSource {
// Fully qualified to avoid a clash with the enclosing DataSource interface
private final javax.sql.DataSource sqlDataSource;
private final String query;
public DatabaseDataSource(javax.sql.DataSource sqlDataSource, String query) {
this.sqlDataSource = sqlDataSource;
this.query = query;
}
@Override
public Object lookup(String key) throws Exception {
try (Connection conn = sqlDataSource.getConnection();
PreparedStatement stmt = conn.prepareStatement(query)) {
stmt.setString(1, key);
ResultSet rs = stmt.executeQuery();
if (rs.next()) {
return mapResultSet(rs);
}
return null;
}
}
private Object mapResultSet(ResultSet rs) throws SQLException {
// Map result set to appropriate object
// Implementation depends on specific data structure
return new InventoryInfo(
rs.getInt("quantity"),
rs.getString("warehouse"),
rs.getTimestamp("last_updated").toLocalDateTime()
);
}
@Override
public String getName() {
return "Database";
}
}
public static class RestApiDataSource implements DataSource {
private final String baseUrl;
private final HttpClient httpClient;
public RestApiDataSource(String baseUrl) {
this.baseUrl = baseUrl;
this.httpClient = HttpClient.newHttpClient();
}
@Override
public Object lookup(String key) throws Exception {
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(baseUrl + "/" + key))
.header("Accept", "application/json")
.build();
HttpResponse<String> response = httpClient.send(request,
HttpResponse.BodyHandlers.ofString());
if (response.statusCode() == 200) {
return parseJsonResponse(response.body());
}
return null;
}
private Object parseJsonResponse(String json) {
// Parse JSON and return appropriate object
// Implementation depends on JSON structure and target object
return null; // Placeholder
}
@Override
public String getName() {
return "REST API";
}
}
}
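The enrichment code assumes a handful of small value types (InventoryInfo, PricingInfo, ReviewSummary) that are not shown. A minimal sketch of InventoryInfo, matching the constructor used in mapResultSet and the getters read in enrichProduct; the other two would follow the same pattern:

import java.time.LocalDateTime;

// Hypothetical value object assumed by DataEnrichmentTransformer.
public class InventoryInfo {
    private final int quantity;
    private final String warehouse;
    private final LocalDateTime lastUpdated;

    public InventoryInfo(int quantity, String warehouse, LocalDateTime lastUpdated) {
        this.quantity = quantity;
        this.warehouse = warehouse;
        this.lastUpdated = lastUpdated;
    }

    public int getQuantity() { return quantity; }
    public String getWarehouse() { return warehouse; }
    public LocalDateTime getLastUpdated() { return lastUpdated; }
}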
Performance Optimization Techniques
Memory-Efficient Large Document Processing
public class MemoryOptimizedTransformer {
public void transformLargeDocument(File inputFile, File outputFile,
TransformationConfig config) throws Exception {
try (FileInputStream fis = new FileInputStream(inputFile);
FileOutputStream fos = new FileOutputStream(outputFile);
BufferedInputStream bis = new BufferedInputStream(fis, 64 * 1024);
BufferedOutputStream bos = new BufferedOutputStream(fos, 64 * 1024)) {
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
// Configure for memory efficiency
inputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
inputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
XMLStreamReader reader = inputFactory.createXMLStreamReader(bis);
XMLStreamWriter writer = outputFactory.createXMLStreamWriter(bos);
ProcessingContext context = new ProcessingContext(config);
while (reader.hasNext()) {
processEvent(reader, writer, context);
reader.next();
}
reader.close();
writer.close();
}
}
private void processEvent(XMLStreamReader reader, XMLStreamWriter writer,
ProcessingContext context) throws XMLStreamException {
int eventType = reader.getEventType();
switch (eventType) {
case XMLStreamConstants.START_ELEMENT:
String elementName = reader.getLocalName();
if (context.shouldTransform(elementName)) {
transformElement(reader, writer, context);
} else {
copyElement(reader, writer, context);
}
break;
case XMLStreamConstants.CHARACTERS:
if (context.isInTransformableElement()) {
String text = reader.getText();
String transformedText = context.transformText(text);
writer.writeCharacters(transformedText);
} else {
writer.writeCharacters(reader.getText());
}
break;
case XMLStreamConstants.END_ELEMENT:
writer.writeEndElement();
context.popElement();
break;
default:
// Handle other event types as needed
break;
}
}
private void transformElement(XMLStreamReader reader, XMLStreamWriter writer,
ProcessingContext context) throws XMLStreamException {
String elementName = reader.getLocalName();
String transformedName = context.getTransformedElementName(elementName);
writer.writeStartElement(transformedName);
context.pushElement(transformedName);
// Copy and transform attributes
for (int i = 0; i < reader.getAttributeCount(); i++) {
String attrName = reader.getAttributeLocalName(i);
String attrValue = reader.getAttributeValue(i);
String transformedAttrName = context.getTransformedAttributeName(attrName);
String transformedAttrValue = context.transformAttributeValue(attrName, attrValue);
writer.writeAttribute(transformedAttrName, transformedAttrValue);
}
// Add any additional attributes
Map<String, String> additionalAttrs = context.getAdditionalAttributes(elementName);
for (Map.Entry<String, String> entry : additionalAttrs.entrySet()) {
writer.writeAttribute(entry.getKey(), entry.getValue());
}
}
}
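The transformer above delegates all decisions to a ProcessingContext built from a TransformationConfig, neither of which is shown. A minimal sketch, assuming the config only carries an element-rename map; a real context would also hold attribute and text rules:

import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.Map;

// Hypothetical supporting types assumed by MemoryOptimizedTransformer.
class TransformationConfig {
    private final Map<String, String> elementRenames;

    TransformationConfig(Map<String, String> elementRenames) {
        this.elementRenames = elementRenames;
    }

    Map<String, String> getElementRenames() {
        return elementRenames;
    }
}

class ProcessingContext {
    private final Deque<String> elementStack = new ArrayDeque<>();
    private final Map<String, String> elementRenames;

    ProcessingContext(TransformationConfig config) {
        this.elementRenames = config.getElementRenames();
    }

    void pushElement(String name) { elementStack.push(name); }

    void popElement() { if (!elementStack.isEmpty()) elementStack.pop(); }

    boolean shouldTransform(String elementName) {
        return elementRenames.containsKey(elementName);
    }

    boolean isInTransformableElement() {
        String current = elementStack.peek();
        return current != null
                && (elementRenames.containsKey(current) || elementRenames.containsValue(current));
    }

    String getTransformedElementName(String name) {
        return elementRenames.getOrDefault(name, name);
    }

    String getTransformedAttributeName(String name) { return name; }

    String transformAttributeValue(String name, String value) { return value; }

    String transformText(String text) { return text; }

    Map<String, String> getAdditionalAttributes(String elementName) {
        return Collections.emptyMap();
    }
}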
Error Handling and Recovery
Robust Transformation Pipeline
public class RobustTransformationPipeline {
private final List<TransformationStage> stages;
private final ErrorHandler errorHandler;
private final TransformationMonitor monitor;
public RobustTransformationPipeline(ErrorHandler errorHandler,
TransformationMonitor monitor) {
this.stages = new ArrayList<>();
this.errorHandler = errorHandler;
this.monitor = monitor;
}
public RobustTransformationPipeline addStage(TransformationStage stage) {
stages.add(stage);
return this;
}
public TransformationResult execute(InputStream input) {
TransformationResult result = new TransformationResult();
ByteArrayInputStream currentInput = null;
try {
currentInput = new ByteArrayInputStream(input.readAllBytes());
for (int i = 0; i < stages.size(); i++) {
TransformationStage stage = stages.get(i);
try {
monitor.stageStarted(stage.getName(), i);
ByteArrayOutputStream stageOutput = new ByteArrayOutputStream();
stage.transform(currentInput, stageOutput);
currentInput = new ByteArrayInputStream(stageOutput.toByteArray());
result.addStageResult(stage.getName(), true, null);
monitor.stageCompleted(stage.getName(), i);
} catch (Exception e) {
monitor.stageError(stage.getName(), i, e);
ErrorHandlingDecision decision = errorHandler.handleError(stage, e, result);
switch (decision.getAction()) {
case RETRY:
currentInput.reset(); // rewind the buffered input before retrying the stage
i--; // Retry current stage
break;
case SKIP:
currentInput.reset(); // pass the stage's original input on to the next stage
result.addStageResult(stage.getName(), false, e.getMessage());
break;
case ABORT:
result.setFailed(true);
result.setFailureReason("Pipeline aborted at stage: " + stage.getName());
return result;
case CONTINUE_WITH_FALLBACK:
currentInput.reset(); // rewind so the fallback sees the full stage input
ByteArrayOutputStream fallbackOutput = new ByteArrayOutputStream();
decision.getFallbackTransformation().transform(currentInput, fallbackOutput);
currentInput = new ByteArrayInputStream(fallbackOutput.toByteArray());
result.addStageResult(stage.getName() + " (fallback)", true, "Used fallback");
break;
}
}
}
result.setOutput(currentInput.readAllBytes());
result.setSuccessful(true);
} catch (Exception e) {
result.setFailed(true);
result.setFailureReason("Pipeline failed: " + e.getMessage());
}
return result;
}
public interface ErrorHandler {
ErrorHandlingDecision handleError(TransformationStage stage, Exception error,
TransformationResult partialResult);
}
public static class ErrorHandlingDecision {
public enum Action { RETRY, SKIP, ABORT, CONTINUE_WITH_FALLBACK }
private final Action action;
private final TransformationStage fallbackTransformation;
public ErrorHandlingDecision(Action action) {
this(action, null);
}
public ErrorHandlingDecision(Action action, TransformationStage fallbackTransformation) {
this.action = action;
this.fallbackTransformation = fallbackTransformation;
}
// Getters...
}
}
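The pipeline returns a TransformationResult and reports progress to a TransformationMonitor, neither of which is defined above (TransformationStage is reused from TransformationPipeline). A minimal sketch of the result holder, covering only the methods the pipeline calls:

import java.util.ArrayList;
import java.util.List;

// Hypothetical result holder assumed by RobustTransformationPipeline.
public class TransformationResult {
    private final List<String> stageResults = new ArrayList<>();
    private byte[] output;
    private boolean successful;
    private boolean failed;
    private String failureReason;

    public void addStageResult(String stageName, boolean success, String detail) {
        stageResults.add(stageName + ": " + (success ? "ok" : "failed")
                + (detail != null ? " (" + detail + ")" : ""));
    }

    public void setOutput(byte[] output) { this.output = output; }
    public byte[] getOutput() { return output; }

    public void setSuccessful(boolean successful) { this.successful = successful; }
    public boolean isSuccessful() { return successful; }

    public void setFailed(boolean failed) { this.failed = failed; }
    public boolean isFailed() { return failed; }

    public void setFailureReason(String failureReason) { this.failureReason = failureReason; }
    public String getFailureReason() { return failureReason; }

    public List<String> getStageResults() { return stageResults; }
}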
Conclusion
Advanced XML transformation techniques enable sophisticated processing workflows that can handle complex business requirements, large datasets, and integration scenarios. The key is choosing the appropriate technique based on your specific needs: streaming for large documents, pipelines for complex multi-stage processing, parallel processing for performance, and rule-based systems for dynamic transformations.
Next Steps
- Explore Tools and Processors for implementation options
- Study Performance Optimization for tuning techniques
- Learn Best Practices for production systems