How to convert XML to Avro without ignoring CDATA content? - java
I have the following source XML file named customers.xml:
<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd">
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P020]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[AMADOR]]></dealerCustNumber>
<dealerCustName><![CDATA[AMADOR COMPUTERS]]></dealerCustName>
<phoneNumber><![CDATA[800 111 4444]]></phoneNumber>
<faxNumber><![CDATA[780 111 4444]]></faxNumber>
<email xsi:nil="true" />
<customerType><![CDATA[R]]></customerType>
<activeCustomerInd>false</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2000-01-11T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-02-05T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address2>
<address3><![CDATA[AS PER BILL FEB AA/15]]></address3>
<city><![CDATA[CHICAGO]]></city>
<postalCode><![CDATA[Q5S 1E5]]></postalCode>
<state><![CDATA[AB]]></state>
<country><![CDATA[CA]]></country>
<location><![CDATA[FLAGGED FOR DELETION]]></location>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[G]]></divisionCode>
<divisionName><![CDATA[CAR]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[AQ99]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
<SalesRep>
<number><![CDATA[XXX]]></number>
<name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
<type><![CDATA[M]]></type>
<par>0</par>
<email xsi:nil="true" />
<phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
</SalesRep>
</Division>
</Customer>
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P000]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[UU20888]]></dealerCustNumber>
<dealerCustName><![CDATA[ ADVERTISING AND PR]]></dealerCustName>
<phoneNumber xsi:nil="true" />
<faxNumber xsi:nil="true" />
<email xsi:nil="true" />
<customerType><![CDATA[I]]></customerType>
<activeCustomerInd>true</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[M2]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2015-11-18T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-11-19T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[EQUIP]]></address2>
<city><![CDATA[ADER]]></city>
<country><![CDATA[CA]]></country>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[A]]></divisionCode>
<divisionName><![CDATA[AGRO]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[EQ00]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
</Division>
</Customer>
</p:CustomerElement>
I have the following java code, which parses customers.xml into individual "Customer" entities, and then attempts to convert each of them into an AVRO format:
package com.dogsoft.data.xmltoavro;
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class ParseXmlFile {
private static Protocol protocol;
public static void xmlToAvro(File xmlFile, File avroFile) throws IOException, SAXException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
Schema schema = protocol.getType("Element");
Document doc = parse(xmlFile);
DatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.create(schema, avroFile);
Object docElement = doc.getDocumentElement();
fileWriter.append(wrapElement(doc.getDocumentElement()));
}
}
private static GenericData.Record wrapElement(Element el) {
GenericData.Record record = new GenericData.Record(protocol.getType("Element"));
record.put("name", el.getNodeName());
NamedNodeMap attributeNodes = el.getAttributes();
List<GenericData.Record> attrRecords = new ArrayList<>();
for (int i = 0; i < attributeNodes.getLength(); i++) {
Attr attr = (Attr) attributeNodes.item(i);
attrRecords.add(wrapAttr(attr));
}
record.put("attributes", attrRecords);
List<Object> childArray = new ArrayList<>();
NodeList childNodes = el.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
Object nt = node.getNodeType();
if (node.getNodeType() == Node.ELEMENT_NODE)
childArray.add(wrapElement((Element) node));
if (node.getNodeType() == Node.TEXT_NODE)
childArray.add(node.getTextContent());
}
record.put("children", childArray);
return record;
}
private static GenericData.Record wrapAttr(Attr attr) {
GenericData.Record record = new GenericData.Record(protocol.getType("Attribute"));
record.put("name", attr.getName());
record.put("value", attr.getValue());
return record;
}
private static Document parse(File file) throws IOException, SAXException {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
return builder.parse(file);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
GenericRecord record = dataFileReader.next();
Document doc;
try {
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
Element el = unwrapElement(record, doc);
doc.appendChild(el);
saveDocument(doc, xmlFile);
}
private static Element unwrapElement(GenericRecord record, Document doc) {
String name = "" + record.get("name");
Element el = doc.createElement(name);
#SuppressWarnings("unchecked")
GenericArray<GenericRecord> attrArray = (GenericArray<GenericRecord>) record.get("attributes");
for (GenericRecord attrRecord : attrArray)
el.setAttributeNode(unwrapAttr(attrRecord, doc));
#SuppressWarnings("unchecked")
GenericArray<Object> childArray = (GenericArray<Object>) record.get("children");
for (Object childObj : childArray) {
if (childObj instanceof GenericRecord)
el.appendChild(unwrapElement((GenericRecord) childObj, doc));
if (childObj instanceof Utf8)
el.appendChild(doc.createTextNode("" + childObj));
}
return el;
}
private static Attr unwrapAttr(GenericRecord record, Document doc) {
Attr attr = doc.createAttribute("" + record.get("name"));
attr.setValue("" + record.get("value"));
return attr;
}
private static void saveDocument(Document doc, File file) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(file));
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args)
{
Object nodeObject = null;
Node myNode = null;
Transformer transformer = null;
try
{
try {
transformer =
TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("/tmp/customers.xml");
System.out.printf("Version = %s%n", doc.getXmlVersion());
System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
if (doc.hasChildNodes())
{
int customerNumber = 0;
NodeList nl = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(node.toString());
customerNumber++;
File avroFile = new File("/tmp/customer" + customerNumber + ".avro");
File xmlFile = new File("/tmp/customer" + customerNumber + ".xml");
File xmlFile1 = new File("/tmp/customer" + customerNumber + "-foo.xml");
try {
transformer.transform(
new DOMSource(node), new StreamResult(xmlFile));
File outputFile = new File("/tmp/customer" + customerNumber + ".avro");
xmlToAvro(xmlFile, outputFile);
} catch (TransformerException e) {
e.printStackTrace();
}
}
}
}
}
catch (IOException ioe)
{
System.err.println("IOE: " + ioe);
}
catch (SAXException saxe)
{
System.err.println("SAXE: " + saxe);
}
catch (FactoryConfigurationError fce)
{
System.err.println("FCE: " + fce);
}
catch (ParserConfigurationException pce)
{
System.err.println("PCE: " + pce);
}
}
}
This code works overall, but it ignores any content that is enclosed in a
<![CDATA[ ... ]]>
section. As it happens, most of the actual useful data in the customers.xml file is enclosed in these sections.
Is there a way to modify this code so that it does not ignore the CDATA contents?
Instead of hand-writing parser code, you might want to split the problem into two parts: first, bind the XML to a POJO (using JAXB or the Jackson XML module); then write the POJO as Avro (using the Apache Avro library or the Jackson Avro module). All you need for that is a POJO definition that matches the expected structure of the data as both XML and Avro. The result should be less code, and you would basically be specifying what needs to happen, not how to do it.
Related
Not getting desired XML output in Java
I am converting CSV file to XML , it is converting but not getting desired structured output . My java Code :- public static void main(String[] args){ List<String> headers=new ArrayList<String>(5); File file=new File("C:/Users/Admin/Desktop/data.csv"); BufferedReader reader=null; try { DocumentBuilderFactory domFactory =DocumentBuilderFactory.newInstance(); DocumentBuilder domBuilder=domFactory.newDocumentBuilder(); Document newDoc=domBuilder.newDocument(); // Root element Element rootElement=newDoc.createElement("root"); newDoc.appendChild(rootElement); reader = new BufferedReader(new FileReader(file)); int line=0; String text=null; while ((text=reader.readLine())!=null) { StringTokenizer st=new StringTokenizer(text, "?", false); String[] rowValues=new String[st.countTokens()]; int index=0; while (st.hasMoreTokens()) { String next=st.nextToken(); rowValues[index++]=next; } //String[] rowValues = text.split(","); if (line == 0) { // Header row for (String col:rowValues) { headers.add(col); Element rowElement=newDoc.createElement("header"); rootElement.appendChild(rowElement); for (int col1=0;col1<headers.size();col1++) { String header = headers.get(col1); String value = null; if (col1<rowValues.length) { value=rowValues[col1]; } else { // ?? Default value value=" "; } rowElement.setTextContent(value); System.out.println(headers+" "+value); } }} else { // Data row Element rowElement=newDoc.createElement("row"); rootElement.appendChild(rowElement); for (int col=0;col<headers.size();col++) { String header = headers.get(col); String value = null; if (col<rowValues.length) { value=rowValues[col]; } else { // ?? 
Default value value=" "; } rowElement.setTextContent(value); System.out.println(header+" "+value); } } line++; } try { TransformerFactory tranFactory = TransformerFactory.newInstance(); Transformer aTransformer = tranFactory.newTransformer(); aTransformer.setOutputProperty(OutputKeys.INDENT, "yes"); aTransformer.setOutputProperty(OutputKeys.METHOD, "xml"); aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); Source src = new DOMSource(newDoc); Result result = new StreamResult(new File("C:/Users/Admin/Desktop/data.xml")); aTransformer.transform(src, result); System.out.println("File creation successfully!"); } catch (Exception exp) { exp.printStackTrace(); } finally { try { } catch (Exception e1) { } try { } catch (Exception e1) { } } } catch (Exception e1) { e1.printStackTrace(); } } This is my CSV file:- Symbol,Open,High,Low,Last Traded Price,Change "NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70" "RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50" "BPCL","647.70","665.00","645.95","660.10","10.75" "ADANIPORTS","368.50","373.80","368.00","372.25","4.25" "ONGC","159.50","161.75","159.35","160.80","1.70" And this is the output I am getting:- <?xml version="1.0" encoding="UTF-8" standalone="no"?> <root> <header>Symbol,Open,High,Low,Last Traded Price,Change</header> <row>"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"</row> <row>"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"</row> <row>"BPCL","647.70","665.00","645.95","660.10","10.75"</row> <row>"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"</row> <row>"ONGC","159.50","161.75","159.35","160.80","1.70"</row> </root> Suggest me where am I going wrong ? I tried according to me , but getting confuse where in header and row section should I make changes. ADDED :- Expected output <root> <header>symbol</header> <row>NIFTY 50</row> <row>RELIANCE</row> <row>BPCL></row> . . 
<header>Open</header> <row>9,645.90</row> <row>1,390.00</row> . . </root>
For your reference: import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; import org.apache.commons.csv.QuoteMode; import org.w3c.dom.Document; import org.w3c.dom.Element; public class CsvToXml { public static void main(String[] args) { File inputFile = new File("C:/Users/Admin/Desktop/data.csv"); CSVParser inParser = null; Document newDoc = null; try { inParser = CSVParser.parse(inputFile, StandardCharsets.UTF_8, CSVFormat.EXCEL.withHeader().withQuoteMode(QuoteMode.NON_NUMERIC)); DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder domBuilder = domFactory.newDocumentBuilder(); newDoc = domBuilder.newDocument(); // Root element Element rootElement = newDoc.createElement("root"); newDoc.appendChild(rootElement); List<CSVRecord> records = inParser.getRecords(); for (String key : inParser.getHeaderMap().keySet()) { Element rowElement = newDoc.createElement("header"); rootElement.appendChild(rowElement); rowElement.setTextContent(key); for (CSVRecord record : records) { rowElement = newDoc.createElement("row"); rootElement.appendChild(rowElement); rowElement.setTextContent(record.get(key)); } } TransformerFactory tranFactory = TransformerFactory.newInstance(); Transformer aTransformer = tranFactory.newTransformer(); aTransformer.setOutputProperty(OutputKeys.INDENT, "yes"); aTransformer.setOutputProperty(OutputKeys.METHOD, "xml"); 
aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); Source src = new DOMSource(newDoc); Result result = new StreamResult(new File("C:/Users/Admin/Desktop/data.xml")); aTransformer.transform(src, result); System.out.println("File creation successfully!"); } catch (Exception e) { e.printStackTrace(); } finally { if (inParser != null) { try { inParser.close(); } catch (IOException e) { e.printStackTrace(); } } } } } This is using Apache Commons CSV.
java write xml attribute with value
This is my first post on Stack, so if there is something wrong, be patient ... Ok, my question is how can I write an XML attribute with value. The result would be something like this: <GroupAttribute> <Attribute name = "Color"> Pink </Attribute> ..... </GroupAttribute> I tried this: Element attribute = doc.createElement ("attribute"); groupAttribute.appendChild (attribute); attribute.setAttribute ("attributeType" p.attributeColor); groupAttribute.appendChild (getCompanyElements (doc, attribute, "attribute", p.attributeColor)); But it does not work.. the result is: <GroupAttribute> <Attribute> Pink </Attribute> ..... </GroupAttribute> the setAttribute is missing ... What am I doing wrong? Here the code: import com.opencsv.*; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; /** * * #author Mike */ public class prueba { /** * #param args the command line arguments */ public static void main(String[] args) { List<Producto> prods = new ArrayList<Producto>(); try { CSVReader reader; reader = new CSVReader(new FileReader("C:\\Temp\\feeds\\Product_Prueba.csv"), ';'); String[] nextLine; try { while ((nextLine = reader.readNext()) != null) { // nextLine[] is an array of values from the line //System.out.println(Arrays.toString(nextLine)); //Lee Producto p; p = new Producto(); p.attributeColor = "Pink"; prods.add(p); } } catch (IOException ex) { Logger.getLogger(prueba.class.getName()).log(Level.SEVERE, null, 
ex); } } catch (FileNotFoundException ex) { Logger.getLogger(prueba.class.getName()).log(Level.SEVERE, null, ex); } xeraXML(prods); } static void xeraXML(List<Producto> ps) { DocumentBuilderFactory icFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder icBuilder; try { icBuilder = icFactory.newDocumentBuilder(); Document doc = icBuilder.newDocument(); Element mainRootElement = doc.createElement("productRequest"); doc.appendChild(mainRootElement); for (Iterator<Producto> i = ps.iterator(); i.hasNext();) { Producto p; p = i.next(); mainRootElement.appendChild(getProductElement(doc, p)); } // output DOM XML to console Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); DOMSource source = new DOMSource(doc); StreamResult console = new StreamResult(System.out); //StreamResult out = new StreamResult("C:\\Temp\\results\\resultado.xml"); transformer.transform(source, console); //transformer.transform(source, out); } catch (Exception e) { e.printStackTrace(); } } private static Element getProductElement(Document doc /*String localizedFor,*/, Producto p) { Element groupAttribute = doc.createElement("groupAttribute"); Element attribute = doc.createElement("attribute"); groupAttribute.appendChild(attribute); attribute.setAttribute("attributeType", p.attributeColor); groupAttribute.appendChild(getElements(doc, attribute, "attribute", p.attributeColor)); return groupAttribute; } private static Node getElements(Document doc, Element element, String name, String value) { Element node = doc.createElement(name); node.appendChild(doc.createTextNode(value)); return node; } } And here the Producto class: public class Producto { public String attributeColor; }
I just wanted to add a comment, but I am writing it as an answer since I don't have that privilege yet. I was looking for how to add an attribute to an XML node and came across this post. dependency = dom.createElement("dependency"); dependency.setAttribute("type", "value"); dependencies.appendChild(dependency); I appended the child after setting the attribute.
Writing (overriding) same XML file that I read
I made a method to read XML files, but now I need to write or override that same XML file with the same tags, nodes and objects, but with different data inside child nodes. First I want to make read and write working and then I have some ideas to maybe put the whole XML file into a buffer. Then I could put the whole XML under one class and just write that class again into the same XML this is just an idea, any suggestion or idea is welcome. My XML file looks like this: <?xml version="1.0" encoding="UTF-8" standalone="no"?> <!-- Document Root --> <DATA> <Settings USERNAME="test" PASSWORD="test" STATUS="active" / > </DATA> This is my code for reading: import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; public class Read { private final static String SETTINGS_LINE = Settings; public void readXML() { try { File xmlFile = new File("Test.xml"); DocumentBuilderFactory documentFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder documentBuilder = documentFactory.newDocumentBuilder(); Document doc = documentBuilder.parse(xmlFile); // Normalize the XML file doc.getDocumentElement().normalize(); NodeList nodeList = doc.getDocumentElement().getChildNodes(); for(int temp = 0; temp < nodeList.getLength(); temp++) { Node node = nodeList.item(temp); if(node instanceof Element && node.getNodeName() == SETTINGS_LINE) { Element settings = (Element) node; System.out.println("User" +settings.getAttribute("USERNAME")); System.out.println("Password" +settings.getAttribute("PASSWORD")); System.out.println("Status" +settings.getAttribute("STATUS")); } } } catch (Exception e) { e.printStackTrace(); } } } And this is the code for writing that is not working: import java.io.File; import java.io.IOException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import 
javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; public class Modify { private final static String SETTINGS_LINE = "Settings"; public static void main(String argv[]) { try { String filepath = "test.xml"; DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); // Normalize the XML File doc.getDocumentElement().normalize(); NodeList nodeList = doc.getDocumentElement().getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { Node node = nodeList.item(i); if (node instanceof Element && node.getNodeName() == SETTINGS_LINE) { Element settings = (Element) node; if("USERNAME".equals(node.getChildNodes())){ node.setTextContent("mivnadic"); } } // Write the content into xml file TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); DOMSource source = new DOMSource(doc); StreamResult result = new StreamResult(new File(filepath)); transformer.transform(source, result); } System.out.println("File saved"); } catch (ParserConfigurationException pce) { pce.printStackTrace(); } catch (TransformerException tfe) { tfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } catch (SAXException sae) { sae.printStackTrace(); } } }
Java XML parsing: distinguish two XML elements by a missing tag
From a web server request I get an XML response which contains the data I need. It looks like this (excerpt): <ctc:BasePrice> <cgc:PriceAmount amountCurrencyID="EUR">18.75</cbc:PriceAmount> <cgc:BaseQuantity quantityUnitCode="EA">1</cbc:BaseQuantity> </ctc:BasePrice> <ctc:BasePrice> <cgc:PriceAmount amountCurrencyID="EUR">18.25</cbc:PriceAmount> <cgc:BaseQuantity quantityUnitCode="EA">1</cbc:BaseQuantity> <cgc:MinimumQuantity quantityUnitCode="EA">3</cbc:MinimumQuantity> <ctc:BasePrice> What I need is the first "PriceAmount" value, which could be a different price than the second. But how can I make sure I retrieve the correct one, by "telling" the parser to take the element which does not contain the "MinimumQuantity" field, and so distinguish them? I have read a lot about SAX etc. but could not find an idea of how to implement such "logic". Maybe someone has run into a similar problem. Thanks in advance for every hint.
You could use xpath for this. The expression "//*[local-name()='MinimumQuantity']/../*[local-name()='PriceAmount']" should return what you want. eg import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.w3c.dom.Document; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; public class XpathParser { public static void main(String[] args) { try { String xml = "<root>" + "<ctc:BasePrice>" + "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.75</cgc:PriceAmount>" + "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>" + "</ctc:BasePrice>" + "<ctc:BasePrice>" + "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.25</cgc:PriceAmount>" + "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>" + "<cgc:MinimumQuantity quantityUnitCode=\"EA\">3</cgc:MinimumQuantity>" + "</ctc:BasePrice>" + "</root>"; InputStream xmlStream = new ByteArrayInputStream(xml.getBytes()); DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = builderFactory.newDocumentBuilder(); Document xmlDocument = builder.parse(xmlStream); XPath xPath = XPathFactory.newInstance().newXPath(); String expression = "//*[local-name() = 'BasePrice' and not(descendant::*[local-name() = 'MinimumQuantity'])]/*[local-name()='PriceAmount']"; NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(xmlDocument, XPathConstants.NODESET); for (int i = 0; i < nodeList.getLength(); i++) { System.out.println(nodeList.item(i).getFirstChild().getNodeValue()); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch 
(IOException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (XPathExpressionException e) { e.printStackTrace(); } } } Dom Parser way import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; public class DomParser{ public static void main(String[] args) { try { String xml = "<root>" + "<ctc:BasePrice>" + "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.75</cgc:PriceAmount>" + "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>" + "</ctc:BasePrice>" + "<ctc:BasePrice>" + "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.25</cgc:PriceAmount>" + "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>" + "<cgc:MinimumQuantity quantityUnitCode=\"EA\">3</cgc:MinimumQuantity>" + "</ctc:BasePrice>" + "</root>"; InputStream xmlStream = new ByteArrayInputStream(xml.getBytes()); DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = builderFactory.newDocumentBuilder(); Document xmlDocument = builder.parse(xmlStream); xmlDocument.getDocumentElement().normalize(); NodeList nList = xmlDocument.getElementsByTagName("ctc:BasePrice"); for (int temp = 0; temp < nList.getLength(); temp++) { Node nNode = nList.item(temp); if (nNode.getNodeType() == Node.ELEMENT_NODE) { Element eElement = (Element) nNode; if(eElement.getElementsByTagName("cgc:MinimumQuantity").getLength() == 0){ System.out.println(eElement.getElementsByTagName("cgc:PriceAmount").item(0).getTextContent()); } } } } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
Java DOM: cannot write adapted XML to file
I have the following simplified XML: <?xml version="1.0" encoding="UTF-8"?> <ExportData> <Rows> <R> <companyCodestringtrue>101</companyCodestringtrue> <transactionQualifierstring>Sales</transactionQualifierstring> <menuItemNumberlong>4302150</menuItemNumberlong> <productQuantityinttrue>14</productQuantityinttrue> <productValueInclVATdecimaltrue>1.90</productValueInclVATdecimaltrue> <productValueExclVATdecimaltrue>1.775701</productValueExclVATdecimaltrue> </R> <R> <companyCodestringtrue>101</companyCodestringtrue> <transactionQualifierstring>Sales</transactionQualifierstring> <menuItemNumberlong>333555</menuItemNumberlong> <productQuantityinttrue>0</productQuantityinttrue> <productValueInclVATdecimaltrue>3.90</productValueInclVATdecimaltrue> <productValueExclVATdecimaltrue>3.775701</productValueExclVATdecimaltrue> </R> <R> <companyCodestringtrue>101</companyCodestringtrue> <transactionQualifierstring>Sales</transactionQualifierstring> <menuItemNumberlong>1235665</menuItemNumberlong> <productQuantityinttrue>5</productQuantityinttrue> <productValueInclVATdecimaltrue>4.90</productValueInclVATdecimaltrue> <productValueExclVATdecimaltrue>4.775701</productValueExclVATdecimaltrue> </R> </Rows> </ExportData> I need to delete each complete <R> element if the <productQuantityinttrue> element equals "0". 
I came up with the following Java code: package filterPositions; import java.io.File; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.Result; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class FilterPositions { public static String result = ""; public static void main(String[] args) throws Exception { try { DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); File filePath = new File("C:/LSA_SALES_EXPORT_1507_test_zero_qu.xml"); Document doc = docBuilder.parse(filePath); Node rootNode = doc.getDocumentElement(); final Element element = doc.getDocumentElement(); // output new XML Document DocumentBuilder parser = docFactory.newDocumentBuilder(); Document newdoc = parser.newDocument(); newdoc.adoptNode(traversingXML(element)); writeXmlFile(newdoc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml"); System.out.println("Done..."); System.out.println("Exiting..."); } catch (Exception e) { e.printStackTrace(); } } public static Element traversingXML(Element element) { NodeList positionen = element.getElementsByTagName("R"); Element e = null; for (int i = 0; i < positionen.getLength(); i++) { e = (Element) positionen.item(i); for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) { if (child instanceof Element && "productQuantityinttrue".equals(child.getNodeName())&& "0".equals(child.getTextContent())) { e.getParentNode().removeChild(e); } } } System.out.println(e); return e; } public static void 
writeXmlFile(Document doc, String filename) { try { // Prepare the DOM document for writing Source source = new DOMSource(); // Prepare the output file File file = new File(filename); Result result = new StreamResult(file); // Write the DOM document to the file Transformer xformer = TransformerFactory.newInstance() .newTransformer(); xformer.transform(source, result); } catch (TransformerConfigurationException e) { } catch (TransformerException e) { } } } I am not sure if my method "traversingXML" is working properly. My problem right now is that the adapted XML structure (one deleted) is not written to newdoc.
You don't copy the original document to newdoc; instead you create a new, empty XML document. Instead, try this code: ... final Element element = doc.getDocumentElement(); // original code up to here traversingXML(element); // delete the node writeXmlFile(doc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml"); // save modified document