I have the following simplified XML:
<?xml version="1.0" encoding="UTF-8"?>
<ExportData>
<Rows>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>4302150</menuItemNumberlong>
<productQuantityinttrue>14</productQuantityinttrue>
<productValueInclVATdecimaltrue>1.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>1.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>333555</menuItemNumberlong>
<productQuantityinttrue>0</productQuantityinttrue>
<productValueInclVATdecimaltrue>3.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>3.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>1235665</menuItemNumberlong>
<productQuantityinttrue>5</productQuantityinttrue>
<productValueInclVATdecimaltrue>4.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>4.775701</productValueExclVATdecimaltrue>
</R>
</Rows>
</ExportData>
I need to delete each complete <R> element if the <productQuantityinttrue> element equals "0".
I came up with the following Java code:
package filterPositions;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class FilterPositions {
public static String result = "";
public static void main(String[] args) throws Exception {
try {
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
File filePath = new File("C:/LSA_SALES_EXPORT_1507_test_zero_qu.xml");
Document doc = docBuilder.parse(filePath);
Node rootNode = doc.getDocumentElement();
final Element element = doc.getDocumentElement();
// output new XML Document
DocumentBuilder parser = docFactory.newDocumentBuilder();
Document newdoc = parser.newDocument();
newdoc.adoptNode(traversingXML(element));
writeXmlFile(newdoc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml");
System.out.println("Done...");
System.out.println("Exiting...");
} catch (Exception e) {
e.printStackTrace();
}
}
public static Element traversingXML(Element element) {
NodeList positionen = element.getElementsByTagName("R");
Element e = null;
for (int i = 0; i < positionen.getLength(); i++) {
e = (Element) positionen.item(i);
for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) {
if (child instanceof Element && "productQuantityinttrue".equals(child.getNodeName())&& "0".equals(child.getTextContent())) {
e.getParentNode().removeChild(e);
}
}
}
System.out.println(e);
return e;
}
public static void writeXmlFile(Document doc, String filename) {
try {
// Prepare the DOM document for writing
Source source = new DOMSource();
// Prepare the output file
File file = new File(filename);
Result result = new StreamResult(file);
// Write the DOM document to the file
Transformer xformer = TransformerFactory.newInstance()
.newTransformer();
xformer.transform(source, result);
} catch (TransformerConfigurationException e) {
} catch (TransformerException e) {
}
}
}
I am not sure if my method "traversingXML" is working properly. My problem right now is that the adapted XML structure (one deleted) is not written to newdoc.
You don't copy the original document to newdoc; instead you create a new, empty XML document.
Instead, try this code:
...
final Element element = doc.getDocumentElement(); // original code up to here
traversingXML(element); // delete the node
writeXmlFile(doc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml"); // save modified document
Related
I have the following source XML file named customers.xml:
<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd">
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P020]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[AMADOR]]></dealerCustNumber>
<dealerCustName><![CDATA[AMADOR COMPUTERS]]></dealerCustName>
<phoneNumber><![CDATA[800 111 4444]]></phoneNumber>
<faxNumber><![CDATA[780 111 4444]]></faxNumber>
<email xsi:nil="true" />
<customerType><![CDATA[R]]></customerType>
<activeCustomerInd>false</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2000-01-11T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-02-05T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address2>
<address3><![CDATA[AS PER BILL FEB AA/15]]></address3>
<city><![CDATA[CHICAGO]]></city>
<postalCode><![CDATA[Q5S 1E5]]></postalCode>
<state><![CDATA[AB]]></state>
<country><![CDATA[CA]]></country>
<location><![CDATA[FLAGGED FOR DELETION]]></location>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[G]]></divisionCode>
<divisionName><![CDATA[CAR]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[AQ99]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
<SalesRep>
<number><![CDATA[XXX]]></number>
<name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
<type><![CDATA[M]]></type>
<par>0</par>
<email xsi:nil="true" />
<phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
</SalesRep>
</Division>
</Customer>
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P000]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[UU20888]]></dealerCustNumber>
<dealerCustName><![CDATA[ ADVERTISING AND PR]]></dealerCustName>
<phoneNumber xsi:nil="true" />
<faxNumber xsi:nil="true" />
<email xsi:nil="true" />
<customerType><![CDATA[I]]></customerType>
<activeCustomerInd>true</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[M2]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2015-11-18T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-11-19T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[EQUIP]]></address2>
<city><![CDATA[ADER]]></city>
<country><![CDATA[CA]]></country>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[A]]></divisionCode>
<divisionName><![CDATA[AGRO]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[EQ00]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
</Division>
</Customer>
</p:CustomerElement>
I have the following java code, which parses customers.xml into individual "Customer" entities, and then attempts to convert each of them into an AVRO format:
package com.dogsoft.data.xmltoavro;
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class ParseXmlFile {
private static Protocol protocol;
public static void xmlToAvro(File xmlFile, File avroFile) throws IOException, SAXException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
Schema schema = protocol.getType("Element");
Document doc = parse(xmlFile);
DatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.create(schema, avroFile);
Object docElement = doc.getDocumentElement();
fileWriter.append(wrapElement(doc.getDocumentElement()));
}
}
private static GenericData.Record wrapElement(Element el) {
GenericData.Record record = new GenericData.Record(protocol.getType("Element"));
record.put("name", el.getNodeName());
NamedNodeMap attributeNodes = el.getAttributes();
List<GenericData.Record> attrRecords = new ArrayList<>();
for (int i = 0; i < attributeNodes.getLength(); i++) {
Attr attr = (Attr) attributeNodes.item(i);
attrRecords.add(wrapAttr(attr));
}
record.put("attributes", attrRecords);
List<Object> childArray = new ArrayList<>();
NodeList childNodes = el.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
Object nt = node.getNodeType();
if (node.getNodeType() == Node.ELEMENT_NODE)
childArray.add(wrapElement((Element) node));
if (node.getNodeType() == Node.TEXT_NODE)
childArray.add(node.getTextContent());
}
record.put("children", childArray);
return record;
}
private static GenericData.Record wrapAttr(Attr attr) {
GenericData.Record record = new GenericData.Record(protocol.getType("Attribute"));
record.put("name", attr.getName());
record.put("value", attr.getValue());
return record;
}
private static Document parse(File file) throws IOException, SAXException {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
return builder.parse(file);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
GenericRecord record = dataFileReader.next();
Document doc;
try {
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
Element el = unwrapElement(record, doc);
doc.appendChild(el);
saveDocument(doc, xmlFile);
}
private static Element unwrapElement(GenericRecord record, Document doc) {
String name = "" + record.get("name");
Element el = doc.createElement(name);
#SuppressWarnings("unchecked")
GenericArray<GenericRecord> attrArray = (GenericArray<GenericRecord>) record.get("attributes");
for (GenericRecord attrRecord : attrArray)
el.setAttributeNode(unwrapAttr(attrRecord, doc));
#SuppressWarnings("unchecked")
GenericArray<Object> childArray = (GenericArray<Object>) record.get("children");
for (Object childObj : childArray) {
if (childObj instanceof GenericRecord)
el.appendChild(unwrapElement((GenericRecord) childObj, doc));
if (childObj instanceof Utf8)
el.appendChild(doc.createTextNode("" + childObj));
}
return el;
}
private static Attr unwrapAttr(GenericRecord record, Document doc) {
Attr attr = doc.createAttribute("" + record.get("name"));
attr.setValue("" + record.get("value"));
return attr;
}
private static void saveDocument(Document doc, File file) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(file));
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args)
{
Object nodeObject = null;
Node myNode = null;
Transformer transformer = null;
try
{
try {
transformer =
TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("/tmp/customers.xml");
System.out.printf("Version = %s%n", doc.getXmlVersion());
System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
if (doc.hasChildNodes())
{
int customerNumber = 0;
NodeList nl = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(node.toString());
customerNumber++;
File avroFile = new File("/tmp/customer" + customerNumber + ".avro");
File xmlFile = new File("/tmp/customer" + customerNumber + ".xml");
File xmlFile1 = new File("/tmp/customer" + customerNumber + "-foo.xml");
try {
transformer.transform(
new DOMSource(node), new StreamResult(xmlFile));
File outputFile = new File("/tmp/customer" + customerNumber + ".avro");
xmlToAvro(xmlFile, outputFile);
} catch (TransformerException e) {
e.printStackTrace();
}
}
}
}
}
catch (IOException ioe)
{
System.err.println("IOE: " + ioe);
}
catch (SAXException saxe)
{
System.err.println("SAXE: " + saxe);
}
catch (FactoryConfigurationError fce)
{
System.err.println("FCE: " + fce);
}
catch (ParserConfigurationException pce)
{
System.err.println("PCE: " + pce);
}
}
}
This code works overall, but it ignores any content, which is enclosed into
![CDATA[
tag. As it happens, most of the actual useful data in the customers.xml files is enclosed into these tags.
Is there a way to modify this code, to make it not ignore the CDATA contents?
Instead of hand-writing parser code, you might want to split the problem in two parts: first, bind XML into POJO (using JAXB or Jackson XML module); and then write POJO as Avro (using Apache Avro lib, or Jackson Avro module). All you need for that would be POJO definition that matches expected structure for data as XML and Avro. Result should be less code, and basically specifying what needs to happen and now how to do it.
I want to change the rating of jojo restaurant only , i tried this code but didn't work please help. Below is my xml document and the code I have.
<city>
<beirut>
<restaurant>
<name>sada</name>
</restaurant>
</beirut>
<jbeil>
<restaurant>
<name>toto</name>
<rating>4.3/5</rating>
</restaurant>
<restaurant>
<name>jojo</name>
<rating>4.3/5</rating>
</restaurant>
</jbeil>
<sour>
<restaurant>
<name>sada</name>
</restaurant>
</sour>
</city>
Code:
try {
File inputFile = new File("src/xpath/josephXml.xml");
DocumentBuilderFactory dbFactory
= DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
XPath xPath = XPathFactory.newInstance().newXPath();
String expression = "/City/Jbeil/Restaurants/Restaurant[name='Feniqia']/rating";
Element e = (Element)xPath.evaluate(expression, doc, XPathConstants.NODE);
if (e != null){
e.setTextContent("5/5");
}
} catch (ParserConfigurationException | SAXException | IOException e) {
}
if you only need to write the result (there are other ways):
import java.io.File;
import org.w3c.dom.Document;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
try
{
Transformer trans = TransformerFactory.newInstance().newTransformer();
Result out = new StreamResult(new File("result.xml"));
Source in = new DOMSource(doc);
trans.transform(in, out);
}
catch (Exception exe)
{
System.err.println(exe.getMessage());
}
First, your XPath did not align to your posted XML (no <restaurants> or Feniqia name) and all should have been lower case. Possibly you extracted a piece from large xml document. For demonstration, I used the jojo restaurant.
Second, XPath does not itself update an XML, you will need to create a new document. Simply add a transformation at end:
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import javax.xml.xpath.*;
import java.io.File;
import java.io.IOException;
import org.xml.sax.*;
import org.w3c.dom.*;
public class RatingUpdate {
public static void main(String [] args) {
try {
File inputFile = new File("src/xpath/josephXml.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
XPath xPath = XPathFactory.newInstance().newXPath();
String expression = "/city/jbeil/restaurant[name='jojo']/rating";
Element e = (Element)xPath.evaluate(expression, doc, XPathConstants.NODE);
if (e != null){
e.setTextContent("5/5");
}
Transformer xformer = TransformerFactory.newInstance().newTransformer();
xformer.transform(new DOMSource(doc), new StreamResult(new File("src/xpath/josephXml_update.xml")));
} catch (ParserConfigurationException | SAXException | IOException | XPathExpressionException |
TransformerException e) {
}
}
}
I made a method to read XML files, but now I need to write or override that same XML file with the same tags, nodes and objects, but with different data inside child nodes.
First I want to make read and write working and then I have some ideas to maybe put the whole XML file into a buffer. Then I could put the whole XML under one class and just write that class again into the same XML this is just an idea, any suggestion or idea is welcome.
My XML file looks like this:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Document Root -->
<DATA>
<Settings USERNAME="test" PASSWORD="test" STATUS="active" / >
</DATA>
This is my code for reading:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
public class Read {
private final static String SETTINGS_LINE = Settings;
public void readXML() {
try {
File xmlFile = new File("Test.xml");
DocumentBuilderFactory documentFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentFactory.newDocumentBuilder();
Document doc = documentBuilder.parse(xmlFile);
// Normalize the XML file
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getDocumentElement().getChildNodes();
for(int temp = 0; temp < nodeList.getLength(); temp++) {
Node node = nodeList.item(temp);
if(node instanceof Element && node.getNodeName() == SETTINGS_LINE) {
Element settings = (Element) node;
System.out.println("User" +settings.getAttribute("USERNAME"));
System.out.println("Password" +settings.getAttribute("PASSWORD"));
System.out.println("Status" +settings.getAttribute("STATUS"));
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
And this is the code for writing that is not working:
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Modify {
private final static String SETTINGS_LINE = "Settings";
public static void main(String argv[]) {
try {
String filepath = "test.xml";
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
// Normalize the XML File
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node instanceof Element && node.getNodeName() == SETTINGS_LINE) {
Element settings = (Element) node;
if("USERNAME".equals(node.getChildNodes())){
node.setTextContent("mivnadic");
}
}
// Write the content into xml file
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File(filepath));
transformer.transform(source, result);
}
System.out.println("File saved");
} catch (ParserConfigurationException pce) {
pce.printStackTrace();
} catch (TransformerException tfe) {
tfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} catch (SAXException sae) {
sae.printStackTrace();
}
}
}
I have an XML file having many nodes with attributes as well as child elements with same field name:
<doc>
<str name="eventId">54605a22aa7d649f085242e3</str>
<arr name="toolLogExt">
<str>.xls.lck</str>
<str>.xls.lck</str>
<str>.xls.lck</str>
</arr>
<arr name="messageTech">
<str>Java run-time error</str>
<str>Java run-time error</str>
<str>Java run-time error</str>
</arr>
<arr name="messageId">
<str>546066238d194b463e365194</str>
<str>546090b48d194b463e365196</str>
<str>546090f78d194b463e365198</str>
</arr>
<arr name="eventType">
<str>Run-time error</str>
</arr>
<str name="type">acme</str>
<arr name="messageSolution">
<str>XXXXX</str>
<str>YYYYY</str>
<str>ZZZZZ</str>
</arr>
<arr name="toolID">
<str>54605d7d8d194b463e36517e</str>
<str>54605d7d8d194b463e36517e</str>
<str>54605d7d8d194b463e36517e</str>
</arr>
</doc>
I have read many posts on Stack-Overflow but I haven't come up across an XML Format like this. One of the regular ways is to do individual string processing after getting every node with their respective attributes and maintain a count to construct a Document Model later. However, is there a direct method to obtain all the fields?
Edit1
My approach so far....
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class ParseSAX extends DefaultHandler {
List<String> errorsLister;
String inpXMLFileName;
public ParseSAX(String xmlFileName) {
this.inpXMLFileName = xmlFileName;
errorsLister = new ArrayList<String>();
parseDocument();
}
private void parseDocument() {
// parse
SAXParserFactory factory = SAXParserFactory.newInstance();
try {
SAXParser parser = factory.newSAXParser();
parser.parse(inpXMLFileName, this);
} catch (ParserConfigurationException e) {
System.out.println("ParserConfig error");
} catch (SAXException e) {
System.out.println("SAXException : xml not well formed");
} catch (IOException e) {
System.out.println("IO error");
}
}
#Override
public void startElement(String s, String s1, String elementName, Attributes attributes) throws SAXException {
if (elementName.equalsIgnoreCase("str")) {
String temp = (attributes.getValue("eventId"));
// This would give me the event ID
// Further usage
}
// if current element is publisher
if (elementName.equalsIgnoreCase("arr")) {
String temp = attributes.getValue("messageTech");
}
}
#Override
public void endElement(String s, String s1, String element) throws SAXException {
// Can't seem to figure out what to do here!!!
}
public static void main(String[] args) {
new ParseSAX("..//input2.xml");
// To individually get field values having attribute names
// I know we can do this ....
/**
try {
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = domFactory.newDocumentBuilder();
Document dDoc;
dDoc = builder.parse("..//input2.xml");
XPath xPath = XPathFactory.newInstance().newXPath();
String string = (String) xPath.evaluate("/response/result[#name='response']/doc/arr[#name='messageId']/str", dDoc, XPathConstants.STRING);
} catch (SAXException | IOException | XPathExpressionException | ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
**/
}
}
You can read elements inside any XML tag using below function.
public class XmlFileReader{
public NodeList readXML(String filePath, String tagName, String subTagName, String tagAttr) {
try {
// Get XML file object.
File fXmlFile = new File(filePath);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
doc.getDocumentElement().normalize();
System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
NodeList nodeList = doc.getElementsByTagName(tagName);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element element = (Element) node;
if (element.getAttribute("name").equalsIgnoreCase(tagAttr)) {
NodeList elementsByTagName = element.getElementsByTagName(subTagName);
return elementsByTagName ;
}
}
}
} catch (Exception e) {
StringWriter stack = new StringWriter();
e.printStackTrace(new PrintWriter(stack));
LogManager.fatal(stack.toString(), ReadTemplate.class.getName());
}
return elementsByTagName;
}
}
Function Call :
XmlFileReader xmlFileReader = new XmlFileReader();
NodeList toolLogExtChilds = xmlFileReader.readXML("Path to XML file",
"arr", "str", "toolLogExt");
From the given XML String, i have to update End Date value .
Even though I'm updating the xml in updateNodeValue() method, my final output xml is same as the input xml.
Can someone tell me what is the mistake in this code
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
public class MyClass{
static String strXml = "<INFO><BeginDate>2013-12-02</BeginDate><EndDate>2014-01-31</EndDate></INFO>";
public static void main(String[] args) throws Exception {
System.out.println(strXml);
Document doc = StringToDocument(strXml);
updateNodeValue(doc);
String newxml = DocumentToString(doc);
System.out.println(newxml);
}
public static void updateNodeValue(Document doc) {
Node rootNode = doc.getFirstChild();
NodeList list = rootNode.getChildNodes();
for (int i = 0; i < list.getLength(); i++) {
Element element = (Element) list.item(i);
Node node = list.item(i);
if ("EndDate".equals(node.getNodeName())) {
element.setNodeValue("2013-12-12");
return;
}
}
}
public static String DocumentToString(Document doc) throws Exception {
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
StringWriter writer = new StringWriter();
transformer.transform(new DOMSource(doc), new StreamResult(writer));
String output = writer.getBuffer().toString();
return output;
}
public static Document StringToDocument(String strXml) throws Exception {
Document doc = null;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
StringReader strReader = new StringReader(strXml);
InputSource is = new InputSource(strReader);
doc = (Document) builder.parse(is);
} catch (Exception e) {
e.printStackTrace();
throw e;
}
return doc;
}
}
Useelement.setTextContent(...) in your updateNodeValue method.
The method you should use is not setNodeValue() but setTextContent()
See http://docs.oracle.com/javase/1.5.0/docs/api/org/w3c/dom/Node.html#setNodeValue(java.lang.String)