I have an XML file having many nodes with attributes as well as child elements with same field name:
<doc>
<str name="eventId">54605a22aa7d649f085242e3</str>
<arr name="toolLogExt">
<str>.xls.lck</str>
<str>.xls.lck</str>
<str>.xls.lck</str>
</arr>
<arr name="messageTech">
<str>Java run-time error</str>
<str>Java run-time error</str>
<str>Java run-time error</str>
</arr>
<arr name="messageId">
<str>546066238d194b463e365194</str>
<str>546090b48d194b463e365196</str>
<str>546090f78d194b463e365198</str>
</arr>
<arr name="eventType">
<str>Run-time error</str>
</arr>
<str name="type">acme</str>
<arr name="messageSolution">
<str>XXXXX</str>
<str>YYYYY</str>
<str>ZZZZZ</str>
</arr>
<arr name="toolID">
<str>54605d7d8d194b463e36517e</str>
<str>54605d7d8d194b463e36517e</str>
<str>54605d7d8d194b463e36517e</str>
</arr>
</doc>
I have read many posts on Stack-Overflow but I haven't come up across an XML Format like this. One of the regular ways is to do individual string processing after getting every node with their respective attributes and maintain a count to construct a Document Model later. However, is there a direct method to obtain all the fields?
Edit1
My approach so far....
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class ParseSAX extends DefaultHandler {
List<String> errorsLister;
String inpXMLFileName;
public ParseSAX(String xmlFileName) {
this.inpXMLFileName = xmlFileName;
errorsLister = new ArrayList<String>();
parseDocument();
}
private void parseDocument() {
// parse
SAXParserFactory factory = SAXParserFactory.newInstance();
try {
SAXParser parser = factory.newSAXParser();
parser.parse(inpXMLFileName, this);
} catch (ParserConfigurationException e) {
System.out.println("ParserConfig error");
} catch (SAXException e) {
System.out.println("SAXException : xml not well formed");
} catch (IOException e) {
System.out.println("IO error");
}
}
#Override
public void startElement(String s, String s1, String elementName, Attributes attributes) throws SAXException {
if (elementName.equalsIgnoreCase("str")) {
String temp = (attributes.getValue("eventId"));
// This would give me the event ID
// Further usage
}
// if current element is publisher
if (elementName.equalsIgnoreCase("arr")) {
String temp = attributes.getValue("messageTech");
}
}
#Override
public void endElement(String s, String s1, String element) throws SAXException {
// Can't seem to figure out what to do here!!!
}
public static void main(String[] args) {
new ParseSAX("..//input2.xml");
// To individually get field values having attribute names
// I know we can do this ....
/**
try {
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = domFactory.newDocumentBuilder();
Document dDoc;
dDoc = builder.parse("..//input2.xml");
XPath xPath = XPathFactory.newInstance().newXPath();
String string = (String) xPath.evaluate("/response/result[#name='response']/doc/arr[#name='messageId']/str", dDoc, XPathConstants.STRING);
} catch (SAXException | IOException | XPathExpressionException | ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
**/
}
}
You can read elements inside any XML tag using below function.
public class XmlFileReader{
public NodeList readXML(String filePath, String tagName, String subTagName, String tagAttr) {
try {
// Get XML file object.
File fXmlFile = new File(filePath);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
doc.getDocumentElement().normalize();
System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
NodeList nodeList = doc.getElementsByTagName(tagName);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element element = (Element) node;
if (element.getAttribute("name").equalsIgnoreCase(tagAttr)) {
NodeList elementsByTagName = element.getElementsByTagName(subTagName);
return elementsByTagName ;
}
}
}
} catch (Exception e) {
StringWriter stack = new StringWriter();
e.printStackTrace(new PrintWriter(stack));
LogManager.fatal(stack.toString(), ReadTemplate.class.getName());
}
return elementsByTagName;
}
}
Function Call :
XmlFileReader xmlFileReader = new XmlFileReader();
NodeList toolLogExtChilds = xmlFileReader.readXML("Path to XML file",
"arr", "str", "toolLogExt");
Related
I have an xml that has namespace but without prefix. I have xpaths but with prefix that are generated from Schematron. I want to get element detail from that xml using xpath with prefixes.
Here is my XML
<?xml version="1.0" encoding="UTF-8"?>
<section xmlns="http://foo" type="abc" id="ad">
<title>Document Title</title>
<body>
<p>
<t>Document Body</t>
</p>
<p>
<t>Document Body</t>
</p>
<p>
<t>Document Body</t>
</p>
</body>
</section>
And here is the java code
package com;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class XPathTest {
public static void main(String[] args) {
File inputFile = new File("files/my_xml.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
try {
dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
XPath xPath = XPathFactory.newInstance().newXPath();
HashMap<String, String> prefMap = new HashMap<String, String>() {
{
put("q", "http://foo");
}
};
SmartContentNameSpaceContext namespaces = new SmartContentNameSpaceContext(prefMap);
xPath.setNamespaceContext(namespaces);
String expression = "//q:section[1]";
NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
System.out.println(node.getAttributes().item(0));
System.out.println("current element : " + node.getNodeName());
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (XPathExpressionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
I read about NameSpaceContext interface so I have tried
HashMap<String, String> prefMap = new HashMap<String, String>() {
{
put("q", "foo");
}
};
MyNameSpaceContext namespaces = new MyNameSpaceContext(prefMap);
xPath.setNamespaceContext(namespaces);
and here is the MyNameSpaceContext
package com;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.xml.namespace.NamespaceContext;
public class MyNameSpaceContext implements NamespaceContext {
private final Map<String, String> PREF_MAP = new HashMap<String, String>();
public SmartContentNameSpaceContext(final Map<String, String> prefMap) {
PREF_MAP.putAll(prefMap);
}
#Override
public String getNamespaceURI(String prefix) {
return PREF_MAP.get(prefix);
}
#Override
public String getPrefix(String namespaceURI) {
return null;
}
#Override
public Iterator getPrefixes(String namespaceURI) {
return null;
}
}
But this isnot working?
You need to make sure you use
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
dbFactory.setNamespaceAware(true);
if you want to use XPath on a DOM tree. I have not looked further for other problems.
I have the following source XML file named customers.xml:
<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd">
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P020]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[AMADOR]]></dealerCustNumber>
<dealerCustName><![CDATA[AMADOR COMPUTERS]]></dealerCustName>
<phoneNumber><![CDATA[800 111 4444]]></phoneNumber>
<faxNumber><![CDATA[780 111 4444]]></faxNumber>
<email xsi:nil="true" />
<customerType><![CDATA[R]]></customerType>
<activeCustomerInd>false</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2000-01-11T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-02-05T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address2>
<address3><![CDATA[AS PER BILL FEB AA/15]]></address3>
<city><![CDATA[CHICAGO]]></city>
<postalCode><![CDATA[Q5S 1E5]]></postalCode>
<state><![CDATA[AB]]></state>
<country><![CDATA[CA]]></country>
<location><![CDATA[FLAGGED FOR DELETION]]></location>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[G]]></divisionCode>
<divisionName><![CDATA[CAR]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[AQ99]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
<SalesRep>
<number><![CDATA[XXX]]></number>
<name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
<type><![CDATA[M]]></type>
<par>0</par>
<email xsi:nil="true" />
<phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
</SalesRep>
</Division>
</Customer>
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P000]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[UU20888]]></dealerCustNumber>
<dealerCustName><![CDATA[ ADVERTISING AND PR]]></dealerCustName>
<phoneNumber xsi:nil="true" />
<faxNumber xsi:nil="true" />
<email xsi:nil="true" />
<customerType><![CDATA[I]]></customerType>
<activeCustomerInd>true</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[M2]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2015-11-18T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-11-19T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[EQUIP]]></address2>
<city><![CDATA[ADER]]></city>
<country><![CDATA[CA]]></country>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[A]]></divisionCode>
<divisionName><![CDATA[AGRO]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[EQ00]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
</Division>
</Customer>
</p:CustomerElement>
I have the following java code, which parses customers.xml into individual "Customer" entities, and then attempts to convert each of them into an AVRO format:
package com.dogsoft.data.xmltoavro;
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class ParseXmlFile {
private static Protocol protocol;
public static void xmlToAvro(File xmlFile, File avroFile) throws IOException, SAXException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
Schema schema = protocol.getType("Element");
Document doc = parse(xmlFile);
DatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.create(schema, avroFile);
Object docElement = doc.getDocumentElement();
fileWriter.append(wrapElement(doc.getDocumentElement()));
}
}
private static GenericData.Record wrapElement(Element el) {
GenericData.Record record = new GenericData.Record(protocol.getType("Element"));
record.put("name", el.getNodeName());
NamedNodeMap attributeNodes = el.getAttributes();
List<GenericData.Record> attrRecords = new ArrayList<>();
for (int i = 0; i < attributeNodes.getLength(); i++) {
Attr attr = (Attr) attributeNodes.item(i);
attrRecords.add(wrapAttr(attr));
}
record.put("attributes", attrRecords);
List<Object> childArray = new ArrayList<>();
NodeList childNodes = el.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
Object nt = node.getNodeType();
if (node.getNodeType() == Node.ELEMENT_NODE)
childArray.add(wrapElement((Element) node));
if (node.getNodeType() == Node.TEXT_NODE)
childArray.add(node.getTextContent());
}
record.put("children", childArray);
return record;
}
private static GenericData.Record wrapAttr(Attr attr) {
GenericData.Record record = new GenericData.Record(protocol.getType("Attribute"));
record.put("name", attr.getName());
record.put("value", attr.getValue());
return record;
}
private static Document parse(File file) throws IOException, SAXException {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
return builder.parse(file);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
GenericRecord record = dataFileReader.next();
Document doc;
try {
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
Element el = unwrapElement(record, doc);
doc.appendChild(el);
saveDocument(doc, xmlFile);
}
private static Element unwrapElement(GenericRecord record, Document doc) {
String name = "" + record.get("name");
Element el = doc.createElement(name);
#SuppressWarnings("unchecked")
GenericArray<GenericRecord> attrArray = (GenericArray<GenericRecord>) record.get("attributes");
for (GenericRecord attrRecord : attrArray)
el.setAttributeNode(unwrapAttr(attrRecord, doc));
#SuppressWarnings("unchecked")
GenericArray<Object> childArray = (GenericArray<Object>) record.get("children");
for (Object childObj : childArray) {
if (childObj instanceof GenericRecord)
el.appendChild(unwrapElement((GenericRecord) childObj, doc));
if (childObj instanceof Utf8)
el.appendChild(doc.createTextNode("" + childObj));
}
return el;
}
private static Attr unwrapAttr(GenericRecord record, Document doc) {
Attr attr = doc.createAttribute("" + record.get("name"));
attr.setValue("" + record.get("value"));
return attr;
}
private static void saveDocument(Document doc, File file) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(file));
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args)
{
Object nodeObject = null;
Node myNode = null;
Transformer transformer = null;
try
{
try {
transformer =
TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("/tmp/customers.xml");
System.out.printf("Version = %s%n", doc.getXmlVersion());
System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
if (doc.hasChildNodes())
{
int customerNumber = 0;
NodeList nl = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(node.toString());
customerNumber++;
File avroFile = new File("/tmp/customer" + customerNumber + ".avro");
File xmlFile = new File("/tmp/customer" + customerNumber + ".xml");
File xmlFile1 = new File("/tmp/customer" + customerNumber + "-foo.xml");
try {
transformer.transform(
new DOMSource(node), new StreamResult(xmlFile));
File outputFile = new File("/tmp/customer" + customerNumber + ".avro");
xmlToAvro(xmlFile, outputFile);
} catch (TransformerException e) {
e.printStackTrace();
}
}
}
}
}
catch (IOException ioe)
{
System.err.println("IOE: " + ioe);
}
catch (SAXException saxe)
{
System.err.println("SAXE: " + saxe);
}
catch (FactoryConfigurationError fce)
{
System.err.println("FCE: " + fce);
}
catch (ParserConfigurationException pce)
{
System.err.println("PCE: " + pce);
}
}
}
This code works overall, but it ignores any content, which is enclosed into
![CDATA[
tag. As it happens, most of the actual useful data in the customers.xml files is enclosed into these tags.
Is there a way to modify this code, to make it not ignore the CDATA contents?
Instead of hand-writing parser code, you might want to split the problem in two parts: first, bind XML into POJO (using JAXB or Jackson XML module); and then write POJO as Avro (using Apache Avro lib, or Jackson Avro module). All you need for that would be POJO definition that matches expected structure for data as XML and Avro. Result should be less code, and basically specifying what needs to happen and now how to do it.
I made a method to read XML files, but now I need to write or override that same XML file with the same tags, nodes and objects, but with different data inside child nodes.
First I want to make read and write working and then I have some ideas to maybe put the whole XML file into a buffer. Then I could put the whole XML under one class and just write that class again into the same XML this is just an idea, any suggestion or idea is welcome.
My XML file looks like this:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Document Root -->
<DATA>
<Settings USERNAME="test" PASSWORD="test" STATUS="active" / >
</DATA>
This is my code for reading:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
public class Read {
private final static String SETTINGS_LINE = Settings;
public void readXML() {
try {
File xmlFile = new File("Test.xml");
DocumentBuilderFactory documentFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentFactory.newDocumentBuilder();
Document doc = documentBuilder.parse(xmlFile);
// Normalize the XML file
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getDocumentElement().getChildNodes();
for(int temp = 0; temp < nodeList.getLength(); temp++) {
Node node = nodeList.item(temp);
if(node instanceof Element && node.getNodeName() == SETTINGS_LINE) {
Element settings = (Element) node;
System.out.println("User" +settings.getAttribute("USERNAME"));
System.out.println("Password" +settings.getAttribute("PASSWORD"));
System.out.println("Status" +settings.getAttribute("STATUS"));
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
And this is the code for writing that is not working:
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Modify {
private final static String SETTINGS_LINE = "Settings";
public static void main(String argv[]) {
try {
String filepath = "test.xml";
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
// Normalize the XML File
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node instanceof Element && node.getNodeName() == SETTINGS_LINE) {
Element settings = (Element) node;
if("USERNAME".equals(node.getChildNodes())){
node.setTextContent("mivnadic");
}
}
// Write the content into xml file
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File(filepath));
transformer.transform(source, result);
}
System.out.println("File saved");
} catch (ParserConfigurationException pce) {
pce.printStackTrace();
} catch (TransformerException tfe) {
tfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} catch (SAXException sae) {
sae.printStackTrace();
}
}
}
I have the following simplified XML:
<?xml version="1.0" encoding="UTF-8"?>
<ExportData>
<Rows>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>4302150</menuItemNumberlong>
<productQuantityinttrue>14</productQuantityinttrue>
<productValueInclVATdecimaltrue>1.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>1.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>333555</menuItemNumberlong>
<productQuantityinttrue>0</productQuantityinttrue>
<productValueInclVATdecimaltrue>3.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>3.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>1235665</menuItemNumberlong>
<productQuantityinttrue>5</productQuantityinttrue>
<productValueInclVATdecimaltrue>4.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>4.775701</productValueExclVATdecimaltrue>
</R>
</Rows>
</ExportData>
I need to delete each complete <R> element if the <productQuantityinttrue> element equals "0".
I came up with the following Java code:
package filterPositions;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class FilterPositions {
public static String result = "";
public static void main(String[] args) throws Exception {
try {
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
File filePath = new File("C:/LSA_SALES_EXPORT_1507_test_zero_qu.xml");
Document doc = docBuilder.parse(filePath);
Node rootNode = doc.getDocumentElement();
final Element element = doc.getDocumentElement();
// output new XML Document
DocumentBuilder parser = docFactory.newDocumentBuilder();
Document newdoc = parser.newDocument();
newdoc.adoptNode(traversingXML(element));
writeXmlFile(newdoc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml");
System.out.println("Done...");
System.out.println("Exiting...");
} catch (Exception e) {
e.printStackTrace();
}
}
public static Element traversingXML(Element element) {
NodeList positionen = element.getElementsByTagName("R");
Element e = null;
for (int i = 0; i < positionen.getLength(); i++) {
e = (Element) positionen.item(i);
for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) {
if (child instanceof Element && "productQuantityinttrue".equals(child.getNodeName())&& "0".equals(child.getTextContent())) {
e.getParentNode().removeChild(e);
}
}
}
System.out.println(e);
return e;
}
public static void writeXmlFile(Document doc, String filename) {
try {
// Prepare the DOM document for writing
Source source = new DOMSource();
// Prepare the output file
File file = new File(filename);
Result result = new StreamResult(file);
// Write the DOM document to the file
Transformer xformer = TransformerFactory.newInstance()
.newTransformer();
xformer.transform(source, result);
} catch (TransformerConfigurationException e) {
} catch (TransformerException e) {
}
}
}
I am not sure if my method "traversingXML" is working properly. My problem right now is that the adapted XML structure (one deleted) is not written to newdoc.
You don't copy the original document to newdoc; instead you create a new, empty XML document.
Instead, try this code:
...
final Element element = doc.getDocumentElement(); // original code up to here
traversingXML(element); // delete the node
writeXmlFile(doc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml"); // save modified document
I've written class to parse some xml into an object and it's not working correctly, when I try and get the value of a node I get a null rather than the contents of the node.
Here is a simplified version of my class that just does the xml parsing of a single node:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.Logger;
import org.apache.log4j.xml.DOMConfigurator;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class XmlReaderCutDown {
private static Logger logger = Logger.getLogger(CtiButtonsXmlReader.class);
public static void testXml(String confFile){
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
Document doc = docBuilder.parse(new File(confFile));
doc.getDocumentElement().normalize();
if (logger.isDebugEnabled()){
logger.debug("The root element is " + doc.getDocumentElement().getNodeName());
}
NodeList rows = doc.getElementsByTagName("row");
NodeList topRowButtons = ((Element)rows.item(0)).getElementsByTagName("button");
logger.debug("Top row has " +topRowButtons.getLength() + " items.");
Node buttonNode = topRowButtons.item(0);
NodeList nodeList = ((Element)buttonNode).getElementsByTagName("id");
logger.debug("Node list count for "+ "id" + " = " + nodeList.getLength());
Element element = (Element)nodeList.item(0);
String xx = element.getNodeValue();
logger.debug(xx);
String elementValue = ((Node)element).getNodeValue();
if (elementValue != null) {
elementValue = elementValue.trim();
}
else {
logger.debug("Value was null");
}
logger.debug("Node id = "+ elementValue);
} catch (ParserConfigurationException e) {
logger.fatal(e.toString(),e);
} catch (SAXException e) {
logger.fatal(e.toString(),e);
} catch (IOException e) {
logger.fatal(e.toString(),e);
} catch (Exception e) {
logger.fatal(e.toString(),e);
}
}
public static void main(String[] args){
DOMConfigurator.configure("log4j.xml");
testXml("test.xml");
}
}
And here is a stripped down version of my xml file:
<?xml version="1.0"?>
<root>
<row>
<button>
<id>this is an id</id>
<action>action</action>
<image-src>../images/img.png</image-src>
<alt-text>alt txt</alt-text>
<tool-tip>Tool tip</tool-tip>
</button>
</row>
</root>
This is what the logging statments output:
DEBUG XmlReaderCutDown - The root element is root
DEBUG XmlReaderCutDown - Top row has 1 items.
DEBUG XmlReaderCutDown - Node list count for id = 1
DEBUG XmlReaderCutDown -
DEBUG XmlReaderCutDown - Value was null
DEBUG XmlReaderCutDown - Node id = null
Why isn't it getting the text in the xml node?
I'm running using JDK 1.6_10
Because the element you get is the parent element of the text one, containing the text you need. To access the text of this element, you shall use getTextContent instead of getNodeValue.
For more information, see the table in the Node javadoc.
See http://java.sun.com/j2se/1.5.0/docs/api/org/w3c/dom/Node.html . Element.getNodeValue() always returns null. You should use Element.getTextContent()
Consider using XPath as an alternative to manually walking nodes:
public static void testXml(String confFile)
throws XPathExpressionException {
XPathFactory xpFactory = XPathFactory.newInstance();
XPath xpath = xpFactory.newXPath();
NodeList rows = (NodeList) xpath.evaluate("root/row",
new InputSource(confFile), XPathConstants.NODESET);
for (int i = 0; i < rows.getLength(); i++) {
Node row = rows.item(i);
String id = xpath.evaluate("button/id", row);
System.out.println("id=" + id);
}
}
public static void main(String[] args)
throws XPathExpressionException {
testXml("test.xml");
}