How to parse xml with namespace using Xpath java - java

I have an xml that has namespace but without prefix. I have xpaths but with prefix that are generated from Schematron. I want to get element detail from that xml using xpath with prefixes.
Here is my XML
<?xml version="1.0" encoding="UTF-8"?>
<section xmlns="http://foo" type="abc" id="ad">
<title>Document Title</title>
<body>
<p>
<t>Document Body</t>
</p>
<p>
<t>Document Body</t>
</p>
<p>
<t>Document Body</t>
</p>
</body>
</section>
And here is the java code
package com;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class XPathTest {
public static void main(String[] args) {
File inputFile = new File("files/my_xml.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
try {
dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
XPath xPath = XPathFactory.newInstance().newXPath();
HashMap<String, String> prefMap = new HashMap<String, String>() {
{
put("q", "http://foo");
}
};
SmartContentNameSpaceContext namespaces = new SmartContentNameSpaceContext(prefMap);
xPath.setNamespaceContext(namespaces);
String expression = "//q:section[1]";
NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
System.out.println(node.getAttributes().item(0));
System.out.println("current element : " + node.getNodeName());
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (XPathExpressionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
I read about NameSpaceContext interface so I have tried
HashMap<String, String> prefMap = new HashMap<String, String>() {
{
put("q", "foo");
}
};
MyNameSpaceContext namespaces = new MyNameSpaceContext(prefMap);
xPath.setNamespaceContext(namespaces);
and here is the MyNameSpaceContext
package com;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.xml.namespace.NamespaceContext;
public class MyNameSpaceContext implements NamespaceContext {
private final Map<String, String> PREF_MAP = new HashMap<String, String>();
public SmartContentNameSpaceContext(final Map<String, String> prefMap) {
PREF_MAP.putAll(prefMap);
}
#Override
public String getNamespaceURI(String prefix) {
return PREF_MAP.get(prefix);
}
#Override
public String getPrefix(String namespaceURI) {
return null;
}
#Override
public Iterator getPrefixes(String namespaceURI) {
return null;
}
}
But this isnot working?

You need to make sure you use
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
dbFactory.setNamespaceAware(true);
if you want to use XPath on a DOM tree. I have not looked further for other problems.

Related

Count number of tags in a XAML file

I have a XAML file where the tags are something like these:
<ActorName Name="Nancy" Enabled="False">
<ActorName Name="Shivani" Enabled="True">
I have attached the code below which are counting these tags but I am not able to sum it up. How to return the sum of all the tags which have Enabled="True">?
package com.test;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Test {
public static void main(String[] args) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder;
try {
builder = factory.newDocumentBuilder();
Document doc=builder.parse("C:\\Users\\nprabhasini\\Desktop\\ShivaSoft.xml");
NodeList nodes = doc.getElementsByTagName("ActorName");
for(int i=0;i<nodes.getLength();i++)
{
printNodeInfo(nodes.item(i));
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
static void printNodeInfo(Node node)
{
if(node.hasAttributes())
{
NamedNodeMap rootAtt = node.getAttributes();
for(int i=0;i<rootAtt.getLength();i++)
{
String s1=rootAtt.item(i).getNodeValue();
if(s1.equalsIgnoreCase("True"))
{
i++;
System.out.println(i);
}
}
}
}
static String nodeType(short type) {
switch(type) {
case Node.ELEMENT_NODE: return "Element";
case Node.ATTRIBUTE_NODE: return "Attribute";
}
return "Unidentified";
}
}
You can use a simple XPath-1.0 expression like
count(//ActorName[#Enabled='True'])
with Java. Simply adjust the XPathExpression below to your needs. Of course, the filename could be sample.xaml or similar.
This is the sample code:
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
public class a {
public static void main(String[] args) {
try {
File inputFile = new File("sample.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
dbFactory.setValidating(false);
DocumentBuilder dBuilder;
dBuilder=dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
XPath xPath = XPathFactory.newInstance().newXPath();
String XPathExpression = "count(//ActorName[#Enabled='True'])";
// Get the count of the elements with the given properties
Number cnt = (Number)xPath.compile(XPathExpression).evaluate(doc, XPathConstants.NUMBER);
// Convert the Float value to an int value and output
System.out.println("Items: "+cnt.intValue());
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (XPathExpressionException e) {
e.printStackTrace();
}
}
}

how to convert xml to avro without ignoring !CDATA content?

I have the following source XML file named customers.xml:
<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd">
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P020]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[AMADOR]]></dealerCustNumber>
<dealerCustName><![CDATA[AMADOR COMPUTERS]]></dealerCustName>
<phoneNumber><![CDATA[800 111 4444]]></phoneNumber>
<faxNumber><![CDATA[780 111 4444]]></faxNumber>
<email xsi:nil="true" />
<customerType><![CDATA[R]]></customerType>
<activeCustomerInd>false</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2000-01-11T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-02-05T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address2>
<address3><![CDATA[AS PER BILL FEB AA/15]]></address3>
<city><![CDATA[CHICAGO]]></city>
<postalCode><![CDATA[Q5S 1E5]]></postalCode>
<state><![CDATA[AB]]></state>
<country><![CDATA[CA]]></country>
<location><![CDATA[FLAGGED FOR DELETION]]></location>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[G]]></divisionCode>
<divisionName><![CDATA[CAR]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[AQ99]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
<SalesRep>
<number><![CDATA[XXX]]></number>
<name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
<type><![CDATA[M]]></type>
<par>0</par>
<email xsi:nil="true" />
<phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
</SalesRep>
</Division>
</Customer>
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P000]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[UU20888]]></dealerCustNumber>
<dealerCustName><![CDATA[ ADVERTISING AND PR]]></dealerCustName>
<phoneNumber xsi:nil="true" />
<faxNumber xsi:nil="true" />
<email xsi:nil="true" />
<customerType><![CDATA[I]]></customerType>
<activeCustomerInd>true</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[M2]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2015-11-18T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-11-19T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[EQUIP]]></address2>
<city><![CDATA[ADER]]></city>
<country><![CDATA[CA]]></country>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[A]]></divisionCode>
<divisionName><![CDATA[AGRO]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[EQ00]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
</Division>
</Customer>
</p:CustomerElement>
I have the following java code, which parses customers.xml into individual "Customer" entities, and then attempts to convert each of them into an AVRO format:
package com.dogsoft.data.xmltoavro;
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class ParseXmlFile {
private static Protocol protocol;
public static void xmlToAvro(File xmlFile, File avroFile) throws IOException, SAXException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
Schema schema = protocol.getType("Element");
Document doc = parse(xmlFile);
DatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.create(schema, avroFile);
Object docElement = doc.getDocumentElement();
fileWriter.append(wrapElement(doc.getDocumentElement()));
}
}
private static GenericData.Record wrapElement(Element el) {
GenericData.Record record = new GenericData.Record(protocol.getType("Element"));
record.put("name", el.getNodeName());
NamedNodeMap attributeNodes = el.getAttributes();
List<GenericData.Record> attrRecords = new ArrayList<>();
for (int i = 0; i < attributeNodes.getLength(); i++) {
Attr attr = (Attr) attributeNodes.item(i);
attrRecords.add(wrapAttr(attr));
}
record.put("attributes", attrRecords);
List<Object> childArray = new ArrayList<>();
NodeList childNodes = el.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
Object nt = node.getNodeType();
if (node.getNodeType() == Node.ELEMENT_NODE)
childArray.add(wrapElement((Element) node));
if (node.getNodeType() == Node.TEXT_NODE)
childArray.add(node.getTextContent());
}
record.put("children", childArray);
return record;
}
private static GenericData.Record wrapAttr(Attr attr) {
GenericData.Record record = new GenericData.Record(protocol.getType("Attribute"));
record.put("name", attr.getName());
record.put("value", attr.getValue());
return record;
}
private static Document parse(File file) throws IOException, SAXException {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
return builder.parse(file);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
GenericRecord record = dataFileReader.next();
Document doc;
try {
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
Element el = unwrapElement(record, doc);
doc.appendChild(el);
saveDocument(doc, xmlFile);
}
private static Element unwrapElement(GenericRecord record, Document doc) {
String name = "" + record.get("name");
Element el = doc.createElement(name);
#SuppressWarnings("unchecked")
GenericArray<GenericRecord> attrArray = (GenericArray<GenericRecord>) record.get("attributes");
for (GenericRecord attrRecord : attrArray)
el.setAttributeNode(unwrapAttr(attrRecord, doc));
#SuppressWarnings("unchecked")
GenericArray<Object> childArray = (GenericArray<Object>) record.get("children");
for (Object childObj : childArray) {
if (childObj instanceof GenericRecord)
el.appendChild(unwrapElement((GenericRecord) childObj, doc));
if (childObj instanceof Utf8)
el.appendChild(doc.createTextNode("" + childObj));
}
return el;
}
private static Attr unwrapAttr(GenericRecord record, Document doc) {
Attr attr = doc.createAttribute("" + record.get("name"));
attr.setValue("" + record.get("value"));
return attr;
}
private static void saveDocument(Document doc, File file) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(file));
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args)
{
Object nodeObject = null;
Node myNode = null;
Transformer transformer = null;
try
{
try {
transformer =
TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("/tmp/customers.xml");
System.out.printf("Version = %s%n", doc.getXmlVersion());
System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
if (doc.hasChildNodes())
{
int customerNumber = 0;
NodeList nl = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(node.toString());
customerNumber++;
File avroFile = new File("/tmp/customer" + customerNumber + ".avro");
File xmlFile = new File("/tmp/customer" + customerNumber + ".xml");
File xmlFile1 = new File("/tmp/customer" + customerNumber + "-foo.xml");
try {
transformer.transform(
new DOMSource(node), new StreamResult(xmlFile));
File outputFile = new File("/tmp/customer" + customerNumber + ".avro");
xmlToAvro(xmlFile, outputFile);
} catch (TransformerException e) {
e.printStackTrace();
}
}
}
}
}
catch (IOException ioe)
{
System.err.println("IOE: " + ioe);
}
catch (SAXException saxe)
{
System.err.println("SAXE: " + saxe);
}
catch (FactoryConfigurationError fce)
{
System.err.println("FCE: " + fce);
}
catch (ParserConfigurationException pce)
{
System.err.println("PCE: " + pce);
}
}
}
This code works overall, but it ignores any content, which is enclosed into
![CDATA[
tag. As it happens, most of the actual useful data in the customers.xml files is enclosed into these tags.
Is there a way to modify this code, to make it not ignore the CDATA contents?
Instead of hand-writing parser code, you might want to split the problem in two parts: first, bind XML into POJO (using JAXB or Jackson XML module); and then write POJO as Avro (using Apache Avro lib, or Jackson Avro module). All you need for that would be POJO definition that matches expected structure for data as XML and Avro. Result should be less code, and basically specifying what needs to happen and now how to do it.

Writing (overriding) same XML file that I read

I made a method to read XML files, but now I need to write or override that same XML file with the same tags, nodes and objects, but with different data inside child nodes.
First I want to make read and write working and then I have some ideas to maybe put the whole XML file into a buffer. Then I could put the whole XML under one class and just write that class again into the same XML this is just an idea, any suggestion or idea is welcome.
My XML file looks like this:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Document Root -->
<DATA>
<Settings USERNAME="test" PASSWORD="test" STATUS="active" / >
</DATA>
This is my code for reading:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
public class Read {
private final static String SETTINGS_LINE = Settings;
public void readXML() {
try {
File xmlFile = new File("Test.xml");
DocumentBuilderFactory documentFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentFactory.newDocumentBuilder();
Document doc = documentBuilder.parse(xmlFile);
// Normalize the XML file
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getDocumentElement().getChildNodes();
for(int temp = 0; temp < nodeList.getLength(); temp++) {
Node node = nodeList.item(temp);
if(node instanceof Element && node.getNodeName() == SETTINGS_LINE) {
Element settings = (Element) node;
System.out.println("User" +settings.getAttribute("USERNAME"));
System.out.println("Password" +settings.getAttribute("PASSWORD"));
System.out.println("Status" +settings.getAttribute("STATUS"));
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
And this is the code for writing that is not working:
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Modify {
private final static String SETTINGS_LINE = "Settings";
public static void main(String argv[]) {
try {
String filepath = "test.xml";
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
// Normalize the XML File
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node instanceof Element && node.getNodeName() == SETTINGS_LINE) {
Element settings = (Element) node;
if("USERNAME".equals(node.getChildNodes())){
node.setTextContent("mivnadic");
}
}
// Write the content into xml file
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File(filepath));
transformer.transform(source, result);
}
System.out.println("File saved");
} catch (ParserConfigurationException pce) {
pce.printStackTrace();
} catch (TransformerException tfe) {
tfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} catch (SAXException sae) {
sae.printStackTrace();
}
}
}

Java Xml Parsing :Distinguish two Xml Elements by missing tag

From a Webserver request i get an XML Response,which contains my needed data.
it looks like (excerpt):
<ctc:BasePrice>
<cgc:PriceAmount amountCurrencyID="EUR">18.75</cbc:PriceAmount>
<cgc:BaseQuantity quantityUnitCode="EA">1</cbc:BaseQuantity>
</ctc:BasePrice>
<ctc:BasePrice>
<cgc:PriceAmount amountCurrencyID="EUR">18.25</cbc:PriceAmount>
<cgc:BaseQuantity quantityUnitCode="EA">1</cbc:BaseQuantity>
<cgc:MinimumQuantity quantityUnitCode="EA">3</cbc:MinimumQuantity>
<ctc:BasePrice>
What i need is the first "PriceAmount" value,which could be a different price then the second.
But how can i make sure to retrieve the correct one,by "telling" the parser he should take the element which does not contain the "MinimumQuantity" Field and distinguish them ?
I read a lot in Sax etc but could find an idea how to implement a "logic" for that.
Maybe someone ran into similar problem.Thanks in advance for every hint.
You could use xpath for this. The expression "//*[local-name()='MinimumQuantity']/../*[local-name()='PriceAmount']" should return what you want. eg
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class XpathParser {
public static void main(String[] args) {
try {
String xml = "<root>"
+ "<ctc:BasePrice>"
+ "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.75</cgc:PriceAmount>"
+ "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>"
+ "</ctc:BasePrice>"
+ "<ctc:BasePrice>"
+ "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.25</cgc:PriceAmount>"
+ "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>"
+ "<cgc:MinimumQuantity quantityUnitCode=\"EA\">3</cgc:MinimumQuantity>"
+ "</ctc:BasePrice>"
+ "</root>";
InputStream xmlStream = new ByteArrayInputStream(xml.getBytes());
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(xmlStream);
XPath xPath = XPathFactory.newInstance().newXPath();
String expression = "//*[local-name() = 'BasePrice' and not(descendant::*[local-name() = 'MinimumQuantity'])]/*[local-name()='PriceAmount']";
NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(xmlDocument, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
System.out.println(nodeList.item(i).getFirstChild().getNodeValue());
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (XPathExpressionException e) {
e.printStackTrace();
}
}
}
Dom Parser way
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class DomParser{
public static void main(String[] args) {
try {
String xml = "<root>"
+ "<ctc:BasePrice>"
+ "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.75</cgc:PriceAmount>"
+ "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>"
+ "</ctc:BasePrice>"
+ "<ctc:BasePrice>"
+ "<cgc:PriceAmount amountCurrencyID=\"EUR\">18.25</cgc:PriceAmount>"
+ "<cgc:BaseQuantity quantityUnitCode=\"EA\">1</cgc:BaseQuantity>"
+ "<cgc:MinimumQuantity quantityUnitCode=\"EA\">3</cgc:MinimumQuantity>"
+ "</ctc:BasePrice>"
+ "</root>";
InputStream xmlStream = new ByteArrayInputStream(xml.getBytes());
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(xmlStream);
xmlDocument.getDocumentElement().normalize();
NodeList nList = xmlDocument.getElementsByTagName("ctc:BasePrice");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) nNode;
if(eElement.getElementsByTagName("cgc:MinimumQuantity").getLength() == 0){
System.out.println(eElement.getElementsByTagName("cgc:PriceAmount").item(0).getTextContent());
}
}
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}

Java DOM: cannot write adapted XML to file

I have the following simplified XML:
<?xml version="1.0" encoding="UTF-8"?>
<ExportData>
<Rows>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>4302150</menuItemNumberlong>
<productQuantityinttrue>14</productQuantityinttrue>
<productValueInclVATdecimaltrue>1.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>1.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>333555</menuItemNumberlong>
<productQuantityinttrue>0</productQuantityinttrue>
<productValueInclVATdecimaltrue>3.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>3.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>1235665</menuItemNumberlong>
<productQuantityinttrue>5</productQuantityinttrue>
<productValueInclVATdecimaltrue>4.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>4.775701</productValueExclVATdecimaltrue>
</R>
</Rows>
</ExportData>
I need to delete each complete <R> element if the <productQuantityinttrue> element equals "0".
I came up with the following Java code:
package filterPositions;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class FilterPositions {
public static String result = "";
public static void main(String[] args) throws Exception {
try {
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
File filePath = new File("C:/LSA_SALES_EXPORT_1507_test_zero_qu.xml");
Document doc = docBuilder.parse(filePath);
Node rootNode = doc.getDocumentElement();
final Element element = doc.getDocumentElement();
// output new XML Document
DocumentBuilder parser = docFactory.newDocumentBuilder();
Document newdoc = parser.newDocument();
newdoc.adoptNode(traversingXML(element));
writeXmlFile(newdoc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml");
System.out.println("Done...");
System.out.println("Exiting...");
} catch (Exception e) {
e.printStackTrace();
}
}
public static Element traversingXML(Element element) {
NodeList positionen = element.getElementsByTagName("R");
Element e = null;
for (int i = 0; i < positionen.getLength(); i++) {
e = (Element) positionen.item(i);
for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) {
if (child instanceof Element && "productQuantityinttrue".equals(child.getNodeName())&& "0".equals(child.getTextContent())) {
e.getParentNode().removeChild(e);
}
}
}
System.out.println(e);
return e;
}
public static void writeXmlFile(Document doc, String filename) {
try {
// Prepare the DOM document for writing
Source source = new DOMSource();
// Prepare the output file
File file = new File(filename);
Result result = new StreamResult(file);
// Write the DOM document to the file
Transformer xformer = TransformerFactory.newInstance()
.newTransformer();
xformer.transform(source, result);
} catch (TransformerConfigurationException e) {
} catch (TransformerException e) {
}
}
}
I am not sure if my method "traversingXML" is working properly. My problem right now is that the adapted XML structure (one deleted) is not written to newdoc.
You don't copy the original document to newdoc; instead you create a new, empty XML document.
Instead, try this code:
...
final Element element = doc.getDocumentElement(); // original code up to here
traversingXML(element); // delete the node
writeXmlFile(doc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml"); // save modified document

Categories

Resources