I am using XPath to parse an XML document. Please find the XML below:
<?xml version="1.0" encoding="UTF-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<bookEvent>
<bookName>harry_potter</bookName>
<bookEntity>comic</bookEntity>
<bookEntityId>10987645</bookEntityId>
<bookParameter>
<name>Name1</name>
<value>value1</value>
</bookParameter>
<bookParameter>
<name>Name2</name>
<value>value2</value>
</bookParameter>
<bookParameter>
<name>Name3</name>
<value>value3</value>
</bookParameter>
<bookParameter>
<name>Name4</name>
<value>value4</value>
</bookParameter>
<bookParameter>
<name>Name5</name>
<value>value5</value>
</bookParameter>
</bookEvent>
</soap:Body>
</soap:Envelope>
Here I would like to convert BookParameters to Map like below
{"Name1":"value1","Name2":"value2" etc}
I have tried the code below and I can get a Map, but not in the expected format:
// Parses book.xml and tries to collect the parameter elements' children into a map.
try{
Map<String,String> eventParameters = new HashMap<>();
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse("book.xml");
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
// NOTE(review): the sample XML names these elements bookParameter, not eventParameter, yet the
// output shown below contains their values -- confirm which expression was actually run.
NodeList nodeList = (NodeList)xpath.compile("//bookEvent//eventParameter").evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if(node.hasChildNodes()) {
NodeList childNodes = node.getChildNodes();
for (int j = 0; j < childNodes.getLength(); j++) {
Node childNode = childNodes.item(j);
// Only element children are considered; other node types are skipped.
if (childNode.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(childNode.getNodeName()+"::"+childNode.getNodeValue()+"::"+childNode.getTextContent());
// Every element child (<name> and <value> alike) is stored with its own text as both key and
// value, so the map ends up with self-mapped entries rather than name -> value pairs.
eventParameters.put(childNode.getTextContent(),childNode.getTextContent());
}
}
}
}
System.out.println("print map::"+eventParameters);
} catch (Exception e) {
e.printStackTrace();
}
The output looks like this
print map::{Name3=Name3, Name4=Name4, value5=value5, Name5=Name5, value2=value2, value1=value1, value4=value4, value3=value3, Name1=Name1, Name2=Name2}
Could somebody please guide me on how to create the map below from the XML? Any help would be appreciated.
{"Name1":"value1","Name2":"value2" etc}
You can do it as a one-liner in XPath 3.1:
(: Builds one single-entry map (name -> value) per bookParameter, merges them, and serializes the result as JSON. :)
map:merge(//bookParameter!map{string(name): string(value)})
=> serialize(map{'method':'json'})
You can run XPath 3.1 from Java by installing Saxon-HE 9.8 (open source)
Use Below code :
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Reads an XML file and collects the {@code <name>}/{@code <value>} children of every
 * {@code <bookParameter>} element into {@link #hMap}, then prints each pair as
 * {@code name:value}.
 */
public class ReadXMLFile {

    /** File parsed when no path is supplied on the command line. */
    private static final String DEFAULT_XML_PATH = "C:\\Users\\jaikant\\Desktop\\QUESTION.xml";

    /** Parameter name mapped to parameter value, in document order. */
    public static Map<String,String> hMap = new LinkedHashMap<>();

    /**
     * Parses the XML file and fills {@link #hMap}.
     *
     * @param argv optional; {@code argv[0]} is the path of the XML file to read. When absent,
     *             {@link #DEFAULT_XML_PATH} is used.
     */
    public static void main(String argv[]) {
        String path = (argv != null && argv.length > 0) ? argv[0] : DEFAULT_XML_PATH;
        try {
            File fXmlFile = new File(path);
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(fXmlFile);
            doc.getDocumentElement().normalize();
            NodeList nodeList = doc.getElementsByTagName("bookParameter");
            for (int parameter = 0; parameter < nodeList.getLength(); parameter++) {
                Node node = nodeList.item(parameter);
                if (node.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element eElement = (Element) node;
                Node nameNode = eElement.getElementsByTagName("name").item(0);
                Node valueNode = eElement.getElementsByTagName("value").item(0);
                // A parameter lacking either child cannot form a pair; skip it instead of
                // failing with a NullPointerException and losing the remaining parameters.
                if (nameNode == null || valueNode == null) {
                    continue;
                }
                hMap.put(nameNode.getTextContent(), valueNode.getTextContent());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        hMap.forEach((name, value) -> System.out.println(name + ":" + value));
    }
}
It will print exactly what you are looking for.
Related
I want to delete a XML node that contains a PDF in Base64. This is an example:
<?xml version="1.0" encoding="UTF-8"?>
<getResult>
<id>null</id>
<pdf>ioje98fh23fjkiwf72322342</pdf>
</getResult>
First, I transform the XML in String to Document but the result is null. This is my code:
// Parses an in-memory XML string and prints each top-level child node of the document.
DocumentBuilder dbf = null;
Document doc = null;
try {
dbf = DocumentBuilderFactory.newInstance().newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader("<getResult><id>null</id><pdf>ioje98fh23fjkiwf72322342</pdf></getResult>"));
doc = dbf.parse(is);
// The document's only child here is the root element <getResult>.
NodeList children = doc. getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
Node currentChild = children.item(i);
// Printing a Node relies on its toString(); the "[getResult: null]" output presumably has the
// form [nodeName: nodeValue], and an element has no node value -- so the parse itself succeeded.
System.out.println(currentChild);
}
} catch (Exception e) {
System.out.println(e.getMessage().toString());
}
The result is always: [getResult: null]
Considering that the main node can vary but the structure does not, How can I get the PDF node?
Here is the code you could use to retrieve the data.
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.w3c.dom.CharacterData;
/**
 * Parses a small in-memory XML document and prints the text of the {@code <id>} and
 * {@code <pdf>} children of every {@code <getResult>} element.
 */
public class LabFour {

    public static void main(String[] args) {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            InputSource is = new InputSource();
            is.setCharacterStream(
                    new StringReader("<getResult><id>null</id><pdf>ioje98fh23fjkiwf72322342</pdf></getResult>"));
            Document doc = builder.parse(is);
            NodeList nodes = doc.getElementsByTagName("getResult");
            for (int i = 0; i < nodes.getLength(); i++) {
                Element element = (Element) nodes.item(i);

                Element id = (Element) element.getElementsByTagName("id").item(0);
                System.out.println("id: " + getCharacterDataFromElement(id));

                // Look up <pdf> the same way; the helper takes the Element, not the NodeList.
                Element pdf = (Element) element.getElementsByTagName("pdf").item(0);
                System.out.println("pdf: " + getCharacterDataFromElement(pdf));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the character data held by the first child of the given element.
     *
     * @param e the element to read; may be {@code null} when the tag was not found
     * @return the data of the first child if it is a {@link CharacterData} node, otherwise
     *         {@code "?"} (also for a {@code null} or empty element)
     */
    public static String getCharacterDataFromElement(Element e) {
        if (e == null) {
            return "?";
        }
        Node child = e.getFirstChild();
        if (child instanceof CharacterData) {
            return ((CharacterData) child).getData();
        }
        return "?";
    }
}
SimpleXml can do it:
// Parses the XML string, drops one child of the root, and prints the remaining document.
// NOTE(review): Element here is presumably SimpleXml's own element type rather than
// org.w3c.dom.Element -- confirm against the library.
final SimpleXml simple = new SimpleXml();
final Element element = simple.fromXml(data);
// Removes the child at index 1; judging by the output shown below, that is <pdf>
// (index 0 being <id>). The index is positional, so it depends on the child order.
element.children.remove(1);
System.out.println(simple.domToXml(element));
Will output:
<getResult><id>null</id></getResult>
From maven central:
<dependency>
<groupId>com.github.codemonstur</groupId>
<artifactId>simplexml</artifactId>
<version>1.4.0</version>
</dependency>
I have an xml as follows
<?xml version="1.0" encoding="ISO-8859-1"?><TXNEXP FileDate="2017-05-23" FileName="/cortex/tsd/out/OPTSKRtxnexp20170523.xml" Instcode="SKR" TotNumTxns="74330">
<AUTHADV>
<LOCALDATE>2017-05-22</LOCALDATE>
<LOCALTIME>200011</LOCALTIME>
<PAN>336890380<PAN>
</AUTHADV>
<AUTHREV>
<LOCALDATE>2017-05-22</LOCALDATE>
<LOCALTIME>200011</LOCALTIME>
<PAN>336890380<PAN>
</AUTHREV>
<FINAL>
<LOCALDATE>2017-05-22</LOCALDATE>
<LOCALTIME>200011</LOCALTIME>
<PAN>336890380<PAN>
</FINAL>
</TXNEXP>
Now, I am modifying the value of PAN tag and writing it back to the xml but I am not able to do so for all the PAN tags.
Here is what I am doing.
// Iterates over the TXNEXP elements (only the root in the sample above) and picks one PAN from each.
NodeList node = doc.getElementsByTagName("TXNEXP");
Element emp = null;
for (int i = 0; i < node.getLength(); i++) {
emp = (Element) node.item(i);
// item(0) selects only the first PAN found under this TXNEXP; the PAN list is never iterated.
Node name = emp.getElementsByTagName("PAN").item(0).getFirstChild();
//Modifying the tag
}
From the above code only PAN under AUTHADV tag gets modified and the rest two values don't change.
How can I ensure all the PAN tags to get modified ?
This is not the prettiest solution but after you fix the missing slash in the PAN closing tags this will work.
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.File;
/**
 * Reads {@code data.xml}, prints the text of the first {@code PAN} under each child element of
 * {@code TXNEXP}, and shows an in-memory modification of one of them.
 */
class Extract {

    public static void main(String[] args) {
        try {
            File fXmlFile = new File("data.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(fXmlFile);

            NodeList topNodes = doc.getElementsByTagName("TXNEXP");
            for (int i = 0; i < topNodes.getLength(); i++) {
                NodeList middleNodes = topNodes.item(i).getChildNodes();
                for (int j = 0; j < middleNodes.getLength(); j++) {
                    Node middle = middleNodes.item(j);
                    // Child lists also contain non-element nodes (e.g. whitespace text);
                    // test for them explicitly rather than catching ClassCastException.
                    if (!(middle instanceof Element)) {
                        continue;
                    }
                    NodeList theNodes = ((Element) middle).getElementsByTagName("PAN");
                    // Skip elements without a PAN, or with an empty PAN, so that one
                    // incomplete record does not abort the whole run.
                    if (theNodes.getLength() == 0 || theNodes.item(0).getFirstChild() == null) {
                        continue;
                    }
                    Node panText = theNodes.item(0).getFirstChild();
                    System.out.println(panText.getNodeValue());
                    // Demonstration only: change the value of the child at index 1.
                    if (j == 1) {
                        // modify a value
                        panText.setNodeValue("4567");
                        System.out.println(panText.getNodeValue());
                    }
                }
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
For a slightly better approach you could use XPaths.
import javax.xml.xpath.*;
/**
 * XPath variant: selects every {@code PAN} that is a grandchild of the root {@code TXNEXP}
 * element in {@code data.xml}, prints each value, and rewrites the second one in memory.
 */
class Extract {

    public static void main(String[] args) {
        try {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document tree = parser.parse(new File("data.xml"));

            XPath selector = XPathFactory.newInstance().newXPath();
            NodeList pans = (NodeList) selector.evaluate("/TXNEXP/*/PAN", tree, XPathConstants.NODESET);

            int position = 0;
            while (position < pans.getLength()) {
                System.out.println(pans.item(position).getFirstChild().getNodeValue());
                // Demonstration only: the match at index 1 gets a new value.
                if (position == 1) {
                    pans.item(position).getFirstChild().setNodeValue("4567");
                    System.out.println(pans.item(position).getFirstChild().getNodeValue());
                }
                position++;
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<JDF DescriptiveName="DescriptiveName" ID="n0001" JobID="1101-CCC-0" JobPartID="1" ProjectID="">
<Comment Name="Materialnummer">XXXXXXX</Comment>
<NodeInfo LastEnd="2014-03-12T18:00:00+01:00">
<EmployeeRef rRef="EMPCSR"/>
</NodeInfo>
<CustomerInfo CustomerID="1740">
</CustomerInfo>
<ResourcePool>
</ResourcePool>
<ResourceLinkPool>
</ResourceLinkPool>
<JDF Category="FinalImaging" ID="n0002" Status="Waiting" Type="ProcessGroup" Types="XXX">
<ResourcePool>
</ResourcePool>
<ResourceLinkPool>
</ResourceLinkPool>
</JDF>
<JDF ID="n0002" Status="Waiting" Type="ProcessGroup" Types="PrePressPreparation">
<ResourcePool>
</ResourcePool>
<ResourceLinkPool>
</ResourceLinkPool>
</JDF>
</JDF>
How do I get the root element's attribute values? For this example I want to get the DescriptiveName, ID, JobID and ProjectID. I managed to read other values but am stuck on the root element. Please advise.
EDITED
// Parse the input file into a DOM tree.
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(file);

// Select the root <JDF> element with an absolute XPath.
XPath xPath = XPathFactory.newInstance().newXPath();
String expression = "/JDF";
Object matches = xPath.compile(expression).evaluate(xmlDocument, XPathConstants.NODESET);
NodeList nodeList = (NodeList) matches;
A sample here :
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
 * Parses an XML file and prints selected attributes of its root element.
 */
public class Main {

    /** Root attributes to print. */
    private static final String[] ROOT_ATTRIBUTES = {"DescriptiveName", "ID", "JobID", "ProjectID"};

    /**
     * Reads the file named {@code filename} and prints one {@code name = value} line per
     * entry of {@link #ROOT_ATTRIBUTES}. An attribute absent from the root prints an empty value.
     *
     * @param argv unused
     * @throws Exception if the file cannot be read or is not well-formed XML
     */
    public static void main(String[] argv) throws Exception{
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // DTD validation is left off: a document without a DTD would only produce
        // validation errors that have nothing to do with reading attributes.
        factory.setExpandEntityReferences(false);
        Document doc = factory.newDocumentBuilder().parse(new File("filename"));

        // getDocumentElement() is the root element; no manual search of the child list is needed.
        Element root = doc.getDocumentElement();
        for (String attribute : ROOT_ATTRIBUTES) {
            System.out.println(attribute + " = " + root.getAttribute(attribute));
        }
    }
}
You can get the attributes from root object
I have an xml file in the following pattern which contains a few Complex Empty Elements(elements with no content, only attributes).
<items>
<item id="0" name="a" />
<item id="1" name="b" />
</items>
I'm at a loss as to how to parse the attributes from them. This is what I have done so far:
// Parses the stream and walks the direct children of the root element when it is the items tag.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(inputStream);
Element itemsElement = document.getDocumentElement();
if (itemsElement.getTagName().equals(TAG_ITEMS)) {
// The child list holds every direct child node, not just the <item> elements.
NodeList nodeList = itemsElement.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
// process each item node
Node node = nodeList.item(i);
// This condition selects text nodes (presumably the whitespace between the <item> tags in the
// sample), not the <item> elements themselves; the attributes live on the element nodes.
if (node.getNodeType() == Node.TEXT_NODE) { // Is this the right way?
Text text = (Text) node;
// Do stuff with attributes
}
}
}
I cannot cast these Text nodes to Element nodes and get attributes, I cannot get attributes from node using getAttributes - NPE at NamedNodeMap attributes.getLength(), I cannot cast it to Text and get attributes. How can I parse the attributes?
You are not interested in the text content of the nodes inside items but in the attributes of the item nodes. You could proceed as follows:
//process each item node
Node node = nodeList.item(i);
if (node.getNodeName().equals("item")) {
NamedNodeMap attributes = node.getAttributes();
System.out.printf("id=%s, name=%s%n",
attributes.getNamedItem("id").getTextContent(),
attributes.getNamedItem("name").getTextContent());
}
This would print:
id=0, name=a
id=1, name=b
Assuming you want to get the individual attributes of the nodes, you need to do one of two things (or both, depending on your needs)...
You need to test if the current node is an ELEMENT_NODE or if the current node's name is equal to item (assuming all the node names are the same), for example...
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
/**
 * Loads {@code /Test.xml} from the classpath and prints the {@code id} and {@code name}
 * attributes of every element child of the root, provided the root tag is {@link #TAG_ITEMS}.
 */
public class Test {

    public static final String TAG_ITEMS = "items";

    public static void main(String[] args) {
        try (InputStream is = Test.class.getResourceAsStream("/Test.xml")) {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Element root = parser.parse(is).getDocumentElement();
            if (!root.getTagName().equals(TAG_ITEMS)) {
                return;
            }

            NodeList children = root.getChildNodes();
            int count = children.getLength();
            for (int pos = 0; pos < count; pos++) {
                Node child = children.item(pos);
                // Text and other non-element children carry no attributes.
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                NamedNodeMap attrs = child.getAttributes();
                Node id = attrs.getNamedItem("id");
                Node name = attrs.getNamedItem("name");
                System.out.println("id = " + id.getNodeValue());
                System.out.println("name = " + name.getNodeValue());
            }
        } catch (Exception exp) {
            exp.printStackTrace();
        }
    }
}
Which will output...
id = 0
name = a
id = 1
name = b
All of this could be greatly reduced by using XPath, for example, if all the item nodes are the same name, then you could just use
/items/item
As the query. If the node names are different, but the attributes are the same, then you could use
/items/*[@id]
which will list all the nodes under items which have an id attribute, or
/items/*[@name]
which will list all the nodes under items which have a name attribute...
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Loads {@code /Test.xml} from the classpath and prints the {@code id} and {@code name}
 * attributes of the nodes selected by three XPath queries: all {@code item} children of
 * {@code items}, all children having an {@code id} attribute, and all children having a
 * {@code name} attribute.
 */
public class Test {

    /** Queries evaluated in order; {@code @} is the XPath abbreviation for the attribute axis. */
    private static final String[] QUERIES = {"/items/item", "/items/*[@id]", "/items/*[@name]"};

    public static void main(String[] args) {
        try (InputStream is = Test.class.getResourceAsStream("/Test.xml")) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(is);

            XPath xpath = XPathFactory.newInstance().newXPath();
            for (String query : QUERIES) {
                XPathExpression expression = xpath.compile(query);
                NodeList nodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
                process(nodes);
            }
        } catch (Exception exp) {
            exp.printStackTrace();
        }
    }

    /**
     * Prints one {@code id = ...; name = ...} line per node.
     *
     * @param nodes the element nodes to report; an attribute a node lacks is printed as
     *              {@code null}, since the attribute-filtered queries guarantee only one of them
     */
    protected static void process(NodeList nodes) {
        for (int index = 0; index < nodes.getLength(); index++) {
            Node item = nodes.item(index);
            NamedNodeMap attributes = item.getAttributes();
            String id = attributeValue(attributes.getNamedItem("id"));
            String name = attributeValue(attributes.getNamedItem("name"));
            System.out.println("id = " + id + "; name = " + name);
        }
    }

    /** Returns the attribute's value, or {@code null} when the attribute is absent. */
    private static String attributeValue(Node attribute) {
        return attribute == null ? null : attribute.getNodeValue();
    }
}
I would like to check whether code = "ABC" exists in my XML file using XPath. Can you please suggest some methods for it?
<metadata>
<codes class = "class1">
<code code = "ABC">
<detail "blah blah"/>
</code>
</codes>
<codes class = "class2">
<code code = "123">
<detail "blah blah"/>
</code>
</codes>
</metadata>
[EDIT]
I did the following. It returns null.
// Select every <code> element under <codes> whose "code" attribute equals ABC.
// '@' is the XPath abbreviation for the attribute axis.
XPath xPath = XPathFactory.newInstance().newXPath();
XPathExpression expr = xPath.compile("//codes/code[@ code ='ABC']");
Object result = expr.evaluate(doc, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
for (int i = 0; i < nodes.getLength(); i++) {
    System.out.println("nodes: "+ nodes.item(i).getNodeValue());
}
I don't know how you tested your code because <detail "blah blah"/> is an incorrect xml construct it should be <detail x="blah blah"/> i.e. a name-value pair !!
For the XPath expression "//codes/code[@ code ='ABC']", nodes.item(i).getNodeValue() is going to be null because the expression returns an Element. See the below Javadoc comment:
A working sample:
import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
/**
 * Checks whether an in-memory XML document contains a {@code <code>} element whose
 * {@code code} attribute is {@code ABC}, and reports the result on standard output.
 */
public class Test
{
    public static void main(String[] args) throws Exception
    {
        Document doc = getDoc();
        XPath xPath = XPathFactory.newInstance().newXPath();
        // '@code' addresses the attribute named "code" on each <code> element.
        XPathExpression expr = xPath.compile("//codes/code[@code ='ABC']");
        Object result = expr.evaluate(doc, XPathConstants.NODESET);
        NodeList nodes = (NodeList) result;
        System.out.println("Have I found anything? " + (nodes.getLength() > 0 ? "Yes": "No"));
        for (int i = 0; i < nodes.getLength(); i++) {
            // The matches are Element nodes, for which getNodeValue() is null by definition.
            System.out.println("nodes: "+ nodes.item(i).getNodeValue());
        }
    }

    /**
     * Builds the sample document from an inline XML string.
     *
     * @return the parsed document, never {@code null}
     * @throws IllegalStateException if the parser cannot be created or the XML cannot be parsed
     */
    private static Document getDoc()
    {
        String xml = "<metadata>"+
                "<codes class = 'class1'>"+
                "<code code='ABC'>"+
                "<detail x='blah blah'/>"+
                "</code>"+
                "</codes>"+
                "<codes class = 'class2'>"+
                "<code code = '123'>"+
                "<detail x='blah blah'/>"+
                "</code>"+
                "</codes>"+
                "</metadata>";
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            // Encode explicitly instead of relying on the platform default charset.
            return db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
        } catch (Exception pce) {
            // Fail here with the cause attached rather than returning null and failing later.
            throw new IllegalStateException("Could not parse the sample XML", pce);
        }
    }
}