Related
I want to read XML data using XPath in Java, but with the information I have gathered so far I am not able to parse the XML according to my requirement.
here is what I want to do:
Get an XML file from the web via its URL, then use XPath to parse it. I want to create two methods: one that takes a specific node attribute id and returns all of that node's child nodes, and a second that returns the value of just one specific child node.
<?xml version="1.0"?>
<howto>
<topic name="Java">
<url>http://www.rgagnonjavahowto.htm</url>
<car>taxi</car>
</topic>
<topic name="PowerBuilder">
<url>http://www.rgagnon/pbhowto.htm</url>
<url>http://www.rgagnon/pbhowtonew.htm</url>
</topic>
<topic name="Javascript">
<url>http://www.rgagnon/jshowto.htm</url>
</topic>
<topic name="VBScript">
<url>http://www.rgagnon/vbshowto.htm</url>
</topic>
</howto>
In the above example I want to read all the elements when I search via @name, and I also want a function that returns just the url for the @name 'Javascript', i.e. only that one node element.
You need something along the lines of this:
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(<uri_as_string>);
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile(<xpath_expression>);
Then you call expr.evaluate() passing in the document defined in that code and the return type you are expecting, and cast the result to the object type of the result.
If you need help with a specific XPath expressions, you should probably ask it as separate questions (unless that was your question in the first place here - I understood your question to be how to use the API in Java).
Edit: (Response to comment): This XPath expression will get you the text of the first URL element under PowerBuilder:
/howto/topic[@name='PowerBuilder']/url/text()
This will get you the second:
/howto/topic[@name='PowerBuilder']/url[2]/text()
You get that with this code:
expr.evaluate(doc, XPathConstants.STRING);
If you don't know how many URLs are in a given node, then you should rather do something like this:
// '@name' tests the name attribute; this selects every <url> element under the PowerBuilder topic.
XPathExpression expr = xpath.compile("/howto/topic[@name='PowerBuilder']/url");
// NODESET makes evaluate() return a NodeList containing all matching <url> elements.
NodeList nl = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
And then loop over the NodeList.
You can try this.
XML Document
Save as employees.xml.
<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
<Employee id="4">
<age>25</age>
<name>Meghan</name>
<gender>Female</gender>
<role>Manager</role>
</Employee>
</Employees>
Parser class
The class have following methods
List item
A Method that will return the Employee Name for input ID.
A Method that will return list of Employees Name with age greater than the input age.
A Method that will return list of Female Employees Name.
Source Code
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Demonstrates XPath queries against {@code employees.xml}: look up one employee name by id,
 * list the names of employees above a given age, and list the names of all female employees.
 */
public class Parser {

    /**
     * Parses {@code employees.xml} and prints the result of the three sample queries.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document doc = builder.parse("employees.xml");
            // One XPath object is shared by all three queries.
            XPath xpath = XPathFactory.newInstance().newXPath();

            String name = getEmployeeNameById(doc, xpath, 4);
            System.out.println("Employee Name with ID 4: " + name);

            List<String> names = getEmployeeNameWithAge(doc, xpath, 30);
            System.out.println("Employees with 'age>30' are:" + Arrays.toString(names.toArray()));

            List<String> femaleEmps = getFemaleEmployeesName(doc, xpath);
            System.out.println("Female Employees names are:"
                    + Arrays.toString(femaleEmps.toArray()));
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the names of all employees whose {@code gender} element is {@code Female}.
     *
     * @param doc   parsed employees document
     * @param xpath XPath instance used to compile the query
     * @return matching names in document order; empty if none match or the query fails
     */
    private static List<String> getFemaleEmployeesName(Document doc, XPath xpath) {
        return selectTexts(doc, xpath, "/Employees/Employee[gender='Female']/name/text()");
    }

    /**
     * Returns the names of all employees whose {@code age} is strictly greater than {@code age}.
     *
     * @param doc   parsed employees document
     * @param xpath XPath instance used to compile the query
     * @param age   exclusive lower bound on the employee age
     * @return matching names in document order; empty if none match or the query fails
     */
    private static List<String> getEmployeeNameWithAge(Document doc, XPath xpath, int age) {
        return selectTexts(doc, xpath, "/Employees/Employee[age>" + age + "]/name/text()");
    }

    /**
     * Returns the name of the employee whose {@code id} attribute equals {@code id}.
     *
     * @param doc   parsed employees document
     * @param xpath XPath instance used to compile the query
     * @param id    employee id to look up
     * @return the employee name, an empty string if no employee has that id, or {@code null}
     *         if the expression could not be compiled or evaluated
     */
    private static String getEmployeeNameById(Document doc, XPath xpath, int id) {
        String name = null;
        try {
            // Attributes are addressed with '@'; '#id' is not valid XPath and made compile() throw.
            XPathExpression expr =
                    xpath.compile("/Employees/Employee[@id='" + id + "']/name/text()");
            name = (String) expr.evaluate(doc, XPathConstants.STRING);
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
        return name;
    }

    /** Evaluates {@code expression} as a node-set and collects each node's value. */
    private static List<String> selectTexts(Document doc, XPath xpath, String expression) {
        List<String> list = new ArrayList<>();
        try {
            XPathExpression expr = xpath.compile(expression);
            NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            for (int i = 0; i < nodes.getLength(); i++) {
                list.add(nodes.item(i).getNodeValue());
            }
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
        return list;
    }
}
Getting started example:
xml file:
<inventory>
<book year="2000">
<title>Snow Crash</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553380958</isbn>
<price>14.95</price>
</book>
<book year="2005">
<title>Burning Tower</title>
<author>Larry Niven</author>
<author>Jerry Pournelle</author>
<publisher>Pocket</publisher>
<isbn>0743416910</isbn>
<price>5.99</price>
</book>
<book year="1995">
<title>Zodiac</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553573862</isbn>
<price>7.50</price>
</book>
<!-- more books... -->
</inventory>
Java code:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
try {
    // Build a DOM tree from the inventory file.
    DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document doc = parser.parse(new File("c:\\tmp\\my.xml"));
    // Normalize the text representation before walking the tree.
    doc.getDocumentElement().normalize();
    System.out.println("Root element of the doc is " + doc.getDocumentElement().getNodeName());

    NodeList books = doc.getElementsByTagName("book");
    System.out.println("Total no of books : " + books.getLength());

    // Print the year attribute and the first <title> text of every <book> element.
    int index = 0;
    while (index < books.getLength()) {
        Node candidate = books.item(index++);
        if (candidate.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element book = (Element) candidate;
        System.out.println("Year :" + book.getAttribute("year"));

        Element title = (Element) book.getElementsByTagName("title").item(0);
        String titleText = title.getChildNodes().item(0).getNodeValue().trim();
        System.out.println("title : " + titleText);
    }
} catch (SAXParseException err) {
    // Malformed XML: report where the parser stopped.
    System.out.println("** Parsing error" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
    System.out.println(" " + err.getMessage());
} catch (SAXException e) {
    // Prefer the wrapped exception's trace when the SAXException carries one.
    Exception wrapped = e.getException();
    if (wrapped == null) {
        e.printStackTrace();
    } else {
        wrapped.printStackTrace();
    }
} catch (Throwable t) {
    t.printStackTrace();
}
Here is an example of processing XPath with vtd-xml; for heavy-duty XML processing it is second to none. Here is a recent paper on this subject: Processing XML with Java – A Performance Benchmark
import com.ximpleware.*;
/**
 * Uses VTD-XML to rewrite the {@code initialMarking} attribute of matching {@code place}
 * elements in {@code input.xml} and writes the modified document to {@code new.xml}.
 */
public class changeAttrVal {
    /**
     * Selects, via XPath, the {@code initialMarking} attribute of every {@code place} element
     * with {@code id="p14"} and {@code initialMarking="2"}, and replaces its value with 499.
     *
     * @param s unused
     */
    public static void main(String s[]) throws VTDException, java.io.UnsupportedEncodingException, java.io.IOException {
        VTDGen vg = new VTDGen();
        // Nothing to do if the input cannot be parsed.
        if (!vg.parseFile("input.xml", false)) {
            return;
        }
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        XMLModifier xm = new XMLModifier(vn);
        // Attributes are addressed with '@' in XPath; '#id' is not a valid attribute test.
        ap.selectXPath("/*/place[@id=\"p14\" and @initialMarking=\"2\"]/@initialMarking");
        int i = 0;
        while ((i = ap.evalXPath()) != -1) {
            // i + 1 is the token updated for each match (presumably the attribute value token
            // following the attribute name — confirm against the VTD-XML docs).
            xm.updateToken(i + 1, "499"); // change initial marking from 2 to 499
        }
        xm.output("new.xml");
    }
}
If you have a xml like below
<e:Envelope
xmlns:d = "http://www.w3.org/2001/XMLSchema"
xmlns:e = "http://schemas.xmlsoap.org/soap/envelope/"
xmlns:wn0 = "http://systinet.com/xsd/SchemaTypes/"
xmlns:i = "http://www.w3.org/2001/XMLSchema-instance">
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
<e:Body>
<n0:ForAnsiHeaderOperResponse xmlns:n0 = "http://systinet.com/wsdl/com/magicsoftware/ibolt/localhost/ForAnsiHeader/ForAnsiHeaderImpl#ForAnsiHeaderOper?KExqYXZhL2xhbmcvU3RyaW5nOylMamF2YS9sYW5nL1N0cmluZzs=">
<response i:type = "d:string">12--abc--pqr</response>
</n0:ForAnsiHeaderOperResponse>
</e:Body>
</e:Envelope>
and wanted to extract the below xml
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
The below code helps to achieve the same
/**
 * Loads the SOAP envelope from disk, selects the Header element with XPath and prints it
 * serialized as XML.
 */
public static void main(String[] args) {
    File source = new File("C://Users//abhijitb//Desktop//Test.xml");
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    try {
        Document envelope = builderFactory.newDocumentBuilder().parse(source);
        XPath evaluator = XPathFactory.newInstance().newXPath();
        // NODE returns only the first node matched by the expression.
        Node header = (Node) evaluator.evaluate("//Envelope//Header", envelope, XPathConstants.NODE);
        System.out.println(nodeToString(header));
    } catch (SAXException | IOException | ParserConfigurationException | XPathExpressionException
            | TransformerException e) {
        e.printStackTrace();
    }
}
/**
 * Serializes a DOM node to its XML text, without the XML declaration.
 *
 * @param node node to serialize
 * @return the XML text of {@code node}
 * @throws TransformerException if the transformer cannot be created or the transform fails
 */
private static String nodeToString(Node node) throws TransformerException {
    StringWriter out = new StringWriter();
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    DOMSource input = new DOMSource(node);
    StreamResult target = new StreamResult(out);
    serializer.transform(input, target);
    return out.toString();
}
Now if you want only the xml like below
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
You need to change the
String xpathStr = "//Envelope//Header"; to String xpathStr = "//Envelope//Header/*";
This shows you how to
Read in an XML file to a DOM
Filter out a set of Nodes with XPath
Perform a certain action on each of the extracted Nodes.
We will call the code with the following statement
processFilteredXml(xmlIn, xpathExpr,(node) -> {/*Do something...*/;});
In our case we want to print some creatorNames from a book.xml using "//book/creators/creator/creatorName" as xpath to perform a printNode action on each Node that matches the XPath.
Full code
#Test
public void printXml() {
try (InputStream in = readFile("book.xml")) {
processFilteredXml(in, "//book/creators/creator/creatorName", (node) -> {
printNode(node, System.out);
});
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
 * Opens a resource through the current thread's context class loader.
 *
 * @param yourSampleFile resource name to look up
 * @return a stream over the resource, as returned by {@code getResourceAsStream}
 */
private InputStream readFile(String yourSampleFile) {
    ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
    return contextLoader.getResourceAsStream(yourSampleFile);
}
/**
 * Parses the XML stream, selects the nodes matching {@code xpath} and hands each one,
 * in result order, to {@code process}.
 *
 * @param in      XML input
 * @param xpath   XPath expression selecting the nodes of interest
 * @param process action applied to every selected node
 */
private void processFilteredXml(InputStream in, String xpath, Consumer<Node> process) {
    NodeList matches = filterNodesByXPath(readXml(in), xpath);
    int position = 0;
    while (position < matches.getLength()) {
        process.accept(matches.item(position));
        position++;
    }
}
/**
 * Parses an XML stream into a DOM document using a default {@link DocumentBuilderFactory}.
 *
 * @param xmlin XML input
 * @return the parsed document
 * @throws RuntimeException wrapping any exception raised while building the parser or parsing
 */
public Document readXml(InputStream xmlin) {
    try {
        DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document parsed = parser.parse(xmlin);
        return parsed;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates an XPath expression against a document and returns the matching nodes.
 *
 * @param doc       document to query
 * @param xpathExpr XPath expression to compile and evaluate
 * @return the node-set result of the expression
 * @throws RuntimeException wrapping any exception raised while compiling or evaluating
 */
private NodeList filterNodesByXPath(Document doc, String xpathExpr) {
    try {
        XPath evaluator = XPathFactory.newInstance().newXPath();
        XPathExpression compiled = evaluator.compile(xpathExpr);
        return (NodeList) compiled.evaluate(doc, XPathConstants.NODESET);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Serializes a node as indented XML (no XML declaration) and prints it, followed by a line
 * break, to {@code out}.
 *
 * @param node node to serialize
 * @param out  stream receiving the XML text
 * @throws RuntimeException wrapping any exception raised during serialization
 */
private void printNode(Node node, PrintStream out) {
    try {
        StringWriter buffer = new StringWriter();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        serializer.transform(new DOMSource(node), new StreamResult(buffer));
        out.println(buffer.toString());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Prints
<creatorName>Fosmire, Michael</creatorName>
<creatorName>Wertz, Ruth</creatorName>
<creatorName>Purzer, Senay</creatorName>
For book.xml
<book>
<creators>
<creator>
<creatorName>Fosmire, Michael</creatorName>
<givenName>Michael</givenName>
<familyName>Fosmire</familyName>
</creator>
<creator>
<creatorName>Wertz, Ruth</creatorName>
<givenName>Ruth</givenName>
<familyName>Wertz</familyName>
</creator>
<creator>
<creatorName>Purzer, Senay</creatorName>
<givenName>Senay</givenName>
<familyName>Purzer</familyName>
</creator>
</creators>
<titles>
<title>Critical Engineering Literacy Test (CELT)</title>
</titles>
</book>
Expanding on the excellent answers by @bluish and @Yishai, here is how you make NodeLists and node attributes support iteration, i.e. the for(Node n : nodelist) syntax.
Use it like:
NodeList nl = ...
for(Node n : XmlUtil.asList(nl))
{...}
and
Node n = ...
for(Node attr : XmlUtil.asList(n.getAttributes()))
{...}
The code:
/**
 * Converts DOM {@link NodeList} and {@link NamedNodeMap} instances to {@link List} views so
 * they can be used in for-each loops.
 * From: https://stackoverflow.com/a/19591302/779521
 */
public final class XmlUtil {
    private XmlUtil() {}

    /**
     * Returns a read-only list view over a {@link NodeList}.
     *
     * @param n the node list to wrap; may be {@code null}
     * @return an empty list when {@code n} is {@code null} or empty, otherwise a view backed
     *         by {@code n}
     */
    public static List<Node> asList(NodeList n) {
        return n == null || n.getLength() == 0
                ? Collections.<Node>emptyList()
                : new NodeListWrapper(n);
    }

    /** Random-access list view that delegates to the wrapped {@link NodeList}. */
    static final class NodeListWrapper extends AbstractList<Node> implements RandomAccess {
        private final NodeList list;

        NodeListWrapper(NodeList l) {
            this.list = l;
        }

        @Override
        public Node get(int index) {
            return this.list.item(index);
        }

        @Override
        public int size() {
            return this.list.getLength();
        }
    }

    /**
     * Returns a read-only list view over a {@link NamedNodeMap}, e.g. the result of
     * {@link Node#getAttributes()}.
     *
     * @param n the map to wrap; may be {@code null}, which is what {@code getAttributes()}
     *          returns for nodes that are not elements
     * @return an empty list when {@code n} is {@code null} or empty, otherwise a view backed
     *         by {@code n}
     */
    public static List<Node> asList(NamedNodeMap n) {
        return n == null || n.getLength() == 0
                ? Collections.<Node>emptyList()
                : new NodeMapWrapper(n);
    }

    /** Random-access list view that delegates to the wrapped {@link NamedNodeMap}. */
    static final class NodeMapWrapper extends AbstractList<Node> implements RandomAccess {
        private final NamedNodeMap list;

        NodeMapWrapper(NamedNodeMap l) {
            this.list = l;
        }

        @Override
        public Node get(int index) {
            return this.list.item(index);
        }

        @Override
        public int size() {
            return this.list.getLength();
        }
    }
}
Read XML file using XPathFactory, SAXParserFactory and StAX (JSR-173).
Using XPath get node and its child data.
/**
 * Runs two demonstrations: a namespace-qualified query against an in-memory SOAP body, and
 * an unqualified query against {@code ./books.xml}.
 */
public static void main(String[] args) {
    // In-memory document with two namespaces (soapenv and Yash).
    String xml = String.join("",
            "<soapenv:Body xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/'>",
            "<Yash:Data xmlns:Yash='http://Yash.stackoverflow.com/Services/Yash'>",
            "<Yash:Tags>Java</Yash:Tags><Yash:Tags>Javascript</Yash:Tags><Yash:Tags>Selenium</Yash:Tags>",
            "<Yash:Top>javascript</Yash:Top><Yash:User>Yash-777</Yash:User>",
            "</Yash:Data></soapenv:Body>");
    // Prefix-to-URI bindings, passed as a single-quoted JSON object.
    String jsonNameSpaces = "{'soapenv':'http://schemas.xmlsoap.org/soap/envelope/',"
            + "'Yash':'http://Yash.stackoverflow.com/Services/Yash'}";

    Document fromString = getDocument(false, "fileName", xml);
    getNodesFromXpath(fromString, "//Yash:Data", jsonNameSpaces);

    System.out.println("\n===== ***** =====");

    Document fromFile = getDocument(true, "./books.xml", xml);
    getNodesFromXpath(fromFile, "//person", "{}");
}
/**
 * Parses XML into a namespace-aware DOM document, either from a file or from a string.
 *
 * @param isFileName {@code true} to read the file named by {@code fileName};
 *                   {@code false} to parse the text in {@code xml}
 * @param fileName   path of the XML file; only used when {@code isFileName} is {@code true}
 * @param xml        XML text; only used when {@code isFileName} is {@code false}
 * @return the parsed document, or {@code null} if parsing failed (the stack trace is printed)
 */
static Document getDocument( boolean isFileName, String fileName, String xml ) {
    Document doc = null;
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setNamespaceAware(true);
        factory.setIgnoringComments(true);
        factory.setIgnoringElementContentWhitespace(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        if ( isFileName ) {
            // try-with-resources closes the stream even when parsing fails; it was leaked before.
            try ( FileInputStream stream = new FileInputStream( new File( fileName ) ) ) {
                doc = builder.parse( stream );
            }
        } else {
            doc = builder.parse( string2Source( xml ) );
        }
    } catch (SAXException | IOException | ParserConfigurationException e) {
        e.printStackTrace();
    }
    return doc;
}
/**
 * Evaluates an XPath expression against {@code doc} and prints the matching nodes through
 * {@code displayNodeList}. XPath failures are reported with a printed stack trace.
 *
 * <p>DOM node type codes, for reference:
 * ELEMENT_NODE[1],ATTRIBUTE_NODE[2],TEXT_NODE[3],CDATA_SECTION_NODE[4],
 * ENTITY_REFERENCE_NODE[5],ENTITY_NODE[6],PROCESSING_INSTRUCTION_NODE[7],
 * COMMENT_NODE[8],DOCUMENT_NODE[9],DOCUMENT_TYPE_NODE[10],DOCUMENT_FRAGMENT_NODE[11],NOTATION_NODE[12]
 *
 * @param doc             document to query
 * @param xpathExpression XPath expression; may use the prefixes declared in {@code jsonNameSpaces}
 * @param jsonNameSpaces  JSON object mapping namespace prefix to namespace URI; {@code "{}"}
 *                        when the expression uses no prefixes
 */
public static void getNodesFromXpath( Document doc, String xpathExpression, String jsonNameSpaces ) {
    try {
        XPathFactory xpf = XPathFactory.newInstance();
        XPath xpath = xpf.newXPath();
        JSONObject namespaces = getJSONObjectNameSpaces(jsonNameSpaces);
        // getJSONObjectNameSpaces returns null when the JSON cannot be parsed; treat that as
        // "no namespaces" instead of failing with a NullPointerException.
        if ( namespaces != null && namespaces.size() > 0 ) {
            NamespaceContextImpl nsContext = new NamespaceContextImpl(); // Apache WebServices Common Utilities
            for (Object key : namespaces.keySet()) {
                String pPrefix = key.toString();
                String pURI = namespaces.get(pPrefix).toString();
                nsContext.startPrefixMapping(pPrefix, pURI);
            }
            xpath.setNamespaceContext(nsContext );
        }
        XPathExpression compile = xpath.compile(xpathExpression);
        NodeList nodeList = (NodeList) compile.evaluate(doc, XPathConstants.NODESET);
        displayNodeList(nodeList);
    } catch (XPathExpressionException e) {
        e.printStackTrace();
    }
}
/**
 * Prints the nodes of a list to standard output. A node with more than one child has each
 * of its element children printed (name and text content); any other node is printed
 * itself.
 *
 * @param nodeList nodes to print
 */
static void displayNodeList( NodeList nodeList ) {
    for (int idx = 0; idx < nodeList.getLength(); idx++) {
        Node current = nodeList.item(idx);
        NodeList children = current.getChildNodes();

        // Zero or one child: print the node itself.
        if ( children.getLength() <= 1 ) {
            System.out.format( "\n Node Name:[%s], Text[%s] ", current.getNodeName(), current.getTextContent() );
            continue;
        }

        // Several children: print only the element children, skipping text/comment nodes.
        for (int c = 0; c < children.getLength(); c++) {
            Node child = children.item(c);
            if ( child.getNodeType() == Node.ELEMENT_NODE ) {
                System.out.format( "\n\t Node Name:[%s], Text[%s] ", child.getNodeName(), child.getTextContent() );
            }
        }
    }
}
/**
 * Wraps XML text in an {@link InputSource} backed by a {@link StringReader}.
 *
 * @param str XML text
 * @return an input source reading from {@code str}
 */
static InputSource string2Source( String str ) {
    StringReader characters = new StringReader( str );
    return new InputSource( characters );
}
/**
 * Parses a JSON object of namespace bindings. Single quotes in the input are replaced with
 * double quotes before parsing.
 *
 * @param jsonNameSpaces JSON text, single- or double-quoted
 * @return the parsed object, or {@code null} if the text cannot be parsed (the stack trace
 *         is printed)
 */
static JSONObject getJSONObjectNameSpaces( String jsonNameSpaces ) {
    String json = jsonNameSpaces.contains("'")
            ? jsonNameSpaces.replace("'", "\"")
            : jsonNameSpaces;
    JSONParser parser = new JSONParser();
    try {
        return (JSONObject) parser.parse(json);
    } catch (ParseException e) {
        e.printStackTrace();
        return null;
    }
}
XML Document
<?xml version="1.0" encoding="UTF-8"?>
<book>
<person>
<first>Yash</first>
<last>M</last>
<age>22</age>
</person>
<person>
<first>Bill</first>
<last>Gates</last>
<age>46</age>
</person>
<person>
<first>Steve</first>
<last>Jobs</last>
<age>40</age>
</person>
</book>
Out put for the given XPathExpression:
String xpathExpression = "//person/first";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[first], Text[Bill]
Node Name:[first], Text[Steve] */
String xpathExpression = "//person";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[last], Text[M]
Node Name:[age], Text[22]
Node Name:[first], Text[Bill]
Node Name:[last], Text[Gates]
Node Name:[age], Text[46]
Node Name:[first], Text[Steve]
Node Name:[last], Text[Jobs]
Node Name:[age], Text[40] */
String xpathExpression = "//Yash:Data";
/*OutPut:
Node Name:[Yash:Tags], Text[Java]
Node Name:[Yash:Tags], Text[Javascript]
Node Name:[Yash:Tags], Text[Selenium]
Node Name:[Yash:Top], Text[javascript]
Node Name:[Yash:User], Text[Yash-777] */
See this link for our own Implementation of NamespaceContext
I am very new to XML parsing. I am trying to read XML files from a shared drive on my computer and move them to another shared drive. I have the XML file below, and I am trying to read the Test.pdf value (the FileName attribute of IndexData) from this XML document.
<?xml version="1.0" encoding="utf-8" ?>
<xml>
<IndexData FileName="Test.pdf">
<AttachmentID>3221929</AttachmentID>
<URI>test234555..pdf</URI>
<postmarkDate>2018-07-02T12:52:00.9</postmarkDate>
<pin>305270036</pin>
<scanDate>2018-07-02T12:52:00.9</scanDate>
<UserLogin>admin</UserLogin>
</IndexData>
<IndexData FileName="Test2.pdf">
<AttachmentID>3221931</AttachmentID>
<URI>Appp2.pdf</URI>
<postmarkDate>2018-07-02T14:19:22.5</postmarkDate>
<pin>305270036</pin>
<scanDate>2018-07-02T14:19:22.5</scanDate>
<UserLogin>admin</UserLogin>
</IndexData>
</xml>
I tried importing import org.w3c.dom.Node; for this. Below is my code:
/**
 * Processes the XML for the given document node.
 *
 * @param doc the {@code IndexData} element to process
 * @return the value of the node's {@code FileName} attribute when processing succeeds,
 *         otherwise {@code null}
 */
String processXml(Node doc) {
    String fileName = null;
    try {
        DfLogger.debug(this, "Loading: " + doc.getNodeName(), null, null);
        Map<String, String> indexData = getXmlData(doc);
        // FileName is an attribute of the node itself, not a child element, so it is never
        // a key of the map built by getXmlData; read it from the attribute map instead.
        Node fileNameAttr = doc.getAttributes() == null
                ? null
                : doc.getAttributes().getNamedItem("FileName");
        if (fileNameAttr == null) {
            // Routed through the catch below: logged, and null is returned to the caller.
            throw new IllegalArgumentException(
                    "No FileName attribute on node: " + doc.getNodeName());
        }
        fileName = fileNameAttr.getNodeValue();
        if (new File(fileName).exists()) {
            // NOTE(review): `import` is a reserved word in Java, so the real method name is
            // presumably different — confirm against the actual class.
            import(fileName, indexData);
        }
    } catch (Exception ex) {
        DfLogger.error(this, "Error processing document.", null, ex);
        return null;
    }
    return fileName;
}
My value for FileName is always NULL when I am trying to read the value by doing this:
fileName = indexData.get("IndexData FileName");
below is my getXmlData method.
/**
 * Collects the direct element children of a node into a map from element name to trimmed
 * text content. Non-element children are skipped; attributes of the node are not included.
 *
 * @param xmlDataNode node whose children are read
 * @return map of child element name to trimmed text content
 */
protected Map<String, String> getXmlData(Node xmlDataNode) {
    Map<String, String> values = new HashMap<>();
    for (Node child = xmlDataNode.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        String text = child.getTextContent().trim();
        values.put(child.getNodeName(), text);
    }
    return values;
}
The caller method for processXML is below:
Public void processIncomingfiles(String documentTagName) throws Exception {
DfLogger.debug(this, "Import Process Begin ---- exportPath=" + exportPath, null, null);
try {
File dir = new File(exportPath);
if (dir.isDirectory()) {
FilenameFilter xmlFiles = new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.toLowerCase().endsWith(".xml");
}
};
for (File file : dir.listFiles(xmlFiles)) {
if (!file.isDirectory()) {
DfLogger.debug(this, "Loading XML file: " + file.getAbsolutePath(), null, null);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = dbFactory.newDocumentBuilder();
FileInputStream fileStream = new FileInputStream(file);
try {
// Use FileInputStream instead of File since parse will leave file locked on error
Document doc = documentBuilder.parse(fileStream);
fileStream.close();
fileStream = null;
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getElementsByTagName(documentTagName);
List<Node> errors = new ArrayList<>();
for (int i = 0; i < nodeList.getLength(); i++) {
String documentFilename = processXml(nodeList.item(i));
if (documentFilename != null) {
moveFileToProcessedSuccessful(documentFilename);
} else {
DfLogger.debug(
this,
"Error processing document in file: " + file.getName(),
null,
null);
errors.add(nodeList.item(i));
}
}
if (!errors.isEmpty()) {
if (errors.size() == nodeList.getLength()) {
safeMove(file, file.getAbsolutePath() + ".errors");
} else {
Node parent = nodeList.item(0).getParentNode();
for (Node errorDoc : errors) {
parent.removeChild(errorDoc);
}
writeXml(doc, file.getAbsolutePath());
moveFileToProcessedSuccessful(file);
while (nodeList.getLength() > 0) {
parent.removeChild(nodeList.item(0));
}
for (Node errorDoc : errors) {
parent.appendChild(errorDoc);
}
writeXml(doc, file.getAbsolutePath() + ".errors");
}
} else {
moveFileToProcessedSuccessful(file);
}
} catch (Exception ex) {
DfLogger.error(this, "Error parsing XML File.", null, ex);
if (fileStream != null) {
fileStream.close(); // If DocBuilder.parse fails, leaves file locked
}
safeMove(file, file.getAbsolutePath() + ".error");
}
}
}
}
} catch (Exception ex) {
DfLogger.error(this, "Error in XML Parser.", null, ex);
throw ex;
}
DfLogger.debug(this, "Import Process Ends -----------", null, null);
}
/**
* Process the XML for the given document node.
* @param doc xml node
* @return filename of successfully processed document, otherwise null
*/
any help will be appreciated.
Lets assume you have your xml data in test.xml file. You can read file and get specific data from your xml using the below code:
package yourPackage;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Reads {@code test.xml} from the working directory and prints the {@code FileName}
 * attribute of the first {@code IndexData} element.
 */
public class Main {
    /**
     * @param args unused
     * @throws IOException                  if {@code test.xml} cannot be read
     * @throws ParserConfigurationException if a DOM parser cannot be created
     * @throws SAXException                 if {@code test.xml} is not well-formed
     */
    public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        Document doc;
        // try-with-resources closes the file stream, which was previously never closed.
        try (java.io.InputStream in = Files.newInputStream(Paths.get("test.xml"))) {
            doc = factory.newDocumentBuilder().parse(in);
        }
        doc.getDocumentElement().normalize();
        // FileName is an attribute of <IndexData>, so it is read with getAttribute.
        Element data = (Element) doc.getElementsByTagName("IndexData").item(0);
        System.out.println(data.getAttribute("FileName"));
    }
}
The output is :
Test.pdf
I am converting CSV file to XML , it is converting but not getting desired structured output .
My java Code :-
/**
 * Reads C:/Users/Admin/Desktop/data.csv line by line, builds a DOM document with a
 * root element holding one "header" element for the first line and one "row" element
 * per following line, and writes it indented to C:/Users/Admin/Desktop/data.xml.
 *
 * NOTE(review): lines are tokenized on '?' rather than ',', so a comma-separated line
 * becomes a single token and each element ends up holding the whole line.
 * NOTE(review): the reader is never closed anywhere in this method.
 */
public static void main(String[] args){
// Column names collected from the first line of the file.
List<String> headers=new ArrayList<String>(5);
File file=new File("C:/Users/Admin/Desktop/data.csv");
BufferedReader reader=null;
try {
DocumentBuilderFactory domFactory =DocumentBuilderFactory.newInstance();
DocumentBuilder domBuilder=domFactory.newDocumentBuilder();
Document newDoc=domBuilder.newDocument();
// Root element
Element rootElement=newDoc.createElement("root");
newDoc.appendChild(rootElement);
reader = new BufferedReader(new FileReader(file));
// Zero-based index of the line being processed; 0 is treated as the header line.
int line=0;
String text=null;
while ((text=reader.readLine())!=null) {
// Split the line into tokens using '?' as the only delimiter.
StringTokenizer st=new StringTokenizer(text, "?", false);
String[] rowValues=new String[st.countTokens()];
int index=0;
while (st.hasMoreTokens()) {
String next=st.nextToken();
rowValues[index++]=next;
}
//String[] rowValues = text.split(",");
if (line == 0) { // Header row
// One "header" element is appended per token of the first line.
for (String col:rowValues) {
headers.add(col);
Element rowElement=newDoc.createElement("header");
rootElement.appendChild(rowElement);
// setTextContent replaces the element's text on every pass, so only the value
// from the last pass over the headers collected so far remains.
for (int col1=0;col1<headers.size();col1++) {
String header = headers.get(col1);
String value = null;
if (col1<rowValues.length) {
value=rowValues[col1];
} else {
// ?? Default value
value=" ";
}
rowElement.setTextContent(value);
System.out.println(headers+" "+value);
}
}} else { // Data row
// One "row" element per data line; as above, each setTextContent call overwrites
// the previous one, so the element keeps only the last column's value.
Element rowElement=newDoc.createElement("row");
rootElement.appendChild(rowElement);
for (int col=0;col<headers.size();col++) {
String header = headers.get(col);
String value = null;
if (col<rowValues.length) {
value=rowValues[col];
} else {
// ?? Default value
value=" ";
}
rowElement.setTextContent(value);
System.out.println(header+" "+value);
}
}
line++;
}
// Serialize the DOM document to data.xml with 4-space indentation.
try {
TransformerFactory tranFactory = TransformerFactory.newInstance();
Transformer aTransformer = tranFactory.newTransformer();
aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
Source src = new DOMSource(newDoc);
Result result = new StreamResult(new File("C:/Users/Admin/Desktop/data.xml"));
aTransformer.transform(src, result);
System.out.println("File creation successfully!");
} catch (Exception exp) {
exp.printStackTrace();
} finally {
// NOTE(review): both try blocks below are empty; no cleanup is performed here.
try {
} catch (Exception e1) {
}
try {
} catch (Exception e1) {
}
}
} catch (Exception e1) {
e1.printStackTrace();
}
}
This is my CSV file:-
Symbol,Open,High,Low,Last Traded Price,Change
"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"
"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"
"BPCL","647.70","665.00","645.95","660.10","10.75"
"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"
"ONGC","159.50","161.75","159.35","160.80","1.70"
And this is the output I am getting:-
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<root>
<header>Symbol,Open,High,Low,Last Traded Price,Change</header>
<row>"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"</row>
<row>"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"</row>
<row>"BPCL","647.70","665.00","645.95","660.10","10.75"</row>
<row>"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"</row>
<row>"ONGC","159.50","161.75","159.35","160.80","1.70"</row>
</root>
Please suggest where I am going wrong. I have tried what I could, but I am confused about which changes to make in the header and row sections.
ADDED :-
Expected output
<root>
<header>symbol</header>
<row>NIFTY 50</row>
<row>RELIANCE</row>
<row>BPCL></row>
.
.
<header>Open</header>
<row>9,645.90</row>
<row>1,390.00</row>
.
.
</root>
For your reference:
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.csv.QuoteMode;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Converts a CSV file to XML column by column: for every CSV header a {@code header}
 * element is emitted, followed by one {@code row} element per record holding that column's
 * value.
 */
public class CsvToXml {

    /**
     * Reads {@code C:/Users/Admin/Desktop/data.csv} and writes
     * {@code C:/Users/Admin/Desktop/data.xml}. Failures are reported with a printed stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File csvFile = new File("C:/Users/Admin/Desktop/data.csv");
        CSVParser csv = null;
        try {
            csv = CSVParser.parse(csvFile, StandardCharsets.UTF_8,
                    CSVFormat.EXCEL.withHeader().withQuoteMode(QuoteMode.NON_NUMERIC));

            DocumentBuilder domBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document xml = domBuilder.newDocument();

            // Root element
            Element root = xml.createElement("root");
            xml.appendChild(root);

            // Read all records once; they are re-walked for every column.
            List<CSVRecord> rows = csv.getRecords();
            for (String column : csv.getHeaderMap().keySet()) {
                Element headerElement = xml.createElement("header");
                root.appendChild(headerElement);
                headerElement.setTextContent(column);

                for (CSVRecord row : rows) {
                    Element cell = xml.createElement("row");
                    root.appendChild(cell);
                    cell.setTextContent(row.get(column));
                }
            }

            writeIndented(xml, new File("C:/Users/Admin/Desktop/data.xml"));
            System.out.println("File creation successfully!");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (csv != null) {
                try {
                    csv.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Serializes {@code doc} to {@code target} as XML indented by four spaces. */
    private static void writeIndented(Document doc, File target) throws Exception {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "xml");
        serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
        Source input = new DOMSource(doc);
        Result output = new StreamResult(target);
        serializer.transform(input, output);
    }
}
This is using Apache Commons CSV.
I have an xml and I want to extract some part of that. But I am unable to get it.
If I used variables and put every key in variables I can get that part but it is a very lengthy process. So is there any short process for it?
Below is the XML :
<?xml version=\"1.0\" encoding=\"UTF-8\"?><xs:nml
xmlns:xs=\"http://www.netgear.com/protocol/transaction/NMLSchema-0.9\" src=\"nas\" dst=\"dpv_1461117132000\" locale=\"en-us\">
<xs:transaction ref-id=\"\" type=\"0\">
<xs:response ref-id=\"njl_id_1941\" status=\"success\">
<xs:result>
<xs:get-s resource-id=\"network_link_list\" resource-type=\"network_link_collection\">
<network_link_collection>
<network_link resource-id=\"eth0\">
<link>eth0</link>
<ifname>eth0</ifname>
<speed>1000</speed>
<path/>
<duplex>full</duplex>
<vlanid>0</vlanid>
<iptype>ipv4dhcp</iptype>
<ipv6type>ipv6dhcp</ipv6type>
<ip>0.0.0.0</ip>
<subnet>255.255.255.0</subnet>
<broadcast>0.0.0.0</broadcast>
<ipv6>::</ipv6>
<subnet6>::</subnet6>
<prefixlength>64</prefixlength>
<ipv6_link>::</ipv6_link>
<prefixlength_link>64</prefixlength_link>
<mac>6C:B0:CE:1C:CA:AE</mac>
<mtu>1500</mtu>
<router>0.0.0.0</router>
<router6>0.0.0.0</router6>
<state>down</state>
<dnscollection/>
<routecollection/>
<ntpcollection/>
</network_link>
</network_link_collection>
</xs:get-s>
</xs:result>
</xs:response>
</xs:transaction>
I want the xml which comes inside network link collection.
You can create a map of property key-value pairs fairly easily. You just need to find the nodes that you want to pull out.
// Take the first <network_link> element in the document and list its child nodes.
NodeList nodeList = doc.getElementsByTagName("network_link").item(0).getChildNodes();
ParseResponseXML.java
import java.io.*;
import java.net.*;
import java.util.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
/**
 * Parses {@code resources/Response.xml} from the class path and prints the child
 * elements of the first {@code <network_link>} element as name/value pairs.
 */
public class ParseResponseXML {

    /**
     * Loads the response document and prints one {@code name => value} line per property.
     * Any failure is reported by printing the stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File fXmlFile = getResourceAsFile("resources/Response.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(fXmlFile);
            doc.getDocumentElement().normalize(); // http://stackoverflow.com/questions/13786607
            NodeList nodeList = doc.getElementsByTagName("network_link").item(0).getChildNodes();
            Map<String, String> propertyMap = nodeListToMap(nodeList);
            for (Map.Entry<String, String> entry : propertyMap.entrySet()) {
                System.out.printf("%-18s => %s%n", entry.getKey(), entry.getValue());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Collects the element nodes of a list into a map of tag name to text content.
     * Non-element nodes (for example whitespace text) are skipped, and iteration
     * order follows the node list.
     *
     * @param nodeList nodes to convert
     * @return insertion-ordered map from tag name to text content
     */
    private static Map<String, String> nodeListToMap(NodeList nodeList) {
        Map<String, String> result = new LinkedHashMap<String, String>();
        for (int temp = 0; temp < nodeList.getLength(); temp++) {
            Node node = nodeList.item(temp);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                Element element = (Element) node;
                result.put(element.getTagName(), element.getTextContent());
            }
        }
        return result;
    }

    /**
     * Resolves a class-path resource to a file on disk.
     *
     * @param resource resource name relative to the class path root
     * @return the file backing the resource
     * @throws IOException if the resource is missing, is not a {@code file:} URL,
     *     or its URL cannot be converted to a URI
     */
    private static File getResourceAsFile(String resource) throws IOException {
        // The original referenced an undefined class "Parse"; this class is the intended anchor.
        ClassLoader loader = ParseResponseXML.class.getClassLoader();
        // getResource works with any class loader, so no URLClassLoader cast is needed.
        URL resourceUrl = loader.getResource(resource);
        // A missing resource yields null; report it as an IOException instead of an NPE.
        if (resourceUrl == null || !"file".equals(resourceUrl.getProtocol())) {
            throw new IOException("Unable to get file through class loader: " + loader);
        }
        try {
            URI uri = resourceUrl.toURI();
            return new File(uri);
        } catch (URISyntaxException e) {
            throw new IOException("Unable to get file through class loader: " + loader, e);
        }
    }
}
Response.xml
Make sure you have the </xs:nml> closing tag at the end of the XML.
<?xml version="1.0" encoding="UTF-8"?>
<xs:nml xmlns:xs="http://www.netgear.com/protocol/transaction/NMLSchema-0.9"
src="nas" dst="dpv_1461117132000" locale="en-us">
<xs:transaction ref-id="" type="0">
<xs:response ref-id="njl_id_1941" status="success">
<xs:result>
<xs:get-s resource-id="network_link_list" resource-type="network_link_collection">
<network_link_collection>
<network_link resource-id="eth0">
<link>eth0</link>
<ifname>eth0</ifname>
<speed>1000</speed>
<path />
<duplex>full</duplex>
<vlanid>0</vlanid>
<iptype>ipv4dhcp</iptype>
<ipv6type>ipv6dhcp</ipv6type>
<ip>0.0.0.0</ip>
<subnet>255.255.255.0</subnet>
<broadcast>0.0.0.0</broadcast>
<ipv6>::</ipv6>
<subnet6>::</subnet6>
<prefixlength>64</prefixlength>
<ipv6_link>::</ipv6_link>
<prefixlength_link>64</prefixlength_link>
<mac>6C:B0:CE:1C:CA:AE</mac>
<mtu>1500</mtu>
<router>0.0.0.0</router>
<router6>0.0.0.0</router6>
<state>down</state>
<dnscollection />
<routecollection />
<ntpcollection />
</network_link>
</network_link_collection>
</xs:get-s>
</xs:result>
</xs:response>
</xs:transaction>
</xs:nml>
Output
link => eth0
ifname => eth0
speed => 1000
path =>
duplex => full
vlanid => 0
iptype => ipv4dhcp
ipv6type => ipv6dhcp
ip => 0.0.0.0
subnet => 255.255.255.0
broadcast => 0.0.0.0
ipv6 => ::
subnet6 => ::
prefixlength => 64
ipv6_link => ::
prefixlength_link => 64
mac => 6C:B0:CE:1C:CA:AE
mtu => 1500
router => 0.0.0.0
router6 => 0.0.0.0
state => down
dnscollection =>
routecollection =>
ntpcollection =>
Unwrap XML
If you want to unwrap a node, you can perform the following.
import java.io.*;
import java.net.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
/**
 * Extracts ("unwraps") one element of an XML document into a document of its own
 * and writes the result to a file.
 */
public class ParseResponseXML {

    /**
     * Loads {@code resources/Response.xml}, unwraps its first
     * {@code <network_link_collection>} element and writes it to
     * {@code NetworkLinkCollection.xml}. Failures are reported by printing the stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Document inputDoc = load("resources/Response.xml");
            Document outputDoc = unwrap(inputDoc, "network_link_collection");
            write(outputDoc, "NetworkLinkCollection.xml");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses a class-path resource into a DOM document.
     *
     * @param resource resource name relative to the class path root
     * @return the parsed document
     * @throws IOException if the resource cannot be resolved to a file or cannot be read
     * @throws ParserConfigurationException if no document builder can be created
     * @throws SAXException if the content is not well-formed XML
     */
    public static Document load(String resource) throws IOException, ParserConfigurationException, SAXException {
        File file = getResourceAsFile(resource);
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        return dBuilder.parse(file);
    }

    /**
     * Serializes a document to the named file using a default transformer.
     *
     * @param doc document to write
     * @param filename path of the output file
     * @throws TransformerException if the transformer cannot be created or serialization fails
     */
    public static void write(Document doc, String filename) throws TransformerException {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        DOMSource source = new DOMSource(doc);
        StreamResult result = new StreamResult(new File(filename));
        // StreamResult result = new StreamResult(System.out); // Output to console.
        transformer.transform(source, result);
    }

    /**
     * Builds a new document whose root is a deep copy of the first element named
     * {@code tagName} in {@code doc}. The source document is not modified.
     *
     * @param doc document to search
     * @param tagName tag name of the element to extract
     * @return a new document containing only the copied element subtree
     * @throws ParserConfigurationException if no document builder can be created
     * @throws IllegalArgumentException if {@code doc} contains no element named {@code tagName}
     */
    public static Document unwrap(Document doc, String tagName) throws ParserConfigurationException {
        Node node = doc.getElementsByTagName(tagName).item(0);
        // item(0) is null when nothing matches; fail with a descriptive message instead of an NPE.
        if (node == null) {
            throw new IllegalArgumentException("No element named '" + tagName + "' in document");
        }
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document result = dBuilder.newDocument();
        // Nodes belong to their owner document, so the subtree is imported (deep copy) first.
        Node importNode = result.importNode(node, true);
        result.appendChild(importNode);
        return result;
    }

    /**
     * Resolves a class-path resource to a file on disk.
     *
     * @param resourceName resource name relative to the class path root
     * @return the file backing the resource
     * @throws IOException if the resource is missing, is not a {@code file:} URL,
     *     or its URL cannot be converted to a URI
     */
    private static File getResourceAsFile(String resourceName) throws IOException {
        ClassLoader loader = ParseResponseXML.class.getClassLoader();
        // getResource works with any class loader, so no URLClassLoader cast is needed.
        URL resourceUrl = loader.getResource(resourceName);
        // A missing resource yields null; report it as an IOException instead of an NPE.
        if (resourceUrl == null || !"file".equals(resourceUrl.getProtocol())) {
            throw new IOException("Unable to get file through class loader: " + loader);
        }
        try {
            URI uri = resourceUrl.toURI();
            return new File(uri);
        } catch (URISyntaxException e) {
            throw new IOException("Unable to get file through class loader: " + loader, e);
        }
    }
}
NetworkLinkCollection.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<network_link_collection>
<network_link resource-id="eth0">
<link>eth0</link>
<ifname>eth0</ifname>
<speed>1000</speed>
<path />
<duplex>full</duplex>
<vlanid>0</vlanid>
<iptype>ipv4dhcp</iptype>
<ipv6type>ipv6dhcp</ipv6type>
<ip>0.0.0.0</ip>
<subnet>255.255.255.0</subnet>
<broadcast>0.0.0.0</broadcast>
<ipv6>::</ipv6>
<subnet6>::</subnet6>
<prefixlength>64</prefixlength>
<ipv6_link>::</ipv6_link>
<prefixlength_link>64</prefixlength_link>
<mac>6C:B0:CE:1C:CA:AE</mac>
<mtu>1500</mtu>
<router>0.0.0.0</router>
<router6>0.0.0.0</router6>
<state>down</state>
<dnscollection />
<routecollection />
<ntpcollection />
</network_link>
</network_link_collection>
Great response from Mr. Polywhirl!! Thanks a lot!!
I only want to add that if you want to extract a part of the XML without including the XML declaration (<?xml ... ?>), as I did, you have to add this in the "write" method:
// Ask the transformer to leave the <?xml ...?> declaration out of the serialized output.
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
I want to read XML data using XPath in Java, but with the information I have gathered so far I am not able to parse the XML according to my requirements.
here is what I want to do:
Get XML file from online via its URL, then use XPath to parse it, I want to create two methods in it. One is in which I enter a specific node attribute id, and I get all the child nodes as result, and second is suppose I just want to get a specific child node value only
<?xml version="1.0"?>
<howto>
<topic name="Java">
<url>http://www.rgagnonjavahowto.htm</url>
<car>taxi</car>
</topic>
<topic name="PowerBuilder">
<url>http://www.rgagnon/pbhowto.htm</url>
<url>http://www.rgagnon/pbhowtonew.htm</url>
</topic>
<topic name="Javascript">
<url>http://www.rgagnon/jshowto.htm</url>
</topic>
<topic name="VBScript">
<url>http://www.rgagnon/vbshowto.htm</url>
</topic>
</howto>
In the above example I want to read all the elements if I search via @name, and I also want one function that returns just the url from the topic whose @name is 'Javascript' (a single node element).
You need something along the lines of this:
// Parse the XML into a DOM document; <uri_as_string> is a placeholder for the document's URI.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(<uri_as_string>);
// Compile the XPath expression; <xpath_expression> is a placeholder for the expression string.
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile(<xpath_expression>);
Then you call expr.evaluate() passing in the document defined in that code and the return type you are expecting, and cast the result to the object type of the result.
If you need help with a specific XPath expressions, you should probably ask it as separate questions (unless that was your question in the first place here - I understood your question to be how to use the API in Java).
Edit: (Response to comment): This XPath expression will get you the text of the first URL element under PowerBuilder:
/howto/topic[@name='PowerBuilder']/url/text()
This will get you the second:
/howto/topic[@name='PowerBuilder']/url[2]/text()
You get that with this code:
// Evaluate the compiled expression against the parsed document, requesting a string result.
expr.evaluate(doc, XPathConstants.STRING);
If you don't know how many URLs are in a given node, then you should rather do something like this:
// "@name" is the XPath attribute test; "#name" is not valid XPath and fails to compile.
XPathExpression expr = xpath.compile("/howto/topic[@name='PowerBuilder']/url");
// NODESET returns every matching <url> element, however many the topic contains.
NodeList nl = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
And then loop over the NodeList.
You can try this.
XML Document
Save as employees.xml.
<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
<Employee id="4">
<age>25</age>
<name>Meghan</name>
<gender>Female</gender>
<role>Manager</role>
</Employee>
</Employees>
Parser class
The class has the following methods:
A Method that will return the Employee Name for input ID.
A Method that will return list of Employees Name with age greater than the input age.
A Method that will return list of Female Employees Name.
Source Code
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Demonstrates XPath queries against an employees document: lookup by id attribute,
 * filtering by age, and filtering by gender.
 */
public class Parser {

    /**
     * Parses {@code employees.xml} and prints the result of the three sample queries.
     * Parse failures are reported by printing the stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document doc = builder.parse("employees.xml");

            // Create XPathFactory object
            XPathFactory xpathFactory = XPathFactory.newInstance();

            // Create XPath object
            XPath xpath = xpathFactory.newXPath();

            String name = getEmployeeNameById(doc, xpath, 4);
            System.out.println("Employee Name with ID 4: " + name);

            List<String> names = getEmployeeNameWithAge(doc, xpath, 30);
            System.out.println("Employees with 'age>30' are:" + Arrays.toString(names.toArray()));

            List<String> femaleEmps = getFemaleEmployeesName(doc, xpath);
            System.out.println("Female Employees names are:" +
                    Arrays.toString(femaleEmps.toArray()));
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the names of all employees whose {@code gender} element is {@code Female}.
     *
     * @param doc parsed employees document
     * @param xpath XPath evaluator to use
     * @return matching names in document order; empty if none match or evaluation fails
     */
    private static List<String> getFemaleEmployeesName(Document doc, XPath xpath) {
        List<String> list = new ArrayList<>();
        try {
            //create XPathExpression object
            XPathExpression expr =
                    xpath.compile("/Employees/Employee[gender='Female']/name/text()");
            //evaluate expression result on XML document
            NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            for (int i = 0; i < nodes.getLength(); i++) {
                list.add(nodes.item(i).getNodeValue());
            }
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Returns the names of all employees whose {@code age} is strictly greater than {@code age}.
     *
     * @param doc parsed employees document
     * @param xpath XPath evaluator to use
     * @param age exclusive lower bound on the employee's age
     * @return matching names in document order; empty if none match or evaluation fails
     */
    private static List<String> getEmployeeNameWithAge(Document doc, XPath xpath, int age) {
        List<String> list = new ArrayList<>();
        try {
            XPathExpression expr =
                    xpath.compile("/Employees/Employee[age>" + age + "]/name/text()");
            NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            for (int i = 0; i < nodes.getLength(); i++) {
                list.add(nodes.item(i).getNodeValue());
            }
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Returns the name of the employee whose {@code id} attribute equals {@code id}.
     *
     * @param doc parsed employees document
     * @param xpath XPath evaluator to use
     * @param id value of the {@code id} attribute to look up
     * @return the employee's name, an empty string if no employee matches,
     *     or {@code null} if the expression cannot be evaluated
     */
    private static String getEmployeeNameById(Document doc, XPath xpath, int id) {
        String name = null;
        try {
            // "@id" tests the id attribute; the former "#id" was not valid XPath, so
            // compilation always failed and this method always returned null.
            XPathExpression expr =
                    xpath.compile("/Employees/Employee[@id='" + id + "']/name/text()");
            name = (String) expr.evaluate(doc, XPathConstants.STRING);
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
        return name;
    }
}
Getting started example:
xml file:
<inventory>
<book year="2000">
<title>Snow Crash</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553380958</isbn>
<price>14.95</price>
</book>
<book year="2005">
<title>Burning Tower</title>
<author>Larry Niven</author>
<author>Jerry Pournelle</author>
<publisher>Pocket</publisher>
<isbn>0743416910</isbn>
<price>5.99</price>
</book>
<book year="1995">
<title>Zodiac</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553573862</isbn>
<price>7.50</price>
</book>
<!-- more books... -->
</inventory>
Java code:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
// Parses the inventory file and prints the year attribute and title of every <book> element.
try {
    DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
    // Fully qualified because java.io.File is not among this snippet's imports.
    Document doc = docBuilder.parse(new java.io.File("c:\\tmp\\my.xml"));
    // normalize text representation
    doc.getDocumentElement().normalize();
    System.out.println("Root element of the doc is " + doc.getDocumentElement().getNodeName());
    NodeList listOfBooks = doc.getElementsByTagName("book");
    int totalBooks = listOfBooks.getLength();
    System.out.println("Total no of books : " + totalBooks);
    for (int i = 0; i < totalBooks; i++) {
        Node bookNode = listOfBooks.item(i);
        if (bookNode.getNodeType() == Node.ELEMENT_NODE) {
            Element bookElement = (Element) bookNode;
            System.out.println("Year :" + bookElement.getAttribute("year"));
            //-------
            // Only the first <title> of the book is printed.
            Element titleElement = (Element) bookElement.getElementsByTagName("title").item(0);
            // A book without a <title>, or with an empty one, is skipped instead of
            // aborting the whole listing with a NullPointerException.
            Node titleText = (titleElement == null) ? null : titleElement.getFirstChild();
            if (titleText != null && titleText.getNodeValue() != null) {
                System.out.println("title : " + titleText.getNodeValue().trim());
            }
        }
    } // end of loop over books
} catch (SAXParseException err) {
    System.out.println("** Parsing error" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
    System.out.println(" " + err.getMessage());
} catch (SAXException e) {
    // Prefer the wrapped exception when the SAXException carries one.
    Exception x = e.getException();
    ((x == null) ? e : x).printStackTrace();
} catch (Throwable t) {
    t.printStackTrace();
}
Here is an example of processing XPath with vtd-xml; for heavy-duty XML processing it is second to none. Here is a recent paper on this subject: Processing XML with Java – A Performance Benchmark
import com.ximpleware.*;
/**
 * Rewrites the {@code initialMarking} attribute of matching {@code place} elements
 * in {@code input.xml} and writes the modified document to {@code new.xml}.
 */
public class changeAttrVal {

    /**
     * Selects the {@code initialMarking} attribute of every {@code place} element with
     * {@code id="p14"} and {@code initialMarking="2"} and replaces its value with {@code 499}.
     * Returns without writing anything if {@code input.xml} cannot be parsed.
     *
     * @param s unused
     */
    public static void main(String s[]) throws VTDException,java.io.UnsupportedEncodingException,java.io.IOException{
        VTDGen vg = new VTDGen();
        if (!vg.parseFile("input.xml", false)) {
            return;
        }
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        XMLModifier xm = new XMLModifier(vn);
        // "@" is the XPath attribute axis; the former "#id" / "#initialMarking" were not valid XPath.
        ap.selectXPath("/*/place[@id=\"p14\" and @initialMarking=\"2\"]/@initialMarking");
        int i = 0;
        while ((i = ap.evalXPath()) != -1) {
            // NOTE(review): i + 1 is presumably the attribute's value token (i being its name) - confirm against VTD-XML docs.
            xm.updateToken(i + 1, "499");// change initial marking from 2 to 499
        }
        xm.output("new.xml");
    }
}
If you have an XML document like the one below
<e:Envelope
xmlns:d = "http://www.w3.org/2001/XMLSchema"
xmlns:e = "http://schemas.xmlsoap.org/soap/envelope/"
xmlns:wn0 = "http://systinet.com/xsd/SchemaTypes/"
xmlns:i = "http://www.w3.org/2001/XMLSchema-instance">
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
<e:Body>
<n0:ForAnsiHeaderOperResponse xmlns:n0 = "http://systinet.com/wsdl/com/magicsoftware/ibolt/localhost/ForAnsiHeader/ForAnsiHeaderImpl#ForAnsiHeaderOper?KExqYXZhL2xhbmcvU3RyaW5nOylMamF2YS9sYW5nL1N0cmluZzs=">
<response i:type = "d:string">12--abc--pqr</response>
</n0:ForAnsiHeaderOperResponse>
</e:Body>
</e:Envelope>
and wanted to extract the below xml
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
The below code helps to achieve the same
/**
 * Parses the sample SOAP file, selects the node matched by {@code //Envelope//Header}
 * and prints its serialized XML. Any failure is reported by printing the stack trace.
 */
public static void main(String[] args) {
    File source = new File("C://Users//abhijitb//Desktop//Test.xml");
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    try {
        Document parsed = builderFactory.newDocumentBuilder().parse(source);
        XPath evaluator = XPathFactory.newInstance().newXPath();
        // NODE asks for a single node rather than a node set.
        Node header = (Node) evaluator.evaluate("//Envelope//Header", parsed, XPathConstants.NODE);
        System.out.println(nodeToString(header));
    } catch (SAXException | IOException | ParserConfigurationException | XPathExpressionException
            | TransformerException e) {
        e.printStackTrace();
    }
}
/**
 * Serializes a DOM node to its XML text without the XML declaration.
 *
 * @param node node to serialize
 * @return the serialized XML
 * @throws TransformerException if the transformer cannot be created or serialization fails
 */
private static String nodeToString(Node node) throws TransformerException {
    StringWriter sink = new StringWriter();
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    DOMSource input = new DOMSource(node);
    StreamResult output = new StreamResult(sink);
    serializer.transform(input, output);
    return sink.toString();
}
Now if you want only the xml like below
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
You need to change the
String xpathStr = "//Envelope//Header"; to String xpathStr = "//Envelope//Header/*";
This shows you how to
Read in an XML file to a DOM
Filter out a set of Nodes with XPath
Perform a certain action on each of the extracted Nodes.
We will call the code with the following statement
processFilteredXml(xmlIn, xpathExpr,(node) -> {/*Do something...*/;});
In our case we want to print some creatorNames from a book.xml using "//book/creators/creator/creatorName" as xpath to perform a printNode action on each Node that matches the XPath.
Full code
#Test
public void printXml() {
try (InputStream in = readFile("book.xml")) {
processFilteredXml(in, "//book/creators/creator/creatorName", (node) -> {
printNode(node, System.out);
});
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
 * Opens a class-path resource through the current thread's context class loader.
 *
 * @param yourSampleFile resource name to open
 * @return whatever {@code getResourceAsStream} returns for that name
 */
private InputStream readFile(String yourSampleFile) {
    ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
    InputStream resource = contextLoader.getResourceAsStream(yourSampleFile);
    return resource;
}
/**
 * Parses the XML stream, selects the nodes matching {@code xpath} and passes each
 * of them, in result order, to {@code process}.
 *
 * @param in XML input
 * @param xpath XPath expression selecting the nodes to process
 * @param process action invoked once per selected node
 */
private void processFilteredXml(InputStream in, String xpath, Consumer<Node> process) {
    NodeList matches = filterNodesByXPath(readXml(in), xpath);
    int index = 0;
    while (index < matches.getLength()) {
        process.accept(matches.item(index));
        index++;
    }
}
/**
 * Parses an XML stream into a DOM document using a default document builder.
 *
 * @param xmlin XML input
 * @return the parsed document
 * @throws RuntimeException wrapping any exception raised while building or parsing
 */
public Document readXml(InputStream xmlin) {
    try {
        DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document parsed = parser.parse(xmlin);
        return parsed;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates an XPath expression against a document and returns the matching nodes.
 *
 * @param doc document to query
 * @param xpathExpr XPath expression to evaluate
 * @return the node set produced by the expression
 * @throws RuntimeException wrapping any exception raised while compiling or evaluating
 */
private NodeList filterNodesByXPath(Document doc, String xpathExpr) {
    try {
        XPath evaluator = XPathFactory.newInstance().newXPath();
        XPathExpression compiled = evaluator.compile(xpathExpr);
        return (NodeList) compiled.evaluate(doc, XPathConstants.NODESET);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Serializes a node as indented XML without the XML declaration and prints it,
 * followed by a line break, to {@code out}.
 *
 * @param node node to print
 * @param out destination stream
 * @throws RuntimeException wrapping any exception raised during serialization
 */
private void printNode(Node node, PrintStream out) {
    try {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        // Serialize into memory first so the text can be printed in one call.
        StringWriter buffer = new StringWriter();
        serializer.transform(new DOMSource(node), new StreamResult(buffer));
        out.println(buffer.toString());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Prints
<creatorName>Fosmire, Michael</creatorName>
<creatorName>Wertz, Ruth</creatorName>
<creatorName>Purzer, Senay</creatorName>
For book.xml
<book>
<creators>
<creator>
<creatorName>Fosmire, Michael</creatorName>
<givenName>Michael</givenName>
<familyName>Fosmire</familyName>
</creator>
<creator>
<creatorName>Wertz, Ruth</creatorName>
<givenName>Ruth</givenName>
<familyName>Wertz</familyName>
</creator>
<creator>
<creatorName>Purzer, Senay</creatorName>
<givenName>Senay</givenName>
<familyName>Purzer</familyName>
</creator>
</creators>
<titles>
<title>Critical Engineering Literacy Test (CELT)</title>
</titles>
</book>
Expanding on the excellent answer by @bluish and @Yishai, here is how you make NodeLists and node attributes support iterators, i.e. the for(Node n: nodelist) interface.
Use it like:
NodeList nl = ...
for(Node n : XmlUtil.asList(nl))
{...}
and
Node n = ...
for(Node attr : XmlUtil.asList(n.getAttributes()))
{...}
The code:
/**
* Converts NodeList to an iterable construct.
* From: https://stackoverflow.com/a/19591302/779521
*/
public final class XmlUtil {
private XmlUtil() {}
public static List<Node> asList(NodeList n) {
return n.getLength() == 0 ? Collections.<Node>emptyList() : new NodeListWrapper(n);
}
static final class NodeListWrapper extends AbstractList<Node> implements RandomAccess {
private final NodeList list;
NodeListWrapper(NodeList l) {
this.list = l;
}
public Node get(int index) {
return this.list.item(index);
}
public int size() {
return this.list.getLength();
}
}
public static List<Node> asList(NamedNodeMap n) {
return n.getLength() == 0 ? Collections.<Node>emptyList() : new NodeMapWrapper(n);
}
static final class NodeMapWrapper extends AbstractList<Node> implements RandomAccess {
private final NamedNodeMap list;
NodeMapWrapper(NamedNodeMap l) {
this.list = l;
}
public Node get(int index) {
return this.list.item(index);
}
public int size() {
return this.list.getLength();
}
}
}
Read XML file using XPathFactory, SAXParserFactory and StAX (JSR-173).
Using XPath get node and its child data.
/**
 * Runs two sample queries: a namespaced query against an in-memory SOAP fragment,
 * then a plain query against {@code ./books.xml}.
 */
public static void main(String[] args) {
    // In-memory sample: a SOAP body using the "soapenv" and "Yash" prefixes.
    String xml = "<soapenv:Body xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/'>"
            + "<Yash:Data xmlns:Yash='http://Yash.stackoverflow.com/Services/Yash'>"
            + "<Yash:Tags>Java</Yash:Tags><Yash:Tags>Javascript</Yash:Tags><Yash:Tags>Selenium</Yash:Tags>"
            + "<Yash:Top>javascript</Yash:Top><Yash:User>Yash-777</Yash:User>"
            + "</Yash:Data></soapenv:Body>";
    // Prefix-to-URI bindings for the XPath engine, written as JSON with single quotes.
    String jsonNameSpaces = "{'soapenv':'http://schemas.xmlsoap.org/soap/envelope/',"
            + "'Yash':'http://Yash.stackoverflow.com/Services/Yash'}";

    Document inlineDocument = getDocument(false, "fileName", xml);
    getNodesFromXpath(inlineDocument, "//Yash:Data", jsonNameSpaces);

    System.out.println("\n===== ***** =====");

    // File-based sample: no namespace bindings are supplied for this query.
    Document fileDocument = getDocument(true, "./books.xml", xml);
    getNodesFromXpath(fileDocument, "//person", "{}");
}
/**
 * Parses XML either from a file or from an in-memory string into a namespace-aware
 * DOM document.
 *
 * @param isFileName {@code true} to read from {@code fileName}, {@code false} to parse {@code xml}
 * @param fileName path of the XML file; only used when {@code isFileName} is {@code true}
 * @param xml XML text; only used when {@code isFileName} is {@code false}
 * @return the parsed document, or {@code null} if parsing failed (the stack trace is printed)
 */
static Document getDocument( boolean isFileName, String fileName, String xml ) {
    Document doc = null;
    try {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setNamespaceAware(true);
        factory.setIgnoringComments(true);
        factory.setIgnoringElementContentWhitespace(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        if ( isFileName ) {
            // try-with-resources closes the stream; previously it was never closed.
            try ( FileInputStream stream = new FileInputStream( new File( fileName ) ) ) {
                doc = builder.parse( stream );
            }
        } else {
            doc = builder.parse( string2Source( xml ) );
        }
    } catch (SAXException | IOException | ParserConfigurationException e) {
        e.printStackTrace();
    }
    return doc;
}
/**
 * Evaluates an XPath expression against a document and prints the matching nodes
 * via {@code displayNodeList}.
 *
 * <p>DOM node type codes, for reference:
 * ELEMENT_NODE[1],ATTRIBUTE_NODE[2],TEXT_NODE[3],CDATA_SECTION_NODE[4],
 * ENTITY_REFERENCE_NODE[5],ENTITY_NODE[6],PROCESSING_INSTRUCTION_NODE[7],
 * COMMENT_NODE[8],DOCUMENT_NODE[9],DOCUMENT_TYPE_NODE[10],DOCUMENT_FRAGMENT_NODE[11],NOTATION_NODE[12]
 *
 * @param doc document to query
 * @param xpathExpression XPath expression to evaluate
 * @param jsonNameSpaces JSON object text mapping namespace prefixes to URIs;
 *     pass {@code "{}"} when the expression uses no prefixes
 */
public static void getNodesFromXpath( Document doc, String xpathExpression, String jsonNameSpaces ) {
    try {
        XPathFactory xpf = XPathFactory.newInstance();
        XPath xpath = xpf.newXPath();
        JSONObject namespaces = getJSONObjectNameSpaces(jsonNameSpaces);
        // getJSONObjectNameSpaces returns null when the JSON cannot be parsed; in that
        // case continue without a namespace context instead of failing with an NPE.
        if ( namespaces != null && namespaces.size() > 0 ) {
            NamespaceContextImpl nsContext = new NamespaceContextImpl();
            for (Object key : namespaces.keySet()) { // Apache WebServices Common Utilities
                String pPrefix = key.toString();
                String pURI = namespaces.get(pPrefix).toString();
                nsContext.startPrefixMapping(pPrefix, pURI);
            }
            xpath.setNamespaceContext(nsContext );
        }
        XPathExpression compile = xpath.compile(xpathExpression);
        NodeList nodeList = (NodeList) compile.evaluate(doc, XPathConstants.NODESET);
        displayNodeList(nodeList);
    } catch (XPathExpressionException e) {
        e.printStackTrace();
    }
}
/**
 * Prints the nodes of a list to standard output. A node with more than one child
 * is printed as one line per element child; any other node is printed as a single
 * line with its own name and text content.
 *
 * @param nodeList nodes to print
 */
static void displayNodeList( NodeList nodeList ) {
    int total = nodeList.getLength();
    for (int index = 0; index < total; index++) {
        Node current = nodeList.item(index);
        NodeList children = current.getChildNodes();

        // Leaf-like node (zero or one child): print the node itself and move on.
        if ( children.getLength() <= 1 ) {
            System.out.format( "\n Node Name:[%s], Text[%s] ", current.getNodeName(), current.getTextContent() );
            continue;
        }

        for (int childIndex = 0; childIndex < children.getLength(); childIndex++) {
            Node child = children.item(childIndex);
            // Only element children are reported; text, comments, etc. are skipped.
            if ( child.getNodeType() != Node.ELEMENT_NODE ) {
                continue;
            }
            System.out.format( "\n\t Node Name:[%s], Text[%s] ", child.getNodeName(), child.getTextContent() );
        }
    }
}
/**
 * Wraps XML text in an {@link InputSource} backed by a character stream.
 *
 * @param str XML text
 * @return an input source reading from {@code str}
 */
static InputSource string2Source( String str ) {
    StringReader characters = new StringReader( str );
    return new InputSource( characters );
}
/**
 * Parses the namespace bindings given as JSON text. Single quotes are replaced by
 * double quotes before parsing, so the relaxed {@code {'prefix':'uri'}} form is accepted.
 *
 * @param jsonNameSpaces JSON object text mapping prefixes to namespace URIs
 * @return the parsed object, or {@code null} if the text cannot be parsed
 *     (the stack trace is printed)
 */
static JSONObject getJSONObjectNameSpaces( String jsonNameSpaces ) {
    String normalized = jsonNameSpaces;
    if ( normalized.contains("'") ) {
        normalized = normalized.replace("'", "\"");
    }
    JSONParser parser = new JSONParser();
    try {
        return (JSONObject) parser.parse(normalized);
    } catch (ParseException e) {
        e.printStackTrace();
        return null;
    }
}
XML Document
<?xml version="1.0" encoding="UTF-8"?>
<book>
<person>
<first>Yash</first>
<last>M</last>
<age>22</age>
</person>
<person>
<first>Bill</first>
<last>Gates</last>
<age>46</age>
</person>
<person>
<first>Steve</first>
<last>Jobs</last>
<age>40</age>
</person>
</book>
Output for the given XPathExpression:
String xpathExpression = "//person/first";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[first], Text[Bill]
Node Name:[first], Text[Steve] */
String xpathExpression = "//person";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[last], Text[M]
Node Name:[age], Text[22]
Node Name:[first], Text[Bill]
Node Name:[last], Text[Gates]
Node Name:[age], Text[46]
Node Name:[first], Text[Steve]
Node Name:[last], Text[Jobs]
Node Name:[age], Text[40] */
String xpathExpression = "//Yash:Data";
/*OutPut:
Node Name:[Yash:Tags], Text[Java]
Node Name:[Yash:Tags], Text[Javascript]
Node Name:[Yash:Tags], Text[Selenium]
Node Name:[Yash:Top], Text[javascript]
Node Name:[Yash:User], Text[Yash-777] */
See this link for our own Implementation of NamespaceContext