How to remove child elements from XML using Java? - java

<definitions>
<form name="enable">
<data1>...</data1>
<data2>...</data2>
<data3>...</data3>
</form>
<form>
<data1>...</data1>
<data2>...</data2>
<data3>...</data3>
</form>
...
</definitions>
I want to remove the definitions tag child elements(form) those are having the name attribute. For eg: in the above case, I want to remove the first form element from the document. How to do this in java?

The code that removes all forms that have the name attribute:
import java.io.File;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;
import org.w3c.dom.*;
public class XMLDemo {
public static void main(String[] args) throws Exception {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document document = dbf.newDocumentBuilder().parse(new File("input.xml"));
XPathFactory xpf = XPathFactory.newInstance();
XPath xpath = xpf.newXPath();
// Create XPathExpression object
XPathExpression expression = xpath.compile("/definitions/form[#name]");
// Evaluate expression result on XML document
NodeList nodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
if (nodes.getLength() > 0) {
for (int i = 0; i < nodes.getLength(); i++) {
Node node = nodes.item(i);
node.getParentNode().removeChild(node);
}
}
TransformerFactory tf = TransformerFactory.newInstance();
Transformer t = tf.newTransformer();
t.transform(new DOMSource(document), new StreamResult(System.out));
}
}
If you want to remove only forms with name = "enable" make this change:
// Create XPathExpression object
XPathExpression expression = xpath.compile("/definitions/form[contains(#name, 'enable')]");

Related

Store xml value as a Map using XPATH using JAVA

I am using XPATH to parse xml document,please find the xml below
<?xml version="1.0" encoding="UTF-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<bookEvent>
<bookName>harry_potter</bookName>
<bookEntity>comic</bookEntity>
<bookEntityId>10987645</bookEntityId>
<bookParameter>
<name>Name1</name>
<value>value1</value>
</bookParameter>
<bookParameter>
<name>Name2</name>
<value>value2</value>
</bookParameter>
<bookParameter>
<name>Name3</name>
<value>value3</value>
</bookParameter>
<bookParameter>
<name>Name4</name>
<value>value4</value>
</bookParameter>
<bookParameter>
<name>Name5</name>
<value>value5</value>
</bookParameter>
</bookEvent>
</soap:Body>
</soap:Envelope>
Here I would like to convert BookParameters to Map like below
{"Name1":"value1","Name2":"value2" etc}
I have tried the below code and i can get a Map but not in the expected format,
try{
Map<String,String> eventParameters = new HashMap<>();
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse("book.xml");
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
NodeList nodeList = (NodeList)xpath.compile("//bookEvent//eventParameter").evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if(node.hasChildNodes()) {
NodeList childNodes = node.getChildNodes();
for (int j = 0; j < childNodes.getLength(); j++) {
Node childNode = childNodes.item(j);
if (childNode.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(childNode.getNodeName()+"::"+childNode.getNodeValue()+"::"+childNode.getTextContent());
eventParameters.put(childNode.getTextContent(),childNode.getTextContent());
}
}
}
}
System.out.println("print map::"+eventParameters);
} catch (Exception e) {
e.printStackTrace();
}
The output looks like this
print map::{Name3=Name3, Name4=Name4, value5=value5, Name5=Name5, value2=value2, value1=value1, value4=value4, value3=value3, Name1=Name1, Name2=Name2}
Please somebody guide me to create a below map from the xml,Any help would be appreciable.
{"Name1":"value1","Name2":"value2" etc}
You can do it as a one-liner in XPath 3.1:
map:merge(//bookParameter!map{string(name): string(value)})
=> serialize(map{'method':'json'})
You can run XPath 3.1 from Java by installing Saxon-HE 9.8 (open source)
Use Below code :
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class ReadXMLFile {
public static Map<String,String> hMap = new LinkedHashMap<>();
public static void main(String argv[]) {
try {
File fXmlFile = new File("C:\\Users\\jaikant\\Desktop\\QUESTION.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getElementsByTagName("bookParameter");
for (int parameter = 0; parameter < nodeList.getLength(); parameter++) {
Node node = nodeList.item(parameter);
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) node;
String name = eElement.getElementsByTagName("name").item(0).getTextContent();
String value = eElement.getElementsByTagName("value").item(0).getTextContent();
hMap.put(name, value);
}
}
} catch (Exception e) {
e.printStackTrace();
}
hMap.forEach((h,k) -> {
System.out.println(h + ":" + k);
});
}
}
It will print exactly what you are looking for.

Java - XML read root element value

<?xml version="1.0" encoding="UTF-8"?>
<JDF DescriptiveName="DescriptiveName" ID="n0001" JobID="1101-CCC-0" JobPartID="1" ProjectID="">
<Comment Name="Materialnummer">XXXXXXX</Comment>
<NodeInfo LastEnd="2014-03-12T18:00:00+01:00">
<EmployeeRef rRef="EMPCSR"/>
</NodeInfo>
<CustomerInfo CustomerID="1740">
</CustomerInfo>
<ResourcePool>
</ResourcePool>
<ResourceLinkPool>
</ResourceLinkPool>
<JDF Category="FinalImaging" ID="n0002" Status="Waiting" Type="ProcessGroup" Types="XXX">
<ResourcePool>
</ResourcePool>
<ResourceLinkPool>
</ResourceLinkPool>
</JDF>
<JDF ID="n0002" Status="Waiting" Type="ProcessGroup" Types="PrePressPreparation">
<ResourcePool>
</ResourcePool>
<ResourceLinkPool>
</ResourceLinkPool>
</JDF>
</JDF>
How do I get the root element value. For this example I want to get the DescriptiveName,ID,JobID and ProjectID. I managed to read other values but stuck in root emlement. Please advice.
EDITED
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(file);
XPath xPath = XPathFactory.newInstance().newXPath();
//System.out.println("*************************");
String expression = "/JDF";
NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(xmlDocument, XPathConstants.NODESET);
A sample here :
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
public class Main {
public static void main(String[] argv) throws Exception{
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(true);
factory.setExpandEntityReferences(false);
Document doc = factory.newDocumentBuilder().parse(new File("filename"));
Element root = null;
NodeList list = doc.getChildNodes();
for (int i = 0; i < list.getLength(); i++) {
if (list.item(i) instanceof Element) {
root = (Element) list.item(i);
break;
}
}
root = doc.getDocumentElement();
}
}
You can get the attributes from root object

Remove duplicates node tags on a XML document using Java

I have the following xml document:
<?xml version="1.0" ?>
<tag>text<b><b>bold</b> bold again</b><b><br/>the end </tag>
I need to remove the duplicate tags but keep it's content so the result is:
<?xml version="1.0" ?>
<tag>text<b>bold bold again</b>the end </tag>
I have the following code:
import java.io.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.w3c.dom.Document;
import org.w3c.dom.*;
import java.util.Arrays;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class TakeDuplicatesXml{
public static void main(String[] args){
try{
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
Document doc = docBuilder.parse("/Users/youruser/code/Exercises/file.xml");
//get node list
List<String> aux = new ArrayList<String>();
removeDuplicate(doc.getDocumentElement(), aux);
//print the new document out
printXmlDocument(doc);
} catch (Exception ex) {
ex.printStackTrace();
}
}
public static void printXmlDocument(Document doc){
try{
DOMSource domSource = new DOMSource(doc);
StringWriter writer = new StringWriter();
StreamResult result = new StreamResult(writer);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.transform(domSource, result);
System.out.println("XML IN String format is: \n" + writer.toString());
}catch (Exception ex) {
ex.printStackTrace();
}
}
//with recursion
public static void removeDuplicate(Node node, List<String> aux){
System.out.println(node.getNodeName());
//check if that node exists already
if(aux.contains(node.getNodeName())){
node.getParentNode().removeChild(node);
}else{
//add node name to aux list
aux.add(node.getNodeName());
}
NodeList nodeList = node.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node currentNode = nodeList.item(i);
if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
//calls this method for all the children which is Element
removeDuplicate(currentNode, aux);
}
}
}
}
But the result is not what I want because is taking the node with it's contents (the word bold is gone):
<tag>text<b> bold again</b><br/>the end </tag>
How can I fix it? And how can I make it more performant?
This kind of problem is best tackled using a simple XSLT transformation. You need a stylesheet with two rules: an identity rule that copies everything unchanged
<xsl:template match="*">
<xsl:copy>
<xsl:copy-of select="#*"/>
<xsl:apply-templates select="child::node()"/>
</xsl:copy>
</xsl:template>
and another (higher priority) rule that deletes nested b tags:
<xsl:template match="b/b">
<xsl:apply-templates/>
</xsl:template>
Wrap these in the usual boilerplate:
<xsl:stylesheet version="1.0" xmlns:xsl="http:www.w3.org/1999/XSL/Transform">
.. template rules go here ...
</xsl:stylesheet>
Then invoke this from your Java program using:
public class TakeDuplicatesXml{
public static void main(String[] args){
try{
TransformerFactory tFactory = TransformerFactory.newInstance();
Templates t = tFactory.newTemplates(new File(... stylesheet file ....));
Source doc = new StreamSource(
new File("/Users/youruser/code/Exercises/file.xml"));
t.newTransformer().transform(doc, new StreamResult(System.out));
} catch (Exception ex) {
ex.printStackTrace();
}
}
<b>bold</b> is a child of <b> bold again</b>. If you perform node.getParentNode().removeChild(node) it is gone. Add the value of the child to the value of the parent before removing it.
Another possibility for querying XML is XPath. See XPath Tutorial:
Example 12: "The descendant axis contains the descendants of the context node; a descendant is a child or a child of a child and so on".
Example 8: "Function name() returns name of the element".
Can't tell if this is more performant. But I guess it is - in favour of a self-written recursion.
So I found the solution. I am still not sure if it's the most optimal solution but works well and puts the contents in the right order:
//with recursion
public static void removeDuplicate(Node node, List<String> aux){
//check if that node exists already
if(aux.contains(node.getNodeName())){
Node parentNode = node.getParentNode();
String value = parentNode.getTextContent();
parentNode.removeChild(node);
parentNode.setTextContent(value);
}else{
//add node name to aux list
aux.add(node.getNodeName());
}
NodeList nodeList = node.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node currentNode = nodeList.item(i);
if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
//calls this method for all the children which is Element
removeDuplicate(currentNode, aux);
}
}

Parsing XML attributes from Complex Empty Elements

I have an xml file in the following pattern which contains a few Complex Empty Elements(elements with no content, only attributes).
<items>
<item id="0" name="a" />
<item id="1" name="b" />
</items>
I'm at lose to parse the attributes from them. This is what I have done so far :
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(inputStream);
Element itemsElement = document.getDocumentElement();
if (itemsElement.getTagName().equals(TAG_ITEMS)) {
NodeList nodeList = itemsElement.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
// process each item node
Node node = nodeList.item(i);
if (node.getNodeType() == Node.TEXT_NODE) { // Is this the right way?
Text text = (Text) node;
// Do stuff with attributes
}
}
}
I cannot cast these Text nodes to Element nodes and get attributes, I cannot get attributes from node using getAttributes - NPE at NamedNodeMap attributes.getLength(), I cannot cast it to Text and get attributes. How can I parse the attributes?
You are not interested in the text context of the nodes inside of items but in the attributes of the nodes item. you could proceed as follow:
//process each item node
Node node = nodeList.item(i);
if (node.getNodeName().equals("item")) {
NamedNodeMap attributes = node.getAttributes();
System.out.printf("id=%s, name=%s%n",
attributes.getNamedItem("id").getTextContent(),
attributes.getNamedItem("name").getTextContent());
}
This would print:
id=0, name=a
id=1, name=b
Assuming you want to get the indiviudal attributes of the nodes you need to one of two things (or both depending on your needs)...
You need to test if the current node is an ELEMENT_NODE or if the current node's name is equal to item (assuming all the node names are the same), for example...
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
public class Test {
public static final String TAG_ITEMS = "items";
public static void main(String[] args) {
try (InputStream is = Test.class.getResourceAsStream("/Test.xml")) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(is);
Element itemsElement = document.getDocumentElement();
if (itemsElement.getTagName().equals(TAG_ITEMS)) {
NodeList nodeList = itemsElement.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
NamedNodeMap attributes = node.getAttributes();
Node idAtt = attributes.getNamedItem("id");
Node nameAtt = attributes.getNamedItem("name");
System.out.println("id = " + idAtt.getNodeValue());
System.out.println("name = " + nameAtt.getNodeValue());
}
}
}
} catch (Exception exp) {
exp.printStackTrace();
}
}
}
Which will output...
id = 0
name = a
id = 1
name = b
All of this could be greatly reduced by using XPath, for example, if all the item nodes are the same name, then you could just use
/items/item
As the query. If the node names are different, but the attributes are the same, then you could use
/items/*[#id]
which will list all the nodes under items which has an id attribute, or
/items/*[#name]
which will list all the nodes under items which has an name attribute...
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class Test {
public static void main(String[] args) {
try (InputStream is = Test.class.getResourceAsStream("/Test.xml")) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(is);
XPath xpath = XPathFactory.newInstance().newXPath();
XPathExpression expression = xpath.compile("/items/item");
NodeList nodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
process(nodes);
expression = xpath.compile("/items/*[#id]");
nodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
process(nodes);
expression = xpath.compile("/items/*[#name]");
nodes = (NodeList) expression.evaluate(document, XPathConstants.NODESET);
process(nodes);
} catch (Exception exp) {
exp.printStackTrace();
}
}
protected static void process(NodeList nodes) {
for (int index = 0; index < nodes.getLength(); index++) {
Node item = nodes.item(index);
NamedNodeMap attributes = item.getAttributes();
Node idAtt = attributes.getNamedItem("id");
Node nameAtt = attributes.getNamedItem("name");
System.out.println("id = " + idAtt.getNodeValue() + "; name = " + nameAtt.getNodeValue());
}
}
}

Checking if particular node exists in Xml file using Xpath

I would like to check is code = "ABC" exists in my xml file using xPath.Can you please suggest me some methods for it?
<metadata>
<codes class = "class1">
<code code = "ABC">
<detail "blah blah"/>
</code>
</codes>
<codes class = "class2">
<code code = "123">
<detail "blah blah"/>
</code>
</codes>
</metadata>
[EDIT]
I did following. It reuturns null.
XPath xPath = XPathFactory.newInstance().newXPath();
XPathExpression expr = xPath.compile("//codes/code[# code ='ABC']");
Object result = expr.evaluate(doc, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
for (int i = 0; i < nodes.getLength(); i++) {
System.out.println("nodes: "+ nodes.item(i).getNodeValue());
}
I don't know how you tested your code because <detail "blah blah"/> is an incorrect xml construct it should be <detail x="blah blah"/> i.e. a name-value pair !!
For the XPath Expression "//codes/code[# code ='ABC']" the nodes.item(i).getNodeValue()) is going to be null because it will return an Element. See the below Javadoc comment:
A working sample:
import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
public class Test
{
public static void main(String[] args) throws Exception
{
Document doc = getDoc();
XPath xPath = XPathFactory.newInstance().newXPath();
XPathExpression expr = xPath.compile("//codes/code[#code ='ABC']");
Object result = expr.evaluate(doc, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
System.out.println("Have I found anything? " + (nodes.getLength() > 0 ? "Yes": "No"));
for (int i = 0; i < nodes.getLength(); i++) {
System.out.println("nodes: "+ nodes.item(i).getNodeValue());
}
}
private static Document getDoc()
{
String xml = "<metadata>"+
"<codes class = 'class1'>"+
"<code code='ABC'>"+
"<detail x='blah blah'/>"+
"</code>"+
"</codes>"+
"<codes class = 'class2'>"+
"<code code = '123'>"+
"<detail x='blah blah'/>"+
"</code>"+
"</codes>"+
"</metadata>";
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder db = dbf.newDocumentBuilder();
Document dom = db.parse(new ByteArrayInputStream(xml.getBytes()));
return dom;
}catch(Exception pce) {
pce.printStackTrace();
}
return null;
}
}

Categories

Resources