In my simple XSLT transformation, which uses only standard JDK classes, I am getting an unexpected result when using the // shortcut.
package my.xslt.test;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
/**
 * Self-contained demonstration: compiles an inline XSLT 1.0 stylesheet, runs it against
 * a trivial input document and prints the serialized result.
 *
 * <p>The stylesheet copies {@code entry} elements out of a variable holding a small tree,
 * once via the {@code //entry} abbreviation (into {@code <a>}) and once via the explicit
 * {@code /descendant::entry} step (into {@code <b>}), so the two results can be compared.
 */
public class IssuePresenter {

    /**
     * Builds the stylesheet, transforms {@code <document/>} with it and writes the output
     * to standard out.
     *
     * @param args ignored
     * @throws Exception if the stylesheet cannot be compiled or the transformation fails
     */
    public static void main(String[] args) throws Exception {
        // The pieces are joined without separators, exactly as successive appends would be.
        String stylesheet = String.join("",
                "<xsl:stylesheet xmlns:xsl='http://www.w3.org/1999/XSL/Transform'",
                " xmlns:exsl='http://exslt.org/common'",
                " exclude-result-prefixes='exsl'",
                " version='1.0'>",
                "<xsl:output indent='yes' omit-xml-declaration='yes'/>",
                "<xsl:variable name='tree'>",
                " <entry level='0'>",
                " <entry level='1'/>",
                " </entry>",
                "</xsl:variable>",
                "<xsl:template match='/'>",
                " <a><xsl:copy-of select='exsl:node-set($tree)//entry'/></a>",
                " <b><xsl:copy-of select='exsl:node-set($tree)/descendant::entry'/></b>",
                "</xsl:template>",
                "</xsl:stylesheet>");

        StringWriter output = new StringWriter();

        TransformerFactory factory = TransformerFactory.newInstance();
        StreamSource stylesheetSource = new StreamSource(new StringReader(stylesheet));
        Templates compiled = factory.newTemplates(stylesheetSource);

        StreamSource input = new StreamSource(new StringReader("<document/>"));
        Transformer runner = compiled.newTransformer();
        runner.transform(input, new StreamResult(output));

        System.out.println(output.toString());
    }
}
I expect <a> to be the same as <b>. Is it a bug, or am I missing something?
Actual output:
<a>
<entry level="1"/>
</a><b>
<entry level="0">
<entry level="1"/>
</entry>
<entry level="1"/>
</b>
This issue has been confirmed as a JDK bug; it can be tracked at https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8265134
Related
I have the following XML string:
<Aaaa>
<Bbbb>
<GroupC>
<KeyId>10001</KeyId>
</GroupC>
<DetailC>
<Dddd>
<Eeee>Eeee 001</Eeee>
<Ffff>Ffff 001</Ffff>
</Dddd>
</DetailC>
<DetailC>
<Dddd>
<Eeee>Eeee 002</Eeee>
<Ffff>Ffff 002</Ffff>
</Dddd>
</DetailC>
</Bbbb>
</Aaaa>
I would like to split it on "DetailC" into smaller XML documents:
XML 01:
<Aaaa>
<Bbbb>
<GroupC>
<KeyId>10001</KeyId>
</GroupC>
<DetailC>
<Dddd>
<Eeee>Eeee 001</Eeee>
<Ffff>Ffff 001</Ffff>
</Dddd>
</DetailC>
</Bbbb>
</Aaaa>
XML 02:
<Aaaa>
<Bbbb>
<GroupC>
<KeyId>10001</KeyId>
</GroupC>
<DetailC>
<Dddd>
<Eeee>Eeee 002</Eeee>
<Ffff>Ffff 002</Ffff>
</Dddd>
</DetailC>
</Bbbb>
</Aaaa>
How can I do this using Java?
Currently I am only able to split it into separate XML fragments,
but they come out without <Aaaa>, <Bbbb> and <GroupC>.
Java code:
package message;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.xpath.CachedXPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.NodeIterator;
import org.xml.sax.InputSource;
/**
 * Reads an XML file and prints every node selected by an XPath expression as its own
 * serialized XML fragment.
 */
public class mainClass {

    /**
     * Reads {@code D:\abc.xml}, selects every {@code /Aaaa/Bbbb/DetailC} node and prints
     * each serialized fragment prefixed with {@code "xmlC: "}.
     *
     * @param args ignored
     * @throws Exception if parsing, XPath evaluation or serialization fails
     */
    public static void main(String[] args) throws Exception {
        String path = "D:\\abc.xml";
        String xml = readFile(path);
        List<String> xmlList2 = splitXML(xml, "/Aaaa/Bbbb/DetailC");
        for (String xmlC : xmlList2) {
            System.out.println("xmlC: " + xmlC);
        }
    }

    /**
     * Parses {@code xmlMessage} and serializes each node matched by {@code xPath}.
     *
     * <p>Only the matched node (and its subtree) is serialized; ancestors of the matched
     * node are not part of the fragment.
     *
     * @param xmlMessage XML document text
     * @param xPath      XPath expression selecting the nodes to extract
     * @return one serialized fragment (without XML declaration) per selected node,
     *         in iteration order
     * @throws Exception if parsing, XPath evaluation or serialization fails
     */
    private static List<String> splitXML(String xmlMessage, String xPath) throws Exception {
        List<String> xmlList = new ArrayList<>();

        Transformer xform = TransformerFactory.newInstance().newTransformer();
        xform.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

        // Namespace awareness must be enabled on the factory that actually builds the
        // parser. Previously it was set on a second, unused factory and had no effect.
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        dbFactory.setNamespaceAware(true);
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(new InputSource(new StringReader(xmlMessage)));

        CachedXPathAPI cachedXPathAPI = new CachedXPathAPI();
        NodeIterator nl = cachedXPathAPI.selectNodeIterator(doc, xPath);
        Node node;
        while ((node = nl.nextNode()) != null) {
            StringWriter buf = new StringWriter();
            xform.transform(new DOMSource(node), new StreamResult(buf));
            xmlList.add(buf.toString());
        }
        return xmlList;
    }

    /**
     * Reads the whole file at {@code path} into a string, joining lines with the
     * platform line separator.
     *
     * @param path file system path of the file to read
     * @return the file content, or an empty string if the file could not be read
     *         (the I/O error is printed to standard error)
     */
    private static String readFile(String path) {
        String content = "";
        try (Stream<String> lines = Files.lines(Paths.get(path))) {
            content = lines.collect(Collectors.joining(System.lineSeparator()));
        } catch (IOException e) {
            // Best effort: report the failure and fall through with empty content.
            e.printStackTrace();
        }
        return content;
    }
}
If you use Saxon 9 HE (available on SourceForge and Maven for Java), you can solve that with XSLT 3; see the approach from "Split XML file into multiple files using XSLT", where you can change the code to
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" version="3.0"
exclude-result-prefixes="xs">
<xsl:template match="DetailC">
<xsl:variable name="pos" as="xs:integer">
<xsl:number/>
</xsl:variable>
<xsl:result-document href="XML{format-number($pos, '000')}.xml">
<xsl:apply-templates select="/" mode="split">
<xsl:with-param name="this-detail" select="." tunnel="yes"/>
</xsl:apply-templates>
</xsl:result-document>
</xsl:template>
<xsl:template match="@* | node()" mode="split">
<xsl:copy>
<xsl:apply-templates select="@* | node()" mode="#current"/>
</xsl:copy>
</xsl:template>
<xsl:template match="DetailC" mode="split">
<xsl:param name="this-detail" tunnel="yes"/>
<xsl:if test=". is $this-detail">
<xsl:next-match/>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
To run Saxon 9 in Java you can use either the JAXP transformation API http://saxonica.com/html/documentation/using-xsl/embedding/jaxp-transformation.html or the Saxon 9 specific s9api http://saxonica.com/html/documentation/using-xsl/embedding/s9api-transformation.html.
Keep in mind that Transformer can directly transform a file with StreamSource (e.g. https://docs.oracle.com/javase/8/docs/api/javax/xml/transform/stream/StreamSource.html#StreamSource-java.lang.String- or https://docs.oracle.com/javase/8/docs/api/javax/xml/transform/stream/StreamSource.html#StreamSource-java.io.File-) so there is no need to read in the file contents in a string or to build a DOM by hand, you can load any XML file directly as the input to XSLT.
I would like to set a prefix on an attribute while creating an XML file using Java.
The result that I needed is
<entry gd:etag="*">
<id>Text</id>
</entry>
But I'm getting
<entry etag="*">
<id>Text</id>
</entry>
The code that I have written is
// Create an empty DOM document from a factory left at its default settings.
DocumentBuilderFactory docBuildFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docBuildFactory.newDocumentBuilder();
Document doc = docBuilder.newDocument();
// Root element <entry>.
Element entry = doc.createElement("entry");
// The attribute is created under the plain name "etag"; no prefix or namespace is supplied here.
entry.setAttribute("etag","*");
doc.appendChild(entry);
// Child element <id> with the text content "Text".
Element id = doc.createElement("id");
id.setTextContent("Text");
entry.appendChild(id);
.....
I have no idea how to set a prefix for attributes. Can someone suggest how to do this?
You need to set the DocumentBuilderFactory to be namespace aware, and you need to set the namespace for that prefix and use setAttributeNS for an attribute with an XML namespace.
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Builds a small DOM document whose root element carries a namespace-prefixed attribute
 * ({@code gd:etag}) and prints it as indented XML.
 */
public class Entry {

    /**
     * Creates {@code <entry gd:etag="*"><id>Text</id></entry>} and prints it to standard out.
     *
     * @param args ignored
     * @throws IOException                  if writing the output fails
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if serialization fails
     */
    public static void main(String[] args)
            throws IOException, ParserConfigurationException, TransformerException {
        String ns = "http://schemas.google.com/g/2005"; // just a guess
        DocumentBuilderFactory docBuildFactory = DocumentBuilderFactory.newInstance();
        docBuildFactory.setNamespaceAware(true);
        DocumentBuilder docBuilder = docBuildFactory.newDocumentBuilder();
        Document doc = docBuilder.newDocument();

        Element entry = doc.createElement("entry");
        // Prefixed attributes need the namespace-aware setter: namespace URI + qualified name.
        entry.setAttributeNS(ns, "gd:etag", "*");
        doc.appendChild(entry);

        Element id = doc.createElement("id");
        id.setTextContent("Text");
        entry.appendChild(id);

        printDocument(doc, System.out);
    }

    /**
     * Serializes {@code doc} as indented UTF-8 XML (with XML declaration) to {@code out}.
     * The stream is flushed but not closed.
     *
     * @param doc document to serialize
     * @param out destination stream; remains open after the call
     * @throws IOException          if flushing the output fails
     * @throws TransformerException if serialization fails
     */
    public static void printDocument(Document doc, OutputStream out)
            throws IOException, TransformerException {
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        // Use the charset constant rather than a charset name looked up at runtime.
        OutputStreamWriter writer =
                new OutputStreamWriter(out, java.nio.charset.StandardCharsets.UTF_8);
        transformer.transform(new DOMSource(doc), new StreamResult(writer));
        // The writer buffers encoded bytes; flush explicitly instead of relying on the
        // serializer to do it, since the caller keeps ownership of the stream.
        writer.flush();
    }
}
Output:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<entry xmlns:gd="http://schemas.google.com/g/2005" gd:etag="*">
<id>Text</id>
</entry>
Replace the namespace URI with the correct one; I just guessed at it based on Googling the gd:etag prefix, which brought up some Google Data API and Google Contacts API stuff.
SimpleXml can do it:
// Build <entry> with a "gd:etag" attribute and an <id> child, then serialize and print it.
// NOTE(review): the attribute name is passed as a plain string; the output shown for this
// snippet carries no xmlns:gd declaration — confirm whether that is acceptable for consumers.
final SimpleXml simple = new SimpleXml();
final Element element =
element("entry").attribute("gd:etag", "*")
.child(element("id").text("Text"));
System.out.println(simple.domToXml(element));
Will output:
<entry gd:etag="*"><id>Text</id></entry>
From maven central:
<dependency>
<groupId>com.github.codemonstur</groupId>
<artifactId>simplexml</artifactId>
<version>1.4.0</version>
</dependency>
How do I delete XML tags from XML document in java?
For Example, I have the following XML:
<root>
<item>
<code>100001</code>
<price>456</price>
<name>ABC</name>
</item>
<item>
<code>100002</code>
<price>123</price>
<name>DEF</name>
</item>
<item>
<code>100003</code>
<price>887</price>
<name>XYZ</name>
</item>
</root>
I want to delete the price tag from the XML and produce the XML in the following manner:
<root>
<item>
<code>100001</code>
<name>ABC</name>
</item>
<item>
<code>100002</code>
<name>DEF</name>
</item>
<item>
<code>100003</code>
<name>XYZ</name>
</item>
</root>
Additionally the requirement is such that the XML tags can change as I get this information from a webservice which can change the information it sends.
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import java.io.File;
import java.util.ArrayList;
/**
 * Reads {@code C:\file.xml} via JAXB, copies only the code and name of each item into a
 * fresh {@link Root}, and writes the reduced document to {@code C:\result.xml} and to
 * standard out.
 */
public class JAXBExample {
    /** Target object that receives the reduced item list and is marshalled at the end. */
    static Root root = new Root();

    /**
     * Runs the unmarshal / copy / marshal round trip.
     *
     * @param args ignored
     * @throws JAXBException if the context cannot be created or (un)marshalling fails
     */
    public static void main(String[] args) throws JAXBException {
        File file = new File("C:\\file.xml");
        File file1 = new File("C:\\result.xml");
        JAXBContext jaxbContext = JAXBContext.newInstance(Root.class);
        Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
        Root rootEle = (Root) jaxbUnmarshaller.unmarshal(file);

        // Root exposes its list through getItem()/setItem(); there is no getItemList().
        ArrayList<item> parsedItems = rootEle.getItem();
        ArrayList<item> itemList = new ArrayList<>();
        // The list is null when setItem() was never invoked on the unmarshalled object.
        if (parsedItems != null) {
            for (item parsed : parsedItems) {
                // Copy only the fields that should survive; anything else is dropped.
                item itemLocal = new item();
                itemLocal.setCode(parsed.getCode());
                itemLocal.setName(parsed.getName());
                itemList.add(itemLocal);
            }
        }
        root.setItem(itemList);

        Marshaller jaxbMarshaller = jaxbContext.createMarshaller();
        // output pretty printed
        jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        jaxbMarshaller.marshal(root, file1);
        jaxbMarshaller.marshal(root, System.out);
    }
}
Root class:
import org.apache.activemq.kaha.impl.data.Item;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import java.util.ArrayList;
#XmlRootElement(name = "root")
public class Root {
public ArrayList<item> getItem() {
return itemList;
}
public void setItem(ArrayList<item> item) {
this.itemList = item;
}
private ArrayList<item> itemList;
}
Item class:
/**
 * Plain data holder for one item, carrying only its code and its name.
 */
public class item {

    /** Item code; unset until {@link #setCode} is called. */
    private String code;

    /** Item name; unset until {@link #setName} is called. */
    private String name;

    /** @return the current code */
    public String getCode() {
        return this.code;
    }

    /** @param code the code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the current name */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }
}
I hope this will help.
As commented, consider XSLT that runs the Identity Transform and an empty template on the node you wish to remove. No for loops, if logic, or use of data structures (e.g., arraylist, hashmaps) needed for this solution.
As information, XSLT is a special purpose language (sibling to XPath) designed to transform xml files into other xml, html, even text files (csv/txt). General purpose languages like Java, PHP, Python, and others maintain libraries to run XSLT 1.0 scripts and can even call external dedicated XSLT processors like libxslt/Saxon. Additionally, XSLT scripts are themselves well-formed XML files and can be parsed from file or string for any dynamic needs.
XSLT (save as .xsl script to be parsed from file in Java below)
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<xsl:strip-space elements="*"/>
<!-- Identity Transform -->
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<!-- Removes all price tags -->
<xsl:template match="price"/>
</xsl:transform>
Java
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Applies an XSLT stylesheet stored on disk to an XML file and writes the indented
 * result to another file.
 */
public class CourseList {

    /**
     * Parses the input XML into a DOM, compiles the stylesheet and writes the transformed,
     * pretty-printed document to the output path. All three paths are fixed in the code.
     *
     * @param args ignored
     * @throws IOException                  if the input file cannot be read
     * @throws URISyntaxException           declared for callers; not raised by this code path
     * @throws SAXException                 if the input XML is not well-formed
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException         if the stylesheet cannot be compiled or applied
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
            SAXException,
            ParserConfigurationException,
            TransformerException {

        // Locations of the source document, the stylesheet and the result.
        final File sourceFile = new File("C:\\Path\\To\\Input.xml");
        final File stylesheetFile = new File("C:\\Path\\To\\XSLTScript.xsl");
        final File targetFile = new File("C:\\Path\\To\\Output.xml");

        // Load the input as a DOM tree first, then compile the stylesheet.
        Document inputDom = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse(sourceFile);
        Transformer styler = TransformerFactory.newInstance()
                .newTransformer(new StreamSource(stylesheetFile));

        // Serialization settings: declaration kept, standalone, indented by four spaces.
        styler.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
        styler.setOutputProperty(OutputKeys.STANDALONE, "yes");
        styler.setOutputProperty(OutputKeys.METHOD, "xml");
        styler.setOutputProperty(OutputKeys.INDENT, "yes");
        styler.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        styler.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        styler.transform(new DOMSource(inputDom), new StreamResult(targetFile));
    }
}
My Code Now:
import org.w3c.dom.Node;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.StringWriter;
/**
 * Serializes a DOM node to XML text with an identity transformer, omitting the XML
 * declaration.
 *
 * @param input node to serialize
 * @return the serialized XML
 * @throws IllegalArgumentException if the transformer cannot be created or the
 *         transformation fails; the underlying {@link TransformerException} is the cause
 */
private String getStringByJAXP(Node input) {
    StringWriter buffer = new StringWriter();
    try {
        transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.transform(new DOMSource(input), new StreamResult(buffer));
    } catch (TransformerException e) {
        // Keep the original failure attached so the root cause is not lost.
        throw new IllegalArgumentException(e);
    }
    return buffer.toString();
}
Output:
<aaa>
<a>text a</a>
<b>
<c>text c</c>
</b>
<f>
<g><h a="xxx"/></g>
</f>
</aaa>
But I want to output as follows:
<aaa><a>text a</a><b><c>text c</c></b><f><g><h a="xxx" /></g></f></aaa>
Note that I can't do this with simple string replacements, because the space in <a>text a</a> must not be removed (<a>texta</a> is totally different from <a>text a</a>).
EDIT:
Setting OutputKeys.INDENT to "no" does not work. Updated code:
/**
 * Serializes a DOM node to XML text with an identity transformer, omitting the XML
 * declaration and with indentation explicitly switched off.
 *
 * @param input node to serialize
 * @return the serialized XML
 * @throws IllegalArgumentException if the transformer cannot be created or the
 *         transformation fails; the underlying {@link TransformerException} is the cause
 */
private String getStringByJAXP(Node input) {
    StringWriter buffer = new StringWriter();
    try {
        transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty(OutputKeys.INDENT, "no");
        transformer.transform(new DOMSource(input), new StreamResult(buffer));
    } catch (TransformerException e) {
        // Keep the original failure attached so the root cause is not lost.
        throw new IllegalArgumentException(e);
    }
    return buffer.toString();
}
I had a similar case once.
I tried transformer.setOutputProperty(OutputKeys.INDENT,"no"); first, but this did not work.
The problem was that my original node had additional "new line" text nodes.
The answer to Strip whitespace and newlines from XML in Java fixed it for me. Basically, you just remove the unnecessary text nodes before you transform the parent node.
I ended up using this:
/**
 * Recursively trims leading and trailing whitespace from the content of every text node
 * below {@code node}.
 *
 * <p>Text nodes are modified in place and stay in the tree even when their content
 * becomes empty; nodes of other types are only traversed.
 *
 * @param node root of the subtree to process; its descendants are modified
 */
public static void trimWhitespace(Node node)
{
    // Walk the direct children through the sibling chain, descending into each one.
    for (Node current = node.getFirstChild(); current != null; current = current.getNextSibling()) {
        boolean isText = current.getNodeType() == Node.TEXT_NODE;
        if (isText) {
            String trimmed = current.getTextContent().trim();
            current.setTextContent(trimmed);
        }
        trimWhitespace(current);
    }
}
You can pass an XSLT stylesheet to your Transformer which has the advantage that you will not have to parse your document twice.
// Load the stylesheet as a resource located relative to this class.
// NOTE(review): getResourceAsStream returns null when the resource is missing — confirm the
// file is on the classpath next to this class.
InputStream xsltStream = getClass().getResourceAsStream("trim-whitespace.xslt");
TransformerFactory transformerFactory = TransformerFactory.newInstance();
// Passing a stylesheet source yields a transformer that applies it, rather than an identity copy.
Transformer transformer = transformerFactory.newTransformer(new StreamSource(xsltStream));
trim-whitespace.xslt
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- copy all elements as they are -->
<xsl:template match="*">
<xsl:copy>
<xsl:copy-of select="@*" />
<xsl:apply-templates />
</xsl:copy>
</xsl:template>
<xsl:template match="*/text()[not(normalize-space())]" />
</xsl:stylesheet>
for eg. root=
<root>
<param value="abc">
<param value="bc">
</root>
NodeToInsert could be
<insert><parameterDesc>afds</parameterDesc></insert>
The output should be:
<root>
<insert><parameterDesc>afds</parameterDesc></insert>
<param value="abc">
<param value="bc">
</root>
I'll be really irritated if it turns out I just did your homework for you.
package com.akonizo.examples;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
/**
 * Parses a small XML document, inserts a new element as the first child of the root and
 * prints the indented result.
 */
public class XmlInsertExample {
    /**
     * Inserts {@code <insert><parameterDesc>afds</parameterDesc></insert>} before the
     * existing children of {@code <root>} and prints the document to standard out.
     * Any failure is reported as a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String initial = "<root><param value=\"abc\"/><param value=\"bc\"/></root>";
        try {
            // Parse the initial document. The text has no XML declaration, so the parser
            // assumes UTF-8; encode explicitly instead of using the platform charset.
            ByteArrayInputStream is = new ByteArrayInputStream(
                    initial.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document doc = db.parse(is);

            // Create the new xml fragment
            Text a = doc.createTextNode("afds");
            Node p = doc.createElement("parameterDesc");
            p.appendChild(a);
            Node i = doc.createElement("insert");
            i.appendChild(p);

            // Insert it in front of the root's current first child.
            Element r = doc.getDocumentElement();
            r.insertBefore(i, r.getFirstChild());
            r.normalize();

            // Format the xml for output
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");

            // Serialize into an in-memory writer and print the collected text.
            StreamResult result = new StreamResult(new StringWriter());
            DOMSource source = new DOMSource(doc);
            transformer.transform(source, result);
            System.out.println(result.getWriter().toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
The result will be:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<root>
<insert>
<parameterDesc>afds</parameterDesc>
</insert>
<param value="abc"/>
<param value="bc"/>
</root>