Reading a value from XMLnode - java

I am very new to XML parsing. I am trying to read XML files from a shared drive on my computer and move them to another shared drive. I have the XML file below, and I am trying to read the value Test.pdf (the FileName attribute of the first IndexData element) from it.
<?xml version="1.0" encoding="utf-8" ?>
<xml>
<IndexData FileName="Test.pdf">
<AttachmentID>3221929</AttachmentID>
<URI>test234555..pdf</URI>
<postmarkDate>2018-07-02T12:52:00.9</postmarkDate>
<pin>305270036</pin>
<scanDate>2018-07-02T12:52:00.9</scanDate>
<UserLogin>admin</UserLogin>
</IndexData>
<IndexData FileName="Test2.pdf">
<AttachmentID>3221931</AttachmentID>
<URI>Appp2.pdf</URI>
<postmarkDate>2018-07-02T14:19:22.5</postmarkDate>
<pin>305270036</pin>
<scanDate>2018-07-02T14:19:22.5</scanDate>
<UserLogin>admin</UserLogin>
</IndexData>
</xml>
I imported org.w3c.dom.Node for this. Below is my code:
/**
 * Processes the XML for the given document node.
 *
 * <p>Reads the node's {@code FileName} attribute and, when a file with that name exists,
 * hands the file name and the node's child-element data to the import step.
 *
 * @param doc XML node describing one document (for example an {@code <IndexData>} element)
 * @return the value of the {@code FileName} attribute, or {@code null} when the attribute is
 *     missing or any step throws
 */
String processXml(Node doc) {
    String fileName = null;
    try {
        DfLogger.debug(this, "Loading: " + doc.getNodeName(), null, null);
        Map<String, String> indexData = getXmlData(doc);
        // FileName is an attribute of the node, not a child element. getXmlData only collects
        // child elements, so the name has to be read from the node's attribute map instead.
        Node fileNameAttribute = doc.hasAttributes() ? doc.getAttributes().getNamedItem("FileName") : null;
        if (fileNameAttribute == null) {
            DfLogger.error(this, "Error processing document: FileName attribute is missing.", null, null);
            return null;
        }
        fileName = fileNameAttribute.getNodeValue();
        if (new File(fileName).exists()) {
            // NOTE(review): `import` is a reserved word in Java, so this call cannot compile as
            // written; presumably the real helper has a different name - confirm and rename.
            import(fileName, indexData);
        }
    } catch (Exception ex) {
        DfLogger.error(this, "Error processing document.", null, ex);
        return null;
    }
    return fileName;
}
My value for FileName is always NULL when I am trying to read the value by doing this:
fileName = indexData.get("IndexData FileName");
below is my getXmlData method.
/**
 * Collects the direct child elements of a node into a name-to-text map.
 *
 * <p>Only element children are considered; attributes of {@code xmlDataNode} itself are not
 * included. When two children share a name, the later one replaces the earlier one.
 *
 * @param xmlDataNode node whose child elements are read
 * @return map from child element name to its trimmed text content
 */
protected Map<String, String> getXmlData(Node xmlDataNode) {
    Map<String, String> valuesByTag = new HashMap<>();
    // Walk the sibling chain instead of indexing into the child NodeList.
    for (Node child = xmlDataNode.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue; // skip whitespace text, comments, etc.
        }
        String text = child.getTextContent().trim();
        valuesByTag.put(child.getNodeName(), text);
    }
    return valuesByTag;
}
The caller method for processXML is below:
Public void processIncomingfiles(String documentTagName) throws Exception {
DfLogger.debug(this, "Import Process Begin ---- exportPath=" + exportPath, null, null);
try {
File dir = new File(exportPath);
if (dir.isDirectory()) {
FilenameFilter xmlFiles = new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.toLowerCase().endsWith(".xml");
}
};
for (File file : dir.listFiles(xmlFiles)) {
if (!file.isDirectory()) {
DfLogger.debug(this, "Loading XML file: " + file.getAbsolutePath(), null, null);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = dbFactory.newDocumentBuilder();
FileInputStream fileStream = new FileInputStream(file);
try {
// Use FileInputStream instead of File since parse will leave file locked on error
Document doc = documentBuilder.parse(fileStream);
fileStream.close();
fileStream = null;
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getElementsByTagName(documentTagName);
List<Node> errors = new ArrayList<>();
for (int i = 0; i < nodeList.getLength(); i++) {
String documentFilename = processXml(nodeList.item(i));
if (documentFilename != null) {
moveFileToProcessedSuccessful(documentFilename);
} else {
DfLogger.debug(
this,
"Error processing document in file: " + file.getName(),
null,
null);
errors.add(nodeList.item(i));
}
}
if (!errors.isEmpty()) {
if (errors.size() == nodeList.getLength()) {
safeMove(file, file.getAbsolutePath() + ".errors");
} else {
Node parent = nodeList.item(0).getParentNode();
for (Node errorDoc : errors) {
parent.removeChild(errorDoc);
}
writeXml(doc, file.getAbsolutePath());
moveFileToProcessedSuccessful(file);
while (nodeList.getLength() > 0) {
parent.removeChild(nodeList.item(0));
}
for (Node errorDoc : errors) {
parent.appendChild(errorDoc);
}
writeXml(doc, file.getAbsolutePath() + ".errors");
}
} else {
moveFileToProcessedSuccessful(file);
}
} catch (Exception ex) {
DfLogger.error(this, "Error parsing XML File.", null, ex);
if (fileStream != null) {
fileStream.close(); // If DocBuilder.parse fails, leaves file locked
}
safeMove(file, file.getAbsolutePath() + ".error");
}
}
}
}
} catch (Exception ex) {
DfLogger.error(this, "Error in XML Parser.", null, ex);
throw ex;
}
DfLogger.debug(this, "Import Process Ends -----------", null, null);
}
/**
 * Process the XML for the given document node.
 * @param doc xml node
 * @return filename of successfully processed document, otherwise null
 */
Any help will be appreciated.

Let's assume you have your XML data in a file named test.xml. FileName is an attribute of the IndexData element, not a child element, so read it with getAttribute. You can read the file and get that value using the code below:
package yourPackage;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
/** Prints the {@code FileName} attribute of the first {@code IndexData} element in test.xml. */
public class Main {
    /**
     * Parses {@code test.xml} from the working directory and prints the attribute value.
     *
     * @param args unused
     * @throws IOException if the file cannot be read
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the file is not well-formed XML
     * @throws IllegalStateException if the document contains no {@code IndexData} element
     */
    public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        Document doc;
        // Close the stream as soon as parsing is done so the file handle is not leaked.
        try (java.io.InputStream in = Files.newInputStream(Paths.get("test.xml"))) {
            doc = factory.newDocumentBuilder().parse(in);
        }
        doc.getDocumentElement().normalize();
        Element data = (Element) doc.getElementsByTagName("IndexData").item(0);
        if (data == null) {
            throw new IllegalStateException("test.xml contains no IndexData element");
        }
        // FileName is an attribute of <IndexData>, so it is read with getAttribute.
        System.out.println(data.getAttribute("FileName"));
    }
}
The output is :
Test.pdf

Related

No getting desired XML output in Java

I am converting a CSV file to XML. The conversion runs, but I am not getting the desired structured output.
My java Code :-
/**
 * Converts {@code C:/Users/Admin/Desktop/data.csv} into {@code C:/Users/Admin/Desktop/data.xml}.
 *
 * <p>The first line is written as {@code <header>} element(s) and every following line as a
 * {@code <row>} element under a {@code <root>} element.
 *
 * <p>NOTE(review): lines are tokenized on {@code "?"}. The sample CSV shown with this code
 * contains no {@code '?'}, so each line stays a single token and ends up as the full text of
 * one element - which is the output the author reports. Splitting on commas would additionally
 * require quote-aware parsing, because the values themselves contain commas.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<String> headers = new ArrayList<String>(5);
    File file = new File("C:/Users/Admin/Desktop/data.csv");
    try {
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder domBuilder = domFactory.newDocumentBuilder();
        Document newDoc = domBuilder.newDocument();
        // Root element
        Element rootElement = newDoc.createElement("root");
        newDoc.appendChild(rootElement);
        // try-with-resources guarantees the reader is closed even when a line fails to process.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            int line = 0;
            String text;
            while ((text = reader.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(text, "?", false);
                String[] rowValues = new String[st.countTokens()];
                int index = 0;
                while (st.hasMoreTokens()) {
                    rowValues[index++] = st.nextToken();
                }
                if (line == 0) { // Header row
                    for (String col : rowValues) {
                        headers.add(col);
                        Element rowElement = newDoc.createElement("header");
                        rootElement.appendChild(rowElement);
                        // Each pass overwrites the element text; the last header seen so far wins.
                        for (int col1 = 0; col1 < headers.size(); col1++) {
                            String value = col1 < rowValues.length ? rowValues[col1] : " ";
                            rowElement.setTextContent(value);
                            System.out.println(headers + " " + value);
                        }
                    }
                } else { // Data row
                    Element rowElement = newDoc.createElement("row");
                    rootElement.appendChild(rowElement);
                    for (int col = 0; col < headers.size(); col++) {
                        String header = headers.get(col);
                        // Rows shorter than the header get a single-space default value.
                        String value = col < rowValues.length ? rowValues[col] : " ";
                        rowElement.setTextContent(value);
                        System.out.println(header + " " + value);
                    }
                }
                line++;
            }
        }
        try {
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(new File("C:/Users/Admin/Desktop/data.xml"));
            aTransformer.transform(src, result);
            System.out.println("File creation successfully!");
        } catch (Exception exp) {
            exp.printStackTrace();
        }
    } catch (Exception e1) {
        e1.printStackTrace();
    }
}
This is my CSV file:-
Symbol,Open,High,Low,Last Traded Price,Change
"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"
"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"
"BPCL","647.70","665.00","645.95","660.10","10.75"
"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"
"ONGC","159.50","161.75","159.35","160.80","1.70"
And this is the output I am getting:-
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<root>
<header>Symbol,Open,High,Low,Last Traded Price,Change</header>
<row>"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"</row>
<row>"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"</row>
<row>"BPCL","647.70","665.00","645.95","660.10","10.75"</row>
<row>"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"</row>
<row>"ONGC","159.50","161.75","159.35","160.80","1.70"</row>
</root>
Please suggest where I am going wrong. I tried it my own way, but I am confused about what to change in the header and row sections.
ADDED :-
Expected output
<root>
<header>symbol</header>
<row>NIFTY 50</row>
<row>RELIANCE</row>
<row>BPCL</row>
.
.
<header>Open</header>
<row>9,645.90</row>
<row>1,390.00</row>
.
.
</root>
For your reference:
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.csv.QuoteMode;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Converts a CSV file into a column-oriented XML document using Apache Commons CSV.
 *
 * <p>For every CSV column a {@code <header>} element holding the column name is written,
 * followed by one {@code <row>} element per record holding that record's value for the column.
 */
public class CsvToXml {
    /**
     * Reads {@code C:/Users/Admin/Desktop/data.csv} and writes {@code C:/Users/Admin/Desktop/data.xml}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File inputFile = new File("C:/Users/Admin/Desktop/data.csv");
        // try-with-resources closes the parser on every path, replacing the manual finally block.
        try (CSVParser inParser = CSVParser.parse(inputFile, StandardCharsets.UTF_8,
                CSVFormat.EXCEL.withHeader().withQuoteMode(QuoteMode.NON_NUMERIC))) {
            DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder domBuilder = domFactory.newDocumentBuilder();
            Document newDoc = domBuilder.newDocument();
            // Root element
            Element rootElement = newDoc.createElement("root");
            newDoc.appendChild(rootElement);
            // Records are read once up front because every column pass iterates over all of them.
            List<CSVRecord> records = inParser.getRecords();
            for (String key : inParser.getHeaderMap().keySet()) {
                Element headerElement = newDoc.createElement("header");
                rootElement.appendChild(headerElement);
                headerElement.setTextContent(key);
                for (CSVRecord record : records) {
                    Element rowElement = newDoc.createElement("row");
                    rootElement.appendChild(rowElement);
                    rowElement.setTextContent(record.get(key));
                }
            }
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(new File("C:/Users/Admin/Desktop/data.xml"));
            aTransformer.transform(src, result);
            System.out.println("File creation successfully!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
This is using Apache Commons CSV.

Extract some part of an XML

I have an XML document and I want to extract a part of it, but I am unable to do so.
If I read every key into its own variable I can get that part, but that is a very lengthy process. Is there a shorter way to do it?
Below is the XML :
<?xml version=\"1.0\" encoding=\"UTF-8\"?><xs:nml
xmlns:xs=\"http://www.netgear.com/protocol/transaction/NMLSchema-0.9\" src=\"nas\" dst=\"dpv_1461117132000\" locale=\"en-us\">
<xs:transaction ref-id=\"\" type=\"0\">
<xs:response ref-id=\"njl_id_1941\" status=\"success\">
<xs:result>
<xs:get-s resource-id=\"network_link_list\" resource-type=\"network_link_collection\">
<network_link_collection>
<network_link resource-id=\"eth0\">
<link>eth0</link>
<ifname>eth0</ifname>
<speed>1000</speed>
<path/>
<duplex>full</duplex>
<vlanid>0</vlanid>
<iptype>ipv4dhcp</iptype>
<ipv6type>ipv6dhcp</ipv6type>
<ip>0.0.0.0</ip>
<subnet>255.255.255.0</subnet>
<broadcast>0.0.0.0</broadcast>
<ipv6>::</ipv6>
<subnet6>::</subnet6>
<prefixlength>64</prefixlength>
<ipv6_link>::</ipv6_link>
<prefixlength_link>64</prefixlength_link>
<mac>6C:B0:CE:1C:CA:AE</mac>
<mtu>1500</mtu>
<router>0.0.0.0</router>
<router6>0.0.0.0</router6>
<state>down</state>
<dnscollection/>
<routecollection/>
<ntpcollection/>
</network_link>
</network_link_collection>
</xs:get-s>
</xs:result>
</xs:response>
</xs:transaction>
I want the xml which comes inside network link collection.
You can create a map of property key-value pairs fairly easily. You just need to find the nodes that you want to pull out.
NodeList nodeList = doc.getElementsByTagName("network_link").item(0).getChildNodes();
ParseResponseXML.java
import java.io.*;
import java.net.*;
import java.util.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
/**
 * Loads {@code resources/Response.xml} from the class path and prints the child elements of the
 * first {@code network_link} element as key/value pairs.
 */
public class ParseResponseXML {
    /**
     * Parses the response file and prints one {@code name => text} line per child element.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File fXmlFile = getResourceAsFile("resources/Response.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(fXmlFile);
            doc.getDocumentElement().normalize(); // http://stackoverflow.com/questions/13786607
            Node networkLink = doc.getElementsByTagName("network_link").item(0);
            if (networkLink == null) {
                System.err.println("No network_link element found in resources/Response.xml");
                return;
            }
            Map<String, String> propertyMap = nodeListToMap(networkLink.getChildNodes());
            for (Map.Entry<String, String> entry : propertyMap.entrySet()) {
                System.out.printf("%-18s => %s%n", entry.getKey(), entry.getValue());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts the element nodes of a list into an insertion-ordered map.
     *
     * @param nodeList nodes to convert; non-element nodes are skipped
     * @return map from element tag name to its text content
     */
    private static Map<String, String> nodeListToMap(NodeList nodeList) {
        Map<String, String> result = new LinkedHashMap<String, String>();
        for (int temp = 0; temp < nodeList.getLength(); temp++) {
            Node node = nodeList.item(temp);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                Element element = (Element) node;
                result.put(element.getTagName(), element.getTextContent());
            }
        }
        return result;
    }

    /**
     * Resolves a class-path resource to a file on disk.
     *
     * @param resource resource name relative to the class-path root
     * @return the file backing the resource
     * @throws IOException if the resource is not found or is not backed by a plain file
     */
    private static File getResourceAsFile(String resource) throws IOException {
        ClassLoader loader = ParseResponseXML.class.getClassLoader();
        // getResource works for any class loader; the application loader is not a
        // URLClassLoader on Java 9 and later.
        URL resourceUrl = loader.getResource(resource);
        if (resourceUrl == null || !"file".equals(resourceUrl.getProtocol())) {
            throw new IOException("Unable to get file through class loader: " + loader);
        }
        try {
            return new File(resourceUrl.toURI());
        } catch (URISyntaxException e) {
            throw new IOException("Unable to get file through class loader: " + loader, e);
        }
    }
}
Response.xml
Make sure you have the </xs:nml> closing tag at the end of the XML.
<?xml version="1.0" encoding="UTF-8"?>
<xs:nml xmlns:xs="http://www.netgear.com/protocol/transaction/NMLSchema-0.9"
src="nas" dst="dpv_1461117132000" locale="en-us">
<xs:transaction ref-id="" type="0">
<xs:response ref-id="njl_id_1941" status="success">
<xs:result>
<xs:get-s resource-id="network_link_list" resource-type="network_link_collection">
<network_link_collection>
<network_link resource-id="eth0">
<link>eth0</link>
<ifname>eth0</ifname>
<speed>1000</speed>
<path />
<duplex>full</duplex>
<vlanid>0</vlanid>
<iptype>ipv4dhcp</iptype>
<ipv6type>ipv6dhcp</ipv6type>
<ip>0.0.0.0</ip>
<subnet>255.255.255.0</subnet>
<broadcast>0.0.0.0</broadcast>
<ipv6>::</ipv6>
<subnet6>::</subnet6>
<prefixlength>64</prefixlength>
<ipv6_link>::</ipv6_link>
<prefixlength_link>64</prefixlength_link>
<mac>6C:B0:CE:1C:CA:AE</mac>
<mtu>1500</mtu>
<router>0.0.0.0</router>
<router6>0.0.0.0</router6>
<state>down</state>
<dnscollection />
<routecollection />
<ntpcollection />
</network_link>
</network_link_collection>
</xs:get-s>
</xs:result>
</xs:response>
</xs:transaction>
</xs:nml>
Output
link => eth0
ifname => eth0
speed => 1000
path =>
duplex => full
vlanid => 0
iptype => ipv4dhcp
ipv6type => ipv6dhcp
ip => 0.0.0.0
subnet => 255.255.255.0
broadcast => 0.0.0.0
ipv6 => ::
subnet6 => ::
prefixlength => 64
ipv6_link => ::
prefixlength_link => 64
mac => 6C:B0:CE:1C:CA:AE
mtu => 1500
router => 0.0.0.0
router6 => 0.0.0.0
state => down
dnscollection =>
routecollection =>
ntpcollection =>
Unwrap XML
If you want to unwrap a node, you can perform the following.
import java.io.*;
import java.net.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
/**
 * Extracts the {@code network_link_collection} element of {@code resources/Response.xml} into
 * a stand-alone document and writes it to {@code NetworkLinkCollection.xml}.
 */
public class ParseResponseXML {
    /**
     * Loads the response, unwraps the collection element and writes the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Document inputDoc = load("resources/Response.xml");
            Document outputDoc = unwrap(inputDoc, "network_link_collection");
            write(outputDoc, "NetworkLinkCollection.xml");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses a class-path resource into a DOM document.
     *
     * @param resource resource name relative to the class-path root
     * @return the parsed document
     * @throws IOException if the resource cannot be located or read
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the resource is not well-formed XML
     */
    public static Document load(String resource) throws IOException, ParserConfigurationException, SAXException {
        File file = getResourceAsFile(resource);
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        return dBuilder.parse(file);
    }

    /**
     * Serializes a document to a file using a default (identity) transformer.
     *
     * @param doc document to write
     * @param filename path of the output file
     * @throws TransformerException if serialization fails
     */
    public static void write(Document doc, String filename) throws TransformerException {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        DOMSource source = new DOMSource(doc);
        StreamResult result = new StreamResult(new File(filename));
        transformer.transform(source, result);
    }

    /**
     * Copies the first element with the given tag name into a new document as its root.
     *
     * <p>The source document is left unchanged; the element is deep-copied.
     *
     * @param doc document to search
     * @param tagName tag name of the element to extract
     * @return a new document whose root is a deep copy of the first matching element
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws IllegalArgumentException if {@code doc} contains no element named {@code tagName}
     */
    public static Document unwrap(Document doc, String tagName) throws ParserConfigurationException {
        Node node = doc.getElementsByTagName(tagName).item(0);
        if (node == null) {
            throw new IllegalArgumentException("No element named '" + tagName + "' in document");
        }
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document result = dBuilder.newDocument();
        // A node belongs to its owner document, so it must be imported before it can be attached.
        Node importNode = result.importNode(node, true);
        result.appendChild(importNode);
        return result;
    }

    /**
     * Resolves a class-path resource to a file on disk.
     *
     * @param resourceName resource name relative to the class-path root
     * @return the file backing the resource
     * @throws IOException if the resource is not found or is not backed by a plain file
     */
    private static File getResourceAsFile(String resourceName) throws IOException {
        ClassLoader loader = ParseResponseXML.class.getClassLoader();
        // getResource works for any class loader; the application loader is not a
        // URLClassLoader on Java 9 and later.
        URL resourceUrl = loader.getResource(resourceName);
        if (resourceUrl == null || !"file".equals(resourceUrl.getProtocol())) {
            throw new IOException("Unable to get file through class loader: " + loader);
        }
        try {
            return new File(resourceUrl.toURI());
        } catch (URISyntaxException e) {
            throw new IOException("Unable to get file through class loader: " + loader, e);
        }
    }
}
NetworkLinkCollection.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<network_link_collection>
<network_link resource-id="eth0">
<link>eth0</link>
<ifname>eth0</ifname>
<speed>1000</speed>
<path />
<duplex>full</duplex>
<vlanid>0</vlanid>
<iptype>ipv4dhcp</iptype>
<ipv6type>ipv6dhcp</ipv6type>
<ip>0.0.0.0</ip>
<subnet>255.255.255.0</subnet>
<broadcast>0.0.0.0</broadcast>
<ipv6>::</ipv6>
<subnet6>::</subnet6>
<prefixlength>64</prefixlength>
<ipv6_link>::</ipv6_link>
<prefixlength_link>64</prefixlength_link>
<mac>6C:B0:CE:1C:CA:AE</mac>
<mtu>1500</mtu>
<router>0.0.0.0</router>
<router6>0.0.0.0</router6>
<state>down</state>
<dnscollection />
<routecollection />
<ntpcollection />
</network_link>
</network_link_collection>
Great response from Mr. Polywhirl!! Thanks a lot!!
I only want to add that if, like me, you want to extract a part of the XML without including the XML declaration (<?xml version="1.0" ... ?>), you have to add this in the "write" method:
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

Trying to bring the properties/values such as the names or dates of an XML file into java from a directory

I am trying to read the properties/values (such as names or dates) of XML files in a directory into Java. The problem is that I can only get the names of the files, such as Employee or Dates, but not the elements inside each file.
/** Walks a file or directory tree and parses every {@code .xml} file found. */
public class ProcessXML {
    /**
     * Prints the entries directly inside {@code /Users/Adrian/Dropbox/XML.xml}.
     *
     * <p>Nothing is printed when that path is not a directory.
     *
     * @param args unused
     */
    public static void main2(String[] args) {
        try {
            File f = new File("/Users/Adrian/Dropbox/XML.xml");
            // listFiles returns null when the path is not a directory (or cannot be read).
            File[] paths = f.listFiles();
            if (paths == null) {
                return;
            }
            for (File path : paths) {
                // prints filename and directory name
                System.out.println(path);
            }
        } catch (Exception e) {
            // if any error occurs
            e.printStackTrace();
        }
    }

    /**
     * Lists and parses the XML file(s) at {@code /Users/Adrian/Dropbox/XML.xml}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // Absolute path, matching main2; without the leading slash the path was resolved
            // against the working directory.
            File f = new File("/Users/Adrian/Dropbox/XML.xml");
            listFile(f, " ");
        } catch (Exception e) {
            // if any error occurs
            e.printStackTrace();
        }
    }

    /**
     * Recursively prints {@code file} and its descendants, parsing each {@code .xml} file.
     *
     * @param file file or directory to visit
     * @param indent prefix printed before the entry; one space is appended per directory level
     * @throws IOException if an XML file cannot be read
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if an XML file is not well-formed
     */
    private static void listFile(final File file, final String indent) throws IOException, ParserConfigurationException, SAXException {
        if (file.isFile()) {
            if (file.getName().endsWith(".xml")) {
                System.out.println(indent + "File " + file.getName());
                processXML(file);
            }
        } else if (file.isDirectory()) {
            System.out.println(indent + "Dir " + file.getName());
            final File[] children = file.listFiles();
            if (children == null) {
                return; // directory could not be read
            }
            for (final File child : children) {
                listFile(child, indent + " ");
            }
        }
    }

    /**
     * Parses one XML file with a DOM parser.
     *
     * <p>The parsed document is currently discarded, so this only checks that the file is
     * well-formed; element values would have to be read from the result of {@code parse}.
     *
     * @param file XML file to parse
     * @throws IOException if the file cannot be read
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the file is not well-formed
     */
    private static void processXML(final File file) throws
            IOException, ParserConfigurationException, SAXException {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        db.parse(file);
    }
}
The problem I am now having is getting the XML values out of the XML files. I am not sure how to go about it, so if anyone has examples or can point me in the right direction, it would be much appreciated.
My guess is that it can not find the file since you didn't include the extension to the xml file.
try add .xml to the end of the file path and see if that works
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/** Prints age, name, gender and role of every {@code Employee} element in Employee.xml. */
public class XPathTest {
    /**
     * Parses {@code Employee.xml} from the working directory and prints each employee's fields.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        try {
            File employeeFile = new File("Employee.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(employeeFile);
            doc.getDocumentElement().normalize();
            System.out.println("root of xml file" + doc.getDocumentElement().getNodeName());
            NodeList nodes = doc.getElementsByTagName("Employee");
            System.out.println("==========================");
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);
                if (node.getNodeType() == Node.ELEMENT_NODE) {
                    Element element = (Element) node;
                    System.out.println("Employee Age: " + getValue("age", element));
                    System.out.println("Employee Name: " + getValue("name", element));
                    System.out.println("Employee Gender: " + getValue("gender", element));
                    System.out.println("Employee Role: " + getValue("role", element));
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Returns the value of the first child node of the first descendant named {@code tag}.
     *
     * @param tag tag name to look up beneath {@code element}
     * @param element element to search
     * @return the node value, or an empty string when the tag is absent or has no content
     */
    private static String getValue(String tag, Element element) {
        Node tagNode = element.getElementsByTagName(tag).item(0);
        // A missing or empty element must not abort the whole listing with an NPE.
        if (tagNode == null || tagNode.getFirstChild() == null) {
            return "";
        }
        return tagNode.getFirstChild().getNodeValue();
    }
}
went onto change everything and now it works, brought in xml values without a problem

Read xml data in java

I am trying to read XML file data using a Java program, but the output shows only null.
The idea is to read a value from the XML file below, for example the content name, i.e. the Value attribute in Content Value="AssetFlow_Trial_07.mpg". But all I get is blank output.
Below is my xml file,
xml file:
<?xml version="1.0" encoding="UTF-8"?>
<ADI>
<Metadata>
<AMS Asset_Name="mntario8616_8889pk_026" Provider="Rogers" Product="DRAOD" Version_Major="4" Version_Minor="12" Description="Kids_in_Hall_Ep__1Package_Asset" Creation_Date="2010-06-11" Provider_ID="HBOworld" Asset_ID="TJPK0000000000000026" Asset_Class="package"/>
<App_Data App="MOD" Name="Provider_Content_Tier" Value="IFCC_FREE_10"/>
<App_Data App="MOD" Name="Metadata_Spec_Version" Value="CableLabsVOD1.1"/>
</Metadata>
<Asset>
<Metadata>
<AMS Asset_Name="mntario8616_8889m_024" Provider="Rogers" Product="DRAOD" Version_Major="1" Version_Minor="0" Description="Kids_in_Hall_Ep__1Title_Movie" Creation_Date="2010-06-11" Provider_ID="HBOworld" Asset_ID="TJMV0000000000000024" Asset_Class="movie"/>
<App_Data App="MOD" Name="Type" Value="movie"/>
<App_Data App="MOD" Name="Encryption" Value="N"/>
<App_Data App="MOD" Name="Audio_Type" Value="Stereo"/>
<App_Data App="MOD" Name="Languages" Value="en"/>
<App_Data App="MOD" Name="Viewing_Can_Be_Resumed" Value="Y"/>
<App_Data App="MOD" Name="HDContent" Value="Y"/>
</Metadata>
<Content Value="AssetFlow_Trial_07.mpg"/>
</Asset>
This is my java code,
Java code:
// Streams through DR-Sun3.xml with StAX and prints the Value attribute of every <Content>
// element. The value lives in an attribute, so it is read at START_ELEMENT; waiting for a
// CHARACTERS event only yields the whitespace between tags.
XMLInputFactory factory = null;
XMLStreamReader reader = null;
// XMLStreamReader.close() does not close the underlying stream, so the file stream is
// managed separately by try-with-resources.
try (FileInputStream input = new FileInputStream(new File(
        "D:\\seachange\\AssetFlow Test Files\\DR-Sun3.xml"))) {
    factory = XMLInputFactory.newInstance();
    factory.setProperty(XMLInputFactory.IS_COALESCING, true);
    reader = factory.createXMLStreamReader(input);
    while (reader.hasNext()) {
        int event = reader.next();
        if (event == XMLStreamConstants.START_ELEMENT
                && reader.getLocalName().equals("Content")) {
            // Attributes without a namespace are looked up with a null namespace URI.
            System.out.println(reader.getAttributeValue(null, "Value"));
        }
    }
}
catch (Throwable t) {
    t.printStackTrace();
}
finally {
    // reader stays null when the file could not be opened or the factory failed.
    if (reader != null) {
        try {
            reader.close();
        }
        catch (Throwable t) {
            t.printStackTrace();
        }
    }
}
Any help is appreciated!
I am using spring to read file but the point is once you get the xml document you can query into it. If your project is maven. Simply do this.
POM:
<properties>
<java-version>1.8</java-version>
<spring.version>4.1.0.RELEASE</spring.version>
<junit.version>4.11</junit.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
<version>${spring.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
then simply in your main method call the function
/**
 * Entry point: delegates to readXmlFile().
 *
 * @param args unused
 * @throws Exception whatever readXmlFile() throws
 */
public static void main( String[] args ) throws Exception {
readXmlFile();
}
/**
 * Loads {@code myXmlFile.xml} from the class path, selects the {@code Content} element whose
 * {@code Value} attribute is {@code AssetFlow_Trial_07.mpg} and prints the element's name.
 *
 * @throws Exception if the file cannot be read or parsed
 * @throws IllegalStateException if no node matches the XPath expression
 */
private static void readXmlFile() throws Exception {
    Resource resource = new ClassPathResource("myXmlFile.xml");
    File xmlFile = resource.getFile();
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = builderFactory.newDocumentBuilder();
    builder.setErrorHandler(new DomErrorHandler());
    Document xmlDocument;
    // Close the stream once the document is fully parsed.
    try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(xmlFile))) {
        xmlDocument = builder.parse(in);
    }
    xmlDocument.getDocumentElement().normalize();
    XPathFactory factory = XPathFactory.newInstance();
    XPath xPath = factory.newXPath();
    // Attributes are addressed with '@' in XPath; '#Value' is not a valid predicate.
    String selectFirstNode = "/ADI/Asset/Content[@Value='AssetFlow_Trial_07.mpg']";
    Object result = executeXpathExpression(xmlDocument, selectFirstNode, xPath, XPathConstants.NODE);
    Node firstNode = (Node) result;
    if (firstNode == null) {
        // Nothing matched, or the expression failed to evaluate.
        throw new IllegalStateException("No node matched XPath expression: " + selectFirstNode);
    }
    String nodeName = firstNode.getNodeName();
    System.out.println( nodeName );
}
/**
 * Compiles and evaluates an XPath expression against a document.
 *
 * @param xmlDocument document used as the evaluation context
 * @param expression XPath expression to evaluate
 * @param xPath XPath instance used to compile the expression
 * @param returnType desired result type, e.g. one of the {@code XPathConstants} values
 * @return the evaluation result, or {@code null} when compiling or evaluating fails (the
 *     stack trace is printed in that case)
 */
public static Object executeXpathExpression(Document xmlDocument, String expression, XPath xPath, QName returnType) {
    try {
        return xPath.compile(expression).evaluate(xmlDocument, returnType);
    } catch (XPathExpressionException e) {
        e.printStackTrace();
        return null;
    }
}
First of all, you are missing the closing </ADI> tag at the end of your XML file. Once that is fixed, you can read the 'Value' attribute of the Content tag like this:
// Parses test.xml with DOM and prints the Value attribute of every <Content> element.
File file = new File("test.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db;
try {
    db = dbf.newDocumentBuilder();
    Document doc = db.parse(file);
    doc.getDocumentElement().normalize();
    NodeList l = doc.getElementsByTagName("Content");
    for (int j = 0; j < l.getLength(); ++j) {
        Node prop = l.item(j);
        NamedNodeMap attr = prop.getAttributes();
        // getNamedItem returns null when the element has no Value attribute; skip those
        // instead of failing with a NullPointerException.
        Node p = (attr == null) ? null : attr.getNamedItem("Value");
        if (p != null) {
            System.out.println(p.getNodeValue());
        }
    }
} catch (ParserConfigurationException | SAXException | IOException e) {
    e.printStackTrace();
}
Finally made a code,
import java.io.File;
import javax.print.Doc;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/** Contract for types that can look up nodes by tag name. */
abstract class AbstractClass {

    /**
     * Looks up the nodes carrying the given tag name.
     *
     * @param tagName tag name to search for
     * @return the matching nodes; the exact result for "no match" is up to the implementation
     */
    abstract NodeList getTagName(String tagName);
}
class ActualClass extends AbstractClass
{
DocumentBuilderFactory dbFactory;
DocumentBuilder dBuilder;
org.w3c.dom.Document doc;
String[] attribute;
public void document()
{
try
{
dbFactory = DocumentBuilderFactory.newInstance();
dBuilder = dbFactory.newDocumentBuilder();
doc = dBuilder.parse(new File("D:\\seachange\\AssetFlow Test Files\\VIDEOTRON_1.xml"));
doc.getDocumentElement().normalize();
}
catch (Exception e)
{
e.printStackTrace();
}
}
NodeList getTagName( String tagName )
{
NodeList mainNode = null;
if (doc != null)
{
mainNode=doc.getElementsByTagName(tagName);
}
return mainNode;
}
void readAttribute( NodeList mainNode, String attName)
{
if (doc != null)
{
attribute = new String[mainNode.getLength()];
if (mainNode != null) {
for (int i = 0; i < mainNode.getLength(); i++) {
Node node = mainNode.item(i);
if (node.hasAttributes()) {
attribute[i] = node.getAttributes().getNamedItem(attName).getNodeValue();
}
}
}
}
}
void printOutput(NodeList mainNode, String attValue)
{
for (int i=0; i< mainNode.getLength(); i++)
{
if ((attribute[i]).equals(attValue))
{
System.out.println(attribute[i]);
}
}
}
}
/** Driver that prints the {@code Value} of {@code Content} elements equal to Movie_012.mpg. */
public class TestAbstract {
    /**
     * Loads the document, reads the {@code Value} attribute of every {@code Content} element
     * and prints the ones matching the expected file name.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ActualClass xmlReader = new ActualClass();
        xmlReader.document();

        NodeList contentNodes = xmlReader.getTagName("Content");
        xmlReader.readAttribute(contentNodes, "Value");
        xmlReader.printOutput(contentNodes, "Movie_012.mpg");
    }
}
I would like to present an alternative solution that needs much less code, using XMLBeam (disclosure: I'm affiliated with that project). Having less code to maintain becomes important as tasks get more complicated.
public class XMLDemo {
public interface Projection {
#XBRead("/ADI/Content/#Value")
String getContentValue();
}
public static main(String[] args) {
Projection projection = new XBProjector().io().file("D:\\seachange\\AssetFlow Test Files\\DR-Sun3.xml").read(Projection.class);
System.out.println(projection.getContentValue());
}
}

Is it possible to parse MS Word using Apache POI and convert it into XML?

Is it possible to convert a MS Word to XML file using Apache POI ?
If it is, can you point me to any tutorials for doing that?
I'd say you have two options, both powered by Apache POI
One is to use Apache Tika. Tika is a text and metadata extraction toolkit, and is able to extract fairly rich text from Word documents by making appropriate calls to POI. The result is that Tika will give you XHTML style XML for the contents of your word document.
The other option is to use a class that was added fairly recently to POI, which is WordToHtmlConverter. This will turn your word document into HTML for you, and generally will preserve slightly more of the structure and formatting than Tika will.
Depending on the kind of XML you're hoping to get out, one of these should be a good bet for you. I'd suggest you try both against some of your sample files, and see which one is the best fit for your problem domain and needs.
The purpose of HWPF subproject is exactly that: process Word files.
http://poi.apache.org/hwpf/index.html
Then, to convert the data to XML you have to build the XML in one of the usual ways: StAX, JDOM, XStream...
Apache offers a Quick Guide:
http://poi.apache.org/hwpf/quick-guide.html
and I also have found that:
http://sanjaal.com/java/tag/simple-java-tutorial-to-read-microsoft-document-in-java/
If you want to process docx files, you might want to look at the OpenXML4J subproject:
http://poi.apache.org/oxml4j/index.html
package com.govind.service;
import java.io.File;
import java.io.FileInputStream;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.log4j.Logger;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * DOC to XML converter service.
 *
 * <p>Reads a Word document ({@code .docx} via XWPF, {@code .doc} via HWPF), maps each
 * paragraph or character run to an XML element chosen by its style name, and writes the
 * resulting DOM tree to {@code <path>/<fileName>.xml}.
 *
 * <p>The instance keeps the DOM under construction in its fields, so a single instance
 * must not be used for two conversions at the same time.
 *
 * @author govind.sharma
 */
public class DocToXmlConverter {
    static final Logger logger = Logger.getLogger(DocToXmlConverter.class);
    DocumentBuilderFactory docFactory = null;
    DocumentBuilder docBuilder = null;
    Element rootElement = null;
    Document docxml = null;
    // Sticky flag: true when the last emitted element should be followed by a NEWLINE element.
    boolean subHeaders = false;
    // The UL element that LI entries are currently appended to; null until a list is started.
    Element UrlElement = null;

    /**
     * Converts {@code <path>/<fileName>.docx} to {@code <path>/<fileName>.xml}.
     *
     * @param path     directory containing the source document; also the output directory
     * @param fileName file name without extension
     * @throws IllegalStateException if the document cannot be read or the XML cannot be
     *                               written; the original exception is attached as the cause
     */
    public void processDocxToXml(String path, String fileName) {
        String fullPath = path + "/" + fileName + ".docx";
        // try-with-resources guarantees the input stream is closed on every path.
        try (FileInputStream fis = new FileInputStream(fullPath)) {
            XWPFDocument xdoc = new XWPFDocument(OPCPackage.open(fis));
            initializeXml();
            // One XML element per body paragraph.
            for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
                createXmlTags(paragraph.getStyle(), paragraph.getParagraphText(), paragraph.getNumFmt());
            }
            generateXml(path, fileName);
            logger.info("Doc to Xml Convertion completed.");
        } catch (Exception ex) {
            // Boundary catch: POI, JAXP and I/O failures are all reported the same way.
            logger.error("Exception while generating XML from DOC" + ex.getMessage(), ex);
            throw new IllegalStateException("Could not convert " + fullPath + " to XML", ex);
        }
    }

    /**
     * Converts {@code <path>/<fileName>.doc} to {@code <path>/<fileName>.xml}.
     *
     * @param path     directory containing the source document; also the output directory
     * @param fileName file name without extension
     * @throws IllegalStateException if the document cannot be read or the XML cannot be
     *                               written; the original exception is attached as the cause
     */
    public void processDocToXml(String path, String fileName) {
        String fullPath = path + "/" + fileName + ".doc";
        HWPFDocument doc;
        try (FileInputStream in = new FileInputStream(fullPath)) {
            doc = new HWPFDocument(new POIFSFileSystem(in));
        } catch (Exception e) {
            logger.error("Unable to Read File..." + e.getMessage(), e);
            throw new IllegalStateException("Unable to read " + fullPath, e);
        }
        try {
            Range range = doc.getRange();
            initializeXml();
            for (int i = 0; i < range.numParagraphs(); i++) {
                org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i);
                // Bounded by the paragraph's own run count, so an empty paragraph is
                // skipped instead of indexing past its last run.
                for (int j = 0; j < pr.numCharacterRuns(); j++) {
                    CharacterRun run = pr.getCharacterRun(j);
                    StyleDescription style = doc.getStyleSheet().getStyleDescription(run.getStyleIndex());
                    // createXmlTags treats a null style name as "unstyled".
                    String styleName = (style == null) ? null : style.getName();
                    createXmlTags(styleName, run.text(), null);
                }
            }
            generateXml(path, fileName);
            logger.info("Document to Xml Convertion completed.");
        } catch (Exception ex) {
            logger.error("Exception while generating XML from DOC" + ex.getMessage(), ex);
            throw new IllegalStateException("Could not convert " + fullPath + " to XML", ex);
        }
    }

    /**
     * Starts a fresh DOM document with a single {@code ROOT} element and clears the
     * per-conversion state left over from any previous run.
     *
     * @throws ParserConfigurationException if no {@link DocumentBuilder} can be created
     */
    private void initializeXml() throws ParserConfigurationException {
        docFactory = DocumentBuilderFactory.newInstance();
        docBuilder = docFactory.newDocumentBuilder();
        docxml = docBuilder.newDocument();
        rootElement = docxml.createElement("ROOT");
        docxml.appendChild(rootElement);
        // A UL from an earlier conversion belongs to a different Document and must not be reused.
        UrlElement = null;
        subHeaders = false;
    }

    /**
     * Appends the XML element that corresponds to one piece of text.
     *
     * <p>Mapping by style name (case-insensitive), applied when the style name is non-null
     * and the text is longer than one character:
     * <ul>
     *   <li>{@code Style4} -> {@code TITLE} (text trimmed)</li>
     *   <li>{@code Default}, {@code Normal} -> {@code P}</li>
     *   <li>{@code ListParagraph}, or {@code BodyCopy} with a numbering format -> {@code LI}
     *       inside the current {@code UL}</li>
     *   <li>{@code Subheader1} -> starts a new {@code UL} with the text as its first {@code LI}</li>
     *   <li>anything else, including {@code BodyCopy} without numbering -> {@code PS}</li>
     * </ul>
     * Text without a style name becomes a {@code P} when its trimmed length exceeds one.
     * After the mapping, a {@code NEWLINE} element is appended whenever {@code subHeaders}
     * is set; the flag keeps its previous value when the text was skipped.
     *
     * @param styleName     style name of the paragraph or run, may be {@code null}
     * @param paragraphText text to emit; {@code null} is treated as empty
     * @param bulletsPoints numbering format of the paragraph, {@code null} if it has none
     */
    private void createXmlTags(String styleName, String paragraphText, String bulletsPoints) {
        String text = (paragraphText == null) ? "" : paragraphText;
        if (styleName != null && text.length() > 1) {
            if (styleName.equalsIgnoreCase("Style4")) {
                appendTextElement(rootElement, "TITLE", text.trim());
                subHeaders = true;
            } else if (styleName.equalsIgnoreCase("Default") || styleName.equalsIgnoreCase("Normal")) {
                appendTextElement(rootElement, "P", text);
                subHeaders = true;
            } else if (styleName.equalsIgnoreCase("ListParagraph")
                    || (styleName.equalsIgnoreCase("BodyCopy") && bulletsPoints != null)) {
                appendTextElement(currentList(), "LI", text);
                subHeaders = false;
            } else if (styleName.equalsIgnoreCase("Subheader1")) {
                UrlElement = docxml.createElement("UL");
                appendTextElement(UrlElement, "LI", text);
                rootElement.appendChild(UrlElement);
                subHeaders = false;
            } else {
                appendTextElement(rootElement, "PS", text);
                subHeaders = true;
            }
        } else if (text.trim().length() > 1) {
            appendTextElement(rootElement, "P", text);
            subHeaders = true;
        }
        if (subHeaders) {
            appendTextElement(rootElement, "NEWLINE", "");
        }
    }

    /**
     * Returns the {@code UL} element list items are appended to, creating it under the
     * root and attaching it first if no list has been started yet.
     */
    private Element currentList() {
        if (UrlElement == null) {
            UrlElement = docxml.createElement("UL");
            rootElement.appendChild(UrlElement);
        }
        return UrlElement;
    }

    /** Creates {@code <tag>text</tag>} in the current document and appends it to {@code parent}. */
    private void appendTextElement(Element parent, String tag, String text) {
        Element element = docxml.createElement(tag);
        element.appendChild(docxml.createTextNode(text));
        parent.appendChild(element);
    }

    /**
     * Serializes the current DOM document to {@code <path>/<fileName>.xml} as indented
     * UTF-8 XML with an XML declaration.
     *
     * @param path     output directory
     * @param fileName output file name without extension
     * @throws javax.xml.transform.TransformerException if the transformer cannot be created
     *                                                  or the document cannot be written
     */
    private void generateXml(String path, String fileName) throws javax.xml.transform.TransformerException {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        // Implementation-specific key understood by the Xalan-based default transformer.
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        DOMSource source = new DOMSource(docxml);
        StreamResult result = new StreamResult(new File(path + "/" + fileName + ".xml"));
        transformer.transform(source, result);
    }
}

Categories

Resources