Read XML using dom4j or mycila - java

I have the following XML as a String, but I am having trouble reading the "PrdInfoTable" and "OrdInfoTable" elements in a loop: their number is dynamic, so I need to read them into an ArrayList or something similar. I have tried several methods but still cannot get it done. How can I do this?
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<soap:Body>
<GetCardResponse xmlns="http://tempuri.org/">
<GetCardResult>
<ReturnResult>
<Return>
<ReturnMsgNo>1</ReturnMsgNo>
<ReturnMsg>交易成功</ReturnMsg>
</Return>
<GetCardResult>
<OrdTable>
<Facno>1234</Facno>
<TrdDate>2015/5/6 11:04:20</TrdDate>
<TrdSeq>ABCD1234</TrdSeq>
<TrdBarCode>123456789</TrdBarCode>
</OrdTable>
<PrdTable>
<GameFacName></GameFacName>
<PrdName>abc123</PrdName>
<CardId>ABCD012345</CardId>
<CardPwd>KKSDHASBDH</CardPwd>
<ExpDate>2015/02/12</ExpDate>
</PrdTable>
<PrdInfoTable>
<PrdNote>* 測12346666666666666666666666666666666</PrdNote>
</PrdInfoTable>
<PrdInfoTable>
<PrdNote>* 測56787777777777777777777</PrdNote>
</PrdInfoTable>
<PrdInfoTable>
<PrdNote>* 測12345611111111111111111</PrdNote>
</PrdInfoTable>
<OrdInfoTable>
<TxetContent>測1111111111111111111111111111111111111111</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>22222測22222222222222222222222222222222222</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>3333333333333333333333測333333333333333333</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>4444444測444444444444444444444444444444444</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>55555555555555555555555555555測55555555555</TxetContent>
</OrdInfoTable>
<FreeSnTable />
</GetCardResult>
</ReturnResult>
</GetCardResult>
</GetCardResponse>
</soap:Body>
</soap:Envelope>
Below is the code:
// Post the SOAP request and collect the complete response body as one UTF-8 string.
HttpClient httpClient = new HttpClient();
PostMethod post = new PostMethod(url);
post.setRequestEntity(new StringRequestEntity(xmlRequest.toString()));
post.setRequestHeader("Content-type", "application/soap+xml; charset=utf-8");
post.setRequestHeader("Content-Length", xmlRequest.length()+"");
responseCode = httpClient.executeMethod(post);
InputStream in = post.getResponseBodyAsStream();
String line;
// A StringBuilder avoids re-copying the accumulated response for every line that is read.
StringBuilder responseBuffer = new StringBuilder();
// try-with-resources closes the reader (and the wrapped stream) even when reading fails.
try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
    while ((line = reader.readLine()) != null) {
        responseBuffer.append(line);
    }
}
String xmlresponse = responseBuffer.toString();
System.out.println("XML Response:\n");
// Turn entity-escaped angle brackets back into markup before parsing. Replacing "<" with "<"
// (and ">" with ">") would be a no-op.
// NOTE(review): presumably the embedded result arrives entity-escaped - confirm against a real response.
xmlresponse = xmlresponse.replaceAll("&lt;", "<");
xmlresponse = xmlresponse.replaceAll("&gt;", ">");
System.out.println(xmlresponse+"\n");
// Parse the response and print the tag name and PrdNote text reached through the given paths.
XMLTag xmlTag = XMLDoc.from(xmlresponse, true);
System.out.println("\n" + xmlTag.gotoRoot().gotoChild().gotoChild().gotoTag("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable").getCurrentTagName());
System.out.println("PrdNote: "+ xmlTag.gotoRoot().gotoChild().gotoChild().getText("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable/PrdNote[1]"));
System.out.println("\n" + xmlTag.gotoRoot().gotoChild().gotoChild().gotoTag("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable").getCurrentTagName());
System.out.println("PrdNote: "+ xmlTag.gotoRoot().gotoChild().gotoChild().getText("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable/PrdNote[2]"));

Another easy way to achieve this is using XPath expressions.
This code reads an XML file (tested with your XML code) and looks for the PrdNote and TxetContent elements. The method extractNodesValues returns an ArrayList<String> with their values:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Demonstrates pulling repeated element values out of an XML file with XPath.
 */
public class Test{

    /**
     * Evaluates two XPath expressions against {@code src/test.xml} and prints the
     * matched text values, one per line, under a heading for each expression.
     * Any parsing or XPath failure is reported by printing the exception message.
     */
    public static void main(String[] args){
        File myXmlFile = new File("src/test.xml");
        String xpath_PrdNotes = "/Envelope/Body/GetCardResponse/GetCardResult/ReturnResult/GetCardResult/PrdInfoTable/PrdNote/text()";
        String xpath_TxetContent = "/Envelope/Body/GetCardResponse/GetCardResult/ReturnResult/GetCardResult/OrdInfoTable/TxetContent/text()";
        try {
            // Both lists are extracted before anything is printed.
            ArrayList<String> prdNotesValues = extractNodesValues(myXmlFile, xpath_PrdNotes);
            ArrayList<String> txetContentValues = extractNodesValues(myXmlFile, xpath_TxetContent);
            printSection("PrdNotesValues:", prdNotesValues);
            System.out.println("");
            printSection("TxetContentValues:", txetContentValues);
        }
        catch (XPathExpressionException | IOException | SAXException | ParserConfigurationException e) {
            System.out.println(e.getMessage());
        }
    }

    /** Prints a heading followed by each value on its own line. */
    private static void printSection(String heading, ArrayList<String> values) {
        System.out.println(heading);
        for (int i = 0; i < values.size(); i++) {
            System.out.println(values.get(i));
        }
    }

    /**
     * Parses the given file and returns the node value of every node selected by
     * the XPath expression, in the order the node set reports them.
     *
     * @param f                file containing the XML document
     * @param xpath_expression XPath expression that selects a node set
     * @return the values of the selected nodes; empty when nothing matches
     */
    public static ArrayList<String> extractNodesValues(File f, String xpath_expression) throws XPathExpressionException, IOException, SAXException, ParserConfigurationException {
        Document xmlDocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(f);
        XPath xPath = XPathFactory.newInstance().newXPath();
        ArrayList<String> values = new ArrayList<String>();
        NodeList matches = (NodeList) xPath.compile(xpath_expression).evaluate(xmlDocument, XPathConstants.NODESET);
        if (matches == null) {
            return values;
        }
        for (int idx = 0, count = matches.getLength(); idx < count; idx++) {
            values.add(matches.item(idx).getNodeValue());
        }
        return values;
    }
}
Output:
PrdNotesValues:
* 測12346666666666666666666666666666666
* 測56787777777777777777777
* 測12345611111111111111111
TxetContentValues:
測1111111111111111111111111111111111111111
22222測22222222222222222222222222222222222
3333333333333333333333測333333333333333333
4444444測444444444444444444444444444444444
55555555555555555555555555555測55555555555
Hope this helps.

Related

Not getting desired XML output in Java

I am converting a CSV file to XML. The conversion runs, but I am not getting the desired structured output.
My java Code :-
/**
 * Reads C:/Users/Admin/Desktop/data.csv line by line, builds a DOM with a {@code <root>}
 * element that holds {@code <header>} elements for the first line and one {@code <row>}
 * element for every later line, and writes it indented to C:/Users/Admin/Desktop/data.xml.
 * Every failure is reported with a stack trace.
 */
public static void main(String[] args){
    List<String> headers=new ArrayList<String>(5);
    File file=new File("C:/Users/Admin/Desktop/data.csv");
    try {
        DocumentBuilderFactory domFactory =DocumentBuilderFactory.newInstance();
        DocumentBuilder domBuilder=domFactory.newDocumentBuilder();
        Document newDoc=domBuilder.newDocument();
        // Root element
        Element rootElement=newDoc.createElement("root");
        newDoc.appendChild(rootElement);
        // try-with-resources: the reader used to stay open because the finally blocks were empty.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            int line=0;
            String text;
            while ((text=reader.readLine())!=null) {
                // "?" is the only delimiter, so a line that contains no "?" comes back
                // as a single token holding the whole line.
                StringTokenizer st=new StringTokenizer(text, "?", false);
                String[] rowValues=new String[st.countTokens()];
                int index=0;
                while (st.hasMoreTokens()) {
                    rowValues[index++]=st.nextToken();
                }
                if (line == 0) { // Header row
                    for (String col:rowValues) {
                        headers.add(col);
                        Element rowElement=newDoc.createElement("header");
                        rootElement.appendChild(rowElement);
                        for (int col1=0;col1<headers.size();col1++) {
                            String value;
                            if (col1<rowValues.length) {
                                value=rowValues[col1];
                            } else {
                                // ?? Default value
                                value=" ";
                            }
                            // setTextContent replaces the element's content, so only the
                            // value assigned last remains in the element.
                            rowElement.setTextContent(value);
                            System.out.println(headers+" "+value);
                        }
                    }
                } else { // Data row
                    Element rowElement=newDoc.createElement("row");
                    rootElement.appendChild(rowElement);
                    for (int col=0;col<headers.size();col++) {
                        String header = headers.get(col);
                        String value;
                        if (col<rowValues.length) {
                            value=rowValues[col];
                        } else {
                            // ?? Default value
                            value=" ";
                        }
                        // As above: each call overwrites what the previous column stored.
                        rowElement.setTextContent(value);
                        System.out.println(header+" "+value);
                    }
                }
                line++;
            }
        }
        try {
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(new File("C:/Users/Admin/Desktop/data.xml"));
            aTransformer.transform(src, result);
            System.out.println("File creation successfully!");
        } catch (Exception exp) {
            exp.printStackTrace();
        }
    } catch (Exception e1) {
        e1.printStackTrace();
    }
}
This is my CSV file:-
Symbol,Open,High,Low,Last Traded Price,Change
"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"
"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"
"BPCL","647.70","665.00","645.95","660.10","10.75"
"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"
"ONGC","159.50","161.75","159.35","160.80","1.70"
And this is the output I am getting:-
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<root>
<header>Symbol,Open,High,Low,Last Traded Price,Change</header>
<row>"NIFTY 50","9,645.90","9,650.65","9,600.95","9,609.30","-5.70"</row>
<row>"RELIANCE","1,390.00","1,414.20","1,389.00","1,407.55","26.50"</row>
<row>"BPCL","647.70","665.00","645.95","660.10","10.75"</row>
<row>"ADANIPORTS","368.50","373.80","368.00","372.25","4.25"</row>
<row>"ONGC","159.50","161.75","159.35","160.80","1.70"</row>
</root>
Please suggest where I am going wrong. I tried my best, but I am confused about what I should change in the header and row sections.
ADDED :-
Expected output
<root>
<header>symbol</header>
<row>NIFTY 50</row>
<row>RELIANCE</row>
<row>BPCL</row>
.
.
<header>Open</header>
<row>9,645.90</row>
<row>1,390.00</row>
.
.
</root>
For your reference:
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.csv.QuoteMode;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Converts a CSV file into an XML document in which every column contributes one
 * {@code <header>} element followed by one {@code <row>} element per record.
 */
public class CsvToXml {

    /**
     * Parses C:/Users/Admin/Desktop/data.csv with Apache Commons CSV (first record used as
     * header), builds the DOM column by column and writes it indented to
     * C:/Users/Admin/Desktop/data.xml. Failures are reported with a stack trace; the parser
     * is closed in every case.
     */
    public static void main(String[] args) {
        CSVParser inParser = null;
        try {
            inParser = CSVParser.parse(new File("C:/Users/Admin/Desktop/data.csv"), StandardCharsets.UTF_8,
                    CSVFormat.EXCEL.withHeader().withQuoteMode(QuoteMode.NON_NUMERIC));
            Document newDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            // Root element
            Element rootElement = newDoc.createElement("root");
            newDoc.appendChild(rootElement);
            // All records are loaded once so they can be walked again for every column.
            List<CSVRecord> records = inParser.getRecords();
            for (String columnName : inParser.getHeaderMap().keySet()) {
                appendTextElement(newDoc, rootElement, "header", columnName);
                for (CSVRecord record : records) {
                    appendTextElement(newDoc, rootElement, "row", record.get(columnName));
                }
            }
            Transformer aTransformer = TransformerFactory.newInstance().newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
            aTransformer.transform(new DOMSource(newDoc),
                    new StreamResult(new File("C:/Users/Admin/Desktop/data.xml")));
            System.out.println("File creation successfully!");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(inParser);
        }
    }

    /** Creates {@code <tag>text</tag>} and appends it to the given parent element. */
    private static void appendTextElement(Document doc, Element parent, String tag, String text) {
        Element child = doc.createElement(tag);
        parent.appendChild(child);
        child.setTextContent(text);
    }

    /** Closes the parser when one was opened, printing the stack trace if closing fails. */
    private static void closeQuietly(CSVParser parser) {
        if (parser == null) {
            return;
        }
        try {
            parser.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
This is using Apache Commons CSV.

how to convert xml to avro without ignoring !CDATA content?

I have the following source XML file named customers.xml:
<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd">
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P020]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[AMADOR]]></dealerCustNumber>
<dealerCustName><![CDATA[AMADOR COMPUTERS]]></dealerCustName>
<phoneNumber><![CDATA[800 111 4444]]></phoneNumber>
<faxNumber><![CDATA[780 111 4444]]></faxNumber>
<email xsi:nil="true" />
<customerType><![CDATA[R]]></customerType>
<activeCustomerInd>false</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2000-01-11T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-02-05T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address2>
<address3><![CDATA[AS PER BILL FEB AA/15]]></address3>
<city><![CDATA[CHICAGO]]></city>
<postalCode><![CDATA[Q5S 1E5]]></postalCode>
<state><![CDATA[AB]]></state>
<country><![CDATA[CA]]></country>
<location><![CDATA[FLAGGED FOR DELETION]]></location>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[G]]></divisionCode>
<divisionName><![CDATA[CAR]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[AQ99]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
<SalesRep>
<number><![CDATA[XXX]]></number>
<name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
<type><![CDATA[M]]></type>
<par>0</par>
<email xsi:nil="true" />
<phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
</SalesRep>
</Division>
</Customer>
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P000]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[UU20888]]></dealerCustNumber>
<dealerCustName><![CDATA[ ADVERTISING AND PR]]></dealerCustName>
<phoneNumber xsi:nil="true" />
<faxNumber xsi:nil="true" />
<email xsi:nil="true" />
<customerType><![CDATA[I]]></customerType>
<activeCustomerInd>true</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[M2]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2015-11-18T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-11-19T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[EQUIP]]></address2>
<city><![CDATA[ADER]]></city>
<country><![CDATA[CA]]></country>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[A]]></divisionCode>
<divisionName><![CDATA[AGRO]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[EQ00]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
</Division>
</Customer>
</p:CustomerElement>
I have the following java code, which parses customers.xml into individual "Customer" entities, and then attempts to convert each of them into an AVRO format:
package com.dogsoft.data.xmltoavro;
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class ParseXmlFile {
private static Protocol protocol;
public static void xmlToAvro(File xmlFile, File avroFile) throws IOException, SAXException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
Schema schema = protocol.getType("Element");
Document doc = parse(xmlFile);
DatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.create(schema, avroFile);
Object docElement = doc.getDocumentElement();
fileWriter.append(wrapElement(doc.getDocumentElement()));
}
}
private static GenericData.Record wrapElement(Element el) {
GenericData.Record record = new GenericData.Record(protocol.getType("Element"));
record.put("name", el.getNodeName());
NamedNodeMap attributeNodes = el.getAttributes();
List<GenericData.Record> attrRecords = new ArrayList<>();
for (int i = 0; i < attributeNodes.getLength(); i++) {
Attr attr = (Attr) attributeNodes.item(i);
attrRecords.add(wrapAttr(attr));
}
record.put("attributes", attrRecords);
List<Object> childArray = new ArrayList<>();
NodeList childNodes = el.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
Object nt = node.getNodeType();
if (node.getNodeType() == Node.ELEMENT_NODE)
childArray.add(wrapElement((Element) node));
if (node.getNodeType() == Node.TEXT_NODE)
childArray.add(node.getTextContent());
}
record.put("children", childArray);
return record;
}
private static GenericData.Record wrapAttr(Attr attr) {
GenericData.Record record = new GenericData.Record(protocol.getType("Attribute"));
record.put("name", attr.getName());
record.put("value", attr.getValue());
return record;
}
private static Document parse(File file) throws IOException, SAXException {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
return builder.parse(file);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
GenericRecord record = dataFileReader.next();
Document doc;
try {
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
Element el = unwrapElement(record, doc);
doc.appendChild(el);
saveDocument(doc, xmlFile);
}
private static Element unwrapElement(GenericRecord record, Document doc) {
String name = "" + record.get("name");
Element el = doc.createElement(name);
#SuppressWarnings("unchecked")
GenericArray<GenericRecord> attrArray = (GenericArray<GenericRecord>) record.get("attributes");
for (GenericRecord attrRecord : attrArray)
el.setAttributeNode(unwrapAttr(attrRecord, doc));
#SuppressWarnings("unchecked")
GenericArray<Object> childArray = (GenericArray<Object>) record.get("children");
for (Object childObj : childArray) {
if (childObj instanceof GenericRecord)
el.appendChild(unwrapElement((GenericRecord) childObj, doc));
if (childObj instanceof Utf8)
el.appendChild(doc.createTextNode("" + childObj));
}
return el;
}
private static Attr unwrapAttr(GenericRecord record, Document doc) {
Attr attr = doc.createAttribute("" + record.get("name"));
attr.setValue("" + record.get("value"));
return attr;
}
private static void saveDocument(Document doc, File file) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(file));
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args)
{
Object nodeObject = null;
Node myNode = null;
Transformer transformer = null;
try
{
try {
transformer =
TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("/tmp/customers.xml");
System.out.printf("Version = %s%n", doc.getXmlVersion());
System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
if (doc.hasChildNodes())
{
int customerNumber = 0;
NodeList nl = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(node.toString());
customerNumber++;
File avroFile = new File("/tmp/customer" + customerNumber + ".avro");
File xmlFile = new File("/tmp/customer" + customerNumber + ".xml");
File xmlFile1 = new File("/tmp/customer" + customerNumber + "-foo.xml");
try {
transformer.transform(
new DOMSource(node), new StreamResult(xmlFile));
File outputFile = new File("/tmp/customer" + customerNumber + ".avro");
xmlToAvro(xmlFile, outputFile);
} catch (TransformerException e) {
e.printStackTrace();
}
}
}
}
}
catch (IOException ioe)
{
System.err.println("IOE: " + ioe);
}
catch (SAXException saxe)
{
System.err.println("SAXE: " + saxe);
}
catch (FactoryConfigurationError fce)
{
System.err.println("FCE: " + fce);
}
catch (ParserConfigurationException pce)
{
System.err.println("PCE: " + pce);
}
}
}
This code works overall, but it ignores any content that is enclosed in a
<![CDATA[ ... ]]>
section. As it happens, most of the actual useful data in the customers.xml files is enclosed in these sections.
Is there a way to modify this code, to make it not ignore the CDATA contents?
Instead of hand-writing parser code, you might want to split the problem into two parts: first, bind the XML to a POJO (using JAXB or the Jackson XML module); then write the POJO as Avro (using the Apache Avro library or the Jackson Avro module). All you need for that is a POJO definition that matches the expected structure of the data in both XML and Avro. The result should be less code, and you are basically specifying what needs to happen and not how to do it.

Parsing XML with XmlUtils

I am using XmlUtils to parse and extract the values of the id attribute in a List but it returns empty.
Where am I going wrong ? Please suggest
XML:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<address-book xmlns="qwerty" xmlns:ab2="asdfgh">
<contact time-stamp="2014-02-26T16:35:20.678+02:00" id="12345">
<ns2:person-details index="9AmmUzHXBPsK:96">
<ns2:name index="1">
<ns2:name-entry index="1">
<ns2:display-name>DISP0dNXoq</ns2:display-name>
<ns2:given display-order="1">GIVENw17JCb</ns2:given>
<ns2:family display-order="1">FAMILYcl7h2y</ns2:family>
</ns2:name-entry>
</ns2:name>
<ns2:comm-addr xml:lang="en">
<ns2:uri-entry addr-uri-type="trn" index="1:1111">
<ns2:addr-uri>cnaFC#hOog6.com</ns2:addr-uri>
<ns2:label xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true"/>
</ns2:uri-entry>
<ns2:tel tel-type="Home" index="2:22222">
<ns2:tel-nb>
<ns2:tel-str>97235852622</ns2:tel-str>
</ns2:tel-nb>
</ns2:tel>
<ns2:tel tel-type="Work" index="3:33333">
<ns2:tel-nb>
<ns2:tel-str>97230557837</ns2:tel-str>
</ns2:tel-nb>
</ns2:tel>
<ns2:tel tel-type="Mobile" index="3:33333">
<ns2:tel-nb>
<ns2:tel-str>972542993697</ns2:tel-str>
</ns2:tel-nb>
</ns2:tel>
</ns2:comm-addr>
</ns2:person-details>
<contact-status>
<contact-source>contact-source-sim-1393425320678</contact-source>
</contact-status>
</contact>
<contact time-stamp="2014-02-26T16:37:19.370+02:00" id="12346">
<contact time-stamp="2014-02-26T16:38:53.345+02:00" id="12347">
<contact time-stamp="2014-02-26T16:37:30.828+02:00" id="12348">
Code:
// Parse the response, select the root's children named "address-book" and print the
// id attribute of the "contact" child of each selected element.
Document document = XmlUtils.createDocument(responseString);
List<Element> list = document.getRootElement().getChildren("address-book");
for (int i = 0; i < list.size(); i++) {
    Element contact = list.get(i).getChild("contact");
    System.out.println(contact.getAttribute("id").getValue());
}
Class XmlUtils-
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;
import org.apache.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.FileWriter;
/**
 * Small JDOM helpers for parsing XML text or files and for producing
 * pretty-printed XML strings and files.
 */
public class XmlUtils
{
    private static final Logger logger = Logger.getLogger(XmlUtils.class);

    /**
     * Parses the XML text and returns its root element pretty-printed.
     *
     * @param doc XML text to reformat
     * @throws JDOMException when the text cannot be parsed
     * @throws IOException   when reading the text fails
     */
    public static String getFormatedXMLString(String doc) throws JDOMException, IOException
    {
        return ( makeDomToFormatedString( createDocument(doc) ) ) ;
    }

    /** Returns the document's root element pretty-printed. */
    public static String makeDomToFormatedString(Document doc)
    {
        return makeDomToFormatedString(doc.getRootElement());
    }

    /**
     * Returns the element pretty-printed, with empty elements written as start/end
     * tag pairs and the TRIM_FULL_WHITE text mode.
     */
    public static String makeDomToFormatedString(Element elem)
    {
        XMLOutputter output = new XMLOutputter();
        Format format = Format.getPrettyFormat();
        format.setExpandEmptyElements( true );
        format.setTextMode( Format.TextMode.TRIM_FULL_WHITE );
        output.setFormat( format );
        return output.outputString(elem);
    }

    /**
     * Builds a JDOM document from XML text.
     *
     * @throws JDOMException when the text cannot be parsed
     * @throws IOException   when reading the text fails
     */
    public static Document createDocument(String xml) throws JDOMException, IOException
    {
        InputSource in = new InputSource(new StringReader(xml));
        SAXBuilder saxB = new SAXBuilder();
        return ((saxB.build(in)));
    }

    /**
     * Parses the file and returns the root element of the resulting document.
     *
     * @throws JDOMException when the file cannot be parsed
     * @throws IOException   when the file cannot be read
     */
    public static Element createElement(File xmlFile) throws JDOMException, IOException
    {
        SAXBuilder saxB = new SAXBuilder();
        Document document = saxB.build(xmlFile);
        return document.getRootElement();
    }

    /**
     * Writes the document pretty-printed to the given path. An I/O failure is logged
     * and not propagated.
     */
    public static void writeXmlFile(Document doc,String path){
        // try-with-resources: the FileWriter used to stay open after the output call.
        try (FileWriter writer = new FileWriter( path )) {
            XMLOutputter xmlOutputer = new XMLOutputter();
            xmlOutputer.setFormat( Format.getPrettyFormat() );
            xmlOutputer.output( doc , writer );
        } catch (IOException e) {
            logger.error("cant write xml file",e);
        }
    }
}
Here we go
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;
/**
 * Small JDOM helpers for parsing XML text or files and for producing pretty-printed
 * XML strings and files, plus a demo that lists the id of every contact.
 */
public class XmlUtils
{
    /**
     * Parses a sample address book and prints the id attribute of each
     * {@code contact} child of the root element, twice: once through a raw list with
     * casts and once through a typed for-each loop.
     */
    public static void main(String[] args) throws JDOMException, IOException {
        String test="<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><address-book xmlns=\"qwerty\" xmlns:ab2=\"asdfgh\"><contact time-stamp=\"2014-02-26T16:35:20.678+02:00\" id=\"12345\"></contact><contact time-stamp=\"2014-02-26T16:37:19.370+02:00\" id=\"12346\"></contact><contact time-stamp=\"2014-02-26T16:38:53.345+02:00\" id=\"12347\"></contact><contact time-stamp=\"2014-02-26T16:37:30.828+02:00\" id=\"12348\"></contact></address-book>";
        Document document = XmlUtils.createDocument(test);
        Element rootNode=document.getRootElement();
        // The children are looked up with the namespace declared as xmlns="qwerty" on the root.
        Namespace namespace=Namespace.getNamespace("qwerty");
        rootNode.setNamespace(namespace);
        List list = rootNode.getChildren("contact",namespace);
        for (int i = 0; i < list.size(); i++) {
            Element node = (Element) list.get(i);
            System.out.println("id values using Style 1 : " + node.getAttribute("id").getValue());
        }
        List<Element> list2 = document.getRootElement().getChildren("contact",namespace);
        for( Element ele : list2){
            System.out.println(ele.getAttribute("id").getValue());
        }
    }

    /**
     * Parses the XML text and returns its root element pretty-printed.
     *
     * @param doc XML text to reformat
     * @throws JDOMException when the text cannot be parsed
     * @throws IOException   when reading the text fails
     */
    public static String getFormatedXMLString(String doc) throws JDOMException, IOException
    {
        return ( makeDomToFormatedString( createDocument(doc) ) ) ;
    }

    /** Returns the document's root element pretty-printed. */
    public static String makeDomToFormatedString(Document doc)
    {
        return makeDomToFormatedString(doc.getRootElement());
    }

    /**
     * Returns the element pretty-printed, with empty elements written as start/end
     * tag pairs and the TRIM_FULL_WHITE text mode.
     */
    public static String makeDomToFormatedString(Element elem)
    {
        XMLOutputter output = new XMLOutputter();
        Format format = Format.getPrettyFormat();
        format.setExpandEmptyElements( true );
        format.setTextMode( Format.TextMode.TRIM_FULL_WHITE );
        output.setFormat( format );
        return output.outputString(elem);
    }

    /**
     * Builds a JDOM document from XML text.
     *
     * @throws JDOMException when the text cannot be parsed
     * @throws IOException   when reading the text fails
     */
    public static Document createDocument(String xml) throws JDOMException, IOException
    {
        InputSource in = new InputSource(new StringReader(xml));
        SAXBuilder saxB = new SAXBuilder();
        return ((saxB.build(in)));
    }

    /**
     * Parses the file and returns the root element of the resulting document.
     *
     * @throws JDOMException when the file cannot be parsed
     * @throws IOException   when the file cannot be read
     */
    public static Element createElement(File xmlFile) throws JDOMException, IOException
    {
        SAXBuilder saxB = new SAXBuilder();
        Document document = saxB.build(xmlFile);
        return document.getRootElement();
    }

    /**
     * Writes the document pretty-printed to the given path. An I/O failure is reported
     * with a stack trace and not propagated.
     */
    public static void writeXmlFile(Document doc,String path){
        // try-with-resources: the FileWriter used to stay open after the output call.
        try (FileWriter writer = new FileWriter( path )) {
            XMLOutputter xmlOutputer = new XMLOutputter();
            xmlOutputer.setFormat( Format.getPrettyFormat() );
            xmlOutputer.output( doc , writer );
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
output will be
id values using Style 1 : 12345
id values using Style 1 : 12346
id values using Style 1 : 12347
id values using Style 1 : 12348
12345
12346
12347
12348
let me know if u face any issues :)

Parsing XML into a Hashmap Java

I have some information stored in XML, and I need to parse XML and store some of the values in Hashmap.
Here is the XML:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sections xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<section ID="1">
<Room>Room1</Room>
<Capactiy>25</Capactiy>
<Approval_Mode>personally</Approval_Mode>
<Building>Building1</Building>
<Address>Streer, 1. Stock, links</Address>
<Room_Number>ZA0115</Room_Number>
<CoordLt>16.412094</CoordLt>
<CoordLn>48.19719</CoordLn>
</section>
<section ID="2">
<Room>Room2</Room>
<Capactiy>120</Capactiy>
<Institute>E401</Institute>
<Approval_Mode>personally</Approval_Mode>
<Building>Building2</Building>
<Address>Street 2, Building2, Stiege 7, 1.Stock</Address>
<Room_Number>AH0105</Room_Number>
<CoordLt>16.369865</CoordLt>
<CoordLn>48.199006</CoordLn>
</section>
----
I want the key to be Room1 and the value to be 16.412094,48.19719 (example for section ID=1).
That is the example for the first section. I have more than 100 sections, so I would like to store the key and values for every section as explained for the first one.
Output would be:
Room1: 16.412094,48.19719;
Room2: 16.369865,48.199006;
Room3: 16,48;
.
.
.
Room100: 16,49;
Can anyone help me?
Here is my code:
import java.io.File;
import java.sql.ResultSet;
import java.util.HashMap;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
/**
 * SAX handler that, for every element whose end tag matches the most recently
 * opened tag, stores the collected character data as the map key and the
 * element name as the map value.
 *
 * <p>Keys are {@code StringBuffer} instances, which are compared by identity,
 * so each recorded element produces its own entry.
 */
public class XML extends DefaultHandler
{
    static HashMap<StringBuffer, String> hashMap;
    String elementName;
    StringBuffer elementValue;
    private HashMap<String, String> newMap;

    /**
     * Parses {@code xml1.xml} with this handler, prints the collected map and
     * exits. Any failure is reported with a stack trace.
     */
    public static void main(String[] args)
    {
        DefaultHandler saxHandler = new XML();
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        try
        {
            hashMap = new HashMap<StringBuffer, String>();
            //out = new OutputStreamWriter(System.out, "UTF8");
            SAXParser parser = parserFactory.newSAXParser();
            File input = new File("xml1.xml");
            parser.parse(input, saxHandler);
            System.out.println(hashMap);
        }
        catch(Throwable failure)
        {
            failure.printStackTrace();
        }
        System.exit(0);
    }

    /** Remembers the name of the element that was just opened. */
    @Override
    public void startElement(String namespaceURI, String sName, String qName, Attributes attrs)
    throws SAXException
    {
        // Fall back to the qualified name when the parser is not namespace aware.
        elementName = "".equals(sName) ? qName : sName;
        if(attrs == null)
        {
            return;
        }
        for(int idx = 0; idx < attrs.getLength(); idx++)
        {
            // Attribute names are resolved but not stored anywhere.
            String attrName = attrs.getLocalName(idx);
            if("".equals(attrName))
            {
                attrName = attrs.getQName(idx);
            }
        }
    }

    /**
     * Records text -> element name when the closing tag matches the last opened
     * tag, then resets the text buffer.
     */
    @Override
    public void endElement(String namespaceURI, String sName, String qName)
    throws SAXException
    {
        String closingName = "".equals(sName) ? qName : sName;
        if(closingName.equals(elementName))
        {
            hashMap.put(elementValue, "" + elementName);
        }
        elementValue = null;
    }

    /** Accumulates character data until the next end tag. */
    @Override
    public void characters(char[] ch, int start, int length)
    throws SAXException
    {
        String chunk = new String(ch, start, length);
        if(elementValue != null)
        {
            elementValue.append(chunk);
            return;
        }
        elementValue = new StringBuffer(chunk);
    }
}
With this code I don't get the desired output.
The output is:
Room=Room1, Capacity=25......
Assume your XML file is "c:/test.xml".
Then use the following code to read it and put the values into a hash map in the format you described:
key=Room name, value=CoordLt,CoordLn
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.naming.spi.DirStateFactory.Result;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Reads {@code c:/test.xml} and maps every section's room name to its
 * {@code "CoordLt,CoordLn"} pair.
 */
public class xml {
    /**
     * Loads the sections file, builds the room-to-coordinates map and prints one
     * {@code Room: CoordLt,CoordLn;} line per entry (in {@code HashMap} order).
     * Prints {@code File not exists} when the input file is missing.
     */
    public static void main(String[] args)
    {
        File file=new File("c:/test.xml");
        if(!file.exists())
        {
            System.out.println("File not exists");
            return;
        }
        try
        {
            DocumentBuilderFactory factory=DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document=builder.parse(file);
            HashMap<String,String> hMap=readRoomCoordinates(document);
            for (String room : hMap.keySet())
            {
                System.out.println(room+": "+hMap.get(room)+";");
            }
        } catch(Exception e){
            System.out.println("exception occured");
            // Keep the cause visible instead of hiding it behind the generic message.
            e.printStackTrace();
        }
    }

    /**
     * Collects room name -> {@code "CoordLt,CoordLn"} for every {@code section}
     * element below the document element.
     *
     * <p>A section that lacks a non-empty {@code Room}, {@code CoordLt} or
     * {@code CoordLn} element is skipped, so one incomplete section does not
     * abort the whole run.
     *
     * @param document the parsed sections document
     * @return a new map; empty when no complete section exists
     */
    static HashMap<String,String> readRoomCoordinates(Document document)
    {
        HashMap<String,String> coordinatesByRoom=new HashMap<String, String>();
        NodeList sections=document.getDocumentElement().getElementsByTagName("section");
        for (int i = 0; i < sections.getLength(); i++)
        {
            Element section = (Element) sections.item(i);
            String roomName=firstChildText(section, "Room");
            String coordLt=firstChildText(section, "CoordLt");
            String coordLn=firstChildText(section, "CoordLn");
            if (roomName == null || coordLt == null || coordLn == null)
            {
                continue;
            }
            coordinatesByRoom.put(roomName, coordLt+","+coordLn);
        }
        return coordinatesByRoom;
    }

    /**
     * Returns the trimmed text of the first descendant element named
     * {@code tagName}, or {@code null} when there is none or it has no text.
     */
    private static String firstChildText(Element parent, String tagName)
    {
        NodeList matches = parent.getElementsByTagName(tagName);
        if (matches.getLength() == 0)
        {
            return null;
        }
        String text = matches.item(0).getTextContent();
        if (text == null)
        {
            return null;
        }
        text = text.trim();
        return text.length() == 0 ? null : text;
    }
}
If you transform the XML using this XSLT (which can be done in Java), you get your desired output. If someone else knows how to load the result into a HashMap, you'll be fine.
<?xml version="1.0" encoding="utf-8"?>
<!-- Flattens a <sections> document into plain text: "Room:CoordLt,CoordLn;" for each <section>. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Text output method: only the text produced by the templates below is emitted, no markup. -->
<xsl:output method="text" indent="no" omit-xml-declaration ="yes" />
<!-- Root template: process only the <section> children of <sections>. -->
<xsl:template match="/sections">
<xsl:apply-templates select="section"/>
</xsl:template>
<!-- One record per section; Room, CoordLt and CoordLn have no template of their own here, -->
<!-- so the built-in rules output their text content. -->
<xsl:template match="section" xml:space="default">
<xsl:apply-templates select="Room"/>
<xsl:text>:</xsl:text>
<xsl:apply-templates select="CoordLt" />
<xsl:text>,</xsl:text>
<xsl:apply-templates select="CoordLn"/>
<xsl:text>;</xsl:text>
</xsl:template>
</xsl:stylesheet>

Java DOM: cannot write adapted XML to file

I have the following simplified XML:
<?xml version="1.0" encoding="UTF-8"?>
<ExportData>
<Rows>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>4302150</menuItemNumberlong>
<productQuantityinttrue>14</productQuantityinttrue>
<productValueInclVATdecimaltrue>1.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>1.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>333555</menuItemNumberlong>
<productQuantityinttrue>0</productQuantityinttrue>
<productValueInclVATdecimaltrue>3.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>3.775701</productValueExclVATdecimaltrue>
</R>
<R>
<companyCodestringtrue>101</companyCodestringtrue>
<transactionQualifierstring>Sales</transactionQualifierstring>
<menuItemNumberlong>1235665</menuItemNumberlong>
<productQuantityinttrue>5</productQuantityinttrue>
<productValueInclVATdecimaltrue>4.90</productValueInclVATdecimaltrue>
<productValueExclVATdecimaltrue>4.775701</productValueExclVATdecimaltrue>
</R>
</Rows>
</ExportData>
I need to delete each complete <R> element if the <productQuantityinttrue> element equals "0".
I came up with the following Java code:
package filterPositions;
import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Removes every {@code <R>} row whose {@code <productQuantityinttrue>} child
 * is {@code 0} from an export document and writes the filtered document to a
 * new file.
 */
public class FilterPositions {
    public static String result = "";

    /**
     * Parses the export file, drops the zero-quantity rows in place and saves
     * the same document under the output file name.
     */
    public static void main(String[] args) throws Exception {
        try {
            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            File filePath = new File("C:/LSA_SALES_EXPORT_1507_test_zero_qu.xml");
            Document doc = docBuilder.parse(filePath);
            final Element element = doc.getDocumentElement();
            // Filter the parsed tree itself and write that document. Adopting a
            // single node into a fresh, empty document never attaches it, so the
            // new document would have nothing to serialize.
            traversingXML(element);
            writeXmlFile(doc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml");
            System.out.println("Done...");
            System.out.println("Exiting...");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Detaches every {@code <R>} descendant of {@code element} that has a
     * {@code <productQuantityinttrue>} child whose text is {@code 0}.
     *
     * @param element the subtree to filter; modified in place
     * @return the last {@code <R>} element in document order that was examined
     *         (it may have been removed), or {@code null} when there is none
     */
    public static Element traversingXML(Element element) {
        NodeList rows = element.getElementsByTagName("R");
        Element lastRow = null;
        // The NodeList is live: removing a node shifts every later index down.
        // Walking from the end keeps the indexes still to be visited stable, so
        // consecutive zero-quantity rows are all removed.
        for (int i = rows.getLength() - 1; i >= 0; i--) {
            Element row = (Element) rows.item(i);
            if (lastRow == null) {
                lastRow = row;
            }
            if (hasZeroQuantity(row)) {
                row.getParentNode().removeChild(row);
            }
        }
        return lastRow;
    }

    /** Tells whether the row has a direct quantity child whose trimmed text is "0". */
    private static boolean hasZeroQuantity(Element row) {
        for (Node child = row.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child instanceof Element
                    && "productQuantityinttrue".equals(child.getNodeName())
                    && "0".equals(child.getTextContent().trim())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Serializes {@code doc} to the file {@code filename}. Transformer failures
     * are reported on standard error and not propagated.
     *
     * @param doc      the document to write
     * @param filename the destination file name
     */
    public static void writeXmlFile(Document doc, String filename) {
        try {
            // The source must wrap the document; an empty DOMSource serializes nothing.
            Source source = new DOMSource(doc);
            File file = new File(filename);
            Result result = new StreamResult(file);
            Transformer xformer = TransformerFactory.newInstance()
                    .newTransformer();
            xformer.transform(source, result);
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            e.printStackTrace();
        }
    }
}
I am not sure whether my method "traversingXML" is working properly. My problem right now is that the adapted XML structure (with one <R> element deleted) is not written to newdoc.
You don't copy the original document to newdoc; instead you create a new, empty XML document.
Instead, try this code:
...
final Element element = doc.getDocumentElement(); // original code up to here
traversingXML(element); // delete the node
writeXmlFile(doc, "LSA_SALES_EXPORT_1507_test_zero_qu_OUT.xml"); // save modified document

Categories

Resources