Parsing XML into a Hashmap Java

Parsing XML into a Hashmap Java - java

I have some information stored in XML, and I need to parse XML and store some of the values in Hashmap.
Here is the XML:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sections xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<section ID="1">
<Room>Room1</Room>
<Capactiy>25</Capactiy>
<Approval_Mode>personally</Approval_Mode>
<Building>Building1</Building>
<Address>Streer, 1. Stock, links</Address>
<Room_Number>ZA0115</Room_Number>
<CoordLt>16.412094</CoordLt>
<CoordLn>48.19719</CoordLn>
</section>
<section ID="2">
<Room>Room2</Room>
<Capactiy>120</Capactiy>
<Institute>E401</Institute>
<Approval_Mode>personally</Approval_Mode>
<Building>Building2</Building>
<Address>Street 2, Building2, Stiege 7, 1.Stock</Address>
<Room_Number>AH0105</Room_Number>
<CoordLt>16.369865</CoordLt>
<CoordLn>48.199006</CoordLn>
</section>
----
I want that key be:Room1 and values: 16.412094,48.19719 (example for Section ID=1)
That is example for first section.I have more than 100 section so I would like to store key and values for every section like I explained for the first example.
Output would be:
Room1: 16.412094,48.19719;
Room2: 16.369865,48.199006;
Room3: 16,48;
.
.
.
Room100: 16,49;
Can anyone help me?
Here is my code:
import java.io.File;
import java.sql.ResultSet;
import java.util.HashMap;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
public class XML extends DefaultHandler
{
static HashMap<StringBuffer, String> hashMap;
String elementName;
StringBuffer elementValue;
private HashMap<String, String> newMap;
public static void main(String[] args)
{
DefaultHandler handler = new XML();
SAXParserFactory factory = SAXParserFactory.newInstance();
try
{
hashMap = new HashMap<StringBuffer, String>();
//out = new OutputStreamWriter(System.out, "UTF8");
SAXParser saxParser = factory.newSAXParser();
saxParser.parse(new File("xml1.xml"), handler);
System.out.println(hashMap);
}
catch(Throwable t)
{
t.printStackTrace();
}
System.exit(0);
}
public void startElement(String namespaceURI, String sName, String qName, Attributes attrs)
throws SAXException
{
String eName = sName;
if("".equals(eName)) eName = qName;
elementName = eName;
if(attrs != null)
{
for(int i = 0; i < attrs.getLength(); i++)
{
String aName = attrs.getLocalName(i);
if("".equals(aName)) aName = attrs.getQName(i);
}
}
}
public void endElement(String namespaceURI, String sName, String qName)
throws SAXException
{
String eName = sName;
if("".equals(eName)) eName = qName;
if(eName.equals(elementName))
hashMap.put(elementValue,""+ elementName );
elementValue = null;
}
public void characters(char[] ch, int start, int length)
throws SAXException
{
String str = new String(ch, start, length);
if(elementValue == null)
elementValue = new StringBuffer(str);
else
elementValue.append(str);
}
}
With this code I don't get desired output.
output is:
Room=Room1, Capacity=25......

Assume your xml file is "c:/test.xml"
Then use the following code to read and put into a hash map in in the following format as you said
key=Roomnumber value=CoordLt,CoordLn
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.naming.spi.DirStateFactory.Result;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class xml {
public static void main(String[] args)
{
HashMap<String,String>hMap=new HashMap<String, String>();
File file=new File("c:/test.xml");
if(file.exists())
{
DocumentBuilderFactory factory=DocumentBuilderFactory.newInstance();
try
{
DocumentBuilder builder = factory.newDocumentBuilder();
Document document=builder.parse(file);
Element documentElement=document.getDocumentElement();
NodeList sList=documentElement.getElementsByTagName("section");
if (sList != null && sList.getLength() > 0)
{
for (int i = 0; i < sList.getLength(); i++)
{
Node node = sList.item(i);
if(node.getNodeType()==Node.ELEMENT_NODE)
{
Element e = (Element) node;
NodeList nodeList = e.getElementsByTagName("Room");
String roomName= nodeList.item(0).getChildNodes().item(0)
.getNodeValue();
nodeList = e.getElementsByTagName("CoordLt");
String coordValues= nodeList.item(0).getChildNodes().item(0)
.getNodeValue();
nodeList = e.getElementsByTagName("CoordLn");
coordValues=coordValues+","+ nodeList.item(0).getChildNodes().item(0)
.getNodeValue();
hMap.put(roomName, coordValues);
}
}
}
} catch(Exception e){
System.out.println("exception occured");
}
}else
{
System.out.println("File not exists");
}
}
}

If you transform the XML using this xslt (which can be done in Java) you get your desired output, If someone sle knows howto load in a hashmap you'll be fine.
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" indent="no" omit-xml-declaration ="yes" />
<xsl:template match="/sections">
<xsl:apply-templates select="section"/>
</xsl:template>
<xsl:template match="section" xml:space="default">
<xsl:apply-templates select="Room"/>
<xsl:text>:</xsl:text>
<xsl:apply-templates select="CoordLt" />
<xsl:text>,</xsl:text>
<xsl:apply-templates select="CoordLn"/>
<xsl:text>;</xsl:text>
</xsl:template>
</xsl:stylesheet>

Related

how to convert xml to avro without ignoring !CDATA content?

I have the following source XML file named customers.xml:
<?xml version="1.0" encoding="utf-8"?>
<p:CustomerElement xmlns:p="http://www.dog.com/customer" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:schemaLocation="http://www.dog.com/customer Customer.xsd">
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P020]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[AMADOR]]></dealerCustNumber>
<dealerCustName><![CDATA[AMADOR COMPUTERS]]></dealerCustName>
<phoneNumber><![CDATA[800 111 4444]]></phoneNumber>
<faxNumber><![CDATA[780 111 4444]]></faxNumber>
<email xsi:nil="true" />
<customerType><![CDATA[R]]></customerType>
<activeCustomerInd>false</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[00]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2000-01-11T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-02-05T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[ACCOUNT FLAGGED FOR DELETION]]></address2>
<address3><![CDATA[AS PER BILL FEB AA/15]]></address3>
<city><![CDATA[CHICAGO]]></city>
<postalCode><![CDATA[Q5S 1E5]]></postalCode>
<state><![CDATA[AB]]></state>
<country><![CDATA[CA]]></country>
<location><![CDATA[FLAGGED FOR DELETION]]></location>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[G]]></divisionCode>
<divisionName><![CDATA[CAR]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[AQ99]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
<SalesRep>
<number><![CDATA[XXX]]></number>
<name><![CDATA[KILL ACCOUNT IN PROCESS]]></name>
<type><![CDATA[M]]></type>
<par>0</par>
<email xsi:nil="true" />
<phoneNumber><![CDATA[000 000 0000]]></phoneNumber>
</SalesRep>
</Division>
</Customer>
<Customer>
<Sender>
<transmitDate>2016-02-21T00:00:00</transmitDate>
<transmitter>Dog ETL v2.0</transmitter>
<dealerCode><![CDATA[P000]]></dealerCode>
<DMSSystem><![CDATA[DBS]]></DMSSystem>
<DMSReleaseNumber><![CDATA[5.0]]></DMSReleaseNumber>
</Sender>
<Identifier>
<updateInd><![CDATA[A]]></updateInd>
<dealerCustNumber><![CDATA[UU20888]]></dealerCustNumber>
<dealerCustName><![CDATA[ ADVERTISING AND PR]]></dealerCustName>
<phoneNumber xsi:nil="true" />
<faxNumber xsi:nil="true" />
<email xsi:nil="true" />
<customerType><![CDATA[I]]></customerType>
<activeCustomerInd>true</activeCustomerInd>
<parentCustomerNumber xsi:nil="true" />
<primaryStoreNumber><![CDATA[M2]]></primaryStoreNumber>
<preferredLanguage><![CDATA[ENG]]></preferredLanguage>
<dealerDateInSystem>2015-11-18T00:00:00</dealerDateInSystem>
<dealerLastUpdatedDate>2015-11-19T00:00:00</dealerLastUpdatedDate>
</Identifier>
<Location>
<address2><![CDATA[EQUIP]]></address2>
<city><![CDATA[ADER]]></city>
<country><![CDATA[CA]]></country>
<addressType><![CDATA[M]]></addressType>
</Location>
<Division>
<divisionCode><![CDATA[A]]></divisionCode>
<divisionName><![CDATA[AGRO]]></divisionName>
<IndustryCode>
<industryCode><![CDATA[EQ00]]></industryCode>
<primaryIndustryCodeInd>true</primaryIndustryCodeInd>
</IndustryCode>
</Division>
</Customer>
</p:CustomerElement>
I have the following java code, which parses customers.xml into individual "Customer" entities, and then attempts to convert each of them into an AVRO format:
package com.dogsoft.data.xmltoavro;
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class ParseXmlFile {
private static Protocol protocol;
public static void xmlToAvro(File xmlFile, File avroFile) throws IOException, SAXException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
Schema schema = protocol.getType("Element");
Document doc = parse(xmlFile);
DatumWriter<GenericRecord> datumWriter = new SpecificDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.create(schema, avroFile);
Object docElement = doc.getDocumentElement();
fileWriter.append(wrapElement(doc.getDocumentElement()));
}
}
private static GenericData.Record wrapElement(Element el) {
GenericData.Record record = new GenericData.Record(protocol.getType("Element"));
record.put("name", el.getNodeName());
NamedNodeMap attributeNodes = el.getAttributes();
List<GenericData.Record> attrRecords = new ArrayList<>();
for (int i = 0; i < attributeNodes.getLength(); i++) {
Attr attr = (Attr) attributeNodes.item(i);
attrRecords.add(wrapAttr(attr));
}
record.put("attributes", attrRecords);
List<Object> childArray = new ArrayList<>();
NodeList childNodes = el.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
Object nt = node.getNodeType();
if (node.getNodeType() == Node.ELEMENT_NODE)
childArray.add(wrapElement((Element) node));
if (node.getNodeType() == Node.TEXT_NODE)
childArray.add(node.getTextContent());
}
record.put("children", childArray);
return record;
}
private static GenericData.Record wrapAttr(Attr attr) {
GenericData.Record record = new GenericData.Record(protocol.getType("Attribute"));
record.put("name", attr.getName());
record.put("value", attr.getValue());
return record;
}
private static Document parse(File file) throws IOException, SAXException {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
return builder.parse(file);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
}
public static void avroToXml(File avroFile, File xmlFile) throws IOException {
try {
InputStream stream = new FileInputStream("/tmp/xml.avsc");
if (stream == null) throw new IllegalStateException("Classpath should include xml.avsc");
protocol = Protocol.parse(stream);
} catch (IOException e) {
throw new RuntimeException(e);
}
DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(protocol.getType("Element"));
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader);
GenericRecord record = dataFileReader.next();
Document doc;
try {
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
}
Element el = unwrapElement(record, doc);
doc.appendChild(el);
saveDocument(doc, xmlFile);
}
private static Element unwrapElement(GenericRecord record, Document doc) {
String name = "" + record.get("name");
Element el = doc.createElement(name);
#SuppressWarnings("unchecked")
GenericArray<GenericRecord> attrArray = (GenericArray<GenericRecord>) record.get("attributes");
for (GenericRecord attrRecord : attrArray)
el.setAttributeNode(unwrapAttr(attrRecord, doc));
#SuppressWarnings("unchecked")
GenericArray<Object> childArray = (GenericArray<Object>) record.get("children");
for (Object childObj : childArray) {
if (childObj instanceof GenericRecord)
el.appendChild(unwrapElement((GenericRecord) childObj, doc));
if (childObj instanceof Utf8)
el.appendChild(doc.createTextNode("" + childObj));
}
return el;
}
private static Attr unwrapAttr(GenericRecord record, Document doc) {
Attr attr = doc.createAttribute("" + record.get("name"));
attr.setValue("" + record.get("value"));
return attr;
}
private static void saveDocument(Document doc, File file) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(file));
} catch (TransformerException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args)
{
Object nodeObject = null;
Node myNode = null;
Transformer transformer = null;
try
{
try {
transformer =
TransformerFactory.newInstance().newTransformer();
} catch (TransformerConfigurationException e) {
e.printStackTrace();
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("/tmp/customers.xml");
System.out.printf("Version = %s%n", doc.getXmlVersion());
System.out.printf("Encoding = %s%n", doc.getXmlEncoding());
System.out.printf("Standalone = %b%n%n", doc.getXmlStandalone());
if (doc.hasChildNodes())
{
int customerNumber = 0;
NodeList nl = doc.getDocumentElement().getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
System.out.println(node.toString());
customerNumber++;
File avroFile = new File("/tmp/customer" + customerNumber + ".avro");
File xmlFile = new File("/tmp/customer" + customerNumber + ".xml");
File xmlFile1 = new File("/tmp/customer" + customerNumber + "-foo.xml");
try {
transformer.transform(
new DOMSource(node), new StreamResult(xmlFile));
File outputFile = new File("/tmp/customer" + customerNumber + ".avro");
xmlToAvro(xmlFile, outputFile);
} catch (TransformerException e) {
e.printStackTrace();
}
}
}
}
}
catch (IOException ioe)
{
System.err.println("IOE: " + ioe);
}
catch (SAXException saxe)
{
System.err.println("SAXE: " + saxe);
}
catch (FactoryConfigurationError fce)
{
System.err.println("FCE: " + fce);
}
catch (ParserConfigurationException pce)
{
System.err.println("PCE: " + pce);
}
}
}
This code works overall, but it ignores any content, which is enclosed into
![CDATA[
tag. As it happens, most of the actual useful data in the customers.xml files is enclosed into these tags.
Is there a way to modify this code, to make it not ignore the CDATA contents?

Instead of hand-writing parser code, you might want to split the problem in two parts: first, bind XML into POJO (using JAXB or Jackson XML module); and then write POJO as Avro (using Apache Avro lib, or Jackson Avro module). All you need for that would be POJO definition that matches expected structure for data as XML and Avro. Result should be less code, and basically specifying what needs to happen and now how to do it.

Read XML using dom4j or mycila

I have the following xml as a String but I am having problem reading in a loop manner for parameter "PrdInfoTable" and "OrdInfoTable" as they are dynamic so I need to read it to an arraylist or something. I have tried several method but still unable to get it done. How can I do this?
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<soap:Body>
<GetCardResponse xmlns="http://tempuri.org/">
<GetCardResult>
<ReturnResult>
<Return>
<ReturnMsgNo>1</ReturnMsgNo>
<ReturnMsg>交易成功</ReturnMsg>
</Return>
<GetCardResult>
<OrdTable>
<Facno>1234</Facno>
<TrdDate>2015/5/6 11:04:20</TrdDate>
<TrdSeq>ABCD1234</TrdSeq>
<TrdBarCode>123456789</TrdBarCode>
</OrdTable>
<PrdTable>
<GameFacName></GameFacName>
<PrdName>abc123</PrdName>
<CardId>ABCD012345</CardId>
<CardPwd>KKSDHASBDH</CardPwd>
<ExpDate>2015/02/12</ExpDate>
</PrdTable>
<PrdInfoTable>
<PrdNote>* 測12346666666666666666666666666666666</PrdNote>
</PrdInfoTable>
<PrdInfoTable>
<PrdNote>* 測56787777777777777777777</PrdNote>
</PrdInfoTable>
<PrdInfoTable>
<PrdNote>* 測12345611111111111111111</PrdNote>
</PrdInfoTable>
<OrdInfoTable>
<TxetContent>測1111111111111111111111111111111111111111</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>22222測22222222222222222222222222222222222</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>3333333333333333333333測333333333333333333</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>4444444測444444444444444444444444444444444</TxetContent>
</OrdInfoTable>
<OrdInfoTable>
<TxetContent>55555555555555555555555555555測55555555555</TxetContent>
</OrdInfoTable>
<FreeSnTable />
</GetCardResult>
</ReturnResult>
</GetCardResult>
</GetCardResponse>
</soap:Body>
Below is the code:
HttpClient httpClient = new HttpClient();
PostMethod post = new PostMethod(url);
post.setRequestEntity(new StringRequestEntity(xmlRequest.toString()));
post.setRequestHeader("Content-type", "application/soap+xml; charset=utf-8");
post.setRequestHeader("Content-Length", xmlRequest.length()+"");
responseCode = httpClient.executeMethod(post);
InputStream in = post.getResponseBodyAsStream();
BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
String line;
String xmlresponse = "";
while( ( line = reader.readLine() ) != null ) {
xmlresponse = xmlresponse+line;
}
System.out.println("XML Response:\n");
xmlresponse = xmlresponse.replaceAll("<", "<");
xmlresponse = xmlresponse.replaceAll(">", ">");
System.out.println(xmlresponse+"\n");
// clean everything
reader.close();
XMLTag xmlTag = XMLDoc.from(xmlresponse, true);
System.out.println("\n" + xmlTag.gotoRoot().gotoChild().gotoChild().gotoTag("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable").getCurrentTagName());
System.out.println("PrdNote: "+ xmlTag.gotoRoot().gotoChild().gotoChild().getText("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable/PrdNote[1]"));
System.out.println("\n" + xmlTag.gotoRoot().gotoChild().gotoChild().gotoTag("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable").getCurrentTagName());
System.out.println("PrdNote: "+ xmlTag.gotoRoot().gotoChild().gotoChild().getText("GetCardResult/ReturnResult/GetCardResult/PrdInfoTable/PrdNote[2]"));

Another easy way to achieve this is using XPath expressions.
This code reads a xml file (tested with your XML code), look for PrdNote and TxetContent elements. The method extractNodesValues return an ArrayList<String> with it's values:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Test{
public static void main(String[] args){
try {
File myXmlFile = new File("src/test.xml");
String xpath_PrdNotes = "/Envelope/Body/GetCardResponse/GetCardResult/ReturnResult/GetCardResult/PrdInfoTable/PrdNote/text()";
String xpath_TxetContent = "/Envelope/Body/GetCardResponse/GetCardResult/ReturnResult/GetCardResult/OrdInfoTable/TxetContent/text()";
ArrayList<String> prdNotesValues = extractNodesValues(myXmlFile, xpath_PrdNotes );
ArrayList<String> txetContentValues = extractNodesValues(myXmlFile, xpath_TxetContent );
System.out.println("PrdNotesValues:");
for(String val : prdNotesValues){
System.out.println(val);
}
System.out.println("");
System.out.println("TxetContentValues:");
for(String val : txetContentValues){
System.out.println(val);
}
}
catch(XPathExpressionException e){ System.out.println(e.getMessage()); }
catch(IOException e){ System.out.println(e.getMessage()); }
catch(SAXException e){ System.out.println(e.getMessage()); }
catch(ParserConfigurationException e){ System.out.println(e.getMessage()); }
}
public static ArrayList<String> extractNodesValues(File f, String xpath_expression) throws XPathExpressionException, IOException, SAXException, ParserConfigurationException {
Document xmlDocument;
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder;
XPath xPath;
NodeList nodeList;
Node n;
ArrayList<String> result;
builder = builderFactory.newDocumentBuilder();
xmlDocument = builder.parse(f);
xPath = XPathFactory.newInstance().newXPath();
result = new ArrayList<String>();
// here all values from PrdNote elements are stored
nodeList = (NodeList)xPath.compile(xpath_expression).evaluate(xmlDocument, XPathConstants.NODESET);
if(nodeList != null && nodeList.getLength() > 0) {
//iterate over all obtained nodes matching the xpath expression
for(int i=0; i<nodeList.getLength(); i++){
result.add(nodeList.item(i).getNodeValue());
}
}
return result;
}
}
Output:
PrdNotesValues:
* 測12346666666666666666666666666666666
* 測56787777777777777777777
* 測12345611111111111111111
TxetContentValues:
測1111111111111111111111111111111111111111
22222測22222222222222222222222222222222222
3333333333333333333333測333333333333333333
4444444測444444444444444444444444444444444
55555555555555555555555555555測55555555555
Hope this helps.

java xml parsing dblp

this is the xml file
please how to parse the tag author example we dont know how many author for each inproceeding ?
<?xml version="1.0" encoding="ISO-8859-1"?>
<dblp>
<inproceedings mdate="2014-01-18" key="series/sci/AzzagL13">
<author>Hanane Azzag</author>
<author>Mustapha Lebbah</author>
<title>A New Way for Hierarchical and Topological Clustering.</title>
<pages>85-97</pages>
<year>2011</year>
<booktitle>EGC (best of volume)</booktitle>
<ee>http://dx.doi.org/10.1007/978-3-642-35855-5_5</ee>
<crossref>series/sci/2013-471</crossref>
<url>db/series/sci/sci471.html#AzzagL13</url>
</inproceedings>
<inproceedings mdate="2014-01-18" key="series/sci/RabatelBP13">
<author>Julien Rabatel</author>
<author>Sandra Bringay</author>
<author>Pascal Poncelet</author>
<title>Mining Sequential Patterns: A Context-Aware Approach.</title>
<pages>23-41</pages>
<year>2011</year>
<booktitle>EGC (best of volume)</booktitle>
<ee>http://dx.doi.org/10.1007/978-3-642-35855-5_2</ee>
<crossref>series/sci/2013-471</crossref>
<url>db/series/sci/sci471.html#RabatelBP13</url>
</inproceedings>
</dblp>

Use Xpath, it's fast and powerfull , these lines for your example return 5 lines
Code:
final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new FileInputStream("input.xml"));
final XPath xPath = XPathFactory.newInstance().newXPath();
final NodeList nodeList = (NodeList) xPath.compile("//author").evaluate(document, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
System.out.println(nodeList.item(i).getFirstChild().getNodeValue());
}
Displays:
Hanane Azzag
Mustapha Lebbah
Julien Rabatel
Sandra Bringay
Pascal Poncelet

Following code parse using apache digester which is commonly used while parsing in real projects. Nice one from apache community
// Updated code as per you need.
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import org.apache.commons.digester.Digester;
import org.apache.commons.digester.Rule;
import org.apache.commons.digester.Rules;
import org.xml.sax.InputSource;
public class Parsing {
public static void main(String[] args) throws Exception{
InputStream data = new FileInputStream("E:\\workspace\\trunk\\Parsing\\src\\data.xml");
byte[] b = new byte[data.available()];
// data.read(b);
Digester digester = new Digester();
//Genearting Array list while encountering dblp xpath
digester.addObjectCreate("dblp", HashMap.class);
digester.addObjectCreate("dblp/inproceedings", ArrayList.class);
//Calling add method while encountering author xpath
AuthorRule rule = new AuthorRule();
digester.addRule("dblp/inproceedings/author", rule);
digester.addRule("dblp/inproceedings/title", rule);
digester.addRule("dblp/inproceedings", rule);
HashMap parsedData = (HashMap) digester.parse(data);
Iterator<Entry<String, ArrayList>> dataItr = parsedData.entrySet().iterator();
while(dataItr.hasNext()){
Entry<String, ArrayList> entry = dataItr.next();
System.out.println("Title : " + entry.getKey() + ", Authors" + entry.getValue().toString());
}
}
private static class AuthorRule extends Rule{
String currentTitle = "";
#Override
public void body(String namespace, String name, String text)
throws Exception {
HashMap object = (HashMap) digester.peek(1);
ArrayList authors = (ArrayList) digester.peek(0);
if(name.equals("title")){
currentTitle = text;
}
else if(name.equals("author")){
authors.add(text);
}
}
#Override
public void end(String namespace, String name) throws Exception {
HashMap object = (HashMap) digester.peek(1);
ArrayList authors = (ArrayList) digester.peek(0);
if(name.equals("inproceedings")){
object.put(currentTitle, authors);
}
}
}
}
output::
Title : A New Way for Hierarchical and Topological Clustering., Authros[Hanane Azzag, Mustapha Lebbah]
Title : Mining Sequential Patterns: A Context-Aware Approach., Authros[Julien Rabatel, Sandra Bringay, Pascal Poncelet]

there are number of ways, e.g. via DOM:
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
public class XmlAuthorReader {
public static void main(String argv[]) {
try {
File fXmlFile = new File(<filePath>);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
NodeList nList = doc.getElementsByTagName("author");
System.out.println(nList.getLength()+ " author(s) found");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
System.out.println("Author: " + nNode.getTextContent());
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
you can find more variants here: http://www.mkyong.com/tutorials/java-xml-tutorials/

Why my attribute values are not printed?

I am trying to print the attributes values for following xml file:
<data/>
<request/>
<type>City</type>
<query>Jaipur, India</query>
</request>
<current_condition/>
<observation_time>06:37 AM</observation_time>
<temp_C>40</temp_C>
<temp_F>104</temp_F>
<weatherCode>113</weatherCode>
<weatherIconUrl/>
<weatherDesc/>
<windspeedMiles>8</windspeedMiles>
<windspeedKmph>13</windspeedKmph>
<winddirDegree>280</winddirDegree>
<winddir16Point>W</winddir16Point>
<precipMM>0.0</precipMM>
<humidity>34</humidity>
<visibility>4</visibility>
<pressure>997</pressure>
<cloudcover>25</cloudcover>
</current_condition>
<weather/>
<date>2012-07-03</date>
<tempMaxC>46</tempMaxC>
<tempMaxF>115</tempMaxF>
<tempMinC>36</tempMinC>
<tempMinF>97</tempMinF>
<windspeedMiles>6</windspeedMiles>
<windspeedKmph>9</windspeedKmph>
<winddirection>N</winddirection>
<winddir16Point>N</winddir16Point>
<winddirDegree>7</winddirDegree>
<weatherCode>113</weatherCode>
<weatherIconUrl/>
<weatherDesc/>
<precipMM>0.0</precipMM>
</weather>
<weather/>
<date>2012-07-04</date>
<tempMaxC>46</tempMaxC>
<tempMaxF>114</tempMaxF>
<tempMinC>34</tempMinC>
<tempMinF>94</tempMinF>
<windspeedMiles>12</windspeedMiles>
<windspeedKmph>20</windspeedKmph>
<winddirection>NW</winddirection>
<winddir16Point>NW</winddir16Point>
<winddirDegree>319</winddirDegree>
<weatherCode>113</weatherCode>
<weatherIconUrl/>
<weatherDesc/>
<precipMM>0.0</precipMM>
</weather>
</data>
Following is the java code which I am using to print the attribute values:
public class WorldWeatherOnline {
public static void main (String[] args) throws IOException, ParserConfigurationException, SAXException, TransformerException {
URL url = new URL("http://free.worldweatheronline.com/feed/weather.ashx?q=jaipur,india&format=xml&num_of_days=2&key=c5774216f9120304120207");
URLConnection conn = url.openConnection();
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(conn.getInputStream());
printElementAttributes(doc);
}
static void printElementAttributes(Document doc)
{
NodeList nl = doc.getElementsByTagName("*");
Element e;
Node n;
NamedNodeMap nnm;
String attrname;
String attrval;
int i, len;
len = nl.getLength();
for (int j=0; j < len; j++)
{
e = (Element)nl.item(j);
System.out.println(e.getTagName() + ":");
System.out.println("check-1");
nnm = e.getAttributes();
if (nnm != null)
{
for (i=0; i<nnm.getLength(); i++)
{
System.out.println("check");
n = nnm.item(i);
attrname = n.getNodeName();
attrval = n.getNodeValue();
System.out.print(" " + attrname + " = " + attrval);
}
}
System.out.println();
}
}
Basically my NamedNodeMap nnm is null, so it isn't going inside the loop. I have used these imports:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

Your XML doesn't have any attributes. It just has elements with text contents. An element with an attribute looks like this:
<element attributeName="attribute value" />
If you want to use your current XML, you'll need to get the text values of the elements instead, and presumably store those against the element name.

Wrap XML Element wth text using Java?

I want to transform XML like this:
<account>Wally World</account>
<city>Anywhere</city>
<state>CA</state>
Into this by wrapping each element with equals and single quote i.e. =''
<account>='Wally World'</account>
<city>='Anywhere'</city>
<state>='CA'</state>
Need to transform all the element text in large XML files using Java, maybe with XSLT?

In XSLT it could be something like this:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
version="1.0">
<xsl:template match="account|city|state">
<xsl:element name="{local-name(.)}">
<xsl:text disable-output-escaping="yes">=&apos;</xsl:text>
<xsl:value-of select="text()"/>
<xsl:text disable-output-escaping="yes">&apos;</xsl:text>
</xsl:element>
</xsl:template>
<xsl:template match="#*|node()">
<xsl:copy>
<xsl:apply-templates select="#*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
Note that it will only change account, city and state elements, and leave all other elements intact. So a document like this:
<items>
<item>
<account>Wally World</account>
<city>Anywhere</city>
<state>CA</state>
</item>
</items>
... would change into this:
<items>
<item>
<account>='Wally World'</account>
<city>='Anywhere'</city>
<state>='CA'</state>
</item>
</items>

If you want to just use SAX to make this type of simple change, here is the code:
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLFilterImpl;
import org.xml.sax.helpers.XMLReaderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class XMLPipeline {
public static void main(String[] args) throws Exception {
String inputFile = "a.xml";
PrintStream outputStream = System.out;
new XMLPipeline().process(inputFile, outputStream);
}
//default JDK
public void process(String inputFile, OutputStream outputStream) throws
SAXException, ParserConfigurationException, IOException, TransformerException {
StreamResult xwriter = new StreamResult(outputStream);
XMLReader xreader = XMLReaderFactory.createXMLReader();
XMLAnalyzer analyzer = new XMLAnalyzer(xreader);
TransformerFactory stf = SAXTransformerFactory.newInstance();
SAXSource ss = new SAXSource(analyzer, new InputSource(inputFile));
stf.newTransformer().transform(ss, xwriter);
}
public static class XMLAnalyzer extends XMLFilterImpl {
private Set<String> tags = new HashSet<String>(Arrays.asList("account", "city", "state"));
boolean bufferText = false;
StringBuilder buffer;
public XMLAnalyzer(XMLReader xmlReader) {
super(xmlReader);
}
#Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
super.startElement(uri, localName, qName, atts);
if (tags.contains(localName)) {
buffer = new StringBuilder("='");
bufferText = true;
}
}
#Override
public void characters(char[] chars, int i, int i1) throws SAXException {
if (bufferText) {
buffer.append(chars, i, i1);
} else {
super.characters(chars, i, i1);
}
}
#Override
public void endElement(String s, String s1, String s2) throws SAXException {
if (bufferText) {
buffer.append('\'');
super.characters(buffer.toString().toCharArray(), 0, buffer.length());
bufferText = false;
}
super.endElement(s, s1, s2);
}
}
}

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Parsing XML into a Hashmap Java - java

Related

how to convert xml to avro without ignoring !CDATA content?

Read XML using dom4j or mycila

java xml parsing dblp

Why my attribute values are not printed?

Wrap XML Element wth text using Java?

Categories

Resources