some suggestions for using STAX Parsing in java

some suggestions for using STAX Parsing in java - java

I have a XML document which has at the top of it database configurations like username, password and URL and after that it has a tag called data and inside that it has data about employees and data about department that I want to pull it from the XML and insert it into a database.
I have managed to read the JDBC config and set it in a bean that return a connection and successfully worked, but now my problem is with the data tag how can I ignore the JDBC config from the XML file and read the data tag and insert it to a database, I know I can do it like
if (event.isStartElement()) {
StartElement startElement = event.asStartElement();
currentName = startElement.getName();
if(!currentName.equals("connection-setteings")) ...
But its not a good idea as I believe so can someone advice a better approach for that?
Here is the XML file:
<?xml version="1.0" encoding="UTF-8"?>
<import-request xmlns="http://www.phi01tech.com/tools/data-import" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.phi01tech.com/tools/data-import data-import.xsd ">
<connection-settings>
<username>root</username>
<password>root</password>
<url>jdbc:mysql://localhost:3306/sample</url>
<driverClassName>com.mysql.jdbc.Driver</driverClassName>
</connection-settings>
<data>
<departments dept_no="d009" dept_name="Customer Service"/>
<departments dept_no="d005" dept_name="Development"/>
<departments dept_no="d002" dept_name="Finance"/>
<departments dept_no="d003" dept_name="Human Resources"/>
<departments dept_no="d001" dept_name="Marketing"/>
<departments dept_no="d004" dept_name="Production"/>
<departments dept_no="d006" dept_name="Quality Management"/>
<departments dept_no="d008" dept_name="Research"/>
<departments dept_no="d007" dept_name="Sales"/>
<employees emp_no="10001" birth_date="1953-09-02" first_name="Georgi" last_name="Facello" gender="M"
hire_date="1986-06-26"/>
<employees emp_no="10002" birth_date="1964-06-02" first_name="Bezalel" last_name="Simmel" gender="F"
hire_date="1985-11-21"/>
<employees emp_no="10003" birth_date="1959-12-03" first_name="Parto" last_name="Bamford" gender="M"
hire_date="1986-08-28"/>
<employees emp_no="10004" birth_date="1954-05-01" first_name="Chirstian" last_name="Koblick" gender="M"
hire_date="1986-12-01"/>
<employees emp_no="10005" birth_date="1955-01-21" first_name="Kyoichi" last_name="Maliniak" gender="M"
hire_date="1989-09-12"/>
<employees emp_no="10006" birth_date="1953-04-20" first_name="Anneke" last_name="Preusig" gender="F"
hire_date="1989-06-02"/>
<employees emp_no="10007" birth_date="1957-05-23" first_name="Tzvetan" last_name="Zielinski" gender="F"
hire_date="1989-02-10"/>
<employees emp_no="10008" birth_date="1958-02-19" first_name="Saniya" last_name="Kalloufi" gender="M"
hire_date="1994-09-15"/>
</data>
</import-request>
And here is my java class:
public class STAXParser {
static ConnectionManager connectionManager = new ConnectionManager();
String currentName;
public void parseDocuments() throws IOException, XMLStreamException {
XMLInputFactory inputFactory = XMLInputFactory.newFactory();
try (InputStream stream = Files.newInputStream(Paths.get("emp.xml"))) {
XMLEventReader reader = inputFactory.createXMLEventReader(stream);
while (reader.hasNext()) {
XMLEvent event = reader.nextEvent();
if (event.isStartElement()) {
StartElement startElement = event.asStartElement();
currentName = startElement.getName().getLocalPart();
System.out.println(startElement.getName());
Iterator attributes = startElement.getAttributes();
while(attributes.hasNext()) {
//System.out.println(attributes.next());
}
}
if (event.isCharacters()) {
if (!event.asCharacters().isWhiteSpace()) {
String data = event.asCharacters().getData();
checkName(currentName, data.trim());
}
}
}
}
}
private void checkName(String name, String event) {
if (name.equals("url"))
connectionManager.setUrl(event);
else if (name.equals("username"))
connectionManager.setUsername(event);
else if (name.equals("password"))
connectionManager.setPassword(event);
else if (name.equals("driverClassName"))
connectionManager.setDriver(event);
}
public static void main(String[] args) {
STAXParser s = new STAXParser();
try {
s.parseDocuments();
} catch (IOException e) {
e.printStackTrace();
} catch (XMLStreamException e) {
e.printStackTrace();
}
try {
connectionManager.getConnection().toString();
JDBCEmployeeDao d = new JDBCEmployeeDao(connectionManager);
// d.create();
} catch (SQLException e) {
e.printStackTrace();
}
}
}

I find that XMLStreamReader is much easier to use, since elements can be processed in context of the parent, unlike XMLEventReader that returns all events in a flat sequence.
Example of parsing your XML using XMLStreamReader. For simplicity of the example, the code ignores namespaces.
public static void main(String[] args) throws Exception {
XMLInputFactory inputFactory = XMLInputFactory.newFactory();
Connection connection = null;
try (InputStream stream = Files.newInputStream(Paths.get("src/main/resources/test.xml"))) {
XMLStreamReader reader = inputFactory.createXMLStreamReader(stream);
reader.nextTag(); // Position on root tag
if (! reader.getLocalName().equals("import-request"))
throw new XMLStreamException("Invalid root element: " + reader.getLocalName());
while (reader.nextTag() == XMLStreamConstants.START_ELEMENT) {
switch (reader.getLocalName()) {
case "connection-settings":
connection = parseConnectionSettings(reader);
break;
case "data":
if (connection == null)
throw new XMLStreamException("Missing <connection-settings> before <data>");
parseData(reader, connection);
break;
default:
// ignore unknown content
}
}
} finally {
if (connection != null)
connection.close();
}
}
private static Connection parseConnectionSettings(XMLStreamReader reader) throws Exception {
String username = null, password = null, url = null, driverClassName = null;
while (reader.nextTag() == XMLStreamConstants.START_ELEMENT) {
switch (reader.getLocalName()) {
case "username":
username = reader.getElementText();
break;
case "password":
password = reader.getElementText();
break;
case "url":
url = reader.getElementText();
break;
case "driverClassName":
driverClassName = reader.getElementText();
break;
default:
throw new XMLStreamException("Invalid element in <connection-settings>: " + reader.getLocalName());
}
}
Class.forName(driverClassName);
return DriverManager.getConnection(url, username, password);
}
private static void parseData(XMLStreamReader reader, Connection connection) throws Exception {
while (reader.nextTag() == XMLStreamConstants.START_ELEMENT) {
switch (reader.getLocalName()) {
case "departments":
processDepartments(reader, connection);
break;
case "employees":
processEmployees(reader, connection);
break;
default:
throw new XMLStreamException("Invalid element in <data>: " + reader.getLocalName());
}
}
}
private static void processDepartments(XMLStreamReader reader, Connection connection) throws Exception {
String dept_no = reader.getAttributeValue(null, "dept_no");
String dept_name = reader.getAttributeValue(null, "dept_name");
if (! reader.getElementText().isEmpty())
throw new XMLStreamException("<departments> must be empty element");
// process here
}
private static void processEmployees(XMLStreamReader reader, Connection connection) throws Exception {
// code here
}

Related

How to extract sub element as String from XML

I have a XML in String format:
<?xml version="1.0" encoding="UTF-8"?>
<ns4:DataRequest xmlns:ns4="urn:com:sony:xsd:DataRequest001" xmlns:ns2="urn:com:sony:xsd:pay001" xmlns:ns3="urn:com:sony:xsd:common">
<Hdr>
<Date>2021-01-28T13:29:00</Date>
<CallType>Item_Check</CallType>
<CallSubType>TFT</CallSubType>
<CallId>12345</CallId>
</Hdr>
<Pld>
<SpecData>
<MainSpec>
<AppId>123</AppId>
<Size>123</Size>
<Amt>2000</Amt>
<Payld><data><id>Id001</tag></data></Payld>
</MainSpec>
</SpecData>
</Pld>
</ns4:DataRequest>
I am extracting payld using the below code:
String payload = null;
XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(new StringReader(xmlAsString));
while (reader.hasNext()) {
reader.next();
if (reader.isStartElement()) {
if ("payld".equalsIgnoreCase(reader.getLocalName())) {
payload = reader.getElementText();
break;
}
}
}
Now want to extract SpecData tag value in a string something like:
String specData = "<MainSpec><AppId>....</AppId><MainSpec>";
new to XMLStreamReader, not getting how this can be achieved.

This should work:
String specData = null;
String payload = null;
XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(new StringReader(xmlAsString));
while (reader.hasNext()) {
reader.next();
if (reader.isStartElement()) {
if ("appId".equalsIgnoreCase(reader.getLocalName())) {
specData = reader.getElementText();
continue;
}
else if ("payld".equalsIgnoreCase(reader.getLocalName())) {
payload = reader.getElementText();
break;
}
}
}

Convert BufferedReader To InputStream

I just finished writing a bunch of code that works fine when reading from internal resource :))))
try {
SeriesDataXMLPullParserHandler seriesDataXmlPullParserHandler = new SeriesDataXMLPullParserHandler();
entries = seriesDataXmlPullParserHandler.parse(getAssets().open("series_box.xml"));
} catch (IOException e) {
e.printStackTrace();
Log.d("errorOpeningSeries", e.getMessage());
}
Collections.sort(entries, new Comparator<Entry>() {
#Override
public int compare(Entry entryOne, Entry entryTwo) {
return (entryOne.getSeriesName().compareTo(entryTwo.getSeriesName()));
}
});
listView.setAdapter(new MyAdapter(this, R.id.details_SeriesName, entries));
"SeriesDataXMLPullParserHandler" class parse data from xml file that uses InputStream as argument
here is "SeriesDataXMLPullParserHandler" class
public class SeriesDataXMLPullParserHandler {
List<Entry> entries;
private Entry entry;
private String text;
public SeriesDataXMLPullParserHandler() {
entries = new ArrayList<>();
}
public List<Entry> getEntries() {
return entries;
}
public List<Entry> parse(InputStream inputStream) {
XmlPullParserFactory xmlPullParserFactory = null;
XmlPullParser xmlPullParser = null;
try {
xmlPullParserFactory = XmlPullParserFactory.newInstance();
xmlPullParserFactory.setNamespaceAware(true);
xmlPullParser = xmlPullParserFactory.newPullParser();
xmlPullParser.setInput(inputStream, null);
int eventType = xmlPullParser.getEventType();
while (eventType != XmlPullParser.END_DOCUMENT) {
String tagname = xmlPullParser.getName();
switch (eventType) {
case XmlPullParser.START_TAG:
if (tagname.equalsIgnoreCase("series")) {
entry = new Entry();
}
break;
case XmlPullParser.TEXT:
text = xmlPullParser.getText();
break;
case XmlPullParser.END_TAG:
if (tagname.equalsIgnoreCase("series")) {
entries.add(entry);
} else if (tagname.equalsIgnoreCase("id")) {
entry.setId(text);
} else if (tagname.equalsIgnoreCase("Actors")) {
entry.setActors(text);
}else if (tagname.equalsIgnoreCase("Genre")) {
entry.setGenre(text);
} else if (tagname.equalsIgnoreCase("IMDB_ID")) {
entry.setImdb_id(text);
} else if (tagname.equalsIgnoreCase("Language")) {
entry.setLanguage(text);
} else if (tagname.equalsIgnoreCase("Network")) {
entry.setNetwork(text);
} else if (tagname.equalsIgnoreCase("NetworkID")) {
entry.setNetwork_id(text);
} else if (tagname.equalsIgnoreCase("Overview")) {
entry.setOverview(text);
} else if (tagname.equalsIgnoreCase("SeriesID")) {
entry.setSeriesId(text);
} else if (tagname.equalsIgnoreCase("SeriesName")) {
entry.setSeriesName(text);
}
break;
default:
break;
}
eventType = xmlPullParser.next();
}
} catch (XmlPullParserException | IOException e) {
e.printStackTrace();
}
return entries;
}
}
but the problem is when I want to get data from server, it comes in "InputStreamReader" type
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(url.openStream()));
the question is how do I change the type of "BufferedReader" to "InputStream" for parsing data???
or the best way to do such a thing ???
sorry for bad english :)

Spring Jaxb2: How to append batch data to XML file with no reading it to memory?

I need to write data to xml in batches.
There are following domain objects:
#XmlRootElement(name = "country")
public class Country {
#XmlElements({#XmlElement(name = "town", type = Town.class)})
private Collection<Town> towns = new ArrayList<>();
....
}
And:
#XmlRootElement(name = "town")
public class Town {
#XmlElement
private String townName;
// etc
}
I'm marhalling objects with Jaxb2. Configuration as follows:
marshaller = new Jaxb2Marshaller();
marshaller.setClassesToBeBound(Country.class, Town.class);
Because simple marshalling doesn't work here as marhaller.marshall(fileName, country) - it malformes xml.
Is there a way to tweek marhaller so that it would create file if it's not exists with all marhalled data or if exists just append it at the end of xml file ?
Also as this files are potentially large I don't want to read whole file in memory, append data and then write to disk.

I've used StAX for xml processing as it stream based, consumes less memory then DOM and has ability to read and write comparing to SAX which can only parse xml data, but can't write it.
The is the approach I came up with:
public enum StAXBatchWriter {
INSTANCE;
private static final Logger LOGGER = LoggerFactory.getLogger(StAXBatchWriter.class);
public void writeUrls(File original, Collection<Town> towns) {
XMLEventReader eventReader = null;
XMLEventWriter eventWriter = null;
try {
String originalPath = original.getPath();
File from = new File(original.getParent() + "/old-" + original.getName());
boolean isRenamed = original.renameTo(from);
if (!isRenamed)
throw new IllegalStateException("Failed to rename file: " + original.getPath() + " to " + from.getPath());
File to = new File(originalPath);
XMLInputFactory inFactory = XMLInputFactory.newInstance();
eventReader = inFactory.createXMLEventReader(new FileInputStream(from));
XMLOutputFactory outFactory = XMLOutputFactory.newInstance();
eventWriter = outFactory.createXMLEventWriter(new FileWriter(to));
XMLEventFactory eventFactory = XMLEventFactory.newInstance();
while (eventReader.hasNext()) {
XMLEvent event = eventReader.nextEvent();
eventWriter.add(event);
if (event.getEventType() == XMLEvent.START_ELEMENT && event.asStartElement().getName().toString().contains("country")) {
for (Town town : towns) {
writeTown(eventWriter, eventFactory, town);
}
}
}
boolean isDeleted = from.delete();
if (!isDeleted)
throw new IllegalStateException("Failed to delete old file: " + from.getPath());
} catch (IOException | XMLStreamException e) {
LOGGER.error(e.getMessage(), e);
throw new RuntimeException(e);
} finally {
try {
if (eventReader != null)
eventReader.close();
} catch (XMLStreamException e) {
LOGGER.error(e.getMessage(), e);
}
try {
if (eventWriter != null)
eventWriter.close();
} catch (XMLStreamException e) {
LOGGER.error(e.getMessage(), e);
}
}
}
private void writeTown(XMLEventWriter eventWriter, XMLEventFactory eventFactory, Town town) throws XMLStreamException {
eventWriter.add(eventFactory.createStartElement("", null, "town"));
// write town id
eventWriter.add(eventFactory.createStartElement("", null, "id"));
eventWriter.add(eventFactory.createCharacters(town.getId()));
eventWriter.add(eventFactory.createEndElement("", null, "id"));
//write town name
if (StringUtils.isNotEmpty(town.getName())) {
eventWriter.add(eventFactory.createStartElement("", null, "name"));
eventWriter.add(eventFactory.createCharacters(town.getName()));
eventWriter.add(eventFactory.createEndElement("", null, "name"));
}
// write other fields
eventWriter.add(eventFactory.createEndElement("", null, "town"));
}
}
It's not the best approach, dispite the fact that it's stream based and it's working, it has some overhead. When a batch will be added - the old file has to be re-read.
It will be nice to have an option to append the data at some point in file (like "append data to that file after 4 line"), but seems this can't be done.

Unexcepted NullPointerException RSS Reader in Java

I am trying change my RSS Reader code. I have something like this:
public class RSSFeedParser {
static final String TITLE = "title";
static final String DESCRIPTION = "description";
static final String CHANNEL = "channel";
static final String LANGUAGE = "language";
static final String COPYRIGHT = "copyright";
static final String LINK = "link";
static final String AUTHOR = "author";
static final String ITEM = "item";
static final String PUB_DATE = "pubDate";
static final String GUID = "guid";
public InputStream in = read();
private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
private XMLEventReader eventReader;
final URL url;
public RSSFeedParser(String feedUrl) {
try {
url = new URL(feedUrl);
eventReader = inputFactory.createXMLEventReader(in);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public Feed readFeed() {
Feed feed = null;
try {
boolean isFeedHeader = true;
String description = "";
String title = "";
String link = "";
String language = "";
String copyright = "";
String author = "";
String pubDate = "";
String guid = "";
while (eventReader.hasNext()) {
XMLEvent event = eventReader.nextEvent();
if (event.isStartElement()) {
String localPart = event.asStartElement().getName().getLocalPart();
switch (localPart) {
case ITEM:
if (isFeedHeader) {
isFeedHeader = false;
feed = new Feed(title, link, description, language, copyright, pubDate);
}
event = eventReader.nextEvent();
break;
case TITLE:
title = getCharacterData(event, eventReader);
break;
case DESCRIPTION:
description = getCharacterData(event, eventReader);
break;
case LINK:
link = getCharacterData(event, eventReader);
break;
case GUID:
guid = getCharacterData(event, eventReader);
break;
case LANGUAGE:
language = getCharacterData(event, eventReader);
break;
case AUTHOR:
author = getCharacterData(event, eventReader);
break;
case PUB_DATE:
pubDate = getCharacterData(event, eventReader);
break;
case COPYRIGHT:
copyright = getCharacterData(event, eventReader);
break;
}
}
else if(event.isEndElement()) {
if (event.asEndElement().getName().getLocalPart() == (ITEM)) {
FeedMessage message = new FeedMessage();
message.setAuthor(author);
message.setDescription(description);
message.setGuid(guid);
message.setLink(link);
message.setTitle(title);
feed.getMessages().add(message);
}
}
}
} catch (XMLStreamException e) {
throw new RuntimeException(e);
}
return feed;
}
private InputStream read(){
try{
return url.openStream();
}catch (IOException e){
throw new RuntimeException(e);
}
}
private String getCharacterData(XMLEvent event, XMLEventReader eventReader) throws XMLStreamException {
String results="";
event = eventReader.nextEvent();
if(event instanceof Characters){
results = event.asCharacters().getData();
}
return results;
}
And main:
public static void main(String[] args) {
RSSFeedParser parser = new RSSFeedParser("http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml");
Feed feed = parser.readFeed();
System.out.println(feed);
for (FeedMessage message : feed.getMessages()) {
System.out.println(message);
RSSFeedWriter writer = new RSSFeedWriter(feed, "articles.rss");
try {
writer.write();
} catch (Exception e) {
e.printStackTrace();
}
}
}
I get NullPointerException in
public InputStream in = read();
return url.openStream();
RSSFeedParser parser = new RSSFeedParser("http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml");
What's wrong with this code? Everything was working when the InputStream, XMLEventReader and XMLInputFactory was in separate class.

You are calling the read() method before the URL object is initialized. Try something like this:
public InputStream in;
public RSSFeedParser(String feedUrl) {
try {
url = new URL(feedUrl);
in = read();
eventReader = inputFactory.createXMLEventReader(in);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
Also, It is good practice to close resources after use (streams & readers).

Locating Specific Attributes in Digester - Java

I'm using the Apache Commons Digester and trying to locate a particular tag in the structure to include in the object.
<parent>
<image size="small">some url</image>
<image size="medium">some url</image>
<image size="large">some url</image>
<image size="huge">some url</image>
</parent>
I really only want the medium image to be included in my partent object but I'm not sure how I would do that.
Right now I'm using digester.addBeanPropertySetter(PathToParent+"/image","image"); but this gets updated for every image tag (as it should).
Ideally I would like something like digester.addBeanPropertySetter(PathToParent+"/image/medium","image"); but you can't do that.

I omitted generic getters/setters.
public class Parent {
private Image image;
public void setImage(Image image) {
if ("medium".equals(image.getSize())) {
this.image = image;
}
}
}
public class Image {
private String size;
private String url;
}
public static void main(String[] args) throws IOException, SAXException {
String s = "<parent>"
+ "<image size='small'>some url1</image>"
+ "<image size='medium'>some url2</image>"
+ "<image size='large'>some url3</image>"
+ "<image size='huge'>some url4</image>"
+ "</parent>";
Digester digester = new Digester();
digester.addObjectCreate("parent", Parent.class);
digester.addFactoryCreate("parent/image", new ImageCreationFactory());
digester.addBeanPropertySetter("parent/image", "url");
digester.addSetNext("parent/image", "setImage");
Parent p = (Parent) digester.parse(new StringReader(s));
}
public class ImageCreationFactory implements ObjectCreationFactory {
public Object createObject(Attributes attributes) throws Exception {
Image i = new Image();
i.setSize(attributes.getValue("size"));
return i;
}
}

I actually figured this out using the xmlpullparser - here is the code to get the image attribute "large" only and ignore the rest - it's the last "if" in the case statement.
public class XmlPullFeedParser extends BaseFeedParser {
public XmlPullFeedParser(String feedUrl) {
super(feedUrl);
}
public ArrayList<Message> parse() {
ArrayList<Message> messages = null;
XmlPullParser parser = Xml.newPullParser();
try {
// auto-detect the encoding from the stream
parser.setInput(this.getInputStream(), null);
int eventType = parser.getEventType();
Message currentMessage = null;
boolean done = false;
while (eventType != XmlPullParser.END_DOCUMENT && !done){
String name = null;
String attrib = null;
switch (eventType){
case XmlPullParser.START_DOCUMENT:
messages = new ArrayList<Message>();
break;
case XmlPullParser.START_TAG:
name = parser.getName();
attrib = parser.getAttributeValue(0);
if (name.equalsIgnoreCase(EVENT)){
currentMessage = new Message();
} else if (currentMessage != null){
if (name.equalsIgnoreCase(WEBSITE)){
currentMessage.setWebsite(parser.nextText());
} else if (name.equalsIgnoreCase(DESCRIPTION)){
currentMessage.setDescription(parser.nextText());
} else if (name.equalsIgnoreCase(START_DATE)){
currentMessage.setDate(parser.nextText());
} else if (name.equalsIgnoreCase(TITLE)){
currentMessage.setTitle(parser.nextText());
} else if (name.equalsIgnoreCase(HEADLINER)){
currentMessage.setHeadliner(parser.nextText());
} else if ((name.equalsIgnoreCase(IMAGE)) && (attrib.equalsIgnoreCase("large"))) {
currentMessage.setImage(parser.nextText());
}
}
break;
case XmlPullParser.END_TAG:
name = parser.getName();
if (name.equalsIgnoreCase(EVENT) && currentMessage != null){
messages.add(currentMessage);
} else if (name.equalsIgnoreCase(EVENTS)){
done = true;
}
break;
}
eventType = parser.next();
}
} catch (Exception e) {
Log.e("AndroidNews::PullFeedParser", e.getMessage(), e);
throw new RuntimeException(e);
}
return messages;
}
}

I do not think that it is possible. You have to write your own code to perform this kind of filtering.
But it is very simple. If you wish to create clean code write class named ImageAccessor with method getImage(String size). This method will get the data from digester and compare it with predefined size string (or pattern).

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

some suggestions for using STAX Parsing in java - java

Related

How to extract sub element as String from XML

Convert BufferedReader To InputStream

Spring Jaxb2: How to append batch data to XML file with no reading it to memory?

Unexcepted NullPointerException RSS Reader in Java

Locating Specific Attributes in Digester - Java

Categories

Resources