reading xml file using sax parser problems - java

I need to read this XML file using a SAX parser, but the problem is that the same child node name appears under different parent nodes. This is the file:
<MasterData>
<node1>200000</node1>
<Location>
<oa:Code>88</oa:Code>
<oa:Description languageID="en-us">addres1</oa:Description>
<oa:Description languageID="ar-ae">addres2</oa:Description>
</Location>
<address>
<oa:Code>55</oa:Code>
<oa:Description languageID="en-us">Loc1</oa:Description>
<oa:Description languageID="ar-ae">Loc2</oa:Description>
</address>
</MasterData>
import java.util.ArrayList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class MyHandler2 extends DefaultHandler {
private MasterData masterdata= null;
private IssueAuthority issueAuth = null ;
private List<IssueAuthority> issueAuthList = null ;
public MasterData getMasterData() {
return masterdata;
}
boolean bmaster = false ;
boolean bLiceNum = false;
boolean bissueAuth = false ;
boolean bautCode = false ;
boolean bautDescAr = false ;
boolean bautDescEn = false ;
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
if (qName.equalsIgnoreCase("MasterData")) {
//initialize Employee object and set id attribute
masterdata = new MasterData();
System.out.println("MasterData");
}else if (qName.equalsIgnoreCase("LicenseNumber")) {
bLiceNum = true;
System.out.println("qName"+qName);
}else if (qName.equalsIgnoreCase("IssueAuthority")) {
bissueAuth = true;
}else if (qName.equalsIgnoreCase("oa:Code")) {
bautCode = true;
}
}
#Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if (qName.equalsIgnoreCase("IssueAuthority")) {
issueAuthList = new ArrayList<IssueAuthority>() ;
issueAuthList.add(issueAuth) ;
}
if (qName.equalsIgnoreCase("MasterData")) {
//add Employee object to list
masterdata.setIssueAuthority(issueAuthList);
}
}
#Override
public void characters(char ch[], int start, int length) throws SAXException {
if (bLiceNum) {
masterdata.setLicenseNumber(new String(ch, start, length));
bLiceNum = false;
} else if (bissueAuth) {
issueAuth = new IssueAuthority();
bissueAuth = false ;
}else if(bautCode){
issueAuth.setCode(Integer.parseInt(new String(ch, start, length)));
bautCode = false ;
}
}
}
This is my handler class.

You can differentiate between the Description child nodes of Location and address by setting a flag when the parser enters the parent element and clearing it on exit. That way you know exactly which parent element you are inside when you read the text of the child nodes.
The code snippet below illustrates this:
// Texts collected per parent element, so equally named children stay apart.
List<String> locationDescriptions = new ArrayList<String>();
List<String> addressDescriptions = new ArrayList<String>();
// True while the parser is between the start and end tag of the respective parent.
boolean insideAddress = false;
boolean insideLocation = false;

@Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException {
    ...
    // Entering a parent element: remember which one.
    if (qName.equalsIgnoreCase("Location")) {
        insideLocation = true;
    }
    if (qName.equalsIgnoreCase("address")) {
        insideAddress = true;
    }
    ...
}

@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
    ...
    // Leaving a parent element: clear its flag again.
    if (qName.equalsIgnoreCase("Location")) {
        insideLocation = false;
    }
    if (qName.equalsIgnoreCase("address")) {
        insideAddress = false;
    }
    ...
}

@Override
public void characters(char ch[], int start, int length) throws SAXException {
    ...
    // Route the description text to the list that belongs to the enclosing parent.
    if (insideLocation && (bautDescAr || bautDescEn)) {
        locationDescriptions.add(new String(ch, start, length));
    }
    if (insideAddress && (bautDescAr || bautDescEn)) {
        addressDescriptions.add(new String(ch, start, length));
    }
    ...
}

Related

SAX Parser does not display multiple identical tags

Previously, I was able to display the data of a single tag, but this time only one value is displayed instead of several.
This is my parser code:
public class Runner {
public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser saxParser = spf.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader();
MyHandler handler = new MyHandler();
xmlReader.setContentHandler(handler);
xmlReader.parse("src/countries.xml");
Countries branches = handler.getBranches();
try (FileWriter files = new FileWriter("src/diploma/SAX.txt")) {
files.write("Item " + "\n" + String.valueOf(branches.itemList) + "\n");
}
}
private static class MyHandler extends DefaultHandler{
static final String HISTORY_TAG = "history";
static final String ITEM_TAG = "item";
static final String NAME_ATTRIBUTE = "name";
public Countries branches;
public Item currentItem;
private String currencyElement;
Countries getBranches(){
return branches;
}
public void startDocument() throws SAXException {
}
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
currencyElement = qName;
switch (currencyElement) {
case HISTORY_TAG: {
branches.itemList = new ArrayList<>();
currentItem = new Item();
currentItem.setHistoryName(String.valueOf(attributes.getValue(NAME_ATTRIBUTE)));
} break;
default: {}
}
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
String text = new String(ch, start, length);
if (text.contains("<") || currencyElement == null){
return;
}
switch (currencyElement) {
case ITEM_TAG: {
currentItem.setItem(text);
} break;
default: { }
}
}
#Override
public void endElement(String uri, String localName, String qName) throws SAXException{
switch (qName) {
case HISTORY_TAG: {
branches.itemList.add(currentItem);
currentItem = null;
} break;
default: {
}
}
currencyElement = null;
}
public void endDocument() throws SAXException {
System.out.println("SAX parsing is completed...");
}
}
}
Class Item:
/**
 * Simple mutable holder for the name of a {@code history} element and one item text.
 */
public class Item {

    private String historyName;
    private String item;

    /** Returns the item text, or {@code null} if none has been set. */
    public String getItem() {
        return item;
    }

    /** Replaces the item text. */
    public void setItem(String item) {
        this.item = item;
    }

    /** Returns the history name, or {@code null} if none has been set. */
    public String getHistoryName() {
        return historyName;
    }

    /** Replaces the history name. */
    public void setHistoryName(String historyName) {
        this.historyName = historyName;
    }

    /** Renders both fields on two lines, each followed by a comma and a space. */
    @Override
    public String toString() {
        return
                "historyName = " + historyName + ", " + "\n" + "item = " + item + ", ";
    }
}
And class Countries
/** Holder for the list of {@code Item} objects produced by the parser. */
public class Countries {
// Not initialised here, so it is null until some caller assigns a list.
public List<Item> itemList;
}
I have problems with this part
<history name="История">
<item>
История белорусских земель очень богата и самобытна.
</item>
<item>
Эту страну постоянно раздирали внутренние конфликты и противоречия, много раз она была втянута в войны.
</item>
<item>
В 1945 году Беларусь вступила в состав членов-основателей Организации Объединенных Наций.
</item>
</history>
I only display the last "item" tag, and other duplicate tags are displayed only in the singular. I can't figure out where the error is, but I noticed that in "endElement" all values are displayed, but as one element. Maybe someone knows what's the matter?
You are creating a new ArrayList every time you encounter the item tag.
That is why you only see one item displayed after parsing.

RSS Reader using Sax Parser losing characters from title

I'm trying to use a SAX parser in order to return the contents of an RSS feed from a URL - http://pitchfork.com/rss/news/, but often characters are lost in displaying the title, showing partial text or just a closing tag ">"
How can i modify my handler class to prevent this? I think I should probably use StringBuilder or StringBuffer, but i'm not sure how to implement it.
ParseHandler.java
public class RssParseHandler extends DefaultHandler {
//Parsed items
private List<RssItem> rssItems;
private RssItem currentItem;
private boolean parsingTitle;
private boolean parsingLink;
private boolean parsing_id;
private boolean parsingDescription;
public RssParseHandler() {
rssItems = new ArrayList<RssItem>();
}
public List<RssItem> getItems() {
return rssItems;
}
//Creates empty RssItem object during the process of an item start tag
//Indicators are set to true when particular tag is being processed
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if ("item".equals(qName)) {
currentItem = new RssItem();
} else if ("title".equals(qName)) {
parsingTitle = true;
} else if ("link".equals(qName)) {
parsingLink = true;
} else if ("_id".equals(qName)) {
parsing_id = true;
} else if ("description".equals(qName)) {
parsingDescription = true;
}
}
//Current RssItem is added to the list following process of end tag
#Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if ("item".equals(qName)) {
rssItems.add(currentItem);
currentItem = null;
} else if ("title".equals(qName)) {
parsingTitle = false;
} else if ("link".equals(qName)) {
parsingLink = false;
} else if ("_id".equals(qName)) {
parsing_id = false;
} else if ("description".equals(qName)) {
parsingDescription = false;
}
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (parsingTitle) {
if (currentItem != null)
currentItem.setTitle(new String(ch, start, length));
} else if (parsingLink) {
if (currentItem != null) {
currentItem.setLink(new String(ch, start, length));
parsingLink = false;
}
} else if (parsing_id) {
if (currentItem != null) {
currentItem.set_id(new String(ch, start, length));
parsing_id = false;
}
} else if (parsingDescription) {
if (currentItem != null) {
currentItem.setDescription(new String(ch, start, length));
parsingDescription = false;
}
}
}}//rssHandlerClass
Use a StringBuilder to accumulate the element's text instead of creating a new String on every characters() call. As the documentation says:
The Parser will call this method to report each chunk of character data. SAX parsers may return all contiguous character data in a single chunk, or they may split it into several chunks; however, all of the characters in any single event must come from the same external entity so that the Locator provides useful information.
And @CommonWares says exactly this in his post Here.
Build the element text as it arrives using a StringBuilder, since the text may come in several chunks rather than as one complete string (this explains the truncated titles!). You may or may not need the isBuilding flag, but I don't know your entire implementation, so I added it just in case.
StringBuilder mSb;
boolean isBuilding;
#Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
mSb = new StringBuilder();
isBuilding = true;
if(qName.equals("title")){
parsingTitle = true;
}
...
...
}
#Override
public void characters (char ch[], int start, int length) {
if (mSb !=null && isBuilding) {
for (int i=start; i<start+length; i++) {
mSb.append(ch[i]);
}
}
}
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if(parsingTitle){
currentItem.setTitle(sb.toString().trim());
parsingTitle = false;
isBuilding = false;
}
}

My Parser Skips Elements

I'm trying to make an RSS reader that uses an XML from the web. For some reason it only reads the last element.
This is pretty much the XML file:
<rss version="2.0">
<channel>
<item>
<mainTitle>...</mainTitle>
<headline>
<title>...</title>
<description>...</description>
<subTitle>...</subTitle>
<link>...</link>
</headline>
<headline>
<title>...</title>
<description>...</description>
<subTitle>...</subTitle>
<link>...</link>
</headline>
</item>
<item>
<mainTitle>...</mainTitle>
<headline>
<title>...</title>
<description>...</description>
<subTitle>...</subTitle>
<link>...</link>
</headline>
<headline>
<title>...</title>
<description>...</description>
<subTitle>...</subTitle>
<link>...</link>
</headline>
</item>
</channel>
</rss>
This is the parser:
/**
 * SAX handler that builds {@code Article} objects from a feed and stops after
 * {@code ARTICLES_LIMIT} articles by throwing a {@code SAXException}.
 */
public class RssHandler extends DefaultHandler {

    // Maximum number of articles to collect before parsing is aborted.
    private static final int ARTICLES_LIMIT = 20;

    // Article under construction and the articles finished so far.
    private Article currentArticle = new Article();
    private List<Article> articleList = new ArrayList<Article>();

    // How many articles have been added to the list.
    private int articlesAdded = 0;

    // Text gathered for the element that is currently open.
    StringBuffer chars = new StringBuffer();

    // Capacity of a freshly created StringBuffer.
    int cap = new StringBuffer().capacity();

    // Basic booleans
    private boolean wantedItem = false;
    private boolean wantedHeadline = false;
    private boolean wantedTitle = false;

    public RssHandler() {
        this.currentArticle = new Article();
    }

    /** Returns the articles completed so far. */
    public List<Article> getArticleList() {
        return articleList;
    }

    /** Returns the article that is currently being filled. */
    public Article getParsedData() {
        return this.currentArticle;
    }

    /** Begins every element with a new, empty text buffer. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) {
        chars = new StringBuffer();
    }

    /** Adds the delivered chunk to the text buffer. */
    @Override
    public void characters(char ch[], int start, int length) {
        chars.append(ch, start, length);
    }

    /**
     * Copies the buffered text into the matching article field, or closes the
     * article on {@code item}, then empties the buffer.
     *
     * @throws SAXException once the article limit has been reached
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        boolean handled = storeField(localName);
        if (!handled && localName.equalsIgnoreCase("item")) {
            finishArticle();
        }
        chars.setLength(0);
    }

    // Writes the buffered text to the field named by tag; returns false for any other tag.
    private boolean storeField(String tag) {
        if (tag.equalsIgnoreCase("title")) {
            currentArticle.setTitle(chars.toString());
            return true;
        }
        if (tag.equalsIgnoreCase("subtitle")) {
            currentArticle.setDescription(chars.toString());
            return true;
        }
        if (tag.equalsIgnoreCase("pubdate")) {
            currentArticle.setPubDate(chars.toString());
            return true;
        }
        if (tag.equalsIgnoreCase("guid")) {
            currentArticle.setGuid(chars.toString());
            return true;
        }
        if (tag.equalsIgnoreCase("author")) {
            currentArticle.setAuthor(chars.toString());
            return true;
        }
        if (tag.equalsIgnoreCase("link")) {
            currentArticle.setEncodedContent(chars.toString());
            return true;
        }
        return false;
    }

    // Moves the current article into the list and aborts parsing at the limit.
    private void finishArticle() throws SAXException {
        articleList.add(currentArticle);
        currentArticle = new Article();
        articlesAdded++;
        if (articlesAdded >= ARTICLES_LIMIT) {
            throw new SAXException();
        }
    }
}
Whenever I debug the application qName is never a direct child of Item.
It reads rss -> channel -> item -> title -> description ...
I'm clueless. Please help!
1) At the end of endElement() method, you are not resetting the chars length i.e
// Sketch of endElement: whatever happens above, the shared text buffer is emptied last.
public void endElement(String uri, String localName, String qName)
throws SAXException {
//...
//Reset 'chars' length at the end always.
chars.setLength(0);
}
2) Change your characters(...) method like below:
// Appends the delivered chunk to the buffer instead of replacing what is already there.
public void characters(char ch[], int start, int length) {
chars.append(ch, start, length);
}
[EDIT
3) Move initialization of 'chars' from 'startElement' to constructor. i.e:
// Constructor: creates the first article and the single text buffer used for the whole parse.
public RssHandler() {
this.currentArticle = new Article();
//Add here..
chars = new StringBuffer();
}
and,
// startElement no longer allocates a buffer; the assignment is left commented out to show what to remove.
public void startElement(String uri, String localName, String qName,
Attributes atts) {
//Remove below line..
//chars = new StringBuffer();
}
4) Finally, use qName to check matching tags instead of localName i.e
if (qName.equalsIgnoreCase("title")) {
currentArticle.setTitle(chars.toString().trim());
} else if (qName.equalsIgnoreCase("subtitle")) {
currentArticle.setDescription(chars.toString().trim());
} //...
EDIT]
More info: Using SAXParser in Android

Java Sax Parser only returning one line of a tag

I am trying to parse the description tag in the xml but it only outputs one line:
description: <img src=http://www.ovations365.com/sites/ovations365.com/images/event/441705771/sparkswebsite_medium.jpg alt="SPARKS: Understanding Energy">
That is only a small part of the text in the CDATA and I'm trying to output the description for multiple items. Why can't I get the whole CDATA?
The XML is located: http://feeds.feedburner.com/Events-Ovations365
package com.example.ovations_proj;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.example.ovations_proj.RssItem;
public class RssParseHandler extends DefaultHandler {
private List<RssItem> rssItems;
// Used to reference item while parsing
private RssItem currentItem;
// Parsing title indicator
private boolean parsingTitle;
// Parsing link indicator
private boolean parsingLink;
private boolean parsingDes;
public RssParseHandler() {
rssItems = new ArrayList<RssItem>();
}
public List<RssItem> getItems() {
return rssItems;
}
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
System.out.println("Start Element :" + qName);
if ("item".equals(qName)) { //item
currentItem = new RssItem();
} else if ("title".equals(qName)) { //title
parsingTitle = true;
} else if ("link".equals(qName)) { //link
parsingLink = true;
}else if ("description".equals(qName) ) { //description
parsingDes = true;
}
}
#Override
public void endElement(String uri, String localName, String qName) throws SAXException {
System.out.println("End Element :" + qName);
if ("item".equals(qName)) {
rssItems.add(currentItem);//item
currentItem = null;
} else if ("title".equals(qName)) {//title
parsingTitle = false;
} else if ("link".equals(qName)) {//link
parsingLink = false;
} else if ("description".equals(qName) ) { //description
parsingDes = false;
}
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (parsingTitle) {
if (currentItem != null){
currentItem.setTitle(new String(ch, start, length));
}
} else if (parsingLink) {
if (currentItem != null) {
currentItem.setLink(new String(ch, start, length));
parsingLink = false;
}
} else if (parsingDes) {
if (currentItem != null) {
currentItem.setDes(new String(ch, start, length));
System.out.println("description: " + currentItem.getDes());
parsingDes = false;
}
}
}
}
It seems that the character data in the <![CDATA[...]]> sections is being sent in multiple chunks, i.e. in multiple calls to the characters method.
The ContentHandler documentation for the characters method mentions that SAX parsers are free to do this:
SAX parsers may return all contiguous character data in a single chunk, or they may split it into several chunks[....]
You'll therefore need to adjust your characters method to handle being called multiple times for the same chunk of contiguous character data.

Error in output of a simple SAX parser

I saw an example on mkyong - http://www.mkyong.com/java/how-to-read-xml-file-in-java-sax-parser/
I tried to make it work for the xml file (below) by making the following modifications to the code in the above page -
1 - Have only two if blocks in startElement() and characters() methods.
2 - Change the print statements in above methods, ie
FIRSTNAME and First Name = passenger id
LASTNAME and Last Name = name
The problem is - In the output, I see the word passenger instead of the value of passenger id. How do i fix that ?
<?xml version="1.0" encoding="utf-8"?>
<root xmlns:android="www.google.com">
<passenger id="001">
<name>Tom Cruise</name>
</passenger>
<passenger id="002">
<name>Tom Hanks</name>
</passenger>
</root>
Java Code
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Reads {@code c:\flight.xml} with a SAX parser and prints every start tag, end tag
 * and the text that follows an element named {@code passenger id} or {@code name}.
 */
public class ReadXMLFileSAX {

    /** Handler that logs tags and prints the first text chunk after a flagged element. */
    private static final class FlightHandler extends DefaultHandler {

        // Raised by startElement, lowered by characters once the text has been printed.
        private boolean expectPassengerId = false;
        private boolean expectName = false;

        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            System.out.println("Start Element :" + qName);
            expectPassengerId |= qName.equalsIgnoreCase("passenger id");
            expectName |= qName.equalsIgnoreCase("name");
        }

        @Override
        public void endElement(String uri, String localName,
                String qName) throws SAXException {
            System.out.println("End Element :" + qName);
        }

        @Override
        public void characters(char ch[], int start, int length) throws SAXException {
            if (expectPassengerId) {
                expectPassengerId = false;
                System.out.println("passenger id : " + new String(ch, start, length));
            }
            if (expectName) {
                expectName = false;
                System.out.println("name : " + new String(ch, start, length));
            }
        }
    }

    public static void main(String[] argv) {
        try {
            SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
            parser.parse("c:\\flight.xml", new FlightHandler());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
In startElement, when the element is "passenger", the Attributes argument you receive holds that value (the id attribute).
/**
 * Prints the {@code id} attribute of every {@code passenger} element.
 */
public void startElement(String uri, String localName,
        String qName, Attributes attributes)
        throws SAXException {
    // Attribute values arrive with the start tag, not through characters().
    final boolean passengerTag = qName.equalsIgnoreCase("passenger");
    if (!passengerTag || attributes == null) {
        return;
    }
    System.out.println(attributes.getValue("id"));
}

Categories

Resources