parse XML and convert to a Collection - java

<inputs>
<MAT_NO>123</MAT_NO>
<MAT_NO>323</MAT_NO>
<MAT_NO>4223</MAT_NO>
<FOO_BAR>122</FOO_BAR>
<FOO_BAR>125</FOO_BAR>
</inputs>
I have to parse the XML above. After parsing, I want the values in a Map<String, List<String>> whose keys are the child node names (MAT_NO, FOO_BAR)
and whose values are the text contents of those child nodes (123, 323, etc.).
Below is my attempt. Is there a better way of doing this?
public class UserInputsXmlParser extends DefaultHandler {
private final SaveSubscriptionValues subscriptionValues = null;
private String nodeValue = "";
private final String inputKey = "";
private final List<String> valuesList = null;
private Map<String, List<String>> userInputs;
public Map<String, List<String>> parse(final String strXML) {
try {
final SAXParserFactory parserFactory = SAXParserFactory
.newInstance();
final SAXParser saxParser = parserFactory.newSAXParser();
saxParser.parse(new InputSource(new StringReader(strXML)), this);
return userInputs;
} catch (final SAXException e) {
e.printStackTrace();
throw new MyException("", e);
} catch (final IOException e) {
e.printStackTrace();
throw new MyException("", e);
} catch (final ParserConfigurationException e) {
e.printStackTrace();
throw new MyException("", e);
} catch (final Exception e) {
e.printStackTrace();
throw new MyException("", e);
}
}
#Override
public void startElement(final String uri, final String localName,
final String qName, final Attributes attributes)
throws SAXException {
nodeValue = "";
if ("inputs".equalsIgnoreCase(qName)) {
userInputs = MyUtil.getNewHashMap();
return;
}
}
#Override
public void characters(final char[] ch, final int start, final int length)
throws SAXException {
if (!MyUtil.isEmpty(nodeValue)) {
nodeValue += new String(ch, start, length);
} else {
nodeValue = new String(ch, start, length);
}
}
#Override
public void endElement(final String uri, final String localName,
final String qName) throws SAXException {
if (!"inputs".equalsIgnoreCase(qName)) {
storeUserInputs(qName, nodeValue);
}
}
/**
* #param qName
* #param nodeValue2
*/
private void storeUserInputs(final String qName, final String nodeValue2) {
if (nodeValue2 == null || nodeValue2.trim().equals("")) { return; }
final String trimmedValue = nodeValue2.trim();
final List<String> values = userInputs.get(qName);
if (values != null) {
values.add(trimmedValue);
} else {
final List<String> valueList = new ArrayList<String>();
valueList.add(trimmedValue);
userInputs.put(qName, valueList);
}
}
public static void main(final String[] args) {
final String sample = "<inputs>" + "<MAT_NO>154400-0000</MAT_NO>"
+ "<MAT_NO> </MAT_NO>" + "<MAT_NO>154400-0002</MAT_NO>"
+ "<PAT_NO>123</PAT_NO><PAT_NO>1111</PAT_NO></inputs>";
System.out.println(new UserInputsXmlParser().parse(sample));
}
}
UPDATE: The children of the <inputs> node are dynamic. I will only know the name of the root node in advance.

Do you have to provide a solution as part of a SAX event handler? If not, you could use one of the many XML libraries around, such as dom4j. That makes the solution a lot simpler:
/** Parses a small hard-coded sample document and prints the resulting map. */
public static void main(String[] args) throws Exception
{
    final String inputXml = "<inputs>" + "<MAT_NO>154400-0000</MAT_NO>"
        + "<MAT_NO> </MAT_NO>" + "<MAT_NO>154400-0002</MAT_NO>"
        + "<PAT_NO>123</PAT_NO><PAT_NO>1111</PAT_NO></inputs>";
    final Map<String,List<String>> parsed = parse(inputXml);
    System.out.println(parsed);
}
/**
 * Reads the XML with dom4j and groups the text of the root's direct child elements by
 * element name.
 *
 * @param xml the XML document as a string
 * @return map from child element name to the text of every child with that name
 * @throws Exception if the document cannot be read
 */
static Map<String,List<String>> parse(String xml) throws Exception
{
    final Map<String,List<String>> valuesByName = new HashMap<String,List<String>>();
    final Document document = new SAXReader().read(new StringReader(xml));
    for (Object child : document.getRootElement().elements())
    {
        final Element childElement = (Element) child;
        final String childName = childElement.getName();
        // Elements whose text is only whitespace are kept as-is; filter here if unwanted.
        List<String> values = valuesByName.get(childName);
        if (values == null)
        {
            values = new ArrayList<String>();
            valuesByName.put(childName, values);
        }
        values.add(childElement.getText());
    }
    return valuesByName;
}

I would check xstream....( http://x-stream.github.io/tutorial.html )
XStream is a simple library to serialize objects to XML and back again.

For something this basic, look into xpath.

Related

SAX Parser does not display multiple identical tags

Previously, I was able to display the data of a single tag, but now that there are several identical tags, only one of their values is displayed instead of all of them.
This my parser code:
public class Runner {
public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser saxParser = spf.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader();
MyHandler handler = new MyHandler();
xmlReader.setContentHandler(handler);
xmlReader.parse("src/countries.xml");
Countries branches = handler.getBranches();
try (FileWriter files = new FileWriter("src/diploma/SAX.txt")) {
files.write("Item " + "\n" + String.valueOf(branches.itemList) + "\n");
}
}
private static class MyHandler extends DefaultHandler{
static final String HISTORY_TAG = "history";
static final String ITEM_TAG = "item";
static final String NAME_ATTRIBUTE = "name";
public Countries branches;
public Item currentItem;
private String currencyElement;
Countries getBranches(){
return branches;
}
public void startDocument() throws SAXException {
}
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
currencyElement = qName;
switch (currencyElement) {
case HISTORY_TAG: {
branches.itemList = new ArrayList<>();
currentItem = new Item();
currentItem.setHistoryName(String.valueOf(attributes.getValue(NAME_ATTRIBUTE)));
} break;
default: {}
}
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
String text = new String(ch, start, length);
if (text.contains("<") || currencyElement == null){
return;
}
switch (currencyElement) {
case ITEM_TAG: {
currentItem.setItem(text);
} break;
default: { }
}
}
#Override
public void endElement(String uri, String localName, String qName) throws SAXException{
switch (qName) {
case HISTORY_TAG: {
branches.itemList.add(currentItem);
currentItem = null;
} break;
default: {
}
}
currencyElement = null;
}
public void endDocument() throws SAXException {
System.out.println("SAX parsing is completed...");
}
}
}
Class Item:
/** One text entry together with the name of the history section it belongs to. */
public class Item {

    private String historyName;
    private String item;

    /** @return the text of this entry, or {@code null} if none has been set */
    public String getItem() {
        return item;
    }

    /** @param item the text of this entry */
    public void setItem(String item) {
        this.item = item;
    }

    /** @return the history section name, or {@code null} if none has been set */
    public String getHistoryName() {
        return historyName;
    }

    /** @param historyName the history section name */
    public void setHistoryName(String historyName) {
        this.historyName = historyName;
    }

    /** Renders both fields on two lines, each followed by a trailing {@code ", "}. */
    @Override
    public String toString() {
        return
                "historyName = " + historyName + ", " + "\n" + "item = " + item + ", ";
    }
}
And class Countries
/** Plain holder for a list of {@link Item} entries. */
public class Countries {
/** The held items. This class assigns no initial value, so the field is null until a caller sets it. */
public List<Item> itemList;
}
I have problems with this part
<history name="История">
<item>
История белорусских земель очень богата и самобытна.
</item>
<item>
Эту страну постоянно раздирали внутренние конфликты и противоречия, много раз она была втянута в войны.
</item>
<item>
В 1945 году Беларусь вступила в состав членов-основателей Организации Объединенных Наций.
</item>
</history>
Only the last "item" tag is displayed; the other tags with the same name are not displayed separately. I can't figure out where the error is, but I noticed that in "endElement" all the values are present, yet they end up as a single element. Does anyone know what the problem is?
You are creating a new ArrayList every time you encounter the item tag.
That is why you only see one item displayed after parsing.

Java Xml Element - Get node position in the file

I have XML file and I want to get position of a node.
<Root>
<Node1></Node1>
</Root>
I want to get 1) start and end positions; 2) start and end lines
How to do that? Thanks
Create a Reader that keeps track of position information you want:
static public class MyReader extends Reader {
final private Reader internalReader;
private int pos;
private int line;
public MyReader(Reader internalReader) {
this.internalReader = internalReader;
}
public int getPos() {
return pos;
}
public int getLine() {
return line;
}
#Override
public int read(char[] cbuf, int off, int len) throws IOException {
int chars_read = internalReader.read(cbuf, off, 1);
pos += chars_read;
if(cbuf[off] =='\n' && chars_read > 0) {
line++;
}
return chars_read;
}
#Override
public void close() throws IOException {
internalReader.close();
}
}
Use that with a ContentHandler that stores the position information in some data structure associated with the elements.
String xmlString = "<Root>\n"
        + "    <Node1></Node1>\n"
        + "</Root>";
StringReader strReader = new StringReader(xmlString);
MyReader reader = new MyReader(strReader);
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser parser = factory.newSAXParser();
XMLReader xmlreader = parser.getXMLReader();
// For every element name: the reader position / line count observed at each start and end event.
Map<String,List<Integer>> startMap = new HashMap<>();
Map<String,List<Integer>> endMap = new HashMap<>();
Map<String,List<Integer>> startLineMap = new HashMap<>();
Map<String,List<Integer>> endLineMap = new HashMap<>();
DefaultHandler handler = new DefaultHandler() {
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        record(startMap, startLineMap, qName);
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        record(endMap, endLineMap, qName);
    }

    // Stores the reader's current counters for qName. They say how much input the parser has
    // consumed when the event fires, which is not necessarily the exact offset of the tag.
    private void record(Map<String,List<Integer>> positions, Map<String,List<Integer>> lines, String qName) {
        positions.computeIfAbsent(qName, key -> new ArrayList<>()).add(reader.getPos());
        lines.computeIfAbsent(qName, key -> new ArrayList<>()).add(reader.getLine());
    }
};
xmlreader.setContentHandler(handler);
xmlreader.parse(new InputSource(reader));
System.out.println("startMap = " + startMap);
System.out.println("endMap = " + endMap);
System.out.println("startLineMap = " + startLineMap);
System.out.println("endLineMap = " + endLineMap);

Issue while fetching Images from RSS feed

I'm trying to load an RSS feed into my Android application. I can retrieve fields such as the title, description and link of each feed entry, but I am not able to get the image for a particular entry.
The following is my DefaultHandler subclass. Please go through it and help me out.
/**
 * SAX handler that turns the {@code <item>} elements of an RSS feed into
 * {@link RssFeedStructure} objects, stopping after {@link #ARTICLES_LIMIT} articles.
 */
public class XmlHandler extends DefaultHandler {
    private static final String MEDIA_CONTENT_QNAME = "media:content";
    // Namespace URI of the Media RSS extension, to which the "media" prefix is normally bound.
    private static final String MEDIA_RSS_NAMESPACE = "http://search.yahoo.com/mrss/";

    private RssFeedStructure feedStr = new RssFeedStructure();
    private List<RssFeedStructure> rssList = new ArrayList<RssFeedStructure>();
    private int articlesAdded = 0;
    // Number of articles to download
    private static final int ARTICLES_LIMIT = 15;
    StringBuffer chars = new StringBuffer();

    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) {
        chars = new StringBuffer();
        if (isMediaContent(uri, localName, qName)) {
            // getValue returns null when the attribute is absent; do not dereference it blindly.
            String imageUrl = atts.getValue("url");
            if (imageUrl != null && !imageUrl.equalsIgnoreCase("null")) {
                feedStr.setImgLink(imageUrl);
            } else {
                feedStr.setImgLink("");
            }
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        String name = elementName(localName, qName);
        if (name.equalsIgnoreCase("title")) {
            feedStr.setTitle(chars.toString());
        } else if (name.equalsIgnoreCase("description")) {
            feedStr.setDescription(chars.toString());
        } else if (name.equalsIgnoreCase("pubDate")) {
            feedStr.setPubDate(chars.toString());
        } else if (name.equalsIgnoreCase("encoded")) {
            feedStr.setEncodedContent(chars.toString());
        } else if (isMediaContent(uri, localName, qName)) {
            // The image link was already taken from the start tag's attributes.
        } else if (name.equalsIgnoreCase("link")) {
            try {
                feedStr.setUrl(new URL(chars.toString()));
            } catch (Exception ignored) {
                // Best effort: an entry with an unusable link is kept without a URL.
            }
        }
        if (name.equalsIgnoreCase("item")) {
            rssList.add(feedStr);
            feedStr = new RssFeedStructure();
            articlesAdded++;
            if (articlesAdded >= ARTICLES_LIMIT) {
                // Aborts the parse early; getLatestArticles catches this and returns the list.
                throw new SAXException("article limit reached");
            }
        }
    }

    @Override
    public void characters(char ch[], int start, int length) {
        chars.append(ch, start, length);
    }

    /**
     * Downloads and parses the feed.
     *
     * @param feedUrl address of the RSS feed
     * @return the articles parsed so far; on any download or parse error the list contains
     *         whatever was read before the error
     */
    public List<RssFeedStructure> getLatestArticles(String feedUrl) {
        java.io.InputStream in = null;
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            SAXParser sp = spf.newSAXParser();
            XMLReader xr = sp.getXMLReader();
            URL url = new URL(feedUrl);
            xr.setContentHandler(this);
            in = url.openStream();
            xr.parse(new InputSource(in));
        } catch (IOException ignored) {
            // Best effort: fall through and return what has been collected.
        } catch (SAXException ignored) {
            // Includes the deliberate abort once ARTICLES_LIMIT is reached.
        } catch (ParserConfigurationException ignored) {
            // Best effort: no parser available, return the (empty) list.
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return rssList;
    }

    /**
     * Returns the element name without prefix. Whether a parser fills in {@code localName},
     * {@code qName} or both depends on its namespace settings, so both are considered.
     */
    private static String elementName(String localName, String qName) {
        if (localName != null && localName.length() > 0) {
            return localName;
        }
        if (qName == null) {
            return "";
        }
        return qName.substring(qName.indexOf(':') + 1);
    }

    /** True for {@code media:content}, matched by qualified name or by namespace + local name. */
    private static boolean isMediaContent(String uri, String localName, String qName) {
        return MEDIA_CONTENT_QNAME.equalsIgnoreCase(qName)
                || (MEDIA_RSS_NAMESPACE.equals(uri) && "content".equalsIgnoreCase(localName));
    }
}

RSS Parser returns 403

I'm new to Java and we were given an assignment about XML parsing. We have done DOM and are now on SAX, which is why I'm using a SAX parser to parse an RSS feed. It already works on local files, but when I try to parse an online RSS feed, the server returns HTTP error 403. I haven't tried parsing the same site with DOM because my laptop is so slow that it takes five minutes just to open a file.
Thanks for the help.
/**
 * SAX handler that collects the text of the {@code <description>} elements of an RSS feed.
 * Only as many descriptions as fit into the internal array are kept; later ones are ignored.
 */
public class NewsHandler extends DefaultHandler {
    // Sent with the request; without a User-Agent header the feed server answered with HTTP 403.
    private static final String USER_AGENT = "Mozilla/5.0 (compatible; NewsHandler/1.0)";

    private String url = "http://tomasinoweb.org/feed/rss";
    private boolean inDescription = false;
    private String[] descs = new String[11];
    /** Text of the description currently being read; SAX may deliver it in several chunks. */
    private final StringBuilder currentDesc = new StringBuilder();
    int i = 0;

    /** Downloads the feed and parses it with this handler; errors are printed, not thrown. */
    public void processFeed() {
        try {
            SAXParserFactory factory =
                    SAXParserFactory.newInstance();
            SAXParser parser = factory.newSAXParser();
            XMLReader reader = parser.getXMLReader();
            reader.setContentHandler(this);
            java.net.URLConnection connection = new URL(url).openConnection();
            connection.setRequestProperty("User-Agent", USER_AGENT);
            InputStream inputStream = connection.getInputStream();
            try {
                reader.parse(new InputSource(inputStream));
            } finally {
                inputStream.close();
            }
        } catch (Exception e) { e.printStackTrace(); }
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if (qName.equals("description")) {
            inDescription = true;
            currentDesc.setLength(0);
        }
    }

    @Override
    public void characters(char ch[], int start, int length) {
        // Append instead of overwrite: one element's text can arrive in multiple calls.
        if (inDescription) {
            currentDesc.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equals("description")) {
            inDescription = false;
            // Guard against feeds with more descriptions than the array can hold.
            if (i < descs.length) {
                descs[i] = currentDesc.toString();
            }
            i++;
        }
    }

    /** @return the description stored at {@code index}, or {@code null} if that slot is unused */
    public String getDesc(int index) { return descs[index]; }

    public static void main(String[] args) {
        NewsHandler nh = new NewsHandler();
        nh.processFeed();
        for (int i = 0; i < 10; i++) {
            System.out.println(nh.getDesc(i));
        }
    }
}
Solution:
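Instead of opening the stream directly from a String url = "url", I created URL url = new URL("url") and URLConnection con = url.openConnection(), then called con.addRequestProperty("user-agent", <user-agent string>) before reading from the connection. Without a user-agent header the server answered with 403.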

Can't add a value to ArrayList because it's in an Inner Class?

I am writing a file that can parse rdf and owl files. I am using SAX and Java.
My problem is on the line activeObject.add(file);
I get the error "Syntax error on tokens, Misplaced construct(s)" and I don't know what this means. It doesn't seem to make sense to me; any help would be much appreciated.
PS: I might be completely wrong about what is causing the error, it might have nothing to do with an inner class.
public static void main(String args[]) throws URISyntaxException, MalformedURLException, IOException {
// String file =
// "http://www.srdc.metu.edu.tr/ubl/contextOntology/cpc.owl";
// final String file = "http://onto.eva.mpg.de/obo/image.owl";
// final String file =
// "http://www.informatik.uni-ulm.de/ki/Liebig/owl/SUMO-LiteTB.rdf";
// final String file =
// "http://www.csd.abdn.ac.uk/~apreece/research/TowardsAnIntelligentWeb/PUB_findallbyKen_result.rdf";
// final String file =
// "http://www.srdc.metu.edu.tr/ubl/contextOntology/naics.owl";
final String file = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
URI uri = new URI(file);
InputStream is = uri.toURL().openStream();
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
ArrayList<String> activeProperty = new ArrayList<String>();
ArrayList<Triple> triplesList = new ArrayList<Triple>();
ArrayList<String> activeObject = new ArrayList<String>();
boolean name = false;
activeObject.add(file);
public void startElement(String uri, String localName,String qName, Attributes attributes) throws SAXException {
activeProperty.add(qName);
int attrLength = attributes.getLength();
for (int i = 0; i < attrLength; i++) {
String attrName = attributes.getQName(i).toLowerCase();
String attrValue = attributes.getValue(i).toLowerCase();
if (attrName.equals("rdf:about") || attrName.equals("rdf:resource") || attrName.equals("rdf:id")) {
activeObject.add(attrValue);
System.out.println(activeObject);
}
else{
String subject = activeObject.get(activeObject.size()-1);
String predicate = attrName;
String object = attrValue;
Triple newTriple = new Triple(subject, predicate, object);
}
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
// tempVal = new String(ch, start, length);
}
public void endElement(String uri, String localName, String rawName) {
String subject = activeObject.get(activeObject.size()-2);
String predicate = activeProperty.get(activeProperty.size()-1);
String object = activeObject.get(activeObject.size()-1);
Triple newTriple = new Triple(subject,predicate, object);
if (rawName.equals(activeProperty.get(activeProperty.size() - 1))) {
activeProperty.remove(activeProperty.size() - 1);
}
else{
System.out.println("Something is seriosuly wrong ...sdf.sdf.we8ryw98fsydh");
}
// System.out.println(activeProperty);
}
// if (qName.equalsIgnoreCase("NAME")) {
// name = true;
// }
// public void characters(char ch[], int start, int length)
// throws SAXException {
// if (name) {
// System.out.println("Name: "
// + new String(ch, start, length));
// name = false;
// }
// }
};
saxParser.parse(is, handler);
} catch (Exception e) {
e.printStackTrace();
}
}
You're trying to write normal statements within an anonymous class, as if you'd tried this:
// Illustration only: this class intentionally does not compile.
class Foo extends DefaultHandler {
ArrayList<String> activeProperty = new ArrayList<String>();
ArrayList<Triple> triplesList = new ArrayList<Triple>();
ArrayList<String> activeObject = new ArrayList<String>();
boolean name = false;
// Invalid: a statement may not appear directly in a class body, only inside a method,
// constructor or initializer block.
activeObject.add(file);
}
You can't do that. If you want this to be performed on construction, you can put it in an initializer block, like this:
ArrayList<String> activeProperty = new ArrayList<String>();
ArrayList<Triple> triplesList = new ArrayList<Triple>();
ArrayList<String> activeObject = new ArrayList<String>();
boolean name = false;
// Instance initializer block: runs whenever an instance is created, after the field
// initializers that precede it, so activeObject already exists here.
{
activeObject.add(file);
}
Or you could populate the list in some other way, perhaps - for instance with Guava you could write:
ArrayList<String> activeObject = Lists.newArrayList(file);

Categories

Resources