parse an xml string in java? - java

how do you parse xml stored in a java string object?
Java's XMLReader only parses XML documents from a URI or inputstream. is it not possible to parse from a String containing an xml data?
Right now I have the following:
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser sp = factory.newSAXParser();
XMLReader xr = sp.getXMLReader();
ContactListXmlHandler handler = new ContactListXmlHandler();
xr.setContentHandler(handler);
xr.p
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
And on my handler i have this:
public class ContactListXmlHandler extends DefaultHandler implements Resources {
private List<ContactName> contactNameList = new ArrayList<ContactName>();
private ContactName contactItem;
private StringBuffer sb;
public List<ContactName> getContactNameList() {
return contactNameList;
}
#Override
public void startDocument() throws SAXException {
// TODO Auto-generated method stub
super.startDocument();
sb = new StringBuffer();
}
#Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
// TODO Auto-generated method stub
super.startElement(uri, localName, qName, attributes);
if(localName.equals(XML_CONTACT_NAME)){
contactItem = new ContactName();
}
sb.setLength(0);
}
#Override
public void characters(char[] ch, int start, int length){
// TODO Auto-generated method stub
try {
super.characters(ch, start, length);
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
sb.append(ch, start, length);
}
#Override
public void endDocument() throws SAXException {
// TODO Auto-generated method stub
super.endDocument();
}
/**
* where the real stuff happens
*/
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
// TODO Auto-generated method stub
//super.endElement(arg0, arg1, arg2);
if(contactItem != null){
if (localName.equalsIgnoreCase("title")) {
contactItem.setUid(sb.toString());
Log.d("handler", "setTitle = " + sb.toString());
} else if (localName.equalsIgnoreCase("link")) {
contactItem.setFullName(sb.toString());
} else if (localName.equalsIgnoreCase("item")){
Log.d("handler", "adding rss item");
contactNameList.add(contactItem);
}
sb.setLength(0);
}
}
Thanks in advance

The SAXParser can read an InputSource.
An InputSource can take a Reader in its constructor
So, you can put parse XML string via a StringReader
new InputSource(new StringReader("... your xml here....")));

Try jcabi-xml (see this blog post) with a one-liner:
XML xml = new XMLDocument("<document>...</document>")

Your XML might be simple enough to parse manually using the DOM or SAX API, but I'd still suggest using an XML serialization API such as JAXB, XStream, or Simple instead because writing your own XML serialization/deserialization code is a drag.
Note that the XStream FAQ erroneously claims that you must use generated classes with JAXB:
How does XStream compare to JAXB (Java API for XML Binding)?
JAXB is a Java binding tool. It generates Java code from a schema and
you are able to transform from those classes into XML matching the
processed schema and back. Note, that you cannot use your own objects,
you have to use what is generated.
It seems this was true was true at one time, but JAXB 2.0 no longer requires you to use Java classes generated from a schema.
If you go this route, be sure to check out the side-by-side comparisons of the serialization/marshalling APIs I've mentioned:
http://blog.bdoughan.com/2010/10/how-does-jaxb-compare-to-xstream.html
http://blog.bdoughan.com/2010/10/how-does-jaxb-compare-to-simple.html

Take a look at this: http://www.rgagnon.com/javadetails/java-0573.html
import javax.xml.parsers.*;
import org.xml.sax.InputSource;
import org.w3c.dom.*;
import java.io.*;
public class ParseXMLString {
public static void main(String arg[]) {
String xmlRecords =
"<data>" +
" <employee>" +
" <name>John</name>" +
" <title>Manager</title>" +
" </employee>" +
" <employee>" +
" <name>Sara</name>" +
" <title>Clerk</title>" +
" </employee>" +
"</data>";
try {
DocumentBuilderFactory dbf =
DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xmlRecords));
Document doc = db.parse(is);
NodeList nodes = doc.getElementsByTagName("employee");
// iterate the employees
for (int i = 0; i < nodes.getLength(); i++) {
Element element = (Element) nodes.item(i);
NodeList name = element.getElementsByTagName("name");
Element line = (Element) name.item(0);
System.out.println("Name: " + getCharacterDataFromElement(line));
NodeList title = element.getElementsByTagName("title");
line = (Element) title.item(0);
System.out.println("Title: " + getCharacterDataFromElement(line));
}
}
catch (Exception e) {
e.printStackTrace();
}
/*
output :
Name: John
Title: Manager
Name: Sara
Title: Clerk
*/
}
public static String getCharacterDataFromElement(Element e) {
Node child = e.getFirstChild();
if (child instanceof CharacterData) {
CharacterData cd = (CharacterData) child;
return cd.getData();
}
return "?";
}
}

Related

java parser sax doesn't get value & on my field

I have more elements on my xml file contains & and others characters html >.
I tested my code but it obtain the first part of my field for example:
SERIES & FILMS
It give only the word SERIES.
And other example:
C>SUDO
It give only C.
My code, my field name is "summary":
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
chars = new StringBuffer();
DefaultHandler handler = new DefaultHandler() {
public void startElement(String uri, String localName,
String qName, Attributes attributes)
throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equals(SUMMARY2)) {
bfSummary = true;
}
if (qName.equals(SERVICE_DATA)) {
idServiceData = attributes.getValue("id");
bfServicedata = true;
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
System.out.println("End Element :" + qName + ""
+ mListBaseLineByEpgId.size());
// maliste.put(listeId, summary);
malisteParThem.add(summary);
if (mListBaseLineByEpgId.get(idServiceData) != null) {
List<String> listeModif = mListBaseLineByEpgId
.get(idServiceData);
for (String chaine : malisteParThem) {
listeModif.add(chaine);
}
mListBaseLineByEpgId.replace(idServiceData, listeModif);
} else {
mListBaseLineByEpgId.put(idServiceData, malisteParThem);
}
malisteParThem = new ArrayList<String>();
}
public void characters(char ch[], int start, int length)
throws SAXException {
if (bfSummary) {
summary = new String(ch, start, length);
summary = summary.replace(BEFORETILESUMMARY, "");
// chars.append(summary);
// summary=chars.toString();
summary = removeHtmlFrom(summary);
System.out.println("Summary : " + summary);
bfSummary = false;
}
if (bfServicedata) {
System.out.println("listeId : " + idServiceData);
bfServicedata = false;
}
}
};
File file = new File(cheminFichier);
InputStream inputStream = new FileInputStream(file);
Reader reader = new InputStreamReader(inputStream);
InputSource is = new InputSource(reader);
//is.setEncoding("ISO-8859-1");
saxParser.parse(is, handler);
} catch (Exception e) {
e.printStackTrace();
}
Thank you.
Perhaps this problem is related to the unexpected behavior of SAX parser: it is allowed (per spec) to split the text part of an element and call characters() method multiple times for the same element.
What you need to do is have a StringBuffer or StringBuilder instance variable. You initialize it in startElement(), append to it on characters() and get the full text on endElement()
see this question for more info JAVA SAX parser split calls to characters()

Handle external Entities and Stylesheet in Sax Parser (XML)

I want to ignore external entities and external stylesheets (eg. <?xml-stylesheet type="text/xsl" href="......."?>).
I know I have to set XMLReader property to ignore external entities but I don't know how to ignore stylesheets...
import org.apache.xerces.parsers.SAXParser;
import org.xml.sax.XMLReader;
//...
final XMLReader parser = new SAXParser();
// Ignore entities
parser.setProperty("http://xml.org/sax/features/external-general-entities", false);
// IS CORRECT???
parser.setProperty("http://xml.org/sax/features/external-general-entities", false);
There are more properties to set to avoid external entities and stylesheet?
How Can I understand if there are external entities o stylesheets?
Working for me:
public class SaxParser extends DefaultHandler
implements ContentHandler, DTDHandler, EntityResolver{
public transient static final String STYLE_SHEET_TAG = "xml-stylesheet";
public transient static final String EXTERNAL_ENTITY = "ExternalEntity";
public static void main(String[] args) {
new SaxParser().execute();
}
public void execute() {
String pathFileXml = "test/XML.xml";
final XMLReader parser = new SAXParser();
parser.setContentHandler(this);
parser.setDTDHandler(this);
parser.setEntityResolver(this);
try {
parser.parse(pathFileXml);
} catch (IOException e) {
e.printStackTrace();
} catch (SAXException e) {
if (SaxParser.STYLE_SHEET_TAG.equals(e.getMessage())
|| SaxParser.EXTERNAL_ENTITY.equals(e.getMessage())) {
System.out.println("CATCH ERRORE");
}
e.printStackTrace();
}
System.out.println("OK");
}
#Override
public void processingInstruction(String target, String data)
throws SAXException {
System.out.println("Processing Instruction");
System.out.println("PI=> target: " + target + ", data: " + data);
if (STYLE_SHEET_TAG.equalsIgnoreCase(target.trim())) {
throw new SAXException(STYLE_SHEET_TAG);
}
return;
}
#Override
public InputSource resolveEntity(String publicId, String systemId)
throws IOException, SAXException {
System.out.println("publicId: " + publicId + ", systemId: " + systemId);
throw new SAXException(SaxParser.EXTERNAL_ENTITY);
}
}
The external stylesheet declaration is a standard processing instruction.
You can ignore processing instructions by not implementing the handler method:
void processingInstruction(java.lang.String target, java.lang.String data) {}
in your SAX handler.

How to read the child nodes from XML using DOM or SAX parser in android

This is my XML.
<Operations>
<Operation Name="OperationName1">Entity details1</Operation>
<Operation Name="OperationName2">Entity details2</Operation>
<Operation Name="OperationName3">Entity details3</Operation>
<Operation Name="OperationName4">Entity details4</Operation>
</Operations>
In this I need to read each child nodes as a string variable. Using DOM I am trying like this.
NodeList items = root.getElementsByTagName("Operation");
for (int i=0;i<items.getLength();i++)
{
Node item = items.item(i);
NodeList properties = item.getChildNodes();
for (int j=0;j<properties.getLength();j++){
Node property = properties.item(j);
}
}
Now as for my understand the items is having all the child nodes now I need to store each child node like this.
String ch_node="<Operation Name="OperationName4">Entity details4</Operation>"
Is there any default method that will give me the child node xml or I need to create again with node name,value and atrributes?
I have tried with SAX parser also but do not know how to get.
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (qName.equalsIgnoreCase("operation")) {
op_Name=attributes.getValue(0);
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
}
public void endElement(String uri, String localName, String qName)
throws SAXException {
}
You can try DOM and Transformer
Transformer tx = TransformerFactory.newInstance().newTransformer();
tx.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new File("1.xml"));
NodeList list = doc.getElementsByTagName("Operation");
for (int i = 0; i < list.getLength(); i++) {
DOMSource src = new DOMSource(list.item(i));
StringWriter sr = new StringWriter();
Result res = new StreamResult(sr);
tx.transform(src, res);
System.out.println(sr);
}
output
<Operation Name="OperationName1">Entity details1</Operation>
<Operation Name="OperationName2">Entity details2</Operation>
<Operation Name="OperationName3">Entity details3</Operation>
<Operation Name="OperationName4">Entity details4</Operation>
try this
String elemName;
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
elemName=qName;
}
public void characters(char[] ch, int start, int length)
throws SAXException {
if(elemName.equals("OperationName1")) {
String OperationName1Text=new String(ch);
}
}
public void endElement(String uri, String localName, String qName)
throws SAXException {
}
Please take look at the code below, Here I have fetched a child node "description"
URL url;
try {
url = new URL(urls);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
if ((conn.getResponseCode() == HttpURLConnection.HTTP_OK)) {
DocumentBuilderFactory dbf = DocumentBuilderFactory
.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc;
doc = db.parse(url.openStream());
doc.getDocumentElement().normalize();
NodeList itemLst = doc.getElementsByTagName("item");
nl = doc.getElementsByTagName(KEY_HEAD);
Description = new String[itemLst.getLength()];// ........
for (int i = 0; i < itemLst.getLength(); i++) {
Node item = itemLst.item(i);
if (item.getNodeType() == Node.ELEMENT_NODE) {
Element ielem = (Element) item;
NodeList description = ielem
.getElementsByTagName("description");
Desc[i] = description.item(0).getChildNodes().item(0)
.getNodeValue();
}
}
}
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (DOMException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

Java -Android. Parser problem

I am making a very simple app with an RSS reader. The reader works great, but it's only giving me the title, and i want the description too.
I'am very new to android, and I have tried a lot of things, but I can't get it to work.
I've found a lot of parsers but they are to complicated for me to understand, so I was hoping to find a simple solution, since it's only title and description i want.
Can anyone help me?
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import android.app.Activity;
import android.os.Bundle;
import android.widget.TextView;
public class NyhedActivity extends Activity {
String streamTitle = "";
#Override
protected void onCreate(Bundle savedInstanceState) {
// TODO Auto-generated method stub
super.onCreate(savedInstanceState);
setContentView(R.layout.nyheder);
TextView result = (TextView)findViewById(R.id.result);
try {
URL rssUrl = new URL("http://tv2sport.dk/rss/*/*/*/248/*/*");
SAXParserFactory mySAXParserFactory = SAXParserFactory.newInstance();
SAXParser mySAXParser = mySAXParserFactory.newSAXParser();
XMLReader myXMLReader = mySAXParser.getXMLReader();
RSSHandler myRSSHandler = new RSSHandler();
myXMLReader.setContentHandler(myRSSHandler);
InputSource myInputSource = new InputSource(rssUrl.openStream());
myXMLReader.parse(myInputSource);
result.setText(streamTitle);
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
result.setText("Cannot connect RSS!");
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
result.setText("Cannot connect RSS!");
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
result.setText("Cannot connect RSS!");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
result.setText("Cannot connect RSS!");
}
}
private class RSSHandler extends DefaultHandler
{
final int stateUnknown = 0;
final int stateTitle = 1;
int state = stateUnknown;
int numberOfTitle = 0;
String strTitle = "";
String strElement = "";
#Override
public void startDocument() throws SAXException {
// TODO Auto-generated method stub
strTitle = "Nyheder fra ";
}
#Override
public void endDocument() throws SAXException {
// TODO Auto-generated method stub
strTitle += "";
streamTitle = "" + strTitle;
}
#Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
// TODO Auto-generated method stub
if (localName.equalsIgnoreCase("title"))
{
state = stateTitle;
strElement = "";
numberOfTitle++;
}
else
{
state = stateUnknown;
}
}
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
// TODO Auto-generated method stub
if (localName.equalsIgnoreCase("title"))
{
strTitle += strElement + "\n"+"\n";
}
state = stateUnknown;
}
#Override
public void characters(char[] ch, int start, int length)
throws SAXException {
// TODO Auto-generated method stub
String strCharacters = new String(ch, start, length);
if (state == stateTitle)
{
strElement += strCharacters;
}
}
}
}
I've never really used SAX, when it comes to parsing XML in Java. I allways use JDOM. It's simple and really easy to use.
To read a an XML-file with JDOM, you create a document and fill it using an InputStream and a SAXBuilder:
SAXBuilder builder = new SAXBuilder();
Document document = builder.builder( myInputStream );
In your posted case: myInputStream = url.openStream();
Then you need to fetch the root of the XML-document:
Element root = document.getRootElement();
Now it's very simple. Since I don't know the structure of the XML you're getting I'll just assume that it looks something like:
<rssfeed>
<news>
<title> Title </title>
<description> Description </description>
</news>
<news>
<title> ... </title>
<description> ... </description>
</news>
<news>
<title> ... </title>
<description> ... </description>
</news>
<rssfeed>
You can then list all elements of like this:
List<Element> news = root.getChildren( "news" );
Then you run through the list in a for-each loop, getting the title and description (Having a data-class to hold these information would be a help e.g. a News-class):
ArrayList<News> newsList = new ArrayList<News>();
for( Element child : news ) {
News news = new News();
news.setTitle( child.getChildText( "title" );
news.setDescription( child.getChildText( "description" );
newsList.add( news );
}
Now you have a list of news that you can play around with.
Kano,
You can simplify your life and get top-notch performance by utilizing SJXP to write this RSS feed parser with (disclaimer: I am the author).
SJXP is a very-very thin abstraction layer that sits on top of the XML Pull Parsing API (Android provides its own so you only have the sjxp.JAR dependency, XPP3 for every other platform) and allows you to use XPath-like parsing rules for matching simple rules against certain places of a document and then telling the parsing what information you want from those locations.
I wrote an example Eclipse project for you that parses that TV2 Sports feed for you in 6 minutes (I'll link it at the bottom).
The main method looks like this so you get an idea of the flow:
public static void main(String[] args) throws IllegalArgumentException,
XMLParserException, IOException {
// Location we want to parse.
URL feedURL = new URL("http://tv2sport.dk/rss/*/*/*/248/*/*");
// List we will hold all parsed stories in.
List<Item> itemList = new ArrayList<Item>();
// Get all the rules we will use to parse this file
IRule[] rules = createRules();
// Create the parser and populate it with the rules.
XMLParser<List<Item>> parser = new XMLParser<List<Item>>(rules);
// Parse the RSS feed.
parser.parse(feedURL.openStream(), itemList);
// Print the results.
System.out.println("Parsed " + itemList.size() + " RSS items.");
for (Item i : itemList)
System.out.println("\t" + i);
}
You see the flow starts with creating our List to hold our Items in as we parse them from the doc. Then we get a set of IRule instances to give the parser, then create the parser and give it the rules to use while working.
We then invoke the parse method on the contents of the feed and pass it what is called a "user object", more specifically, just an instance of anything that we would like it to pass-through to the rules when they execute.
In this case, we want access to our List so we can add items to it, so we just pass that in and the parser passes it right through to our IRule logic when it executes so we can use it.
The Item class utilizes is just a simple POJO to hold the data and make printing look nice:
public class Item {
public String title;
public String description;
#Override
public String toString() {
return "Item [title='" + title + "', description='" + description + "']";
}
}
All the interesting stuff happens in your IRule where you define what kind of element you are targeting (character data, attribute data or just tag open/close events) and then override the appropriate method from the IRule interface to provide a handler that does something.
For example, here is the handler that parses the titles:
IRule<List<Item>> itemDescRule = new DefaultRule<List<Item>>(Type.CHARACTER, "/rss/channel/item/description") {
#Override
public void handleParsedCharacters(XMLParser<List<Item>> parser, String text, List<Item> userObject) {
Item item = userObject.get(userObject.size() - 1);
item.description = text;
}
};
You see that you get given the parser instance itself (so you can trigger the 'stop' method if you want to end parsing early), you get the text that was the character data and you get that 'user object' which happened to be our list passed through to you.
We grab the item we are populating out of the list, give it the description and that's it. 2 lines of code.
There is another IRule that adds a new Item to the list every time an open-tag is encountered, that is what allows our subsequent rules like this one to just pop the end element off the list and populate it.
When you run the project, the output looks like this:
Parsed 50 RSS items.
Item [title='Barcas bøddel beæret over Barca-føler', description='Tirsdag snød Thiago Silva Barcelona for tre point, da han headede AC Milans udligning i kassen i Champions League-kampens overtid.']
Item [title='Guardiola: Pato hurtigere end Usain Bolt', description='FC Barcelona-træner, Josep Guardiola, er dybt imponeret af Milan-målscoreren Alexandre Patos hurtighed.']
Item [title='Milan-profil: Vi kan nå semifinalen', description='Clarence Seedorf mener, at AC Milan kan nå semifinalerne i Champions League efter 2-2 i Barcelona.']
<SNIP...>
You can download the entire Eclipse project I created for you here.
Hope that helps.
I hope I can help you:
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
// TODO Auto-generated method stub
if (localName.equalsIgnoreCase("title"))
{
strTitle += strElement + "\n"+"\n";
}
else if (localName.equalsIgnoreCase("lead"))
{
lead += strElement + "\n"+"\n";
}
}

JAXB: How to ignore namespace during unmarshalling XML document?

My schema specifies a namespace, but the documents don't. What's the simplest way to ignore namespace during JAXB unmarshalling (XML -> object)?
In other words, I have
<foo><bar></bar></foo>
instead of,
<foo xmlns="http://tempuri.org/"><bar></bar></foo>
Here is an extension/edit of VonCs solution just in case someone doesn´t want to go through the hassle of implementing their own filter to do this. It also shows how to output a JAXB element without the namespace present. This is all accomplished using a SAX Filter.
Filter implementation:
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;
public class NamespaceFilter extends XMLFilterImpl {
private String usedNamespaceUri;
private boolean addNamespace;
//State variable
private boolean addedNamespace = false;
public NamespaceFilter(String namespaceUri,
boolean addNamespace) {
super();
if (addNamespace)
this.usedNamespaceUri = namespaceUri;
else
this.usedNamespaceUri = "";
this.addNamespace = addNamespace;
}
#Override
public void startDocument() throws SAXException {
super.startDocument();
if (addNamespace) {
startControlledPrefixMapping();
}
}
#Override
public void startElement(String arg0, String arg1, String arg2,
Attributes arg3) throws SAXException {
super.startElement(this.usedNamespaceUri, arg1, arg2, arg3);
}
#Override
public void endElement(String arg0, String arg1, String arg2)
throws SAXException {
super.endElement(this.usedNamespaceUri, arg1, arg2);
}
#Override
public void startPrefixMapping(String prefix, String url)
throws SAXException {
if (addNamespace) {
this.startControlledPrefixMapping();
} else {
//Remove the namespace, i.e. don´t call startPrefixMapping for parent!
}
}
private void startControlledPrefixMapping() throws SAXException {
if (this.addNamespace && !this.addedNamespace) {
//We should add namespace since it is set and has not yet been done.
super.startPrefixMapping("", this.usedNamespaceUri);
//Make sure we dont do it twice
this.addedNamespace = true;
}
}
}
This filter is designed to both be able to add the namespace if it is not present:
new NamespaceFilter("http://www.example.com/namespaceurl", true);
and to remove any present namespace:
new NamespaceFilter(null, false);
The filter can be used during parsing as follows:
//Prepare JAXB objects
JAXBContext jc = JAXBContext.newInstance("jaxb.package");
Unmarshaller u = jc.createUnmarshaller();
//Create an XMLReader to use with our filter
XMLReader reader = XMLReaderFactory.createXMLReader();
//Create the filter (to add namespace) and set the xmlReader as its parent.
NamespaceFilter inFilter = new NamespaceFilter("http://www.example.com/namespaceurl", true);
inFilter.setParent(reader);
//Prepare the input, in this case a java.io.File (output)
InputSource is = new InputSource(new FileInputStream(output));
//Create a SAXSource specifying the filter
SAXSource source = new SAXSource(inFilter, is);
//Do unmarshalling
Object myJaxbObject = u.unmarshal(source);
To use this filter to output XML from a JAXB object, have a look at the code below.
//Prepare JAXB objects
JAXBContext jc = JAXBContext.newInstance("jaxb.package");
Marshaller m = jc.createMarshaller();
//Define an output file
File output = new File("test.xml");
//Create a filter that will remove the xmlns attribute
NamespaceFilter outFilter = new NamespaceFilter(null, false);
//Do some formatting, this is obviously optional and may effect performance
OutputFormat format = new OutputFormat();
format.setIndent(true);
format.setNewlines(true);
//Create a new org.dom4j.io.XMLWriter that will serve as the
//ContentHandler for our filter.
XMLWriter writer = new XMLWriter(new FileOutputStream(output), format);
//Attach the writer to the filter
outFilter.setContentHandler(writer);
//Tell JAXB to marshall to the filter which in turn will call the writer
m.marshal(myJaxbObject, outFilter);
This will hopefully help someone since I spent a day doing this and almost gave up twice ;)
I have encoding problems with XMLFilter solution, so I made XMLStreamReader to ignore namespaces:
class XMLReaderWithoutNamespace extends StreamReaderDelegate {
public XMLReaderWithoutNamespace(XMLStreamReader reader) {
super(reader);
}
#Override
public String getAttributeNamespace(int arg0) {
return "";
}
#Override
public String getNamespaceURI() {
return "";
}
}
InputStream is = new FileInputStream(name);
XMLStreamReader xsr = XMLInputFactory.newFactory().createXMLStreamReader(is);
XMLReaderWithoutNamespace xr = new XMLReaderWithoutNamespace(xsr);
Unmarshaller um = jc.createUnmarshaller();
Object res = um.unmarshal(xr);
I believe you must add the namespace to your xml document, with, for example, the use of a SAX filter.
That means:
Define a ContentHandler interface with a new class which will intercept SAX events before JAXB can get them.
Define a XMLReader which will set the content handler
then link the two together:
public static Object unmarshallWithFilter(Unmarshaller unmarshaller,
java.io.File source) throws FileNotFoundException, JAXBException
{
FileReader fr = null;
try {
fr = new FileReader(source);
XMLReader reader = new NamespaceFilterXMLReader();
InputSource is = new InputSource(fr);
SAXSource ss = new SAXSource(reader, is);
return unmarshaller.unmarshal(ss);
} catch (SAXException e) {
//not technically a jaxb exception, but close enough
throw new JAXBException(e);
} catch (ParserConfigurationException e) {
//not technically a jaxb exception, but close enough
throw new JAXBException(e);
} finally {
FileUtil.close(fr); //replace with this some safe close method you have
}
}
In my situation, I have many namespaces and after some debug I find another solution just changing the NamespaceFitler class. For my situation (just unmarshall) this work fine.
import javax.xml.namespace.QName;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;
import com.sun.xml.bind.v2.runtime.unmarshaller.SAXConnector;
public class NamespaceFilter extends XMLFilterImpl {
private SAXConnector saxConnector;
#Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
if(saxConnector != null) {
Collection<QName> expected = saxConnector.getContext().getCurrentExpectedElements();
for(QName expectedQname : expected) {
if(localName.equals(expectedQname.getLocalPart())) {
super.startElement(expectedQname.getNamespaceURI(), localName, qName, atts);
return;
}
}
}
super.startElement(uri, localName, qName, atts);
}
#Override
public void setContentHandler(ContentHandler handler) {
super.setContentHandler(handler);
if(handler instanceof SAXConnector) {
saxConnector = (SAXConnector) handler;
}
}
}
Another way to add a default namespace to an XML Document before feeding it to JAXB is to use JDom:
Parse XML to a Document
Iterate through and set namespace on all Elements
Unmarshall using a JDOMSource
Like this:
public class XMLObjectFactory {
private static Namespace DEFAULT_NS = Namespace.getNamespace("http://tempuri.org/");
public static Object createObject(InputStream in) {
try {
SAXBuilder sb = new SAXBuilder(false);
Document doc = sb.build(in);
setNamespace(doc.getRootElement(), DEFAULT_NS, true);
Source src = new JDOMSource(doc);
JAXBContext context = JAXBContext.newInstance("org.tempuri");
Unmarshaller unmarshaller = context.createUnmarshaller();
JAXBElement root = unmarshaller.unmarshal(src);
return root.getValue();
} catch (Exception e) {
throw new RuntimeException("Failed to create Object", e);
}
}
private static void setNamespace(Element elem, Namespace ns, boolean recurse) {
elem.setNamespace(ns);
if (recurse) {
for (Object o : elem.getChildren()) {
setNamespace((Element) o, ns, recurse);
}
}
}
This is just a modification of lunicon's answer (https://stackoverflow.com/a/24387115/3519572) if you want to replace one namespace for another during parsing. And if you want to see what exactly is going on, just uncomment the output lines and set a breakpoint.
public class XMLReaderWithNamespaceCorrection extends StreamReaderDelegate {
private final String wrongNamespace;
private final String correctNamespace;
public XMLReaderWithNamespaceCorrection(XMLStreamReader reader, String wrongNamespace, String correctNamespace) {
super(reader);
this.wrongNamespace = wrongNamespace;
this.correctNamespace = correctNamespace;
}
#Override
public String getAttributeNamespace(int arg0) {
// System.out.println("--------------------------\n");
// System.out.println("arg0: " + arg0);
// System.out.println("getAttributeName: " + getAttributeName(arg0));
// System.out.println("super.getAttributeNamespace: " + super.getAttributeNamespace(arg0));
// System.out.println("getAttributeLocalName: " + getAttributeLocalName(arg0));
// System.out.println("getAttributeType: " + getAttributeType(arg0));
// System.out.println("getAttributeValue: " + getAttributeValue(arg0));
// System.out.println("getAttributeValue(correctNamespace, LN):"
// + getAttributeValue(correctNamespace, getAttributeLocalName(arg0)));
// System.out.println("getAttributeValue(wrongNamespace, LN):"
// + getAttributeValue(wrongNamespace, getAttributeLocalName(arg0)));
String origNamespace = super.getAttributeNamespace(arg0);
boolean replace = (((wrongNamespace == null) && (origNamespace == null))
|| ((wrongNamespace != null) && wrongNamespace.equals(origNamespace)));
return replace ? correctNamespace : origNamespace;
}
#Override
public String getNamespaceURI() {
// System.out.println("getNamespaceCount(): " + getNamespaceCount());
// for (int i = 0; i < getNamespaceCount(); i++) {
// System.out.println(i + ": " + getNamespacePrefix(i));
// }
//
// System.out.println("super.getNamespaceURI: " + super.getNamespaceURI());
String origNamespace = super.getNamespaceURI();
boolean replace = (((wrongNamespace == null) && (origNamespace == null))
|| ((wrongNamespace != null) && wrongNamespace.equals(origNamespace)));
return replace ? correctNamespace : origNamespace;
}
}
usage:
InputStream is = new FileInputStream(xmlFile);
XMLStreamReader xsr = XMLInputFactory.newFactory().createXMLStreamReader(is);
XMLReaderWithNamespaceCorrection xr =
new XMLReaderWithNamespaceCorrection(xsr, "http://wrong.namespace.uri", "http://correct.namespace.uri");
rootJaxbElem = (JAXBElement<SqgRootType>) um.unmarshal(xr);
handleSchemaError(rootJaxbElem, pmRes);

Categories

Resources