java parser sax doesn't get value & on my field - java

I have more elements on my xml file contains & and others characters html >.
I tested my code but it obtain the first part of my field for example:
SERIES & FILMS
It give only the word SERIES.
And other example:
C>SUDO
It give only C.
My code, my field name is "summary":
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
chars = new StringBuffer();
DefaultHandler handler = new DefaultHandler() {
public void startElement(String uri, String localName,
String qName, Attributes attributes)
throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equals(SUMMARY2)) {
bfSummary = true;
}
if (qName.equals(SERVICE_DATA)) {
idServiceData = attributes.getValue("id");
bfServicedata = true;
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
System.out.println("End Element :" + qName + ""
+ mListBaseLineByEpgId.size());
// maliste.put(listeId, summary);
malisteParThem.add(summary);
if (mListBaseLineByEpgId.get(idServiceData) != null) {
List<String> listeModif = mListBaseLineByEpgId
.get(idServiceData);
for (String chaine : malisteParThem) {
listeModif.add(chaine);
}
mListBaseLineByEpgId.replace(idServiceData, listeModif);
} else {
mListBaseLineByEpgId.put(idServiceData, malisteParThem);
}
malisteParThem = new ArrayList<String>();
}
public void characters(char ch[], int start, int length)
throws SAXException {
if (bfSummary) {
summary = new String(ch, start, length);
summary = summary.replace(BEFORETILESUMMARY, "");
// chars.append(summary);
// summary=chars.toString();
summary = removeHtmlFrom(summary);
System.out.println("Summary : " + summary);
bfSummary = false;
}
if (bfServicedata) {
System.out.println("listeId : " + idServiceData);
bfServicedata = false;
}
}
};
File file = new File(cheminFichier);
InputStream inputStream = new FileInputStream(file);
Reader reader = new InputStreamReader(inputStream);
InputSource is = new InputSource(reader);
//is.setEncoding("ISO-8859-1");
saxParser.parse(is, handler);
} catch (Exception e) {
e.printStackTrace();
}
Thank you.

Perhaps this problem is related to the unexpected behavior of SAX parser: it is allowed (per spec) to split the text part of an element and call characters() method multiple times for the same element.
What you need to do is have a StringBuffer or StringBuilder instance variable. You initialize it in startElement(), append to it on characters() and get the full text on endElement()
see this question for more info JAVA SAX parser split calls to characters()

Related

Parsing a big xml file Java

I have big xml files (~1GB) with this structure:
<?xml version="1.0" encoding="UTF-8"?>
<GenoExchange xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.ncbi.nlm.nih.gov/SNP/geno" xsi:schemaLocation="http://www.ncbi.nlm.nih.gov/SNP/geno ftp://ftp.ncbi.nlm.nih.gov/snp/specs/genoex_1_5.xsd" dbSNPBuildNo="146" reportId="MT" reportType="chromosome">
<Population popId="638" handle="TSC-CSHL" locPopId="TSC_42_AA">
<popClass self="NORTH AMERICA"/>
</Population>
<SnpInfo rsId="1041870" observed="C/T">
<SnpLoc genomicAssembly="107:GRCh38.p2" geneId="4512" geneSymbol="COX1" chrom="MT" start="6150" locType="2" rsOrientToChrom="fwd" contigAllele="T" contig="NC_012920:1"/>
<SsInfo ssId="1508548" locSnpId="TSC0349089" ssOrientToRs="fwd">
<ByPop popId="1303" sampleSize="184">
<AlleleFreq allele="T" freq="1"/>
<AlleleFreq allele="C" freq="0"/>
</ByPop>
</SsInfo>
</SnpInfo>
<SnpInfo rsId="1029293" observed="C/T">
<SnpLoc genomicAssembly="107:GRCh38.p2" geneId="4512" geneSymbol="COX1" chrom="MT" start="6307" locType="2" rsOrientToChrom="fwd" contigAllele="C" contig="NC_012920:1"/>
<SsInfo ssId="1494519" locSnpId="TSC0254145" ssOrientToRs="fwd">
<ByPop popId="639" sampleSize="82">
<AlleleFreq allele="T" freq="0"/>
<AlleleFreq allele="C" freq="1"/>
</ByPop>
<ByPop popId="1303" sampleSize="184">
<AlleleFreq allele="T" freq="0"/>
<AlleleFreq allele="C" freq="1"/>
</ByPop>
</SsInfo>
</SnpInfo>
I want to find a specific rsID, for example rsID="1029293" and extract all the information inside that node. I don't want to run all the file. I only want to find that ID, extract that information and end the iteration.
From what I read it's better if I use SAX or Stax parsers. I'm using SAX, this is my code:
class UserHandler extends DefaultHandler {
String rsID = null;
String i = "1029293";
#Override
public void startElement(String uri,
String localName, String qName, Attributes attributes) throws SAXException {
if (qName.equalsIgnoreCase("SnpInfo")) {
rsID = attributes.getValue("rsId");
//System.out.println("value: " + rsID);
}
if((i).equals(rsID) &&
qName.equalsIgnoreCase("SnpInfo")){
System.out.println("Start Element: " + qName + " " + rsID);
}
if ((i).equals(rsID) && qName.equalsIgnoreCase("SsInfo")) {
String a = attributes.getValue("ssId");
System.out.println("SSID: " + a);
}
if ((i).equals(rsID) && qName.equalsIgnoreCase("ByPop")) {
String p = attributes.getValue("popId");
System.out.println("POPID: " + p);
}
if ((i).equals(rsID) && qName.equalsIgnoreCase("AlleleFreq")) {
String p = attributes.getValue("allele");
String f = attributes.getValue("freq");
System.out.println("ALLELE: " + p + " FREQ: " + f);
}
if ((i).equals(rsID) && qName.equalsIgnoreCase("GTypeFreq")) {
String p = attributes.getValue("gtype");
String f = attributes.getValue("freq");
System.out.println("GTYPE: " + p + " FREQ: " + f);
}
}
#Override
public void endElement(String uri,
String localName, String qName) throws SAXException {
if (qName.equalsIgnoreCase("SnpInfo")) {
if((i).equals(rsID)
&& qName.equalsIgnoreCase("SnpInfo"))
System.out.println("End Element: " + qName);
}
}
}
public class XMLParser {
public static void main(String argv[]) {
try {
InputStream fileStream = new FileInputStream("/home/xml/gt_chr10.xml.gz");
InputStream gzipStream = new GZIPInputStream(fileStream);
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
UserHandler userhandler = new UserHandler();
saxParser.parse(gzipStream, userhandler);
} catch (Exception e) {
e.printStackTrace();
}
}
My problem is that my code searches the whole file for the ID and that takes more than 2 minutes each time. I can't have a code that takes so long.
Is there a better approach for this?
Using STAX gives you more control when parsing XML, since you actively pull elements from the stream. This way you can pull the next event, handle it and once you found your data, simply terminate the loop (using a flag or even a return statement if you must)
InputStream in = ...
XMLInputFactory factory = XMLInputFactory.newInstance();
XMLEventReader eventReader = factory.createXMLEventReader(in);
boolean found = false;
while (!found && eventReader.hasNext()) {
XMLEvent event = eventReader.nextEvent();
switch (event.getEventType()) {
case XMLStreamConstants.START_ELEMENT:
// your logic here
// once you found your element, you can terminate the loop
found = true;
break;
case XMLStreamConstants.END_ELEMENT:
// your logic here
break;
}
}
(omitted exception and resource handling for brevity)
On a side note, you will gain some performance by combining your if ((i).equals(rsID) && ... into a single one, with detail checks in nested ifs
if ((i).equals(rsID)) {
if(qName.equalsIgnoreCase("GTypeFreq")) {
...
}
}
You can throw an exception in your end element handler, to indicate to the parser that it aborts parsing (http://www.ibm.com/developerworks/library/x-tipsaxstop/):
#Override
public void endElement(String uri,
String localName, String qName) throws SAXException {
if (qName.equalsIgnoreCase("SnpInfo")) {
if((i).equals(rsID)
&& qName.equalsIgnoreCase("SnpInfo"))
System.out.println("End Element: " + qName);
throw SAXException("Element found.");
}
}
The only way to avoid parsing the whole file every time you run this is to put the data in an XML database. Parsing a 1Gb file is going to take about a minute, plus or minus depending on the speed of your machine and what processing you do on each node.
A streamed XSLT 3.0 solution is simply:
<xsl:transform version="3.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xpath-default-namespace="http://www.ncbi.nlm.nih.gov/SNP/geno">
<xsl:template name="xsl:initial-template">
<xsl:stream href="input.xml">
<xsl:copy-of select="/GenoExchange/SnpInfo[#rsId='1041870'][1]"/>
</xsl:stream>
</xsl:template>
</xsl:transform>
No need to write all that pesky SAX or StAX code.
I put the "[1]" predicate in to allow the processor to abandon the search when it has found the first hit.
The best approach is to use vtd-xml and xpath... 1GB xml file takes about 1.5GB heap space and < 10 sec in a 3~4 year old intel processor.see code example below.. One more thing, if you want to eliminate parsing entirely, you can create a vtd+XML file format so any subsequent query can directly access the vtd index portion, which could easily triple or quadruple your app performance...
import com.ximpleware.*;
public class simpleXpathSearch{
public static void main(String s[]) throws VTDException,java.io.UnsupportedEncodingException,java.io.IOException{
VTDGen vg = new VTDGen();
vg.setLCLevel(5);
if (!vg.parseFile("input.xml", false))
return;
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
ap.selectXPath("/*/*[#rsID='1029293']");
int i=0;
while((i=ap.evalXPath())!=-1){
// your code logic here
}
//Main class
public static void main(String[] args) {
SAXReader.read();
}
//SAXReader
public static void read(){
try {
XMLReader processor = XMLReaderFactory.createXMLReader();
processor.setContentHandler(new SAXController());
processor.parse(new InputSource("MyXML.xml"));
} catch (SAXException | IOException e) {
System.err.println(e.getMessage());
}
}
//SAXController
// The SAXController extends DefaultHandler
private int tab = 0;
private void tabulation() {
for (int i=0; i<tab; i++)
System.out.print(" ");
}
#Override
public void startDocument() {
tabulation();
System.out.println("Starting XML Document");
tab++;
}
#Override
public void endDocument() {
tab--;
tabulation();
System.out.println("Ending XML Document");
}
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
tabulation();
System.out.print(localName);
if (attributes.getLength()>0) {
for (int i=0; i<attributes.getLength(); i++) {
System.out.print(attributes.getLocalName(i)+": "+attributes.getValue(i));
}
}
System.out.println();
tab++;
}
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
tab--;
tabulation();
System.out.println(localName);
}
#Override
public void characters(char[] ch, int start, int length)
throws SAXException {
String content= new String(ch, start, length);
content= content.replaceAll("[\t\n]", "").trim();
if (!content.equals("")) {
tabulation();
System.out.println(content);
}
}

RSS Parser returns 403

I'm new to Java and we were given an assignment about XML Parsing. We have done DOM and now we are on SAX. That's why I'm using SAX Parser for parsing an rss feed. Its already working on files but when I try to parse an online rss feed, it returns an Error 403. I haven't tried parsing the same site on DOM because my laptop is so slow it takes me 5 minutes just to open a file.
Thanks for the help.
public class NewsHandler extends DefaultHandler {
private String url = "http://tomasinoweb.org/feed/rss";
private boolean inDescription = false;
private String[] descs = new String[11];
int i = 0;
public void processFeed() {
try {
SAXParserFactory factory =
SAXParserFactory.newInstance();
SAXParser parser = factory.newSAXParser();
XMLReader reader = parser.getXMLReader();
reader.setContentHandler(this);
InputStream inputStream = new URL(url).openStream();
reader.parse(new InputSource(inputStream));
} catch (Exception e) { e.printStackTrace(); }
}
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if(qName.equals("description")) inDescription = true;
}
public void characters(char ch[], int start, int length) {
String chars = new String(ch).substring(start, start + length);
if(inDescription) descs[i] = chars;
}
public void endElement(String uri, String localName, String qName) throws SAXException {
if(qName.equals("description")) {
inDescription = false;
i++;
}
}
public String getDesc(int index) { return descs[index]; }
public static void main(String[] args) {
NewsHandler nh = new NewsHandler();
nh.processFeed();
for(int i=0; i<10; i++) {
System.out.println(nh.getDesc(i));
}
}
}
Solution:
Instead of using String url = "url", I used URL url = new URL("url") and URLConnection con = url.openConnection() and then con.addRequestProperty("user-agent", user-agent string);

Android: get HTML text from XML

I have implemented in my app reading a XML. It works fine. But I want to format the text. I've tried in the XML:
<monumento>
<horarios><b>L-V:</b> 10 a 20<br/>S-D: 11 a 15</horarios>
<tarifas>4000</tarifas>
</monumento>
But the only thing I get if I put HTML character is that the text does not display in my app.
I'll have many xml so that I will not always know where to place <b>, <br/>...
How I can do?
Main
StringBuilder builder = new StringBuilder();
for (HorariosTarifasObj post : helper.posts) {
builder.append(post.getHorarios());
}
horario2.setText(builder.toString());
builder = new StringBuilder();
for (HorariosTarifasObj post : helper.posts) {
builder.append(post.getTarifas());
}
tarifa2.setText(builder.toString());
XMLReader
public void get() {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser parser = factory.newSAXParser();
XMLReader reader = parser.getXMLReader();
reader.setContentHandler(this);
InputStream inputStream = new URL(URL + monumento + ".xml").openStream();
reader.parse(new InputSource(inputStream));
} catch (Exception e) {
}
}
#Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
currTag = true;
currTagVal = "";
if (localName.equals("monumento")) {
post = new HorariosTarifasObj();
}
}
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
currTag = false;
if(localName.equalsIgnoreCase("horarios")) {
post.setHorarios(currTagVal);
} else if(localName.equalsIgnoreCase("tarifas")) {
post.setTarifas(currTagVal);
} else if (localName.equalsIgnoreCase("monumento")) {
posts.add(post);
}
}
#Override
public void characters(char[] ch, int start, int length)
throws SAXException {
if (currTag) {
currTagVal = currTagVal + new String(ch, start, length);
currTag = false;
}
}
Try CDATA:
<monumento>
<horarios><![CDATA[<b>L-V:</b> 10 a 20<br/>S-D: 11 a 15]]></horarios>
<tarifas>4000</tarifas>
</monumento>
In XML, < and > characters are reserved for XML tags. You will need to protect them by replacing them with special encoding characters.
You can use > for > and < for <
(edit) Eomm answer is right, CDATA does this as well, and more simple
Also, to use HTML coding in TextView, you will need to use Html.fromHtml() method
For instance :
tarifa2.setText(Html.fromHtml(builder.toString()));

problem with using SAX XML Parser

I am using the SAX Parser for XML Parsing. The problem is for the following XML code:
<description>
Designer:Paul Smith Color:Plain Black Fabric/Composition:100% cotton Weave/Pattern:pinpoint Sleeve:Long-sleeved Fit:Classic Front style:Placket front Back style:Side pleat back Collar:Classic/straight collar Button:Pearlescent front button Pocket:rounded chest pocket Hem:Rounded hem
</description>
I get this:
Designer:Paul Smith
Color:Plain Black
The other parts are missing. The same thing happens for a few other lines. Can anyone kindly tell me whats the problem with my approach ?
My code is given below:
Parser code:
try {
/** Handling XML */
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
/** Send URL to parse XML Tags */
URL sourceUrl = new URL(
"http://50.19.125.224/Demo/VeryGoodSex_and_the_City_S6E6.xml");
/** Create handler to handle XML Tags ( extends DefaultHandler ) */
MyXMLHandler myXMLHandler = new MyXMLHandler();
xr.setContentHandler((ContentHandler) myXMLHandler);
xr.parse(new InputSource(sourceUrl.openStream()));
} catch (Exception e) {
System.out.println("XML Pasing Excpetion = " + e);
}
Object to hold XML parsed Info:
public class ParserObject {
String name=null;
String description=null;
String bitly=null; //single
String productLink=null;//single
String productPrice=null;//single
Vector<String> price=new Vector<String>();
}
Handler class:
public void endElement(String uri, String localName, String qName)
throws SAXException {
currentElement = false;
if (qName.equalsIgnoreCase("title"))
{
xmlDataObject[index].name=currentValue;
}
else if (qName.equalsIgnoreCase("artist"))
{
xmlDataObject[index].artist=currentValue;
}
}
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
currentElement = true;
if (qName.equalsIgnoreCase("allinfo"))
{
System.out.println("started");
}
else if (qName.equalsIgnoreCase("tags"))
{
insideTag=1;
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
if (currentElement) {
currentValue = new String(ch, start, length);
currentElement = false;
}
}
You have to concatenate characters which the parser gives to you until it calls endElement.
Try removing currentElement = false; from characters handler, and
currentValue = currentValue + new String(ch, start, length);
Initialize currentValue with an empty string or handle null value in the expression above.
I think characters read some, but not all characters at the same time.
Thus, you only get the first "chunk".
Try printing each character chunk on a separate line, as debugging (before the if).

JAXB: How to ignore namespace during unmarshalling XML document?

My schema specifies a namespace, but the documents don't. What's the simplest way to ignore namespace during JAXB unmarshalling (XML -> object)?
In other words, I have
<foo><bar></bar></foo>
instead of,
<foo xmlns="http://tempuri.org/"><bar></bar></foo>
Here is an extension/edit of VonCs solution just in case someone doesn´t want to go through the hassle of implementing their own filter to do this. It also shows how to output a JAXB element without the namespace present. This is all accomplished using a SAX Filter.
Filter implementation:
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;
public class NamespaceFilter extends XMLFilterImpl {
private String usedNamespaceUri;
private boolean addNamespace;
//State variable
private boolean addedNamespace = false;
public NamespaceFilter(String namespaceUri,
boolean addNamespace) {
super();
if (addNamespace)
this.usedNamespaceUri = namespaceUri;
else
this.usedNamespaceUri = "";
this.addNamespace = addNamespace;
}
#Override
public void startDocument() throws SAXException {
super.startDocument();
if (addNamespace) {
startControlledPrefixMapping();
}
}
#Override
public void startElement(String arg0, String arg1, String arg2,
Attributes arg3) throws SAXException {
super.startElement(this.usedNamespaceUri, arg1, arg2, arg3);
}
#Override
public void endElement(String arg0, String arg1, String arg2)
throws SAXException {
super.endElement(this.usedNamespaceUri, arg1, arg2);
}
#Override
public void startPrefixMapping(String prefix, String url)
throws SAXException {
if (addNamespace) {
this.startControlledPrefixMapping();
} else {
//Remove the namespace, i.e. don´t call startPrefixMapping for parent!
}
}
private void startControlledPrefixMapping() throws SAXException {
if (this.addNamespace && !this.addedNamespace) {
//We should add namespace since it is set and has not yet been done.
super.startPrefixMapping("", this.usedNamespaceUri);
//Make sure we dont do it twice
this.addedNamespace = true;
}
}
}
This filter is designed to both be able to add the namespace if it is not present:
new NamespaceFilter("http://www.example.com/namespaceurl", true);
and to remove any present namespace:
new NamespaceFilter(null, false);
The filter can be used during parsing as follows:
//Prepare JAXB objects
JAXBContext jc = JAXBContext.newInstance("jaxb.package");
Unmarshaller u = jc.createUnmarshaller();
//Create an XMLReader to use with our filter
XMLReader reader = XMLReaderFactory.createXMLReader();
//Create the filter (to add namespace) and set the xmlReader as its parent.
NamespaceFilter inFilter = new NamespaceFilter("http://www.example.com/namespaceurl", true);
inFilter.setParent(reader);
//Prepare the input, in this case a java.io.File (output)
InputSource is = new InputSource(new FileInputStream(output));
//Create a SAXSource specifying the filter
SAXSource source = new SAXSource(inFilter, is);
//Do unmarshalling
Object myJaxbObject = u.unmarshal(source);
To use this filter to output XML from a JAXB object, have a look at the code below.
//Prepare JAXB objects
JAXBContext jc = JAXBContext.newInstance("jaxb.package");
Marshaller m = jc.createMarshaller();
//Define an output file
File output = new File("test.xml");
//Create a filter that will remove the xmlns attribute
NamespaceFilter outFilter = new NamespaceFilter(null, false);
//Do some formatting, this is obviously optional and may effect performance
OutputFormat format = new OutputFormat();
format.setIndent(true);
format.setNewlines(true);
//Create a new org.dom4j.io.XMLWriter that will serve as the
//ContentHandler for our filter.
XMLWriter writer = new XMLWriter(new FileOutputStream(output), format);
//Attach the writer to the filter
outFilter.setContentHandler(writer);
//Tell JAXB to marshall to the filter which in turn will call the writer
m.marshal(myJaxbObject, outFilter);
This will hopefully help someone since I spent a day doing this and almost gave up twice ;)
I have encoding problems with XMLFilter solution, so I made XMLStreamReader to ignore namespaces:
class XMLReaderWithoutNamespace extends StreamReaderDelegate {
public XMLReaderWithoutNamespace(XMLStreamReader reader) {
super(reader);
}
#Override
public String getAttributeNamespace(int arg0) {
return "";
}
#Override
public String getNamespaceURI() {
return "";
}
}
InputStream is = new FileInputStream(name);
XMLStreamReader xsr = XMLInputFactory.newFactory().createXMLStreamReader(is);
XMLReaderWithoutNamespace xr = new XMLReaderWithoutNamespace(xsr);
Unmarshaller um = jc.createUnmarshaller();
Object res = um.unmarshal(xr);
I believe you must add the namespace to your xml document, with, for example, the use of a SAX filter.
That means:
Define a ContentHandler interface with a new class which will intercept SAX events before JAXB can get them.
Define a XMLReader which will set the content handler
then link the two together:
public static Object unmarshallWithFilter(Unmarshaller unmarshaller,
java.io.File source) throws FileNotFoundException, JAXBException
{
FileReader fr = null;
try {
fr = new FileReader(source);
XMLReader reader = new NamespaceFilterXMLReader();
InputSource is = new InputSource(fr);
SAXSource ss = new SAXSource(reader, is);
return unmarshaller.unmarshal(ss);
} catch (SAXException e) {
//not technically a jaxb exception, but close enough
throw new JAXBException(e);
} catch (ParserConfigurationException e) {
//not technically a jaxb exception, but close enough
throw new JAXBException(e);
} finally {
FileUtil.close(fr); //replace with this some safe close method you have
}
}
In my situation, I have many namespaces and after some debug I find another solution just changing the NamespaceFitler class. For my situation (just unmarshall) this work fine.
import javax.xml.namespace.QName;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLFilterImpl;
import com.sun.xml.bind.v2.runtime.unmarshaller.SAXConnector;
public class NamespaceFilter extends XMLFilterImpl {
private SAXConnector saxConnector;
#Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
if(saxConnector != null) {
Collection<QName> expected = saxConnector.getContext().getCurrentExpectedElements();
for(QName expectedQname : expected) {
if(localName.equals(expectedQname.getLocalPart())) {
super.startElement(expectedQname.getNamespaceURI(), localName, qName, atts);
return;
}
}
}
super.startElement(uri, localName, qName, atts);
}
#Override
public void setContentHandler(ContentHandler handler) {
super.setContentHandler(handler);
if(handler instanceof SAXConnector) {
saxConnector = (SAXConnector) handler;
}
}
}
Another way to add a default namespace to an XML Document before feeding it to JAXB is to use JDom:
Parse XML to a Document
Iterate through and set namespace on all Elements
Unmarshall using a JDOMSource
Like this:
public class XMLObjectFactory {
private static Namespace DEFAULT_NS = Namespace.getNamespace("http://tempuri.org/");
public static Object createObject(InputStream in) {
try {
SAXBuilder sb = new SAXBuilder(false);
Document doc = sb.build(in);
setNamespace(doc.getRootElement(), DEFAULT_NS, true);
Source src = new JDOMSource(doc);
JAXBContext context = JAXBContext.newInstance("org.tempuri");
Unmarshaller unmarshaller = context.createUnmarshaller();
JAXBElement root = unmarshaller.unmarshal(src);
return root.getValue();
} catch (Exception e) {
throw new RuntimeException("Failed to create Object", e);
}
}
private static void setNamespace(Element elem, Namespace ns, boolean recurse) {
elem.setNamespace(ns);
if (recurse) {
for (Object o : elem.getChildren()) {
setNamespace((Element) o, ns, recurse);
}
}
}
This is just a modification of lunicon's answer (https://stackoverflow.com/a/24387115/3519572) if you want to replace one namespace for another during parsing. And if you want to see what exactly is going on, just uncomment the output lines and set a breakpoint.
public class XMLReaderWithNamespaceCorrection extends StreamReaderDelegate {
private final String wrongNamespace;
private final String correctNamespace;
public XMLReaderWithNamespaceCorrection(XMLStreamReader reader, String wrongNamespace, String correctNamespace) {
super(reader);
this.wrongNamespace = wrongNamespace;
this.correctNamespace = correctNamespace;
}
#Override
public String getAttributeNamespace(int arg0) {
// System.out.println("--------------------------\n");
// System.out.println("arg0: " + arg0);
// System.out.println("getAttributeName: " + getAttributeName(arg0));
// System.out.println("super.getAttributeNamespace: " + super.getAttributeNamespace(arg0));
// System.out.println("getAttributeLocalName: " + getAttributeLocalName(arg0));
// System.out.println("getAttributeType: " + getAttributeType(arg0));
// System.out.println("getAttributeValue: " + getAttributeValue(arg0));
// System.out.println("getAttributeValue(correctNamespace, LN):"
// + getAttributeValue(correctNamespace, getAttributeLocalName(arg0)));
// System.out.println("getAttributeValue(wrongNamespace, LN):"
// + getAttributeValue(wrongNamespace, getAttributeLocalName(arg0)));
String origNamespace = super.getAttributeNamespace(arg0);
boolean replace = (((wrongNamespace == null) && (origNamespace == null))
|| ((wrongNamespace != null) && wrongNamespace.equals(origNamespace)));
return replace ? correctNamespace : origNamespace;
}
#Override
public String getNamespaceURI() {
// System.out.println("getNamespaceCount(): " + getNamespaceCount());
// for (int i = 0; i < getNamespaceCount(); i++) {
// System.out.println(i + ": " + getNamespacePrefix(i));
// }
//
// System.out.println("super.getNamespaceURI: " + super.getNamespaceURI());
String origNamespace = super.getNamespaceURI();
boolean replace = (((wrongNamespace == null) && (origNamespace == null))
|| ((wrongNamespace != null) && wrongNamespace.equals(origNamespace)));
return replace ? correctNamespace : origNamespace;
}
}
usage:
InputStream is = new FileInputStream(xmlFile);
XMLStreamReader xsr = XMLInputFactory.newFactory().createXMLStreamReader(is);
XMLReaderWithNamespaceCorrection xr =
new XMLReaderWithNamespaceCorrection(xsr, "http://wrong.namespace.uri", "http://correct.namespace.uri");
rootJaxbElem = (JAXBElement<SqgRootType>) um.unmarshal(xr);
handleSchemaError(rootJaxbElem, pmRes);

Categories

Resources