Transform XML to CSV with Saxon = error: Output character 160 - java

I am trying to transform XML (UTF-8 encoding) to CSV (windows-1251 encoding), but I get an error:
net.sf.saxon.trans.DynamicError: Output character not available in this encoding (decimal 160)
I understand that the XML text contains a character with code 160, which is not available in windows-1251.
I tried to clean the XML before the transformation, but it doesn't help:
// Round-trips the string s through windows-1251: encode it to bytes, then decode the bytes back.
// NOTE(review): this uses the JDK's windows-1251 table, which may differ from the table used by
// the XSLT processor — confirm whether U+00A0 is actually unmappable here.
Charset charset = Charset.forName("windows-1251");
CharsetDecoder decoder = charset.newDecoder();
CharsetEncoder encoder = charset.newEncoder();
// Characters the encoder cannot map are replaced instead of raising an error.
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
// Keep the unmodified input as the result if the round trip fails.
String result = s;
try {
ByteBuffer bbuf = encoder.encode(CharBuffer.wrap(s));
CharBuffer cbuf = decoder.decode(bbuf);
result = cbuf.toString();
} catch (CharacterCodingException cce) {
log.error("Exception during character encoding/decoding: " + cce.getMessage());
}
What is the best way to solve this problem?
My XSL sample:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE csv-style [
  <!ENTITY semicolons ';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;'>
  <!ENTITY commas ',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'>
]>
<!--
  Converts a <blocks> report into delimiter-separated text encoded as windows-1251.

  Parameters:
    delim    : 'comma' selects commas; any other value selects semicolons.
    showHead : 'yes' emits the rows of <head>; any other value suppresses them.

  Every <col> carries a numeric @id. Missing columns are represented by extra
  delimiters, cut with substring() from the fixed runs declared as entities above.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <xsl:output method="text" indent="no" omit-xml-declaration="yes" encoding="windows-1251"/>

  <xsl:param name="delim">semicolon</xsl:param>
  <xsl:param name="showHead">yes</xsl:param>

  <!-- Run of delimiter characters from which the required number is taken. -->
  <xsl:variable name="delimStr">
    <xsl:choose>
      <xsl:when test="$delim = 'comma'">&commas;</xsl:when>
      <xsl:otherwise>&semicolons;</xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:template match="blocks">
    <xsl:apply-templates select="*"/>
  </xsl:template>

  <xsl:template match="description|pair|foot|body/table/head">
    <!-- don't do anything just skip it -->
  </xsl:template>

  <xsl:template match="table">
    <xsl:apply-templates select="table|head|body"/>
  </xsl:template>

  <!-- Emits one quoted cell and the delimiters separating it from its neighbours. -->
  <xsl:template match="col">
    <!-- Leading delimiters when the first cell processed is not column 1. -->
    <xsl:if test="position()=1">
      <xsl:value-of select="substring($delimStr, 1, @id - 1)"/>
    </xsl:if>
    <xsl:choose>
      <xsl:when test="@value">
        <xsl:text>"</xsl:text>
        <!-- Double any embedded quote so the cell stays a valid quoted CSV field. -->
        <xsl:variable name="escape">
          <xsl:call-template name="_replace_string">
            <xsl:with-param name="string" select="@value"/>
          </xsl:call-template>
        </xsl:variable>
        <xsl:value-of select="$escape"/>
        <xsl:text>"</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:text>""</xsl:text>
        <xsl:apply-templates/>
      </xsl:otherwise>
    </xsl:choose>
    <!-- Trailing delimiters: up to the table's @colNum for the last cell,
         otherwise up to the @id of the next cell. -->
    <xsl:choose>
      <xsl:when test="position()=last()">
        <xsl:value-of select="substring($delimStr, 1, ancestor::table[1]/@colNum - @id)"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="substring($delimStr, 1, following-sibling::col[1]/@id - @id)"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template> <!-- col -->

  <!-- A row is written only when at least one of its cells has a @value. -->
  <xsl:template match="row">
    <xsl:if test="col[@value][1]">
      <xsl:apply-templates select="col"/>
      <!-- Character reference keeps the line break independent of stylesheet formatting. -->
      <xsl:text>&#10;</xsl:text>
    </xsl:if>
  </xsl:template>

  <xsl:template match="head">
    <xsl:if test="$showHead = 'yes'">
      <xsl:apply-templates select="*"/>
    </xsl:if>
  </xsl:template>

  <xsl:template match="body">
    <xsl:apply-templates select="*"/>
  </xsl:template>

  <!--
    Returns $string with every double quote doubled.
    find/replace are local variables, not parameters, so the recursive call
    passes only $string.
  -->
  <xsl:template name="_replace_string">
    <xsl:param name="string" select="''"/>
    <xsl:variable name="find">"</xsl:variable>
    <xsl:variable name="replace">""</xsl:variable>
    <xsl:choose>
      <xsl:when test="contains($string,$find)">
        <xsl:value-of select="concat(substring-before($string,$find),$replace)"/>
        <xsl:call-template name="_replace_string">
          <xsl:with-param name="string" select="substring-after($string,$find)"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$string"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>
My XML sample:
<?xml version="1.0" encoding="UTF-8" ?><blocks type="report"><functions><func num="4" text=" nameOf_10031"></func><func num="5" text="name Of_10071"></func><func num="6" text="name Of_10006"></func></functions><description name="[441] testesttest with 160 "><rows total="44" start="1" end="44" show-data="yes"></rows><columns count="10"><column id="1" type="4" position="1" width="" format="&apos;dd.mm.yyyy&apos;"></column><column id="2" type="4" position="2" width="" format="&apos;dd.mm.yyyy&apos;"></column><column id="3" type="3" position="3" width=""></column><column id="4" type="2" position="4" width=""></column><column id="5" type="2" position="5" width=""></column><column id="6" type="2" position="6" width=""></column><column id="7" type="2" position="7" width=""></column><column id="8" type="2" position="8" width=""></column><column id="9" type="2" position="9" width=""></column><column id="10" type="2" position="10" width=""></column></columns></description><pair name="ReportName" value="test test test "></pair><table colNum="10" id="12561"><head><row><col id="1" value="test test test"></col><col id="2" value=" test test test"></col><col id="3" value="test test test"></col><col id="4" value="test test test"></col><col id="5" value="test test test"></col><col id="6" value="test test test"></col><col id="7" value="test test test"></col><col id="8" value=" test test test"></col><col id="9" value="test test test"></col><col id="10" value="test test test"></col></row></head><body><row num="1"><col id="1" value="01.07.2006"></col><col id="2"></col><col id="3" value="53363"></col><col id="4" value="65187" record-id="65187"></col><col id="5" value="53363" record-id="53368"></col><col id="6" value="test test test" record-id="1974"></col><col id="7"></col><col id="8"></col><col id="9" value="test test test"></col><col id="10"></col></row></body></table></blocks>
When I run
java -cp saxon-9.1.0.8.jar net.sf.saxon.Transform -t -s:myxml.xml -xsl:myxsl.xsl -o:result.csv
I get the same error (160):
Saxon 9.1.0.8J from Saxonica
Java version 1.8.0_333
Warning: at xsl:stylesheet on line 11 column 81 of myxsl.xsl:
Running an XSLT 1.0 stylesheet with an XSLT 2.0 processor
Stylesheet compilation time: 378 milliseconds
Processing file:/D:/111/myxml2.xml
Building tree for file:/D:/111/myxml2.xml using class net.sf.saxon.tinytree.TinyBuilder
Tree built in 4 milliseconds
Tree size: 46 nodes, 0 characters, 99 attributes
Loading net.sf.saxon.event.MessageEmitter
Error at xsl:value-of on line 46 of myxsl.xsl:
Output character not available in this encoding (decimal 160)
at xsl:apply-templates (file:/D:/111/myxsl.xsl#66)
processing /blocks/table[1]/head[1]/row[1]/col[2]
at xsl:apply-templates (file:/D:/111/myxsl.xsl#73)
processing /blocks/table[1]/head[1]/row[1]
at xsl:apply-templates (file:/D:/111/myxsl.xsl#32)
processing /blocks/table[1]/head[1]
at xsl:apply-templates (file:/D:/111/myxsl.xsl#24)
processing /blocks/table[1]
in built-in template rule
Transformation failed: Run-time errors were reported
When I use a newer version, for example Saxon-HE-10.3.jar, there are no problems, but unfortunately I can't upgrade to it

A character map that maps, for example, the non-breaking space (160) to a normal space (32) would be:
<!-- Replaces every non-breaking space (U+00A0) with an ordinary space during serialization.
     The character is written as a numeric reference so it stays visible and survives copy/paste. -->
<xsl:character-map name="m1">
  <xsl:output-character character="&#160;" string=" "/>
</xsl:character-map>
<xsl:output use-character-maps="m1"/>
Character maps have been supported since XSLT 2.0. I think Saxon 8.9 was the first version to implement the 2.0 standard, so 9.1 should support them.

You are using a very old (and unsupported) version of Saxon. In Saxon 9.1 (released in 2009) the software maintained its own data tables for character encoding, rather than getting it all from the JDK. According to the definition of CP1251 used in the Saxon 9.1 data tables, there is no mapping for the Unicode codepoint 160. The relevant source code contains a link to the URI http://www.microsoft.com/globaldev/reference/sbcs/1251.htm as its source of information, but that web page is no longer available.
Sorry we can't help you more, but this kind of thing happens if you don't upgrade your software from time to time.
Your best way forward is probably to output the data in UTF-8 encoding and then use some other utility to convert the CSV file from UTF-8 to CP1251.

Related

Converting unusual XML data to CSV through XSLT

<?xml version="1.0" encoding="UTF-8"?>
<FirstTag version="1.0" createTime="15:59:59" DATE="20161209">
<SecondTag Name="House01">
<a>
<Furniture FURN_ID="FUR00001" FURN_AMT="2" price="10000"/>
<Furniture FURN_ID="FUR00002" FURN_AMT="1" price="20000"/>
</a>
<b>
<Furniture FURN_ID="FUR00003" FURN_AMT="2" price="30000"/>
<Furniture FURN_ID="FUR00004" FURN_AMT="1" price="40000"/>
</b>
<c>
<Furniture FURN_ID="FUR00005" FURN_AMT="2" price="50000"/>
<Furniture FURN_ID="FUR00006" FURN_AMT="1" price="60000"/>
</c>
<d>
<Furniture FURN_ID="FUR00007" FURN_AMT="1" price="70000"/>
<Furniture FURN_ID="FUR00008" FURN_AMT="1" price="80000"/>
</d>
<e>
<Furniture FURN_ID="FUR00009" FURN_AMT="1" price="90000"/>
<Furniture FURN_ID="FUR00010" FURN_AMT="1" price="100000"/>
</e>
<f>
<Furniture FURN_ID="FUR00011" FURN_AMT="1" price="110000"/>
<Furniture FURN_ID="FUR00012" FURN_AMT="2" price="120000"/>
<Furniture FURN_ID="FUR00013" FURN_AMT="2" price="120000"/>
</f>
</SecondTag>
</FirstTag>
Above is the simple XML (with node values) that I produced from my Java program. I want to send this XML data to another application that already has a CSV load function for its UI/batch processes. I've heard of XSLT but have never used it; I tried some tutorials but got confused when trying to get all the values into a CSV.
Here's what it should look like in csv (to start, after success need to do some calculation):
In this example in one house (HOUSE01) I would like to output all the furniture in different room (i.e. a is room 1, b is room 2, c is room 3, etc).
I've been trying to build the XSLT, below is the XSLT:
<?xml version="1.0" encoding="utf-8"?>
<!--
  Attempt at a field-driven CSV export: writes a header row from the names in
  fieldArray, then one row per <a> element looking up a child named after each field.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>
  <xsl:variable name="delimiter" select="','"/>

  <!-- define an array containing the fields we are interested in -->
  <xsl:variable name="fieldArray">
    <field>Name</field>
    <field>a</field>
    <field>b</field>
    <field>c</field>
    <field>d</field>
    <field>e</field>
    <field>f</field>
  </xsl:variable>
  <!-- document('') re-reads this stylesheet so the <field> elements can be used as a node-set. -->
  <xsl:param name="fields" select="document('')/*/xsl:variable[@name='fieldArray']/*"/>

  <xsl:template match="/">
    <!-- output the header row -->
    <xsl:for-each select="$fields">
      <xsl:if test="position() != 1">
        <xsl:value-of select="$delimiter"/>
      </xsl:if>
      <xsl:value-of select="."/>
    </xsl:for-each>
    <!-- output newline -->
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates select="/*/*"/>
  </xsl:template>

  <xsl:template match="a">
    <!-- Remember the matched element: inside for-each the context node is a <field>. -->
    <xsl:variable name="currNode" select="."/>
    <!-- output the data row -->
    <!-- loop over the field names and find the value of each one in the xml -->
    <xsl:for-each select="$fields">
      <xsl:if test="position() != 1">
        <xsl:value-of select="$delimiter"/>
      </xsl:if>
      <xsl:value-of select="$currNode/*[name() = current()]/@FURN_ID"/>
      <!-- <xsl:value-of select="$currNode/*[name() = current()]" /> -->
    </xsl:for-each>
    <!-- output newline -->
    <xsl:text>&#10;</xsl:text>
  </xsl:template>
</xsl:stylesheet>
I'm using some reference from another page, and can build some simple XSLT to transform XML to CSV, however, I need some guidance in order to solve my main XML issue. In the future after I can get the node value inside the loop, I'd like to sum the total price of every furniture for each room.
Expected final csv result:
Name,a,b,c,d,e,f
House01,40000,100000,160000,150000,190000,350000
Thank you.
Getting the value of an attribute in XML
This XSLT will give the output you specified. See demo.
Updated: I missed the a value in the output.
<?xml version="1.0" encoding="utf-8"?>
<!--
  Writes a fixed header, then one CSV row per Furniture element of room <a>.
  Each row holds the house name followed by the FURN_ID found at the same
  position in rooms a to f.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>

  <xsl:template match="/">
    <xsl:text>Name,a,b,c,d,e,f&#10;</xsl:text>
    <xsl:apply-templates select="FirstTag/SecondTag/a/Furniture"/>
  </xsl:template>

  <xsl:template match="Furniture">
    <!-- Position of this Furniture within room a; used to pick the matching item in the other rooms. -->
    <xsl:variable name="pos" select="position()"/>
    <xsl:value-of select="../../@Name"/>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="@FURN_ID"/>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="../../b/Furniture[position()=$pos]/@FURN_ID"/>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="../../c/Furniture[position()=$pos]/@FURN_ID"/>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="../../d/Furniture[position()=$pos]/@FURN_ID"/>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="../../e/Furniture[position()=$pos]/@FURN_ID"/>
    <xsl:text>,</xsl:text>
    <xsl:value-of select="../../f/Furniture[position()=$pos]/@FURN_ID"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>
</xsl:stylesheet>
The second (final) .csv can be produced as follows:
XSLT 1.0
<!--
  Two-pass summary: the first pass computes FURN_AMT * price for every Furniture,
  grouped under a copy of its room element; the second pass writes a header of
  room names and one row with the per-room sums.
  Requires the EXSLT node-set() extension function.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:exsl="http://exslt.org/common"
                extension-element-prefixes="exsl">
  <xsl:output method="text" encoding="UTF-8"/>

  <xsl:template match="/FirstTag">
    <!-- first pass -->
    <xsl:variable name="values-rtf">
      <xsl:for-each select="SecondTag/*">
        <xsl:copy>
          <xsl:for-each select="Furniture">
            <value>
              <xsl:value-of select="@FURN_AMT * @price"/>
            </value>
          </xsl:for-each>
        </xsl:copy>
      </xsl:for-each>
    </xsl:variable>
    <!-- Convert the result tree fragment into a node-set so it can be navigated. -->
    <xsl:variable name="values" select="exsl:node-set($values-rtf)/*"/>
    <!-- header -->
    <xsl:text>Name,</xsl:text>
    <xsl:for-each select="$values">
      <xsl:value-of select="name()"/>
      <xsl:if test="position()!=last()">
        <xsl:text>,</xsl:text>
      </xsl:if>
    </xsl:for-each>
    <xsl:text>&#10;</xsl:text>
    <!-- summary -->
    <xsl:value-of select="SecondTag/@Name"/>
    <xsl:text>,</xsl:text>
    <xsl:for-each select="$values">
      <xsl:value-of select="sum(value)"/>
      <xsl:if test="position()!=last()">
        <xsl:text>,</xsl:text>
      </xsl:if>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
This assumes you are limited to XSLT 1.0; in XSLT 2.0 this could be done in a single pass.
Note that I am assuming the input XML will contain a single "house" (SecondTag), with a variable number of "rooms" (a, b, c, etc.). Otherwise it's not clear what the header of the .csv should be.
I am not sure if you need to also have the interim .csv - and in any case, the logic required to create it is not clear (why is FUR00013 missing from the output?).

Convert text to xml using XSLT

I have a text file as given below:
value1 value2 value3 value4
I want to convert it as following xml using XSLT
<values>
<value>value1</value>
<value>value2</value>
<value>value3</value>
<value>value4</value>
</values>
Thanks in advance.
Assuming XSLT 2.0,
<!-- Named entry template: reads input.txt as plain text, splits it on runs of
     whitespace and wraps every token in a <value> element. -->
<xsl:template name="main">
  <xsl:variable name="rawText" select="unparsed-text('input.txt')"/>
  <values>
    <xsl:for-each select="tokenize($rawText, '\s+')">
      <value>
        <xsl:value-of select="."/>
      </value>
    </xsl:for-each>
  </values>
</xsl:template>
If you can edit your input so that it contains a root element and uses a tab character as the separator, as shown below:
<root>value1 value2 value3 value4</root>
then, you can apply the following stylesheet:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Splits the string value of the input document on a delimiter and emits one
  <value> element per non-empty piece, all wrapped in <values>.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output omit-xml-declaration="yes" indent="yes"/>

  <xsl:template match="/">
    <values>
      <xsl:call-template name="tokenizeString">
        <xsl:with-param name="delimiter" select="' '"/>
        <xsl:with-param name="list" select="."/>
      </xsl:call-template>
    </values>
  </xsl:template>

  <!--
    Recursive splitter.
      list      : text still to be split
      delimiter : separator string
  -->
  <xsl:template name="tokenizeString">
    <xsl:param name="list"/>
    <xsl:param name="delimiter"/>
    <xsl:choose>
      <!-- A delimiter remains: emit the piece in front of it, then recurse on the rest. -->
      <xsl:when test="contains($list, $delimiter)">
        <value>
          <xsl:value-of select="substring-before($list, $delimiter)"/>
        </value>
        <xsl:call-template name="tokenizeString">
          <xsl:with-param name="delimiter" select="$delimiter"/>
          <xsl:with-param name="list" select="substring-after($list, $delimiter)"/>
        </xsl:call-template>
      </xsl:when>
      <!-- Last piece: emitted only when it is not empty. -->
      <xsl:when test="not($list = '')">
        <value>
          <xsl:value-of select="$list"/>
        </value>
      </xsl:when>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>
and produce:
<values>
<value>value1</value>
<value>value2</value>
<value>value3</value>
<value>value4</value>
</values>

XSLT 1: XPath - select/extract a number from a string, with default value

I'm transforming a document using the XSL engine provided by out-of-the-box Java 6.
I need to grab a value from an attribute, and use it as an index attribute in a document I am transforming to.
For example, the element:
<myElem bogen="K [1]"/>
the attribute 'bogen' may have values:
"K [1]"
"K [2]"
up to 'n' values.
It may also simply have
"K" to indicate the same as "K [1]"
The corresponding transformation result (from "K [1]") would look like this:
<myTransformedElem index="1"/>
I could use the transformation:
<!-- Keeps only the digits of @bogen: the inner translate() yields its non-digit characters, which the outer translate() then removes. -->
<xsl:value-of select="translate(@bogen,translate(@bogen, '0123456789', ''), '')"/>
if it were just values expected as "K [1]".
But where there is the possibility of having no actual number included, such as "K", this stuffs me up.
The only way I can do it is if I abuse the 'number' function, expect a 'NaN' and translate that. Which seems exceedingly ugly.
For example:
<!-- Extracts the digits of @bogen as a number; when there are none, number() gives NaN, whose string 'NaN' is turned into '1' (a and second N removed, first N mapped to 1). -->
<xsl:value-of select="translate(string(number(translate(@bogen,translate(@bogen, '0123456789', ''), ''))), 'aN', '1')"/>
gets me the correct result each time.
Is there a better, nicer way to do this in XSLT without resorting to custom methods (i.e., import static java methods) ?
For example; were I to need a default value of a multi-character string, I couldn't use the above solution.
thanks in advance.
sean
Here's one way to do this:
<!--
  Outputs the text found between '[' and ']' in $value, or $default when
  nothing is found there.
    value   : string to inspect
    default : fallback output (1 unless overridden)
-->
<xsl:template name="GetIndex">
  <xsl:param name="value"/>
  <xsl:param name="default" select="1"/>
  <xsl:variable name="bracketed"
                select="substring-before(substring-after($value, '['), ']')"/>
  <xsl:choose>
    <xsl:when test="string-length($bracketed) = 0">
      <xsl:value-of select="$default"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$bracketed"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
This call produces the value 1:
<xsl:call-template name="GetIndex">
<xsl:with-param name="value" select="'K[1]'" />
</xsl:call-template>
This call produces the value 37:
<xsl:call-template name="GetIndex">
<xsl:with-param name="value" select="'K[37]'" />
</xsl:call-template>
This call produces the value 1:
<xsl:call-template name="GetIndex">
<xsl:with-param name="value" select="'K'" />
</xsl:call-template>
This call produces the value 999:
<xsl:call-template name="GetIndex">
<xsl:with-param name="value" select="'K'" />
<xsl:with-param name="default" select="999" />
</xsl:call-template>
If you find the xsl:choose to be too verbose, you can rewrite GetIndex like this and get the same results:
<!--
  Compact variant: outputs the text between '[' and ']' in $value, followed by
  $default only when that text is empty.
    value   : string to inspect
    default : fallback output (1 unless overridden)
-->
<xsl:template name="GetIndex">
  <xsl:param name="value"/>
  <xsl:param name="default" select="1"/>
  <xsl:variable name="bracketed"
                select="substring-before(substring-after($value, '['), ']')"/>
  <!-- Number of characters of the default to append: 0 when an index was found,
       otherwise the full length of the default. -->
  <xsl:variable name="fallbackLength"
                select="not($bracketed) * string-length($default)"/>
  <xsl:value-of select="concat($bracketed, substring($default, 1, $fallbackLength))"/>
</xsl:template>
Personally, I think the version with xsl:choose is clearer, even if it is longer.
with this XSL:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maps each myElem to a myTransformedElem whose index is the digits contained
  in @bogen, or 1 when @bogen contains no digit.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="1.0">
  <xsl:template match="myElem">
    <xsl:choose>
      <!-- Removing the digits changes the value only when there is at least one digit. -->
      <xsl:when test="translate(@bogen, '0123456789', '') != @bogen">
        <myTransformedElem index="{translate(@bogen,translate(@bogen, '0123456789', ''), '')}"/>
      </xsl:when>
      <xsl:otherwise>
        <myTransformedElem index="1"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>
when applied to this XML:
<root>
<myElem bogen="K"/>
</root>
produces
<myTransformedElem index="1"/>
and when applied to this XML:
<root>
<myElem bogen="K [2]"/>
</root>
produces
<myTransformedElem index="2"/>

XSLT custom function returning nodeset or XML fragment (not simple datatype)

I am trying to develop an XSLT custom function that could return node set or an XML fragment, let's say something like:
Input document:
<root>
<!--
author: blablabla
usage: more blablabla
labelC: [in=2] <b>formatted</b> blablabla
-->
<tag1 name="first">
<tag2>content a</tag2>
<tag2>content b</tag2>
<tag3 attrib="val">content c</tag3>
</tag1>
<!--
author: blebleble
usage: more blebleble
labelC: blebleble
-->
<tag1 name="second">
<tag2>content x</tag2>
<tag2>content y</tag2>
<tag3 attrib="val">content z</tag3>
</tag1>
</root>
So that an XSLT template such as:
<!-- Wraps the result of the d:genDoc extension function, called on the matched comment, in section/para. -->
<!-- NOTE(review): match patterns normally allow only the child and attribute axes; confirm the processor accepts preceding:: here. -->
<xsl:template match="//tag1/preceding::comment()[1]" xmlns:d="java:com.dummy.func">
<section>
<para>
<!-- xsl:value-of writes the string value of the function result. -->
<xsl:value-of select="d:genDoc(.)"/>
</para>
</section>
</xsl:template>
Would produce:
<section>
<para>
<author>blablabla</author>
<usage>more blablabla</usage>
<labelC in="2"><b>formatted</b> blablabla</labelC>
</para>
</section>
When matched on the first occurrence of tag1
and
<section>
<para>
<author>blebleble</author>
<usage>more blebleble</usage>
<labelC>blebleble</labelC>
</para>
</section>
When matched on the second occurrence.
Basically what I want to achieve with this custom function is to parse some meta-data present in the comments and use it to generate XML.
I found some examples online, one at:
http://cafeconleche.org/books/xmljava/chapters/ch17s03.html
According to the example, my function should return one of the following
org.w3c.dom.traversal.NodeIterator,
org.apache.xml.dtm.DTM,
org.apache.xml.dtm.DTMAxisIterator,
org.apache.xml.dtm.DTMIterator,
org.w3c.dom.Node and its subtypes (Element, Attr, etc),
org.w3c.dom.DocumentFragment
I was able to implement a function returning the XML as simple type String.
This, however poses several other problems: the main being the markers characters get escaped when inserted in the original XML.
Does anybody have an example of how to implement such function?
I am mostly interested in how to return a proper XML node set to the calling template.
The below may get you a long way along the road you want to go. Note that this requires XSLT 2.0 version (in XSLT 1.0 it will be possible too, when supplying a replacement function for tokenize). Also note that this assumes a specific comment contents structure.
Explanation: comments are first split up into rows (delimiter &#xA;, which is a line feed), then into tag + value (delimiter ":", splitting into author, usage and labelC; the order is not important here), then into attributes and value for labelC (delimiter "] ", recognizing attributes as starting with "[").
Note that a lot of whitespace-wiping is done using normalize-space().
Edited: xslt version with function see at the bottom
XSLT
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Turns every comment of the input into section/para markup. Each line of the
  comment has the form "label: text"; the labels author, usage and labelC
  become elements, and a leading "[name=value] " on labelC becomes an attribute.
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

  <xsl:template match="/">
    <output>
      <xsl:apply-templates/>
    </output>
  </xsl:template>

  <!-- The children of tag1 produce no output. -->
  <xsl:template match="tag1/*"/>

  <xsl:template match="comment()">
    <section>
      <para>
        <!-- Split on line feeds. The delimiter is a character reference because a
             literal line break inside an attribute value is normalized to a space
             by the XML parser. -->
        <xsl:for-each select="tokenize(., '&#xA;')[string-length() != 0]">
          <!-- splitup[1] is the label, splitup[2] the text after the colon. -->
          <xsl:variable name="splitup" select="tokenize(normalize-space(current()), ':')"/>
          <xsl:choose>
            <xsl:when test="$splitup[1]='author'">
              <author><xsl:value-of select="normalize-space($splitup[2])"/></author>
            </xsl:when>
            <xsl:when test="$splitup[1]='usage'">
              <usage><xsl:value-of select="normalize-space($splitup[2])"/></usage>
            </xsl:when>
            <xsl:when test="$splitup[1]='labelC'">
              <labelC>
                <xsl:for-each select="tokenize($splitup[2], '] ')[string-length() != 0]">
                  <xsl:variable name="labelCpart" select="normalize-space(current())"/>
                  <xsl:choose>
                    <!-- A part starting with '[' is a name=value pair and becomes an attribute. -->
                    <xsl:when test="substring($labelCpart, 1,1) = '['">
                      <xsl:variable name="attr" select="tokenize(substring($labelCpart, 2), '=')"/>
                      <xsl:attribute name="{$attr[1]}"><xsl:value-of select="$attr[2]"/></xsl:attribute>
                    </xsl:when>
                    <xsl:otherwise>
                      <xsl:value-of select="$labelCpart"/>
                    </xsl:otherwise>
                  </xsl:choose>
                </xsl:for-each>
              </labelC>
            </xsl:when>
          </xsl:choose>
        </xsl:for-each>
      </para>
    </section>
  </xsl:template>
</xsl:stylesheet>
when applied to the following XML
<?xml version="1.0" encoding="UTF-8"?>
<root>
<!--
author: blablabla
usage: more blablabla
labelC: [in=2] <b>formatted</b> blablabla
-->
<tag1 name="first">
<tag2>content a</tag2>
<tag2>content b</tag2>
<tag3 attrib="val">content c</tag3>
</tag1>
<!--
author: blebleble
usage: more blebleble
labelC: blebleble
-->
<tag1 name="second">
<tag2>content x</tag2>
<tag2>content y</tag2>
<tag3 attrib="val">content z</tag3>
</tag1>
</root>
gives the following output
<?xml version="1.0" encoding="UTF-8"?>
<output>
<section>
<para>
<author>blablabla</author>
<usage>more blablabla</usage>
<labelC in="2"><b>formatted</b> blablabla</labelC>
</para>
</section>
<section>
<para>
<author>blebleble</author>
<usage>more blebleble</usage>
<labelC>blebleble</labelC>
</para>
</section>
</output>
EDITED xslt with function call (gives the same output)
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Function-based variant: d:section() builds the section/para markup from the
  text of a comment, and the comment() template copies its result to the output.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:d="java:com.dummy.func"
                exclude-result-prefixes="d">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

  <xsl:template match="/">
    <output>
      <xsl:apply-templates/>
    </output>
  </xsl:template>

  <!-- The children of tag1 produce no output. -->
  <xsl:template match="tag1/*"/>

  <!--
    d:section($comm): returns a section element built from the lines of $comm.
    Each line has the form "label: text"; author, usage and labelC are recognised.
  -->
  <xsl:function name="d:section">
    <xsl:param name="comm"/>
    <section>
      <para>
        <!-- Split on line feeds. The delimiter is a character reference because a
             literal line break inside an attribute value is normalized to a space
             by the XML parser. -->
        <xsl:for-each select="tokenize($comm, '&#xA;')[string-length() != 0]">
          <!-- splitup[1] is the label, splitup[2] the text after the colon. -->
          <xsl:variable name="splitup" select="tokenize(normalize-space(current()), ':')"/>
          <xsl:choose>
            <xsl:when test="$splitup[1]='author'">
              <author><xsl:value-of select="normalize-space($splitup[2])"/></author>
            </xsl:when>
            <xsl:when test="$splitup[1]='usage'">
              <usage><xsl:value-of select="normalize-space($splitup[2])"/></usage>
            </xsl:when>
            <xsl:when test="$splitup[1]='labelC'">
              <labelC>
                <xsl:for-each select="tokenize($splitup[2], '] ')[string-length() != 0]">
                  <xsl:variable name="labelCpart" select="normalize-space(current())"/>
                  <xsl:choose>
                    <!-- A part starting with '[' is a name=value pair and becomes an attribute. -->
                    <xsl:when test="substring($labelCpart, 1,1) = '['">
                      <xsl:variable name="attr" select="tokenize(substring($labelCpart, 2), '=')"/>
                      <xsl:attribute name="{$attr[1]}"><xsl:value-of select="$attr[2]"/></xsl:attribute>
                    </xsl:when>
                    <xsl:otherwise>
                      <xsl:value-of select="$labelCpart"/>
                    </xsl:otherwise>
                  </xsl:choose>
                </xsl:for-each>
              </labelC>
            </xsl:when>
          </xsl:choose>
        </xsl:for-each>
      </para>
    </section>
  </xsl:function>

  <xsl:template match="comment()">
    <!-- copy-of keeps the elements returned by the function; value-of would flatten them to text. -->
    <xsl:copy-of select="d:section(.)"/>
  </xsl:template>
</xsl:stylesheet>

How to insert text with XSLT v1.0 instead of using an XSLT v2.0 regex?

I have an XML file which describes (among other things) elements with attribute values that are fully qualified Java class names. I am trying to write an XSLT transformation to modify the class names in this file, such that (for example) occurrences of com.example.MyClass will become com.example.MockMyClass.
Here's that example again in the context of a fragment of the original file:
<event type="node-enter">
<action name="MyActionName" class="com.example.MyClass">
<bodyTemplate>
templates/MyTemplate.vm
</bodyTemplate>
</action>
</event>
I want the result to be:
<event type="node-enter">
<action name="MyActionName" class="com.example.MockMyClass">
<bodyTemplate>
templates/MyTemplate.vm
</bodyTemplate>
</action>
</event>
I'm doing this transformation using the Java JAXP API, and had written a lovely XSLT 2.0 compliant regex routine to get the results I want, only to discover that Java 5 doesn't support XSLT 2.0, which is required for regex support.
So my question is, what is the best way to achieve this using the archaic JAXP XSLT 1.0 API? That is, without the use of regular expressions. I looked for similar problems, but the requirement for backreferencing regex groups seems to make this a tricky one. This question is a start, but I need to insert text, within a matching string, rather than just replacing.
For reference, here is my regex (XSLT 2.0) attempt:
<!--
  Regex-based attempt: matches a dotted class name in action/@class and writes
  the first regex group, the text 'Mock', then the second regex group.
  Text that does not match is written unchanged.
  xsl:analyze-string and regex-group() are XSLT 2.0 features.
-->
<xsl:stylesheet version='1.0' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>
  <xsl:template match='/'>
    <xsl:analyze-string select='action/@class' regex='([A-Za-z0-9]+[$\.])+([A-Za-z0-9]+)'>
      <xsl:matching-substring>
        <xsl:value-of select='regex-group(1)'/>
        <xsl:text>Mock</xsl:text>
        <xsl:value-of select='regex-group(2)'/>
      </xsl:matching-substring>
      <xsl:non-matching-substring>
        <xsl:value-of select='.'/>
      </xsl:non-matching-substring>
    </xsl:analyze-string>
  </xsl:template>
</xsl:stylesheet>
How about the following?
<!--
  Writes $class with 'Mock' inserted after its last '.'; a name without any
  '.' is simply prefixed with 'Mock'.
    class : dotted class name
-->
<xsl:template name="classname">
  <xsl:param name="class"/>
  <xsl:choose>
    <!-- No dot left: this is the simple class name. -->
    <xsl:when test="not(contains($class, '.'))">
      <xsl:value-of select="concat('Mock', $class)"/>
    </xsl:when>
    <!-- Copy the leading segment and its dot, then continue with the remainder. -->
    <xsl:otherwise>
      <xsl:value-of select="substring-before($class, '.')"/>
      <xsl:text>.</xsl:text>
      <xsl:call-template name="classname">
        <xsl:with-param name="class" select="substring-after($class, '.')"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
This takes a classname as an input parameter and adds "Mock" after the final ".". You can call it with, for example,
<!-- Invokes the classname template on the class attribute of the context element. -->
<xsl:call-template name="classname">
  <xsl:with-param name="class" select="@class"/>
</xsl:call-template>
(I just gave it a quick try in Firefox, you might find you need to do some tidying up of white space.)
The following seems long; however, it uses ready-made parts (the strRev template is provided by FXSL and need not be rewritten). Also, nearly half of the code is the identity template and the passing of params to <xsl:call-template>. This is much shorter in XSLT 2.0.
When we have ready smaller parts/functions like the strRev template / reverse() function, then this solution doesn't require writing long and error-prone home-made recursive code.
The basic idea is that the last '.' character in a string is the first '.' character in the reversed string.
<!--
  Copies the input unchanged except for action/@class, where $pPrepend is
  inserted after the last '.'. The last '.' is located by reversing the string,
  so that it becomes the first '.'.
    pPrepend : text to insert (default 'Mock')
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output omit-xml-declaration="yes" indent="yes"/>

  <xsl:param name="pPrepend" select="'Mock'"/>

  <!-- The prepend text reversed, ready to be spliced into the reversed class name. -->
  <xsl:variable name="vRevPrepend">
    <xsl:call-template name="strRev">
      <xsl:with-param name="pText" select="$pPrepend"/>
    </xsl:call-template>
  </xsl:variable>

  <!-- Identity rule. -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>

  <xsl:template match="action/@class">
    <!-- strRev defaults pText to the context node, i.e. this attribute. -->
    <xsl:variable name="vRevText">
      <xsl:call-template name="strRev"/>
    </xsl:variable>
    <xsl:variable name="vRevNew"
                  select="concat(substring-before($vRevText,'.'), $vRevPrepend,
                                 '.', substring-after($vRevText,'.'))"/>
    <xsl:variable name="vNewText">
      <xsl:call-template name="strRev">
        <xsl:with-param name="pText" select="$vRevNew"/>
      </xsl:call-template>
    </xsl:variable>
    <xsl:attribute name="class">
      <xsl:value-of select="$vNewText"/>
    </xsl:attribute>
  </xsl:template>

  <!-- Outputs pText reversed: recurses on the tail first, then writes the first character. -->
  <xsl:template name="strRev">
    <xsl:param name="pText" select="."/>
    <xsl:if test="string-length($pText)">
      <xsl:call-template name="strRev">
        <xsl:with-param name="pText" select="substring($pText,2)"/>
      </xsl:call-template>
      <xsl:value-of select="substring($pText,1,1)"/>
    </xsl:if>
  </xsl:template>
</xsl:stylesheet>
when this transformation is applied on the provided XML document:
<event type="node-enter">
<action name="MyActionName" class="com.example.MyClass">
<bodyTemplate>
templates/MyTemplate.vm
</bodyTemplate>
</action>
</event>
the wanted, correct result is produced:
<event type="node-enter">
<action name="MyActionName" class="com.example.MockMyClass">
<bodyTemplate>
templates/MyTemplate.vm
</bodyTemplate>
</action>
</event>
II. XSLT 2.0 solution:
Exactly the same algorithm, but in XSLT 2.0 is really short:
<!--
  XSLT 2.0 version: copies the input unchanged except for action/@class, where
  $pPrepend is inserted after the last '.', found as the first '.' of the
  reversed string.
    pPrepend : text to insert (default 'Mock')
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:my="my:my">
  <xsl:output omit-xml-declaration="yes"/>

  <xsl:param name="pPrepend" select="'Mock'"/>

  <!-- Identity rule. -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>

  <xsl:template match="action/@class">
    <xsl:attribute name="class"
                   select="my:strRev(concat(substring-before(my:strRev(.),'.'),
                                            my:strRev($pPrepend),'.',
                                            substring-after(my:strRev(.),'.')))"/>
  </xsl:template>

  <!-- my:strRev($pText): returns $pText with its characters in reverse order. -->
  <xsl:function name="my:strRev" as="xs:string">
    <xsl:param name="pText" as="xs:string"/>
    <xsl:sequence select="codepoints-to-string(reverse(string-to-codepoints($pText)))"/>
  </xsl:function>
</xsl:stylesheet>

Categories

Resources