My goal is to extract and process any JavaScript code that a PDF document might contain. By opening a PDF in a text editor I can see objects like this:
402 0 obj
<</S/JavaScript/JS(\n\r\n /* Set day 25 */\r\n FormRouter_SetCurrentDate\("25"\);\r)>>
endobj
I am trying to use Apache PDFBox to accomplish this but so far with no luck.
This line returns an empty list:
jsObj = doc.getObjectsByType(COSName.JAVA_SCRIPT);
Can anyone give me some direction?
This tool is based on the PrintFields example in PDFBox. It will show the Javascript fields in forms. I wrote it last year for a guy who had problems with relationship between AcroForm fields (some fields were enabled / disabled depending on the values of other fields). There are still other places where there can be Javascript.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package pdfboxpageimageextraction;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.interactive.action.PDAction;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDNonTerminalField;
import org.apache.pdfbox.pdmodel.interactive.form.PDTerminalField;
/**
 * Prints the JavaScript actions attached to the AcroForm fields of a PDF document.
 * Based on the PrintFields example in PDFBox.
 *
 * @author Ben Litchfield
 *
 */
public class PrintJavaScriptFields
{
    /**
     * Prints the JavaScript of every field reachable from the document's AcroForm.
     * Does nothing when the document has no AcroForm.
     *
     * @param pdfDocument The PDF to get the fields from.
     *
     * @throws IOException If there is an error getting the fields.
     */
    public void printFields(PDDocument pdfDocument) throws IOException
    {
        PDDocumentCatalog docCatalog = pdfDocument.getDocumentCatalog();
        PDAcroForm acroForm = docCatalog.getAcroForm();
        if (acroForm == null)
        {
            // a document without a form has no field scripts to show
            return;
        }
        for (PDField field : acroForm.getFields())
        {
            processField(field, "|--", field.getPartialName());
        }
    }

    /**
     * Prints the JavaScript of one field and recurses into its children.
     *
     * For a terminal field this prints the additional actions (K, C, F, V) and every
     * widget action that is a JavaScript action. For a non-terminal field it descends
     * into each child.
     *
     * @param field the field to inspect
     * @param sLevel tree-drawing prefix, extended by one level for each recursion
     * @param sParent accumulated name of the parent fields
     *
     * @throws IOException If there is an error getting the fields.
     */
    private void processField(PDField field, String sLevel, String sParent) throws IOException
    {
        String partialName = field.getPartialName();
        if (field instanceof PDTerminalField)
        {
            PDTerminalField termField = (PDTerminalField) field;
            PDFormFieldAdditionalActions fieldActions = field.getActions();
            if (fieldActions != null)
            {
                System.out.println(field.getFullyQualifiedName() + ": " + fieldActions.getClass().getSimpleName() + " js field actionS:\n" + fieldActions.getCOSObject());
                printPossibleJS(fieldActions.getK());
                printPossibleJS(fieldActions.getC());
                printPossibleJS(fieldActions.getF());
                printPossibleJS(fieldActions.getV());
            }
            for (PDAnnotationWidget widgetAction : termField.getWidgets())
            {
                PDAction action = widgetAction.getAction();
                if (action instanceof PDActionJavaScript)
                {
                    System.out.println(field.getFullyQualifiedName() + ": " + action.getClass().getSimpleName() + " js widget action:\n" + action.getCOSObject());
                    printPossibleJS(action);
                }
            }
        }
        if (field instanceof PDNonTerminalField)
        {
            // null-safe: a field without a partial name must not break the traversal
            if (partialName != null && !partialName.equals(sParent))
            {
                sParent = sParent + "." + partialName;
            }
            for (PDField child : ((PDNonTerminalField) field).getChildren())
            {
                processField(child, "| " + sLevel, sParent);
            }
        }
    }

    /**
     * Prints the script of the given action if it is a JavaScript action; any other
     * action (including {@code null}) is ignored.
     *
     * @param kAction the action to inspect, may be null
     */
    private void printPossibleJS(PDAction kAction)
    {
        if (kAction instanceof PDActionJavaScript)
        {
            PDActionJavaScript jsAction = (PDActionJavaScript) kAction;
            String jsString = jsAction.getAction();
            if (jsString == null)
            {
                // a JavaScript action without a script: nothing to print
                return;
            }
            if (!jsString.contains("\n"))
            {
                // avoid display problems with netbeans
                jsString = jsString.replace("\r", "\n").replace("\n\n", "\n");
            }
            System.out.println(jsString);
            System.out.println();
        }
    }

    /**
     * This will read a PDF file and print out the JavaScript of its form fields.
     * The file location is hard-coded; the command line arguments are not used.
     *
     * @param args command line arguments (ignored)
     *
     * @throws IOException If there is an error loading or reading the document.
     */
    public static void main(String[] args) throws IOException
    {
        PDDocument pdf = null;
        try
        {
            pdf = PDDocument.load(new File("XXXX", "YYYYY.pdf"));
            PrintJavaScriptFields exporter = new PrintJavaScriptFields();
            exporter.printFields(pdf);
        }
        finally
        {
            if (pdf != null)
            {
                pdf.close();
            }
        }
    }
}
As a bonus, here's code to show all COSString objects:
/**
 * Collects every COSString reachable from the objects of a PDF document and prints them.
 */
public class ShowAllCOSStrings
{
    /** All strings found so far. */
    static Set<COSString> strings = new HashSet<COSString>();

    /**
     * Containers (dictionaries and arrays) that were already crawled, compared by identity.
     * crawl() follows dictionary entries through getDictionaryObject(); if that lookup leads
     * back to a dictionary that is already being crawled (e.g. mutual next/previous or
     * parent links), the recursion would otherwise never terminate.
     */
    private static final Set<COSBase> visited =
            java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<COSBase, Boolean>());

    /**
     * Recursively walks a COS object and records every COSString it encounters.
     * Dictionaries and arrays are descended into once; names, numbers, booleans, nulls,
     * COSObject references and {@code null} are ignored. Anything else is reported on stdout.
     *
     * @param base the object to inspect, may be null
     */
    static void crawl(COSBase base)
    {
        if (base instanceof COSString)
        {
            strings.add((COSString) base);
            return;
        }
        if (base instanceof COSDictionary)
        {
            if (!visited.add(base))
            {
                // already crawled (or currently being crawled higher up the stack)
                return;
            }
            COSDictionary dict = (COSDictionary) base;
            for (COSName key : dict.keySet())
            {
                crawl(dict.getDictionaryObject(key));
            }
            return;
        }
        if (base instanceof COSArray)
        {
            if (!visited.add(base))
            {
                return;
            }
            COSArray ar = (COSArray) base;
            for (COSBase item : ar)
            {
                crawl(item);
            }
            return;
        }
        if (base instanceof COSNull ||
            base instanceof COSObject ||
            base instanceof COSName ||
            base instanceof COSNumber ||
            base instanceof COSBoolean ||
            base == null)
        {
            return;
        }
        System.out.println("huh? " + base);
    }

    /**
     * Loads a PDF from a hard-coded location, crawls all of its objects and prints the
     * strings that were found.
     *
     * @param args command line arguments (ignored)
     * @throws IOException if the document cannot be loaded or closed
     */
    public static void main(String[] args) throws IOException
    {
        PDDocument doc = PDDocument.load(new File("XXX","YYY.pdf"));
        try
        {
            for (COSObject obj : doc.getDocument().getObjects())
            {
                COSBase base = obj.getObject();
                crawl(base);
            }
            System.out.println(strings.size() + " strings:");
            for (COSString s : strings)
            {
                String str = s.getString();
                if (!str.contains("\n"))
                {
                    // avoid display problems with netbeans
                    str = str.replaceAll("\r", "\n").replaceAll("\n\n", "\n");
                }
                System.out.println(str);
            }
        }
        finally
        {
            // release the document even when crawling or printing fails
            doc.close();
        }
    }
}
However Javascript can also be in a stream. See in the PDF spec "Additional entries specific to a rendition action", the JS entry:
A text string or stream containing a JavaScript script that shall be
executed when the action is triggered.
You can change the code above to catch COSStream objects too; COSStream is extended from COSDictionary.
Related
As the Adobe article "Digital Signatures in a PDF" states:
PDF defines two types of signatures: approval and certification. The
differences are as follows:
Approval: There can be any number of approval signatures in a document. The field may optionally be associated with FieldMDP
permissions.
Certification: There can be only one certification signature and it must be the first one in a document. The field is always associated
with DocMDP.
Using PDFBox examples I was able to successfully apply multiple signatures to my document: https://github.com/apache/pdfbox/blob/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/CreateVisibleSignature.java
In order to apply multiple signatures I was just running same code multiple times with different signature placeholders and images.
But what I have noticed is that, even though I am running the same code, it always sets the first signature as Certified and all the other ones as Approval.
But in my case, I don't want the document to be certified; I just need all signatures to be of the Approval type, including the first one. I know I can make the first certifying signature invisible, but I still do not want to certify the document at all.
I was trying to find a way to setup signature, but couldn't figure it out.
Here is my usage of example code (other classes are in the GitHub link above):
/**
 * Adds a visible signature to a PDF, derived from the PDFBox CreateVisibleSignature example.
 */
public class SignnerPDFBoxExample extends CreateSignatureBase {
    private SignatureOptions signatureOptions;
    private PDVisibleSignDesigner visibleSignDesigner;
    private final PDVisibleSigProperties visibleSignatureProperties = new PDVisibleSigProperties();
    private boolean lateExternalSigning = false;

    /**
     * Demo entry point: loads a JKS keystore, configures a visible signature on page 1 and
     * signs "Four_Signature_template.pdf" into a sibling file ending in "_signed.pdf".
     *
     * @param args command line arguments (ignored)
     * @throws Exception if the keystore cannot be loaded or signing fails
     */
    public static void main(String[] args) throws Exception {
        File ksFile = new File("keystore.jks");
        KeyStore keystore = KeyStore.getInstance("JKS");
        char[] pin = "123456".toCharArray();
        // close the keystore stream as soon as the keystore has been read
        try (FileInputStream keystoreStream = new FileInputStream(ksFile))
        {
            keystore.load(keystoreStream, pin);
        }
        SignnerPDFBoxExample signer = new SignnerPDFBoxExample(keystore, pin.clone());
        String inputFilename = "Four_Signature_template.pdf";
        File documentFile = new File(inputFilename);
        File signedDocumentFile;
        int page = 1;
        try (FileInputStream imageStream = new FileInputStream("client_signature.jpg"))
        {
            String name = documentFile.getName();
            String substring = name.substring(0, name.lastIndexOf('.'));
            signedDocumentFile = new File(documentFile.getParent(), substring + "_signed.pdf");
            // page is 1-based here
            signer.setVisibleSignDesigner(inputFilename, 0, 0, -50, imageStream, page);
        }
        signer.setVisibleSignatureProperties("name", "location", "Signed using PDFBox", 0, page, true);
        signer.signPDF(documentFile, signedDocumentFile, null, "certifySignature");
    }

    /**
     * Tells whether late external signing is enabled.
     *
     * @return true if the signature is written in a separate step after saving
     */
    public boolean isLateExternalSigning()
    {
        return lateExternalSigning;
    }

    /**
     * Set late external signing. Enable this if you want to activate the demo code where the
     * signature is kept and added in an extra step without using PDFBox methods. This is disabled
     * by default.
     *
     * @param lateExternalSigning true to write the signature in a separate step
     */
    public void setLateExternalSigning(boolean lateExternalSigning)
    {
        this.lateExternalSigning = lateExternalSigning;
    }

    /**
     * Set visible signature designer for a new signature field.
     *
     * @param filename name of the PDF file the signature is designed for
     * @param x position of the signature field
     * @param y position of the signature field
     * @param zoomPercent zoom applied to the signature image
     * @param imageStream stream of the signature image
     * @param page the signature should be placed on
     * @throws IOException if the designer cannot be created
     */
    public void setVisibleSignDesigner(String filename, int x, int y, int zoomPercent,
            FileInputStream imageStream, int page)
            throws IOException
    {
        visibleSignDesigner = new PDVisibleSignDesigner(filename, imageStream, page);
        visibleSignDesigner.xAxis(x).yAxis(y).zoom(zoomPercent).adjustForRotation();
    }

    /**
     * Set visible signature designer for an existing signature field.
     *
     * @param zoomPercent zoom applied to the signature image
     * @param imageStream stream of the signature image
     * @throws IOException if the designer cannot be created
     */
    public void setVisibleSignDesigner(int zoomPercent, FileInputStream imageStream)
            throws IOException
    {
        visibleSignDesigner = new PDVisibleSignDesigner(imageStream);
        visibleSignDesigner.zoom(zoomPercent);
    }

    /**
     * Set visible signature properties for new signature fields.
     *
     * @param name signer name
     * @param location signer location
     * @param reason reason for signing
     * @param preferredSize preferred size passed on to the signature properties
     * @param page page passed on to the signature properties (signPDF() subtracts 1 from it)
     * @param visualSignEnabled whether the visual signature is added when signing
     * @throws IOException declared for callers; see PDVisibleSigProperties
     */
    public void setVisibleSignatureProperties(String name, String location, String reason, int preferredSize,
            int page, boolean visualSignEnabled) throws IOException
    {
        visibleSignatureProperties.signerName(name).signerLocation(location).signatureReason(reason).
                preferredSize(preferredSize).page(page).visualSignEnabled(visualSignEnabled).
                setPdVisibleSignature(visibleSignDesigner);
    }

    /**
     * Set visible signature properties for existing signature fields.
     *
     * @param name signer name
     * @param location signer location
     * @param reason reason for signing
     * @param visualSignEnabled whether the visual signature is added when signing
     * @throws IOException declared for callers; see PDVisibleSigProperties
     */
    public void setVisibleSignatureProperties(String name, String location, String reason,
            boolean visualSignEnabled) throws IOException
    {
        visibleSignatureProperties.signerName(name).signerLocation(location).signatureReason(reason).
                visualSignEnabled(visualSignEnabled).setPdVisibleSignature(visibleSignDesigner);
    }

    /**
     * Initialize the signature creator with a keystore and pin that
     * should be used for the signature.
     *
     * @param keystore the keystore holding the signing key
     * @param pin is the pin for the keystore / private key
     * @throws KeyStoreException if the keystore has not been initialized (loaded)
     * @throws NoSuchAlgorithmException if the algorithm for recovering the key cannot be found
     * @throws UnrecoverableKeyException if the given password is wrong
     * @throws CertificateException if the certificate is not valid as signing time
     * @throws IOException if no certificate could be found
     */
    public SignnerPDFBoxExample(KeyStore keystore, char[] pin)
            throws KeyStoreException, UnrecoverableKeyException, NoSuchAlgorithmException, IOException, CertificateException
    {
        super(keystore, pin);
    }

    /**
     * Sign pdf file and create new file that ends with "_signed.pdf".
     *
     * @param inputFile The source pdf document file.
     * @param signedFile The file to be signed.
     * @param tsaClient optional TSA client
     * @throws IOException if the input does not exist or signing fails
     */
    public void signPDF(File inputFile, File signedFile, TSAClient tsaClient) throws IOException
    {
        this.signPDF(inputFile, signedFile, tsaClient, null);
    }

    /**
     * Sign pdf file and create new file that ends with "_signed.pdf".
     *
     * @param inputFile The source pdf document file.
     * @param signedFile The file to be signed.
     * @param tsaClient optional TSA client
     * @param signatureFieldName optional name of an existing (unsigned) signature field
     * @throws IOException if the input does not exist or signing fails
     * @throws IllegalStateException if the document's MDP permission is 1, or the named
     *         signature field is already signed
     */
    public void signPDF(File inputFile, File signedFile, TSAClient tsaClient, String signatureFieldName) throws IOException
    {
        setTsaClient(tsaClient);
        if (inputFile == null || !inputFile.exists())
        {
            throw new IOException("Document for signing does not exist");
        }
        // creating output document and prepare the IO streams.
        // Both are closed when the block ends (document first, then the output stream),
        // also when signing fails half-way.
        try (FileOutputStream fos = new FileOutputStream(signedFile);
                PDDocument doc = PDDocument.load(inputFile))
        {
            int accessPermissions = SigUtils.getMDPPermission(doc);
            if (accessPermissions == 1)
            {
                throw new IllegalStateException("No changes to the document are permitted due to DocMDP transform parameters dictionary");
            }
            // Note that PDFBox has a bug that visual signing on certified files with permission 2
            // doesn't work properly, see PDFBOX-3699. As long as this issue is open, you may want to
            // be careful with such files.
            PDSignature signature;
            // sign a PDF with an existing empty signature, as created by the CreateEmptySignatureForm example.
            signature = findExistingSignature(doc, signatureFieldName);
            if (signature == null)
            {
                // create signature dictionary
                signature = new PDSignature();
            }
            // Optional: certify
            // can be done only if version is at least 1.5 and if not already set
            // doing this on a PDF/A-1b file fails validation by Adobe preflight (PDFBOX-3821)
            // PDF/A-1b requires PDF version 1.4 max, so don't increase the version on such files.
            // NOTE: this call is what turns the signature into a certification signature;
            // drop it if only approval signatures are wanted.
            if (doc.getVersion() >= 1.5f && accessPermissions == 0)
            {
                SigUtils.setMDPPermission(doc, signature, 2);
            }
            PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
            if (acroForm != null && acroForm.getNeedAppearances())
            {
                // PDFBOX-3738 NeedAppearances true results in visible signature becoming invisible
                // with Adobe Reader
                if (acroForm.getFields().isEmpty())
                {
                    // we can safely delete it if there are no fields
                    acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES);
                    // note that if you've set MDP permissions, the removal of this item
                    // may result in Adobe Reader claiming that the document has been changed.
                    // and/or that field content won't be displayed properly.
                    // ==> decide what you prefer and adjust your code accordingly.
                }
                else
                {
                    System.out.println("/NeedAppearances is set, signature may be ignored by Adobe Reader");
                }
            }
            // default filter
            signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE);
            // subfilter for basic and PAdES Part 2 signatures
            signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
            if (visibleSignatureProperties != null)
            {
                // this builds the signature structures in a separate document
                visibleSignatureProperties.buildSignature();
                signature.setName(visibleSignatureProperties.getSignerName());
                signature.setLocation(visibleSignatureProperties.getSignerLocation());
                signature.setReason(visibleSignatureProperties.getSignatureReason());
            }
            // the signing date, needed for valid signature
            signature.setSignDate(Calendar.getInstance());
            // do not set SignatureInterface instance, if external signing used
            SignatureInterface signatureInterface = isExternalSigning() ? null : this;
            // register signature dictionary and sign interface
            if (visibleSignatureProperties != null && visibleSignatureProperties.isVisualSignEnabled())
            {
                signatureOptions = new SignatureOptions();
                signatureOptions.setVisualSignature(visibleSignatureProperties.getVisibleSignature());
                signatureOptions.setPage(visibleSignatureProperties.getPage() - 1);
                doc.addSignature(signature, signatureInterface, signatureOptions);
            }
            else
            {
                doc.addSignature(signature, signatureInterface);
            }
            if (isExternalSigning())
            {
                System.out.println("Signing externally " + signedFile.getName());
                ExternalSigningSupport externalSigning = doc.saveIncrementalForExternalSigning(fos);
                // invoke external signature service
                byte[] cmsSignature = sign(externalSigning.getContent());
                // Explanation of late external signing (off by default):
                // If you want to add the signature in a separate step, then set an empty byte array
                // and call signature.getByteRange() and remember the offset signature.getByteRange()[1]+1.
                // you can write the ascii hex signature at a later time even if you don't have this
                // PDDocument object anymore, with classic java file random access methods.
                // If you can't remember the offset value from ByteRange because your context has changed,
                // then open the file with PDFBox, find the field with findExistingSignature() or
                // PDDocument.getLastSignatureDictionary() and get the ByteRange from there.
                // Close the file and then write the signature as explained earlier in this comment.
                if (isLateExternalSigning())
                {
                    // this saves the file with a 0 signature
                    externalSigning.setSignature(new byte[0]);
                    // remember the offset (add 1 because of "<")
                    int offset = signature.getByteRange()[1] + 1;
                    // now write the signature at the correct offset without any PDFBox methods
                    try (RandomAccessFile raf = new RandomAccessFile(signedFile, "rw"))
                    {
                        raf.seek(offset);
                        raf.write(Hex.getBytes(cmsSignature));
                    }
                }
                else
                {
                    // set signature bytes received from the service and save the file
                    externalSigning.setSignature(cmsSignature);
                }
            }
            else
            {
                // write incremental (only for signing purpose)
                doc.saveIncremental(fos);
            }
        }
        // Do not close signatureOptions before saving, because some COSStream objects within
        // are transferred to the signed document.
        // Do not allow signatureOptions get out of scope before saving, because then the COSDocument
        // in signature options might by closed by gc, which would close COSStream objects prematurely.
        // See https://issues.apache.org/jira/browse/PDFBOX-3743
        IOUtils.closeQuietly(signatureOptions);
    }

    /**
     * Find an existing signature (assumed to be empty). You will usually not need this.
     *
     * @param doc the document to search
     * @param sigFieldName name of the signature field to look up
     * @return a new signature dictionary attached to the named field, or null if the
     *         document has no AcroForm or no field with that name
     * @throws IllegalStateException if the field already carries a signature
     */
    private PDSignature findExistingSignature(PDDocument doc, String sigFieldName)
    {
        PDSignature signature = null;
        PDSignatureField signatureField;
        PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
        if (acroForm != null)
        {
            signatureField = (PDSignatureField) acroForm.getField(sigFieldName);
            if (signatureField != null)
            {
                // retrieve signature dictionary
                signature = signatureField.getSignature();
                if (signature == null)
                {
                    signature = new PDSignature();
                    // after solving PDFBOX-3524
                    // signatureField.setValue(signature)
                    // until then:
                    signatureField.getCOSObject().setItem(COSName.V, signature);
                }
                else
                {
                    throw new IllegalStateException("The signature field " + sigFieldName + " is already signed.");
                }
            }
        }
        return signature;
    }

    /**
     * This will print the usage for this program.
     */
    private static void usage()
    {
        System.err.println("Usage: java " + CreateVisibleSignature.class.getName()
                + " <pkcs12-keystore-file> <pin> <input-pdf> <sign-image>\n" + "" +
                "options:\n" +
                " -tsa <url> sign timestamp using the given TSA server\n"+
                " -e sign using external signature creation scenario");
    }
}
Your signPDF method contains this code:
// Optional: certify
// can be done only if version is at least 1.5 and if not already set
// doing this on a PDF/A-1b file fails validation by Adobe preflight (PDFBOX-3821)
// PDF/A-1b requires PDF version 1.4 max, so don't increase the version on such files.
// The guard below runs the call only for version >= 1.5 and accessPermissions == 0.
if (doc.getVersion() >= 1.5f && accessPermissions == 0)
{
// sets MDP permission 2 on this signature
SigUtils.setMDPPermission(doc, signature, 2);
}
If you don't want a certification signature to start with, remove this setMDPPermission call.
I have a requirement where I need to replace some text in a PowerPoint file at runtime. (The PowerPoint file is being used as a template with some placeholders/tokens, e.g. {{USER_NAME}}.)
I have tried using POI but with no luck.
I referred to the other links on the forum and started with 'docx4j' but am not able to go beyond a point and the documentation is not very clear (at least for me).
Here is what I have done so far:
Got the PPTX loaded to 'PresentationMLPackage'
Got the 'MainPresentationPart' and the slides (Using mainPresentationPart.getSlide(n);)
But I am not sure of the next steps from here (or if this is the right approach in the first place).
Any suggestions will be greatly appreciated.
Thanks a Lot,
-Vini
SlidePart extends JaxbPmlPart<Sld>
JaxbPmlPart<E> extends JaxbXmlPartXPathAware<E>
JaxbXmlPartXPathAware<E> extends JaxbXmlPart<E>
JaxbXmlPart contains:
/**
 * Replaces template variables in this part's XML and stores the unmarshalled result
 * as the part's jaxbElement.
 *
 * Where jaxbElement has not been unmarshalled yet, the raw part is read from the
 * source part store; this is more efficient (3 times faster, in some testing) than
 * calling XmlUtils.marshaltoString directly, since it avoids some JAXB processing.
 * Otherwise the current jaxbElement is marshalled to a string first.
 *
 * @param mappings the replacements handed to XmlUtils.unmarshallFromTemplate
 * @throws JAXBException if unmarshalling the substituted XML fails
 * @throws Docx4JException if the part is missing from the part store or cannot be read
 *
 * @since 3.0.0
 */
public void variableReplace(java.util.HashMap<String, String> mappings) throws JAXBException, Docx4JException {
    // Get the contents as a string
    String wmlTemplateString = null;
    if (jaxbElement==null) {
        PartStore partStore = this.getPackage().getSourcePartStore();
        String name = this.getPartName().getName();
        // the part store is addressed without the first character of the part name
        // (presumably the leading '/')
        InputStream is = partStore.loadPart(
                name.substring(1));
        if (is==null) {
            log.warn(name + " missing from part store");
            throw new Docx4JException(name + " missing from part store");
        } else {
            log.info("Lazily unmarshalling " + name);
            // This seems to be about 5% faster than the Scanner approach
            try {
                wmlTemplateString = IOUtils.toString(is, "UTF-8");
            } catch (IOException e) {
                throw new Docx4JException(e.getMessage(), e);
            } finally {
                // the stream was opened here, so release it here
                try {
                    is.close();
                } catch (IOException ignored) {
                    // best effort: the content has already been read (or the read failure is being reported)
                }
            }
        }
    } else {
        wmlTemplateString = XmlUtils.marshaltoString(jaxbElement, true, false, jc);
    }
    // Do the replacement
    jaxbElement = (E)XmlUtils.unwrap(
            XmlUtils.unmarshallFromTemplate(wmlTemplateString, mappings, jc));
}
So once you have the slide part, you can invoke variableReplace on it. You'll need your variables to be in the format expected by XmlUtils.unmarshallFromTemplate
This question already has answers here:
How to generate xpath from xsd?
(3 answers)
Closed 7 years ago.
I need to represent all the elements from an XSD Schema as XPath. Is there any way for it? Like consider there are five elements in XSD Schema I need to display XPath of all the five elements separately.
My suggestion is at the background XML corresponding to XSD has to be created and XPath has to be generated. Please suggest solution for the same if the approach is correct or suggest other approaches..
Thanks.
M.Sasi kumar
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that creates and prints XPath-like expressions for each element encountered.
 *
 * The algorithm is not infallible, if elements appear on different levels in the hierarchy.
 * Something like the following is an example:
 * - <elemA/>
 * - <elemA/>
 * - <elemB/>
 * - <elemA/>
 * - <elemC>
 * - <elemB/>
 * - </elemC>
 *
 * will report
 *
 * //elemA[0]
 * //elemA[1]
 * //elemB[0]
 * //elemA[2]
 * //elemC[0]
 * //elemC[0]/elemB[1] (this is wrong: should be //elemC[0]/elemB[0] )
 *
 * It also ignores namespaces, and thus treats <foo:elemA> the same as <bar:elemA>.
 *
 * NOTE: the printed index is a running, 0-based count per tag name over the whole
 * document, whereas XPath position predicates are 1-based and relative to the parent.
 */
public class SAXCreateXPath extends DefaultHandler {
    // map of all encountered tags and their running count
    private Map<String, Integer> tagCount;
    // keep track of the succession of elements
    private Stack<String> tags;
    // set to the tag name of the recently closed tag
    String lastClosedTag;

    /**
     * Construct the XPath expression for the current element stack.
     *
     * @return "//" followed by the stacked tag names, each with its current count in brackets
     */
    private String getCurrentXPath() {
        StringBuilder xpath = new StringBuilder("//");
        boolean first = true;
        for (String tag : tags) {
            if (!first) {
                xpath.append('/');
            }
            xpath.append(tag).append('[').append(tagCount.get(tag)).append(']');
            first = false;
        }
        return xpath.toString();
    }

    /**
     * Resets all per-document state so that one handler instance can be used for
     * several parses.
     */
    @Override
    public void startDocument() throws SAXException {
        tags = new Stack<String>();
        tagCount = new HashMap<String, Integer>();
        // without this reset, a second parse would start with the root tag of the
        // previous document still recorded as "recently closed"
        lastClosedTag = null;
    }

    /**
     * Counts the element, updates the element stack and prints the resulting expression.
     */
    @Override
    public void startElement (String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException
    {
        boolean isRepeatElement = false;
        Integer seen = tagCount.get(localName);
        tagCount.put(localName, seen == null ? 0 : seen + 1);
        if (lastClosedTag != null) {
            // an element was recently closed ...
            if (lastClosedTag.equals(localName)) {
                // ... and it's the same as the current one
                isRepeatElement = true;
            } else {
                // ... but it's different from the current one, so discard it
                tags.pop();
            }
        }
        // if it's not the same element, add the new element and zero count to list
        if (! isRepeatElement) {
            tags.push(localName);
        }
        System.out.println(getCurrentXPath());
        lastClosedTag = null;
    }

    /**
     * Remembers the closed tag; its stack entry is removed lazily by the next event.
     */
    @Override
    public void endElement (String uri, String localName, String qName) throws SAXException {
        // if two tags are closed in succession (without an intermediate opening tag),
        // then the information about the deeper nested one is discarded
        if (lastClosedTag != null) {
            tags.pop();
        }
        lastClosedTag = localName;
    }

    /**
     * Parses the XML file named by the first argument and prints one expression per element.
     *
     * @param args args[0] is the path of the XML file
     * @throws Exception if the parser cannot be created or the file cannot be parsed
     */
    public static void main (String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: SAXCreateXPath <file.xml>");
            System.exit(1);
        }
        // Create a JAXP SAXParserFactory and configure it
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        spf.setValidating(false);
        // Create a JAXP SAXParser
        SAXParser saxParser = spf.newSAXParser();
        // Get the encapsulated SAX XMLReader
        XMLReader xmlReader = saxParser.getXMLReader();
        // Set the ContentHandler of the XMLReader
        xmlReader.setContentHandler(new SAXCreateXPath());
        // File.toURI() yields an absolute, properly escaped file: URI on every platform,
        // which also works for paths containing spaces or other special characters
        String systemId = new File(args[0]).toURI().toString();
        // Tell the XMLReader to parse the XML document
        xmlReader.parse(systemId);
    }
}
When working with JSP or with other languages that are converted to Java source code (or to stubs), there is often a SMAP file generated which can be later embedded to the Class file for debuggers to show better stack traces (or in case of Jasper it gets embedded automatically).
There is an old JVM bug (or RFE) to add support to include SMAP information in stack traces, but from lack of activity it seems the Sun/Oracle guys prefer that everyone post-processes his stack traces himself.
So here is my question: How to do this? Are there libraries around that do the hard work for you, or do you have to implement everything yourself?
I already found a good place where I have access to both the exception object and the class loader that loaded the "SMAP enabled" classes. Now I'd have to
iterate over the stack trace
Check for each entry if I can find the class
Analyze the class with e. g. ASM to extract the SMAP info
Write a SMAP parser that parses the reverse line mapping and the file names out of the SMAP info
Replace the stack trace element by a new one based on the mapping (or alternatively add a new one? What is better?)
Cache some of the info so that I don't have to do the same stuff again if the exact same (or a similar) stack trace re-appears a few seconds later.
And since it seems to be a tedious and error-prone task, I'm hoping that someone already did this and I just have to add a library to my dependencies and call a makeStacktraceFancy method for my exceptions to make the stacktraces fancy before I log them.
As no one seems to know of an existing solution, I rolled my own quick&dirty one.
It does not support all SMAP features (it parses only the first stratum, and ignores vendor sections and default stratum information), but it is enough for my needs.
Since the code to extract the SMAP attribute from the class is only about 50 lines, I decided to reimplement it instead of adding ASM as a dependency. The code for how to use it with ASM is in the comments.
As it is only tested very little (on a few test cases), I'll edit the post if I encounter any severe errors.
Code is below:
/*
* SMAPSourceDebugExtension.java - Parse source debug extensions and
* enhance stack traces.
*
* Copyright (c) 2012 Michael Schierl
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package smap;
import java.io.*;
import java.util.*;
import java.util.regex.*;
/**
* Utility class to parse Source Debug Extensions and enhance stack traces.
*
* Note that only the first stratum is parsed and used.
*
* @author Michael Schierl
*/
public class SMAPSourceDebugExtension {
/**
 * Enhance a stack trace with information from source debug extensions,
 * using a fresh cache that is discarded after the call.
 *
 * @param t
 *            Throwable whose stack trace should be enhanced
 * @param cl
 *            Class loader to load source debug extensions from
 * @param keepOriginalFrames
 *            Whether to keep the original frames referring to Java source
 *            or drop them
 * @param packageNames
 *            Names of packages that should be scanned for source debug
 *            extensions, or empty to scan all packages
 * @throws IOException
 *             if an I/O error occurs
 */
public static void enhanceStackTrace(Throwable t, ClassLoader cl, boolean keepOriginalFrames, String... packageNames) throws IOException {
    // nothing is remembered between calls of this overload
    Map<String, SMAPSourceDebugExtension> oneShotCache = new HashMap<String, SMAPSourceDebugExtension>();
    enhanceStackTrace(t, oneShotCache, cl, keepOriginalFrames, packageNames);
}
/**
* Enhance a stack trace with information from source debug extensions.
* Provide a custom cache of already resolved and parsed source debug
* extensions, to avoid parsing them for every new exception.
*
* #param t
* Throwable whose stack trace should be enhanced
* #param cache
* Cache to be used and filled
* #param cl
* Class loader to load source debug extensions from
* #param keepOriginalFrames
* Whether to keep the original frames referring to Java source
* or drop them
* #param packageNames
* Names of packages that should be scanned for source debug
* extensions, or empty to scan all packages
* #throws IOException
* if an I/O error occurs
*/
public static void enhanceStackTrace(Throwable t, Map<String, SMAPSourceDebugExtension> cache, ClassLoader cl, boolean keepOriginalFrames, String... packageNames) throws IOException {
StackTraceElement[] elements = t.getStackTrace();
List<StackTraceElement> newElements = null;
for (int i = 0; i < elements.length; i++) {
String className = elements[i].getClassName();
SMAPSourceDebugExtension smap = cache.get(className);
if (smap == null) {
boolean found = false;
for (String packageName : packageNames) {
if (className.startsWith(packageName + ".")) {
found = true;
break;
}
}
if (found || packageNames.length == 0) {
InputStream in = cl.getResourceAsStream(className.replace('.', '/') + ".class");
if (in != null) {
String value = extractSourceDebugExtension(in);
in.close();
if (value != null) {
value = value.replaceAll("\r\n?", "\n");
if (value.startsWith("SMAP\n")) {
smap = new SMAPSourceDebugExtension(value);
cache.put(className, smap);
}
}
}
}
}
StackTraceElement newFrame = null;
if (smap != null) {
int[] inputLineInfo = smap.reverseLineMapping.get(elements[i].getLineNumber());
if (inputLineInfo != null && elements[i].getFileName().equals(smap.generatedFileName)) {
FileInfo inputFileInfo = smap.fileinfo.get(inputLineInfo[0]);
if (inputFileInfo != null) {
newFrame = new StackTraceElement("[" + smap.firstStratum + "]", inputFileInfo.path, inputFileInfo.name, inputLineInfo[1]);
}
}
}
if (newFrame != null) {
if (newElements == null) {
newElements = new ArrayList<StackTraceElement>(Arrays.asList(elements).subList(0, i));
}
if (keepOriginalFrames)
newElements.add(elements[i]);
newElements.add(newFrame);
} else if (newElements != null) {
newElements.add(elements[i]);
}
}
if (newElements != null) {
t.setStackTrace(newElements.toArray(new StackTraceElement[newElements.size()]));
}
if (t.getCause() != null)
enhanceStackTrace(t.getCause(), cache, cl, keepOriginalFrames, packageNames);
}
/**
* Extract source debug extension from a class file, provided as an input
* stream
*
* #param in
* Input stream to read the class file
* #return Source debug extension as a String, or <code>null</code> if none
* was found.
* #throws IOException
* if an I/O error occurs
*/
// // ASM version of the same method:
// private static String extractSourceDebugExtension0(InputStream in) throws IOException {
// ClassReader cr = new ClassReader(in);
// final String[] result = new String[1];
// cr.accept(new ClassVisitor(Opcodes.ASM4) {
// #Override
// public void visitSource(String source, String debug) {
// result[0] = debug;
// }
// }, 0);
// return result[0];
// }
private static String extractSourceDebugExtension(InputStream in) throws IOException {
DataInputStream dis = new DataInputStream(in);
boolean[] isSourceDebugExtension;
dis.skipBytes(8);
// read constant pool
isSourceDebugExtension = new boolean[dis.readUnsignedShort()];
int[] skipSizes = new int[] { 0, 0, 2, 4, 4, 0, 0, 2, 2, 4, 4, 4, 4, 2, 2, 3, 2, 2, 4 };
for (int i = 1; i < isSourceDebugExtension.length; i++) {
byte type = dis.readByte();
int skipSize;
if (type == 1) {
String value = dis.readUTF();
isSourceDebugExtension[i] = value.equals("SourceDebugExtension");
skipSize = 0;
} else if (type == 5 || type == 6) {
skipSize = 8;
i++;
} else if (type > 1 && type < 19) {
skipSize = skipSizes[type];
} else {
skipSize = 2;
}
dis.skipBytes(skipSize);
}
dis.skipBytes(6);
int ifaces = dis.readUnsignedShort();
dis.skipBytes(2 * ifaces);
// skip fields and methods
for (int k = 0; k < 2; k++) {
int count = dis.readUnsignedShort();
for (int i = 0; i < count; i++) {
dis.skipBytes(6);
int attrCount = dis.readUnsignedShort();
for (int j = 0; j < attrCount; j++) {
dis.skipBytes(2);
int skip = dis.readInt();
dis.skipBytes(skip);
}
}
}
// read attributes and find SourceDebugExtension
int attrCount = dis.readUnsignedShort();
for (int i = 0; i < attrCount; i++) {
int idx = dis.readUnsignedShort();
int len = dis.readInt();
if (isSourceDebugExtension[idx]) {
byte[] buf = new byte[len];
dis.readFully(buf);
return new String(buf, "UTF-8");
} else {
dis.skipBytes(len);
}
}
return null;
}
private final String generatedFileName, firstStratum;
private final Map<Integer, FileInfo> fileinfo = new HashMap<Integer, FileInfo>();
private final Map<Integer, int[]> reverseLineMapping = new HashMap<Integer, int[]>();
private static final Pattern LINE_INFO_PATTERN = Pattern.compile("([0-9]+)(?:#([0-9]+))?(?:,([0-9]+))?:([0-9]+)(?:,([0-9]+))?");
private SMAPSourceDebugExtension(String value) {
String[] lines = value.split("\n");
if (!lines[0].equals("SMAP") || !lines[3].startsWith("*S ") || !lines[4].equals("*F"))
throw new IllegalArgumentException(value);
generatedFileName = lines[1];
firstStratum = lines[3].substring(3);
int idx = 5;
while (!lines[idx].startsWith("*")) {
String infoline = lines[idx++], path = null;
if (infoline.startsWith("+ ")) {
path = lines[idx++];
infoline = infoline.substring(2);
}
int pos = infoline.indexOf(" ");
int filenum = Integer.parseInt(infoline.substring(0, pos));
String name = infoline.substring(pos + 1);
fileinfo.put(filenum, new FileInfo(name, path == null ? name : path));
}
if (lines[idx].equals("*L")) {
idx++;
int lastLFI = 0;
while (!lines[idx].startsWith("*")) {
Matcher m = LINE_INFO_PATTERN.matcher(lines[idx++]);
if (!m.matches())
throw new IllegalArgumentException(lines[idx - 1]);
int inputStartLine = Integer.parseInt(m.group(1));
int lineFileID = m.group(2) == null ? lastLFI : Integer.parseInt(m.group(2));
int repeatCount = m.group(3) == null ? 1 : Integer.parseInt(m.group(3));
int outputStartLine = Integer.parseInt(m.group(4));
int outputLineIncrement = m.group(5) == null ? 1 : Integer.parseInt(m.group(5));
for (int i = 0; i < repeatCount; i++) {
int[] inputMapping = new int[] { lineFileID, inputStartLine + i };
int baseOL = outputStartLine + i * outputLineIncrement;
for (int ol = baseOL; ol < baseOL + outputLineIncrement; ol++) {
if (!reverseLineMapping.containsKey(ol))
reverseLineMapping.put(ol, inputMapping);
}
}
lastLFI = lineFileID;
}
}
}
private static class FileInfo {
public final String name, path;
public FileInfo(String name, String path) {
this.name = name;
this.path = path;
}
}
}
Not sure what you are trying to achieve here. If you just need to show the JSP file and line number in the stack trace when it is logged, then the easiest approach would be to replace the logger and show the JSP line number from the SMAP in the printed stack trace. Here is a patch for log4j that makes somewhat similar changes to the stack trace.
Though, you won't get automatic navigation from a class name to jsp in current IDEs...
PS: BTW, if you implement an SMAP parser, it would be a great idea to contribute it back to the ASM project...
How can I generate xpath from an xsd? XSD validates an xml. I am working in a project where I am generating a sample XML from the xsd using java and then generating xpath from that XML. If there is any way to generate xpath directly from xsd please let me know.
This might be of use:
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that creates and prints XPath-like expressions for each element encountered.
 *
 * The bracketed number after each tag is a zero-based running count of how often that
 * tag name has been opened so far in the whole document.
 *
 * The algorithm is not infallible if elements appear on different levels in the hierarchy.
 * Something like the following is an example:
 * - <elemA/>
 * - <elemA/>
 * - <elemB/>
 * - <elemA/>
 * - <elemC>
 * -     <elemB/>
 * - </elemC>
 *
 * will report
 *
 * //elemA[0]
 * //elemA[1]
 * //elemB[0]
 * //elemA[2]
 * //elemC[0]
 * //elemC[0]/elemB[1]     (this is wrong: should be //elemC[0]/elemB[0] )
 *
 * It also ignores namespaces, and thus treats <foo:elemA> the same as <bar:elemA>.
 */
public class SAXCreateXPath extends DefaultHandler {

    // map of all encountered tags and their running count
    private Map<String, Integer> tagCount;

    // keep track of the succession of elements (bottom of the stack is the outermost one)
    private Stack<String> tags;

    // set to the tag name of the recently closed tag
    String lastClosedTag;

    /**
     * Construct the XPath expression for the current element stack.
     *
     * @return "//" followed by the stacked tags from outermost to innermost, separated
     *         by "/", each suffixed with its current running count in brackets
     */
    private String getCurrentXPath() {
        StringBuilder xpath = new StringBuilder("//");
        boolean first = true;
        for (String tag : tags) {
            if (!first) {
                xpath.append('/');
            }
            xpath.append(tag).append('[').append(tagCount.get(tag)).append(']');
            first = false;
        }
        return xpath.toString();
    }

    /**
     * Reset all per-document state, so that one handler instance can be used for
     * several documents in a row.
     */
    @Override
    public void startDocument() throws SAXException {
        tags = new Stack<String>();
        tagCount = new HashMap<String, Integer>();
        // without this, a reused handler would mistake the root of the next document
        // for a repetition of the previous document's root
        lastClosedTag = null;
    }

    /**
     * Update the running count of the opened tag, adjust the element stack and print
     * the resulting expression to standard output.
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException {
        boolean isRepeatElement = false;

        Integer seen = tagCount.get(localName);
        tagCount.put(localName, seen == null ? 0 : seen + 1);

        if (lastClosedTag != null) {
            // an element was recently closed ...
            if (lastClosedTag.equals(localName)) {
                // ... and it's the same as the current one, which is still on the stack
                isRepeatElement = true;
            } else {
                // ... but it's different from the current one, so discard it
                tags.pop();
            }
        }

        // if it's not the same element, add the new element to the stack
        if (!isRepeatElement) {
            tags.push(localName);
        }

        System.out.println(getCurrentXPath());
        lastClosedTag = null;
    }

    /**
     * Remember the closed tag. It stays on the stack until the next event shows whether
     * the same tag is opened again right away.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // if two tags are closed in succession (without an intermediate opening tag),
        // then the information about the deeper nested one is discarded
        if (lastClosedTag != null) {
            tags.pop();
        }
        lastClosedTag = localName;
    }

    /**
     * Parse the XML file named by the first argument and print one expression per element.
     *
     * @param args
     *            command line; args[0] is the path of the XML file
     * @throws Exception
     *             if the parser cannot be created or the file cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: SAXCreateXPath <file.xml>");
            System.exit(1);
        }

        // Create a JAXP SAXParserFactory and configure it
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        spf.setValidating(false);

        // Create a JAXP SAXParser
        SAXParser saxParser = spf.newSAXParser();

        // Get the encapsulated SAX XMLReader
        XMLReader xmlReader = saxParser.getXMLReader();

        // Set the ContentHandler of the XMLReader
        xmlReader.setContentHandler(new SAXCreateXPath());

        // File.toURI() produces an absolute, properly escaped file: URL, so paths
        // containing spaces, '#' or '%' are passed to the parser intact
        String systemId = new File(args[0]).toURI().toString();

        // Tell the XMLReader to parse the XML document
        xmlReader.parse(systemId);
    }
}
I've been working on a little library to do just this, though for larger and more complex schemas, there are issues you will need to address on a case-by-case basis (e.g., filters for certain nodes). See https://stackoverflow.com/a/45020739/3096687 for a description of the solution.
There are a number of problems with such tools:
The XPath expression generated rarely is a good one. No such tool will produce meaningful predicates beyond position information.
There is no tool (to my knowledge) that would generate an XPath expression that selects exactly a set of selected nodes.
Apart from this, such tools used without learning XPath are really harmful -- they support ignorance.
I would recommend seriously learning XPath using books and other resources such as the following.
https://stackoverflow.com/questions/339930/any-good-xslt-tutorial-book-blog-site-online/341589#341589
See the following answer for more information.
Is there an online tester for xPath selectors?