Creating a new header in docx4j - java

I have a Maven project with docx4j. I have managed to successfully convert an HTML file to docx. However, I'm interested in inserting a header into the docx file.
In the docx4j GitHub repository there is a sample (link) which I used, and it worked as expected, i.e.
// Create the header part; the returned relationship is the one linking it to the main document part.
Relationship relationship = createHeaderPart(wordMLPackage);
/**
 * Creates a new header part, attaches it to the main document part of the given package and
 * fills it with the content produced by {@code getHdr}.
 *
 * @param wordprocessingMLPackage package whose main document part receives the header part
 * @return the relationship returned when the header part is attached
 * @throws Exception if attaching the part or building the header content fails
 */
public static Relationship createHeaderPart(
        WordprocessingMLPackage wordprocessingMLPackage)
        throws Exception {
    final HeaderPart header = new HeaderPart();
    final Relationship headerRelationship =
            wordprocessingMLPackage.getMainDocumentPart().addTargetPart(header);
    // Content is set only after the part has been attached, so an image can be added properly.
    header.setJaxbElement(getHdr(wordprocessingMLPackage, header));
    return headerRelationship;
}
/**
 * Builds a header element containing a single paragraph with the fixed text "hi there".
 *
 * @param wordprocessingMLPackage package whose main document part is used to create the paragraph
 * @param sourcePart              the header part the content is built for (not used here)
 * @return the populated header element
 * @throws Exception declared for compatibility with the other variants of this method
 */
public static Hdr getHdr(WordprocessingMLPackage wordprocessingMLPackage,
        Part sourcePart) throws Exception {
    Hdr hdr = objectFactory.createHdr();
    // Use the package passed in rather than an undeclared "docx" variable, so the method
    // only depends on its own parameters.
    P headerParagraph = wordprocessingMLPackage.getMainDocumentPart().createParagraphOfText("hi there");
    hdr.getContent().add(headerParagraph);
    return hdr;
}
This is working as expected
However, I'm interested in using dynamic content from HTML, so I used:
// HTML-based variant of getHdr: converts a fixed XHTML string with the XHTML importer and
// puts the conversion result into a newly created header element.
public static Hdr getHdr(WordprocessingMLPackage wordprocessingMLPackage,
Part sourcePart) throws Exception {
Hdr hdr = objectFactory.createHdr();
String html = "<html><body><p>hi there</p></body></html>";
XHTMLImporter XHTMLImporter = new XHTMLImporterImpl(wordprocessingMLPackage);
// NOTE(review): convert(...) appears to return a List<Object>, so add(...) presumably inserts
// the list itself as one content entry rather than the converted elements - confirm.
hdr.getContent().add(XHTMLImporter.convert(html, null));
return hdr;
}
This doesn't work at all. Any ideas?

I just noticed that XHTMLImporter is creating a list of objects, i.e.
/**
 * Builds a header element from an XHTML fragment using the XHTML importer.
 *
 * @param wordprocessingMLPackage package the importer is created for
 * @param sourcePart              the header part the content is built for (not used here)
 * @return the header element containing every object produced by the conversion
 * @throws Exception if the XHTML conversion fails
 */
public static Hdr getHdr(WordprocessingMLPackage wordprocessingMLPackage,
        Part sourcePart) throws Exception {
    Hdr hdr = objectFactory.createHdr();
    String html = "<html><body><p>hi there</p></body></html>";
    // lowerCamelCase name so the variable does not shadow the XHTMLImporter type.
    XHTMLImporter importer = new XHTMLImporterImpl(wordprocessingMLPackage);
    List<Object> converted = importer.convert(html, null);
    // Add every converted object: taking only element 0 would drop the rest of a
    // multi-element fragment and throw on an empty conversion result.
    hdr.getContent().addAll(converted);
    return hdr;
}

Related

PlantUML graph generation in Java: IllegalAccessError

I'm trying to make a simple graph using Java but keep getting an error.
Code:
/**
 * Demo that renders the sequence diagram "Bob -> Alice : hello" to PNG twice: once from
 * PlantUML source text and once by building the diagram through the PlantUML API.
 */
public class PlantUMLDemoMain {

    /**
     * Writes {@code from-string.png} and {@code from-api.png} into the working directory.
     *
     * @param args ignored
     * @throws Exception if either diagram cannot be generated
     */
    public static void main(String[] args) throws Exception {
        generateFromStringSource(new File("from-string.png"));
        generateFromApi(new File("from-api.png"));
    }

    /**
     * Builds the diagram programmatically and exports it as PNG.
     *
     * @param file target PNG file
     * @throws IOException if the file cannot be written
     */
    private static void generateFromApi(File file) throws IOException {
        // 1. setup:
        SequenceDiagramFactory f = new SequenceDiagramFactory();
        SequenceDiagram diagram = f.createEmptyDiagram();

        // 2. Build the diagram:
        // "Bob -> Alice : hello"
        // See net.sourceforge.plantuml.sequencediagram.command.CommandArrow#executeArg
        Display bobD = Display.getWithNewlines("Bob");
        Participant bobP = diagram.getOrCreateParticipant("Bob", bobD);
        Display aliceD = Display.getWithNewlines("Alice");
        Participant aliceP = diagram.getOrCreateParticipant("Alice", aliceD);
        Display label = Display.getWithNewlines("hello");
        ArrowConfiguration config = ArrowConfiguration.withDirectionNormal();
        Message msg = new Message(bobP, aliceP, label, config, diagram.getNextMessageNumber());
        checkState(null == diagram.addMessage(msg));

        // 3. Output the diagram
        // See net.sourceforge.plantuml.SourceStringReader#generateImage
        diagram.makeDiagramReady();
        checkState(1 == diagram.getNbImages());
        try (OutputStream os = new FileOutputStream(file)) {
            // The returned image data is not needed here, so it is not stored.
            diagram.exportDiagram(os, 0, new FileFormatOption(FileFormat.PNG));
            System.out.println("generateFromApi: " + diagram.getDescription().getDescription());
        }
    }

    /**
     * Renders the diagram from PlantUML source text.
     *
     * @param file target PNG file
     * @throws IOException if the file cannot be written
     */
    private static void generateFromStringSource(File file) throws IOException {
        // PlantUML source blocks are delimited by @startuml / @enduml.
        String source = "@startuml\n";
        source += "Bob -> Alice : hello\n";
        source += "@enduml\n";
        SourceStringReader reader = new SourceStringReader(source);
        // Write the first image to "png"
        String desc = reader.generateImage(file);
        // Return a null string if no generation
        System.out.println("generateFromStringSource: " + desc);
    }
}
Error: Exception in thread "main" java.lang.IllegalAccessError: class net.sourceforge.plantuml.png.PngIOMetadata (in unnamed module #0x9597028) cannot access class com.sun.imageio.plugins.png.PNGMetadata (in module java.desktop) because module java.desktop does not export com.sun.imageio.plugins.png to unnamed module #0x9597028
at net.sourceforge.plantuml.png.PngIOMetadata.writeWithMetadata(PngIOMetadata.java:60)
at net.sourceforge.plantuml.png.PngIO.write(PngIO.java:86)
at net.sourceforge.plantuml.png.PngIO.write(PngIO.java:80)
at net.sourceforge.plantuml.ugraphic.g2d.UGraphicG2d.writeImageTOBEMOVED(UGraphicG2d.java:219)
at net.sourceforge.plantuml.ugraphic.ImageBuilder.writeImageInternal(ImageBuilder.java:249)
at net.sourceforge.plantuml.ugraphic.ImageBuilder.writeImageTOBEMOVED(ImageBuilder.java:171)
at net.sourceforge.plantuml.sequencediagram.graphic.SequenceDiagramFileMakerPuma2.createOne(SequenceDiagramFileMakerPuma2.java:234)
at net.sourceforge.plantuml.sequencediagram.SequenceDiagram.exportDiagramInternal(SequenceDiagram.java:222)
at net.sourceforge.plantuml.UmlDiagram.exportDiagramNow(UmlDiagram.java:236)
at net.sourceforge.plantuml.AbstractPSystem.exportDiagram(AbstractPSystem.java:127)
at net.sourceforge.plantuml.SourceStringReader.generateImage(SourceStringReader.java:124)
at net.sourceforge.plantuml.SourceStringReader.generateImage(SourceStringReader.java:111)
at net.sourceforge.plantuml.SourceStringReader.generateImage(SourceStringReader.java:101)
at scr.graphviz.sk.PlantUMLDemoMain.generateFromStringSource(PlantUMLDemoMain.java:66)
at scr.graphviz.sk.PlantUMLDemoMain.main(PlantUMLDemoMain.java:23)
I found someone with a similar problem, and an older version of PlantUML worked for him. I have a jar file of the older version, but I'm not sure how to apply it. I tried inspecting the file to find out the versions of the libraries used and added Maven dependencies for them, but it didn't seem to work.
This is the similar problem I mentioned: https://github.com/plantuml/plantuml/issues/69

encoding issue after pdfbox

I want to extract text from a PDF in Java, so I use the PDFBox library. The PDF file seems to have been written in HWP (a Korean word-processing program) before it was converted to a PDF file.
This is my simple API.
#RestController
#RequiredArgsConstructor
public class QuestionController {
private final QuestionParseService questionParseService;
#GetMapping("/")
public ResponseEntity<?> parsePDF() throws IOException {
return ResponseEntity.ok(questionParseService.parsePDF());
}
}
#Service
public class QuestionParseService {
public String parsePDF() throws IOException {
File file = new File("filePath");
PDDocument document = PDDocument.load(file);
PDFTextStripper s = new PDFTextStripper();
String content = s.getText(document);
return content;
}
}
This is my PDF file PDF file
But, the API result of question 1 was


×
 

의 값은? [2점]
①  ②  ③  ④  ⑤ 
How can I get correctly encoded text?

Adding hyperlink to inner PDF files

I have to create a PDF file by adding two PDF files inside a generated PDF file as a tree structure using iText in Java.
I have to create bookmarks with PDF file names and add a hyperlink to the bookmark. When the bookmark is clicked, the respective PDF should be opened in that PDF file itself, not as a separate PDF.
PDFTREE
pdf1
pdf2
Such bookmarks are referred to as outline elements in the PDF specification (PDF 32000-1:2008, p.367):
The outline consists of a tree-structured hierarchy of outline items (sometimes called bookmarks), which serve as a visual table of contents to display the document’s structure to the user.
If you merge the documents with PdfMerger, the outlines are copied to the resulting PDF by default. However, you want a main node per document and not a flat list of bookmarks. Since cloning and copying outlines is no trivial task, it is best to let iText handle this. Unfortunately, we have little direct control over how outlines are merged.
We can build a SpecialMerger as a wrapper around PdfMerger to extract the cloned outlines (first step) and get them into a hierarchical structure afterwards (second step). The outline of each merged PDF is temporarily stored in the outlineList together with the desired name of the main node and its reference (page number in the merged PDF). After all the PDFs are merged, we can attach the temporarily stored outlines back to the root-node.
/**
 * Wrapper around {@link PdfMerger} that collects the outline copied by each merge and later
 * re-attaches it under one bold top-level bookmark per merged file.
 */
public static class SpecialMerger {
    private final PdfDocument outputPdf;
    private final PdfMerger merger;
    private final PdfOutline rootOutline;
    private final List<DocumentOutline> outlineList = new ArrayList<>();
    private int nextPageNr = 1;

    /**
     * @param outputPdf the target document; must not contain any pages yet
     * @throws IllegalArgumentException if the target already has pages
     */
    public SpecialMerger(final PdfDocument outputPdf) {
        final boolean alreadyHasPages = outputPdf.getNumberOfPages() != 0;
        if (alreadyHasPages) {
            throw new IllegalArgumentException("PDF must be empty");
        }
        this.outputPdf = outputPdf;
        this.merger = new PdfMerger(outputPdf, true, true);
        this.rootOutline = outputPdf.getOutlines(false);
    }

    /**
     * Merges the given page range and stashes the outline entries that ended up under the
     * root outline, together with the file name and the first page of the merged range.
     */
    public void merge(PdfDocument from, int fromPage, int toPage, String filename) {
        // Delegate the actual page copy to the regular PdfMerger.
        merger.merge(from, fromPage, toPage);

        // Snapshot the root's current children, then empty the root again.
        final List<PdfOutline> rootChildren = rootOutline.getAllChildren();
        final List<PdfOutline> extracted = new ArrayList<>(rootChildren);
        rootChildren.clear();

        outlineList.add(new DocumentOutline(filename, nextPageNr, extracted));
        // The next merged file starts on the page after the current last page.
        nextPageNr = outputPdf.getNumberOfPages() + 1;
    }

    /** Creates one bold bookmark per merged file and hangs the stashed entries below it. */
    public void writeOutline() {
        for (final DocumentOutline entry : outlineList) {
            final PdfOutline fileNode = rootOutline.addOutline(entry.getName());
            fileNode.addDestination(PdfExplicitDestination.createFit(outputPdf.getPage(entry.getPageNr())));
            fileNode.setStyle(PdfOutline.FLAG_BOLD);
            for (final PdfOutline child : entry.getChildren()) {
                fileNode.addOutline(child);
            }
        }
    }

    /** Holder for one merged file: bookmark name, first page number and stashed outline entries. */
    private static class DocumentOutline {
        private final String name;
        private final int pageNr;
        private final List<PdfOutline> children;

        public DocumentOutline(final String pdfName, final int pageNr, final List<PdfOutline> children) {
            this.name = pdfName;
            this.pageNr = pageNr;
            this.children = children;
        }

        public String getName() {
            return name;
        }

        public int getPageNr() {
            return pageNr;
        }

        public List<PdfOutline> getChildren() {
            return children;
        }
    }
}
Now, we can use this custom merger to merge the PDFs and then add the outline with writeOutline:
/**
 * Merges {@code pdf1.pdf} and {@code pdf2.pdf} into {@code output.pdf} and writes one
 * top-level bookmark per input file.
 *
 * @param args ignored
 * @throws IOException if a file cannot be read or written
 */
public static void main(String[] args) throws IOException {
    final String firstName = "pdf1.pdf";
    final String secondName = "pdf2.pdf";
    try (PdfDocument target = new PdfDocument(new PdfWriter("output.pdf"));
         PdfDocument first = new PdfDocument(new PdfReader(firstName));
         PdfDocument second = new PdfDocument(new PdfReader(secondName))) {
        final SpecialMerger merger = new SpecialMerger(target);

        // Each input is merged completely, labelled with its file name.
        final int firstPageCount = first.getNumberOfPages();
        merger.merge(first, 1, firstPageCount, firstName);
        final int secondPageCount = second.getNumberOfPages();
        merger.merge(second, 1, secondPageCount, secondName);

        merger.writeOutline();
    }
}
The result looks like this (Preview and Adobe Acrobat Reader on macOS):
Another option is to make a portfolio by embedding the PDFs. However, this is not supported by all PDF viewers and most users are not accustomed to these portfolios.
/**
 * Creates {@code portfolio.pdf}: two explanatory paragraphs, a tile-view collection set on the
 * catalog, and {@code pdf1.pdf} / {@code pdf2.pdf} added as file attachments.
 *
 * @param args ignored
 * @throws IOException if a file cannot be read or written
 */
public static void main(String[] args) throws IOException {
    final String firstName = "pdf1.pdf";
    final String secondName = "pdf2.pdf";
    try (PdfDocument portfolio = new PdfDocument(new PdfWriter("portfolio.pdf"))) {
        // Visible page content shown alongside the attachments.
        final Document layout = new Document(portfolio);
        layout.add(new Paragraph("This PDF contains embedded documents."));
        layout.add(new Paragraph("Use a compatible PDF viewer if you cannot see them."));

        final PdfCollection tiles = new PdfCollection();
        tiles.setView(PdfCollection.TILE);
        portfolio.getCatalog().setCollection(tiles);

        // Each file is attached under its own file name.
        addAttachment(portfolio, firstName, firstName);
        addAttachment(portfolio, secondName, secondName);
    }
}
/**
 * Embeds the file at {@code attachmentPath} into {@code doc} and registers it as a file
 * attachment under {@code name}.
 *
 * @param doc            document receiving the attachment
 * @param attachmentPath path of the file to embed
 * @param name           key of the attachment; also passed as the third and fourth argument
 *                       of the embedded file specification
 * @throws IOException if the file cannot be read
 */
private static void addAttachment(PdfDocument doc, String attachmentPath, String name) throws IOException {
    doc.addFileAttachment(
            name,
            PdfFileSpec.createEmbeddedFileSpec(doc, attachmentPath, name, name, null, null));
}
The result in Adobe Acrobat Reader on macOS:

How do I prevent downloading duplicate files for normalization?

Sending the request to Server with the below xml for downloading
<?xml version="1.0" encoding="UTF-8"?>
<ResourceSet xmlns:v01"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<cycleTime>123</cycleTime>
<object>
<sourceUrl>http://10.34894.494/23.png</sourceUrl>
<accessUrl>http://10.126.45.72/cme/23.png</accessUrl>
<objectMetadata>
<headerName>Content-Length</headerName>
<headerName>E-Tag</headerName>
</objectMetadata>
</object>
<object>
<sourceUrl>http://10.84375.72/cme/23.png</sourceUrl>
<accessUrl>http://10.4575.572/cme/logo/23.png</accessUrl>
<objectMetadata>
<headerName>Content-Length</headerName>
<headerName>E-Tag</headerName>
</objectMetadata>
</object>
</ResourceSet>
There are two objects that have the same source URL but different access URLs.
My job is to download the image only once, because the source URLs are duplicated.
I am iterating through the objects, but how do I know that two objects have the same source URL to download?
/**
 * Downloads every distinct source URL listed in the resource-set XML into the download
 * directory, after clearing that directory.
 *
 * @throws Throwable a {@code GenericException} when the download reports failure, or
 *                   whatever the helper calls throw
 */
public void download_resourceset_object_urls_images_to_local() throws Throwable {
    List<String> sourceURis = GFDUtils.getSourceOrAccessURLs(xmlPath + xmlFileName, "sourceUrl");
    // Several objects may share one source URL; a LinkedHashSet drops the repeats while
    // keeping first-seen order, so each file is fetched once. A null list (returned when
    // XML validation fails) is passed through unchanged, as before.
    List<String> uniqueSourceUris = sourceURis == null
            ? null
            : new java.util.ArrayList<>(new java.util.LinkedHashSet<>(sourceURis));
    dwInfoList = new HashMap<String, DownloadFileInfo>();
    NSAUtils.removeFiles(ConfigLoader.DOWNLOAD_DIR);
    boolean downloaded = HTTPClientFileDownload.downloadFile(uniqueSourceUris, ConfigLoader.DOWNLOAD_DIR, dwInfoList);
    if (!downloaded) {
        throw new GenericException("Files are not available / Failed download ");
    }
    logger.info("All URL files / images are downloaded successfully....");
}
Here I am iterating over the XML and collecting the source URLs to download:
/**
 * Reads the resource-set XML at {@code xmlPath} and collects one URL per object.
 *
 * @param xmlPath path of the XML file
 * @param urlname {@code "sourceUrl"} to collect source URLs; any other value collects access URLs
 * @return the collected URLs in document order (possibly incomplete if parsing failed part-way),
 *         or {@code null} when schema validation fails
 * @throws IOException if the file cannot be read
 */
public static List<String> getSourceOrAccessURLs(String xmlPath, String urlname) throws IOException {
    // Nothing is read or parsed unless the document passes schema validation.
    if (!XMLValidation.validateXMLSchema(xmlPath)) {
        return null;
    }
    // Decode explicitly as UTF-8 instead of relying on the platform default charset.
    // NOTE(review): assumes the input always declares encoding="UTF-8", as in the sample
    // request - confirm.
    String xml = FileUtils.readFileToString(new File(xmlPath), java.nio.charset.StandardCharsets.UTF_8);
    List<String> urls = new ArrayList<>();
    try {
        // NOTE(review): XStream deserialization of externally supplied XML should be restricted
        // to the expected types; left unchanged here.
        XStream xs = new XStream();
        xs.processAnnotations(Resourceset.class);
        Resourceset rs = (Resourceset) xs.fromXML(xml);
        for (ResourcesetObject resourcesetObject : rs.getResourcesetObject()) {
            if (urlname.equals("sourceUrl")) {
                urls.add(resourcesetObject.getSourceUrl());
            } else {
                urls.add(resourcesetObject.getAccessUrl());
            }
        }
    } catch (Exception e) {
        // Best effort, as before: report the failure and return what was collected so far.
        e.printStackTrace();
    }
    return urls;
}
These are the URLs I am passing for downloading.
Please help
Thanks,

How to generate multiple, slightly different XSD schemas from one Java model with JAXB?

I have a set of related Java classes, which are able to hold data I need. Below is a simplified class diagram of what I have:
Now I need to import data from XML and for that I want to generate XSD schema. The problem is that I want several XSD schemas like this:
One that allows the whole data graph to be imported.
One that allows only RootNode.fieldA and ChildNodeA.
One that allows only RootNode.fieldB and ChildNodeB.
I can easily generate XSD that meets the requirements of nr.1 using JAXB (programmatically). But is there a way to do that for cases nr.2 and nr.3 for the same classes? In other words, it seems I need something like "profiles" in JAXB.
Update:
Here is how I generate XSD schema:
// Generate the XSD for RootNode; each schema file is written into the current directory
// under the file name suggested by JAXB.
final File baseDir = new File(".");
JAXBContext jc = JAXBContext.newInstance(RootNode.class);
jc.generateSchema(new SchemaOutputResolver() {
    @Override
    public Result createOutput(String namespaceUri, String suggestedFileName) throws IOException {
        final File target = new File(baseDir, suggestedFileName);
        return new StreamResult(target);
    }
});
This is not a full answer, just an idea.
You probably use the javax.xml.bind.JAXBContext.generateSchema(SchemaOutputResolver) method to generate your schema, so you basically use a specific JAXBContext instance. This instance is built based on the annotations in classes. When building the context, these annotations are read and organized into a model which is then used for all the operations.
So to generate different schemas you probably need to create different contexts. You can't change the annotations per case, but you can read annotations in different ways.
Take a look at the AnnotationReader. This is what JAXB RI uses behind the scenes to load annotations from Java classes. You can create your own implementation and use it when creating the JAXBContext. Here's an example of something similar:
// Custom annotation reader to be used instead of the default one when the context is built.
final AnnotationReader<Type, Class, Field, Method> annotationReader = new AnnoxAnnotationReader();
// Context properties; the reader is registered under the JAXB RI annotation-reader key.
final Map<String, Object> properties = new HashMap<String, Object>();
properties.put(JAXBRIContext.ANNOTATION_READER, annotationReader);
// Build the context for the given package with the current thread's context class loader
// and the properties above.
final JAXBContext context = JAXBContext.newInstance(
"org.jvnet.annox.samples.po",
Thread.currentThread().getContextClassLoader(),
properties);
So how about writing your own annotation reader, which would consider what you call "profiles"? You can invent your own annotation @XmlSchemaProfile(name="foo"). Your annotation reader would then check if this annotation is present with the desired value and then either return it or ignore it. You'll be able to build different contexts from the same Java model - and consequently produce different schemas according to profiles defined by your @XmlSchemaProfile annotations.
I found a solution that suited me. The idea is to output the result of XSD generation into an XML Document (in-memory DOM). JAXB allows that. After this, you can manipulate the document any way you wish, adding or removing parts.
I wrote some filters that whitelist or blacklist fields (in XSD they are elements) and classes (in XSD they are complex types). While I see a lot of potential problems with this approach, it did the job in my case. Below is the code for case 2 schema:
// This SchemaOutputResolver implementation saves XSD into DOM
static class DOMResultSchemaOutputResolver extends SchemaOutputResolver {
private List<DOMResult> results = new LinkedList<DOMResult>();
#Override
public Result createOutput(String ns, String file) throws IOException {
DOMResult result = new DOMResult();
result.setSystemId(file);
results.add(result);
return result;
}
public Document getDocument() {
return (Document)results.get(0).getNode();
}
public String getFilename() {
return results.get(0).getSystemId();
}
}
/**
 * Serializes the XSD DOM document into a file, with indentation enabled.
 *
 * @param xsdDocument the schema document to write
 * @param filename    path of the file to create or overwrite
 * @throws IOException if the file cannot be opened or written
 */
protected void serializeXsdToFile(Document xsdDocument, String filename) throws IOException {
    OutputFormat format = new OutputFormat(xsdDocument);
    format.setIndenting(true);
    // try-with-resources closes the stream even when serialization fails; the original
    // left it open.
    try (FileOutputStream os = new FileOutputStream(filename)) {
        XMLSerializer serializer = new XMLSerializer(os, format);
        serializer.serialize(xsdDocument);
    }
}
#Test
public void generateSchema2() throws JAXBException, IOException, XPathExpressionException {
JAXBContext context = JAXBContext.newInstance(RootNode.class);
DOMResultSchemaOutputResolver schemaOutputResolver = new DOMResultSchemaOutputResolver();
context.generateSchema(schemaOutputResolver);
// Do your manipulations here as you want. Below is just an example!
filterXsdDocumentComplexTypes(schemaOutputResolver.getDocument(), asList("childNodeA"), true);
filterXsdDocumentElements(schemaOutputResolver.getDocument(), asList("fieldB"));
serializeXsdToFile(schemaOutputResolver.getDocument(), "xf.xsd");
}
/**
 * Decides whether a complex type must be removed from the schema.
 *
 * @param complexTypeName name of the type being checked
 * @param complexTypes    names forming the whitelist or blacklist
 * @param whitelist       {@code true} to treat the list as a whitelist, {@code false} as a blacklist
 * @return {@code true} when the type is missing from a whitelist or present in a blacklist
 */
private boolean shouldComplexTypeBeDeleted(String complexTypeName, List<String> complexTypes, boolean whitelist) {
    final boolean listed = complexTypes.contains(complexTypeName);
    // Whitelist: delete what is not listed. Blacklist: delete what is listed.
    return whitelist != listed;
}
/**
 * Removes named complex types rejected by the white/blacklist, then removes elements whose
 * {@code type} attribute is rejected and does not start with {@code "xs"}.
 *
 * @param xsdDocument  schema document, modified in place
 * @param complexTypes type names forming the whitelist or blacklist
 * @param whitelist    {@code true} to keep only the listed types, {@code false} to drop them
 * @throws XPathExpressionException if an XPath query cannot be evaluated
 */
protected void filterXsdDocumentComplexTypes(Document xsdDocument, List<String> complexTypes, boolean whitelist) throws XPathExpressionException {
    XPath xPath = XPathFactory.newInstance().newXPath();

    // Pass 1: named complexType definitions.
    NodeList typeDefinitions = (NodeList) xPath.evaluate("//*[local-name() = 'complexType']", xsdDocument, XPathConstants.NODESET);
    for (int idx = 0; idx < typeDefinitions.getLength(); idx++) {
        Node typeDefinition = typeDefinitions.item(idx);
        Node nameAttr = typeDefinition.getAttributes().getNamedItem("name");
        if (nameAttr == null) {
            continue; // complex types without a name are left alone
        }
        if (shouldComplexTypeBeDeleted(nameAttr.getNodeValue(), complexTypes, whitelist)) {
            typeDefinition.getParentNode().removeChild(typeDefinition);
        }
    }

    // Pass 2: elements, queried after pass 1 has modified the document.
    NodeList elementNodes = (NodeList) xPath.evaluate("//*[local-name() = 'element']", xsdDocument, XPathConstants.NODESET);
    for (int idx = 0; idx < elementNodes.getLength(); idx++) {
        Node element = elementNodes.item(idx);
        Node typeAttr = element.getAttributes().getNamedItem("type");
        if (typeAttr == null) {
            continue; // elements without a type attribute are left alone
        }
        boolean rejected = shouldComplexTypeBeDeleted(typeAttr.getNodeValue(), complexTypes, whitelist);
        if (rejected && !typeAttr.getNodeValue().startsWith("xs")) {
            element.getParentNode().removeChild(element);
        }
    }
}
/**
 * Removes every schema element whose {@code name} attribute is in the blacklist.
 *
 * @param xsdDocument         schema document, modified in place
 * @param blacklistedElements element names to remove
 * @throws XPathExpressionException if the XPath query cannot be evaluated
 */
protected void filterXsdDocumentElements(Document xsdDocument, List<String> blacklistedElements) throws XPathExpressionException {
    XPath xPath = XPathFactory.newInstance().newXPath();
    NodeList elements = (NodeList) xPath.evaluate("//*[local-name() = 'element']", xsdDocument, XPathConstants.NODESET);
    for (int i = 0; i < elements.getLength(); i++) {
        Node node = elements.item(i);
        Node nameAttribute = node.getAttributes().getNamedItem("name");
        // Elements without a name attribute (e.g. declared with ref="...") cannot match the
        // blacklist; skipping them avoids a NullPointerException.
        if (nameAttribute == null) {
            continue;
        }
        if (blacklistedElements.contains(nameAttribute.getNodeValue())) {
            node.getParentNode().removeChild(node);
        }
    }
}

Categories

Resources