Java Apache POI newlines randomly inserted into generated page? - java

I am working on a project for fun that handles creating my custom letterhead. I'm using Apache POI to handle word documents. I plan on expanding it once I have the base framework done to add a GUI using AWT and allowing for customization through it, which explains how I have some things set up in code. I am getting some really bizarre results when trying to format my header, it appears that Apache POI is inserting newlines where it wants to? I think I am not understanding something.
CreateDocument.java
package letterHeader;
import java.io.File;
import java.io.FileOutputStream;
import java.math.BigInteger;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPageMar;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
/**
 * Builds a simple letterhead .docx with Apache POI: a name line, an address line,
 * one phone line and two e-mail lines, written to a file named after today's date.
 *
 * <p>The document, its paragraphs and their runs are held in static fields; list-like
 * entries (phones, e-mails) are created on demand through {@link #addListParagraph(String)}.
 */
public class CreateDocument
{
    // create blank document
    static XWPFDocument document = new XWPFDocument();
    // create paragraphs for header
    // NOTE(review): static initializers run in declaration order, so the paragraphs are
    // created as name, address, date. The phone/e-mail paragraphs are only created later,
    // from main() via addListParagraph(), i.e. after the "date" paragraph, which never
    // gets a run or any text. Presumably that empty paragraph is the unexpected blank
    // line between the address and the phone number - confirm in the generated file.
    static XWPFParagraph name = document.createParagraph();
    static XWPFParagraph address = document.createParagraph();
    static ArrayList<XWPFParagraph> phoneNumbers = new ArrayList<XWPFParagraph>();
    static ArrayList<XWPFParagraph> emails = new ArrayList<XWPFParagraph>();
    static XWPFParagraph date = document.createParagraph();
    // create runner objects
    static XWPFRun nameRunner = name.createRun();
    static XWPFRun addressRunner = address.createRun();
    // remember to make a runner for each email
    static ArrayList<XWPFRun> phoneRunners = new ArrayList<XWPFRun>();
    static ArrayList<XWPFRun> emailRunners = new ArrayList<XWPFRun>();

    /**
     * Fills in the letterhead and writes it to "letterhead" + formatted date + ".docx".
     *
     * @param args unused
     * @throws Exception if the file cannot be created or the document cannot be written
     */
    public static void main(String[] args) throws Exception
    {
        // make datetime for timestamp
        DateFormat dateFormat = new SimpleDateFormat("MM:dd:yyyy");
        // named "now" so it no longer shadows the static paragraph field "date"
        Date now = new Date();
        File target = new File("letterhead" + dateFormat.format(now) + ".docx");
        // try-with-resources: the stream is closed even if building or writing the document throws
        try (FileOutputStream out = new FileOutputStream(target))
        {
            // page margins, all four sides set to 720
            CTSectPr sectPr = document.getDocument().getBody().addNewSectPr();
            CTPageMar pageMar = sectPr.addNewPgMar();
            pageMar.setLeft(BigInteger.valueOf(720L));
            pageMar.setTop(BigInteger.valueOf(720L));
            pageMar.setRight(BigInteger.valueOf(720L));
            pageMar.setBottom(BigInteger.valueOf(720L));
            // storing the ID automatically makes the objects
            int phoneID = addListParagraph("phone");
            int emailID1 = addListParagraph("email");
            int emailID2 = addListParagraph("email");
            // make name
            nameRunner.setText("Michael Simanski");
            nameRunner.setBold(true);
            nameRunner.setFontSize(18);
            nameRunner.setFontFamily("Times");
            // make address
            addressRunner.setText("address");
            addressRunner.setFontSize(12);
            addressRunner.setFontFamily("Times");
            // make phone
            XWPFRun phoneRunner = phoneRunners.get(phoneID);
            phoneRunner.setText("phone");
            phoneRunner.setFontSize(12);
            phoneRunner.setFontFamily("Times");
            // make emails
            XWPFRun firstEmailRunner = emailRunners.get(emailID1);
            firstEmailRunner.setText("mfsimanski#gmail.com");
            firstEmailRunner.setFontSize(12);
            firstEmailRunner.setFontFamily("Times");
            XWPFRun secondEmailRunner = emailRunners.get(emailID2);
            secondEmailRunner.setText("secondemail");
            secondEmailRunner.setFontSize(12);
            secondEmailRunner.setFontFamily("Times");
            secondEmailRunner.addCarriageReturn();
            document.write(out);
        }
    }

    /**
     * Creates a new paragraph plus a run for it and registers both in the list that
     * matches {@code type}.
     *
     * @param type either "phone" or "email"
     * @return the index of the new entry in the matching paragraph/run lists; 0 (after
     *         printing an error) when {@code type} is not recognised
     */
    public static int addListParagraph(String type)
    {
        switch (type)
        {
            case "phone":
                phoneNumbers.add(document.createParagraph());
                phoneRunners.add(phoneNumbers.get(phoneNumbers.size() - 1).createRun());
                return phoneNumbers.size() - 1;
            case "email":
                emails.add(document.createParagraph());
                emailRunners.add(emails.get(emails.size() - 1).createRun());
                return emails.size() - 1;
            default:
                System.out.println("ERROR: Paragraph type not found!");
                return 0;
        }
    }
}
Pretty simple right? I expect the following results:
letterhead07/08/2019.docx
Michael Simanski
address
phone
mfsimanski#gmail.com
secondemail
But perplexingly I get:
letterhead07/08/2019.docx
Michael Simanski
address
phone
mfsimanski#gmail.com
secondemail
Am I missing something, stupid, or both?

Related

com.itextpdf.kernel.PdfException: There is no associate PdfWriter for making indirects

I keep getting this error with some PDF files. It works perfectly for some PDFs but fails with this error on others.
Jar used:
forms-7.1.4.jar
io-7.1.4.jar
layout-7.1.4.jar
kernel-7.1.4.jar
package test;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.*;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.forms.PdfAcroForm;
/**
 * Prints the appearance states of every button-type form field found in {@link #DATASHEET}.
 */
public class test5 {
    public static final String DATASHEET
            = "2.pdf";

    /**
     * Opens the PDF with a reader only, walks its form fields and prints the appearance
     * states of each field whose form type is {@code PdfName.Btn}.
     *
     * @param args unused
     * @throws Exception if the PDF cannot be opened or processed
     */
    public static void main(String[] args) throws Exception {
        PdfReader reader = new PdfReader(DATASHEET);
        // try-with-resources: the document is closed even when form processing throws
        try (PdfDocument pdfDoc = new PdfDocument(reader)) {
            // NOTE(review): the document has no PdfWriter, yet "true" is passed as the
            // second argument here - presumably a request to create the AcroForm when it
            // is missing, which would need a writable document. Confirm against the iText docs.
            PdfAcroForm form = PdfAcroForm.getAcroForm(pdfDoc, true);
            Set<String> fields = form.getFormFields().keySet();
            for (String key : fields) {
                PdfName type = form.getField(key).getFormType();
                if (type != null && 0 == PdfName.Btn.compareTo(type)) {
                    String[] states = form.getField(key).getAppearanceStates();
                    for (String state : states) {
                        System.out.println(state);
                    }
                }
            }
        }
    }
}
PDF FILE
This program finds the radio button values in the pdf
You open the PdfDocument with only a PdfReader, no PdfWriter:
PdfDocument pdfDoc = new PdfDocument(reader);
Thus, you cannot (deeply) change the document. On the other hand you retrieve the AcroForm with the second argument true:
PdfAcroForm form = PdfAcroForm.getAcroForm(pdfDoc, true);
This signals to iText that you want it to add a new AcroForm structure to the document if it does not have one yet. This is a deep change.
Thus, your code works for pdfs that already have an AcroForm structure and fails for pdfs that don't.
So either use a writable PdfDocument (with also a PdfWriter) or don't tell iText to create AcroForm structures (with a false parameter). For the latter option you may have to add a null check.

Using PdfCleanUpTool or PdfAutoSweep causes some text to change to bold, line weights increase and double points change to hearts

I have a weird problem when I try to use iText 7. Some parts of the PDF are modified (text changes to bold, line weights increase, and double points change to hearts). In iText version 5.4.4 this didn't happen, but every version I have tried since then (5 or 7) causes this same problem.
Does anyone have a clue why this is happening, and is there anything I could do to bypass this problem? Any help would be appreciated!
If more information is needed, I will try to provide it.
Below is simple code that I used to test iText 7.
Example PDF Files
package javaapplication1;
import com.itextpdf.kernel.colors.ColorConstants;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
import com.itextpdf.pdfcleanup.PdfCleanUpTool;
import com.itextpdf.pdfcleanup.autosweep.ICleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.PdfAutoSweep;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Runs three iText 7 clean-up variants against {@link #SRC}: a fixed rectangle,
 * the document's own redact annotations, and a regex-based auto sweep.
 */
public class JavaApplication1 {
    public static final String DEST = "D:/TEMP/TEMP/PDF/result/orientation_result.pdf";
    public static final String DEST2 = "D:/TEMP/TEMP/PDF/result/orientation_result2.pdf";
    public static final String DEST3 = "D:/TEMP/TEMP/PDF/result/orientation_result3.pdf";
    public static final String SRC = "D:/TEMP/TEMP/PDF/TEST_PDF.pdf";

    /**
     * Creates the result directory and produces the three output files.
     *
     * @param args unused
     * @throws IOException if one of the first two clean-up runs fails on I/O
     */
    public static void main(String[] args) throws IOException {
        File file = new File(DEST);
        file.getParentFile().mkdirs();
        new JavaApplication1().manipulatePdf(DEST);
        new JavaApplication1().manipulatePdf2(DEST2);
        try (PdfDocument pdf = new PdfDocument(new PdfReader(SRC), new PdfWriter(DEST3))) {
            // sweep every match of "2019" and paint the cleaned area pink
            final ICleanupStrategy cleanupStrategy = new RegexBasedCleanupStrategy(Pattern.compile("2019", Pattern.CASE_INSENSITIVE)).setRedactionColor(ColorConstants.PINK);
            final PdfAutoSweep autoSweep = new PdfAutoSweep(cleanupStrategy);
            autoSweep.cleanUp(pdf);
        } catch (Exception e) {
            // failures of the auto-sweep run are only reported, not rethrown
            System.out.println(e.toString());
        }
    }

    /**
     * Cleans one fixed rectangle on page 1 of {@link #SRC} and writes the result to {@code dest}.
     *
     * @param dest path of the PDF to write
     * @throws IOException if reading the source or writing the result fails
     */
    protected void manipulatePdf(String dest) throws IOException {
        // try-with-resources closes the document even if cleanUp() throws
        try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC), new PdfWriter(dest))) {
            List<PdfCleanUpLocation> cleanUpLocations = new ArrayList<PdfCleanUpLocation>();
            // The arguments of the PdfCleanUpLocation constructor: the number of page to be cleaned up,
            // a Rectangle defining the area on the page we want to clean up,
            // a color which will be used while filling the cleaned area.
            PdfCleanUpLocation location = new PdfCleanUpLocation(1, new Rectangle(97, 405, 383, 40),
                    ColorConstants.GRAY);
            cleanUpLocations.add(location);
            PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDoc, cleanUpLocations);
            cleaner.cleanUp();
        }
    }

    /**
     * Cleans the regions given by the redact annotations of {@link #SRC} and writes the
     * result to {@code dest}.
     *
     * @param dest path of the PDF to write
     * @throws IOException if reading the source or writing the result fails
     */
    protected void manipulatePdf2(String dest) throws IOException {
        // try-with-resources closes the document even if cleanUp() throws
        try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC), new PdfWriter(dest))) {
            // If the second argument is true, then regions to be erased are extracted from the redact annotations
            // contained inside the given document. If the second argument is false (that's default behavior),
            // then use PdfCleanUpTool.addCleanupLocation(PdfCleanUpLocation)
            // method to set regions to be erased from the document.
            PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDoc, true);
            cleaner.cleanUp();
        }
    }
}
Sorry for the late response. I have now managed to verify that updating to the latest versions corrected this problem. Thanks to @mkl for pointing this out.
Problem solved

Insert piece of .doc .docx file to another by using the Apache POI HWPF or XWPF

Can somebody help me to integrate some MS Word document to another.
I can open, edit and save, but only with one MS Word document.
My simple code only creates, edits and saves .docx:
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.*;
/**
 * Minimal Apache POI example: creates a .docx with two formatted paragraphs and saves it.
 */
public class SimpleDocument {
    /**
     * Builds the two-paragraph document and writes it to "C:/simple.docx".
     *
     * <p>NOTE(review): this declares a return type, so it is an ordinary method that
     * shares the class name, not a constructor; it has to be called explicitly.
     *
     * @throws Exception if the file cannot be created or the document cannot be written
     */
    public void SimpleDocument() throws Exception {
        XWPFDocument doc = new XWPFDocument();

        // first paragraph: left aligned, bold, Courier, dot-dot-dash underline
        // (the earlier CENTER alignment call was overwritten immediately and has been dropped)
        XWPFParagraph p1 = doc.createParagraph();
        p1.setAlignment(ParagraphAlignment.LEFT);
        XWPFRun r1 = p1.createRun();
        r1.setBold(true);
        r1.setText("The quick brown fox");
        r1.setFontFamily("Courier");
        r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);

        // second paragraph: right aligned, default run formatting
        XWPFParagraph p2 = doc.createParagraph();
        p2.setAlignment(ParagraphAlignment.RIGHT);
        XWPFRun r2 = p2.createRun();
        r2.setText("jumped over the lazy dog");

        // try-with-resources closes the stream even if write() throws
        try (FileOutputStream out = new FileOutputStream("C:/simple.docx")) {
            doc.write(out);
        }
    }
}
How to combine two pieces of formatted text (RANGE, PARAGRAPH) ?
try the following code:
import java.io.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
/**
 * Opens an existing .doc file with Apache POI HWPF, appends a bold line of text after
 * its content and saves the result as a new file.
 */
public class test {
    /**
     * @param args unused
     * @throws Exception if the source cannot be read or the result cannot be written
     */
    public static void main(String[] args) throws Exception {
        // POI apparently can't create a document from scratch,
        // so we need an existing empty dummy document
        // try-with-resources: the source stream is closed on every path
        try (InputStream in = new FileInputStream("D:\\src.doc")) {
            HWPFDocument doc = new HWPFDocument(in);
            Range range = doc.getRange();
            CharacterRun run = range
                    .insertAfter("Text After copied file contents!");
            run.setBold(true);
            // the output file is only opened once the document has been built
            try (OutputStream out = new FileOutputStream("D:\\result.doc")) {
                doc.write(out);
                out.flush();
            }
        }
    }
}

Searching for UUID in lucene not working

I've got a UUID field I'm adding to my document in the following format: 372d325c-e01b-432f-98bd-bc4c949f15b8. However, when I try to query for documents by the UUID it will not return them no matter how I try to escape the expression. For example:
+uuid:372d325c-e01b-432f-98bd-bc4c949f15b8
+uuid:"372d325c-e01b-432f-98bd-bc4c949f15b8"
+uuid:372d325c\-e01b\-432f\-98bd\-bc4c949f15b8
+uuid:(372d325c-e01b-432f-98bd-bc4c949f15b8)
+uuid:("372d325c-e01b-432f-98bd-bc4c949f15b8")
And even skipping the QueryParser altogether using TermQuery like so:
new TermQuery(new Term("uuid", uuid.toString()))
Or
new TermQuery(new Term("uuid", QueryParser.escape(uuid.toString())))
None of these searches will return a document, but if I search for portions of the UUID it will return a document. For example these will return something:
+uuid:372d325c
+uuid:e01b
+uuid:432f
What should I do to index these documents so I can pull them back by their UUID? I've considered reformatting the UUID to remove the hyphens, but I haven't implemented it yet.
The only way I got this to work is to use WhitespaceAnalyzer instead of StandardAnalyzer. Then using a TermQuery like so:
// Index with a WhitespaceAnalyzer and open the index in create-or-append mode.
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new WhitespaceAnalyzer(Version.LUCENE_36));
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
writer = new IndexWriter(directory, config);
Then searching:
// Exact-term lookup on the "uuid" field, asking for at most one hit.
Term uuidTerm = new Term("uuid", uuid.toString());
TopDocs docs = searcher.search(new TermQuery(uuidTerm), 1);
WhitespaceAnalyzer prevented Lucene from splitting apart the UUID by the hyphens. Another option could be to eliminate the dashes from the UUID, but using the WhitespaceAnalyzer works just as well for my purposes.
According to the Lucene Query Syntax rules, the query
+uuid:372d325c\-e01b\-432f\-98bd\-bc4c949f15b8
should work.
I guess that if it doesn't, that is because the uuid field is not populated as it should be when the document is inserted in the index. Could you make sure of what exactly is inserted for this field? You can use Luke to crawl the index and look for the actual values stored for the uuid field.
If you plan to use a UUID field as a lookup key, you will need to ask Lucene to index the whole field as a single string without doing tokenization. This is done by setting the right FieldType for your UUID field. In Lucene 4+, you can use StringField.
import java.io.IOException;
import java.util.UUID;
import junit.framework.Assert;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
/**
* Using Lucene 4.7 on Java 7.
*/
public class LuceneUUIDFieldLookupTest {
private Directory directory;
private Analyzer analyzer;
#Test
public void testUsingUUIDAsLookupKey() throws IOException, ParseException {
directory = new RAMDirectory();
analyzer = new StandardAnalyzer(Version.LUCENE_47);
UUID docUUID = UUID.randomUUID();
String docContentText1 = "Stack Overflow is a question and answer site for professional and enthusiast programmers.";
index(docUUID, docContentText1);
QueryParser parser = new QueryParser(Version.LUCENE_47, MyIndexedFields.DOC_TEXT_FIELD.name(), analyzer);
Query queryForProgrammer = parser.parse("programmers");
IndexSearcher indexSearcher = getIndexSearcher();
TopDocs hits = indexSearcher.search(queryForProgrammer, Integer.MAX_VALUE);
Assert.assertTrue(hits.scoreDocs.length == 1);
Integer internalDocId1 = hits.scoreDocs[0].doc;
Document docRetrieved1 = indexSearcher.doc(internalDocId1);
indexSearcher.getIndexReader().close();
String docText1 = docRetrieved1.get(MyIndexedFields.DOC_TEXT_FIELD.name());
Assert.assertEquals(docText1, docContentText1);
String docContentText2 = "TechCrunch is a leading technology media property, dedicated to ... according to a new report from the Wall Street Journal confirmed by Google to TechCrunch.";
reindex(docUUID, docContentText2);
Query queryForTechCrunch = parser.parse("technology");
indexSearcher = getIndexSearcher(); //you must reopen directory because the previous IndexSearcher only sees a snapshoted directory.
hits = indexSearcher.search(queryForTechCrunch, Integer.MAX_VALUE);
Assert.assertTrue(hits.scoreDocs.length == 1);
Integer internalDocId2 = hits.scoreDocs[0].doc;
Document docRetrieved2 = indexSearcher.doc(internalDocId2);
indexSearcher.getIndexReader().close();
String docText2 = docRetrieved2.get(MyIndexedFields.DOC_TEXT_FIELD.name());
Assert.assertEquals(docText2, docContentText2);
}
private void reindex(UUID myUUID, String docContentText) throws IOException {
try (IndexWriter indexWriter = new IndexWriter(directory, getIndexWriterConfig())) {
Term term = new Term(MyIndexedFields.MY_UUID_FIELD.name(), myUUID.toString());
indexWriter.updateDocument(term, buildDoc(myUUID, docContentText));
}//auto-close
}
private void index(UUID myUUID, String docContentText) throws IOException {
try (IndexWriter indexWriter = new IndexWriter(directory, getIndexWriterConfig())) {
indexWriter.addDocument(buildDoc(myUUID, docContentText));
}//auto-close
}
private IndexWriterConfig getIndexWriterConfig() {
return new IndexWriterConfig(Version.LUCENE_47, analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
}
private Document buildDoc(UUID myUUID, String docContentText) {
Document doc = new Document();
doc.add(new Field(
MyIndexedFields.MY_UUID_FIELD.name(),
myUUID.toString(),
StringField.TYPE_STORED));//use TYPE_STORED if you want to read it back in search result.
doc.add(new Field(
MyIndexedFields.DOC_TEXT_FIELD.name(),
docContentText,
TextField.TYPE_STORED));
return doc;
}
private IndexSearcher getIndexSearcher() throws IOException {
DirectoryReader ireader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = new IndexSearcher(ireader);
return indexSearcher;
}
enum MyIndexedFields {
MY_UUID_FIELD,
DOC_TEXT_FIELD
}
}

Merging MS Word documents with Java

I'm looking for java libraries that read and write MS Word Document.
What I have to do is:
read a template file, .dot or .doc, and fill it with some data read from DB
take data from another Word document and merging that with the file described above, preserving paragraphs formats
users may make updates to the file.
I've searched and found POI Apache and UNO OpenOffice.
The first one can easily read a template and replace any placeholders with my own data from DB. I didn't find anything about merging two or more documents.
OpenOffice UNO looks more stable but complex too. Furthermore I'm not sure that it has the ability to merge documents..
Are we looking in the right direction?
Another solution i've thought was to convert doc file to docx. In that way I found more libraries that can help us merging documents.
But how can I do that?
Thanks!
You could take a look at Docmosis since it provides the four features you have mentioned (data population, template/document merging, DOC format and java interface). It has a couple of flavours (download, online service), but you could sign up for a free trial of the cloud service to see if Docmosis can do what you want (then you don't have to install anything) or read the online documentation.
It uses OpenOffice under the hood (you can see from the developer guide installation instructions) which does pretty decent conversions between documents. The UNO API has some complications - I would suggest either Docmosis or JODReports to isolate your project from UNO directly.
Hope that helps.
import java.io.File;
import java.util.List;
import javax.xml.bind.JAXBException;
import org.docx4j.dml.CTBlip;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageBmpPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageEpsPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageGifPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageJpegPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImagePngPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageTiffPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart.AddPartBehaviour;
import org.docx4j.relationships.Relationship;
/**
 * Appends the body content of three .docx files to a first one using docx4j, re-targets
 * the image references found in the last source document, and saves the merged result.
 */
public class MultipleDocMerge {
    /**
     * Loads the four hard-coded documents, merges them into the first and saves
     * the result to "D:\\saved1.docx".
     *
     * @param args unused
     * @throws Docx4JException if a package cannot be loaded, queried or saved
     * @throws JAXBException if an XPath query on a document fails
     */
    public static void main(String[] args) throws Docx4JException, JAXBException {
        File first = new File("D:\\Mreg.docx");
        File second = new File("D:\\Mreg1.docx");
        File third = new File("D:\\Mreg4&19.docx");
        File fourth = new File("D:\\test12.docx");
        WordprocessingMLPackage f = WordprocessingMLPackage.load(first);
        WordprocessingMLPackage s = WordprocessingMLPackage.load(second);
        WordprocessingMLPackage a = WordprocessingMLPackage.load(third);
        WordprocessingMLPackage e = WordprocessingMLPackage.load(fourth);

        // same copy step for each source, in the original order
        appendBody(f, s);
        appendBody(f, a);
        appendBody(f, e);

        // NOTE(review): only the blips of the last source document (e) are processed here;
        // images in the second and third documents are not handled by this loop.
        List<Object> blips = e.getMainDocumentPart().getJAXBNodesViaXPath("//a:blip", false);
        for (Object el : blips) {
            try {
                CTBlip blip = (CTBlip) el;
                RelationshipsPart parts = e.getMainDocumentPart().getRelationshipsPart();
                Relationship rel = parts.getRelationshipByID(blip.getEmbed());
                Part part = parts.getPart(rel);
                if (part instanceof ImagePngPart)
                    System.out.println(((ImagePngPart) part).getBytes());
                if (part instanceof ImageJpegPart)
                    System.out.println(((ImageJpegPart) part).getBytes());
                if (part instanceof ImageBmpPart)
                    System.out.println(((ImageBmpPart) part).getBytes());
                if (part instanceof ImageGifPart)
                    System.out.println(((ImageGifPart) part).getBytes());
                if (part instanceof ImageEpsPart)
                    System.out.println(((ImageEpsPart) part).getBytes());
                if (part instanceof ImageTiffPart)
                    System.out.println(((ImageTiffPart) part).getBytes());
                // attach the image part to the target and point the blip at the new relationship
                Relationship newrel = f.getMainDocumentPart().addTargetPart(part, AddPartBehaviour.RENAME_IF_NAME_EXISTS);
                blip.setEmbed(newrel.getId());
                f.getMainDocumentPart().addTargetPart(e.getParts().getParts().get(new PartName("/word/" + rel.getTarget())));
            } catch (Exception ex) {
                // a failure on one image is reported and the remaining images are still processed
                ex.printStackTrace();
            }
        }
        File saved = new File("D:\\saved1.docx");
        f.save(saved);
    }

    /**
     * Adds every content object of each {@code w:body} found in {@code source} to the
     * main document part of {@code target}.
     */
    private static void appendBody(WordprocessingMLPackage target, WordprocessingMLPackage source)
            throws Docx4JException, JAXBException {
        List<Object> bodies = source.getMainDocumentPart().getJAXBNodesViaXPath("//w:body", false);
        for (Object body : bodies) {
            List<Object> children = ((org.docx4j.wml.Body) body).getContent();
            for (Object child : children) {
                target.getMainDocumentPart().addObject(child);
            }
        }
    }
}
I've developed the next class (using Apache POI):
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
/**
 * Collects .docx input streams, merges the later ones into the first document with
 * Apache POI, and writes the result to a caller-supplied output stream.
 *
 * <p>Typical use: {@code add(...)} one or more times, then {@code doMerge()}, then {@code close()}.
 */
public class WordMerge {
    private final OutputStream result;
    private final List<InputStream> inputs;
    private XWPFDocument first;

    /**
     * @param result stream the merged document is written to; closed by {@link #close()}
     */
    public WordMerge(OutputStream result) {
        this.result = result;
        inputs = new ArrayList<>();
    }

    /**
     * Registers another source document. The first one added becomes the merge target;
     * the body of every later one is set into a new body element of that target.
     *
     * @param stream .docx content; remembered so that {@link #close()} can close it
     * @throws Exception if the stream cannot be opened as a document
     */
    public void add(InputStream stream) throws Exception {
        // tracked before opening so close() still releases it if opening fails
        inputs.add(stream);
        OPCPackage srcPackage = OPCPackage.open(stream);
        XWPFDocument src1Document = new XWPFDocument(srcPackage);
        if (inputs.size() == 1) {
            first = src1Document;
        } else {
            CTBody srcBody = src1Document.getDocument().getBody();
            // NOTE(review): this adds a further body element to the first document instead of
            // appending into its existing body - confirm the output opens as intended.
            first.getDocument().addNewBody().set(srcBody);
        }
    }

    /**
     * Writes the merged document to the result stream.
     *
     * @throws IllegalStateException if no document was added successfully beforehand
     * @throws Exception if writing fails
     */
    public void doMerge() throws Exception {
        if (first == null) {
            // fail with a clear message instead of a bare NullPointerException
            throw new IllegalStateException("No document has been added; call add() before doMerge()");
        }
        first.write(result);
    }

    /**
     * Flushes and closes the result stream, then closes every registered input stream.
     * All streams are attempted even if one of them fails; the first failure is rethrown
     * with the later ones attached as suppressed exceptions.
     *
     * @throws Exception the first failure encountered while closing
     */
    public void close() throws Exception {
        Exception failure = null;
        try {
            result.flush();
            result.close();
        } catch (Exception e) {
            failure = e;
        }
        for (InputStream input : inputs) {
            try {
                input.close();
            } catch (Exception e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}
And its use:
/**
 * Merges 001.docx and 002.docx into result.docx using WordMerge.
 *
 * @param args unused
 * @throws Exception if a file cannot be opened or the merge fails
 */
public static void main(String[] args) throws Exception {
    FileOutputStream faos = new FileOutputStream("/home/victor/result.docx");
    WordMerge wm = new WordMerge(faos);
    try {
        wm.add( new FileInputStream("/home/victor/001.docx") );
        wm.add( new FileInputStream("/home/victor/002.docx") );
        wm.doMerge();
    } finally {
        // always release the output and input streams, even when add() or doMerge() throws
        wm.close();
    }
}
The Apache POI code does not work for Images.

Categories

Resources