I need to edit .doc & .docx files header and maintain the style of the document.
I tried doing it by using:
poi api : I managed to read the file header but couldn't find how to replace a text in it and save the result with the original style .
public static void mFix(String iFilePath , HashMap<String, String> iOldNewCouples)
{
aOldNewCouples = iOldNewCouples;
try {
if(iFilePath==null)
return;
File file = new File(iFilePath);
FileInputStream fis=new FileInputStream(file.getAbsolutePath());
HWPFDocument document=new HWPFDocument(fis);
WordExtractor extractor = new WordExtractor(document); // read the doc as rtf
String fileData = extractor.getHeaderText();
String fileDataResult =fileData ;
for (Entry<String, String> entry : aOldNewCouples.entrySet())
{
if(fileData.contains(entry.getKey())) {
System.out.println("replace " +entry.getKey());
fileDataResult = fileData.replace(entry.getKey(), entry.getValue());
}
}
document.getHeaderStoryRange().replaceText(fileData, fileDataResult);
saveWord(iFilePath ,document);
fis.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace( );
}
}
private static void saveWord(String filePath, HWPFDocument doc) throws FileNotFoundException, IOException
{
FileOutputStream fileOutputStream = null;
try{
fileOutputStream = new FileOutputStream(new File(filePath.replace(".doc", "-test.doc")));
BufferedOutputStream buffOutputStream = new BufferedOutputStream(fileOutputStream);
doc.write(buffOutputStream);
buffOutputStream.close();
fileOutputStream.close();
}
finally{
if( fileOutputStream != null)
fileOutputStream.close();
}
}
I tried doc4j api for docx : I found how to edit the header but didn't found how to keep the style.
public static void mFix(String iFilePath , HashMap<String, String> iOldNewCouples) {
aOldNewCouples = iOldNewCouples;
WordprocessingMLPackage output;
try {
output = WordprocessingMLPackage.load(new java.io.File(iFilePath));
replaceText(output.getDocumentModel().getSections().get(0).getHeaderFooterPolicy().getDefaultHeader());
output.save(new File(iFilePath));
}
catch (Exception e) {
e.printStackTrace();
}
}
public static void replaceText(ContentAccessor c) throws Exception
{
for (Object p: c.getContent())
{
if (p instanceof ContentAccessor)
replaceText((ContentAccessor) p);
else if (p instanceof JAXBElement)
{
Object v = ((JAXBElement) p).getValue();
if (v instanceof ContentAccessor)
replaceText((ContentAccessor) v);
else if (v instanceof org.docx4j.wml.Text)
{
org.docx4j.wml.Text t = (org.docx4j.wml.Text) v;
String text = t.getValue();
if (text != null)
{
boolean flag = false;
for (Entry<String, String> entry : aOldNewCouples.entrySet())
{
if(text.contains(entry.getKey())) {
flag =true;
text = text.replaceAll(entry.getKey(), entry.getValue());
t.setSpace("preserve");
t.setValue(text);
}
}
}
}
}
}
}
I would like to have examples for those api.
If there is other free solution for this for Java projects , please write them with example.
thanks
Tami
Related
I'm working on a JAVA web application, I need to merge multiple docx files into one docx file using JAVA.
the method takes a list of File as parameter, the output is a single docx file that contains all data concatenated from Files in input.
I tried this code but it did not work for me:
public File mergeInOneFile(List<File> files) throws IOException, InvalidFormatException, XmlException {
List<CTBody> sourceBody = new ArrayList<>();
for (File file : files) {
OPCPackage srcFile = OPCPackage.open(file);
XWPFDocument srcDocument = new XWPFDocument(srcFile);
CTBody srcBody = srcDocument.getDocument().getBody();
sourceBody.add(srcBody);
}
CTBody source = sourceBody.get(0);
sourceBody.remove(0);
while (sourceBody.size() != 0){
appendBody(source, sourceBody.get(0));
sourceBody.remove(0);
}
return (File) source;
}
private static void appendBody(CTBody src, CTBody append) throws XmlException {
XmlOptions optionsOuter = new XmlOptions();
optionsOuter.setSaveOuter();
String appendString = append.xmlText(optionsOuter);
String srcString = src.xmlText();
String prefix = srcString.substring(0,srcString.indexOf(">")+1);
String mainPart = srcString.substring(srcString.indexOf(">")+1,srcString.lastIndexOf("<"));
String suffix = srcString.substring( srcString.lastIndexOf("<") );
String addPart = appendString.substring(appendString.indexOf(">") + 1, appendString.lastIndexOf("<"));
CTBody makeBody = CTBody.Factory.parse(prefix+mainPart+addPart+suffix);
src.set(makeBody);
}
}
This method did not acheive my purpose. Any other suggestions ?
The error is clearly because of this line: return (File) source.
source is a CTBody type and you cast it to File while they are not related.
I have solved the issue of merging a list of docx files in one file, this is my code :
public File merge(final List<File> files) throws IoAppException, OtherAppException {
Path outPath = this.fileStorageService.getPath()
.resolve(UUID.randomUUID().toString() + Directory.DOCX_EXTENSION);
File outFile = outPath.toFile();
try (OutputStream os = new FileOutputStream(outFile);
InputStream is = new FileInputStream(files.get(0));
XWPFDocument doc = new XWPFDocument(is);) {
CTBody ctBody = doc.getDocument().getBody();
for (int i = 1; i < files.size(); i++) {
try (InputStream isI = new FileInputStream(files.get(i)); XWPFDocument docI = new XWPFDocument(isI);) {
CTBody ctBodyI = docI.getDocument().getBody();
appendBody(ctBody, ctBodyI);
} catch (Exception e) {
e.printStackTrace();
throw new OtherAppException("", e);
}
}
doc.write(os);
return outFile;
} catch (FileNotFoundException e) {
e.printStackTrace();
throw new IoAppException("", e);
} catch (IOException e) {
e.printStackTrace();
throw new IoAppException("", e);
}
}
private static void appendBody(CTBody src, CTBody append) throws XmlException {
XmlOptions optionsOuter = new XmlOptions();
optionsOuter.setSaveOuter();
String appendString = append.xmlText(optionsOuter);
String srcString = src.xmlText();
String prefix = srcString.substring(0, srcString.indexOf(">") + 1);
String mainPart = srcString.substring(srcString.indexOf(">") + 1, srcString.lastIndexOf("<"));
String suffix = srcString.substring(srcString.lastIndexOf("<"));
String addPart = appendString.substring(appendString.indexOf(">") + 1, appendString.lastIndexOf("<"));
CTBody makeBody;
try {
makeBody = CTBody.Factory.parse(prefix + mainPart + addPart + suffix);
} catch (XmlException e) {
e.printStackTrace();
throw new XmlException("", e);
}
src.set(makeBody);
}
I had write a code by using Apache POI 3.6.
The code is use to insert paragraph ,table and image to one docx file, but as the paragraph, table and image is from different docx file, so I need to read content from different docx file then insert to the target file.
But I found the file is only readable at the first content insert, so is there any thing need to be changed?
My method defined like following:
public class DocDomGroupUtilImpl implements DocDomGroupUtil {
private FileInputStream fips;
private XWPFDocument document;
private FileOutputStream fops;
#Override
public void generateDomGroupFile(File templateFile, List<Item> items) {
// initial parameters
Map<String, String> parameters = new HashMap<String, String>();
for (Item item : items) {
parameters.put(item.getParaName(), item.getParaValue());
}
// get domGroup type
String templateFileName = templateFile.getName();
String type = templateFileName.substring(0, templateFileName.indexOf("."));
try {
fips = new FileInputStream(templateFile);
document = new XWPFDocument(fips);
// create tempt file for document domGroup, named like
// docDomGroup_<type>_<domName>.docx
String domGroupFilePath = CONSTAINTS.temptDomGroupPath + "docDomGroup_" + type + "_"
+ parameters.get("$DomGroupName_Value") + ".docx";
File domGroupFile = new File(domGroupFilePath);
if (domGroupFile.exists()) {
domGroupFile.delete();
}
domGroupFile.createNewFile();
fops = new FileOutputStream(domGroupFile, true);
// modified the groupName
// replace content
String regularExpression = "\\$(.*)_Value";
List<XWPFParagraph> paragraphs = document.getParagraphs();
for (XWPFParagraph paragraph : paragraphs) {
List<XWPFRun> runs = paragraph.getRuns();
if (runs != null) {
for (XWPFRun run : runs) {
String text = run.getText(0);
if (text != null && Pattern.matches(regularExpression, text)) {
text = parameters.get(text);
run.setText(text, 0);
}
}
}
}
document.write(fops);
close();
// copy all the information from dom related files
File dir = new File(CONSTAINTS.temptDomPath);
for (File file : dir.listFiles()) {
if (!file.isDirectory()) {
fops = new FileOutputStream(domGroupFile, true);
fips = new FileInputStream(file);
document = new XWPFDocument(fips);
document.write(fops);
}
close();
}
// clean up tempt dom folder removeAllDomFile();
removeAllDomFile();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
close();
}
}
/**
* remove all the generated tempt dom files
*/
private void removeAllDomFile() {
// loop directory and delete tempt file
File dir = new File(CONSTAINTS.temptDomPath);
for (File file : dir.listFiles())
if (!file.isDirectory()) {
file.delete();
}
}
private void close() {
try {
fips.close();
fops.flush();
fops.close();
document.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
I'd like to get all filenames of attachments/embedded files of a PDF document. I've been searching for a long time now, but my code still doesn't work.
What I tried:
File input = new File(inputfile); // Input File Path, Given as param from args[]
pd = PDDocument.load(input);
PDDocumentNameDictionary names = new PDDocumentNameDictionary(pd.getDocumentCatalog());
PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
Map<String, COSObjectable> existedNames = efTree.getNames();
System.out.println(existedNames);//Print Embedded-Filenames to console
pd.close();
I don't know if it is even possible to print the content of a MAP to console. I'm coding in eclipse which doesn't give me any errors. But when I run the jar File I get always: NullPointerException at org.apache.pdfbox.pdmodel.PDDocument.getDocumentCatalog(PDDocument.java:778)
Any ideas or help? Many thanks...
Finally found a solution. For anyone with the same problem, the following code worked for me:
PDDocument pd;
File input = new File(inputfile); // Input File
pd = PDDocument.load(input);
//Writes all embedded Filenames (from pdf document) into Logfile
try{
PDDocumentCatalog catalog = pd.getDocumentCatalog();
PDDocumentNameDictionary names = catalog.getNames();
PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames();
//For-Each Loop is used to list all embedded files (if there is more than one)
for (Map.Entry<String, COSObjectable> entry : embeddedFileNames.entrySet())
{
//You might need to configure the logger first
logger.info("Inputfile: " + inputfile +"Found embedded File: " + entry.getKey() + ":");
}
}
catch (Exception e){
System.out.println("Document has no attachments. ");
}
Here's the ExtractEmbeddedFiles example from the source code download:
public final class ExtractEmbeddedFiles
{
private ExtractEmbeddedFiles()
{
}
/**
* This is the main method.
*
* #param args The command line arguments.
*
* #throws IOException If there is an error parsing the document.
*/
public static void main( String[] args ) throws IOException
{
if( args.length != 1 )
{
usage();
System.exit(1);
}
else
{
PDDocument document = null;
try
{
File pdfFile = new File(args[0]);
String filePath = pdfFile.getParent() + System.getProperty("file.separator");
document = PDDocument.load(pdfFile );
PDDocumentNameDictionary namesDictionary =
new PDDocumentNameDictionary( document.getDocumentCatalog() );
PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
if (efTree != null)
{
Map<String, PDComplexFileSpecification> names = efTree.getNames();
if (names != null)
{
extractFiles(names, filePath);
}
else
{
List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
for (PDNameTreeNode<PDComplexFileSpecification> node : kids)
{
names = node.getNames();
extractFiles(names, filePath);
}
}
}
// extract files from annotations
for (PDPage page : document.getPages())
{
for (PDAnnotation annotation : page.getAnnotations())
{
if (annotation instanceof PDAnnotationFileAttachment)
{
PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment.getFile();
PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
extractFile(filePath, fileSpec.getFilename(), embeddedFile);
}
}
}
}
finally
{
if( document != null )
{
document.close();
}
}
}
}
private static void extractFiles(Map<String, PDComplexFileSpecification> names, String filePath)
throws IOException
{
for (Entry<String, PDComplexFileSpecification> entry : names.entrySet())
{
String filename = entry.getKey();
PDComplexFileSpecification fileSpec = entry.getValue();
PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
extractFile(filePath, filename, embeddedFile);
}
}
private static void extractFile(String filePath, String filename, PDEmbeddedFile embeddedFile)
throws IOException
{
String embeddedFilename = filePath + filename;
File file = new File(filePath + filename);
System.out.println("Writing " + embeddedFilename);
FileOutputStream fos = null;
try
{
fos = new FileOutputStream(file);
fos.write(embeddedFile.toByteArray());
}
finally
{
IOUtils.closeQuietly(fos);
}
}
private static PDEmbeddedFile getEmbeddedFile(PDComplexFileSpecification fileSpec )
{
// search for the first available alternative of the embedded file
PDEmbeddedFile embeddedFile = null;
if (fileSpec != null)
{
embeddedFile = fileSpec.getEmbeddedFileUnicode();
if (embeddedFile == null)
{
embeddedFile = fileSpec.getEmbeddedFileDos();
}
if (embeddedFile == null)
{
embeddedFile = fileSpec.getEmbeddedFileMac();
}
if (embeddedFile == null)
{
embeddedFile = fileSpec.getEmbeddedFileUnix();
}
if (embeddedFile == null)
{
embeddedFile = fileSpec.getEmbeddedFile();
}
}
return embeddedFile;
}
/**
* This will print the usage for this program.
*/
private static void usage()
{
System.err.println( "Usage: java " + ExtractEmbeddedFiles.class.getName() + " <input-pdf>" );
}
}
I have large excel file with several worksheets.
I want to process just one sheet in file...Read value from two columns and update two columns.
Using this code, I am able to read data from sheet.But unable to figure out, how to save output back.
public class ExcelFunctions {
private class ExcelData implements SheetContentsHandler {
private Record rec ;
public void startRow(int rowNum) {
rec = new Record();
output.put("R"+rowNum, rec);
}
public void endRow(int rowNum) {
}
public void cell(String cellReference, String formattedValue,
XSSFComment comment) {
int thisCol = (new CellReference(cellReference)).getCol();
if(thisCol==7){
try {
rec.setK1(formattedValue);
} catch (Exception e) {
}
}
if(thisCol==8){
try {
rec.setK2(formattedValue);
} catch (Exception e) {
}
}
if(thisCol == 27){
String key = rec.full_key();
System.out.println(key);
///////Process Matched Key...get Data
//////Set value to column 27
}
if(thisCol == 28){
String key = rec.full_key();
System.out.println(key);
///////Process Matched Key...get Data
//////Set value to column 28
}
}
public void headerFooter(String text, boolean isHeader, String tagName) {
}
}
///////////////////////////////////////
private final OPCPackage xlsxPackage;
private final Map<String, Record> output;
public ExcelFunctions(OPCPackage pkg, Map<String, Record> output) {
this.xlsxPackage = pkg;
this.output = output;
}
public void processSheet(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetHandler,
InputStream sheetInputStream)
throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
public void process()
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
boolean found = false;
while (iter.hasNext() && !found) {
InputStream stream = iter.next();
String sheetName = iter.getSheetName();
if(sheetName.equals("All Notes") ){
processSheet(styles, strings, new ExcelData(), stream);
found = true;
}
stream.close();
}
}
#SuppressWarnings("unused")
public static void main(String[] args) throws Exception {
File xlsxFile = new File("C:\\Users\\admin\\Downloads\\Unique Name Macro\\big.xlsm");
if (!xlsxFile.exists()) {
System.err.println("Not found or not a file: " + xlsxFile.getPath());
return;
}
// The package open is instantaneous, as it should be.
OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ_WRITE);
Map<String, Record> output = new HashMap<String, Record>();
ExcelFunctions xlFunctions = new ExcelFunctions(p, output);
xlFunctions.process();
p.close();
if (output != null){
for(Record rec : output.values()){
System.out.println(rec.full_key());
}
}
}
}
File is very large and I only want to use Event API.
I have successfully tested Using this code.
But this loads Whole file in memory(causing application to crash)...While I only need to edit One sheet.
public static void saveToExcel(String ofn, Map<String, Record> data) {
FileInputStream infile;
try {
infile = new FileInputStream(new File("C:\\Users\\admin\\Downloads\\Unique Name Macro\\big.xlsm"));
XSSFWorkbook workbook = new XSSFWorkbook (infile);
XSSFSheet sheet = workbook.getSheet("All Notes");
for(Record rec : output.values()){
Row dataRow = rec.getRow(rev.getRownum-1);
setCellValue(dataRow, 26, "SomeValue");
setCellValue(dataRow, 27, "SomeValue");
}
FileOutputStream out = new FileOutputStream(new File(ofn));
workbook.write(out);
infile.close();
out.close();
workbook.close();
}
catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private static void setCellValue(Row row,int col, String value){
Cell c0 = row.getCell(col);
if (c0 == null){
c0 = row.createCell(col);
}
c0.setCellValue(value);
}
I don't think there is anything provided in POI out of the box which allows to do that.
Therefore you might be better off doing this by unzipping the XLSX/XLSM file (they are actually a bunch of xml-files inside a zip) and reading the xml-files as text-files or with a normal XML Parser so that you can easily write out the changed file again to produce the XLSX/XLSM file again.
I'm trying to extract the files in an .iso file I have. There are no errors and the console prints out all the files inside. However, none of the files get extracted. Any help is greatly appreciated!
I was able to find this link but I can't seem to integrate it in to my project.
Link to code example
class DownloadWorker extends SwingWorker<String, Object> {
private String flocation;
private JLabel out;
public DownloadWorker(String location, JLabel fout)
{
this.flocation = location;
this.out = fout;
}
#Override
public String doInBackground() {
//download here
out.setText("Beginning download. This may take a few minutes.");
try {
//ProgressBar/Install
System.out.println("FILELOCATION:\n----------");
System.out.println(flocation);
String URL_LOCATION = "http://www.futureretrogaming.tk/gamefiles/ProfessorPhys.iso";
String LOCAL_FILE = (flocation + "\\ProfessorPhys\\");
File localfile = new File(LOCAL_FILE);
if (localfile.exists()) {
System.out.println("Directory exists!");
}
else {
System.out.println("Directory doesn't exist! Creating...");
localfile.mkdir();
if (localfile.exists())
System.out.println("Directory created!");
}
System.out.println("LOCALFILE:\n-------");
System.out.println(LOCAL_FILE);
URL website = new URL(URL_LOCATION);
ReadableByteChannel rbc = Channels.newChannel(website.openStream());
FileOutputStream fos = new FileOutputStream(LOCAL_FILE+"\\ProfessorPhys.iso\\");
fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
System.out.println("--------\nDone Downloading\n---------");
RandomAccessFile randomAccessFile = null;
ISevenZipInArchive inArchive = null;
try {
randomAccessFile = new RandomAccessFile(localfile +"\\ProfessorPhys.iso\\", "r");
inArchive = SevenZip.openInArchive(null, // autodetect archive type
new RandomAccessFileInStream(randomAccessFile));
// Getting simple interface of the archive inArchive
ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
System.out.println(" Hash | Size | Filename");
System.out.println("----------+------------+---------");
for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
final int[] hash = new int[] { 0 };
if (!item.isFolder()) {
ExtractOperationResult result;
final long[] sizeArray = new long[1];
result = item.extractSlow(new ISequentialOutStream() {
public int write(byte[] data) throws SevenZipException {
hash[0] ^= Arrays.hashCode(data); // Consume data
sizeArray[0] += data.length;
return data.length; // Return amount of consumed data
}
});
if (result == ExtractOperationResult.OK) {
System.out.println(String.format("%9X | %10s | %s", //
hash[0], sizeArray[0], item.getPath()));
} else {
System.err.println("Error extracting item: " + result);
}
}
}
} catch (Exception e) {
System.err.println("Error occurs: " + e);
System.exit(1);
} finally {
if (inArchive != null) {
try {
inArchive.close();
} catch (SevenZipException e) {
System.err.println("Error closing archive: " + e);
}
}
if (randomAccessFile != null) {
try {
randomAccessFile.close();
} catch (IOException e) {
System.err.println("Error closing file: " + e);
}
}
}
} catch (Exception e) {
System.out.println(e);
}
return "done";
}
#Override
protected void done() {
//done
out.setText(out.getText() + "\n Operation Done");
}
}
Code I'm trying to implement:
public class ExtractorUtil {
private static Logger logger = Logger.getLogger(ExtractorUtil.class.getCanonicalName());
public static void extract(File file, String extractPath) throws Exception {
ISevenZipInArchive inArchive = null;
RandomAccessFile randomAccessFile = null;
randomAccessFile = new RandomAccessFile(file, "r");
inArchive = SevenZip.openInArchive(null, new RandomAccessFileInStream(randomAccessFile));
inArchive.extract(null, false, new MyExtractCallback(inArchive, extractPath));
if (inArchive != null) {
inArchive.close();
}
if (randomAccessFile != null) {
randomAccessFile.close();
}
}
public static class MyExtractCallback implements IArchiveExtractCallback {
private final ISevenZipInArchive inArchive;
private final String extractPath;
public MyExtractCallback(ISevenZipInArchive inArchive, String extractPath) {
this.inArchive = inArchive;
this.extractPath = extractPath;
}
#Override
public ISequentialOutStream getStream(final int index, ExtractAskMode extractAskMode) throws SevenZipException {
return new ISequentialOutStream() {
#Override
public int write(byte[] data) throws SevenZipException {
String filePath = inArchive.getStringProperty(index, PropID.PATH);
FileOutputStream fos = null;
try {
File dir = new File(extractPath);
File path = new File(extractPath + filePath);
if (!dir.exists()) {
dir.mkdirs();
}
if (!path.exists()) {
path.createNewFile();
}
fos = new FileOutputStream(path, true);
fos.write(data);
} catch (IOException e) {
logger.severe(e.getLocalizedMessage());
} finally {
try {
if (fos != null) {
fos.flush();
fos.close();
}
} catch (IOException e) {
logger.severe(e.getLocalizedMessage());
}
}
return data.length;
}
};
}