I want to clone an Excel sheet and all of its contents. I tried the XSSFWorkbook cloneSheet method, but it seems the workbook is corrupted if my sheet contains an Excel table. See the examle workbook below with a simple table:
When I try to open the output workbook, I get a prompt telling me that the file is broken and needs to be repaired. If I recover the workbook, it is clear the table has not been copied correctly; the original totals row is now a data row.
try (InputStream is = Table.class.getResourceAsStream("table.xlsx")) {
XSSFWorkbook workbook = new XSSFWorkbook(is);
workbook.cloneSheet(0, "Test");
try (OutputStream fileOut = new FileOutputStream("table-2.xlsx")) {
workbook.write(fileOut);
}
} catch (IOException e) {
e.printStackTrace();
}
How I would go about copying this sheet? Any help is appreciated!
XSSFWorkbook.cloneSheet clones a sheet. But it does not considering the possible defined tables in it. It simply clones the table references. But two table-ranges in sheets cannot refer to the same table reference. The tables itself needs to be cloned. That's why the corrupted workbook as result.
I've tried to solve this by programming a method cloneTables(XSSFSheet sheet) which simply creates clones of each table in a sheet which then refer to their own table reference each. I consider table styles, auto-filter, a totals-row and calculated column formulas. I hope I have not overlooked something, but I doubt that.
The code its tested and works using current apache poi 5.2.2.
It contains fixes for following bugs too:
XSSFTable.updateHeaders fails in Excel workbooks created using current Excel versions. This is because of the test row.getCTRow().validate() which always will be false because of the usage of new name spaces. See Renaming headers of XSSFTable with Apache Poi leads to corrupt XLSX-file.
XSSFSheet.removeTable does not remove the links to the table part reference from the sheet.
Complete example to test:
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.ss.SpreadsheetVersion;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn;
class ExcelCloneSheetHavingTable {
static void updateHeaders(XSSFTable table) {
XSSFSheet sheet = (XSSFSheet)table.getParent();
CellReference ref = table.getStartCellReference();
if (ref == null) return;
int headerRow = ref.getRow();
int firstHeaderColumn = ref.getCol();
XSSFRow row = sheet.getRow(headerRow);
DataFormatter formatter = new DataFormatter();
if (row != null /*&& row.getCTRow().validate()*/) { // see bug: https://stackoverflow.com/questions/55532006/renaming-headers-of-xssftable-with-apache-poi-leads-to-corrupt-xlsx-file/55539181#55539181
int cellnum = firstHeaderColumn;
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumns ctTableColumns = table.getCTTable().getTableColumns();
if(ctTableColumns != null) {
for (org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn col : ctTableColumns.getTableColumnList()) {
XSSFCell cell = row.getCell(cellnum);
if (cell != null) {
String colName = formatter.formatCellValue(cell);
colName = colName.replace("\n", "_x000a_");
colName = colName.replace("\r", "_x000d_");
col.setName(colName);
}
cellnum++;
}
}
}
//tableColumns = null;
//columnMap = null;
//xmlColumnPrs = null;
//commonXPath = null;
try {
java.lang.reflect.Field tableColumns = XSSFTable.class.getDeclaredField("tableColumns");
tableColumns.setAccessible(true);
tableColumns.set(table, null);
java.lang.reflect.Field columnMap = XSSFTable.class.getDeclaredField("columnMap");
columnMap.setAccessible(true);
columnMap.set(table, null);
java.lang.reflect.Field xmlColumnPrs = XSSFTable.class.getDeclaredField("xmlColumnPrs");
xmlColumnPrs.setAccessible(true);
xmlColumnPrs.set(table, null);
java.lang.reflect.Field commonXPath = XSSFTable.class.getDeclaredField("commonXPath");
commonXPath.setAccessible(true);
commonXPath.set(table, null);
} catch (Exception ex) {
ex.printStackTrace();
}
}
static String getSubtotalFormulaStartFromTotalsRowFunction(int intTotalsRowFunction) {
final int INT_NONE = 1;
final int INT_SUM = 2;
final int INT_MIN = 3;
final int INT_MAX = 4;
final int INT_AVERAGE = 5;
final int INT_COUNT = 6;
final int INT_COUNT_NUMS = 7;
final int INT_STD_DEV = 8;
final int INT_VAR = 9;
final int INT_CUSTOM = 10;
String subtotalFormulaStart = null;
switch (intTotalsRowFunction) {
case INT_NONE:
subtotalFormulaStart = null;
break;
case INT_SUM:
subtotalFormulaStart = "SUBTOTAL(109";
break;
case INT_MIN:
subtotalFormulaStart = "SUBTOTAL(105";
break;
case INT_MAX:
subtotalFormulaStart = "SUBTOTAL(104";
break;
case INT_AVERAGE:
subtotalFormulaStart = "SUBTOTAL(101";
break;
case INT_COUNT:
subtotalFormulaStart = "SUBTOTAL(103";
break;
case INT_COUNT_NUMS:
subtotalFormulaStart = "SUBTOTAL(102";
break;
case INT_STD_DEV:
subtotalFormulaStart = "SUBTOTAL(107";
break;
case INT_VAR:
subtotalFormulaStart = "SUBTOTAL(110";
break;
case INT_CUSTOM:
subtotalFormulaStart = null;
break;
default:
subtotalFormulaStart = null;
}
return subtotalFormulaStart;
}
static void cloneTables(XSSFSheet sheet) {
for (XSSFTable table : sheet.getTables()) {
// clone table; XSSFTable.setArea fails and throws exception for too small tables
XSSFTable clonedTable = null;
int rowCount = (table.getArea().getLastCell().getRow() - table.getArea().getFirstCell().getRow()) + 1;
int headerRowCount = table.getHeaderRowCount(); if (headerRowCount == 0) headerRowCount = 1;
int minimumRowCount = 1 + headerRowCount + table.getTotalsRowCount();
if (rowCount >= minimumRowCount) {
clonedTable = sheet.createTable(table.getArea());
}
if (clonedTable != null) {
//clonedTable.updateHeaders(); // don't work, see bug: https://stackoverflow.com/questions/55532006/renaming-headers-of-xssftable-with-apache-poi-leads-to-corrupt-xlsx-file/55539181#55539181
updateHeaders(clonedTable);
// clone style
clonedTable.setStyleName(table.getStyleName());
XSSFTableStyleInfo style = (XSSFTableStyleInfo)table.getStyle();
XSSFTableStyleInfo clonedStyle = (XSSFTableStyleInfo)clonedTable.getStyle();
if (style != null && clonedStyle != null) {
clonedStyle.setShowColumnStripes(style.isShowColumnStripes());
clonedStyle.setShowRowStripes(style.isShowRowStripes());
clonedStyle.setFirstColumn(style.isShowFirstColumn());
clonedStyle.setLastColumn(style.isShowLastColumn());
}
//clone autofilter
clonedTable.getCTTable().setAutoFilter(table.getCTTable().getAutoFilter());
//clone totalsrow
int totalsRowCount = table.getTotalsRowCount();
if (totalsRowCount == 1) { // never seen more than one totals row
XSSFRow totalsRow = sheet.getRow(clonedTable.getEndCellReference().getRow());
if (clonedTable.getCTTable().getTableColumns().getTableColumnList().size() > 0) {
clonedTable.getCTTable().setTotalsRowCount(totalsRowCount);
for (int i = 0; i < clonedTable.getCTTable().getTableColumns().getTableColumnList().size(); i++) {
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn tableCol = table.getCTTable().getTableColumns().getTableColumnList().get(i);
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn clonedTableCol = clonedTable.getCTTable().getTableColumns().getTableColumnList().get(i);
clonedTableCol.setTotalsRowFunction(tableCol.getTotalsRowFunction());
int intTotalsRowFunction = clonedTableCol.getTotalsRowFunction().intValue();
sheet.getWorkbook().setCellFormulaValidation(false);
if (intTotalsRowFunction == 10) { //custom
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableFormula totalsRowFormula = tableCol.getTotalsRowFormula();
clonedTableCol.setTotalsRowFormula(totalsRowFormula);
totalsRow.getCell(clonedTable.getStartCellReference().getCol()+i).setCellFormula(totalsRowFormula.getStringValue());
} else if (intTotalsRowFunction == 1) { //none
//totalsRow.getCell(clonedTable.getStartCellReference().getCol()+i).setBlank();
} else {
String subtotalFormulaStart = getSubtotalFormulaStartFromTotalsRowFunction(intTotalsRowFunction);
if (subtotalFormulaStart != null)
totalsRow.getCell(clonedTable.getStartCellReference().getCol()+i).setCellFormula(subtotalFormulaStart + "," + clonedTable.getName() +"[" + clonedTableCol.getName()+ "])");
}
}
}
}
// clone calculated column formulas
if (clonedTable.getCTTable().getTableColumns().getTableColumnList().size() > 0) {
clonedTable.getCTTable().setTotalsRowCount(totalsRowCount);
for (int i = 0; i < clonedTable.getCTTable().getTableColumns().getTableColumnList().size(); i++) {
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn tableCol = table.getCTTable().getTableColumns().getTableColumnList().get(i);
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn clonedTableCol = clonedTable.getCTTable().getTableColumns().getTableColumnList().get(i);
if (tableCol.getCalculatedColumnFormula() != null) {
clonedTableCol.setCalculatedColumnFormula(tableCol.getCalculatedColumnFormula());
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableFormula calculatedColumnFormula = clonedTableCol.getCalculatedColumnFormula();
String formula = tableCol.getCalculatedColumnFormula().getStringValue();
String clonedFormula = formula.replace(table.getName(), clonedTable.getName());
calculatedColumnFormula.setStringValue(clonedFormula);
int rFirst = clonedTable.getStartCellReference().getRow() + clonedTable.getHeaderRowCount();
int rLast = clonedTable.getEndCellReference().getRow() - clonedTable.getTotalsRowCount();
int c = clonedTable.getStartCellReference().getCol() + i;
sheet.getWorkbook().setCellFormulaValidation(false);
for (int r = rFirst; r <= rLast; r++) {
XSSFRow row = sheet.getRow(r); if (row == null) row = sheet.createRow(r);
XSSFCell cell = row.getCell(c); if (cell == null) cell = row.createCell(c);
cell.setCellFormula(clonedFormula);
}
}
}
}
}
// remove old table; do that even if XSSFsheet.createTable failed, because a one-cell-table doesn't make any sense
String rId = sheet.getRelationId(table);
sheet.removeTable(table);
// remove links to the table part reference
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableParts tblParts = sheet.getCTWorksheet().getTableParts();
if (tblParts != null && tblParts.getTablePartList().size() > 0) {
for (int i = 0; i < tblParts.getTablePartList().size(); i++) {
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTablePart tblPart = tblParts.getTablePartArray​(i);
if(tblPart.getId().equals(rId)) {
tblParts.removeTablePart​(i);
}
}
}
}
}
public static void main(String[] args) throws Exception {
try (Workbook workbook = WorkbookFactory.create(new FileInputStream("SAMPLE.xlsx"));
FileOutputStream out = new FileOutputStream("SAMPLE_NEW.xlsx")) {
XSSFSheet sheet = ((XSSFWorkbook)workbook).cloneSheet(0, "Test");
cloneTables(sheet);
workbook.write(out);
}
}
}
This code needs the full jar of all of the schemas, which is poi-ooxml-full-5.2.2.jar for apache poi 5.2.2, as mentioned in FAQ. Note, since apache poi 5.* the formerly used ooxml-schemas-*.jar cannot be used anymore. There must not be any ooxml-schemas-*.jar in class path when using apache poi 5.*.
Note, you need this additional. The file names poi-ooxml-5.2.2.jar and poi-ooxml-lite-5.2.2.jar respective poi-ooxml-full-5.2.2.jar are misleading. The *-lite*.jar respective *-full*.jar do not replace, but complement the poi-ooxml-5.2.2.jar.
Is there a way to not read the entire Excel documents row, I'm reading the cell's defined in the document, but, it's pulling in the entire sheet's columns???
I'm converting an Excel document to a CSV document. I am getting this result.
Aircraft ID (FADEC_SHIP_POS),Fleet (Fleet Name),Fault Date (TIMESTAMP or FADEC_DATE_TIME),Fault ID (FADEC_ID),Fault Description (FADEC_FAULT_DESCRIPTION),Priority (FADEC_PRIORITY),ATA,Flight Phase (FM),Report Type (REPORT_TYPE),Flight Number (FLT),Origin (ORIG),Destination (DEST),Site Reference Name,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
I don't need all those extra undefined columns, (,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
also, there can be empty values in the row's defined columns: - these are expected and needed
6808-1,B757,2019-09-25 20:50:18,351-21A,P(S) 351-21,3,,Climb,901,1278,,,Sprint Airlines
my class
public class XlsxToCsv implements Processor {
private Logger log = Logger.getLogger(XlsxToCsv.class.getName());
private static final String CSV_SEPERATOR_CHAR=",";
private static final String NEW_LINE_CHARACTER="\r\n";
#Override
public void process(Exchange exchange) throws Exception {
log.info("Entering XLSX Excel to CSV ...");
int sheetIdx = 0;
byte[] in = exchange.getIn().getBody(byte[].class);
String out = excelToCSV(in,sheetIdx);
exchange.getIn().setBody(out);
log.info("Exiting XLSX Excel to CSV ...");
}
private String excelToCSV(byte[] xlsxFile, int sheetIdx) throws Exception {
InputStream is = null;
StringBuffer sb = new StringBuffer();
String cellVal = "";
try {
is = new ByteArrayInputStream(xlsxFile);
} catch (Exception e) {
e.printStackTrace();
}
XSSFWorkbook workBook = new XSSFWorkbook(is);
XSSFSheet workSheet = workBook.getSheetAt(sheetIdx);
for (int i = 0; i < workSheet.getPhysicalNumberOfRows(); i++) {
XSSFRow row = workSheet.getRow(i);
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
cellVal = getCellValueAsString(workBook, row.getCell(j));
if ( j == row.getPhysicalNumberOfCells() - 1) {
sb.append(cellVal).append(NEW_LINE_CHARACTER);
} else {
sb.append(cellVal).append(CSV_SEPERATOR_CHAR);
}
}
}
workBook.close();
return sb.toString();
}
private String getCellValueAsString(XSSFWorkbook workbook, XSSFCell cell) {
String cellValue = "";
if (cell != null) {
CellType cellType = cell.getCellType();
if (cellType == CellType.FORMULA) {
FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();
cellType = evaluator.evaluateFormulaCell(cell);
}
switch (cellType) {
case STRING:
cellValue = cell.getStringCellValue();
break;
case NUMERIC:
Double doubleValue = cell.getNumericCellValue();
int intValue = doubleValue.intValue();
cellValue = Integer.toString(intValue);
break;
case BOOLEAN:
cellValue = Boolean.toString(cell.getBooleanCellValue());
break;
case ERROR:
cellValue = cell.getErrorCellString();
break;
case BLANK:
break;
default:
break;
}
}
return cellValue;
}
}
It turns out that the template I'm using had spaces entered in the last elements in the spreadsheet, I removed and cleaned up the xlsx template and it is working as expected.
always learning, I wouldn't expect that, but, it is what it is,