How to extract text from a specific table java apache POI - java

I have a .doc document with three tables in it. Each table is slightly different and so the text has to be extracted differently. I am trying to do this using apache POI library I would like to deal with each table with a different bit of code but I can't figure out how to do this as there doesn't seem to be a tableIndex as part of the apache POI. At the moment I am treating them the same as below but does anyone know how to specify a particular table:
import java.io.*;
import java.util.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.*;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class Main {
public static void main (String[] args) throws Exception {
String fileName = "MyFile.doc";
InputStream fis = new FileInputStream(fileName);
POIFSFileSystem fs = new POIFSFileSystem(fis);
HWPFDocument doc = new HWPFDocument(fs);
Range range = doc.getRange();
Paragraph para = range.getParagraph(0);
String str="";
TableIterator itr = new TableIterator(range);
while(itr.hasNext()){
Table table = itr.next();
for(int rowIndex = 0; rowIndex < table.numRows(); rowIndex++){
TableRow row = table.getRow(rowIndex);
for(int colIndex = 0; colIndex < 1; colIndex++){
//Getting the first two cell text values out of the table
TableCell cell0 = row.getCell(0);
TableCell cell1 = row.getCell(1);
str= cell0.text()+" : "+cell1.text()+" : ";
System.out.println(str);
}
}
}
}
}

Related

How to remove rows of a word table that contains a string using java

this code remove all the rows of the table, but i want to remove a specific rows(if rows contain for example number 2)
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.util.List;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
public static void main(String[] args) throws Exception {
FileInputStream fis = new FileInputStream("C:\\TMPL.docx");
XWPFDocument doc = new XWPFDocument(fis);
List<XWPFTable> tables = doc.getTables();
XWPFTable table = tables.get(0);
XWPFTableRow[] rows = table.getRows().toArray(new XWPFTableRow[0]);
for (int r = 0; r < rows.length; r++) {
if (r > 0) {
XWPFTableRow row = rows[r];
CTTc[] cells = row.getCtRow().getTcList().toArray(new CTTc[0]);
for (int c = 0; c < cells.length; c++) {
CTTc cTTc = cells[c];
//clear only the paragraphs in the cell, keep cell styles
cTTc.setPArray(new CTP[] {CTP.Factory.newInstance()});
cells[c] = cTTc;
}
row.getCtRow().setTcArray(cells);
//System.out.println(row.getCtRow());
}
}
doc.write(new FileOutputStream("new document.docx"));
}
Once you find a row containing the number 2, you can use the method table.removeRow(i) (documented here) to remove the complete row at position i.

Getting Exception when reorder RowLabel when hiding the Subtotal -Apache POI

As per below code i dont want to add addRowLabel(1) but need addRowLabel(2) . After running the application and open the pivot table its giving exception but if you add addRowLabel(1)(currently commented) its working as expected. This is happening after adding the logic to hide Subtotal. Is this is the expected behaviour of apache POI or It can be fixed?
Please find the code below.
Note: This issue comes when hiding subtotal.
package com.test.pivottables;
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.ss.*;
import java.io.*;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
class TestPivotTables {
public static void main(String[] args) throws IOException{
Workbook wb = new XSSFWorkbook();
String[][] data = new String[][]{{"STATUS","PASSED","DATA","VALUE"},
{"BLUE","Y","TTT","20"},
{"RED","N","UUU","10"},{"BLUE","N","PPP","30"}};
XSSFSheet sheet = (XSSFSheet) wb.createSheet("data");
XSSFSheet pivot = (XSSFSheet) wb.createSheet("summary");
for(String[] dataRow : data){
XSSFRow row = sheet.createRow(sheet.getPhysicalNumberOfRows());
for(String dataCell : dataRow){
XSSFCell cell =
row.createCell(row.getPhysicalNumberOfCells());
cell.setCellValue(dataCell);
}
}
XSSFTable table = sheet.createTable();
CTTable cttable = table.getCTTable();
table.setDisplayName("table");
cttable.setRef("A1:D4");
cttable.setId(1);
CTTableColumns columns = cttable.addNewTableColumns();
columns.setCount(3);
int i = 1;
for (String colName : data[0]){
CTTableColumn column = columns.addNewTableColumn();
column.setId(++i);
column.setName(colName);
}
XSSFPivotTable pivotTable = pivot.createPivotTable(new
AreaReference("A1:D4", SpreadsheetVersion.EXCEL2007),
new CellReference("A4"), sheet);
pivotTable.addRowLabel(0);
pivotTable.addRowLabel(2);
//pivotTable.addRowLabel(1);
List<Integer> iterList = new ArrayList<Integer>();
iterList.add(0);
iterList.add(2);
//iterList.add(1);
pivotTable.getCTPivotTableDefinition().setRowHeaderCaption("Colour");
for (int j=0;j<iterList.size();j++) {
CTPivotField ctPivotField =
pivotTable.getCTPivotTableDefinition().getPivotFields().
getPivotFieldList().get(iterList.get(j));
for (i = 0; i < sheet.getLastRowNum()-1; i++) {
if(ctPivotField.getItems().getItemArray(i)!=null) {
ctPivotField.getItems().getItemArray(i).unsetT();
ctPivotField.getItems().getItemArray(i).setX((long)i);
}
}
for (i = sheet.getLastRowNum(); i > sheet.getLastRowNum()-2; i--)
{
if(ctPivotField.getItems().getItemArray(i)!=null) {
ctPivotField.getItems().removeItem(i);
}
}
ctPivotField.getItems().setCount(2);
Set<String> collection = new HashSet<String>();
int ctr = 0;
Row row = null;
Cell cell = null;
boolean isNull = false;
do{
try{
row = sheet.getRow(ctr);
cell = row.getCell(0);
collection.add(cell.toString());
ctr++;
} catch(Exception e) {
isNull = true;
}
}while(isNull!=true);
if(collection!=null && collection.size()>0) {
Iterator value = collection.iterator();
while (value.hasNext()) {
pivotTable.getPivotCacheDefinition().
getCTPivotCacheDefinition().getCacheFields().
getCacheFieldList().get(j).getSharedItems().addNewS().
setV(value.next().toString());
}
}
ctPivotField.setAutoShow(false);
ctPivotField.setDefaultSubtotal(false);
ctPivotField.setSubtotalTop(false);
ctPivotField.setSubtotalCaption(null);
ctPivotField.setCompact(false);
}
System.out.println("----end---");
pivotTable.addColumnLabel(DataConsolidateFunction.COUNT, 3, "test");
FileOutputStream fileOut = new FileOutputStream("pivotsample1.xlsx");
wb.write(fileOut);
wb.close();
}}
The problem is how you are building the pivot table definition and the pivot cache definition. This must be done because apache poi creates as much fields for each row label as rows are in the pivot table data range. This is wrong when special settings shall be made for pivot fields. You try to do that, but you do it wrong.
I cannot go into detail where exactly you go wrong because that would be too much effort. But what needs to be done is:
For each column which is row label:
Determine unique labels in that column. This is necessary to build
the cache.
Then build pivot table and cache.
For each unique label:
Build pivot field item as numbered item.
Build a cache definition which has a shared element for this label.
Then remove further items from pivot table definition. But leave one default element there, if there should be subtotals. If not, then not.
Complete example:
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.ss.*;
import java.io.*;
import java.util.ArrayList;
import java.util.TreeSet;
import java.util.List;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
class TestPivotTables {
public static void main(String[] args) throws IOException{
Workbook wb = new XSSFWorkbook();
String[][] data = new String[][]{
{"STATUS","PASSED","DATA","VALUE"},
{"BLUE","Y","TTT","20"},
{"RED","N","UUU","10"},
{"BLUE","N","PPP","30"}
};
XSSFSheet sheet = (XSSFSheet) wb.createSheet("data");
XSSFSheet pivot = (XSSFSheet) wb.createSheet("summary");
for(String[] dataRow : data){
XSSFRow row = sheet.createRow(sheet.getPhysicalNumberOfRows());
for(String dataCell : dataRow){
XSSFCell cell = row.createCell(row.getPhysicalNumberOfCells());
cell.setCellValue(dataCell);
}
}
AreaReference areaReference = new AreaReference("A1:D4", SpreadsheetVersion.EXCEL2007);
XSSFPivotTable pivotTable = pivot.createPivotTable(areaReference, new CellReference("A4"), sheet);
pivotTable.getCTPivotTableDefinition().setRowHeaderCaption("Colour");
List<Integer> iterList = new ArrayList<Integer>();
iterList.add(0);
iterList.add(2);
iterList.add(1);
for (Integer j : iterList) {
//create row label - apache poi creates as much fields for each as rows are in the pivot table data range
pivotTable.addRowLabel(j);
//determine unique labels in column j
TreeSet<String> uniqueItems = new java.util.TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
for (int r = areaReference.getFirstCell().getRow()+1; r < areaReference.getLastCell().getRow()+1; r++) {
uniqueItems.add(sheet.getRow(r).getCell(j).getStringCellValue());
}
System.out.println(uniqueItems);
//build pivot table and cache
CTPivotField ctPivotField = pivotTable.getCTPivotTableDefinition().getPivotFields().getPivotFieldArray(j);
int i = 0;
for (String item : uniqueItems) {
//take the items as numbered items: <item x="0"/><item x="1"/>
ctPivotField.getItems().getItemArray(i).unsetT();
ctPivotField.getItems().getItemArray(i).setX((long)i);
//build a cache definition which has shared elements for those items
//<sharedItems><s v="BLUE"/><s v="RED"/></sharedItems>
pivotTable.getPivotCacheDefinition().getCTPivotCacheDefinition().getCacheFields().getCacheFieldArray(j)
.getSharedItems().addNewS().setV(item);
i++;
}
ctPivotField.setAutoShow(false);
ctPivotField.setDefaultSubtotal(false);
//ctPivotField.setSubtotalTop(false);
//ctPivotField.setSubtotalCaption(null);
ctPivotField.setCompact(false);
//remove further items
if (ctPivotField.getDefaultSubtotal()) i++; //let one default item be if there shall be subtotals
for (int k = ctPivotField.getItems().getItemList().size()-1; k >= i; k--) {
ctPivotField.getItems().removeItem(k);
}
ctPivotField.getItems().setCount(i);
}
System.out.println("----end---");
pivotTable.addColumnLabel(DataConsolidateFunction.COUNT, 3, "test");
FileOutputStream fileOut = new FileOutputStream("pivotsample1.xlsx");
wb.write(fileOut);
fileOut.close();
wb.close();
}
}

How to access excel cell using cell name with java [duplicate]

I am writing a Java program to read data from excel sheet (having XLSX extension) using Apache POI library. I am able to iterate through all the cells and get all the values. But I am unable to get a specific cell value, say E10.
Is there any way to do this?
Please see the code below that I used for iterating through the cells.
package application;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class ReadFromXLSX {
public static void readXLSXFile() throws IOException
{
InputStream ExcelFileToRead = new FileInputStream("C:\\Test.xlsx");
XSSFWorkbook wb = new XSSFWorkbook(ExcelFileToRead);
XSSFWorkbook test = new XSSFWorkbook();
XSSFSheet sheet = wb.getSheetAt(0);
XSSFRow row;
XSSFCell cell;
Iterator rows = sheet.rowIterator();
while (rows.hasNext())
{
row=(XSSFRow) rows.next();
Iterator cells = row.cellIterator();
while (cells.hasNext())
{
cell=(XSSFCell) cells.next();
if (cell.getCellType() == XSSFCell.CELL_TYPE_STRING)
{
System.out.print(cell.getStringCellValue()+" ");
}
else if(cell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC)
{
System.out.print(cell.getNumericCellValue()+" ");
}
else
{
}
}
System.out.println();
}
}
}
For example, to get E10 of the first worksheet:
wb.getSheetAt(0).getRow(9).getCell(4);
Note: subtract one because the indices are null-based.
You can also use this convenience method to map E to 4.
wb.getSheetAt(0).getRow(9).getCell(CellReference.convertColStringToIndex("E"));
To get a value from a specific cell in excel you can use the below code line.
wb.getSheetAt(0).getRow(1).getCell(1);
XSSFSheet has the method getRow(int rownum)
It returns the logical row ( 0-based). If you ask for a row that is not defined you get a null. This is to say row 4 represents the fifth row on a sheet.
Once you get the row, you can call getCell(int cellnum) method of XSSFRow object. It returns the cell at the given (0 based) index.
Just version-up the getCell method
public XSSFCell getCell(String cellName){
Pattern r = Pattern.compile("^([A-Z]+)([0-9]+)$");
Matcher m = r.matcher(cellName);
XSSFWorkbook wb = new XSSFWorkbook();
if(m.matches()) {
String columnName = m.group(1);
int rowNumber = Integer.parseInt(m.group(2));
if(rowNumber > 0) {
return wb.getSheetAt(0).getRow(rowNumber-1).getCell(CellReference.convertColStringToIndex(columnName));
}
}
return null;
}
Now you can get the cell easily by this line
getCell("E10")
public class XmlFileRead {
public static void main(String[] args) throws IOException {
FileInputStream fi = new FileInputStream("abc.xls");
ArrayList<EmployeeVo> al = new ArrayList<>();
EmployeeVo evo = null;
Scanner scanner = null;
Workbook wb = new XSSFWorkbook(fi);
Sheet sh = wb.getSheet("Sheet0");
int starRow = sh.getFirstRowNum();
int endRow = sh.getLastRowNum();
for (int i = starRow + 1; i < endRow; i++) {
scanner = new Scanner(System.in);
evo = new EmployeeVo();
Cell c = wb.getSheetAt(0).getRow(i).getCell(1);
evo.setEmployeeId((int) c.getNumericCellValue());
Cell c2 = wb.getSheetAt(0).getRow(i).getCell(2);
evo.setEmployeeName(c2.toString());
// add to collection
al.add(evo);
} // for
al.forEach(i -> {
System.out.println(i.getEmployeeId() + " " + i.getEmployeeName());
});
}
}

Error when read Column value from excel with java using POI

I need to read the excel file, so I can reference the column index by name, and I do like that :
package main;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
public class ReadExcel {
public static void main(String[] args) {
try {
InputStream fs = new FileInputStream("/.../ListProducts.xls");
HSSFWorkbook wb = new HSSFWorkbook(fs);
HSSFSheet sheet = wb.getSheetAt(0);
Map<String, Integer> map = new HashMap<String,Integer>(); //Create map
HSSFRow row = sheet.getRow(0); //Get first row
//following is boilerplate from the java doc
short minColIx = row.getFirstCellNum(); //get the first column index for a row
short maxColIx = row.getLastCellNum(); //get the last column index for a row
for(short colIx=minColIx; colIx<maxColIx; colIx++) { //loop from first to last index
HSSFCell cell = row.getCell(colIx); //get the cell
map.put(cell.getStringCellValue(),cell.getColumnIndex()); //add the cell contents (name of column) and cell index to the map
}
List<ReportRow> listOfDataFromReport = new ArrayList<ReportRow>();
for(int x = 1; x<=sheet.getPhysicalNumberOfRows(); x++){
ReportRow rr = new ReportRow();
HSSFRow dataRow = sheet.getRow(x);
int idxForColumn1 = map.get("Id");
int idxForColumn2 = map.get("Name");
int idxForColumn3 = map.get("Price");
HSSFCell cell1 = dataRow.getCell(idxForColumn1);
HSSFCell cell2 = dataRow.getCell(idxForColumn2);
HSSFCell cell3 = dataRow.getCell(idxForColumn3);
rr.setColumn1(cell1.getStringCellValue());
rr.setColumn2(cell2.getStringCellValue());
rr.setColumn3(cell3.getStringCellValue());
listOfDataFromReport.add(rr);
}
for(int j = 0; j< listOfDataFromReport.size();j++){
System.out.println("Column 1 Value: " + listOfDataFromReport.get(j).getColumn1());
//etc...
}
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
When I run the program, I get this in output :
null
I have already the excel file in the correct destination and with the right name of column.
EDIT
When I add e.printStackTrace();, I get this:
java.lang.NullPointerException
at main.ReadExcel.main(ReadExcel.java:51)
null
EDIT2
I notice that variables of setColumn1,setColumn2,... methods are double.
I do this :
rr.setColumn1(cell1.getNumericCellValue());
rr.setColumn2(cell2.getNumericCellValue());
rr.setColumn3(cell3.getNumericCellValue());
I get the following error when I try to run the program to read data:
Exception in thread "main" java.lang.IllegalStateException: Cannot get
a NUMERIC value from a STRING cell at
org.apache.poi.hssf.usermodel.HSSFCell.typeMismatch(HSSFCell.java:654)
at org.apache.poi.hssf.usermodel.HSSFCell.getNumericCellValue(HSSFCell.java:679)
Well I got kinda different opinion than #maytham-ɯɐɥʇʎɐɯ. Assuming names of the columns are correct in your code, I think that this may be the problem:
for(int x = 1; x<=sheet.getPhysicalNumberOfRows(); x++){
Let's say number of physical rows is 3, so they are numbered 0, 1, 2 and you are iterating till 3, thus when you trying to get the cell later it throws the NullPointerException, because there is no such row.
Try doing x < sheet.getPhysicalNumberOfRows(); (less than, not less/equal than) in the for loop I mentioned earlier.

Getting specific data from an excel sheet JAVA

I am trying to get specific data from an excel sheet, The data is dynamic. It can be anything really. The column headers are the only things i can use for placeholders, but the column header positions can vary in the sheet.
For example i have a sheet like this :
|Name| Surname| Value|
|bar | poo | 5|
|baz | foo | 7|
But for example i need to traverse the sheet to get the surname column and then if i find surname = 'poo' i must then pull its corresponding value which in the sheet is in the next colum but this is dynamic. The surname and value column arent always next to each other, they can be in any position at the top. But if i find a specific 'thing' in the surname column i need to pull its value.
I have managed to traverse through the sheet and store all the data in a 2d array And display that data. from the research ive done , this isnt an efficient approach as traversing and storing large data from sheets can use alot of memory. Ive read that you can read through an excel sheet and instead of storing those values in an array you can write them immediately to another sheet, if they match a certain condition. EG: (pseudo) If(columnheader == surname && surname == foo )then get corresponding value, then write that value to a new sheet.
Okay so my questions are :
1.How do i achieve iterating through the sheet not storing it in an array and writing it straight to another sheet if it matches a condition?
2.From the code i have below, how do i achieve sorting through the data in the array and finding if surname = foo get its corresponding value?
Like i said the data in the sheet is dynamic except for the column headers, but there positions as headers are dynamic.
Sorry for the long post , any help will be greatly appreciated.
package demo.poi;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.math.BigDecimal;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class test {
public static void main(String[] args) throws Exception {
File excel = new File("test.xlsx");
FileInputStream fis = new FileInputStream(excel);
XSSFWorkbook wb = new XSSFWorkbook(fis);
XSSFSheet ws = wb.getSheetAt(0);
ws.setForceFormulaRecalculation(true);
int rowNum = ws.getLastRowNum() + 1;
int colNum = ws.getRow(0).getLastCellNum();
int surnameHeaderIndex = -1, valueHeaderIndex = -1;
//Read the headers first. Locate the ones you need
XSSFRow rowHeader = ws.getRow(0);
for (int j = 0; j < colNum; j++) {
XSSFCell cell = rowHeader.getCell(j);
String cellValue = cellToString(cell);
if("SURNAME".equalsIgnoreCase(cellValue)) {
surnameHeaderIndex = j;
} else if("VALUE".equalsIgnoreCase(cellValue)) {
valueHeaderIndex = j;
}
}
if(surnameHeaderIndex == -1 || valueHeaderIndex == -1) {
throw new Exception("Could not find header indexes\nSurname : " + surnameHeaderIndex + " | Value : " + valueHeaderIndex);
}
//createnew workbook
XSSFWorkbook workbook = new XSSFWorkbook();
//Create a blank sheet
XSSFSheet sheet = workbook.createSheet("data");
for (int i = 1; i < rowNum; i++) {
XSSFRow row = ws.getRow(i);
row = sheet.createRow(rowNum++);
String surname = cellToString(row.getCell(surnameHeaderIndex));
String value = cellToString(row.getCell(valueHeaderIndex));
int cellIndex = 0;
row.createCell(cellIndex++).setCellValue(surname);
row.createCell(cellIndex++).setCellValue(value);
}
FileOutputStream fos = new FileOutputStream(new File("test1.xlsx"));
workbook.write(fos);
fos.close();
}
public static String cellToString(XSSFCell cell) {
int type;
Object result = null;
type = cell.getCellType();
switch (type) {
case XSSFCell.CELL_TYPE_NUMERIC:
result = BigDecimal.valueOf(cell.getNumericCellValue())
.toPlainString();
break;
case XSSFCell.CELL_TYPE_STRING:
result = cell.getStringCellValue();
break;
case XSSFCell.CELL_TYPE_BLANK:
result = "";
break;
case XSSFCell.CELL_TYPE_FORMULA:
result = cell.getCellFormula();
}
return result.toString();
}
}
Something like this should be a good starting point.
Basically you parse the first row, where the headers are located.
You find the position of the headers you want and keep them.
In this example there are only two headers (surname, value) that are needed so I just keep two variables. If there are more, then the solution would be to keep the position of those headers in a HashMap, where the key is the name of the header. After that an iteration of the rows begins. The program parses the values of the columns that are needed (row.getCell(index)). Now you have the values that you need, and only them. You can do whatever you want, you can print them or write a file or whatnot.
Here is an example. The error handling is up to you. This is only an example.
package POIParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.math.BigDecimal;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class MainPoi {
public static void main(String[] args) throws Exception {
File excel = new File("test.xlsx");
FileInputStream fis = new FileInputStream(excel);
XSSFWorkbook wb = new XSSFWorkbook(fis);
XSSFSheet ws = wb.getSheetAt(0);
ws.setForceFormulaRecalculation(true);
int rowNum = ws.getLastRowNum() + 1;
int colNum = ws.getRow(0).getLastCellNum();
int surnameHeaderIndex = -1, valueHeaderIndex = -1;
// Read the headers first. Locate the ones you need
XSSFRow rowHeader = ws.getRow(0);
for (int j = 0; j < colNum; j++) {
XSSFCell cell = rowHeader.getCell(j);
String cellValue = cellToString(cell);
if ("SURNAME".equalsIgnoreCase(cellValue)) {
surnameHeaderIndex = j;
} else if ("VALUE".equalsIgnoreCase(cellValue)) {
valueHeaderIndex = j;
}
}
if (surnameHeaderIndex == -1 || valueHeaderIndex == -1) {
throw new Exception("Could not find header indexes\nSurname : "
+ surnameHeaderIndex + " | Value : " + valueHeaderIndex);
}
// createnew workbook
XSSFWorkbook workbook = new XSSFWorkbook();
// Create a blank sheet
XSSFSheet sheet = workbook.createSheet("data");
for (int i = 1; i < rowNum; i++) {
XSSFRow row = ws.getRow(i);
String surname = cellToString(row.getCell(surnameHeaderIndex));
String value = cellToString(row.getCell(valueHeaderIndex));
int cellIndex = 0;
//Create a newRow object for the output excel.
//We begin for i = 1, because of the headers from the input excel, so we go minus 1 in the new (no headers).
//If for the output we need headers, add them outside this for loop, and go with i, not i-1
XSSFRow newRow = sheet.createRow(i-1);
newRow.createCell(cellIndex++).setCellValue(surname);
newRow.createCell(cellIndex++).setCellValue(value);
}
FileOutputStream fos = new FileOutputStream(new File("test1.xlsx"));
workbook.write(fos);
fos.close();
}
public static String cellToString(XSSFCell cell) {
int type;
Object result = null;
type = cell.getCellType();
switch (type) {
case XSSFCell.CELL_TYPE_NUMERIC:
result = BigDecimal.valueOf(cell.getNumericCellValue())
.toPlainString();
break;
case XSSFCell.CELL_TYPE_STRING:
result = cell.getStringCellValue();
break;
case XSSFCell.CELL_TYPE_BLANK:
result = "";
break;
case XSSFCell.CELL_TYPE_FORMULA:
result = cell.getCellFormula();
}
return result.toString();
}
}

Categories

Resources