how to read & search excel data using Apache POI - java

I am using Apache POI to read Excel rows and use them as needed. To make the script more reusable, how can I search all sheets for a String value in column A and read the corresponding row? For example, in Sheet2 column A contains the name Peter and column B contains Peter's date of birth, 12/18/1984. Can you give a code sample that searches for Peter in column A of the workbook and returns his date of birth from column B? Below is the code I am currently using; it may not suit the above criteria.
package com.Sample.GenericFunctionsLibrary;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
public class TestUtil {
public static Xls_Reader excel = null;
public static String path = "./XLFile/Data.xlsx";
public static String mailscreenshotpath;
/**
 * Builds a timestamp string from the current local date and time.
 *
 * <p>The parts are joined with underscores in the order
 * {@code year_dayOfMonth_month_hour_minute_second}. The month is 1-based, the hour is on a
 * 24-hour clock, and no part is zero-padded.
 *
 * @return the formatted timestamp
 */
public static String generateTimeStamp() {
    Calendar now = new GregorianCalendar();
    StringBuilder stamp = new StringBuilder();
    stamp.append(now.get(Calendar.YEAR)).append('_');
    stamp.append(now.get(Calendar.DATE)).append('_');
    // Calendar.MONTH is zero-based, so shift it to the usual 1..12 range.
    stamp.append(now.get(Calendar.MONTH) + 1).append('_');
    stamp.append(now.get(Calendar.HOUR_OF_DAY)).append('_');
    stamp.append(now.get(Calendar.MINUTE)).append('_');
    stamp.append(now.get(Calendar.SECOND));
    return stamp.toString();
}
/**
 * Tells whether the test case {@code tcid} is flagged to run.
 *
 * <p>Scans the "Credentials" sheet starting at row 2. The first row whose "TestCase_Name"
 * equals {@code tcid} decides the result: {@code true} when its "runmode" is "Y"
 * (case-insensitive), {@code false} otherwise.
 *
 * @param tcid test case name to look up
 * @return the run flag of the first matching row, or {@code false} when no row matches
 */
public static boolean isExecutable(String tcid) {
    final String sheet = "Credentials";
    int row = 2;
    while (row <= excel.getRowCount(sheet)) {
        boolean nameMatches = excel.getCellData(sheet, "TestCase_Name", row).equals(tcid);
        if (nameMatches) {
            return excel.getCellData(sheet, "runmode", row).equalsIgnoreCase("Y");
        }
        row++;
    }
    return false;
}
/**
 * Loads all data rows of a sheet into a two-dimensional array.
 *
 * <p>Row 1 is skipped (it is treated as the header row); data starts at row 2. The result has
 * one entry per data row and one column per sheet column.
 *
 * @param sheetName name of the sheet to read through the shared {@code excel} reader
 * @return the cell values, or an array with zero rows when the sheet has no data rows
 */
public static Object[][] getData(String sheetName) {
    int rows = excel.getRowCount(sheetName);
    int cols = excel.getColumnCount(sheetName);
    // Clamp both dimensions: a sheet reporting zero rows (or a negative column count)
    // would otherwise make the allocation throw NegativeArraySizeException.
    int dataRows = Math.max(rows - 1, 0);
    int dataCols = Math.max(cols, 0);
    Object[][] data = new Object[dataRows][dataCols];
    for (int rowNum = 2; rowNum <= rows; rowNum++) {
        for (int colNum = 0; colNum < dataCols; colNum++) {
            // Sheet rows are addressed from 1 and data starts at row 2, hence the -2 offset.
            data[rowNum - 2][colNum] = excel.getCellData(sheetName, colNum, rowNum);
        }
    }
    return data;
}
/**
 * Zips the regular files directly inside {@code filepath} into "Reports.zip" in the current
 * working directory.
 *
 * <p>Sub-directories are skipped. This is a best-effort operation: failures are printed and
 * never thrown. All streams are closed even when an error occurs, so a failure cannot leave
 * file handles open.
 *
 * @param filepath path of the folder whose files are archived
 */
public static void zip(String filepath) {
    File inFolder = new File(filepath);
    File[] files = inFolder.listFiles();
    if (files == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        System.err.println("Cannot zip, not a readable directory: " + filepath);
        return;
    }
    byte[] data = new byte[1000];
    try (ZipOutputStream out = new ZipOutputStream(
            new BufferedOutputStream(new FileOutputStream(new File("Reports.zip"))))) {
        for (File file : files) {
            if (!file.isFile()) {
                // Opening a directory as a stream fails and used to abort the whole archive.
                continue;
            }
            try (BufferedInputStream in =
                    new BufferedInputStream(new FileInputStream(file), 1000)) {
                out.putNextEntry(new ZipEntry(file.getName()));
                int count;
                while ((count = in.read(data, 0, data.length)) != -1) {
                    out.write(data, 0, count);
                }
                out.closeEntry();
            }
        }
    } catch (Exception e) {
        // Kept broad on purpose: callers rely on this method never throwing.
        e.printStackTrace();
    }
}
// --------------------------------------Read Data From
// Excel------------------------------------
public static String[][] GetValue(String Pathfile, String sheetName, int startrow) throws IOException {
File excel = new File(Pathfile);
FileInputStream fis = new FileInputStream(excel);
#SuppressWarnings("resource")
XSSFWorkbook wb = new XSSFWorkbook(fis);
XSSFSheet ws = wb.getSheet(sheetName);
// System.out.println(startrow);
int colNum = ws.getRow(startrow).getLastCellNum();
// System.out.println(colNum);
String[][] arrays = new String[1][colNum];
for (int i = 0; i < colNum; i++) {
XSSFRow row = ws.getRow(startrow);
XSSFCell cell = row.getCell(i);
arrays[0][i] = cellToString(cell);
// System.out.println(arrays[0][i]);
}
return arrays;
}
// private static String cellToString(XSSFCell cell) {
// Object result;
// int type = cell.getCellType();
//
// switch(type)
// {
// case 0:
// result = cell.getNumericCellValue();
// break;
// case 1:
// result = cell.getStringCellValue();
// break;
// default:
// throw new RuntimeException("there are no support for this type of cell");
// }
/**
 * Converts a cell to its text.
 *
 * <p>Numeric cells are rendered with a {@link DataFormatter}, string cells return their
 * string value, and blank or missing cells return an empty string.
 *
 * @param cell the cell to convert; may be {@code null} when the row has a gap at that column
 * @return the cell text, never {@code null}
 * @throws RuntimeException for any other cell type (boolean, formula, error)
 */
private static String cellToString(XSSFCell cell) {
    // A missing cell is treated as blank. The previous code caught the NullPointerException
    // and fell back to type 2, which is the formula type, so a gap ended up in the
    // "unsupported type" branch instead of yielding "".
    if (cell == null) {
        return "";
    }
    switch (cell.getCellType()) {
        case Cell.CELL_TYPE_NUMERIC:
            return new DataFormatter().formatCellValue(cell);
        case Cell.CELL_TYPE_STRING:
            return cell.getStringCellValue();
        case Cell.CELL_TYPE_BLANK:
            return "";
        default:
            throw new RuntimeException("there are no support for this type of cell");
    }
}
}

This method will take a String value for the name to search and return the address for the first record found in the column next to it, assuming that the name is in the first column and the address is in the second column. It will iterate over all sheets as asked. It returns empty String if name is not found. Try/catch block excluded for readability.
/**
 * Searches column A of every sheet for {@code nameToSearch} and returns the text of column B
 * from the first matching row.
 *
 * <p>Cell values are compared and returned as formatted text, so numeric and date cells do
 * not cause an exception.
 *
 * @param nameToSearch the value to look for in the first column
 * @return the formatted value of the second column of the first match, or an empty string
 *         when the name is not found
 * @throws java.io.UncheckedIOException if the workbook cannot be read
 */
public static String findAddressByName(String nameToSearch) {
    String fileLocation = "I:\\foo.xlsx";
    DataFormatter formatter = new DataFormatter();
    try (FileInputStream in = new FileInputStream(fileLocation);
            XSSFWorkbook wb = new XSSFWorkbook(in)) {
        for (int sheetIndex = 0; sheetIndex < wb.getNumberOfSheets(); sheetIndex++) {
            XSSFSheet sheet = wb.getSheetAt(sheetIndex);
            // getLastRowNum() is the zero-based index of the last row, so the bound is inclusive.
            for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                XSSFRow row = sheet.getRow(rowIndex);
                if (row == null) {
                    continue;
                }
                // formatCellValue accepts a null cell and returns "" for it.
                String name = formatter.formatCellValue(row.getCell(0));
                if (name.equals(nameToSearch)) {
                    return formatter.formatCellValue(row.getCell(1));
                }
            }
        }
    } catch (IOException e) {
        throw new java.io.UncheckedIOException("Cannot read workbook " + fileLocation, e);
    }
    return "";
}

Related

Merge more than one excel files into one excel file using Apache POI Java

I have 100 Excel files and I want to merge all of them into one Excel file. In my example I have 2 Excel files that I want to merge into one, but I can't get it to work. I am using the Apache POI API.
In one excel workbook there can be more than one sheets also so I want to iterate through sheets of each workbook also.
I tried and researched but I got this link and it's not working for me
https://dev.to/eiceblue/merge-excel-files-in-java-2lo2#:~:text=A%20quick%20way%20to%20merge,data%20table%20into%20another%20worksheet.
Please help me out here.
package com.cas.ExcelTest;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Copies every sheet of several .xlsx workbooks into one new workbook, "result.xlsx".
 *
 * <p>Each source sheet becomes its own sheet in the result, named with the index of the
 * source file followed by the original sheet name. Only cell values are copied; styles and
 * formats are not.
 */
public class Combine {

    /**
     * Merges the hard-coded input files and writes the result.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        String[] files = new String[] {"Test2.xlsx","Test3.xlsx"};
        try (XSSFWorkbook workbook = new XSSFWorkbook()) {
            try {
                for (int f = 0; f < files.length; f++) {
                    // Close each source stream and workbook as soon as it has been copied.
                    try (FileInputStream inputStream = new FileInputStream(files[f]);
                            XSSFWorkbook tempWorkbook = new XSSFWorkbook(inputStream)) {
                        int numOfSheets = tempWorkbook.getNumberOfSheets();
                        for (int i = 0; i < numOfSheets; i++) {
                            XSSFSheet tempSheet = tempWorkbook.getSheetAt(i);
                            String newSheetName = "" + f + "" + tempSheet.getSheetName();
                            copySheet(tempSheet, workbook.createSheet(newSheetName));
                        }
                    }
                }
            } catch (IOException ex1) {
                System.out.println("Error reading file");
                ex1.printStackTrace();
            }
            try (FileOutputStream outputStream = new FileOutputStream("result.xlsx")) {
                workbook.write(outputStream);
            }
        } catch (IOException ex) {
            System.out.println("Something went wrong");
            // Print the cause too; the message alone gives no hint about what failed.
            ex.printStackTrace();
        }
    }

    /** Copies all rows and cells of {@code source} into {@code target} at the same positions. */
    private static void copySheet(XSSFSheet source, XSSFSheet target) {
        Iterator<Row> itRow = source.rowIterator();
        while (itRow.hasNext()) {
            Row tempRow = itRow.next();
            XSSFRow row = target.createRow(tempRow.getRowNum());
            Iterator<Cell> itCell = tempRow.cellIterator();
            while (itCell.hasNext()) {
                Cell tempCell = itCell.next();
                copyCellValue(tempCell, row.createCell(tempCell.getColumnIndex()));
            }
        }
    }

    /** Copies the value of {@code source} into {@code target}; formulas are copied as their cached result. */
    private static void copyCellValue(Cell source, XSSFCell target) {
        switch (source.getCellType()) {
            case NUMERIC:
                target.setCellValue(source.getNumericCellValue());
                break;
            case STRING:
                target.setCellValue(source.getStringCellValue());
                break;
            case BOOLEAN:
                target.setCellValue(source.getBooleanCellValue());
                break;
            case FORMULA:
                // A formula may evaluate to text or a boolean; reading it as a number
                // unconditionally throws for those, so dispatch on the cached result type.
                switch (source.getCachedFormulaResultType()) {
                    case NUMERIC:
                        target.setCellValue(source.getNumericCellValue());
                        break;
                    case STRING:
                        target.setCellValue(source.getStringCellValue());
                        break;
                    case BOOLEAN:
                        target.setCellValue(source.getBooleanCellValue());
                        break;
                    default:
                        break;
                }
                break;
            default:
                // BLANK, ERROR and _NONE carry no value to copy.
                break;
        }
    }
}
My Excel files:
Test2.xlsx
Test3.xlsx
Here some columns are extra in Test3.xlsx and in both files as you can see in the heading row its all string but after that it has numeric values.
Here is an approximation of the code you need. You should still format it, extract functionality into methods and check the naming of the sheets.
String[] files = new String[] {"Test2.xlsx","Test3.xlsx"};
XSSFWorkbook workbook = new XSSFWorkbook();
XSSFSheet sheet = createSheetWithHeader(workbook);
try {
for (int f = 0; f < files.length; f++) {
String file = files[f];
FileInputStream inputStream = new FileInputStream(file);
XSSFWorkbook tempWorkbook = new XSSFWorkbook(inputStream);
int numOfSheets = tempWorkbook.getNumberOfSheets();
for (int i = 0; i < numOfSheets; i++) {
XSSFSheet tempSheet = tempWorkbook.getSheetAt(i);
int indexLastDataInserted = sheet.getLastRowNum();
int firstDataRow = getFirstDataRow(tempSheet);
Iterator<Row> itRow = tempSheet.rowIterator();
while(itRow.hasNext()) {
Row tempRow = itRow.next();
if (tempRow.getRowNum() >= firstDataRow) {
XSSFRow row = sheet.createRow(indexLastDataInserted + 1);
Iterator<Cell> itCell = tempRow.cellIterator();
while(itCell.hasNext()) {
Cell tempCell = itCell.next();
XSSFCell cell = row.createCell(tempCell.getColumnIndex());
//At this point you will have to set the value of the cell depending on the type of data it is
switch (tempCell.getCellType()) {
case NUMERIC:
cell.setCellValue(tempCell.getNumericCellValue());
break;
case STRING:
cell.setCellValue(tempCell.getStringCellValue());
break;
/**
* Add your other types, here is your problem!!!!!
*/
}
}
}
}
}
}
}catch (IOException ex1) {
System.out.println("Error reading file");
ex1.printStackTrace();
}
try (FileOutputStream outputStream = new FileOutputStream("result.xlsx")) {
workbook.write(outputStream);
}
Function to get the first data row (necessary to avoid having to enter by hand where the header of each excel ends):
/**
 * Returns the index of the first data row of a sheet.
 *
 * <p>If the sheet has an auto-filter, this is the row index of the filter + 1; otherwise
 * it is 0.
 *
 * @param tempSheet the sheet to inspect
 * @return index of first data row
 */
public static Integer getFirstDataRow(XSSFSheet tempSheet) {
    if (!tempSheet.getCTWorksheet().isSetAutoFilter()) {
        return 0;
    }
    String autoFilterRef = tempSheet.getCTWorksheet().getAutoFilter().getRef();
    // Take the first cell of the reference. Without this guard a reference that has no ':'
    // would make substring(0, -1) throw.
    int colon = autoFilterRef.indexOf(':');
    String firstCell = colon < 0 ? autoFilterRef : autoFilterRef.substring(0, colon);
    return new CellReference(firstCell).getRow() + 1;
}
Create the sheet with header in the method:
/**
 * Creates the target sheet, named "NEW_SHEET_NAME", in the given workbook and returns it.
 *
 * <p>The header-writing code is elided in this snippet (the {@code [...]} placeholder) and
 * must be filled in before this compiles.
 *
 * @param workbook the workbook the sheet is created in
 * @return the newly created sheet
 */
public static XSSFSheet createSheetWithHeader(XSSFWorkbook workbook){
XSSFSheet sheet = workbook.createSheet("NEW_SHEET_NAME");
//Implement the header
[...]
return sheet;
}

Write Webtable Values in Excel

I want to write the web table values to Excel. I tried it with a list, but it's not writing them in the correct format. It's writing all the values in the row only.
ArrayList<String> Storetablevalues = new ArrayList<String>();
WebDriver driver = new FirefoxDriver();
#BeforeTest
public void setup() throws Exception {
driver.manage().window().maximize();
driver.manage().timeouts().implicitlyWait(15, TimeUnit.SECONDS);
driver.get("URL");
}
#Test
public void run() throws Exception {
/* driver.findElement(By.xpath(".//*[#id='results']/div/a")).click(); */
Thread.sleep(9000);
System.out.println("Values are loaded");
int Row_count = driver.findElements(By.xpath("//*[#id='assetsTable']/tbody/tr")).size();
System.out.println("Number Of Rows = " + Row_count);
// Get number of columns In table.
int Col_count = driver.findElements(By.xpath("//*[#id='assetsTable']/tbody/tr[2]/td")).size();
System.out.println("Number Of Columns = " + Col_count);
// divided xpath In three parts to pass Row_count and Col_count values.
String first_part = "//*[#id='assetsTable']/tbody/tr[";
String second_part = "]/td[";
String third_part = "]";
String[][] arr = new String[Row_count][Col_count]; // Used for loop for
// number of rows.
for (int i = 2; i <= Row_count; i++) {
// Used for loop for number of columns.
for (int j = 1; j <= Col_count; j++) {
// Prepared final xpath of specific cell as per values of i and
// j.
String final_xpath = first_part + i + second_part + j + third_part;
// Will retrieve value from located cell and print It.
String Table_data = driver.findElement(By.xpath(final_xpath)).getText();
Storetablevalues.add(Table_data);
System.out.print(Table_data + " ");
}
System.out.println("");
System.out.println("");
}
}
/**
 * Writes the collected table values to sheet "Sheet2" of the gtmetrix.xls workbook.
 *
 * <p>Row 0 holds the two header labels; every stored value is then written into column 0,
 * one value per row, starting at row 1. I/O and write errors are printed, not rethrown.
 *
 * @throws Exception declared for callers; the errors handled here are only printed
 */
public void WriteXL() throws Exception {
    try {
        WritableWorkbook book = Workbook.createWorkbook(new File("C:/Users/Kishor/Desktop/gtmetrix.xls"));
        WritableSheet target = book.createSheet("Sheet2", 0);
        // Header row.
        target.addCell(new Label(0, 0, "URL"));
        target.addCell(new Label(1, 0, "Link Check Status"));
        // Data rows start right below the header.
        int rowIndex = 1;
        for (String storedValue : Storetablevalues) {
            target.addCell(new Label(0, rowIndex, storedValue));
            rowIndex++;
        }
        // Write and close the workbook
        book.write();
        book.close();
        System.out.println("Xls Writer...");
    } catch (IOException e) {
        e.printStackTrace();
    } catch (RowsExceededException e) {
        e.printStackTrace();
    } catch (WriteException e) {
        e.printStackTrace();
    }
}
}
This is my console output; I want to write it in this format in Excel.
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
public class TableToExcel {
String[][] tableVal;
int rowCount,columnCount;
WebDriver driver = new FirefoxDriver();
private static final String FILE_NAME = "C:/MyFirstExcel.xls";
//https://www.w3schools.com/html/tryit.asp?filename=tryhtml_table_intro
#BeforeClass
public void getTableSize(){
driver.get("https://www.w3schools.com/html/tryit.asp?filename=tryhtml_table_intro");
driver.switchTo().frame("iframeResult");
//get Row size
List<WebElement> row = driver.findElements(By.xpath(".//table/tbody/tr"));
//get Column size
List<WebElement> column = driver.findElements(By.xpath(".//table/tbody/tr/th"));
rowCount = row.size();
columnCount = column.size();
System.out.println("Row :"+rowCount+" Clounm :"+columnCount);
tableVal = new String[rowCount][columnCount];
}
#Test
public void test() throws IOException{
for(int i =1 ; i <= rowCount ; i++ ){
for(int j =1 ; j <= columnCount ; j++ ) {
if(i == 1) {
//Get header value
tableVal[i - 1][j - 1] = driver.findElement(By.xpath(".//table/tbody/tr[" + i + "]/th[" + j + "]")).getText();
System.out.println(driver.findElement(By.xpath(".//table/tbody/tr[" + i + "]/th[" + j + "]")).getText());
}
else{
//get table data values
tableVal[i-1][j-1] =driver.findElement(By.xpath(".//table/tbody/tr["+i+"]/td["+j+"]")).getText();
System.out.println(driver.findElement(By.xpath(".//table/tbody/tr["+i+"]/td["+j+"]")).getText());
}
}
}
HSSFWorkbook workbook = new HSSFWorkbook();
HSSFSheet sheet = workbook.createSheet("Datatypes in Java");
int rowNum = 0;
System.out.println("Creating excel");
for (Object[] datatype : tableVal) {
Row row = sheet.createRow(rowNum++);
int colNum = 0;
for (Object field : datatype) {
Cell cell = row.createCell(colNum++);
if (field instanceof String) {
cell.setCellValue((String) field);
} else if (field instanceof Integer) {
cell.setCellValue((Integer) field);
}
}
}
try {
FileOutputStream outputStream = new FileOutputStream(FILE_NAME);
workbook.write(outputStream);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}finally {
workbook.close();
}
System.out.println("Done");
}
}

Apache POI Streaming API doesn't recognize Excel (xlsx) content

I have a class which ingests .xlsx-files. I took it from this example and modified it for my needs:
https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
Now the application processes some files just fine, others not at all. If I change a single field or even a single character in one of the non-working files and save it again, the whole content is processed correctly. Does anyone have an idea what the reason might be? (IMHO it lies somewhere within the original Excel files.)
To whom it may help, here is my code:
package com.goodgamestudios.icosphere.service.fileReader;
import com.goodgamestudios.icosphere.datamodel.DataSet;
import com.goodgamestudios.icosphere.datamodel.Tuple;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class ExcelFileReader implements FileReader {
static final Logger LOG = LoggerFactory.getLogger(ExcelFileReader.class);
private SheetHandler handler;
#Override
public DataSet getDataFromFile(File file) throws IOException {
LOG.info("Start ingesting file {}");
try {
OPCPackage pkg = OPCPackage.open(file);
XSSFReader reader = new XSSFReader(pkg);
StylesTable styles = reader.getStylesTable();
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
SharedStringsTable sst = reader.getSharedStringsTable();
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
handler = new SheetHandler(styles, strings, 24);
parser.setContentHandler(handler);
// rId2 found by processing the Workbook
// Seems to either be rId# or rSheet#
System.out.println("yooooo 1");
InputStream sheet2 = reader.getSheet("rId2");
System.out.println("yooooo 2");
InputSource sheetSource = new InputSource(sheet2);
System.out.println("yooooo 3");
parser.parse(sheetSource);
LOG.debug("{} rows parsed", handler.getData().getRows().size() + 1);
sheet2.close();
return handler.getData();
} catch (OpenXML4JException | SAXException ex) {
LOG.warn("Unable to parse file {}", file.getName());
LOG.warn("Exception: {}: ", ex);
}
return null;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*
* Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
* <p/>
* Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
* <p/>
* A web-friendly version is http://openiso.org/Ecma/376/Part4
*/
private static class SheetHandler extends DefaultHandler {
boolean isFirstRow = true;
private int quantityOfColumns;
private int currentColumnNumber = 1;
int currentRowNumber = 1;
private int rowNumberOfLastCell = 1;
private DataSet data = new DataSet();
private Tuple tuple;
/**
* Table with styles
*/
private StylesTable stylesTable;
/**
* Table with unique strings
*/
private ReadOnlySharedStringsTable sharedStringsTable;
/**
* Number of columns to read starting with leftmost
*/
private final int minColumnCount;
// Set when V start element is seen
private boolean vIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
private xssfDataType nextDataType;
// Used to format numeric cell values.
private short formatIndex;
private String formatString;
private final DataFormatter formatter;
// The last column printed to the output stream
private int lastColumnNumber = -1;
// Gathers characters as they are seen.
private StringBuffer value;
static final Logger LOG = LoggerFactory.getLogger(SheetHandler.class);
/**
 * Creates a handler with an empty value buffer, a fresh {@link DataFormatter} and
 * NUMBER as the initial cell data type.
 *
 * @param styles table with styles
 * @param strings table with unique strings
 * @param cols stored as the minimum column count
 */
private SheetHandler(StylesTable styles,
        ReadOnlySharedStringsTable strings,
        int cols) {
    // Collaborators handed in by the caller.
    this.minColumnCount = cols;
    this.sharedStringsTable = strings;
    this.stylesTable = styles;
    // Parser state owned by this handler.
    this.formatter = new DataFormatter();
    this.nextDataType = xssfDataType.NUMBER;
    this.value = new StringBuffer();
    LOG.debug("Sheethandler created");
}
/**
 * Handles the start of a worksheet XML element.
 *
 * <p>For a value element ("v" or "inlineStr") the character buffer is cleared and
 * capturing is switched on. For a cell element ("c") the column index is derived from the
 * cell reference, and the data type and number format of the coming value are determined
 * from the cell's "t" and "s" attributes.
 *
 * @throws SAXException if a cell element has no reference starting with a column name
 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
 */
public void startElement(String uri, String localName, String name,
        Attributes attributes) throws SAXException {
    LOG.trace("start: uri: {} localname: {} name: {}", uri, localName, name);
    if ("inlineStr".equals(name) || "v".equals(name)) {
        vIsOpen = true;
        // Clear contents cache
        value.setLength(0);
        return;
    }
    // c => cell
    if (!"c".equals(name)) {
        return;
    }
    // Get the cell reference, e.g. "C12", and split off the column letters.
    String r = attributes.getValue("r");
    int firstDigit = -1;
    if (r != null) {
        for (int c = 0; c < r.length(); ++c) {
            if (Character.isDigit(r.charAt(c))) {
                firstDigit = c;
                break;
            }
        }
    }
    if (firstDigit < 1) {
        // No reference, no digits or no column letters: the column cannot be determined.
        throw new SAXException("Cell element without a valid reference: " + r);
    }
    currentColumnNumber = nameToColumn(r.substring(0, firstDigit));
    LOG.trace("column {}", currentColumnNumber);
    // Set up defaults.
    this.nextDataType = xssfDataType.NUMBER;
    this.formatIndex = -1;
    this.formatString = null;
    String cellType = attributes.getValue("t");
    String cellStyleStr = attributes.getValue("s");
    if ("b".equals(cellType)) {
        nextDataType = xssfDataType.BOOL;
    } else if ("e".equals(cellType)) {
        nextDataType = xssfDataType.ERROR;
    } else if ("inlineStr".equals(cellType)) {
        nextDataType = xssfDataType.INLINESTR;
    } else if ("s".equals(cellType)) {
        nextDataType = xssfDataType.SSTINDEX;
    } else if ("str".equals(cellType)) {
        nextDataType = xssfDataType.FORMULA;
    } else if (cellStyleStr != null) {
        // It's a number, but almost certainly one with a special style or format.
        // (The former fallback to style 0 sat behind a second null check of the same
        // variable and could never run, so it has been removed.)
        XSSFCellStyle style = stylesTable.getStyleAt(Integer.parseInt(cellStyleStr));
        if (style != null) {
            this.formatIndex = style.getDataFormat();
            this.formatString = style.getDataFormatString();
            if (this.formatString == null) {
                this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
            }
        }
    }
}
/**
 * Handles the end of a worksheet XML element.
 *
 * <p>At the end of a "v" element the buffered characters are converted to a string according
 * to {@code nextDataType}; the result is added to the headers while the first row is being
 * read, and stored in the current tuple at {@code currentColumnNumber} afterwards. At the end
 * of a "row" element the first row fixes the column count, and every later non-empty tuple
 * is added to the data set before a new tuple is started.
 *
 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
 */
public void endElement(String uri, String localName, String name)
throws SAXException {
String thisStr = null;
// v => contents of a cell
if ("v".equals(name)) {
// Process the value contents as required.
// Do now, as characters() may be called more than once
switch (nextDataType) {
case BOOL:
// NOTE(review): charAt(0) throws if the value buffer is empty.
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR:
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA:
// A formula could result in a string value,
// so always add double-quote characters.
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
// TODO: have not seen an example of this, so it's untested.
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = '"' + rtsi.toString() + '"';
break;
case SSTINDEX:
// The buffered value is an index into the shared strings table.
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
thisStr = rtss.toString();
} catch (NumberFormatException ex) {
// thisStr stays null when the index cannot be parsed.
System.out.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString());
}
break;
case NUMBER:
String n = value.toString();
// Apply the cell's number format only when one was found for this cell.
if (this.formatString != null && n.length() > 0) {
thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
} else {
thisStr = n;
}
break;
default:
thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
break;
}
// Output after we've seen the string contents
// Emit commas for any fields that were missing on this row
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
// NOTE(review): this loop has an empty body, so nothing is emitted for skipped columns.
for (int i = lastColumnNumber; i < currentColumnNumber; ++i) {
}
// Might be the empty string.
System.out.println(thisStr);
if (isFirstRow) {
// Header values are appended in encounter order, not placed by column index.
data.getHeaders().add(thisStr);
} else {
// NOTE(review): no bounds check here; presumably the tuple holds quantityOfColumns entries - confirm.
tuple.getRowEntries()[currentColumnNumber] = thisStr;
}
// Update column
if (currentColumnNumber > -1) {
lastColumnNumber = currentColumnNumber;
}
} else if ("row".equals(name)) {
// We're onto a new row
System.out.println("nextrow");
lastColumnNumber = -1;
System.out.println("yoooooo tuple:" + tuple);
if (isFirstRow) {
// The first row defines the headers; the header count sizes every later tuple.
isFirstRow = false;
quantityOfColumns = data.getHeaders().size();
tuple = new Tuple(quantityOfColumns);
} else if (!tuple.isEmpty()) {
// Rows whose tuple is empty are dropped rather than added.
data.addRow(tuple);
tuple = new Tuple(quantityOfColumns);
}
}
}
/**
 * Appends character data to the value buffer once a value element has been opened; until
 * then the characters are ignored. Originally only "v" opened the buffer; "inlineStr" was
 * added later.
 */
public void characters(char[] ch, int start, int length)
        throws SAXException {
    if (!vIsOpen) {
        return;
    }
    value.append(ch, start, length);
}
/**
 * Converts an Excel column name like "C" to a zero-based index ("A" is 0, "Z" is 25,
 * "AA" is 26). An empty name yields -1.
 *
 * @param name column letters, expected in upper case
 * @return Index corresponding to the specified name
 */
private int nameToColumn(String name) {
    int index = -1;
    for (char letter : name.toCharArray()) {
        // Treat the letters as digits of a base-26 number that has no zero digit.
        index = (index + 1) * 26 + letter - 'A';
    }
    return index;
}
/**
 * Returns the data set this handler fills while parsing.
 *
 * @return the data set held by this handler (the same instance, not a copy)
 */
public DataSet getData() {
return data;
}
}
/**
 * The type of the data value is indicated by an attribute on the cell. The
 * value is usually in a "v" element within the cell.
 */
enum xssfDataType {
// Cell attribute t="b"
BOOL,
// Cell attribute t="e"
ERROR,
// Cell attribute t="str"
FORMULA,
// Cell attribute t="inlineStr"
INLINESTR,
// Cell attribute t="s": the value is an index into the shared strings table
SSTINDEX,
// Default when none of the other type attributes is present
NUMBER,
}
}
Here is the xml example of a working and a not working worksheet:
http://www.file-upload.net/download-10909789/not_working.xml.html
http://www.file-upload.net/download-10909790/working.xml.html
and here the xlsx-files:
http://www.file-upload.net/download-10909802/not_working.xlsx.html
http://www.file-upload.net/download-10909803/working.xlsx.html
Thanks!
The problem was that LibreOffice Calc saves the first worksheet under "rId2", whereas MS Office does so under "rId1". So I'm now going through the sheet IDs until a sheet with content is parsed or no more sheets are found. This works with both files:
/**
 * Parses sheets "rId1", "rId2", ... in ascending order until the handler's data set is no
 * longer empty or the reader reports that the requested sheet id does not exist.
 *
 * <p>The id of the first worksheet differs between the applications that produced the file,
 * so it cannot be hard-coded.
 */
private void parseFirstWorksheetWithContent(XSSFReader reader) throws IOException, InvalidFormatException, SAXException {
    int sheetNumber = 1;
    try {
        while (handler.getData().isEmpty()) {
            parseSheet(reader, "rId" + sheetNumber);
            sheetNumber++;
        }
    } catch (IllegalArgumentException ignored) {
        // No more sheets, file empty
    }
}
/**
 * Parses one worksheet with the shared handler.
 *
 * @param reader reader of the opened workbook package
 * @param sheetId relationship id of the sheet, e.g. "rId1"
 * @throws InvalidFormatException if the sheet part cannot be opened
 * @throws SAXException if the sheet XML cannot be parsed
 * @throws IOException if the sheet cannot be read
 */
private void parseSheet(XSSFReader reader, String sheetId) throws InvalidFormatException, SAXException, IOException {
    XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
    parser.setContentHandler(handler);
    // try-with-resources closes the sheet stream even when parsing throws.
    try (InputStream sheetStream = reader.getSheet(sheetId)) {
        parser.parse(new InputSource(sheetStream));
    }
}

Filtering column in excel using java poi

I have a large excel file. I want to filter a column "Mainly used for" for values "mainly used for mobile". Then I need to store the corresponding values in the "Number Series" column in a list. I have a code to start with. However I am not able to do the filtering part and storing it to an array list. Could you please help me out here.
I did some digging and have modified the code. However I have not been able to meet my requirement. I have following problems -
*The code only selects two columns and displays their contents. Not able to filter :(
*The excel has column names with spaces. So I am getting the error. As the excel is generated by the user,
we have no control over column names. How to deal with the column name with spaces ??
*Excel has alpha-numeric values, how to deal with them?
Could you please help me out here.
package com.excel;
import java.io.File;
import java.io.FileInputStream;
import java.math.BigDecimal;
import java.io.FileOutputStream;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;*/
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
public class Test {
/**
 * Copies the "Mainly used for" and "Number Series" columns of the first sheet of
 * example.xls into a new workbook, test1.xlsx.
 *
 * <p>Missing rows are skipped and missing cells are written as empty strings. All streams
 * and workbooks are closed, also on failure.
 *
 * @param args unused
 * @throws Exception if a file cannot be read or written, or a required header is missing
 */
public static void main(String[] args) throws Exception {
    File excel = new File("D:\\FileDownload\\example.xls");
    try (FileInputStream fis = new FileInputStream(excel);
            HSSFWorkbook wb = new HSSFWorkbook(fis);
            XSSFWorkbook workbook = new XSSFWorkbook()) {
        HSSFSheet ws = wb.getSheetAt(0);
        ws.setForceFormulaRecalculation(true);
        int rowNum = ws.getLastRowNum() + 1;
        HSSFRow rowHeader = ws.getRow(0);
        if (rowHeader == null) {
            throw new IllegalStateException("Sheet has no header row");
        }
        int colNum = rowHeader.getLastCellNum();
        int mainlyUsedForHeaderIndex = -1, mobileSeriesHeaderIndex = -1;
        //Read the headers first. Locate the ones you need
        for (int j = 0; j < colNum; j++) {
            String cellValue = cellText(rowHeader, j);
            if ("Mainly used for".equalsIgnoreCase(cellValue)) {
                mainlyUsedForHeaderIndex = j;
            } else if ("Number Series".equalsIgnoreCase(cellValue)) {
                mobileSeriesHeaderIndex = j;
            }
        }
        if (mainlyUsedForHeaderIndex == -1 || mobileSeriesHeaderIndex == -1) {
            throw new IllegalStateException("Could not find header indexes\n Mainly used for : " + mainlyUsedForHeaderIndex + " | Number Series: " + mobileSeriesHeaderIndex);
        }
        System.out.println("Indexes are found!!!");
        //Create a blank sheet in the new workbook
        XSSFSheet sheet = workbook.createSheet("data");
        for (int i = 1; i < rowNum; i++) {
            HSSFRow row = ws.getRow(i);
            if (row == null) {
                // getRow returns null for rows that were never written; nothing to copy.
                continue;
            }
            XSSFRow newRow = sheet.createRow(i - 1);
            newRow.createCell(0).setCellValue(cellText(row, mainlyUsedForHeaderIndex));
            newRow.createCell(1).setCellValue(cellText(row, mobileSeriesHeaderIndex));
        }
        try (FileOutputStream fos = new FileOutputStream(new File("D:\\FileDownload\\test1.xlsx"))) {
            workbook.write(fos);
        }
        System.out.println("File generated");
    }
}

/** Returns the text of the cell at {@code columnIndex}, or "" when the row has no such cell. */
private static String cellText(HSSFRow row, int columnIndex) {
    HSSFCell cell = row.getCell(columnIndex);
    return cell == null ? "" : cellToString(cell);
}
/**
 * Converts a cell to text.
 *
 * <p>Blank, missing and error cells yield an empty string. Boolean and numeric cells yield
 * the string form of their value, formula cells yield the formula text, and string cells
 * yield their content.
 *
 * @param cell the cell to convert; may be {@code null}
 * @return the cell text, never {@code null}
 */
public static String cellToString(HSSFCell cell) {
    if (cell == null) {
        return "";
    }
    switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_BOOLEAN:
            return String.valueOf(cell.getBooleanCellValue());
        case HSSFCell.CELL_TYPE_FORMULA:
            return cell.getCellFormula();
        case HSSFCell.CELL_TYPE_NUMERIC:
            return String.valueOf(cell.getNumericCellValue());
        case HSSFCell.CELL_TYPE_STRING:
            return cell.getRichStringCellValue().toString();
        case HSSFCell.CELL_TYPE_BLANK:
        case HSSFCell.CELL_TYPE_ERROR:
        default:
            // Error cells used to leave the result null, which made toString() throw a
            // NullPointerException; they are now reported as empty text.
            return "";
    }
}
}
I was able to meet my requirement using the following, entirely different approach.
package com.excel;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
/**
 * Searches the first sheet of an .xls workbook for cells whose text equals a
 * given phrase and prints the first-column cell of every matching row.
 */
public class ExcelRead {

    /**
     * Opens the workbook, runs the search and prints one line per match.
     *
     * @param args unused
     * @throws IOException if the workbook file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String fileName = "D:\\FileDownload\\example.xls";
        String cellContent = "Mainly used for mobile";

        List<Cell> mobileSeries;
        // try-with-resources: the original never closed the file stream.
        try (InputStream input = new FileInputStream(fileName)) {
            HSSFWorkbook wb = new HSSFWorkbook(input);
            HSSFSheet sheet = wb.getSheetAt(0);
            mobileSeries = findRow(sheet, cellContent);
        }

        for (Cell cell : mobileSeries) {
            System.out.println(cell);
        }
        finish();
    }

    /**
     * Prints the cell at the given row and column, prefixed with
     * "Your total is: ". Not invoked by {@link #main(String[])}.
     *
     * @param sheet the sheet to read from
     * @param rownr zero-based row index
     * @param colnr zero-based column index
     */
    private static void output(HSSFSheet sheet, int rownr, int colnr) {
        HSSFRow row = sheet.getRow(rownr);
        HSSFCell cell = row.getCell(colnr);
        System.out.println("Your total is: " + cell);
    }

    /**
     * Collects the column-0 cell of every row that contains a string cell whose
     * trimmed text equals {@code cellContent}.
     *
     * <p>A row contributes one entry per matching cell, so a row with two
     * matching cells appears twice. An entry is {@code null} when the matching
     * row has no cell in column 0.
     *
     * @param sheet       the sheet to scan
     * @param cellContent the exact text to look for
     * @return the column-0 cells of the matching rows, in sheet order; empty
     *         when nothing matches
     */
    private static List<Cell> findRow(HSSFSheet sheet, String cellContent) {
        List<Cell> matches = new ArrayList<Cell>();
        for (Row row : sheet) {
            for (Cell cell : row) {
                if (cell.getCellType() != Cell.CELL_TYPE_STRING) {
                    continue;
                }
                String text = cell.getRichStringCellValue().getString().trim();
                if (text.equals(cellContent)) {
                    // The iterated row is the matching row; no need to look it
                    // up again through its row number.
                    matches.add(row.getCell(0));
                }
            }
        }
        return matches;
    }

    /** Terminates the JVM with exit status 0. */
    private static void finish() {
        System.exit(0);
    }
}

reading specific column of excel into java program

I need to read a specific column of an Excel sheet and then declare the variables in Java. The program I have written reads the entire content of the Excel sheet, but I need to read one fixed column, such as C.
This is what I have done:
import java.io.File;
import java.io.IOException;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
/**
 * Reads every cell of the first sheet of an .xls workbook (via JExcelApi) into
 * a two-dimensional string array and prints the values.
 */
public class JavaApplication4
{
    private String inputFile;
    // Filled by read(); indexed as data[column][row].
    String[][] data = null;

    /**
     * Sets the path of the workbook that {@link #read()} will open.
     *
     * @param inputFile path to the .xls file
     */
    public void setInputFile(String inputFile)
    {
        this.inputFile = inputFile;
    }

    /**
     * Loads the contents of all cells of the first sheet, prints each value on
     * its own line (column by column) and returns them.
     *
     * @return the cell contents, indexed as {@code [column][row]}
     * @throws IOException if the file cannot be read, or if it is not a valid
     *                     workbook (the underlying {@code BiffException} is
     *                     attached as the cause)
     */
    public String[][] read() throws IOException
    {
        File inputWorkbook = new File(inputFile);
        Workbook w = null;
        try
        {
            w = Workbook.getWorkbook(inputWorkbook);
            // Get the first sheet
            Sheet sheet = w.getSheet(0);
            // Query the dimensions once instead of on every loop iteration.
            int columns = sheet.getColumns();
            int rows = sheet.getRows();
            data = new String[columns][rows];
            // Loop over all columns and all rows of the sheet
            for (int j = 0; j < columns; j++)
            {
                for (int i = 0; i < rows; i++)
                {
                    Cell cell = sheet.getCell(j, i);
                    data[j][i] = cell.getContents();
                }
            }
            for (int j = 0; j < data.length; j++)
            {
                for (int i = 0; i < data[j].length; i++)
                {
                    System.out.println(data[j][i]);
                }
            }
        }
        catch (BiffException e)
        {
            // Previously the error was only printed and a possibly-null array
            // was returned; surface it to the caller with the cause preserved.
            throw new IOException("Could not parse workbook " + inputFile, e);
        }
        finally
        {
            // The workbook was never closed before.
            if (w != null)
            {
                w.close();
            }
        }
        return data;
    }

    /**
     * Reads and prints the sample workbook.
     *
     * @param args unused
     * @throws IOException if the workbook cannot be read
     */
    public static void main(String[] args) throws IOException
    {
        JavaApplication4 test = new JavaApplication4();
        test.setInputFile("C://users/admin/Desktop/Content.xls");
        test.read();
    }
}
Here is my excel sheet,
From a bowl of chits numbered /#v1#/ to /#v2#/ , a single chit is randomly drawn. Find the probability that the chit drawn is a number that is a multiple of /#v3#/ or /# v4#/?
I need to read this data and, by matching the pattern /#v1#/, declare the variables. How can I do this?
First get all the column headers from the sheet, using sheet.getColumns() for the column count, and store them in a list. Then you can collect the values for each column, or for column "C" only. Try the code below and let me know if it works.
// Number of columns in the sheet (despite the name, this is a count, not an index).
int masterSheetColumnIndex = sheet.getColumns();
// Header names, taken from row 0 of every column.
List<String> ExpectedColumns = new ArrayList<String>();
int headerCol = 0;
while (headerCol < masterSheetColumnIndex) {
    ExpectedColumns.add(sheet.getCell(headerCol, 0).getContents());
    headerCol++;
}
// Maps each header name to the values found below it, in column order.
LinkedHashMap<String, List<String>> columnDataValues = new LinkedHashMap<String, List<String>>();
List<String> column1 = new ArrayList<String>();
for (int col = 0; col < masterSheetColumnIndex; col++) {
    // Fresh list per column; data rows start at row 1, below the header.
    column1 = new ArrayList<String>();
    int rowIdx = 1;
    while (rowIdx < sheet.getRows()) {
        column1.add(sheet.getCell(col, rowIdx).getContents());
        rowIdx++;
    }
    columnDataValues.put(ExpectedColumns.get(col), column1);
}
This code is simple and efficient, and it works as expected.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
 * For every regular file in the directory named by the AZURE_FILE_PATH
 * environment variable, opens it as a workbook and collects the non-blank
 * cells of the column whose header (in row 0) equals a fixed name.
 */
public class TestExcelFile {

    /**
     * Walks the directory and processes each workbook in turn. Files that fail
     * to open are reported via a stack trace and skipped.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String envFilePath = System.getenv("AZURE_FILE_PATH");
        // new File(null) would throw a NullPointerException.
        if (envFilePath == null) {
            System.out.println("Environment variable AZURE_FILE_PATH is not set");
            return;
        }
        File folder = new File(envFilePath);
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            System.out.println(envFilePath + " is not a readable directory");
            return;
        }
        // Header text searched for in row 0 (the first row of the worksheet).
        String columnWanted = "Column_Name";
        for (File file : listOfFiles) {
            if (!file.isFile()) {
                continue;
            }
            System.out.println("File " + file.getName());
            // Open the File object directly instead of concatenating the path
            // with "\\" (Windows-only), and close the stream when done.
            try (FileInputStream in = new FileInputStream(file)) {
                Workbook workbook = WorkbookFactory.create(in);
                // Get the first sheet.
                Sheet sheet = workbook.getSheetAt(0);
                Integer columnNo = findColumnIndex(sheet.getRow(0), columnWanted);
                if (columnNo != null) {
                    // All non-blank cells of the wanted column; consume them here.
                    List<Cell> cells = collectColumnCells(sheet, columnNo);
                } else {
                    System.out.println("could not find column " + columnWanted + " in first row of " + file.getName());
                }
            } catch (InvalidFormatException | IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Finds the column whose header cell holds exactly {@code columnWanted}.
     * When several header cells match, the last one wins.
     *
     * @param headerRow    the header row; may be {@code null} for an empty sheet
     * @param columnWanted the header text to look for
     * @return the zero-based column index, or {@code null} if no header matches
     */
    private static Integer findColumnIndex(Row headerRow, String columnWanted) {
        if (headerRow == null) {
            return null;
        }
        Integer columnNo = null;
        for (Cell cell : headerRow) {
            // getStringCellValue() throws on numeric cells, so check the type first.
            if (cell.getCellType() == Cell.CELL_TYPE_STRING
                    && columnWanted.equals(cell.getStringCellValue())) {
                columnNo = cell.getColumnIndex();
            }
        }
        return columnNo;
    }

    /**
     * Gathers every non-blank cell of one column, header row included.
     *
     * @param sheet    the sheet to scan
     * @param columnNo zero-based column index
     * @return the non-null, non-blank cells of that column in row order
     */
    private static List<Cell> collectColumnCells(Sheet sheet, int columnNo) {
        List<Cell> cells = new ArrayList<Cell>();
        for (Row row : sheet) {
            Cell c = row.getCell(columnNo);
            if (c != null && c.getCellType() != Cell.CELL_TYPE_BLANK) {
                cells.add(c);
            }
        }
        return cells;
    }
}
Reading Particular column from excel file
// Opens the .xlsx file at `path` and appends the value of every cell of the
// first sheet to the list `al` (declared outside this snippet).
File myFile = new File(path);
FileInputStream fis = new FileInputStream(myFile);
// Finds the workbook instance for XLSX file
XSSFWorkbook myWorkBook = new XSSFWorkbook (fis);
// Reading sheet at number 0 in spreadsheet
Sheet sheet = myWorkBook.getSheetAt(0);
// Iterate over the rows of the sheet
Iterator<Row> itr = sheet.iterator();
while (itr.hasNext())
{
    Row row = itr.next();
    // Iterate over the cells of this row
    Iterator<Cell> cellIterator = row.cellIterator();
    // NOTE(review): ppmEx (the column-0 cell) is fetched but never used; the
    // loop below still visits every cell of the row. Confirm whether only
    // this column was meant to be read.
    Cell ppmEx= row.getCell(0);
    while (cellIterator.hasNext())
    {
        Cell cell = cellIterator.next();
        // Check the cell type and store the value accordingly
        switch (cell.getCellType())
        {
            case Cell.CELL_TYPE_NUMERIC:
                al.add(cell.getNumericCellValue());
                break;
            case Cell.CELL_TYPE_STRING:
                al.add(cell.getStringCellValue());
                break;
            case Cell.CELL_TYPE_BOOLEAN:
                al.add(cell.getBooleanCellValue());
                // This break was missing: every boolean cell fell through and
                // also appended "blank".
                break;
            case Cell.CELL_TYPE_BLANK:
                al.add("blank");
                break;
        }
    }
    System.out.println("-");
}
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package xlsxreader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.*;
/**
 * Prints three columns of the first sheet of a workbook, locating each column
 * by the text of its header cell in row 0.
 *
 * @author khaled
 */
public class XlsxReader {

    /** Marker for a header that has not been found in row 0. */
    private static final int NOT_FOUND = -1;

    /**
     * Reads the workbook and prints, for every data row in which all three
     * wanted cells are present and non-blank, the three values on one line.
     *
     * @param args the command line arguments
     * @throws FileNotFoundException if the workbook file does not exist
     * @throws IOException           if the workbook cannot be read
     * @throws InvalidFormatException if the file is not a valid workbook
     * @throws IllegalStateException if row 0 is missing or lacks one of the
     *                               expected headers
     */
    public static void main(String[] args) throws FileNotFoundException, IOException, InvalidFormatException {
        File file = new File("C:\\Users\\khaled\\Desktop\\myXLSX file.xlsx");
        // try-with-resources: the stream was never closed before.
        try (FileInputStream in = new FileInputStream(file)) {
            Workbook workbook = WorkbookFactory.create(in);
            Sheet sheet = workbook.getSheetAt(0);
            // Start at NOT_FOUND rather than 0: with 0 a missing header
            // silently made the code read column 0 instead.
            int column_index_1 = NOT_FOUND;
            int column_index_2 = NOT_FOUND;
            int column_index_3 = NOT_FOUND;
            Row row = sheet.getRow(0);
            if (row == null) {
                throw new IllegalStateException("Header row (row 0) is missing");
            }
            for (Cell cell : row) {
                // getStringCellValue() throws on numeric cells; skip non-text headers.
                if (cell.getCellType() != Cell.CELL_TYPE_STRING) {
                    continue;
                }
                // Column header names.
                switch (cell.getStringCellValue()) {
                    case "MyFirst Column":
                        column_index_1 = cell.getColumnIndex();
                        break;
                    case "3rd Column":
                        column_index_2 = cell.getColumnIndex();
                        break;
                    case "forth Column":
                        column_index_3 = cell.getColumnIndex();
                        break;
                }
            }
            if (column_index_1 == NOT_FOUND || column_index_2 == NOT_FOUND || column_index_3 == NOT_FOUND) {
                throw new IllegalStateException("Header row is missing a required column; found indexes: "
                        + column_index_1 + ", " + column_index_2 + ", " + column_index_3);
            }
            for (Row r : sheet) {
                if (r.getRowNum() == 0) {
                    continue; // headers
                }
                Cell c_1 = r.getCell(column_index_1);
                Cell c_2 = r.getCell(column_index_2);
                Cell c_3 = r.getCell(column_index_3);
                if (isFilled(c_1) && isFilled(c_2) && isFilled(c_3)) {
                    System.out.print(" "+c_1 + " " + c_2+" "+c_3+"\n");
                }
            }
        }
    }

    /** Returns true when the cell exists and is not of the blank cell type. */
    private static boolean isFilled(Cell cell) {
        return cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK;
    }
}

Categories

Resources