Scraping data from a table is slow, but I am uncertain why - java

I am scraping data from a table using Selenium with Java, but it is slow and I am not sure why. Is there a reason for this, and how can I speed it up? I also noticed that it seems to slow down further as it progresses; I observed this by watching the print statements in the console.
Here is my code:
package mypackage;
import java.io.IOException;
import java.time.Duration;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import com.seleniumpractice.utilities.XLUtils;
import io.github.bonigarcia.wdm.WebDriverManager;
/**
 * Scrapes the "main_table_countries_today" table from worldometers.info and
 * copies its header and its non-empty rows into the "Covid Data" worksheet.
 */
public class CovidWebTable {
    /** Worksheet that receives the scraped table. */
    private static final String SHEET_NAME = "Covid Data";

    static WebDriver driver;
    static XLUtils xl;
    static List<WebElement> header;
    static List<WebElement> rows;

    /**
     * Opens the page, reads the table and writes it cell by cell through {@code XLUtils}.
     *
     * @param args unused
     * @throws IOException declared by the original signature (spreadsheet access)
     */
    public static void main(String[] args) throws IOException {
        WebDriverManager.chromedriver().setup();
        driver = new ChromeDriver();
        try {
            driver.get("https://www.worldometers.info/coronavirus");
            driver.manage().window().maximize();
            driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(10));

            // XPath attribute tests use '@'; "[#id=...]" is not valid XPath.
            WebElement table = driver.findElement(By.xpath("//table[@id='main_table_countries_today']"));
            rows = table.findElements(By.xpath(".//tr[@role='row']"));
            System.out.println("Total rows: " + rows.size());

            xl = new XLUtils(".\\datafiles\\covid.xls");

            // Header: same bounds as before (skips the first and the last header cell).
            header = table.findElements(By.xpath(".//thead//th"));
            int columnCount = header.size();
            System.out.println("Header cols: " + columnCount);
            for (int col = 1; col < columnCount - 1; col++) {
                xl.setCellData(SHEET_NAME, 0, col - 1, header.get(col).getText());
            }

            int xlRow = 1;
            for (int r = 1; r < rows.size(); r++) {
                WebElement row = rows.get(r);
                // Every getText()/findElement() is a round trip to the browser, so do it once per row.
                if (row.getText().isEmpty()) {
                    System.out.println("Skipped row: " + r);
                    continue;
                }
                System.out.println("Writing row " + r);
                // One lookup for all cells instead of one XPath query per cell.
                // NOTE(review): assumes the row's <td> cells are flat (no nested tables) - confirm.
                List<WebElement> cells = row.findElements(By.tagName("td"));
                for (int c = 1; c < columnCount && c < cells.size(); c++) {
                    xl.setCellData(SHEET_NAME, xlRow, c - 1, cells.get(c).getText());
                }
                xlRow++;
            }
            System.out.println("Complete.");
        } finally {
            // quit() ends the whole browser session even when scraping fails half-way.
            driver.quit();
        }
    }
}
The code that contains the code for writing to excel:
package com.internetBanking.utilities;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
/**
 * Static helpers for reading and writing single cells of an .xls workbook with Apache POI HSSF.
 *
 * <p>Every call opens the file, does its work and closes the file again. Writing many cells
 * through {@link #setCellData} therefore re-reads and re-writes the whole workbook once per
 * cell; use {@link #setRowData} to write a complete row in one round trip.
 */
public class XLUtils {
    // Public static state kept for backward compatibility; each method overwrites what it uses.
    public static FileInputStream fi;
    public static FileOutputStream fo;
    public static HSSFWorkbook wb;
    public static HSSFSheet ws;
    public static HSSFRow row;
    public static HSSFCell cell;

    /**
     * @param xlfile  path of the workbook
     * @param xlsheet sheet name
     * @return the value of {@code getLastRowNum()} for the sheet
     * @throws IOException if the file cannot be read
     */
    public static int getRowCount(String xlfile, String xlsheet) throws IOException {
        try (FileInputStream in = new FileInputStream(xlfile);
             HSSFWorkbook book = new HSSFWorkbook(in)) {
            fi = in;
            wb = book;
            ws = book.getSheet(xlsheet);
            return ws.getLastRowNum();
        }
    }

    /**
     * @param xlFile  path of the workbook
     * @param xlSheet sheet name
     * @param rowNum  zero-based row index
     * @return the value of {@code getLastCellNum()} for the row
     * @throws IOException if the file cannot be read
     */
    public static int getCellCount(String xlFile, String xlSheet, int rowNum) throws IOException {
        try (FileInputStream in = new FileInputStream(xlFile);
             HSSFWorkbook book = new HSSFWorkbook(in)) {
            fi = in;
            wb = book;
            ws = book.getSheet(xlSheet);
            row = ws.getRow(rowNum);
            return row.getLastCellNum();
        }
    }

    /**
     * Reads one cell formatted as text.
     *
     * @return the formatted cell value, or {@code ""} when the row does not exist or formatting fails
     * @throws IOException if the file cannot be read
     */
    public static String getCellData(String xlFile, String xlSheet, int rowNum, int colNum) throws IOException {
        // try-with-resources closes workbook and stream on every path, including the early returns.
        try (FileInputStream in = new FileInputStream(xlFile);
             HSSFWorkbook book = new HSSFWorkbook(in)) {
            fi = in;
            wb = book;
            ws = book.getSheet(xlSheet);
            row = ws.getRow(rowNum);
            if (row == null) {
                return "";
            }
            cell = row.getCell(colNum);
            try {
                return new DataFormatter().formatCellValue(cell);
            } catch (Exception e) {
                return "";
            }
        }
    }

    /**
     * Writes one cell and saves the workbook. The sheet and the row are created when missing.
     *
     * @throws IOException if the file cannot be read or written
     */
    public static void setCellData(String xlFile, String xlSheet, int rowNum, int colNum, String data) throws IOException {
        try (FileInputStream in = new FileInputStream(xlFile);
             HSSFWorkbook book = new HSSFWorkbook(in)) {
            fi = in;
            wb = book;
            row = rowFor(book, xlSheet, rowNum);
            Cell target = row.createCell(colNum);
            target.setCellValue(data);
            save(book, xlFile);
        }
    }

    /**
     * Writes {@code values} into columns 0..n-1 of one row with a single read/write of the file.
     *
     * @throws IOException if the file cannot be read or written
     */
    public static void setRowData(String xlFile, String xlSheet, int rowNum, String... values) throws IOException {
        try (FileInputStream in = new FileInputStream(xlFile);
             HSSFWorkbook book = new HSSFWorkbook(in)) {
            fi = in;
            wb = book;
            row = rowFor(book, xlSheet, rowNum);
            for (int col = 0; col < values.length; col++) {
                row.createCell(col).setCellValue(values[col]);
            }
            save(book, xlFile);
        }
    }

    /** Returns the requested row, creating the sheet and/or the row when they do not exist yet. */
    private static HSSFRow rowFor(HSSFWorkbook book, String sheetName, int rowNum) {
        ws = book.getSheet(sheetName);
        if (ws == null) {
            ws = book.createSheet(sheetName);
        }
        HSSFRow target = ws.getRow(rowNum);
        // getRow() returns null for rows that were never written.
        return target != null ? target : ws.createRow(rowNum);
    }

    /** Writes the workbook back to {@code xlFile} and always closes the output stream. */
    private static void save(HSSFWorkbook book, String xlFile) throws IOException {
        try (FileOutputStream out = new FileOutputStream(xlFile)) {
            fo = out;
            book.write(out);
        }
    }
}

Understanding what the code does hints at the fix: it iterates over the 231 table rows, reads the data from each and writes it to an Excel file. The writing happens row by row and, within each row, cell by cell; for every cell you call setCellData(...) in XLUtils, and this is where it starts to slow down.
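Each call to setCellData reads the file from disk, opens it, adds the data, writes the workbook back and closes the file. Since you call it once per cell, you end up calling it ~231 (rows) x 15 (cols) = 3465 times.
Imagine the time consumed by opening the file, writing the data and closing the file 3465 times. Because the workbook grows with every cell written, each of these round trips also takes longer than the previous one, which is why the run slows down as it progresses.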
So what to do,
You need to create a list of list
List<ArrayList> rows = new ArrayList()
This is a list of list ^ where each record in the list is another list
For each row you read from the table (including the header row), create a list of its cell values and add that list to the rows list.
Finally, add a utility method to XLUtils that accepts this List<ArrayList> and
opens the file once, iterates over the rows list writing the values, and closes the file.
Imagine it as if you are trying to move from house to house, and you were using your personal small pickup truck car to move stuff. It won't take all the house stuff so you would go back and forth for a while. On the other hand, you could have used a furniture truck that picks up all furniture and loads it at once.

Related

Reading data from Excel in selenium getting "Error occurred during initialization of boot layer":java.lang.module.ResolutionException:

Code :
package Selenium;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.util.NumberToTextConverter;
public class excelReader {
//Identify Testcases column by scanning the entire 1st row
//once column is identified then scan entire testcase column to identify purchase testcase row
//after you grab purchase testcase row = pull all the data of that row and feed into test
/**
 * Returns every cell of the row(s) in sheet "testdata" whose "TestCases" column equals
 * {@code testcaseName} (case-insensitive).
 *
 * @param testcaseName test case name to look up
 * @return the cells of the matching row(s) as text; empty when nothing matches
 * @throws IOException if the workbook cannot be read
 */
public static ArrayList<String> getData(String testcaseName) throws IOException
{
    ArrayList<String> a = new ArrayList<String>();
    // try-with-resources: the workbook and the stream were never closed before.
    try (FileInputStream fis = new FileInputStream("C://Users//Sivaranjani Gopal//Desktop//siva.xlsx");
         XSSFWorkbook workbook = new XSSFWorkbook(fis))
    {
        int sheets = workbook.getNumberOfSheets();
        for (int i = 0; i < sheets; i++)
        {
            if (!workbook.getSheetName(i).equalsIgnoreCase("testdata"))
            {
                continue;
            }
            XSSFSheet sheet = workbook.getSheetAt(i);
            Iterator<Row> rows = sheet.iterator();
            if (!rows.hasNext())
            {
                continue; // empty sheet: no header row to scan
            }
            // Identify the TestCases column by scanning the entire first row.
            Row firstrow = rows.next();
            int column = 0;
            Iterator<Cell> ce = firstrow.cellIterator();
            while (ce.hasNext())
            {
                Cell value = ce.next();
                if (value.getCellTypeEnum() == CellType.STRING
                        && value.getStringCellValue().equalsIgnoreCase("TestCases"))
                {
                    // The iterator skips undefined cells, so use the cell's real column
                    // index rather than a running counter.
                    column = value.getColumnIndex();
                }
            }
            System.out.println(column);
            // Scan the TestCases column for the requested test case.
            while (rows.hasNext())
            {
                Row r = rows.next();
                Cell key = r.getCell(column);
                if (key == null || key.getCellTypeEnum() != CellType.STRING
                        || !key.getStringCellValue().equalsIgnoreCase(testcaseName))
                {
                    continue;
                }
                // Pull all the data of the matching row.
                Iterator<Cell> cv = r.cellIterator();
                while (cv.hasNext())
                {
                    Cell c = cv.next();
                    if (c.getCellTypeEnum() == CellType.STRING)
                    {
                        a.add(c.getStringCellValue());
                    }
                    else
                    {
                        a.add(NumberToTextConverter.toText(c.getNumericCellValue()));
                    }
                }
            }
        }
    }
    return a;
}
/**
 * Entry point: looks up the test case named "siva"; the returned data is discarded.
 *
 * @param args unused
 * @throws IOException if the workbook cannot be read
 */
public static void main(String[] args) throws IOException {
    final String testcaseName = "siva";
    getData(testcaseName);
}
Could some one help on the error?
I am getting the below exception when I run the code:
Error occurred during initialization of boot layer
java.lang.module.ResolutionException: Modules jaxb.impl and jaxb.core export package com.sun.xml.bind.v2.model.annotation to module poi
Need help on the above exception.

java.lang.ArrayIndexOutOfBoundsException reading from excel file

I am trying to read the following data from an Excel sheet
With the following code
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
/**
 * Reads the cell at column 0, row 2 of the first sheet and stores its trimmed contents in
 * {@code data1}.
 *
 * @return {@code data1}; left unchanged when the file cannot be read or the cell is outside the sheet
 */
public String readUsernameFromExcel() {
    File src = new File("C:/filepath.xls");
    Workbook wb = null;
    try {
        wb = Workbook.getWorkbook(src);
        Sheet sh1 = wb.getSheet(0);
        // jxl addresses cells as getCell(column, row), both zero-based.
        final int column = 0;
        final int row = 2;
        if (column < sh1.getColumns() && row < sh1.getRows()) {
            data1 = sh1.getCell(column, row).getContents().trim();
        } else {
            // Asking for a cell outside the used range is what raised ArrayIndexOutOfBoundsException.
            System.err.println("Cell (" + column + "," + row + ") is outside the sheet: "
                    + sh1.getColumns() + " column(s) x " + sh1.getRows() + " row(s)");
        }
    } catch (BiffException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (wb != null) {
            wb.close();
        }
    }
    return data1;
}
So when I try and get the cell 0,1 I can pick up the username 1000483 just fine. But when I try to read 0,2 and I get java.lang.ArrayIndexOutOfBoundsException: 2.
What I'm trying to do is read data from an excel sheet return it as a String and then pass it in to login my application. But it seems when I try 0,2 I'm going outside of what is expected. I've tried a few things such as a for loop
// The sheet must be obtained before the loop, because the loop condition already uses it.
Sheet sh1 = wb.getSheet(0);
for (int rows = 0; rows < sh1.getRows(); rows++) {
    // Use the loop variable as the row index; jxl's getCell takes (column, row).
    Cell a2 = sh1.getCell(0, rows);
}
I understand the first number is the column and the second is the row. I also understand that the code isn't able to see past 0,1. I'm just at a loss as to how to get it to see the rest of the sheet after trying other solutions of the same problem.
sh1.getRows() returns 3. As loop starts from 0, sh1.getRows() needs to be decremented by 1 (as below). Below loop works fine and returns value properly.
import java.io.File;
import java.io.IOException;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
/** Prints the trimmed contents of every cell below the first row of the first sheet. */
public class Excel {
    public static void main(String[] args) {
        File src = new File("c:/filepath.xls");
        Workbook wb = null;
        try {
            wb = Workbook.getWorkbook(src);
            Sheet sh1 = wb.getSheet(0);
            // Row 0 is skipped; jxl's getCell takes (column, row).
            for (int rows = 1; rows < sh1.getRows(); rows++) {
                for (int column = 0; column < sh1.getColumns(); column++) {
                    Cell a2 = sh1.getCell(column, rows);
                    String data1 = a2.getContents().trim();
                    System.out.println(data1);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the workbook even when reading fails.
            if (wb != null) {
                wb.close();
            }
        }
    }
}
The above code works and fetches the data without error.
I used the same data as you, and I could get the value 1000484 with my code.
Here is my code :
package com.jason.xls;
import java.io.File;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
/** Reads a user name from a fixed cell of an .xls file with jxl. */
public class XlsParser {
    public static void main(String[] args) {
        final String path = "/home/coder/filepath.xls";
        System.out.println(readUserNameFromXls(path));
    }

    /**
     * Returns the trimmed contents of the cell at column 0, row 2 of the first sheet.
     *
     * @param path path of the .xls file
     * @return the cell contents, or {@code null} when the file or the cell cannot be read
     */
    public static String readUserNameFromXls(final String path) {
        File file = new File(path);
        Workbook wb = null;
        try {
            wb = Workbook.getWorkbook(file);
            Sheet sheet = wb.getSheet(0);
            Cell a2 = sheet.getCell(0, 2);
            return a2.getContents().trim();
        } catch (Exception e) {
            // Still best-effort (null result), but no longer silent about the cause.
            System.err.println("Could not read user name from " + path + ": " + e);
            return null;
        } finally {
            if (wb != null) {
                wb.close();
            }
        }
    }
}
I download jxl.jar from jxl.jar download here
My code result is : Code Result Image

How to access Excel as Database using Java 8

I am using Java 8.
When i am trying to access Excel data(basically this is my test data) through jdbc-odbc, i am getting "java.lang.ClassNotFoundException: sun.jdbc.odbc.JdbcOdbcDriver"
And also i am trying to access data as non DSN.
I surfed net and came to know that Oracle deprecated support to jdbc-odbc.
So what is the easiest way to access this Excel data using Java?
Connection con=null;
Statement stmt=null;
ResultSet rs=null;
String query = "select TestScript from [TS 360 Scripts$]";
try
{
Class.forName( "sun.jdbc.odbc.JdbcOdbcDriver" );
con = DriverManager.getConnection("jdbc:odbc:;Driver={Microsoft Excel Driver(*.xlsx)};DBQ=D://TS 360 Script with Count.xlsx");
stmt=con.createStatement();
rs=stmt.executeQuery(query);
while(rs.next())
{
System.out.println(rs.getString("TestScript"));
}
con.close();
rs.close();
stmt.close();
}
catch(Exception e)
{
e.printStackTrace();
}
Uday- you can easily do whatever you want to do with Apache POI jar
As you mentioned, your requirement is to get all rows whose isExecuted column contains the string Yes. I tried this with the POI jar.
Try this
package com.dd.selenium;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
 * Loads "Sheet1" of an .xls file into a String matrix and prints every row whose last
 * column equals "yes" (case-insensitive).
 */
public class PerformDDTest {
    private static HSSFWorkbook xlWBook;
    private static HSSFSheet xlSheet;
    private static HSSFRow xlRow;
    private static HSSFCell xlCell;
    private static String filePath = "/home/dinesh/";
    private static String fileName = "test.xls";

    public static void main(String[] args) throws InterruptedException {
        // try-with-resources: the input stream was never closed before.
        try (FileInputStream xlFile = new FileInputStream(filePath + fileName)) {
            // Access the required test data sheet
            xlWBook = new HSSFWorkbook(xlFile);
            // Assuming your data is in Sheet1 - if not use your own sheet name
            xlSheet = xlWBook.getSheet("Sheet1");
            int noOfRows = xlSheet.getPhysicalNumberOfRows();
            // column count is taken from the header row
            xlRow = xlSheet.getRow(0);
            int noOfColumns = xlRow.getLastCellNum();

            // excelData[r][c] holds the sheet contents; row 0 (the header) is left unfilled.
            String[][] excelData = new String[noOfRows][noOfColumns];
            for (int r = 1; r < noOfRows; r++) {
                // Look the row up once per row, not once per cell.
                xlRow = xlSheet.getRow(r);
                if (xlRow == null) {
                    continue; // undefined row: leave its entries null
                }
                for (int c = 0; c < noOfColumns; c++) {
                    xlCell = xlRow.getCell(c);
                    // getCell() returns null for cells that were never written.
                    excelData[r][c] = (xlCell == null) ? "" : xlCell.getStringCellValue();
                }
            }

            // The isExecuted flag is taken from the last column. If it lives elsewhere, use
            // its position - 1 instead (e.g. excelData[row][6] for the 7th column).
            for (int row = 1; row < noOfRows; row++) {
                String isExecuted = excelData[row][noOfColumns - 1];
                if ("yes".equalsIgnoreCase(isExecuted)) {
                    // prints the complete row where isExecuted is Yes
                    for (int col = 0; col < noOfColumns; col++) {
                        System.out.println(excelData[row][col]);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
I used this Excel Data:
Test Case Name Username Password Results IsExecute
APACHE_POI_TC testuser_1 Test#123 Pass Yes
APACHE_POI_TC testuser_2 Test#124 Pass No
APACHE_POI_TC testuser_3 Test#125 Pass Yes
APACHE_POI_TC testuser_4 Test#126 Pass Yes
APACHE_POI_TC testuser_5 Test#127 Pass No
APACHE_POI_TC testuser_6 Test#128 Pass Yes
Dont Access Excel file as a database. Instead use a jar such
as Apache POI For Microsoft Documents
Download Link: Apache POI For MS Docs- Jar
An Example For using this API:
Note: you must add apache poi jar to your build path before running it
package com.dd.selenium;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.openqa.selenium.By;
import org.openqa.selenium.firefox.FirefoxDriver;
/**
 * Data-driven login test: reads the credentials from row 1 of "Sheet1", logs in and out
 * with Firefox and writes "Pass" or "Failed" into column 3 of the same row.
 */
public class PerformDDTest {
    private static HSSFWorkbook xlWBook;
    private static HSSFSheet xlSheet;
    private static HSSFRow xlRow;
    private static HSSFCell xlCell;
    private static String filePath = "/home/dinesh/";
    private static String fileName = "test.xls";
    private static String url = "http://store.demoqa.com/";
    private static String result = "Pass";

    public static void main(String[] args) throws InterruptedException {
        try {
            // The workbook is loaded from the stream here, so the stream is closed right away.
            try (FileInputStream xlFile = new FileInputStream(filePath + fileName)) {
                xlWBook = new HSSFWorkbook(xlFile);
            }
            // Access the required test data sheet
            xlSheet = xlWBook.getSheet("Sheet1");
            xlRow = xlSheet.getRow(1);
            String username = xlRow.getCell(1).getStringCellValue();
            String password = xlRow.getCell(2).getStringCellValue();

            FirefoxDriver driver = new FirefoxDriver();
            try {
                driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
                driver.manage().window().maximize();
                driver.get(url);
                // XPath attribute tests use '@'; "[#id=...]" is not valid XPath.
                driver.findElement(By.xpath(".//*[@id='account']/a")).click();
                driver.findElement(By.id("log")).sendKeys(username);
                driver.findElement(By.id("pwd")).sendKeys(password);
                driver.findElement(By.id("login")).click();
                driver.findElement(By.xpath(".//*[@id='account_logout']/a")).click();
                Thread.sleep(5000);
            } catch (RuntimeException e) {
                // Selenium reports failures as unchecked exceptions; record them as a failed
                // run instead of aborting before the result is written.
                result = "Failed";
                e.printStackTrace();
            } finally {
                driver.quit();
            }

            setResultCell();
            try (FileOutputStream fout = new FileOutputStream(filePath + fileName)) {
                xlWBook.write(fout);
                fout.flush();
            }
        } catch (IOException e) {
            // The workbook itself could not be read or written.
            result = "Failed";
            setResultCell();
            e.printStackTrace();
        }
    }

    /** Stores {@code result} in column 3 of the current row, creating the cell when needed. */
    private static void setResultCell() {
        if (xlRow == null) {
            return; // the row was never loaded (e.g. the file could not be opened)
        }
        xlCell = xlRow.getCell(3, HSSFRow.RETURN_BLANK_AS_NULL);
        if (xlCell == null) {
            xlCell = xlRow.createCell(3);
        }
        xlCell.setCellValue(result);
    }
}
This might be a little late but in case you still have the issue you can retain access excel as a db with java 8 using Fillo: http://codoid.com/fillo/

Read data from Excel file in Selenium Java

I am trying to read data from excel sheet to automate my testing(with a number of login credentials). I am using a utility that I found on web. But it is not running successfully.
Here is the utility
package google;
import java.io.File;
import java.io.IOException;
import java.util.Hashtable;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
public class class2 {
static Sheet wrksheet;
static Workbook wrkbook =null;
static Hashtable dict= new Hashtable();
//Create a Constructor
public class2(String ExcelSheetPath) throws BiffException, IOException
{
//Initialize
wrkbook = Workbook.getWorkbook(new File(ExcelSheetPath));
//For Demo purpose the excel sheet path is hardcoded, but not recommended :)
wrksheet = wrkbook.getSheet("Sheet1");
}
//Returns the Number of Rows
public static int RowCount()
{
return wrksheet.getRows();
`enter code here` }
//Returns the Cell value by taking row and Column values as argument
public static String ReadCell(int column,int row)
{
return wrksheet.getCell(column,row).getContents();
}
//Create Column Dictionary to hold all the Column Names
public static void ColumnDictionary()
{`enter code here`
//Iterate through all the columns in the Excel sheet and store the value
for(int col=0; col <= wrksheet.getColumns();col++)
{
dict.put(ReadCell(col,0), col);
}
}
//Read Column Names
public static int GetCell(String colName)
{
try {
int value;
value = ((Integer) dict.get(colName)).intValue();
return value;
} catch (NullPointerException e) {
return (0);
}
}
}
And following is the class that calls this utility.
package google;
import java.io.IOException;
import jxl.read.biff.BiffException;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.ie.InternetExplorerDriver;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import google.class2;
public class class3 {
//Global initialization of Variables
static class2 xlsUtil;
WebDriver driver = new InternetExplorerDriver();
//Constructor to initialze Excel for Data source
public class3() throws BiffException, IOException
{
//Let's assume we have only one Excel File which holds all Testcases. Demo !!!
xlsUtil = new class2("C:/Users/admin/workspace/login.xls");
//Load the Excel Sheet Col in to Dictionary for Further use in our Test cases.
xlsUtil.ColumnDictionary();
}
#BeforeTest
public void EnvironmentalSetup()
{
System.setProperty("webdriver.chrome.driver",
"C:/Users/admin/Downloads/chromedriver.exe");
WebDriver driver = new ChromeDriver();
driver.get("http://192.168.1.20/dental/userlogin");
}
#Test
public void GmailLoginPage() throws InterruptedException {
//Create a for loop.. for iterate through our Excel sheet for all the test cases.
for(int rowCnt = 1;rowCnt <= xlsUtil.RowCount();rowCnt++)
{
//Enter User Name by reading data from Excel
WebElement userName = driver.findElement(By.name("UserName"));
userName.clear();
userName.sendKeys(xlsUtil.ReadCell(xlsUtil.GetCell("EmailUserName"), rowCnt));
//Enter Password
WebElement password = driver.findElement(By.name("Password"));
password.clear();
password.sendKeys(xlsUtil.ReadCell(xlsUtil.GetCell("Emailpassword"), rowCnt));
//Click on the Sign In Button
// WebElement signin = driver.findElement(By.name("signIn"));
password.submit();
//Sleep for some time,so that we can see things in action # Screen :)
Thread.sleep(2000);
}
}
}
But when I run this class it says "can't instantiate google.class3".
I don't get the mistake here.
Please help me run this code successfully.
FileInputStream file = new FileInputStream(new File("C:/Users/admin/workspace/login.xls"));
//Get the workbook instance for XLS file
HSSFWorkbook workbook = new HSSFWorkbook(file);
//Get first sheet from the workbook
HSSFSheet sheet = workbook.getSheetAt(0);
//Iterate through each rows from first sheet
Iterator<Row> rowIterator = sheet.iterator();
while (rowIterator.hasNext()) {
    Row row = rowIterator.next();
    //For each row, iterate through each columns
    Iterator<Cell> cellIterator = row.cellIterator();
    while (cellIterator.hasNext()) {
        Cell cell = cellIterator.next();
        // Column 0 holds the user name; every other cell is typed into the password field.
        if (cell.getColumnIndex() == 0) {
            driver.findElement(By.name("UserName")).sendKeys(cell.getStringCellValue());
        } else {
            driver.findElement(By.name("Password")).sendKeys(cell.getStringCellValue());
        }
    }
}
/**
 * Returns the string value of one cell of the workbook at {@code filePath}.
 *
 * @param sheetName sheet to read
 * @param rowNum    zero-based row index
 * @param colNum    zero-based column index
 */
public String getExcelData(String sheetName, int rowNum, int colNum) throws InvalidFormatException, IOException {
    // The input stream is closed on every path; it was leaked before.
    try (FileInputStream fis = new FileInputStream(filePath)) {
        Workbook wb = WorkbookFactory.create(fis);
        Sheet sh = wb.getSheet(sheetName);
        Row row = sh.getRow(rowNum);
        return row.getCell(colNum).getStringCellValue();
    }
}
/**
 * Returns {@code getLastRowNum() + 1} for the given sheet of the workbook at {@code filePath}.
 *
 * @param sheetName sheet to inspect
 */
public int getRowCount(String sheetName) throws InvalidFormatException, IOException {
    // The input stream is closed on every path; it was leaked before.
    try (FileInputStream fis = new FileInputStream(filePath)) {
        Workbook wb = WorkbookFactory.create(fis);
        Sheet sh = wb.getSheet(sheetName);
        return sh.getLastRowNum() + 1;
    }
}
/**
 * Writes {@code data} as a string cell and saves the workbook back to {@code filePath}.
 * The row is created when it does not exist yet.
 *
 * @param sheetName sheet to modify
 * @param rowNum    zero-based row index
 * @param colNum    zero-based column index
 * @param data      text to store
 */
public void setExcelData(String sheetName, int rowNum, int colNum, String data) throws InvalidFormatException, IOException {
    Workbook wb;
    // Read the whole workbook first and release the input stream before the file is rewritten.
    try (FileInputStream fis = new FileInputStream(filePath)) {
        wb = WorkbookFactory.create(fis);
    }
    Sheet sh = wb.getSheet(sheetName);
    Row row = sh.getRow(rowNum);
    if (row == null) {
        // getRow() returns null for rows that were never written.
        row = sh.createRow(rowNum);
    }
    Cell cel = row.createCell(colNum);
    cel.setCellType(Cell.CELL_TYPE_STRING);
    cel.setCellValue(data);
    // The output stream was never closed before, so nothing guaranteed the data reached the file.
    try (FileOutputStream fos = new FileOutputStream(filePath)) {
        wb.write(fos);
    }
}
/**
 * Returns {@code getLastCellNum()} of the given row of the workbook at {@code filePath}.
 *
 * @param sheetName sheet to inspect
 * @param rowNum    zero-based row index
 */
public int getcellCount(String sheetName, int rowNum) throws InvalidFormatException, IOException {
    // The input stream is closed on every path; it was leaked before.
    try (FileInputStream fis = new FileInputStream(filePath)) {
        Workbook wb = WorkbookFactory.create(fis);
        Sheet sh = wb.getSheet(sheetName);
        Row row = sh.getRow(rowNum);
        return row.getLastCellNum();
    }
}
public class ExcelLIb {
public static String filePath;
/**
 * Looks up a value in testdata\Test_Data.xlsx below the working directory: finds the row
 * whose first cell equals {@code testID}, finds {@code columnHeader} within that row and
 * returns the cell directly below it. Comparisons ignore case. When several rows match,
 * the last match wins.
 *
 * @param sheetName    sheet to search
 * @param testID       text expected in column 0 of the header row of the test
 * @param columnHeader header text to find in that row
 * @return the value below the header, or {@code null} when nothing matches
 */
public String getExcelData(String sheetName, String testID, String columnHeader) throws InvalidFormatException, IOException {
    String userDir = System.getProperty("user.dir");
    filePath = userDir + "\\testdata\\Test_Data.xlsx";
    String data = null;
    // The input stream is closed on every path; it was leaked before.
    try (FileInputStream fis = new FileInputStream(filePath)) {
        Workbook wb = WorkbookFactory.create(fis);
        Sheet sh = wb.getSheet(sheetName);
        // Same value getRowCount(sheetName) computes, without opening the file a second time.
        int rowcount = sh.getLastRowNum() + 1;
        for (int r = 0; r < rowcount; r++) {
            Row row = sh.getRow(r);
            // Undefined rows and cells come back as null.
            if (row == null || row.getCell(0) == null
                    || !row.getCell(0).getStringCellValue().equalsIgnoreCase(testID)) {
                continue;
            }
            int col = row.getLastCellNum();
            for (int c = 0; c < col; c++) {
                if (row.getCell(c) != null
                        && row.getCell(c).getStringCellValue().equalsIgnoreCase(columnHeader)) {
                    // The value sits in the row directly below the header cell.
                    Row valueRow = sh.getRow(r + 1);
                    if (valueRow != null && valueRow.getCell(c) != null) {
                        data = valueRow.getCell(c).getStringCellValue();
                    }
                    break;
                }
            }
        }
    }
    return data;
}

Writing a large resultset to an Excel file using POI

This is sort of inline w/ Writing a large ResultSet to a File but the file in question is an Excel file.
I'm using the Apache POI library to write an Excel file with a large data set retrieved from a ResultSet object. The data could range from a few thousand records to about 1 million; not sure how this translates into file system bytes in Excel format.
The following is a test code I wrote to check out the time taken to write such a large result set and also the performance implication w.r.t CPU & Memory.
/**
 * Copies a result set into a workbook (column labels in row 1, data from row 2) and writes
 * the workbook to {@code fileNameAndPath} once, after all rows have been added.
 *
 * @param rs              result set to export
 * @param numSheets       number of sheets passed to {@code ExcelFileUtil.createExcelWorkBook}
 * @param fileNameAndPath target file
 * @throws Exception on database or I/O failure
 */
protected void writeResultsetToExcelFile(ResultSet rs, int numSheets, String fileNameAndPath) throws Exception {
    BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(fileNameAndPath));
    try {
        ResultSetMetaData rsmd = rs.getMetaData();
        int numColumns = rsmd.getColumnCount();
        Workbook wb = ExcelFileUtil.createExcelWorkBook(true, numSheets);
        Row heading = wb.getSheetAt(0).createRow(1);
        for (int x = 0; x < numColumns; x++) {
            Cell cell = heading.createCell(x + 1);
            cell.setCellValue(rsmd.getColumnLabel(x + 1));
        }
        int rowNumber = 2;
        int sheetNumber = 0;
        while (rs.next()) {
            // Roll over to the next sheet once the current one is full.
            // NOTE(review): presumably numSheets is large enough for the data - confirm.
            if (rowNumber == 65001) {
                log("Sheet " + sheetNumber + "written; moving onto to sheet " + (sheetNumber + 1));
                sheetNumber++;
                rowNumber = 2;
            }
            Row row = wb.getSheetAt(sheetNumber).createRow(rowNumber);
            for (int y = 0; y < numColumns; y++) {
                row.createCell(y + 1).setCellValue(rs.getString(y + 1));
            }
            rowNumber++;
        }
        // write() serializes the entire workbook, so it must run exactly once. Calling it
        // per cell appended a full copy of the workbook to the file for every cell.
        wb.write(bos);
    } finally {
        bos.close();
    }
}
Not much luck with the above code. The file which is created seems to grow rapidly (~70Mb per sec). So I stopped the execution after about 10 minutes (killed the JVM when the file reaches 7Gb) and tried to open the file in Excel 2007. The moment I open it, the file size becomes 8k(!) and only the header and the first row are created. Not sure what I'm missing here.
Any ideas?
Using SXSSF poi 3.8
package example;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
 * Streams 10 columns of cell addresses for rows 4..99999 into a copy of mytemplate.xlsx
 * and saves the result as tempsxssf.xlsx.
 */
public class SXSSFexample {
    public static void main(String[] args) throws Throwable {
        XSSFWorkbook wb_template;
        FileInputStream inputStream = new FileInputStream("mytemplate.xlsx");
        try {
            wb_template = new XSSFWorkbook(inputStream);
        } finally {
            // Close the template stream even when the template cannot be parsed.
            inputStream.close();
        }

        SXSSFWorkbook wb = new SXSSFWorkbook(wb_template);
        wb.setCompressTempFiles(true);
        SXSSFSheet sh = (SXSSFSheet) wb.getSheetAt(0);
        sh.setRandomAccessWindowSize(100);// keep 100 rows in memory, exceeding rows will be flushed to disk
        for (int rownum = 4; rownum < 100000; rownum++) {
            Row row = sh.createRow(rownum);
            for (int cellnum = 0; cellnum < 10; cellnum++) {
                Cell cell = row.createCell(cellnum);
                String address = new CellReference(cell).formatAsString();
                cell.setCellValue(address);
            }
        }

        FileOutputStream out = new FileOutputStream("tempsxssf.xlsx");
        try {
            wb.write(out);
        } finally {
            // Close the output file even when writing fails.
            out.close();
        }
        // NOTE(review): SXSSF keeps flushed rows in temporary files; newer POI versions offer
        // SXSSFWorkbook.dispose() to delete them - confirm availability for the POI version in use.
    }
}
It requires:
poi-ooxml-3.8.jar,
poi-3.8.jar,
poi-ooxml-schemas-3.8.jar,
stax-api-1.0.1.jar,
xml-apis-1.0.b2.jar,
xmlbeans-2.3.0.jar,
commons-codec-1.5.jar,
dom4j-1.6.1.jar
Useful link
Oh. I think you're writing the workbook out 944,000 times. Your wb.write(bos) call is in the inner loop. I'm not sure this is quite consistent with the semantics of the Workbook class? From what I can tell in the Javadocs of that class, that method writes out the entire workbook to the output stream specified. And it's gonna write out every row you've added so far once for every row as the thing grows.
This explains why you're seeing exactly 1 row, too. The first workbook (with one row) to be written out to the file is all that is being displayed - and then 7GB of junk thereafter.
Unless you have to write formulas or formatting you should consider writing out a .csv file. Infinitely simpler, infinitely faster, and Excel will do the conversion to .xls or .xlsx automatically and correctly by definition.
You can use the SXSSFWorkbook implementation of Workbook. If you use styles in your Excel file, you can cache them with the Flyweight pattern to improve performance.
You can increase the performance of excel export by following these steps:
1) When you fetch data from database, avoid casting the result set to the list of entity classes. Instead assign it directly to List
List<Object[]> resultList =session.createSQLQuery("SELECT t1.employee_name, t1.employee_id ... from t_employee t1 ").list();
instead of
List<Employee> employeeList =session.createSQLQuery("SELECT t1.employee_name, t1.employee_id ... from t_employee t1 ").list();
2) Create excel workbook object using SXSSFWorkbook instead of XSSFWorkbook and create new row using SXSSFRow when the data is not empty.
3) Use java.util.Iterator to iterate the data list.
Iterator itr = resultList.iterator();
4) Write data into excel using column++.
int rowCount = 0;
while(itr.hasNext()){
    SXSSFRow row = xssfSheet.createRow(rowCount++);
    Object[] object = (Object[]) itr.next();
    // Restart at the first column for every row. With a single counter shared by all rows
    // the index kept growing and ran past the end of the array on the second row.
    int column = 0;
    //column 1
    row.setCellValue(object[column++]); // write logic to create cell with required style in setCellValue method
    //column 2
    row.setCellValue(object[column++]);
    // Drop the processed entry so the list does not keep it on the heap.
    itr.remove();
}
5) While iterating the list, write the data into excel sheet and remove the row from list using remove method. This is to avoid holding unwanted data from the list and clear the java heap size.
itr.remove();
For now I took #Gian's advice & limited the number of records per Workbook to 500k and rolled over the rest to the next Workbook. Seems to be working decent. For the above configuration, it took me about 10 mins per workbook.
I updated BigGridDemo to support multiple sheets.
BigExcelWriterImpl.java
package com.gdais.common.apache.poi.bigexcelwriter;
import static com.google.common.base.Preconditions.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.io.FilenameUtils;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
public class BigExcelWriterImpl implements BigExcelWriter {
private static final String XML_ENCODING = "UTF-8";
#Nonnull
private final File outputFile;
#Nullable
private final File tempFileOutputDir;
#Nullable
private File templateFile = null;
#Nullable
private XSSFWorkbook workbook = null;
#Nonnull
private LinkedHashMap<String, XSSFSheet> addedSheets = new LinkedHashMap<String, XSSFSheet>();
#Nonnull
private Map<XSSFSheet, File> sheetTempFiles = new HashMap<XSSFSheet, File>();
/**
 * Creates a writer for {@code outputFile}; temp files go into that file's parent directory.
 *
 * @param outputFile the .xlsx file to produce
 */
BigExcelWriterImpl(@Nonnull File outputFile) {
    this.outputFile = outputFile;
    this.tempFileOutputDir = outputFile.getParentFile();
}
#Override
public BigExcelWriter createWorkbook() {
workbook = new XSSFWorkbook();
return this;
}
#Override
public BigExcelWriter addSheets(String... sheetNames) {
checkState(workbook != null, "workbook must be created before adding sheets");
for (String sheetName : sheetNames) {
XSSFSheet sheet = workbook.createSheet(sheetName);
addedSheets.put(sheetName, sheet);
}
return this;
}
#Override
public BigExcelWriter writeWorkbookTemplate() throws IOException {
checkState(workbook != null, "workbook must be created before writing template");
checkState(templateFile == null, "template file already written");
templateFile = File.createTempFile(FilenameUtils.removeExtension(outputFile.getName())
+ "-template", ".xlsx", tempFileOutputDir);
System.out.println(templateFile);
FileOutputStream os = new FileOutputStream(templateFile);
workbook.write(os);
os.close();
return this;
}
#Override
public SpreadsheetWriter createSpreadsheetWriter(String sheetName) throws IOException {
if (!addedSheets.containsKey(sheetName)) {
addSheets(sheetName);
}
return createSpreadsheetWriter(addedSheets.get(sheetName));
}
#Override
public SpreadsheetWriter createSpreadsheetWriter(XSSFSheet sheet) throws IOException {
checkState(!sheetTempFiles.containsKey(sheet), "writer already created for this sheet");
File tempSheetFile = File.createTempFile(
FilenameUtils.removeExtension(outputFile.getName())
+ "-sheet" + sheet.getSheetName(), ".xml", tempFileOutputDir);
Writer out = null;
try {
out = new OutputStreamWriter(new FileOutputStream(tempSheetFile), XML_ENCODING);
SpreadsheetWriter sw = new SpreadsheetWriterImpl(out);
sheetTempFiles.put(sheet, tempSheetFile);
return sw;
} catch (RuntimeException e) {
if (out != null) {
out.close();
}
throw e;
}
}
private static Function<XSSFSheet, String> getSheetName = new Function<XSSFSheet, String>() {
#Override
public String apply(XSSFSheet sheet) {
return sheet.getPackagePart().getPartName().getName().substring(1);
}
};
#Override
public File completeWorkbook() throws IOException {
FileOutputStream out = null;
try {
out = new FileOutputStream(outputFile);
ZipOutputStream zos = new ZipOutputStream(out);
Iterable<String> sheetEntries = Iterables.transform(sheetTempFiles.keySet(),
getSheetName);
System.out.println("Sheet Entries: " + sheetEntries);
copyTemplateMinusEntries(templateFile, zos, sheetEntries);
for (Map.Entry<XSSFSheet, File> entry : sheetTempFiles.entrySet()) {
XSSFSheet sheet = entry.getKey();
substituteSheet(entry.getValue(), getSheetName.apply(sheet), zos);
}
zos.close();
out.close();
return outputFile;
} finally {
if (out != null) {
out.close();
}
}
}
private static void copyTemplateMinusEntries(File templateFile,
ZipOutputStream zos, Iterable<String> entries) throws IOException {
ZipFile templateZip = new ZipFile(templateFile);
#SuppressWarnings("unchecked")
Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) templateZip.entries();
while (en.hasMoreElements()) {
ZipEntry ze = en.nextElement();
if (!Iterables.contains(entries, ze.getName())) {
System.out.println("Adding template entry: " + ze.getName());
zos.putNextEntry(new ZipEntry(ze.getName()));
InputStream is = templateZip.getInputStream(ze);
copyStream(is, zos);
is.close();
}
}
}
private static void substituteSheet(File tmpfile, String entry,
ZipOutputStream zos)
throws IOException {
System.out.println("Adding sheet entry: " + entry);
zos.putNextEntry(new ZipEntry(entry));
InputStream is = new FileInputStream(tmpfile);
copyStream(is, zos);
is.close();
}
private static void copyStream(InputStream in, OutputStream out) throws IOException {
byte[] chunk = new byte[1024];
int count;
while ((count = in.read(chunk)) >= 0) {
out.write(chunk, 0, count);
}
}
#Override
public Workbook getWorkbook() {
return workbook;
}
#Override
public ImmutableList<XSSFSheet> getSheets() {
return ImmutableList.copyOf(addedSheets.values());
}
}
SpreadsheetWriterImpl.java
package com.gdais.common.apache.poi.bigexcelwriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Calendar;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.util.CellReference;
class SpreadsheetWriterImpl implements SpreadsheetWriter {
private static final String XML_ENCODING = "UTF-8";
private final Writer _out;
private int _rownum;
SpreadsheetWriterImpl(Writer out) {
_out = out;
}
#Override
public SpreadsheetWriter closeFile() throws IOException {
_out.close();
return this;
}
#Override
public SpreadsheetWriter beginSheet() throws IOException {
_out.write("<?xml version=\"1.0\" encoding=\""
+ XML_ENCODING
+ "\"?>"
+
"<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">");
_out.write("<sheetData>\n");
return this;
}
#Override
public SpreadsheetWriter endSheet() throws IOException {
_out.write("</sheetData>");
_out.write("</worksheet>");
closeFile();
return this;
}
/**
* Insert a new row
*
* #param rownum
* 0-based row number
*/
#Override
public SpreadsheetWriter insertRow(int rownum) throws IOException {
_out.write("<row r=\"" + (rownum + 1) + "\">\n");
this._rownum = rownum;
return this;
}
/**
* Insert row end marker
*/
#Override
public SpreadsheetWriter endRow() throws IOException {
_out.write("</row>\n");
return this;
}
#Override
public SpreadsheetWriter createCell(int columnIndex, String value, int styleIndex)
throws IOException {
String ref = new CellReference(_rownum, columnIndex).formatAsString();
_out.write("<c r=\"" + ref + "\" t=\"inlineStr\"");
if (styleIndex != -1) {
_out.write(" s=\"" + styleIndex + "\"");
}
_out.write(">");
_out.write("<is><t>" + value + "</t></is>");
_out.write("</c>");
return this;
}
#Override
public SpreadsheetWriter createCell(int columnIndex, String value) throws IOException {
createCell(columnIndex, value, -1);
return this;
}
#Override
public SpreadsheetWriter createCell(int columnIndex, double value, int styleIndex)
throws IOException {
String ref = new CellReference(_rownum, columnIndex).formatAsString();
_out.write("<c r=\"" + ref + "\" t=\"n\"");
if (styleIndex != -1) {
_out.write(" s=\"" + styleIndex + "\"");
}
_out.write(">");
_out.write("<v>" + value + "</v>");
_out.write("</c>");
return this;
}
#Override
public SpreadsheetWriter createCell(int columnIndex, double value) throws IOException {
createCell(columnIndex, value, -1);
return this;
}
#Override
public SpreadsheetWriter createCell(int columnIndex, Calendar value, int styleIndex)
throws IOException {
createCell(columnIndex, DateUtil.getExcelDate(value, false), styleIndex);
return this;
}
#Override
public SpreadsheetWriter createCell(int columnIndex, Calendar value)
throws IOException {
createCell(columnIndex, value, -1);
return this;
}
}

Categories

Resources