extracting contents from csv using java - java

I need a java program that would extract info from a .csv file and run commands on a sql bank.
I've made a program that is working great so far, but I've made it to work with .xsl by mistake and can't make it to work with a csv file
here is the code:
package teste3;
import java.io.File;
import java.io.IOException;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
public class Teste3 {
private String inputFile;
String[][] data = null;
String[][] user = null;
public void setInputFile(String inputFile)
{
this.inputFile = inputFile;
}
public String[][] read() throws IOException
{
File inputWorkbook = new File(inputFile);
Workbook w;
try
{
w = Workbook.getWorkbook(inputWorkbook);
Sheet sheet = w.getSheet(0);
data = new String[sheet.getColumns()][sheet.getRows()];
user = new String[sheet.getColumns()][sheet.getRows()];
int j = 11;
int a = 0;
sheet.getColumns();
{
for (int i = 1; i < sheet.getRows(); i++)
{
Cell cell = sheet.getCell(j, i);
data[0][i] = cell.getContents();
System.out.println(cell.getContents());
}
}
j= 9;
for (int i = 1; i < sheet.getRows(); i++)
{
Cell cell = sheet.getCell(j, i);
user[0][i] = cell.getContents();
System.out.println(user[0] [i]);
}
}
catch (BiffException e)
{
}
return data;
}
public static void main(String[] args) throws IOException
{
Teste3 test = new Teste3();
test.setInputFile("C:\\Users\\a0046059\\Documents\\teste.xls");
test.read();
}
}
the program is really simple because of my lack of skills.
Can anyone help me ?
Thank you.

Related

Randomly select 10 words from a text file

I am attempting to create a program that reads two text files, which both contain thousands of words. I need to be able to select 10 words at random from each file and store them in an array of four string lists. I have created the following code so far, however this only selects one word from each file, not 10. How can this be done - preferably using an if statement?
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Random;
import java.util.Scanner;
public class RandomWordGenerator {
public static void main(String[] args) throws FileNotFoundException {
public class RandomWordGenerator {
public static void main(String[] args) throws IOException {
Path outputFile = Paths.get("output.txt");
ArrayList<String> randomWords1 = randomWordsFromFile("textfile1.txt", 10);
ArrayList<String> randomWords2 = randomWordsFromFile("textfile2.txt", 10);
OutputStream outputStream = new BufferedOutputStream(Files.newOutputStream(outputFile, CREATE));
System.out.println(randomWords1);
System.out.println(randomWords2);
outputStream.flush();
for (int i = 0; i < randomWords1.size(); i++) {
outputStream.write(randomWords1.get(i).getBytes());
}
for (int i = 0; i < randomWords2.size(); i++) {
outputStream.write(randomWords2.get(i).getBytes());
}
outputStream.close();
}
private static ArrayList<String> randomFromFile(String fileName, int count) throws FileNotFoundException {
Scanner scanner = new Scanner(new File(fileName));
ArrayList<String> words = new ArrayList<>();
while (scanner.hasNext()) {
words.add(scanner.next());
}
return randomFromWords(words, count);
}
static private ArrayList<String> randomFromWords(ArrayList<String> words, int count) {
ArrayList<String> randomWords = new ArrayList<>();
for (int i = 0; i < count; ) {
int random = new Random().nextInt(words.size());
if (randomWords.add(words.get(random))) {
i++;
}
}
return randomWords;
}
}
Generate 10 random numbers using Math.random() and times them by the size of your array of the numbers (as they are a value between 0 and 1) and use these numbers to select items from your array. For example try looping this 10 times: Array[Math.random()*sizeOfArray]
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Random;
import java.util.Scanner;
import static java.nio.file.StandardOpenOption.CREATE;
public class RandomWordGenerator {
public static void main(String[] args) throws IOException {
Path outputFile = Paths.get("output.txt");
ArrayList<String> randomWords1 = randomWordsFromFile("input1.txt", 10);
ArrayList<String> randomWords2 = randomWordsFromFile("input2.txt", 10);
OutputStream outputStream = new BufferedOutputStream(Files.newOutputStream(outputFile, CREATE));
System.out.println(randomWords1);
System.out.println(randomWords2);
outputStream.flush();
for (int i = 0; i < randomWords1.size(); i++) {
outputStream.write(randomWords1.get(i).getBytes());
}
for (int i = 0; i < randomWords2.size(); i++) {
outputStream.write(randomWords2.get(i).getBytes());
}
outputStream.close();
}
private static ArrayList<String> randomWordsFromFile(String fileName, int count) throws FileNotFoundException {
Scanner scanner = new Scanner(new File(fileName));
ArrayList<String> words = new ArrayList<>();
while (scanner.hasNext()) {
words.add(scanner.next());
}
return randomFromWords(words, count);
}
static private ArrayList<String> randomFromWords(ArrayList<String> words, int count) {
ArrayList<String> randomWords = new ArrayList<>();
for (int i = 0; i < count; ) {
int random = new Random().nextInt(words.size());
if (randomWords.add(words.get(random))) {
i++;
}
}
return randomWords;
}
}

Reading a 10 MB file in Apache POI

The project that I'm working on is trying to read a very large Excel file (a couple hundred columns and around 3000 rows) and recognize patterns in a series of letters. It works just fine on smaller files, but when I try to run it using this file I receive a java.lang.OutOfMemoryError: Java heap space error even when I'm only trying to analyze the first few rows. The error appears to be at Workbook wb = WorkbookFactory.create(new File(filepath));
I've tried a few of the solutions on this website, but have not come across any success. My code is as follows:
import java.awt.List;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class ExcelReader {
public int Reader(File file) throws IOException, EncryptedDocumentException, InvalidFormatException {
String filepath = file.getPath();
Workbook wb = WorkbookFactory.create(new File(filepath));
XSSFSheet sheet = (XSSFSheet) wb.getSheetAt(0);
XSSFRow row;
XSSFCell cell;
ArrayList<Integer> list = new ArrayList<Integer>();
int rows;
int cols = 0;
int temp = 0;
rows = sheet.getPhysicalNumberOfRows();
for (int i = 0; i <= 1; i++) {
row = sheet.getRow(i);
if (row != null) {
temp = sheet.getRow(i).getPhysicalNumberOfCells();
if (temp > cols)
cols = temp;
}
}
for (int r = 0; r <= 60; r++) {
row = sheet.getRow(r);
if (row != null) {
for (int c = 0; c <= cols; c++) {
int numblanks = 0;
cell = row.getCell((short) c);
if (cell != null) {
//System.out.print(cell + "\t\t");
} else {
//System.out.print("\t\t");
}
if (cell != null && cell.getCellType() == XSSFCell.CELL_TYPE_STRING) {
if ("N".equals(cell.getStringCellValue())) {
for (int k = c; k <= cols; k++) {
if ("-".equals(row.getCell(k).getStringCellValue())) {
numblanks++;
continue;
}
if ("S".equals(row.getCell(c + 2 + numblanks).getStringCellValue())
|| "T".equals(row.getCell(c + 2 + numblanks).getStringCellValue())) {
list.add((int) sheet.getRow(1).getCell(c).getNumericCellValue());
break;
}
}
}
}
}
System.out.println();
}
}
System.out.println();
System.out.println("Rows: " + rows);
System.out.println("Columns: " + cols);
System.out.println(list);
return temp;
}
}
Thank you for any help you can give me!
I solved this issue before. My case was to read a 23M Excel file which contains 230k rows.
Increase maximum heap size is not a good solution. Apache poi does not have a streaming mode to read data. This non-streaming mode costs too much memory.
My solution is to convert the data to xml and then use XMLReader to parse the data.
Please check the following sample code:
protected List<Entity> parseData(InputStream in) throws Exception {
OPCPackage pkg = OPCPackage.open(in);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) r.getSheetsData();
while (sheets.hasNext()) {
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
break; // if only need to process one sheet.
}
return SheetHandler.getRawData();
}
private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser =
XMLReaderFactory.createXMLReader();
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private boolean nextIsInlineString;
private boolean nextIsNull;
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
rawData = new ArrayList<Entity>();
}
public static List<Entity> getRawData() {
return rawData;
}
#Override
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
}
#Override
public void endElement(String uri, String localName, String name)
throws SAXException {
}
#Override
public void characters(char[] ch, int start, int length)
throws SAXException {
lastContents += new String(ch, start, length);
}
}
}

Unable to recognize OLE stream

I want to read xml file and I use jxl. But I get error jxl.read.biff.BiffException: Unable to recognize OLE stream.
When I search the internet , Everbody say that You should save as Excel 97-2003 Workbook.But My excel file is Excel 97-2003 .How can I solve this ?
import java.io.File;
import java.io.IOException;
import jxl.Cell;
import jxl.CellType;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
public class deneme {
private String inputFile;
public void setInputFile(String inputFile) {
this.inputFile = inputFile;
}
public void read() throws IOException {
File inputWorkbook = new File(inputFile);
Workbook w;
try {
w = Workbook.getWorkbook(inputWorkbook);
// Get the first sheet
Sheet sheet = w.getSheet(0);
// Loop over first 10 column and lines
for (int j = 0; j < sheet.getColumns(); j++) {
for (int i = 0; i < sheet.getRows(); i++) {
Cell cell = sheet.getCell(j, i);
CellType type = cell.getType();
if (type == CellType.LABEL) {
System.out.println("I got a label "
+ cell.getContents());
}
if (type == CellType.NUMBER) {
System.out.println("I got a number "
+ cell.getContents());
}
}
}
} catch (BiffException e) {
e.printStackTrace();
}
}
public static void main(String[] args) throws IOException {
deneme test = new deneme();
test.setInputFile("c:/data.xls");
test.read();
}
}
Just go and save again by clicking on Save As and choose Excel 97-2003 WorkBook.
it work for me....
Just renaming it to .xls file does not work.
You will have to manually go to Save as -> Excel 97-2003 Workbook and save it.

reading multiple csv files and putting it in different sheets in excel not working

I am trying to read few csv files which are present inside a folder and putting the absolute paths inside a list and then looping to the list and inserting data into excel in different sheets but for some reason only last loop is overwriting everything.
package excel;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
public class FileReading {
public static void csvToXLS(String filename, String sheetno) {
try {
String csvFile = filename; //csv file address
String excelFile = "E:\\test.xls"; //xlsx file address
HSSFWorkbook workBook = new HSSFWorkbook();
HSSFSheet sheet = workBook.createSheet(sheetno);
String currentLine = null;
int RowNum = 0;
BufferedReader br = new BufferedReader(new FileReader(csvFile));
while ((currentLine = br.readLine()) != null) {
String str[] = currentLine.split(",");
HSSFRow currentRow = sheet.createRow(RowNum);
RowNum++;
for (int i = 0; i < str.length; i++) {
currentRow.createCell(i).setCellValue(str[i]);
}
}
FileOutputStream fileOutputStream = new FileOutputStream(excelFile);
workBook.write(fileOutputStream);
fileOutputStream.close();
System.out.println("Done");
} catch (Exception ex) {
System.out.println(ex.getMessage() + "Exception in try");
}
}
public static List listOfFiles() {
List<String> results = new ArrayList<String>();
File[] files = new File("f:\\csv").listFiles();
for (File file : files) {
if (file.isFile()) {
results.add(file.getAbsolutePath());
}
}
return results;
}
public static void main(String[] args) {
List list = listOfFiles();
int size = list.size();
for (int i = 0; i < size; i++) {
csvToXLS(list.get(i).toString(), "sheet" + i + 1);
}
}
}
You're re-creating and saving the Workbook every time you call csvToXLS, so only the last call survives, overwriting all others.
Create the Workbook only once, before you start your for loop, passing it into csvToXLS. then that method will create a new Sheet on an existing Workbook. Then after the for loop ends, you can write out and save the Workbook.
I did this thanks for the idea #rgettman
package excel;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
public class CsvToXlx {
public static void csvToXLSX(HSSFWorkbook workBook,String filename, String sheetno) {
try {
String csvFile = filename; //csv file address
String excelFile = "E:\\test.xls"; //xlsx file address
//HSSFWorkbook workBook = new HSSFWorkbook();
HSSFSheet sheet = workBook.createSheet(sheetno);
String currentLine = null;
int RowNum = 0;
BufferedReader br = new BufferedReader(new FileReader(csvFile));
while ((currentLine = br.readLine()) != null) {
String str[] = currentLine.split(",");
HSSFRow currentRow = sheet.createRow(RowNum);
RowNum++;
for (int i = 0; i < str.length; i++) {
currentRow.createCell(i).setCellValue(str[i]);
}
}
FileOutputStream fileOutputStream = new FileOutputStream(excelFile);
workBook.write(fileOutputStream);
fileOutputStream.close();
System.out.println("Done");
} catch (Exception ex) {
System.out.println(ex.getMessage() + "Exception in try");
}
}
public static List listOfFiles() {
List<String> results = new ArrayList<String>();
File[] files = new File("f:\\csv").listFiles();
for (File file : files) {
if (file.isFile()) {
results.add(file.getAbsolutePath());
}
}
return results;
}
public static void main(String[] args) {
List list = listOfFiles();
int size = list.size();
HSSFWorkbook workBook = new HSSFWorkbook();
for (int i = 0; i < size; i++) {
csvToXLSX(workBook,list.get(i).toString(), "sheet" + i + 1);
}
}
}

How to read values from an excel file using Apache POI

I am currently working on a project for my computer science class, and I was trying to figure out how to retrieve and print certain values from a file in excel. Such as, how would I go about printing the integer in column J, row 6?
Better yet, is there a way for me to return the row number of a string in column 1? Such as, if I had a string "Phone" in column 1, could I use a command to return the row number of the first instance of "phone"?
I have looked at other questions, none of which sufficiently answered my own.
Here you go refer to this class file for iterating over an excel file
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.util.Iterator;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class Test {
private static final String FILE_NAME = "/users/developer/Documents/myFile.xlsx";
public void employeesUpload() {
String fName = "";
String lName = "";
String phoneNumber = "";
String email = "";
String gender = "";
String employeeCode = "";
try {
FileInputStream excelFile = new FileInputStream(new File(FILE_NAME));
Workbook workbook = new XSSFWorkbook(excelFile);
Sheet datatypeSheet = workbook.getSheetAt(0);
Iterator<Row> iterator = datatypeSheet.iterator();
int rowIndex = 0;
DataFormatter formatter = new DataFormatter();
while (iterator.hasNext()) {
Row currentRow = iterator.next();
if (rowIndex > 0) {
Iterator<Cell> cellIterator = currentRow.iterator();
employeeCode = fName = lName = phoneNumber = email = gender = "";
int cellIndex = 0;
while (cellIndex <= 5) {
Cell currentCell = currentRow.getCell(cellIndex);
if (cellIndex == 4) {
employeeCode = formatter.formatCellValue(currentCell).trim();
}
if (cellIndex == 1) {
fName = formatter.formatCellValue(currentCell).trim();
}
if (cellIndex == 2) {
lName = formatter.formatCellValue(currentCell).trim();
}
if (cellIndex == 0) {
email = formatter.formatCellValue(currentCell);
email = email.trim().toLowerCase();
}
if (cellIndex == 3) {
phoneNumber = formatter.formatCellValue(currentCell).trim();
}
cellIndex++;
}
Cell resultCell = currentRow.getCell(7);
if (resultCell == null) {
resultCell = currentRow.createCell(7);
}
Cell employementIdCell = currentRow.getCell(8);
if (employementIdCell == null) {
employementIdCell = currentRow.createCell(8);
}
if (true) {
resultCell.setCellType(Cell.CELL_TYPE_STRING);
employementIdCell.setCellValue("Success");
resultCell.setCellValue(email);
} else {
resultCell.setCellType(Cell.CELL_TYPE_STRING);
resultCell.setCellValue("Error");
}
}
rowIndex++;
}
FileOutputStream outputStream = new FileOutputStream(FILE_NAME);
workbook.write(outputStream);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) throws ParseException, UnsupportedEncodingException {
Test employeesBulkUpload = new Test();
employeesBulkUpload.employeesUpload();
}
}
Hope this helps :)
user https://github.com/jueyue/easypoi this jar
use annotion to easy read excel
public class ExcelImportNewDateTest {
#Test
public void importTest() {
ImportParams params = new ImportParams();
params.setTitleRows(1);
params.setHeadRows(1);
long start = new Date().getTime();
List<NewDateEntity> list = ExcelImportUtil.importExcel(
new File(FileUtilTest.getWebRootPath("import/ExcelNewDateTest.xlsx")), NewDateEntity.class, params);
System.out.println(new Date().getTime() - start);
Assert.assertEquals(list.size(), 100);
System.out.println(list.size());
System.out.println(ReflectionToStringBuilder.toString(list.get(1)));
}
}

Categories

Resources