public class DataMiner {
private static BigData app = new BigData();
private static DomainOfConstants doc = new DomainOfConstants();
private static Logger log = Logger.getLogger(DataMiner.class);
private static DBManager conn = new DBManager();
private static java.sql.Connection con = null;
private static AmazonS3 s3Client;
private static Iterator<String> itr;
private static List<String> entries = new ArrayList<String>();
private static S3Object s3Object;
private static ObjectMetadata meta;
public static InputStream dataStream;
public static byte[] buffer = new byte[1024];
public static File file = new File(app.getCurrentPacsId()+".txt");
private static void obtainConnection(){
conn.connection();
entries = conn.grabDataSet();
conn.closeDb();
downloadBucket();
}
/*
*
* The Java heap size limits for Windows are:
* maximum possible heap size on 32-bit Java: 1.8 GB
* recommended heap size limit on 32-bit Java: 1.5 GB (or 1.8 GB with /3GB option)
*
* */
/*-------------Download and un-zip backup file-------------*/
private static void downloadBucket(){
try {
app.setAwsCredentials(doc.getAccessKey(), doc.getSecretKey());
s3Client = AmazonS3ClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider(app.getAwsCredentials())).withRegion(Regions.US_EAST_1).build();
System.out.println("Connected to S3");
itr = entries.iterator();
while(itr.hasNext()){
app.setBucketKey(itr.next());
String key = app.getBucketKey();
app.setCurrentPacsId(key);
s3Object = s3Client.getObject(new GetObjectRequest(doc.getDesiredBucket(), app.getBucketKey()));
try {
ZipInputStream zis = new ZipInputStream(s3Object.getObjectContent());
ZipEntry entry = zis.getNextEntry();
extractObjects(buffer, s3Client, zis, entry);
} catch (AmazonServiceException e) {
log.error(e);
} catch (SdkClientException e) {
log.error(e);
} catch (IOException e) {
log.error(e);
}
}
System.out.println("Processing complete");
} catch (IllegalArgumentException e) {
e.printStackTrace();
}
}
public static void extractObjects(byte[] buffer, AmazonS3 s3Client, ZipInputStream zis, ZipEntry entry) throws IOException {
PipedOutputStream outputStream = null;
PipedInputStream is = null;
try {
while (entry != null)
{
String fileName = entry.getName();
if (fileName == "lib") {
fileName = entry.getName();
}
boolean containsBackup = fileName.contains(doc.getDesiredFile());
if (containsBackup == true) {
System.out.println("A back up file was found");
long start = System.currentTimeMillis();
formatSchemaName();
System.out.println("Extracting :" + app.getCurrentPacsId());
log.info("Extracting " + app.getCurrentPacsId() + ",
compressed: " + entry.getCompressedSize() + " bytes,
extracted: " +
entry.getSize() + " bytes");
//ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
outputStream = new PipedOutputStream();
is = new PipedInputStream(outputStream);
int len;
while ((len = zis.read(buffer)) >= 0)
{
outputStream.write(buffer, 0, len);
}
//InputStream is = new ByteArrayInputStream(outputStream.toByteArray());
meta = new ObjectMetadata();
meta.setContentLength(file.length());
fileName = app.getCurrentPacsId();
runDataConversion(is,s3Client,fileName);
recordTime(start);
is.close();
outputStream.close();
System.out.println("Unzip complete");
}
else{
System.out.println("No back up found");
}
entry = zis.getNextEntry();
}
zis.closeEntry();
zis.close();
} catch (AmazonServiceException e) {
log.error(e);
} catch (SdkClientException e) {
log.error(e);
}
}
/*------------Formating the replacment file name---------*/
private static void formatSchemaName(){
String s3Key = app.getCurrentPacsId();
String id = s3Key.replace(".zip", ".txt");
id = id.substring(id.indexOf("_"));
id = id.replaceFirst("_", "");
app.setCurrentPacsId(id);
}
/*---------------Process the data file----------------------*/
private static void runDataConversion(PipedInputStream is, AmazonS3 s3Client, String fileName) {
DataProcessor convert = new DataProcessor(s3Client);
convert.downloadBucket(is,fileName);
}
/*-------Records execution time of program in min/sec------*/
private static void recordTime(long start) throws IOException {
long end = System.currentTimeMillis();
long minutes = TimeUnit.MILLISECONDS.toMinutes(end - start);
long seconds = TimeUnit.MILLISECONDS.toSeconds(end - start);
System.out.println("Execution speed "+ minutes + ":" + (seconds % 60) +" min/sec\n");
}
And here is the class that does some text file processing.The code is very slow overall when processing files up to 3.5gb. It takes 3 hours to do so while running. I have tried using piped streams over byte streams. Java heap size set to -xms2800m on a 64 bit JDK.
public class DataProcessor {
private static AmazonS3 s3Client;
private static ObjectMetadata meta;
private static DomainOfConstants doc = new DomainOfConstants();
private static BigData app = new BigData();
public static File file = new File(app.getCurrentPacsId()+".txt");
private static Logger log = Logger.getLogger(DataProcessor.class);
//Construct connection
public DataProcessor (AmazonS3 s3Client){
this.s3Client = s3Client;
}
//
public void downloadBucket(PipedInputStream is, String fileName) {
try {
File dataStream = dataConversion(is);
s3Client.putObject(doc.getDestinationBucket(),FilenameUtils.getFullPath(doc.getDestinationKey()) + "Modified_"+ fileName, dataStream);
} catch (AmazonServiceException e) {
e.printStackTrace();
log.error(e);
} catch (SdkClientException e) {
e.printStackTrace();
log.error(e);
}
}
//Setup reading and writing streams
public static File dataConversion(PipedInputStream stream) {
BufferedReader reader = null;
BufferedOutputStream streamOut = null;
String line;
try {
reader = new BufferedReader(new InputStreamReader(stream,doc.getFileFormat()));
streamOut = new BufferedOutputStream(new FileOutputStream(file));
meta = new ObjectMetadata();
while(( line = reader.readLine() ) != null)
{
processLine(reader, streamOut, line);
}
}
catch (IOException e) {
e.printStackTrace();
} finally {
try {
streamOut.close();
reader.close();
} catch (IOException e) {
e.printStackTrace();
log.error(e);
}
}
return file;
}
/*---------------------------------------Data processing------------------------------------------------*/
/*-----------Process and print lines---------*/
private static void processLine(BufferedReader reader, BufferedOutputStream streamOut, String line) {
try {
String newLine = System.getProperty("line.separator");
while (reader.ready()) {
if (line.contains(doc.getInsert())) {
handleData(streamOut, line);
} else if (line.contains(doc.getUse())) {
handleSchemaName(streamOut, line);
} else {
streamOut.write(line.toLowerCase().getBytes(Charset.forName(doc.getFileFormat()).toString()));
streamOut.write(newLine.getBytes());
}
line = reader.readLine();
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
log.error(e);
} catch (IOException e) {
e.printStackTrace();
log.error(e);
}
}
/*-----------Replace-Schema-Name-----------*/
private static void handleSchemaName(BufferedOutputStream streamOut, String line) throws IOException {
line = line.replace(line, "USE " + "`" + doc.getSchemaName() + app.getCurrentPacsId() + "`;");
streamOut.write(line.getBytes(Charset.forName(doc.getFileFormat())));
}
/*--------Avoid-Formating-Data-Portion-of-file--------*/
private static void handleData(BufferedOutputStream streamOut, String line) throws IOException {
StringTokenizer tk = new StringTokenizer(line);
while (tk.hasMoreTokens()) {
String data = tk.nextToken();
if (data.equals(doc.getValue())) {
streamOut.write(data.toLowerCase().getBytes(Charset.forName(doc.getFileFormat()).toString()));
data = tk.nextToken();
while (tk.hasMoreTokens()) {
streamOut.write(data.getBytes(Charset.forName(doc.getFileFormat())));
data = tk.nextToken();
}
}
streamOut.write(line.toLowerCase().getBytes(Charset.forName(doc.getFileFormat().toString())));
streamOut.write(" ".getBytes(Charset.forName(doc.getFileFormat())));
}
}
Rule 1 is always to use a bigger buffer. 1024 is pitifully small. Try 32-64K.
You need to start the pipe reading thread before doing any writes to the pipe. In fact I'm surprised you don't get 'read end dead' errors. Does this code really work at all?
In fact get rid of the piped streams. Use a single thread and do all the processing as you go.
Get rid of the ready() test. It is an extra system call for nothing. Just read until end of stream.
Use a BufferedWriter instead of a BufferedOutputStream and stop converting all those strings to bytes (and use BufferedWriter.newLine() instead of the system property).
Related
i need to achieve the task as below:-
1). File upload on primary location:-
I want to read from a file and write it to primary location(remote file server).
2). File upload on multiple secondary locations:-
Same time while writing to primary location is running, parallelly
I want to read some chunks of bytes from primary location file
and writing it to multiple secondary location.
I have tried a below program for above approach:-
BufferedInputStream bin = null;
ReadableByteChannel channel = null;
int bufferSize = 1048576;
int readBufferSize = 1024*4;
java.nio.ByteBuffer byteBuffer = java.nio.ByteBuffer.allocate(readBufferSize);
InputStream is = new FileInputStream(new File("D:\\Harisingh\\300MB.txt"));
bin = new BufferedInputStream(is,bufferSize);
channel = Channels.newChannel(bin);
int retryCnt = 0;
ByteArrayOutputStream baOS = new ByteArrayOutputStream(bufferSize);
int totalBytes=0;
int itrCount=0;
int maxIterateCnt = 1;
int len;
//primary location writing
SmbFile smbFile = new SmbFile("smb://user:Password#fileserver1ind1.hqdev.india/data/Harisingh/collab_4_1_4/primary.txt");
BufferedOutputStream bFout = new BufferedOutputStream(new SmbFileOutputStream(smbFile));
SmbFileInputStream fis = new SmbFileInputStream("smb://user:Password#fileserver1ind1.hqdev.india/data/Harisingh/collab_4_1_4/primary.txt");
BufferedInputStream binPrimary = new BufferedInputStream(fis);
SmbFileOutputStream secLocation1= new SmbFileOutputStream(new SmbFile("smb://user:Password#fileserver1ind1.hqdev.india/data/Harisingh/collab_4_1_4/Secondary1.txt"));
SmbFileOutputStream secLocation2 = new SmbFileOutputStream(new SmbFile("smb://user:Password#fileserver1ind1.hqdev.india/data/Harisingh/collab_4_1_4/Secondary2.txt"));
SmbFileOutputStream secLocation3 = new SmbFileOutputStream(new SmbFile("smb://user:Password#fileserver1ind1.hqdev.india/data/Harisingh/Secondary/Secondary3.txt"));
try {
if(bufferSize > readBufferSize){
maxIterateCnt = bufferSize/readBufferSize;
}
while((len=channel.read(byteBuffer))>=0)
{
itrCount++;
totalBytes+=len;
baOS.write(byteBuffer.array(),0,len);
if(itrCount>=maxIterateCnt)
{
//primary location writing
try{
bFout.write(baOS.toByteArray(),0,totalBytes);
}catch(Exception se)
{
}
// secondary location writing
new Thread(){
public void run(){
System.out.println("Thread Running");
try {
int count;
byte[] readByteArray = new byte[1024*4];
while ((count = binPrimary.read(readByteArray)) != -1)
{
secLocation1.write(readByteArray, 0, count);
secLocation2.write(readByteArray, 0, count);
secLocation3.write(readByteArray, 0, count);
readByteArray = new byte[1024*4];
count= 0;
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}.start();
totalBytes=0;
baOS.reset();
itrCount=0;
}
byteBuffer.clear();
}
//primary location writing
try{
bFout.write(baOS.toByteArray(),0,totalBytes);
}catch(Exception se)
{
}
bFout.flush();
bFout.close();
int count;
// secondary location writing
new Thread(){
public void run(){
System.out.println("Thread Running");
try {
int count;
byte[] readByteArray = new byte[1024*4];
while ((count = binPrimary.read(readByteArray)) != -1)
{
secLocation1.write(readByteArray, 0, count);
secLocation2.write(readByteArray, 0, count);
secLocation3.write(readByteArray, 0, count);
readByteArray = new byte[1024*4];
count= 0;
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}.start();
Now with above program, it writes a file to primary location by main thread and secondary location writing is running in separate thread but i am facing the problem of some bytes writing missing on some secondary locations due to multi threading.
FYI
This question is related to io stream only. It is not specific to JCIFS so you can use same program with simple io stream, don't require smb io stream.
Can you please help me to sort out this?
Here is an example which I do not encourage to use "as is" - it's intention is to act as Proof Of Concept. In example the primary process is done first to achieve best performance for this phase. Then the secondaries are done each in own Thread parallel.
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.UnknownHostException;
import jcifs.smb.NtlmPasswordAuthentication;
import jcifs.smb.SmbException;
import jcifs.smb.SmbFile;
import jcifs.smb.SmbFileInputStream;
import jcifs.smb.SmbFileOutputStream;
public class testSmb {
static boolean append = true;
static int threadCount = 0;
static int bufferSize = 2048;
static NtlmPasswordAuthentication auth;
static File localFile;
static SmbFile primarySmbFile;
static BufferedInputStream input;
static SmbFileOutputStream output;
static SmbFile secondary1SmbFile;
static BufferedInputStream sec1Input;
static SmbFileOutputStream sec1Output;
static SmbFile secondary2SmbFile;
static BufferedInputStream sec2Input;
static SmbFileOutputStream sec2Output;
static SmbFile secondary3SmbFile;
static BufferedInputStream sec3Input;
static SmbFileOutputStream sec3Output;
public static Object lock = new Object();
public static void main(String... args) throws IOException {
System.out.println("Main thread Started");
init();
write(input, output);
writeInThread(sec1Input, sec1Output);
writeInThread(sec2Input, sec2Output);
writeInThread(sec3Input, sec3Output);
System.out.println("Main thread Finished");
}
public static void init() throws MalformedURLException,
FileNotFoundException, SmbException, UnknownHostException {
localFile = new File("c:\\temp\\myFile.txt");
if (localFile.length() > 20971520l) {
bufferSize = 131072;
}
String server = "myServer";
String username = "myUser";
String password = "myPass";
String path = "myPath";
auth = new NtlmPasswordAuthentication(server, username, password);
input = new BufferedInputStream(new FileInputStream(localFile));
primarySmbFile = new SmbFile("smb://" + server + "/" + path
+ "/primary.txt", auth, SmbFile.FILE_SHARE_READ
| SmbFile.FILE_SHARE_WRITE | SmbFile.FILE_SHARE_DELETE);
output = new SmbFileOutputStream(primarySmbFile, append);
if (!primarySmbFile.exists()) {
primarySmbFile.createNewFile();
}
sec1Input = new BufferedInputStream(new SmbFileInputStream(new SmbFile(
primarySmbFile, primarySmbFile.getName())));
secondary1SmbFile = new SmbFile("smb://" + server + "/" + path
+ "/secondary1.txt", auth, SmbFile.FILE_SHARE_READ
| SmbFile.FILE_SHARE_WRITE | SmbFile.FILE_SHARE_DELETE);
sec1Output = new SmbFileOutputStream(secondary1SmbFile, append);
if (!secondary1SmbFile.exists()) {
secondary1SmbFile.createNewFile();
}
sec2Input = new BufferedInputStream(new SmbFileInputStream(new SmbFile(
primarySmbFile, primarySmbFile.getName())));
secondary2SmbFile = new SmbFile("smb://" + server + "/" + path
+ "/secondary2.txt", auth, SmbFile.FILE_SHARE_READ
| SmbFile.FILE_SHARE_WRITE | SmbFile.FILE_SHARE_DELETE);
sec2Output = new SmbFileOutputStream(secondary2SmbFile, append);
if (!secondary2SmbFile.exists()) {
secondary2SmbFile.createNewFile();
}
sec3Input = new BufferedInputStream(new SmbFileInputStream(new SmbFile(
primarySmbFile, primarySmbFile.getName())));
secondary3SmbFile = new SmbFile("smb://" + server + "/" + path
+ "/secondary3.txt", auth, SmbFile.FILE_SHARE_READ
| SmbFile.FILE_SHARE_WRITE | SmbFile.FILE_SHARE_DELETE);
sec3Output = new SmbFileOutputStream(secondary3SmbFile, append);
if (!secondary3SmbFile.exists()) {
secondary3SmbFile.createNewFile();
}
}
public static void write(BufferedInputStream bufferedInputStream,
SmbFileOutputStream smbFileOutputStream) throws IOException {
byte[] buffer = new byte[bufferSize];
int len = 0;
try {
while ((len = bufferedInputStream.read(buffer)) > 0) {
synchronized (lock) {
System.out.println("'" + Thread.currentThread().getName()
+ "' writing " + bufferSize + "bytes");
smbFileOutputStream.write(buffer, 0, len);
smbFileOutputStream.flush();
}
}
} catch (IOException e) {
throw e;
} finally {
try {
bufferedInputStream.close();
} catch (Exception e) {
}
try {
smbFileOutputStream.flush();
smbFileOutputStream.close();
} catch (Exception e) {
}
}
}
public static void writeInThread(
final BufferedInputStream bufferedInputStream,
final SmbFileOutputStream smbFileOutputStream) {
threadCount++;
new Thread("Secondary thread " + threadCount) {
public void run() {
System.out.println(Thread.currentThread().getName()
+ ": started");
try {
write(bufferedInputStream, smbFileOutputStream);
} catch (Exception e) {
e.printStackTrace();
}
System.out.println(Thread.currentThread().getName()
+ ": finished");
}
}.start();
}
}
I'm sending a http request to get binary files (here i'm trying an image)
public class Main {
public static void main(String[] args) throws UnknownHostException,
IOException {
new Main(args);
}
public Main(String[] args) throws UnknownHostException, IOException {
lance(args);
}
private void lance(String[] args) throws UnknownHostException, IOException {
Lanceur lan = new Lanceur("www.cril.univ-artois.fr", "/IMG/arton451.jpg");
lan.requete();
}
}
public class Lanceur {
Socket s;
InputStream readStream;
OutputStream writeStream;
String host;
String ressource;
public Lanceur(String host, String ressource) throws UnknownHostException,
IOException {
s = new Socket(InetAddress.getByName(host), 80);
readStream = s.getInputStream();
writeStream = s.getOutputStream();
this.host = host;
this.ressource = ressource;
}
public void requete() throws IOException {
// String[] length = null;
writeStream.write(new String("GET " + ressource + " HTTP/1.1\r\n"
+ "Host: www.google.com\r\n" + "\r\n").getBytes());
writeStream.flush();
AnswerReader as = new AnswerReader(readStream);
as.read();
as.writeFile(this.ressource);
s.close();
}
}
public class AnswerReader {
BufferedReader br;
DataInputStream dis;
String status;
Map<String, String> attrs;
byte[] content;
public AnswerReader(InputStream is) {
br = new BufferedReader(new InputStreamReader(is));
dis = new DataInputStream(new BufferedInputStream(is));
}
public void read() throws NumberFormatException {
readStatus();
try {
readAttrs();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
String contentL = attrs.get("Content-Length");
readContent(Integer.valueOf(contentL));
}
public void readStatus() {
try {
status = br.readLine();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void readAttrs() throws IOException {
attrs = new HashMap<String, String>();
String line;
for (line = br.readLine(); line.length() > 0; line = br.readLine()) {
int index = line.indexOf(':');
attrs.put(line.substring(0, index), line.substring(index + 2));
}
}
private void readContent(int size) {
this.content = new byte[size];
byte[] buff = new byte[1024];
int copied = 0;
int read = 0;
while (copied < size) {
try {
read = dis.read(buff);
if (read == -1)
break;
} catch (IOException e) {
e.printStackTrace();
}
// byte[] byteArray = new String(buff).getBytes();
System.arraycopy(buff, 0, content, copied, read);
copied += read;
}
System.out.println(copied + "///" + size);
}
public void writeFile(String name) throws IOException {
String tab[] = name.split("/");
String filename = tab[tab.length - 1];
FileOutputStream fos = new FileOutputStream("./" + filename);
fos.write(content);
fos.flush();
fos.close();
}
}
The problem comes from readContent(). The content-length is fine, but it doesn't read all the data. From this example it will reads that :
22325///38125
The BufferedReader is buffering some of the binary data. You'll have to find another way to read the header lines using the unbuffered input stream instead, e.g. DataInputStream.readLine(), deprecation or no.
But you should use HttpURLConnection. It's easier.
I EDITED CODE
I'm making file transfer program with java
I have to send 21 files.
my code stops at Client's while loop in run()
(It doesn't print "file receive complete") <- see run() in Client
Server's CODE
class SendFileThread extends Thread {
private ServerSocket fileTransferServerSocket;
private Socket fileTransferSocket;
private BufferedReader requestReader;
private PrintWriter requestAnswerer;
private BufferedOutputStream fileWriter;
private int fileTransferPort = 12345;
public SendFileThread() {
try {
fileTransferServerSocket = new ServerSocket(fileTransferPort);
fileTransferSocket = fileTransferServerSocket.accept();
requestReader = new BufferedReader(new InputStreamReader(fileTransferSocket.getInputStream()));
fileWriter = new BufferedOutputStream(fileTransferSocket.getOutputStream());
} catch (IOException e) {
e.printStackTrace();
}
}
public void CloseTransferStream() {
try {
requestAnswerer.close();
requestReader.close();
fileWriter.close();
fileTransferSocket.close();
fileTransferServerSocket.close();
}
catch (IOException ioe) {
ioe.printStackTrace();
}
}
public void SendFile(String filename) {
try {
File file = new File(CLIENT_PATH + "/" + filename);
BufferedInputStream fileReader = new BufferedInputStream(new FileInputStream(file));
int packet;
while((packet = fileReader.read()) != -1)
fileWriter.write(packet);
fileWriter.flush();
fileReader.close();
}
catch (IOException ioe) {
System.out.println(ioe.getMessage());
}
//System.out.print(filename + " send complete (" + count + " times)");
}
public void ListenForRequester() {
try {
String input;
while((input = requestReader.readLine()) != null) {
if(input.equals("request file")) {
SendFile(requestReader.readLine());
}
else if(input.equals("end transfer"))
break;
else {
System.out.println("Something wrong");
}
}
}
catch(IOException ioe) {
ioe.getStackTrace();
}
}
public void run() {
ListenForRequester();
CloseTransferStream();
}
}
Client's CODE
class ReceiveFileThread extends Thread {
private Socket fileTransferSocket;
private int fileTransferPort = 12345;
private BufferedInputStream fileReader;
private PrintWriter fileRequester;
public ReceiveFileThread() {
try {
fileTransferSocket = new Socket(serverIP, fileTransferPort);
fileRequester = new PrintWriter(fileTransferSocket.getOutputStream(), true);
fileReader = new BufferedInputStream(fileTransferSocket.getInputStream());
}
catch (UnknownHostException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
}
public void CloseTransferStream() {
try {
fileRequester.close();
fileReader.close();
fileTransferSocket.close();
}
catch (IOException e) {
e.printStackTrace();
}
}
public synchronized void RequestFile(String filename) {
fileRequester.println("request file");
fileRequester.println(filename);
}
public synchronized void SendEndMsg() {
fileRequester.println("end transfer");
}
public void run() {
for(int i = 0;i < fileList.size();i++) {
String filename = (String)fileList.get(i);
RequestFile(filename);
try {
BufferedOutputStream fileWriter = new BufferedOutputStream(new FileOutputStream(new File(PROGRAM_PATH + "/" + filename)));
int packet = 0;
while((packet = fileReader.read()) > -1)
fileWriter.write(packet);
System.out.println("file receive complete");
fileWriter.close();
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
}
SendEndMsg();
CloseTransferStream();
}
}
It became 5 days that this error bothers me :(
Could anyone save me from this error?
Closing the socket's output stream will close the socket, In order to send multiple files you will have to make a couple of changes.
Server:
Before you start to send a file, send that file's length.
Client:
After you receive the file's length start to read that many bytes from the input stream and save them to a file, when you're done read the next file's length.
I have a requirement to change the encoding of a file from ANSI(windows-1252) to UTF8. I wrote below program to do it through java. This program converts the characters to UTF8, but when I opened the file in notepad++ the encoding type was displayed as ANSI as UTF8. This gives me error when I import this file in access db. A file with UTF8 encoding only is desired. Also the requirement is to convert the file without opening it in any editor.
public class ConvertFromAnsiToUtf8 {
private static final char BYTE_ORDER_MARK = '\uFEFF';
private static final String ANSI_CODE = "windows-1252";
private static final String UTF_CODE = "UTF8";
private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);
public static void main(String[] args) {
List<File> fileList;
File inputFolder = new File(args[0]);
if (!inputFolder.isDirectory()) {
return;
}
File parentDir = new File(inputFolder.getParent() + "\\"
+ inputFolder.getName() + "_converted");
if (parentDir.exists()) {
return;
}
if (parentDir.mkdir()) {
} else {
return;
}
fileList = new ArrayList<File>();
for (final File fileEntry : inputFolder.listFiles()) {
fileList.add(fileEntry);
}
InputStream in;
Reader reader = null;
Writer writer = null;
try {
for (File file : fileList) {
in = new FileInputStream(file.getAbsoluteFile());
reader = new InputStreamReader(in, ANSI_CHARSET);
OutputStream out = new FileOutputStream(
parentDir.getAbsoluteFile() + "\\"
+ file.getName());
writer = new OutputStreamWriter(out, UTF_CODE);
writer.write(BYTE_ORDER_MARK);
char[] buffer = new char[10];
int read;
while ((read = reader.read(buffer)) != -1) {
System.out.println(read);
writer.write(buffer, 0, read);
}
}
reader.close();
writer.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
Any pointers will be helpful.
Thanks,
Ashish
The posted code correctly transcodes from windows-1252 to UTF-8.
The Notepad++ message is confusing because "ANSI as UTF-8" has no obvious meaning; it appears to be an open defect in Notepad++. I believe Notepad++ means UTF-8 without BOM (see the encoding menu.)
Microsoft Access, being a Windows program, probably expects UTF-8 files to start with a byte-order-mark (BOM).
You can inject a BOM into the document by writing the code point U+FEFF at the start of the file:
import java.io.*;
import java.nio.charset.*;
public class Ansi1252ToUtf8 {
private static final char BYTE_ORDER_MARK = '\uFEFF';
public static void main(String[] args) throws IOException {
Charset windows1252 = Charset.forName("windows-1252");
try (InputStream in = new FileInputStream(args[0]);
Reader reader = new InputStreamReader(in, windows1252);
OutputStream out = new FileOutputStream(args[1]);
Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
writer.write(BYTE_ORDER_MARK);
char[] buffer = new char[1024];
int read;
while ((read = reader.read(buffer)) != -1) {
writer.write(buffer, 0, read);
}
}
}
}
On Windows 7 (64-Bit), running Java 8, I had to close every file. Otherwise, files get truncated to multiples of 4 kB. It is not enough to close the last set of files, I had to close every file to get the desired result. Posting my adapted version that adds error messages:
import java.io.*;
import java.nio.charset.*;
import java.util.ArrayList;
public class ConvertFromAnsiToUtf8 {
private static final char BYTE_ORDER_MARK = '\uFEFF';
private static final String ANSI_CODE = "windows-1252";
private static final String UTF_CODE = "UTF8";
private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);
private static final String PATH_SEP = "\\";
private static final boolean WRITE_BOM = false;
public static void main(String[] args)
{
if (args.length != 2) {
System.out.println("Please name a source and a target directory");
return;
}
File inputFolder = new File(args[0]);
if (!inputFolder.isDirectory()) {
System.out.println("Input folder " + inputFolder + " does not exist");
return;
}
File outputFolder = new File(args[1]);
if (outputFolder.exists()) {
System.out.println("Folder " + outputFolder + " exists - aborting");
return;
}
if (outputFolder.mkdir()) {
System.out.println("Placing converted files in " + outputFolder);
} else {
System.out.println("Output folder " + outputFolder + " exists - aborting");
return;
}
ArrayList<File> fileList = new ArrayList<File>();
for (final File fileEntry : inputFolder.listFiles()) {
fileList.add(fileEntry);
}
InputStream in;
Reader reader = null;
Writer writer = null;
int converted = 0;
try {
for (File file : fileList) {
try {
in = new FileInputStream(file.getAbsoluteFile());
reader = new InputStreamReader(in, ANSI_CHARSET);
OutputStream out = new FileOutputStream(outputFolder.getAbsoluteFile() + PATH_SEP + file.getName());
writer = new OutputStreamWriter(out, UTF_CODE);
if (WRITE_BOM)
writer.write(BYTE_ORDER_MARK);
char[] buffer = new char[1024];
int read;
while ((read = reader.read(buffer)) != -1) {
writer.write(buffer, 0, read);
}
++converted;
} finally {
reader.close();
writer.close();
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println(converted + " files converted");
}
}
I use JTextArea to show status of unzipping file.
But for a reason it does not display the appended text.
Can any one suggest a solution?
public class UnzipFile extends Thread{
private static FtpTabPanel panel;
private File archive,outputDir;
public UnzipFile(File archive, File outputDir, FtpTabPanel panel) {
UnzipFile.panel = panel;
this.archive = archive;
this.outputDir = outputDir;
}
#Override
public void run() {
super.run();
unzipArchive();
}
public void unzipArchive() {
try {
ZipFile zipfile = new ZipFile(archive);
for (Enumeration e = zipfile.entries(); e.hasMoreElements(); ) {
ZipEntry entry = (ZipEntry) e.nextElement();
unzipEntry(zipfile, entry, outputDir);
}
panel.statusTextArea.append(String.valueOf(System.currentTimeMillis()));
} catch (Exception e) {
OeExceptionDialog.show(e);
}
}
private void unzipEntry(ZipFile zipfile, final ZipEntry entry, File outputDir) {
if (entry.isDirectory()) {
createDir(new File(outputDir, entry.getName()));
return;
}
File outputFile = new File(outputDir, entry.getName());
if (!outputFile.getParentFile().exists()){
createDir(outputFile.getParentFile());
}
SwingUtilities.invokeLater(new Runnable() {
public void run() {
panel.statusTextArea.append("Extracting: " + entry + "\n");
}
});
try {
BufferedInputStream inputStream = new BufferedInputStream(zipfile.getInputStream(entry));
BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
IOUtils.copy(inputStream, outputStream);
outputStream.close();
inputStream.close();
}catch (IOException io){
OeExceptionDialog.show(io);
}catch (NullPointerException n){
OeExceptionDialog.show(n);
}catch (ArithmeticException a){
OeExceptionDialog.show(a);
}
}
}
In the below code i use SwingWorkers but it unzip just one item from zip file and nothing appears in the jtextArea
public class UnzipWorkers extends SwingWorker<String,Void> {
private WebTextArea statusTextArea;
private File archive,outputDir;
public UnzipWorkers(WebTextArea statusTextArea,File archive,File outputDir) {
this.archive=archive;
this.outputDir=outputDir;
this.statusTextArea = statusTextArea;
}
#Override
protected String doInBackground() throws Exception {
statusTextArea.append(String.valueOf(System.currentTimeMillis()));
try {
ZipFile zipfile = new ZipFile(archive);
for (Enumeration e = zipfile.entries(); e.hasMoreElements(); ) {
ZipEntry entry = (ZipEntry) e.nextElement();
String status = unzipEntry(zipfile, entry, outputDir);
return status;
}
} catch (Exception e) {
OeExceptionDialog.show(e);
}
return null;
}
#Override
protected void done() {
super.done();
try {
statusTextArea.append( get() + "\n");
FileTreePanel.btnRefresh.doClick();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (ExecutionException e) {
e.printStackTrace();
}
}
private String unzipEntry(ZipFile zipfile, final ZipEntry entry, File outputDir) {
String success = "Extracted failed: "+ entry + "\n";
if (entry.isDirectory()) {
createDir(new File(outputDir, entry.getName()));
}
File outputFile = new File(outputDir, entry.getName());
if (!outputFile.getParentFile().exists()){
createDir(outputFile.getParentFile());
}
try {
BufferedInputStream inputStream = new BufferedInputStream(zipfile.getInputStream(entry));
BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
IOUtils.copy(inputStream, outputStream);
outputStream.close();
inputStream.close();
success="Extracted successfully: " + entry + "\n";
}catch (IOException io){
OeExceptionDialog.show(io);
}catch (NullPointerException n){
OeExceptionDialog.show(n);
}catch (ArithmeticException a){
OeExceptionDialog.show(a);
}
return success;
}
private void createDir(File dir) {
if (!dir.exists()) {
try {
dir.mkdirs();
} catch (RuntimeException re) {
OeExceptionDialog.show(re);
}
}
}
}
Judging by this line:
panel.statusTextArea.append(String.valueOf(System.currentTimeMillis()));
you are running your code on the EDT, otherwise you'd get an IllegalThreadAccess exception. So, in effect, your whole extraction procedure is done as the handling of a single event. Your requests to update the TextArea are just being pushed to the event queue and wait there until you have done "handling" the event that triggered the extraction code.
You must run your code on a dedicated thread (use SwingWorker).
Did you try to move the
panel.statusTextArea.append(String.valueOf(System.currentTimeMillis()));
inside of the loop?