Split binary file [closed] - java

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 7 years ago.
Improve this question
The split method takes two arguments, name of file to split, and size of each split. Could you check if I'm on the write track? And the pseudocode on what to put in the for loop?
import java.io.*;
public class SplitFile {
public static void main(String[] args) throws IOException {
Split("testfile.pdf", 256);
}
public static Split(String filename, int splitSize) throws IOException {
int numberOfFiles = 0;
File file = new File(filename);
numberOfFiles = ((int) file.length() / splitSize) + 1;
for (; numberOfFiles >= 0; numberOfFiles--) {
DataInputStream in = new DataInputStream(new BufferedInputStream(
new FileInputStream(filename)));
DataOutputStream out = new DataOutputStream(
new BufferedOutputStream(new FileOutputStream(file))); //What do I put here?
}
}
}

Required changes
File object per output part, e.g.
Initialize data input stream outside the loop, not inside
Code
File original = new File(filename);
int numberOfFiles = ((int) original.length() / splitSize) + 1;
DataInputStream in =
new DataInputStream(new BufferedInputStream(new FileInputStream(filename)));
// <== just count through parts.
for (int i = 0; i < numberOfFiles; i++) {
File output = new File(String.format("%s-%d", filename, i));
// <== Part of file being output e.g. testfile.pdf-1, testfile.pdf-2
DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(output)));
}
For the actual writing...
read bytes from input stream using read() call
write bytes to output stream using write() call
Two approaches, either 1 byte at a time - easiest, but less efficient, or use a buffer, harder to code, but more efficient.
Buffered approach
long length = original.length();
DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(filename)));
int pos = 0;
byte[] buffer = new byte[splitSize];
for (...) {
...
// make sure you deal with file not being exactly divisible,
// last chunk might be smaller
long remaining = length - pos;
in.read(buffer, pos, (int) Math.min(splitSize, remaining));
out.write(buffer, 0, (int) Math.min(splitSize, remaining));
pos += splitSize;
}
1 byte at a time.
for (...) {
...
for (int i = 0; i < splitSize && pos < length; i++) {
out.write(in.read());
pos++;
}
}

You can do it using the Java NIO API in the following way.
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
public final class SplitFile {
public static void main(String[] args) throws IOException {
split("testfile.pdf", 256);
}
private static void split(String filename, int splitSize) throws IOException {
int i = filename.lastIndexOf('.');
String basename = filename.substring(0, i);
String ext = filename.substring(i + 1);
Path inputPath = Paths.get(filename);
int numberOfFiles = (int) (Files.size(inputPath) / splitSize) + 1;
try (FileChannel inputChannel = FileChannel.open(inputPath, StandardOpenOption.READ)) {
for (int j = 0; j < numberOfFiles; j++) {
String outputFilename = String.format("%s-%04d.%s", basename, j + 1, ext);
Path outputPath = inputPath.getParent().resolve(outputFilename);
try (FileChannel outputChannel = FileChannel.open(outputPath, StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
inputChannel.transferTo(j * splitSize, splitSize, outputChannel);
}
}
}
}
}

Related

How to create 7z file with Password protection?

I am able to create a 7z file but want to create the file with a password, I tried with the set compression method but there is not an option to set the key, Please help me how I can create a password-protected 7Z file in Java.
public static void main(String args[]) throws FileNotFoundException, IOException {
SevenZOutputFile sevenZOutput = new SevenZOutputFile(new File("D:\\Test\\outFile.7z"));
File entryFile = new File("D:\\Test\\Test_20200210200232.dat");
SevenZArchiveEntry entry = sevenZOutput.createArchiveEntry(entryFile, entryFile.getName());
sevenZOutput.putArchiveEntry(entry);
FileInputStream in = new FileInputStream(entryFile);
int len;
byte buffer[] = new byte[8192];
int transferedMegaBytes2=0;
while ((len = in.read(buffer)) > 0) {
sevenZOutput.write(buffer, 0, len);
transferredBytes += len;
int transferedMegaBytes = (int) (transferredBytes / 1048576);
if(transferedMegaBytes>transferedMegaBytes2){
System.out.println("Transferred: " + transferedMegaBytes + " Megabytes.");
transferedMegaBytes2=transferedMegaBytes;
}
}
sevenZOutput.closeArchiveEntry();
sevenZOutput.setContentCompression(SevenZMethod.AES256SHA256);
sevenZOutput.close();
}
Apache Commons Compress does not support creating 7Z with passwords.
https://commons.apache.org/proper/commons-compress/limitations.html
I've tidied up your code, added try-with-resources & annotated a couple of problem-areas:
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
import org.apache.commons.compress.archivers.sevenz.SevenZMethod;
import org.apache.commons.compress.archivers.sevenz.SevenZOutputFile;
public class Q66451111 {
private static final int KB = 1024;
private static final int MB = KB * KB;
public static void main(final String[] args) throws IOException {
final File entryFile = new File("D:\\Test\\Test_20200210200232.dat");
final File new7Z = new File("D:\\Test\\outFile.7z");
try ( final InputStream fin = new FileInputStream(entryFile);
final InputStream in = new BufferedInputStream(fin);
final SevenZOutputFile szof = new SevenZOutputFile (new7Z) )
{
final SevenZArchiveEntry entry = szof.createArchiveEntry(entryFile, entryFile.getName());
szof.putArchiveEntry(entry);
final byte buffer[] = new byte[8192];
int transferredBytes = 0;
int transferredBytesSincePrint = 0;
int len;
while ((len = in.read (buffer)) != -1 /* TODO Note: do NOT use '> 0' */) {
szof.write(buffer, 0, len);
transferredBytes += len;
transferredBytesSincePrint += len;
if (transferredBytesSincePrint > MB) {
transferredBytesSincePrint = 0;
System.out.println("Transferring.: " + ((double) transferredBytes / MB) + " Megabytes.");
}
}
System .out.println("Transferred..: " + ((double) transferredBytes / MB) + " Megabytes.");
szof.closeArchiveEntry();
szof.setContentCompression(SevenZMethod.AES256SHA256 /* FIXME Unsupported 7z Method!! */);
}
}
}

Testing disk performance: differs with and without using Java

I've been asked to measure current disk performance, as we are planning to replace local disk with network attached storage on our application servers. Since our applications which write data are written in Java, I thought I would measure the performance directly in Linux, and also using a simple Java test. However I'm getting significantly different results, particularly for reading data, using what appear to me to be similar tests. Directly in Linux I'm doing:
dd if=/dev/zero of=/data/cache/test bs=1048576 count=8192
dd if=/data/cache/test of=/dev/null bs=1048576 count=8192
My Java test looks like this:
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
public class TestDiskSpeed {
private byte[] oneMB = new byte[1024 * 1024];
public static void main(String[] args) throws IOException {
new TestDiskSpeed().execute(args);
}
private void execute(String[] args) throws IOException {
long size = Long.parseLong(args[1]);
testWriteSpeed(args[0], size);
testReadSpeed(args[0], size);
}
private void testWriteSpeed(String filePath, long size) throws IOException {
File file = new File(filePath);
BufferedOutputStream writer = null;
long start = System.currentTimeMillis();
try {
writer = new BufferedOutputStream(new FileOutputStream(file), 1024 * 1024);
for (int i = 0; i < size; i++) {
writer.write(oneMB);
}
writer.flush();
} finally {
if (writer != null) {
writer.close();
}
}
long elapsed = System.currentTimeMillis() - start;
String message = "Wrote " + size + "MB in " + elapsed + "ms at a speed of " + calculateSpeed(size, elapsed) + "MB/s";
System.out.println(message);
}
private void testReadSpeed(String filePath, long size) throws IOException {
File file = new File(filePath);
BufferedInputStream reader = null;
long start = System.currentTimeMillis();
try {
reader = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
for (int i = 0; i < size; i++) {
reader.read(oneMB);
}
} finally {
if (reader != null) {
reader.close();
}
}
long elapsed = System.currentTimeMillis() - start;
String message = "Read " + size + "MB in " + elapsed + "ms at a speed of " + calculateSpeed(size, elapsed) + "MB/s";
System.out.println(message);
}
private double calculateSpeed(long size, long elapsed) {
double seconds = ((double) elapsed) / 1000L;
double speed = ((double) size) / seconds;
return speed;
}
}
This is being invoked with "java TestDiskSpeed /data/cache/test 8192"
Both of these should be creating 8GB files of zeros, 1MB at a time, measuring the speed, and then reading it back and measuring again. Yet the speeds I'm consistently getting are:
Linux: write - ~650MB/s
Linux: read - ~4.2GB/s
Java: write - ~500MB/s
Java: read - ~1.9GB/s
Can anyone explain the large discrepancy?
When I run this using NIO on my system. Ubuntu 15.04 with an i7-3970X
public class Main {
static final int SIZE_GB = Integer.getInteger("sizeGB", 8);
static final int BLOCK_SIZE = 64 * 1024;
public static void main(String[] args) throws IOException {
ByteBuffer buffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
File tmp = File.createTempFile("delete", "me");
tmp.deleteOnExit();
int blocks = (int) (((long) SIZE_GB << 30) / BLOCK_SIZE);
long start = System.nanoTime();
try (FileChannel fc = new FileOutputStream(tmp).getChannel()) {
for (int i = 0; i < blocks; i++) {
buffer.clear();
while (buffer.remaining() > 0)
fc.write(buffer);
}
}
long mid = System.nanoTime();
try (FileChannel fc = new FileInputStream(tmp).getChannel()) {
for (int i = 0; i < blocks; i++) {
buffer.clear();
while (buffer.remaining() > 0)
fc.read(buffer);
}
}
long end = System.nanoTime();
long size = tmp.length();
System.out.printf("Write speed %.1f GB/s, read Speed %.1f GB/s%n",
(double) size/(mid-start), (double) size/(end-mid));
}
}
prints
Write speed 3.8 GB/s, read Speed 6.8 GB/s
You may get better performance if you drop the BufferedXxxStream. It's not helping since you're doing 1Mb read/writes, and is cause extra memory copy of the data.
Better yet, you should be using the NIO classes instead of the regular IO classes.
try-finally
You should clean up your try-finally code.
// Original code
BufferedOutputStream writer = null;
try {
writer = new ...;
// use writer
} finally {
if (writer != null) {
writer.close();
}
}
// Cleaner code
BufferedOutputStream writer = new ...;
try {
// use writer
} finally {
writer.close();
}
// Even cleaner, using try-with-resources (since Java 7)
try (BufferedOutputStream writer = new ...) {
// use writer
}
To complement Peter's great answer, I am adding the code below. It compares head-to-head the performance of the good-old java.io with NIO. Unlike Peter, instead of just reading data into a direct buffer, I do a typical thing with it: transfer it into an on-heap byte array. This steals surprisingly little from the performance: where I was getting 7.5 GB/s with Peter's code, here I get 6.0 GB/s.
For the java.io approach I can't have a direct buffer, but instead I call the read method directly with my target on-heap byte array. Note that this array is smallish and has an awkward size of 555 bytes. Nevertheless I retrieve almost identical performance: 5.6 GB/s. The difference is so small that it would evaporate completely in normal usage, and even in this artificial scenario if I wasn't reading directly from the disk cache.
As a bonus I include at the bottom a method which can be used on Linux and Mac to purge the disk caches. You'll see a dramatic turn in performance if you decide to call it between the write and the read step.
public final class MeasureIOPerformance {
static final int SIZE_GB = Integer.getInteger("sizeGB", 8);
static final int BLOCK_SIZE = 64 * 1024;
static final int blocks = (int) (((long) SIZE_GB << 30) / BLOCK_SIZE);
static final byte[] acceptBuffer = new byte[555];
public static void main(String[] args) throws IOException {
for (int i = 0; i < 3; i++) {
measure(new ChannelRw());
measure(new StreamRw());
}
}
private static void measure(RW rw) throws IOException {
File file = File.createTempFile("delete", "me");
file.deleteOnExit();
System.out.println("Writing " + SIZE_GB + " GB " + " with " + rw);
long start = System.nanoTime();
rw.write(file);
long mid = System.nanoTime();
System.out.println("Reading " + SIZE_GB + " GB " + " with " + rw);
long checksum = rw.read(file);
long end = System.nanoTime();
long size = file.length();
System.out.printf("Write speed %.1f GB/s, read Speed %.1f GB/s%n",
(double) size/(mid-start), (double) size/(end-mid));
System.out.println(checksum);
file.delete();
}
interface RW {
void write(File f) throws IOException;
long read(File f) throws IOException;
}
static class ChannelRw implements RW {
final ByteBuffer directBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
#Override public String toString() {
return "Channel";
}
#Override public void write(File f) throws IOException {
FileChannel fc = new FileOutputStream(f).getChannel();
try {
for (int i = 0; i < blocks; i++) {
directBuffer.clear();
while (directBuffer.remaining() > 0) {
fc.write(directBuffer);
}
}
} finally {
fc.close();
}
}
#Override public long read(File f) throws IOException {
ByteBuffer buffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
FileChannel fc = new FileInputStream(f).getChannel();
long checksum = 0;
try {
for (int i = 0; i < blocks; i++) {
buffer.clear();
while (buffer.hasRemaining()) {
fc.read(buffer);
}
buffer.flip();
while (buffer.hasRemaining()) {
buffer.get(acceptBuffer, 0, Math.min(acceptBuffer.length, buffer.remaining()));
checksum += acceptBuffer[acceptBuffer[0]];
}
}
} finally {
fc.close();
}
return checksum;
}
}
static class StreamRw implements RW {
final byte[] buffer = new byte[BLOCK_SIZE];
#Override public String toString() {
return "Stream";
}
#Override public void write(File f) throws IOException {
FileOutputStream out = new FileOutputStream(f);
try {
for (int i = 0; i < blocks; i++) {
out.write(buffer);
}
} finally {
out.close();
}
}
#Override public long read(File f) throws IOException {
FileInputStream in = new FileInputStream(f);
long checksum = 0;
try {
for (int i = 0; i < blocks; i++) {
for (int remaining = acceptBuffer.length, read;
(read = in.read(buffer)) != -1 && (remaining -= read) > 0; )
{
in.read(acceptBuffer, acceptBuffer.length - remaining, remaining);
}
checksum += acceptBuffer[acceptBuffer[0]];
}
} finally {
in.close();
}
return checksum;
}
}
public static void purgeCache() throws IOException, InterruptedException {
if (System.getProperty("os.name").startsWith("Mac")) {
new ProcessBuilder("sudo", "purge")
// .inheritIO()
.start().waitFor();
} else {
new ProcessBuilder("sudo", "su", "-c", "echo 3 > /proc/sys/vm/drop_caches")
// .inheritIO()
.start().waitFor();
}
}
}

Splitting files into chunks with size bigger than 127

I'm trying to make a simplified HDFS (Hadoop Distributed File System) for a final project in a Distributed System course.
So, the first thing that I'm trying is to write a program which split an arbitrary file into blocks (chunks) of an arbitrary dimension.
I found this useful example, which code is:
package javabeat.net.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
/**
* Split File Example
*
* #author Krishna
*
*/
public class SplitFileExample {
private static String FILE_NAME = "TextFile.txt";
private static byte PART_SIZE = 5;
public static void main(String[] args) {
File inputFile = new File(FILE_NAME);
FileInputStream inputStream;
String newFileName;
FileOutputStream filePart;
int fileSize = (int) inputFile.length();
int nChunks = 0, read = 0, readLength = PART_SIZE;
byte[] byteChunkPart;
try {
inputStream = new FileInputStream(inputFile);
while (fileSize > 0) {
if (fileSize <= 5) {
readLength = fileSize;
}
byteChunkPart = new byte[readLength];
read = inputStream.read(byteChunkPart, 0, readLength);
fileSize -= read;
assert (read == byteChunkPart.length);
nChunks++;
newFileName = FILE_NAME + ".part"
+ Integer.toString(nChunks - 1);
filePart = new FileOutputStream(new File(newFileName));
filePart.write(byteChunkPart);
filePart.flush();
filePart.close();
byteChunkPart = null;
filePart = null;
}
inputStream.close();
} catch (IOException exception) {
exception.printStackTrace();
}
}
}
But I think that there is a big issue: the value of PART_SIZE cannot be greater than 127, otherwise an error: possible loss of precision will occur.
How can I solve without totally changing the code?
The problem is that PART_SIZE is a byte; its maximum value is therefore indeed 127.
The code you have at the moment however is full of problems; for one, incorrect resource handling etc.
Here is a version using java.nio.file:
private static final String FILENAME = "TextFile.txt";
private static final int PART_SIZE = xxx; // HERE
public static void main(final String... args)
throws IOException
{
final Path file = Paths.get(FILENAME).toRealPath();
final String filenameBase = file.getFileName().toString();
final byte[] buf = new byte[PART_SIZE];
int partNumber = 0;
Path part;
int bytesRead;
byte[] toWrite;
try (
final InputStream in = Files.newInputStream(file);
) {
while ((bytesRead = in.read(buf)) != -1) {
part = file.resolveSibling(filenameBase + ".part" + partNumber);
toWrite = bytesRead == PART_SIZE ? buf : Arrays.copyOf(buf, bytesRead);
Files.write(part, toWrite, StandardOpenOption.CREATE_NEW);
partNumber++;
}
}
}
List<PDDocument> Pages=new ArrayList<PDDocument>();
Document.load(filePath);
try {
Splitter splitter = new Splitter();
splitter.setSplitAtPage(NoOfPagesDocumentWillContain);
Pages = splitter.split(document);
}catch(Exception e)
{
l
e.getCause().printStackTrace();
}

Reading from one file using FileInputStream and writing to another file using FileOutputStream

I was trying to read from one file and write the bytes read to another file using the classes specified in the title.I successfully did it but while i was trying to try different things i came across a problem which i do not understand.
Here is the code
import java.io.*;
public class FileInputStreamDemo {
public static void main(String[] args)
throws Exception {
// TODO Auto-generated method stub
int size;
InputStream f = new
FileInputStream("G:/Eclipse Workspace/FileInputStream Demo/src/FileInputStreamDemo.java");
System.out.println("Total available bytes: " + (size = f.available()));
/*int n=size/40;
System.out.println("first " + n + " bytes of file one read() at a time");
for (int i=0;i<n;i++)
{
System.out.print((char) f.read());
}
System.out.println("\n Still available: " + f.available());
System.out.println("reading the next" + n + "with one read(b[])");
byte b[] = new byte[n]; */
/*for(int i=0;i<size;i++)
{
System.out.print((char) f.read());
}*/
OutputStream f1 = new
FileOutputStream("G:/Eclipse Workspace/FileInputStream Demo/test.txt");
for (int count = 0; count < size; count++) {
f1.write(f.read());
}
for (int i = 0; i < size; i++) {
System.out.print(f.read());
}
f.close();
f1.close();
}
}
The problem that i am talking about is that when i first read from the FileInputStream object f i.e f.read() and write it to the f1 i.e FileOutputStream object it goes on to do what it is meant to do ,but when i try to read it again it returns -1. why so ?
Use RandomAccessFile and seek(0) method to come back at the beginning.
RandomAccessFile file = new RandomAccessFile(new File("G:/Eclipse Workspace/FileInputStream Demo/src/FileInputStreamDemo.java"), "r");
Here is sample code:
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
public class FileInputStreamDemo {
public static void main(String[] args) throws Exception {
long size;
File file = new File("D:/Workspace/JavaProject/src/com/test/FileInputStreamDemo.java");
RandomAccessFile f = new RandomAccessFile(file, "r");
System.out.println("Total available bytes: " + (size = file.length()));
OutputStream f1 = new FileOutputStream(new File(
"D:/Workspace/JavaProject/resources/test.txt"));
for (int count = 0; count < size; count++) {
f1.write(f.read());
}
f.seek(0);
for (int i = 0; i < size; i++) {
System.out.print((char)f.read());
}
f.close();
f1.close();
}
}

Java: How to Read numbers from a file using FileChannel [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
Questions asking for code must demonstrate a minimal understanding of the problem being solved. Include attempted solutions, why they didn't work, and the expected results. See also: Stack Overflow question checklist
Closed 9 years ago.
Improve this question
I have a large text file having thousands of numbers in it that are separated by spaces. I want to read each number using Java FileChannel.
I was able to read the file using the first method mentioned here: http://howtodoinjava.com/2013/05/01/3-ways-to-read-files-using-java-nio/ but i am not sure how i would read each number of two, three and four digits.
My Code:
public static void main(String args[]) {
try
{
String file_name="abc.txt";
RandomAccessFile input_file = new RandomAccessFile(file_name,"r");
FileInputStream in = new FileInputStream(file_name);
FileChannel ch = in.getChannel();
FileChannel inChannel = input_file.getChannel();
long file_size = inChannel.size();
ByteBuffer buffer = ByteBuffer.allocate((int) file_size);
Charset cs = Charset.forName("ASCII");
// inChannel.read(buffer);
int rd;
while ( (rd = ch.read( buffer )) != -1 )
{
buffer.rewind();
System.out.println("String read: ");
CharBuffer chbuf = cs.decode(buffer);
for ( int i = 0; i < chbuf.length(); i++ )
{
System.out.print(chbuf.get());
}
buffer.clear();
inChannel.close();
input_file.close();
}
}
catch (IOException exc){}
}
}
I've modified your code to isolate each number separately (it didn't need much modification):
import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.util.ArrayList;
public class ReadFile {
public static void main(String args[]) {
try
{
String s;
Integer I;
String file_name="C:/Users/User/test.txt";
RandomAccessFile input_file = new RandomAccessFile(file_name,"r");
FileInputStream in = new FileInputStream(file_name);
FileChannel ch = in.getChannel();
FileChannel inChannel = input_file.getChannel();
long file_size = inChannel.size();
ByteBuffer buffer = ByteBuffer.allocate((int) file_size);
Charset cs = Charset.forName("ASCII");
ArrayList<Character> character = new ArrayList<Character>();
// inChannel.read(buffer);
int rd;
while ( (rd = ch.read( buffer )) != -1 )
{
buffer.rewind();
CharBuffer chbuf = cs.decode(buffer);
System.out.println("ASCII values read: ");
System.out.println();
for ( int i = 0; i < chbuf.length(); i++ )
{
int j = chbuf.get();
character.add((char)j);
System.out.println("j("+i+"): "+j+" ");
}
System.out.println();
System.out.println("Chars they represent: ");
System.out.println();
for ( int i = 0; i < character.size(); i++ )
{
System.out.println("character("+i+"): "+character.get(i)+" ");
System.out.println();
s = character.get(i).toString();
if(!(s.equals(" ")||s.equals("\r")))
{
System.out.println("s("+i+"): "+s+" ");
System.out.println();
System.out.println("s("+i+").length(): "+s.length()+" ");
System.out.println();
I = new Integer(s);
System.out.println("I("+i+"): "+I+" ");
System.out.println();
}
}
buffer.clear();
inChannel.close();
input_file.close();
}
}
catch (IOException exc){}
}
}

Categories

Resources