Deflate and Inflate Java String in Memory Zip Exception Error - java
I am writing code that deflates a string, Base64-encodes the result, and then decodes and inflates it again, but I am getting the following error:
Exception in thread "main" java.util.zip.ZipException: incorrect header check
at java.util.zip.InflaterOutputStream.write(InflaterOutputStream.java:284)
at java.io.FilterOutputStream.write(FilterOutputStream.java:108)
at serializer.test.SerializerTest.main(SerializerTest.java:43)
My code is:
// Question snippet: deflates the file content at every compression level (1-9),
// Base64-encodes the result, then tries to decode and inflate it again.
XsltObject Xslt = new XsltObject();
Xslt.setXslt(readFile("C:\\codebase\\OverallSystem\\EBE_TEMPERED_XMLS\\bank_timestamp-0.xml"));
System.out.println("Original String Length: "+ Xslt.getXslt().length());
//JSONObject jsonObj = new JSONObject( Xslt );
// System.out.println( jsonObj );
//System.out.println( "Json Length:" + jsonObj);
DeflaterOutputStream outputStream;
for ( int i = 1; i <= 9; ++i ) {
ByteArrayOutputStream arrayOutputStream = new ByteArrayOutputStream();
// NOTE(review): the second Deflater argument is "nowrap"; true produces raw
// DEFLATE data without the zlib header and checksum.
outputStream = new DeflaterOutputStream(arrayOutputStream, new Deflater( i, true ));
// getBytes() without a charset uses the platform default encoding.
outputStream.write(Xslt.getXslt().getBytes());
outputStream.close();
//System.out.println("Deflate (lvl=" + i + ");" + arrayOutputStream.toString("ISO-8859-1"));
// ISO-8859-1 maps every byte to exactly one char, so this length is the compressed byte count.
System.out.println("Deflate (lvl=" + i + ");" + arrayOutputStream.toString("ISO-8859-1").length());
// NOTE(review): decoding compressed bytes as UTF-8 text and re-encoding them is lossy
// (invalid sequences are replaced), so the Base64 payload no longer equals the
// compressed data; arrayOutputStream.toByteArray() would keep the bytes intact.
String temp = DatatypeConverter.printBase64Binary(arrayOutputStream.toString("UTF-8").getBytes());
System.out.println(temp);
System.out.println("Base 64 len: " + temp.length());
byte[] data =DatatypeConverter.parseBase64Binary(temp);
ByteArrayOutputStream inflateArrayOutputStream = new ByteArrayOutputStream();
// NOTE(review): new Inflater() expects zlib-wrapped input (nowrap = false), while the
// data above was written with nowrap = true; this mismatch is what
// "incorrect header check" reports. The nowrap setting must be the same on both sides.
InflaterOutputStream iis = new InflaterOutputStream(inflateArrayOutputStream, new Inflater());
iis.write(data);
iis.close();
System.out.println("Inflate (lvl=" + i + ");" + inflateArrayOutputStream.toString("ISO-8859-1"));
System.out.println("Inflate (lvl=" + i + ");" + inflateArrayOutputStream.toString("ISO-8859-1").length());
What am I doing wrong?
This fixed all my issues, and it uses only classes that ship with the JDK:
package serializer.test;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Base64;
import java.util.zip.*;
import javax.xml.bind.DatatypeConverter;
public class DeflationApp
{
private String compressBase64(String stringToCompress, int level)
throws UnsupportedEncodingException
{
byte[] compressedData = new byte[1024];
byte[] stringAsBytes = stringToCompress.getBytes("UTF-8");
Deflater compressor = new Deflater(level, false);
compressor.setInput(stringAsBytes);
compressor.finish();
int compressedDataLength = compressor.deflate(compressedData);
byte[] bytes = Arrays.copyOf(compressedData, compressedDataLength);
return DatatypeConverter.printBase64Binary(bytes);
}
private String decompressToStringBase64(String base64String)
throws UnsupportedEncodingException, DataFormatException
{
byte[] compressedData = DatatypeConverter
.parseBase64Binary(base64String);
Inflater deCompressor = new Inflater();
deCompressor.setInput(compressedData, 0, compressedData.length);
byte[] output = new byte[100000];
int decompressedDataLength = deCompressor.inflate(output);
deCompressor.end();
return new String(output, 0, decompressedDataLength, "UTF-8");
}
public static void main(String[] args) throws DataFormatException,
IOException
{
DeflationApp m = new DeflationApp();
String strToBeCompressed = readFile(
"C:\\codebase\\OverallSystem\\MappingMapToEBECommon.xslt")
.trim();
for (int i = 1; i <= 9; ++i)
{
String compressedData = m.compressBase64(strToBeCompressed, i);
String deCompressedString = m.decompressToStringBase64(compressedData);
System.out.println("Base 64:");
System.out.println("Original Length with level("+i+"): " + strToBeCompressed.length());
System.out.println("Compressed with level("+i+"): " + compressedData.toString());
System.out.println("Compressed with level("+i+") Length: " + compressedData.toString().length());
System.out.println("Decompressed with level("+i+"): " +
+ deCompressedString.length());
System.out.println("Decompressed with level("+i+"): " + deCompressedString);
}
for (int i = 1; i <= 9; ++i)
{
byte[] compressedData = m.compress(strToBeCompressed, i);
String deCompressedString = m.decompressToString(compressedData);
System.out.println("Without Base 64:");
System.out.println("Original Length with level("+i+"): " + strToBeCompressed.length());
System.out.println("Compressed with level("+i+"): " + new String(compressedData));
System.out.println("Compressed with level("+i+") Length: " + new String(compressedData).length());
System.out.println("Decompressed with level("+i+"): " +
+ deCompressedString.length());
System.out.println("Decompressed with level("+i+"): " + deCompressedString);
}
}
private byte[] compress(String stringToCompress, int level) throws UnsupportedEncodingException
{
byte[] compressedData = new byte[1024];
byte[] stringAsBytes = stringToCompress.getBytes("UTF-8");
Deflater compressor = new Deflater(level, false);
compressor.setInput(stringAsBytes);
compressor.finish();
int compressedDataLength = compressor.deflate(compressedData);
return Arrays.copyOf(compressedData, compressedDataLength);
}
private String decompressToString(byte[] compressedData) throws UnsupportedEncodingException, DataFormatException
{
Inflater deCompressor = new Inflater();
deCompressor.setInput(compressedData, 0, compressedData.length);
byte[] output = new byte[100000];
int decompressedDataLength = deCompressor.inflate(output);
deCompressor.end();
return new String(output, 0, decompressedDataLength, "UTF-8");
}
public static String readFile(String file) throws IOException
{
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = null;
StringBuilder stringBuilder = new StringBuilder();
String ls = System.getProperty("line.separator");
try
{
while ((line = reader.readLine()) != null)
{
stringBuilder.append(line);
stringBuilder.append(ls);
}
return stringBuilder.toString();
}
finally
{
reader.close();
}
}
}
I too had memory problems with DeflaterOutputStream - it works if you use the constructor that creates the default deflater for you. This works fine:
// Compresses every value of valueMap and stores the compressed bytes in
// zipValueMap under the same key.
for (Entry<String, String> entry : valueMap.entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// No Deflater is passed in, so the stream creates and owns its own.
DeflaterOutputStream dos = new DeflaterOutputStream(baos);
try {
// getBytes() without a charset uses the platform default encoding.
dos.write(value.getBytes());
dos.flush();
// close() finishes the compressed stream before baos is read below.
dos.close();
}
catch (IOException e) {
throw new RuntimeException(e);
}
byte[] zipData = baos.toByteArray();
zipValueMap.put(key, zipData);
}
But change the stream construction to:
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// The Deflater is created by the caller here and only handed to the stream.
Deflater deflater = new Deflater(Deflater.BEST_SPEED);
DeflaterOutputStream dos = new DeflaterOutputStream(baos, deflater);
And that gives me a memory leak in the JVM's native (C) code that takes up 80 GB and crashes my Mint system. So why does the default constructor work, while passing in my own deflater fails so badly?
Reading the source of DeflaterOutputStream (Java 8u40), I found some special code in the close method:
// Quoted from DeflaterOutputStream: closes the stream once; repeated calls do nothing.
public void close() throws IOException {
if (!closed) {
finish();
// The Deflater is ended only when usesDefaultDeflater is set; a Deflater
// supplied by the caller is not ended here.
if (usesDefaultDeflater)
def.end();
out.close();
closed = true;
}
}
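I guess they put that in so that the stream releases a deflater it created itself; a deflater that you pass in is not ended by close() and remains your responsibility.
The best solution was to call end() explicitly in the loop: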
try {
dos.write(value.getBytes());
dos.flush();
dos.close();
// Explicitly end the caller-supplied Deflater after the stream is closed.
// NOTE(review): this line is skipped if write/flush/close throws; calling
// end() from a finally block would cover that path too.
deflater.end();
}
And no more memory leak. It was a nasty leak, too: because it happens on the native (C) side, the JVM never threw an error; it just chewed up all 40 GB of RAM I had and then started on the swap space. I had to ssh into the box and kill the process.
Related
Compressing Base64 String is not of less size
I'm trying to compress a Base64 String using the java.util.zip.GZIPInputStream and Deflater clases. My problem is that after compression the size is not less from both cases. For the first case with the GZIPInputStream the size is bigger, and in the second case with the Deflater class the size is almost the same. The output of my code is: Original String Size: 8799 CompressedGZip String Size: 8828 UncompressedGZip String Size: 8799 Original_String_Length=8799 Compressed_String_Length Deflater=8812, Compression_Ratio=-0.147% Decompressed_String_Length Deflater=8799 == Original_String_Length (8799) Original_String == Decompressed_String=True As you can see in both cases the compressed string is not less. I need to compress the input base64 String because in some cases is too long. Is there any way to achieve this? This is my code: private static String compressFileGZip(String data) { try { // Create an output stream, and a gzip stream to wrap over. ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length()); GZIPOutputStream gzip = new GZIPOutputStream(bos); // Compress the input string gzip.write(data.getBytes()); gzip.close(); byte[] compressed = bos.toByteArray(); bos.close(); // Convert to base64 compressed = Base64.getEncoder().encode(compressed); // return the newly created string return new String(compressed); } catch(IOException e) { return null; } } private static String decompressFileGZip(String compressedText) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); // get the bytes for the compressed string byte[] compressed = compressedText.getBytes("UTF8"); // convert the bytes from base64 to normal string Base64.Decoder d = Base64.getDecoder(); compressed = d.decode(compressed); // decode. 
final int BUFFER_SIZE = 32; ByteArrayInputStream is = new ByteArrayInputStream(compressed); GZIPInputStream gis = new GZIPInputStream(is, BUFFER_SIZE); StringBuilder string = new StringBuilder(); byte[] data = new byte[BUFFER_SIZE]; int bytesRead; while ((bytesRead = gis.read(data)) != -1) { string.append(new String(data, 0, bytesRead)); } gis.close(); is.close(); return string.toString(); } public static void main(String args[]) { String input = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxMTEhMTExIVFhUXFxUXFxUYFRUVFRUSFRIXFhUVFRUYHSggGBolGxUVITEhJSkrLi4uFx8zODMtNygtLisBCgoKDg0OGxAQGi0lHyUvLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0rNy0tLS0tLS0rK//AABEIAKgBLAMBIgACEQEDEQH/xAAcAAACAgMBAQAAAAAAAAAAAAAEBQIDAAEGBwj/xAA9EAABAwMCAwYDBQgCAQUAAAABAAIRAwQhEjEFQVEGImFxgZETMrEjUpKhwQcUQmJy0eHwM7LxFSRTc4L/xAAZAQADAQEBAAAAAAAAAAAAAAABAgMEAAX/xAAkEQACAgICAQUBAQEAAAAAAAAAAQIRAyESMUETIjJCUQSRFP/aAAwDAQACEQMRAD8A8YMLQWHdSLOhXHGmhFBo6D2Q9LdF0cmFxxdeWheGPY2WkQSAMHmiadoIGB7DdD8Ov3UHxuwnI5J7emmAXg/NsrRSITkzneJ0QMgCPABHdkqLXVstBgE5AjCjdadDk67C8OD6T6nMEj8glcfcPF+0Y9q+G0zTpXDWN0nuvhoADuRIC5b92HzNAjyC6+4uwLevTOxaYHiOfmuOsHjS4ErppWCDbQytNBiGt9gnVG1YR/xs/C3+yR8JogCV0Fnlqyy0x6A+M2rPhmGN5/wj+yR8G06iC1vq0FdJxIzTcFy9iYf6oxbsK6Oi+Az7jPwN/soGgz7jPwhSa5bJWgQpdRZ9xn4QtfBZ9xn4QrSFqEAlX7u37jPwhbFuz7jfwhWtKi5y44gbVn3G/hCw2zPuN/CFY1yk5cEHNBv3GfhCvpNY+m6n8Nk9dI+qgHNmC9o89vWNkNQv203yTg8x+qMWrBJOgqxsaNLUHNaT4gFILzQXGGt9AEZXug97nAyOSW1MHZPJokk3sstKDTUYIGSOQ6r6L7O8LofBZNvROBk0mE7DnC+bqdWHB3Qyvf8AsxdOrWbXMMHSD6wMLg2zXbmyoCkGNoUWlx5U2Ax6BcozhlFtMAUqf4G/2Vl9xCs+s5tfZm3ioniDIgE7KkVSJzkA/uFOD9mz8Df7JDxK2Y0f8bQ7kdIghdJ8QekZXL8evRmBscJ1ROxbcUmkiGt8e6FMWjXCNLZ8glpuySmXA7kGoAeqTnHorUkJbugAY0gR+ay0twTmI8ldxWkW1nt/mJ91RVr6WwNz9Fnapmq9F1Sk3oPZTZSEbD2VFs8kZRzBhIxWc2VtYsRGLaO6JpOhyHpq1pyEUcMatAHkpMp4jkrKQlqicKyISBLsEArr/wBm9b7N7PEnzwFyXEflHmnfYm50Qf5v0Q+wfqVcau8vZEGT7SkpwE07R3EXNQRzx5HKWVSDhSlJ2UjFKI34Ke4cp7Z1owuT4PX0mOSe06uVnmtjsOvvlPkuTYYqLpalWRC5y5ZFQLosVD+mcBXMVNJ3dCnTctKehKLHBVOUyVqFwTTVF6mUF
cXg1BjZJ5xyHieSFhJvug3cgYnPPwHUoG8v67ubRjI+WQOZM4RVxYa92HGdXLbLesICpbVMuJjkB1CZY5NA9RJ9i+pcvJ+aB4bD+6qqVDyeSPGRPoUZVtTvEef9kJUt3eaDg0FTTNsrA/M31GD/AGKup1SM/M3r0PRw5eaBcOhVltWIJxPXxbzBHNL0MM2UWvkB0H7vn0K9n/ZJLLbQ7+EuHoF4caUd5hIIy3+k7+YHRd92E7WCnRqMPzhpcAObjEFvgU8XsSSHPa+/1XD2UwJHzHkASuOouf8AHIgxIE8jKM4JxP41xcGr/GYg9Gjb6pvQ4eILh1wqK29MlOo9oMrWuimCcyP0XB8YZM+a7riV034BkxAx7Lz66ug4QD6KrIQ7ArWhseY5I+yZ9oOUZKrpd3KoFUgkyo8Etl+VgvFLkmrUcObilpMq6rsVSpN7NHSQTaPgprPRJKZyum4fQY5gP6pWA5QKYCgrWjCITYV7W7IdqNY3CKAMKJgT1VrXAqlg7ixmVaJGQLxmBEcwmVkNDWAdJ90n4r8w8kx4eZpNI32QXyG+pLtDTmqCebQfySt1PoU2rPmo2RIaBI8FVxD4bnzSBAPLxUpq5Fo/EXW5LXZTugJKVuoF3ojLVphJKDFbQ1YMwEq4tTh4TIOLRKWcSqa4KikchhZVJYESCgeG/LH+7I4LVHoV9lzSsBUHqupW0iVxzLzSfUfTo0xNSq4Mb0BPM+AGfRdnb9mKNABo75B77zBLnc/SUk/Z60urVblwgUaehn/2VTv6Bv5rrHuLWEESCeWSCTunxrySyS1Quv7NpkCPJIOIUWiJ2aMeJ6p29tV1WBhoH/xiZSjiloQ3vE7kDlK0JmaSOcr98yAqHsY0HAkoqvRdMAaR+ao/dYyQSmYYsT3NoXEkINx08gHCIPSP7rpH05EGGhJeJ2+kzuFDJDVmiE/BQKhIDTgiS1w6uzB6hW2NVxeHM7tVpwNg482odxgMMcvoUTWsnOqF4Ia3DtR3GAcBQouFXLiz7RoI1faAHcE/M0+RkLs+DcT+JSYRz/0rj/3jU0N+YAky4ZJdvgJlwa+az7F2NWWxAjwXYppTo7+nG5Y1L8GHaK0pOGKhBnIBwuU0sacfmurr0yBDKOvq4zuuevbd8kuY0eSvLlZmg48Shz522VVZ8ArHGELXqckrddjxWweqcKpW1jgKoKJol2SaUytqrtOEtCvp1SAuFBiFYdlFrZVgC4JGm1MG7QqbWnzRLGp0hGwljoaAraDxKEefosougqiJsq423vNjomdjRgNCWcUqSWnwTZ1bTSa/+VBfJh+qFnEHkVCRMbIqg4FmRnkUC+oSIPiURQuJYW4kbLO3bs0rWjKjyfNUtrwVlW4a4TsQhDUlVTIyWzpm1NVMH/YSy/qDYK/gtSWlp9ERdcPMSRHSRyUJKmNEr4U7BRo3Srh9QB0JrSOVSPQH2Xoctl7QVc94S/ilxoIcNwEyAz0zhFzRtqB1EAvId6NEDHuqXcZpVD9lOJM8iBuuM4jxf4z9FNshmluuDHQTybnmehR/ZGoKhLQBONQ5ZkSPNaE4tmSUZJHRjiukudGAJ88QlPEuP0WNdqYS4Dedic4wnF9bMY12o4gAY6ZXnvHTJMeZ80dAX4Qr9oJPdpiepURxGodwCOiRaoBdpLpwAOTuR2Tc8EqNb8TWAQ3VpOCR4Dmpf9FOmaPQ/B3w6g2q1xiCBsULx+3lgxCt7NXY1jVAkQehlW9rKeJGytytE6pnGaZafDV/vujy1xZvjHePIR9UvnB8nf7+aa3TWtp0nPeIdnQNz4x0WTyakQsWFx0s9yenildzVcTqnyPTomttdt+K2BDQ9ojn0P1Sq+pOY5zTycR5ZS0M5Nof8K7SVXN+HGQN+qC4jc1TlxS7hT4qtXT39qHs8QrxuS7Mk0oy6OYNQlZRp6nADmtVGQYRFhh3oVNmiC2gS7ZGCqAiLwzt4qhIh5dmBWNKrCm1EBuh1U2NJ9VCl
sAjrZoRSsDdEmNiFcQsqM2WPcqVSJWacVBV1H5WMcuTA0Q4hs1H3xinSb4ZQHEDhiOqan6BEmAISvyPFdA5+WTvyVQBGUfxS0NNrQ75uiE0YUF0aZbZU5oIwhmhW1GwVCluqIjI6HsVS1XDARIJz7L0jtlY0xbfQrguwj/t2j+b9F6F21bqoAD8krWwJnj1MkOTSkSh2UftAPFOjbQFyl4CwAyUv4q3qnNOiZ2SjiTCSQqJNi2h9w6wo1KGmvVNIVmtc7cSA0aSBEGCD7ro+zYthVY227zadIh9Q4LjqbpB680XwezoXVpQ1sDtLGYyIGmIPqEc1jKLmUmNa3EwMQ0EDb1TRwrlyJyy6qgDtdcaQIJk+XI9F59fuLpjnhdN2tqHXuOu+wk7rkbmvpI81dqkSXZPh1CozLWNceh/QIqv8eo7VVx4YAgcgByWUr1oyh7niLn4U/Sh2U9ST0MOGU9VRo8fJF9rHHTHohuz9cMeHOPUepWu1FaQPEqiFo5Se76H/sq7kd71HoOim7Yen/ZRuPn9ffKyGktoCQ885BHmDKL7SNmrqbMOax2fvFolCWZztuSI9EdxdpdToVAQe4Wnza7H5IBArG2748MldY0930XP8G69U9Gy041SMmZ3I566t++VW+nDgn1O01GUurUNTn8oCE1SbKYfdNJCOq0jdQUnMMxk+QJ+i0WEbgjzBCgXZorAVhWlxwbb0YE9UQwLKB7oVtNoKqkSkyZ2CFe/KKuRAAnJQIpkFCb3R0Y+SLzlbpqsnKvt90qOaNcQbhi6Lsxb941amGsGJ6rmLypqcI2CaX3FiababcCBPikyfiLY9dlfFr01qrncuSm0yzKCpZVrKkBBKtBuyiuVUwZRQZqKpc2CnJtjvsrV012nxXr9Gy+Kxpcf95LxXgTvt2DqQPdeziq6lRwZx+iEgRPKeP0PhXLhtBwjGVyQ1Adparn1tRCK4Vn0SqNseT0PbVzA0zuktzTb3zH/AICFv7ioKsDZGiu34RB3W60tGR32Fdm71zKDdJgtJx/KXEj9VDh17VuLyo4PILW6QfEnmOeQkVPiJpfCB/45h/8AQ4wfbf0Ti2uhZC5qHJLmgEeP8XtlRx5G9PwX/oxxTTh5S/0Ucdp3QrObVJceRGx842QDLY85R3E+JVXnUYaOjn97IwSPJLWVXcjnPXklclYFF0Y4lpgqxhVb6pcBLYKutgnTFYZbNiCSrOK1y9zQdgq2dShn1NTp5CU4gsfsPT6lQr/OR4/UKbo7g8R/v5qus77Rx8f8LM+zSgnho74HV36I+5pONBob/C4zHUk/4QFgDrBiAefkMiU14bdFkxEkOInILoDh+qUYzgdrVz9m7zOPqnz6UNgkA+aQW/FqtU99xAnlhMalmHAEEq8XJrRnmop7LX3zWCASeqTXN23vaRB890/tqIY2AAXHmlFfh7dYA+ae85TzPiqbL/zRcncVVBtv2ooUrduim34sQe6MR1XKX9++q7U8+gwFZxegGv7ux5eSBU412NJVo2FpxWwFhCIodYVZEFG02JRbPhwKf24DshWg7JS0BcQfkDoEK1ytuzLyVUwYU5bZSPRovJWE4hEUaambaZXcReSQvKsqbAqFVsGOiIfkYS0PZlu7ZTrYVVDCsq5RSOb0E22yruaSsoDCuOQiuyTA7WoWuDhuCD7L0LhnGqlWkAQMYXnr+fqu17C0y7cSNMgJpBiLO1LwDGCUP2Wd33Fx5YCL7d0wKjYwYJI9cfqkFhULSXBGPYJdD2vWa6o7wSh5lx6T+qs4TSNR7iTzR9S0ACeuWyV8dCbi7O6n3AL0VWUXOALqZ0PHI6R9mSD1b9ClXFx9nHj/AL9Vb2RuAz94JEhoa49YbMws0p+k3I2Ri8sVEO4vxyKkGg06TjuA45ZSOpxBz3ZEeEQu1v7ygGNIAOsBwdjLSMLkr25YTjHstTj5MsZsFqjUt0zC2XhUVLhAJdVqmIV/D4Bg8xHkgWOO6LonIK6wU
KnjLB4x7FU1D3neZ+pV9c5H9Tv+xVES4+ZUWXXQxsRhvTWc/wD4VtiYLRg7A+5aRKDtzhw5BrjtzIA3Vk6gC2SXbD+f/wAhKMW8PAbVdTP3iAfEGAugtvibBjvZZbU2ik2uKILjEmCTqGCR6o11aqXNHIiekHopPM4t0aYfzRnFNltpTLJ1ZeRhu8IfibW0qZMDUcTzRX7/AE2NOwMbk81xXEuKvqOIB7sx5qS5TlbLSccUeKArp+oz5/VUQm3Ga7XlukAANAEDpzSyFoTMbVs0wLCsYMqw05XNhjG0QphFWVUsMj2QoEImgRBHNOmyJa8EyVU0q2zfIKk6lhN2C9hFoj+HEaih7CkCAra9EtEhWSISdsXcUpfaxsDzVVFuYWqxLniVNghQ8l18TH0YJWmnCJHenqhMynqhL8BNB/JEsGEviMlMKRwpz1sPgDuWxK9X7F8H/wDbtcdywHHiJC8ru8r1nsndv/dWgcmgecBO9o6Ojk+3fDSS17ZOPXfK5G2JgiF6VxvNN2rlPouA+OM4RUfIrl4CeFQzfEpk57TzCRurSqnvIzqTcuK0LwUnst4w+YGI7/n0/RQ7NHvV2/eo1PcAQq6hmN/lPvnZV9nqumo88tD59WuWTLuLNmKotIVMuXaQ2cch08kZw2h8QOyQWxjwPNLyEw4G4iq0fe7v6hWj2RktOiFam4GCVjE54ja7lKIhPJUycXZYH8lbQJQ8q9jXAtkRJA6nJjYbrrSDxbBD9HfVxVNL5z5/qj7+z+HgO1DUMlhYcEjYoF7NLj05Kfe0Upp0yyjOmp/SfzKlZ1Yb4tc1w8gcqLCQ2p5fRwP6Kuk6CDyP05hcEf0uLPpUnBo7oft4Ek/qtf8ArlQmQBt5oOzhzarCJJZLTAnUzO89AqJApiAQ6cnkpOMW+i0ZyS7Kblzqhc9x9P8ACGCtrNjnvutBqd1FCJOTGF1THwaZ5oCE1uWfYU/KUtDVCDNLiQYzMq7UAtnAUG0i7KZ7Co8eiy6ti3y5FDvGJTx7cQRgpTe0YOkZWzJCtnnQleiPDZL4HNPG0AcJdwizIlxEEJxbQPFHHHRPI9mralpwrrunLVN7VVUJmDsqkrEdGmdeeRRDxkqVIwXCPmdAKwtyVmXk1S0kRp4yh7od6RzRQGFlWlIBhVatEk9i+pKNs6khU1wrbPZRmtDxK7qplerdgrgOtg3ovIaru8V2/wCzniuguYduS6PQ3Qy7aXehrhzK87aSXea6ztvVNSsGjbn7lc5WoaSOqacqFgk7LG2jiQBzR1bs/U0TKN4WydJTq/f9m4jk0/RUUbRKeRxdI4SqCHEbd0n00pe2tAfG+lon1EpvxN32jyPuaR5Bun9Ehn5h4fSFBo1JlblbaVgx7XEEgEEgGCQDkA8jCqOy0VwDqv3h2sh7TodlpBBIYcjOyVXj2lxLW6fCZTPhDtdu0ndpLT6EkfkQlt5a5lpMJpxk6aZGLSbTBQZyrKQhwJM5HjzU6VBQuDEGNuW49R0R46H5bCL6oCwbDvbRGxPiVu4tw5hPOW/mYQt1Xc8AuABJkQA0QT0CJqP7pA5rsVVQ2R7sEs3RUh3PBHnhU0u64sd1g+HQoy9LXHG6les+NoeB33Q08gXDHvEe6Eo0FOyFsTTeHD+E5/pOFlSgWvIfgbjxactR1S0+E2g81mOquJ+zE/KMiXeO0KnjBIawbsglj/D7h8ip6CxZUAnBwrmsCHp7o5jRGyTI6NOCNh1z/wALfIeiWsR9Z/2X5IBoUomnIto0RJ8Fp1XotPM45Ks1QMBORcqOgpv6rGUYqsf0IVAejLZ4OF6bV6PLuth/F6Q3GCUDRqBsSt3NRwOdkHckFuCslyxyNLUMq0N6rpQd3VAHildG7cMFTqVQYJOOir6qM6wOyTany/1SpVHZlVNqA5ClSypJlJ+C0KTT3COa1TCsLQrraIPsWV1ZbOwqrgqdoptXoojK1D80ZaXXwSCFQ9+VG5pE97kuSo7sc2vEW
veXOQnGHhzpCWMKtIJCSTbHSSOhsLgCm1FXV19i8+Ee+ErtKzfhgc0TXyxreRIB8t1Vv2kZU30c9eGHOM/wj3Pe+qUzn3Te+Or4h6uAHp/j6pSfm9VE0Loi7ZaAwpOP1W+g9VwR9wJuiWkyHjV5Ef4UqrclDcNpv+LTqEdzIB5HEGAmt7Qggq8VcTNP5A7KGEq4hTgev6LpbajLSY5JDxGiSWDqTjnjcrpLR0H7ga3AEyeQ5NP/AGK1VfHqiDDQ/JmBsAefOf0QtCkSZKkr5Oi7dxtk7W2Lk0oWr3lrKVIv72agDtJqA4EnGBiQibCiRpDWai4xJHdZTB7zjOCeULuzxJhq29NghoY9wEbuGiCI6CQVHNlcWopD44X7mcjwSqGOfSq02u72Q8AierTyPiEdxrsh8dpfau8fgu2nnod6Kvj7B+9vxGxjlkckRZ1XDmQPMhbMcVPGmzNklxm0jz2vavpP0VGOY4fwuBB9OoV7HeK9Xe2jcs+HcaXDlrAcRyw/5m+hXO8X/Zy9oL7Z+tvJpIPoH8vVRy4fw1YM6XZyLnfZlAkyjLijUph1N9NzXDcOEe3XzCXHxWdRrTNU8qdNEnv6KsLYCkHJ+iL2N9Q6j3CnTeAfmHuFixbFIxUMWVw5sEt9wlFeGncGfELFiTJtD4/bLRupSa5uCPcIN1ODEj3WLFmXZpk9F9qBG4x4qyg8dR7hbWJ0QnsKpub94e4VtRzY3HuFixVi9EZITXREnI91pjxG6xYlY6ROlEjPPqnvEhTFEQ5s+Y6LFiMWBoQ0njaQnIYwUtx7hYsXQ8hl4F1GuAIkb9UdVuxpBkYB5jciFixK3o6vcKnvBpuyN+qAdv7rSxKVLqVHVqgjutLiSQPQdVlhSa5wL/kG45u8AtrFwDo7KuKlVpOlrGiGiRATa8tWluHtOfvD+6xYtMPiZsiuQdbUGNaQXs2+83c+q5vjrKNGrrbUD3lpaGYLWbd4kfTxWli6T0GC2J6Fk6qZkQeZIEpsOHCm3U4ggcg4aj4DxWLEkXqwvbC+E2tS+e0mrToW9Jw0jUA7G0NnPifyTP4YpVqYD2wyu4TrB7rg2TM7GVixY0qmbG7gXcTDDcVHB7DhudQ+6gLi4HJ7fxBYsW7C6gkY8sE5tg4vIPzj3Cc8H7UmiR3wWncSIWLE7dixR0/Erayv6OX0w4jB1Ma9h8J+i8a43wl9tVNJ7mujLXtIIe3kR0ONltYs+RGmDYvc5aW1ikUP/9k="; String compressedGZip = compressFileGZip(input); String compressedDeflater = null; String uncompressedGZip = null; String decompressed = null; try { compressedDeflater = compress(input); uncompressedGZip = decompressFileGZip(compressedGZip); decompressed = decompress(decodeBase64(compressedDeflater)); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } System.out.println("Original String Size: " + input.length()); System.out.println("CompressedGZip String Size: " + compressedGZip.length()); System.out.println("UncompressedGZip String Size: " + uncompressedGZip.length()); Integer savedLength = input.length() - compressedDeflater.length(); Double saveRatio = (new Double(savedLength) * 100) / 
input.length(); String ratioString = saveRatio.toString() + "00000000"; ratioString = ratioString.substring(0, ratioString.indexOf(".") + 4); println("Original_String_Length=" + input.length()); println("Compressed_String_Length Deflater=" + compressedDeflater.length() + ", Compression_Ratio=" + ratioString + "%"); println("Decompressed_String_Length Deflater=" + decompressed.length() + " == Original_String_Length (" + input.length() + ")"); println("Original_String == Decompressed_String=" + (input.equals(decompressed) ? "True" : "False")); // end } public static String compress(String str) throws Exception { return compress(str.getBytes("UTF-8")); } public static String compress(byte[] bytes) throws Exception { Deflater deflater = new Deflater(); deflater.setInput(bytes); deflater.finish(); //deflater.deflate(bytes, 2, bytes.length); ByteArrayOutputStream bos = new ByteArrayOutputStream(bytes.length); byte[] buffer = new byte[1024]; while(!deflater.finished()) { int count = deflater.deflate(buffer); bos.write(buffer, 0, count); } bos.close(); byte[] output = bos.toByteArray(); return encodeBase64(output); } public static String decompress(byte[] bytes) throws Exception { Inflater inflater = new Inflater(); inflater.setInput(bytes); ByteArrayOutputStream bos = new ByteArrayOutputStream(bytes.length); byte[] buffer = new byte[1024]; while (!inflater.finished()) { int count = inflater.inflate(buffer); bos.write(buffer, 0, count); } bos.close(); byte[] output = bos.toByteArray(); return new String(output); } public static String encodeBase64(byte[] bytes) throws Exception { BASE64Encoder base64Encoder = new BASE64Encoder(); return base64Encoder.encodeBuffer(bytes).replace("\r\n", "").replace("\n", ""); } public static byte[] decodeBase64(String str) throws Exception { BASE64Decoder base64Decoder = new BASE64Decoder(); return base64Decoder.decodeBuffer(str); } public static void println(Object o) { System.out.println("" + o); }
Strange behavior in GZIPOutputStream/GZIPInputStream
I have reduced the strange issue in this code to the minimum. This program writes 128,000 times the bytes for (int)90000 into a file and then tries to read it back in. set zipped=false and everything works like a charm set zipped=true and everything works like a charm until the 496th chunk of 1024 bytes. At that point a single byte is lost and everything is shifted to the left by one byte (see output) ... 0 1 95 -112- which is byte code for int 90,000 Counters: 496 126937 1 95 -112 0- which is byte code for int 23,040,000 ... this is the code i came up with. I just can't figure out why it suddenly breaks in the middle of doing the same thing over and over. Any help/insights/explainers much appreciated. public class TestApp7 { static final boolean zipped = true; static File theFile = null; private static void writeZipData() throws Exception { FileOutputStream fos = new FileOutputStream(theFile); BufferedOutputStream bos = null; if (zipped) { GZIPOutputStream gzout = new GZIPOutputStream(fos); bos = new BufferedOutputStream(gzout); } else bos = new BufferedOutputStream(fos); byte[] bs9 = RHUtilities.toByteArray((int)90000); for (int i=0; i<128000; i++) bos.write(bs9); bos.flush(); bos.close(); } private static void readZipData() throws Exception { byte[] buf = new byte[1024]; int chunkCounter = 0; int intCounter = 0; FileInputStream fin = new FileInputStream(theFile); int rdLen = 0; if (zipped) { GZIPInputStream gin = new GZIPInputStream(fin); while ((rdLen = gin.read(buf)) != -1) { System.out.println("Counters: " + chunkCounter + " " + intCounter); for (int i=0; i<rdLen/4; i++) { byte[] bs = Arrays.copyOfRange(buf,(i*4),((i+1)*4)); intCounter++; System.out.print(bs[0] + " " + bs[1] + " " + bs[2] + " " + bs[3]); } chunkCounter++; } gin.close(); } else { while ((rdLen = fin.read(buf)) != -1) { System.out.println("Counters: " + chunkCounter + " " + intCounter); for (int i=0; i<rdLen/4; i++) { byte[] bs = Arrays.copyOfRange(buf,(i*4),((i+1)*4)); intCounter++; 
System.out.print(bs[0] + " " + bs[1] + " " + bs[2] + " " + bs[3]); } chunkCounter++; } } fin.close(); } public static void main(String args[]) { try { if (zipped) theFile = new File("Test.gz"); else theFile = new File("Test.dat"); writeZipData(); readZipData(); } catch (Throwable e) { e.printStackTrace(); } } }
So based on Jon's wonderful comments ... you cannot rely on .read(buffer) filling the buffer even when there are more bytes in the stream - it stops at the boundary where the BufferedOutputStream-wrapped GZIPOutputStream saved a chunk of data. just add another read to go beyond the boundary and complete the chunk while ((rdLen = gin.read(buf)) != -1) { if (rdLen<chunksize) { byte[] missBytes = new byte[chunksize-rdLen]; int rdLine_miss = 0; if ((rdLine_miss = gin.read(missBytes)) > 0) System.arraycopy(missBytes,0,buf,rdLen,rdLine_miss); rdLen += rdLine_miss; } for (int i=0; i<rdLen/4; i++) { byte[] bs = Arrays.copyOfRange(buf,(i*4),((i+1)*4)); intCounter++; System.out.println(bs[0] + " " + bs[1] + " " + bs[2] + " " + bs[3] + " "); } chunkCounter++; }
java OutOfMemoryError about FileOutputStream?
Thanks for everyone ^_^,the problem is solved:there is a single line is too big(over 400M...I download a damaged file while I didn't realize), so throw a OutOfMemoryError I want to split a file by using java,but it always throw OutOfMemoryError: Java heap space,I searched on the whole Internet,but it looks like no help :( ps. the file's size is 600M,and it have over 30,000,000 lines,every line is no longer than 100 chars. (maybe you can generate a "level file" like this:{ id:0000000001,level:1 id:0000000002,level:2 ....(over 30 millions) }) pss. set the Jvm memory size larger is not work,:( psss. I changed to another PC, problem remains/(ćoć)/~~ no matter how large the -Xms or -Xmx I set,the outputFile's size is always same,(and the Runtime.getRuntime().totalMemory() is truely changed) here's the stack trace: Heap Size = 2058027008 Exception in thread "main" java.lang.OutOfMemoryError: Java heap space at java.util.Arrays.copyOf(Arrays.java:2882) at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:100) at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:515) at java.lang.StringBuffer.append(StringBuffer.java:306) at java.io.BufferedReader.readLine(BufferedReader.java:345) at java.io.BufferedReader.readLine(BufferedReader.java:362) at com.xiaomi.vip.tools.ptupdate.updator.Spilt.main(Spilt.java:39) ... here's my code: package com.updator; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; public class Spilt { public static void main(String[] args) throws Exception { long heapSize = Runtime.getRuntime().totalMemory(); // Print the jvm heap size. 
System.out.println("Heap Size = " + heapSize); String mainPath = "/home/work/bingo/"; File mainFilePath = new File(mainPath); FileInputStream inputStream = null; FileOutputStream outputStream = null; try { if (!mainFilePath.exists()) mainFilePath.mkdir(); String sourcePath = "/home/work/bingo/level.txt"; inputStream = new FileInputStream(sourcePath); BufferedReader bufferedReader = new BufferedReader(new FileReader( new File(sourcePath))); String savePath = mainPath + "tmp/"; Integer i = 0; File file = new File(savePath + "part" + String.format("%0" + 5 + "d", i) + ".txt"); if (!file.getParentFile().exists()) file.getParentFile().mkdir(); file.createNewFile(); outputStream = new FileOutputStream(file); int count = 0, total = 0; String line = null; while ((line = bufferedReader.readLine()) != null) { line += '\n'; outputStream.write(line.getBytes("UTF-8")); count++; total++; if (count > 4000000) { outputStream.flush(); outputStream.close(); System.gc(); count = 0; i++; file = new File(savePath + "part" + String.format("%0" + 5 + "d", i) + ".txt"); file.createNewFile(); outputStream = new FileOutputStream(file); } } outputStream.close(); file = new File(mainFilePath + "_SUCCESS"); file.createNewFile(); outputStream = new FileOutputStream(file); outputStream.write(i.toString().getBytes("UTF-8")); } finally { if (inputStream != null) inputStream.close(); if (outputStream != null) outputStream.close(); } } } I think maybe: when outputStream.close(),the memory did not release?
So you open the original file and create a BufferedReaderand a counter for the lines. char[] buffer = new char[5120]; BufferedReader reader = Files.newBufferedReader(Paths.get(sourcePath), StandardCharsets.UTF_8); int lineCount = 0; Now you read into your buffer, and write the characters as they come in. int read; BufferedWriter writer = Files.newBufferedWriter(Paths.get(fileName), StandardCharsets.UTF_8); while((read = reader.read(buffer, 0, 5120))>0){ int offset = 0; for(int i = 0; i<read; i++){ char c = buffer[i]; if(c=='\n'){ lineCount++; if(lineCount==maxLineCount){ //write the range from 0 to i to your old writer. writer.write(buffer, offset, i-offset); writer.close(); offset=i; lineCount=0; writer = Files.newBufferedWriter(Paths.get(newName), StandarCharset.UTF_8); } } writer.write(buffer, offset, read-offset); } writer.close(); } That should keep the memory usage lower and prevent you from reading too large of a line at once. You could go without BufferedWriters and control the memory even more, but I don't think that is necessary.
I've tested with large text file.(250Mb) it works well. You need to add try catch exception codes for file stream. public class MyTest { public static void main(String[] args) { String mainPath = "/home/work/bingo/"; File mainFilePath = new File(mainPath); FileInputStream inputStream = null; FileOutputStream outputStream = null; try { if (!mainFilePath.exists()) mainFilePath.mkdir(); String sourcePath = "/home/work/bingo/level.txt"; inputStream = new FileInputStream(sourcePath); Scanner scanner = new Scanner(inputStream, "UTF-8"); String savePath = mainPath + "tmp/"; Integer i = 0; File file = new File(savePath + "part" + String.format("%0" + 5 + "d", i) + ".txt"); if (!file.getParentFile().exists()) file.getParentFile().mkdir(); file.createNewFile(); outputStream = new FileOutputStream(file); int count = 0, total = 0; while (scanner.hasNextLine()) { String line = scanner.nextLine() + "\n"; outputStream.write(line.getBytes("UTF-8")); count++; total++; if (count > 4000000) { outputStream.flush(); outputStream.close(); count = 0; i++; file = new File(savePath + "part" + String.format("%0" + 5 + "d", i) + ".txt"); file.createNewFile(); outputStream = new FileOutputStream(file); } } outputStream.close(); file = new File(mainFilePath + "_SUCCESS"); file.createNewFile(); outputStream = new FileOutputStream(file); outputStream.write(i.toString().getBytes("UTF-8")); } catch (FileNotFoundException e) { System.out.println("ERROR: FileNotFoundException :: " + e.getStackTrace()); } catch (IOException e) { System.out.println("ERROR: IOException :: " + e.getStackTrace()); } finally { if (inputStream != null) try { inputStream.close(); if (outputStream != null) outputStream.close(); } catch (IOException e) { e.printStackTrace(); } } } } if the problem still occurs, change java heap memory size with following command on the shell prompt. ex) Xmx1g : 1Gb heap memory size, MyTest : class name java -Xmx1g MyTest
Is there any compression method in java to reduce the number of charaters in a string?
I am currently facing a problem while compressing a string to fewer characters in java. I have a huge string which is about 751396 characters and there is a requirement of compressing the string into a 1500 characters. I have tried GZIP Compressor, Inflater & Deflater but these libraries return byte arrays Then I tried LZ-String compressor in which I was able to get satisfactory results using UTF16 encoding and base64 encoding, But these compression return some characters which are neither alphanumeric nor are they included in the symbols list provided. N.B. The list for the Symbols is [+,-,*,/,!,#,#] is there any other technique of compressing the string into another string with fewer characters and providing at least 30% of compression ratio. The codes which I am using for GZip compression is as follows:- import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; public class GZIPCompression { public static byte[] compress(final String str) throws IOException { if ((str == null) || (str.length() == 0)) { return null; } ByteArrayOutputStream obj = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(obj); gzip.write(str.getBytes("UTF-8")); gzip.close(); return obj.toByteArray(); } public static String decompress(final byte[] compressed) throws IOException { String outStr = ""; if ((compressed == null) || (compressed.length == 0)) { return ""; } if (isCompressed(compressed)) { GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed)); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(gis, "UTF-8")); String line; while ((line = bufferedReader.readLine()) != null) { outStr += line; } } else { outStr = new String(compressed); } return outStr; } public static boolean isCompressed(final byte[] compressed) { return 
(compressed[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (compressed[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8)); } } The code for the Inflater & Deflater program is as follows:- import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Arrays; import java.util.zip.DataFormatException; import java.util.zip.Deflater; import java.util.zip.Inflater; public class Apple { public static void main(String[] args) { String sr = " [120,-100,-19,89,91,79,-21,56,16,-2,43,40,-49,104,55,113,-18,-68,-91,-23,21,104,90,-38,-62,10,-83,120,48,-83,91,34,-46,-92,-21,-92,8,-124,-8,-17,103,-100,-92,77,-22,-38,-25,112,86,27,-119,106,65,84,-86,103,-58,-10,124,-98,-15,101,-66,-66,43,25,126,-99,-112,116,-109,-60,41,81,46,-34,-107,-57,36,121,14,-29,-43,-20,109,3,77,101,-94,-100,43,120,-79,-115,50,63,-39,-58,25,8,52,16,-52,-97,-62,104,-79,19,-88,32,8,-29,37,-114,-77,-70,-92,71,-78,89,125,-36,-65,-33,-107,5,-50,-40,-120,-86,-11,39,82,-31,95,-77,-40,-48,-21,-94,15,82,13,11,-58,-115,112,-102,-6,-55,-126,-103,-7,126,-65,53,-22,-113,-64,-58,123,-63,97,52,37,-85,53,97,-106,-17,74,55,10,87,79,-39,96,-63,-100,65,76,31,-46,40,-116,73,-39,111,-38,3,81,97,18,108,-41,-113,-124,-126,-52,-48,-100,-62,-50,-89,120,-103,-107,-56,108,-99,9,71,52,92,-123,49,52,91,-41,-109,125,19,44,55,9,-51,102,-124,-82,-61,24,71,-96,5,85,-101,-92,25,-76,-78,48,-55,-51,71,-61,67,-103,-92,-49,6,-45,108,75,73,27,-80,-49,-62,53,-101,23,-64,-25,75,-96,89,103,72,-67,48,-44,11,-107,-83,-105,71,105,-8,-126,35,-119,29,-70,-48,74,-69,-10,-106,-18,92,-48,-98,104,122,-90,-85,48,93,10,-118,2,108,-78,-100,102,-55,38,85,46,-44,115,-27,46,-60,-123,23,-2,-106,82,18,-49,-33,-54,21,26,4,-109,-35,-86,-114,-15,107,23,-125,119,36,-125,70,-102,71,-55,23,-58,96,47,5,-60,-13,-61,-24,-80,-28,96,97,105,-31,52,-100,123,101,60,53,-61,112,33,12,48,54,19,-61,-56,74,-112,116,41,-127,-42,74,41,-28,-69,-4,34,-53,109,-68,-64,-113,17,1,-7,-3,77,-18,-8,44,-55,112,4,-39,-77,27,12,68,61,-102,-92,-23,126,112,-45,48,-64,-91,100,-67,14,-45,
-76,88,11,-45,-2,-61,-75,-108,-113,-113,-13,67,0,-99,-114,12,64,-91,17,3,-128,-124,108,14,0,-46,28,-99,3,-32,104,66,0,-35,-82,12,64,-91,-111,0,48,-35,6,1,-40,-74,-54,1,-48,84,93,-120,-96,-33,-105,33,-88,52,98,4,-70,-34,96,8,116,-45,114,121,4,-70,24,-63,-27,-91,12,65,-91,-111,32,112,27,-116,-127,-127,44,-115,71,96,-70,66,4,87,87,50,4,-107,70,-116,-64,112,26,-116,-127,-87,89,54,-113,-64,21,-57,-32,-6,90,-122,-96,-46,-120,17,-104,77,34,-80,-112,-50,111,100,36,-55,-94,-31,80,-122,-96,-46,-120,17,-40,77,30,69,-74,-87,-15,89,-124,36,103,81,16,-56,16,84,26,49,2,71,111,112,31,56,-82,-55,-97,69,-70,110,10,17,-116,70,50,4,-107,70,-116,-64,117,26,68,-96,-87,-90,-31,-16,16,92,49,-124,-101,27,25,-124,74,35,-71,-110,-31,-38,108,16,3,-46,85,126,51,27,-106,56,-111,38,19,25,-122,74,35,-63,-48,-24,-99,-96,25,8,-103,-4,-61,66,-78,-99,-89,83,25,-122,74,35,-63,96,-94,38,115,-55,-46,85,-2,72,-78,52,113,28,102,51,25,-122,74,35,-63,96,55,-71,-93,53,-57,52,-8,45,109,-19,-10,-61,-61,-57,-71,-78,43,43,-90,25,-50,-74,48,57,-100,96,-73,41,-95,51,-118,-25,-49,121,93,48,25,-34,-113,6,-82,-83,-62,-3,91,124,116,-37,-123,67,-56,50,12,3,-23,-102,-19,-72,-106,-125,92,56,-25,-64,39,-16,99,-76,-51,54,-37,18,-28,106,-123,87,-60,-117,23,67,-126,-93,-76,27,1,-100,-36,-29,-68,-108,41,-86,-118,-78,18,41,94,-53,71,-75,8,91,46,-80,-50,-21,20,-74,58,-108,-32,-25,-37,-51,-2,-127,93,-82,-93,-16,69,46,20,10,94,-43,-99,127,-74,-84,84,0,31,-40,12,59,41,76,90,127,-58,-77,64,-46,112,83,-106,10,-29,105,-105,57,-73,-49,64,-107,-91,-62,-95,-55,109,-69,110,-94,-101,-24,-40,-92,55,-99,-43,76,28,-29,-40,-30,-22,-54,-81,15,-14,-15,112,28,108,-45,97,-50,82,28,-89,120,-50,122,117,9,-55,-105,120,74,-24,75,56,39,-2,19,-90,43,-112,56,-4,-117,51,47,16,-123,111,126,54,-53,-124,-64,-94,80,-78,-24,-122,36,90,-28,-21,-100,71,2,-60,53,-55,42,-65,-59,-64,-63,-63,98,76,-109,100,89,-74,-58,-112,-5,-84,116,-37,-105,-117,65,94,-65,-58,-117,-68,113,-49,-118,46,-88,-54,70,-53,86,72,-77,39,-82,79,-25,117,19,-46,-73,118,81,-39,-42,21,-125,52,-3
5,66,21,-99,-105,-60,-12,-83,84,6,121,-23,-122,-93,48,43,36,-112,-54,62,43,-91,-65,-66,-101,-125,-68,-64,111,-60,-49,-5,-1,-50,79,112,52,-33,-73,-5,-8,-105,45,-40,15,-20,91,-71,-79,-18,122,67,118,-78,49,-55,97,-10,6,-55,89,-47,-95,80,-18,-113,39,-106,-25,-121,-3,-81,-123,-3,107,-118,-86,80,50,-71,-34,99,-65,-27,11,123,-113,59,94,112,59,-101,-98,121,65,-5,-84,-43,-71,-13,38,94,-81,-61,-115,-90,25,-68,47,-63,-99,-60,-105,-102,66,-18,75,32,-13,37,-16,-4,-2,-24,55,93,-71,12,36,-82,92,122,-125,-32,108,-40,-15,120,127,116,-109,31,-94,-35,-110,12,-47,30,120,-83,-50,108,-32,127,110,24,95,6,-53,-9,-90,-3,-65,58,-97,89,-27,-121,-113,-4,-74,-84,81,86,-58,17,101,5,23,-54,33,101,85,-29,20,126,66,89,-63,-33,39,73,43,-3,-41,-92,85,-50,66,125,-98,-76,-54,57,-82,127,69,90,25,123,50,74,117,-10,100,-108,-128,-76,-86,-20,-64,72,64,90,33,70,90,53,-55,89,125,85,-54,7,-23,-4,-3,117,98,-108,-113,-125,-8,119,-27,-87,81,62,22,66,39,78,-7,-24,26,79,124,-98,26,-27,-125,-48,17,113,120,106,-108,-113,-61,111,-28,-109,-93,124,44,62,-117,78,-116,-14,113,-43,-93,26,-9,-28,40,31,75,-27,121,-73,19,-92,124,44,126,51,-97,32,-27,99,-13,-44,-37,9,82,62,38,127,36,-99,32,-27,-29,30,-47,86,95,-97,-14,41,-33,-14,-115,-110,62,-59,-77,-108,39,125,10,-23,79,73,-97,-39,-92,-50,-24,-120,56,-97,-33,-90,-123,12,83,-1,21,45,-92,33,1,115,116,-56,11,25,34,94,-56,-74,63,-59,11,-79,95,43,-72,119,-87,-50,-1,-112,-73,-69,-52,-66,-119,-95,-26,-35,-4,38,-122,-66,-119,-95,-1,27,49,4,-97,31,15,0,-88,84]"; byte[] data = sr.getBytes(); try { String x = new String(decompress(compress(data))); System.out.println("decompressed " + x); } catch (IOException | DataFormatException e) { e.printStackTrace(); } } public static byte[] compress(byte[] data) throws IOException { Deflater deflater = new Deflater(); deflater.setInput(data); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(data.length); deflater.finish(); byte[] buffer = new byte[1024]; while (!deflater.finished()) { int count = deflater.deflate(buffer); 
outputStream.write(buffer, 0, count); } outputStream.close(); byte[] output = outputStream.toByteArray(); System.out.println("Original: " + data.length); System.out.println("Compressed: " + output.length); return output; } public static byte[] decompress(byte[] data) throws IOException, DataFormatException { Inflater inflater = new Inflater(); inflater.setInput(data); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(data.length); byte[] buffer = new byte[1024]; while (!inflater.finished()) { int count = inflater.inflate(buffer); outputStream.write(buffer, 0, count); } outputStream.close(); byte[] output = outputStream.toByteArray(); System.out.println(); return output; } } A sample of how the data will look like:- "120,-100,-19,89,91,79,-21,56,16,-2,43,40,-49,104,55,113,-18,-68,-91,-23,21,104,90,-38,-62,10,-83,120,48,-83,91,34,-46,-92,-21,-92,8,-124,-8,-17,103,-100,-92,77,-22,-38,-25,112,86,27,-119,106,65,84,-86,103,-58,-10,124,-98,-15,101,-66,-66,43,25,126,-99,-112,116,-109,-60,41,81,46,-34,-107,-57,36,121,14,-29,-43,-20,109,3,77,101,-94,-100,43,120,-79,-115,50,63,-39,-58,25,8,52,16,-52,-97,-62,104,-79,19,-88,32,8,-29,37,-114,-77,-70,-92,71,-78,89,125,-36,-65,-33,-107,5,-50,-40,-120,-86,-11,39,82,-31,95,-77,-40,-48,-21,-94,15,82,13,11,-58,-115,112,-102,-6,-55,-126,-103,-7,126,-65,53,-22,-113,-64,-58,123,-63,97,52,37,-85,53,97,-106,-17,74,55,10,87,79,-39,96,-63,-100,65,76,31,-46,40,-116,73,-39,111,-38,3,81,97,18,108,-41,-113,-124,-126,-52,-48,-100,-62,-50,-89,120,-103,-107,-56,108,-99,9,71,52,92,-123,49,52,91,-41,-109,125,19,44,55,9,-51,102,-124,-82,-61,24,71,-96,5,85,-101,-92,25,-76,-78,48,-55,-51,71,-61,67,-103,-92,-49,6,-45,108,75,73,27,-80,-49,-62,53,-101,23,-64,-25,75,-96,89,103,72,-67,48,-44,11,-107,-83,-105,71,105,-8,-126,35,-119,29,-70,-48,74,-69,-10,-106,-18,92,-48,-98,104,122,-90,-85,48,93,10,-118,2,108,-78,-100,102,-55,38,85,46,-44,115,-27,46,-60,-123,23,-2,-106,82,18,-49,-33,-54,21,26,4,-109,-35,-86,-114,-15,107,23,-125,119,36,-125,70,-102,
71,-55,23,-58,96,47,5,-60,-13,-61,-24,-80,-28,96,97,105,-31,52,-100,123,101,60,53,-61,112,33,12,48,54,19,-61,-56,74,-112,116,41,-127,-42,74,41,-28,-69,-4,34,-53,109,-68,-64,-113,17,1,-7,-3,77,-18,-8,44,-55,112,4,-39,-77,27,12,68,61,-102,-92,-23,126,112,-45,48,-64,-91,100,-67,14,-45,-76,88,11,-45,-2,-61,-75,-108,-113,-113,-13,67,0,-99,-114,12,64,-91,17,3,-128,-124,108,14,0,-46,28,-99,3,-32,104,66,0,-35,-82,12,64,-91,-111,0,48,-35,6,1,-40,-74,-54,1,-48,84,93,-120,-96,-33,-105,33,-88,52,98,4,-70,-34,96,8,116,-45,114,121,4,-70,24,-63,-27,-91,12,65,-91,-111,32,112,27,-116,-127,-127,44,-115,71,96,-70,66,4,87,87,50,4,-107,70,-116,-64,112,26,-116,-127,-87,89,54,-113,-64,21,-57,-32,-6,90,-122,-96,-46,-120,17,-104,77,34,-80,-112,-50,111,100,36,-55,-94,-31,80,-122,-96,-46,-120,17,-40,77,30,69,-74,-87,-15,89,-124,36,103,81,16,-56,16,84,26,49,2,71,111,112,31,56,-82,-55,-97,69,-70,110,10,17,-116,70,50,4,-107,70,-116,-64,117,26,68,-96,-87,-90,-31,-16,16,92,49,-124,-101,27,25,-124,74,35,-71,-110,-31,-38,108,16,3,-46,85,126,51,27,-106,56,-111,38,19,25,-122,74,35,-63,-48,-24,-99,-96,25,8,-103,-4,-61,66,-78,-99,-89,83,25,-122,74,35,-63,96,-94,38,115,-55,-46,85,-2,72,-78,52,113,28,102,51,25,-122,74,35,-63,96,55,-71,-93,53,-57,52,-8,45,109,-19,-10,-61,-61,-57,-71,-78,43,43,-90,25,-50,-74,48,57,-100,96,-73,41,-95,51,-118,-25,-49,121,93,48,25,-34,-113,6,-82,-83,-62,-3,91,124,116,-37,-123,67,-56,50,12,3,-23,-102,-19,-72,-106,-125,92,56,-25,-64,39,-16,99,-76,-51,54,-37,18,-28,106,-123,87,-60,-117,23,67,-126,-93,-76,27,1,-100,-36,-29,-68,-108,41,-86,-118,-78,18,41,94,-53,71,-75,8,91,46,-80,-50,-21,20,-74,58,-108,-32,-25,-37,-51,-2,-127,93,-82,-93,-16,69,46,20,10,94,-43,-99,127,-74,-84,84,0,31,-40,12,59,41,76,90,127,-58,-77,64,-46,112,83,-106,10,-29,105,-105,57,-73,-49,64,-107,-91,-62,-95,-55,109,-69,110,-94,-101,-24,-40,-92,55,-99,-43,76,28,-29,-40,-30,-22,-54,-81,15,-14,-15,112,28,108,-45,97,-50,82,28,-89,120,-50,122,117,9,-55,-105,120,74,-24,75,56,39,-2,19,-90,43,-112,56,-4,-117,51,47,16,-
123,111,126,54,-53,-124,-64,-94,80,-78,-24,-122,36,90,-28,-21,-100,71,2,-60,53,-55,42,-65,-59,-64,-63,-63,98,76,-109,100,89,-74,-58,-112,-5,-84,116,-37,-105,-117,65,94,-65,-58,-117,-68,113,-49,-118,46,-88,-54,70,-53,86,72,-77,39,-82,79,-25,117,19,-46,-73,118,81,-39,-42,21,-125,52,-35,66,21,-99,-105,-60,-12,-83,84,6,121,-23,-122,-93,48,43,36,-112,-54,62,43,-91,-65,-66,-101,-125,-68,-64,111,-60,-49,-5,-1,-50,79,112,52,-33,-73,-5,-8,-105,45,-40,15,-20,91,-71,-79,-18,122,67,118,-78,49,-55,97,-10,6,-55,89,-47,-95,80,-18,-113,39,-106,-25,-121,-3,-81,-123,-3,107,-118,-86,80,50,-71,-34,99,-65,-27,11,123,-113,59,94,112,59,-101,-98,121,65,-5,-84,-43,-71,-13,38,94,-81,-61,-115,-90,25,-68,47,-63,-99,-60,-105,-102,66,-18,75,32,-13,37,-16,-4,-2,-24,55,93,-71,12,36,-82,92,122,-125,-32,108,-40,-15,120,127,116,-109,31,-94,-35,-110,12,-47,30,120,-83,-50,108,-32,127,110,24,95,6,-53,-9,-90,-3,-65,58,-97,89,-27,-121,-113,-4,-74,-84,81,86,-58,17,101,5,23,-54,33,101,85,-29,20,126,66,89,-63,-33,39,73,43,-3,-41,-92,85,-50,66,125,-98,-76,-54,57,-82,127,69,90,25,123,50,74,117,-10,100,-108,-128,-76,-86,-20,-64,72,64,90,33,70,90,53,-55,89,125,85,-54,7,-23,-4,-3,117,98,-108,-113,-125,-8,119,-27,-87,81,62,22,66,39,78,-7,-24,26,79,124,-98,26,-27,-125,-48,17,113,120,106,-108,-113,-61,111,-28,-109,-93,124,44,62,-117,78,-116,-14,113,-43,-93,26,-9,-28,40,31,75,-27,121,-73,19,-92,124,44,126,51,-97,32,-27,99,-13,-44,-37,9,82,62,38,127,36,-99,32,-27,-29,30,-47,86,95,-97,-14,41,-33,-14,-115,-110,62,-59,-77,-108,39,125,10,-23,79,73,-97,-39,-92,-50,-24,-120,56,-97,-33,-90,-123,12,83,-1,21,45,-92,33,1,115,116,-56,11,25,34,94,-56,-74,63,-59,11,-79,95,43,-72,119,-87,-50,-1,-112,-73,-69,-52,-66,-119,-95,-26,-35,-4,38,-122,-66,-119,-95,-1,27,49,4,-97,31,15,0,-88,84" Is there a better Option for reducing the number of characters in a string without converting it to byte array and unwanted characters? Thanks in advance,
You can compress to a byte[] and then encode the result in Base64. This will only use alphanumeric and fewer symbols which are safe for transfering as text. i.e. it is widely used for this. public static void main(String[] args) { StringBuilder sb = new StringBuilder(); while (sb.length() < 751396) sb.append("Size: ").append(sb.length()).append("\n"); String s = sb.toString(); String s2 = deflateBase64(s); System.out.println("Uncompressed size = " + s.length() + ", compressed size=" + s2.length()); String s3 = inflateBase64(s2); System.out.println("Same after inflating is " + s3.equals(s)); } public static String deflateBase64(String text) { try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (Writer writer = new OutputStreamWriter(new DeflaterOutputStream(baos))) { writer.write(text); } return Base64.getEncoder().encodeToString(baos.toByteArray()); } catch (IOException e) { throw new AssertionError(e); } } public static String inflateBase64(String base64) { try (Reader reader = new InputStreamReader( new InflaterInputStream( new ByteArrayInputStream( Base64.getDecoder().decode(base64))))) { StringWriter sw = new StringWriter(); char[] chars = new char[1024]; for (int len; (len = reader.read(chars)) > 0; ) sw.write(chars, 0, len); return sw.toString(); } catch (IOException e) { throw new AssertionError(e); } } prints Uncompressed size = 751400, compressed size=219564 Same after inflating is true
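You can configure the Deflater a little more aggressively: public static byte[] compress(byte[] data) throws IOException { new Deflater(Deflater.BEST_COMPRESSION, true); //... } This gives you the strongest compression level and, because the second argument (nowrap) is true, the zlib header and checksum are omitted. Note that the decompressing side must then be created with new Inflater(true) as well; a default Inflater will reject the headerless data (typically with an "incorrect header check" error). This is the best you can do with the built-in algorithms.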
Exception in thread "main" java.util.zip.ZipException: Not in GZIP format, when compressing a string using the UTF-8 charset (works with ISO-8859-1)
In my program I'm trying to compress/decompress data using GZIP streams. When I use the charset "ISO-8859-1" everything works well, but when I change the charset to "UTF-8" I get the error message "Exception in thread "main" java.util.zip.ZipException: Not in GZIP format". This is my code: public static String compress(String str) throws IOException { if (str == null || str.length() == 0) { return str; } System.out.println("String length : " + str.length()); ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(str.getBytes()); gzip.close(); String outStr = out.toString("UTF-8"); System.out.println("Output String lenght : " + outStr.length()); System.out.println("Output : " + outStr.toString()); return outStr; } public static String decompress(String str) throws IOException { if (str == null || str.length() == 0) { return str; } System.out.println("Input String length : " + str.length()); GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(str.getBytes("UTF-8"))); BufferedReader bf = new BufferedReader(new InputStreamReader(gis, "UTF-8")); String outStr = ""; String line; while ((line=bf.readLine())!=null) { outStr += line; } System.out.println("Output String lenght : " + outStr.length()); return outStr; } public static void main(String[] args) throws IOException { String string = "my data"; System.out.println("after compress:"); String compressed = compress(string); System.out.println(compressed); System.out.println("after decompress:"); String decomp = decompress(compressed); System.out.println(decomp); }
String outStr = out.toString("UTF-8"); This "out" is ziped byte stream,encode it to String then decode it from String will be lose some bytes.This maybe a bug of java. To Resolve it,you can encode bytes to String in compress() to return, such as : String infoBase64Encode = new String(Base64.encodeBase64(out.toByteArray())) decode String to bytes in decompress() to return, such as : String infoBase64Decode = Base64.decodeBase64(decryptAESinfo) complete code as follows: public static String compress(String str) throws IOException { if (str == null || str.length() == 0) { return str; } System.out.println("String length : " + str.length()); ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(str.getBytes()); gzip.close(); String outStr = new String(Base64.encodeBase64(out.toByteArray())); System.out.println("Output String lenght : " + outStr.length()); System.out.println("Output : " + outStr.toString()); return outStr; } public static String decompress(String str) throws IOException { if (str == null || str.length() == 0) { return str; } System.out.println("Input String length : " + str.length()); GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(Base64.decodeBase64(str))); String outStr = ""; ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buffer = new byte[256]; int n; while ((n = gis.read(buffer)) >= 0) { out.write(buffer, 0, n); } System.out.println("Output String lenght : " + outStr.length()); return new String(out.toByteArray()); } public static void main(String[] args) throws IOException { String string = "my data"; System.out.println("after compress:"); String compressed = compress(string); System.out.println(compressed); System.out.println("after decompress:"); String decomp = decompress(compressed); System.out.println(decomp); }