Get progress information during JAXB de-/serialization - java

Is there a way to register some progress monitor on JAXB Marshaller and Unmarshaller?
I would like to show some progress information in my GUI while data is de-/serialized.
I see that you can set a Unmarshaller.Listener and Marshaller.Listener, which have a "before" and "after" method. Nevertheless, I do not see any straight forward way to get the total number of elements to serialize.
I would need that obviously to calculate some "percentage done" info.

Is it ok to parse before unmarshalling?
If so, assuming you have a list of objects, you could do something like...
final String tagName = *** name of tag you are counting ***;
InputStream in = *** stream of your xml ***;
SAXParserFactory spf = SAXParserFactory.newInstance();
SAXParser saxParser = spf.newSAXParser();
final AtomicInteger counter = new AtomicInteger();
saxParser.parse(in, new DefaultHandler() {
#Override
public void startElement (String uri, String localName, String qName, Attributes attributes) {
if (localName.equals(tagName))
counter.incrementAndGet();
}
});

Would doing a more low-level approach by leveraging on the InputStream be an acceptable solution?
E.g.
import java.io.IOException;
import java.io.InputStream;
import java.util.function.DoubleConsumer;
public class InputStreamWithProgressDecorator extends InputStream {
/** Input stream to be decorated */ private final InputStream inputStream;
/** Amount of byte read */ private long position = 0L;
/** File size */ private final long length;
/** Mark */ private int mark = 0;
/** Consumer of the progress */ private final DoubleConsumer callBack;
public InputStreamWithProgressDecorator(final InputStream is, final long l, final DoubleConsumer cb) {
inputStream = is;
length = l;
callBack = cb;
}
private void setPosition(final long fp) {
position = fp;
callBack.accept(getProgress());
}
public double getProgress() {
return length == 0L ? 100d : ((double) position) * 100d / ((double) length);
}
public long getPosition() {
return position;
}
#Override
public int read(byte[] b) throws IOException {
final int rc = inputStream.read(b);
setPosition(position + rc);
return rc;
}
#Override
public int read(byte[] b, int off, int len) throws IOException {
final int rc = inputStream.read(b, off, len);
setPosition(position + rc);
return rc;
}
#Override
public byte[] readAllBytes() throws IOException {
final byte[] result = inputStream.readAllBytes();
setPosition(position + result.length);
return result;
}
#Override
public byte[] readNBytes(int len) throws IOException {
final byte[] result = inputStream.readNBytes(len);
setPosition(position + result.length);
return result;
}
#Override
public int readNBytes(byte[] b, int off, int len) throws IOException {
final int rc = inputStream.readNBytes(b, off, len);
setPosition(position + rc);
return rc;
}
#Override
public long skip(long n) throws IOException {
final long rc = inputStream.skip(n);
setPosition(position + rc);
return rc;
}
#Override
public int available() throws IOException {
return inputStream.available();
}
#Override
public void close() throws IOException {
inputStream.close();
}
#Override
public synchronized void mark(int readlimit) {
inputStream.mark(readlimit);
mark = readlimit;
}
#Override
public synchronized void reset() throws IOException {
inputStream.reset();
setPosition(mark);
}
#Override
public boolean markSupported() {
return inputStream.markSupported();
}
#Override
public int read() throws IOException {
final int c = inputStream.read();
setPosition(position + 1);
return c;
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.function.DoubleConsumer;
public class Demo1 {
public static void main(String[] args) throws IOException {
final File file = new File(args[0]);
final DoubleConsumer callBack = p -> System.out.printf("%.0f%%\n", p);
try (final FileInputStream fis = new FileInputStream(file); final InputStreamWithProgressDecorator is = new InputStreamWithProgressDecorator(fis, file.length(), callBack)) {
// Simulating JAXB unmarshaller reads
byte[] buffer = is.readNBytes(1024);
while (buffer.length != 0) buffer = is.readNBytes(1024);
}
}
}
Or if you have a FileInputStream with a separate Thread approach :
public class FileInputStreamReadProgressThread extends Thread implements UncaughtExceptionHandler {
/** Input stream */ private final FileInputStream fileInputStream;
/** File size */ private final long length;
/** Read progress in percents */ private double progress = 0d;
/** Exception from thread */ private Throwable exception = null;
/** Consumer of the progress */ private final DoubleConsumer callBack;
public FileInputStreamReadProgressThread(final FileInputStream fis, final long l, final DoubleConsumer cb) {
fileInputStream = fis;
length = l;
callBack = cb;
setUncaughtExceptionHandler(this);
setName(getClass().getSimpleName());
}
public double getProgress() { return progress; }
public Throwable getException() { return exception; }
#Override public void uncaughtException(final Thread t, final Throwable e) { exception = e; }
#Override
public void run() {
try {
long position = -1L;
final FileChannel channel = fileInputStream.getChannel();
while (!isInterrupted() && channel.isOpen() && position < length) {
position = channel.position();
progress = length == 0L ? 100d : ((double)position) * 100d / ((double)length);
callBack.accept(progress);
sleep(100L);
}
} catch (final IOException e) {
exception = e;
} catch (final InterruptedException e) {
// Do nothing
}
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.channels.Channels;
import java.util.function.DoubleConsumer;
public class Demo2 {
public static void main(String[] args) throws IOException {
final File file = new File(args[0]);
final DoubleConsumer callBack = p -> System.out.printf("%.0f%%\n", p);
try (final FileInputStream fis = new FileInputStream(file); final InputStream is = Channels.newInputStream(fis.getChannel())) {
final FileInputStreamReadProgressThread readProgressThread = new FileInputStreamReadProgressThread(fis, file.length(), callBack);
readProgressThread.start();
// Simulating JAXB unmarshaller reads
is.readAllBytes();
}
}
}

Related

How we can read the request body in a filter without affecting the original request in java?

(Java ver. 8)
I need to process the request body in a filter. Using the below code, I read the body.
private static String convertInputStreamToString(InputStream is) throws IOException {
ByteArrayOutputStream result = new ByteArrayOutputStream();
byte[] buffer = new byte[1024 * 50];
int length;
while ((length = is.read(buffer)) != -1) {
result.write(buffer, 0, length);
}
return result.toString("UTF-8");
}
The issue is if there are parameters posted by request body with the content type "application/x-www-form-urlencoded", then the parameters won't be available after reading the body. They are available to get using request.getParameter(), if I don't read the body.
Moreover, I tried using the below code to wrap the request and provide the body, so it would be available to the rest of the solution (e.g. servlets), but the issue with losing the parameters happens yet. code is copied/adopted from this post
public class RequestWrapper extends HttpServletRequestWrapper {
private final String body;
public RequestWrapper(HttpServletRequest request) throws IOException {
super(request);
body = convertInputStreamToString(request.getInputStream());
}
private static String convertInputStreamToString(InputStream is) throws IOException {
ByteArrayOutputStream result = new ByteArrayOutputStream();
byte[] buffer = new byte[1024 * 50];
int length;
while ((length = is.read(buffer)) != -1) {
result.write(buffer, 0, length);
}
return result.toString("UTF-8");
}
#Override
public ServletInputStream getInputStream() throws IOException {
final byte[] myBytes = body.getBytes("UTF-8");
ServletInputStream servletInputStream = new ServletInputStream() {
private int lastIndexRetrieved = -1;
private ReadListener readListener = null;
#Override
public boolean isFinished() {
return (lastIndexRetrieved == myBytes.length - 1);
}
#Override
public boolean isReady() {
return isFinished();
}
#Override
public void setReadListener(ReadListener readListener) {
this.readListener = readListener;
if (!isFinished()) {
try {
readListener.onDataAvailable();
} catch (IOException e) {
readListener.onError(e);
}
} else {
try {
readListener.onAllDataRead();
} catch (IOException e) {
readListener.onError(e);
}
}
}
#Override
public int read() throws IOException {
int i;
if (!isFinished()) {
i = myBytes[lastIndexRetrieved + 1];
lastIndexRetrieved++;
if (isFinished() && (readListener != null)) {
try {
readListener.onAllDataRead();
} catch (IOException ex) {
readListener.onError(ex);
throw ex;
}
}
return i;
} else {
return -1;
}
}
};
return servletInputStream;
}
#Override
public BufferedReader getReader() throws IOException {
return new BufferedReader(new InputStreamReader(this.getInputStream()));
}
}
I tried to run the code you mentioned you're using and I think the accepted answer may not solve your issue as it's quite old. Seems you also need to overwrite the getParameter, getParameterMap and getParameterValues methods. I tried to do that based on this answer from the same post and seems it works. Here is the code:
public class MultiReadHttpServletRequest extends HttpServletRequestWrapper {
private ByteArrayOutputStream cachedBytes;
private String body;
private Map<String, String[]> parameterMap;
public MultiReadHttpServletRequest(HttpServletRequest request) throws IOException {
super(request);
parameterMap = super.getParameterMap();
cacheBodyAsString();
System.out.println("The Body read into a String is: " + body);
}
#Override
public ServletInputStream getInputStream() throws IOException {
if (cachedBytes == null)
cacheInputStream();
return new CachedServletInputStream(cachedBytes.toByteArray());
}
#Override
public BufferedReader getReader() throws IOException {
return new BufferedReader(new InputStreamReader(getInputStream()));
}
#Override
public String getParameter(String key) {
Map<String, String[]> parameterMap = getParameterMap();
String[] values = parameterMap.get(key);
return values != null && values.length > 0 ? values[0] : null;
}
#Override
public String[] getParameterValues(String key) {
Map<String, String[]> parameterMap = getParameterMap();
return parameterMap.get(key);
}
#Override
public Map<String, String[]> getParameterMap() {
return parameterMap;
}
private void cacheInputStream() throws IOException {
// Cache the inputstream in order to read it multiple times
cachedBytes = new ByteArrayOutputStream();
byte[] buffer = new byte[1024 * 50];
int length;
InputStream is = super.getInputStream();
while ((length = is.read(buffer)) != -1) {
cachedBytes.write(buffer, 0, length);
}
}
private void cacheBodyAsString() throws IOException {
ByteArrayOutputStream result = new ByteArrayOutputStream();
byte[] buffer = new byte[1024 * 50];
int length;
InputStream is = getInputStream();
while ((length = is.read(buffer)) != -1) {
result.write(buffer, 0, length);
}
body = result.toString("UTF-8");
}
}
public class CachedServletInputStream extends ServletInputStream {
private final ByteArrayInputStream buffer;
public CachedServletInputStream(byte[] contents) {
this.buffer = new ByteArrayInputStream(contents);
}
#Override
public int read() {
return buffer.read();
}
#Override
public boolean isFinished() {
return buffer.available() == 0;
}
#Override
public boolean isReady() {
return true;
}
#Override
public void setReadListener(ReadListener listener) {
throw new RuntimeException("Not implemented");
}
}
This is just a sample implementation. I highly recommend to follow the steps specified in the answer mentioned above as it seems to be newer and it also ensures that the parameters are being read from both body and query string. My code is just a sample sketch to see if it works as expected.
Thank you #zaerymoghaddam for helping with this.
I was concerning if I am affecting the request object implicitly, so the rest of the solution is lacking something in it.
Moreover, I found that parameterMap = super.getParameterMap(); is not icluding the parameters from body (in case of post with content type of "application/x-www-form-urlencoded")
With a little bit of change of your code I came up with below solution:
public class MyRequestWrapper extends HttpServletRequestWrapper {
private ByteArrayOutputStream cachedBytes;
private String body;
private Map<String, String[]> parameterMap;
private static int bufferLength = 1024 * 50;
public MyRequestWrapper(final HttpServletRequest request) throws IOException {
super(request);
cacheBodyAsString();
parameterMap = new HashMap<>(super.getParameterMap());
addParametersFromBody();
}
#Override
public ServletInputStream getInputStream() throws IOException {
return new CachedServletInputStream(cachedBytes.toByteArray());
}
#Override
public BufferedReader getReader() throws IOException {
return new BufferedReader(new InputStreamReader(this.getInputStream()));
}
public String GetRequestBodyAsString() {
return this.body;
}
#Override
public String getParameter(String key) {
Map<String, String[]> parameterMap = getParameterMap();
String[] values = parameterMap.get(key);
return values != null && values.length > 0 ? values[0] : null;
}
#Override
public String[] getParameterValues(String key) {
Map<String, String[]> parameterMap = getParameterMap();
return parameterMap.get(key);
}
#Override
public Map<String, String[]> getParameterMap() {
return parameterMap;
}
private void cacheInputStream() throws IOException {
cachedBytes = new ByteArrayOutputStream();
byte[] buffer = new byte[bufferLength];
int length;
InputStream is = super.getInputStream();
while ((length = is.read(buffer)) != -1) {
cachedBytes.write(buffer, 0, length);
}
}
private void cacheBodyAsString() throws IOException {
if (cachedBytes == null)
cacheInputStream();
this.body = cachedBytes.toString("UTF-8");
}
private void addParametersFromBody() {
if(this.body == null || this.body.isEmpty())
return;
String[] params = this.body.split("&");
String[] value = new String[1];
for (String param : params) {
String key = param.split("=")[0];
value[0] = param.split("=")[1];
parameterMap.putIfAbsent(key, value);
}
}
class CachedServletInputStream extends ServletInputStream {
private final ByteArrayInputStream buffer;
public CachedServletInputStream(byte[] contents) {
this.buffer = new ByteArrayInputStream(contents);
}
#Override
public int read() {
return buffer.read();
}
#Override
public boolean isFinished() {
return buffer.available() == 0;
}
#Override
public boolean isReady() {
return true;
}
#Override
public void setReadListener(ReadListener listener) {
throw new RuntimeException("Not implemented");
}
}
}
Strangely HttpServletRequest content may only be read once. It comes as a stream so once you read the stream it is gone. So you need some wrapper that allows you multiple reads. Spring actually provides such wrapper. The name of the class is ContentCachingRequestWrapper. Here its Javadoc. Here is the answer that explains how to use it if you work with Spring boot: How to get request body params in spring filter?

How to store an ArrayList in a file?

I have a class which represents an ArrayList stored in a file, because I need an ArrayList with multiple gigabytes of data in it which is obviously too large to be stored in memory. The data is represented by a class called Field and the function Field.parse() is just for converting the Field into a String and the other way.
The Field class stores a list of (strange) chess pieces and their coordinates.
My class is working fine, but it takes a long time to add an element to the file and I need my program to run as fast as possible. Does anyone know a more efficient/faster way of doing things?
Also, I am not allowed to use external libraries/apis. Please keep that in mind.
This is the class which is responsible for storing Field objects in a temp file:
private File file;
private BufferedReader reader;
private BufferedWriter writer;
public FieldSaver() {
try {
file = File.createTempFile("chess-moves-", ".temp");
System.out.println(file.getAbsolutePath());
} catch (IOException e) {
e.printStackTrace();
}
}
public void add(Field field) {
try {
File temp = File.createTempFile("chess-moves-", ".temp");
writer = new BufferedWriter(new FileWriter(temp));
reader = new BufferedReader(new FileReader(file));
String line;
while((line = reader.readLine()) != null ) {
writer.write(line);
writer.newLine();
}
reader.close();
writer.write(field.parse());
writer.close();
file.delete();
file = new File(temp.getAbsolutePath());
} catch (IOException e) {
e.printStackTrace();
}
}
public Field get(int n) {
try {
reader = new BufferedReader(new FileReader(file));
for (int i = 0; i < n; i++) {
reader.readLine();
}
String line = reader.readLine();
reader.close();
return Field.parse(line);
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
And this is the Field class:
private WildBoar wildBoar;
private HuntingDog[] huntingDogs;
private Hunter hunter;
private int size;
#Override
public String toString() {
String result = "Wildschwein: " + wildBoar.toString();
for (HuntingDog dog : huntingDogs) {
result += "; Hund: " + dog.toString();
}
return result + "; Jäger: " + hunter.toString();
}
#Override
public boolean equals(Object obj) {
if (obj instanceof Field) {
Field field = (Field) obj;
HuntingDog[] dogs = field.getHuntingDogs();
return wildBoar.equals(field.getWildBoar()) && hunter.equals(field.getHunter()) && huntingDogs[0].equals(dogs[0]) && huntingDogs[1].equals(dogs[1]) && huntingDogs[2].equals(dogs[2]);
}
return false;
}
public Field(int size, WildBoar wildBoar, HuntingDog[] huntingDogs, Hunter hunter) {
this.size = size;
this.wildBoar = wildBoar;
this.huntingDogs = huntingDogs;
this.hunter = hunter;
}
public WildBoar getWildBoar() {
return wildBoar;
}
public HuntingDog[] getHuntingDogs() {
return huntingDogs;
}
public Hunter getHunter() {
return hunter;
}
public int getSize() {
return size;
}
public static Field parse(String s) {
String[] arr = s.split(",");
WildBoar boar = WildBoar.parse(arr[0]);
Hunter hunter = Hunter.parse(arr[1]);
HuntingDog[] dogs = new HuntingDog[arr.length - 2];
for(int i = 2; i < arr.length; i++) {
dogs[i - 2] = HuntingDog.parse(arr[i]);
}
return new Field(8, boar, dogs, hunter);
}
public String parse() {
String result = wildBoar.parse() + "," + hunter.parse();
for(HuntingDog dog : huntingDogs) {
result += "," + dog.parse();
}
return result;
}
Here's an MCVE to do what you want, based on the information you provided.
You can run it and see that it can save a Field to the file and get a Field by index very quickly.
The Fields are constant length, so you can get a Field by index by going to byte offset of index times field length in bytes. This would be significantly more difficult if the field were not constant length.
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
public class FieldSaver implements Closeable {
public static void main(String[] args) throws IOException {
File f = File.createTempFile("chess-moves-", ".temp");
try (FieldSaver test = new FieldSaver(f);) {
for (byte i = 0; i < 100; i++) {
test.add(new Field(8, new WildBoar(i, i), new Hunter(i, i), new HuntingDog[] {
new HuntingDog(i, i),
new HuntingDog(i, i),
new HuntingDog(i, i) }));
}
// Get a few Fields by index
System.out.println(test.get(0));
System.out.println(test.get(50));
System.out.println(test.get(99));
// EOF exception, there is no Field 100
// System.out.println(test.get(100));
}
}
private final RandomAccessFile data;
public FieldSaver(File f) throws FileNotFoundException {
data = new RandomAccessFile(f, "rw");
}
public void add(Field field) throws IOException {
data.seek(data.length());
field.write(data);
}
public Field get(int index) throws IOException {
data.seek(index * Field.STORAGE_LENGTH_BYTES);
return Field.read(data);
}
public void close() throws IOException { data.close(); }
static abstract class Piece {
protected byte xPos;
protected byte yPos;
public Piece(DataInput data) throws IOException {
xPos = data.readByte();
yPos = data.readByte();
}
public Piece(byte xPos, byte yPos) {
this.xPos = xPos;
this.yPos = yPos;
}
public void write(DataOutput data) throws IOException {
data.writeByte(xPos);
data.writeByte(yPos);
}
public String toString() { return "[" + xPos + ", " + yPos + "]"; }
}
static class Hunter extends Piece {
public Hunter(byte xPos, byte yPos) { super(xPos, yPos); }
public Hunter(DataInput data) throws IOException { super(data); }
}
static class HuntingDog extends Piece {
public HuntingDog(byte xPos, byte yPos) { super(xPos, yPos); }
public HuntingDog(DataInput data) throws IOException { super(data); }
}
static class WildBoar extends Piece {
public WildBoar(byte xPos, byte yPos) { super(xPos, yPos); }
public WildBoar(DataInput data) throws IOException { super(data); }
}
static class Field {
// size of boar + hunter + 3 dogs
public static final int STORAGE_LENGTH_BYTES = 2 + 2 + (3 * 2);
private int size;
private WildBoar boar;
private Hunter hunter;
private final HuntingDog[] dogs;
public Field(int size, WildBoar wildBoar, Hunter hunter, HuntingDog[] huntingDogs) {
this.size = size;
this.boar = wildBoar;
this.hunter = hunter;
this.dogs = huntingDogs;
}
public String toString() {
String result = "Wildschwein: " + boar.toString();
for (HuntingDog dog : dogs) {
result += "; Hund: " + dog.toString();
}
return result + "; Jäger: " + hunter.toString();
}
public static Field read(DataInput data) throws IOException {
WildBoar boar = new WildBoar(data);
Hunter hunter = new Hunter(data);
HuntingDog[] dogs = new HuntingDog[3];
for (int i = 0; i < 3; i++) {
dogs[i] = new HuntingDog(data);
}
return new Field(8, boar, hunter, dogs);
}
public void write(DataOutput data) throws IOException {
boar.write(data);
hunter.write(data);
for (HuntingDog dog : dogs) {
dog.write(data);
}
}
}
}
Use a Map implementation like Cache from ehcache. This library will optimize for you so you don't have to handle writing and reading to disk and manage when to keep it in memory or on disk. You can just use it as a normal map. You probably want a map instead of a list for faster lookup so the library can optimize even more for you.
http://www.ehcache.org/
CacheManager cacheManager = CacheManagerBuilder.newCacheManagerBuilder()
.withCache("preConfigured",
CacheConfigurationBuilder.newCacheConfigurationBuilder(Long.class, String.class,
ResourcePoolsBuilder.heap(100))
.build())
.build(true);
Cache<Long, String> preConfigured
= cacheManager.getCache("preConfigured", Long.class, String.class);

How to getProgress of large files using XMLStreamReader

I am using below code to read large xml file (in GBs) in hadoop RecordReader using XMLStreamReader
public class RecordReader {
int progressCouunt = 0;
public RecordReader() {
XMLInputFactory factory = XMLInputFactory.newInstance();
FSDataInputStream fdDataInputStream = fs.open(file); //hdfs file
try {
reader = factory.createXMLStreamReader(fdDataInputStream);
} catch (XMLStreamException exception) {
throw new RuntimeException("XMLStreamException exception : ", exception);
}
}
#Override
public float getProgress() throws IOException, InterruptedException {
return progressCouunt;
}
}
My question is how to get reading progress of the file with XMLStreamReader as it does not provide any start or end position to calculate the progress percentage.
I have refered to How do I keep track of parsing progress of large files in StAX?, but cannot user filterReader.
Please help me here.
You could wrap the InputStream by extending FilterInputStream.
public interface InputStreamListener {
void onBytesRead(long totalBytes);
}
public class PublishingInputStream extends FilterInputStream {
private final InputStreamListener;
private long totalBytes = 0;
public PublishingInputStream(InputStream in, InputStreamListener listener) {
super(in);
this.listener = listener;
}
#Override
public int read(byte[] b) {
int count = super.read(b);
this.totalBytes += count;
this.listener.onBytesRead(totalBytes);
}
// TODO: override the other read() methods
}
Usage
XMLInputFactory factory = XMLInputFactory.newInstance();
InputStream in = fs.open(file);
final long fileSize = someHadoopService.getFileLength(file);
InputStremListener listener = new InputStreamListener() {
public void onBytesRead(long totalBytes) {
System.out.println(String.format("Read %s of %s bytes", totalBytes, fileSize));
}
};
InputStream publishingIn = new PublishingInputStream(in, listener);
try {
reader = factory.createXMLStreamReader(publishingIn);
// etc

Logging all network traffic in Spring mvc

I have spring mvc application using RequestBody and ResponseBody annotations. They are configured with MappingJackson2HttpMessageConverter. I also have slf4j set up. I would like to log all json as it comes in and out from my controller.
I did extend
MappingJackson2HttpMessageConverter
#Override
public Object read(Type type, Class<?> contextClass, HttpInputMessage inputMessage)
throws IOException, HttpMessageNotReadableException {
logStream(inputMessage.getBody());
return super.read(type, contextClass, inputMessage);
}
I can get the input stream, but if I read the content it becomes empty and I loose the message. Moreover mark() and reset() is not supported. It is implemented by PushbackInputStream, so I tried to read it's content and push it back like this:
public void logStream(InputStream is) {
if (is instanceof PushbackInputStream)
try {
PushbackInputStream pushbackInputStream = (PushbackInputStream) is;
byte[] bytes = new byte[20000];
StringBuilder sb = new StringBuilder(is.available());
int red = is.read();
int pos =0;
while (red > -1) {
bytes[pos] = (byte) red;
pos=1 + pos;
red = is.read();
}
pushbackInputStream.unread(bytes,0, pos-1);
log.info("Json payload " + sb.toString());
} catch (Exception e) {
log.error("ignoring exception in logger ", e);
}
}
but I get exception
java.io.IOException: Push back buffer is full
I also tried to turn on logging on http level as described here:Spring RestTemplate - how to enable full debugging/logging of requests/responses? without luck.
After more than whole work day of experimenting I got working solution.
It consists of Logging filter, two wrappers for request and response and registration of Logging filter:
the filter class is:
/**
* Http logging filter, which wraps around request and response in
* each http call and logs
* whole request and response bodies. It is enabled by
* putting this instance into filter chain
* by overriding getServletFilters() in
* AbstractAnnotationConfigDispatcherServletInitializer.
*/
public class LoggingFilter extends AbstractRequestLoggingFilter {
private static final Logger log = LoggerFactory.getLogger(LoggingFilter.class);
#Override
protected void doFilterInternal(HttpServletRequest request, HttpServletResponse response, FilterChain filterChain)
throws ServletException, IOException {
long id = System.currentTimeMillis();
RequestLoggingWrapper requestLoggingWrapper = new RequestLoggingWrapper(id, request);
ResponseLoggingWrapper responseLoggingWrapper = new ResponseLoggingWrapper(id, response);
log.debug(id + ": http request " + request.getRequestURI());
super.doFilterInternal(requestLoggingWrapper, responseLoggingWrapper, filterChain);
log.debug(id + ": http response " + response.getStatus() + " finished in " + (System.currentTimeMillis() - id) + "ms");
}
#Override
protected void beforeRequest(HttpServletRequest request, String message) {
}
#Override
protected void afterRequest(HttpServletRequest request, String message) {
}
}
this class is using stream wrappers, which was suggested by
Master Slave and David Ehrmann.
Request wrapper looks like this:
/**
* Request logging wrapper using proxy split stream to extract request body
*/
public class RequestLoggingWrapper extends HttpServletRequestWrapper {
private static final Logger log = LoggerFactory.getLogger(RequestLoggingWrapper.class);
private final ByteArrayOutputStream bos = new ByteArrayOutputStream();
private long id;
/**
* #param requestId and id which gets logged to output file. It's used to bind request with
* response
* #param request request from which we want to extract post data
*/
public RequestLoggingWrapper(Long requestId, HttpServletRequest request) {
super(request);
this.id = requestId;
}
#Override
public ServletInputStream getInputStream() throws IOException {
final ServletInputStream servletInputStream = RequestLoggingWrapper.super.getInputStream();
return new ServletInputStream() {
private TeeInputStream tee = new TeeInputStream(servletInputStream, bos);
#Override
public int read() throws IOException {
return tee.read();
}
#Override
public int read(byte[] b, int off, int len) throws IOException {
return tee.read(b, off, len);
}
#Override
public int read(byte[] b) throws IOException {
return tee.read(b);
}
#Override
public boolean isFinished() {
return servletInputStream.isFinished();
}
#Override
public boolean isReady() {
return servletInputStream.isReady();
}
#Override
public void setReadListener(ReadListener readListener) {
servletInputStream.setReadListener(readListener);
}
#Override
public void close() throws IOException {
super.close();
// do the logging
logRequest();
}
};
}
public void logRequest() {
log.info(getId() + ": http request " + new String(toByteArray()));
}
public byte[] toByteArray() {
return bos.toByteArray();
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
}
and response wrapper is different only in close/flush method (close doesn't get called)
public class ResponseLoggingWrapper extends HttpServletResponseWrapper {
private static final Logger log = LoggerFactory.getLogger(ResponseLoggingWrapper.class);
private final ByteArrayOutputStream bos = new ByteArrayOutputStream();
private long id;
/**
* #param requestId and id which gets logged to output file. It's used to bind response with
* response (they will have same id, currenttimemilis is used)
* #param response response from which we want to extract stream data
*/
public ResponseLoggingWrapper(Long requestId, HttpServletResponse response) {
super(response);
this.id = requestId;
}
#Override
public ServletOutputStream getOutputStream() throws IOException {
final ServletOutputStream servletOutputStream = ResponseLoggingWrapper.super.getOutputStream();
return new ServletOutputStream() {
private TeeOutputStream tee = new TeeOutputStream(servletOutputStream, bos);
#Override
public void write(byte[] b) throws IOException {
tee.write(b);
}
#Override
public void write(byte[] b, int off, int len) throws IOException {
tee.write(b, off, len);
}
#Override
public void flush() throws IOException {
tee.flush();
logRequest();
}
#Override
public void write(int b) throws IOException {
tee.write(b);
}
#Override
public boolean isReady() {
return servletOutputStream.isReady();
}
#Override
public void setWriteListener(WriteListener writeListener) {
servletOutputStream.setWriteListener(writeListener);
}
#Override
public void close() throws IOException {
super.close();
// do the logging
logRequest();
}
};
}
public void logRequest() {
byte[] toLog = toByteArray();
if (toLog != null && toLog.length > 0)
log.info(getId() + ": http response " + new String(toLog));
}
/**
* this method will clear the buffer, so
*
* #return captured bytes from stream
*/
public byte[] toByteArray() {
byte[] ret = bos.toByteArray();
bos.reset();
return ret;
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
}
at last LoggingFilter needs to be registered in AbstractAnnotationConfigDispatcherServletInitializer like this:
#Override
protected Filter[] getServletFilters() {
LoggingFilter requestLoggingFilter = new LoggingFilter();
return new Filter[]{requestLoggingFilter};
}
I know, there is maven lib for this, but I don't want to include whole lib because of small logging utility. It was much harder than I originally thought. I expected to achieve this just by modifying log4j.properties. I still think this should be part of Spring.
It sounds like you want to decorate HttpInputMessage so it returns a decorated InputStream that logs all reads in an internal buffer, then on close() or finalize() logs what was read.
Here's an InputStream that will capture what was read:
public class LoggingInputStream extends FilterInputStream {
private ByteArrayOutputStream out = new ByteArrayOutputStream();
private boolean logged = false;
protected LoggingInputStream(InputStream in) {
super(in);
}
#Override
protected void finalize() throws Throwable {
try {
this.log();
} finally {
super.finalize();
}
}
#Override
public void close() throws IOException {
try {
this.log();
} finally {
super.close();
}
}
#Override
public int read() throws IOException {
int r = super.read();
if (r >= 0) {
out.write(r);
}
return r;
}
#Override
public int read(byte[] b) throws IOException {
int read = super.read(b);
if (read > 0) {
out.write(b, 0, read);
}
return read;
}
#Override
public int read(byte[] b, int off, int len) throws IOException {
int read = super.read(b, off, len);
if (read > 0) {
out.write(b, off, read);
}
return read;
}
#Override
public long skip(long n) throws IOException {
long skipped = 0;
byte[] b = new byte[4096];
int read;
while ((read = this.read(b, 0, (int)Math.min(n, b.length))) >= 0) {
skipped += read;
n -= read;
}
return skipped;
}
private void log() {
if (!logged) {
logged = true;
try {
log.info("Json payload " + new String(out.toByteArray(), "UTF-8");
} catch (UnsupportedEncodingException e) { }
}
}
}
And now
#Override
public Object read(Type type, Class<?> contextClass, final HttpInputMessage inputMessage)
throws IOException, HttpMessageNotReadableException {
return super.read(type, contextClass, new HttpInputMessage() {
#Override
public InputStream getBody() {
return new LoggingInputStream(inputMessage.getBody());
}
#Override
public HttpHeaders getHeaders() {
return inputMessage.getHeaders();
}
});
}
Decorating HttpInputMessage as David Ehrmann suggested is one likely solution.
The whole trouble with this functionality is that it requires InputStream to be read more than once. However, this is not possible, once you read a portion or a stream, its "consumed" and there no way to go back and read it again.
A typical solution is to apply a filter that will create a wrapper for a request that will allow re-reads of the inputStream. One approach is by using the TeeInputStream which copies all the bytes read from InputStream to a secondary OutputStream.
There's a github project that uses just that kind of a filter and in fact just for the same purpose spring-mvc-logger The RequestWrapper class used
public class RequestWrapper extends HttpServletRequestWrapper {
private final ByteArrayOutputStream bos = new ByteArrayOutputStream();
private long id;
public RequestWrapper(Long requestId, HttpServletRequest request) {
super(request);
this.id = requestId;
}
#Override
public ServletInputStream getInputStream() throws IOException {
return new ServletInputStream() {
private TeeInputStream tee = new TeeInputStream(RequestWrapper.super.getInputStream(), bos);
#Override
public int read() throws IOException {
return tee.read();
}
};
}
public byte[] toByteArray(){
return bos.toByteArray();
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
}
A similar implementation wraps the response as well

PIG Custom loader's getNext() is being called again and again

I have started working with Apache Pig for one of our projects. I have to create a custom input format to load our data files. For this, I followed this example Hadoop:Custom Input format. I also created my custom RecordReader implementation to read the data (we get our data in binary format from some other application) and parse that to proper JSON format.
The problem occurs when I use my custom loader in Pig script. As soon as my loader's getNext() method is invoked, it calls my custom RecordReader's nextKeyValue() method, which works fine. It reads the data properly, passes it back to my loader which parses the data and returns a Tuple. So far so good.
The problem arises when my loader's getNext() method is called again and again. It gets called, works fine, and returns the proper output (I debugged it till return statement). But then, instead of letting the execution go further, my loader gets called again. I tried to see the number of times my loader is called, and I could see the number go till 20K!
Can somebody please help me understand the problem in my code?
Loader
public class SimpleTextLoaderCustomFormat extends LoadFunc {
protected RecordReader in = null;
private byte fieldDel = '\t';
private ArrayList<Object> mProtoTuple = null;
private TupleFactory mTupleFactory = TupleFactory.getInstance();
#Override
public Tuple getNext() throws IOException {
Tuple t = null;
try {
boolean notDone = in.nextKeyValue();
if (!notDone) {
return null;
}
String value = (String) in.getCurrentValue();
byte[] buf = value.getBytes();
int len = value.length();
int start = 0;
for (int i = 0; i < len; i++) {
if (buf[i] == fieldDel) {
readField(buf, start, i);
start = i + 1;
}
}
// pick up the last field
readField(buf, start, len);
t = mTupleFactory.newTupleNoCopy(mProtoTuple);
mProtoTuple = null;
} catch (InterruptedException e) {
int errCode = 6018;
String errMsg = "Error while reading input";
e.printStackTrace();
throw new ExecException(errMsg, errCode,
PigException.REMOTE_ENVIRONMENT, e);
}
return t;
}
private void readField(byte[] buf, int start, int end) {
if (mProtoTuple == null) {
mProtoTuple = new ArrayList<Object>();
}
if (start == end) {
// NULL value
mProtoTuple.add(null);
} else {
mProtoTuple.add(new DataByteArray(buf, start, end));
}
}
#Override
public InputFormat getInputFormat() throws IOException {
//return new TextInputFormat();
return new CustomStringInputFormat();
}
#Override
public void setLocation(String location, Job job) throws IOException {
FileInputFormat.setInputPaths(job, location);
}
#Override
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
in = reader;
}
Custom InputFormat
public class CustomStringInputFormat extends FileInputFormat<String, String> {
#Override
public RecordReader<String, String> createRecordReader(InputSplit arg0,
TaskAttemptContext arg1) throws IOException, InterruptedException {
return new CustomStringInputRecordReader();
}
}
Custom RecordReader
public class CustomStringInputRecordReader extends RecordReader<String, String> {
private String fileName = null;
private String data = null;
private Path file = null;
private Configuration jc = null;
private static int count = 0;
#Override
public void close() throws IOException {
// jc = null;
// file = null;
}
#Override
public String getCurrentKey() throws IOException, InterruptedException {
return fileName;
}
#Override
public String getCurrentValue() throws IOException, InterruptedException {
return data;
}
#Override
public float getProgress() throws IOException, InterruptedException {
return 0;
}
#Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
throws IOException, InterruptedException {
FileSplit split = (FileSplit) genericSplit;
file = split.getPath();
jc = context.getConfiguration();
}
#Override
public boolean nextKeyValue() throws IOException, InterruptedException {
InputStream is = FileSystem.get(jc).open(file);
StringWriter writer = new StringWriter();
IOUtils.copy(is, writer, "UTF-8");
data = writer.toString();
fileName = file.getName();
writer.close();
is.close();
System.out.println("Count : " + ++count);
return true;
}
}
Try this in Loader
//....
boolean notDone = ((CustomStringInputFormat)in).nextKeyValue();
//...
Text value = new Text(((CustomStringInputFormat))in.getCurrentValue().toString())

Categories

Resources