I am building a collection program in Java that gathers data from websites through their APIs. I am encountering a problem where it hangs on an HTTP call. I tried to work around it by executing each HTTP call through an ExecutorService with a timeout, but that doesn't seem to work: it just keeps timing out and retrying. I figured it might be something to do with the API, so after a retry I reinitialize a whole new object per website API. Still no solution. I am trying to identify the root cause of this but can't seem to put my finger on it.
Here is a look at my Flickr manager class that handles the calls to Flickr.
import java.net.SocketException;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.scribe.exceptions.OAuthConnectionException;
import com.flickr4java.flickr.Flickr;
import com.flickr4java.flickr.FlickrException;
import com.flickr4java.flickr.FlickrRuntimeException;
import com.flickr4java.flickr.REST;
import com.flickr4java.flickr.RequestContext;
import com.flickr4java.flickr.auth.Auth;
import com.flickr4java.flickr.auth.Permission;
import com.flickr4java.flickr.people.User;
import com.flickr4java.flickr.photos.Exif;
import com.flickr4java.flickr.photos.Extras;
import com.flickr4java.flickr.photos.Photo;
import com.flickr4java.flickr.photos.PhotoList;
import com.flickr4java.flickr.photos.SearchParameters;
import com.flickr4java.flickr.photos.Size;
import com.google.common.util.concurrent.RateLimiter;
public class FlickrManager {
private static final Logger LOG = Logger.getLogger(FlickrManager.class.getName());
private static final ExecutorService executorService = Executors.newSingleThreadExecutor();
private Flickr flickr;
private final int MAX_PER_PAGE = 500;
private final RateLimiter rateLimiter;
private String apiKey;
private String apiSecret;
private String authToken;
private String authTokenSecret;
private Integer hostPort;
private String hostAddress;
private String httpScheme;
public FlickrManager(Flickr flickr, double apiCallsPerSecond) throws FlickrException {
this.flickr = flickr;
flickr.getTestInterface().echo(Collections.emptyMap());
//get flickr info to reinitialize flickr object if necessary
this.apiKey = flickr.getApiKey();
this.apiSecret = flickr.getSharedSecret();
this.hostPort = flickr.getTransport().getPort();
this.hostAddress = flickr.getTransport().getHost();
this.httpScheme = flickr.getTransport().getScheme();
if(flickr.getAuth() != null){
this.authToken = flickr.getAuth().getToken();
this.authTokenSecret = flickr.getAuth().getTokenSecret();
}
this.rateLimiter = RateLimiter.create(apiCallsPerSecond);
}
private void initialize(){
this.flickr = null;
REST rest = new REST(this.hostAddress,this.hostPort);
rest.setScheme(this.httpScheme);
this.flickr = new Flickr(this.apiKey, this.apiSecret, rest);
if(this.authToken != null && this.authTokenSecret != null){
RequestContext requestContext = RequestContext.getRequestContext();
Auth auth = new Auth();
auth.setPermission(Permission.READ);
auth.setToken(this.authToken);
auth.setTokenSecret(this.authTokenSecret);
requestContext.setAuth(auth);
flickr.setAuth(auth);
}
}
public User getUserInfo(String flickrProfileId) throws FlickrException{
return doFlickrAction(new CallableFlickrTask<User>(){
@Override
public User execute() throws FlickrException {
return flickr.getPeopleInterface().getInfo(flickrProfileId);
}
});
}
public PhotoList<Photo> search(SearchParameters params, int page) throws FlickrException{
return doFlickrAction(new CallableFlickrTask<PhotoList<Photo>>(){
@Override
public PhotoList<Photo> execute() throws FlickrException {
return flickr.getPhotosInterface().search(params, MAX_PER_PAGE, page);
}
});
}
public PhotoList<Photo> getUserPhotos(String userNSID, int page) throws FlickrException{
return doFlickrAction(new CallableFlickrTask<PhotoList<Photo>>(){
@Override
public PhotoList<Photo> execute() throws FlickrException {
return flickr.getPeopleInterface().getPhotos(
userNSID,
null, null, null, null, null,
Flickr.CONTENTTYPE_PHOTO, null,
Extras.ALL_EXTRAS, 100, page);
}
});
}
//Catch the exception inside the function for failure to get exif
public Collection<Exif> getPhotoExif(Photo photo) throws FlickrException, FlickrRuntimeException {
return doFlickrAction(new CallableFlickrTask<Collection<Exif>>(){
@Override
public Collection<Exif> execute() throws FlickrException {
return flickr.getPhotosInterface().getExif(photo.getId(),photo.getSecret());
}
});
}
public Collection<Size> getAvailablePhotoSizes(Photo photo) throws FlickrException{
return doFlickrAction(new CallableFlickrTask<Collection<Size>>(){
@Override
public Collection<Size> execute() throws FlickrException {
return flickr.getPhotosInterface().getSizes(photo.getId());
}
});
}
private abstract class CallableFlickrTask<T> {
public abstract T execute() throws FlickrException, FlickrRuntimeException;
}
private <T> T doFlickrAction(CallableFlickrTask<T> callable) throws FlickrException {
while(true){
rateLimiter.acquire();
Future<T> future = executorService.submit(new Callable<T>() {
@Override
public T call() throws Exception {
return callable.execute();
}});
try {
return future.get(5, TimeUnit.MINUTES);
} catch (InterruptedException e) {
LOG.log(Level.INFO,"Interrupted exception: {0}",e.getMessage());
initialize(); //reinitialize if it's been interrupted
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if( cause instanceof UnknownHostException ||
cause instanceof SocketException ||
cause instanceof OAuthConnectionException ){
//sleep and retry
LOG.log(Level.INFO,"Unknown Host or Socket exception. Retry: {0}",e.getMessage());
try {
Thread.sleep(10000);
initialize();
} catch (InterruptedException ex) {
LOG.log(Level.INFO, "Thread sleep was interrupted exception: {0}", ex.getMessage());
}
}
//if it's not of the above exceptions, then rethrow
else if (cause instanceof FlickrException) {
throw (FlickrException) cause;
}
else {
throw new IllegalStateException(e);
}
} catch (TimeoutException e) {
LOG.log(Level.INFO,"Timeout Exception: {0}",e.getMessage());
initialize(); //initialize again after timeout
}
}
}
}
I also used jvisualvm to get a look at what the collector is doing while it hangs. The thread dump is here: Thread dump
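One detail that may matter for the hang: doFlickrAction submits every call to a shared single-thread executor, and a timed-out Future is never cancelled, so an HTTP call that never returns keeps occupying the lone worker thread while every retry queues behind it and times out in turn. Below is a minimal, self-contained sketch of the timeout-with-cancellation pattern (an illustration, not the Flickr code itself; the sleeping task stands in for a stuck HTTP call, and whether the real call responds to interruption is an assumption to verify):
```java
import java.util.concurrent.*;

public class TimeoutCancelSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newSingleThreadExecutor();
        Future<String> future = pool.submit(() -> {
            Thread.sleep(60_000); // stands in for an HTTP call that never returns
            return "done";
        });
        try {
            System.out.println(future.get(1, TimeUnit.SECONDS));
        } catch (TimeoutException e) {
            // Without this, the stuck task keeps the single worker busy and every
            // retried submission waits in the queue behind it, timing out in turn.
            future.cancel(true);
        }
        pool.shutdownNow();
    }
}
```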
When I run a Java WebSocketStompClient, I get the error below:
org.eclipse.jetty.websocket.api.MessageTooLargeException: Text message size [73728] exceeds maximum size [65536]
Sample code:
import org.apache.log4j.Logger;
import org.springframework.messaging.simp.stomp.StompFrameHandler;
import org.springframework.messaging.simp.stomp.StompHeaders;
import org.springframework.messaging.simp.stomp.StompSession;
import org.springframework.messaging.simp.stomp.StompSessionHandlerAdapter;
import org.springframework.util.concurrent.ListenableFuture;
import org.springframework.web.socket.WebSocketHttpHeaders;
import org.springframework.web.socket.client.WebSocketClient;
import org.springframework.web.socket.client.standard.StandardWebSocketClient;
import org.springframework.web.socket.messaging.WebSocketStompClient;
import org.springframework.web.socket.sockjs.client.SockJsClient;
import org.springframework.web.socket.sockjs.client.Transport;
import org.springframework.web.socket.sockjs.client.WebSocketTransport;
import org.springframework.web.socket.sockjs.frame.Jackson2SockJsMessageCodec;
import java.lang.reflect.Type;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
public class HelloClient {
private static Logger logger = Logger.getLogger(HelloClient.class);
StompSession session;
private static final WebSocketHttpHeaders headers = new WebSocketHttpHeaders();
public ListenableFuture<StompSession> connect() {
Transport webSocketTransport = new WebSocketTransport(new StandardWebSocketClient());
List<Transport> transports = Collections.singletonList(webSocketTransport);
SockJsClient sockJsClient = new SockJsClient(transports);
sockJsClient.setMessageCodec(new Jackson2SockJsMessageCodec());
WebSocketStompClient stompClient = new WebSocketStompClient(sockJsClient);
long[] hb = stompClient.getDefaultHeartbeat();
boolean en = stompClient.isDefaultHeartbeatEnabled();
long timeout = stompClient.getReceiptTimeLimit();
String url = "https://www.test.com";
return stompClient.connect(url, headers, new MyHandler());
}
public void subscribeMsg(StompSession stompSession) throws ExecutionException, InterruptedException {
stompSession.subscribe("/topic/test", new StompFrameHandler() {
public Type getPayloadType(StompHeaders stompHeaders) {
return byte[].class;
}
public void handleFrame(StompHeaders stompHeaders, Object o) {
logger.info("Received message " + new String((byte[]) o));
String response = new String((byte[]) o);
}
});
}
private class MyHandler extends StompSessionHandlerAdapter {
public void afterConnected(StompSession stompSession, StompHeaders stompHeaders) {
logger.info("Now connected");
session = stompSession;
}
}
public boolean isConnected() {
try {
Thread.sleep(500);
return session != null && session.isConnected();
} catch (Exception e) {
logger.warn("Error happens when checking connection status, ", e);
return false;
}
}
public static void main(String[] args) throws Exception {
HelloClient helloClient = new HelloClient();
ListenableFuture<StompSession> f = helloClient.connect();
StompSession stompSession = f.get();
helloClient.subscribeMsg(stompSession);
while (true) {
if (!helloClient.isConnected()) {
logger.info("wss diconnected ");
logger.info("need re-create ");
}
}
}
}
How can I increase this limit for a Java STOMP WebSocket client? I found some unrelated answers, such as How can I set max buffer size for web socket client (Jetty) in Java, which are not suitable for a STOMP WebSocket client.
I also tried stompClient.setInboundMessageSizeLimit(Integer.MAX_VALUE), which doesn't work.
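For context, setInboundMessageSizeLimit appears to govern how large an aggregated STOMP message may grow, while the MessageTooLargeException above is raised by the underlying WebSocket container for a single frame. Here is a minimal sketch of raising the container's buffers before building the StandardWebSocketClient; whether the JSR-356 container in use honors these defaults is an assumption to verify, and 512 * 1024 is an arbitrary example value:
```java
import javax.websocket.ContainerProvider;
import javax.websocket.WebSocketContainer;
import org.springframework.web.socket.client.standard.StandardWebSocketClient;

public class BigBufferClientFactory {
    // Raise the WebSocket container's per-message buffers, then hand the
    // container to Spring's client so the SockJS/STOMP stack sits on top of it.
    static StandardWebSocketClient create() {
        WebSocketContainer container = ContainerProvider.getWebSocketContainer();
        container.setDefaultMaxTextMessageBufferSize(512 * 1024);
        container.setDefaultMaxBinaryMessageBufferSize(512 * 1024);
        return new StandardWebSocketClient(container);
    }
}
```
The returned client would then replace new StandardWebSocketClient() in connect() above.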
I am trying to figure out why my pipeline is being executed out of order.
I took the HexDumpProxy example and was trying to turn it into an HTTP proxy where I can look at all the traffic. For some reason the code is being executed backwards and I can't figure out why.
My server listens on 8443 and takes in the HTTP content. I wanted to read the Host header and create a frontend handler to route the data to the server, but my frontend handler executes first despite being last in the pipeline. I am unsure why it runs first; I thought the handlers would execute in the following order:
LoggingHandler
HttpRequestDecoder
HttpObjectAggregator
HttpProxyListener
HttpReEncoder
HTTPProxyFrontEnd
The goal is to remove the frontend handler from the pipeline and have the HttpProxyListener add it to the pipeline after reading the Host header, but if I remove the frontend handler no data is transferred. Using breakpoints, HTTPProxyFrontEnd is hit before HttpProxyListener. I am unsure why it is being executed so out of order.
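A likely explanation for the ordering: channelActive is an inbound event fired once when the connection is established, and it traverses the whole pipeline before any channelRead, so the connect logic in HTTPProxyFrontEnd.channelActive (defined below) runs before HttpProxyListener ever sees an HTTP object. For the dynamic-insertion goal, here is a minimal sketch of a router that adds the frontend handler only once the Host header is known. HostHeaderRouter and the port are hypothetical, and it assumes the frontend's connect logic is moved out of channelActive (for example into handlerAdded), since channelActive will already have fired by the time the handler is added:
```java
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.SimpleChannelInboundHandler;
import io.netty.handler.codec.http.FullHttpRequest;
import io.netty.handler.codec.http.HttpHeaderNames;

public class HostHeaderRouter extends SimpleChannelInboundHandler<FullHttpRequest> {
    @Override
    protected void channelRead0(ChannelHandlerContext ctx, FullHttpRequest request) {
        // Route based on the Host header of the first aggregated request.
        String host = request.headers().get(HttpHeaderNames.HOST, "localhost");
        ctx.pipeline().addLast(new HTTPProxyFrontEnd(host, 443));
        // Remove this router, then replay the request to the newly added handler.
        ctx.pipeline().remove(this);
        ctx.fireChannelRead(request.retain());
    }
}
```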
Main
```
EventLoopGroup bossGroup = new NioEventLoopGroup(1);
EventLoopGroup workerGroup = new NioEventLoopGroup();
try {
ServerBootstrap b = new ServerBootstrap();
b.group(bossGroup, workerGroup)
.channel(NioServerSocketChannel.class)
.handler(new LoggingHandler(LogLevel.INFO))
.childHandler(new HttpProxyServerInitializer(REMOTE_HOST, REMOTE_PORT))
.childOption(ChannelOption.AUTO_READ, false)
.bind(LOCAL_PORT).sync().channel().closeFuture().sync();
} finally {
bossGroup.shutdownGracefully();
workerGroup.shutdownGracefully();
}
```
Pipeline
```
import io.netty.buffer.ByteBuf;
import io.netty.channel.Channel;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelPipeline;
import io.netty.handler.codec.MessageToByteEncoder;
import io.netty.handler.codec.http.*;
import io.netty.handler.logging.LogLevel;
import io.netty.handler.logging.LoggingHandler;
import io.netty.handler.ssl.SslContext;
import io.netty.handler.ssl.SslContextBuilder;
import io.netty.handler.ssl.SslHandler;
import io.netty.handler.ssl.util.SelfSignedCertificate;
import javax.net.ssl.SSLEngine;
public class HttpProxyServerInitializer extends ChannelInitializer {
private final String remoteHost;
private final int remotePort;
public HttpProxyServerInitializer(String remoteHost, int remotePort) {
this.remoteHost = remoteHost;
this.remotePort = remotePort;
}
@Override
protected void initChannel(Channel ch) throws Exception {
ch.pipeline().addLast(
new LoggingHandler(LogLevel.INFO),
new HttpRequestDecoder(),
new HttpObjectAggregator(8192),
new HttpProxyListener(),
new HttpReEncoder(),
new HTTPProxyFrontEnd(remoteHost, remotePort));
}
}
```
Proxy Front end
```
import io.netty.bootstrap.Bootstrap;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.*;
import io.netty.channel.embedded.EmbeddedChannel;
import io.netty.handler.codec.DecoderResult;
import io.netty.handler.codec.http.*;
import io.netty.handler.codec.http.cookie.ServerCookieDecoder;
import io.netty.handler.codec.http.cookie.ServerCookieEncoder;
import io.netty.util.CharsetUtil;
import java.net.SocketAddress;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
import static io.netty.handler.codec.http.HttpResponseStatus.OK;
import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1;
public class HTTPProxyFrontEnd extends ChannelInboundHandlerAdapter {
private final String remoteHost;
private final int remotePort;
private final StringBuilder buf = new StringBuilder();
private HttpRequest request;
// As we use inboundChannel.eventLoop() when building the Bootstrap this does not need to be volatile as
// the outboundChannel will use the same EventLoop (and therefore Thread) as the inboundChannel.
private Channel outboundChannel;
public HTTPProxyFrontEnd(String remoteHost, int remotePort) {
this.remoteHost = remoteHost;
this.remotePort = remotePort;
}
@Override
public void channelActive(ChannelHandlerContext ctx) {
System.out.println("HTTPFrontEnd");
final Channel inboundChannel = ctx.channel();
// Start the connection attempt.
Bootstrap b = new Bootstrap();
b.group(inboundChannel.eventLoop())
.channel(ctx.channel().getClass())
.handler(new HexDumpProxyBackendHandler(inboundChannel))
.option(ChannelOption.AUTO_READ, false);
ChannelFuture f = b.connect(remoteHost, remotePort);
SocketAddress test = ctx.channel().remoteAddress();
outboundChannel = f.channel();
f.addListener(new ChannelFutureListener() {
@Override
public void operationComplete(ChannelFuture future) {
if (future.isSuccess()) {
// connection complete start to read first data
inboundChannel.read();
} else {
// Close the connection if the connection attempt has failed.
inboundChannel.close();
}
}
});
}
@Override
public void channelRead(final ChannelHandlerContext ctx, Object msg) throws InterruptedException {
if (outboundChannel.isActive()) {
outboundChannel.writeAndFlush(msg).addListener(new ChannelFutureListener() {
@Override
public void operationComplete(ChannelFuture future) {
if (future.isSuccess()) {
// was able to flush out data, start to read the next chunk
ctx.channel().read();
} else {
future.channel().close();
}
}
});
}
}
private boolean writeResponse(HttpObject currentObj, ChannelHandlerContext ctx) {
// Decide whether to close the connection or not.
boolean keepAlive = HttpUtil.isKeepAlive(request);
// Build the response object.
FullHttpResponse response = new DefaultFullHttpResponse(
HTTP_1_1, currentObj.decoderResult().isSuccess()? OK : BAD_REQUEST,
Unpooled.copiedBuffer(buf.toString(), CharsetUtil.UTF_8));
response.headers().set(HttpHeaderNames.CONTENT_TYPE, "text/plain; charset=UTF-8");
if (keepAlive) {
// Add 'Content-Length' header only for a keep-alive connection.
response.headers().setInt(HttpHeaderNames.CONTENT_LENGTH, response.content().readableBytes());
// Add keep alive header as per:
// - http://www.w3.org/Protocols/HTTP/1.1/draft-ietf-http-v11-spec-01.html#Connection
response.headers().set(HttpHeaderNames.CONNECTION, HttpHeaderValues.KEEP_ALIVE);
}
// Encode the cookie.
String cookieString = request.headers().get(HttpHeaderNames.COOKIE);
if (cookieString != null) {
Set<io.netty.handler.codec.http.cookie.Cookie> cookies = ServerCookieDecoder.STRICT.decode(cookieString);
if (!cookies.isEmpty()) {
// Reset the cookies if necessary.
for (io.netty.handler.codec.http.cookie.Cookie cookie: cookies) {
response.headers().add(HttpHeaderNames.SET_COOKIE, io.netty.handler.codec.http.cookie.ServerCookieEncoder.STRICT.encode(cookie));
}
}
} else {
// Browser sent no cookie. Add some.
response.headers().add(HttpHeaderNames.SET_COOKIE, io.netty.handler.codec.http.cookie.ServerCookieEncoder.STRICT.encode("key1", "value1"));
response.headers().add(HttpHeaderNames.SET_COOKIE, ServerCookieEncoder.STRICT.encode("key2", "value2"));
}
// Write the response.
//ctx.writeAndFlush(response);
return keepAlive;
}
private static void appendDecoderResult(StringBuilder buf, HttpObject o) {
DecoderResult result = o.decoderResult();
if (result.isSuccess()) {
return;
}
buf.append(".. WITH DECODER FAILURE: ");
buf.append(result.cause());
buf.append("\r\n");
}
@Override
public void channelInactive(ChannelHandlerContext ctx) {
if (outboundChannel != null) {
closeOnFlush(outboundChannel);
}
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
cause.printStackTrace();
closeOnFlush(ctx.channel());
}
/**
* Closes the specified channel after all queued write requests are flushed.
*/
static void closeOnFlush(Channel ch) {
if (ch.isActive()) {
ch.writeAndFlush(Unpooled.EMPTY_BUFFER).addListener(ChannelFutureListener.CLOSE);
}
}
}
```
I am trying to receive an HTTP request using non-blocking I/O, then make another HTTP request to another server using non-blocking I/O, and return some response. Here is the code for my servlet:
package learn;
import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import javax.servlet.AsyncContext;
import javax.servlet.ReadListener;
import javax.servlet.ServletException;
import javax.servlet.ServletInputStream;
import javax.servlet.ServletOutputStream;
import javax.servlet.WriteListener;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.InvocationCallback;
import javax.ws.rs.core.Response;
import org.jboss.resteasy.client.jaxrs.ResteasyClientBuilder;
@WebServlet(urlPatterns = {"/async"}, asyncSupported = true)
public class AsyncProcessing extends HttpServlet {
private static final long serialVersionUID = -535924906221872329L;
public CompletableFuture<String> readRequestAsync(final HttpServletRequest req) {
final CompletableFuture<String> request = new CompletableFuture<>();
final StringBuilder httpRequestData = new StringBuilder();
try {
// Plain try instead of try-with-resources: closing the stream when this
// method returns would race with the asynchronous ReadListener below.
final ServletInputStream inputStream = req.getInputStream();
inputStream.setReadListener(new ReadListener() {
final int BUFFER_SIZE = 4*1024;
final byte buffer[] = new byte[BUFFER_SIZE];
@Override
public void onError(Throwable t) {
request.completeExceptionally(t);
}
@Override
public void onDataAvailable() {
if(inputStream.isFinished()) return;
System.out.println("----------------------------------------");
System.out.println("onDataAvailable: " + Thread.currentThread().getName());
try {
while(inputStream.isReady()) {
int length = inputStream.read(buffer);
if (length == -1) break; // end of stream
httpRequestData.append(new String(buffer, 0, length));
}
} catch (IOException ex) {
request.completeExceptionally(ex);
}
}
@Override
public void onAllDataRead() throws IOException {
try {
request.complete(httpRequestData.toString());
}
catch(Exception e) {
request.completeExceptionally(e);
}
}
});
} catch (IOException e) {
request.completeExceptionally(e);
}
return request;
}
private Client createAsyncHttpClient() {
ResteasyClientBuilder restEasyClientBuilder = (ResteasyClientBuilder)ClientBuilder.newBuilder();
return restEasyClientBuilder.useAsyncHttpEngine().connectTimeout(640, TimeUnit.SECONDS).build();
}
public CompletableFuture<Response> process(String httpRequest){
System.out.println("----------------------------------------");
System.out.println("process: " + Thread.currentThread());
CompletableFuture<Response> futureResponse = new CompletableFuture<>();
Client client = createAsyncHttpClient();
client.target("http://localhost:3000").request().async().get(new InvocationCallback<Response>() {
@Override
public void completed(Response response) {
System.out.println("----------------------------------------");
System.out.println("completed: " + Thread.currentThread());
futureResponse.complete(response);
}
@Override
public void failed(Throwable throwable) {
System.out.println(throwable);
futureResponse.completeExceptionally(throwable);
}
});
return futureResponse;
}
public CompletableFuture<Integer> outputResponseAsync(Response httpResponseData, HttpServletResponse resp){
System.out.println("----------------------------------------");
System.out.println("outputResponseAsync: " + Thread.currentThread().getName());
CompletableFuture<Integer> total = new CompletableFuture<>();
try {
// Plain try here as well: the WriteListener fires asynchronously, so the
// stream must not be closed when this method returns.
final ServletOutputStream outputStream = resp.getOutputStream();
outputStream.setWriteListener(new WriteListener() {
@Override
public void onWritePossible() throws IOException {
System.out.println("----------------------------------------");
System.out.println("onWritePossible: " + Thread.currentThread().getName());
outputStream.print(httpResponseData.getStatus());
total.complete(httpResponseData.getLength());
}
@Override
public void onError(Throwable t) {
System.out.println(t);
total.completeExceptionally(t);
}
});
} catch (IOException e) {
System.out.println(e);
total.completeExceptionally(e);
}
return total;
}
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
System.out.println("----------------------------------------");
System.out.println("doGet: " + Thread.currentThread().getName());
final AsyncContext asyncContext = req.startAsync();
readRequestAsync(req)
.thenCompose(this::process)
.thenCompose(httpResponseData -> outputResponseAsync(httpResponseData, resp))
.thenAccept(a -> asyncContext.complete());
}
}
The server at http://localhost:3000 is an HTTP server written in Node which just returns a response after 27 seconds. I would like to make a request to the Node server and, while that request is being processed, make another HTTP request to the servlet to see if the same thread is being used. Currently I'm trying to use Payara 5.194 to do this, but even if I set the two thread pools to have one thread, the app server seems to create other threads. So I would like to know, from your knowledge, whether this servlet is really doing non-blocking I/O and never blocking; it would also be amazing if I could do some experiment to verify this. I think it's important to point out that ServletInputStream is a subclass of InputStream, so I really don't know if this is non-blocking I/O. Thank you.
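One experiment that could answer this: cap the container's HTTP thread pool at one thread (as already attempted) and fire several requests at the servlet concurrently. If the servlet never blocks, all of them should complete in roughly the Node server's 27 seconds rather than one after another. Here is a minimal sketch using Java 11's HttpClient; the URL is a placeholder for the actual deployment path:
```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.concurrent.CompletableFuture;

public class ConcurrencyProbe {
    public static void main(String[] args) {
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest
                .newBuilder(URI.create("http://localhost:8080/app/async")) // placeholder URL
                .build();
        // Fire five requests at once; with a one-thread container pool, a truly
        // non-blocking servlet should still finish them all in about 27 s total.
        CompletableFuture<?>[] inFlight = new CompletableFuture<?>[5];
        for (int i = 0; i < inFlight.length; i++) {
            inFlight[i] = client.sendAsync(request, HttpResponse.BodyHandlers.ofString())
                    .thenAccept(r -> System.out.println("status " + r.statusCode()));
        }
        CompletableFuture.allOf(inFlight).join();
    }
}
```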
I have a recursive watch service that I'm using to monitor directories while the application is running. For an unknown reason, the WatchService appears to stop working after about a day. At that point I can add a new file to a monitored directory, get no log statements, and my observers are not notified.
I thought Spring might be destroying the bean, so I added a log statement to the @PreDestroy method of the class, but that log statement doesn't show up after the WatchService stops working, so it seems the bean still exists; it's just not functioning as expected. The class is as follows:
import com.sun.nio.file.SensitivityWatchEventModifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.IOException;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Consumer;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE;
import static java.nio.file.StandardWatchEventKinds.ENTRY_DELETE;
@Service
public class DirectoryMonitor {
private static final Logger logger = LoggerFactory.getLogger(DirectoryMonitor.class);
private WatchService watcher;
private ExecutorService executor;
private List<DirectoryMonitorObserver> observerList = new ArrayList<>();
private final Map<WatchKey, Path> keys = new HashMap<>();
public void addObserver(DirectoryMonitorObserver observer){
observerList.add(observer);
}
private void notifyObservers(){
observerList.forEach(DirectoryMonitorObserver::directoryModified);
}
@PostConstruct
public void init() throws IOException {
watcher = FileSystems.getDefault().newWatchService();
executor = Executors.newSingleThreadExecutor();
}
@PreDestroy
public void cleanup() {
try {
logger.info("Stopping directory monitor");
watcher.close();
} catch (IOException e) {
logger.error("Error closing watcher service", e);
}
executor.shutdown();
}
@SuppressWarnings("unchecked")
public void startRecursiveWatcher(String pathToMonitor) {
logger.info("Starting Recursive Watcher");
Consumer<Path> register = p -> {
if (!p.toFile().exists() || !p.toFile().isDirectory())
throw new RuntimeException("folder " + p + " does not exist or is not a directory");
try {
Files.walkFileTree(p, new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
logger.info("registering " + dir + " in watcher service");
WatchKey watchKey = dir.register(watcher, new WatchEvent.Kind[]{ENTRY_CREATE, ENTRY_DELETE}, SensitivityWatchEventModifier.HIGH);
keys.put(watchKey, dir);
return FileVisitResult.CONTINUE;
}
});
} catch (IOException e) {
throw new RuntimeException("Error registering path " + p);
}
};
register.accept(Paths.get(pathToMonitor));
executor.submit(() -> {
while (true) {
final WatchKey key;
try {
key = watcher.take();
} catch (InterruptedException ex) {
logger.error(ex.toString());
continue;
}
final Path dir = keys.get(key);
key.pollEvents().stream()
.map(e -> ((WatchEvent<Path>) e).context())
.forEach(p -> {
final Path absPath = dir.resolve(p);
if (absPath.toFile().isDirectory()) {
register.accept(absPath);
} else {
final File f = absPath.toFile();
logger.info("Detected new file " + f.getAbsolutePath());
}
});
notifyObservers();
key.reset();
}
});
}
}
This is where I'm creating the monitor bean:
@Component
public class MovieInfoFacade {
@Value("${media.path}")
private String mediaPath;
private MovieInfoControl movieInfoControl;
private DirectoryMonitor directoryMonitor;
private FileListProvider fileListProvider;
@Autowired
public MovieInfoFacade(MovieInfoControl movieInfoControl, DirectoryMonitor directoryMonitor, FileListProvider fileListProvider){
this.movieInfoControl = movieInfoControl;
this.directoryMonitor = directoryMonitor;
this.fileListProvider = fileListProvider;
}
@PostConstruct
public void startDirectoryMonitor(){
if(!mediaPath.equalsIgnoreCase("none")) {
directoryMonitor.addObserver(fileListProvider);
directoryMonitor.startRecursiveWatcher(mediaPath);
}
}
public int loadMovieListLength(String directoryPath){
return fileListProvider.listFiles(directoryPath).length;
}
public List<MovieInfo> loadMovieList(MovieSearchCriteria searchCriteria) {
List<File> files = Arrays.asList(fileListProvider.listFiles(searchCriteria.getPath()));
return files.parallelStream()
.sorted()
.skip(searchCriteria.getPage() * searchCriteria.getItemsPerPage())
.limit(searchCriteria.getItemsPerPage())
.map(file -> movieInfoControl.loadMovieInfoFromCache(file.getAbsolutePath()))
.collect(Collectors.toList());
}
public MovieInfo loadSingleMovie(String filePath) {
return movieInfoControl.loadMovieInfoFromCache(filePath);
}
}
It appears that the error was in my exception handling. After removing the throw statements (and replacing them with logs) I have not had any issues.
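For reference, here is a minimal sketch of a more defensive event loop that matches that fix: any exception escaping the submitted lambda silently kills the watch thread, and an ignored key.reset() can leave a cancelled key in the map. This reuses the fields of DirectoryMonitor above (watcher, keys, executor, logger); the elided event handling is unchanged, and ClosedWatchServiceException comes from java.nio.file:
```java
executor.submit(() -> {
    while (true) {
        final WatchKey key;
        try {
            key = watcher.take();
        } catch (InterruptedException | ClosedWatchServiceException e) {
            logger.info("Watcher stopping: " + e);
            return; // executor or watcher was shut down
        }
        try {
            // ... same event handling and directory registration as above ...
            notifyObservers();
        } catch (RuntimeException e) {
            // Logging instead of throwing keeps the loop alive; an uncaught
            // throw here would end it silently, matching the observed symptom.
            logger.error("Error handling watch event", e);
        }
        if (!key.reset()) {
            keys.remove(key); // key no longer valid, e.g. directory removed
        }
    }
});
```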
How can I re-implement this using a concurrent executor, meaning a thread-pool executor, or just a much better approach?
Basically I want the crawler to crawl the given URLs and maybe later follow the URLs it finds to other websites, and so on.
package Mainpackge;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Main {
public static void main(String[] args) {
//List of urls to collect data from
String[] urls = new String[]{
"http://www.answers.com/",
"http://www.britannica.com/",
"https://ie.yahoo.com/?p=us",
"https://en.wikipedia.org/wiki/Main_Page",
"http://ww w.worldbook.com/",
"http://www.computerlanguage.com/",
"http://www.howstuffworks.com/",
"http://www.dmoz.org/Computers/Computer_Science/"
};
// Create and start workers
List<Worker> workers = new ArrayList<>(urls.length);
for (String url : urls) {
Worker w = new Worker(url);
workers.add(w);
new Thread(w).start();
}
// Retrieve results
for (Worker w : workers) {
Elements results = w.waitForResults();
if (results != null)
for (Element result : results) { result.absUrl("a") ;
System.out.println(w.getName()+": "+result.absUrl("href"));
}
else
System.err.println(w.getName()+" had some error!");
}
}
}
class Worker implements Runnable {
private String url;
private Elements results;
private String name;
private static int number = 0;
private final Object lock = new Object();
public Worker(String url) {
this.url = url;
this.name = "Worker-" + (number++);
}
public String getName() {
return name;
}
@Override
public void run() {
try {
Document doc = Jsoup.connect(this.url).get();
Elements links = doc.select("a");
// Update results
synchronized (lock) {
this.results = links;
lock.notifyAll();
}
} catch (IOException e) {
// You should implement better error handling here.
System.err.println("Error while parsing: "+this.url);
e.printStackTrace();
}
}
public Elements waitForResults() {
synchronized (lock) {
try {
while (this.results == null) {
lock.wait();
}
return this.results;
} catch (InterruptedException e) {
// Again better error handling
e.printStackTrace();
}
return null;
}
}
}
Here is a full example using an ExecutorService and a Callable implementation for your tasks.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
public class ThreadPoolExample {
public static void main(String[] args) throws InterruptedException, ExecutionException {
List<String> urls = Arrays.asList(new String[]{
"http://www.answers.com/",
"http://www.britannica.com/",
"https://ie.yahoo.com/?p=us",
"https://en.wikipedia.org/wiki/Main_Page",
"http://ww w.worldbook.com/",
"http://www.computerlanguage.com/",
"http://www.howstuffworks.com/",
"http://www.dmoz.org/Computers/Computer_Science/"
});
ExecutorService ex = Executors.newFixedThreadPool(10);
List<Future<Element>> results = new ArrayList<>();
for (String string : urls) {
results.add(ex.submit(new Crawler(string)));
}
for (Future<Element> future : results) {
// Get will wait for the thread to be done
for (String url : future.get().urls) {
// Submit a new crawl task for each URL that was found
ex.submit(new Crawler(url));
}
}
ex.shutdown();
// Wait briefly for the follow-up crawls submitted above to finish.
ex.awaitTermination(2, TimeUnit.SECONDS);
}
public static class Crawler implements Callable<Element>{
String url;
public Crawler(String url) {
this.url = url;
}
@Override
public Element call() throws Exception {
// Implement your crawling logic and return your elements
return new Element(Arrays.asList(new String[]{"all new urls", "that you found while crawling"}));
}
}
public static class Element{
List<String> urls;
public Element(List<String> urls) {
this.urls = urls;
}
@Override
public String toString() {
return "Elements found : " + urls.size();
}
}
}
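To tie this back to the Jsoup code from the question, call() could look roughly like the sketch below. The selector and absUrl usage mirror the question's Worker; org.jsoup.nodes.Element is fully qualified because it is shadowed by the local Element class:
```java
@Override
public Element call() throws Exception {
    List<String> found = new ArrayList<>();
    // Fetch the page and collect the absolute target of every anchor tag.
    org.jsoup.nodes.Document doc = org.jsoup.Jsoup.connect(url).get();
    for (org.jsoup.nodes.Element link : doc.select("a[href]")) {
        found.add(link.absUrl("href"));
    }
    return new Element(found);
}
```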