Java WatchService, perform action on event using threads - java

I can monitor a directory by registering cwith a WatchKey (there are tons of examples on the web) however this watcher catches every single event. E.g. On windows If am monitoring the d:/temp dir and I create a new .txt file and rename it I get the following events.
ENTRY_CREATE: d:\temp\test\New Text Document.txt
ENTRY_MODIFY: d:\temp\test
ENTRY_DELETE: d:\temp\test\New Text Document.txt
ENTRY_CREATE: d:\temp\test\test.txt
ENTRY_MODIFY: d:\temp\test
I want to perform an action when the new file is created or updated. However I don't want the action to run 5 times in the above example.
My 1st Idea: As I only need to run the action (in this case a push to a private Git server) once every now an then (e.g. check every 10 seconds only if there are changes to the monitored directory and only then perform the push) I thought of having an object with a boolean parameter that I can get and set from within separate threads.
Now this works kinda ok (unless the gurus can help educated me as to why this is a terrible idea) The problem is that if a file event is caught during the SendToGit thread's operation and this operation completes it sets the "Found" parameter to false. Immediately thereafter one of the other events are caught (as in the example above) they will set the "Found" parameter to true again. This is not ideal as I will then run the SendToGit operation immediately again which will be unnecessary.
My 2nd Idea Investigate pausing the check for changes in the MonitorFolder thread until the SendToGit operation is complete (I.e. Keep checking if the ChangesFound Found parameter has been set back to false. When this parameter is false start checking for changes again.
Questions
Is this an acceptable way to go or have I gone down a rabbit hole with no hope of return?
If I go down the road of my 2nd idea what happens if I am busy with the SendToGit operation and a change is made in the monitoring folder? I suspect that this will not be identified and I may miss changes.
The Rest of the code
ChangesFound.java
package com.acme;
public class ChangesFound {
private boolean found = false;
public boolean wereFound() {
return this.found;
}
public void setFound(boolean commitToGit) {
this.found = commitToGit;
}
}
In my main app I start 2 threads.
MonitorFolder.java Monitors the directory and when Watcher events are found set the ChangesFound variable "found" to true.
SendToGit.java Every 10 seconds checks if the ChangesFound variable found is true and if it is performs the push. (or in this case just prints a message)
Here is my App that starts the threads:
package com.acme;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
public class App {
private static ChangesFound chg;
public static void main(String[] args) throws IOException {
String dirToMonitor = "D:/Temp";
boolean recursive = true;
chg = new ChangesFound();
Runnable r = new SendToGit(chg);
new Thread(r).start();
Path dir = Paths.get(dirToMonitor);
Runnable m = new MonitorFolder(chg, dir, recursive);
new Thread(m).start();
}
}
SendToGit.java
package com.acme;
public class SendToGit implements Runnable {
private ChangesFound changes;
public SendToGit(ChangesFound chg) {
changes = chg;
}
public void run() {
while (true) {
try {
Thread.sleep(10000);
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
}
System.out.println(java.time.LocalDateTime.now() + " [SendToGit] waking up.");
if (changes.wereFound()) {
System.out.println("\t***** CHANGES FOUND push to Git.");
changes.setFound(false);
} else {
System.out.println("\t***** Nothing changed.");
}
}
}
}
MonitorFolder.java (Apologies for the long class I only added this here in case it helps someone else.)
package com.acme;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import static java.nio.file.LinkOption.NOFOLLOW_LINKS;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE;
import static java.nio.file.StandardWatchEventKinds.ENTRY_DELETE;
import static java.nio.file.StandardWatchEventKinds.ENTRY_MODIFY;
import static java.nio.file.StandardWatchEventKinds.OVERFLOW;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.HashMap;
import java.util.Map;
public class MonitorFolder implements Runnable {
private static WatchService watcher;
private static Map<WatchKey, Path> keys;
private static boolean recursive;
private static boolean trace = false;
private static boolean commitGit = false;
private static ChangesFound changes;
#SuppressWarnings("unchecked")
static <T> WatchEvent<T> cast(WatchEvent<?> event) {
return (WatchEvent<T>) event;
}
/**
* Creates a WatchService and registers the given directory
*/
MonitorFolder(ChangesFound chg, Path dir, boolean rec) throws IOException {
changes = chg;
watcher = FileSystems.getDefault().newWatchService();
keys = new HashMap<WatchKey, Path>();
recursive = rec;
if (recursive) {
System.out.format("[MonitorFolder] Scanning %s ...\n", dir);
registerAll(dir);
System.out.println("Done.");
} else {
register(dir);
}
// enable trace after initial registration
this.trace = true;
}
/**
* Register the given directory with the WatchService
*/
private static void register(Path dir) throws IOException {
WatchKey key = dir.register(watcher, ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY);
if (trace) {
Path prev = keys.get(key);
if (prev == null) {
System.out.format("register: %s\n", dir);
} else {
if (!dir.equals(prev)) {
System.out.format("update: %s -> %s\n", prev, dir);
}
}
}
keys.put(key, dir);
}
/**
* Register the given directory, and all its sub-directories, with the
* WatchService.
*/
private static void registerAll(final Path start) throws IOException {
// register directory and sub-directories
Files.walkFileTree(start, new SimpleFileVisitor<Path>() {
#Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
throws IOException {
register(dir);
return FileVisitResult.CONTINUE;
}
});
}
/**
* Process all events for keys queued to the watcher
*/
public void run() {
for (;;) {
// wait for key to be signalled
WatchKey key;
try {
key = watcher.take();
} catch (InterruptedException x) {
return;
}
Path dir = keys.get(key);
if (dir == null) {
System.err.println("WatchKey not recognized!!");
continue;
}
for (WatchEvent<?> event : key.pollEvents()) {
WatchEvent.Kind kind = event.kind();
// TBD - provide example of how OVERFLOW event is handled
if (kind == OVERFLOW) {
System.out.println("Something about Overflow");
continue;
}
// Context for directory entry event is the file name of entry
WatchEvent<Path> ev = cast(event);
Path name = ev.context();
Path child = dir.resolve(name);
// print out event and set ChangesFound object Found parameter to True
System.out.format("[MonitorFolder] " + java.time.LocalDateTime.now() + " - %s: %s\n", event.kind().name(), child);
changes.setFound(true);
// if directory is created, and watching recursively, then
// register it and its sub-directories
if (recursive && (kind == ENTRY_CREATE)) {
try {
if (Files.isDirectory(child, NOFOLLOW_LINKS)) {
registerAll(child);
}
} catch (IOException x) {
// ignore to keep sample readbale
}
}
}
// reset key and remove from set if directory no longer accessible
boolean valid = key.reset();
if (!valid) {
keys.remove(key);
// all directories are inaccessible
if (keys.isEmpty()) {
System.out.println("keys.isEmpty");
break;
}
}
}
}
}

Both of your strategies will lead to issues because the Watch Service is very verbose and sends many messages when maybe one or two is actually needed to your downstream handling - so sometimes you may do unnecessary work or miss events.
When using WatchService you could collate multiple notifications together and pass on as ONE event listing a sets of recent deletes, creates and updates:
DELETE followed by CREATE => sent as UPDATE
CREATE followed by MODIFY => sent as CREATE
CREATE or MODIFY followed by DELETE => sent as DELETE
Instead of calling WatchService.take() and acting on each message, use WatchService.poll(timeout) and only when nothing is returned act on the union of preceeding set of events as one - not individually after each successful poll.
It is easier to decouple the problems as two components so that you don't repeat the WatchService code the next time you need it:
A watch manager which handles watch service + dir registrations and collates the duplicates to send to event listeners as ONE group
A Listener class to receive the group of changes and act on the set.
This example may help illustrate - see WatchExample which is the manager which sets up the registrations BUT passes on much fewer events to the callback defined by setListener. You could set up MonitorFolder like WatchExample to reduce the events discovered, and make your code in SendToGit as a Listener which is called on demand with the aggregated set of fileChange(deletes, creates, updates).
public static void main(String[] args) throws IOException, InterruptedException {
final List<Path> dirs = Arrays.stream(args).map(Path::of).map(Path::toAbsolutePath).collect(Collectors.toList());
Kind<?> [] kinds = { StandardWatchEventKinds.ENTRY_CREATE, StandardWatchEventKinds.ENTRY_MODIFY, StandardWatchEventKinds.ENTRY_DELETE};
// Should launch WatchExample PER Filesystem:
WatchExample w = new WatchExample();
w.setListener(WatchExample::fireEvents);
for(Path dir : dirs)
w.register(kinds, dir);
// For 2 or more WatchExample use: new Thread(w[n]::run).start();
w.run();
}
public class WatchExample implements Runnable {
private final Set<Path> created = new LinkedHashSet<>();
private final Set<Path> updated = new LinkedHashSet<>();
private final Set<Path> deleted = new LinkedHashSet<>();
private volatile boolean appIsRunning = true;
// Decide how sensitive the polling is:
private final int pollmillis = 100;
private WatchService ws;
private Listener listener = WatchExample::fireEvents;
#FunctionalInterface
interface Listener
{
public void fileChange(Set<Path> deleted, Set<Path> created, Set<Path> modified);
}
WatchExample() {
}
public void setListener(Listener listener) {
this.listener = listener;
}
public void shutdown() {
System.out.println("shutdown()");
this.appIsRunning = false;
}
public void run() {
System.out.println();
System.out.println("run() START watch");
System.out.println();
try(WatchService autoclose = ws) {
while(appIsRunning) {
boolean hasPending = created.size() + updated.size() + deleted.size() > 0;
System.out.println((hasPending ? "ws.poll("+pollmillis+")" : "ws.take()")+" as hasPending="+hasPending);
// Use poll if last cycle has some events, as take() may block
WatchKey wk = hasPending ? ws.poll(pollmillis,TimeUnit.MILLISECONDS) : ws.take();
if (wk != null) {
for (WatchEvent<?> event : wk.pollEvents()) {
Path parent = (Path) wk.watchable();
Path eventPath = (Path) event.context();
storeEvent(event.kind(), parent.resolve(eventPath));
}
boolean valid = wk.reset();
if (!valid) {
System.out.println("Check the path, dir may be deleted "+wk);
}
}
System.out.println("PENDING: cre="+created.size()+" mod="+updated.size()+" del="+deleted.size());
// This only sends new notifications when there was NO event this cycle:
if (wk == null && hasPending) {
listener.fileChange(deleted, created, updated);
deleted.clear();
created.clear();
updated.clear();
}
}
}
catch (InterruptedException e) {
System.out.println("Watch was interrupted, sending final updates");
fireEvents(deleted, created, updated);
}
catch (IOException e) {
throw new UncheckedIOException(e);
}
System.out.println("run() END watch");
}
public void register(Kind<?> [] kinds, Path dir) throws IOException {
System.out.println("register watch for "+dir);
// If dirs are from different filesystems WatchService will give errors later
if (this.ws == null) {
ws = dir.getFileSystem().newWatchService();
}
dir.register(ws, kinds);
}
/**
* Save event for later processing by event kind EXCEPT for:
* <li>DELETE followed by CREATE => store as MODIFY
* <li>CREATE followed by MODIFY => store as CREATE
* <li>CREATE or MODIFY followed by DELETE => store as DELETE
*/
private void
storeEvent(Kind<?> kind, Path path) {
System.out.println("STORE "+kind+" path:"+path);
boolean cre = false;
boolean mod = false;
boolean del = kind == StandardWatchEventKinds.ENTRY_DELETE;
if (kind == StandardWatchEventKinds.ENTRY_CREATE) {
mod = deleted.contains(path);
cre = !mod;
}
else if (kind == StandardWatchEventKinds.ENTRY_MODIFY) {
cre = created.contains(path);
mod = !cre;
}
addOrRemove(created, cre, path);
addOrRemove(updated, mod, path);
addOrRemove(deleted, del, path);
}
// Add or remove from the set:
private static void addOrRemove(Set<Path> set, boolean add, Path path) {
if (add) set.add(path);
else set.remove(path);
}
public static void fireEvents(Set<Path> deleted, Set<Path> created, Set<Path> modified) {
System.out.println();
System.out.println("fireEvents START");
for (Path path : deleted)
System.out.println(" DELETED: "+path);
for (Path path : created)
System.out.println(" CREATED: "+path);
for (Path path : modified)
System.out.println(" UPDATED: "+path);
System.out.println("fireEvents END");
System.out.println();
}
}

Related

file.listFiles() does not list recently created file

I have a component that should do something every time a file in the watched directory is created,updated or deleted.
For the sake of simplicity, lets say it prints the file names to Sysout.
#Component
public class MyComponent {
#Autowired
MyComponentWatcher myComponentWatcher
#PostConstruct
public void init(){
myComponentWatcher.registerWatchService();
}
}
#Component
public class MyComponentWatcher {
#Value("${somePath}")
private String myPath;
private File myDir;
#PostConstruct
public void init() {
this.myDir = new File(myPath);
}
#Async
public void registerWatchService() throws IOException, InterruptedException {
WatchService watchService
= FileSystems.getDefault().newWatchService();
Path path = myDir.toPath();
path.register(
watchService,
StandardWatchEventKinds.ENTRY_CREATE,
StandardWatchEventKinds.ENTRY_DELETE,
StandardWatchEventKinds.ENTRY_MODIFY);
WatchKey key;
while ((key = watchService.take()) != null) {
for (WatchEvent<?> event : key.pollEvents()) {
System.out.println(
"Event kind:" + event.kind()
+ ". File affected: " + event.context() + ".");
}
key.reset();
File[] files = myDir.listFiles();
System.out.println(files); //!!!
}
}
}
GIVEN my dir contains 1 file
WHEN I create another file
THEN System.out.println(files); returns the old and the new file
-> I only get the old file!
I wrote a test case that uses Files.write(filePath, fileContent.getBytes(), CREATE_NEW); to create the test file and
#AfterEach
void tearDown() {
File toDelete = new File(filePath.toString());
toDelete.delete();
}
to delete the test file after the test.
In total the watcher triggers 3 times with the following lists of events:
[ENTRY_CREATE, ENTRY_MODIFY]
[ENTRY_MODIFY]
[ENTRY_MODIFY]
[ENTRY_DELETE]
Your code is working fine as expected and written.
to achieve your output, you need to recreate the object for File class.
// we are creating the object just after the constructor
#PostConstruct
public void init() {
this.myDir = new File(myPath);
}
but then you modified your path but not the actual instance variable:
please change your code File[] files = new File(myPath).listFiles();

How to get Workflow event type and Processstep name or Arguments in to java service in AEM?

I created a workflow for whenever I upload or remove asset from /content/dam/MyAsset folder I am able to Triggering JavaServices. I create two launcher with Created Event Type and Removed Event Type.
I need to get That event Type and Process step name or arguments in to Services
inside Triggering execute function
Here is My Code:
public void execute(WorkItem arg0, WorkflowSession arg1, MetaDataMap arg2)
throws WorkflowException
{
log.info("Workflow created ::::: ");
}
Is there any way to get Launcher event type and process arguments]to Services ?
You can not get launcher event type as that information is not passed down to the workflow. What you could do is check the payload to identify if the node exists or not. The workflow called on removal will have path to the node that is removed and if you try to resolve that path it will give you exception (yeah, bad approach but thats the only possibility).
For the process step arguments passed, all that information is available in meta data. Look into arg0.getWorkflowData().getMetaDataMap().get("MyIDkey",<Type>); where arg0 is your WorkItem instance.
Why don't you use a org.osgi.service.event.EventHandler instead? You will have all the information you need. Here is a small code snippet that waits for an event and adds the relevant data into a map that is then passed to the JobManager to generate a new Job that is executed by a matching JobConsumer:
import org.apache.sling.event.jobs.Job;
import org.apache.sling.event.jobs.JobManager;
import org.apache.sling.event.jobs.consumer.JobConsumer;
// and more imports...
#Service
#Component(immediate = true, policy = ConfigurationPolicy.OPTIONAL, description = "Listen to page modification events.")
#Properties(value = { #Property(name = "event.topics", value = { PageEvent.EVENT_TOPIC, DamEvent.EVENT_TOPIC}, propertyPrivate = true),
#Property(name = JobConsumer.PROPERTY_TOPICS, value = ModificationEventHandler.JOB_TOPICS, propertyPrivate = true) })
public class ModificationEventHandler implements EventHandler, JobConsumer {
/**
* Modification Job Topics.
*/
public static final String JOB_TOPICS = "de/rockware/aem/modification";
#Reference
private JobManager jobManager;
#Override
public void handleEvent(Event event) {
logger.trace("Checking event.");
PageEvent pageEvent = PageEvent.fromEvent(event);
DamEvent damEvent = DamEvent.fromEvent(event);
Map<String, Object> properties = new HashMap<>();
if (damEvent != null) {
// DamEvent is not serializable, so we cannot add the complete event to the map.
logger.trace("Event on {} is a dam event ({}).", damEvent.getAssetPath(), damEvent.getType().name());
properties.put(DAM_EVENT_ASSET_PATH, damEvent.getAssetPath());
}
if (pageEvent != null) {
logger.trace("Event is a page event.");
properties.put(PAGE_EVENT, pageEvent);
}
logger.trace("Adding new job.");
jobManager.addJob(JOB_TOPICS, properties);
}
#Override
public JobResult process(Job job) {
if (job.getProperty(PAGE_EVENT) != null) {
PageEvent pageEvent = (PageEvent) job.getProperty(PAGE_EVENT);
if(pageEvent.isLocal()) {
Iterator<PageModification> modificationsIterator;
modificationsIterator = pageEvent.getModifications();
while (modificationsIterator.hasNext()) {
PageModification modification = modificationsIterator.next();
logger.info("Handling modification {} on path {}.", modification.getType(), modification.getPath());
if(isRelevantModification(modification.getType())) {
logger.info(modification.getPath());
}
}
} else {
logger.trace("Page event is not local.");
}
} else if (job.getProperty(DAM_EVENT_ASSET_PATH) != null) {
String assetPath = (String) job.getProperty(DAM_EVENT_ASSET_PATH);
logger.trace(assetPath);
backupService.trackModification(assetPath);
} else {
logger.trace("Invalid event type. Cannot help.");
}
return JobResult.OK;
}
}

Java threads - waiting on all child threads in order to proceed

So a little background;
I am working on a project in which a servlet is going to release crawlers upon a lot of text files within a file system. I was thinking of dividing the load under multiple threads, for example:
a crawler enters a directory, finds 3 files and 6 directories. it will start processing the files and start a thread with a new crawler for the other directories. So from my creator class I would create a single crawler upon a base directory. The crawler would assess the workload and if deemed needed it would spawn another crawler under another thread.
My crawler class looks like this
package com.fujitsu.spider;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
public class DocumentSpider implements Runnable, Serializable {
private static final long serialVersionUID = 8401649393078703808L;
private Spidermode currentMode = null;
private String URL = null;
private String[] terms = null;
private float score = 0;
private ArrayList<SpiderDataPair> resultList = null;
public enum Spidermode {
FILE, DIRECTORY
}
public DocumentSpider(String resourceURL, Spidermode mode, ArrayList<SpiderDataPair> resultList) {
currentMode = mode;
setURL(resourceURL);
this.setResultList(resultList);
}
#Override
public void run() {
try {
if (currentMode == Spidermode.FILE) {
doCrawlFile();
} else {
doCrawlDirectory();
}
} catch (Exception e) {
e.printStackTrace();
}
System.out.println("SPIDER # " + URL + " HAS FINISHED.");
}
public Spidermode getCurrentMode() {
return currentMode;
}
public void setCurrentMode(Spidermode currentMode) {
this.currentMode = currentMode;
}
public String getURL() {
return URL;
}
public void setURL(String uRL) {
URL = uRL;
}
public void doCrawlFile() throws Exception {
File target = new File(URL);
if (target.isDirectory()) {
throw new Exception(
"This URL points to a directory while the spider is in FILE mode. Please change this spider to FILE mode.");
}
procesFile(target);
}
public void doCrawlDirectory() throws Exception {
File baseDir = new File(URL);
if (!baseDir.isDirectory()) {
throw new Exception(
"This URL points to a FILE while the spider is in DIRECTORY mode. Please change this spider to DIRECTORY mode.");
}
File[] directoryContent = baseDir.listFiles();
for (File f : directoryContent) {
if (f.isDirectory()) {
DocumentSpider spider = new DocumentSpider(f.getPath(), Spidermode.DIRECTORY, this.resultList);
spider.terms = this.terms;
(new Thread(spider)).start();
} else {
DocumentSpider spider = new DocumentSpider(f.getPath(), Spidermode.FILE, this.resultList);
spider.terms = this.terms;
(new Thread(spider)).start();
}
}
}
public void procesDirectory(String target) throws IOException {
File base = new File(target);
File[] directoryContent = base.listFiles();
for (File f : directoryContent) {
if (f.isDirectory()) {
procesDirectory(f.getPath());
} else {
procesFile(f);
}
}
}
public void procesFile(File target) throws IOException {
BufferedReader br = new BufferedReader(new FileReader(target));
String line;
while ((line = br.readLine()) != null) {
String[] words = line.split(" ");
for (String currentWord : words) {
for (String a : terms) {
if (a.toLowerCase().equalsIgnoreCase(currentWord)) {
score += 1f;
}
;
if (currentWord.toLowerCase().contains(a)) {
score += 1f;
}
;
}
}
}
br.close();
resultList.add(new SpiderDataPair(this, URL));
}
public String[] getTerms() {
return terms;
}
public void setTerms(String[] terms) {
this.terms = terms;
}
public float getScore() {
return score;
}
public void setScore(float score) {
this.score = score;
}
public ArrayList<SpiderDataPair> getResultList() {
return resultList;
}
public void setResultList(ArrayList<SpiderDataPair> resultList) {
this.resultList = resultList;
}
}
The problem I am facing is that in my root crawler I have this list of results from every crawler that I want to process further. The operation to process the data from this list is called from the servlet (or main method for this example). However the operations is always called before all of the crawlers have completed their processing. thus launching the operation to process the results too soon, which leads to incomplete data.
I tried solving this using the join methods but unfortunately I cant seems to figure this one out.
package com.fujitsu.spider;
import java.util.ArrayList;
import com.fujitsu.spider.DocumentSpider.Spidermode;
public class Main {
public static void main(String[] args) throws InterruptedException {
ArrayList<SpiderDataPair> results = new ArrayList<SpiderDataPair>();
String [] terms = {"SERVER","CHANGE","MO"};
DocumentSpider spider1 = new DocumentSpider("C:\\Users\\Mark\\workspace\\Spider\\Files", Spidermode.DIRECTORY, results);
spider1.setTerms(terms);
DocumentSpider spider2 = new DocumentSpider("C:\\Users\\Mark\\workspace\\Spider\\File2", Spidermode.DIRECTORY, results);
spider2.setTerms(terms);
Thread t1 = new Thread(spider1);
Thread t2 = new Thread(spider2);
t1.start();
t1.join();
t2.start();
t2.join();
for(SpiderDataPair d : spider1.getResultList()){
System.out.println("PATH -> " + d.getFile() + " SCORE -> " + d.getSpider().getScore());
}
for(SpiderDataPair d : spider2.getResultList()){
System.out.println("PATH -> " + d.getFile() + " SCORE -> " + d.getSpider().getScore());
}
}
}
TL:DR
I really wish to understand this subject so any help would be immensely appreciated!.
You need a couple of changes in your code:
In the spider:
List<Thread> threads = new LinkedList<Thread>();
for (File f : directoryContent) {
if (f.isDirectory()) {
DocumentSpider spider = new DocumentSpider(f.getPath(), Spidermode.DIRECTORY, this.resultList);
spider.terms = this.terms;
Thread thread = new Thread(spider);
threads.add(thread)
thread.start();
} else {
DocumentSpider spider = new DocumentSpider(f.getPath(), Spidermode.FILE, this.resultList);
spider.terms = this.terms;
Thread thread = new Thread(spider);
threads.add(thread)
thread.start();
}
}
for (Thread thread: threads) thread.join()
The idea is to create a new thread for each spider and start it. Once they are all running, you wait until each on is done before the Spider itself finishes. This way each spider thread keeps running until all of its work is done (thus the top thread runs until all children and their children are finished).
You also need to change your runner so that it runs the two spiders in parallel instead of one after another like this:
Thread t1 = new Thread(spider1);
Thread t2 = new Thread(spider2);
t1.start();
t2.start();
t1.join();
t2.join();
You should use a higher-level library than bare Thread for this task. I would suggest looking into ExecutorService in particular and all of java.util.concurrent generally. There are abstractions there that can manage all of the threading issues while providing well-formed tasks a properly protected environment in which to run.
For your specific problem, I would recommend some sort of blocking queue of tasks and a standard producer-consumer architecture. Each task knows how to determine if its path is a file or directory. If it is a file, process the file; if it is a directory, crawl the directory's immediate contents and enqueue new tasks for each sub-path. You could also use some properly-synchronized shared state to cap the number of files processed, depth, etc. Also, the service provides the ability to await termination of its tasks, making the "join" simpler.
With this architecture, you decouple the notion of threads and thread management (handled by the ExecutorService) with your business logic of tasks (typically a Runnable or Callable). The service itself has the ability to tune how to instantiate, such as a fixed maximum number of threads or a scalable number depending on how many concurrent tasks exist (See factory methods on java.util.concurrent.Executors). Threads, which are more expensive than the Runnables they execute, are re-used to conserve resources.
If your objective is primarily something functional that works in production quality, then the library is the way to go. However, if your objective is to understand the lower-level details of thread management, then you may want to investigate the use of latches and perhaps thread groups to manage them at a lower level, exposing the details of the implementation so you can work with the details.

Capture events happening inside a directory

Im watching a directory using Java 7 nio WatchService by using below method.
Path myDir = Paths.get("/rootDir");
try {
WatchService watcher = myDir.getFileSystem().newWatchService();
myDir.register(watcher, StandardWatchEventKinds.ENTRY_CREATE,
StandardWatchEventKinds.ENTRY_DELETE, StandardWatchEventKinds.ENTRY_MODIFY);
WatchKey watckKey = watcher.take();
List<WatchEvent<?>> events = watckKey.pollEvents();
for (WatchEvent event : events) {
if (event.kind() == StandardWatchEventKinds.ENTRY_CREATE) {
System.out.println("Created: " + event.context().toString());
JOptionPane.showMessageDialog(null,"Created: " + event.context().toString());
}
if (event.kind() == StandardWatchEventKinds.ENTRY_DELETE) {
System.out.println("Delete: " + event.context().toString());
JOptionPane.showMessageDialog(null,"Delete: " + event.context().toString());
}
if (event.kind() == StandardWatchEventKinds.ENTRY_MODIFY) {
System.out.println("Modify: " + event.context().toString());
JOptionPane.showMessageDialog(null,"Modify: " + event.context().toString());
}
}
} catch (Exception e) {
System.out.println("Error: " + e.toString());
}
But the above method only respond in one events happen in the directory after that watcher does not respond to the events happen in that folder. Is there a way that I can modify this to capture all events happen inside the folder. I also want to modify this to capture events happen in sub folders too. Can someone help me with that.
Thank you.
From the JavaDoc of WatchService:
A Watchable object is registered with a watch service by invoking its register method, returning a WatchKey to represent the registration. When an event for an object is detected the key is signalled, and if not currently signalled, it is queued to the watch service so that it can be retrieved by consumers that invoke the poll or take methods to retrieve keys and process events. Once the events have been processed the consumer invokes the key's reset method to reset the key which allows the key to be signalled and re-queued with further events.
You are only calling watcher.take() once.
To watch for further events, you must call watchKey.reset() after consuming the WatchEvents. Put all this in a loop.
while (true) {
WatchKey watckKey = watcher.take();
List<WatchEvent<?>> events = watckKey.pollEvents();
for (WatchEvent event : events) {
// process event
}
watchKey.reset();
}
Also take a look at the relevant section of the Java Tutorial.
Use Apache Commons IO File Monitoring
it will also capture events happening in sub folders too
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.monitor.FileAlterationListener;
import org.apache.commons.io.monitor.FileAlterationListenerAdaptor;
import org.apache.commons.io.monitor.FileAlterationMonitor;
import org.apache.commons.io.monitor.FileAlterationObserver;
public class Monitor {
public Monitor() {
}
//path to a folder you are monitoring .
public static final String FOLDER = MYPATH;
public static void main(String[] args) throws Exception {
System.out.println("monitoring started");
// The monitor will perform polling on the folder every 5 seconds
final long pollingInterval = 5 * 1000;
File folder = new File(FOLDER);
if (!folder.exists()) {
// Test to see if monitored folder exists
throw new RuntimeException("Directory not found: " + FOLDER);
}
FileAlterationObserver observer = new FileAlterationObserver(folder);
FileAlterationMonitor monitor =
new FileAlterationMonitor(pollingInterval);
FileAlterationListener listener = new FileAlterationListenerAdaptor() {
// Is triggered when a file is created in the monitored folder
#Override
public void onFileCreate(File file) {
// "file" is the reference to the newly created file
System.out.println("File created: "+ file.getCanonicalPath());
}
// Is triggered when a file is deleted from the monitored folder
#Override
public void onFileDelete(File file) {
try {
// "file" is the reference to the removed file
System.out.println("File removed: "+ file.getCanonicalPath());
// "file" does not exists anymore in the location
System.out.println("File still exists in location: "+ file.exists());
} catch (IOException e) {
e.printStackTrace(System.err);
}
}
};
observer.addListener(listener);
monitor.addObserver(observer);
monitor.start();
}
}

Can I watch for single file change with WatchService (not the whole directory)?

When I'm trying to register a file instead of a directory java.nio.file.NotDirectoryException is thrown. Can I listen for a single file change, not the whole directory?
Just filter the events for the file you want in the directory:
final Path path = FileSystems.getDefault().getPath(System.getProperty("user.home"), "Desktop");
System.out.println(path);
try (final WatchService watchService = FileSystems.getDefault().newWatchService()) {
final WatchKey watchKey = path.register(watchService, StandardWatchEventKinds.ENTRY_MODIFY);
while (true) {
final WatchKey wk = watchService.take();
for (WatchEvent<?> event : wk.pollEvents()) {
//we only register "ENTRY_MODIFY" so the context is always a Path.
final Path changed = (Path) event.context();
System.out.println(changed);
if (changed.endsWith("myFile.txt")) {
System.out.println("My file has changed");
}
}
// reset the key
boolean valid = wk.reset();
if (!valid) {
System.out.println("Key has been unregisterede");
}
}
}
Here we check whether the changed file is "myFile.txt", if it is then do whatever.
Other answers are right that you must watch a directory and filter for your particular file. However, you probably want a thread running in the background. The accepted answer can block indefinitely on watchService.take(); and doesn't close the WatchService. A solution suitable for a separate thread might look like:
public class FileWatcher extends Thread {
private final File file;
private AtomicBoolean stop = new AtomicBoolean(false);
public FileWatcher(File file) {
this.file = file;
}
public boolean isStopped() { return stop.get(); }
public void stopThread() { stop.set(true); }
public void doOnChange() {
// Do whatever action you want here
}
#Override
public void run() {
try (WatchService watcher = FileSystems.getDefault().newWatchService()) {
Path path = file.toPath().getParent();
path.register(watcher, StandardWatchEventKinds.ENTRY_MODIFY);
while (!isStopped()) {
WatchKey key;
try { key = watcher.poll(25, TimeUnit.MILLISECONDS); }
catch (InterruptedException e) { return; }
if (key == null) { Thread.yield(); continue; }
for (WatchEvent<?> event : key.pollEvents()) {
WatchEvent.Kind<?> kind = event.kind();
#SuppressWarnings("unchecked")
WatchEvent<Path> ev = (WatchEvent<Path>) event;
Path filename = ev.context();
if (kind == StandardWatchEventKinds.OVERFLOW) {
Thread.yield();
continue;
} else if (kind == java.nio.file.StandardWatchEventKinds.ENTRY_MODIFY
&& filename.toString().equals(file.getName())) {
doOnChange();
}
boolean valid = key.reset();
if (!valid) { break; }
}
Thread.yield();
}
} catch (Throwable e) {
// Log or rethrow the error
}
}
}
I tried working from the accepted answer and this article. You should be able to use this thread with new FileWatcher(new File("/home/me/myfile")).start() and stop it by calling stopThread() on the thread.
No it isn't possible to register a file, the watch service doesn't work this way. But registering a directory actually watches changes on the directory children (the files and sub-directories), not the changes on the directory itself.
If you want to watch a file, then you register the containing directory with the watch service. Path.register() documentation says:
WatchKey java.nio.file.Path.register(WatchService watcher, Kind[] events, Modifier...
modifiers) throws IOException
Registers the file located by this path with a watch service.
In this release, this path locates a directory that exists. The directory is registered with the watch service so that entries in the directory can be watched
Then you need to process events on entries, and detect those related to the file you are interested in, by checking the context value of the event. The context value represents the name of the entry (actually the path of the entry relatively to the path of its parent, which is exactly the child name). You have an example here.
Apache offers a FileWatchDog class with a doOnChange method.
private class SomeWatchFile extends FileWatchdog {
protected SomeWatchFile(String filename) {
super(filename);
}
#Override
protected void doOnChange() {
fileChanged= true;
}
}
And where ever you want you can start this thread:
SomeWatchFile someWatchFile = new SomeWatchFile (path);
someWatchFile.start();
The FileWatchDog class polls a file's lastModified() timestamp. The native WatchService from Java NIO is more efficient, since notifications are immediate.
You cannot watch an individual file directly but you can filter out what you don't need.
Here is my FileWatcher class implementation:
import java.io.File;
import java.nio.file.*;
import java.nio.file.WatchEvent.Kind;
import static java.nio.file.StandardWatchEventKinds.*;
public abstract class FileWatcher
{
private Path folderPath;
private String watchFile;
public FileWatcher(String watchFile)
{
Path filePath = Paths.get(watchFile);
boolean isRegularFile = Files.isRegularFile(filePath);
if (!isRegularFile)
{
// Do not allow this to be a folder since we want to watch files
throw new IllegalArgumentException(watchFile + " is not a regular file");
}
// This is always a folder
folderPath = filePath.getParent();
// Keep this relative to the watched folder
this.watchFile = watchFile.replace(folderPath.toString() + File.separator, "");
}
public void watchFile() throws Exception
{
// We obtain the file system of the Path
FileSystem fileSystem = folderPath.getFileSystem();
// We create the new WatchService using the try-with-resources block
try (WatchService service = fileSystem.newWatchService())
{
// We watch for modification events
folderPath.register(service, ENTRY_MODIFY);
// Start the infinite polling loop
while (true)
{
// Wait for the next event
WatchKey watchKey = service.take();
for (WatchEvent<?> watchEvent : watchKey.pollEvents())
{
// Get the type of the event
Kind<?> kind = watchEvent.kind();
if (kind == ENTRY_MODIFY)
{
Path watchEventPath = (Path) watchEvent.context();
// Call this if the right file is involved
if (watchEventPath.toString().equals(watchFile))
{
onModified();
}
}
}
if (!watchKey.reset())
{
// Exit if no longer valid
break;
}
}
}
}
public abstract void onModified();
}
To use this, you just have to extend and implement the onModified() method like so:
import java.io.File;
public class MyFileWatcher extends FileWatcher
{
public MyFileWatcher(String watchFile)
{
super(watchFile);
}
#Override
public void onModified()
{
System.out.println("Modified!");
}
}
Finally, start watching the file:
String watchFile = System.getProperty("user.home") + File.separator + "Desktop" + File.separator + "Test.txt";
FileWatcher fileWatcher = new MyFileWatcher(watchFile);
fileWatcher.watchFile();
Not sure about others, but I groan at the amount of code needed to watch a single file for changes using the basic WatchService API. It has to be simpler!
Here are a couple of alternatives using third party libraries:
Using Apache Commons Configuration
Using spring-loaded package from the Spring Framework (didn't find an example implementation for this off-hand, but it looks straight-forward to use)
I have created a wrapper around Java 1.7's WatchService that allows registering a directory and any number of glob patterns. This class will take care of the filtering and only emit events you are interested in.
try {
DirectoryWatchService watchService = new SimpleDirectoryWatchService(); // May throw
watchService.register( // May throw
new DirectoryWatchService.OnFileChangeListener() {
#Override
public void onFileCreate(String filePath) {
// File created
}
#Override
public void onFileModify(String filePath) {
// File modified
}
#Override
public void onFileDelete(String filePath) {
// File deleted
}
},
<directory>, // Directory to watch
<file-glob-pattern-1>, // E.g. "*.log"
<file-glob-pattern-2>, // E.g. "input-?.txt"
<file-glob-pattern-3>, // E.g. "config.ini"
... // As many patterns as you like
);
watchService.start(); // The actual watcher runs on a new thread
} catch (IOException e) {
LOGGER.error("Unable to register file change listener for " + fileName);
}
Complete code is in this repo.
I extended the solution by BullyWiiPlaza a bit, for integration with javafx.concurrent, e.g. javafx.concurrent.Taskand javafx.concurrent.Service.
Also I added possibility to track multiple files.
Task:
import javafx.concurrent.Task;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
import java.nio.file.*;
import java.util.*;
import static java.nio.file.StandardWatchEventKinds.ENTRY_MODIFY;
#Slf4j
public abstract class FileWatcherTask extends Task<Void> {
static class Entry {
private final Path folderPath;
private final String watchFile;
Entry(Path folderPath, String watchFile) {
this.folderPath = folderPath;
this.watchFile = watchFile;
}
#Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Entry entry = (Entry) o;
return Objects.equals(folderPath, entry.folderPath) && Objects.equals(watchFile, entry.watchFile);
}
#Override
public int hashCode() {
return Objects.hash(folderPath, watchFile);
}
}
private final List<Entry> entryList;
private final Map<WatchKey, Entry> watchKeyEntryMap;
public FileWatcherTask(Iterable<String> watchFiles) {
this.entryList = new ArrayList<>();
this.watchKeyEntryMap = new LinkedHashMap<>();
for (String watchFile : watchFiles) {
Path filePath = Paths.get(watchFile);
boolean isRegularFile = Files.isRegularFile(filePath);
if (!isRegularFile) {
// Do not allow this to be a folder since we want to watch files
throw new IllegalArgumentException(watchFile + " is not a regular file");
}
// This is always a folder
Path folderPath = filePath.getParent();
// Keep this relative to the watched folder
watchFile = watchFile.replace(folderPath.toString() + File.separator, "");
Entry entry = new Entry(folderPath, watchFile);
entryList.add(entry);
log.debug("Watcher initialized for {} entries. ({})", entryList.size(), entryList.stream().map(e -> e.watchFile + "-" + e.folderPath).findFirst().orElse("<>"));
}
}
public FileWatcherTask(String... watchFiles) {
this(Arrays.asList(watchFiles));
}
public void watchFile() throws Exception {
// We obtain the file system of the Path
// FileSystem fileSystem = folderPath.getFileSystem();
// TODO: use the actual file system instead of default
FileSystem fileSystem = FileSystems.getDefault();
// We create the new WatchService using the try-with-resources block
try (WatchService service = fileSystem.newWatchService()) {
log.debug("Watching filesystem {}", fileSystem);
for (Entry e : entryList) {
// We watch for modification events
WatchKey key = e.folderPath.register(service, ENTRY_MODIFY);
watchKeyEntryMap.put(key, e);
}
// Start the infinite polling loop
while (true) {
// Wait for the next event
WatchKey watchKey = service.take();
for (Entry e : entryList) {
// Call this if the right file is involved
var hans = watchKeyEntryMap.get(watchKey);
if (hans != null) {
for (WatchEvent<?> watchEvent : watchKey.pollEvents()) {
// Get the type of the event
WatchEvent.Kind<?> kind = watchEvent.kind();
if (kind == ENTRY_MODIFY) {
Path watchEventPath = (Path) watchEvent.context();
onModified(e.watchFile);
}
if (!watchKey.reset()) {
// Exit if no longer valid
log.debug("Watch key {} was reset", watchKey);
break;
}
}
}
}
}
}
}
#Override
protected Void call() throws Exception {
watchFile();
return null;
}
public abstract void onModified(String watchFile);
}
Service:
public abstract class FileWatcherService extends Service<Void> {
private final Iterable<String> files;
public FileWatcherService(Iterable<String> files) {
this.files = files;
}
#Override
protected Task<Void> createTask() {
return new FileWatcherTask(files) {
#Override
public void onModified(String watchFile) {
FileWatcherService.this.onModified(watchFile);
}
};
}
abstract void onModified(String watchFile);
}

Categories

Resources