Critical Path Method Algorithm - java

Where can I find a Java implementation of the Critical Path Method Algorithm? I am sure there's some implementation in the cloud. I have already searched on google obviously, but haven't found any implementation that works well. That's why I am asking.
Thanks in advance.

Here is an implementation of the algorithm based on the explanation provided on this page
There is a wrapper class to hold the task, cost, and critical path cost. It starts by calculating the critical cost as the maximum critical cost of all dependencies plus its own cost. Then once the critical costs are available it uses a comparator to sort the tasks based on the critical cost with dependency as a tie breaker (choosing randomly if there is no dependency). Note that an exception will be thrown if there is a cycle and it will fail if any of the costs are negative.
Here is the implementation:
public class CriticalPath {
public static void main(String[] args) {
//The example dependency graph from
//http://www.ctl.ua.edu/math103/scheduling/scheduling_algorithms.htm
HashSet<Task> allTasks = new HashSet<Task>();
Task end = new Task("End", 0);
Task F = new Task("F", 2, end);
Task A = new Task("A", 3, end);
Task X = new Task("X", 4, F, A);
Task Q = new Task("Q", 2, A, X);
Task start = new Task("Start", 0, Q);
allTasks.add(end);
allTasks.add(F);
allTasks.add(A);
allTasks.add(X);
allTasks.add(Q);
allTasks.add(start);
System.out.println("Critical Path: "+Arrays.toString(criticalPath(allTasks)));
}
//A wrapper class to hold the tasks during the calculation
public static class Task{
//the actual cost of the task
public int cost;
//the cost of the task along the critical path
public int criticalCost;
//a name for the task for printing
public String name;
//the tasks on which this task is dependant
public HashSet<Task> dependencies = new HashSet<Task>();
public Task(String name, int cost, Task... dependencies) {
this.name = name;
this.cost = cost;
for(Task t : dependencies){
this.dependencies.add(t);
}
}
#Override
public String toString() {
return name+": "+criticalCost;
}
public boolean isDependent(Task t){
//is t a direct dependency?
if(dependencies.contains(t)){
return true;
}
//is t an indirect dependency
for(Task dep : dependencies){
if(dep.isDependent(t)){
return true;
}
}
return false;
}
}
public static Task[] criticalPath(Set<Task> tasks){
//tasks whose critical cost has been calculated
HashSet<Task> completed = new HashSet<Task>();
//tasks whose ciritcal cost needs to be calculated
HashSet<Task> remaining = new HashSet<Task>(tasks);
//Backflow algorithm
//while there are tasks whose critical cost isn't calculated.
while(!remaining.isEmpty()){
boolean progress = false;
//find a new task to calculate
for(Iterator<Task> it = remaining.iterator();it.hasNext();){
Task task = it.next();
if(completed.containsAll(task.dependencies)){
//all dependencies calculated, critical cost is max dependency
//critical cost, plus our cost
int critical = 0;
for(Task t : task.dependencies){
if(t.criticalCost > critical){
critical = t.criticalCost;
}
}
task.criticalCost = critical+task.cost;
//set task as calculated an remove
completed.add(task);
it.remove();
//note we are making progress
progress = true;
}
}
//If we haven't made any progress then a cycle must exist in
//the graph and we wont be able to calculate the critical path
if(!progress) throw new RuntimeException("Cyclic dependency, algorithm stopped!");
}
//get the tasks
Task[] ret = completed.toArray(new Task[0]);
//create a priority list
Arrays.sort(ret, new Comparator<Task>() {
#Override
public int compare(Task o1, Task o2) {
//sort by cost
int i= o2.criticalCost-o1.criticalCost;
if(i != 0)return i;
//using dependency as a tie breaker
//note if a is dependent on b then
//critical cost a must be >= critical cost of b
if(o1.isDependent(o2))return -1;
if(o2.isDependent(o1))return 1;
return 0;
}
});
return ret;
}
}

Here is the another version of Jessup's code. I simply add some other functions and now the code calculates earliest/latest start and finish times, slack and whether the node is on the critical path or not. (I simply added the functions and get the result, I haven't put much effort on algorithm and coding)
public class CriticalPath {
public static int maxCost;
public static String format = "%1$-10s %2$-5s %3$-5s %4$-5s %5$-5s %6$-5s %7$-10s\n";
public static void main(String[] args) {
// The example dependency graph
HashSet<Task> allTasks = new HashSet<Task>();
Task end = new Task("End", 0);
Task F = new Task("F", 2, end);
Task A = new Task("A", 3, end);
Task X = new Task("X", 4, F, A);
Task Q = new Task("Q", 2, A, X);
Task start = new Task("Start", 0, Q);
allTasks.add(end);
allTasks.add(F);
allTasks.add(A);
allTasks.add(X);
allTasks.add(Q);
allTasks.add(start);
Task[] result = criticalPath(allTasks);
print(result);
// System.out.println("Critical Path: " + Arrays.toString(result));
}
// A wrapper class to hold the tasks during the calculation
public static class Task {
// the actual cost of the task
public int cost;
// the cost of the task along the critical path
public int criticalCost;
// a name for the task for printing
public String name;
// the earliest start
public int earlyStart;
// the earliest finish
public int earlyFinish;
// the latest start
public int latestStart;
// the latest finish
public int latestFinish;
// the tasks on which this task is dependant
public HashSet<Task> dependencies = new HashSet<Task>();
public Task(String name, int cost, Task... dependencies) {
this.name = name;
this.cost = cost;
for (Task t : dependencies) {
this.dependencies.add(t);
}
this.earlyFinish = -1;
}
public void setLatest() {
latestStart = maxCost - criticalCost;
latestFinish = latestStart + cost;
}
public String[] toStringArray() {
String criticalCond = earlyStart == latestStart ? "Yes" : "No";
String[] toString = { name, earlyStart + "", earlyFinish + "", latestStart + "", latestFinish + "",
latestStart - earlyStart + "", criticalCond };
return toString;
}
public boolean isDependent(Task t) {
// is t a direct dependency?
if (dependencies.contains(t)) {
return true;
}
// is t an indirect dependency
for (Task dep : dependencies) {
if (dep.isDependent(t)) {
return true;
}
}
return false;
}
}
public static Task[] criticalPath(Set<Task> tasks) {
// tasks whose critical cost has been calculated
HashSet<Task> completed = new HashSet<Task>();
// tasks whose critical cost needs to be calculated
HashSet<Task> remaining = new HashSet<Task>(tasks);
// Backflow algorithm
// while there are tasks whose critical cost isn't calculated.
while (!remaining.isEmpty()) {
boolean progress = false;
// find a new task to calculate
for (Iterator<Task> it = remaining.iterator(); it.hasNext();) {
Task task = it.next();
if (completed.containsAll(task.dependencies)) {
// all dependencies calculated, critical cost is max
// dependency
// critical cost, plus our cost
int critical = 0;
for (Task t : task.dependencies) {
if (t.criticalCost > critical) {
critical = t.criticalCost;
}
}
task.criticalCost = critical + task.cost;
// set task as calculated an remove
completed.add(task);
it.remove();
// note we are making progress
progress = true;
}
}
// If we haven't made any progress then a cycle must exist in
// the graph and we wont be able to calculate the critical path
if (!progress)
throw new RuntimeException("Cyclic dependency, algorithm stopped!");
}
// get the cost
maxCost(tasks);
HashSet<Task> initialNodes = initials(tasks);
calculateEarly(initialNodes);
// get the tasks
Task[] ret = completed.toArray(new Task[0]);
// create a priority list
Arrays.sort(ret, new Comparator<Task>() {
#Override
public int compare(Task o1, Task o2) {
return o1.name.compareTo(o2.name);
}
});
return ret;
}
public static void calculateEarly(HashSet<Task> initials) {
for (Task initial : initials) {
initial.earlyStart = 0;
initial.earlyFinish = initial.cost;
setEarly(initial);
}
}
public static void setEarly(Task initial) {
int completionTime = initial.earlyFinish;
for (Task t : initial.dependencies) {
if (completionTime >= t.earlyStart) {
t.earlyStart = completionTime;
t.earlyFinish = completionTime + t.cost;
}
setEarly(t);
}
}
public static HashSet<Task> initials(Set<Task> tasks) {
HashSet<Task> remaining = new HashSet<Task>(tasks);
for (Task t : tasks) {
for (Task td : t.dependencies) {
remaining.remove(td);
}
}
System.out.print("Initial nodes: ");
for (Task t : remaining)
System.out.print(t.name + " ");
System.out.print("\n\n");
return remaining;
}
public static void maxCost(Set<Task> tasks) {
int max = -1;
for (Task t : tasks) {
if (t.criticalCost > max)
max = t.criticalCost;
}
maxCost = max;
System.out.println("Critical path length (cost): " + maxCost);
for (Task t : tasks) {
t.setLatest();
}
}
public static void print(Task[] tasks) {
System.out.format(format, "Task", "ES", "EF", "LS", "LF", "Slack", "Critical?");
for (Task t : tasks)
System.out.format(format, (Object[]) t.toStringArray());
}
}

There's a Java applet at cut-the-knot.org. There's also an online calculator at sporkforge.com.
References
Wikipedia/Critical path method

Related

Adding more threads to executorservice only makes it slower

I have this code, where I have my own homemade array class, that I want to use to test the speed of some different concurrency tools in java
public class LongArrayListUnsafe {
private static final ExecutorService executor
= Executors.newFixedThreadPool(1);
public static void main(String[] args) {
LongArrayList dal1 = new LongArrayList();
int n = 100_000_000;
Timer t = new Timer();
List<Callable<Void>> tasks = new ArrayList<>();
tasks.add(() -> {
for (int i = 0; i <= n; i+=2){
dal1.add(i);
}
return null;
});
tasks.add(() -> {
for (int i = 0; i < n; i++){
dal1.set(i, i + 1);
}
return null;});
tasks.add(() -> {
for (int i = 0; i < n; i++) {
dal1.get(i);
}
return null;});
tasks.add(() -> {
for (int i = n; i < n * 2; i++) {
dal1.add(i + 1);
}
return null;});
try {
executor.invokeAll(tasks);
} catch (InterruptedException exn) {
System.out.println("Interrupted: " + exn);
}
executor.shutdown();
try {
executor.awaitTermination(1000, TimeUnit.MILLISECONDS);
} catch (Exception e){
System.out.println("what?");
}
System.out.println("Using toString(): " + t.check() + " ms");
}
}
class LongArrayList {
// Invariant: 0 <= size <= items.length
private long[] items;
private int size;
public LongArrayList() {
reset();
}
public static LongArrayList withElements(long... initialValues){
LongArrayList list = new LongArrayList();
for (long l : initialValues) list.add( l );
return list;
}
public void reset(){
items = new long[2];
size = 0;
}
// Number of items in the double list
public int size() {
return size;
}
// Return item number i
public long get(int i) {
if (0 <= i && i < size)
return items[i];
else
throw new IndexOutOfBoundsException(String.valueOf(i));
}
// Replace item number i, if any, with x
public long set(int i, long x) {
if (0 <= i && i < size) {
long old = items[i];
items[i] = x;
return old;
} else
throw new IndexOutOfBoundsException(String.valueOf(i));
}
// Add item x to end of list
public LongArrayList add(long x) {
if (size == items.length) {
long[] newItems = new long[items.length * 2];
for (int i=0; i<items.length; i++)
newItems[i] = items[i];
items = newItems;
}
items[size] = x;
size++;
return this;
}
public String toString() {
return Arrays.stream(items, 0,size)
.mapToObj( Long::toString )
.collect(Collectors.joining(", ", "[", "]"));
}
}
public class Timer {
private long start, spent = 0;
public Timer() { play(); }
public double check() { return (System.nanoTime()-start+spent)/1e9; }
public void pause() { spent += System.nanoTime()-start; }
public void play() { start = System.nanoTime(); }
}
The implementation of a LongArrayList class is not so important,it's not threadsafe.
The drivercode with the executorservice performs a bunch of operations on the arraylist, and has 4 different tasks doing it, each 100_000_000 times.
The problem is that when I give the threadpool more threads "Executors.newFixedThreadPool(2);" it only becomes slower.
For example, for one thread, a typical timing is 1.0366974 ms, but if I run it with 3 threads, the time ramps up to 5.7932714 ms.
What is going on? why is more threads so much slower?
EDIT:
To boil the issue down, I made this much simpler drivercode, that has four tasks that simply add elements:
ExecutorService executor
= Executors.newFixedThreadPool(2);
LongArrayList dal1 = new LongArrayList();
int n = 100_000_00;
Timer t = new Timer();
for (int i = 0; i < 4 ; i++){
executor.execute(new Runnable() {
#Override
public void run() {
for (int j = 0; j < n ; j++)
dal1.add(j);
}
});
}
executor.shutdown();
try {
executor.awaitTermination(1000, TimeUnit.MILLISECONDS);
} catch (Exception e){
System.out.println("what?");
}
System.out.println("Using toString(): " + t.check() + " ms");
Here it still does not seem to matter how many threads i allocate, there is no speedup at all, could this simply be because of overhead?
There are some problems with your code that make it hard to reason why with more threads the time increases.
btw
public double check() { return (System.nanoTime()-start+spent)/1e9; }
gives you back seconds not milliseconds, so change this:
System.out.println("Using toString(): " + t.check() + " ms");
to
System.out.println("Using toString(): " + t.check() + "s");
First problem:
LongArrayList dal1 = new LongArrayList();
dal1 is shared among all threads, and those threads are updating that shared variable without any mutual exclusion around it, consequently, leading to race conditions. Moreover, this can also lead to cache invalidation, which can increase your overall execution time.
The other thing is that you may have load balancing problems. You have 4 parallel tasks, but clearly the last one
tasks.add(() -> {
for (int i = n; i < n * 2; i++) {
dal1.add(i + 1);
}
return null;});
is the most computing-intensive task. Even if the 4 tasks run in parallel, without the problems that I have mention (i.e., lack of synchronization around the shared data), the last task will dictate the overall execution time.
Not to mention that parallelism does not come for free, it adds overhead (e.g., scheduling the parallel work and so on), which might be high enough that makes it not worth to parallelize the code in the first place. In your code, there is at least the overhead of waiting for the tasks to be completed, and also the overhead of shutting down the pool of executors.
Another possibility that would also explain why you are not getting ArrayIndexOutOfBoundsException all over the place is that the first 3 tasks are so small that they are being executed by the same thread. This would also again make your overall execution time very dependent on the last task, the on the overhead of executor.shutdown(); and executor.awaitTermination. However, even if that is the case, the order of execution of tasks, and which threads will execute then, is typically non-deterministic, and consequently, is not something that your application should rely upon. Funny enough, when I changed your code to immediately execute the tasks (i.e., executor.execute) I got ArrayIndexOutOfBoundsException all over the place.

Get smallest available key (>= 1) in a HashMap

I am using a Map in which I save all tracks that the user registers. Every new track should be assigned a new ID which starts at 1. However, if tracks 1, 2, 3, 4 exist and the user deletes track with ID 1 the next added track gets the smallest available ID >=1 which in this case would be 1.
How is it possible to do this efficiently? Or is there a better datatype available?
private Map<Integer, Track> tracks;
public Register() {
this.trains = new HashMap<>();
}
public void addTrack(Track track) {
int id = <get Smallest Value Available >= 1>;
this.tracks.put(id, track);
}
public void removeTrack(int id) {
if (tracks.containsKey(id)) {
this.tracks.remove(id);
} else {
Terminal.printError("track with ID " + id + " doesn't exist.");
}
}
Approach 1: You can use TreeMap and iterate through its keys, and if there is a gap between two keys, you can insert your element in this gap. Addition will work in O(currentKeysCount) worst-case, deletion will work in O(log(currentKeysCount)).
private TreeMap<Integer, Track> tracks;
public Register() {
this.trains = new TreeMap<>();
}
public void addTrack(Track track) {
int id = 1;
for (int key : this.trains.keySet) {
if (key > id) break;
id = key + 1;
}
this.tracks.put(id, track);
}
public void removeTrack(int id) {
if (tracks.containsKey(id)) {
this.tracks.remove(id);
} else {
Terminal.printError("track with ID " + track.getId() + " doesn't exist.");
}
}
Approach 2: You can create a PriorityQueue that will store deleted keys. Addition and deletion will work in O(log(currentKeysCount) + log(deletedKeysCount)).
private Map<Integer, Track> tracks;
private PriorityQueue<Integer> deletedKeys;
private int nextKey;
public Register() {
this.trains = new HashMap<>();
this.deletedKeys = new PriorityQueue<>();
this.nextKey = 0;
}
public void addTrack(Track track) {
int id = nextKey;
if (!deletedKeys.isEmpty()) id = deletedKeys.poll();
this.tracks.put(id, track);
}
public void removeTrack(int id) {
if (tracks.containsKey(id)) {
this.tracks.remove(id);
this.deletedKeys.add(id);
} else {
Terminal.printError("track with ID " + track.getId() + " doesn't exist.");
}
}
Approach 3: It may be much easier to ignore missing keys and just increase nextKey counter on every addition (you can even use long instead of int). Unless you add a new key more often than once per millisecond, your program won't fail earlier than all code that uses System.currentTimeMillis() (and it will fail in more than 292 million years). Addition and deletion will work in O(log(currentKeysCount))
I would do it with a loop and see if the which value is not yet included in the map
public Integer getId(Map<Integer, Track> tracks) {
// Set on max-value
Integer id = Collections.max(tracks.keySet()) + 1;
for (int i = 1; i <= tracks.keySet().size(); i++) {
if (!tracks.keySet().contains(i)) {
// lower value available
id = i;
break;
}
}
return id;
}
Say if from 100 trains number 40 and number 60 are free, you want to get 40 from {40, 60}.
private final Map<Integer, Track> tracks = new HashMap<>();;
private final SortedSet<Integer> freeIds = new TreeSet<>();
public synchronized void addTrack(Track track) {
int id;
if (freeIds.isEmpty()) {
id = 1 + tracks.size(); // Numbering from 1
} else {
id = freeIds.first();
freeIds.remove(id);
}
track.setId(id);
tracks.put(id, track);
}
public synchronized void removeTrack(int id) {
Track track = tracks.remove(id);
if (track != null) {
track.setId(-1);
freeIds.add(id);
} else {
Terminal.printError("track with ID " + track.getId() + " doesn't exist.");
}
}

Interval lock Implementation

I am looking for an implementation of interval lock. Given an interval (x, y), a thread can acquire the lock if no-one else is acquiring any interval that contains point p where x <= p <= y.
My current idea is maintaining an array of existing granted intervals (x1, y1, x2, y2, ..., xn, yn) where x1 < y1 < x2 < y2 < ... < xn < yn and checks to see if (x, y) overlaps with any of those intervals.
The search takes O(logn) time which makes me happy. However, when the search returns that there is some overlaps, the lock function needs to somehow retry efficiently until it can acquire the lock when others release their interval locks. Busy-waiting or sleep seems not a good idea.
Is there a way to implement the retry efficiently?
As #c0der suggested I've made an implementation that simply tracks the locked intervals.
My code implies a Range class that ...
is immutable
has a lower and upper bound (extending to unbounded ranges shouldn't be too hard)
properly implements equals() and hashCode()
The RangeLock class currently only implements a blocking lock method. Unlocking is done through a returned Unlocker instance. This is to avoid threads not having acquired the lock, being able to unlock a given Range.
public class RangeLock<T extends Comparable<? super T>> {
private final SortedSet<Range<T>> locked = new TreeSet<>(Comparator.comparing(Range::lower));
private final Object lock = new Object();
public Unlocker lock(Range<T> range) throws InterruptedException {
synchronized (lock) {
while (!available(range)) {
lock.wait();
}
locked.add(range);
return () -> {
synchronized (lock) {
locked.remove(range);
lock.notifyAll();
}
};
}
}
private boolean available(Range<T> range) {
SortedSet<Range<T>> tailSet = locked.tailSet(range);
SortedSet<Range<T>> headSet = locked.headSet(range);
return (tailSet.isEmpty() || !tailSet.first().overlaps(range)) && (headSet.isEmpty() || !headSet.last().overlaps(range));
}
public interface Unlocker {
void unlock();
}
}
I think the question is essentially about an efficient way to have a thread wait and retry.
How about listening to changes in the
array of existing granted intervals
and retry only when it has changed ?
The following should not be considered a proper implementation (my experience with thread is very limited), but a demonstration of the proposed mechanism:
Ranges.java and Range.java
//represents all ranges
//see also: https://stackoverflow.com/a/7721388/3992939
public class Ranges {
private List<Range> ranges = new ArrayList<>();
private PropertyChangeSupport rangeChangedProperty = new PropertyChangeSupport(this);
public Range getRange(int rangeStart, int rangeEnd) {
if(contains(rangeStart) || contains(rangeEnd)) {
return null;
}
Range range = new Range(rangeStart, rangeEnd);
range.addListener( (observable, oldValue, newValue) -> {
rangeChangedProperty.firePropertyChange("Range", "-" , "changed");
}
);
ranges.add(range);
return range;
}
private boolean contains(int number){
for(Range range : ranges) {
if(range.contains(number)) {return true;}
}
return false;
}
public boolean removeRange(Range range) {
boolean isContains = ranges.remove(range);
rangeChangedProperty.firePropertyChange("Range", "-" , "removed");
return isContains;
}
/**
* Listen to {#link #rangeChangedProperty}. Fires whenever a range changes
* or removed.
* <br/>A client and a listener and when it fires, notify all threads.
*/
public void addChangeListener(PropertyChangeListener listener) {
rangeChangedProperty.addPropertyChangeListener(listener);
}
//represents a single range
//It is muttable
//can be implemented using ValueRange (https://stackoverflow.com/a/40716042/3992939)
class Range{
private SimpleIntegerProperty low = new SimpleIntegerProperty();
private SimpleIntegerProperty high = new SimpleIntegerProperty();
private SimpleObjectProperty<int[]> rangeProperty = new SimpleObjectProperty<>();
private Range(int rangeStart, int rangeEnd){
low.set(rangeStart) ; high.set(rangeEnd);
updateRange();
low.addListener((observable, oldValue, newValue) -> { updateRange(); });
high.addListener((observable, oldValue, newValue) -> { updateRange(); });
}
/**
* Listen to {#link #rangeProperty} that changes whenever the range changes
*/
void addListener(ChangeListener<int[]> listener) {
rangeProperty.addListener(listener);
}
private void updateRange() {rangeProperty.set(new int[] {low.get(), high.get()});}
public int getRangeStart() { return low.get(); }
public void setRangeStart(int rangeStart) { low.set(rangeStart);}
public int getRangeEnd() {return high.get();}
public void setRangeEnd(int rangeEnd) { high.set(rangeEnd);}
public boolean contains(int number){
int min = Math.min(low.get(), high.get());
int max = Math.max(low.get(), high.get());
return ((number >= min) && (number <= max));
}
}
}
GetRange.java
//used to simulate a thread trying to get a range
public class GetRange implements Runnable{
private Ranges ranges;
private int low, high;
private String id;
GetRange(Ranges ranges, int low, int high, String id) {
this.ranges = ranges;
this.low = low; this.high = high; this.id = id;
}
#Override
public void run() {
synchronized (ranges) {
while(ranges.getRange(low,high) == null) {
System.out.println("Tread "+ id + " is waiting");
try {
ranges.wait();
} catch (InterruptedException ex) { ex.printStackTrace();}
}
}
System.out.println("Tread "+ id + " got range. All done");
}
}
Test is with :
//test
public static void main(String[] args) throws InterruptedException {
Ranges ranges = new Ranges();
ranges.addChangeListener( (evt) -> {
synchronized (ranges) {
ranges.notifyAll();
System.out.println(evt.getPropertyName() + " "+ evt.getNewValue());
}
});
Range range1 = ranges.getRange(10,15);
Range range2 = ranges.getRange(20,25);
new Thread(new GetRange(ranges, 10, 12, "A")).start();
new Thread(new GetRange(ranges, 21, 28, "B")).start();
new Thread(new GetRange(ranges, 10, 12, "C")).start();
Thread.sleep(50);
System.out.println("-- Changing end of range 1. Threads notifyied and keep waiting -----");
range1.setRangeEnd(16); //no thread effected
Thread.sleep(50);
System.out.println("-- Changing start of range 1. Threads notifyied and A or C get range -----");
range1.setRangeStart(13); //effects thread A or C
Thread.sleep(50);
System.out.println("-- Removing range 2. Threads notifyied and B get range -----");
ranges.removeRange(range2);//effects thread B
Thread.sleep(50);
System.exit(1);
}
Output:
Tread A is waiting Tread C is waiting Tread B is waiting
-- Changing end of range 1. Threads notifyied and keep waiting -----
Range changed
Tread B is waiting
Tread C is waiting
Tread A is waiting
-- Changing start of range 1. Threads notifyied and A or C get range ----- Range changed Tread A got range. All done
Thread C is waiting
Tread B is waiting
-- Removing range 2. Threads notifyied and B get range -----
Range removed
Tread B got range. All done
Tread C is waiting
Guava's Striped locks may be of interest to you.
If you have a function int key(int p) which returns the index i of the interval [x_i,y_i] which p belongs to, you could probably use a Striped lock to achieve your goal.
For instance, if we had as interval bounds the points x_1, x_2, ... x_n such that x_i < x_(i+1) and x_(i+1) - x_i remains constant over all i from 1 to n, we could use something like key(p) = p -> (p - x_1) / n.
However, based on the notation you chose, this assumption may not hold and the function key be not as straightforward - but hopefully a lock striping solution will work for you.
This is my implementation for IntervalLock that supports Read and Write locks. Reads may acquire locks that have ranges overlapped, while a write must wait if its range overlaps with any other read or write. The basic idea is to use an interval tree to store the ranges. At a given time, each range may hold a write lock or multiple read locks. Insertion and deletion ranges from the tree must done carefully to prevent any race conditions. The code uses an implementation of interval tree from here.
SemaphoreInterval.java
package intervallock;
import java.util.ArrayList;
import java.util.concurrent.Semaphore;
import datastructures.Interval;
public class SemaphoreInterval implements Interval {
private ArrayList<Semaphore> semaphores;
private int start;
private int end;
private int mode;
public SemaphoreInterval(int start, int end, int mode) {
this.semaphores = new ArrayList<>(1);
this.start = start;
this.end = end;
this.mode = mode;
}
public int getMode() {
return mode;
}
public ArrayList<Semaphore> getSemaphores() {
return semaphores;
}
#Override
public int start() {
return start;
}
#Override
public int end() {
return end+1;
}
}
IntervalLock.java
package intervallock;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.Semaphore;
import datastructures.IntervalTree;
/**
* An implementation of Interval Lock
*
* #author Hieu
*
*/
public class IntervalLock {
public IntervalTree<SemaphoreInterval> tree;
private Semaphore treeLock;
private int maxPermits;
public static final int READ = 0;
public static final int WRITE = 1;
public IntervalLock(int maxPermits) {
tree = new IntervalTree<>();
treeLock = new Semaphore(1);
this.maxPermits = maxPermits;
}
/**
* Acquire a lock on range [start, end] with the specified mode.
* #param start The start of the interval
* #param end The end of the interval
* #param mode The mode, either IntervalLock.READ or IntervalLock.WRITE.
* #return The SemaphoreInterval instance.
*/
public SemaphoreInterval acquire(int start, int end, int mode) {
SemaphoreInterval si = new SemaphoreInterval(start, end, mode);
Set<Semaphore> semaphores = new HashSet<>();
try {
treeLock.acquire();
} catch (InterruptedException e) {
e.printStackTrace(System.out);
System.exit(-1);
}
Iterator<SemaphoreInterval> overlappers = tree.overlappers(si);
while (overlappers.hasNext()) {
SemaphoreInterval i = overlappers.next();
if (i == null) {
System.out.println("Error: Getting a null interval");
System.exit(-1);
}
if (i.compareTo(si) == 0)
continue;
switch (i.getMode()) {
case READ:
if (mode == WRITE)
semaphores.addAll(i.getSemaphores());
break;
case WRITE:
semaphores.addAll(i.getSemaphores());
break;
}
}
SemaphoreInterval result = tree.insert(si);
if (result != null)
si = result;
si.getSemaphores().add(new Semaphore(0));
treeLock.release();
for (Semaphore s: semaphores) {
try {
s.acquire();
} catch (InterruptedException e) {
e.printStackTrace(System.out);
System.exit(-1);
}
}
return si;
}
/**
* Release the range lock hold on specified SemaphoreInterval.
* #param si The semaphore interval returned by the acquire().
*/
public void release(SemaphoreInterval si) {
try {
treeLock.acquire();
} catch (InterruptedException e) {
e.printStackTrace(System.out);
System.exit(-1);
}
if (si.getSemaphores() == null || si.getSemaphores().size() == 0) {
System.out.println("Error: Empty array of semaphores");
treeLock.release();
return;
}
Semaphore sm = si.getSemaphores().remove(0);
if (si.getSemaphores().size() == 0) {
boolean success = tree.delete(si);
if (!success) {
System.out.println("Error: Cannot remove an interval.");
treeLock.release();
return;
}
}
treeLock.release();
sm.release(maxPermits);
}
}
Usage
// init the lock with the max permits per semaphore (should be the max number of threads)
public static final IntervalLock lock = new IntervalLock(1000);
// ...
// acquire the lock on range [a, b] (inclusive), with mode (either IntervalLock.READ or IntervalLock.WRITE)
// it returns a SemaphoreInterval instance
SemaphoreInterval si = lock.acquire(a, b, mode);
// ...
// release the acquired lock
lock.release(si);

Thread Pool per key in Java

Suppose that you have a grid G of n x m cells, where n and m are huge.
Further, suppose that we have numerous tasks, where each task belong to a single cell in G, and should be executed in parallel (in a thread pool or other resource pool).
However, task belonging to the same cell must be done serially, that is, it should wait that previous task in the same cell to be done.
How can I solve this issue?
I've search and used several thread pools (Executors, Thread), but no luck.
Minimum Working Example
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class MWE {
public static void main(String[] args) {
ExecutorService threadPool = Executors.newFixedThreadPool(16);
Random r = new Random();
for (int i = 0; i < 10000; i++) {
int nx = r.nextInt(10);
int ny = r.nextInt(10);
Runnable task = new Runnable() {
public void run() {
try {
System.out.println("Task is running");
Thread.sleep(1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
};
threadPool.submit(new Thread(task)); // Should use nx,ny here somehow
}
}
}
You can create a list of n Executors.newFixedThreadPool(1).
Then submit to the corresponding thread by using a hash function.
Ex. threadPool[key%n].submit(new Thread(task)).
A callback mechanism with a synchronized block could work efficiently here.
I have previously answered a similar question here.
There are some limitations (see the linked answer), but it is simple enough to keep track of what is going on (good maintainability).
I have adapted the source code and made it more efficient for your case where most tasks will be executed in parallel
(since n and m are huge), but on occasion must be serial (when a task is for the same point in the grid G).
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.locks.ReentrantLock;
// Adapted from https://stackoverflow.com/a/33113200/3080094
public class GridTaskExecutor {
public static void main(String[] args) {
final int maxTasks = 10_000;
final CountDownLatch tasksDone = new CountDownLatch(maxTasks);
ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(16);
try {
GridTaskExecutor gte = new GridTaskExecutor(executor);
Random r = new Random();
for (int i = 0; i < maxTasks; i++) {
final int nx = r.nextInt(10);
final int ny = r.nextInt(10);
Runnable task = new Runnable() {
public void run() {
try {
// System.out.println("Task " + nx + " / " + ny + " is running");
Thread.sleep(1);
} catch (Exception e) {
e.printStackTrace();
} finally {
tasksDone.countDown();
}
}
};
gte.addTask(task, nx, ny);
}
tasksDone.await();
System.out.println("All tasks done, task points remaining: " + gte.size());
} catch (Exception e) {
e.printStackTrace();
} finally {
executor.shutdownNow();
}
}
private final Executor executor;
private final Map<Long, List<CallbackPointTask>> tasksWaiting = new HashMap<>();
// make lock fair so that adding and removing tasks is balanced.
private final ReentrantLock lock = new ReentrantLock(true);
public GridTaskExecutor(Executor executor) {
this.executor = executor;
}
public void addTask(Runnable r, int x, int y) {
Long point = toPoint(x, y);
CallbackPointTask pr = new CallbackPointTask(point, r);
boolean runNow = false;
lock.lock();
try {
List<CallbackPointTask> pointTasks = tasksWaiting.get(point);
if (pointTasks == null) {
if (tasksWaiting.containsKey(point)) {
pointTasks = new LinkedList<CallbackPointTask>();
pointTasks.add(pr);
tasksWaiting.put(point, pointTasks);
} else {
tasksWaiting.put(point, null);
runNow = true;
}
} else {
pointTasks.add(pr);
}
} finally {
lock.unlock();
}
if (runNow) {
executor.execute(pr);
}
}
private void taskCompleted(Long point) {
lock.lock();
try {
List<CallbackPointTask> pointTasks = tasksWaiting.get(point);
if (pointTasks == null || pointTasks.isEmpty()) {
tasksWaiting.remove(point);
} else {
System.out.println(Arrays.toString(fromPoint(point)) + " executing task " + pointTasks.size());
executor.execute(pointTasks.remove(0));
}
} finally {
lock.unlock();
}
}
// for a general callback-task, see https://stackoverflow.com/a/826283/3080094
private class CallbackPointTask implements Runnable {
final Long point;
final Runnable original;
CallbackPointTask(Long point, Runnable original) {
this.point = point;
this.original = original;
}
#Override
public void run() {
try {
original.run();
} finally {
taskCompleted(point);
}
}
}
/** Amount of points with tasks. */
public int size() {
int l = 0;
lock.lock();
try {
l = tasksWaiting.size();
} finally {
lock.unlock();
}
return l;
}
// https://stackoverflow.com/a/12772968/3080094
public static long toPoint(int x, int y) {
return (((long)x) << 32) | (y & 0xffffffffL);
}
public static int[] fromPoint(long p) {
return new int[] {(int)(p >> 32), (int)p };
}
}
This is were systems like Akka in java world make sense.If both X and Y are large, you may want to look at processing them using a message passing mechanism rather than bunch them up in a huge chain of callbacks and futures. One actor has the list of tasks to be done and is handed a cell and the actor would eventually compute the result and persist it. If something fails in the intermediate step, it's not end of world.
If I get you right, you want to execute X tasks (X is very big) in Y queues (Y is much smaller than X).
Java 8 has CompletableFuture class, which represents an (asynchronous) computation. Basically, it's Java's implementation of Promise. Here is how you can organize a chain of computations (generic types omitted):
// start the queue with a "completed" task
CompletableFuture queue = CompletableFuture.completedFuture(null);
// append a first task to the queue
queue = queue.thenRunAsync(() -> System.out.println("first task running"));
// append a second task to the queue
queue = queue.thenRunAsync(() -> System.out.println("second task running"));
// ... and so on
When you use thenRunAsync(Runnable), tasks will be executed using a thread pool (there are other possibilites - see API docs). You can also supply your own thread pool as well.
You can create Y of such chains (possibly keeping references to them in some table).
This library should do the job: https://github.com/jano7/executor
int maxTasks = 16;
ExecutorService threadPool = Executors.newFixedThreadPool(maxTasks);
KeySequentialBoundedExecutor executor = new KeySequentialBoundedExecutor(maxTasks, threadPool);
Random r = new Random();
for (int i = 0; i < 10000; i++) {
int nx = r.nextInt(10);
int ny = r.nextInt(10);
Runnable task = new Runnable() {
public void run() {
try {
System.out.println("Task is running");
Thread.sleep(1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
};
executor.execute(new KeyRunnable<>((ny * 10) + nx, task));
}
The Scala example given below demonstrates how keys in a map can be executed in parallel and values of a key are executed in serial. Change it to Java syntax if you want to try it in Java (Scala uses JVM libraries). Basically chain the tasks future to have them execute sequentially.
import java.util.concurrent.{CompletableFuture, ExecutorService, Executors, Future, TimeUnit}
import scala.collection.concurrent.TrieMap
import scala.collection.mutable.ListBuffer
import scala.util.Random
/**
* For a given Key-Value pair with tasks as values, demonstrates sequential execution of tasks
* within a key and parallel execution across keys.
*/
object AsyncThreads {
val cachedPool: ExecutorService = Executors.newCachedThreadPool
var initialData: Map[String, ListBuffer[Int]] = Map()
var processedData: TrieMap[String, ListBuffer[Int]] = TrieMap()
var runningTasks: TrieMap[String, CompletableFuture[Void]] = TrieMap()
/**
* synchronous execution across keys and values
*/
def processSync(key: String, value: Int, initialSleep: Long) = {
Thread.sleep(initialSleep)
if (key.equals("key_0")) {
println(s"${Thread.currentThread().getName} -> sleep: $initialSleep. Inserting key_0 -> $value")
}
processedData.getOrElseUpdate(key, new ListBuffer[Int]).addOne(value)
}
/**
* parallel execution across keys
*/
def processASync(key: String, value: Int, initialSleep: Long) = {
val task: Runnable = () => {
processSync(key, value, initialSleep)
}
// 1. Chain the futures for sequential execution within a key
val prevFuture = runningTasks.getOrElseUpdate(key, CompletableFuture.completedFuture(null))
runningTasks.put(key, prevFuture.thenRunAsync(task, cachedPool))
// 2. Parallel execution across keys and values
// cachedPool.submit(task)
}
def process(key: String, value: Int, initialSleep: Int): Unit = {
//processSync(key, value, initialSleep) // synchronous execution across keys and values
processASync(key, value, initialSleep) // parallel execution across keys
}
def main(args: Array[String]): Unit = {
checkDiff()
0.to(9).map(kIndex => {
var key = "key_" + kIndex
var values = ListBuffer[Int]()
initialData += (key -> values)
1.to(10).map(vIndex => {
values += kIndex * 10 + vIndex
})
})
println(s"before data:$initialData")
initialData.foreach(entry => {
entry._2.foreach(value => {
process(entry._1, value, Random.between(0, 100))
})
})
cachedPool.awaitTermination(5, TimeUnit.SECONDS)
println(s"after data:$processedData")
println("diff: " + (initialData.toSet diff processedData.toSet).toMap)
cachedPool.shutdown()
}
def checkDiff(): Unit = {
var a1: TrieMap[String, List[Int]] = new TrieMap()
a1.put("one", List(1, 2, 3, 4, 5))
a1.put("two", List(11, 12, 13, 14, 15))
var a2: TrieMap[String, List[Int]] = new TrieMap()
a2.put("one", List(2, 1, 3, 4, 5))
a2.put("two", List(11, 12, 13, 14, 15))
println("a1: " + a1)
println("a2: " + a2)
println("check.diff: " + (a1.toSet diff a2.toSet).toMap)
}
}

Implementing a Map where keys are sets of non-overlapping ranges

I am facing a performance issue with my current implementation using List and loops. I was thinking to make some custom Map but is it possible to override the getter properly to work with following setup:
Map contains custom objects and key can be following:
case A key: "10"
calling get("10") would return matching object
case B key: "10;12;14"
calling get("10"),get("12"),get("14") would return same object
case C key: "10;20-30"
calling get("10"), get(value between 20 and 30) would return same object
Is using a Map in this kind of scenario the best way, what could be the alternatives?
Thanks.
UPDATE: Added full implementation
UPDATE 2: If you want you can use RangeMap for internal theMap as suggested in the comments.
If you key ranges don't overlap, you can create a custom container which internally stores data in TreeMap with a custom key which implements Comparable:
class MyStorage<T> {
private static final class Range implements Comparable<Range> {
private int first;
private int last;
public Range(int first_, int last_) {
first = first_;
last = last_;
}
// This heavily relies on that the ranges don't overlap
#Override public int compareTo(Range other) {
if (last < other.first)
return -1;
if (first > other.last)
return 1;
return 0;
}
}
private Map<Range, T> theMap = new TreeMap<Range, T>();
public void put(String key, T obj) {
String[] ranges = key.split(";");
for (String range : ranges) {
//System.out.println("Adding " + range);
String[] bounds = range.split("-");
//System.out.println("Bounds " + bounds.length);
int first = Integer.parseInt(bounds[0]);
if (bounds.length == 1)
theMap.put(new Range(first, first), obj);
else
theMap.put(new Range(first, Integer.parseInt(bounds[1])), obj);
}
}
public T get(String key) {
return get(Integer.parseInt(key));
}
public T get(int key) {
return theMap.get(new Range(key, key));
}
}
class Main
{
public static void main (String[] args) throws java.lang.Exception
{
MyStorage<Integer> storage = new MyStorage<Integer>();
storage.put("10;20-30", 123);
storage.put("15;31-50", 456);
System.out.println(storage.get("42"));
}
}
There is a structure called an Interval Tree that may fit your needs. Here's an implementation of it.
It allows you to attach objects to intervals rather than the usual object.
Note that this implementation does not implement the sorted indexes suggested by the original algorithm as the use case I needed it for did not require that level of speed.
/**
* #author OldCurmudgeon
* #param <T> - The type stored in the tree. Must implement IntervalTree.Interval but beyond that you can do what you like. Probably store that value in there too.
*/
public class IntervalTree<T extends IntervalTree.Interval> {
// My intervals.
private final List<T> intervals;
// My center value. All my intervals contain this center.
private final long center;
// My interval range.
private final long lBound;
private final long uBound;
// My left tree. All intervals that end below my center.
private final IntervalTree<T> left;
// My right tree. All intervals that start above my center.
private final IntervalTree<T> right;
public IntervalTree(List<T> intervals) {
if (intervals == null) {
throw new NullPointerException();
}
// Initially, my root contains all intervals.
this.intervals = intervals;
// Find my center.
center = findCenter();
/*
* Builds lefts out of all intervals that end below my center.
* Builds rights out of all intervals that start above my center.
* What remains contains all the intervals that contain my center.
*/
// Lefts contains all intervals that end below my center point.
final List<T> lefts = new ArrayList<>();
// Rights contains all intervals that start above my center point.
final List<T> rights = new ArrayList<>();
// Track my bounds while distributing.
long uB = Long.MIN_VALUE;
long lB = Long.MAX_VALUE;
for (T i : intervals) {
long start = i.getStart();
long end = i.getEnd();
if (end < center) {
// It ends below me - move it to my left.
lefts.add(i);
} else if (start > center) {
// It starts above me - move it to my right.
rights.add(i);
} else {
// One of mine.
lB = Math.min(lB, start);
uB = Math.max(uB, end);
}
}
// Remove all those not mine.
intervals.removeAll(lefts);
intervals.removeAll(rights);
// Record my bounds.
uBound = uB;
lBound = lB;
// Build the subtrees.
left = lefts.size() > 0 ? new IntervalTree<>(lefts) : null;
right = rights.size() > 0 ? new IntervalTree<>(rights) : null;
// Build my ascending and descending arrays.
/**
* #todo Build my ascending and descending arrays.
*/
}
/*
* Returns a list of all intervals containing the point.
*/
List<T> query(long point) {
// Check my range.
if (point >= lBound) {
if (point <= uBound) {
// In my range but remember, there may also be contributors from left or right.
List<T> found = new ArrayList<>();
// Gather all intersecting ones.
// Could be made faster (perhaps) by holding two sorted lists by start and end.
for (T i : intervals) {
if (i.getStart() <= point && point <= i.getEnd()) {
found.add(i);
}
}
// Gather others.
if (point < center && left != null) {
found.addAll(left.query(point));
}
if (point > center && right != null) {
found.addAll(right.query(point));
}
return found;
} else {
// To right.
return right != null ? right.query(point) : Collections.<T>emptyList();
}
} else {
// To left.
return left != null ? left.query(point) : Collections.<T>emptyList();
}
}
private long findCenter() {
//return average();
return median();
}
protected long median() {
// Choose the median of all centers. Could choose just ends etc or anything.
long[] points = new long[intervals.size()];
int x = 0;
for (T i : intervals) {
// Take the mid point.
points[x++] = (i.getStart() + i.getEnd()) / 2;
}
Arrays.sort(points);
return points[points.length / 2];
}
/*
* What an interval looks like.
*/
public interface Interval {
public long getStart();
public long getEnd();
}
/*
* A simple implemementation of an interval.
*/
public static class SimpleInterval implements Interval {
private final long start;
private final long end;
public SimpleInterval(long start, long end) {
this.start = start;
this.end = end;
}
#Override
public long getStart() {
return start;
}
#Override
public long getEnd() {
return end;
}
#Override
public String toString() {
return "{" + start + "," + end + "}";
}
}
public static void main(String[] args) {
// Make some test data.
final int testEntries = 1 * 100;
ArrayList<SimpleInterval> intervals = new ArrayList<>();
Random random = new Random();
for (int i = 0; i < testEntries; i++) {
// Make a random interval.
long start = random.nextLong();
intervals.add(new SimpleInterval(start, start + 1000));
}
ProcessTimer timer = new ProcessTimer();
IntervalTree<SimpleInterval> tree = new IntervalTree<>(intervals);
System.out.println("Took " + timer);
}
}

Categories

Resources