I am looking for a way to correctly employ Publishers from Project Reactor without producing needless GC pressure by instantiating the whole pipeline on each call.
In our code, a typical handler function answering inter-service HTTP requests looks like this:
final List<Function<ChangeEvent, Mono<Void>>> triggerOtherMicroservices;
@PostMapping("/handle")
public Mono<Void> handle(@RequestBody ChangeEvent changeEvent) {
return Mono
.defer(() -> someService.callToAnotherMicroServiceToFetchData(changeEvent))
.subscribeOn(Schedulers.parallel())
.map(this::mapping)
.flatMap(data -> databaseService.save(data))
.thenMany(Flux.fromIterable(triggerOtherMicroservices).flatMap(t -> t.apply(changeEvent)))
.then();
}
If I understand correctly, this means that on each invocation of handle the whole pipeline (which normally produces huge stack traces) needs to be instantiated (and thus garbage-collected later).
My question is: Can I somehow "prepare" the whole flow once and reuse it later?
I was thinking about something like Mono.create(...). Or am I completely wrong, and there is no need to think about optimization here?
EDIT:
Thinking further I could do:
final List<Function<ChangeEvent, Mono<Void>>> triggerOtherMicroservices;
final Mono<Void> mono = Mono
.defer(() -> Mono
.subscriberContext()
.<ChangeEvent>map(context -> context.get("event"))
.flatMap(event -> someService.callToAnotherMicroServiceToFetchData(event))
)
.subscribeOn(Schedulers.parallel())
.flatMap(data -> databaseService.save(data))
.thenMany(Mono
.subscriberContext()
.<ChangeEvent>map(context -> context.get("event"))
.flatMapMany(event -> Flux
.fromIterable(triggerOtherMicroservices)
.flatMap(t -> t.apply(event)))
)
.then();
public Mono<Void> handle(@Validated ChangeEvent changeEvent) throws NoSuchElementException {
return mono.subscriberContext(context -> context.put("event", changeEvent));
}
Anyway, I doubt this is what subscriberContext is meant for.
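For what it's worth, newer Reactor versions (3.4+) deprecated subscriberContext in favor of contextWrite and deferContextual. A minimal sketch of the same "prepare once, feed the event through the context" idea with that API, reusing the services from above:
final Mono<Void> preparedMono = Mono
        .deferContextual(ctx -> someService
                .callToAnotherMicroServiceToFetchData(ctx.<ChangeEvent>get("event")))
        .subscribeOn(Schedulers.parallel())
        .flatMap(data -> databaseService.save(data))
        .then();

public Mono<Void> handle(@RequestBody ChangeEvent changeEvent) {
    // contextWrite replaces subscriberContext(Function) in Reactor 3.4+
    return preparedMono.contextWrite(ctx -> ctx.put("event", changeEvent));
}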
Note: There are many JVM implementations and this answer doesn't claim to have tested all of them, nor to be a general statement for all possible situations.
According to https://www.bettercodebytes.com/the-cost-of-object-creation-in-java-including-garbage-collection/, object creation can have no overhead at all when the objects only live within a method. In that case the JIT doesn't actually allocate the object on the heap; thanks to escape analysis it executes the contained methods directly against the object's would-be fields (scalar replacement).
Hence, no garbage collection is required later on either.
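As a rough illustration of that effect (a hypothetical example, separate from the benchmark below): when an object provably never escapes a method, HotSpot can scalar-replace it, so no heap allocation and no GC work happen at all.
public class EscapeAnalysisSketch {
    static final class Point {
        final long x, y;
        Point(long x, long y) { this.x = x; this.y = y; }
    }

    // 'p' never escapes this method, so after JIT compilation it is a
    // candidate for scalar replacement: no allocation, nothing to collect.
    static long distSquared(long x, long y) {
        Point p = new Point(x, y);
        return p.x * p.x + p.y * p.y;
    }

    public static void main(String[] args) {
        long acc = 0;
        for (long i = 0; i < 10_000_000; i++) {
            acc += distSquared(i, i + 1);
        }
        System.out.println(acc); // keep the result alive so the loop isn't dead code
    }
}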
A test combining this with the question's scenario can be implemented like so:
Controller:
final List<Function<Event, Mono<Void>>> triggerOtherMicroservices = Arrays.asList(
event -> Mono.empty(),
event -> Mono.empty(),
event -> Mono.empty()
);
final Mono<Void> mono = Mono
.defer(() -> Mono
.subscriberContext()
.<Event>map(context -> context.get("event"))
.flatMap(this::fetch)
)
.subscribeOn(Schedulers.parallel())
.flatMap(this::duplicate)
.flatMap(this::duplicate)
.flatMap(this::duplicate)
.flatMap(this::duplicate)
.thenMany(Mono
.subscriberContext()
.<Event>map(context -> context.get("event"))
.flatMapMany(event -> Flux
.fromIterable(triggerOtherMicroservices)
.flatMap(t -> t.apply(event))
)
)
.then();
@PostMapping("/event-prepared")
public Mono<Void> handle(@RequestBody @Validated Event event) throws NoSuchElementException {
return mono.subscriberContext(context -> context.put("event", event));
}
@PostMapping("/event-on-the-fly")
public Mono<Void> handleOld(@RequestBody @Validated Event event) throws NoSuchElementException {
return Mono
.defer(() -> fetch(event))
.subscribeOn(Schedulers.parallel())
.flatMap(this::duplicate)
.flatMap(this::duplicate)
.flatMap(this::duplicate)
.flatMap(this::duplicate)
.thenMany(Flux.fromIterable(triggerOtherMicroservices).flatMap(t -> t.apply(event)))
.then();
}
private Mono<Data> fetch(Event event) {
return Mono.just(new Data(event.timestamp));
}
private Mono<Data> duplicate(Data data) {
return Mono.just(new Data(data.a * 2));
}
Data:
public class Data {
long a;
public Data(long a) {
this.a = a;
}
@Override
public String toString() {
return "Data{" +
"a=" + a +
'}';
}
}
Event:
@JsonSerialize(using = EventSerializer.class)
public class Event {
UUID source;
long timestamp;
@JsonCreator
public Event(@JsonProperty("source") UUID source, @JsonProperty("timestamp") long timestamp) {
this.source = source;
this.timestamp = timestamp;
}
@Override
public String toString() {
return "Event{" +
"source=" + source +
", timestamp=" + timestamp +
'}';
}
}
EventSerializer:
public class EventSerializer extends StdSerializer<Event> {
public EventSerializer() {
this(null);
}
public EventSerializer(Class<Event> t) {
super(t);
}
@Override
public void serialize(Event value, JsonGenerator jsonGenerator, SerializerProvider provider) throws IOException {
jsonGenerator.writeStartObject();
jsonGenerator.writeStringField("source", value.source.toString());
jsonGenerator.writeNumberField("timestamp", value.timestamp);
jsonGenerator.writeEndObject();
}
}
and finally the test itself:
@SpringBootTest
@AutoConfigureWebTestClient
class MonoAssemblyTimeTest {
@Autowired
private WebTestClient webTestClient;
final int number_of_requests = 500000;
@Test
void measureExecutionTime() throws IOException {
measureExecutionTime("on-the-fly");
measureExecutionTime("prepared");
}
private void measureExecutionTime(String testCase) throws IOException {
warmUp("/event-" + testCase);
final GCStatisticsDifferential gcStatistics = new GCStatisticsDifferential();
long[] duration = benchmark("/event-" + testCase);
StringBuilder output = new StringBuilder();
int plotPointsInterval = (int) Math.ceil((float) number_of_requests / 1000);
for (int i = 0; i < number_of_requests; i++) {
if (i % plotPointsInterval == 0) {
output.append(String.format("%d , %d %n", i, duration[i]));
}
}
Files.writeString(Paths.get(testCase + ".txt"), output.toString());
long totalDuration = LongStream.of(duration).sum();
System.out.println(testCase + " duration: " + totalDuration / 1000000 + " ms.");
System.out.println(testCase + " average: " + totalDuration / number_of_requests + " ns.");
System.out.println(testCase + ": " + gcStatistics.get());
}
private void warmUp(String path) {
UUID source = UUID.randomUUID();
IntStream.range(0, number_of_requests).forEach(i -> call(new Event(source, i), path));
System.out.println("done with warm-up for path: " + path);
}
private long[] benchmark(String path) {
long[] duration = new long[number_of_requests];
UUID source = UUID.randomUUID();
IntStream.range(0, number_of_requests).forEach(i -> {
long start = System.nanoTime();
call(new Event(source, i), path).returnResult().getResponseBody();
duration[i] = System.nanoTime() - start;
});
System.out.println("done with benchmark for path: " + path);
return duration;
}
private WebTestClient.BodySpec<Void, ?> call(Event event, String path) {
return webTestClient
.post()
.uri(path)
.contentType(MediaType.APPLICATION_JSON)
.bodyValue(event)
.exchange()
.expectBody(Void.class);
}
private static class GCStatisticsDifferential extends GCStatistics {
GCStatistics old = new GCStatistics(0, 0);
public GCStatisticsDifferential() {
super(0, 0);
calculateIncrementalGCStats();
}
public GCStatistics get() {
calculateIncrementalGCStats();
return this;
}
private void calculateIncrementalGCStats() {
long timeNew = 0;
long countNew = 0;
for (GarbageCollectorMXBean gc : ManagementFactory.getGarbageCollectorMXBeans()) {
long count = gc.getCollectionCount();
if (count >= 0) {
countNew += count;
}
long time = gc.getCollectionTime();
if (time >= 0) {
timeNew += time;
}
}
time = timeNew - old.time;
count = countNew - old.count;
old = new GCStatistics(countNew, timeNew); // constructor parameter order is (count, time)
}
}
private static class GCStatistics {
long count, time;
public GCStatistics(long count, long time) {
this.count = count;
this.time = time;
}
@Override
public String toString() {
return "GCStatistics{" +
"count=" + count +
", time=" + time +
'}';
}
}
}
The results are not always the same, but the "on-the-fly" method consistently outperforms the "prepared" method. Moreover, the "on-the-fly" method triggers far fewer garbage collections.
A typical result looks like:
done with warm-up for path: /event-on-the-fly
done with benchmark for path: /event-on-the-fly
on-the-fly duration: 42679 ms.
on-the-fly average: 85358 ns.
on-the-fly: GCStatistics{count=29, time=128}
done with warm-up for path: /event-prepared
done with benchmark for path: /event-prepared
prepared duration: 44678 ms.
prepared average: 89357 ns.
prepared: GCStatistics{count=86, time=67}
These results were obtained on a MacBook Pro (16-inch, 2019), 2.4 GHz 8-core Intel Core i9, 64 GB 2667 MHz DDR4.
Note: Comments, better answers, etc. are still very welcome.
First, take some measurements to decide whether GC pressure is really high and worth bothering about.
Then, use an object-oriented library which allows you to explicitly create pipeline objects and reuse them for multiple requests. Look at Vert.x, for example (I have never used it). My library Df4j allows you to create and reuse a computational graph of any topology, not only linear pipelines, but it does not contain modules to perform HTTP requests. However, Df4j implements the Reactive Streams protocol and so can be connected to any compatible network library.
Related
I have written logic using the Spring Reactor library to get all operators and then all devices for each operator (paginated), in async mode.
I created a Flux to get all operators and then subscribed to it.
final Flux<List<OperatorDetails>> operatorDetailsFlux = reactiveResourceProvider.getOperators();
operatorDetailsFlux
.subscribe(operatorDetailsList -> {
for (final OperatorDetails operatorDetails : operatorDetailsList) {
getAndCacheDevicesForOperator(operatorDetails.getId());
}
});
Now, for each operator I'm fetching the devices, which requires multiple subscriptions: all pages are fetched asynchronously by subscribing to the device Mono.
private void getAndCacheDevicesForOperator(final int operatorId) {
Mono<DeviceListResponseEntity> deviceListResponseEntityMono = reactiveResourceProvider.getConnectedDeviceMonoWithRetryAndErrorSpec(
operatorId, 0);
deviceListResponseEntityMono.subscribe(deviceListResponseEntity -> {
final PaginatedResponseEntity paginatedResponseEntity = deviceListResponseEntity.getData();
final long totalDevicesInOperator = paginatedResponseEntity.getTotalCount();
int deviceCount = paginatedResponseEntity.getCount();
while (deviceCount < totalDevicesInOperator) {
final Mono<DeviceListResponseEntity> deviceListResponseEntityPageMono = reactiveResourceProvider.getConnectedDeviceMonoWithRetryAndErrorSpec(
operatorId, deviceCount);
deviceListResponseEntityPageMono.subscribe(deviceListResponseEntityPage -> {
final List<DeviceDetails> deviceDetailsList = deviceListResponseEntityPage.getData()
.getItems();
// work on devices
});
deviceCount += DEVICE_PAGE_SIZE;
}
});
}
This code works fine. But my question is: is it a good idea to subscribe to a Mono from inside subscribe?
I broke it down into two flows: first getting all operators, and then getting all devices for each operator.
For pagination I'm using Flux.expand to extract all pages.
public Flux<OperatorDetails> getAllOperators() {
return getOperatorsMonoWithRetryAndErrorSpec(0)
.expand(paginatedResponse -> {
final PaginatedEntity operatorDetailsPage = paginatedResponse.getData();
if (morePagesAvailable(operatorDetailsPage)) {
return getOperatorsMonoWithRetryAndErrorSpec(operatorDetailsPage.getOffset() + operatorDetailsPage.getCount());
}
return Mono.empty();
})
.flatMap(responseEntity -> fromIterable(responseEntity.getData().getItems()))
.subscribeOn(apiScheduler);
}
public Flux<Device> getAllDevices(final int opId, final int offset) {
return getConnectedDeviceMonoWithRetryAndErrorSpec(opId, offset)
.expand(paginatedResponse -> {
final PaginatedEntity deviceDetailsPage = paginatedResponse.getData();
if (morePagesAvailable(deviceDetailsPage)) {
return getConnectedDeviceMonoWithRetryAndErrorSpec(opId,
deviceDetailsPage.getOffset() + deviceDetailsPage.getCount());
}
return Mono.empty();
})
.flatMap(responseEntity -> fromIterable(responseEntity.getData().getItems()))
.subscribeOn(apiScheduler);
}
Finally, I'm creating a pipeline and subscribing to it to trigger execution.
operatorDetailsFlux
.flatMap(operatorDetails -> {
return reactiveResourceProvider.getAllDevices(operatorDetails.getId(), 0);
})
.subscribe(deviceDetails -> {
// act on devices
});
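As a possible refinement (a sketch, not something the code above requires): Flux.flatMap has an overload taking a concurrency argument, which caps how many inner publishers are subscribed to at once. That gives back some of the control you had with manual subscriptions, without nesting them.
operatorDetailsFlux
        .flatMap(operatorDetails ->
                reactiveResourceProvider.getAllDevices(operatorDetails.getId(), 0),
                8) // hypothetical cap: at most 8 operators expanded concurrently
        .subscribe(deviceDetails -> {
            // act on devices
        });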
In RxJava and Reactor there is the notion of virtual time to test operators that depend on time. I can't figure out how to do this in Flink. For example, I have put together the following example where I would like to play around with late-arriving events to understand how they are handled. However, I'm not able to work out what such a test would look like. Is there a way to combine Flink and Reactor to make the tests better?
public class PlayWithFlink {
public static void main(String[] args) throws Exception {
final OutputTag<MyEvent> lateOutputTag = new OutputTag<MyEvent>("late-data"){};
// TODO understand how BoundedOutOfOrderness is related to allowedLateness
BoundedOutOfOrdernessTimestampExtractor<MyEvent> eventTimeFunction = new BoundedOutOfOrdernessTimestampExtractor<MyEvent>(Time.seconds(10)) {
@Override
public long extractTimestamp(MyEvent element) {
return element.getEventTime();
}
};
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStream<MyEvent> events = env.fromCollection(MyEvent.examples())
.assignTimestampsAndWatermarks(eventTimeFunction);
AggregateFunction<MyEvent, MyAggregate, MyAggregate> aggregateFn = new AggregateFunction<MyEvent, MyAggregate, MyAggregate>() {
@Override
public MyAggregate createAccumulator() {
return new MyAggregate();
}
@Override
public MyAggregate add(MyEvent myEvent, MyAggregate myAggregate) {
if (myEvent.getTracingId().equals("trace1")) {
myAggregate.getTrace1().add(myEvent);
return myAggregate;
}
myAggregate.getTrace2().add(myEvent);
return myAggregate;
}
@Override
public MyAggregate getResult(MyAggregate myAggregate) {
return myAggregate;
}
@Override
public MyAggregate merge(MyAggregate myAggregate, MyAggregate acc1) {
acc1.getTrace1().addAll(myAggregate.getTrace1());
acc1.getTrace2().addAll(myAggregate.getTrace2());
return acc1;
}
};
KeySelector<MyEvent, String> keyFn = new KeySelector<MyEvent, String>() {
@Override
public String getKey(MyEvent myEvent) throws Exception {
return myEvent.getTracingId();
}
};
SingleOutputStreamOperator<MyAggregate> result = events
.keyBy(keyFn)
.window(EventTimeSessionWindows.withGap(Time.seconds(10)))
.allowedLateness(Time.seconds(20))
.sideOutputLateData(lateOutputTag)
.aggregate(aggregateFn);
DataStream<MyEvent> lateStream = result.getSideOutput(lateOutputTag);
result.print("SessionData");
lateStream.print("LateData");
env.execute();
}
}
class MyEvent {
private final String tracingId;
private final Integer count;
private final long eventTime;
public MyEvent(String tracingId, Integer count, long eventTime) {
this.tracingId = tracingId;
this.count = count;
this.eventTime = eventTime;
}
public String getTracingId() {
return tracingId;
}
public Integer getCount() {
return count;
}
public long getEventTime() {
return eventTime;
}
public static List<MyEvent> examples() {
long now = System.currentTimeMillis();
MyEvent e1 = new MyEvent("trace1", 1, now);
MyEvent e2 = new MyEvent("trace2", 1, now);
MyEvent e3 = new MyEvent("trace2", 1, now - 1000);
MyEvent e4 = new MyEvent("trace1", 1, now - 200);
MyEvent e5 = new MyEvent("trace1", 1, now - 50000);
return Arrays.asList(e1,e2,e3,e4, e5);
}
@Override
public String toString() {
return "MyEvent{" +
"tracingId='" + tracingId + '\'' +
", count=" + count +
", eventTime=" + eventTime +
'}';
}
}
class MyAggregate {
private final List<MyEvent> trace1 = new ArrayList<>();
private final List<MyEvent> trace2 = new ArrayList<>();
public List<MyEvent> getTrace1() {
return trace1;
}
public List<MyEvent> getTrace2() {
return trace2;
}
@Override
public String toString() {
return "MyAggregate{" +
"trace1=" + trace1 +
", trace2=" + trace2 +
'}';
}
}
The output of running this is:
SessionData:1> MyAggregate{trace1=[], trace2=[MyEvent{tracingId='trace2', count=1, eventTime=1551034666081}, MyEvent{tracingId='trace2', count=1, eventTime=1551034665081}]}
SessionData:3> MyAggregate{trace1=[MyEvent{tracingId='trace1', count=1, eventTime=1551034166081}], trace2=[]}
SessionData:3> MyAggregate{trace1=[MyEvent{tracingId='trace1', count=1, eventTime=1551034666081}, MyEvent{tracingId='trace1', count=1, eventTime=1551034665881}], trace2=[]}
However, I would expect the lateStream to trigger for the e5 event, whose timestamp is 50 seconds before the first event's.
If you modify your watermark assigner to be like this
AssignerWithPunctuatedWatermarks<MyEvent> eventTimeFunction = new AssignerWithPunctuatedWatermarks<MyEvent>() {
long maxTs = 0;
@Override
public long extractTimestamp(MyEvent myEvent, long l) {
long ts = myEvent.getEventTime();
if (ts > maxTs) {
maxTs = ts;
}
return ts;
}
@Override
public Watermark checkAndGetNextWatermark(MyEvent event, long extractedTimestamp) {
return new Watermark(maxTs - 10000);
}
};
then you will get the results you expect. I'm not recommending this -- just using it to illustrate what's going on.
What's happening here is that a BoundedOutOfOrdernessTimestampExtractor is a periodic watermark generator that will only insert a watermark into the stream every 200 msec (by default). Because your job completes long before then, the only watermark your job experiences is the one that Flink injects at the end of every finite stream (with value MAX_WATERMARK). Lateness is relative to watermarks, and the event that you expected to be late manages to arrive before that watermark.
By switching to punctuated watermarks you can force watermarking to occur more often, or more precisely at specific points in the stream. This is generally unnecessary (and too frequent watermarking causes overhead), but is helpful when you want to have strong control over the sequencing of watermarks.
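If you want to keep a periodic generator but have it emit more often than the 200 msec default, that interval is configurable on the environment. A minimal sketch:
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// Periodic watermark generators are polled at this interval (default: 200 ms).
env.getConfig().setAutoWatermarkInterval(50);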
As for how to write tests, you might take a look at the test harnesses used in Flink's own tests, or at flink-spector.
Update:
The time interval associated with the BoundedOutOfOrdernessTimestampExtractor is a specification of how out-of-order the stream is expected to be. Events that arrive within this bound are not considered late, and event time timers won't fire until this delay has elapsed, thereby giving time for out-of-order events to arrive. allowedLateness only applies to the window API, and describes for how long past the normal window firing time the framework keeps window state so that events can still be added to a window and cause late firings. After this additional interval, window state is cleared and subsequent events are sent to the side output (if configured).
So when you use BoundedOutOfOrdernessTimestampExtractor<MyEvent>(Time.seconds(10)) you are not saying "wait 10 seconds after every event in case earlier events might still arrive". But you are saying that your events should be at most 10 seconds out of order. So if you are processing a live, real-time event stream, this means you will wait for at most 10 seconds in case earlier events arrive. (And if you are processing historic data, then you may be able to process 10 seconds of data in 1 second, or not -- knowing you will wait for n seconds of event time to pass says nothing about how long it will actually take.)
For more on this topic, see Event Time and Watermarks.
I want to process parent and child entities in parallel, in a process which must return the child entities quickly. I couldn't clearly decide which approach suits this process, because each parallel thread also makes an HTTP call and invokes a Spring Data repository's save method once (I will manage the thread count because of the JDBC connection pool size).
By the way, I have only tried the RxJava 2 library so far.
I expected that if a parallel flow throws an exception, the onErrorResumeNext method (or something similar) would carry on and complete all remaining work after the exception. But it suspends the flow completely.
So what I need is completely non-blocking parallel flows: if one of them throws an exception, just catch it and then continue the rest of the parallel processing.
Any ideas? Any other solution is acceptable (like manual thread management).
This is what I tried, but it is not working as expected.
package com.mypackage;
import io.reactivex.Flowable;
import io.reactivex.schedulers.Schedulers;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.List;
@Slf4j
public class TestApp {
public static void main(String[] args) {
long start = System.currentTimeMillis();
List<String> createdParentEntities = new ArrayList<>();
List<String> erroredResponses = new ArrayList<>();
List<String> childEntities = new ArrayList<>();
Flowable.range(1, 100) // 100 is not fixed normally
.parallel(100) // It will be changed according to size
.runOn(Schedulers.io())
.map(integer -> createParentEntity(String.valueOf(integer)))
.sequential()
.onErrorResumeNext(t -> {
System.out.println(t.getMessage());
if (t instanceof Exception) {
erroredResponses.add(t.getMessage());
return Flowable.empty();
} else {
return Flowable.error(t);
}
})
.blockingSubscribe(createdParentEntities::add);
if (!createdParentEntities.isEmpty()) {
Flowable.fromIterable(createdParentEntities)
.parallel(createdParentEntities.size())
.runOn(Schedulers.io())
.map(TestApp::createChildEntity) // map (not doOnNext), so the child entity string is what gets collected
.sequential()
.blockingSubscribe(childEntities::add);
}
System.out.println("====================");
long time = System.currentTimeMillis() - start;
log.info("Total Time : " + time);
log.info("TOTAL CREATED ENTITIES : " + createdParentEntities.size());
log.info("CREATED ENTITIES " + createdParentEntities.toString());
log.info("ERRORED RESPONSES " + erroredResponses.toString());
log.info("TOTAL ENTITIES : " + childEntities.size());
}
public static String createParentEntity(String id) throws Exception {
Thread.sleep(1000); // Simulated for creation call
if (id.equals("35") || id.equals("75")) {
throw new Exception("ENTITIY SAVE ERROR " + id);
}
log.info("Parent entity saved : " + id);
return id;
}
public static String createChildEntity(String parentId) throws Exception {
Thread.sleep(1000);// Simulated for creation call
log.info("Incoming entity: " + parentId);
return "Child Entity: " + parentId + " parentId";
}
}
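Not a definitive answer, but one common way to get the behavior described above is to move the error handling inside each parallel rail, so a failure drops only that element instead of terminating the whole flow. A minimal sketch under that assumption, reusing createParentEntity from the code above (the error list is synchronized because rails run on different threads; add an import for java.util.Collections):
List<String> erroredResponses = Collections.synchronizedList(new ArrayList<>());
List<String> createdParentEntities = new ArrayList<>();

Flowable.range(1, 100)
        .parallel(4)
        .runOn(Schedulers.io())
        .flatMap(i -> Flowable
                .fromCallable(() -> createParentEntity(String.valueOf(i)))
                // recover per element: drop the failed item, keep the rail going
                .onErrorResumeNext(t -> {
                    erroredResponses.add(t.getMessage());
                    return Flowable.empty();
                }))
        .sequential()
        .blockingSubscribe(createdParentEntities::add);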
I am writing a controller that I need to make asynchronous. How can I deal with a list of ListenableFuture? I have a list of URLs that I need to send GET requests to one by one; what is the best solution for this?
@RequestMapping(value = "/repositories", method = RequestMethod.GET)
private void getUsername(@RequestParam(value = "username") String username) {
System.out.println(username);
List<ListenableFuture> futureList = githubRestAsync.getRepositoryLanguages(username);
System.out.println(futureList.size());
}
In the service I use List<ListenableFuture>, which does not seem to work: since it is asynchronous, in the controller method I cannot get the size of futureList to run a for loop over it for the callbacks.
public List<ListenableFuture> getRepositoryLanguages(String username){
return getRepositoryLanguages(username, getUserRepositoriesFuture(username));
}
private ListenableFuture getUserRepositoriesFuture(String username) throws HttpClientErrorException {
HttpEntity entity = new HttpEntity(httpHeaders);
ListenableFuture future = restTemplate.exchange(githubUsersUrl + username + "/repos", HttpMethod.GET, entity, String.class);
return future;
}
private List<ListenableFuture> getRepositoryLanguages(final String username, ListenableFuture<ResponseEntity<String>> future) {
final List<ListenableFuture> futures = new ArrayList<>();
future.addCallback(new ListenableFutureCallback<ResponseEntity<String>>() {
@Override
public void onSuccess(ResponseEntity<String> response) {
ObjectMapper mapper = new ObjectMapper();
try {
repositories = mapper.readValue(response.getBody(), new TypeReference<List<Repositories>>() {
});
HttpEntity entity = new HttpEntity(httpHeaders);
System.out.println("Repo size: " + repositories.size());
for (int i = 0; i < repositories.size(); i++) {
futures.add(restTemplate.exchange(githubReposUrl + username + "/" + repositories.get(i).getName() + "/languages", HttpMethod.GET, entity, String.class));
}
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void onFailure(Throwable throwable) {
System.out.println("FAILURE in getRepositoryLanguages: " + throwable.getMessage());
}
});
return futures;
}
Should I use something like ListenableFuture<List> instead of List<ListenableFuture> ?
It seems like you have a List<ListenableFuture<Result>>, but you want a ListenableFuture<List<Result>>, so you can take one action when all of the futures are complete.
public static <T> ListenableFuture<List<T>> allOf(final List<? extends ListenableFuture<? extends T>> futures) {
// we will return this ListenableFuture, and modify it from within callbacks on each input future
final SettableListenableFuture<List<T>> groupFuture = new SettableListenableFuture<>();
// use a defensive shallow copy of the futures list, to avoid errors that could be caused by
// someone inserting/removing a future from `futures` list after they call this method
final List<? extends ListenableFuture<? extends T>> futuresCopy = new ArrayList<>(futures);
// Count the number of completed futures with an AtomicInt (to avoid race conditions)
final AtomicInteger resultCount = new AtomicInteger(0);
for (int i = 0; i < futuresCopy.size(); i++) {
futuresCopy.get(i).addCallback(new ListenableFutureCallback<T>() {
@Override
public void onSuccess(final T result) {
int thisCount = resultCount.incrementAndGet();
// if this is the last result, build the ArrayList and complete the GroupFuture
if (thisCount == futuresCopy.size()) {
List<T> resultList = new ArrayList<T>(futuresCopy.size());
try {
for (ListenableFuture<? extends T> future : futuresCopy) {
resultList.add(future.get());
}
groupFuture.set(resultList);
} catch (Exception e) {
// this should never happen, but future.get() forces us to deal with this exception.
groupFuture.setException(e);
}
}
}
@Override
public void onFailure(final Throwable throwable) {
groupFuture.setException(throwable);
// if one future fails, don't waste effort on the others
for (ListenableFuture future : futuresCopy) {
future.cancel(true);
}
}
});
}
return groupFuture;
}
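A hypothetical usage from the controller side, assuming the service is changed to return List<ListenableFuture<ResponseEntity<String>>> rather than the raw List<ListenableFuture>:
ListenableFuture<List<ResponseEntity<String>>> allLanguages = allOf(futureList);
allLanguages.addCallback(new ListenableFutureCallback<List<ResponseEntity<String>>>() {
    @Override
    public void onSuccess(List<ResponseEntity<String>> responses) {
        // all GET requests have completed; process the bodies in one place
        responses.forEach(r -> System.out.println(r.getBody()));
    }

    @Override
    public void onFailure(Throwable throwable) {
        System.out.println("FAILURE in allOf: " + throwable.getMessage());
    }
});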
I'm not quite sure whether you are starting a new project or working on a legacy one, but if your main requirement is a non-blocking and asynchronous REST service, I would suggest you have a look at the upcoming Spring Framework 5 and its integration with Reactive Streams. In particular, Spring 5 will allow you to create fully reactive and asynchronous web services with very little coding.
So, for example, a fully functional version of your code can be written with this small code snippet:
@RestController
public class ReactiveController {
@GetMapping(value = "/repositories")
public Flux<String> getUsername(@RequestParam(value = "username") String username) {
WebClient client = WebClient.create(new ReactorClientHttpConnector());
ClientRequest<Void> listRepoRequest = ClientRequest.GET("https://api.github.com/users/{username}/repos", username)
.accept(MediaType.APPLICATION_JSON).header("user-agent", "reactive.java").build();
return client.exchange(listRepoRequest).flatMap(response -> response.bodyToFlux(Repository.class)).flatMap(
repository -> client
.exchange(ClientRequest
.GET("https://api.github.com/repos/{username}/{repo}/languages", username,
repository.getName())
.accept(MediaType.APPLICATION_JSON).header("user-agent", "reactive.java").build())
.map(r -> r.bodyToMono(String.class)))
.concatMap(Flux::merge);
}
static class Repository {
private String name;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}
}
To run this code locally, just clone spring-boot-starter-web-reactive and copy the code into it.
The result is something like {"Java":50563,"JavaScript":11541,"CSS":1177}{"Java":50469}{"Java":130182}{"Shell":21222,"Makefile":7169,"JavaScript":1156}{"Java":30754,"Shell":7058,"JavaScript":5486,"Batchfile":5006,"HTML":4865}, but you can still map it to something more usable in an asynchronous way :)
The math library colt (version 1.2) depends on the library EDU.oswego.cs.dl.util.concurrent (gee.cs.oswego.edu/dl/classes/EDU/oswego/cs/dl/util/concurrent/intro.html). Compiling concurrent (version 1.3.4) worked on Java 7 and earlier releases. However, compiling fails on Java 8 (javac version 1.8). The compiler options -source 1.4 -target 1.4 do not resolve the issue.
The reason is that Java 8 introduced a new method "remove" in the interface java.util.Map: default boolean remove(Object key, Object value).
This new method clashes with the method "remove" in the library class ConcurrentHashMap.java, which implements java.util.Map: protected Object remove(Object key, Object value).
Once the cause of the problem was identified, I could resolve the issue by renaming the method in the library class ConcurrentHashMap.java. This was acceptable because the library method was only protected (and not public).
Are there other possibilities to ensure Java 8 compatibility?
compiler options?
annotations ("@ForceOverride")?
There are no compiler options or annotations that will ignore conflicting method signatures.
If you (or, in this case, colt) don't use the new remove method, just compile it under Java 7. Compiling it under Java 8 won't give you any advantage.
But I actually like your solution better in this case.
Considering that this class is the groundwork of the JRE class of the same name, ConcurrentHashMap, there is no name clash here, as that method has exactly the intended semantics. A clash only occurs because the method is protected, a decision which was revised long ago. That is, when you look at the Java 5 version of the class, you'll see that it already has the method remove(Object, Object), and it's public. It's also demanded by the ConcurrentMap interface and thus must be public.
So the simplest fix is not to rename it, but to change the modifier to public and adapt the return type.
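A sketch of that signature change (the method body here is illustrative only, not the library's actual implementation, and removeIfMapped is a hypothetical internal helper):
// Before: rejected by javac 8, because java.util.Map now declares
//     default boolean remove(Object key, Object value)
// and an implementation may not reduce visibility or change the return type:
//     protected Object remove(Object key, Object value) { ... }

// After: public, boolean return type, as the ConcurrentMap contract demands.
public boolean remove(Object key, Object value) {
    if (value == null) throw new NullPointerException();
    return removeIfMapped(key, value); // hypothetical helper doing the actual work
}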
But you are right in your comment: in the long term, the best solution is to migrate to the JRE version of that class, as recommended by the author himself:
Note: Upon release of J2SE 5.0, this package enters maintenance mode: Only essential corrections will be released. J2SE5 package java.util.concurrent includes improved, more efficient, standardized versions of the main components in this package. Please plan to convert your applications to use them.
And this was more than a decade ago…
Migrate colt from EDU.oswego.cs.dl.util.concurrent to java.util.concurrent classes. As cited in Holger's answer, the concurrent library author recommends doing so.
Gentoo provides a patch for the colt 1.2.0 source code:
--- src/cern/colt/matrix/linalg/SmpBlas.java.orig 2015-10-07 22:23:44.969486000 +0000
+++ src/cern/colt/matrix/linalg/SmpBlas.java 2015-10-07 22:29:15.475486000 +0000
@@ -10,7 +10,8 @@
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.DoubleMatrix2D;
-import EDU.oswego.cs.dl.util.concurrent.FJTask;
+
+import java.util.concurrent.ForkJoinTask;
/**
Parallel implementation of the Basic Linear Algebra System for symmetric multi processing boxes.
Currently only a few algorithms are parallelised; the others are fully functional, but run in sequential mode.
@@ -198,7 +199,7 @@
// set up concurrent tasks
int span = width/noOfTasks;
- final FJTask[] subTasks = new FJTask[noOfTasks];
+ final ForkJoinTask[] subTasks = new ForkJoinTask[noOfTasks];
for (int i=0; i<noOfTasks; i++) {
final int offset = i*span;
if (i==noOfTasks-1) span = width - span*i; // last span may be a bit larger
@@ -217,24 +218,30 @@
CC = C.viewPart(offset,0,span,p);
}
- subTasks[i] = new FJTask() {
+ subTasks[i] = new ForkJoinTask() {
public void run() {
seqBlas.dgemm(transposeA,transposeB,alpha,AA,BB,beta,CC);
//System.out.println("Hello "+offset);
}
+
+ public boolean exec() { return true; }
+ public void setRawResult(Object o) {}
+ public Object getRawResult() {return null;}
};
}
// run tasks and wait for completion
- try {
- this.smp.taskGroup.invoke(
- new FJTask() {
- public void run() {
- coInvoke(subTasks);
- }
- }
- );
- } catch (InterruptedException exc) {}
+ this.smp.taskGroup.invoke(
+ new ForkJoinTask() {
+ public void run() {
+ invokeAll(subTasks);
+ }
+
+ public boolean exec() { return true; }
+ public void setRawResult(Object o) {}
+ public Object getRawResult() {return null;}
+ }
+ );
}
public void dgemv(final boolean transposeA, final double alpha, DoubleMatrix2D A, final DoubleMatrix1D x, final double beta, DoubleMatrix1D y) {
/*
@@ -271,7 +278,7 @@
// set up concurrent tasks
int span = width/noOfTasks;
- final FJTask[] subTasks = new FJTask[noOfTasks];
+ final ForkJoinTask[] subTasks = new ForkJoinTask[noOfTasks];
for (int i=0; i<noOfTasks; i++) {
final int offset = i*span;
if (i==noOfTasks-1) span = width - span*i; // last span may be a bit larger
@@ -280,24 +287,30 @@
final DoubleMatrix2D AA = A.viewPart(offset,0,span,n);
final DoubleMatrix1D yy = y.viewPart(offset,span);
- subTasks[i] = new FJTask() {
+ subTasks[i] = new ForkJoinTask() {
public void run() {
seqBlas.dgemv(transposeA,alpha,AA,x,beta,yy);
//System.out.println("Hello "+offset);
}
+
+ public boolean exec() { return true; }
+ public void setRawResult(Object o) {}
+ public Object getRawResult() {return null;}
};
}
// run tasks and wait for completion
- try {
- this.smp.taskGroup.invoke(
- new FJTask() {
- public void run() {
- coInvoke(subTasks);
- }
- }
- );
- } catch (InterruptedException exc) {}
+ this.smp.taskGroup.invoke(
+ new ForkJoinTask() {
+ public void run() {
+ invokeAll(subTasks);
+ }
+
+ public boolean exec() { return true; }
+ public void setRawResult(Object o) {}
+ public Object getRawResult() {return null;}
+ }
+ );
}
public void dger(double alpha, DoubleMatrix1D x, DoubleMatrix1D y, DoubleMatrix2D A) {
seqBlas.dger(alpha,x,y,A);
@@ -369,9 +382,6 @@
/**
* Prints various snapshot statistics to System.out; Simply delegates to {@link EDU.oswego.cs.dl.util.concurrent.FJTaskRunnerGroup#stats}.
*/
-public void stats() {
- if (this.smp!=null) this.smp.stats();
-}
private double xsum(DoubleMatrix2D A) {
double[] sums = run(A,true,
new Matrix2DMatrix2DFunction() {
--- src/cern/colt/matrix/linalg/Smp.java.orig 2015-10-07 21:08:19.443486000 +0000
+++ src/cern/colt/matrix/linalg/Smp.java 2015-10-07 22:28:24.722486000 +0000
@@ -9,12 +9,13 @@
package cern.colt.matrix.linalg;
import cern.colt.matrix.DoubleMatrix2D;
-import EDU.oswego.cs.dl.util.concurrent.FJTask;
-import EDU.oswego.cs.dl.util.concurrent.FJTaskRunnerGroup;
+import java.util.concurrent.ForkJoinTask;
+import java.util.concurrent.ForkJoinPool;
+
/*
*/
class Smp {
- protected FJTaskRunnerGroup taskGroup; // a very efficient and light weight thread pool
+ protected ForkJoinPool taskGroup; // a very efficient and light weight thread pool
protected int maxThreads;
/**
@@ -24,41 +25,39 @@
maxThreads = Math.max(1,maxThreads);
this.maxThreads = maxThreads;
if (maxThreads>1) {
- this.taskGroup = new FJTaskRunnerGroup(maxThreads);
+ this.taskGroup = new ForkJoinPool(maxThreads);
}
else { // avoid parallel overhead
this.taskGroup = null;
}
}
-/**
- * Clean up deamon threads, if necessary.
- */
-public void finalize() {
- if (this.taskGroup!=null) this.taskGroup.interruptAll();
-}
protected void run(final DoubleMatrix2D[] blocksA, final DoubleMatrix2D[] blocksB, final double[] results, final Matrix2DMatrix2DFunction function) {
- final FJTask[] subTasks = new FJTask[blocksA.length];
+ final ForkJoinTask[] subTasks = new ForkJoinTask[blocksA.length];
for (int i=0; i<blocksA.length; i++) {
final int k = i;
- subTasks[i] = new FJTask() {
+ subTasks[i] = new ForkJoinTask() {
public void run() {
double result = function.apply(blocksA[k],blocksB != null ? blocksB[k] : null);
if (results!=null) results[k] = result;
//System.out.print(".");
}
+ public boolean exec() { return true; }
+ public void setRawResult(Object o) {}
+ public Object getRawResult() {return null;}
};
}
// run tasks and wait for completion
- try {
- this.taskGroup.invoke(
- new FJTask() {
- public void run() {
- coInvoke(subTasks);
- }
- }
- );
- } catch (InterruptedException exc) {}
+ this.taskGroup.invoke(
+ new ForkJoinTask() {
+ public void run() {
+ invokeAll(subTasks);
+ }
+ public boolean exec() { return true; }
+ public void setRawResult(Object o) {}
+ public Object getRawResult() {return null;}
+ }
+ );
}
protected DoubleMatrix2D[] splitBlockedNN(DoubleMatrix2D A, int threshold, long flops) {
/*
@@ -186,10 +185,4 @@
}
return blocks;
}
-/**
- * Prints various snapshot statistics to System.out; Simply delegates to {@link EDU.oswego.cs.dl.util.concurrent.FJTaskRunnerGroup#stats}.
- */
-public void stats() {
- if (this.taskGroup!=null) this.taskGroup.stats();
-}
}