The DataStream job:

import static org.apache.flink.statefun.flink.datastream.RequestReplyFunctionBuilder.requestReplyFunctionBuilder;

import java.net.URI;
import org.apache.flink.statefun.flink.core.StatefulFunctionsConfig;
import org.apache.flink.statefun.flink.core.message.MessageFactoryType;
import org.apache.flink.statefun.flink.datastream.RoutableMessage;
import org.apache.flink.statefun.flink.datastream.RoutableMessageBuilder;
import org.apache.flink.statefun.flink.datastream.StatefulFunctionDataStreamBuilder;
import org.apache.flink.statefun.sdk.FunctionType;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public static final FunctionType DEVICE =
        new FunctionType("com.github.f1xman.era.anomalydetection.device", "DeviceFunction");

public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    StatefulFunctionsConfig statefunConfig = StatefulFunctionsConfig.fromEnvironment(env);
    statefunConfig.setFactoryType(MessageFactoryType.WITH_KRYO_PAYLOADS);

    DataStreamSource<String> names = env.addSource(new NamesSourceFunction());
    DataStream<RoutableMessage> namesIngress = names.map(name -> RoutableMessageBuilder.builder()
            .withTargetAddress(DEVICE, name)
            .withMessageBody(name)
            .build());

    StatefulFunctionDataStreamBuilder.builder("example")
            .withDataStreamAsIngress(namesIngress)
            .withRequestReplyRemoteFunction(
                    requestReplyFunctionBuilder(DEVICE, URI.create("http://localhost:8080/statefun")))
            .withConfiguration(statefunConfig)
            .build(env);

    env.execute("Flink Streaming Java API Skeleton");
}
When a String value is passed to .withMessageBody(...), the following exception occurs:
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
at org.apache.flink.runtime.minicluster.MiniClusterJobClient.lambda$getJobExecutionResult$3(MiniClusterJobClient.java:137)
at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.lambda$invokeRpc$1(AkkaInvocationHandler.java:258)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.util.concurrent.FutureUtils.doForward(FutureUtils.java:1389)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$null$1(ClassLoadingUtils.java:93)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$guardCompletionWithContextClassLoader$2(ClassLoadingUtils.java:92)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
at java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:1975)
at org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$1.onComplete(AkkaFutureUtils.java:47)
at akka.dispatch.OnComplete.internal(Future.scala:300)
at akka.dispatch.OnComplete.internal(Future.scala:297)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:224)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:221)
at scala.concurrent.impl.CallbackRunnable.run$$$capture(Promise.scala:60)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala)
at org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$DirectExecutionContext.execute(AkkaFutureUtils.java:65)
at scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:68)
at scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1(Promise.scala:284)
at scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1$adapted(Promise.scala:284)
at scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:284)
at akka.pattern.PromiseActorRef.$bang(AskSupport.scala:621)
at akka.pattern.PipeToSupport$PipeableFuture$$anonfun$pipeTo$1.applyOrElse(PipeToSupport.scala:24)
at akka.pattern.PipeToSupport$PipeableFuture$$anonfun$pipeTo$1.applyOrElse(PipeToSupport.scala:23)
at scala.concurrent.Future.$anonfun$andThen$1(Future.scala:532)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:29)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:29)
at scala.concurrent.impl.CallbackRunnable.run$$$capture(Promise.scala:60)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala)
at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:63)
at akka.dispatch.BatchingExecutor$BlockableBatch.$anonfun$run$1(BatchingExecutor.scala:100)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
at scala.concurrent.BlockContext$.withBlockContext(BlockContext.scala:81)
at akka.dispatch.BatchingExecutor$BlockableBatch.run(BatchingExecutor.scala:100)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:49)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(ForkJoinExecutorConfigurator.scala:48)
at java.util.concurrent.ForkJoinTask.doExec$$$capture(ForkJoinTask.java:289)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java)
at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
Caused by: org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:138)
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:82)
at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:252)
at org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:242)
at org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:233)
at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:684)
at org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:79)
at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:444)
at sun.reflect.GeneratedMethodAccessor14.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:316)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:314)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:217)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:78)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:163)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)
at scala.PartialFunction.applyOrElse(PartialFunction.scala:123)
at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at akka.actor.Actor.aroundReceive(Actor.scala:537)
at akka.actor.Actor.aroundReceive$(Actor.scala:535)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)
at akka.actor.ActorCell.receiveMessage$$$capture(ActorCell.scala:580)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala)
at akka.actor.ActorCell.invoke(ActorCell.scala:548)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)
at akka.dispatch.Mailbox.run(Mailbox.scala:231)
at akka.dispatch.Mailbox.exec(Mailbox.scala:243)
... 5 more
Caused by: org.apache.flink.statefun.flink.core.functions.StatefulFunctionInvocationException: An error occurred when attempting to invoke function FunctionType(com.github.f1xman.era.anomalydetection.device, DeviceFunction).
at org.apache.flink.statefun.flink.core.functions.StatefulFunction.receive(StatefulFunction.java:50)
at org.apache.flink.statefun.flink.core.functions.ReusableContext.apply(ReusableContext.java:74)
at org.apache.flink.statefun.flink.core.functions.LocalFunctionGroup.processNextEnvelope(LocalFunctionGroup.java:60)
at org.apache.flink.statefun.flink.core.functions.Reductions.processEnvelopes(Reductions.java:164)
at org.apache.flink.statefun.flink.core.functions.Reductions.apply(Reductions.java:149)
at org.apache.flink.statefun.flink.core.functions.FunctionGroupOperator.processElement(FunctionGroupOperator.java:90)
at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.pushToOperator(CopyingChainingOutput.java:82)
at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:57)
at org.apache.flink.streaming.runtime.tasks.CopyingChainingOutput.collect(CopyingChainingOutput.java:29)
at org.apache.flink.streaming.api.operators.CountingOutput.collect(CountingOutput.java:56)
at org.apache.flink.streaming.api.operators.CountingOutput.collect(CountingOutput.java:29)
at org.apache.flink.statefun.flink.core.feedback.FeedbackUnionOperator.sendDownstream(FeedbackUnionOperator.java:180)
at org.apache.flink.statefun.flink.core.feedback.FeedbackUnionOperator.processElement(FeedbackUnionOperator.java:86)
at org.apache.flink.streaming.runtime.tasks.OneInputStreamTask$StreamTaskNetworkOutput.emitRecord(OneInputStreamTask.java:233)
at org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.processElement(AbstractStreamTaskNetworkInput.java:134)
at org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.emitNext(AbstractStreamTaskNetworkInput.java:105)
at org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:65)
at org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:496)
at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:203)
at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:809)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:761)
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:958)
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:937)
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:766)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:575)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.flink.statefun.sdk.reqreply.generated.TypedValue
at org.apache.flink.statefun.flink.core.reqreply.RequestReplyFunction.invoke(RequestReplyFunction.java:118)
at org.apache.flink.statefun.flink.core.functions.StatefulFunction.receive(StatefulFunction.java:48)
... 25 more
However, sending a String value to an embedded function works fine. The workaround I've found is to wrap the value in a TypedValue:
.withMessageBody(TypedValue.newBuilder()
        .setValue(ByteString.copyFrom(name, StandardCharsets.UTF_8))
        .setHasValue(true)
        .setTypename("example/Name")
        .build())
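Since every ingress element needs this wrapping, the calls above can be factored into a small helper. This is just a sketch built from the same TypedValue and ByteString calls; the typename is whatever the receiving function expects:

import java.nio.charset.StandardCharsets;
import com.google.protobuf.ByteString;
import org.apache.flink.statefun.sdk.reqreply.generated.TypedValue;

// Wraps a UTF-8 string into a TypedValue carrying the given typename.
private static TypedValue utf8TypedValue(String typename, String value) {
    return TypedValue.newBuilder()
            .setTypename(typename)
            .setHasValue(true)
            .setValue(ByteString.copyFrom(value, StandardCharsets.UTF_8))
            .build();
}

The map above then becomes .withMessageBody(utf8TypedValue("example/Name", name)).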
This approach requires the receiver function to unwrap the TypedValue and deserialize the ByteString. It looks too low-level for this kind of API, so I believe I am misusing the Stateful Functions SDK for Flink DataStream integration. What is the correct way to implement interoperability between a Flink DataStream and remote Stateful Functions?
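For what it's worth, the unwrapping on the remote side does not have to be done by hand. Assuming the remote function is built with the Java remote SDK (statefun-sdk-java), a custom Type whose typename matches example/Name lets the SDK handle the (de)serialization. This is only a sketch of the receiving side and does not remove the wrapping on the ingress side:

import java.nio.charset.StandardCharsets;
import org.apache.flink.statefun.sdk.java.TypeName;
import org.apache.flink.statefun.sdk.java.message.Message;
import org.apache.flink.statefun.sdk.java.types.SimpleType;
import org.apache.flink.statefun.sdk.java.types.Type;

// A custom type whose typename matches the one set on the ingress side.
static final Type<String> NAME_TYPE = SimpleType.simpleImmutableTypeFrom(
        TypeName.typeNameFromString("example/Name"),
        name -> name.getBytes(StandardCharsets.UTF_8),
        bytes -> new String(bytes, StandardCharsets.UTF_8));

// Inside the remote function's apply(Context context, Message message):
if (message.is(NAME_TYPE)) {
    String name = message.as(NAME_TYPE);
    // ... handle the name
}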
The job is inspired by the official examples.
I'm trying to take a savepoint of a job that uses a customized parallelizable socket source I implemented. The source looks something like this:
@Override
public void run(SourceContext<String> sourceContext) throws Exception {
    int idx = getRuntimeContext().getIndexOfThisSubtask();
    String[] hosts = config.hostsStr.split(":");
    String[] portStrArr = config.portsStr.split(":");
    int[] ports = new int[portStrArr.length];
    for (int i = 0; i < portStrArr.length; i++) {
        ports[i] = Integer.parseInt(portStrArr[i]);
    }
    Socket s = new Socket(hosts[idx], ports[idx]);
    BufferedReader in = new BufferedReader(new InputStreamReader(s.getInputStream()));
    //ois = new ObjectInputStream(s.getInputStream());
    while (running) {
        String str = in.readLine();
        sourceContext.collect(str);
    }
    sourceContext.close();
}

@Override
public void cancel() {
    running = false;
}
The exception on the cluster looks something like this
flink-1.1.3/bin//flink cancel -s hdfs://flink-master:19000/flink-checkpoints a18499a80099045eb5120ecacdabd421
Retrieving JobManager.
Using address flink-master/10.0.0.16:6123 to connect to JobManager.
Cancelling job a18499a80099045eb5120ecacdabd421 with savepoint to hdfs://flink-master:19000/flink-checkpoints.
java.lang.Exception: Canceling the job with ID a18499a80099045eb5120ecacdabd421 failed.
at org.apache.flink.client.CliFrontend.cancel(CliFrontend.java:637)
at org.apache.flink.client.CliFrontend.parseParameters(CliFrontend.java:1092)
at org.apache.flink.client.CliFrontend$2.call(CliFrontend.java:1133)
at org.apache.flink.client.CliFrontend$2.call(CliFrontend.java:1130)
at org.apache.flink.runtime.security.HadoopSecurityContext$1.run(HadoopSecurityContext.java:43)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:40)
at org.apache.flink.client.CliFrontend.main(CliFrontend.java:1130)
Caused by: java.lang.Exception: Failed to trigger savepoint.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anon$6.apply(JobManager.scala:639)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anon$6.apply(JobManager.scala:629)
at org.apache.flink.runtime.concurrent.impl.FlinkFuture$5.onComplete(FlinkFuture.java:272)
at akka.dispatch.OnComplete.internal(Future.scala:247)
at akka.dispatch.OnComplete.internal(Future.scala:245)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:175)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:172)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:32)
at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:55)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply$mcV$sp(BatchingExecutor.scala:91)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
at scala.concurrent.BlockContext$.withBlockContext(BlockContext.scala:72)
at akka.dispatch.BatchingExecutor$BlockableBatch.run(BatchingExecutor.scala:90)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.io.EOFException: Premature EOF: no length prefix available
at org.apache.hadoop.hdfs.protocolPB.PBHelper.vintPrefixed(PBHelper.java:2282)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.createBlockOutputStream(DFSOutputStream.java:1347)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.nextBlockOutputStream(DFSOutputStream.java:1266)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:449)
Suppressed: java.lang.IllegalArgumentException: Self-suppression not permitted
at java.lang.Throwable.addSuppressed(Throwable.java:1043)
at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
at org.apache.flink.runtime.checkpoint.savepoint.SavepointStore.storeSavepointToHandle(SavepointStore.java:207)
at org.apache.flink.runtime.checkpoint.savepoint.SavepointStore.storeSavepointToHandle(SavepointStore.java:150)
at org.apache.flink.runtime.checkpoint.PendingCheckpoint.finalizeCheckpointExternalized(PendingCheckpoint.java:281)
at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.completePendingCheckpoint(CheckpointCoordinator.java:888)
at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveAcknowledgeMessage(CheckpointCoordinator.java:813)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$org$apache$flink$runtime$jobmanager$JobManager$$handleCheckpointMessage$1.apply$mcV$sp(JobManager.scala:1462)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$org$apache$flink$runtime$jobmanager$JobManager$$handleCheckpointMessage$1.apply(JobManager.scala:1461)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$org$apache$flink$runtime$jobmanager$JobManager$$handleCheckpointMessage$1.apply(JobManager.scala:1461)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
[CIRCULAR REFERENCE:java.io.EOFException: Premature EOF: no length prefix available]
On my local machine the savepoint is rejected with the following exception:
Cancelling job 4c99e0220c8c4683d1287269073b5c2c with savepoint to savepoints/.
java.lang.Exception: Canceling the job with ID 4c99e0220c8c4683d1287269073b5c2c failed.
at org.apache.flink.client.CliFrontend.cancel(CliFrontend.java:637)
at org.apache.flink.client.CliFrontend.parseParameters(CliFrontend.java:1092)
at org.apache.flink.client.CliFrontend$2.call(CliFrontend.java:1133)
at org.apache.flink.client.CliFrontend$2.call(CliFrontend.java:1130)
at org.apache.flink.runtime.security.HadoopSecurityContext$1.run(HadoopSecurityContext.java:43)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.flink.runtime.security.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:40)
at org.apache.flink.client.CliFrontend.main(CliFrontend.java:1130)
Caused by: java.lang.Exception: Failed to trigger savepoint.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anon$6.apply(JobManager.scala:639)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anon$6.apply(JobManager.scala:629)
at org.apache.flink.runtime.concurrent.impl.FlinkFuture$5.onComplete(FlinkFuture.java:272)
at akka.dispatch.OnComplete.internal(Future.scala:247)
at akka.dispatch.OnComplete.internal(Future.scala:245)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:175)
at akka.dispatch.japi$CallbackBridge.apply(Future.scala:172)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:32)
at akka.dispatch.BatchingExecutor$AbstractBatch.processBatch(BatchingExecutor.scala:55)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply$mcV$sp(BatchingExecutor.scala:91)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
at akka.dispatch.BatchingExecutor$BlockableBatch$$anonfun$run$1.apply(BatchingExecutor.scala:91)
at scala.concurrent.BlockContext$.withBlockContext(BlockContext.scala:72)
at akka.dispatch.BatchingExecutor$BlockableBatch.run(BatchingExecutor.scala:90)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.Exception: Checkpoint was declined (tasks not ready)
at org.apache.flink.runtime.checkpoint.PendingCheckpoint.abortDeclined(PendingCheckpoint.java:510)
at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveDeclineMessage(CheckpointCoordinator.java:735)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$org$apache$flink$runtime$jobmanager$JobManager$$handleCheckpointMessage$2.apply$mcV$sp(JobManager.scala:1491)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$org$apache$flink$runtime$jobmanager$JobManager$$handleCheckpointMessage$2.apply(JobManager.scala:1490)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$org$apache$flink$runtime$jobmanager$JobManager$$handleCheckpointMessage$2.apply(JobManager.scala:1490)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
... 6 more
Is it because my source cannot be stopped properly, so the checkpoint never happens? On the cluster the command does report success and returns the location of the savepoint, but there is no file at that path.
Given the source function excerpt, it almost looks good to me. What you should do is emit elements under the checkpoint lock; otherwise you might run into problems when an element is emitted at the same time as a checkpoint is triggered. SourceContext#getCheckpointLock makes sure that these two operations don't happen concurrently, as sketched below.
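Applied to the read loop of the run(...) method above, this would look roughly as follows. It is only a sketch; the null check is an addition, since BufferedReader#readLine returns null once the remote end closes the stream:

while (running) {
    String str = in.readLine();
    if (str == null) {
        break; // the remote end closed the connection
    }
    // Hold the checkpoint lock while emitting so that collecting an element
    // and taking a checkpoint cannot interleave.
    synchronized (sourceContext.getCheckpointLock()) {
        sourceContext.collect(str);
    }
}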
The first error looks a bit as if you have a problem on the HDFS side. Could you check whether the logs contain anything suspicious? Maybe the data nodes ran out of disk space.
The second exception indicates that something went wrong while taking the checkpoint. The JobManager logs should contain a statement saying why the checkpoint failed, in the format: Discarding checkpoint CHECKPOINT_ID because of checkpoint decline from task EXECUTION_ID : REASON.
I am using the configuration below:
ClouderaTwitterAgent.sources = Twitter
ClouderaTwitterAgent.channels = MemChannel
ClouderaTwitterAgent.sinks = HDFS
ClouderaTwitterAgent.sources.Twitter.type = com.cloudera.flume.source.TwitterSource
ClouderaTwitterAgent.sources.Twitter.channels = MemChannel
ClouderaTwitterAgent.sources.Twitter.consumerKey = xxxxxxxxxxxxx
ClouderaTwitterAgent.sources.Twitter.consumerSecret = xxxxxxxxxxxxx
ClouderaTwitterAgent.sources.Twitter.accessToken = xxxxxxxxxxxxxxxx
ClouderaTwitterAgent.sources.Twitter.accessTokenSecret = xxxxxxxxxxxxxx
ClouderaTwitterAgent.sources.Twitter.keywords = Sully
ClouderaTwitterAgent.sinks.HDFS.channel = MemChannel
ClouderaTwitterAgent.sinks.HDFS.type = hdfs
ClouderaTwitterAgent.sinks.HDFS.hdfs.path = hdfs://localhost:9000/user/tweets
ClouderaTwitterAgent.sinks.HDFS.hdfs.fileType = DataStream
ClouderaTwitterAgent.sinks.HDFS.hdfs.writeFormat = Text
ClouderaTwitterAgent.sinks.HDFS.hdfs.batchSize = 1000
ClouderaTwitterAgent.sinks.HDFS.hdfs.rollSize = 0
ClouderaTwitterAgent.sinks.HDFS.hdfs.rollCount = 10000
ClouderaTwitterAgent.channels.MemChannel.type = memory
ClouderaTwitterAgent.channels.MemChannel.capacity = 10000
ClouderaTwitterAgent.channels.MemChannel.transactionCapacity = 100
and this is the command that I am using to run Flume:
`bin/flume-ng agent --conf ./conf/ -f conf/flume-cloudera.conf -Dflume.root.logger=DEBUG,console -n ClouderaTwitterAgent`
and this is the error that I am getting:
2016-09-20 14:53:14,245 (Twitter4J Async Dispatcher[0]) [DEBUG - com.cloudera.flume.source.TwitterSource$1.onStatus(TwitterSource.java:121)] tweet arrived
2016-09-20 14:53:16,073 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.hdfs.BucketWriter.open(BucketWriter.java:234)] Creating hdfs://localhost:9000/user/tweets/FlumeData.1474363316543.tmp
2016-09-20 14:53:16,113 (SinkRunner-PollingRunner-DefaultSinkProcessor) [ERROR - org.apache.flume.sink.hdfs.HDFSEventSink.process(HDFSEventSink.java:459)] process failed
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:131)
at org.apache.hadoop.security.Groups.<init>(Groups.java:64)
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:240)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:255)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:232)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:718)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:703)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:605)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2554)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2546)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2412)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:368)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296)
at org.apache.flume.sink.hdfs.BucketWriter$1.call(BucketWriter.java:243)
at org.apache.flume.sink.hdfs.BucketWriter$1.call(BucketWriter.java:235)
at org.apache.flume.sink.hdfs.BucketWriter$9$1.run(BucketWriter.java:679)
at org.apache.flume.auth.SimpleAuthenticator.execute(SimpleAuthenticator.java:50)
at org.apache.flume.sink.hdfs.BucketWriter$9.call(BucketWriter.java:676)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
... 21 more
Caused by: java.lang.NoClassDefFoundError: Could not initialize class org.apache.hadoop.security.JniBasedUnixGroupsMapping
at org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback.<init>(JniBasedUnixGroupsMappingWithFallback.java:38)
... 26 more
2016-09-20 14:53:16,121 (SinkRunner-PollingRunner-DefaultSinkProcessor) [ERROR - org.apache.flume.SinkRunner$PollingRunner.run(SinkRunner.java:160)] Unable to deliver event. Exception follows.
Can anyone please help me? I am new to this; I got this configuration from the internet, followed every instruction, and have already searched for a solution to this problem. Here is what I have done so far:
1. Checked my system clock.
2. Removed the HDFS folder and created a new one.
3. Formatted the namenode.
4. Restarted the agent several times.
I am relatively new to Scala and to Couchbase, but I need to learn both fast. Recently, while trying to run a sample application using the Couchbase Java SDK through Scala, I ran into the following problem:
[cb-core-3-2] WARN com.couchbase.client.core.CouchbaseCore - Exception while Handling Request Events RequestEvent{request=null}
java.lang.IndexOutOfBoundsException: Index: 1854, Size: 0
at java.util.ArrayList.rangeCheck(ArrayList.java:653)
at java.util.ArrayList.get(ArrayList.java:429)
at com.couchbase.client.core.config.DefaultCouchbaseBucketConfig.nodeIndexForMaster(DefaultCouchbaseBucketConfig.java:135)
at com.couchbase.client.core.node.locate.KeyValueLocator.calculateNodeId(KeyValueLocator.java:165)
at com.couchbase.client.core.node.locate.KeyValueLocator.locateForCouchbaseBucket(KeyValueLocator.java:124)
at com.couchbase.client.core.node.locate.KeyValueLocator.locateAndDispatch(KeyValueLocator.java:84)
at com.couchbase.client.core.RequestHandler.dispatchRequest(RequestHandler.java:219)
at com.couchbase.client.core.RequestHandler.onEvent(RequestHandler.java:176)
at com.couchbase.client.core.RequestHandler.onEvent(RequestHandler.java:71)
at com.couchbase.client.deps.com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:129)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at com.couchbase.client.deps.io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
at java.lang.Thread.run(Thread.java:745)
[error] (run-main-0) java.lang.RuntimeException: java.util.concurrent.TimeoutException
java.lang.RuntimeException: java.util.concurrent.TimeoutException
at com.couchbase.client.java.util.Blocking.blockForSingle(Blocking.java:71)
at com.couchbase.client.java.CouchbaseBucket.upsert(CouchbaseBucket.java:354)
at com.couchbase.client.java.CouchbaseBucket.upsert(CouchbaseBucket.java:349)
at App$.main(Application.scala:28)
at App.main(Application.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
Caused by: java.util.concurrent.TimeoutException
at com.couchbase.client.java.util.Blocking.blockForSingle(Blocking.java:71)
at com.couchbase.client.java.CouchbaseBucket.upsert(CouchbaseBucket.java:354)
at com.couchbase.client.java.CouchbaseBucket.upsert(CouchbaseBucket.java:349)
at App$.main(Application.scala:28)
at App.main(Application.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
[trace] Stack trace suppressed: run last compile:run for the full output.
[cb-core-3-1] WARN com.couchbase.client.core.CouchbaseCore - Exception while Handling Response Events null
java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2048)
at com.couchbase.client.deps.com.lmax.disruptor.BlockingWaitStrategy.waitFor(BlockingWaitStrategy.java:45)
at com.couchbase.client.deps.com.lmax.disruptor.ProcessingSequenceBarrier.waitFor(ProcessingSequenceBarrier.java:56)
at com.couchbase.client.deps.com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:124)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at com.couchbase.client.deps.io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
at java.lang.Thread.run(Thread.java:745)
[cb-core-3-2] WARN com.couchbase.client.core.CouchbaseCore - Exception while Handling Request Events RequestEvent{request=null}
java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2048)
at com.couchbase.client.deps.com.lmax.disruptor.BlockingWaitStrategy.waitFor(BlockingWaitStrategy.java:45)
at com.couchbase.client.deps.com.lmax.disruptor.ProcessingSequenceBarrier.waitFor(ProcessingSequenceBarrier.java:56)
at com.couchbase.client.deps.com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:124)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at com.couchbase.client.deps.io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
at java.lang.Thread.run(Thread.java:745)
java.lang.RuntimeException: Nonzero exit code: 1
at scala.sys.package$.error(package.scala:27)
And this is the code that generated the error:
import com.couchbase.client.java._
import com.couchbase.client.core.time._
import com.couchbase.client.java.document._
import com.couchbase.client.java.document.json._
import com.couchbase.client.java.query._
import com.couchbase.client.java.env.DefaultCouchbaseEnvironment

object App {

  def main(args: Array[String]): Unit = {
    // Initialize the connection (connects to localhost)
    val env = DefaultCouchbaseEnvironment.builder()
      .connectTimeout(5000)
      .bootstrapCarrierEnabled(false)
      .build()
    val cluster = CouchbaseCluster.create(env, "127.0.0.1")

    // Open the "default" bucket
    val bucket = cluster.openBucket("default")

    // Create a JSON document
    val user: JsonObject = JsonObject.create()
      .put("firstname", "Walter")
      .put("lastname", "White")
      .put("job", "chemistry teacher")
      .put("age", 50)
    val stored: JsonDocument = bucket.upsert(JsonDocument.create("walter", user))

    // Load the document and print its content and metadata
    println(bucket.get("walter"))

    // Close the bucket, then disconnect and close all buckets
    bucket.close()
    cluster.disconnect()
  }
}
EDIT:
I am a little new to this, but here is what I managed to get out of the debugger:
this = {DefaultCouchbaseBucketConfig#2385} "DefaultCouchbaseBucketConfig{name='testBucket', locator=VBUCKET, uri='/pools/default/buckets/testBucket?bucket_uuid=54c1356c57dea1d640837c678f87d5e4', streamingUri='/pools/default/bucketsStreaming/testBucket?bucket_uuid=54c1356c57dea1d640837c678f87d5e4', nodeInfo=[NodeInfo{, hostname=localhost/127.0.0.1, configPort=0, directServices={CONFIG=8091, QUERY=8093, VIEW=8092, BINARY=11210}, sslServices={CONFIG=18091, QUERY=18093, VIEW=18092, BINARY=11207}}], partitionInfo=PartitionInfo{numberOfReplicas=1, partitionHosts=[localhost], partitions=[], tainted=false}, tainted=false, rev=23}"
partitionInfo = {CouchbasePartitionInfo#2391} "PartitionInfo{numberOfReplicas=1, partitionHosts=[localhost], partitions=[], tainted=false}"
numberOfReplicas = 1
partitionHosts = {String[1]#2428}
partitions = {ArrayList#2390} size = 0
forwardPartitions = null
tainted = false
partitionHosts = {ArrayList#2396} size = 1
0 = {DefaultNodeInfo#2425} "NodeInfo{, hostname=localhost/127.0.0.1, configPort=8091, directServices={CONFIG=8091, BINARY=11210, VIEW=8092}, sslServices={}}"
nodesWithPrimaryPartitions = {HashSet#2397} size = 0
tainted = false
rev = 23
name = "testBucket"
value = {char[10]#2422}
hash = 1241531676
password = ""
value = {char[0]#2421}
hash = 0
locator = {BucketNodeLocator#2400} "VBUCKET"
name = "VBUCKET"
ordinal = 0
uri = "/pools/default/buckets/testBucket?bucket_uuid=54c1356c57dea1d640837c678f87d5e4"
value = {char[78]#2411}
hash = 0
streamingUri = "/pools/default/bucketsStreaming/testBucket?bucket_uuid=54c1356c57dea1d640837c678f87d5e4"
value = {char[87]#2420}
hash = 0
nodeInfo = {ArrayList#2403} size = 1
0 = {DefaultNodeInfo#2408} "NodeInfo{, hostname=localhost/127.0.0.1, configPort=0, directServices={CONFIG=8091, QUERY=8093, VIEW=8092, BINARY=11210}, sslServices={CONFIG=18091, QUERY=18093, VIEW=18092, BINARY=11207}}"
enabledServices = 15
partition = 7620
useFastForward = false
EDIT 2:
I had a look at the Couchbase Console log and I am constantly getting the following:
Service 'memcached' exited with status 1. Restarting. Messages: Failed to open library "/Users/luishreis/Downloads/couchbase-server-enterprise_4/Couchbase Server.app/Contents/Resources/couchbase-core/lib/memcached/stdin_term_handler.so": dlopen(/Users/luishreis/Downloads/couchbase-server-enterprise_4/Couchbase Server.app/Contents/Resources/couchbase-core/lib/memcached/stdin_term_handler.dylib, 6): image not found
Unable to load extension /Users/luishreis/Downloads/couchbase-server-enterprise_4/Couchbase Server.app/Contents/Resources/couchbase-core/lib/memcached/stdin_term_handler.so using the config
Any help on the matter would be appreciated.
Many thanks.
I am using Spring Hibernate with App Engine and Cloud SQL for my project, but I am getting one error frequently. It occurs when the application has been idle for some time.
For every query (fetching or saving/updating to the database), I open a session and close it after use, like this. My code for fetching from the database is:
Session session = null;
try {
    session = getSessionFactory().openSession();
    List<Account> accounts = session
            .createQuery("from " + this.clazz.getName() + " where subDomainName = :subDomain")
            .setParameter("subDomain", subDomain)
            .list();
    if (accounts != null && !accounts.isEmpty()) {
        return accounts.get(0);
    }
    return null;
}
catch (Exception e) {
    log.info("Error in retrieving subdomain details :: in Account Dao");
    return null;
}
finally {
    // Always release the session, even when the query throws.
    if (session != null) {
        session.close();
    }
}
I am using @Autowired wiring, but when the application has been idle for a while and I then refresh the page, it displays a 'Stream closed' error from Cloud SQL on App Engine. The error is:
6 Jan, 2014 6:21:04 AM com.google.appengine.repackaged.org.apache.http.impl.client.DefaultRequestDirector handleResponse
WARNING: Authentication error: Unable to respond to any of these challenges: {bearer=WWW-Authenticate: Bearer realm="https://www.google.com/accounts/AuthSubRequest", error=invalid_token}
6 Jan, 2014 6:21:04 AM com.google.appengine.api.rdbms.dev.LocalRdbmsServiceRemoteDriver openConnection
WARNING: openConnection
java.sql.SQLException: Stream closed
at com.google.cloud.sql.jdbc.internal.googleapi.RpcGoogleApi.newOpenConnectionIOException(RpcGoogleApi.java:187)
at com.google.cloud.sql.jdbc.internal.googleapi.RpcGoogleApi.openConnection(RpcGoogleApi.java:105)
at com.google.appengine.api.rdbms.dev.LocalRdbmsServiceRemoteDriver.openConnection(LocalRdbmsServiceRemoteDriver.java:206)
at com.google.appengine.api.rdbms.dev.LocalRdbmsService.openConnection(LocalRdbmsService.java:119)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at com.google.appengine.tools.development.ApiProxyLocalImpl$AsyncApiCall.callInternal(ApiProxyLocalImpl.java:498)
at com.google.appengine.tools.development.ApiProxyLocalImpl$AsyncApiCall.call(ApiProxyLocalImpl.java:452)
at com.google.appengine.tools.development.ApiProxyLocalImpl$AsyncApiCall.call(ApiProxyLocalImpl.java:430)
at java.util.concurrent.Executors$PrivilegedCallable$1.run(Executors.java:461)
at java.security.AccessController.doPrivileged(Native Method)
at java.util.concurrent.Executors$PrivilegedCallable.call(Executors.java:458)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
at java.util.concurrent.FutureTask.run(FutureTask.java:138)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
at java.lang.Thread.run(Thread.java:662)
Caused by: java.io.IOException: Stream closed
at java.util.zip.GZIPInputStream.ensureOpen(GZIPInputStream.java:42)
at java.util.zip.GZIPInputStream.read(GZIPInputStream.java:85)
To keep App Engine and Cloud SQL engaged, I have written a cron job that issues certain requests periodically, but this is not a feasible solution. I cannot see why Cloud SQL closes its stream to App Engine.