Spark Kafka streaming - java.lang.NoClassDefFoundError: akka/util/Helpers$ConfigOps$

I am writing a Spark application in Scala that listens to a Kafka topic. The application just prints out each message it receives, that's all. I am running this locally on my machine...
......
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
.....
val topics = "topicName";//args(2)
val numThreads = 1;
val zkQuorum = "zookeeper quorum"
val group = "groupName"
//val inputTable = args(5)
// Set application name (when we run this on the cluster this is what will appear in the spark UI)
val sparkConf = new SparkConf().setAppName("appName").setMaster("local")
// On what topics shall I listen? Create an array of topics to pass to the stream configurations...
val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
// Will poll every X seconds
val ssc = new StreamingContext(sparkConf, Seconds(15))
// Create stream (has not been triggered yet)
val stream = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
// For each stream!
stream.foreachRDD { rdd =>
// Check if empty...
if (rdd.toLocalIterator.nonEmpty)
{
// Create SQL Context out of the streaming
val sqlContext = new SQLContext(ssc.sparkContext)
// Register temporary table out of the recieved
sqlContext.read.json(rdd).registerTempTable("resultRecieved")
// Create SQL Context, and you can filter
val result = sqlContext.sql(
"select * FROM resultRecieved")
if(result.count() == 0 )
println("No results!")
else{
println(result.count() + " Results!")
}
}
}
// Print stream
stream.print
// Trigger!
ssc.start()
// Await stopping of the service...
ssc.awaitTermination()
I am using Scala 2.10.4, and here is my Maven configuration:
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.5.2</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.10</artifactId>
<version>0.8.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.5.1</version>
</dependency>
<dependency>
<groupId>com.typesafe</groupId>
<artifactId>config</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>0.9.0-incubating</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-swing</artifactId>
<version>2.10.4</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.10.1</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-actor_2.10</artifactId>
<version>2.2.1</version>
</dependency>
</dependencies>
With scala.version being 2.10.1, and spark.version being 1.5.1.
I am getting:
Exception in thread "main" java.lang.NoClassDefFoundError: akka/util/Helpers$ConfigOps$
at akka.remote.RemoteSettings.<init>(RemoteSettings.scala:49)
at akka.remote.RemoteActorRefProvider.<init>(RemoteActorRefProvider.scala:114)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
at akka.actor.ReflectiveDynamicAccess$$anonfun$createInstanceFor$2.apply(DynamicAccess.scala:78)
at scala.util.Try$.apply(Try.scala:161)
at akka.actor.ReflectiveDynamicAccess.createInstanceFor(DynamicAccess.scala:73)
at akka.actor.ReflectiveDynamicAccess$$anonfun$createInstanceFor$3.apply(DynamicAccess.scala:84)
at akka.actor.ReflectiveDynamicAccess$$anonfun$createInstanceFor$3.apply(DynamicAccess.scala:84)
at scala.util.Success.flatMap(Try.scala:200)
at akka.actor.ReflectiveDynamicAccess.createInstanceFor(DynamicAccess.scala:84)
at akka.actor.ActorSystemImpl.<init>(ActorSystem.scala:550)
at akka.actor.ActorSystem$.apply(ActorSystem.scala:111)
at akka.actor.ActorSystem$.apply(ActorSystem.scala:104)
at org.apache.spark.util.AkkaUtils$.org$apache$spark$util$AkkaUtils$$doCreateActorSystem(AkkaUtils.scala:121)
at org.apache.spark.util.AkkaUtils$$anonfun$1.apply(AkkaUtils.scala:53)
at org.apache.spark.util.AkkaUtils$$anonfun$1.apply(AkkaUtils.scala:52)
at org.apache.spark.util.Utils$$anonfun$startServiceOnPort$1.apply$mcVI$sp(Utils.scala:1913)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
at org.apache.spark.util.Utils$.startServiceOnPort(Utils.scala:1904)
at org.apache.spark.util.AkkaUtils$.createActorSystem(AkkaUtils.scala:55)
at org.apache.spark.rpc.akka.AkkaRpcEnvFactory.create(AkkaRpcEnv.scala:253)
at org.apache.spark.rpc.RpcEnv$.create(RpcEnv.scala:53)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:252)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:193)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:450)
at org.apache.spark.streaming.StreamingContext$.createNewSparkContext(StreamingContext.scala:549)
at org.apache.spark.streaming.StreamingContext.<init>(StreamingContext.scala:75)
at SIVConsumer$.main(SIVConsumer.scala:33)
at SIVConsumer.main(SIVConsumer.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
Caused by: java.lang.ClassNotFoundException: akka.util.Helpers$ConfigOps$
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 38 more
Process finished with exit code 1
Any idea which dependencies aren't getting along well together? I have tried using the latest Scala binaries but it still fails.

I suspect the problem is the spark-streaming version: it should be the same as spark-core (1.5.2). The same goes for spark-streaming-kafka_2.10.
You can refer to a working pom.xml from my sample project: https://github.com/atulsm/Test_Projects/blob/master/pom.xml
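For illustration, a minimal sketch of what the aligned block could look like, assuming you standardize on 1.5.2 through a Maven property (the property name spark.version is just a convention, not from the asker's pom):
<properties>
<spark.version>1.5.2</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>${spark.version}</version>
</dependency>
</dependencies>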

The issue was incorrect versions of spark-streaming. For others, the following Maven dependency list lets you:
Listen to a Kafka topic
Create a Spark stream
Write to an HBase table
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.10.4</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka_2.10</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.10</artifactId>
<version>0.8.2.2</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20150729</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.8.3</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>1.8.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>1.4.1</version>
</dependency>
</dependencies>

Related

Using confluent avro serializer in Java - NoSuchFieldError: PASSWORD

I'm using the Confluent Avro producer in Java, but I get the issue below when running it. I have added all the required jars and don't have a clue what I missed.
Exception in thread "main" java.lang.NoSuchFieldError: PASSWORD
at io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig.baseConfigDef(AbstractKafkaAvroSerDeConfig.java:92)
at io.confluent.kafka.serializers.KafkaAvroSerializerConfig.<clinit>(KafkaAvroSerializerConfig.java:28)
at io.confluent.kafka.serializers.KafkaAvroSerializer.configure(KafkaAvroSerializer.java:48)
at org.apache.kafka.common.serialization.ExtendedSerializer$Wrapper.configure(ExtendedSerializer.java:60)
at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:341)
at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:302)
I have tried the following configuration:
Properties prop = new Properties();
prop.put("bootstrap.servers", "localhost:9092");
prop.put("schema.registry.url", "http://localhost:8081");
//prop.put("basic.auth.user.info", "");
prop.put("ssl.keystore.type",null);
prop.put("acks", "all");
prop.put("retries", "0");
prop.put("key.serializer", "io.confluent.kafka.serializers.KafkaAvroSerializer");
prop.put("value.serializer", "io.confluent.kafka.serializers.KafkaAvroSerializer");
prop.put("group.id", "test");
These are the dependencies I used, all jars at the latest version:
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-schema-registry</artifactId>
<version>4.1.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.confluent/common-config -->
<dependency>
<groupId>io.confluent</groupId>
<artifactId>common-config</artifactId>
<version>3.3.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.confluent/common-utils -->
<dependency>
<groupId>io.confluent</groupId>
<artifactId>common-utils</artifactId>
<version>3.3.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.confluent/kafka-connect-avro-converter -->
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-connect-avro-converter</artifactId>
<version>5.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.confluent/kafka-schema-registry-client -->
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-schema-registry-client</artifactId>
<version>5.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.confluent.kafka/connect-utils -->
<dependency>
<groupId>io.confluent.kafka</groupId>
<artifactId>connect-utils</artifactId>
<version>0.1.18</version>
</dependency>
<dependency>
<groupId>io.confluent.kafka</groupId>
<artifactId>connect-checkstyle</artifactId>
<version>0.1.18</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.confluent/rest-utils -->
<dependency>
<groupId>io.confluent</groupId>
<artifactId>rest-utils</artifactId>
<version>4.1.0</version>
</dependency>
I've seen this error when mixing versions.
You should only need this one dependency, as common-config, common-utils, the schema registry client, rest-utils, etc. are all transitive dependencies:
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-avro-serializer</artifactId>
<version>5.2.1</version>
</dependency>
https://docs.confluent.io/current/app-development/index.html#native-clients-with-serializers
Remove the rest of the dependencies, or at least create a property for the version and reuse it.
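As a sketch of that property approach (assuming you standardize on 5.2.1; note that Confluent artifacts are served from Confluent's own repository rather than Maven Central):
<properties>
<confluent.version>5.2.1</confluent.version>
</properties>
<repositories>
<repository>
<id>confluent</id>
<url>https://packages.confluent.io/maven/</url>
</repository>
</repositories>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-avro-serializer</artifactId>
<version>${confluent.version}</version>
</dependency>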

Which versions should be used for Spark cassandra connection using java Language?

I'm trying to connect Spark and a Cassandra database using Java. For the connection I'm using the latest version of the Spark Cassandra Connector, i.e. 2.4.0. Currently I can connect Spark and Cassandra through the connector and I get data in RDD format, but I cannot read data from that structure. If I pass a row reader factory as the third parameter of cassandraTable(), I get:
Wrong 3rd argument type. Found:
'java.lang.Class<com.journaldev.sparkdemo.JohnnyDeppDetails>',
required:
'com.datastax.spark.connector.rdd.reader.RowReaderFactory<T>'
Can anyone tell me which version I should use, or what the problem is here?
CassandraTableScanJavaRDD pricesRDD2 =
    CassandraJavaUtil.javaFunctions(sc).cassandraTable(keyspace, table, JohnnyDeppDetails.class);
My pom.xml:
<!-- Import Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.datastax.spark/spark-cassandra-connector -->
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_2.11</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector-java_2.10</artifactId>
<version>1.5.0-M2</version>
</dependency>
<dependency>
<groupId>com.datastax.cassandra</groupId>
<artifactId>cassandra-driver-core</artifactId>
<version>2.1.9</version>
</dependency>
<dependency>
<groupId>com.datastax.cassandra</groupId>
<artifactId>cassandra-driver-mapping</artifactId>
<version>2.1.9</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>2.4.0</version>
</dependency>
</dependencies>
Instead of passing the class instance, you need to create a RowReaderFactory using the mapRowTo function, like this (this is from my example):
CassandraJavaRDD<UUIDData> uuids = javaFunctions(spark.sparkContext())
.cassandraTable("test", "utest", mapRowTo(UUIDData.class));
When you write back, you can convert the class into the corresponding factory via the mapToRow function.
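For the write path, a minimal sketch reusing the uuids RDD and the test/utest keyspace and table from the example above (assuming UUIDData is a plain bean with getters and setters):
import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapToRow;

// mapToRow builds a RowWriterFactory that maps bean properties to columns
javaFunctions(uuids)
    .writerBuilder("test", "utest", mapToRow(UUIDData.class))
    .saveToCassandra();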

Twitter Spark Streaming using Apache Spark 2.2.1 - Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/Logging [duplicate]

I always get the following error. Can somebody help me, please?
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/Logging
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at com.datastax.spark.connector.japi.DStreamJavaFunctions.<init>(DStreamJavaFunctions.java:24)
at com.datastax.spark.connector.japi.CassandraStreamingJavaUtil.javaFunctions(CassandraStreamingJavaUtil.java:55)
at SparkStream.main(SparkStream.java:51)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.Logging
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 20 more
This happens when I run the following code. I've searched the web but haven't found a solution; the error appeared when I added the saveToCassandra call.
import com.datastax.spark.connector.japi.CassandraStreamingJavaUtil;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapToRow;
/**
* Created by jonas on 10/10/16.
*/
public class SparkStream implements Serializable {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf(true)
                .setAppName("TwitterToCassandra")
                .setMaster("local[*]")
                .set("spark.cassandra.connection.host", "127.0.0.1")
                .set("spark.cassandra.connection.port", "9042");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(5000));
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092");
        Set<String> topics = Collections.singleton("Test");
        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(
                ssc,
                String.class,
                String.class,
                kafka.serializer.StringDecoder.class,
                kafka.serializer.StringDecoder.class,
                kafkaParams,
                topics
        );
        JavaDStream<Tweet> createTweet = directKafkaStream.map(s -> createTweet(s._2));
        CassandraStreamingJavaUtil.javaFunctions(createTweet)
                .writerBuilder("mykeyspace", "rawtweet", mapToRow(Tweet.class))
                .saveToCassandra();
        ssc.start();
        ssc.awaitTermination();
    }

    public static Tweet createTweet(String rawKafka) {
        String[] splitted = rawKafka.split("\\|");
        Tweet t = new Tweet(splitted[0], splitted[1], splitted[2], splitted[3]);
        return t;
    }
}
My pom is the following.
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.company</groupId>
<artifactId>Sentiment</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<repositories>
<repository>
<id>twitter4j.org</id>
<name>twitter4j.org Repository</name>
<url>http://twitter4j.org/maven2</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.0.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.scala-lang/scala-library -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.8</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.datastax.spark/spark-cassandra-connector_2.10 -->
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_2.10</artifactId>
<version>1.6.2</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.10</artifactId>
<version>0.9.0.0</version>
</dependency>
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-core</artifactId>
<version>[4.0,)</version>
</dependency>
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-stream</artifactId>
<version>4.0.4</version>
</dependency>
<dependency>
<groupId>org.twitter4j</groupId>
<artifactId>twitter4j-async</artifactId>
<version>4.0.4</version>
</dependency>
</dependencies>
</project>
org.apache.spark.Logging is available in Spark 1.5.2 and earlier versions; it is not in 2.0.0. Please change the versions as follows:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>1.5.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.5.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>1.5.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>1.6.2</version>
</dependency>
The error occurs because you are using Spark 2.0 libraries with the connector from Spark 1.6 (which looks for the Spark 1.6 logging class). Use the 2.0.5 version of the connector.
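For reference, the matching Maven coordinates would look something like this (a sketch; pick the 2.0.x release that matches your Spark 2.0.x version):
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_2.11</artifactId>
<version>2.0.5</version>
</dependency>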
It's because the org.apache.spark.Logging class is missing after 1.5.2, just like everybody says (only org.apache.spark.internal.Logging exists in later versions...).
But it seems none of the Maven-side solutions resolve this dependency, so I added the class to the lib directory manually. Here is how I fixed the problem:
Package the Scala class org.apache.spark.internal.Logging into a public jar, or download it from https://raw.githubusercontent.com/swordsmanliu/SparkStreamingHbase/master/lib/spark-core_2.11-1.5.2.logging.jar (thanks to that host).
Move the jar into your Spark cluster's jars directory.
Submit your project again; I hope it helps.
I got the solution by changing the jar mentioned above.
Initially, I had an outdated jar for spark-streaming-kafka:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.1</version>
</dependency>
I also removed the multiple slf4j-log4j and log4j jars that I had added externally alongside the Spark and Kafka jar libraries.
If you're using IntelliJ, just check the 'Include dependencies with provided scope' box; that will fix the issue without mucking around with the pom or manually downloading files.
Download spark-core_2.11-1.5.2.logging.jar and pass it with the --jars option:
spark-submit --class com.SentimentTwiteer --packages "org.apache.spark:spark-streaming-twitter_2.11:1.6.3" --jars /root/Desktop/spark-core_2.11-1.5.2.logging.jar /root/Desktop/SentimentTwiteer.jar consumerKey consumerSecret accessToken accessTokenSecret yoursearchTag
https://github.com/sinhavicky4/SentimentTwiteer
One reason for this problem is a library/class conflict.
I faced it and solved it with some Maven exclusions:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.0.0</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.0.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>2.0.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
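To see which artifact is dragging in the conflicting jars in the first place, a general Maven technique (not from the original answer) is to print the dependency tree filtered to the offender:
mvn dependency:tree -Dincludes=log4j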
This pom.xml resolved my issue:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>1.6.1</version>
</dependency>
I downloaded the jar and used --jars in spark-submit; it worked for me:
spark-submit --class com.SentimentTwiteer --packages "org.apache.spark:spark-streaming-twitter_2.11:1.6.3" --jars /root/Desktop/spark-core_2.11-1.5.2.logging.jar /root/Desktop/SentimentTwiteer.jar XX XX XX XX
Download the jar below, add it to your library, and it will work as expected:
https://raw.githubusercontent.com/swordsmanliu/SparkStreamingHbase/master/lib/spark-core_2.11-1.5.2.logging.jar
It's a version issue; try with a more recent version:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.1.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
The logging jar is missing from your dependency list. Download the "spark-core_2.11-1.5.2.logging" jar from the Maven repository and add it as an external jar to your Spark project; then you won't get the "java.lang.NoClassDefFoundError: org/apache/spark/Logging" error.
Download the jar matching your Scala version (2.10, 2.11, etc.).

HBase Java API for HBaseTable creation shows Error

I am a beginner in Hadoop and HBase. I get an error when I try to create an HBase table using the Java API. My project is a Maven project, and I run my application on a Cloudera machine. The code sample, the error, and the POM.xml are given below.
public static void main(String[] args) throws IOException {
    // Instantiating the configuration class
    Configuration con = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(con);
    // Instantiating the table descriptor class
    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("empdata"));
    // Adding column families to the table descriptor
    tableDescriptor.addFamily(new HColumnDescriptor("personal"));
    tableDescriptor.addFamily(new HColumnDescriptor("official"));
    // Create the table through the admin
    admin.createTable(tableDescriptor);
    System.out.println(" Table created ");
}
I run the command like this:
-bash-4.1$ hadoop jar HBaseTableCreation-1.0-SNAPSHOT.jar com.HBaseTable
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/HBaseConfiguration
at com.Feathersoft.HBaseTable.main(HBaseTable.java:17)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.HBaseConfiguration
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 6 more
-bash-4.1$
POM.xml dependencies:
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>0.95.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-shell</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-thrift</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.4.0a</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.0.0-cdh4.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>0.98.8-hadoop2</version>
</dependency>
How can I set the classpath on the Cloudera machine? I am not very familiar with Linux.
Can anyone help? It would be great...
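One common approach (a hedged suggestion, not an answer from this thread) is to put the HBase jars on the Hadoop classpath before launching, since hadoop jar does not add them by default:
# hbase classpath prints the full HBase client classpath on a Cloudera node
export HADOOP_CLASSPATH=$(hbase classpath)
hadoop jar HBaseTableCreation-1.0-SNAPSHOT.jar com.HBaseTable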
