No such file or class on classpath (scala) - java

//package com.examples
/**
 * Created by kalit_000 on 27/09/2015.
 */
import java.sql.{Connection, DriverManager, ResultSet}
import java.util.Properties

import kafka.producer.{KeyedMessage, Producer, ProducerConfig}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

object SqlServerKafkaProducer {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    Logger.getLogger("akka").setLevel(Level.WARN)

    val conf = new SparkConf().setMaster("local[2]").setAppName("MSSQL_KAFKA_PRODUCER")
    val sc = new SparkContext(conf)

    val driver = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    val url = "jdbc:sqlserver://localhost;user=admin;password=oracle;database=AdventureWorks2014"
    val username = "admin"
    val password = "oracle"
    var connection: Connection = null
    Class.forName(driver)

    /* Create connection and statement to run against SQL Server and execute */
    connection = DriverManager.getConnection(url, username, password)
    val statement = connection.createStatement()
    val resultSet = statement.executeQuery("select top 10 CustomerID,StoreID,TerritoryID,AccountNumber from AdventureWorks2014.dbo.Customer")
    resultSet.setFetchSize(10)
    val columnnumber = resultSet.getMetaData().getColumnCount

    /* Print the column names */
    for (i <- 1 to columnnumber) {
      val columnname = resultSet.getMetaData().getColumnName(i)
      println("Column Names are:- %s".format(columnname))
    }

    /* Print each row and publish it to Kafka */
    while (resultSet.next()) {
      var list = new java.util.ArrayList[String]()
      for (i <- 1 to columnnumber) {
        list.add(resultSet.getObject(i).toString())
      }
      println(list)

      /* Build Kafka producer properties */
      val props: Properties = new Properties()
      props.put("metadata.broker.list", "localhost:9092")
      props.put("serializer.class", "kafka.serializer.StringEncoder")

      /* Send the message to topic "trade" using producer.send */
      val config = new ProducerConfig(props)
      val producer = new Producer[String, String](config)
      producer.send(new KeyedMessage[String, String]("trade", list.toString().replace("[", "").replace("]", "").replace(",", "~")))
    }

    /* Close the SQL Server database connection */
    connection.close()
  }
}
I built the jar with Maven in IntelliJ IDEA. This is a Scala/Spark project, and the jar file is created under C:\Users\kalit_000\IdeaProjects\SparkCookBook\target\SparkCookBook-0.0.1-SNAPSHOT-jar-with-dependencies.jar. When I try to run the jar with the command
scala -classpath "C:\Users\kalit_000\IdeaProjects\SparkCookBook\target\SparkCookBook-1.0-SNAPSHOT-jar-with-dependencies.jar" SqlServerKafkaProducer
I get this error:
Error:-
No such file or class on classpath: SqlServerKafkaProducer.class
I can see my class inside the jar file (I opened the jar with a Java decompiler), and the project compiles successfully in IntelliJ IDEA.
Can anyone help?
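For reference, a Spark application packaged as a jar-with-dependencies like this is more commonly launched with spark-submit than with the plain scala runner. A minimal sketch, assuming the 0.0.1-SNAPSHOT jar name shown above is the file the build actually produced:
spark-submit --class SqlServerKafkaProducer --master local[2] C:\Users\kalit_000\IdeaProjects\SparkCookBook\target\SparkCookBook-0.0.1-SNAPSHOT-jar-with-dependencies.jar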

Related

How to resolve current committed offsets differing from current available offsets?

I am attempting to read Avro data from Kafka using Spark Streaming, but I receive the following error message:
Streaming Query Exception caught!: org.apache.spark.sql.streaming.StreamingQueryException: Job aborted.
=== Streaming Query ===
Identifier: [id = 8b54c92d-6bbc-4dbc-84d0-55b762c21ba2, runId = 4bc92b3c-343e-4886-b0bc-0777b89f9ec8]
Current Committed Offsets: {KafkaV2[Subscribe[customer-avro4]]: {"customer-avro":{"0":17}}}
Current Available Offsets: {KafkaV2[Subscribe[customer-avro4]]: {"customer-avro":{"0":20}}}
Current State: ACTIVE
Thread State: RUNNABLE
Any idea what the issue might be and how to resolve it? The code is below (inspired by the xebia-france spark-structured-streaming-blog). I think it ran fine earlier, but now there is a problem.
import com.databricks.spark.avro.SchemaConverters
import io.confluent.kafka.schemaregistry.client.{CachedSchemaRegistryClient, SchemaRegistryClient}
import io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryException

object AvroConsumer {
  private val topic = "customer-avro4"
  private val kafkaUrl = "http://localhost:9092"
  private val schemaRegistryUrl = "http://localhost:8081"

  private val schemaRegistryClient = new CachedSchemaRegistryClient(schemaRegistryUrl, 128)
  private val kafkaAvroDeserializer = new AvroDeserializer(schemaRegistryClient)

  private val avroSchema = schemaRegistryClient.getLatestSchemaMetadata(topic + "-value").getSchema
  private val sparkSchema = SchemaConverters.toSqlType(new Schema.Parser().parse(avroSchema))

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("ConfluentConsumer")
      .master("local[*]")
      .getOrCreate()

    spark.sparkContext.setLogLevel("ERROR")

    spark.udf.register("deserialize", (bytes: Array[Byte]) =>
      DeserializerWrapper.deserializer.deserialize(bytes)
    )

    val kafkaDataFrame = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", kafkaUrl)
      .option("subscribe", topic)
      .load()

    val valueDataFrame = kafkaDataFrame.selectExpr("""deserialize(value) AS message""")

    import org.apache.spark.sql.functions._

    val formattedDataFrame = valueDataFrame.select(
      from_json(col("message"), sparkSchema.dataType).alias("parsed_value"))
      .select("parsed_value.*")

    val writer = formattedDataFrame
      .writeStream
      .format("parquet")
      .option("checkpointLocation", "hdfs://localhost:9000/data/spark/parquet/checkpoint")

    while (true) {
      val query = writer.start("hdfs://localhost:9000/data/spark/parquet/total")
      try {
        query.awaitTermination()
      } catch {
        case e: StreamingQueryException => println("Streaming Query Exception caught!: " + e)
      }
    }
  }

  object DeserializerWrapper {
    val deserializer: AvroDeserializer = kafkaAvroDeserializer
  }

  class AvroDeserializer extends AbstractKafkaAvroDeserializer {
    def this(client: SchemaRegistryClient) {
      this()
      this.schemaRegistry = client
    }

    override def deserialize(bytes: Array[Byte]): String = {
      val genericRecord = super.deserialize(bytes).asInstanceOf[GenericRecord]
      genericRecord.toString
    }
  }
}
Figured it out - the problem was not, as I had thought, with the Spark-Kafka integration directly, but with the checkpoint information inside the HDFS filesystem. Deleting and recreating the checkpoint folder in HDFS solved it for me.
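As a sketch of that cleanup, assuming the checkpoint path used in the code above:
hdfs dfs -rm -r hdfs://localhost:9000/data/spark/parquet/checkpoint
hdfs dfs -mkdir -p hdfs://localhost:9000/data/spark/parquet/checkpoint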

How to Connect to Databricks Delta table using JDBC driver

How can I connect to a Databricks Delta table using JDBC?
I have tried the Simba driver, but I am having a hard time with the driver class name and URL configuration.
Any solution is appreciated.
I cannot paste code here as it is company code.
Thanks in advance.
Check the link below; it has the steps to configure Delta access over JDBC:
http://sedeks.blogspot.com/2019/05/how-to-connect-to-databricks-delta.html
Code provided in that link:
import java.sql.{Connection, DriverManager}

object ScalaJdbcConnectSelect {
  def main(args: Array[String]): Unit = {
    val driver = "com.simba.spark.jdbc41.Driver" // attach the Simba Spark JDBC jar to the classpath
    val url = "jdbc:spark://field-eng.cloud.databricks.com:443/default;transportMode=http;ssl=true;httpPath=sql/protocolvl/o/0/0911-153027-hopes19"
    val username = "token"
    val password = "<token-value>" // token generated from the Databricks profile page

    var connection: Connection = null
    try {
      // Create the connection
      Class.forName(driver)
      connection = DriverManager.getConnection(url, username, password)
      if (connection != null) {
        println("Connection Established")
      } else {
        println("Connection Failed")
      }

      // Create the statement and run the query
      val statement = connection.createStatement()
      val resultSet = statement.executeQuery("<<Query>>") // put your query here
      while (resultSet.next()) {
        // iterate through the result set
      }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      if (connection != null) connection.close()
    }
  }
}

Table "myTable" not found error in while accessing the H2 DB

I am using the H2 in-memory database in my Grails project, and the application runs properly with it.
I want to connect to the H2 database from a Groovy script to read the data.
import groovy.sql.Sql
import java.sql.Driver

class psqlh2 {
    static void main(String[] args) {
        def driver = Class.forName('org.h2.Driver').newInstance() as Driver
        def props = new Properties()
        props.setProperty("user", "sa")
        props.setProperty("password", "")
        def conn = driver.connect("jdbc:h2:mem:~/databaseName;DB_CLOSE_DELAY=-1", props)
        def sql = new Sql(conn)
        def query = "SELECT * FROM company"
        try {
            sql.eachRow(query) { row ->
                println(row)
            }
        } finally {
            sql.close()
            conn.close()
        }
    }
}
When I run it I get:
WARNING: Failed to execute: SELECT * FROM company because: Table "COMPANY" not found; SQL statement:
SELECT * FROM company [42102-199]
Exception in thread "main" org.h2.jdbc.JdbcSQLSyntaxErrorException: Table "COMPANY" not found;
Please help me out.
Replace mem: with file: in the JDBC URL. An in-memory (mem:) H2 database exists only inside the JVM that created it, so a separate Groovy process cannot see the tables created by the Grails application; a file-based database on disk can be shared.
def driver = Class.forName('org.h2.Driver').newInstance() as Driver
def props = new Properties()
props.setProperty("user", username)
props.setProperty("password", password)
return driver.connect("jdbc:h2:file:${absolutePath};DB_CLOSE_DELAY=-1;IFEXISTS=true", props)
Also make sure to use the same version of the H2 jar that the H2 server is using.
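For example, applied to the question's code, the connection line becomes something like the sketch below (assuming the Grails application writes its data to a file database at ~/databaseName, which is an assumption about its datasource configuration):
def conn = driver.connect("jdbc:h2:file:~/databaseName;DB_CLOSE_DELAY=-1;IFEXISTS=true", props)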

Add custom file to jar path in spark-submit cli

I am creating a Spark jar file with the following Scala code in it:
import com.typesafe.config.ConfigFactory

object GetRequest {
  def main(args: Array[String]): Unit = {
    val api_credentials = ConfigFactory.load("application.conf")
    val username = api_credentials.getString("pi.api.username")
    val password = api_credentials.getString("pi.api.password")
  }
}
When I submit the jar, it is not able to find the application.conf file, which is in the path C:\Users\abc\Desktop\ApiSparkJob\resource. How do I reference it in the spark-submit command on the CLI?
The resource file bundled inside the jar would not be available to each Spark worker, so you need to pass the file with the --files argument:
--files application.conf
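For example, a complete submit command could look like the sketch below (the jar name api-spark-job.jar is hypothetical; the main class and conf path come from the question):
spark-submit --class GetRequest --master yarn --deploy-mode cluster --files C:\Users\abc\Desktop\ApiSparkJob\resource\application.conf api-spark-job.jar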
If your resource manager is YARN, refer to the code below:
import java.io.{BufferedReader, InputStreamReader}

import com.typesafe.config.{Config, ConfigFactory}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

object GetRequest {
  def main(args: Array[String]): Unit = {
    val sparkSession: SparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()

    // Files passed with --files land in the application's YARN staging directory
    val yarnStagingDir: String = System.getenv("SPARK_YARN_STAGING_DIR")
    val confFile: Path = new Path(yarnStagingDir.concat("/application.conf"))

    val fs: FileSystem = FileSystem.get(sparkSession.sparkContext.hadoopConfiguration)
    val br: BufferedReader = new BufferedReader(new InputStreamReader(fs.open(confFile)))

    val api_credentials: Config = ConfigFactory.parseReader(br).resolve()
    val username: String = api_credentials.getString("pi.api.username")
    val password: String = api_credentials.getString("pi.api.password")
    br.close()
  }
}
Note: do not close the filesystem with fs.close(), as that ends your job; the same FileSystem instance is used to access the Hive warehouse directory.

Jenkins groovy init script for sonarqube configuration

I am trying to set the SonarQube settings in the Jenkins system configuration using a Groovy init script, but I am getting the error below. Can somebody help me resolve this?
Error:
groovy.lang.GroovyRuntimeException: Could not find matching constructor for:
hudson.plugins.sonar.SonarInstallation(java.lang.String, java.lang.String,
java.lang.String, hudson.plugins.sonar.model.TriggersConfig,
java.lang.String)
at groovy.lang.MetaClassImpl.invokeConstructor(MetaClassImpl.java:1732)
at groovy.lang.MetaClassImpl.invokeConstructor(MetaClassImpl.java:1532)
This is the script that I am using
import hudson.model.*
import jenkins.model.*
import hudson.plugins.sonar.SonarGlobalConfiguration
import hudson.plugins.sonar.*
import hudson.plugins.sonar.model.TriggersConfig
import hudson.tools.*

def inst = Jenkins.getInstance()

println "--> Configuring SonarQube"

SonarGlobalConfiguration sonar_conf = Hudson.instance.getDescriptorByType(SonarGlobalConfiguration.class)

def sonar_inst = new SonarInstallation(
    "SonarQ",
    "http://localhost:9000",
    "yy", // Token
    new TriggersConfig(),
    ""
)

// Only add ADOP Sonar if it does not exist - do not overwrite existing config
def sonar_installations = sonar_conf.getInstallations()
def sonar_inst_exists = false
sonar_installations.each {
    installation = (SonarInstallation) it
    if (sonar_inst.getName() == installation.getName()) {
        sonar_inst_exists = true
        println("Found existing installation: " + installation.getName())
    }
}
if (!sonar_inst_exists) {
    sonar_installations += sonar_inst
    sonar_conf.setInstallations((SonarInstallation[]) sonar_installations)
    sonar_conf.save()
}
You missed some parameters. The SonarInstallation constructor takes 7 parameters, not 5:
@DataBoundConstructor
public SonarInstallation(String name,
                         String serverUrl, String serverAuthenticationToken,
                         String mojoVersion, String additionalProperties, TriggersConfig triggers,
                         String additionalAnalysisProperties) {
    this.name = name;
    this.serverUrl = serverUrl;
    this.serverAuthenticationToken = serverAuthenticationToken;
    this.additionalAnalysisProperties = additionalAnalysisProperties;
    this.mojoVersion = mojoVersion;
    this.additionalProperties = additionalProperties;
    this.triggers = triggers;
}
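Applied to the script above, the call would pass values for all 7 parameters; a sketch, where the empty strings for mojoVersion, additionalProperties, and additionalAnalysisProperties are assumptions:
def sonar_inst = new SonarInstallation(
    "SonarQ",                // name
    "http://localhost:9000", // serverUrl
    "yy",                    // serverAuthenticationToken
    "",                      // mojoVersion (assumed empty)
    "",                      // additionalProperties (assumed empty)
    new TriggersConfig(),    // triggers
    ""                       // additionalAnalysisProperties (assumed empty)
)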
