//package com.examples
/**
* Created by kalit_000 on 27/09/2015.
*/
import org.apache.spark.SparkConf
import org.apache.log4j.Logger
import org.apache.log4j.Level
import org.apache.spark._
import java.sql.{ResultSet, DriverManager, Connection}
import kafka.producer.KeyedMessage
import kafka.producer.Producer
import kafka.producer.ProducerConfig
import java.util.Properties
import org.apache.spark.streaming.{Seconds,StreamingContext}
object SqlServerKafkaProducer {
def main(args: Array[String]): Unit =
{
Logger.getLogger("org").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
val conf = new SparkConf().setMaster("local[2]").setAppName("MSSQL_KAFKA_PRODUCER")
val sc=new SparkContext(conf)
val driver = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
val url = "jdbc:sqlserver://localhost;user=admin;password=oracle;database=AdventureWorks2014"
val username = "admin"
val password = "oracle"
var connection: Connection = null
Class.forName(driver)
/*Create connection and statement to run against sql server and execute*/
connection = DriverManager.getConnection(url, username, password)
val statement = connection.createStatement()
val resultSet = statement.executeQuery("select top 10 CustomerID,StoreID,TerritoryID,AccountNumber from AdventureWorks2014.dbo.Customer")
resultSet.setFetchSize(10);
val columnnumber = resultSet.getMetaData().getColumnCount
/*Print the column names*/
for (i <- 1 to columnnumber)
{
val columnname=resultSet.getMetaData().getColumnName(i)
println("Column Names are:- %s".format(columnname))
}
/*Print each row and publish it to the Kafka topic*/
while (resultSet.next())
{
var list = new java.util.ArrayList[String]()
for (i <- 1 to columnnumber)
{
list.add(resultSet.getObject(i).toString())
//println("Column Names are:- %s".format(columnname))
}
println(list)
/*Build Kafka producer properties*/
val props:Properties = new Properties()
props.put("metadata.broker.list", "localhost:9092")
props.put("serializer.class", "kafka.serializer.StringEncoder")
/*Send the row to the "trade" topic using producer.send*/
val config= new ProducerConfig(props)
val producer= new Producer[String,String](config)
//val x=list.collect().mkString("\n").replace("[","").replace("]","").replace(",","~")
producer.send(new KeyedMessage[String, String]("trade", list.toString().replace("[","").replace("]","").replace(",","~")))
}
/*close SQL Server database connection*/
connection.close()
}
}
I built the jar with Maven in IntelliJ IDEA. This is a Scala Spark project, and the jar file is created under C:\Users\kalit_000\IdeaProjects\SparkCookBook\target\SparkCookBook-0.0.1-SNAPSHOT-jar-with-dependencies.jar. When I try to run the jar with the command
scala -classpath "C:\Users\kalit_000\IdeaProjects\SparkCookBook\target\SparkCookBook-1.0-SNAPSHOT-jar-with-dependencies.jar" SqlServerKafkaProducer
I get the following error:
Error:-
No such file or class on classpath: SqlServerKafkaProducer.class
I can see my class inside the jar file (I opened the jar with a Java decompiler), and the project compiles successfully in IntelliJ IDEA.
Can anyone help?
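For reference, a Spark application packaged this way would normally be launched with spark-submit rather than the plain scala runner; a minimal sketch, using the jar path and object name from above (the package declaration is commented out, so the class name is unqualified):
spark-submit --class SqlServerKafkaProducer --master local[2] "C:\Users\kalit_000\IdeaProjects\SparkCookBook\target\SparkCookBook-0.0.1-SNAPSHOT-jar-with-dependencies.jar"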
I am attempting to read Avro data from Kafka using Spark Structured Streaming, but I receive the following error message:
Streaming Query Exception caught!: org.apache.spark.sql.streaming.StreamingQueryException: Job aborted.
=== Streaming Query ===
Identifier: [id = 8b54c92d-6bbc-4dbc-84d0-55b762c21ba2, runId = 4bc92b3c-343e-4886-b0bc-0777b89f9ec8]
Current Committed Offsets: {KafkaV2[Subscribe[customer-avro4]]: {"customer-avro":{"0":17}}}
Current Available Offsets: {KafkaV2[Subscribe[customer-avro4]]: {"customer-avro":{"0":20}}}
Current State: ACTIVE
Thread State: RUNNABLE
Any idea what the issue might be and how to resolve it? The code is the following (inspired by the xebia-france spark-structured-streaming-blog). I think it ran fine earlier, but now there is a problem.
import com.databricks.spark.avro.SchemaConverters
import io.confluent.kafka.schemaregistry.client.{CachedSchemaRegistryClient, SchemaRegistryClient}
import io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryException
object AvroConsumer {
private val topic = "customer-avro4"
private val kafkaUrl = "http://localhost:9092"
private val schemaRegistryUrl = "http://localhost:8081"
private val schemaRegistryClient = new CachedSchemaRegistryClient(schemaRegistryUrl, 128)
private val kafkaAvroDeserializer = new AvroDeserializer(schemaRegistryClient)
private val avroSchema = schemaRegistryClient.getLatestSchemaMetadata(topic + "-value").getSchema
private val sparkSchema = SchemaConverters.toSqlType(new Schema.Parser().parse(avroSchema))
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder
.appName("ConfluentConsumer")
.master("local[*]")
.getOrCreate()
spark.sparkContext.setLogLevel("ERROR")
spark.udf.register("deserialize", (bytes: Array[Byte]) =>
DeserializerWrapper.deserializer.deserialize(bytes)
)
val kafkaDataFrame = spark
.readStream
.format("kafka")
.option("kafka.bootstrap.servers", kafkaUrl)
.option("subscribe", topic)
.load()
val valueDataFrame = kafkaDataFrame.selectExpr("""deserialize(value) AS message""")
import org.apache.spark.sql.functions._
val formattedDataFrame = valueDataFrame.select(
from_json(col("message"), sparkSchema.dataType).alias("parsed_value"))
.select("parsed_value.*")
val writer = formattedDataFrame
.writeStream
.format("parquet")
.option("checkpointLocation", "hdfs://localhost:9000/data/spark/parquet/checkpoint")
while (true) {
val query = writer.start("hdfs://localhost:9000/data/spark/parquet/total")
try {
query.awaitTermination()
}
catch {
case e: StreamingQueryException => println("Streaming Query Exception caught!: " + e);
}
}
}
object DeserializerWrapper {
val deserializer: AvroDeserializer = kafkaAvroDeserializer
}
class AvroDeserializer extends AbstractKafkaAvroDeserializer {
def this(client: SchemaRegistryClient) {
this()
this.schemaRegistry = client
}
override def deserialize(bytes: Array[Byte]): String = {
val genericRecord = super.deserialize(bytes).asInstanceOf[GenericRecord]
genericRecord.toString
}
}
}
Figured it out: the problem was not with the Spark-Kafka integration itself, as I had first thought, but with stale checkpoint information in the HDFS filesystem. Deleting and recreating the checkpoint folder in HDFS solved it for me.
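For anyone who wants to do that cleanup programmatically rather than with hdfs shell commands, here is a minimal sketch using the Hadoop FileSystem API, assuming the hdfs://localhost:9000 namenode and the checkpoint path from the code above:
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object ResetCheckpoint {
def main(args: Array[String]): Unit = {
val conf = new Configuration()
conf.set("fs.defaultFS", "hdfs://localhost:9000") // namenode assumed from the paths in the question
val fs = FileSystem.get(conf)
val checkpointDir = new Path("/data/spark/parquet/checkpoint")
// Recursively delete the stale checkpoint directory, then recreate it empty
if (fs.exists(checkpointDir)) fs.delete(checkpointDir, true)
fs.mkdirs(checkpointDir)
fs.close()
}
}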
How can I connect to a Databricks Delta table using JDBC?
I have tried the Simba driver, but I am having a hard time with the driver class name and URL configuration.
Any solution is appreciated.
I cannot paste code here as it is company code.
Thanks in advance.
Check the link below; it has the steps for configuring a Delta connection over JDBC:
http://sedeks.blogspot.com/2019/05/how-to-connect-to-databricks-delta.html
The code provided in that link:
import java.sql.DriverManager
import java.sql.Driver
import java.sql.Connection
import javax.sql.DataSource
object ScalaJdbcConnectSelect {
def main(args: Array[String]) {
val driver = "com.simba.spark.jdbc41.Driver" //attach the Spark jar to the Classpath.
val url = "jdbc:spark://field-eng.cloud.databricks.com:443/default;transportMode=http;ssl=true;httpPath=sql/protocolvl/o/0/0911-153027-hopes19";
val username = "token"
val password = "<token-value>" //Token generated from databricks profile page.
var connection:Connection = null
try {
// Create the connection
Class.forName(driver)
connection = DriverManager.getConnection(url, username, password)
if(connection != null){
println("Connection Established");
}
else {
println("Connection Failed");
}
// create the statement
val statement = connection.createStatement()
val resultSet = statement.executeQuery("<<Query>>") // Provide your query here.
while ( resultSet.next() ) {
// Iterate through the result set
}
}
catch {
case e: Exception => e.printStackTrace()
}
connection.close()
}
}
I am using the H2 in-memory database in my Grails project, and the application runs properly with it.
I want to connect to the H2 database from Groovy to read data from it.
import groovy.sql.Sql
import java.sql.Driver
class psqlh2 {
static void main(String[] args) {
def driver = Class.forName('org.h2.Driver').newInstance() as Driver
def props = new Properties()
props.setProperty("user", "sa")
props.setProperty("password", "")
def conn = driver.connect("jdbc:h2:mem:~/databaseName;DB_CLOSE_DELAY=-1",props)
def sql = new Sql(conn)
def query = "SELECT * FROM company"
try {
sql.eachRow(query) { row ->
println(row)
}
} finally {
sql.close()
conn.close()
}
}
}
When I run this script, I get the following error:
WARNING: Failed to execute: SELECT * FROM company because: Table "COMPANY" not found; SQL statement:
SELECT * FROM company [42102-199]
Exception in thread "main" org.h2.jdbc.JdbcSQLSyntaxErrorException: Table "COMPANY" not found;
Please help me out.
Replace mem with file in the JDBC URL: a jdbc:h2:mem: URL opens a new in-memory database that exists only inside the connecting JVM, so a separate Groovy process will not see your application's tables.
def driver = Class.forName('org.h2.Driver').newInstance() as Driver
def props = new Properties()
props.setProperty("user", username)
props.setProperty("password", password)
return driver.connect("jdbc:h2:file:${absolutePath};DB_CLOSE_DELAY=-1;IFEXISTS=true", props)
Also make sure to use the same version of the H2 jar that the H2 server is using.
I am creating a Spark jar file with the following Scala code embedded in it:
import com.typesafe.config.ConfigFactory
object GetRequest {
def main(args: Array[String]): Unit = {
val api_credentials = ConfigFactory.load("application.conf")
val username = api_credentials.getString("pi.api.username")
val password = api_credentials.getString("pi.api.password")
}
}
When I submit the jar, it cannot find the application.conf file, which is under the path C:\Users\abc\Desktop\ApiSparkJob\resource. How do I point to it in the spark-submit command on the CLI?
A resource file bundled inside the jar is not available to every Spark worker, so you need to ship the file explicitly with the --files argument:
--files application.conf
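For example, a spark-submit invocation could look like the following sketch (the jar name below is a placeholder; the main class and config path are taken from the question):
spark-submit --files C:\Users\abc\Desktop\ApiSparkJob\resource\application.conf --class GetRequest <your-application-jar>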
If your resource manager is YARN, refer to the code below, which reads the shipped file from the YARN staging directory:
import org.apache.hadoop.fs.{FileSystem, Path}
import java.io.{BufferedReader, File, InputStreamReader}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.spark.sql.SparkSession
object GetRequest {
def main(args: Array[String]): Unit = {
val sparkSession: SparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()
val yarnStagingDir: String = System.getenv("SPARK_YARN_STAGING_DIR")
val confFile: Path = new Path(yarnStagingDir.concat("/application.conf"))
val fs: FileSystem = FileSystem.get(sparkSession.sparkContext.hadoopConfiguration)
val br: BufferedReader = new BufferedReader(new InputStreamReader(fs.open(confFile)))
val api_credentials: Config = ConfigFactory.parseReader(br).resolve()
val username: String = api_credentials.getString("pi.api.username")
val password: String = api_credentials.getString("pi.api.password")
br.close()
}
}
// Don't call fs.close() here: the same FileSystem instance is used to access the Hive warehouse directory, so closing it would end your job.
I am trying to set the SonarQube settings in the Jenkins system configuration using a Groovy init script, but I am getting the error below. Can somebody help me resolve this?
Error
+++++
groovy.lang.GroovyRuntimeException: Could not find matching constructor for:
hudson.plugins.sonar.SonarInstallation(java.lang.String, java.lang.String,
java.lang.String, hudson.plugins.sonar.model.TriggersConfig,
java.lang.String)
at groovy.lang.MetaClassImpl.invokeConstructor(MetaClassImpl.java:1732)
at groovy.lang.MetaClassImpl.invokeConstructor(MetaClassImpl.java:1532)
This is the script I am using:
import hudson.model.*
import jenkins.model.*
import hudson.plugins.sonar.SonarGlobalConfiguration
import hudson.plugins.sonar.*
import hudson.plugins.sonar.model.TriggersConfig
import hudson.tools.*
def inst = Jenkins.getInstance()
println "--> Configuring SonarQube"
SonarGlobalConfiguration sonar_conf = Hudson.instance.getDescriptorByType(SonarGlobalConfiguration.class)
def sonar_inst = new SonarInstallation(
"SonarQ",
"http://localhost:9000",
"yy", // Token
new TriggersConfig(),
""
)
// Only add ADOP Sonar if it does not exist - do not overwrite existing config
def sonar_installations = sonar_conf.getInstallations()
def sonar_inst_exists = false
sonar_installations.each {
installation = (SonarInstallation) it
if (sonar_inst.getName() == installation.getName()) {
sonar_inst_exists = true
println("Found existing installation: " + installation.getName())
}
}
if (!sonar_inst_exists) {
sonar_installations += sonar_inst
sonar_conf.setInstallations((SonarInstallation[]) sonar_installations)
sonar_conf.save()
}
You missed some parameters: the SonarInstallation constructor takes 7 arguments, not 5. You need to pass mojoVersion and additionalProperties (empty strings are fine) between the token and the TriggersConfig, with additionalAnalysisProperties as the last argument:
@DataBoundConstructor
public SonarInstallation(String name,
String serverUrl, String serverAuthenticationToken,
String mojoVersion, String additionalProperties, TriggersConfig triggers,
String additionalAnalysisProperties) {
this.name = name;
this.serverUrl = serverUrl;
this.serverAuthenticationToken = serverAuthenticationToken;
this.additionalAnalysisProperties = additionalAnalysisProperties;
this.mojoVersion = mojoVersion;
this.additionalProperties = additionalProperties;
this.triggers = triggers;
}