kafka to hdfs with confluent source code - java

For the requirement of my project, I need to build a class from the confluent java code to write data from kafka topic to the hdfs filesystem.
It is actually working in CLI with connect-standalone, but I need to do the same thing with the source code which I built successfully.
I have a problem with SinkTask and hdfsConnector classes.
An exception is showing up in the put method.
Here below is my class code:
package io.confluent.connect.hdfs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.sink.SinkConnector;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;
import io.confluent.connect.avro.AvroData;
import io.confluent.connect.hdfs.avro.AvroFormat;
import io.confluent.connect.hdfs.partitioner.DefaultPartitioner;
import io.confluent.connect.storage.common.StorageCommonConfig;
import io.confluent.connect.storage.partitioner.PartitionerConfig;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.config.ConfigDef;
public class main{
private static Map<String, String> props = new HashMap<>();
protected static final TopicPartition TOPIC_PARTITION = new TopicPartition(TOPIC, PARTITION);
protected static String url = "hdfs://localhost:9000";
protected static SinkTaskContext context;
public static void main(String[] args) {
HdfsSinkConnector hk = new HdfsSinkConnector();
HdfsSinkTask h = new HdfsSinkTask();
props.put(StorageCommonConfig.STORE_URL_CONFIG, url);
props.put(HdfsSinkConnectorConfig.HDFS_URL_CONFIG, url);
props.put(HdfsSinkConnectorConfig.FLUSH_SIZE_CONFIG, "3");
props.put(HdfsSinkConnectorConfig.FORMAT_CLASS_CONFIG, AvroFormat.class.getName());
try {
hk.start(props);
Collection<SinkRecord> sinkRecords = new ArrayList<>();
SinkRecord record = new SinkRecord("test", 0, null, null, null, null, 0);
sinkRecords.add(record);
h.initialize(context);
h.put(sinkRecords);
hk.stop();
} catch (Exception e) {
throw new ConnectException("Couldn't start HdfsSinkConnector due to configuration error", e);
}
}
}

Related

Converting Shape File to RDF document, in Java

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.management.AttributeChangeNotification;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.RDFReaderI;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.system.StreamRDFWriter;
import org.apache.jena.vocabulary.VCARD;
import org.geotools.data.DataStore;
import org.geotools.data.DataStoreFinder;
import org.geotools.data.DataUtilities;
import org.geotools.data.FeatureSource;
import org.geotools.data.FileDataStore;
import org.geotools.data.FileDataStoreFinder;
import org.geotools.data.Query;
import org.geotools.data.ServiceInfo;
import org.geotools.data.shapefile.ShapefileDataStore;
import org.geotools.data.simple.SimpleFeatureCollection;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.geotools.data.simple.SimpleFeatureSource;
import org.geotools.feature.FeatureCollection;
import org.geotools.feature.FeatureIterator;
import org.geotools.swing.data.JFileDataStoreChooser;
import org.opengis.feature.ComplexAttribute;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.feature.type.FeatureType;
import org.opengis.filter.Filter;
public class ShpToRdf {
public static void main(String[] args) throws IOException {
ArrayList<String> names = new ArrayList<String>();
ArrayList<String> values = new ArrayList<String>();
File file = JFileDataStoreChooser.showOpenFile("shp", null);
if (file == null) {
return;
}
FileDataStore myData = FileDataStoreFinder.getDataStore(file);
SimpleFeatureSource source = myData.getFeatureSource();
SimpleFeatureType schema = source.getSchema();
Query query = new Query(schema.getTypeName());
query.setMaxFeatures(100);
Model model = ModelFactory.createDefaultModel();
String shpURI = "http://www.shp.fake/";
Resource shapeFile = model.createResource(shpURI);
FeatureCollection<SimpleFeatureType, SimpleFeature> collection = source.getFeatures(query);
try (FeatureIterator<SimpleFeature> features = collection.features()) {
while (features.hasNext()) {
SimpleFeature feature = features.next();
model.setNsPrefix("shp", shpURI);
for (org.opengis.feature.Property attribute : feature.getProperties()) {
names.add(attribute.getName().toString());
values.add(attribute.getValue().toString());
}
}
}
ArrayList<Integer> ids = new ArrayList<Integer>();
for(int i=0; i<names.size();i++) {
if (names.get(i).equals("Id")) {
ids.add(i);
}
}
Property features = model.createProperty(shpURI,"features");
for(int i = 0; i<ids.size();i++) {
Property id = model.createProperty(shpURI,names.get(ids.get(i)));
shapeFile = model.createResource(shpURI)
.addProperty(features, model.createResource()
.addProperty(id,model.createResource()
.addProperty(id, values.get(ids.get(i)))
.addProperty(features, "feature1")
.addProperty(features, "feature2")
.addProperty(features, "feature3")));
}
RDFDataMgr.write(System.out, model, Lang.RDFXML);
}
}
I am trying to create an application that converts Shape File(shp) to RDF.
The problem is that I can get two ArrayLists from the shp. The one has the names of the values (id,name,geometry etc.), and the other has the values.
To create the RDF, I have to match each Id with the matching values(ex. Id =1 has name = road 1, geometry = line etc.)
Could you help me with this?
Thank you!
I think you should be able to do this by tweaking the following bit of logic
for (org.opengis.feature.Property attribute : feature.getProperties()) {
names.add(attribute.getName().toString());
values.add(attribute.getValue().toString());
}
Instead of putting them in two lists, you can put them in a list of pairs. This way when you iterate over the list, you know the mapping between the subject and object.
It should look something similar to
List<Pair<String, Integer>> contentList = new ArrayList<Pair<String, String>>();
for (org.opengis.feature.Property attribute : feature.getProperties()) {
Pair<String, Integer> subjectObjectPairs = new Pair<String, String>(attribute.getName().toString(), attribute.getValue().toString());
contentList.add(subjectObjectPairs);
}
I'm not sure what the ids ArrayList is for, but you could move that logic into the for loop above to make sure you're only getting identifiers.

DataSource cannot be resolved - Weka

I have the following class to perform PCA on a arff file. I have added the Weka jar to my project but I am still getting an error saying DataSource cannot be resolved and I don't know what to do to resolve it. Can anyone suggest what could be wrong?
package project;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
import weka.core.converters.ConverterUtils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.converters.TextDirectoryLoader;
import weka.gui.visualize.Plot2D;
import weka.gui.visualize.PlotData2D;
import weka.gui.visualize.VisualizePanel;
import java.awt.BorderLayout;
import java.io.File;
import java.util.ArrayList;
import javax.swing.JFrame;
import org.math.plot.FrameView;
import org.math.plot.Plot2DPanel;
import org.math.plot.PlotPanel;
import org.math.plot.plots.ScatterPlot;
import weka.attributeSelection.PrincipalComponents;
import weka.attributeSelection.Ranker;
public class PCA {
public static void main(String[] args) {
try {
// Load the Data.
DataSource source = new DataSource("../data/ingredients.arff");
Instances data = source.getDataSet();
// Perform PCA.
PrincipalComponents pca = new PrincipalComponents();
pca.setVarianceCovered(1.0);
//pca.setCenterData(true);
pca.setNormalize(true);
pca.setTransformBackToOriginal(false);
pca.buildEvaluator(data);
// Show transform data into eigenvector basis.
Instances transformedData = pca.transformedData();
System.out.println(transformedData);
} catch (Exception e) {
e.printStackTrace();
}
}
}

How to run particular Test step of soapUi in java

I want to run particular testStep of my testcase of soap ui using java code. My problem is when I try to run at test step level it need argument of TestCase runner which is anonymous inner type and TestCaseRunContext which is interface. Do I have to implement both to run the same? if yes can please any sample how to do that??
here's my code
package com.testauto.soaprunner.soap.impl;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.eviware.soapui.SoapUI;
import com.eviware.soapui.StandaloneSoapUICore;
import com.eviware.soapui.impl.wsdl.WsdlProject;
import com.eviware.soapui.impl.wsdl.WsdlTestSuite;
import com.eviware.soapui.impl.wsdl.testcase.WsdlTestCase;
import com.eviware.soapui.impl.wsdl.testcase.WsdlTestCaseRunner;
import com.eviware.soapui.impl.wsdl.teststeps.WsdlTestStep;
import com.eviware.soapui.model.TestPropertyHolder;
import com.eviware.soapui.model.iface.MessageExchange;
import com.eviware.soapui.model.propertyexpansion.PropertyExpansionUtils;
import com.eviware.soapui.model.testsuite.TestCase;
import com.eviware.soapui.model.testsuite.TestCaseRunContext;
import com.eviware.soapui.model.testsuite.TestProperty;
import com.eviware.soapui.model.testsuite.TestStepResult;
import com.eviware.soapui.model.testsuite.TestSuite;
import com.eviware.soapui.support.types.StringToObjectMap;
import com.eviware.soapui.support.types.StringToStringsMap;
import com.testauto.soaprunner.data.InputData;
import com.testauto.soaprunner.data.ReportData;
public class RunTestImpl{
static Logger logger = LoggerFactory.getLogger(RunTestImpl.class);
List<ReportData> reportDatList=new ArrayList<ReportData>();
public List<ReportData> process(Map<String, String> readDataMap, InputData input, Map<List<String>, String> configurationMap, List<String> configuration, WsdlTestSuite testSuite)
{
List<ReportData> report = new ArrayList<ReportData>();
logger.info("Into the Class for running test cases");
try{
report= getTestSuite(readDataMap,input,configurationMap,configuration,testSuite);
}
catch(Exception e)
{
logger.info(e.getMessage());
}
return report;
}
private List<ReportData> getTestSuite(Map<String, String> readDataMap, InputData input, Map<List<String>, String> configurationMap, List<String> configuration, WsdlTestSuite testSuite) throws Exception {
ReportData report=new ReportData();
logger.info("Into the Class for running test cases");
String suiteName = "";
String reportStr = "";
List<String> testCaseNameList= setPropertyValues(readDataMap,input);
WsdlTestCaseRunner runner = null;
List<TestSuite> suiteList = new ArrayList<TestSuite>();
List<TestCase> caseList = new ArrayList<TestCase>();
SoapUI.setSoapUICore(new StandaloneSoapUICore(true));
System.out.println("testcase name "+ configurationMap.get(configuration));
// WsdlTestCase testCase= testSuite.getTestCaseByName(input.getApiName()+"_"+testCaseName+"_TestCase");
WsdlTestCase testCase= testSuite.getTestCaseByName("my_TESTCASE");
WsdlTestStep tesStep=testCase.getTestStepByName(configurationMap.get(testCaseNameList));
System.out.println("test case name:"+testCase.getName());
report.setTestCase(testCase.getName());
suiteList.add(testSuite);
runner= tesStep.run(?,?);
return reportDatList;
}
private List<String> setPropertyValues(Map<String, String> readDataMap, InputData input) {
String testCaseName="";
TestPropertyHolder holder = PropertyExpansionUtils.getGlobalProperties();
List<String> dataConfigurationList=new ArrayList<String>();
Iterator entries = readDataMap.entrySet().iterator();
while (entries.hasNext()) {
Entry thisEntry = (Entry) entries.next();
String key = (String) thisEntry.getKey();
String value = (String) thisEntry.getValue();
testCaseName+=key;
holder.setPropertyValue(key, holder.getPropertyValue(key));
dataConfigurationList.add(key);
}
System.out.println("testCaseName"+testCaseName);
return dataConfigurationList;
}
}
}
After trying different things I got something like this.
TestCaseRunContext context = new MockTestRunContext(new MockTestRunner(testStep.getTestCase()), testStep);
MockTestRunner runner = new MockTestRunner(testStep.getTestCase());
TestStepResult testStepResult= testStep.run(runner, context);
I don't know how it works this trick worked for me. if someone know the reason behind this please share

Remove user from Active Directory

I have an email distribution list "CTW DEV". I would like to remove the 1 user 'rakeshdw' from the ActiveDirectory using java. Please Find below code.
Its giving an exception. User is not getting removed. Please suggest the required changes. Thanks !
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Iterator;
import java.util.Properties;
import java.util.HashSet;
import javax.naming.Context;
import javax.naming.NamingEnumeration;
import javax.naming.NamingException;
import javax.naming.directory.Attribute;
import javax.naming.directory.Attributes;
import javax.naming.directory.BasicAttribute;
import javax.naming.directory.DirContext;
import javax.naming.directory.InitialDirContext;
import javax.naming.directory.ModificationItem;
import javax.naming.directory.SearchControls;
import javax.naming.directory.SearchResult;
import java.util.Properties;
import javax.naming.Context;
import javax.naming.directory.BasicAttribute;
import javax.naming.directory.DirContext;
import javax.naming.directory.InitialDirContext;
import javax.naming.directory.ModificationItem;
public class DeleteFromADGroup {
private String adGroup,dn;
private DirContext ctx;
private String adminName = "intranet\\patilume";
DeleteFromADGroup(){
try{
this.adGroup = "CN=CTW_DEV";
this.dn= "OU=DistributionLists,OU=Messaging,DC=INTRANET,DC=INFOSYSINT,DC=com";
Properties pr = new Properties();
pr.setProperty(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory");
pr.setProperty(Context.PROVIDER_URL, "ldap://intranet.infosysint.com");
pr.setProperty(Context.SECURITY_AUTHENTICATION,"simple");
pr.setProperty(Context.SECURITY_CREDENTIALS, "myPassword"); //its password
pr.setProperty(Context.SECURITY_PRINCIPAL, this.adminName);
pr.setProperty(Context.REFERRAL, "ignore");
this.ctx = new InitialDirContext(pr);
}
catch(Exception e){
System.out.println("in constructor..");
}
}
public static void main(String[] args) {
DeleteFromADGroup dadg=new DeleteFromADGroup();
dadg.deleteUser("CTW_DEV","rakeshdw");
}
private void deleteUser(String ADGroup, String username){
String groupName = "CN="+ADGroup+",OU=DistributionLists,OU=Messaging,DC=INTRANET,DC=INFOSYSINT,DC=com";
try{
ModificationItem mods[] = new ModificationItem[1];
mods[0]= new ModificationItem(DirContext.REMOVE_ATTRIBUTE, new BasicAttribute("member", username));
//update the group
ctx.modifyAttributes(groupName,mods);
ctx.close();
}
catch(Exception e){
System.out.println("Exception while removing user from DL");
}
}
}
Exception I get is as below:
javax.naming.OperationNotSupportedException: [LDAP: error code 53 - 0000054F: SvcErr: DSID-031A0FC0, problem 5003 (WILL_NOT_PERFORM), data 0
You need to pass the DN of the user to the username attribute in your deleteUser method. For example, it might be something like cn=rakeshw,ou=people,dc=contoso,dc=com.

MapReduce-Cassandra wordcount compilation error: ConfigHelper not found

I am trying to run WordCount MapReduce program to read and count data stored in Cassandra table (Column Family) but, when I compile my program I got the same error repeated times. Below is my source code and error I got. Can anyone help me to solve this issue? Thanks in advance.
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.cassandra.db.IColumn;
import org.apache.cassandra.hadoop.*;
import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.ByteBufferUtil;
/**
* This sums the word count stored in the input_words_count ColumnFamily for the key "key-if-verse1".
*
* Output is written to a text file.
*/
public class WordCountCounters extends Configured implements Tool
{
private static final Logger logger = LoggerFactory.getLogger(WordCountCounters.class);
static final String COUNTER_COLUMN_FAMILY = "input_words";
private static final String OUTPUT_PATH_PREFIX = "/Users/Deepu/Documents/dse-3.2.4/dse-data/word_count_counters";
public static void main(String[] args) throws Exception
{
// Let ToolRunner handle generic command-line options
ToolRunner.run(new Configuration(), new WordCountCounters(), args);
System.exit(0);
}
public static class SumMapper extends Mapper<ByteBuffer, SortedMap<ByteBuffer, IColumn>, Text, LongWritable>
{
public void map(ByteBuffer key, SortedMap<ByteBuffer, IColumn> columns, Context context) throws IOException, InterruptedException
{
long sum = 0;
for (IColumn column : columns.values())
{
logger.debug("read " + key + ":" + column.name() + " from " + context.getInputSplit());
sum += ByteBufferUtil.toLong(column.value());
}
context.write(new Text(ByteBufferUtil.string(key)), new LongWritable(sum));
}
}
public int run(String[] args) throws Exception
{
Job job = new Job(getConf(), "wordcountcounters");
job.setJarByClass(WordCountCounters.class);
job.setMapperClass(SumMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));
job.setInputFormatClass(ColumnFamilyInputFormat.class);
ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY);
SlicePredicate predicate = new SlicePredicate().setSlice_range(
new SliceRange().
setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER).
setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).
setCount(100));
ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
job.waitForCompletion(true);
return 0;
}
}
Compiation Errors are:
Because you commented out these two lines perhaps:
//import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
//import org.apache.cassandra.hadoop.ConfigHelper;

Categories

Resources