cannot find HibInputFormat class. Getting exception NoClassDefFoundError - java

hduser@akshay-Lenovo-G580:~$ hadoop jar /home/hduser/HipiDemo.jar HelloWorld sampleimages.hib sampleimages_average
Warning: $HADOOP_HOME is deprecated.
Exception in thread "main" java.lang.NoClassDefFoundError: org/hipi/imagebundle/mapreduce/HibInputFormat
at HelloWorld.run(HelloWorld.java:44)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
at HelloWorld.main(HelloWorld.java:67)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:160)
Caused by: java.lang.ClassNotFoundException: org.hipi.imagebundle.mapreduce.HibInputFormat
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 9 more
My code:
import hipi.image.FloatImage;
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.hipi.image.HipiImageHeader;
import org.hipi.imagebundle.mapreduce.HibInputFormat;
public class HelloWorld extends Configured implements Tool {
public static class HelloWorldMapper extends Mapper<HipiImageHeader, FloatImage, IntWritable, FloatImage> {
public void map(HipiImageHeader key, FloatImage value, Context context)
throws IOException, InterruptedException {
}
}
public static class HelloWorldReducer extends Reducer<IntWritable, FloatImage, IntWritable, Text> {
public void reduce(IntWritable key, Iterable<FloatImage> values, Context context)
throws IOException, InterruptedException {
}
}
public int run(String[] args) throws Exception {
// Check input arguments
if (args.length != 2) {
System.out.println("Usage: helloWorld <input HIB> <output directory>");
System.exit(0);
}
// Initialize and configure MapReduce job
//Job job = Job.getInstance();
Job job = new Job(getConf(), "Employee Salary");
// Set input format class which parses the input HIB and spawns map tasks
job.setInputFormatClass(HibInputFormat.class);
// Set the driver, mapper, and reducer classes which express the computation
job.setJarByClass(HelloWorld.class);
job.setMapperClass(HelloWorldMapper.class);
job.setReducerClass(HelloWorldReducer.class);
// Set the types for the key/value pairs passed to/from map and reduce layers
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(FloatImage.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
// Set the input and output paths on the HDFS
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Execute the MapReduce job and block until it completes
boolean success = job.waitForCompletion(true);
// Return success or failure
return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
ToolRunner.run(new HelloWorld(), args);
System.exit(0);
}
}

You need to add the jar containing the class HibInputFormat to your classpath.
Or, if you compile from the command line, for example:
javac -classpath /lib/jarContainingTheClass.jar /examples/HelloWorld.java
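Note that the NoClassDefFoundError above is thrown at runtime, so the HIPI jar must also be visible when the job runs, not only when it compiles. A minimal sketch, assuming the HIPI jar is at /home/hduser/hipi-core.jar (adjust the path to your installation):
export HADOOP_CLASSPATH=/home/hduser/hipi-core.jar
hadoop jar /home/hduser/HipiDemo.jar HelloWorld -libjars /home/hduser/hipi-core.jar sampleimages.hib sampleimages_average
HADOOP_CLASSPATH makes the class available to the driver JVM; -libjars (picked up because HelloWorld runs through ToolRunner) ships the jar to the map and reduce tasks as well. Building a single fat jar that bundles the HIPI classes into HipiDemo.jar avoids the flags entirely.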

Related

org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey

I am using Hadoop 2.6, Spring Hadoop, and Avro 1.7.4 to process an Avro input file, but I am blocked by a weird error. I have set the input format to AvroKeyInputFormat, yet Hadoop still cannot recognize the key. Here is the stack trace:
Stack trace
04-04 12:15:58 INFO org.apache.hadoop.mapred.LocalJobRunner map task executor complete.
04-04 12:15:59 WARN org.apache.hadoop.mapred.LocalJobRunner job_local1850669149_0002
java.lang.Exception: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey
at com.truven.bp.data.mapred.Bdual$BdualMapper.map(Bdual.java:32)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
04-04 12:15:59 INFO org.apache.hadoop.mapreduce.Job Job job_local1850669149_0002 running in uber mode : false
The MapReduce class code is:
import com.truven.bp.data.converter.GenericConverter;
import com.truven.bp.data.utils.Constant;
import com.truven.dataforge.paidclaims.avro.pojo.EnrollmentMonthlyEligibilityPrograms;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
public class Bdual extends Configured implements Tool {
private static final Log logger = LogFactory.getLog(Bdual.class);
private static class BdualMapper extends Mapper<AvroKey<EnrollmentMonthlyEligibilityPrograms>, NullWritable, Text, NullWritable> {
GenericConverter genericConverter = new GenericConverter();
public void setup(Context context)
throws IOException, InterruptedException {
genericConverter.initSetup(Constant.BDUAL_FIELDS_TEXT_MAPPING_FILE_NAME);
}
@Override
public void map(AvroKey<EnrollmentMonthlyEligibilityPrograms> key, NullWritable value, Context context)
throws IOException, InterruptedException {
String fields = genericConverter.convert(key.datum(), null);
context.write(new Text(fields), NullWritable.get());
}
}
private static class BdualReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
GenericConverter genericConverter = new GenericConverter();
@Override
public void setup(Context context) throws IOException, InterruptedException {
genericConverter.initSetup(Constant.BDUAL_FIELDS_TEXT_MAPPING_FILE_NAME);
context.write(new Text(genericConverter.getHeader()), NullWritable.get());
}
@Override
public void reduce(Text key, Iterable<NullWritable> values, Context context)
throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: bdual <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Avro Bdual Conversion");
job.setJarByClass(Bdual.class);
Path inPath = new Path(args[0]);
Path outPath = new Path(args[1]);
FileInputFormat.addInputPath(job, inPath);
FileOutputFormat.setOutputPath(job, outPath);
job.setInputFormatClass(AvroKeyInputFormat.class);
job.setMapperClass(BdualMapper.class);
AvroJob.setInputKeySchema(job, EnrollmentMonthlyEligibilityPrograms.getClassSchema());
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setReducerClass(BdualReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(1);
return (job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
int result = ToolRunner.run(new Bdual(), args);
System.exit(result);
}
}
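One detail worth checking: LongWritable is the key type produced by Hadoop's default TextInputFormat, so the cast error suggests the mapper effectively ran with the default input format rather than AvroKeyInputFormat. A minimal diagnostic sketch (an assumption, not a confirmed fix) that prints what the submitted job actually carries, placed in run() just before waitForCompletion:
// getInputFormatClass() reads the value back from the job configuration;
// if another layer (e.g. a Spring Hadoop job definition) rebuilds the job,
// this can differ from what the driver code set above
System.out.println("input format = " + job.getInputFormatClass());
If this prints TextInputFormat, the setInputFormatClass call is being overridden somewhere in the Spring Hadoop configuration.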

hadoop exception type mismatch in wordcount program

I am new to Hadoop and trying to run my first program, but I am facing some problems when I execute my wordcount job.
WordCount.java
package hdp;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool{
public static void main(String[] args) throws Exception {
System.out.println("application starting ....");
int exitCode = ToolRunner.run(new WordCount(), args);
System.out.println(exitCode);
}
@Override
public int run(String[] args) throws Exception {
if (args.length < 2) {
System.out.println("Plz enter input and output directory properly... ");
return -1;
}
JobConf conf = new JobConf(WordCount.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(WordMapper.class);
conf.setReducerClass(WordReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputKeyClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}
@Override
public Configuration getConf() {
return null;
}
@Override
public void setConf(Configuration arg0) {
}
}
WordMapper.java
package hdp;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class WordMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable>{
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> collect, Reporter reporter) throws IOException {
String str = value.toString();
for (String s : str.split(" ")) {
if (s.length() > 0) {
collect.collect(new Text(s), new IntWritable(1));
}
}
}
}
WordReducer
package hdp;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class WordReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
int count = 0;
while (values.hasNext()) {
IntWritable intWritable = values.next();
count += intWritable.get();
}
output.collect(key, new IntWritable(count));
}
}
When I run my program, I get the following error message:
16/12/23 00:22:41 INFO mapreduce.Job: Task Id : attempt_1482432671993_0001_m_000001_1, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.IntWritable, received org.apache.hadoop.io.Text
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:610)
at hdp.WordMapper.map(WordMapper.java:19)
at hdp.WordMapper.map(WordMapper.java:1)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
16/12/23 00:22:47 INFO mapreduce.Job: Task Id : attempt_1482432671993_0001_m_000000_2, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.IntWritable, received org.apache.hadoop.io.Text
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:610)
at hdp.WordMapper.map(WordMapper.java:19)
at hdp.WordMapper.map(WordMapper.java:1)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Please tell me where I went wrong and what changes I need to make, whether in WordCount.java, WordMapper.java, or WordReducer.java.
You accidentally set the map output key class twice. The second call:
conf.setMapOutputKeyClass(IntWritable.class);
should become:
conf.setMapOutputValueClass(IntWritable.class);
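With that change, the type setup in run() reads (corrected block only):
conf.setMapperClass(WordMapper.class);
conf.setReducerClass(WordReducer.class);
// map output: Text keys (words), IntWritable values (counts)
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
This matches what WordMapper actually emits (Text, IntWritable), which is exactly what the "Type mismatch in key from map" error was complaining about.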

Exception in thread "main" java.lang.ClassNotFoundException: org.myorg.MaxTemperatureWithCompression

I am trying to run a MapReduce job (using the new API) on Hadoop 2.7.1 from the command line. I followed the steps below. There were no errors in compiling and creating the jar file.
javac -cp `hadoop classpath` MaxTemperatureWithCompression.java -d /Users/gangadharkadam/hadoopdata/build
jar -cvf MaxTemperatureWithCompression.jar /Users/gangadharkadam/hadoopdata/build
hadoop jar MaxTemperatureWithCompression.jar org.myorg.MaxTemperatureWithCompression user/ncdc/input /user/ncdc/output
Error Messages-
Exception in thread "main" java.lang.ClassNotFoundException: org.myorg.MaxTemperatureWithCompression
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:274)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Java Code-
package org.myorg;
//Standard Java Classes
import java.io.IOException;
import java.util.regex.Pattern;
//extends the class Configured, and implements the Tool utility class
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.GenericOptionsParser;
//send debugging messages from inside the mapper and reducer classes
import org.apache.log4j.Logger;
//Job class in order to create, configure, and run an instance of your MapReduce
import org.apache.hadoop.mapreduce.Job;
//extend the Mapper class with your own Map class and add your own processing instructions
import org.apache.hadoop.mapreduce.Mapper;
//extend it to create and customize your own Reduce class
import org.apache.hadoop.mapreduce.Reducer;
//Path class to access files in HDFS
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
//pass required paths using the FileInputFormat and FileOutputFormat classes
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Writable objects for writing, reading,and comparing values during map and reduce processing
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
public class MaxTemperatureWithCompression extends Configured implements Tool {
private static final Logger LOG = Logger.getLogger(MaxTemperatureWithCompression.class);
//main method that invokes the ToolRunner to create an instance of MaxTemperatureWithCompression
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new MaxTemperatureWithCompression(), args);
System.exit(res);
}
//call the run method to configure the job
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: MaxTemperatureWithCompression <input path> " + "<output path>");
System.exit(-1);
}
Job job = Job.getInstance(getConf(), "MaxTemperatureWithCompression");
//set the jar to use based on the class
job.setJarByClass(MaxTemperatureWithCompression.class);
//set the input and output path
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//set the output key and value
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//set the compression format
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
//set the mapper and reducer class
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
return job.waitForCompletion(true) ? 0 : 1;
}
//mapper
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING = 9999;
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException,InterruptedException {
String line = value.toString();
String year = line.substring(15,19);
int airTemperature;
if (line.charAt(87) == '+') {
airTemperature = Integer.parseInt(line.substring(88, 92));
}
else {
airTemperature = Integer.parseInt(line.substring(87, 92));
}
String quality = line.substring(92,93);
if (airTemperature != MISSING && quality.matches("[01459]")) {
context.write(new Text(year), new IntWritable(airTemperature));
}
}
}
//reducer
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int maxValue = Integer.MIN_VALUE;
for (IntWritable value : values) {
maxValue = Math.max(maxValue, value.get());
}
context.write(key, new IntWritable(maxValue));
}
}
}
I have seen a few posts on the same issue, but they did not help me resolve it. Any help is highly appreciated. Thanks in advance.
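One thing worth checking in the commands above: jar -cvf MaxTemperatureWithCompression.jar /Users/gangadharkadam/hadoopdata/build stores the entries under Users/gangadharkadam/hadoopdata/build/... inside the jar, so the class is not reachable as org/myorg/MaxTemperatureWithCompression.class and RunJar cannot find it. A sketch of the jar step using -C so that entries are rooted at the build directory instead:
jar -cvf MaxTemperatureWithCompression.jar -C /Users/gangadharkadam/hadoopdata/build .
jar tf MaxTemperatureWithCompression.jar
The second command lists the jar contents; it should show org/myorg/MaxTemperatureWithCompression.class.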

Caused by: java.lang.ClassNotFoundException: com.example.Example$ReduceTask while running mapreduce job

I am trying to run the MapReduce job shown below, but it gives me a ClassNotFoundException even though the inner class is present in the jar. Can anyone give a hint?
package com.example;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
public class Example {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setReducerClass(Example.ReduceTask.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
public static class ReduceTask
extends Reducer<LongWritable, Text, Text, Text> {
public void reduce(LongWritable key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
for (Text value: values) {
String[] cols = value.toString().split(",");
context.write(new Text(cols[0]), value);
break;
}
}
}
}
java.lang.RuntimeException: java.lang.ClassNotFoundException: com.example.Example$ReduceTask
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:996)
at org.apache.hadoop.mapreduce.JobContext.getReducerClass(JobContext.java:236)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:556)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:414)
at org.apache.hadoop.mapred.Child$4.run(Child.java:270)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1127)
at org.apache.hadoop.mapred.Child.main(Child.java:264)
I am running it via the command:
hadoop jar PracticeHadoop.jar com.example.Example workspace/input workspace/op
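One common cause of a ClassNotFoundException for a job's own classes, even when they are present in the jar, is that the driver never registers the jar, so the framework does not know which jar to ship to the task JVMs. The driver above has no setJarByClass call; a minimal sketch of the addition:
Configuration conf = new Configuration();
Job job = new Job(conf);
// register the jar containing this class so it is distributed
// to the map and reduce tasks
job.setJarByClass(Example.class);
This is only a likely fix based on the code shown, but it matches the symptom of the class resolving on the client and failing inside the reduce task.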

How to run wordcount program in hadoop 2.7.1 with 2.6.1 eclipse plugin?

I have installed Hadoop 2.7.1 in my Windows 10 environment and placed the 2.6.0 Eclipse plugin jar in Eclipse (Luna). After running the following code I am getting errors.
Mapper Function
package Exp2;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
IntWritable one = new IntWritable(1);
Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
Reducer Function
package Exp2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
Driver function
package Exp2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(Exp2.WordCount.class);
// TODO: specify a mapper
job.setMapperClass(TokenizerMapper.class);
// TODO: specify a reducer
job.setReducerClass(IntSumReducer.class);
// TODO: specify output types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// TODO: specify input and output DIRECTORIES (not files)
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
}
My execution console output begins with:
16/04/13 21:40:57 DEBUG conf.Configuration: Handling deprecation for dfs.namenode.edits.dir
and ends with error messages like:
java.lang.Exception: java.lang.NullPointerException
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.NullPointerException
at java.lang.ProcessBuilder.start(Unknown Source)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:483)
at org.apache.hadoop.util.Shell.run(Shell.java:456)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:722)
at org.apache.hadoop.yarn.util.WindowsBasedProcessTree.isAvailable(WindowsBasedProcessTree.java:57)
at org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(ResourceCalculatorProcessTree.java:233)
at org.apache.hadoop.mapred.Task.initialize(Task.java:610)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:324)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Any remedies?
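The NullPointerException from ProcessBuilder.start inside Shell.runCommand is a well-known symptom on Windows of Hadoop not finding its native helper binaries (winutils.exe). A sketch of the usual workaround, assuming the Windows binaries are unpacked under C:\hadoop with winutils.exe in C:\hadoop\bin (the path is an assumption):
// in the driver, before creating the Configuration:
System.setProperty("hadoop.home.dir", "C:\\hadoop");
Setting the HADOOP_HOME environment variable to the same directory (and adding %HADOOP_HOME%\bin to PATH) achieves the same without code changes.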
