hduser#akshay-Lenovo-G580:~$ hadoop jar /home/hduser/HipiDemo.jar HelloWorld sampleimages.hib sampleimages_average
Warning: $HADOOP_HOME is deprecated.
Exception in thread "main" java.lang.NoClassDefFoundError: org/hipi/imagebundle/mapreduce/HibInputFormat
at HelloWorld.run(HelloWorld.java:44)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
at HelloWorld.main(HelloWorld.java:67)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:160)
Caused by: java.lang.ClassNotFoundException: org.hipi.imagebundle.mapreduce.HibInputFormat
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 9 more
My code:
import hipi.image.FloatImage;
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.hipi.image.HipiImageHeader;
import org.hipi.imagebundle.mapreduce.HibInputFormat;
public class HelloWorld extends Configured implements Tool {
public static class HelloWorldMapper extends Mapper<HipiImageHeader, FloatImage, IntWritable, FloatImage> {
public void map(HipiImageHeader key, FloatImage value, Context context)
throws IOException, InterruptedException {
}
}
public static class HelloWorldReducer extends Reducer<IntWritable, FloatImage, IntWritable, Text> {
public void reduce(IntWritable key, Iterable<FloatImage> values, Context context)
throws IOException, InterruptedException {
}
}
public int run(String[] args) throws Exception {
// Check input arguments
if (args.length != 2) {
System.out.println("Usage: helloWorld <input HIB> <output directory>");
System.exit(0);
}
// Initialize and configure MapReduce job
//Job job = Job.getInstance();
Job job = new Job(getConf(), "Employee Salary");
// Set input format class which parses the input HIB and spawns map tasks
job.setInputFormatClass(HibInputFormat.class);
// Set the driver, mapper, and reducer classes which express the computation
job.setJarByClass(HelloWorld.class);
job.setMapperClass(HelloWorldMapper.class);
job.setReducerClass(HelloWorldReducer.class);
// Set the types for the key/value pairs passed to/from map and reduce layers
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(FloatImage.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
// Set the input and output paths on the HDFS
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Execute the MapReduce job and block until it completes
boolean success = job.waitForCompletion(true);
// Return success or failure
return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
ToolRunner.run(new HelloWorld(), args);
System.exit(0);
}
}
Add the jar containing the class HibInputFormat to your classpath.
If you compile from the command line, pass it with -classpath, for example:
javac -classpath /lib/jarContainingTheClass.jar /examples/HelloWorld.java
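The same jar also has to be visible at run time. Because HelloWorld goes through ToolRunner, one way to do that (a sketch, assuming the HIPI library jar sits at /home/hduser/hipi.jar; substitute the actual path and file name of your HIPI build) is to put it on the client classpath and ship it to the tasks with the generic -libjars option:
export HADOOP_CLASSPATH=/home/hduser/hipi.jar
hadoop jar /home/hduser/HipiDemo.jar HelloWorld -libjars /home/hduser/hipi.jar sampleimages.hib sampleimages_average
Alternatively, build HipiDemo.jar as a fat jar that bundles the HIPI classes, so no extra classpath setup is needed.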
Related
I am using Hadoop 2.6, Spring for Apache Hadoop, and Avro 1.7.4 to process an Avro input file, but I am blocked by a weird error. I have set the input format to AvroKeyInputFormat, yet Hadoop still cannot recognize the key. Here is the stack trace:
Stack trace
4-04 12:15:58 INFO org.apache.hadoop.mapred.LocalJobRunner map task executor complete.
04-04 12:15:59 WARN org.apache.hadoop.mapred.LocalJobRunner job_local1850669149_0002
java.lang.Exception: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.avro.mapred.AvroKey
at com.truven.bp.data.mapred.Bdual$BdualMapper.map(Bdual.java:32)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
04-04 12:15:59 INFO org.apache.hadoop.mapreduce.Job Job job_local1850669149_0002 running in uber mode : false
The MapReduce class code is:
import com.truven.bp.data.converter.GenericConverter;
import com.truven.bp.data.utils.Constant;
import com.truven.dataforge.paidclaims.avro.pojo.EnrollmentMonthlyEligibilityPrograms;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
public class Bdual extends Configured implements Tool {
private static final Log logger = LogFactory.getLog(Bdual.class);
private static class BdualMapper extends Mapper<AvroKey<EnrollmentMonthlyEligibilityPrograms>, NullWritable, Text, NullWritable> {
GenericConverter genericConverter = new GenericConverter();
public void setup(Context context)
throws IOException, InterruptedException {
genericConverter.initSetup(Constant.BDUAL_FIELDS_TEXT_MAPPING_FILE_NAME);
}
@Override
public void map(AvroKey<EnrollmentMonthlyEligibilityPrograms> key, NullWritable value, Context context)
throws IOException, InterruptedException {
String fields = genericConverter.convert(key.datum(), null);
context.write(new Text(fields), NullWritable.get());
}
}
private static class BdualReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
GenericConverter genericConverter = new GenericConverter();
@Override
public void setup(Context context) throws IOException, InterruptedException {
genericConverter.initSetup(Constant.BDUAL_FIELDS_TEXT_MAPPING_FILE_NAME);
context.write(new Text(genericConverter.getHeader()), NullWritable.get());
}
@Override
public void reduce(Text key, Iterable<NullWritable> values, Context context)
throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: bdual <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Avro Bdual Conversion");
job.setJarByClass(Bdual.class);
Path inPath = new Path(args[0]);
Path outPath = new Path(args[1]);
FileInputFormat.addInputPath(job, inPath);
FileOutputFormat.setOutputPath(job, outPath);
job.setInputFormatClass(AvroKeyInputFormat.class);
job.setMapperClass(BdualMapper.class);
AvroJob.setInputKeySchema(job, EnrollmentMonthlyEligibilityPrograms.getClassSchema());
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setReducerClass(BdualReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(1);
return (job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
int result = ToolRunner.run(new Bdual(), args);
System.exit(result);
}
}
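The cast failure itself is a clue: LongWritable keys are what the default TextInputFormat produces, which suggests the AvroKeyInputFormat set in run() is not the input format actually in effect (for example, if the Spring for Apache Hadoop job definition builds the Job from its own configuration rather than through this run() method, the input format has to be declared there as well). A minimal diagnostic, assuming these lines are added in run() just before job.waitForCompletion(true):
// Uses the existing job and logger fields of Bdual; prints the input format
// the submitted job will really use.
logger.info("Input format in effect: " + job.getInputFormatClass().getName());
// If this reports TextInputFormat instead of AvroKeyInputFormat, the format set
// here is being overridden or ignored by the Spring job configuration.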
I am new to Hadoop and trying to run my first program, but I am facing a problem when I execute my WordCount job.
WordCount.java
package hdp;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool{
public static void main(String[] args) throws Exception {
System.out.println("application starting ....");
int exitCode = ToolRunner.run(new WordCount(), args);
System.out.println(exitCode);
}
@Override
public int run(String[] args) throws Exception {
if (args.length < 2) {
System.out.println("Plz enter input and output directory properly... ");
return -1;
}
JobConf conf = new JobConf(WordCount.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(WordMapper.class);
conf.setReducerClass(WordReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputKeyClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}
@Override
public Configuration getConf() {
return null;
}
@Override
public void setConf(Configuration arg0) {
}
}
WordMapper.java
package hdp;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class WordMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable>{
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> collect, Reporter reporter) throws IOException {
String str = value.toString();
for (String s : str.split(" ")) {
if (s.length() > 0) {
collect.collect(new Text(s), new IntWritable(1));
}
}
}
}
WordReducer.java
package hdp;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class WordReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
int count = 0;
while (values.hasNext()) {
IntWritable intWritable = values.next();
count += intWritable.get();
}
output.collect(key, new IntWritable(count));
}
}
When I run my program, I get the following error message.
16/12/23 00:22:41 INFO mapreduce.Job: Task Id : attempt_1482432671993_0001_m_000001_1, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.IntWritable, received org.apache.hadoop.io.Text
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:610)
at hdp.WordMapper.map(WordMapper.java:19)
at hdp.WordMapper.map(WordMapper.java:1)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
16/12/23 00:22:47 INFO mapreduce.Job: Task Id : attempt_1482432671993_0001_m_000000_2, Status : FAILED
Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.IntWritable, received org.apache.hadoop.io.Text
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:610)
at hdp.WordMapper.map(WordMapper.java:19)
at hdp.WordMapper.map(WordMapper.java:1)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Please tell me where I went wrong and what changes are required, whether in WordCount.java, WordMapper.java, or WordReducer.java.
You accidentally set the map output key class twice:
conf.setMapOutputKeyClass(IntWritable.class);
Should become
conf.setMapOutputValueClass(IntWritable.class);
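With that change the declared map output types match what WordMapper actually emits (Text keys, IntWritable counts). For reference, the relevant block of run() then reads:
conf.setMapperClass(WordMapper.class);
conf.setReducerClass(WordReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);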
I am trying to run a MapReduce job (using the new API) on Hadoop 2.7.1 from the command line. I followed the steps below; there were no errors compiling or creating the jar file.
javac -cp `hadoop classpath` MaxTemperatureWithCompression.java -d /Users/gangadharkadam/hadoopdata/build
jar -cvf MaxTemperatureWithCompression.jar /Users/gangadharkadam/hadoopdata/build
hadoop jar MaxTemperatureWithCompression.jar org.myorg.MaxTemperatureWithCompression user/ncdc/input /user/ncdc/output
Error Messages-
Exception in thread "main" java.lang.ClassNotFoundException: org.myorg.MaxTemperatureWithCompression
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:274)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Java Code-
package org.myorg;
//Standard Java Classes
import java.io.IOException;
import java.util.regex.Pattern;
//extends the class Configured, and implements the Tool utility class
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.GenericOptionsParser;
//send debugging messages from inside the mapper and reducer classes
import org.apache.log4j.Logger;
//Job class in order to create, configure, and run an instance of your MapReduce
import org.apache.hadoop.mapreduce.Job;
//extend the Mapper class with your own Map class and add your own processing instructions
import org.apache.hadoop.mapreduce.Mapper;
//extend it to create and customize your own Reduce class
import org.apache.hadoop.mapreduce.Reducer;
//Path class to access files in HDFS
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
//pass required paths using the FileInputFormat and FileOutputFormat classes
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Writable objects for writing, reading,and comparing values during map and reduce processing
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
public class MaxTemperatureWithCompression extends Configured implements Tool {
private static final Logger LOG = Logger.getLogger(MaxTemperatureWithCompression.class);
//main method to invoke the ToolRunner to create an instance of MaxTemperatureWithCompression
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new MaxTemperatureWithCompression(), args);
System.exit(res);
}
//call the run method to configure the job
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: MaxTemperatureWithCompression <input path> " + "<output path>");
System.exit(-1);
}
Job job = Job.getInstance(getConf(), "MaxTemperatureWithCompression");
//set the jar to use based on the class
job.setJarByClass(MaxTemperatureWithCompression.class);
//set the input and output path
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//set the output key and value
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//set the compression format
/*[*/FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);/*]*/
//set the mapper and reducer class
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
return job.waitForCompletion(true) ? 0 : 1;
}
//mapper
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING = 9999;
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException,InterruptedException {
String line = value.toString();
String year = line.substring(15,19);
int airTemperature;
if (line.charAt(87) == '+') {
airTemperature = Integer.parseInt(line.substring(88, 92));
}
else {
airTemperature = Integer.parseInt(line.substring(87, 92));
}
String quality = line.substring(92,93);
if (airTemperature != MISSING && quality.matches("[01459]")) {
context.write(new Text(year), new IntWritable(airTemperature));
}
}
}
//reducer
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int maxValue = Integer.MIN_VALUE;
for (IntWritable value : values) {
maxValue = Math.max(maxValue, value.get());
}
context.write(key, new IntWritable(maxValue));
}
}
}
I see a few posts on the same issue, but they did not help me resolve it. Any help is highly appreciated. Thanks in advance.
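One likely cause, given the packaging command above: jar -cvf MaxTemperatureWithCompression.jar /Users/gangadharkadam/hadoopdata/build stores the class files under the full directory prefix (Users/gangadharkadam/hadoopdata/build/org/myorg/...) instead of at the jar root, so the runtime cannot resolve org.myorg.MaxTemperatureWithCompression. A sketch of a corrected packaging step, using jar's -C option to change into the build directory before adding files:
jar -cvf MaxTemperatureWithCompression.jar -C /Users/gangadharkadam/hadoopdata/build .
jar -tf MaxTemperatureWithCompression.jar
The second command lists the jar entries; they should start with org/myorg/ if the classes are packaged correctly.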
I am trying to run the MapReduce job shown below, but it is giving me a ClassNotFoundException even though the inner class is present in the jar. Can anyone give a hint?
package com.example;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
public class Example {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setReducerClass(Example.ReduceTask.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
public static class ReduceTask
extends Reducer<LongWritable, Text, Text, Text> {
public void reduce(LongWritable key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
for (Text value: values) {
String[] cols = value.toString().split(",");
context.write(new Text(cols[0]), value);
break;
}
}
}
}
java.lang.RuntimeException: java.lang.ClassNotFoundException: com.example.Example$ReduceTask
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:996)
at org.apache.hadoop.mapreduce.JobContext.getReducerClass(JobContext.java:236)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:556)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:414)
at org.apache.hadoop.mapred.Child$4.run(Child.java:270)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1127)
at org.apache.hadoop.mapred.Child.main(Child.java:264)
I am running it via the command:
hadoop jar PracticeHadoop.jar com.example.Example workspace/input workspace/op
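One thing that stands out in the driver above: the job never calls setJarByClass (or setJar), so Hadoop submits the job without a job jar. The reducer is then instantiated by class name on the task JVMs, which cannot find com.example.Example$ReduceTask even though it is inside PracticeHadoop.jar. A sketch of the likely fix, added in main() alongside the other job settings:
// Tell Hadoop which jar to ship to the cluster by naming a class it contains.
job.setJarByClass(Example.class);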
I've installed Hadoop 2.7.1 in my Windows 10 environment and have placed the 2.6.0 Eclipse plugin jar in Eclipse (Luna). After running the following code I am getting errors.
Mapper Function
package Exp2;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
IntWritable one = new IntWritable(1);
Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
Reducer Function
package Exp2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
Driver function
package Exp2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(Exp2.WordCount.class);
// TODO: specify a mapper
job.setMapperClass(TokenizerMapper.class);
// TODO: specify a reducer
job.setReducerClass(IntSumReducer.class);
// TODO: specify output types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// TODO: specify input and output DIRECTORIES (not files)
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
}
My execution console output begins with
16/04/13 21:40:57 DEBUG conf.Configuration: Handling deprecation for dfs.namenode.edits.dir
and ends with error messages like:
java.lang.Exception: java.lang.NullPointerException
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.NullPointerException
at java.lang.ProcessBuilder.start(Unknown Source)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:483)
at org.apache.hadoop.util.Shell.run(Shell.java:456)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:722)
at org.apache.hadoop.yarn.util.WindowsBasedProcessTree.isAvailable(WindowsBasedProcessTree.java:57)
at org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(ResourceCalculatorProcessTree.java:233)
at org.apache.hadoop.mapred.Task.initialize(Task.java:610)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:324)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Any remedies?
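The NullPointerException thrown from ProcessBuilder.start inside WindowsBasedProcessTree.isAvailable is the classic symptom of Hadoop on Windows not finding winutils.exe, so the shell command it tries to run is null. The usual remedy is to install a winutils.exe build matching your Hadoop version under %HADOOP_HOME%\bin, set HADOOP_HOME and add %HADOOP_HOME%\bin to PATH, or point Hadoop at it from the driver. A sketch of the latter, assuming winutils.exe is at C:\hadoop\bin\winutils.exe (adjust to your layout):
// Must run before the Configuration and Job are created in WordCount.main.
System.setProperty("hadoop.home.dir", "C:\\hadoop");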