Exception in thread "main" java.lang.NoSuchMethodException - java

This is my word count program for MapReduce. I created the jar file successfully, but when I execute it on Hadoop I get the error shown below.
package com.niit;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class WordCount {
public static class Map extends Mapper<LongWritable,Text,Text,IntWritable> {
public void map(LongWritable key, Text value,Context context) throws IOException,InterruptedException{
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
value.set(tokenizer.nextToken());
context.write(value, new IntWritable(1));
}
}
}
public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException,InterruptedException {
int sum=0;
for(IntWritable x: values) {
sum+=x.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf= new Configuration();
Job job = new Job(conf,"My Word Count Program");
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path outputPath = new Path(args[1]);
//Configuring the input/output path from the filesystem into the job
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//deleting the output path automatically from hdfs so that we don't have to delete it explicitly
outputPath.getFileSystem(conf).delete(outputPath);
//exiting the job only if the flag value becomes false
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
and this is the error message I'm getting:
Command: yarn jar WordCount-1.0.jar com.niit.WordCount /usr/lib/hadoop-2.2.0/workspace/input/simple.txt /usr/lib/hadoop-2.2.0/workspace/output
Exception in thread "main" java.lang.NoSuchMethodException: com.niit.WordCount.main([Ljava.lang.String;)
at java.lang.Class.getMethod(Class.java:1665)
at org.apache.hadoop.util.RunJar.main(RunJar.java:206)
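The code above does declare public static void main(String[] args), so if RunJar cannot find that method, one common cause is that WordCount-1.0.jar contains a stale or different com.niit.WordCount class than the source shown here. A quick way to inspect what was actually packaged (a diagnostic suggestion only; the jar name is taken from the command above):
jar tf WordCount-1.0.jar | grep WordCount
javap -classpath WordCount-1.0.jar com.niit.WordCount
javap lists the declared methods of the class inside the jar, so main(String[]) should appear in its output if the class was packaged correctly.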

Related

Hadoop mapreduce with dictionary

I am trying to add another feature to the WordCount example provided in the Apache Hadoop MapReduce tutorial (https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html#Partitioner). It should compare the words in the input file against a dictionary file and count only the words that appear in the dictionary. I have read up on this, but there are not many examples for me to learn from.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
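One possible way to restrict counting to dictionary words (a sketch, not the tutorial's code): load the dictionary into a HashSet in the mapper's setup() method and emit only tokens found in that set. The property name wordcount.dict.path and the idea of passing the dictionary location through the Configuration are assumptions; it also needs java.util.Set, java.util.HashSet, java.io.BufferedReader, java.io.InputStreamReader and org.apache.hadoop.fs.FileSystem imports in addition to the ones above.
public static class DictionaryMapper extends Mapper<Object, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1);
  private final Text word = new Text();
  private final Set<String> dictionary = new HashSet<String>();

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    // "wordcount.dict.path" is an assumed property name; the driver would set it,
    // e.g. conf.set("wordcount.dict.path", args[2]), before creating the Job.
    Path dictPath = new Path(conf.get("wordcount.dict.path"));
    FileSystem fs = dictPath.getFileSystem(conf);
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(dictPath)));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        dictionary.add(line.trim());
      }
    } finally {
      reader.close();
    }
  }

  @Override
  public void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      String token = itr.nextToken();
      // emit only the words that appear in the dictionary file
      if (dictionary.contains(token)) {
        word.set(token);
        context.write(word, one);
      }
    }
  }
}
The driver would then call job.setMapperClass(DictionaryMapper.class) instead of TokenizerMapper and leave the reducer unchanged.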

How can I compile Java source code?

I am trying to build a project that finds the maximum of the average temperatures for each month. Here is my code:
File Map.java
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class Map extends Mapper<LongWritable, Text, Text, FloatWritable> {
private FloatWritable average = new FloatWritable();
private float maxFloat, minFloat, averageFloat;
private Text word = new Text();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
StringTokenizer line = new StringTokenizer(value.toString(), ",");
if (line.countTokens() > 0) {
word.set(line.nextToken().substring(2,8));
if (line.hasMoreTokens()) {
maxFloat = Float.parseFloat(line.nextToken());
}
if (line.hasMoreTokens()) {
minFloat = Float.parseFloat(line.nextToken());
}
averageFloat = (minFloat + maxFloat) / 2;
average.set(averageFloat);
context.write(word, average);
}
}
}
File Reduce.java
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;
public class Reduce extends Reducer<Text, FloatWritable, Text, FloatWritable> {
private float max_temp = Float.MIN_VALUE;
private float temp = 0;
@Override
protected void reduce(Text key, Iterable<FloatWritable> values, Context context)
throws IOException, InterruptedException {
Iterator<FloatWritable> itr = values.iterator();
while (itr.hasNext()) {
temp = itr.next().get();
if (temp > max_temp) {
max_temp = temp;
}
}
context.write(key, new FloatWritable(max_temp));
}
}
File MaxTempDriver.java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxTempDriver {
public static void main(String[] args) throws Exception {
// Create a new job
Job job = new Job();
// Set job name to locate it in the distributed environment
job.setJarByClass(MaxTempDriver.class);
job.setJobName("Max Temperature");
// Set input and output Path, note that we use the default input format
// which is TextInputFormat (each record is a line of input)
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Set Mapper and Reducer class
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
// Set Output key and value
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
And now I don't know how to compile these three files. I have read some tutorials on the internet, but they all seem to use a single file with the map and reduce classes together. How do I compile these files?
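One common way to compile and package the three files from the command line (a sketch: it assumes all three .java files are in the current directory, the hadoop command is on the PATH, and the jar name and input/output paths are placeholders):
mkdir -p classes
javac -classpath "$(hadoop classpath)" -d classes Map.java Reduce.java MaxTempDriver.java
jar -cvf maxtemp.jar -C classes .
hadoop jar maxtemp.jar MaxTempDriver /path/to/input /path/to/output
javac compiles all three files together, so it does not matter that the mapper, reducer, and driver live in separate files; and since the classes are in the default package, MaxTempDriver is already the fully qualified class name.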

Error:(63, 40) java: incompatible types: org.apache.hadoop.mapreduce.Job cannot be converted to org.apache.hadoop.mapred.JobConf

I was running a simple Hadoop program in the IntelliJ IDE, but I get the following error when I try to compile:
Error:(63, 40) java: incompatible types:
org.apache.hadoop.mapreduce.Job cannot be converted to
org.apache.hadoop.mapred.JobConf
Here is my code for this small program:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
import java.util.StringTokenizer;
public class WordCount {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
public static class IntSumReducer
extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Root cause:
You used the old API's FileOutputFormat (from the org.apache.hadoop.mapred package); its setOutputPath takes a JobConf as the first parameter, not a Job. The FileInputFormat you used, however, is from the new API (org.apache.hadoop.mapreduce), and its addInputPath takes a Job as the first parameter.
(Job belongs to the new API; JobConf belongs to the old API.)
Solution:
Change this line in your code:
import org.apache.hadoop.mapred.FileOutputFormat;
to
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
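For contrast, the two setOutputPath signatures involved are roughly (paraphrased from the Hadoop API, not part of the original answer):
// old API: org.apache.hadoop.mapred.FileOutputFormat
//   static void setOutputPath(JobConf conf, Path outputDir)
// new API: org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
//   static void setOutputPath(Job job, Path outputDir)
With the mapreduce import in place, FileOutputFormat.setOutputPath(job, new Path(args[1])) accepts the Job object and the code compiles.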

ClassNotFound Exception while running MapReduce program

I am writing a MapReduce program for matrix addition. Since it requires two input files, I am using MultipleInputs. I have the following classes:
MatAddMapper1.java
package mapred;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MatAddMapper1 extends Mapper<LongWritable, Text, Text, IntWritable>
{
//private static final int MISSING = 9999;
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String line = value.toString();
String[] content = line.split (" ");
String key1 = content[0] + " " + content[1];
int val = Integer.parseInt(content[2]);
// Key is (i,j)
context.write(new Text(key1), new IntWritable(val));
}
}
MatAddMapper2.java is similar.
MatAddReducer.java
package mapred;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MatAddReducer
extends Reducer<Text, IntWritable, Text, IntWritable>
{
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException
{
int val = 0;
for (IntWritable value : values)
{
val = val + value.get();
}
context.write(key, new IntWritable(val));
}
}
MatAddApp.java (Main class)
package mapred;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MatAddApp extends Configured implements Tool
{
public int run(String[] args) throws Exception
{
Configuration conf = new Configuration();
@SuppressWarnings("deprecation")
Job job = new Job(conf, "Matrix Addition");
job.setJarByClass(MatAddApp.class);
MultipleInputs.addInputPath(job,new Path(args[0]),TextInputFormat.class,MatAddMapper1.class);
MultipleInputs.addInputPath(job,new Path(args[1]),TextInputFormat.class,MatAddMapper2.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.setReducerClass(MatAddReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
return (job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception
{
int ecode = ToolRunner.run(new MatAddApp(), args);
System.exit(ecode);
}
}
I am using Eclipse and created a jar file, MatAddition.jar. M.txt and N.txt are the input matrices. When I tried to run the program on my Hadoop cluster, I got the following error:
Exception in thread "main" java.lang.ClassNotFoundException: MatAddApp
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:278)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
The issue is the class name. Since MatAddApp is declared in the mapred package, the driver class has to be referred to by its fully qualified name, mapred.MatAddApp — in the driver:
job.setJarByClass(mapred.MatAddApp.class);
and, in particular, when the class name is passed to hadoop jar on the command line (see the example below).
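A sketch of the submit command using the fully qualified name (the jar name comes from the question; the input and output paths are placeholders):
hadoop jar MatAddition.jar mapred.MatAddApp /path/to/M.txt /path/to/N.txt /path/to/output
MatAddApp's run() method expects two input paths (one per mapper) and one output path, which is why three arguments are passed.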
Input.txt
A,0|0,1.0
A,0|1,2.0
A,0|2,3.0
A,0|3,4.0
A,1|0,5.0
A,1|1,6.0
A,1|2,7.0
A,1|3,8.0
B,0|0,1.0
B,0|1,2.0
B,0|2,3.0
B,0|3,4.0
B,1|0,5.0
B,1|1,6.0
B,1|2,7.0
B,1|3,8.0
Here, the first column represents the name of the matrix, the second column represents the index, and the third represents the value.
MatrixAdd.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import java.util.ArrayList;
import java.util.Iterator;
import java.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MatrixAdd {
public static class MatMapper extends Mapper<Object, Text, Text, DoubleWritable>{
private Text index = new Text();
private final static DoubleWritable num = new DoubleWritable();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException{
String record = value.toString();
String[] parts = record.split(",");
index.set(parts[1]);
num.set(Double.parseDouble(parts[2]));
context.write(index, num);
}
}
public static class MatReducer extends Reducer<Text,DoubleWritable,Text,DoubleWritable> {
private DoubleWritable result = new DoubleWritable();
public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
double sumValue = 0;
for(DoubleWritable val: values) {
sumValue += val.get();
}
result.set(sumValue);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "max temp");
job.setJarByClass(MatrixAdd.class);
job.setMapperClass(MatMapper.class);
job.setCombinerClass(MatReducer.class);
job.setReducerClass(MatReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Output:
0|0 2.0
0|1 4.0
0|2 6.0
0|3 8.0
1|0 10.0
1|1 12.0
1|2 14.0
1|3 16.0

./bin/hadoop jar throws: Exception in thread "main" java.lang.ClassNotFoundException: org.apache.hadoop.examples.WordCount2

I copied the Hadoop example source WordCount.java into a directory I created; it compiled, and I generated a jar and ran it with ./bin/hadoop. Then I modified WordCount.java, saved it as WordCount2.java in the same directory, compiled it, and generated a jar file, but when I run it with ./bin/hadoop jar it throws the error below:
Exception in thread "main" java.lang.ClassNotFoundException: org.apache.hadoop.examples.WordCount2
This is how I ran the Hadoop job:
./usr/local/hadoop/bin/hadoop jar ./usr/local/hadoop/bin/hadoop/playground/wordcount2.jar org.apache.hadoop.examples.wordcount2 ./usr/local/hadoop/bin/hadoop/input ./usr/local/hadoop/bin/hadoop/output
This is my code:
package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount2 {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString()," \t\n\r\f,.:;?![]'");
while (itr.hasMoreTokens()) {
word.set(itr.nextToken().toLowerCase());
context.write(word, one);
}
}
}
public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
if (sum > 4) context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: wordcount <in> [<in>...] <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount2.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
for (int i = 0; i < otherArgs.length - 1; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,
new Path(otherArgs[otherArgs.length - 1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
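Two things are worth checking here (a diagnostic suggestion, not an answer from the original page). First, Java class names are case sensitive, so org.apache.hadoop.examples.wordcount2 in the command above is not the same name as the declared class org.apache.hadoop.examples.WordCount2. Second, listing the jar contents confirms what was actually packaged:
jar tf wordcount2.jar
The listing should contain org/apache/hadoop/examples/WordCount2.class (plus the TokenizerMapper and IntSumReducer inner classes) for the class name on the command line to resolve.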
