Reduce method in Reducer class is not executing - java

In the code below, the reduce method inside the Reducer class is not executing. Please help me. In my reduce method I want to write output to multiple files, so I have used MultipleOutputs.
/**
 * Driver plus mapper/reducer for a job that runs configured validators over
 * comma-separated input lines and emits the lines that fail validation.
 */
public class DataValidation {
/**
 * Mapper that validates each input line with the validators configured for the
 * file the line came from, and writes failing lines keyed by that file name.
 */
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
// 1 while the current line has not yet produced a failure record; set to 0 once one is written.
int flag = 1;
// Outcome of the most recent LineValidator.validate call.
boolean result;
// File name -> its FileConfig, filled in setup().
private HashMap<String, FileConfig> fileConfigMaps = new HashMap<String, FileConfig>();
// File name -> validators built for that file, filled in setup().
private HashMap<String, List<LineValidator>> mapOfValidators = new HashMap<String, List<LineValidator>>();
// Never populated: the code that would fill it is commented out in setup().
private HashMap<String, List<Processor>> mapOfProcessors = new HashMap<String, List<Processor>>();
/**
 * Parses the validation configuration and builds, for every configured file,
 * its FileConfig entry and its list of validators.
 */
protected void setup(Context context) throws IOException {
System.out.println("configure inside map class");
ConfigurationParser parser = new ConfigurationParser();
// Parsed from a freshly created Configuration, not from context.getConfiguration().
Config config = parser.parse(new Configuration());
List<FileConfig> file = config.getFiles();
for (FileConfig f : file) {
try {
fileConfigMaps.put(f.getName(), f);
System.out.println("quotes in" + f.isQuotes());
System.out.println("file from xml : " + f.getName());
ValidationBuilder builder = new ValidationBuilder();
// ProcessorBuilder constructor = new ProcessorBuilder();
List<LineValidator> validators;
validators = builder.build(f);
// List<Processor> processors = constructor.build(f);
mapOfValidators.put(f.getName(), validators);
// mapOfProcessors.put(f.getName(),processors);
} catch (Exception e) {
// A failure for one file is only printed; the remaining files are still processed.
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
 * Splits the line on commas, optionally strips double quotes from each field,
 * and runs the file's validators; the first failing validator causes the line
 * plus its failure description to be written with the file name as key.
 */
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// String filename = ((FileSplit) context.getInputSplit()).getPath()
// .getName();
FileSplit fs = (FileSplit) context.getInputSplit();
String fileName = fs.getPath().getName();
System.out.println("filename : " + fileName);
String line = value.toString();
String[] csvDataArray = null;
// NOTE(review): this lookup and the FileConfig lookup below return null for a file
// that was not in the parsed configuration, which would fail further down - confirm
// that every input file is configured.
List<LineValidator> lvs = mapOfValidators.get(fileName);
flag = 1;
// Limit -1 keeps trailing empty fields.
csvDataArray = line.split(",", -1);
FileConfig fc = fileConfigMaps.get(fileName);
System.out.println("filename inside fileconfig " + fc.getName());
System.out.println("quote values" + fc.isQuotes());
if (fc.isQuotes()) {
for (int i = 0; i < csvDataArray.length; i++) {
csvDataArray[i] = csvDataArray[i].replaceAll("\"", "");
}
}
for (LineValidator lv : lvs) {
// Once a failure has been written (flag == 0) the remaining validators are skipped,
// so at most one record is emitted per input line.
if (flag == 1) {
result = lv.validate(csvDataArray, fileName);
if (result == false) {
String write = line + "," + lv.getFailureDesc();
System.out.println("write" + write);
System.out.println("key" + new Text(fileName));
// output.collect(new Text(filename), new Text(write));
context.write(new Text(fileName), new Text(write));
flag = 0;
if (lv.stopValidation(csvDataArray) == true) {
break;
}
}
}
}
}
/** Only logs that the mapper is finishing. */
protected void cleanup(Context context) {
System.out.println("clean up in mapper");
}
}
/**
 * Reducer intended to write every failing line with a NullWritable key.
 */
public static class Reduce extends Reducer<Text, Text, NullWritable, Text> {
// NOTE(review): this method takes an Iterator<Text>, whereas Reducer.reduce takes an
// Iterable. It is therefore an overload, not an override, and the framework never
// calls it - this is the defect the question is about.
protected void reduce(Text key, Iterator<Text> values, Context context)
throws IOException, InterruptedException {
System.out.println("inside reduce method");
while (values.hasNext()) {
System.out.println(" Nullwritable value" + NullWritable.get());
System.out.println("key inside reduce method" + key.toString());
context.write(NullWritable.get(), values.next());
// out.write(NullWritable.get(), values.next(), "/user/hadoop/"
// + context.getJobID() + "/" + key.toString() + "/part-");
}
}
}
/**
 * Configures and runs the job; args[0] is the input path and args[1] the output path.
 */
public static void main(String[] args) throws Exception {
System.out.println("hello");
Configuration configuration = getConf();
Job job = Job.getInstance(configuration);
job.setJarByClass(DataValidation.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// The boolean result of waitForCompletion is not used.
job.waitForCompletion(true);
}
/** Returns a new default Configuration. */
private static Configuration getConf() {
return new Configuration();
}
}

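You have not properly overridden the reduce method: it must take an Iterable, not an Iterator. With an Iterator parameter your method is only an overload, so Hadoop keeps calling the default Reducer.reduce instead of yours. Use this: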
public void reduce(Key key, Iterable values,
Context context) throws IOException, InterruptedException

Related

why is the first output line in map reduce null in java

I don't understand why the first output of my map reduce job is 0 and null
The output is : url ; number of visits
and here is the mapper class :
/**
 * Mapper that matches each access-log line against a fixed pattern and writes
 * capture group 1 as the key and capture group 5, parsed as an int, as the value.
 */
public class WordCountMapper extends
Mapper<LongWritable, Text, Text, IntWritable>
{
public void map(LongWritable cle, Text valeur, Context sortie)
throws IOException
{
// Defaults that are kept whenever the line is empty or does not match the pattern.
String url="";
int nbVisites=0;
// The pattern is recompiled on every call to map.
Pattern httplogPattern = Pattern.compile("([^\\s]+) - - \\[(.+)\\] \"([^\\s]+) (/[^\\s]*) HTTP/[^\\s]+\" [^\\s]+ ([0-9]+)");
String ligne = valeur.toString();
if (ligne.length()>0) {
Matcher matcher = httplogPattern.matcher(ligne);
if (matcher.matches()) {
url = matcher.group(1);
nbVisites = Integer.parseInt(matcher.group(5));
}
}
Text urlText = new Text(url);
IntWritable value = new IntWritable(nbVisites);
try
{
// NOTE(review): this write is unconditional, so a non-matching line is emitted as
// ("", 0). That is the source of the empty first output line the question asks about.
sortie.write(urlText, value);
System.out.println(urlText + " ; " + value);
}
catch (InterruptedException e)
{
// The interruption is only printed; it is not propagated.
e.printStackTrace();
}
}
and reducer :
public class WordCountReducer extends
Reducer<Text, IntWritable, Text, IntWritable>
{
public void reduce(Text key, Iterable<IntWritable> values, Context sortie) throws IOException, InterruptedException
{
Iterator<IntWritable> it = values.iterator();
int nb=0;
while (it.hasNext()) {
nb = nb + it.next().get();
}
try {
sortie.write(key, new IntWritable(nb));
System.out.println(key.toString() + ";" + nb);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
Each line of the input file looks like this :
199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] "GET /history/apollo/ HTTP/1.0" 200 6245
and here is the output :
0
04-dynamic-c.rotterdam.luna.net 4
06-dynamic-c.rotterdam.luna.net 1
10.salc.wsu.edu 3
11.ts2.mnet.medstroms.se 1
128.100.183.222 4
128.102.149.149 4
As you can see first line is a couple of null values
Thank you
You get an empty key (not null) because your default mapper Text is an empty string. Then the reducer counts that as 0...
It works fine if you check that your lines actually match before writing the output
Here's a refactored version of your code
public class WebLogDriver extends Configured implements Tool {
public static final String APP_NAME = WebLogDriver.class.getSimpleName();
public static void main(String[] args) throws Exception {
final int status = ToolRunner.run(new Configuration(), new WebLogDriver(), args);
System.exit(status);
}
#Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = Job.getInstance(conf, APP_NAME);
job.setJarByClass(WebLogDriver.class);
// outputs for mapper and reducer
job.setOutputKeyClass(Text.class);
// setup mapper
job.setMapperClass(WebLogDriver.WebLogMapper.class);
job.setMapOutputValueClass(IntWritable.class);
// setup reducer
job.setReducerClass(WebLogDriver.WebLogReducer.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
final Path outputDir = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputDir);
return job.waitForCompletion(true) ? 0 : 1;
}
static class WebLogMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
static final Pattern HTTP_LOG_PATTERN = Pattern.compile("(\\S+) - - \\[(.+)] \"(\\S+) (/\\S*) HTTP/\\S+\" \\S+ (\\d+)");
final Text keyOut = new Text();
final IntWritable valueOut = new IntWritable();
#Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
String line = value.toString();
if (line.isEmpty()) return;
Matcher matcher = HTTP_LOG_PATTERN.matcher(line);
if (matcher.matches()) {
keyOut.set(matcher.group(1));
try {
valueOut.set(Integer.parseInt(matcher.group(5)));
context.write(keyOut, valueOut);
} catch (NumberFormatException e) {
e.printStackTrace();
}
}
}
}
static class WebLogReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
static final IntWritable valueOut = new IntWritable();
#Override
protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
int nb = StreamSupport.stream(values.spliterator(), true)
.mapToInt(IntWritable::get)
.sum();
valueOut.set(nb);
context.write(key, valueOut);
}
}
}

In MapReduce program, reducer is not getting called by Driver

According to map reduce programming model I wrote this program where Driver code is as follows
MY DRIVER CLASS
public class MRDriver extends Configured implements Tool
{
#Override
public int run(String[] strings) throws Exception {
if(strings.length != 2)
{
System.err.println("usage : <inputlocation> <inputlocation> <outputlocation>");
System.exit(0);
}
Job job = new Job(getConf(), "multiple files");
job.setJarByClass(MRDriver.class);
job.setMapperClass(MRMapper.class);
job.setReducerClass(MRReducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(strings[0]));
FileOutputFormat.setOutputPath(job, new Path(strings[1]));
return job.waitForCompletion(true) ? 0 : 1;
//throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
}
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
System.exit(ToolRunner.run(conf, new MRDriver(), args));
}
}
MY MAPPER CLASS
class MRMapper extends Mapper<LongWritable, Text, Text, Text>
{
#Override
public void map(LongWritable key, Text value, Context context)
{
try
{
StringTokenizer iterator;
String idsimval = null;
iterator = new StringTokenizer(value.toString(), "\t");
String id = iterator.nextToken();
String sentival = iterator.nextToken();
if(iterator.hasMoreTokens())
idsimval = iterator.nextToken();
context.write(new Text("unique"), new Text(id + "_" + sentival + "_" + idsimval));
} catch (IOException | InterruptedException e)
{
System.out.println(e);
}
}
MY REDUCER CLASS
/**
 * Reducer that collects "id_sentiment_similarities" records into two maps and,
 * once a fixed number of records has been seen, writes one best match per id.
 */
class MRReducer extends Reducer<Text, Text, Text, Text> {
// Parts of the record currently being processed, split on "_".
String[] records;
// id -> sentiment value (records[1]).
HashMap<Long, String> sentiMap = new HashMap<>();
// id -> space-separated similarity entries (records[2]).
HashMap<Long, String> cosiMap = new HashMap<>();
private String leftIdStr;
private ArrayList<String> rightIDList, rightSimValList, matchingSimValList, matchingIDList;
private double leftVal;
private double rightVal;
private double currDiff;
private double prevDiff;
private int finalIndex;
// Context captured for use by newfun().
Context newContext;
// Number of values processed so far.
private int i;
// NOTE(review): this method is named "reducer", not "reduce", so it does not override
// Reducer.reduce and the framework never calls it - this is the defect the question is about.
public void reducer(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {
for (Text string : value) {
records = string.toString().split("_");
sentiMap.put(Long.parseLong(records[0]), records[1]);
if (records[2] != null) {
cosiMap.put(Long.parseLong(records[0]), records[2]);
}
// Hard-coded trigger count - presumably the expected number of input records; confirm.
if(++i == 2588)
{
newContext = context;
newfun();
}
context.write(new Text("hello"), new Text("hii"));
}
context.write(new Text("hello"), new Text("hii"));
}
/**
 * For every entry of cosiMap, finds the listed ids whose sentiment value is
 * closest to the entry's own and writes the one with the greatest similarity string.
 */
void newfun() throws IOException, InterruptedException
{
for (HashMap.Entry<Long, String> firstEntry : cosiMap.entrySet()) {
try {
leftIdStr = firstEntry.getKey().toString();
rightIDList = new ArrayList<>();
rightSimValList = new ArrayList<>();
matchingSimValList = new ArrayList<>();
matchingIDList = new ArrayList<>();
// Each entry: first 18 characters are the id, everything from index 19 on is the similarity value.
for (String strTmp : firstEntry.getValue().split(" ")) {
rightIDList.add(strTmp.substring(0, 18));
rightSimValList.add(strTmp.substring(19));
}
// The literal "NULL" is treated as a sentiment value of 0.
String tmp = sentiMap.get(Long.parseLong(leftIdStr));
if ("NULL".equals(tmp)) {
leftVal = Double.parseDouble("0");
} else {
leftVal = Double.parseDouble(tmp);
}
tmp = sentiMap.get(Long.parseLong(rightIDList.get(0)));
if ("NULL".equals(tmp)) {
rightVal = Double.parseDouble("0");
} else {
rightVal = Double.parseDouble(tmp);
}
prevDiff = Math.abs(leftVal - rightVal);
int oldIndex = 0;
// First pass: prevDiff ends up as the smallest absolute difference found.
for (String s : rightIDList) {
try {
oldIndex++;
tmp = sentiMap.get(Long.parseLong(s));
if ("NULL".equals(tmp)) {
rightVal = Double.parseDouble("0");
} else {
rightVal = Double.parseDouble(tmp);
}
currDiff = Math.abs(leftVal - rightVal);
if (prevDiff > currDiff) {
prevDiff = currDiff;
}
} catch (Exception e) {
// Any failure for a single id is ignored and the scan continues.
}
}
oldIndex = 0;
// Second pass: collect every id whose difference equals the minimum.
for (String s : rightIDList) {
tmp = sentiMap.get(Long.parseLong(s));
if ("NULL".equals(tmp)) {
rightVal = Double.parseDouble("0");
} else {
rightVal = Double.parseDouble(tmp);
}
currDiff = Math.abs(leftVal - rightVal);
if (Objects.equals(prevDiff, currDiff)) {
matchingSimValList.add(rightSimValList.get(oldIndex));
matchingIDList.add(rightIDList.get(oldIndex));
}
oldIndex++;
}
// Collections.max compares the similarity values as Strings, not as numbers.
finalIndex = rightSimValList.indexOf(Collections.max(matchingSimValList));
newContext.write(new Text(leftIdStr), new Text(" " + rightIDList.get(finalIndex) + ":" + rightSimValList.get(finalIndex)));
} catch (NumberFormatException nfe) {
// Entries with unparsable numbers are skipped without any output.
}
}
}
}
What is the problem, and does it lie in the MapReduce program or in the Hadoop system configuration? Whenever I run this program, it only writes the mapper output into HDFS.
Inside your Reducer class you must override the reduce method. You are declaring a reducer method, which is not correct.
Try modifying your function inside the Reducer class:
@Override
public void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {

Hadoop MapReduce reducer does not start

The map phase runs and then just quits without bothering with the reducer. The job alternately prints "Hello from mapper." and "Writing CellWithTotalAmount" and that's it. The output directory it creates is empty.
I've checked at least a dozen of other "reducer won't start" questions and have not found an answer. I've checked that the output of map is the same as input into reduce, that reduce uses Iterable, that correct output classes have been set, etc.
Job config
/**
 * Entry point that wires HoursMapper and HoursReducer into a job and runs it.
 */
public class HoursJob {
    /**
     * Expects exactly two arguments: the input path and the output path.
     * Exits with -1 on wrong usage, 0 on job success and 1 on job failure.
     */
    public static void main(String[] args) throws Exception {
        boolean wrongArgumentCount = args.length != 2;
        if (wrongArgumentCount) {
            System.err.println("Usage: HoursJob <input path> <output path>");
            System.exit(-1);
        }

        Job job = Job.getInstance();
        job.setJobName("Hours job");
        job.setJarByClass(HoursJob.class);

        // Map side: hour of day -> cell with total amount.
        job.setMapperClass(HoursMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(CellWithTotalAmount.class);

        // Reduce side: one text line per key, no value.
        job.setReducerClass(HoursReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
Mapper
public class HoursMapper
extends Mapper<LongWritable, Text, IntWritable, CellWithTotalAmount> {
static double BEGIN_LONG = -74.913585;
static double BEGIN_LAT = 41.474937;
static double GRID_LENGTH = 0.011972;
static double GRID_HEIGHT = 0.008983112;
#Override
public void map(LongWritable key, Text value, Mapper.Context context)
throws IOException, InterruptedException {
System.out.println("Hello from mapper.");
String recordString = value.toString();
try {
DEBSFullRecord record = new DEBSFullRecord(recordString);
Date pickupDate = record.getPickup();
Calendar calendar = GregorianCalendar.getInstance();
calendar.setTime(pickupDate);
int pickupHour = calendar.get(Calendar.HOUR_OF_DAY);
int cellX = (int)
((record.getPickupLongitude() - BEGIN_LONG) / GRID_LENGTH) + 1;
int cellY = (int)
((BEGIN_LAT - record.getPickupLatitude()) / GRID_HEIGHT) + 1;
CellWithTotalAmount hourInfo =
new CellWithTotalAmount(cellX, cellY, record.getTotal());
context.write(new IntWritable(pickupHour), hourInfo);
} catch (Exception ex) {
System.out.println(
"Cannot parse: " + recordString + "due to the " + ex);
}
}
}
Reducer
public class HoursReducer
extends Reducer<IntWritable, CellWithTotalAmount, Text, NullWritable> {
#Override
public void reduce(IntWritable key, Iterable<CellWithTotalAmount> values,
Context context) throws IOException, InterruptedException {
System.out.println("Hello from reducer.");
int[][] cellRideCounters = getCellRideCounters(values);
CellWithRideCount cellWithMostRides =
getCellWithMostRides(cellRideCounters);
int[][] cellTotals = getCellTotals(values);
CellWithTotalAmount cellWithGreatestTotal =
getCellWithGreatestTotal(cellTotals);
String output = key + " "
+ cellWithMostRides.toString() + " "
+ cellWithGreatestTotal.toString();
context.write(new Text(output), NullWritable.get());
}
//omitted for brevity
}
Custom writable class
public class CellWithTotalAmount implements Writable {
public int cellX;
public int cellY;
public double totalAmount;
public CellWithTotalAmount(int cellX, int cellY, double totalAmount) {
this.cellX = cellX;
this.cellY = cellY;
this.totalAmount = totalAmount;
}
#Override
public void write(DataOutput out) throws IOException {
System.out.println("Writing CellWithTotalAmount");
out.writeInt(cellX);
out.writeInt(cellY);
out.writeDouble(totalAmount);
}
#Override
public void readFields(DataInput in) throws IOException {
System.out.println("Reading CellWithTotalAmount");
cellX = in.readInt();
cellY = in.readInt();
totalAmount = in.readDouble();
}
#Override
public String toString() {
return cellX + " " + cellY + " " + totalAmount;
}
}
I think an exception is being thrown inside the reduce function, so the framework cannot complete the job properly:
public class HoursReducer
extends Reducer<IntWritable, CellWithTotalAmount, Text, NullWritable> {
#Override
public void reduce(IntWritable key, Iterable<CellWithTotalAmount> values,
Context context) throws IOException, InterruptedException {
System.out.println("Hello from reducer.");
try{
int[][] cellRideCounters = getCellRideCounters(values);
if(cellRideCounter[0].length>0){ // control it before executing it. more explanation is above
CellWithRideCount cellWithMostRides =
getCellWithMostRides(cellRideCounters);
int[][] cellTotals = getCellTotals(values);
CellWithTotalAmount cellWithGreatestTotal =
getCellWithGreatestTotal(cellTotals);
String output = key + " "
+ cellWithMostRides.toString() + " "
+ cellWithGreatestTotal.toString();
context.write(new Text(output), NullWritable.get());
}
}catch(Exception e)
e.printstack();
return;
{
}
}
Add a try-catch around the body of the reduce function so that any exception is caught and printed.
Return from the function in the catch block.
Also add an if statement before calling getCellWithMostRides(..); I think the issue is there. I guessed at the condition for the if statement, so change it to whatever check is appropriate for your data.

Hadoop mapper is never called, custom input format might be the issue

So I am writing a little test program just to get the hang of Hadoop's InputFormat classes. I already had a word search built which took in lines as values and searched for the word line by line. I wanted to see if I could get Hadoop to take in values word by word, but Hadoop doesn't seem to like that and keeps giving me the results of the default mapper. My mapper's initialize function is never even called.
I do know my record reader is called and that it is doing more or less what it is supposed to and I'm pretty sure the output of the record reader is what my mapper is searching for so why does hadoop decide not to call it?
Here is the relevant code
Input Format Class
public class WordReader extends FileInputFormat<Text, Text> {
#Override
public RecordReader<Text, Text> createRecordReader(InputSplit split,
TaskAttemptContext context) {
return new MyWholeFileReader();
}
}
Record Reader
public class MyWholeFileReader extends RecordReader<Text, Text> {
private long start;
private LineReader in;
private Text key = null;
private Text value = null;
private ArrayList<String> outputvalues;
public void initialize(InputSplit genericSplit,
TaskAttemptContext context) throws IOException {
outputvalues = new ArrayList<String>();
FileSplit split = (FileSplit) genericSplit;
Configuration job = context.getConfiguration();
start = split.getStart();
final Path file = split.getPath();
// open the file and seek to the start of the split
FileSystem fs = file.getFileSystem(job);
FSDataInputStream fileIn = fs.open(split.getPath());
in = new LineReader(fileIn, job);
if (key == null) {
key = new Text();
}
key.set(split.getPath().getName());
if (value == null) {
value = new Text();
}
}
public boolean nextKeyValue() throws IOException {
if (outputvalues.size() == 0) {
Text buffer = new Text();
int i = in.readLine(buffer);
String str = buffer.toString();
for (String vals : str.split(" ")) {
outputvalues.add(vals);
}
if (i == 0 || outputvalues.size() == 0) {
key = null;
value = null;
return false;
}
}
value.set(outputvalues.remove(0));
System.out.println(value.toString());
return true;
}
#Override
public Text getCurrentKey() {
return key;
}
#Override
public Text getCurrentValue() {
return value;
}
/**
*
* Get the progress within the split
*/
public float getProgress() {
return 0.0f;
}
public synchronized void close() throws IOException {
if (in != null) {
in.close();
}
}
}
Mapper
/**
 * Mapper meant to emit (word, position of the keyword) for every word that
 * contains the configured keyword.
 */
// NOTE(review): the class extends the new-API Mapper but passes the old-API
// OutputCollector and Reporter as its output type arguments, and neither
// configure(JobContext) nor the four-argument map below overrides a method of that
// Mapper. The framework therefore runs the default mapper - this is the defect the
// question is about.
public class WordSearchMapper extends Mapper<Text, Text, OutputCollector<Text,IntWritable>, Reporter> {
// Word to search for, read from the "keyword" configuration entry.
static String keyword;
// Filter that contains only the keyword.
BloomFilter<String> b;
/** Reads the keyword from the configuration and builds the filter around it. */
public void configure(JobContext jobConf) {
keyword = jobConf.getConfiguration().get("keyword");
System.out.println("keyword>> " + keyword);
b = new BloomFilter<String>(.01,10000);
b.add(keyword);
System.out.println(b.getExpectedBitsPerElement());
}
/**
 * Splits the value on punctuation and whitespace and, for each token accepted by the
 * filter, collects the value with the keyword's position when the value contains it.
 */
public void map(Text key, Text value, OutputCollector<Text,IntWritable> output,
Reporter reporter) throws IOException {
int wordPos;
System.out.println("value.toString()>> " + value.toString());
System.out.println(((FileSplit) reporter.getInputSplit()).getPath()
.getName());
String[] tokens = value.toString().split("[\\p{P} \\t\\n\\r]");
for (String st :tokens) {
// The filter check is followed by an exact contains() check on the whole value.
if (b.contains(st)) {
if (value.toString().contains(keyword)) {
System.out.println("Found one");
wordPos = ((Text) value).find(keyword);
output.collect(value, new IntWritable(wordPos));
}
}
}
}
}
Driver:
public class WordSearch {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf,"WordSearch");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(WordSearchMapper.class);
job.setInputFormatClass( WordReader.class);
job.setOutputFormatClass(TextOutputFormat.class);
conf.set("keyword", "the");
FileInputFormat.setInputPaths(job, new Path("search.txt"));
FileOutputFormat.setOutputPath(job, new Path("outputs"+System.currentTimeMillis()));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
And I figured it out... this is why Hadoop needs to stop supporting multiple versions of itself, or why I should stop jamming multiple tutorials together. It turns out my mapper needs to be declared like this for the mapper and the record reader to interact the way I set them up:
`public class WordSearchMapper extends Mapper { static String keyword;`
I only realized this after looking at my imports and seeing that Reporter came from the package org.apache.hadoop.mapred as opposed to org.apache.hadoop.mapreduce.

Getting 0 output records in hadoop MapReduce

I have the following code in hadoop where the mapper and reducer are as follows:
public static class Map2 extends Mapper<LongWritable, Text, NullWritable, Text>
{
TreeMap<Text, Text> top10 = new TreeMap<Text, Text>();
HashMap<String, String> userInfo = new HashMap<String, String>();
public void setup(Context context) throws IOException, InterruptedException
{
try
{
URI[] uris = DistributedCache.getCacheFiles(context.getConfiguration());
FileSystem fs = FileSystem.get(context.getConfiguration());
if (uris == null || uris.length == 0)
{
throw new IOException("Error reading file from distributed cache. No URIs found.");
}
String path = "./users.dat";
fs.copyToLocalFile(new Path(uris[0]), new Path(path));
BufferedReader br = new BufferedReader(new FileReader(path));
String line = null;
while((line = br.readLine()) != null)
{
String split[] = line.split("\\::");
String age = split[2];
String gender = split[1];
userInfo.put(split[0], gender + "\t" + age);
}
br.close();
}
catch(Exception e)
{
}
}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
try
{
String line = value.toString();
int sum = Integer.parseInt(line.split("\\t")[1]);
String userID = line.split("\\t")[0];
String newKey = sum + " " + userID;
if(userInfo.containsKey(userID))
{
String record = userInfo.get(userID);
String val = userID + "\t" + record + "\t" + sum;
top10.put(new Text(newKey), new Text(val));
if (top10.size() > 10)
{
top10.remove(top10.firstKey());
}
}
}
catch(Exception e)
{
}
}
protected void cleanup(Context context) throws IOException, InterruptedException
{
try
{
for (Text s1 : top10.descendingMap().values())
{
context.write(NullWritable.get(), s1);
}
}
catch(Exception e)
{
}
}
}
public static class Reduce2 extends Reducer<NullWritable, Text, NullWritable, Text>
{
private TreeMap<Text, Text> top10 = new TreeMap<Text, Text>();
public void reduce(NullWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
try
{
String line = values.toString();
String sum = line.split("\\t")[3];
String userID = line.split("\\t")[0];
String gender = line.split("\\t")[1];
String age = line.split("\\t")[2];
String newKey = sum + " " + userID;
String val = userID + "\t" + gender + "\t" + age + "\t" + sum;
top10.put(new Text(newKey), new Text(val));
if(top10.size() > 10)
{
top10.remove(top10.firstKey());
}
}
catch(Exception e)
{
}
}
protected void cleanup(Context context) throws IOException, InterruptedException
{
try
{
for (Text s1 : top10.descendingMap().values())
{
context.write(NullWritable.get(), s1);
}
}
catch(Exception e)
{
}
}
}
The driver method is as follows:
// Phase 2 driver: register the user table as a cache file, then configure and run the job.
Configuration conf2 = new Configuration();
// The cache file is registered on conf2 before the Job is built from it.
Path usersFile = new Path("/Spring2014_HW-1/input_HW-1/users.dat");
DistributedCache.addCacheFile(usersFile.toUri(), conf2);

Job job2 = new Job(conf2, "Phase2");
job2.setJarByClass(MapSideJoin.class);

// Mapper and reducer.
job2.setMapperClass(Map2.class);
job2.setReducerClass(Reduce2.class);

// Record types and formats.
job2.setOutputKeyClass(NullWritable.class);
job2.setOutputValueClass(Text.class);
job2.setInputFormatClass(TextInputFormat.class);
job2.setOutputFormatClass(TextOutputFormat.class);

// Input comes from args[1], output goes to args[2].
Path phase2Input = new Path(args[1]);
Path phase2Output = new Path(args[2]);
FileInputFormat.addInputPath(job2, phase2Input);
FileOutputFormat.setOutputPath(job2, phase2Output);

//job2.setNumReduceTasks(1);
job2.waitForCompletion(true);
I get the message map output records = 10 and reduce output records = 0, even though I emit output from the reducer. Where does the reducer's output disappear to?
Thanks.

Categories

Resources