Reduce-Side Join ArrayIndexOutOfBounds Exception using Java - java

trying to run a Reduce Side join on 3 datasets. Unfortunalyy, i keep getting an ArrayIndex exception. I have tried to handle it with a try & Catch, but to no avail. Can someone please recommend a solution?
package Joins;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Reducer;
public class JoinReducer extends Reducer<Text, Text, Text, Text>
{
Map<String, String> divStkJoin = new HashMap<String, String>();
Map<String, String> divStkMetaJoin = new HashMap<String, String>();
Map<String, String> stockData = new HashMap<String, String>();
Map<String, String> metaData = new HashMap<String, String>();
Map<String, String> divData = new HashMap<String, String>();
Text k = new Text();
Text v = new Text();
public void setup(Context context)
{
metaData.clear();
divData.clear();
stockData.clear();
divStkJoin.clear();
divStkMetaJoin.clear();
}
public void reduce(Text keys, Iterable<Text> values, Context context)
{
Iterator it = values.iterator();
while(it.hasNext()){
String [] keyLine = keys.toString().split(",");
String valueLine = values.toString();
try {
if(keyLine[4].equals("_s"))
{
String keyLineStock = keyLine[0] + "," + keyLine[1] +"," + keyLine[2] + "," + keyLine[3];
stockData.put(keyLineStock, valueLine);
}
else if(keyLine[4].equals("_d"))
{
String keyLineDiv = keyLine[0] + "," + keyLine[1] +"," + keyLine[2] + "," + keyLine[3];
divData.put(keyLineDiv, valueLine);
}
else if (keyLine[1].equals("_m"))
{
String keyLineMeta = keyLine[0];
metaData.put(keyLineMeta, valueLine);
}
else
return;
} catch (ArrayIndexOutOfBoundsException e){return;}
}
//JOINS
for(Entry<String, String> entryStock: stockData.entrySet())
for(Entry<String, String> entryDiv: divData.entrySet())
{
if(entryStock.getKey().equals(entryDiv.getKey()))
{
divStkJoin.put(entryStock.getKey(), entryStock.getValue()+ ","+ entryDiv.getValue());
}
}
for(Entry<String, String> entrydivStkJoin: divStkJoin.entrySet())
{
String [] entrydivStkJoinKeyArr = entrydivStkJoin.getKey().toString().split(",");
for(Entry<String, String> meta: metaData.entrySet())
{
String [] metaArr = meta.getKey().split(",");
if(metaArr[0].equals(entrydivStkJoinKeyArr[1]))
{
divStkMetaJoin.put(entrydivStkJoin.toString(), meta.getValue());
}
}
}
}
public void cleanup(Context context) throws IOException, InterruptedException
{
String keyJ;
String valJ;
for(Map.Entry<String, String> entry : divStkMetaJoin.entrySet())
{
keyJ=entry.getKey();
valJ=entry.getValue();
Text k = new Text(keyJ);
Text v = new Text(valJ);
context.write(k, v);
}
}
}

I think the error comes from this line:
if(keyLine[4].equals("_s")),
My solucion would be ckecking if keyLine is null or if keyLine < 4:
if(ss == null || ss.length()<4){
return;
}

Related

Group and Aggregate List of Map<String, Object>

I have a List<Map<String, Object>> input like below:
[{
CURRENCY = USD,
STATUS = NEW,
PUBLISH_REGION = DEL,
SOURCE = ALADDIN,
RECON_STATUS = null,
JOB_ID_COUNT = 783
}, {
CURRENCY = USD,
STATUS = IN_PROGRESS,
PUBLISH_REGION = DEL,
SOURCE = ALADDIN,
RECON_STATUS = null,
JOB_ID_COUNT = 462
}, {
CURRENCY = USD,
STATUS = NEW,
PUBLISH_REGION = DEL,
SOURCE = GROUP,
RECON_STATUS = null,
JOB_ID_COUNT = 4
}]
I am trying to create another List<Map<String, Object>> by grouping on CURRENCY, PUBLISH_REGION, SOURCE and RECON_STATUS columns. And add all unique STATUS values as pivot to the output map and use JOB_ID_COUNT to summarize/aggregate the count.
List<String> groups = new ArrayList<>(asList("SOURCE", "RECON_STATUS", "PUBLISH_REGION", "CURRENCY"));
List<Map<String, Object>> = input.stream()
.collect(groupingBy(row -> row.get(groups.get(0)), mapping(map -> map.get(groups.get(0)), toList())));
I am expecting below response:
Output:
[{
CURRENCY = USD,
PUBLISH_REGION = DEL,
SOURCE = ALADDIN,
RECON_STATUS = null,
NEW = 783,
IN_PROGRESS = 462
}, {
CURRENCY = USD,
PUBLISH_REGION = DEL,
SOURCE = GROUP,
RECON_STATUS = null,
NEW = 4,
IN_PROGRESS = 0
}]
I am getting compile time error when trying to group by multiple map fields. Single field groupingBy is working fine. Any help is greatly appriciated.
Without Using Custom Class
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class MultipleFieldSorting2 {
private static Map<String, Object> map, map1, map2;
private static List<Map<String, Object>> lst = new ArrayList<>();
static {
map = new HashMap<>();
map.put("CURRENCY", "USD");
map.put("STATUS", "NEW");
map.put("PUBLISH_REGION", "DEL");
map.put("SOURCE", "ALADDIN");
map.put("RECON_STATUS", null);
map.put("JOB_ID_COUNT", "783");
map1 = new HashMap<>();
map1.put("CURRENCY", "USD");
map1.put("STATUS", "IN_PROGRESS");
map1.put("PUBLISH_REGION", "DEL");
map1.put("SOURCE", "ALADDIN");
map1.put("RECON_STATUS", null);
map1.put("JOB_ID_COUNT", "462");
map2 = new HashMap<>();
map2.put("CURRENCY", "USD");
map2.put("STATUS", "NEW");
map2.put("PUBLISH_REGION", "DEL");
map2.put("SOURCE", "GROUP");
map2.put("RECON_STATUS", null);
map2.put("JOB_ID_COUNT", "4");
lst.add(map);
lst.add(map1);
lst.add(map2);
}
public static Map<String, Object> mapper(Map<String, Object> e){
String key = e.get("CURRENCY") + "-" + e.get("PUBLISH_REGION") + "-" + e.get("SOURCE") + "-" + e.get("RECON_STATUS");
Map<String, Object> groupedValue = res.get(key);
if(groupedValue!=null){
groupedValue.put((String) e.get("STATUS"), groupedValue.get("STATUS")!=null ? groupedValue.get("STATUS")+","+e.get("JOB_ID_COUNT") : e.get("JOB_ID_COUNT"));
if(groupedValue.get("NEW")==null){
groupedValue.put("NEW", 0);
}
if(groupedValue.get("IN_PROGRESS")==null){
groupedValue.put("IN_PROGRESS", 0);
}
}else{
groupedValue = new HashMap<>();
res.put(key, groupedValue);
groupedValue.put("CURRENCY", e.get("CURRENCY"));
groupedValue.put("PUBLISH_REGION", e.get("PUBLISH_REGION"));
groupedValue.put("SOURCE", e.get("SOURCE"));
groupedValue.put("RECON_STATUS", e.get("RECON_STATUS"));
groupedValue.put((String) e.get("STATUS"), e.get("JOB_ID_COUNT"));
}
return groupedValue;
}
static Map<String, Map<String, Object>> res = new HashMap<>();
public static void main(String[] args) {
List<Map<String, Object>> finalResult = new ArrayList<>();
lst.stream()
.map(MultipleFieldSorting2::mapper)
.forEach(result -> {
if(!finalResult.contains(result))
finalResult.add(result);
});
System.out.println(finalResult);
}
}
Tried this solution and it is working
Stream the source List
Map each value of map in the list to Class MapWrapper(a pojo where each key is a field)
GroupBy using the groupByKey defined in MapWrapper(uses CURRENCY, PUBLISH_REGION, SOURCE and RECON_STATUS columns)
3.a The result is a Map<String, List<MapWrapper>>
4.Stream through the entry set
map - and get the value alone from (Map<String, List<MapWrapper>>)
Map - convert from List<MapWrapper> to Map<String, Object> using MapWrapper::map
Collect to a list
In Short the solution is
List<Map<String, Object>> value = lst.stream()
.map(map -> new MapWrapper(map))
.collect(groupingBy(MapWrapper::groupByKey))
.entrySet()
.stream()
.map(e -> e.getValue())
.map(MapWrapper::map).collect(toList());
Working Code
public class MultipleFieldSorting {
private static Map<String, Object> map, map1, map2;
private static List<Map<String, Object>> lst = new ArrayList<>();
static {
map = new HashMap<>();
map.put("CURRENCY", "USD");
map.put("STATUS", "NEW");
map.put("PUBLISH_REGION", "DEL");
map.put("SOURCE", "ALADDIN");
map.put("RECON_STATUS", null);
map.put("JOB_ID_COUNT", "783");
map1 = new HashMap<>();
map1.put("CURRENCY", "USD");
map1.put("STATUS", "IN_PROGRESS");
map1.put("PUBLISH_REGION", "DEL");
map1.put("SOURCE", "ALADDIN");
map1.put("RECON_STATUS", null);
map1.put("JOB_ID_COUNT", "462");
map2 = new HashMap<>();
map2.put("CURRENCY", "USD");
map2.put("STATUS", "NEW");
map2.put("PUBLISH_REGION", "DEL");
map2.put("SOURCE", "GROUP");
map2.put("RECON_STATUS", null);
map2.put("JOB_ID_COUNT", "4");
lst.add(map);
lst.add(map1);
lst.add(map2);
}
public static void main(String[] args) {
List<Map<String, Object>> value = lst.stream()
.map(map -> new MapWrapper(map))
.collect(groupingBy(MapWrapper::groupByKey))
.entrySet()
.stream()
.map(e -> e.getValue())
.map(MapWrapper::map).collect(toList());
System.out.println(value);
}
}
class MapWrapper {
private String currency;
private String status;
private String publish;
private String source;
private String recon_status;
private String job_id;
public MapWrapper(Map<String, Object> map) {
this.currency = (String) map.get("CURRENCY");
this.status = (String) map.get("STATUS");
this.publish = (String) map.get("PUBLISH_REGION");
this.source = (String) map.get("SOURCE");
this.recon_status = (String) map.get("RECON_STATUS");
this.job_id = (String) map.get("JOB_ID_COUNT");
}
String groupByKey() {
return new StringBuilder().append(this.getCurrency()).append("-").append(this.publish).append("-")
.append(this.source).append("-").append(this.recon_status).toString();
}
public static Map<String, Object> map(List<MapWrapper> lst){
Map<String, Object> res = new HashMap<>();
res.put("CURRENCY",lst.get(0).getCurrency());
res.put("PUBLISH_REGION",lst.get(0).getPublish());
res.put("SOURCE",lst.get(0).getSource());
res.put("RECON_STATUS",lst.get(0).getRecon_status());
for(MapWrapper m : lst){
res.put(m.getStatus(), m.getJob_id());
}
if(res.get("NEW")==null){
res.put("NEW", 0);
}
if(res.get("IN_PROGRESS")==null){
res.put("IN_PROGRESS", 0);
}
return res;
}
String getCurrency() {
return currency;
}
void setCurrency(String currency) {
this.currency = currency;
}
String getStatus() {
return status;
}
void setStatus(String status) {
this.status = status;
}
String getPublish() {
return publish;
}
void setPublish(String publish) {
this.publish = publish;
}
String getSource() {
return source;
}
void setSource(String source) {
this.source = source;
}
String getJob_id() {
return job_id;
}
void setJob_id(String job_id) {
this.job_id = job_id;
}
String getRecon_status() {
return recon_status;
}
void setRecon_status(String recon_status) {
this.recon_status = recon_status;
}
}

Restructuring JSON file in JAVA

I am having the following sample from a JSON file:
[
{
"0":
{
"File":"file1.java",
"Class":"com.ETransitionActionType",
"Method":"values",
"Annotation":"Not Found"
}
},
{
"1":
{
"File":"file2.java",
"Class":"com.ETransitionParams",
"Method":"values",
"Annotation":"Not Found"
}
},
{
"2":
{
"File":"file3.java",
"Class":"com.phloc.commons.id.IHasID",
"Method":"getID",
"Annotation":"Not Found"
}
},
{
"4":
{
"File":"file3.java",
"Class":"com.ExecuteTransitionActionHandler",
"Method":"createBadRequestResponse",
"Annotation":"Not Found"
}
},
{
"5":
{
"File":"file3.java",
"Class":"com.ExecuteTransitionActionHandler",
"Method":"extractParametersFromAction",
"Annotation":"Not Found"
}
}]
How can I restructure this file using java so that it looks like:
[{
"file1.java": {
"com.ETransitionActionType": {
"values": {
"Annotation": "Not Found"
}
}
}
},
{
"file2.java": {
"com.ETransitionParams": {
"values": {
"Annotation": "Not Found"
}
}
}
},
{
"file3.java": {
"com.phloc.commons.id.IHasID": {
"getID": {
"Annotation": "Not Found"
}
},
"com.ExecuteTransitionActionHandler": {
"getID": {
"Annotation": "Not Found"
},
"extractParametersFromAction": {
"Annotation": "Not Found"
}
}
}
}
]
i.e. Going through the JSON file, searching it, and wherever the "File" attribute has the same value("file3.java" for example), we list all the relevant classes and methods inside and the same applies for the "Class" attribute, if it has the same name, we list all the methods inside it(So it's like comparing and sorting the values for the "File" and "Class" attributes).
I started with JSON simple library and wrote like the code below, but don't know how to go further!
Object object = (JSONArray)parser.parse(new FileReader("rawOutput.json"));
JSONArray jsonArray = (JSONArray) object;
for(int i = 0; i < jsonArray.size(); i++) {
System.out.println(jsonArray.get(i));
JSONObject jsonObject = (JSONObject)jsonArray.get(i);
String c = jsonObject.get("" + i + "").toString();
}
Any ideas? Your help is really appreciated!!!
I wrote a code to do what do you need but first you have to add this library to your project if you don't have already org.json.zip library, because I didn't have a library for parsing Json texts so I used this library for formatting the Json data, and I'm sorry if you don't understand the code completely because your request isn't so easy as yourself know and I created three functions to get the result and although I wrote some comments to understand easily, this is the code:-
Edit
...
import org.json.*;
...
...
public static void main(String[] args) throws JSONException {
System.out.println(getFormattedJson("json text"));
}
private static String getFormattedJson(String text) throws JSONException{
JSONArray result = new JSONArray();
JSONArray jsonArray = null;
//get the json array
jsonArray = new JSONArray(text);
//loop through items in the array and insert them formatted to the result
for (int i = 0; i < jsonArray.length(); i++) {
//get object inside the number
JSONObject object = getJsonChild(jsonArray.getJSONObject(i));
//get these attributes
String file = object.getString("File");
String clas = object.getString("Class");
String meth = object.getString("Method");
String anno = object.getString("Annotation");
//create a custom type of the object's attributes
Map<String, String> map = new HashMap<>();
map.put("Annotation", anno);
Map<String, Object> map1 = new HashMap<>();
map1.put(meth, map);
Map<String, Object> map2 = new HashMap<>();
map2.put(clas, map1);
Map<String, Object> map3 = new HashMap<>();
map3.put(file, map2);
//loop through repeating values to also add them to one value as you expected
for (int j = jsonArray.length() - 1; j > i; j--) {
JSONObject obj = getJsonChild(jsonArray.getJSONObject(j));
String file1 = obj.getString("File");
String clas1 = obj.getString("Class");
String meth1 = obj.getString("Method");
String anno1 = obj.getString("Annotation");
if (file1.equals(file)) {
if (map2.containsKey(clas1)) {
if (childrenContains(map2, meth1)) {
//if the difference was annotation value
map.put("Annotation", anno1);
} else {
//if the difference was method names
Map<String, String> map_ = new HashMap<>();
map_.put("Annotation", anno1);
((Map<String, Object>) map2.get(clas1)).put(meth1, map_);
}
} else {
//if the difference was class names
Map<String, String> map_ = new HashMap<>();
map_.put("Annotation", anno1);
Map<String, Object> map1_ = new HashMap<>();
map1_.put(meth1, map_);
map2.put(clas1, map1_);
}
//remove the (value added) object
jsonArray.remove(j);
}
}
//add the map to the result
result.put(map3);
}
return result.toString(4);
}
private static boolean childrenContains(Map<String, Object> map1, String meth1) {
for (String childKey : map1.keySet()) {
Map<String, Object> child = (Map<String, Object>) map1.get(childKey);
if (child.containsKey(meth1))
return true;
}
return false;
}
private static JSONObject getJsonChild(JSONObject object) throws JSONException {
Iterator<String> keys = object.keys();
String key = "";
while (keys.hasNext()) {
key = (String) keys.next();
}
return object.getJSONObject(key);
}
And the result for your sample using my code is:-
[
{"file1.java": {"com.ETransitionActionType": {"values": {"Annotation": "Not Found"}}}},
{"file2.java": {"com.ETransitionParams": {"values": {"Annotation": "Not Found"}}}},
{"file3.java": {
"com.ExecuteTransitionActionHandler": {
"createBadRequestResponse": {"Annotation": "Not Found"},
"extractParametersFromAction": {"Annotation": "Not Found"}
},
"com.phloc.commons.id.IHasID": {"getID": {"Annotation": "Not Found"}}
}}
]
And if you want to get the json data from a file so use the following function to create the JSONArray easily:-
private static JSONArray readFromFile(String filePath){
try {
BufferedReader br = new BufferedReader(new FileReader(filePath));
StringBuilder sb = new StringBuilder();
String line = br.readLine();
while (line != null) {
sb.append(line);
sb.append(System.lineSeparator());
line = br.readLine();
}
return new JSONArray(sb.toString());
} catch (Exception e) {
System.out.println(e.getMessage());
return null;
}
}
And use it instead the text json data:-
...
//get the json array
jsonArray = readFromFile("FilePath");
...
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
public class Foo {
public static void main(String... args) throws IOException {
String json = formatJson(new FileReader("rawOutput.json"));
System.out.println(json);
}
public static String formatJson(Reader reader) throws IOException {
// group array items by fileName
final Function<List<Map<String, Object>>, Map<String, List<Object>>> groupByFileName =
data -> data.stream().collect(Collectors.groupingBy(map -> (String)map.get("File"), TreeMap::new,
Collectors.mapping(Function.identity(), Collectors.toList())));
// convert source item structure into required
final Function<Map.Entry<String, List<Object>>, Map<String, Object>> convert = entry -> {
Map<String, Map<String, Map<String, String>>> tmp = new LinkedHashMap<>();
entry.getValue().stream()
.map(value -> (Map<String, String>)value)
.forEach(map -> {
Map<String, Map<String, String>> classes = tmp.computeIfAbsent(map.get("Class"), cls -> new TreeMap<>());
Map<String, String> methods = classes.computeIfAbsent(map.get("Method"), method -> new TreeMap<>());
map.entrySet().stream()
.filter(e -> !"Class".equals(e.getKey()) && !"Method".equals(e.getKey()) && !"File".equals(e.getKey()))
.forEach(e -> methods.put(e.getKey(), e.getValue()));
});
return Collections.singletonMap(entry.getKey(), tmp);
};
ObjectMapper mapper = new ObjectMapper();
// read json as array of Maps
List<Map<String, Object>> data = Arrays.stream(mapper.readValue(reader, Map[].class))
.map(map -> map.values().iterator().next())
.map(item -> (Map<String, Object>)item)
.collect(Collectors.toList());
return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(groupByFileName.apply(data).entrySet().stream()
.map(convert).collect(Collectors.toList()));
}
}
You could create a map of maps to represent your grouping by "File" and "Class" for your list of (inner) JSON objects. It might look similar to
final Function<JSONObject, String> fileFunction = (JSONObject jsonObject) -> jsonObject.getString("File");
final Function<JSONObject, String> classFunction = (JSONObject jsonObject) -> jsonObject.getString("Class");
final Map<String, Map<String, List<JSONObject>>> groupedJsonObjects = jsonObjects.stream()
.collect(Collectors.groupingBy(fileFunction, Collectors.groupingBy(classFunction)));

I want a method to extract Values that may sometimes contain "& and =" from URL parameters in Java

package com.bml.icbs.ws.param;
import java.util.HashMap;
public class RequestParameter {
private String param;
private HashMap<String, String> hashmap;
public RequestParameter(String param){
this.param=param;
this.hashmap = new HashMap<String, String>();
Integer i;
String key,value;
String myArray[] = this.param.split("&");
for (i=0;i<myArray.length;i++){
key=myArray[i].substring(0,myArray[i].indexOf("="));
value=myArray[i].substring(myArray[i].indexOf("=")+1);
this.hashmap.put(key,value);
}
}
public String getParameterValue(String key){
return this.hashmap.get(key);
}
public String getParam() {
return param;
}
public void setParam(String param) {
this.param = param;
}
public HashMap<String, String> getHashmap() {
return hashmap;
}
public void setHashmap(HashMap<String, String> hashmap) {
this.hashmap = hashmap;
}
}
When I Test the above method in case the URL is "param2=ffffhhh&param1=oooo&param3=pppp&param4=iiii&param5=kkkkk"
The result is:
SIZE 5
URL param2=ffffhhh&param1=oooo&param3=pppp&param4=iiii&param5=kkkkk
HASHMAP {param1=oooo, param2=ffffhhh, param3=pppp, param4=iiii, param5=kkkkk}
PARAM1 = oooo
which is correct
public static void main(String[] args) {
RequestParameter reqParam = new RequestParameter("param2=ffffhhh&param1=oooo&param3=pppp&param4=iiii&param5=kkkkk");
System.out.println("SIZE " + reqParam.getHashmap().size());
System.out.println("URL " + reqParam.getParam());
System.out.println("HASHMAP " + reqParam.getHashmap());
System.out.println("PARAM1 = " reqParam.getParameterValue("param1"));
}
but In case I pass the URL as the following one:
"param2=ffff&&&hhh=&param1=oooo=&&param3=pppp&param4=iiii&&yy=&param5=kkkkk"
public static void main(String[] args) {
RequestParameter reqParam = new RequestParameter("param2=ffff&&&hhh=&param1=oooo=&&param3=pppp&param4=iiii&&yy=&param5=kkkkk");
System.out.println("SIZE " + reqParam.getHashmap().size());
System.out.println("URL " + reqParam.getParam());
System.out.println("HASHMAP " + reqParam.getHashmap());
System.out.println("PARAM1 = " reqParam.getParameterValue("param1"));
}
it throws the following exception: Exception in thread "main" java.lang.StringIndexOutOfBoundsException: String index out of range: -1
at java.lang.String.substring(String.java:1937)
at com.bml.icbs.ws.param.RequestParameter.(RequestParameter.java:188)
at com.bml.icbs.ws.param.main.main(main.java:9)
make sure array element !empy.
for (i=0;i<myArray.length;i++){
if(myArray[i] == null || myArray[i].length()==0){
continue;
}
key=myArray[i].substring(0,myArray[i].indexOf("="));
value=myArray[i].substring(myArray[i].indexOf("=")+1);
this.hashmap.put(key,value);
}

Getting 0 output records in hadoop MapReduce

I have the following code in hadoop where the mapper and reducer are as follows:
public static class Map2 extends Mapper<LongWritable, Text, NullWritable, Text>
{
TreeMap<Text, Text> top10 = new TreeMap<Text, Text>();
HashMap<String, String> userInfo = new HashMap<String, String>();
public void setup(Context context) throws IOException, InterruptedException
{
try
{
URI[] uris = DistributedCache.getCacheFiles(context.getConfiguration());
FileSystem fs = FileSystem.get(context.getConfiguration());
if (uris == null || uris.length == 0)
{
throw new IOException("Error reading file from distributed cache. No URIs found.");
}
String path = "./users.dat";
fs.copyToLocalFile(new Path(uris[0]), new Path(path));
BufferedReader br = new BufferedReader(new FileReader(path));
String line = null;
while((line = br.readLine()) != null)
{
String split[] = line.split("\\::");
String age = split[2];
String gender = split[1];
userInfo.put(split[0], gender + "\t" + age);
}
br.close();
}
catch(Exception e)
{
}
}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
try
{
String line = value.toString();
int sum = Integer.parseInt(line.split("\\t")[1]);
String userID = line.split("\\t")[0];
String newKey = sum + " " + userID;
if(userInfo.containsKey(userID))
{
String record = userInfo.get(userID);
String val = userID + "\t" + record + "\t" + sum;
top10.put(new Text(newKey), new Text(val));
if (top10.size() > 10)
{
top10.remove(top10.firstKey());
}
}
}
catch(Exception e)
{
}
}
protected void cleanup(Context context) throws IOException, InterruptedException
{
try
{
for (Text s1 : top10.descendingMap().values())
{
context.write(NullWritable.get(), s1);
}
}
catch(Exception e)
{
}
}
}
public static class Reduce2 extends Reducer<NullWritable, Text, NullWritable, Text>
{
private TreeMap<Text, Text> top10 = new TreeMap<Text, Text>();
public void reduce(NullWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
try
{
String line = values.toString();
String sum = line.split("\\t")[3];
String userID = line.split("\\t")[0];
String gender = line.split("\\t")[1];
String age = line.split("\\t")[2];
String newKey = sum + " " + userID;
String val = userID + "\t" + gender + "\t" + age + "\t" + sum;
top10.put(new Text(newKey), new Text(val));
if(top10.size() > 10)
{
top10.remove(top10.firstKey());
}
}
catch(Exception e)
{
}
}
protected void cleanup(Context context) throws IOException, InterruptedException
{
try
{
for (Text s1 : top10.descendingMap().values())
{
context.write(NullWritable.get(), s1);
}
}
catch(Exception e)
{
}
}
}
The driver method is as follows:
Configuration conf2 = new Configuration();
DistributedCache.addCacheFile(new Path("/Spring2014_HW-1/input_HW-1/users.dat").toUri(), conf2);
Job job2 = new Job(conf2, "Phase2");
job2.setOutputKeyClass(NullWritable.class);
job2.setOutputValueClass(Text.class);
job2.setJarByClass(MapSideJoin.class);
job2.setMapperClass(Map2.class);
job2.setReducerClass(Reduce2.class);
job2.setInputFormatClass(TextInputFormat.class);
job2.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job2, new Path(args[1]));
FileOutputFormat.setOutputPath(job2, new Path(args[2]));
//job2.setNumReduceTasks(1);
job2.waitForCompletion(true);
I get the message as map output records = 10 and reduce output records = 0, even though I've emit output from the reducer? Where does this output from the reducer disappear?
Thanks.

Display key as header in csv file

I am trying to create a csv file in this data are stored as key-value pair and value are properly displayed. How to show key as header of the field (value)
for (Map<String, String> map : test) {
Iterator iterator = map.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<String,String> mapEntry = (Map.Entry<String,String>) iterator.next();
//System.out.println("key: " + mapEntry.getKey() + ", value:" + mapEntry.getValue());
writer.append(mapEntry.getValue());
writer.append(",");
iterator.remove();
}
writer.append("\n");
}
writer.flush();
writer.close();
}
Try this code in your for loop:
Collection<String> kys = map.keySet();
Collection<String> vls = map.values();
for (String k : kys) {
writer.append(k);
writer.append(",");
}
writer.append("\n");
for (String v : vls) {
writer.append(v);
writer.append(",");
}
writer.flush();
writer.close();
Looking for something like this?
package com.stackoverflow.csv;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class CreateFile {
public static final String LINE_BREAK = "\n";
public static final String QUOTE = "\"";
private final StringBuilder sb;
public CreateFile(Map<String, String> table) {
sb = new StringBuilder();
Iterator<String> keys = table.keySet().iterator();
while (keys.hasNext()){
sb.append(QUOTE + keys.next() + QUOTE);
if (keys.hasNext())
sb.append(", ");
}
sb.append(LINE_BREAK);
Iterator<String> entries = table.values().iterator();
while (entries.hasNext()) {
sb.append(QUOTE + entries.next() + QUOTE);
if (entries.hasNext())
sb.append(", ");
}
}
public String getCSV() {
return sb.toString();
}
public static void main(String[] args) {
HashMap<String,String> map = new HashMap<>();
map.put("KEY1", "VALUE1");
map.put("KEY2", "VALUE2");
map.put("KEY3", "VALUE3");
map.put("KEY4", "VALUE4");
map.put("KEY5", "VALUE5");
map.put("KEY6", "VALUE6");
map.put("KEY7", "VALUE7");
map.put("KEY8", "VALUE8");
map.put("KEY9", "VALUE9");
map.put("KEY10", "VALUE10");
CreateFile app = new CreateFile(map);
System.out.println(app.getCSV());
}
}

Categories

Resources