Can someone please help me with this error?
Error is as follows
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 2
at DCache.main(Dcache.java:197)
197th line would be DistributedCache.addCacheFile(new URI(args[2]), conf)
Java code:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashSet;
import java.util.StringTokenizer;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
* An example MapReduce program that uses the distributed cache. It uses the NYSE_daily dataset, which has a schem of:
* exchange,stock_symbol,date,stock_price_open,stock_price_high,stock_price_low,stock_price_close,stock_volume,stock_price_adj_close
* and the NYSE_dividends data set, which has a schema of:
* exchange,stock_symbol,date,dividends
* It finds the adjusted closing price for each day that a stock reported a dividend. The dividends data is placed in the distributed
* cache and then loaded into a lookup table so that the join can be done on the map side.
*/
public class DCache {
public static class Pair <T, U> {
public T first;
public U second;
public Pair(T f, U s) {
first = f;
second = s;
}
#Override
public int hashCode() {
return (((this.first == null ? 1 : this.first.hashCode()) * 17)
+ (this.second == null ? 1 : this.second.hashCode()) * 19);
}
#Override
public boolean equals(Object other) {
if(other == null) {
return false;
}
if(! (other instanceof Pair)) {
return false;
}
Pair otherPair = (Pair) other;
boolean examinedFirst = false;
boolean examinedSecond = false;
if (this.first == null) {
if (otherPair.first != null) {
return false;
}
examinedFirst = true;
}
if (this.second == null) {
if (otherPair.second != null) {
return false;
}
examinedSecond = true;
}
if (!examinedFirst && !this.first.equals(otherPair.first)) {
return false;
}
if (!examinedSecond && !this.second.equals(otherPair.second)) {
return false;
}
return true;
}
}
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, NullWritable, Text> {
private HashSet<Pair<String, String>> lookup = null;
private Path[] localFiles;
public void configure(JobConf job) {
// Get the cached archives/files
try {
localFiles = DistributedCache.getLocalCacheFiles(job);
lookup = new HashSet<Pair<String, String>>();
// Open the file as a local file
FileReader fr = new FileReader(localFiles[0].toString());
BufferedReader d = new BufferedReader(fr);
String line;
while ((line = d.readLine()) != null) {
String[] toks = new String[4];
toks = line.split(",", 4);
// put the stock symbol
lookup.add(new Pair<String, String>(toks[1], toks[2]));
}
fr.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void map(LongWritable key, Text value, OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
// The first time we are invoked, open up our file from the distributed cache and populate our lookup table
/*
if (lookup == null) {
lookup = new HashSet<Pair<String, String>>();
// Open the file as a local file
FileReader fr = new FileReader(localFiles[0].toString());
BufferedReader d = new BufferedReader(fr);
String line;
while ((line = d.readLine()) != null) {
String[] toks = new String[4];
toks = line.split(",", 4);
// put the stock symbol
lookup.add(new Pair<String, String>(toks[1], toks[2]));
}
fr.close();
}
*/
// Convert the value from Text to a String so we can use the StringTokenizer on it.
String line = value.toString();
// Split the line into fields, using comma as the delimiter
StringTokenizer tokenizer = new StringTokenizer(line, ",");
// We only care about the 2nd, 3rd, and 9th fields (stock_symbol, date, and stock_price_adj_close)
String stock_symbol = null;
String date = null;
String stock_price_adj_close = null;
for (int i = 0; i < 9 && tokenizer.hasMoreTokens(); i++) {
switch (i) {
case 1:
stock_symbol = tokenizer.nextToken();
break;
case 2:
date = tokenizer.nextToken();
break;
case 8:
stock_price_adj_close = tokenizer.nextToken();
break;
default:
tokenizer.nextToken();
break;
}
}
if (stock_symbol == null || date == null || stock_price_adj_close == null) {
// This is a bad record, throw it out
System.err.println("Warning, bad record!");
return;
}
if (stock_symbol.equals("stock_symbol")) {
// NOP, throw out the schema line at the head of each file
return;
}
// Lookup the stock symbol and date in the lookup table
if (lookup.contains(new Pair<String, String>(stock_symbol, date))) {
StringBuilder buf = new StringBuilder(stock_symbol);
buf.append(',').append(date).append(',').append(stock_price_adj_close);
output.collect(NullWritable.get(), new Text(buf.toString()));
}
}
}
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(DCache.class);
conf.setJobName("DistributedCache_Example");
conf.setOutputKeyClass(NullWritable.class);
conf.setOutputValueClass(Text.class);
conf.setMapperClass(Map.class);
conf.setNumReduceTasks(0);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
DistributedCache.addCacheFile(new URI(args[2]), conf);
JobClient.runJob(conf);
}
}
There are two cases.
lookup.add(new Pair<String, String>(toks[1], toks[2]));
First, toks length is equal to 2. It means that the line you read does not have enough commas.
DistributedCache.addCacheFile(new URI(args[2]), conf);
Second, you are not passing enough arguments.
You need to pass three arguments.
Following line seems to be returning just 1 element in array
toks = line.split(",", 4);
Try inspecting line on above statement, and toks after this statement
Related
I had a file to read and with this code I succeeded my JUnit tests. As you can see, I pass the String line as parameter to the readPrevisione(...) method.
package oroscopo.persistence;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.StringTokenizer;
import oroscopo.model.Previsione;
import oroscopo.model.SegnoZodiacale;
public class TextFileOroscopoRepository implements OroscopoRepository {
private HashMap<String, List<Previsione>> mapSettore = new HashMap<>();
public TextFileOroscopoRepository(Reader baseReader) throws IOException, BadFileFormatException{
if (baseReader == null)
throw new IllegalArgumentException("baseReader is null");
BufferedReader bufReader = new BufferedReader(baseReader);
String line;
while((line=bufReader.readLine()) != null){
readPrevisione(line,bufReader);
}
}
private void readPrevisione(String line, BufferedReader bufReader) throws IOException, BadFileFormatException{
String nomeSettore = line.trim();
if (!Character.isUpperCase(nomeSettore.charAt(0)))
throw new BadFileFormatException();
List<Previsione> listaPrev = new ArrayList<>();
while (!(line = bufReader.readLine()).equalsIgnoreCase("FINE")){
try{
StringTokenizer st1 = new StringTokenizer(line, "\t");
if(st1.countTokens() < 2)
throw new BadFileFormatException();
String prev = st1.nextToken("\t").trim();
int val = Integer.parseInt(st1.nextToken("\t").trim());
Set<SegnoZodiacale> segni = new HashSet<>();
if (st1.hasMoreTokens()){
while(st1.hasMoreTokens()){
try{
segni.add(SegnoZodiacale.valueOf(st1.nextToken(",").trim()));
}
catch (IllegalArgumentException e){
throw new BadFileFormatException();
}
}
Previsione p = new Previsione(prev,val,segni);
listaPrev.add(p);
}
else{
Previsione p2 = new Previsione(prev,val);
listaPrev.add(p2);
}
}
catch (NumberFormatException e){
throw new BadFileFormatException();
}
catch (NoSuchElementException e){
throw new BadFileFormatException();
}
}
mapSettore.put(nomeSettore, listaPrev);
}
#Override
public Set<String> getSettori() {
return mapSettore.keySet();
}
#Override
public List<Previsione> getPrevisioni(String settore) {
return mapSettore.get(settore.toUpperCase());
}
}
Here with the same code, instead passing the read line as parameter, I pass the StringTokenizer that already has read the line. It should work like above but my JUnit tests fail. What did I do wrong?
package oroscopo.persistence;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.StringTokenizer;
import oroscopo.model.Previsione;
import oroscopo.model.SegnoZodiacale;
public class TextFileOroscopoRepository implements OroscopoRepository {
private HashMap<String, List<Previsione>> mapSettore = new HashMap<>();
public TextFileOroscopoRepository(Reader baseReader) throws IOException, BadFileFormatException{
if (baseReader == null)
throw new IllegalArgumentException("baseReader is null");
BufferedReader bufReader = new BufferedReader(baseReader);
String line;
while((line=bufReader.readLine()) != null){
StringTokenizer st = new StringTokenizer(line);
readPrevisione(st,bufReader);
}
}
private void readPrevisione(StringTokenizer st, BufferedReader bufReader) throws IOException, BadFileFormatException{
String nomeSettore = st.nextToken().trim();
if (!Character.isUpperCase(nomeSettore.charAt(0)))
throw new BadFileFormatException();
List<Previsione> listaPrev = new ArrayList<>();
String line;
while (!(line = bufReader.readLine()).equalsIgnoreCase("FINE")){
try{
StringTokenizer st1 = new StringTokenizer(line, "\t");
if(st1.countTokens() < 2)
throw new BadFileFormatException();
String prev = st1.nextToken("\t").trim();
int val = Integer.parseInt(st1.nextToken("\t").trim());
Set<SegnoZodiacale> segni = new HashSet<>();
if (st1.hasMoreTokens()){
while(st1.hasMoreTokens()){
try{
segni.add(SegnoZodiacale.valueOf(st1.nextToken(",").trim()));
}
catch (IllegalArgumentException e){
throw new BadFileFormatException();
}
}
Previsione p = new Previsione(prev,val,segni);
listaPrev.add(p);
}
else{
Previsione p2 = new Previsione(prev,val);
listaPrev.add(p2);
}
}
catch (NumberFormatException e){
throw new BadFileFormatException();
}
catch (NoSuchElementException e){
throw new BadFileFormatException();
}
}
mapSettore.put(nomeSettore, listaPrev);
}
#Override
public Set<String> getSettori() {
return mapSettore.keySet();
}
#Override
public List<Previsione> getPrevisioni(String settore) {
return mapSettore.get(settore.toUpperCase());
}
}
EDIT: Here is the File.txt that I want to read.
And here is an example of one of my JUnit test:
#Test
public void testLetturaCorrettaPrevisioni1() throws IOException, BadFileFormatException {
Reader mr = new StringReader(
"NOMESEZIONE\navrai la testa un po' altrove\t\t4\tARIETE,TORO,GEMELLI\ngrande intimita'\t9\nFINE\n"
+ "SEZIONE2\ntesto di prova\t\t\t\t\t66\t\nFINE");
OroscopoRepository or = new TextFileOroscopoRepository(mr);
assertEquals("avrai la testa un po' altrove", or.getPrevisioni("nomesezione").get(0).getPrevisione());
assertEquals(4, or.getPrevisioni("nomesezione").get(0).getValore());
Set<SegnoZodiacale> validi = new HashSet<SegnoZodiacale>() {
private static final long serialVersionUID = 1L;
{
add(SegnoZodiacale.ARIETE);
add(SegnoZodiacale.TORO);
add(SegnoZodiacale.GEMELLI);
}
};
for (SegnoZodiacale s : SegnoZodiacale.values()) {
if (validi.contains(s))
assertTrue(or.getPrevisioni("nomesezione").get(0).validaPerSegno(s));
else
assertFalse(or.getPrevisioni("nomesezione").get(0).validaPerSegno(s));
}
assertEquals("grande intimita'", or.getPrevisioni("nomesezione").get(1).getPrevisione());
assertEquals(9, or.getPrevisioni("nomesezione").get(1).getValore());
for (SegnoZodiacale s : SegnoZodiacale.values()) {
assertTrue(or.getPrevisioni("nomesezione").get(1).validaPerSegno(s));
}
}
You are creating StringTokenizer with default delimiter, that is, "the space character, the tab character, the newline character, the carriage-return character, and the form-feed character."
So in the first case you set as value of the "nomeSettore" variable the whole line but when you use StringTokenizer.nextToken() you are giving to "nomeSettore" just the value of the first token. So, "nomeSettore" can have different values if your String "line" contains whitespaces and you will have different key-value pairs inside you map.
You can take a look at this example:
public class TestSO {
public static void main(String[] args) {
String line = "abcdfs faf afd fa";
StringTokenizer st = new StringTokenizer(line);
readPrevisione(st, null);
readPrevisione(line, null);
}
private static void readPrevisione(StringTokenizer st, BufferedReader bufReader) {
String nomeSettore = st.nextToken().trim();
System.out.println(nomeSettore);
}
private static void readPrevisione(String st, BufferedReader bufReader) {
String nomeSettore = st.trim();
System.out.println(nomeSettore);
}
}
It prints as output:
abcdfs
abcdfs faf afd fa
I've understood why it didn't work..
The String line was : "EXAMPLE\n"
but after
while((line=bufReader.readLine()) != null){
...}
line = "EXAMPLE" because the readLine() eats the newline.
So I passed to the readPrevisione() a StringTokenizer as parameter
while((line=bufReader.readLine()) != null){
StringTokenizer st = new StringTokenizer(line);
readPrevisione(st,bufReader);
}
private void readPrevisione(StringTokenizer st, BufferedReader bufReader) throws IOException, BadFileFormatException{
String nomeSettore = st.nextToken().trim();
...}
And st.nextToken() search for a \n that is not contained in "EXAMPLE". That's why it didn't work.
I try to create program which can:
1. read characters from file
2. add these characters to ArrayList
3. Check if in line are only characters a,b,c (no other/no spaces)
If 3 is true -
1. compare first & last character in ArrayList, if they are different print "OK"
example file:
abbcb - OK
abbca - NOT OK
a bbc - NOT OK
abdcb - NOT OK
bbbca - OK
At the moment I got:
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
public class Projekt3
{
public static void main(String[] args) throws IOException
{
List<String> Lista = new ArrayList<String>();
Scanner sc = new Scanner(System.in).useDelimiter("\\s*");
while (!sc.hasNext("z"))
{
char ch = sc.next().charAt(0);
Lista.add(ch);
//System.out.print("[" + ch + "] ");
}
}
}
I have problems with adding character to list. I'll be grateful for help.
import java.io.*;
import java.util.ArrayList;
public class Project3 {
public static void main(String[] args) throws FileNotFoundException, IOException {
BufferedReader reader = new BufferedReader(new FileReader("//home//azeez//Documents//sample")); //replace with your file path
ArrayList<String> wordList = new ArrayList<>();
String line = null;
while ((line = reader.readLine()) != null) {
wordList.add(line);
}
for (String word : wordList) {
if (word.matches("^[abc]+$")) {
if (word.charAt(0) == word.charAt(word.length() - 1)) {
System.out.print(word + "-NOT OK" + " ");
} else {
System.out.print(word + "-OK" + " ");
}
}
}
}
}
i think this is good start for you:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
public class Project3 {
public static void main(String[] args) {
String path = "/Users/David/sandbox/java/test.txt";
try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
String currentLine = null;
// Array list for your words
List<String> arrayList = new ArrayList<>();
while ((currentLine = br.readLine()) != null) {
// only a, b and c
if (currentLine.contains("a") && currentLine.contains("b") && currentLine.contains("c")) {
// start character equal end character
if (currentLine.substring(0, 1)
.equals(currentLine.substring(currentLine.length()-1, currentLine.length()))) {
arrayList.add(currentLine);
System.out.println(currentLine);
}
}
}
} catch (Throwable e) {
System.err.println("error on read file " + e.getMessage());
e.printStackTrace();
}
}
}
I am using this code from (by author: lchen) which reads contents from excel file based on number of rows I provide into method 'readRow()'.
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.InputSource;
public class TestLargeFileRead {
private int rowNum = 0;
private OPCPackage opcPkg;
private ReadOnlySharedStringsTable stringsTable;
private XMLStreamReader xmlReader;
public void XExcelFileReader(String excelPath) throws Exception {
opcPkg = OPCPackage.open(excelPath, PackageAccess.READ);
this.stringsTable = new ReadOnlySharedStringsTable(opcPkg);
XSSFReader xssfReader = new XSSFReader(opcPkg);
XMLInputFactory factory = XMLInputFactory.newInstance();
InputStream inputStream = xssfReader.getSheetsData().next();
xmlReader = factory.createXMLStreamReader(inputStream);
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("sheetData"))
break;
}
}
}
public int rowNum() {
return rowNum;
}
public List<String[]> readRows(int batchSize) throws XMLStreamException {
String elementName = "row";
List<String[]> dataRows = new ArrayList<String[]>();
if (batchSize > 0) {
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals(elementName)) {
rowNum++;
dataRows.add(getDataRow());
if (dataRows.size() == batchSize)
break;
}
}
}
}
return dataRows;
}
private String[] getDataRow() throws XMLStreamException {
List<String> rowValues = new ArrayList<String>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("c")) {
CellReference cellReference = new CellReference(
xmlReader.getAttributeValue(null, "r"));
// Fill in the possible blank cells!
while (rowValues.size() < cellReference.getCol()) {
rowValues.add("");
}
String cellType = xmlReader.getAttributeValue(null, "t");
rowValues.add(getCellValue(cellType));
}
} else if (xmlReader.isEndElement()
&& xmlReader.getLocalName().equals("row")) {
break;
}
}
return rowValues.toArray(new String[rowValues.size()]);
}
private String getCellValue(String cellType) throws XMLStreamException {
String value = ""; // by default
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("v")) {
if (cellType != null && cellType.equals("s")) {
int idx = Integer.parseInt(xmlReader.getElementText());
return new XSSFRichTextString(
stringsTable.getEntryAt(idx)).toString();
} else {
return xmlReader.getElementText();
}
}
} else if (xmlReader.isEndElement()
&& xmlReader.getLocalName().equals("c")) {
break;
}
}
return value;
}
#Override
protected void finalize() throws Throwable {
if (opcPkg != null)
opcPkg.close();
super.finalize();
}
public static void main(String[] args) {
try {
TestLargeFileRead howto = new TestLargeFileRead();
howto.XExcelFileReader("D:\\TEMP_CATALOG\\H1.xlsx");
} catch (Exception e) {
e.printStackTrace();
}
}
}
But it reads only First SHEET's contents and discards other subsequent SHEETS. My requirement is to read SHEET name; and based on name read that SHEET's contents. Can anyone help me to customize this above code fetch SHEET NAME and their contents ? please ?
The key class you need to work with, and tweak your use of, is XSSFReader. If you take a look at the Javadocs for it, you'll see it provides an Iterator of InputStreams of all the sheets, and a way to get at the root Workbook stream.
If you want to access all the sheets, you need to change these lines:
InputStream inputStream = xssfReader.getSheetsData().next();
xmlReader = factory.createXMLStreamReader(inputStream);
Into something more like:
Iterator<InputStream> sheetsData = xssfReader.getSheetsData();
while (sheetsData.hasNext()) {
InputStream inputStream = sheetsData.next();
xmlReader = factory.createXMLStreamReader(inputStream);
....
}
If you also want to get the sheet name as well, you'll want to do something like what is shown in the Apache POI XLSX event-based text extractor
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (sheetsData.hasNext()) {
InputStream inputStream = sheetsData.next();
String sheetName = iter.getSheetName();
if (sheetName.equalsIgnoreCase("TheSheetIWant")) {
xmlReader = factory.createXMLStreamReader(inputStream);
....
}
}
If you want to know more about doing this stuff, then one of the best examples, that's easy to read and follow, is XSSFEventBasedExcelExtractor that comes with Apache POI - read the code for that and learn!
my data in the text file looks like this...
3
movie title
4
movie title
1
movie title
the number on top is the movie rating and the text under it is the movie title.
The code I have so far is below. But It's not printing anything out except empty brackets! Sample code would be appreciated!
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class MovieReview {
public static void main(String[] args) {
Map<Integer, String> map = new HashMap<Integer, String>();
BufferedReader br = new BufferedReader(new FileReader("C:/Users/sgoetz/Desktop/movieReviews.txt"));
String line = br.readLine();
while (line != null) {
line = br.readLine();
System.out.println(map);
}
}
Try This
public static void main(String[] args) throws IOException {
Map<Integer, String> map = new HashMap<Integer, String>();
BufferedReader br = new BufferedReader(new FileReader("movieReviews.txt"));
String line="";
int i=0;
while (line != null) {
line = br.readLine();
map.put(i,line);
i++;
}
for(int j=0;j<map.size();j++){
System.out.println(map.get(j));
}
}
Your code is printing nothing because you are printing the map on which you put nothing. So the map remains empty. Also your first while iteration is buggy, you read one line from the stream before the while loop then you enter it an immediately read the next line. The first line is lost.
For reading from a buffered stream the following pattern should be considered:
while(null != (line = br.readLine())) {
// do what ever you want with the line.
}
If you want to store moving names of against its rating you have to declare map as Map>. Following code populates the movie title against its rating. Hope this is helpful.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.List;
public class MovieReview {
public static void main(String[] args) {
Map<Integer, List<String>> map = new HashMap<Integer, List<String>>();
BufferedReader br = new BufferedReader(new FileReader("C:/Users/sgoetz/Desktop/movieReviews.txt"));
String line = null;
while ((line = br.readLine()) != null) {
//Rating
int rating = Integer.parseInt(line);
//Movie name
line = br.readLine();
List<String> movieList = map.get(rating);
if(movieList == null) {
movieList = new ArrayList<String>();
map.put(rating, movieList);
}
//Adding movie name to list
movieList.add(line);
}
}
}
}
here we go
File:
3,movie title,rating,other,other
4,movie title,rating,other,other
1,movie title,rating,other,other
code:
public static void main(String[] args) throws IOException {
Map<Integer, String> map = new HashMap<Integer, String>();
BufferedReader br = new BufferedReader(new FileReader("movieReviews.txt"));
String line="";
int i=0;
while (line != null) {
line = br.readLine();
map.put(i,line);
i++;
}
String movieNumber="";
String movieTitle="";
String movieRating="";
String movieOther1="";
String movieOther2="";
for(int j=0;j<map.size();j++){
if(!(map.get(j)== null)){
String[] getData=map.get(j).toString().split("\\,");
movieNumber = getData[0];
movieTitle = getData[1];
movieRating = getData[2];
movieOther1 = getData[3];
movieOther2 = getData[4];
System.out.println("|"+movieNumber+"|"+movieTitle+"|"+movieRating+"|"+movieOther1+"|"+movieOther2);
}
}
}
A more example:
while (true) {
// Number line
String value = br.readLine();
if (value == null || value.trim().isEmpty()) {
break;
}
Integer valueInt = Integer.parseInt(value);
// Name line
String title = br.readLine();
if (title == null || value.trim().isEmpty()) {
break;
}
map.put(valueInt, title);
}
System.out.println(map);
And the output is:
{1=movie title, 3=movie title, 4=movie title}
I recent updated to hadoop 2.2 (using this tutorial here).
My main job class looks like so, and throws an IOException:
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.chain.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.mapreduce.lib.reduce.*;
public class UFOLocation2
{
public static class MapClass extends Mapper<LongWritable, Text, Text, LongWritable>
{
private final static LongWritable one = new LongWritable(1);
private static Pattern locationPattern = Pattern.compile("[a-zA-Z]{2}[^a-zA-Z]*$");
private Map<String, String> stateNames;
#Override
public void setup(Context context)
{
try
{
URI[] cacheFiles = context.getCacheFiles();
setupStateMap(cacheFiles[0].toString());
}
catch (IOException ioe)
{
System.err.println("Error reading state file.");
System.exit(1);
}
}
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
String line = value.toString();
String[] fields = line.split("\t");
String location = fields[2].trim();
if (location.length() >= 2)
{
Matcher matcher = locationPattern.matcher(location);
if (matcher.find())
{
int start = matcher.start();
String state = location.substring(start, start + 2);
context.write(new Text(lookupState(state.toUpperCase())), one);
}
}
}
private void setupStateMap(String filename) throws IOException
{
Map<String, String> states = new HashMap<String, String>();
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line = reader.readLine();
while (line != null)
{
String[] split = line.split("\t");
states.put(split[0], split[1]);
line = reader.readLine();
}
stateNames = states;
}
private String lookupState(String state)
{
String fullName = stateNames.get(state);
return fullName == null ? "Other" : fullName;
}
}
public static void main(String[] args) throws Exception
{
Configuration config = new Configuration();
Job job = Job.getInstance(config, "UFO Location 2");
job.setJarByClass(UFOLocation2.class);
job.addCacheFile(new URI("/user/kevin/data/states.txt"));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
Configuration mapconf1 = new Configuration(false);
ChainMapper.addMapper(job, UFORecordValidationMapper.class, LongWritable.class,
Text.class, LongWritable.class,Text.class, mapconf1);
Configuration mapconf2 = new Configuration(false);
ChainMapper.addMapper(job, MapClass.class, LongWritable.class,
Text.class, Text.class, LongWritable.class, mapconf2);
job.setMapperClass(ChainMapper.class);
job.setCombinerClass(LongSumReducer.class);
job.setReducerClass(LongSumReducer.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
I get an IOException because it can't find the file "/user/kevin/data/states.txt" when it tries to instantiate the BufferredReader in the method setupStateMap()
Yes, it is deprecated and Job.addCacheFile() should be used to add the files and in your tasks( map or reduce) files can be accessed with Context.getCacheFiles().
//its fine addCacheFile and getCacheFiles are from 2.x u can use something like this
Path path = new Path(uri[0].getPath().toString());
if (fileSystem.exists(path)) {
FSDataInputStream dataInputStream = fileSystem.open(path);
byte[] data = new byte[1024];
while (dataInputStream.read(data) > 0) {
//do your stuff here
}
dataInputStream.close();
}
Deprecated functionality shall work anyway.