String line or StringTokenizer with a Reader? - java

I had a file to read and with this code I succeeded my JUnit tests. As you can see, I pass the String line as parameter to the readPrevisione(...) method.
package oroscopo.persistence;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.StringTokenizer;
import oroscopo.model.Previsione;
import oroscopo.model.SegnoZodiacale;
public class TextFileOroscopoRepository implements OroscopoRepository {
private HashMap<String, List<Previsione>> mapSettore = new HashMap<>();
public TextFileOroscopoRepository(Reader baseReader) throws IOException, BadFileFormatException{
if (baseReader == null)
throw new IllegalArgumentException("baseReader is null");
BufferedReader bufReader = new BufferedReader(baseReader);
String line;
while((line=bufReader.readLine()) != null){
readPrevisione(line,bufReader);
}
}
private void readPrevisione(String line, BufferedReader bufReader) throws IOException, BadFileFormatException{
String nomeSettore = line.trim();
if (!Character.isUpperCase(nomeSettore.charAt(0)))
throw new BadFileFormatException();
List<Previsione> listaPrev = new ArrayList<>();
while (!(line = bufReader.readLine()).equalsIgnoreCase("FINE")){
try{
StringTokenizer st1 = new StringTokenizer(line, "\t");
if(st1.countTokens() < 2)
throw new BadFileFormatException();
String prev = st1.nextToken("\t").trim();
int val = Integer.parseInt(st1.nextToken("\t").trim());
Set<SegnoZodiacale> segni = new HashSet<>();
if (st1.hasMoreTokens()){
while(st1.hasMoreTokens()){
try{
segni.add(SegnoZodiacale.valueOf(st1.nextToken(",").trim()));
}
catch (IllegalArgumentException e){
throw new BadFileFormatException();
}
}
Previsione p = new Previsione(prev,val,segni);
listaPrev.add(p);
}
else{
Previsione p2 = new Previsione(prev,val);
listaPrev.add(p2);
}
}
catch (NumberFormatException e){
throw new BadFileFormatException();
}
catch (NoSuchElementException e){
throw new BadFileFormatException();
}
}
mapSettore.put(nomeSettore, listaPrev);
}
#Override
public Set<String> getSettori() {
return mapSettore.keySet();
}
#Override
public List<Previsione> getPrevisioni(String settore) {
return mapSettore.get(settore.toUpperCase());
}
}
Here with the same code, instead passing the read line as parameter, I pass the StringTokenizer that already has read the line. It should work like above but my JUnit tests fail. What did I do wrong?
package oroscopo.persistence;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.StringTokenizer;
import oroscopo.model.Previsione;
import oroscopo.model.SegnoZodiacale;
public class TextFileOroscopoRepository implements OroscopoRepository {
private HashMap<String, List<Previsione>> mapSettore = new HashMap<>();
public TextFileOroscopoRepository(Reader baseReader) throws IOException, BadFileFormatException{
if (baseReader == null)
throw new IllegalArgumentException("baseReader is null");
BufferedReader bufReader = new BufferedReader(baseReader);
String line;
while((line=bufReader.readLine()) != null){
StringTokenizer st = new StringTokenizer(line);
readPrevisione(st,bufReader);
}
}
private void readPrevisione(StringTokenizer st, BufferedReader bufReader) throws IOException, BadFileFormatException{
String nomeSettore = st.nextToken().trim();
if (!Character.isUpperCase(nomeSettore.charAt(0)))
throw new BadFileFormatException();
List<Previsione> listaPrev = new ArrayList<>();
String line;
while (!(line = bufReader.readLine()).equalsIgnoreCase("FINE")){
try{
StringTokenizer st1 = new StringTokenizer(line, "\t");
if(st1.countTokens() < 2)
throw new BadFileFormatException();
String prev = st1.nextToken("\t").trim();
int val = Integer.parseInt(st1.nextToken("\t").trim());
Set<SegnoZodiacale> segni = new HashSet<>();
if (st1.hasMoreTokens()){
while(st1.hasMoreTokens()){
try{
segni.add(SegnoZodiacale.valueOf(st1.nextToken(",").trim()));
}
catch (IllegalArgumentException e){
throw new BadFileFormatException();
}
}
Previsione p = new Previsione(prev,val,segni);
listaPrev.add(p);
}
else{
Previsione p2 = new Previsione(prev,val);
listaPrev.add(p2);
}
}
catch (NumberFormatException e){
throw new BadFileFormatException();
}
catch (NoSuchElementException e){
throw new BadFileFormatException();
}
}
mapSettore.put(nomeSettore, listaPrev);
}
#Override
public Set<String> getSettori() {
return mapSettore.keySet();
}
#Override
public List<Previsione> getPrevisioni(String settore) {
return mapSettore.get(settore.toUpperCase());
}
}
EDIT: Here is the File.txt that I want to read.
And here is an example of one of my JUnit test:
#Test
public void testLetturaCorrettaPrevisioni1() throws IOException, BadFileFormatException {
Reader mr = new StringReader(
"NOMESEZIONE\navrai la testa un po' altrove\t\t4\tARIETE,TORO,GEMELLI\ngrande intimita'\t9\nFINE\n"
+ "SEZIONE2\ntesto di prova\t\t\t\t\t66\t\nFINE");
OroscopoRepository or = new TextFileOroscopoRepository(mr);
assertEquals("avrai la testa un po' altrove", or.getPrevisioni("nomesezione").get(0).getPrevisione());
assertEquals(4, or.getPrevisioni("nomesezione").get(0).getValore());
Set<SegnoZodiacale> validi = new HashSet<SegnoZodiacale>() {
private static final long serialVersionUID = 1L;
{
add(SegnoZodiacale.ARIETE);
add(SegnoZodiacale.TORO);
add(SegnoZodiacale.GEMELLI);
}
};
for (SegnoZodiacale s : SegnoZodiacale.values()) {
if (validi.contains(s))
assertTrue(or.getPrevisioni("nomesezione").get(0).validaPerSegno(s));
else
assertFalse(or.getPrevisioni("nomesezione").get(0).validaPerSegno(s));
}
assertEquals("grande intimita'", or.getPrevisioni("nomesezione").get(1).getPrevisione());
assertEquals(9, or.getPrevisioni("nomesezione").get(1).getValore());
for (SegnoZodiacale s : SegnoZodiacale.values()) {
assertTrue(or.getPrevisioni("nomesezione").get(1).validaPerSegno(s));
}
}

You are creating StringTokenizer with default delimiter, that is, "the space character, the tab character, the newline character, the carriage-return character, and the form-feed character."
So in the first case you set as value of the "nomeSettore" variable the whole line but when you use StringTokenizer.nextToken() you are giving to "nomeSettore" just the value of the first token. So, "nomeSettore" can have different values if your String "line" contains whitespaces and you will have different key-value pairs inside you map.
You can take a look at this example:
public class TestSO {
public static void main(String[] args) {
String line = "abcdfs faf afd fa";
StringTokenizer st = new StringTokenizer(line);
readPrevisione(st, null);
readPrevisione(line, null);
}
private static void readPrevisione(StringTokenizer st, BufferedReader bufReader) {
String nomeSettore = st.nextToken().trim();
System.out.println(nomeSettore);
}
private static void readPrevisione(String st, BufferedReader bufReader) {
String nomeSettore = st.trim();
System.out.println(nomeSettore);
}
}
It prints as output:
abcdfs
abcdfs faf afd fa

I've understood why it didn't work..
The String line was : "EXAMPLE\n"
but after
while((line=bufReader.readLine()) != null){
...}
line = "EXAMPLE" because the readLine() eats the newline.
So I passed to the readPrevisione() a StringTokenizer as parameter
while((line=bufReader.readLine()) != null){
StringTokenizer st = new StringTokenizer(line);
readPrevisione(st,bufReader);
}
private void readPrevisione(StringTokenizer st, BufferedReader bufReader) throws IOException, BadFileFormatException{
String nomeSettore = st.nextToken().trim();
...}
And st.nextToken() search for a \n that is not contained in "EXAMPLE". That's why it didn't work.

Related

I'm trying to read a text file and store it in an arraylist of objects

I'm trying to read a text file and store it in an arraylist of objects, but I keep getting an error saying I cannot convert a String to an Item, which is type of arraylist I am using. I have tried various solutions, but am not quite sure how its is suppossed to be done. I am new to coding and have this assignment due soon. Anything helps!
private void loadFile(String FileName)
{
Scanner in;
Item line;
try
{
in = new Scanner(new File(FileName));
while (in.hasNext())
{
line = in.nextLine();
MyStore.add(line);
}
in.close();
}
catch (IOException e)
{
System.out.println("FILE NOT FOUND.");
}
}
my apologies for not adding the Item class
public class Item
{
private int myId;
private int myInv;
//default constructor
public Item()
{
myId = 0;
myInv = 0;
}
//"normal" constructor
public Item(int id, int inv)
{
myId = id;
myInv = inv;
}
//copy constructor
public Item(Item OtherItem)
{
myId = OtherItem.getId();
myInv = OtherItem.getInv();
}
public int getId()
{
return myId;
}
public int getInv()
{
return myInv;
}
public int compareTo(Item Other)
{
int compare = 0;
if (myId > Other.getId())
{
compare = 1;
}
else if (myId < Other.getId())
{
compare = -1;
}
return compare;
}
public boolean equals(Item Other)
{
boolean equal = false;
if (myId == Other.getId())
{
equal = true;;
}
return equal;
}
public String toString()
{
String Result;
Result = String.format("%8d%8d", myId, myInv);
return Result;
}
}
This is the creation of my arraylist.
private ArrayList MyStore = new ArrayList ();
Here is a sample of my text file.
3679 87
196 60
12490 12
18618 14
2370 65
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.mycompany.rosmery;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
*
* #author Sem-6-INGENIERIAINDU
*/
public class aaa {
public static void main(String arg[]) throws FileNotFoundException, IOException{
BufferedReader files=new BufferedReader(new FileReader(new File("")));
List<String> dto=new ArrayList<>();
String line;
while((line= files.readLine())!= null){
line= files.readLine();
dto.add(line);
//Hacer la logica para esos datos
}
}
}
in.nextLine() returns a String.
So, you cannot assign in.nextLine() to an instance of Item.
Your code may need to correct it as:
List<String> myStore = new ArrayList<String>();
private void loadFile(String FileName)
{
Scanner in;
try
{
in = new Scanner(new File(FileName));
while (in.hasNext())
{
myStore.add(in.nextLine());
}
in.close();
}
catch (IOException e)
{
System.out.println("FILE NOT FOUND.");
}
}
If you want to have a list of Item after reading a file, then you need provide the logic that convert given line of information into an instance of Item.
let's say your file content is in the following format.
id1,inv1
id2,inv2
.
.
Then, you can use the type Item as the following.
List<Item> myStore = new ArrayList<Item>();
private void loadFile(String FileName)
{
Scanner in;
String[] line;
try
{
in = new Scanner(new File(FileName));
while (in.hasNext())
{
line = in.nextLine().split(",");
myStore.add(new Item(line[0], line[1]));
}
in.close();
}
catch (IOException e)
{
System.out.println("FILE NOT FOUND.");
}
}
One of the possible solutions (assuming that the data in file lines is separated by a comma), with using streams:
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class Main {
public static void main(String[] args) throws IOException {
List<Item> items = loadFile("myfile.txt");
System.out.println(items);
}
private static List<Item> loadFile(String fileName) throws IOException {
try (Stream<String> stream = Files.lines(Paths.get(fileName))) {
return stream
.map(s -> Stream.of(s.split(",")).mapToInt(Integer::parseInt).toArray())
.map(i -> new Item(i[0], i[1]))
.collect(Collectors.toList());
}
}
}
or with foreach:
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class Main {
public static void main(String[] args) throws IOException {
List<Item> items = new ArrayList<>();
for (String line : loadFile("myfile.txt")) {
String[] data = line.split(",");
int id = Integer.parseInt(data[0]);
int inv = Integer.parseInt(data[1]);
items.add(new Item(id, inv));
}
System.out.println(items);
}
private static List<String> loadFile(String fileName) throws IOException {
try (Stream<String> stream = Files.lines(Paths.get(fileName))) {
return stream.collect(Collectors.toList());
}
}
}

How do I print my CSV file to an ArrayList?

Here is my code
package sequentialFilePractice;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
public class ReadFile{
static String line = "";
ReadFile() throws FileNotFoundException{
readTheFile();
CSVtoArrayList();
}
public String readTheFile() throws FileNotFoundException{
String csvFile = "H:\\S6\\AH Computing\\Java Practice\\test.csv";
BufferedReader br = null;
String cvsSplitBy = ",";
try {
br = new BufferedReader(new FileReader(csvFile));
while ((line = br.readLine()) != null) {
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (br != null) {
try {
br.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return line;
}
public static ArrayList<String> CSVtoArrayList() {
ArrayList<String> splitCSV = new ArrayList<>();
if (line != null) {
String[] splitData = line.split("\\s*,\\s*");
for (int i = 0; i < splitData.length; i++) {
if (!(splitData[i] == null) || !(splitData[i].length() == 0)) {
splitCSV.add(splitData[i].trim());
}
}
}
for(int j = 0;j < splitCSV.size();j++){
System.out.println(splitCSV.get(j));
}
return splitCSV;
}
public static void main(String[]args) throws IOException{
ReadFile f = new ReadFile();
}
}
The code compiles and the file exists. I can print line and it prints the contents of the file however when I print the arrayList, nothing is output so it has not been copied. This is my first use of sequential files in java.
Do you HAVE to read the file manually? If not, you should check out http://opencsv.sourceforge.net/, it allows you to read a CSV directly into a List<String[]> instead of having to deal with the admin of looping, splitting the line and creating a list.
In essence reducing your code to:
CSVReader reader = new CSVReader(new FileReader("yourfile.csv"));
List myEntries = reader.readAll();

Extracting links of a facebook page

How can I extract all the links of a facebook page. Can I extract it using jsoup and pass "like" link as parameter to extract all the user's info who liked that particular page
private static String readAll(Reader rd) throws IOException
{
StringBuilder sb = new StringBuilder();
int cp;
while ((cp = rd.read()) != -1)
{
sb.append((char) cp);
}
return sb.toString();
}
public static JSONObject readurl(String url) throws IOException, JSONException
{
InputStream is = new URL(url).openStream();
try
{
BufferedReader rd = new BufferedReader
(new InputStreamReader(is, Charset.forName("UTF-8")));
String jsonText = readAll(rd);
JSONObject json = new JSONObject(jsonText);
return json;
}
finally
{
is.close();
}
}
public static void main(String[] args) throws IOException,
JSONException, FacebookException
{
try
{
System.out.println("\nEnter the search string:");
#SuppressWarnings("resource")
Scanner sc=new Scanner(System.in);
String s=sc.nextLine();
JSONObject json = readurl("https://graph.facebook.com/"+s);
System.out.println(json);
}}
CAN i MODIFY THIS AND INTEGRATE THIS CODE. BELOW CODE EXTRACTS ALL LINKS OF A PARTICULAR PAGE. i TRIED TO THE ABOVE CODE BUT IT'S NOT WORKING
String url = "http://www.firstpost.com/tag/crime-in-india";
Document doc = Jsoup.connect(url).get();
Elements links = doc.getElementsByTag("a");
System.out.println(links.size());
for (Element link : links)
{
System.out.println(link.absUrl("href") +trim(link.text(), 35));
}
}
public static String trim(String s, int width) {
if (s.length() > width)
return s.substring(0, width-1) + ".";
else
return s;
}
}
you can try alternative way also like this :
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashSet;
import java.util.Set;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
public class URLExtractor {
private static class HTMLPaserCallBack extends HTMLEditorKit.ParserCallback {
private Set<String> urls;
public HTMLPaserCallBack() {
urls = new LinkedHashSet<String>();
}
public Set<String> getUrls() {
return urls;
}
#Override
public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos) {
handleTag(t, a, pos);
}
#Override
public void handleStartTag(Tag t, MutableAttributeSet a, int pos) {
handleTag(t, a, pos);
}
private void handleTag(Tag t, MutableAttributeSet a, int pos) {
if (t == Tag.A) {
Object href = a.getAttribute(HTML.Attribute.HREF);
if (href != null) {
String url = href.toString();
if (!urls.contains(url)) {
urls.add(url);
}
}
}
}
}
public static void main(String[] args) throws IOException {
InputStream is = null;
try {
String u = "https://www.facebook.com/";
URL url = new URL(u);
is = url.openStream(); // throws an IOException
HTMLPaserCallBack cb = new HTMLPaserCallBack();
new ParserDelegator().parse(new BufferedReader(new InputStreamReader(is)), cb, true);
for (String aUrl : cb.getUrls()) {
System.out.println("Found URL: " + aUrl);
}
} catch (MalformedURLException mue) {
mue.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} finally {
try {
is.close();
} catch (IOException ioe) {
// nothing to see here
}
}
}
}
Kind of works, but im not sure you could use jsoup for this I would rather look into casperjs or phantomjs
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class getFaceBookLinks {
public static Elements getElementsByTag_then_FilterBySelector (String tag, String httplink, String selector){
Document doc = null;
try {
doc = Jsoup.connect(httplink).get();
} catch (IOException e) {
e.printStackTrace();
}
Elements links = doc.getElementsByTag(tag);
return links.select(selector);
}
//Test functionality
public static void main(String[] args){
// The class name for the like links on facebook is UFILikeLink
Elements likeLinks = getElementsByTag_then_FilterBySelector("a", "http://www.facebook.com", ".UFILikeLink");
System.out.println(likeLinks);
}
}

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException:2

Can someone please help me with this error?
Error is as follows
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 2
at DCache.main(Dcache.java:197)
197th line would be DistributedCache.addCacheFile(new URI(args[2]), conf)
Java code:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashSet;
import java.util.StringTokenizer;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
* An example MapReduce program that uses the distributed cache. It uses the NYSE_daily dataset, which has a schem of:
* exchange,stock_symbol,date,stock_price_open,stock_price_high,stock_price_low,stock_price_close,stock_volume,stock_price_adj_close
* and the NYSE_dividends data set, which has a schema of:
* exchange,stock_symbol,date,dividends
* It finds the adjusted closing price for each day that a stock reported a dividend. The dividends data is placed in the distributed
* cache and then loaded into a lookup table so that the join can be done on the map side.
*/
public class DCache {
public static class Pair <T, U> {
public T first;
public U second;
public Pair(T f, U s) {
first = f;
second = s;
}
#Override
public int hashCode() {
return (((this.first == null ? 1 : this.first.hashCode()) * 17)
+ (this.second == null ? 1 : this.second.hashCode()) * 19);
}
#Override
public boolean equals(Object other) {
if(other == null) {
return false;
}
if(! (other instanceof Pair)) {
return false;
}
Pair otherPair = (Pair) other;
boolean examinedFirst = false;
boolean examinedSecond = false;
if (this.first == null) {
if (otherPair.first != null) {
return false;
}
examinedFirst = true;
}
if (this.second == null) {
if (otherPair.second != null) {
return false;
}
examinedSecond = true;
}
if (!examinedFirst && !this.first.equals(otherPair.first)) {
return false;
}
if (!examinedSecond && !this.second.equals(otherPair.second)) {
return false;
}
return true;
}
}
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, NullWritable, Text> {
private HashSet<Pair<String, String>> lookup = null;
private Path[] localFiles;
public void configure(JobConf job) {
// Get the cached archives/files
try {
localFiles = DistributedCache.getLocalCacheFiles(job);
lookup = new HashSet<Pair<String, String>>();
// Open the file as a local file
FileReader fr = new FileReader(localFiles[0].toString());
BufferedReader d = new BufferedReader(fr);
String line;
while ((line = d.readLine()) != null) {
String[] toks = new String[4];
toks = line.split(",", 4);
// put the stock symbol
lookup.add(new Pair<String, String>(toks[1], toks[2]));
}
fr.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void map(LongWritable key, Text value, OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
// The first time we are invoked, open up our file from the distributed cache and populate our lookup table
/*
if (lookup == null) {
lookup = new HashSet<Pair<String, String>>();
// Open the file as a local file
FileReader fr = new FileReader(localFiles[0].toString());
BufferedReader d = new BufferedReader(fr);
String line;
while ((line = d.readLine()) != null) {
String[] toks = new String[4];
toks = line.split(",", 4);
// put the stock symbol
lookup.add(new Pair<String, String>(toks[1], toks[2]));
}
fr.close();
}
*/
// Convert the value from Text to a String so we can use the StringTokenizer on it.
String line = value.toString();
// Split the line into fields, using comma as the delimiter
StringTokenizer tokenizer = new StringTokenizer(line, ",");
// We only care about the 2nd, 3rd, and 9th fields (stock_symbol, date, and stock_price_adj_close)
String stock_symbol = null;
String date = null;
String stock_price_adj_close = null;
for (int i = 0; i < 9 && tokenizer.hasMoreTokens(); i++) {
switch (i) {
case 1:
stock_symbol = tokenizer.nextToken();
break;
case 2:
date = tokenizer.nextToken();
break;
case 8:
stock_price_adj_close = tokenizer.nextToken();
break;
default:
tokenizer.nextToken();
break;
}
}
if (stock_symbol == null || date == null || stock_price_adj_close == null) {
// This is a bad record, throw it out
System.err.println("Warning, bad record!");
return;
}
if (stock_symbol.equals("stock_symbol")) {
// NOP, throw out the schema line at the head of each file
return;
}
// Lookup the stock symbol and date in the lookup table
if (lookup.contains(new Pair<String, String>(stock_symbol, date))) {
StringBuilder buf = new StringBuilder(stock_symbol);
buf.append(',').append(date).append(',').append(stock_price_adj_close);
output.collect(NullWritable.get(), new Text(buf.toString()));
}
}
}
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(DCache.class);
conf.setJobName("DistributedCache_Example");
conf.setOutputKeyClass(NullWritable.class);
conf.setOutputValueClass(Text.class);
conf.setMapperClass(Map.class);
conf.setNumReduceTasks(0);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
DistributedCache.addCacheFile(new URI(args[2]), conf);
JobClient.runJob(conf);
}
}
There are two cases.
lookup.add(new Pair<String, String>(toks[1], toks[2]));
First, toks length is equal to 2. It means that the line you read does not have enough commas.
DistributedCache.addCacheFile(new URI(args[2]), conf);
Second, you are not passing enough arguments.
You need to pass three arguments.
Following line seems to be returning just 1 element in array
toks = line.split(",", 4);
Try inspecting line on above statement, and toks after this statement

file reading into array

I am trying to read contents of a file using string tokenizer and store all the tokens in an array but i keep getting exception in main error. I need advise on how to do this.Below is the code am using for that;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.StringTokenizer;
public class FileTokenizer
{
private static final String DEFAULT_DELIMITERS = "< , { } >";
private static final String DEFAULT_TEST_FILE = "trans1.txt";
public List<String> tokenize(Reader reader) throws IOException
{
List<String> tokens = new ArrayList<String>();
BufferedReader br = null;
try
{
int i = 0;
br = new BufferedReader(reader);
Scanner scanner = new Scanner(br);
while (scanner.hasNext())
{
StringTokenizer st = new StringTokenizer(scanner.next(), DEFAULT_DELIMITERS, true);
while (st.hasMoreElements())
{
String[] t = new String[200];
tokens.add(st.nextToken());
t[i] = st.nextToken();
System.out.println(t[i]);
i++;
}
}
}
finally
{
close(br);
}
return tokens;
}
public static void close(Reader r)
{
try
{
if (r != null)
{
r.close();
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
public static void main(String[] args)
{
try
{
String fileName = ((args.length > 0) ? args[0] : DEFAULT_TEST_FILE);
FileReader fileReader = new FileReader(new File(fileName));
FileTokenizer fileTokenizer = new FileTokenizer();
List<String> tokens = fileTokenizer.tokenize(fileReader);
//System.out.println(tokens);
}
catch (IOException e)
{
e.printStackTrace();
}
}
}
My file looks like;
PDA = (
{ q1, q2, q3, q4},
{ 0, 1 },
{ 0, $ },
{ (q1, #, #) -> { (q2, $) }, (q2, 0, #) -> { (q2, 0) },
(q2, 1, 0) -> { (q3, #) }, (q3, 1, 0) -> { (q3, #) },
(q3, #, $) -> { (q4, #) } },
q1,
{ q1, q4}
)
You will get the java.util.NoSuchElementException since you are calling st.nextToken() twice within the loop
while (st.hasMoreElements())
Modifying harigm's example, you can then add t[i] to tokens as you require
String[] t = new String[200];
System.out.println(t[i]);
tokens.add(t[i]);
Delimiters shouldn't be separated by spaces:
private static final String DEFAULT_DELIMITERS = "<,{}>";
Also, keep the following in mind (from the Javadoc):
StringTokenizer is a legacy class that is retained for compatibility reasons although its use is discouraged in new code. It is recommended that anyone seeking this functionality use the split method of String or the java.util.regex package instead.
String.split() was introduced in JDK 1.4.
That said:
Using a Scanner to tokenize a stream together with a StringTokenizer looks a bit weird to me;
You call st.nextToken() twice in the inner loop;
t is useless. You re-create it each time in your inner loop and use only one element of it.
It seems that what you are trying to build is a lexical analyzer. Maybe you should look up some documentation on the subject.
HI,
I have modified your code and Now works perfectly fine, check this
package org.sample;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.StringTokenizer;
public class FileTokenizer
{
private static final String DEFAULT_DELIMITERS = "< , { } >";
// private static final String DEFAULT_TEST_FILE = "trans1.txt";
public List<String> tokenize(Reader reader) throws IOException
{
List<String> tokens = new ArrayList<String>();
BufferedReader br = null;
try
{
int i = 0;
br = new BufferedReader(reader);
Scanner scanner = new Scanner(br);
while (scanner.hasNext())
{
StringTokenizer st = new StringTokenizer(scanner.next(), DEFAULT_DELIMITERS, true);
while (st.hasMoreElements())
{
String[] t = new String[200];
// tokens.add(st.nextToken());
// t[i] = st.nextToken();
System.out.println(t[i]);
i++;
}
}
}
finally
{
close(br);
}
return tokens;
}
public static void close(Reader r)
{
try
{
if (r != null)
{
r.close();
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
public static void main(String[] args)
{
try
{
// String fileName = ((args.length > 0) ? args[0] : DEFAULT_TEST_FILE);
FileReader fileReader = new FileReader(new File("c:\\DevTest\\1.txt"));
FileTokenizer fileTokenizer = new FileTokenizer();
List<String> tokens = fileTokenizer.tokenize(fileReader);
//System.out.println(tokens);
}
catch (IOException e)
{
e.printStackTrace();
}
}
}
Looking at your input file, I should point out that its hierarchical and irregular structure makes it more suited to be parsed by an actual parser. You may have to learn how to use a parser generator and write a lexer and grammar for it etc, but in the end you'll end up with a much more maintainable code. Doing this yourself is rather painstaking and error-prone.
I recommend ANTLR. It's quite mature, and it has a wide enough user base that I'm sure you can get help easily.

Categories

Resources