I have an assignment which involves creating a Trie of company names (read from a file) and then reading a news article input and counting the number of times a company name from the Trie occurs in the article.
I have coded a pretty standard Trie structure, however for the assignment it made more sense to have the TrieNodes hold the full word rather than just each character.
To make things more complicated, each company name from the file has one "primary name" and can have multiple "secondary names". For example: Microsoft Corporation, Microsoft, Xbox - where the first name is always the primary.
The assignment requires that I count all matches in the article for any of the company names, but only return the company's primary name when printing the results. Because of this, my TrieNode has the String primeName datafield, along with the standard isEnd bool. However, in my case, isEnd represents whether or not the specified node and its parent(s) form a full company name.
For example, with the article input "Microsoft Corporation just released a new Xbox console." I would need to return something along the lines of "Microsoft:2" because both Microsoft Corporation and Xbox share the same primary company name which is Microsoft.
I am using an iterator in the getHits() method but when I do find a hit, I need to look at the next word in the array to make sure it is not a continuation before I decide whether to stop or continue. The problem is that calling iter.next() doesn't just "peek" the next value but it moves forward, essentially causing me to skip words.
For example, if you look at the below code and my example, after "Best" gets a hit, it should see that "Buy" is a child and the next time it loops get a match on "Buy", but since I already call iter.next() to look at "Buy" within the While loop, the next iteration entirely skips "Buy". Is there some way I can simply peek at the next iter value within the While loop without actually moving to it? Also, any improvements to this code are greatly appreciated! I am sure there are many places where I sloppily implemented something.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
public class BuildTrie {
// Class Methods
public static void main(String[] args) throws IOException {
Trie Companies = new Trie();
String filename = "companies.dat";
try {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line;
while ((line = reader.readLine()) != null) {
// Split line by tab character
String[] aliases = line.replaceAll("\\p{P}", "").split("\t");
// Loop over each "alias" of specific company
for (int n = 0; n < aliases.length; n++) {
String[] name = aliases[n].split(" ");
// Insert each alias into Trie with index 0 as primary
Companies.insert(name, aliases[0]);
}
}
reader.close();
} catch (Exception e) {
System.err.format("Exception occurred trying to read '%s'.", filename);
e.printStackTrace();
}
/*System.out.println("Article Input: ");
try (BufferedReader input = new BufferedReader(new InputStreamReader(System.in))) {
String line;
while ((line = input.readLine()) != null) {
if (".".equals(line)) break;
String[] items = line.trim().replaceAll("\\p{P}", "").split("\\s+");
for (int i = 0; i < items.length; i++) {
Companies.words.add(items[i]);
//System.out.println(items[i]);
}
}
}*/
Companies.articleAdd("The");
Companies.articleAdd("company");
Companies.articleAdd("Best");
Companies.articleAdd("Buy");
Companies.articleAdd("sell");
Companies.articleAdd("Xbox");
Companies.getHits();
}
}
// Trie Node, which stores a character and the children in a HashMap
class TrieNode {
// Data Fields
private String word;
HashMap<String,TrieNode> children;
boolean bIsEnd;
private String primary = "";
// Constructors
public TrieNode() {
children = new HashMap<>();
bIsEnd = false;
}
public TrieNode(String st, String prime) {
word = st;
children = new HashMap<>();
bIsEnd = false;
primary = prime;
}
// Trie Node Methods
public HashMap<String,TrieNode> getChildren() {
return children;
}
public String getValue() {
return word;
}
public void setIsEnd(boolean val) {
bIsEnd = val;
}
public boolean isEnd() {
return bIsEnd;
}
public String getPrime() {
return primary;
}
}
class Trie {
private ArrayList<String> article = new ArrayList<String>();
private HashMap<String,Integer> hits = new HashMap<String,Integer>();
// Constructor
public Trie() {
root = new TrieNode();
}
// Insert article text
public void articleAdd(String word) {
article.add(word);
}
// Method to insert a new company name to Trie
public void insert(String[] names, String prime) {
// Find length of the given name
int length = names.length;
//TrieNode currNode = root;
HashMap<String,TrieNode> children = root.children;
// Traverse through all words of given name
for( int i=0; i<length; i++)
{
String name = names[i];
System.out.println("Iter: " + name);
TrieNode t;
// If there is already a child for current word of given name
if( children.containsKey(name))
t = children.get(name);
else // Else create a child
{
System.out.println("Inserting node " + name + " prime is " + prime);
t = new TrieNode(name, prime);
children.put( name, t );
}
children = t.getChildren();
int j = names.length-1;
if(i==j){
t.setIsEnd(true);
System.out.println("WordEnd");
}
}
}
public void getHits() {
// String[] articleArr = article.toArray(new String[0]);
// Initialize reference to traverse through Trie
// TrieNode crawl = root;
// int level, prevMatch = 0;
Iterator<String> iter = article.iterator();
TrieNode currNode = root;
while (iter.hasNext()) {
String word = iter.next();
System.out.println("Iter: " + word);
// HashMap of current node's children
HashMap<String,TrieNode> child = currNode.getChildren();
// If hit in currNode's children
if (child.containsKey(word)) {
System.out.println("Node exists: " + word);
// Update currNode to be node that matched
currNode = child.get(word);
System.out.println(currNode.isEnd());
String next = "";
// If currNode is leaf and next node has no match in children, were done
if (iter.hasNext()) {next = iter.next();}
if (currNode.isEnd() && !child.containsKey(next)) {
System.out.println("Matched word: " + word);
System.out.println("Primary: " + currNode.getPrime());
currNode = root;
} else {
// Else next node is continuation
}
} else {
// Else ignore next word and reset
currNode = root;
}
}
}
private TrieNode root;
}
I think instead of using while and iter.next() you can use for loop as below
for (Map.Entry entry : article.entrySet()) {
String word = entry.getKey();
}
So you are not really moving to the next items of your hashmap.
If this is not your point, please clarify us.
Thanks,
Nghia
I opted to use a for-loop style instead of While loop for this, as well as tweaked some logic to get it working. For those interested, the new code is below, as well as an example of the "companies.dat" file (what is populated to the Trie). The stdin is any text excerpt which ends with a "." on new line.
Companies.dat:
Microsoft Corporation Microsoft Xbox
Apple Computer Apple Mac
Best Buy
Dell
TrieBuilder:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
public class BuildTrie {
// Class Methods
public static void main(String[] args) throws IOException {
Trie Companies = new Trie();
String filename = "companies.dat";
try {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line;
while ((line = reader.readLine()) != null) {
// Split line by tab character
String[] aliases = line.replaceAll("\\p{P}", "").split("\t");
// Loop over each "alias" of specific company
for (int n = 0; n < aliases.length; n++) {
String[] name = aliases[n].split(" ");
// Insert each alias into Trie with index 0 as primary
Companies.insert(name, aliases[0]);
}
}
reader.close();
} catch (Exception e) {
System.err.format("Exception occurred trying to read '%s'.", filename);
e.printStackTrace();
}
System.out.println("Article Input: ");
try (BufferedReader input = new BufferedReader(new InputStreamReader(System.in))) {
String line;
while ((line = input.readLine()) != null) {
if (".".equals(line)) break;
String[] items = line.trim().replaceAll("\\p{P}", "").split("\\s+");
for (int i = 0; i < items.length; i++) {
Companies.articleAdd(items[i]);
}
}
}
Companies.getHits();
}
}
// Trie Node, which stores a character and the children in a HashMap
class TrieNode {
// Data Fields
private String word;
HashMap<String,TrieNode> children;
boolean bIsEnd;
private String primary = "";
// Constructors
public TrieNode() {
children = new HashMap<>();
bIsEnd = false;
}
public TrieNode(String st, String prime) {
word = st;
children = new HashMap<>();
bIsEnd = false;
primary = prime;
}
// Trie Node Methods
public HashMap<String,TrieNode> getChildren() {
return children;
}
public String getValue() {
return word;
}
public void setIsEnd(boolean val) {
bIsEnd = val;
}
public boolean isEnd() {
return bIsEnd;
}
public String getPrime() {
return primary;
}
}
class Trie {
private ArrayList<String> article = new ArrayList<String>();
private HashMap<String,Integer> hits = new HashMap<String,Integer>();
// Constructor
public Trie() {
root = new TrieNode();
}
// Insert article text
public void articleAdd(String word) {
article.add(word);
}
// Method to insert a new company name to Trie
public void insert(String[] names, String prime) {
// Find length of the given name
int length = names.length;
HashMap<String,TrieNode> children = root.children;
// Traverse through all words of given name
for( int i=0; i<length; i++)
{
String name = names[i];
TrieNode t;
// If there is already a child for current word of given name
if( children.containsKey(name))
t = children.get(name);
else // Else create a child
{
t = new TrieNode(name, prime);
children.put( name, t );
}
children = t.getChildren();
int j = names.length-1;
if(i==j){
t.setIsEnd(true);
}
}
}
public void getHits() {
// Initialize reference to traverse through Trie
TrieNode currNode = root;
for (int i=0; i < article.size(); i++) {
String word = article.get(i);
System.out.println("Searching: " + word);
// HashMap of current node's children
HashMap<String, TrieNode> child = currNode.getChildren();
// If hit in currNode's children
if (child.containsKey(word)) {
System.out.println("Node exists: " + word);
// Update currNode to be node that matched
currNode = child.get(word);
child = currNode.getChildren();
System.out.println("isEnd?: " + currNode.isEnd());
String next = "";
if (i+1 < article.size()) {
next = article.get(i+1);
}
// If currNode is leaf and next node has no match in children, were done
if (currNode.isEnd() && !child.containsKey(next)) {
System.out.println("Primary of match: " + currNode.getPrime());
currNode = root;
}
} else {
// Else ignore next word and reset
System.out.println("No match.");
currNode = root;
}
}
}
private TrieNode root;
}
Related
I am working on a project and need to search in data of millions of customers. I want to implement radix(trie) search algorithm. I have read and implement radix for a simple string collections. But Here I have a collection of customers and want to search it by name or by mobile number.
Customer Class:
public class Customer {
String name;
String mobileNumer;
public Customer (String name, String phoneNumer) {
this.name = name;
this.mobileNumer = phoneNumer;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getPhoneNumer() {
return mobileNumer;
}
public void setPhoneNumer(String phoneNumer) {
this.mobileNumer = phoneNumer;
}
}
RadixNode Class:
import java.util.HashMap;
import java.util.Map;
class RadixNode {
private final Map<Character, RadixNode> child = new HashMap<>();
private final Map<Customer, RadixNode> mobileNum = new HashMap<>();
private boolean endOfWord;
Map<Character, RadixNode> getChild() {
return child;
}
Map<Customer, RadixNode> getChildPhoneDir() {
return mobileNum;
}
boolean isEndOfWord() {
return endOfWord;
}
void setEndOfWord(boolean endOfWord) {
this.endOfWord = endOfWord;
}
}
Radix Class:
class Radix {
private RadixNode root;
Radix() {
root = new RadixNode();
}
void insert(String word) {
RadixNode current = root;
for (int i = 0; i < word.length(); i++) {
current = current.getChild().computeIfAbsent(word.charAt(i), c -> new RadixNode());
}
current.setEndOfWord(true);
}
void insert(Customer word) {
RadixNode current = root;
System.out.println("==========================================");
System.out.println(word.mobileNumer.length());
for (int i = 0; i < word.mobileNumer.length(); i++) {
current = current.getChildPhoneDir().computeIfAbsent(word.mobileNumer.charAt(i), c -> new RadixNode());
System.out.println(current);
}
current.setEndOfWord(true);
}
boolean delete(String word) {
return delete(root, word, 0);
}
boolean containsNode(String word) {
RadixNode current = root;
for (int i = 0; i < word.length(); i++) {
char ch = word.charAt(i);
RadixNode node = current.getChild().get(ch);
if (node == null) {
return false;
}
current = node;
}
return current.isEndOfWord();
}
boolean isEmpty() {
return root == null;
}
private boolean delete(RadixNode current, String word, int index) {
if (index == word.length()) {
if (!current.isEndOfWord()) {
return false;
}
current.setEndOfWord(false);
return current.getChild().isEmpty();
}
char ch = word.charAt(index);
RadixNode node = current.getChild().get(ch);
if (node == null) {
return false;
}
boolean shouldDeleteCurrentNode = delete(node, word, index + 1) && !node.isEndOfWord();
if (shouldDeleteCurrentNode) {
current.getChild().remove(ch);
return current.getChild().isEmpty();
}
return false;
}
public void displayContactsUtil(RadixNode curNode, String prefix)
{
// Check if the string 'prefix' ends at this Node
// If yes then display the string found so far
if (curNode.isEndOfWord())
System.out.println(prefix);
// Find all the adjacent Nodes to the current
// Node and then call the function recursively
// This is similar to performing DFS on a graph
for (char i = 'a'; i <= 'z'; i++)
{
RadixNode nextNode = curNode.getChild().get(i);
if (nextNode != null)
{
displayContactsUtil(nextNode, prefix + i);
}
}
}
public boolean displayContacts(String str)
{
RadixNode prevNode = root;
// 'flag' denotes whether the string entered
// so far is present in the Contact List
String prefix = "";
int len = str.length();
// Display the contact List for string formed
// after entering every character
int i;
for (i = 0; i < len; i++)
{
// 'str' stores the string entered so far
prefix += str.charAt(i);
// Get the last character entered
char lastChar = prefix.charAt(i);
// Find the Node corresponding to the last
// character of 'str' which is pointed by
// prevNode of the Trie
RadixNode curNode = prevNode.getChild().get(lastChar);
// If nothing found, then break the loop as
// no more prefixes are going to be present.
if (curNode == null)
{
System.out.println("No Results Found for \"" + prefix + "\"");
i++;
break;
}
// If present in trie then display all
// the contacts with given prefix.
System.out.println("Suggestions based on \"" + prefix + "\" are");
displayContactsUtil(curNode, prefix);
// Change prevNode for next prefix
prevNode = curNode;
}
for ( ; i < len; i++)
{
prefix += str.charAt(i);
System.out.println("No Results Found for \"" + prefix + "\"");
}
return true;
}
public void displayContactsUtil(RadixNode curNode, String prefix, boolean isPhoneNumber)
{
// Check if the string 'prefix' ends at this Node
// If yes then display the string found so far
if (curNode.isEndOfWord())
System.out.println(prefix);
// Find all the adjacent Nodes to the current
// Node and then call the function recursively
// This is similar to performing DFS on a graph
for (char i = '0'; i <= '9'; i++)
{
RadixNode nextNode = curNode.getChildPhoneDir().get(i);
if (nextNode != null)
{
displayContactsUtil(nextNode, prefix + i);
}
}
}
public boolean displayContacts(String str, boolean isPhoneNumber)
{
RadixNode prevNode = root;
// 'flag' denotes whether the string entered
// so far is present in the Contact List
String prefix = "";
int len = str.length();
// Display the contact List for string formed
// after entering every character
int i;
for (i = 0; i < len; i++)
{
// 'str' stores the string entered so far
prefix += str.charAt(i);
// Get the last character entered
char lastChar = prefix.charAt(i);
// Find the Node corresponding to the last
// character of 'str' which is pointed by
// prevNode of the Trie
RadixNode curNode = prevNode.getChildPhoneDir().get(lastChar);
// If nothing found, then break the loop as
// no more prefixes are going to be present.
if (curNode == null)
{
System.out.println("No Results Found for \"" + prefix + "\"");
i++;
break;
}
// If present in trie then display all
// the contacts with given prefix.
System.out.println("Suggestions based on \"" + prefix + "\" are");
displayContactsUtil(curNode, prefix, isPhoneNumber);
// Change prevNode for next prefix
prevNode = curNode;
}
for ( ; i < len; i++)
{
prefix += str.charAt(i);
System.out.println("No Results Found for \"" + prefix + "\"");
}
return true;
}
}
I have tried to search in a collection but got stuck. Any help / suggestion would be appreciated.
I propose you 2 ways of doing it.
First way: with a single trie.
It is possible to store all you need in a single trie. Your customer class is fine, and here is a possible RadixNode implementation.
I consider that there cannot be two customers with the same name, or with the same phone number. If it is not the case (possibility to have people with same name and different phone nb for instance) tell me in a comment I'll edit.
The thing that is important to understand, is that if you want to have two different ways of finding a customer, and you use a single trie, each customer will appear twice in your trie. Once at the end of the path corresponding to its name, and once after the end of the path corresponding to its phone number.
import java.util.HashMap;
import java.util.Map;
class RadixNode {
private Map<Character, RadixNode> children;
private Customer customer;
public RadixNode(){
this.children = new Map<Character, RadixNode>();
this.Customer = NULL;
}
Map<Character, RadixNode> getChildren() {
return children;
}
boolean hasCustomer() {
return this.customer != NULL;
}
Customer getCustomer() {
return customer;
}
void setCustomer(Customer customer) {
this.customer = customer;
}
}
As you can see, there is only one map storing the node's children. That is because we can see a phone number as a string of digits, so this trie will store all the customers ... twice. Once per name, once per phone number.
Now let's see an insert function. Your trie will need a root,n let's call it root.
public void insert(RadixNode root, Customer customer){
insert_with_name(root, customer, 0);
insert_with_phone_nb(root, customer, 0);
}
public void insert_with_name(RadixNode node, Customer customer, int idx){
if (idx == customer.getName().length()){
node.setCustomer(customer);
} else {
Character current_char = customer.getName().chatAt(idx);
if (! node.getChlidren().containsKey(current_char){
RadixNode new_child = new RadixNode();
node.getChildren().put(current_char, new_child);
}
insert_with_name(node.getChildren().get(current_char), customer, idx+1);
}
}
The insert_with_phone_nb() method is similar. This will work as long as people has unique names, unique phone numbers, and that someone's name cannot be someone's phone number.
As you can see, the method is recursive. I advice you to build your trie structure (and generally, everything based on tree structures) recursively, as it makes for simpler, and generallay cleaner code.
The search function is almost a copy-paste of the insert function:
public void search_by_name(RadixNode node, String name, int idx){
// returns NULL if there is no user going by that name
if (idx == name.length()){
return node.getCustomer();
} else {
Character current_char = name.chatAt(idx);
if (! node.getChlidren().containsKey(current_char){
return NULL;
} else {
return search_by_name(node.getChildren().get(current_char), name, idx+1);
}
}
}
Second way: with 2 tries
The principle is the same, all you have to do is reuse the code above, but keep two distinct root nodes, each of them will build a trie (one for names, one for phone numbers).
The only difference will be the insert function (as it will call insert_with_name and insert_with_phone_nb with 2 different roots), and the search function which will have to search in the right trie as well.
public void insert(RadixNode root_name_trie, RadixNode root_phone_trie, Customer customer){
insert_with_name(root_name_trie, customer, 0);
insert_with_phone_nb(root_phone_trie, customer, 0);
}
Edit: After comment precising there might be customers with the same name, here is an alternative implementation, to allow a RadixNode to contain references toward several Customer.
Replace the Customer customer attribute in RadixNode by, for example, a Vector<Customer>. The methods will have to be modified accordingly of course, and a search by name will then return to you a vector of customers (possibly empty), since this search can then lead to several results.
In your case, I'd go for a single trie, containing vectors of customers. So you can have both a search by name and phone (cast the number as a String), and a single data structure to maintain.
Fully develop the classes for the Linked Implementation of the ADT Bag (i.e., LinkedBag, Node).
Test your classes well (call all methods) before you proceed.
Spell checker
Write a spell checker that:
a. Reads in words from an external file.
b. Tests each word against a dictionary (an instance of your LinkedBag class) of correctly spelled words (if the word is found in the dictionary then it’s spelled correctly).
I have already developed the classes for the linked implementation of the ADT Bag and also made the spellchecker.java
I need help to understand how to use and implement linkedbag class and my words.txt file for this problem.
Dictionary.java
package Bags;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class Dictionary {
private int M = 1319; // prime number
final private Bucket[] array;
public Dictionary() {
this.M = M;
array = new Bucket[M];
for (int i = 0; i < M; i++) {
array[i] = new Bucket();
}
}
private int hash(String key) {
return (key.hashCode() & 0x7fffffff) % M;
}
// call hash() to decide which bucket to put it in, do it.
public void add(String key) {
array[hash(key)].put(key);
}
// call hash() to find what bucket it's in, get it from that bucket.
public boolean contains(String input) {
input = input.toLowerCase();
return array[hash(input)].get(input);
}
public void build(String filePath) {
try {
BufferedReader reader = new BufferedReader(new FileReader(filePath));
String line;
while ((line = reader.readLine()) != null) {
add(line);
}
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
//
public String[] getRandomEntries(int num) {
String[] toRet = new String[num];
for (int i = 0; i < num; i++) {
// pick a random bucket, go out a random number
Node n = array[(int) Math.random() * M].first;
int rand = (int) Math.random() * (int) Math.sqrt(num);
for (int j = 0; j < rand && n.next != null; j++)
n = n.next;
toRet[i] = n.word;
}
return toRet;
}
class Bucket {
private Node first;
public boolean get(String in) { // return key true if key exists
Node next = first;
while (next != null) {
if (next.word.equals(in)) {
return true;
}
next = next.next;
}
return false;
}
public void put(String key) {
for (Node curr = first; curr != null; curr = curr.next) {
if (key.equals(curr.word)) {
return; // search hit: return
}
}
first = new Node(key, first); // search miss: add new node
}
class Node {
String word;
Node next;
public Node(String key, Node next) {
this.word = key;
this.next = next;
}
}
}
My problem is that I am calling Hash() to decide which bucket to put it in and do the whole process. Instead of this I want to use an Instance of my linkedbag class to look through my word.txt file and return output if the input matches or not..
I want to create a program that use as a word counter. I intended to use a tree structure for store words. I also want to count the frequencies of word. I already built the binary tree and the method to get user input. How should i do this? Help please.
This is my code.
public class Node {
int a;
String b;
Node leftChild;
Node rightChild;
Node(String b){
this.a = 0;
this.b = b;
this.leftChild = null;
this.rightChild = null;
}
public void display(){
System.out.println(" "+ b + " ");
}
public class Tree {
public Node root;
public Tree(){
root = null;
}
public void insert(String word){
Node newNode = new Node(word);
newNode.b = word;
if(root==null){
root = newNode;
}else{
Node current = root;
Node parent;
while(true){
parent = current;
if(word.compareToIgnoreCase(current.b)<0){
current = current.leftChild;
if(current == null){
parent.leftChild = newNode;
return;
}
}else{
current = current.rightChild;
if(current == null){
parent.rightChild = newNode;
return;
}
}
}
}
public Node search(String word){
Node current = root;
while(!current.b.equalsIgnoreCase(word)){
if(word.compareToIgnoreCase(current.b)<0){
current= current.leftChild;
}else{
current = current.rightChild;
}
if (current == null)
return null;
}
return current;
}
public void inOrder(Node localRoot){
if(localRoot != null){
inOrder(localRoot.leftChild);
localRoot.display();
inOrder(localRoot.rightChild);
}
}
This is main method. (Though it is not even near complete)
import java.util.Scanner;
public class Main {
public static void main(String[] args) {
Tree newTree = new Tree();
Scanner inputString = new Scanner(System.in);
System.out.println("Type the paragraph and press \"Enter\" :");
String input = inputString.nextLine();
newTree.inOrder(newTree.root);
}
It would be easier to use a map or something like that for storing the number of occurrences of the words.
But if you want to use the tree structure for some reason and I understand your question correctly, you should modify your insert method:
//...
}else{
current = current.rightChild;
if(current == null){
parent.rightChild = newNode;
return;
} /*begin modification*/ else if (current.b.equalsIgnoreCase(word)) { // check if word already is stored in tree
++current.a; // increase counter
} /*end modification*/
}
Now you can use your insert method and count the words add the same time. But note that your counter begins with 0 at the moment because of this.a = 0; in the Node constructor.
In your main method you can then split the paragraph into words (String[] words = input.split(" ");) and add every word in the array with newTree.insert(words[i]); in a for loop.
If this is not what you wanted to know you would have to specify your question more clearly.
IMHO the problem with your tree is that it exists - you don't need one!
Counting word frequencies has a canonical, and quite terse, solution:
public static Map<String, Integer> wordCounts(String input) {
Map<String, Integer> map = new HashMap<String, Integer>();
for (String word : input.toLowerCase().split("(?s)\\s+"))
map.put(word, map.containsKey(word) ? map.get(word) + 1 : 1);
return map;
}
I am creating a binary search tree using Nodes with MovieInfo objects as keys. MovieInfo objects are objects with three fields: ID, fullName, and shortName. The binary search tree will store information on an input text file containing every movie listed on IMDB. The insert would be based on an ID randomly associated with each movie. The search features would have a user input a String and pull up the other data of the object (shortName, ID, etc).
Now -- the errors I am having are with the findExact method.
First, no matter what my input is -- I get the message "Current Node is null" which means my starting currentNode is ALWAYS going null. The other issue is, I am not sure how to make sure my root node (the first currentNode searched for in the tree) is initialized properly. I have a feeling this might have something to do with my problem.
The other problem might lie in how I am inserting the nodes in the first place.
And for reference, the way I am testing this code/running in the text input file is IndexTester.java.
UPDATE: Okay. Everything works now. The only issue I am having now is that my findExact method seems to change the ID field of the MovieInfo class. So my search does not end up working. For example, if I input:
1568482 2 White Kids with Problems 2 White Kids with Problems
1568487 Disorient Disorient
1568488 DIY Playgrounds DIY Playgrounds
and search "disorient," the return is "1568488 Disorient Disorient" (with the ID for the "2 White Kids with Problems" object). Additionally, since the ID is taken...DIY Playgrounds can't be searched for succesfully. The insertion method works, but the findExact method is giving me issues. Any ideas as to what might be causing this change in the ID?
The MovieInfo class (separate .java file) -- can't be edited
public class MovieInfo {
public String shortName;
public String fullName;
static int ID;
public String key;
public MovieInfo(int id, String s, String f) {
ID = id;
shortName = s;
fullName = f;
}
}
BSTIndex.java -- the class in which I create the internal BST
import java.util.*;
import java.io.*;
public class BSTIndex extends MovieInfo {
public Node root;
public class Node{
public MovieInfo movie;
public Node left, right;
public Node(MovieInfo movie)
{
this.movie = movie;
//this.val = val;
//this.N = N;
}
}
public BSTIndex()
/**
* constructor to initialize the internal binary search tree.
* The data element should be an object of the type MovieInfo, described above.
*/
{
super(0, "", "");
}
public MovieInfo findExact(String key) {
return findExact(root, key);
}
private MovieInfo findExact(Node x, String key) {
if (x == null) return null;
int cmp = key.compareToIgnoreCase(x.movie.fullName);
if (cmp < 0) return findExact(x.left, key);
else if (cmp > 0) return findExact(x.right, key);
else return x.movie;
}
public void insert(MovieInfo data)
{
if (data == null) {return; }
root = insert(root, data);
}
public Node insert(Node x, MovieInfo data)
{
if (x == null) return new Node(data);
int cmp = data.ID - x.movie.ID;
if (cmp > 0 ) x.right = insert(x.right, data);
else if (cmp < 0) x.left = insert(x.left, data);
else if (cmp == 0) x.movie = data;
return x;
}
}
IndexTester.java
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Scanner;
public class IndexTester {
// Test program
public static void main( String [ ] args ) throws FileNotFoundException
{
BSTIndex t = new BSTIndex( );
Scanner scan = new Scanner(new FileInputStream(args[0]));
long start = System.currentTimeMillis();
int i=0;
while(scan.hasNext()){
String line = scan.nextLine();
String [] fields = line.split("\t");
int id = Integer.parseInt(fields[0].trim());
String shortName = fields[1].trim();
String fullName = fields[2].trim();
MovieInfo info = new MovieInfo(id, shortName, fullName);
t.insert(info);
i++;
if(i % 10000 == 0){
System.out.println("Inserted " + i + " records.");
}
}
long end = System.currentTimeMillis();
System.out.println("Index building complete. Inserted " + i +
" records.Elapsed time = " + (end - start )/1000 + " seconds.");
Scanner input = new Scanner(System.in);
System.out.println("Enter search string, end in a '*' for
prefix search. q to quit");
while(input.hasNext()){
String search = input.nextLine().trim();
if(search.equals("q")) break;
if(search.indexOf('*')>0){
//call prefix search.
MovieInfo item = t.findPrefix(search);
if(item==null) System.out.println("Not Found");
else System.out.println(item.ID + " " + item.shortName + "
" + item.fullName);
}
else{
//call exact search, modify to return MovieInfo
MovieInfo item = t.findExact(search);
if(item==null) System.out.println("Not Found");
else System.out.println(item.ID + " " + item.shortName + "
" + item.fullName);
}
}
}
}
public class BSTIndex should not extend MovieInfo, preferably MovieInfo should extend Node.
Ok, so MovieInfo cannot be modified so I would populate the MovieInfo class with the data and set it as the nodevalue of an extended Node class.
public class BSTNode extends Node {
private BSTNode left,right;
private MovieInfo val;
public void setVal(MovieInfo val){
this.val=val;
}
public void setLeft(MovieInfo m){
left=new BSTNode(m);
}
public void setRight(MovieInfo m){
right=new BSTNode(m);
}
//override some of the Node methods
}
I'm very close to being done, but can't quite figure out how to tie everything together. I have the separate methods responsible for their particular task, but i feel like my cohesion is really bad. not real sure how they tie together and what needs to be called in main. The goal here is to read a text file from the command line and list the words in the story lexicographically.
% java Ten < story.txt
Word Occurs
==== ======
a 21
animal 3
.
.
.
zebra 1
%
Here's my code thus far:
import java.util.Scanner;
public class Ten2
{
public static void main(String [] arg)
{
Scanner input = new Scanner(System.in);
String word;
List sortedList = new List();
word = nextWord(input);
while (word!=null) {
sortedList.insert(word);
word = nextWord(input);
}
sortedList.printWords();
}
private static String nextWord(Scanner input)
{
// return null if there are no more words
if (input.hasNext() == false )
return null;
// take next element, convert to lowercase, store in s
else {
String s = input.next().toLowerCase() ;
// empty string
String token = "";
// loop through s and concatonate chars onto token
for (int i =0; i < s.length(); i++) {
if (Character.isLetter(s.charAt(i)) == true)
token = token + s.charAt(i);
else if (s.charAt(i) == '\'' )
token = token + s.charAt(i);
else if (s.charAt(i) == '-')
token = token + s.charAt(i);
}
return token;
}
}
}
class List
{
/*
* Internally, the list of strings is represented by a linked chain
* of nodes belonging to the class ListNode. The strings are stored
* in lexicographical order.
*/
private static class ListNode
{
// instance variables for ListNode objects
public String word;
public ListNode next;
public int count;
// Listnode constructor
public ListNode(String w, ListNode nxt)
{
word = w; // token from nextWord()?
next = nxt; // link to next ListNode
count = 1; // number of word occurences
}
}
// instance variables for List object
private ListNode first;
private int numWords;
// constructor postcondition: creates new Listnode storing object
public List()
{
first = null; // pointer to ListNode?
numWords = 0; // counter for nodes in list
}
// Insert a specified word into the list, keeping the list
// in lexicographical order.
public void insert(String word)
{
// check if first is null
if (first == null) {
ListNode newNode;
newNode = addNode(word, null);
first = newNode;
}
// else if (first is not null) check if word matches first word in List
else if (word.equals(first.word)) {
// increase count
first.count++;
}
// else (first is not null && doesn't match first word)
else {
ListNode newNode;
ListNode current;
current = first;
ListNode previous;
previous = null;
int cmp = word.compareTo(current.word);
/*
* Fist two cases of empty list and word already existing
* handled in above if and else statements, now by using compareTo()
* method between the words, the insertion postion can be determined.
* Links between ListNode variables current and previous need to be
* modified in order to maintain the list
*/
// loop as long as value comparing to is positive
// when compareTo() returns positive this means the "word" parameter is greater than the word in the list
while ((cmp >0) && (current.next != null)) {
previous = current;
current = current.next;
cmp = word.compareTo(current.word);
}
// insert after current at end of list
if ((cmp >0 && current.next == null)) {
newNode = addNode(word, null);
current.next = newNode;
}
// increments count when word already exists
else if (cmp==0) {
current.count++;
}
// else (cmp < 0) we insert BEFORE current
else {
newNode = addNode(word, current);
// first node in list comes after new word
if (previous == null) {
first = newNode;
}
else {
// inserting new word in middle of list
previous.next = newNode;
}
}
}
}
// method to add new ListNode and increase counter
private ListNode addNode(String word, ListNode next)
{
ListNode newNode = new ListNode(word, next);
numWords++;
return newNode;
}
// Returns a string array that contains all the words in the list.
public String[] getWords()
{
String[] Words = new String[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Words[i] = current.word;
current = current.next;
i++;
}
return Words;
}
// Returns an int array that contains the number of times
// each word occurs in the list.
public int[] getNumbers()
{
int[] Numbers = new int[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Numbers[i] = current.count;
current = current.next;
i++;
}
return Numbers;
}
// Outputs the string array and int array containing all the
// words in the list and the number of times each occurs.
public void printWords()
{
int[] Numbers = getNumbers();
String[] Words = getWords();
System.out.println("Word \t \t Occurs");
System.out.println("==== \t \t ======");
for (int i =0; i < numWords; i++) {
System.out.println(Words[i] + " \t " + Numbers[i]);
}
}
}
Well, I would start by defining what you want your program to do, which you've already done:
The goal here is to read a text file from the command line and list the words in the story lexicographically.
You're main function does this almost. Basically, what you need is a loop to tie it together:
public static void main(String [] arg)
{
// print out your initial information first (i.e. your column headers)
// ...
List sortedList = new List();
String word = nextWord();
// now, the question is... what is the end condition of the loop?
// probably that there aren't any more words, so word in this case
// will be null
while (word != null)
{
sortedList.insert(word);
word = nextWord();
}
// now, your list takes care of the sorting, so let's just print the list
sortedList.printWords();
}
I think that's all there is to it. Normally, I don't like to post solutions to homework questions, but in this case, since you already had all of the code and you just needed a little nudge to drive you in the right direction, I think it's fine.
There are a few things I noticed that are incorrect with your
Your list constructor has a 'void' return type - there should be no return type on constructors:
public List() //make an empty list
{
first = null;
numWords = 0;
}
The 'else' statement in this method is unneeded:
public static String nextWord()
{
if ( keyboard.hasNext() == false )
return null;
else {
String start = keyboard.next().toLowerCase() ;
String organized = "";
for (int i =0; i < start.length(); i++) {
if (Character.isLetter(start.charAt(i)) == true)
organized = organized + start.charAt(i);
else if (start.charAt(i) == '\'' )
organized = organized + start.charAt(i);
else if (start.charAt(i) == '-')
organized = organized + start.charAt(i);
}
return organized;
}
}
So, this should be:
public static String nextWord()
{
if ( keyboard.hasNext() == false )
return null;
String start = keyboard.next().toLowerCase() ;
String organized = "";
for (int i =0; i < start.length(); i++) {
if (Character.isLetter(start.charAt(i)) == true)
organized = organized + start.charAt(i);
else if (start.charAt(i) == '\'' )
organized = organized + start.charAt(i);
else if (start.charAt(i) == '-')
organized = organized + start.charAt(i);
}
return organized;
}
If you want to use a BufferedReader, it's pretty easy. Just set it up in your main method:
if (arg.length > 0)
{
// open our file and read everything into a string buffer
BufferedReader bRead = null;
try {
bRead = new BufferedReader(new FileReader(arg[0]));
} catch(FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.exit(0);
}
setupScanner(bRead);
}
Then, create a new method that sets up the scanner object:
public static void setupScanner(BufferedReader rdr)
{
keyboard = new Scanner(rdr);
}
And then just pass it in on the command line (i.e. java ten2 [filename])
import java.util.Scanner;
public class Ten2
{
public static void main(String [] arg)
{
Scanner input = new Scanner(System.in);
String word;
List sortedList = new List();
word = nextWord(input);
while (word!=null) {
sortedList.insert(word);
word = nextWord(input);
}
sortedList.printWords();
}
private static String nextWord(Scanner input)
{
// return null if there are no more words
if (input.hasNext() == false )
return null;
// take next element, convert to lowercase, store in s
else {
String s = input.next().toLowerCase() ;
// empty string
String token = "";
// loop through s and concatonate chars onto token
for (int i =0; i < s.length(); i++) {
if (Character.isLetter(s.charAt(i)) == true)
token = token + s.charAt(i);
else if (s.charAt(i) == '\'' )
token = token + s.charAt(i);
else if (s.charAt(i) == '-')
token = token + s.charAt(i);
}
return token;
}
}
}
class List
{
/*
* Internally, the list of strings is represented by a linked chain
* of nodes belonging to the class ListNode. The strings are stored
* in lexicographical order.
*/
private static class ListNode
{
// instance variables for ListNode objects
public String word;
public ListNode next;
public int count;
// Listnode constructor
public ListNode(String w, ListNode nxt)
{
word = w; // token from nextWord()?
next = nxt; // link to next ListNode
count = 1; // number of word occurences
}
}
// instance variables for List object
private ListNode first;
private int numWords;
// constructor postcondition: creates new Listnode storing object
public List()
{
first = null; // pointer to ListNode?
numWords = 0; // counter for nodes in list
}
// Insert a specified word into the list, keeping the list
// in lexicographical order.
public void insert(String word)
{
// check if first is null
if (first == null) {
ListNode newNode;
newNode = addNode(word, null);
first = newNode;
}
// else if (first is not null) check if word matches first word in List
else if (word.equals(first.word)) {
// increase count
first.count++;
}
// else (first is not null && doesn't match first word)
else {
ListNode newNode;
ListNode current;
current = first;
ListNode previous;
previous = null;
int cmp = word.compareTo(current.word);
/*
* Fist two cases of empty list and word already existing
* handled in above if and else statements, now by using compareTo()
* method between the words, the insertion postion can be determined.
* Links between ListNode variables current and previous need to be
* modified in order to maintain the list
*/
// loop as long as value comparing to is positive
// when compareTo() returns positive this means the "word" parameter is greater than the word in the list
while ((cmp >0) && (current.next != null)) {
previous = current;
current = current.next;
cmp = word.compareTo(current.word);
}
// insert after current at end of list
if ((cmp >0 && current.next == null)) {
newNode = addNode(word, null);
current.next = newNode;
}
// increments count when word already exists
else if (cmp==0) {
current.count++;
}
// else (cmp < 0) we insert BEFORE current
else {
newNode = addNode(word, current);
// first node in list comes after new word
if (previous == null) {
first = newNode;
}
else {
// inserting new word in middle of list
previous.next = newNode;
}
}
}
}
// method to add new ListNode and increase counter
private ListNode addNode(String word, ListNode next)
{
ListNode newNode = new ListNode(word, next);
numWords++;
return newNode;
}
// Returns a string array that contains all the words in the list.
public String[] getWords()
{
String[] Words = new String[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Words[i] = current.word;
current = current.next;
i++;
}
return Words;
}
// Returns an int array that contains the number of times
// each word occurs in the list.
public int[] getNumbers()
{
int[] Numbers = new int[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Numbers[i] = current.count;
current = current.next;
i++;
}
return Numbers;
}
// Outputs the string array and int array containing all the
// words in the list and the number of times each occurs.
public void printWords()
{
int[] Numbers = getNumbers();
String[] Words = getWords();
System.out.println("Word \t \t Occurs");
System.out.println("==== \t \t ======");
for (int i =0; i < numWords; i++) {
System.out.println(Words[i] + " \t " + Numbers[i]);
}
}
}