Radix(Trie) Tree implementation for Cutomer search in Java

Radix(Trie) Tree implementation for Cutomer search in Java - java

I am working on a project and need to search in data of millions of customers. I want to implement radix(trie) search algorithm. I have read and implement radix for a simple string collections. But Here I have a collection of customers and want to search it by name or by mobile number.
Customer Class:
public class Customer {
String name;
String mobileNumer;
public Customer (String name, String phoneNumer) {
this.name = name;
this.mobileNumer = phoneNumer;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getPhoneNumer() {
return mobileNumer;
}
public void setPhoneNumer(String phoneNumer) {
this.mobileNumer = phoneNumer;
}
}
RadixNode Class:
import java.util.HashMap;
import java.util.Map;
class RadixNode {
private final Map<Character, RadixNode> child = new HashMap<>();
private final Map<Customer, RadixNode> mobileNum = new HashMap<>();
private boolean endOfWord;
Map<Character, RadixNode> getChild() {
return child;
}
Map<Customer, RadixNode> getChildPhoneDir() {
return mobileNum;
}
boolean isEndOfWord() {
return endOfWord;
}
void setEndOfWord(boolean endOfWord) {
this.endOfWord = endOfWord;
}
}
Radix Class:
class Radix {
private RadixNode root;
Radix() {
root = new RadixNode();
}
void insert(String word) {
RadixNode current = root;
for (int i = 0; i < word.length(); i++) {
current = current.getChild().computeIfAbsent(word.charAt(i), c -> new RadixNode());
}
current.setEndOfWord(true);
}
void insert(Customer word) {
RadixNode current = root;
System.out.println("==========================================");
System.out.println(word.mobileNumer.length());
for (int i = 0; i < word.mobileNumer.length(); i++) {
current = current.getChildPhoneDir().computeIfAbsent(word.mobileNumer.charAt(i), c -> new RadixNode());
System.out.println(current);
}
current.setEndOfWord(true);
}
boolean delete(String word) {
return delete(root, word, 0);
}
boolean containsNode(String word) {
RadixNode current = root;
for (int i = 0; i < word.length(); i++) {
char ch = word.charAt(i);
RadixNode node = current.getChild().get(ch);
if (node == null) {
return false;
}
current = node;
}
return current.isEndOfWord();
}
boolean isEmpty() {
return root == null;
}
private boolean delete(RadixNode current, String word, int index) {
if (index == word.length()) {
if (!current.isEndOfWord()) {
return false;
}
current.setEndOfWord(false);
return current.getChild().isEmpty();
}
char ch = word.charAt(index);
RadixNode node = current.getChild().get(ch);
if (node == null) {
return false;
}
boolean shouldDeleteCurrentNode = delete(node, word, index + 1) && !node.isEndOfWord();
if (shouldDeleteCurrentNode) {
current.getChild().remove(ch);
return current.getChild().isEmpty();
}
return false;
}
public void displayContactsUtil(RadixNode curNode, String prefix)
{
// Check if the string 'prefix' ends at this Node
// If yes then display the string found so far
if (curNode.isEndOfWord())
System.out.println(prefix);
// Find all the adjacent Nodes to the current
// Node and then call the function recursively
// This is similar to performing DFS on a graph
for (char i = 'a'; i <= 'z'; i++)
{
RadixNode nextNode = curNode.getChild().get(i);
if (nextNode != null)
{
displayContactsUtil(nextNode, prefix + i);
}
}
}
public boolean displayContacts(String str)
{
RadixNode prevNode = root;
// 'flag' denotes whether the string entered
// so far is present in the Contact List
String prefix = "";
int len = str.length();
// Display the contact List for string formed
// after entering every character
int i;
for (i = 0; i < len; i++)
{
// 'str' stores the string entered so far
prefix += str.charAt(i);
// Get the last character entered
char lastChar = prefix.charAt(i);
// Find the Node corresponding to the last
// character of 'str' which is pointed by
// prevNode of the Trie
RadixNode curNode = prevNode.getChild().get(lastChar);
// If nothing found, then break the loop as
// no more prefixes are going to be present.
if (curNode == null)
{
System.out.println("No Results Found for \"" + prefix + "\"");
i++;
break;
}
// If present in trie then display all
// the contacts with given prefix.
System.out.println("Suggestions based on \"" + prefix + "\" are");
displayContactsUtil(curNode, prefix);
// Change prevNode for next prefix
prevNode = curNode;
}
for ( ; i < len; i++)
{
prefix += str.charAt(i);
System.out.println("No Results Found for \"" + prefix + "\"");
}
return true;
}
public void displayContactsUtil(RadixNode curNode, String prefix, boolean isPhoneNumber)
{
// Check if the string 'prefix' ends at this Node
// If yes then display the string found so far
if (curNode.isEndOfWord())
System.out.println(prefix);
// Find all the adjacent Nodes to the current
// Node and then call the function recursively
// This is similar to performing DFS on a graph
for (char i = '0'; i <= '9'; i++)
{
RadixNode nextNode = curNode.getChildPhoneDir().get(i);
if (nextNode != null)
{
displayContactsUtil(nextNode, prefix + i);
}
}
}
public boolean displayContacts(String str, boolean isPhoneNumber)
{
RadixNode prevNode = root;
// 'flag' denotes whether the string entered
// so far is present in the Contact List
String prefix = "";
int len = str.length();
// Display the contact List for string formed
// after entering every character
int i;
for (i = 0; i < len; i++)
{
// 'str' stores the string entered so far
prefix += str.charAt(i);
// Get the last character entered
char lastChar = prefix.charAt(i);
// Find the Node corresponding to the last
// character of 'str' which is pointed by
// prevNode of the Trie
RadixNode curNode = prevNode.getChildPhoneDir().get(lastChar);
// If nothing found, then break the loop as
// no more prefixes are going to be present.
if (curNode == null)
{
System.out.println("No Results Found for \"" + prefix + "\"");
i++;
break;
}
// If present in trie then display all
// the contacts with given prefix.
System.out.println("Suggestions based on \"" + prefix + "\" are");
displayContactsUtil(curNode, prefix, isPhoneNumber);
// Change prevNode for next prefix
prevNode = curNode;
}
for ( ; i < len; i++)
{
prefix += str.charAt(i);
System.out.println("No Results Found for \"" + prefix + "\"");
}
return true;
}
}
I have tried to search in a collection but got stuck. Any help / suggestion would be appreciated.

I propose you 2 ways of doing it.
First way: with a single trie.
It is possible to store all you need in a single trie. Your customer class is fine, and here is a possible RadixNode implementation.
I consider that there cannot be two customers with the same name, or with the same phone number. If it is not the case (possibility to have people with same name and different phone nb for instance) tell me in a comment I'll edit.
The thing that is important to understand, is that if you want to have two different ways of finding a customer, and you use a single trie, each customer will appear twice in your trie. Once at the end of the path corresponding to its name, and once after the end of the path corresponding to its phone number.
import java.util.HashMap;
import java.util.Map;
class RadixNode {
private Map<Character, RadixNode> children;
private Customer customer;
public RadixNode(){
this.children = new Map<Character, RadixNode>();
this.Customer = NULL;
}
Map<Character, RadixNode> getChildren() {
return children;
}
boolean hasCustomer() {
return this.customer != NULL;
}
Customer getCustomer() {
return customer;
}
void setCustomer(Customer customer) {
this.customer = customer;
}
}
As you can see, there is only one map storing the node's children. That is because we can see a phone number as a string of digits, so this trie will store all the customers ... twice. Once per name, once per phone number.
Now let's see an insert function. Your trie will need a root,n let's call it root.
public void insert(RadixNode root, Customer customer){
insert_with_name(root, customer, 0);
insert_with_phone_nb(root, customer, 0);
}
public void insert_with_name(RadixNode node, Customer customer, int idx){
if (idx == customer.getName().length()){
node.setCustomer(customer);
} else {
Character current_char = customer.getName().chatAt(idx);
if (! node.getChlidren().containsKey(current_char){
RadixNode new_child = new RadixNode();
node.getChildren().put(current_char, new_child);
}
insert_with_name(node.getChildren().get(current_char), customer, idx+1);
}
}
The insert_with_phone_nb() method is similar. This will work as long as people has unique names, unique phone numbers, and that someone's name cannot be someone's phone number.
As you can see, the method is recursive. I advice you to build your trie structure (and generally, everything based on tree structures) recursively, as it makes for simpler, and generallay cleaner code.
The search function is almost a copy-paste of the insert function:
public void search_by_name(RadixNode node, String name, int idx){
// returns NULL if there is no user going by that name
if (idx == name.length()){
return node.getCustomer();
} else {
Character current_char = name.chatAt(idx);
if (! node.getChlidren().containsKey(current_char){
return NULL;
} else {
return search_by_name(node.getChildren().get(current_char), name, idx+1);
}
}
}
Second way: with 2 tries
The principle is the same, all you have to do is reuse the code above, but keep two distinct root nodes, each of them will build a trie (one for names, one for phone numbers).
The only difference will be the insert function (as it will call insert_with_name and insert_with_phone_nb with 2 different roots), and the search function which will have to search in the right trie as well.
public void insert(RadixNode root_name_trie, RadixNode root_phone_trie, Customer customer){
insert_with_name(root_name_trie, customer, 0);
insert_with_phone_nb(root_phone_trie, customer, 0);
}
Edit: After comment precising there might be customers with the same name, here is an alternative implementation, to allow a RadixNode to contain references toward several Customer.
Replace the Customer customer attribute in RadixNode by, for example, a Vector<Customer>. The methods will have to be modified accordingly of course, and a search by name will then return to you a vector of customers (possibly empty), since this search can then lead to several results.
In your case, I'd go for a single trie, containing vectors of customers. So you can have both a search by name and phone (cast the number as a String), and a single data structure to maintain.

Related

Java Trie Matching using Iterator

I have an assignment which involves creating a Trie of company names (read from a file) and then reading a news article input and counting the number of times a company name from the Trie occurs in the article.
I have coded a pretty standard Trie structure, however for the assignment it made more sense to have the TrieNodes hold the full word rather than just each character.
To make things more complicated, each company name from the file has one "primary name" and can have multiple "secondary names". For example: Microsoft Corporation, Microsoft, Xbox - where the first name is always the primary.
The assignment requires that I count all matches in the article for any of the company names, but only return the company's primary name when printing the results. Because of this, my TrieNode has the String primeName datafield, along with the standard isEnd bool. However, in my case, isEnd represents whether or not the specified node and its parent(s) form a full company name.
For example, with the article input "Microsoft Corporation just released a new Xbox console." I would need to return something along the lines of "Microsoft:2" because both Microsoft Corporation and Xbox share the same primary company name which is Microsoft.
I am using an iterator in the getHits() method but when I do find a hit, I need to look at the next word in the array to make sure it is not a continuation before I decide whether to stop or continue. The problem is that calling iter.next() doesn't just "peek" the next value but it moves forward, essentially causing me to skip words.
For example, if you look at the below code and my example, after "Best" gets a hit, it should see that "Buy" is a child and the next time it loops get a match on "Buy", but since I already call iter.next() to look at "Buy" within the While loop, the next iteration entirely skips "Buy". Is there some way I can simply peek at the next iter value within the While loop without actually moving to it? Also, any improvements to this code are greatly appreciated! I am sure there are many places where I sloppily implemented something.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
public class BuildTrie {
// Class Methods
public static void main(String[] args) throws IOException {
Trie Companies = new Trie();
String filename = "companies.dat";
try {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line;
while ((line = reader.readLine()) != null) {
// Split line by tab character
String[] aliases = line.replaceAll("\\p{P}", "").split("\t");
// Loop over each "alias" of specific company
for (int n = 0; n < aliases.length; n++) {
String[] name = aliases[n].split(" ");
// Insert each alias into Trie with index 0 as primary
Companies.insert(name, aliases[0]);
}
}
reader.close();
} catch (Exception e) {
System.err.format("Exception occurred trying to read '%s'.", filename);
e.printStackTrace();
}
/*System.out.println("Article Input: ");
try (BufferedReader input = new BufferedReader(new InputStreamReader(System.in))) {
String line;
while ((line = input.readLine()) != null) {
if (".".equals(line)) break;
String[] items = line.trim().replaceAll("\\p{P}", "").split("\\s+");
for (int i = 0; i < items.length; i++) {
Companies.words.add(items[i]);
//System.out.println(items[i]);
}
}
}*/
Companies.articleAdd("The");
Companies.articleAdd("company");
Companies.articleAdd("Best");
Companies.articleAdd("Buy");
Companies.articleAdd("sell");
Companies.articleAdd("Xbox");
Companies.getHits();
}
}
// Trie Node, which stores a character and the children in a HashMap
class TrieNode {
// Data Fields
private String word;
HashMap<String,TrieNode> children;
boolean bIsEnd;
private String primary = "";
// Constructors
public TrieNode() {
children = new HashMap<>();
bIsEnd = false;
}
public TrieNode(String st, String prime) {
word = st;
children = new HashMap<>();
bIsEnd = false;
primary = prime;
}
// Trie Node Methods
public HashMap<String,TrieNode> getChildren() {
return children;
}
public String getValue() {
return word;
}
public void setIsEnd(boolean val) {
bIsEnd = val;
}
public boolean isEnd() {
return bIsEnd;
}
public String getPrime() {
return primary;
}
}
class Trie {
private ArrayList<String> article = new ArrayList<String>();
private HashMap<String,Integer> hits = new HashMap<String,Integer>();
// Constructor
public Trie() {
root = new TrieNode();
}
// Insert article text
public void articleAdd(String word) {
article.add(word);
}
// Method to insert a new company name to Trie
public void insert(String[] names, String prime) {
// Find length of the given name
int length = names.length;
//TrieNode currNode = root;
HashMap<String,TrieNode> children = root.children;
// Traverse through all words of given name
for( int i=0; i<length; i++)
{
String name = names[i];
System.out.println("Iter: " + name);
TrieNode t;
// If there is already a child for current word of given name
if( children.containsKey(name))
t = children.get(name);
else // Else create a child
{
System.out.println("Inserting node " + name + " prime is " + prime);
t = new TrieNode(name, prime);
children.put( name, t );
}
children = t.getChildren();
int j = names.length-1;
if(i==j){
t.setIsEnd(true);
System.out.println("WordEnd");
}
}
}
public void getHits() {
// String[] articleArr = article.toArray(new String[0]);
// Initialize reference to traverse through Trie
// TrieNode crawl = root;
// int level, prevMatch = 0;
Iterator<String> iter = article.iterator();
TrieNode currNode = root;
while (iter.hasNext()) {
String word = iter.next();
System.out.println("Iter: " + word);
// HashMap of current node's children
HashMap<String,TrieNode> child = currNode.getChildren();
// If hit in currNode's children
if (child.containsKey(word)) {
System.out.println("Node exists: " + word);
// Update currNode to be node that matched
currNode = child.get(word);
System.out.println(currNode.isEnd());
String next = "";
// If currNode is leaf and next node has no match in children, were done
if (iter.hasNext()) {next = iter.next();}
if (currNode.isEnd() && !child.containsKey(next)) {
System.out.println("Matched word: " + word);
System.out.println("Primary: " + currNode.getPrime());
currNode = root;
} else {
// Else next node is continuation
}
} else {
// Else ignore next word and reset
currNode = root;
}
}
}
private TrieNode root;
}

I think instead of using while and iter.next() you can use for loop as below
for (Map.Entry entry : article.entrySet()) {
String word = entry.getKey();
}
So you are not really moving to the next items of your hashmap.
If this is not your point, please clarify us.
Thanks,
Nghia

I opted to use a for-loop style instead of While loop for this, as well as tweaked some logic to get it working. For those interested, the new code is below, as well as an example of the "companies.dat" file (what is populated to the Trie). The stdin is any text excerpt which ends with a "." on new line.
Companies.dat:
Microsoft Corporation Microsoft Xbox
Apple Computer Apple Mac
Best Buy
Dell
TrieBuilder:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.*;
public class BuildTrie {
// Class Methods
public static void main(String[] args) throws IOException {
Trie Companies = new Trie();
String filename = "companies.dat";
try {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line;
while ((line = reader.readLine()) != null) {
// Split line by tab character
String[] aliases = line.replaceAll("\\p{P}", "").split("\t");
// Loop over each "alias" of specific company
for (int n = 0; n < aliases.length; n++) {
String[] name = aliases[n].split(" ");
// Insert each alias into Trie with index 0 as primary
Companies.insert(name, aliases[0]);
}
}
reader.close();
} catch (Exception e) {
System.err.format("Exception occurred trying to read '%s'.", filename);
e.printStackTrace();
}
System.out.println("Article Input: ");
try (BufferedReader input = new BufferedReader(new InputStreamReader(System.in))) {
String line;
while ((line = input.readLine()) != null) {
if (".".equals(line)) break;
String[] items = line.trim().replaceAll("\\p{P}", "").split("\\s+");
for (int i = 0; i < items.length; i++) {
Companies.articleAdd(items[i]);
}
}
}
Companies.getHits();
}
}
// Trie Node, which stores a character and the children in a HashMap
class TrieNode {
// Data Fields
private String word;
HashMap<String,TrieNode> children;
boolean bIsEnd;
private String primary = "";
// Constructors
public TrieNode() {
children = new HashMap<>();
bIsEnd = false;
}
public TrieNode(String st, String prime) {
word = st;
children = new HashMap<>();
bIsEnd = false;
primary = prime;
}
// Trie Node Methods
public HashMap<String,TrieNode> getChildren() {
return children;
}
public String getValue() {
return word;
}
public void setIsEnd(boolean val) {
bIsEnd = val;
}
public boolean isEnd() {
return bIsEnd;
}
public String getPrime() {
return primary;
}
}
class Trie {
private ArrayList<String> article = new ArrayList<String>();
private HashMap<String,Integer> hits = new HashMap<String,Integer>();
// Constructor
public Trie() {
root = new TrieNode();
}
// Insert article text
public void articleAdd(String word) {
article.add(word);
}
// Method to insert a new company name to Trie
public void insert(String[] names, String prime) {
// Find length of the given name
int length = names.length;
HashMap<String,TrieNode> children = root.children;
// Traverse through all words of given name
for( int i=0; i<length; i++)
{
String name = names[i];
TrieNode t;
// If there is already a child for current word of given name
if( children.containsKey(name))
t = children.get(name);
else // Else create a child
{
t = new TrieNode(name, prime);
children.put( name, t );
}
children = t.getChildren();
int j = names.length-1;
if(i==j){
t.setIsEnd(true);
}
}
}
public void getHits() {
// Initialize reference to traverse through Trie
TrieNode currNode = root;
for (int i=0; i < article.size(); i++) {
String word = article.get(i);
System.out.println("Searching: " + word);
// HashMap of current node's children
HashMap<String, TrieNode> child = currNode.getChildren();
// If hit in currNode's children
if (child.containsKey(word)) {
System.out.println("Node exists: " + word);
// Update currNode to be node that matched
currNode = child.get(word);
child = currNode.getChildren();
System.out.println("isEnd?: " + currNode.isEnd());
String next = "";
if (i+1 < article.size()) {
next = article.get(i+1);
}
// If currNode is leaf and next node has no match in children, were done
if (currNode.isEnd() && !child.containsKey(next)) {
System.out.println("Primary of match: " + currNode.getPrime());
currNode = root;
}
} else {
// Else ignore next word and reset
System.out.println("No match.");
currNode = root;
}
}
}
private TrieNode root;
}

Insert a new string to the trie graph

I am trying to implement the insert method of the Patricia Trie data structure and I am trying to handle this case:
first string: abaxyzalexsky,
second string: abaxyzalex,
third string: abaxyz,
fourth string: aba
I want to mark the trie as the following aba-xyz-alex-sky after inserting the fourth string, but I don't know how can I get it work.
How can I mark the words in the trie in the case above?
public void insert(String s) {
if (nodeRoot == null) {
nodeRoot = new TrieNode(s);
nodeRoot.isWord = true;
} else {
insert(nodeRoot, s);
}
}
private void insert(TrieNode node, String s) {
int len1 = node.edge.length();
int len2 = s.length();
int len = Math.min(len1, len2);
ArrayList<TrieNode> nextNode = node.getNext();
for (int index = 0; index < len; index++) {
if (s.charAt(index) != node.edge.charAt(index)) {
// In case the both words have common substrings and after the
// common substrings the words are split. For example abad, abac
} else if (index == (s.length() - 1)
|| index == (node.edge.length() - 1)) {
// In case the node just needs one path since one word is
// substring of the other.
// For example (aba and abac)
if (len1 > len2) {
// node edge string is longer than the inserted one. For example (abac
// and aba).
String samesubString = node.edge.substring(0, index + 1);
String different = node.edge.substring(index + 1);
node.edge = samesubString;
if (node.getNext() != null && !node.getNext().isEmpty()) {
for (TrieNode subword : node.getNext()) {
//I am here when I insert the third string. The code below retrives wrong data structure.
TrieNode node1 = new TrieNode(different);
node1.isWord = true;
node1.next.add(subword);
node.next.add(node1);
}
} else {
TrieNode leaf = new TrieNode(different);
leaf.isWord = true;
node.next.add(leaf);
for (TrieNode subword : node.getNext()) {
System.out.println(node.getEdge() + "---"
+ subword.getEdge());
}
}
} else {
// new inserted string value is longer. For example (aba
// and abac).
}
} else {
System.out.println("The strings are the same - " + index);
}
}
}
NodeTrie class
package patriciaTrie;
import java.util.ArrayList;
public class TrieNode {
ArrayList<TrieNode> next = new ArrayList<TrieNode>();
String edge;
boolean isWord;
TrieNode(String edge){
this.edge = edge;
}
public ArrayList<TrieNode> getNext() {
return next;
}
public void setNext(ArrayList<TrieNode> next) {
this.next = next;
}
public String getEdge() {
return edge;
}
public void setEdge(String edge) {
this.edge = edge;
}
}

How to use string frequencies list in Trie data structure?

I am working on some performance test on various data structures. In my list I have HashMap and Trie data structure. I am done with HashMap but not sure how to use Trie for below problem -
I have a text file which contains 2 million english words with their frequencies in this format -
hello 100
world 5000
good 2000
bad 9000
...
Now I am reading this file line by line and storing it in HashMap - First splitted string goes as the key in the HashMap and next splitted string goes as the value in the HashMap and so I am able to measure the insertion performance with the below code.
Map<String, String> wordTest = new HashMap<String, String>();
try {
fis = new FileInputStream(FILE_LOCATION);
reader = new BufferedReader(new InputStreamReader(fis));
String line = reader.readLine();
while (line != null) {
String[] splitString = line.split("\\s+");
// now put it in HashMap as key value pair
wordTest.put(splitString[0].toLowerCase().trim(), splitString[1].trim());
line = reader.readLine();
}
}
Now how would I implement Trie data structure to load the same thing in Trie as I did for HashMap? And then do a lookup basis on String as well? This is my first time with Trie data structure so little bit confuse.
Update:-
Below is my TrieImpl class
public class TrieImpl {
//root node
private TrieNode r;
public TrieImpl() {
r = new TrieNode();
}
public boolean has(String word) {
return r.has(word);
}
public void insert(String word){
r.insert(word);
}
public String toString() {
return r.toString();
}
public static void main(String[] args) {
TrieImpl t = new TrieImpl();
System.out.println("Testing some strings");
t.insert("HELLO"); // how do I pass string and its count
t.insert("WORLD"); // how do I pass string and its count
}
}
And below is my TrieNode class -
public class TrieNode {
// make child nodes
private TrieNode[] c;
// flag for end of word
private boolean flag = false;
public TrieNode() {
c = new TrieNode[26]; // 1 for each letter in alphabet
}
protected void insert(String word) {
int val = word.charAt(0) - 64;
// if the value of the child node at val is null, make a new node
// there to represent the letter
if (c[val] == null) {
c[val] = new TrieNode();
}
// if word length > 1, then word is not finished being added.
// otherwise, set the flag to true so we know a word ends there.
if (word.length() > 1) {
c[val].insert(word.substring(1));
} else {
c[val].flag = true;
}
}
public boolean has(String word) {
int val = word.charAt(0) - 64;
if (c[val] != null && word.length() > 1) {
c[val].has(word.substring(1));
} else if (c[val].flag == true && word.length() == 1) {
return true;
}
return false;
}
public String toString() {
return "";
}
}
Now how would I extend this to passs a particular string and its count and then do a lookup basis on String?

You can just add a element frequency to your TrieNode class.
public class TrieNode {
// make child nodes
private TrieNode[] c;
// flag for end of word
private boolean flag = false;
//stores frequency if flag is set
private int frequency;
Now in the insert method, add the frequency while setting the flag..change method signature appropriately
protected void insert(String word, int frequency) {
int val = word.charAt(0) - 64;
..........
..........
// if the value of the child node at val is null, make a new nod
if (word.length() > 1) {
c[val].insert(word.substring(1),frequency);
} else {
c[val].flag = true;
c[val].frequency = frequency;
}
}
Now create a new method to get the frequency.It can be done similar to has method, where you follow the branches till the end and finally when you find that the flag is set, return the frequency.
public int getFreq(String word) {
int val = word.charAt(0) - 64;
if (word.length() > 1) {
return c[val].getFreq(word.substring(1));
} else if (c[val].flag == true && word.length() == 1) {
return c[val].frequency;
} else
return -1;
}
-------------------------------EDIT------------------------
Use has method first to check for the string, then use getFreq method
public int getFreq(String word) {
if(has(word))
return getFreqHelper(word);
else
return -1; //this indicates word is not present
}
private int getFreqHelper(String word) {
int val = word.charAt(0) - 64;
if (word.length() > 1) {
return c[val].getFreq(word.substring(1));
} else if (c[val].flag == true && word.length() == 1) {
return c[val].frequency;
} else
return -1;
}

Here is a hint:
Define a class FrequencyString like so:
class FrequencyString {
private String string;
private int frequency;
public FrequencyString(String str, int freq) {
this.string = str;
this.frequency = freq;
}
public getString() {
return string;
}
public getFrequency() {
return frequency;
}
}
Now modify your Trie implementation methods to accept this new FrequencyString. These will be your new signatures:
TrieImpl:
boolean has(String word);
void insert(String word, int freq);
TrieNode:
boolean has(String word);
void insert(FrequencyString word);
If you want to find the frequency for a given word if it exists, change the has methods' signatures to this:
Integer find(String word);
When implementing find, return null if the word does not exist, or new Integer(result.getFrequency()); (where result is the found FrequencyString) if it does.

Detecting if a word is valid when it contains a blank

I'm working on a phone based word game, and there could potentially be quite a few blanks (representing any letter) that a player could have the option to use.
I store all the possible words in a hashSet, so detecting if a word is valid when it has one blank is simply a matter of looping through the alphabet replacing the blank with a letter and testing the word. I have a recursive call so this will work with any number of blanks. The code is as follows:
public boolean isValidWord(String word) {
if (word.contains(" ")){
for (char i = 'A'; i <= 'Z'; i++) {
if (isValidWord(word.replaceFirst(" ", Character.toString(i))))
return true;
}
return false;
}
else
return wordHashSet.contains(word);
}
As the number of blanks increases, the number of words we have to test increase exponentially. By the time we get to 3 blanks we're having to do 17576 lookups before we can reject a word, and this is affecting game play. Once there are 4 blanks the game will just freeze for a while.
What is the most efficient way for me to check words with multiple blanks. Should I just iterate through the hashset and check if we have a match against each word? If so, then what's the fastest way for me to compare two strings taking the blanks into account? I've tried doing this using a regular expression and String.matches(xx), but it's too slow. A straight String.equals(xx) is fast enough, but that obviously doesn't take blanks into account.

A very fast method althrough somewhat challenging to implement would be to store your words in a Trie - http://en.wikipedia.org/wiki/Trie
A trie is a tree structure that contains a char in every node and an array of pointers pointing to next nodes.
Without blank spaces it would be easy - just follow the trie structure, you can check this in linear time. When you have a blank, you will have a loop to search all possible routes.
This can sound complicated and difficult if you are not familiar with tries but if you get stuck I can help you with some code.
EDIT:
Ok, here is some c# code for your problem using tries, I think you will have no problems converting it in JAVA. If you do, leave a comment and I will help.
Trie.cs
public class Trie
{
private char blank = '_';
public Node Root { get; set; }
public void Insert(String key)
{
Root = Insert(Root, key, 0);
}
public bool Contains(String key)
{
Node x = Find(Root, key, 0);
return x != null && x.NullNode;
}
private Node Find(Node x, String key, int d)
{ // Return value associated with key in the subtrie rooted at x.
if (x == null)
return null;
if (d == key.Length)
{
if (x.NullNode)
return x;
else
return null;
}
char c = key[d]; // Use dth key char to identify subtrie.
if (c == blank)
{
foreach (var child in x.Children)
{
var node = Find(child, key, d + 1);
if (node != null)
return node;
}
return null;
}
else
return Find(x.Children[c], key, d + 1);
}
private Node Insert(Node x, String key, int d)
{ // Change value associated with key if in subtrie rooted at x.
if (x == null) x = new Node();
if (d == key.Length)
{
x.NullNode = true;
return x;
}
char c = key[d]; // Use dth key char to identify subtrie.
x.Children[c] = Insert(x.Children[c], key, d + 1);
return x;
}
public IEnumerable<String> GetAllKeys()
{
return GetKeysWithPrefix("");
}
public IEnumerable<String> GetKeysWithPrefix(String pre)
{
Queue<String> q = new Queue<String>();
Collect(Find(Root, pre, 0), pre, q);
return q;
}
private void Collect(Node x, String pre, Queue<String> q)
{
if (x == null) return;
if (x.NullNode) q.Enqueue(pre);
for (int c = 0; c < 256; c++)
Collect(x.Children[c], pre + ((char)c), q);
}
}
Node.cs
public class Node
{
public bool NullNode { get; set; }
public Node[] Children { get; set; }
public Node()
{
NullNode = false;
Children = new Node[256];
}
}
Sample usage:
Trie tr = new Trie();
tr.Insert("telephone");
while (true)
{
string str = Console.ReadLine();
if( tr.Contains( str ) )
Console.WriteLine("contains!");
else
Console.WriteLine("does not contain!");
}

A straight String.equals(xx) is fast enough, but that obviously
doesn't take blanks into account.
So I recommend to implement this simple solution, which is very close to String.equals(), and takes blanks into account:
public boolean isValidWord(String word) {
if (wordHashSet.contains(word)) {
return true;
}
for (String fromHashSet: wordHashSet){
if (compareIgnoreBlanks(fromHashSet, word)) {
return true;
}
}
return false;
}
/**
* Inspired by String.compareTo(String). Compares two String's, ignoring blanks in the String given as
* second argument.
*
* #param s1
* String from the HashSet
* #param s2
* String with potential blanks
* #return true if s1 and s2 match, false otherwise
*/
public static boolean compareIgnoreBlanks(String s1, String s2) {
int len = s1.length();
if (len != s2.length()) {
return false;
}
int k = 0;
while (k < len) {
char c1 = s1.charAt(k);
char c2 = s2.charAt(k);
if (c2 != ' ' && c1 != c2) {
return false;
}
k++;
}
return true;
}

public boolean isValidWord(String word) {
word = word.replaceAll(" ", "[a-z]");
Pattern pattern = Pattern.compile(word);
for (String wordFromHashSet: hashSet){
Matcher matcher = pattern.matcher(wordFromHashSet);
if (matcher.matches()) return true;
}
return false;
}

public boolean isValidWord(String word) {
ArrayList<Integer> pos = new ArrayList<Integer>();
for (int i=0; i!=word.length();i++){
if (word.charAt(i) == ' ') pos.add(i);
}
for (String hashSetWord: hashSet){
for (Integer i: pos){
hashSetWord = hashSetWord.substring(0,i)+" "+hashSetWord.substring(i+1);
}
if (hashSetWord.equals(word)) return true;
}
return false;
}

A kind of ugly, but I would guess fairly fast method would be to create a string containing all valid words like this:
WORD1
WORD2
WORD3
etc.
Then use a regex like (^|\n)A[A-Z]PL[A-Z]\n (i.e. replacing all blanks with [A-Z]), and match it on that string.

Assignment on sorting a list

I'm very close to being done, but can't quite figure out how to tie everything together. I have the separate methods responsible for their particular task, but i feel like my cohesion is really bad. not real sure how they tie together and what needs to be called in main. The goal here is to read a text file from the command line and list the words in the story lexicographically.
% java Ten < story.txt
Word Occurs
==== ======
a 21
animal 3
.
.
.
zebra 1
%
Here's my code thus far:
import java.util.Scanner;
public class Ten2
{
public static void main(String [] arg)
{
Scanner input = new Scanner(System.in);
String word;
List sortedList = new List();
word = nextWord(input);
while (word!=null) {
sortedList.insert(word);
word = nextWord(input);
}
sortedList.printWords();
}
private static String nextWord(Scanner input)
{
// return null if there are no more words
if (input.hasNext() == false )
return null;
// take next element, convert to lowercase, store in s
else {
String s = input.next().toLowerCase() ;
// empty string
String token = "";
// loop through s and concatonate chars onto token
for (int i =0; i < s.length(); i++) {
if (Character.isLetter(s.charAt(i)) == true)
token = token + s.charAt(i);
else if (s.charAt(i) == '\'' )
token = token + s.charAt(i);
else if (s.charAt(i) == '-')
token = token + s.charAt(i);
}
return token;
}
}
}
class List
{
/*
* Internally, the list of strings is represented by a linked chain
* of nodes belonging to the class ListNode. The strings are stored
* in lexicographical order.
*/
private static class ListNode
{
// instance variables for ListNode objects
public String word;
public ListNode next;
public int count;
// Listnode constructor
public ListNode(String w, ListNode nxt)
{
word = w; // token from nextWord()?
next = nxt; // link to next ListNode
count = 1; // number of word occurences
}
}
// instance variables for List object
private ListNode first;
private int numWords;
// constructor postcondition: creates new Listnode storing object
public List()
{
first = null; // pointer to ListNode?
numWords = 0; // counter for nodes in list
}
// Insert a specified word into the list, keeping the list
// in lexicographical order.
public void insert(String word)
{
// check if first is null
if (first == null) {
ListNode newNode;
newNode = addNode(word, null);
first = newNode;
}
// else if (first is not null) check if word matches first word in List
else if (word.equals(first.word)) {
// increase count
first.count++;
}
// else (first is not null && doesn't match first word)
else {
ListNode newNode;
ListNode current;
current = first;
ListNode previous;
previous = null;
int cmp = word.compareTo(current.word);
/*
* Fist two cases of empty list and word already existing
* handled in above if and else statements, now by using compareTo()
* method between the words, the insertion postion can be determined.
* Links between ListNode variables current and previous need to be
* modified in order to maintain the list
*/
// loop as long as value comparing to is positive
// when compareTo() returns positive this means the "word" parameter is greater than the word in the list
while ((cmp >0) && (current.next != null)) {
previous = current;
current = current.next;
cmp = word.compareTo(current.word);
}
// insert after current at end of list
if ((cmp >0 && current.next == null)) {
newNode = addNode(word, null);
current.next = newNode;
}
// increments count when word already exists
else if (cmp==0) {
current.count++;
}
// else (cmp < 0) we insert BEFORE current
else {
newNode = addNode(word, current);
// first node in list comes after new word
if (previous == null) {
first = newNode;
}
else {
// inserting new word in middle of list
previous.next = newNode;
}
}
}
}
// method to add new ListNode and increase counter
private ListNode addNode(String word, ListNode next)
{
ListNode newNode = new ListNode(word, next);
numWords++;
return newNode;
}
// Returns a string array that contains all the words in the list.
public String[] getWords()
{
String[] Words = new String[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Words[i] = current.word;
current = current.next;
i++;
}
return Words;
}
// Returns an int array that contains the number of times
// each word occurs in the list.
public int[] getNumbers()
{
int[] Numbers = new int[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Numbers[i] = current.count;
current = current.next;
i++;
}
return Numbers;
}
// Outputs the string array and int array containing all the
// words in the list and the number of times each occurs.
public void printWords()
{
int[] Numbers = getNumbers();
String[] Words = getWords();
System.out.println("Word \t \t Occurs");
System.out.println("==== \t \t ======");
for (int i =0; i < numWords; i++) {
System.out.println(Words[i] + " \t " + Numbers[i]);
}
}
}

Well, I would start by defining what you want your program to do, which you've already done:
The goal here is to read a text file from the command line and list the words in the story lexicographically.
You're main function does this almost. Basically, what you need is a loop to tie it together:
public static void main(String [] arg)
{
// print out your initial information first (i.e. your column headers)
// ...
List sortedList = new List();
String word = nextWord();
// now, the question is... what is the end condition of the loop?
// probably that there aren't any more words, so word in this case
// will be null
while (word != null)
{
sortedList.insert(word);
word = nextWord();
}
// now, your list takes care of the sorting, so let's just print the list
sortedList.printWords();
}
I think that's all there is to it. Normally, I don't like to post solutions to homework questions, but in this case, since you already had all of the code and you just needed a little nudge to drive you in the right direction, I think it's fine.
There are a few things I noticed that are incorrect with your
Your list constructor has a 'void' return type - there should be no return type on constructors:
public List() //make an empty list
{
first = null;
numWords = 0;
}
The 'else' statement in this method is unneeded:
public static String nextWord()
{
if ( keyboard.hasNext() == false )
return null;
else {
String start = keyboard.next().toLowerCase() ;
String organized = "";
for (int i =0; i < start.length(); i++) {
if (Character.isLetter(start.charAt(i)) == true)
organized = organized + start.charAt(i);
else if (start.charAt(i) == '\'' )
organized = organized + start.charAt(i);
else if (start.charAt(i) == '-')
organized = organized + start.charAt(i);
}
return organized;
}
}
So, this should be:
public static String nextWord()
{
if ( keyboard.hasNext() == false )
return null;
String start = keyboard.next().toLowerCase() ;
String organized = "";
for (int i =0; i < start.length(); i++) {
if (Character.isLetter(start.charAt(i)) == true)
organized = organized + start.charAt(i);
else if (start.charAt(i) == '\'' )
organized = organized + start.charAt(i);
else if (start.charAt(i) == '-')
organized = organized + start.charAt(i);
}
return organized;
}
If you want to use a BufferedReader, it's pretty easy. Just set it up in your main method:
if (arg.length > 0)
{
// open our file and read everything into a string buffer
BufferedReader bRead = null;
try {
bRead = new BufferedReader(new FileReader(arg[0]));
} catch(FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.exit(0);
}
setupScanner(bRead);
}
Then, create a new method that sets up the scanner object:
public static void setupScanner(BufferedReader rdr)
{
keyboard = new Scanner(rdr);
}
And then just pass it in on the command line (i.e. java ten2 [filename])

import java.util.Scanner;
public class Ten2
{
public static void main(String [] arg)
{
Scanner input = new Scanner(System.in);
String word;
List sortedList = new List();
word = nextWord(input);
while (word!=null) {
sortedList.insert(word);
word = nextWord(input);
}
sortedList.printWords();
}
private static String nextWord(Scanner input)
{
// return null if there are no more words
if (input.hasNext() == false )
return null;
// take next element, convert to lowercase, store in s
else {
String s = input.next().toLowerCase() ;
// empty string
String token = "";
// loop through s and concatonate chars onto token
for (int i =0; i < s.length(); i++) {
if (Character.isLetter(s.charAt(i)) == true)
token = token + s.charAt(i);
else if (s.charAt(i) == '\'' )
token = token + s.charAt(i);
else if (s.charAt(i) == '-')
token = token + s.charAt(i);
}
return token;
}
}
}
class List
{
/*
* Internally, the list of strings is represented by a linked chain
* of nodes belonging to the class ListNode. The strings are stored
* in lexicographical order.
*/
private static class ListNode
{
// instance variables for ListNode objects
public String word;
public ListNode next;
public int count;
// Listnode constructor
public ListNode(String w, ListNode nxt)
{
word = w; // token from nextWord()?
next = nxt; // link to next ListNode
count = 1; // number of word occurences
}
}
// instance variables for List object
private ListNode first;
private int numWords;
// constructor postcondition: creates new Listnode storing object
public List()
{
first = null; // pointer to ListNode?
numWords = 0; // counter for nodes in list
}
// Insert a specified word into the list, keeping the list
// in lexicographical order.
public void insert(String word)
{
// check if first is null
if (first == null) {
ListNode newNode;
newNode = addNode(word, null);
first = newNode;
}
// else if (first is not null) check if word matches first word in List
else if (word.equals(first.word)) {
// increase count
first.count++;
}
// else (first is not null && doesn't match first word)
else {
ListNode newNode;
ListNode current;
current = first;
ListNode previous;
previous = null;
int cmp = word.compareTo(current.word);
/*
* Fist two cases of empty list and word already existing
* handled in above if and else statements, now by using compareTo()
* method between the words, the insertion postion can be determined.
* Links between ListNode variables current and previous need to be
* modified in order to maintain the list
*/
// loop as long as value comparing to is positive
// when compareTo() returns positive this means the "word" parameter is greater than the word in the list
while ((cmp >0) && (current.next != null)) {
previous = current;
current = current.next;
cmp = word.compareTo(current.word);
}
// insert after current at end of list
if ((cmp >0 && current.next == null)) {
newNode = addNode(word, null);
current.next = newNode;
}
// increments count when word already exists
else if (cmp==0) {
current.count++;
}
// else (cmp < 0) we insert BEFORE current
else {
newNode = addNode(word, current);
// first node in list comes after new word
if (previous == null) {
first = newNode;
}
else {
// inserting new word in middle of list
previous.next = newNode;
}
}
}
}
// method to add new ListNode and increase counter
private ListNode addNode(String word, ListNode next)
{
ListNode newNode = new ListNode(word, next);
numWords++;
return newNode;
}
// Returns a string array that contains all the words in the list.
public String[] getWords()
{
String[] Words = new String[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Words[i] = current.word;
current = current.next;
i++;
}
return Words;
}
// Returns an int array that contains the number of times
// each word occurs in the list.
public int[] getNumbers()
{
int[] Numbers = new int[numWords];
ListNode current = first;
int i =0;
while (current != null) {
Numbers[i] = current.count;
current = current.next;
i++;
}
return Numbers;
}
// Outputs the string array and int array containing all the
// words in the list and the number of times each occurs.
public void printWords()
{
int[] Numbers = getNumbers();
String[] Words = getWords();
System.out.println("Word \t \t Occurs");
System.out.println("==== \t \t ======");
for (int i =0; i < numWords; i++) {
System.out.println(Words[i] + " \t " + Numbers[i]);
}
}
}

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Radix(Trie) Tree implementation for Cutomer search in Java - java

Related

Java Trie Matching using Iterator

Insert a new string to the trie graph

How to use string frequencies list in Trie data structure?

Detecting if a word is valid when it contains a blank

Assignment on sorting a list

Categories

Resources