Java HashMap: Changing Bucket Implementation to Linear Probing method - java

In advance, I apologize for my lack of experience, these are advanced concepts that are difficult to wrap my head around. From what I understand, linear probing is circular, it won't stop until it finds an empty cell.
However I am not sure how to implement it. Some example on how to would be greatly appreciated. Sorry again for the inexperience, I'm not some vetted programmer, I'm picking this up very slowly.
/**
 * Scans the first {@code capacity} buckets of {@code table} and reports
 * whether any stored entry holds an element equal to the given one.
 *
 * @param element the element to look for
 * @return {@code true} as soon as a matching entry is found, otherwise {@code false}
 */
public boolean ContainsElement(V element)
{
    int slot = 0;
    while (slot < capacity)
    {
        LinkedList<Entry<K, V>> chain = table[slot++];
        if (chain == null)
        {
            // Nothing was ever stored in this bucket.
            continue;
        }
        for (Entry<K, V> candidate : chain)
        {
            if (candidate.getElement().equals(element))
            {
                return true;
            }
        }
    }
    return false;
}

Here's a working hash table based on the pseudocode examples found in the Wikipedia article for open addressing.
I think the main differences between the Wikipedia example and mine are:
Treating the hashCode() a little bit due to the way Java does modulo (%) with negative numbers.
Implemented simple resizing logic.
Changed the logic in the remove method a little bit because Java doesn't have goto.
Otherwise, it's more or less just a direct translation.
package mcve;
import java.util.*;
import java.util.stream.*;
/**
 * Hash table that uses open addressing with linear probing: colliding keys
 * are stored in the next free slot of a single array instead of in per-bucket
 * lists. The array doubles once it is three quarters full, and removal shifts
 * later entries back so that probe sequences stay unbroken.
 */
public class OAHashTable {
    private Entry[] table = new Entry[16]; // Must be >= 4. See findSlot.
    private int size = 0;

    /** Returns the number of key/value pairs currently stored. */
    public int size() {
        return size;
    }

    /**
     * Returns a non-negative hash for {@code key} (0 for {@code null}).
     */
    private int hash(Object key) {
        int hashCode = Objects.hashCode(key)
                & 0x7F_FF_FF_FF; // <- This is like abs, but it works
                                 //    for Integer.MIN_VALUE. We do this
                                 //    so that hash(key) % table.length
                                 //    is never negative.
        return hashCode;
    }

    /**
     * Returns the index that holds {@code key}, or the index of the empty
     * slot where it would be inserted.
     */
    private int findSlot(Object key) {
        int i = hash(key) % table.length;
        // Search until we either find the key, or find an empty slot.
        //
        // Note: this becomes an infinite loop if the key is not already
        //       in the table AND every element in the array is occupied.
        //       With the resizing logic (below), this will only happen
        //       if the table is smaller than length=4.
        while ((table[i] != null) && !Objects.equals(table[i].key, key)) {
            i = (i + 1) % table.length;
        }
        return i;
    }

    /**
     * Returns the value stored for {@code key}, or {@code null} when the key
     * is absent.
     */
    public Object get(Object key) {
        int i = findSlot(key);
        if (table[i] != null) { // Key is in table.
            return table[i].value;
        } else {                // Key is not in table
            return null;
        }
    }

    private boolean tableIsThreeQuartersFull() {
        return ((double) size / (double) table.length) >= 0.75;
    }

    /** Doubles the array and re-inserts every entry at its new position. */
    private void resizeTableToTwiceAsLarge() {
        Entry[] old = table;
        table = new Entry[2 * old.length];
        size = 0;
        for (Entry e : old) {
            if (e != null) {
                put(e.key, e.value);
            }
        }
    }

    /**
     * Stores {@code value} under {@code key}, replacing any previous value.
     */
    public void put(Object key, Object value) {
        int i = findSlot(key);
        if (table[i] != null) { // We found our key.
            table[i].value = value;
            return;
        }
        if (tableIsThreeQuartersFull()) {
            resizeTableToTwiceAsLarge();
            // The slot computed above belongs to the old array.
            i = findSlot(key);
        }
        table[i] = new Entry(key, value);
        ++size;
    }

    /**
     * Removes {@code key} if present and shifts the following entries of the
     * same cluster back so that none of them becomes unreachable.
     */
    public void remove(Object key) {
        int i = findSlot(key);
        if (table[i] == null) {
            return; // Key is not in the table.
        }
        int j = i;
        table[i] = null;
        --size;
        while (true) {
            j = (j + 1) % table.length;
            if (table[j] == null) {
                break;
            }
            int k = hash(table[j].key) % table.length;
            // Determine if k lies cyclically in (i,j]
            // |    i.k.j |
            // |....j i.k.| or  |.k..j i...|
            if ( (i<=j) ? ((i<k)&&(k<=j)) : ((i<k)||(k<=j)) ) {
                // The entry at j is still reachable from its home slot k.
                continue;
            }
            table[i] = table[j];
            i = j;
            table[i] = null;
        }
    }

    /** Streams the stored entries in array order, skipping empty slots. */
    public Stream<Entry> entries() {
        return Arrays.stream(table).filter(Objects::nonNull);
    }

    @Override
    public String toString() {
        return entries().map(e -> e.key + "=" + e.value)
                        .collect(Collectors.joining(", ", "{", "}"));
    }

    /** A key/value pair held in one slot of the table. */
    public static class Entry {
        private Object key;
        private Object value;

        private Entry(Object key, Object value) {
            this.key = key;
            this.value = value;
        }

        public Object getKey() { return key; }
        public Object getValue() { return value; }
    }

    public static void main(String[] args) {
        OAHashTable t = new OAHashTable();
        t.put("A", 1);
        t.put("B", 2);
        t.put("C", 3);
        System.out.println("size = " + t.size());
        System.out.println(t);
        t.put("X", 4);
        t.put("Y", 5);
        t.put("Z", 6);
        t.remove("C");
        t.remove("B");
        t.remove("A");
        t.entries().map(e -> e.key)
                   .map(key -> key + ": " + t.get(key))
                   .forEach(System.out::println);
    }
}

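java.util.HashMap, the standard implementation of java.util.Map, resolves hash collisions internally, but it does so with separate chaining (each bucket holds a linked list, or a balanced tree once the bucket grows large), not with linear probing.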

Related

Implementation of Custom HashMap code issues

I am preparing my own custom HashMap implementation in Java. Below is my implementation.
/**
 * One node of a singly linked bucket chain: an immutable key, a replaceable
 * value and a link to the following node.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class Entry<K,V> {

    private final K key;
    private V value;
    private Entry<K,V> next;

    /**
     * @param k         key of this node
     * @param v         initial value
     * @param successor node that follows this one, or {@code null}
     */
    public Entry(K k, V v, Entry<K,V> successor) {
        key = k;
        value = v;
        next = successor;
    }

    /** @return the key this node was created with */
    public K getKey() {
        return key;
    }

    /** @return the current value */
    public V getValue() {
        return value;
    }

    /** Replaces the stored value. */
    public void setValue(V replacement) {
        value = replacement;
    }

    /** @return the following node, or {@code null} at the end of the chain */
    public Entry<K, V> getNext() {
        return next;
    }

    /** Re-links this node to a different successor. */
    public void setNext(Entry<K, V> successor) {
        next = successor;
    }
}
/**
 * Minimal hash map with a fixed number of buckets; colliding entries are kept
 * in a singly linked chain per bucket.
 *
 * @param <K> key type ({@code null} is allowed and always maps to bucket 0)
 * @param <V> value type
 */
public class MyCustomHashMap<K,V> {

    private static final int DEFAULT_BUCKET_COUNT = 10;

    private Entry<K,V>[] buckets;

    @SuppressWarnings("unchecked") // generic array creation; only Entry<K,V> is ever stored
    public MyCustomHashMap() {
        // A new array is already filled with null, i.e. every bucket starts empty.
        buckets = new Entry[DEFAULT_BUCKET_COUNT];
    }

    /**
     * Associates {@code value} with {@code key}. An existing mapping for an
     * equal key is overwritten; otherwise a new node is appended to the end
     * of the bucket's chain.
     *
     * @param key   the key, may be {@code null}
     * @param value the value to store
     */
    public void put(K key, V value) {
        int index = hash(key);
        for (Entry<K,V> current = buckets[index]; current != null; current = current.getNext()) {
            if (keysEqual(current.getKey(), key)) {
                // Same key: replace the old value and stop, nothing to append.
                current.setValue(value);
                return;
            }
            if (current.getNext() == null) {
                // Reached the tail without a match: append here.
                current.setNext(new Entry<K,V>(key, value, null));
                return;
            }
        }
        // The bucket was empty.
        buckets[index] = new Entry<K,V>(key, value, null);
    }

    /**
     * Looks up the value stored for {@code key}.
     *
     * @param key the key to search for, may be {@code null}
     * @return the mapped value, or {@code null} if the key is not present
     */
    public V getKey(K key) {
        for (Entry<K,V> current = buckets[hash(key)]; current != null; current = current.getNext()) {
            if (keysEqual(current.getKey(), key)) {
                return current.getValue();
            }
        }
        return null;
    }

    /** Prints every entry as <code>{key=value}</code>, bucket by bucket, on one line. */
    public void display() {
        for (int i = 0; i < buckets.length; i++) {
            Entry<K,V> entry = buckets[i];
            while (entry != null) {
                System.out.print("{"+entry.getKey()+"="+entry.getValue()+"}" +" ");
                entry = entry.getNext();
            }
        }
    }

    /**
     * Returns the index of the bucket that {@code key} belongs to.
     *
     * @param key the key, may be {@code null}
     * @return an index in {@code [0, bucket count)}
     */
    public int bucketIndexForKey(K key) {
        return hash(key);
    }

    /**
     * Maps a key to a bucket index.
     *
     * @param key the key, may be {@code null}
     * @return 0 for {@code null}, otherwise a non-negative index below the bucket count
     */
    private int hash(K key) {
        if (key == null) {
            return 0;
        }
        // Mask the sign bit instead of Math.abs: Math.abs(Integer.MIN_VALUE)
        // is still negative and would produce a negative index.
        return (key.hashCode() & 0x7FFFFFFF) % buckets.length;
    }

    /** Null-safe key comparison. */
    private static boolean keysEqual(Object stored, Object wanted) {
        return stored == null ? wanted == null : stored.equals(wanted);
    }

    public static void main(String[] args) {
        MyCustomHashMap<String, Integer> myCustomHashMap = new MyCustomHashMap<String, Integer>();
        myCustomHashMap.put("S", 22);
        myCustomHashMap.put("S", 1979);
        myCustomHashMap.put("V", 5);
        myCustomHashMap.put("R", 31);
        System.out.println("Value corresponding to key R: "+myCustomHashMap.getKey("R"));
        System.out.println("Value corresponding to key V: "+myCustomHashMap.getKey("V"));
        System.out.println("Displaying the contents of the HashMap:: ");
        myCustomHashMap.display();
    }
}
1) I feel that put (K key,V value) is somewhat flawed. Please do kindly validate and let me know what's wrong here. On entering the same key its giving me wrong result. I have not yet tested it for collision cases having different keys.
2) It is said that we rehash the hashCode so that it eliminates wrong implementation of hashCode. how do I do it because if I give hashCode of key i.e. hash(key.hashCode()) then it dosn't take as it can't compute hashCode of int. How to do this?
Any help would be highly appreciated.
Thanks
Sid
You handle null key incorrectly :
if(key == null){
buckets[0] = newEntry;
}
It's possible that buckets[0] already contains entries, in which case you will lose those entries.
The following loop has some issues :
Entry<K,V> previous = null;
Entry<K,V> current = buckets[hash];
while(current != null) {
boolean done = false;
while(!done) {
if(current.getKey().equals(key)) {
current.setValue(value);
done = true;
} else if (current.getNext() == null) {
current.setNext(newEntry);
done = true;
}
current = current.getNext();
previous = current; // you are not really setting previous to
// to the previous Entry in the list - you
// are setting it to the current Entry
}
}
previous.setNext(newEntry); // you don't need this statement. You
// already have a statement inside the
// loop that adds the new Entry to the list
It looks like removing any statements related to previous will fix this loop.
EDIT:
As kolakao commented, in order for your implementation to be efficient (i.e. require expected constant time for get and put), you must resize the HashMap when the number of entries exceeds some threshold (in order for the average number of entries in each bucket to be bound by a constant).
It is said that we rehash the hashCode so that it eliminates wrong implementation of hashCode. how do I do it because if I give hashCode of key i.e. hash(key.hashCode()) then it dosn't take as it can't compute hashCode of int. How to do this?
The idea of re-hashing doesn't involve calling hashCode for the hashCode of the key. It involves running some hardcoded function on the value obtained by key.hashCode().
For example, in Java 7 implementation of HashMap, the following function is used :
/**
 * Supplemental hash function: folds the higher bits of {@code h} into the
 * lower ones so that hash codes which differ only in their upper bits do not
 * all land in the same bucket.
 */
static int hash(int h) {
    // First round: mix bits 12.. and 20.. into the value.
    int mixed = h ^ (h >>> 20) ^ (h >>> 12);
    // Second round: mix the result's bits 4.. and 7.. back in.
    return mixed ^ (mixed >>> 7) ^ (mixed >>> 4);
}
Then you use it with :
int hash = hash(key.hashCode());
// Clear the sign bit before taking the remainder: in Java, % keeps the sign
// of the dividend, so a negative hash would give a negative array index.
int bucket = (hash & 0x7FFFFFFF) % buckets.length;

Null pointer Exception in CompareTo method

Structure of my class:
public class Priorityy implement Comparable {
public int compareTo(Object pe) {
Priorityy p = (Priorityy) pe;
if (this.key < p.key) {
return 1;
} else if (this.key > p.key) {
return -1;
} else {
return 0;
}
}
}
The problem is that p.key is always null, why exactly is that? I have my array initialized with elements in it, but it always throws a NullPointerException whenever I try Arrays.sort(arr).
How can I fix this?
Edit: Here is the complete code and print did print the elements of array arr:
import java.util.Arrays;
class Priorityy implements Comparable {
int size;
int front = 0;
int rear = 0;
static Priorityy[] arr = new Priorityy[3];
int key;
String value;
public Priorityy(int key, String value) {
this.key = key;
this.value = value;
insert();
}
public void insert() {
arr[front] = this;
System.out.println(arr[front].value);
while (front + 1 != 3) {
front = front + 1;
}
}
public Priorityy remove() {
Priorityy x = arr[front];
front = front - 1;
return x;
}
public int compareTo(Object pe) {
Priorityy p = (Priorityy) pe;
if (this.key < p.key) {
System.out.println(p.key);
return 1;
} else if (this.key > p.key) {
System.out.println("3");
return -1;
} else {
System.out.println("4");
return 0;
}
}
public static void main(String... s) {
new Priorityy(10, "Watch");
new Priorityy(40, "Laptop");
new Priorityy(60, "Wallet");
Arrays.sort(arr);
for (Priorityy element : arr) {
System.out.println(element.key);
System.out.println(element.value);
}
}
}
As per your code
Priorityy p = (Priorityy)pe;
^^ ---------- this is null
You have null object in the array. Handle null object gracefully.
For example
if(pe instanceof Priorityy){ // return false for null object
// your code goes here
}
Better use Generic Comparable and use Integer.compare(int,int) to compare two int values.
class Priorityy implements Comparable<Priorityy> {
    /**
     * Orders instances by ascending {@code key}.
     *
     * @param pe the instance to compare with
     * @return a negative, zero or positive number as this key is smaller
     *         than, equal to or larger than the other key
     * @throws NullPointerException if {@code pe} is {@code null}, as the
     *         {@link Comparable} contract requires
     */
    public int compareTo(Priorityy pe) {
        if (pe == null) {
            throw new NullPointerException("cannot compare a Priorityy with null");
        }
        return Integer.compare(this.key, pe.key);
    }
}
You're putting things into your array in a really strange manner.
But given that, the problem is that you're not using a static field to store the next position to insert an element into, so the next time you create an instance of Priorityy, the field front contains the value zero again. So you're inserting all three objects into element zero of the array.
Change one line of your code and it will work:
int front = 0;
To:
static int front = 0;
I don't see where you are using size and rear but you probably want these to be static too.
One other suggestion: Java has a nice short syntax for increasing or decreasing the value of a variable by one using the ++ or -- operator, so you can shorten things by saying:
front++;
instead of
front = front + 1;
(and front-- instead of front = front - 1)

Linear Probing on Java HashTable implementation

So I have a HashTable implementation here that I wrote using only Arrays and had a little bit of help with the code. Unfortunately, I don't quite understand one of the lines someone added while running the "get" or "put" method. What exactly is happening in the while loop below? It is a method for linear probing correct? Also why is the loop checking the conditions it's checking?
Specifically,
int hash = hashThis(key);
while(data[hash] != AVAILABLE && data[hash].key() != key) {
hash = (hash + 1) % capacity;
}
Here's the whole Java class below for full reference.
public class Hashtable2 {
private Node[] data;
private int capacity;
private static final Node AVAILABLE = new Node("Available", null);
public Hashtable2(int capacity) {
this.capacity = capacity;
data = new Node[capacity];
for(int i = 0; i < data.length; i++) {
data[i] = AVAILABLE;
}
}
public int hashThis(String key) {
return key.hashCode() % capacity;
}
public Object get(String key) {
int hash = hashThis(key);
while(data[hash] != AVAILABLE && data[hash].key() != key) {
hash = (hash + 1) % capacity;
}
return data[hash].element();
}
public void put(String key, Object element) {
if(key != null) {
int hash = hashThis(key);
while(data[hash] != AVAILABLE && data[hash].key() != key) {
hash = (hash + 1) % capacity;
}
data[hash] = new Node(key, element);
}
}
public String toString(){
String s="<";
for (int i=0;i<this.capacity;i++)
{
s+=data[i]+", ";
}
s+=">";
return s;
}
Thank you.
I just rewrote some part of the code and added the findHash-method - try to avoid code-duplication!
/**
 * Finds the slot for {@code key}: probing starts at the key's hash position
 * and moves forward, wrapping around, until it reaches a slot that is either
 * free or already holds this key.
 *
 * @throws IllegalStateException if every slot was probed and none is free
 *         or holds the key
 */
private int findHash(String key) {
    int hash = hashThis(key);
    int probes = 0;
    // search for the next available element or for the next matching key;
    // keys are compared with equals because equal strings may be distinct objects
    while (data[hash] != AVAILABLE && !key.equals(data[hash].key())) {
        if (++probes >= capacity) {
            // Every slot was visited: without this check the loop never ends.
            throw new IllegalStateException("hash table is full");
        }
        hash = (hash + 1) % capacity;
    }
    return hash;
}
/**
 * Returns the element held by the slot that {@code findHash} selects for
 * {@code key}.
 */
public Object get(String key) {
    int slot = findHash(key);
    Node found = data[slot];
    return found.element();
}
/**
 * Stores {@code element} under {@code key} in the slot chosen by
 * {@code findHash}. A {@code null} key is ignored, as in the original
 * version of this method.
 */
public void put(String key, Object element) {
    if (key == null) {
        // A null key cannot be hashed; skipping it avoids a NullPointerException.
        return;
    }
    data[findHash(key)] = new Node(key, element);
}
What you asked is: what exactly does this findHash loop do? The data array was initialized with AVAILABLE, meaning that it does not (yet) contain any actual data. When we add an element with put, a hash value is calculated first; it is simply the index in the data array where the element should go. If that position has already been taken by another element with the same hash value but a different key, we move on to find the next AVAILABLE position. The get method essentially works the same way: if a data element with a different key is found, the next element is probed, and so on.
The data itself is a so called ring-buffer. That is, it is searched until the end of the array and is next search again at the beginning, starting with index 0. This is done with the modulo % operator.
Alright?
Sample Hashtable implementation using Generics and Linear Probing for collision resolution. There are some assumptions made during implementation and they are documented in javadoc above class and methods.
This implementation doesn't have all the methods of Hashtable like keySet, putAll etc but covers most frequently used methods like get, put, remove, size etc.
There is repetition of code in get, put and remove to find the index and it can be improved to have a new method to find index.
/**
 * Mutable key/value pair stored in one slot of {@link Hashtable}.
 *
 * @param <K> key type
 * @param <V> value type
 */
class HashEntry<K, V> {
    private K key;
    private V value;

    public HashEntry(K key, V value) {
        this.key = key;
        this.value = value;
    }

    public void setKey(K key) { this.key = key; }
    public K getKey() { return this.key; }
    public void setValue(V value) { this.value = value; }
    public V getValue() { return this.value; }
}

/**
 * Hashtable implementation ...
 * - with linear probing
 * - without loadfactor & without rehash implementation.
 * - throws exception when table is full
 * - returns null when trying to remove non existent key
 *
 * @param <K> key type; {@code null} keys are not supported
 * @param <V> value type
 */
public class Hashtable<K, V> {
    private final static int DEFAULT_CAPACITY = 16;

    private int count;
    private int capacity;
    private HashEntry<K, V>[] table;

    public Hashtable() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * @param capacity fixed number of slots
     * @throws IllegalArgumentException if {@code capacity} is not positive
     */
    public Hashtable(int capacity) {
        super();
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be positive: " + capacity);
        }
        this.capacity = capacity;
        table = newTable(capacity);
    }

    public boolean isEmpty() { return (count == 0); }

    public int size() { return count; }

    public void clear() { table = newTable(this.capacity); count = 0; }

    /**
     * Returns the value stored for {@code key}.
     *
     * @param key the key to look up
     * @return the mapped value, or {@code null} if the key is not present
     */
    public V get(K key) {
        int slot = findSlot(key);
        if (slot < 0 || table[slot] == null) {
            return null;
        }
        return table[slot].getValue();
    }

    /**
     * Stores {@code value} under {@code key}, overwriting an existing mapping.
     *
     * @param key   the key
     * @param value the value to store
     * @return the value that is now stored for the key
     * @throws Exception if the key is new and every slot is occupied
     */
    public V put(K key, V value) throws Exception {
        int slot = findSlot(key);
        if (slot < 0) {
            throw new Exception("Table Full!!");
        }
        if (table[slot] != null) {
            table[slot].setValue(value);
        } else {
            table[slot] = new HashEntry<K, V>(key, value);
            count++;
        }
        return table[slot].getValue();
    }

    /**
     * Removes the mapping for {@code key}, if any.
     *
     * @param key the key to remove
     * @return the removed value, or {@code null} if the key was not present
     */
    public V remove(K key) {
        int slot = findSlot(key);
        if (slot < 0 || table[slot] == null) {
            return null;
        }
        V value = table[slot].getValue();
        table[slot] = null;
        count--;
        // Just emptying the slot would cut the probe chain: keys that were
        // pushed past this slot by a collision could no longer be found.
        closeGap(slot);
        return value;
    }

    public boolean contains(Object value) {
        return this.containsValue(value);
    }

    public boolean containsKey(Object key) {
        if (key == null) {
            return false;
        }
        int slot = findSlot(key);
        return slot >= 0 && table[slot] != null;
    }

    public boolean containsValue(Object value) {
        for (HashEntry<K, V> entry : table) {
            if (entry == null) {
                continue;
            }
            V stored = entry.getValue();
            // Null-safe: a stored null value only matches a null argument.
            if (stored == null ? value == null : stored.equals(value)) {
                return true;
            }
        }
        return false;
    }

    @Override
    public String toString() {
        StringBuilder data = new StringBuilder();
        data.append("{");
        String separator = "";
        for (HashEntry<K, V> entry : table) {
            if (entry != null) {
                data.append(separator).append(entry.getKey()).append("=").append(entry.getValue());
                separator = ", ";
            }
        }
        data.append("}");
        return data.toString();
    }

    /** Allocates an empty slot array. */
    @SuppressWarnings("unchecked") // generic array creation; only HashEntry<K, V> is ever stored
    private static <K, V> HashEntry<K, V>[] newTable(int capacity) {
        return (HashEntry<K, V>[]) new HashEntry[capacity];
    }

    /** Home slot of {@code key}; the sign bit is masked so the index is never negative. */
    private int indexFor(Object key) {
        return (key.hashCode() & 0x7FFFFFFF) % this.capacity;
    }

    /**
     * Probes at most {@code capacity} slots starting at the key's home slot.
     *
     * @return the index holding {@code key}, or the first empty slot on its
     *         probe path, or -1 if the table is full and the key is absent
     */
    private int findSlot(Object key) {
        int slot = indexFor(key);
        for (int probes = 0; probes < this.capacity; probes++) {
            HashEntry<K, V> entry = table[slot];
            if (entry == null || entry.getKey().equals(key)) {
                return slot;
            }
            slot = (slot + 1) % this.capacity;
        }
        return -1;
    }

    /**
     * Backward-shift deletion: walks the cluster that follows the freshly
     * emptied slot {@code gap} and moves back every entry whose home slot
     * does not lie cyclically in (gap, probe], because such an entry would
     * otherwise be unreachable from its home slot.
     */
    private void closeGap(int gap) {
        int probe = gap;
        while (true) {
            probe = (probe + 1) % this.capacity;
            HashEntry<K, V> entry = table[probe];
            if (entry == null) {
                return;
            }
            int home = indexFor(entry.getKey());
            boolean stillReachable = (gap <= probe)
                    ? (gap < home && home <= probe)
                    : (gap < home || home <= probe);
            if (stillReachable) {
                continue;
            }
            table[gap] = entry;
            table[probe] = null;
            gap = probe;
        }
    }

    public static void main(String[] args) throws Exception {
        Hashtable<Integer, String> table = new Hashtable<Integer, String>(2);
        table.put(1, "1");
        table.put(2, "2");
        System.out.println(table);
        table.put(1, "3");
        table.put(2, "4");
        System.out.println(table);
        table.remove(1);
        System.out.println(table);
        table.put(1, "1");
        System.out.println(table);
        System.out.println(table.get(1));
        System.out.println(table.get(3));
        // table is full so below line
        // will throw an exception
        table.put(3, "2");
    }
}
Sample run of the above code.
{2=2, 1=1}
{2=4, 1=3}
{2=4}
{2=4, 1=1}
1
null
Exception in thread "main" java.lang.Exception: Table Full!!
at Hashtable.put(Hashtable.java:95)
at Hashtable.main(Hashtable.java:177)

Simple HashTable implementation using an array in Java?

I'm having a problem with implementing a very simple HashTable using an array. The problem is that the first Item put in the HashTable is always AVAILABLE. Maybe you guys can see what is going wrong. This is the HashTable class:
/**
 * Fixed-capacity hash table for String keys based on a plain array and linear
 * probing. Unused slots hold the shared {@code AVAILABLE} sentinel.
 */
public class HashTable {
    private Item[] data;
    private int capacity;
    private int size;
    private static final Item AVAILABLE = new Item("Available", null);

    public HashTable(int capacity) {
        this.capacity = capacity;
        data = new Item[capacity];
        for (int i = 0; i < data.length; i++) {
            data[i] = AVAILABLE;
        }
        size = 0;
    }

    /** @return the number of distinct keys currently stored */
    public int size() {
        return size;
    }

    /**
     * Maps {@code key} to its home slot.
     *
     * @return an index in {@code [0, capacity)}
     */
    public int hashThis(String key) {
        // Mask the sign bit: String.hashCode() may be negative and % would
        // then produce a negative index.
        return (key.hashCode() & 0x7FFFFFFF) % capacity;
    }

    /**
     * Probes from the key's home slot, wrapping around, until the key or a
     * free slot is found.
     *
     * @return the index of the key or of the first free slot, or -1 when all
     *         {@code capacity} slots were visited without finding either
     */
    private int findSlot(String key) {
        int slot = hashThis(key);
        for (int probes = 0; probes < capacity; probes++) {
            // equals, not ==: equal strings are not necessarily the same object.
            if (data[slot] == AVAILABLE || key.equals(data[slot].key())) {
                return slot;
            }
            slot = (slot + 1) % capacity;
        }
        return -1;
    }

    /**
     * @return the element stored for {@code key}, or {@code null} if the key
     *         is {@code null} or not present
     */
    public Object get(String key) {
        if (key == null) {
            return null;
        }
        int slot = findSlot(key);
        return slot < 0 ? null : data[slot].element();
    }

    /**
     * Stores {@code element} under {@code key}, replacing a previous mapping.
     * A {@code null} key is ignored.
     *
     * @throws IllegalStateException if the key is new and no free slot is left
     */
    public void put(String key, Object element) {
        if (key != null) {
            int slot = findSlot(key);
            if (slot < 0) {
                throw new IllegalStateException("hash table is full");
            }
            if (data[slot] == AVAILABLE) {
                // Only a new key grows the table; overwriting keeps the size.
                size++;
            }
            data[slot] = new Item(key, element);
        }
    }

    public Object remove(String key) {
        // not important now.
        throw new UnsupportedOperationException("Can't remove");
    }

    /**
     * Lists the stored items in slot order, e.g.
     * {@code <HashTable[<Item(1,a)>]>}.
     */
    public String toString() {
        StringBuilder s = new StringBuilder("<HashTable[");
        boolean first = true;
        // Items sit at their hash positions, not in the first size() slots,
        // so every slot is visited and the free ones are skipped.
        for (int i = 0; i < data.length; i++) {
            if (data[i] == AVAILABLE) {
                continue;
            }
            if (!first) {
                s.append(",");
            }
            s.append(data[i].toString());
            first = false;
        }
        s.append("]>");
        return s.toString();
    }
}
For more clarity, this is the Item class:
/**
 * Mutable pairing of a String key with an arbitrary element.
 */
public class Item {

    private String key;
    private Object element;

    /** Creates an item; both fields are initialised through their setters. */
    public Item(String key, Object element) {
        setKey(key);
        setElement(element);
    }

    /** @return the key of this item */
    public String key() {
        return this.key;
    }

    /** @return the element of this item */
    public Object element() {
        return this.element;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public void setElement(Object element) {
        this.element = element;
    }

    /** Renders the item as {@code <Item(key,element)>}. */
    public String toString() {
        return "<Item(" + key() + "," + element() + ")>";
    }
}
To give an example:
HashTable ht = new HashTable(10);
ht.put("1", "a");
The output of toString() after putting has to be:
"<HashTable[<Item(1,a)>]>"
but I get:
"<HashTable[<Item(Available,null)>]>"
update: I should probably mention that the next Item gets put correctly and the one after that is not again.
I think the problem is in your toString method. You loop from 0 to size, and since size = 1 the loop runs once, so you only print the first slot of your hash table. The problem is that the first slot does not hold a real value; it holds AVAILABLE. You have to do something like this:
EDIT: Sorry I forgot to move the index over.
/**
 * Lists the occupied slots in array order, separated by commas, e.g.
 * {@code <HashTable[<Item(1,a)>]>}.
 */
public String toString() {
    String s = "<HashTable[";
    int count = 0;
    // Walk every slot exactly once: the index always advances, and the loop
    // cannot run past the array even if size() is larger than the number of
    // occupied slots.
    for (int i = 0; i < data.length; i++) {
        //Skip the AVAILABLE cells
        if (data[i] == AVAILABLE) {
            continue;
        }
        if (count > 0) {
            s += ",";
        }
        s += data[i].toString();
        count++;
    }
    s += "]>";
    return s;
}
Try this for toString() if still interested in the solution, I ran it and its fine:
/**
 * Lists the occupied slots in array order, separated by commas, e.g.
 * {@code <HashTable[<Item(1,a)>]>}.
 */
public String toString()
{
    String s = "<HashTable[";
    boolean first = true;
    for (int i = 0; i < this.capacity; i++)
    {
        // Free slots are recognised by the sentinel itself; Item has no
        // accessible field named Element, and a stored element may be null.
        if (data[i] != AVAILABLE)
        {
            // The separator depends on how many items were printed so far,
            // not on the slot index.
            if (!first)
            {
                s += ",";
            }
            s += data[i].toString();
            first = false;
        }
    }
    s += "]>";
    return s;
}

Where do I find a standard Trie based map implementation in Java? [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 2 years ago.
Improve this question
I have a Java program that stores a lot of mappings from Strings to various objects.
Right now, my options are either to rely on hashing (via HashMap) or on binary searches (via TreeMap). I am wondering if there is an efficient and standard trie-based map implementation in a popular and quality collections library?
I've written my own in the past, but I'd rather go with something standard, if available.
Quick clarification: While my question is general, in the current project I am dealing with a lot of data that is indexed by fully-qualified class name or method signature. Thus, there are many shared prefixes.
You might want to look at the Trie implementation that Limewire is contributing to the Google Guava.
There is no trie data structure in the core Java libraries.
This may be because tries are usually designed to store character strings, while Java data structures are more general, usually holding any Object (defining equality and a hash operation), though they are sometimes limited to Comparable objects (defining an order). There's no common abstraction for "a sequence of symbols," although CharSequence is suitable for character strings, and I suppose you could do something with Iterable for other types of symbols.
Here's another point to consider: when trying to implement a conventional trie in Java, you are quickly confronted with the fact that Java supports Unicode. To have any sort of space efficiency, you have to restrict the strings in your trie to some subset of symbols, or abandon the conventional approach of storing child nodes in an array indexed by symbol. This might be another reason why tries are not considered general-purpose enough for inclusion in the core library, and something to watch out for if you implement your own or use a third-party library.
Apache Commons Collections v4.0 now supports trie structures.
See the org.apache.commons.collections4.trie package info for more information. In particular, check the PatriciaTrie class:
Implementation of a PATRICIA Trie (Practical Algorithm to Retrieve Information Coded in Alphanumeric).
A PATRICIA Trie is a compressed Trie. Instead of storing all data at the edges of the Trie (and having empty internal nodes), PATRICIA stores data in every node. This allows for very efficient traversal, insert, delete, predecessor, successor, prefix, range, and select(Object) operations. All operations are performed at worst in O(K) time, where K is the number of bits in the largest item in the tree. In practice, operations actually take O(A(K)) time, where A(K) is the average number of bits of all items in the tree.
Also check out concurrent-trees. They support both Radix and Suffix trees and are designed for high concurrency environments.
I wrote and published a simple and fast implementation here.
What you need is org.apache.commons.collections.FastTreeMap , I think.
Below is a basic HashMap implementation of a Trie. Some people might find this useful...
/**
 * Prefix tree in which every node is a map from a character to the child
 * node for that character.
 */
class Trie {
    HashMap<Character, HashMap> root;

    public Trie() {
        root = new HashMap<Character, HashMap>();
    }

    /**
     * Adds {@code word}, creating a child node for every character that is
     * not yet on the path.
     */
    public void addWord(String word) {
        HashMap<Character, HashMap> cursor = root;
        for (char letter : word.toCharArray()) {
            HashMap<Character, HashMap> child = cursor.get(letter);
            if (child == null) {
                child = new HashMap<Character, HashMap>();
                cursor.put(letter, child);
            }
            cursor = child;
        }
    }

    /**
     * Reports whether some added word starts with {@code word}; the empty
     * string is a prefix of everything.
     */
    public boolean containsPrefix(String word) {
        HashMap<Character, HashMap> cursor = root;
        for (char letter : word.toCharArray()) {
            HashMap<Character, HashMap> child = cursor.get(letter);
            if (child == null) {
                return false;
            }
            cursor = child;
        }
        return true;
    }
}
Apache's commons collections:
org.apache.commons.collections4.trie.PatriciaTrie
You can try the Completely Java library, it features a PatriciaTrie implementation. The API is small and easy to get started, and it's available in the Maven central repository.
You might look at this TopCoder one as well (registration required...).
If you required sorted map, then tries are worthwhile.
If you don't then hashmap is better.
Hashmap with string keys can be improved over the standard Java implementation:
Array hash map
If you're not worried about pulling in the Scala library, you can use this space efficient implementation I wrote of a burst trie.
https://github.com/nbauernfeind/scala-burst-trie
here is my implementation, enjoy it via: GitHub - MyTrie.java
/* usage:
MyTrie trie = new MyTrie();
trie.insert("abcde");
trie.insert("abc");
trie.insert("sadas");
trie.insert("abc");
trie.insert("wqwqd");
System.out.println(trie.contains("abc"));
System.out.println(trie.contains("abcd"));
System.out.println(trie.contains("abcdefg"));
System.out.println(trie.contains("ab"));
System.out.println(trie.getWordCount("abc"));
System.out.println(trie.getAllDistinctWords());
*/
import java.util.*;
/**
 * Array-backed trie for words made of the lowercase letters {@code a}-{@code z}.
 * Nodes live in one growing array and refer to their children by index;
 * every node counts how many times the word ending there was inserted.
 */
public class MyTrie {
    /** Marks a missing child link. */
    public final static int NULL = -1;

    private static final int ALPHABET_SIZE = 26;
    private static final int INITIAL_NODE_CAPACITY = 1024;

    private static final class Node {
        public int[] next = new int[ALPHABET_SIZE];
        public int wordCount;

        public Node() {
            Arrays.fill(next, NULL);
            wordCount = 0;
        }
    }

    /** Index of the next unused position in {@code nodes}. */
    private int curr;
    private Node[] nodes;

    public MyTrie() {
        nodes = new Node[INITIAL_NODE_CAPACITY];
        nodes[0] = new Node(); // root
        curr = 1;
    }

    /** Returns the child index (0-25) for {@code c}, or {@link #NULL} if it is not in a-z. */
    private int getIndex(char c) {
        return (c >= 'a' && c <= 'z') ? (c - 'a') : NULL;
    }

    /** Follows {@code str} from the root; returns the node reached or {@link #NULL}. */
    private int locate(String str) {
        int p = 0;
        for (int i = 0; i < str.length(); i++) {
            int j = getIndex(str.charAt(i));
            if (j == NULL || nodes[p].next[j] == NULL) {
                return NULL;
            }
            p = nodes[p].next[j];
        }
        return p;
    }

    /** Depth-first walk that collects every word below node {@code x} into {@code out}. */
    private void depthSearchWord(int x, String currWord, List<String> out) {
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            int p = nodes[x].next[i];
            if (p != NULL) {
                String word = currWord + (char) (i + 'a');
                if (nodes[p].wordCount > 0) {
                    out.add(word);
                }
                depthSearchWord(p, word, out);
            }
        }
    }

    /**
     * @return every distinct non-empty word that was inserted, in
     *         alphabetical depth-first order
     */
    public List<String> getAllDistinctWords() {
        List<String> words = new ArrayList<String>();
        depthSearchWord(0, "", words);
        return words;
    }

    /**
     * @return how many times {@code str} was inserted; 0 if it never was or
     *         if it contains characters outside a-z
     */
    public int getWordCount(String str) {
        int p = locate(str);
        return p == NULL ? 0 : nodes[p].wordCount;
    }

    /**
     * @return {@code true} if {@code str} was inserted at least once;
     *         strings with characters outside a-z are never contained
     */
    public boolean contains(String str) {
        return getWordCount(str) > 0;
    }

    /**
     * Inserts {@code str}, or increments its count if it is already present.
     *
     * @throws IllegalArgumentException if {@code str} contains a character
     *         outside a-z; the trie is left unchanged in that case
     */
    public void insert(String str) {
        int len = str.length();
        // Validate first so that a rejected word leaves no partial path behind.
        for (int i = 0; i < len; i++) {
            if (getIndex(str.charAt(i)) == NULL) {
                throw new IllegalArgumentException(
                        "only lowercase letters a-z are supported: \"" + str + "\"");
            }
        }
        int p = 0;
        for (int i = 0; i < len; i++) {
            int j = getIndex(str.charAt(i));
            if (nodes[p].next[j] == NULL) {
                if (curr == nodes.length) {
                    // Grow instead of overflowing a fixed-size node pool.
                    nodes = Arrays.copyOf(nodes, nodes.length * 2);
                }
                nodes[curr] = new Node();
                nodes[p].next[j] = curr;
                curr++;
            }
            p = nodes[p].next[j];
        }
        nodes[p].wordCount++;
    }
}
I have just tried my own concurrent trie implementation. It is not based on characters; it is based on the key's hash code. We can still use this approach with a map of maps keyed by the hash code of each char.
You can test this using the code at https://github.com/skanagavelu/TrieHashMap/blob/master/src/TrieMapPerformanceTest.java
https://github.com/skanagavelu/TrieHashMap/blob/master/src/TrieMapValidationTest.java
import java.util.concurrent.atomic.AtomicReferenceArray;
// Holder for two public, mutable static settings.
// NOTE(review): neither field is read by the code visible in this excerpt, so
// their meaning cannot be confirmed from here; verify against the callers.
public class TrieMap {
// Presumably the fan-out (or bit width) of an Edge node - TODO confirm.
public static int SIZEOFEDGE = 4;
// Presumably an initial or overall size setting - TODO confirm.
public static int OSIZE = 5000;
}
abstract class Node {
public Node getLink(String key, int hash, int level){
throw new UnsupportedOperationException();
}
public Node createLink(int hash, int level, String key, String val) {
throw new UnsupportedOperationException();
}
public Node removeLink(String key, int hash, int level){
throw new UnsupportedOperationException();
}
}
class Vertex extends Node {
String key;
volatile String val;
volatile Vertex next;
public Vertex(String key, String val) {
this.key = key;
this.val = val;
}
#Override
public boolean equals(Object obj) {
Vertex v = (Vertex) obj;
return this.key.equals(v.key);
}
#Override
public int hashCode() {
return key.hashCode();
}
#Override
public String toString() {
return key +"#"+key.hashCode();
}
}
class Edge extends Node {
volatile AtomicReferenceArray<Node> array; //This is needed to ensure array elements are volatile
public Edge(int size) {
array = new AtomicReferenceArray<Node>(8);
}
#Override
public Node getLink(String key, int hash, int level){
int index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Node returnVal = array.get(index);
for(;;) {
if(returnVal == null) {
return null;
}
else if((returnVal instanceof Vertex)) {
Vertex node = (Vertex) returnVal;
for(;node != null; node = node.next) {
if(node.key.equals(key)) {
return node;
}
}
return null;
} else { //instanceof Edge
level = level + 1;
index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Edge e = (Edge) returnVal;
returnVal = e.array.get(index);
}
}
}
#Override
public Node createLink(int hash, int level, String key, String val) { //Remove size
for(;;) { //Repeat the work on the current node, since some other thread modified this node
int index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Node nodeAtIndex = array.get(index);
if ( nodeAtIndex == null) {
Vertex newV = new Vertex(key, val);
boolean result = array.compareAndSet(index, null, newV);
if(result == Boolean.TRUE) {
return newV;
}
//continue; since new node is inserted by other thread, hence repeat it.
}
else if(nodeAtIndex instanceof Vertex) {
Vertex vrtexAtIndex = (Vertex) nodeAtIndex;
int newIndex = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, vrtexAtIndex.hashCode(), level+1);
int newIndex1 = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level+1);
Edge edge = new Edge(Base10ToBaseX.Base.BASE8.getLevelZeroMask()+1);
if(newIndex != newIndex1) {
Vertex newV = new Vertex(key, val);
edge.array.set(newIndex, vrtexAtIndex);
edge.array.set(newIndex1, newV);
boolean result = array.compareAndSet(index, vrtexAtIndex, edge); //REPLACE vertex to edge
if(result == Boolean.TRUE) {
return newV;
}
//continue; since vrtexAtIndex may be removed or changed to Edge already.
} else if(vrtexAtIndex.key.hashCode() == hash) {//vrtex.hash == hash) { HERE newIndex == newIndex1
synchronized (vrtexAtIndex) {
boolean result = array.compareAndSet(index, vrtexAtIndex, vrtexAtIndex); //Double check this vertex is not removed.
if(result == Boolean.TRUE) {
Vertex prevV = vrtexAtIndex;
for(;vrtexAtIndex != null; vrtexAtIndex = vrtexAtIndex.next) {
prevV = vrtexAtIndex; // prevV is used to handle when vrtexAtIndex reached NULL
if(vrtexAtIndex.key.equals(key)){
vrtexAtIndex.val = val;
return vrtexAtIndex;
}
}
Vertex newV = new Vertex(key, val);
prevV.next = newV; // Within SYNCHRONIZATION since prevV.next may be added with some other.
return newV;
}
//Continue; vrtexAtIndex got changed
}
} else { //HERE newIndex == newIndex1 BUT vrtex.hash != hash
edge.array.set(newIndex, vrtexAtIndex);
boolean result = array.compareAndSet(index, vrtexAtIndex, edge); //REPLACE vertex to edge
if(result == Boolean.TRUE) {
return edge.createLink(hash, (level + 1), key, val);
}
}
}
else { //instanceof Edge
return nodeAtIndex.createLink(hash, (level + 1), key, val);
}
}
}
#Override
public Node removeLink(String key, int hash, int level){
for(;;) {
int index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Node returnVal = array.get(index);
if(returnVal == null) {
return null;
}
else if((returnVal instanceof Vertex)) {
synchronized (returnVal) {
Vertex node = (Vertex) returnVal;
if(node.next == null) {
if(node.key.equals(key)) {
boolean result = array.compareAndSet(index, node, null);
if(result == Boolean.TRUE) {
return node;
}
continue; //Vertex may be changed to Edge
}
return null; //Nothing found; This is not the same vertex we are looking for. Here hashcode is same but key is different.
} else {
if(node.key.equals(key)) { //Removing the first node in the link
boolean result = array.compareAndSet(index, node, node.next);
if(result == Boolean.TRUE) {
return node;
}
continue; //Vertex(node) may be changed to Edge, so try again.
}
Vertex prevV = node; // prevV is used to handle when vrtexAtIndex is found and to be removed from its previous
node = node.next;
for(;node != null; prevV = node, node = node.next) {
if(node.key.equals(key)) {
prevV.next = node.next; //Removing other than first node in the link
return node;
}
}
return null; //Nothing found in the linked list.
}
}
} else { //instanceof Edge
return returnVal.removeLink(key, hash, (level + 1));
}
}
}
}
/**
 * Extracts fixed-width digit groups from a 32-bit {@code int}.
 */
class Base10ToBaseX {

    /**
     * Ways of slicing the 32 bits of an {@code int} into equally wide digits.
     * Each constant carries the mask of the lowest digit, the number of bits
     * per digit, and the number of levels (digits) available.
     * For {@code BASE8} the 11th level only has two bits left (bits 30-31).
     */
    public enum Base {
        BASE2(1, 1, 32),
        BASE4(3, 2, 16),
        BASE8(7, 3, 11), // octal
        BASE16(15, 4, 8) {
            /** Renders 10..15 as "A".."F"; any other value as its decimal text. */
            @Override
            public String getFormattedValue(int val) {
                if (val < 10 || val > 15) {
                    return "" + val;
                }
                final String[] letters = {"A", "B", "C", "D", "E", "F"};
                return letters[val - 10];
            }
        },
        BASE256(255, 8, 4),
        Base65536(65535, 16, 2);

        private final int lowestDigitMask;
        private final int bitsPerDigit;
        private final int levelCount;

        Base(int levelZeroMask, int levelOneRotation, int maxPossibleRotation) {
            lowestDigitMask = levelZeroMask;
            bitsPerDigit = levelOneRotation;
            levelCount = maxPossibleRotation;
        }

        /** @return the bit mask that selects the lowest digit */
        int getLevelZeroMask() {
            return lowestDigitMask;
        }

        /** @return the shift distance between two adjacent digits */
        int getLevelOneRotation() {
            return bitsPerDigit;
        }

        /** @return the highest level accepted for this base */
        int getMaxRotation() {
            return levelCount;
        }

        /** @return the decimal text of {@code val} */
        String getFormattedValue(int val) {
            return "" + val;
        }
    }

    /**
     * Returns the digit of {@code on} found at the given 1-based level, where
     * level 1 is the lowest digit.
     *
     * @param base  how the integer is sliced into digits
     * @param on    the integer to read from
     * @param level 1-based digit position
     * @return the digit value, or 0 when {@code level} is below 1 or above
     *         {@code base.getMaxRotation()}
     */
    public static int getBaseXValueOnAtLevel(Base base, int on, int level) {
        final boolean validLevel = level >= 1 && level <= base.getMaxRotation();
        if (!validLevel) {
            return 0; // invalid input
        }
        // Level 1 gives a shift of 0, which leaves the mask untouched.
        final int shift = (level - 1) * base.getLevelOneRotation();
        final int shiftedMask = base.getLevelZeroMask() << shift;
        return (on & shiftedMask) >>> shift;
    }
}

Categories

Resources