Related
I am trying to build a simple implementation of the HashMap class in Java, for learning purposes. I know how rehashing works (Rehashing process in hashmap or hashtable).
When rehashing, all the elements present in the internal array are identified and where they go in the new array can be determined by recomputing their hashes based on the new hash function. However, how are all the elements present in the array identified?
Is there some kind of mechanism which keeps track of all keys, or there is a mechanism which keeps track of the indexes in the internal array which contain elements?
The alternative (which I used in my implementation) would be to scan the entire array for elements. This might be inefficient however as a lot of time would be wasted scanning empty buckets. Is there a better way?
Here is my implementation. The focus here is the rehash(int) function.
public class HashMap<T, U> {
private static final int MIN_CAPACITY = 16;
private static final double LOAD_FACTOR = 0.75;
private int mCount = 0;
private HashMapItem<T, U>[] mArray = (HashMapItem<T, U>[]) new HashMapItem[MIN_CAPACITY];
public HashMap() {
}
private void rehash(int newCapacity) {
HashMapItem<T, U>[] newArray = (HashMapItem<T, U>[]) new HashMapItem[newCapacity];
for (HashMapItem<T, U> hashMapItem : mArray) {
if (hashMapItem != null) {
HashMapItem<T, U> currentNode = hashMapItem;
while (currentNode != null) {
putInArray(currentNode.key, currentNode.value, newArray);
currentNode = currentNode.next;
}
}
}
mArray = newArray;
}
private int hashFunction(T key, int arrayCapacity) {
return Math.abs(key.hashCode()) % arrayCapacity;
}
private boolean putInArray(T key, U value, HashMapItem<T, U>[] array) {
boolean duplicateKey = false;
int index = hashFunction(key, array.length);
HashMapItem<T, U> hashMapItem = array[index];
if (hashMapItem == null) array[index] = new HashMapItem<T, U>(key, value);
else {
HashMapItem<T, U> currentNode = hashMapItem;
while (true) {
if (currentNode.key.equals(key)) {
currentNode.value = value;
duplicateKey = true;
break;
}
else if (currentNode.next != null) currentNode = currentNode.next;
else break;
}
if (!duplicateKey) currentNode.next = new HashMapItem<T, U>(key, value);
}
return duplicateKey;
}
public void put(T key, U value) {
if (mCount >= mArray.length * LOAD_FACTOR) rehash(mArray.length << 1);
boolean duplicateKey = putInArray(key, value, mArray);
if (!duplicateKey) mCount++;
}
public U get(T key) {
int index = hashFunction(key, mArray.length);
HashMapItem<T, U> hashMapItem = mArray[index];
if (hashMapItem != null) {
HashMapItem<T, U> currentNode = hashMapItem;
while (currentNode != null) {
if (currentNode.key.equals(key)) return currentNode.value;
currentNode = currentNode.next;
}
}
return null;
}
public U remove(T key) {
U removedItem = null;
int index = hashFunction(key, mArray.length);
HashMapItem<T, U> hashMapItem = mArray[index];
if (hashMapItem != null) {
HashMapItem<T, U> currentNode = hashMapItem;
HashMapItem<T, U> previousNode = null;
while (currentNode != null) {
if (currentNode.key.equals(key)) {
removedItem = currentNode.value;
if (previousNode == null) mArray[index] = currentNode.next;
else previousNode.next = currentNode.next;
break;
}
previousNode = currentNode;
currentNode = currentNode.next;
}
}
if (removedItem != null) mCount--;
return removedItem;
}
public int count() {
return mCount;
}
private class HashMapItem<T, U> {
T key;
U value;
HashMapItem<T, U> next;
public HashMapItem(T key, U value) {
this.key = key;
this.value = value;
}
}
}
There are two approaches to this problem:
Maintain a linked list-like structure of non-empty buckets - this could be done reasonably efficiently. It could also give you predictability on iteration, similar to LinkedHashMap, or
Scan all locations on re-hashing - this is exactly what you are doing.
Effectively, the choice boils down to paying with memory for reduced use of CPU. If you must iterate the hash map often, the first solution is better. If you do it only when you rehash, the second solution is better, because rehashing happens only when your map is relatively full. In other words, the majority of checks during the scan are going to succeed.
I was doing this exercice:
Write code to partition a linked list around a value x, such that all nodes less than x come before all nodes greater than or equal to x. Example input: 3 -> 5 -> 8 -> 5 -> 10 -> 2 -> 1 output: 3 -> 1 -> 2 -> 10 -> 5 -> 5 -> 8
I found it hard to find a solution for Singly linked list (that created by my own, not using library), I would like to know if there is uncessary code blocks in my code and is there a way to avoid putting in two lists and then merge? because it seems to have very slow performance like that.
public CustomLinkedList partition(CustomLinkedList list, int x) {
CustomLinkedList beforeL = new CustomLinkedList();
CustomLinkedList afterL = new CustomLinkedList();
LinkedListNode current = list.getHead();
while (current != null) {
if (current.getData() < x) {
addToLinkedList(beforeL, current.getData());
} else {
addToLinkedList(afterL, current.getData());
}
// increment current
current = current.getNext();
}
if (beforeL.getHead() == null)
return afterL;
mergeLinkedLists(beforeL, afterL);
return beforeL;
}
public void addToLinkedList(CustomLinkedList list, int value) {
LinkedListNode newEnd = new LinkedListNode(value);
LinkedListNode cur = list.getHead();
if (cur == null)
list.setHead(newEnd);
else {
while (cur.getNext() != null) {
cur = cur.getNext();
}
cur.setNext(newEnd);
cur = newEnd;
}
}
public void mergeLinkedLists(CustomLinkedList list1, CustomLinkedList list2) {
LinkedListNode start = list1.getHead();
LinkedListNode prev = null;
while (start != null) {
prev = start;
start = start.getNext();
}
prev.setNext(list2.getHead());
}
CustumLinkedList contains two attributes: -LinkedListNode which is the head and an int which is the size.
LinkedListNode contains two attributes: One of type LinkedListNode pointing to next node and one of type int: data value
Thank you.
The problem of your code is not merging two lists as you mentioned. It's wrong to use the word merge here because you're only linking up the tail of the left list with head of right list which is a constant time operation.
The real problem is - on inserting a new element on the left or right list, you are iterating from head to tail every time which yields in-total O(n^2) operation and is definitely slow.
Here I've wrote a simpler version and avoid iterating every time from head to insert a new item by keeping track of the current tail.
The code is very simple and is definitely faster than yours(O(n)). Let me know if you need explanation on any part.
// I don't know how your CustomLinkedList is implemented. Here I wrote a simple LinkedList node
public class ListNode {
private int val;
private ListNode next;
public ListNode(int x) {
val = x;
}
public int getValue() {
return this.val;
}
public ListNode getNext() {
return this.next;
}
public void setNext(ListNode next) {
this.next = next;
}
}
public ListNode partition(ListNode head, int x) {
if(head == null) return null;
ListNode left = null;
ListNode right = null;
ListNode iterL = left;
ListNode iterR = right;
while(iter != null) {
if(iter.getValue() < x) {
iterL = addNode(iterL, iter.getValue());
}
else {
iterR = addNode(iterR, iter.getValue());
}
iter = iter.getNext();
}
// link up the left and right list
iterL.setNext(iterR);
return left;
}
public ListNode addNode(ListNode curr, int value) {
ListNode* newNode = new ListNode(value);
if(curr == null) {
curr = newNode;
} else {
curr.setNext(newNode);
curr = curr.getNext();
}
return curr;
}
Hope it helps!
If you have any list of data, access orderByX Method.
Hope it would help you.
public class OrderByX {
Nodes root = null;
OrderByX() {
root = null;
}
void create(int[] array, int k) {
for (int i = 0; i < array.length; ++i) {
root = insert(root, array[i]);
}
}
Nodes insert(Nodes root, int data) {
if (root == null) {
root = new Nodes(data);
} else {
Nodes tempNew = new Nodes(data);
tempNew.setNext(root);
root = tempNew;
}
return root;
}
void display() {
Nodes tempNode = root;
while (tempNode != null) {
System.out.print(tempNode.getData() + ", ");
tempNode = tempNode.getNext();
}
}
void displayOrder(Nodes root) {
if (root == null) {
return;
} else {
displayOrder(root.getNext());
System.out.print(root.getData() + ", ");
}
}
Nodes orderByX(Nodes root, int x) {
Nodes resultNode = null;
Nodes lessNode = null;
Nodes greatNode = null;
Nodes midNode = null;
while (root != null) {
if (root.getData() < x) {
if (lessNode == null) {
lessNode = root;
root = root.getNext();
lessNode.setNext(null);
} else {
Nodes temp = root.getNext();
root.setNext(lessNode);
lessNode = root;
root = temp;
}
} else if (root.getData() > x) {
if (greatNode == null) {
greatNode = root;
root = root.getNext();
greatNode.setNext(null);
} else {
Nodes temp = root.getNext();
root.setNext(greatNode);
greatNode = root;
root = temp;
}
} else {
if (midNode == null) {
midNode = root;
root = root.getNext();
midNode.setNext(null);
} else {
Nodes temp = root.getNext();
root.setNext(midNode);
midNode = root;
root = temp;
}
}
}
resultNode = lessNode;
while (lessNode.getNext() != null) {
lessNode = lessNode.getNext();
}
lessNode.setNext(midNode);
while (midNode.getNext() != null) {
midNode = midNode.getNext();
}
midNode.setNext(greatNode);
return resultNode;
}
public static void main(String... args) {
int[] array = { 7, 1, 6, 2, 8 };
OrderByX obj = new OrderByX();
obj.create(array, 0);
obj.display();
System.out.println();
obj.displayOrder(obj.root);
System.out.println();
obj.root = obj.orderByX(obj.root, 2);
obj.display();
}
}
class Nodes {
private int data;
private Nodes next;
Nodes(int data) {
this.data = data;
this.next = null;
}
public Nodes getNext() {
return next;
}
public void setNext(Nodes next) {
this.next = next;
}
public int getData() {
return data;
}
public void setData(int data) {
this.data = data;
}
}
I think that maintaining two lists is not an issue. It is possible to use a single list, but at the cost of loosing some of the simplicity.
The principal problem seems to be the addToLinkedList(CustomLinkedList list, int value) method.
It iterates throughout the entire list in order to add a new element.
One alternative is to always add elements at the front of the list. This would also produce a valid solution, and would run faster.
I'm trying to write a code that handles a linked list. This linked list is somehow different because it's a key-value pair linked list (singly). The linked list should provide the user with basic functions, such as retrieving the size of linked list, checking for a key whether it's in the list or not, insertion, and deletion. It seems that all the functions are working properly except for the deletion. The code for the deletion method run correctly with no run time error, but it gives me results that's not what I wanted to have. Here is my code:
public class SequentialSearch<Key,Value> {
private int N; // number of key-value pairs
private Node head; // the linked list of key-value pairs
private Node tail;
// a helper linked list data type
private class Node {
private Key key;
private Value val;
private Node next;
public Node(Key key, Value val, Node next) {
this.key = key;
this.val = val;
this.next = next;
}
public void setNext(Node next) {
this.next = next;
}
}
public SequentialSearch() {
}
public int size() {
if (head == null)
return 0;
else {
Node x = head;
while (x.next != null) {
N++;
x = x.next;
}
}
return N;
}
public boolean isEmpty() {
return size() == 0;
}
public boolean contains(Key key) {
return get(key) != null;
}
public Value get(Key key) {
for (Node x = head; x != null; x = x.next) {
if (key.equals(x.key))
return x.val;
}
return null;
}
public void put(Key key, Value val) {
if (val == null) {
delete(key);
return;
}
for (Node x = first; x != null; x = x.next) {
if (key.equals(x.key)) {
x.val = val;
return;
}
}
first = new Node(key, val, first);
N++;
}
public boolean delete(Key key) {
Node curr = head;
Node prev = null;
boolean result = false;
if(isEmpty())
System.err.println("Error: The list is empty.");
while(curr != null) {
if(key.equals(curr.key)) {
if(curr.equals(head)) {
head = curr = null;
N--;
result = true;
return result;
} else {
prev.next = curr.next;
curr.setNext(null);
N--;
result = true;
return result;
}
} else {
prev = curr;
curr = curr.next;
}
}
return result;
}
}
I've written a main program to test for the add (put) and deletion, but it seems to be working for the insertion but not for the deletion. I think I might have a problem the deletion in case there is only one node in the list and the case of deleting a node from the middle of the list.
I'm also trying to modify the deletion method by writing a new method using the recursion, but I faced some errors -also logically. Here is the code of that function:
public void delete(Key key) {
first = delete(first, key);
}
private Node delete(Node x, Key key) {
if (x == null) return null;
if (key.equals(x.key)) {
N--;
return x.next;
}
x.next = delete(x.next, key);
return x;
}
Could you please tell me what did I do wrong?
head = curr = null;
This statement is a little confusing but I think it might be your problem for when deleting. If you're deleting a match that is at 'head', you just want to set head to 'curr.next'
What I think is going on:
delete(a)
a -> b -> c -> d
head = a
curr = a
curr.next = b
you set head to null, but head needs to point to the first item in the new list, which if you deleted 'a', would be 'b', or curr.next
Least Frequently Used (LFU) is a type of cache algorithm used to manage memory within a computer. The standard characteristics of this method involve the system keeping track of the number of times a block is referenced in memory. When the cache is full and requires more room the system will purge the item with the lowest reference frequency.
What would be the best way to implement a most-recently-used cache of objects, say in Java?
I've already implemented one using LinkedHashMap(by maintaining the no. of times objects are accessed) But I'm curious if any of the new concurrent collections would be better candidates.
Consider this case : Suppose cache is full and we need to make space for another one. Say two objects are noted in cache which are accessed for one time only. Which one to remove if we come to know that other(which is not in cache)object is being accessed for more than once ?
Thanks!
You might benefit from the LFU implementation of ActiveMQ: LFUCache
They have provided some good functionality.
I think, the LFU data structure must combine priority queue (for maintaining fast access to lfu item) and hash map (for providing fast access to any item by its key); I would suggest the following node definition for each object stored in cache:
class Node<T> {
// access key
private int key;
// counter of accesses
private int numAccesses;
// current position in pq
private int currentPos;
// item itself
private T item;
//getters, setters, constructors go here
}
You need key for referring to an item.
You need numAccesses as a key for priority queue.
You need currentPos to be able to quickly find a pq position of item by key.
Now you organize hash map (key(Integer) -> node(Node<T>)) to quickly access items and min heap-based priority queue using number of accesses as priority. Now you can very quickly perform all operations (access, add new item, update number of acceses, remove lfu). You need to write each operation carefully, so that it maintains all the nodes consistent (their number of accesses, their position in pq and there existence in hash map). All operations will work with constant average time complexity which is what you expect from cache.
According to me, the best way to implement a most-recently-used cache of objects would be to include a new variable as 'latestTS' for each object. TS stands for timestamp.
// A static method that returns the current date and time as milliseconds since January 1st 1970
long latestTS = System.currentTimeMillis();
ConcurrentLinkedHashMap is not yet implemented in Concurrent Java Collections.
(Ref: Java Concurrent Collection API). However, you can try and use ConcurrentHashMap and DoublyLinkedList
About the case to be considered: in such case, as I have said that you can declare latestTS variable, based upon the value of latestTS variable, you can remove an entry and add the new object. (Don't forget to update frequency and latestTS of the new object added)
As you have mentioned, you can use LinkedHashMap as it gives element access in O(1) and also, you get the order traversal.
Please, find the below code for LFU Cache:
(PS: The below code is the answer for the question in the title i.e. "How to implement LFU cache")
import java.util.LinkedHashMap;
import java.util.Map;
public class LFUCache {
class CacheEntry
{
private String data;
private int frequency;
// default constructor
private CacheEntry()
{}
public String getData() {
return data;
}
public void setData(String data) {
this.data = data;
}
public int getFrequency() {
return frequency;
}
public void setFrequency(int frequency) {
this.frequency = frequency;
}
}
private static int initialCapacity = 10;
private static LinkedHashMap<Integer, CacheEntry> cacheMap = new LinkedHashMap<Integer, CacheEntry>();
/* LinkedHashMap is used because it has features of both HashMap and LinkedList.
* Thus, we can get an entry in O(1) and also, we can iterate over it easily.
* */
public LFUCache(int initialCapacity)
{
this.initialCapacity = initialCapacity;
}
public void addCacheEntry(int key, String data)
{
if(!isFull())
{
CacheEntry temp = new CacheEntry();
temp.setData(data);
temp.setFrequency(0);
cacheMap.put(key, temp);
}
else
{
int entryKeyToBeRemoved = getLFUKey();
cacheMap.remove(entryKeyToBeRemoved);
CacheEntry temp = new CacheEntry();
temp.setData(data);
temp.setFrequency(0);
cacheMap.put(key, temp);
}
}
public int getLFUKey()
{
int key = 0;
int minFreq = Integer.MAX_VALUE;
for(Map.Entry<Integer, CacheEntry> entry : cacheMap.entrySet())
{
if(minFreq > entry.getValue().frequency)
{
key = entry.getKey();
minFreq = entry.getValue().frequency;
}
}
return key;
}
public String getCacheEntry(int key)
{
if(cacheMap.containsKey(key)) // cache hit
{
CacheEntry temp = cacheMap.get(key);
temp.frequency++;
cacheMap.put(key, temp);
return temp.data;
}
return null; // cache miss
}
public static boolean isFull()
{
if(cacheMap.size() == initialCapacity)
return true;
return false;
}
}
Here's the o(1) implementation for LFU - http://dhruvbird.com/lfu.pdf
I have tried to implement this below LFU cache implementation. Took reference from this -
LFU paper. My implementation is working nicely.
If anyone wants to provide any further suggestion to improve it again, please let me know.
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
public class LFUCacheImplementation {
private Map<Integer, Node> cache = new HashMap<>();
private Map<Integer, Integer> counts = new HashMap<>();
private TreeMap<Integer, DoublyLinkedList> frequencies = new TreeMap<>();
private final int CAPACITY;
public LFUCache(int capacity) {
this.CAPACITY = capacity;
}
public int get(int key) {
if (!cache.containsKey(key)) {
return -1;
}
Node node = cache.get(key);
int frequency = counts.get(key);
frequencies.get(frequency).remove(new Node(node.key(), node.value()));
removeFreq(frequency);
frequencies.computeIfAbsent(frequency + 1, k -> new DoublyLinkedList()).add(new Node(node.key(), node.value()));
counts.put(key, frequency + 1);
return cache.get(key).value();
}
public void set(int key, int value) {
if (!cache.containsKey(key)) {
Node node = new Node(key, value);
if (cache.size() == CAPACITY) {
int l_count = frequencies.firstKey();
Node deleteThisNode = frequencies.get(l_count).head();
frequencies.get(l_count).remove(deleteThisNode);
int deleteThisKey = deleteThisNode.key();
removeFreq(l_count);
cache.remove(deleteThisKey);
counts.remove(deleteThisKey);
}
cache.put(key, node);
counts.put(key, 1);
frequencies.computeIfAbsent(1, k -> new DoublyLinkedList()).add(node);
}
}
private void removeFreq(int frequency) {
if (frequencies.get(frequency).size() == 0) {
frequencies.remove(frequency);
}
}
public Map<Integer, Node> getCache() {
return cache;
}
public Map<Integer, Integer> getCounts() {
return counts;
}
public TreeMap<Integer, DoublyLinkedList> getFrequencies() {
return frequencies;
}
}
class Node {
private int key;
private int value;
private Node next;
private Node prev;
public Node(int key, int value) {
this.key = key;
this.value = value;
}
public Node getNext() {
return next;
}
public void setNext(Node next) {
this.next = next;
}
public Node getPrev() {
return prev;
}
public void setPrev(Node prev) {
this.prev = prev;
}
public int key() {
return key;
}
public int value() {
return value;
}
#Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Node)) return false;
Node node = (Node) o;
return key == node.key &&
value == node.value;
}
#Override
public int hashCode() {
return Objects.hash(key, value);
}
#Override
public String toString() {
return "Node{" +
"key=" + key +
", value=" + value +
'}';
}
}
class DoublyLinkedList {
private int size;
private Node head;
private Node tail;
public void add(Node node) {
if (null == head) {
head = node;
} else {
tail.setNext(node);
node.setPrev(tail);
}
tail = node;
size++;
}
public void remove(Node node) {
if(null == head || null == node) {
return;
}
if(this.size() == 1 && head.equals(node)) {
head = null;
tail = null;
} else if (head.equals(node)) {
head = node.getNext();
head.setPrev(null);
} else if (tail.equals(node)) {
Node prevToTail = tail.getPrev();
prevToTail.setNext(null);
tail = prevToTail;
} else {
Node current = head.getNext();
while(!current.equals(tail)) {
if(current.equals(node)) {
Node prevToCurrent = current.getPrev();
Node nextToCurrent = current.getNext();
prevToCurrent.setNext(nextToCurrent);
nextToCurrent.setPrev(prevToCurrent);
break;
}
current = current.getNext();
}
}
size--;
}
public Node head() {
return head;
}
public int size() {
return size;
}
}
Client code to use the above cache implementation -
import java.util.Map;
public class Client {
public static void main(String[] args) {
Client client = new Client();
LFUCache cache = new LFUCache(4);
cache.set(11, function(11));
cache.set(12, function(12));
cache.set(13, function(13));
cache.set(14, function(14));
cache.set(15, function(15));
client.print(cache.getFrequencies());
cache.get(13);
cache.get(13);
cache.get(13);
cache.get(14);
cache.get(14);
cache.get(14);
cache.get(14);
client.print(cache.getCache());
client.print(cache.getCounts());
client.print(cache.getFrequencies());
}
public void print(Map<Integer, ? extends Object> map) {
for(Map.Entry<Integer, ? extends Object> entry : map.entrySet()) {
if(entry.getValue() instanceof Node) {
System.out.println("Cache Key => "+entry.getKey()+", Cache Value => "+((Node) entry.getValue()).toString());
} else if (entry.getValue() instanceof DoublyLinkedList) {
System.out.println("Frequency Key => "+entry.getKey()+" Frequency Values => [");
Node head = ((DoublyLinkedList) entry.getValue()).head();
while(null != head) {
System.out.println(head.toString());
head = head.getNext();
}
System.out.println(" ]");
} else {
System.out.println("Count Key => "+entry.getKey()+", Count Value => "+entry.getValue());
}
}
}
public static int function(int key) {
int prime = 31;
return key*prime;
}
}
How about a priority queue? You can keep elements sorted there with keys representing the frequency. Just update the object position in the queue after visiting it. You can update just from time to time for optimizing the performance (but reducing precision).
Many implementations I have seen have runtime complexity O(log(n)). This means, when the cache size is n, the time needed to insert/remove an element into/from chache is logarithmic. Such implementations use usually a min heap to maintain usage frequencies of elements. The root of the heap contains the element with lowest frequency, and can be accessed in O(1) time. But to maintain the heap property we have to move an element, every time it is used (and frequency is incremented) inside of the heap, to place it into proper position, or when we have to insert new element into the cache (and so put it into the heap).
But the runtime complexity can be reduced to O(1), when we maintain a hashmap (Java) or unordered_map (C++) with the element as key. Additinally we need two sorts of lists, frequency list and elements lists. The elements lists contain elements that have same frequency, and the frequency list contain the element lists.
frequency list
1 3 6 7
a k y x
c l z
m n
Here in the example we see the frequency list that has 4 elements (4 elements lists). The element list 1 contains elements (a,c,m), the elements list 3 contains elements (k, l, n) etc.
Now, when we use say element y, we have to increment its frequency and put it in the next list. Because the elements list with frequency 6 becomes empty, we delete it. The result is:
frequency list
1 3 7
a k y
c l x
m n z
We place the element y in the begin of the elements list 7. When we have to remove elements from the list later, we will start from the end (first z, then x and then y).
Now, when we use element n, we have to increment its frequency and put it into the new list, with frequencies 4:
frequency list
1 3 4 7
a k n y
c l x
m z
I hope the idea is clear. I provide now my C++ implementation of the LFU cache, and will add later a Java implementation.
The class has just 2 public methods, void set(key k, value v)
and bool get(key k, value &v). In the get method the value to retrieve will be set per reference when the element is found, in this case the method returns true. When the element is not found, the method returns false.
#include<unordered_map>
#include<list>
using namespace std;
typedef unsigned uint;
template<typename K, typename V = K>
struct Entry
{
K key;
V value;
};
template<typename K, typename V = K>
class LFUCache
{
typedef typename list<typename Entry<K, V>> ElementList;
typedef typename list <pair <uint, ElementList>> FrequencyList;
private:
unordered_map <K, pair<typename FrequencyList::iterator, typename ElementList::iterator>> cacheMap;
FrequencyList elements;
uint maxSize;
uint curSize;
void incrementFrequency(pair<typename FrequencyList::iterator, typename ElementList::iterator> p) {
if (p.first == prev(elements.end())) {
//frequency list contains single list with some frequency, create new list with incremented frequency (p.first->first + 1)
elements.push_back({ p.first->first + 1, { {p.second->key, p.second->value} } });
// erase and insert the key with new iterator pair
cacheMap[p.second->key] = { prev(elements.end()), prev(elements.end())->second.begin() };
}
else {
// there exist element(s) with higher frequency
auto pos = next(p.first);
if (p.first->first + 1 == pos->first)
// same frequency in the next list, add the element in the begin
pos->second.push_front({ p.second->key, p.second->value });
else
// insert new list before next list
pos = elements.insert(pos, { p.first->first + 1 , {{p.second->key, p.second->value}} });
// update cachMap iterators
cacheMap[p.second->key] = { pos, pos->second.begin() };
}
// if element list with old frequency contained this singe element, erase the list from frequency list
if (p.first->second.size() == 1)
elements.erase(p.first);
else
// erase only the element with updated frequency from the old list
p.first->second.erase(p.second);
}
void eraseOldElement() {
if (elements.size() > 0) {
auto key = prev(elements.begin()->second.end())->key;
if (elements.begin()->second.size() < 2)
elements.erase(elements.begin());
else
elements.begin()->second.erase(prev(elements.begin()->second.end()));
cacheMap.erase(key);
curSize--;
}
}
public:
LFUCache(uint size) {
if (size > 0)
maxSize = size;
else
maxSize = 10;
curSize = 0;
}
void set(K key, V value) {
auto entry = cacheMap.find(key);
if (entry == cacheMap.end()) {
if (curSize == maxSize)
eraseOldElement();
if (elements.begin() == elements.end()) {
elements.push_front({ 1, { {key, value} } });
}
else if (elements.begin()->first == 1) {
elements.begin()->second.push_front({ key,value });
}
else {
elements.push_front({ 1, { {key, value} } });
}
cacheMap.insert({ key, {elements.begin(), elements.begin()->second.begin()} });
curSize++;
}
else {
entry->second.second->value = value;
incrementFrequency(entry->second);
}
}
bool get(K key, V &value) {
auto entry = cacheMap.find(key);
if (entry == cacheMap.end())
return false;
value = entry->second.second->value;
incrementFrequency(entry->second);
return true;
}
};
Here are examples of usage:
int main()
{
LFUCache<int>cache(3); // cache of size 3
cache.set(1, 1);
cache.set(2, 2);
cache.set(3, 3);
cache.set(2, 4);
rc = cache.get(1, r);
assert(rc);
assert(r == 1);
// evict old element, in this case 3
cache.set(4, 5);
rc = cache.get(3, r);
assert(!rc);
rc = cache.get(4, r);
assert(rc);
assert(r == 5);
LFUCache<int, string>cache2(2);
cache2.set(1, "one");
cache2.set(2, "two");
string val;
rc = cache2.get(1, val);
if (rc)
assert(val == "one");
else
assert(false);
cache2.set(3, "three"); // evict 2
rc = cache2.get(2, val);
assert(rc == false);
rc = cache2.get(3, val);
assert(rc);
assert(val == "three");
}
Here is a simple implementation of LFU cache in Go/Golang based on here.
import "container/list"
type LFU struct {
cache map[int]*list.Element
freqQueue map[int]*list.List
cap int
maxFreq int
lowestFreq int
}
type entry struct {
key, val int
freq int
}
func NewLFU(capacity int) *LFU {
return &LFU{
cache: make(map[int]*list.Element),
freqQueue: make(map[int]*list.List),
cap: capacity,
maxFreq: capacity - 1,
lowestFreq: 0,
}
}
// O(1)
func (c *LFU) Get(key int) int {
if e, ok := c.cache[key]; ok {
val := e.Value.(*entry).val
c.updateEntry(e, val)
return val
}
return -1
}
// O(1)
func (c *LFU) Put(key int, value int) {
if e, ok := c.cache[key]; ok {
c.updateEntry(e, value)
} else {
if len(c.cache) == c.cap {
c.evict()
}
if c.freqQueue[0] == nil {
c.freqQueue[0] = list.New()
}
e := c.freqQueue[0].PushFront(&entry{key, value, 0})
c.cache[key] = e
c.lowestFreq = 0
}
}
func (c *LFU) updateEntry(e *list.Element, val int) {
key := e.Value.(*entry).key
curFreq := e.Value.(*entry).freq
c.freqQueue[curFreq].Remove(e)
delete(c.cache, key)
nextFreq := curFreq + 1
if nextFreq > c.maxFreq {
nextFreq = c.maxFreq
}
if c.lowestFreq == curFreq && c.freqQueue[curFreq].Len() == 0 {
c.lowestFreq = nextFreq
}
if c.freqQueue[nextFreq] == nil {
c.freqQueue[nextFreq] = list.New()
}
newE := c.freqQueue[nextFreq].PushFront(&entry{key, val, nextFreq})
c.cache[key] = newE
}
func (c *LFU) evict() {
back := c.freqQueue[c.lowestFreq].Back()
delete(c.cache, back.Value.(*entry).key)
c.freqQueue[c.lowestFreq].Remove(back)
}
Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 2 years ago.
Improve this question
I have a Java program that stores a lot of mappings from Strings to various objects.
Right now, my options are either to rely on hashing (via HashMap) or on binary searches (via TreeMap). I am wondering if there is an efficient and standard trie-based map implementation in a popular and quality collections library?
I've written my own in the past, but I'd rather go with something standard, if available.
Quick clarification: While my question is general, in the current project I am dealing with a lot of data that is indexed by fully-qualified class name or method signature. Thus, there are many shared prefixes.
You might want to look at the Trie implementation that Limewire is contributing to the Google Guava.
There is no trie data structure in the core Java libraries.
This may be because tries are usually designed to store character strings, while Java data structures are more general, usually holding any Object (defining equality and a hash operation), though they are sometimes limited to Comparable objects (defining an order). There's no common abstraction for "a sequence of symbols," although CharSequence is suitable for character strings, and I suppose you could do something with Iterable for other types of symbols.
Here's another point to consider: when trying to implement a conventional trie in Java, you are quickly confronted with the fact that Java supports Unicode. To have any sort of space efficiency, you have to restrict the strings in your trie to some subset of symbols, or abandon the conventional approach of storing child nodes in an array indexed by symbol. This might be another reason why tries are not considered general-purpose enough for inclusion in the core library, and something to watch out for if you implement your own or use a third-party library.
Apache Commons Collections v4.0 now supports trie structures.
See the org.apache.commons.collections4.trie package info for more information. In particular, check the PatriciaTrie class:
Implementation of a PATRICIA Trie (Practical Algorithm to Retrieve Information Coded in Alphanumeric).
A PATRICIA Trie is a compressed Trie. Instead of storing all data at the edges of the Trie (and having empty internal nodes), PATRICIA stores data in every node. This allows for very efficient traversal, insert, delete, predecessor, successor, prefix, range, and select(Object) operations. All operations are performed at worst in O(K) time, where K is the number of bits in the largest item in the tree. In practice, operations actually take O(A(K)) time, where A(K) is the average number of bits of all items in the tree.
Also check out concurrent-trees. They support both Radix and Suffix trees and are designed for high concurrency environments.
I wrote and published a simple and fast implementation here.
What you need is org.apache.commons.collections.FastTreeMap , I think.
Below is a basic HashMap implementation of a Trie. Some people might find this useful...
class Trie {
HashMap<Character, HashMap> root;
public Trie() {
root = new HashMap<Character, HashMap>();
}
public void addWord(String word) {
HashMap<Character, HashMap> node = root;
for (int i = 0; i < word.length(); i++) {
Character currentLetter = word.charAt(i);
if (node.containsKey(currentLetter) == false) {
node.put(currentLetter, new HashMap<Character, HashMap>());
}
node = node.get(currentLetter);
}
}
public boolean containsPrefix(String word) {
HashMap<Character, HashMap> node = root;
for (int i = 0; i < word.length(); i++) {
Character currentLetter = word.charAt(i);
if (node.containsKey(currentLetter)) {
node = node.get(currentLetter);
} else {
return false;
}
}
return true;
}
}
Apache's commons collections:
org.apache.commons.collections4.trie.PatriciaTrie
You can try the Completely Java library, it features a PatriciaTrie implementation. The API is small and easy to get started, and it's available in the Maven central repository.
You might look at this TopCoder one as well (registration required...).
If you required sorted map, then tries are worthwhile.
If you don't then hashmap is better.
Hashmap with string keys can be improved over the standard Java implementation:
Array hash map
If you're not worried about pulling in the Scala library, you can use this space efficient implementation I wrote of a burst trie.
https://github.com/nbauernfeind/scala-burst-trie
here is my implementation, enjoy it via: GitHub - MyTrie.java
/* usage:
MyTrie trie = new MyTrie();
trie.insert("abcde");
trie.insert("abc");
trie.insert("sadas");
trie.insert("abc");
trie.insert("wqwqd");
System.out.println(trie.contains("abc"));
System.out.println(trie.contains("abcd"));
System.out.println(trie.contains("abcdefg"));
System.out.println(trie.contains("ab"));
System.out.println(trie.getWordCount("abc"));
System.out.println(trie.getAllDistinctWords());
*/
import java.util.*;
public class MyTrie {
private class Node {
public int[] next = new int[26];
public int wordCount;
public Node() {
for(int i=0;i<26;i++) {
next[i] = NULL;
}
wordCount = 0;
}
}
private int curr;
private Node[] nodes;
private List<String> allDistinctWords;
public final static int NULL = -1;
public MyTrie() {
nodes = new Node[100000];
nodes[0] = new Node();
curr = 1;
}
private int getIndex(char c) {
return (int)(c - 'a');
}
private void depthSearchWord(int x, String currWord) {
for(int i=0;i<26;i++) {
int p = nodes[x].next[i];
if(p != NULL) {
String word = currWord + (char)(i + 'a');
if(nodes[p].wordCount > 0) {
allDistinctWords.add(word);
}
depthSearchWord(p, word);
}
}
}
public List<String> getAllDistinctWords() {
allDistinctWords = new ArrayList<String>();
depthSearchWord(0, "");
return allDistinctWords;
}
public int getWordCount(String str) {
int len = str.length();
int p = 0;
for(int i=0;i<len;i++) {
int j = getIndex(str.charAt(i));
if(nodes[p].next[j] == NULL) {
return 0;
}
p = nodes[p].next[j];
}
return nodes[p].wordCount;
}
public boolean contains(String str) {
int len = str.length();
int p = 0;
for(int i=0;i<len;i++) {
int j = getIndex(str.charAt(i));
if(nodes[p].next[j] == NULL) {
return false;
}
p = nodes[p].next[j];
}
return nodes[p].wordCount > 0;
}
public void insert(String str) {
int len = str.length();
int p = 0;
for(int i=0;i<len;i++) {
int j = getIndex(str.charAt(i));
if(nodes[p].next[j] == NULL) {
nodes[curr] = new Node();
nodes[p].next[j] = curr;
curr++;
}
p = nodes[p].next[j];
}
nodes[p].wordCount++;
}
}
I have just tried my own Concurrent TRIE implementation but not based on characters, it is based on HashCode. Still We can use this having Map of Map for each CHAR hascode.
You can test this using the code # https://github.com/skanagavelu/TrieHashMap/blob/master/src/TrieMapPerformanceTest.java
https://github.com/skanagavelu/TrieHashMap/blob/master/src/TrieMapValidationTest.java
import java.util.concurrent.atomic.AtomicReferenceArray;
public class TrieMap {
public static int SIZEOFEDGE = 4;
public static int OSIZE = 5000;
}
abstract class Node {
public Node getLink(String key, int hash, int level){
throw new UnsupportedOperationException();
}
public Node createLink(int hash, int level, String key, String val) {
throw new UnsupportedOperationException();
}
public Node removeLink(String key, int hash, int level){
throw new UnsupportedOperationException();
}
}
class Vertex extends Node {
String key;
volatile String val;
volatile Vertex next;
public Vertex(String key, String val) {
this.key = key;
this.val = val;
}
#Override
public boolean equals(Object obj) {
Vertex v = (Vertex) obj;
return this.key.equals(v.key);
}
#Override
public int hashCode() {
return key.hashCode();
}
#Override
public String toString() {
return key +"#"+key.hashCode();
}
}
class Edge extends Node {
volatile AtomicReferenceArray<Node> array; //This is needed to ensure array elements are volatile
public Edge(int size) {
array = new AtomicReferenceArray<Node>(8);
}
#Override
public Node getLink(String key, int hash, int level){
int index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Node returnVal = array.get(index);
for(;;) {
if(returnVal == null) {
return null;
}
else if((returnVal instanceof Vertex)) {
Vertex node = (Vertex) returnVal;
for(;node != null; node = node.next) {
if(node.key.equals(key)) {
return node;
}
}
return null;
} else { //instanceof Edge
level = level + 1;
index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Edge e = (Edge) returnVal;
returnVal = e.array.get(index);
}
}
}
#Override
public Node createLink(int hash, int level, String key, String val) { //Remove size
for(;;) { //Repeat the work on the current node, since some other thread modified this node
int index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Node nodeAtIndex = array.get(index);
if ( nodeAtIndex == null) {
Vertex newV = new Vertex(key, val);
boolean result = array.compareAndSet(index, null, newV);
if(result == Boolean.TRUE) {
return newV;
}
//continue; since new node is inserted by other thread, hence repeat it.
}
else if(nodeAtIndex instanceof Vertex) {
Vertex vrtexAtIndex = (Vertex) nodeAtIndex;
int newIndex = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, vrtexAtIndex.hashCode(), level+1);
int newIndex1 = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level+1);
Edge edge = new Edge(Base10ToBaseX.Base.BASE8.getLevelZeroMask()+1);
if(newIndex != newIndex1) {
Vertex newV = new Vertex(key, val);
edge.array.set(newIndex, vrtexAtIndex);
edge.array.set(newIndex1, newV);
boolean result = array.compareAndSet(index, vrtexAtIndex, edge); //REPLACE vertex to edge
if(result == Boolean.TRUE) {
return newV;
}
//continue; since vrtexAtIndex may be removed or changed to Edge already.
} else if(vrtexAtIndex.key.hashCode() == hash) {//vrtex.hash == hash) { HERE newIndex == newIndex1
synchronized (vrtexAtIndex) {
boolean result = array.compareAndSet(index, vrtexAtIndex, vrtexAtIndex); //Double check this vertex is not removed.
if(result == Boolean.TRUE) {
Vertex prevV = vrtexAtIndex;
for(;vrtexAtIndex != null; vrtexAtIndex = vrtexAtIndex.next) {
prevV = vrtexAtIndex; // prevV is used to handle when vrtexAtIndex reached NULL
if(vrtexAtIndex.key.equals(key)){
vrtexAtIndex.val = val;
return vrtexAtIndex;
}
}
Vertex newV = new Vertex(key, val);
prevV.next = newV; // Within SYNCHRONIZATION since prevV.next may be added with some other.
return newV;
}
//Continue; vrtexAtIndex got changed
}
} else { //HERE newIndex == newIndex1 BUT vrtex.hash != hash
edge.array.set(newIndex, vrtexAtIndex);
boolean result = array.compareAndSet(index, vrtexAtIndex, edge); //REPLACE vertex to edge
if(result == Boolean.TRUE) {
return edge.createLink(hash, (level + 1), key, val);
}
}
}
else { //instanceof Edge
return nodeAtIndex.createLink(hash, (level + 1), key, val);
}
}
}
#Override
public Node removeLink(String key, int hash, int level){
for(;;) {
int index = Base10ToBaseX.getBaseXValueOnAtLevel(Base10ToBaseX.Base.BASE8, hash, level);
Node returnVal = array.get(index);
if(returnVal == null) {
return null;
}
else if((returnVal instanceof Vertex)) {
synchronized (returnVal) {
Vertex node = (Vertex) returnVal;
if(node.next == null) {
if(node.key.equals(key)) {
boolean result = array.compareAndSet(index, node, null);
if(result == Boolean.TRUE) {
return node;
}
continue; //Vertex may be changed to Edge
}
return null; //Nothing found; This is not the same vertex we are looking for. Here hashcode is same but key is different.
} else {
if(node.key.equals(key)) { //Removing the first node in the link
boolean result = array.compareAndSet(index, node, node.next);
if(result == Boolean.TRUE) {
return node;
}
continue; //Vertex(node) may be changed to Edge, so try again.
}
Vertex prevV = node; // prevV is used to handle when vrtexAtIndex is found and to be removed from its previous
node = node.next;
for(;node != null; prevV = node, node = node.next) {
if(node.key.equals(key)) {
prevV.next = node.next; //Removing other than first node in the link
return node;
}
}
return null; //Nothing found in the linked list.
}
}
} else { //instanceof Edge
return returnVal.removeLink(key, hash, (level + 1));
}
}
}
}
class Base10ToBaseX {
public static enum Base {
/**
* Integer is represented in 32 bit in 32 bit machine.
* There we can split this integer no of bits into multiples of 1,2,4,8,16 bits
*/
BASE2(1,1,32), BASE4(3,2,16), BASE8(7,3,11)/* OCTAL*/, /*BASE10(3,2),*/
BASE16(15, 4, 8){
public String getFormattedValue(int val){
switch(val) {
case 10:
return "A";
case 11:
return "B";
case 12:
return "C";
case 13:
return "D";
case 14:
return "E";
case 15:
return "F";
default:
return "" + val;
}
}
}, /*BASE32(31,5,1),*/ BASE256(255, 8, 4), /*BASE512(511,9),*/ Base65536(65535, 16, 2);
private int LEVEL_0_MASK;
private int LEVEL_1_ROTATION;
private int MAX_ROTATION;
Base(int levelZeroMask, int levelOneRotation, int maxPossibleRotation) {
this.LEVEL_0_MASK = levelZeroMask;
this.LEVEL_1_ROTATION = levelOneRotation;
this.MAX_ROTATION = maxPossibleRotation;
}
int getLevelZeroMask(){
return LEVEL_0_MASK;
}
int getLevelOneRotation(){
return LEVEL_1_ROTATION;
}
int getMaxRotation(){
return MAX_ROTATION;
}
String getFormattedValue(int val){
return "" + val;
}
}
public static int getBaseXValueOnAtLevel(Base base, int on, int level) {
if(level > base.getMaxRotation() || level < 1) {
return 0; //INVALID Input
}
int rotation = base.getLevelOneRotation();
int mask = base.getLevelZeroMask();
if(level > 1) {
rotation = (level-1) * rotation;
mask = mask << rotation;
} else {
rotation = 0;
}
return (on & mask) >>> rotation;
}
}