Last active
January 2, 2024 02:49
-
-
Save ttappr/cb0c285d6b1004e538bccdbb6f7c69c0 to your computer and use it in GitHub Desktop.
A simple map that takes lowercase ascii keys and can hold any sort of value. This can be much faster than a HashMap depending on use case.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! # TrieMap, A Trie-based Map for Rust | |
//! | |
//! Like a HashMap, values are stored and accessible by keys. Lookups can be | |
//! much faster than a HashMap, but there are some limitations on what can be | |
//! used for a key, and the memory is potentially higher, but not always. | |
//! | |
//! The keys are u8 bytes and the base value and range can be specified. The | |
//! greater the range, the more memory is used per node. The base value is used | |
//! to adjust the byte values of the keys to serve as indices into the internal | |
//! arrays within nodes that hold handles to descendant nodes. The base value | |
//! must be less than the range. It is up to the user to ensure that the base | |
//! value and range are appropriate for the keys that will be used. | |
//! | |
//! Because the keys are bytes, they can be used to store strings, but they can | |
//! also be used to store any other sequence of bytes. The keys are not | |
//! required to be valid UTF-8. | |
//! | |
/// A handle used interally to reference values stored in the trie. | |
/// | |
#[repr(transparent)] | |
#[derive(Copy, Clone)] | |
struct ValueHandle(usize); | |
/// A handle used interally to reference nodes stored in the trie. | |
/// | |
#[repr(transparent)] | |
#[derive(Copy, Clone)] | |
struct NodeHandle(usize); | |
/// This is a struct used internally by the TrieMap to represent a node in the | |
/// trie. If it's a leaf node, it will have a value. If it's an internal node, | |
/// it will have a value of None. The `brats` array is used to reference the | |
/// next descendant nodes indexed by the byte value of the next character in | |
/// the key. | |
/// | |
/// `RANGE` controls the size of the `brats` array. The larger the `RANGE`, the | |
/// larger the array, the more characters that can be used in keys, however at | |
/// the cost of some more memory usage. | |
/// | |
struct Node<const RANGE: usize> { | |
brats : [Option<NodeHandle>; RANGE], | |
value : Option<ValueHandle>, | |
} | |
impl<const RANGE: usize> Node<RANGE> { | |
fn new() -> Self { | |
Self { brats: [None; RANGE], value: None } | |
} | |
} | |
/// A trie-based Map keyed on sequences of byts with values stored at the leaves | |
/// of the trie. `RANGE` controls the size of the `brats` array in each node. | |
/// And `BASE_CHAR` is the byte value of the first character in the range of | |
/// characters that can be used in keys. It is used to adjust each character in | |
/// a key to serve as indices into the their descendant arrays. | |
/// | |
pub struct TrieMap<V, const RANGE: usize, const BASE_CHAR: u8> { | |
root : NodeHandle, | |
nodes : Vec<Node<RANGE>>, | |
values : Vec<Option<V>>, | |
} | |
impl<V, const RANGE: usize, const BASE_CHAR: u8> TrieMap<V, RANGE, BASE_CHAR> { | |
/// Creates a new empty trie. | |
/// | |
pub fn new() -> Self { | |
Self { | |
root : NodeHandle(0), | |
nodes : vec![Node::new()], | |
values : vec![], | |
} | |
} | |
/// Creates a new empty trie with the given capacity. Only the internal | |
/// vector that holds the values is affected by this method. | |
/// | |
pub fn with_capacity(capacity: usize) -> Self { | |
Self { | |
root : NodeHandle(0), | |
nodes : vec![Node::new()], | |
values : Vec::with_capacity(capacity), | |
} | |
} | |
/// Inserts a value into the trie at the given key. | |
/// | |
pub fn insert<'a, K>(&mut self, key: K, value: V) | |
where | |
K: IntoIterator<Item = &'a u8>, | |
{ | |
let mut curr = self.root; | |
for b in key { | |
let idx = (b - BASE_CHAR) as usize; | |
if let Some(next) = self.hderef(curr).brats[idx] { | |
curr = next; | |
} else { | |
let next = self.new_node(); | |
self.hderef_mut(curr).brats[idx] = Some(next); | |
curr = next; | |
} | |
} | |
self.values.push(Some(value)); | |
self.hderef_mut(curr).value = Some(ValueHandle(self.values.len() - 1)); | |
} | |
/// Accesses a value in the trie at the given key, if it exists, a reference | |
/// is returned, otherwise `None` is returned. | |
/// | |
pub fn get<'a, K>(&self, key: K) -> Option<&V> | |
where | |
K: IntoIterator<Item = &'a u8>, | |
{ | |
let mut curr = self.root; | |
for b in key { | |
let idx = (b - BASE_CHAR) as usize; | |
if let Some(next) = self.hderef(curr).brats[idx] { | |
curr = next; | |
} else { | |
return None; | |
} | |
} | |
if let Some(value) = self.hderef(curr).value { | |
self.values[value.0].as_ref() | |
} else { | |
None | |
} | |
} | |
/// Returns a mutable reference to a value in the trie at the given key, if | |
/// it exists, otherwise `None` is returned. | |
/// | |
pub fn get_mut<'a, K>(&mut self, key: K) -> Option<&mut V> | |
where | |
K: IntoIterator<Item = &'a u8>, | |
{ | |
let mut curr = self.root; | |
for b in key { | |
let idx = (b - BASE_CHAR) as usize; | |
if let Some(next) = self.hderef(curr).brats[idx] { | |
curr = next; | |
} else { | |
return None; | |
} | |
} | |
if let Some(value) = self.hderef(curr).value { | |
self.values[value.0].as_mut() | |
} else { | |
None | |
} | |
} | |
/// Returns a mutable reference to a value in the trie at the given key, if | |
/// it exists, otherwise the given value is inserted and a reference to it | |
/// is returned. | |
/// | |
pub fn get_or_insert<'a, K>(&mut self, key: K, value: V) -> &mut V | |
where | |
K: IntoIterator<Item = &'a u8>, | |
{ | |
self.get_or_insert_with(key, ||value) | |
} | |
/// Returns a mutable reference to a value in the trie at the given key, if | |
/// it exists, otherwise the given function is called to produce a value | |
/// which is inserted and a reference to it is returned. | |
/// | |
pub fn get_or_insert_with<'a, K, F>(&mut self, key: K, f: F) -> &mut V | |
where | |
K: IntoIterator<Item = &'a u8>, | |
F: FnOnce() -> V, | |
{ | |
let mut curr = self.root; | |
for b in key { | |
let idx = (b - BASE_CHAR) as usize; | |
if let Some(next) = self.hderef(curr).brats[idx] { | |
curr = next; | |
} else { | |
let next = self.new_node(); | |
self.hderef_mut(curr).brats[idx] = Some(next); | |
curr = next; | |
} | |
} | |
let handle = { | |
if let Some(handle) = self.hderef(curr).value { | |
handle | |
} else { | |
let handle = ValueHandle(self.values.len()); | |
self.values.push(Some(f())); | |
self.hderef_mut(curr).value = Some(handle); | |
handle | |
} | |
}; | |
self.values[handle.0].as_mut().unwrap() | |
} | |
/// Returns `true` if the trie contains a value at the given key, otherwise | |
/// `false` is returned. | |
/// | |
pub fn contains<'a, K>(&self, key: K) -> bool | |
where | |
K: IntoIterator<Item = &'a u8>, | |
{ | |
let mut curr = self.root; | |
for b in key { | |
let idx = (b - BASE_CHAR) as usize; | |
if let Some(next) = self.hderef(curr).brats[idx] { | |
curr = next; | |
} else { | |
return false; | |
} | |
} | |
self.hderef(curr).value.is_some() | |
} | |
pub fn iter(&self) -> KeyValueIter<V, RANGE, BASE_CHAR> { | |
KeyValueIter::new(self) | |
} | |
pub fn values(&self) -> impl Iterator<Item = &V> { | |
self.values.iter().filter_map(|v| v.as_ref()) | |
} | |
/// Used internally to create a new node and return a handle to it. | |
/// | |
fn new_node(&mut self) -> NodeHandle { | |
self.nodes.push(Node::new()); | |
NodeHandle(self.nodes.len() - 1) | |
} | |
/// Used internally to dereference a node handle. | |
/// | |
fn hderef(&self, handle: NodeHandle) -> &Node<RANGE> { | |
&self.nodes[handle.0] | |
} | |
/// Used internally to dereference a node handle mutably. | |
/// | |
fn hderef_mut(&mut self, handle: NodeHandle) -> &mut Node<RANGE> { | |
&mut self.nodes[handle.0] | |
} | |
} | |
impl<V, const R: usize, const B: u8> Default for TrieMap<V, R, B> { | |
fn default() -> Self { | |
Self::new() | |
} | |
} | |
pub struct KeyValueIterator<V, const R: usize, const B: u8> { | |
trie : TrieMap<V, R, B>, | |
key : Vec<u8>, | |
stack : Vec<(NodeHandle, usize)>, | |
} | |
impl<V, const R: usize, const B: u8> KeyValueIterator<V, R, B> { | |
fn new(trie: TrieMap<V, R, B>) -> Self { | |
let curr = trie.root; | |
let stack = vec![(curr, 0)]; | |
Self { trie, key: Vec::new(), stack } | |
} | |
} | |
impl<V, const R: usize, const B: u8> Iterator for KeyValueIterator<V, R, B> { | |
type Item = (Vec<u8>, V); | |
fn next(&mut self) -> Option<Self::Item> { | |
while let Some((handle, mut i)) = self.stack.pop() { | |
if self.trie.hderef(handle).value.is_some() && i == 0 { | |
self.stack.push((handle, 0)); | |
let hval = self.trie.hderef_mut(handle).value.take().unwrap(); | |
let value = self.trie.values[hval.0].take().unwrap(); | |
return Some((self.key.clone(), value)); | |
} | |
while i < R && self.trie.hderef(handle).brats[i].is_none() { | |
i += 1; | |
} | |
if i < R { | |
let brat = self.trie.hderef(handle).brats[i].unwrap(); | |
self.key.push(i as u8 + B); | |
self.stack.push((handle, i + 1)); | |
self.stack.push((brat, 0)); | |
} else { | |
self.key.pop(); | |
} | |
} | |
None | |
} | |
} | |
impl<V, const R: usize, const B: u8> IntoIterator for TrieMap<V, R, B> { | |
type Item = (Vec<u8>, V); | |
type IntoIter = KeyValueIterator<V, R, B>; | |
fn into_iter(self) -> Self::IntoIter { | |
KeyValueIterator::new(self) | |
} | |
} | |
pub struct KeyValueIter<'a, V, const R: usize, const B: u8> { | |
trie : &'a TrieMap<V, R, B>, | |
key : Vec<u8>, | |
stack : Vec<(NodeHandle, usize, bool)>, | |
} | |
impl<'a, V, const R: usize, const B: u8> KeyValueIter<'a, V, R, B> { | |
fn new(trie: &'a TrieMap<V, R, B>) -> Self { | |
let curr = trie.root; | |
let stack = vec![(curr, 0, true)]; | |
Self { trie, key: Vec::new(), stack } | |
} | |
} | |
impl<'a, V, const R: usize, const B: u8> Iterator | |
for KeyValueIter<'a, V, R, B> | |
{ | |
type Item = (Box<[u8]>, &'a V); | |
fn next(&mut self) -> Option<Self::Item> { | |
while let Some((handle, mut i, b)) = self.stack.pop() { | |
if self.trie.hderef(handle).value.is_some() && b { | |
self.stack.push((handle, 0, false)); | |
let hval = self.trie.hderef(handle).value.as_ref().unwrap(); | |
let value = self.trie.values[hval.0].as_ref().unwrap(); | |
let key = self.key.clone().into_boxed_slice(); | |
return Some((key, value)); | |
} | |
while i < R && self.trie.hderef(handle).brats[i].is_none() { | |
i += 1; | |
} | |
if i < R { | |
let brat = self.trie.hderef(handle).brats[i].unwrap(); | |
self.key.push(i as u8 + B); | |
self.stack.push((handle, i + 1, false)); | |
self.stack.push((brat, 0, true)); | |
} else { | |
self.key.pop(); | |
} | |
} | |
None | |
} | |
} | |
impl<'a, V, const R: usize, const B: u8> IntoIterator | |
for &'a TrieMap<V, R, B> | |
{ | |
type Item = (Box<[u8]>, &'a V); | |
type IntoIter = KeyValueIter<'a, V, R, B>; | |
fn into_iter(self) -> Self::IntoIter { | |
KeyValueIter::new(self) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment