diff options
Diffstat (limited to 'htab')
-rw-r--r-- | htab/+test.ha | 61 | ||||
-rw-r--r-- | htab/README | 8 | ||||
-rw-r--r-- | htab/table.ha | 254 |
3 files changed, 323 insertions, 0 deletions
diff --git a/htab/+test.ha b/htab/+test.ha new file mode 100644 index 0000000..518e97d --- /dev/null +++ b/htab/+test.ha @@ -0,0 +1,61 @@ +use fmt; +use hash; +use hash::fnv; +use math::random; + +fn test_eq(ctx: *opaque, entry: *opaque) bool = + *(ctx: *u64) == *(entry: *u64); + +fn test_fnvhash(x: u64) u64 = { + let h = fnv::fnv64a(); + hash::write(&h, &x: *[8]u8); + return fnv::sum64(&h); +}; + +fn test_garbagehash(x: u64) u64 = + 0x0123456789abcdef; + +fn test_table(sz: size, hash_fn: *fn(x: u64) u64, rng: *random::random) void = { + fmt::printf("{} ", sz)!; + static let array: [1024]bool = [false...]; + for (let i = 0z; i < len(array); i += 1) { + array[i] = false; + }; + let tab = new(0, sz); + defer finish(&tab); + + for (let i = 0; i < 100000; i += 1) { + const x = random::u64n(rng, len(array): u64); + const hash = hash_fn(x); + match (get(&tab, hash, &test_eq, &x, sz)) { + case let entry: *opaque => + assert(array[x: size]); + assert(*(entry: *u64) == x); + del(&tab, entry, sz); + array[x: size] = false; + case null => + assert(!array[x: size]); + const entry = add(&tab, hash, sz); + array[x: size] = true; + *(entry: *u64) = x; + }; + }; + + for (let x = 0u64; x < len(array): u64; x += 1) { + const hash = hash_fn(x); + get(&tab, hash, &test_eq, &x, sz); + }; + + fmt::println()!; +}; + +@test fn table() void = { + let rng = random::init(42); + + fmt::println("fnvhash:")!; + for (let i = 8z; i < 128; i += 8) + test_table(i, &test_fnvhash, &rng); + fmt::println("garbagehash:")!; + for (let i = 8z; i < 128; i += 8) + test_table(i, &test_garbagehash, &rng); +}; diff --git a/htab/README b/htab/README new file mode 100644 index 0000000..cd296b2 --- /dev/null +++ b/htab/README @@ -0,0 +1,8 @@ +Generic open addressing hash table. + +The table-related functions in this module expect a sz argument specifying the +table's element size. This value must remain consistent across all function +calls referencing the table. + +The hash value 0 is reserved as a sentinel value for use by the hash table +implementation, and must not be used. diff --git a/htab/table.ha b/htab/table.ha new file mode 100644 index 0000000..b90b0aa --- /dev/null +++ b/htab/table.ha @@ -0,0 +1,254 @@ +use fmt; +use math; +use rt; + +fn cap2nslots(cap: size) size = { + const nslots_ = cap << 1; + let nslots = 1z; + for (nslots < nslots_) { + nslots <<= 1; + }; + return nslots; +}; + +fn nslots2cap(nslots: size) size = { + return nslots >> 1; +}; + +fn dbg(fmt: str, args: fmt::field...) void = { + fmt::errorfln(fmt, args...)!; +}; + +// temporary hack before inlining works. +def DBG = false; + +export type table = struct { + table: *[*]u64, + nslots: size, // must be a power of 2. + nleft: size, +}; + +// The interface of user-specified equality comparison functions. +export type eq_fn = fn(ctx: *opaque, elem: *opaque) bool; + +export const empty: [1]u64 = [0]; + +// An empty hash table. +def EMPTY = table { + table = &empty, + nslots = 1, + nleft = 0, +}; + +// Returns a new hash table preallocated with the specified capacity. +// +// If cap is 0, no memory is allocated, and the result is equivalent +// to [[EMPTY]]. +export fn new(cap: size, sz: size) table = { + if (cap == 0) { + return EMPTY; + }; + + const nslots = cap2nslots(cap); + + // TODO: need to check for overflow here + // TODO: should probably deal with alignment > 8 bytes + const allocsz = nslots * size(u64) + nslots * sz; + const ptr = rt::malloc(allocsz): *opaque; + + let tab = table { + table = ptr: *[*]u64, + nslots = nslots, + ... + }; + clear(&tab, sz); + return tab; +}; + +// Clears the table without deallocating it. +// +// Calling this function causes all previously obtained element pointers and +// iterators belonging to the table to become invalid. +export fn clear(tab: *table, sz: size) void = { + for (let i = 0z; i < tab.nslots; i += 1) { + tab.table[i] = 0; + }; + tab.nleft = nslots2cap(tab.nslots); +}; + +// Clears and deallocates the table. +// +// The table is left in a valid, empty state. +// +// Calling this function causes all previously obtained element pointers and +// iterators belonging to the table to become invalid. +export fn finish(tab: *table) void = { + if (tab.nslots != 1) { + free(tab.table); + }; + *tab = EMPTY; +}; + +// Returns the number of elements in the table. +export fn count(tab: *table) size = { + return nslots2cap(tab.nslots) - tab.nleft; +}; + +fn at(tab: *table, i: size, sz: size) *opaque = { + const elems: *opaque = &tab.table[tab.nslots]; + return (elems: uintptr + i * sz): *opaque; +}; + +// Resizes the table to the specified capacity. +// +// The capacity specifies the maximum number of elements storable without +// growing the table. The resulting memory allocation will be larger. +// +// If the number of elements in the table exceeds the capacity, +// the program aborts. +// +// Calling this function causes all previously obtained element pointers and +// iterators belonging to the table to become invalid. +export fn resize(tab: *table, newcap: size, sz: size) void = { + if (cap2nslots(newcap) == tab.nslots) { + return; + }; + assert(count(tab) <= newcap, + "table won't fit into requested capacity"); + if (DBG) dbg("resize {} {{", newcap); + let newtab = new(newcap, sz); + for (let i = 0z; i < tab.nslots; i += 1) { + if (tab.table[i] == 0) { + continue; + }; + const entry = add(&newtab, tab.table[i], sz); + rt::memcpy(entry, at(tab, i, sz), sz); + }; + finish(tab); + *tab = newtab; + if (DBG) dbg("}} resize {}", newcap); +}; + +// Finds an element with the specified hash in the table. +// +// If no element is found, null is returned. +// +// hash must not equal 0. +// +// If eq is non-null, it is called on each element matching the hash until it +// returns true. If it is null, an arbitrary matching element is returned. +// +// ctx specifies the value of the ctx argument passed to eq. +export fn get(tab: *table, hash: u64, eq: *eq_fn, ctx: *opaque, sz: size) + nullable *opaque = { + if (DBG) dbg("get {}", hash); + let i = hash: size & tab.nslots - 1; + for (let inc = 0z; true; inc = 1) { + i = i + inc & tab.nslots - 1; + if (DBG) dbg("[{}] -> {}", i, tab.table[i]); + if (tab.table[i] == 0) { + break; + }; + if (tab.table[i] != hash) { + continue; + }; + const elem = at(tab, i, sz); + if (!eq(ctx, elem)) { + continue; + }; + return elem; + }; + return null; +}; + +// Adds an element with the specified hash to the table and returns a pointer +// to it. +// +// The element is left uninitialized. +// +// hash must not equal 0. +// +// Calling this function causes all previously obtained element pointers and +// iterators belonging to the table to become invalid. +// +// Unless the table is known not to already contain an identical element, users +// should first use [[get]] to check for that possibility. +export fn add(tab: *table, hash: u64, sz: size) *opaque = { + if (DBG) dbg("add {}", hash); + + if (tab.nleft == 0) { + resize(tab, nslots2cap(tab.nslots) + 1, sz); + }; + tab.nleft -= 1; + + let i = hash: size & tab.nslots - 1; + for (let inc = 0z; true; inc = 1) { + i = i + inc & tab.nslots - 1; + if (DBG) dbg("[{}] -> {}", i, tab.table[i]); + if (tab.table[i] != 0) { + continue; + }; + if (DBG) dbg("[{}] <- {}", i, hash); + tab.table[i] = hash; + return at(tab, i, sz); + }; +}; + +// Removes the specified element from the table. +// +// Calling this function causes all previously obtained element pointers and +// iterators belonging to the table to become invalid. +export fn del(tab: *table, elem: *opaque, sz: size) void = { + tab.nleft += 1; + + let i = (elem: uintptr - at(tab, 0, sz): uintptr): size / sz; + if (DBG) dbg("del {}", i); + if (DBG) dbg("[{}] <- {}", i, 0); + tab.table[i] = 0; + + for (let j = i; true) { + j = j + 1 & tab.nslots - 1; + if (DBG) dbg("[{}] -> {}", j, tab.table[j]); + if (tab.table[j] == 0) { + break; + }; + const k = tab.table[j]: size & tab.nslots - 1; + if (if (i < j) i < k && k <= j else i < k || k <= j) { + continue; + }; + if (DBG) dbg("[{}] <- {}", i, tab.table[j]); + tab.table[i] = tab.table[j]; + rt::memcpy(at(tab, i, sz), at(tab, j, sz), sz); + if (DBG) dbg("[{}] <- {}", j, 0); + tab.table[j] = 0; + i = j; + }; +}; + +export type iterator = struct { + table: *table, + i: size, +}; + +// Returns a new iterator for the table. +// +// The order of iteration is unspecified. +export fn iter(tab: *table) iterator = { + return iterator { + table = tab, + i = 0, + }; +}; + +// Returns a pointer to the next element from the iterator, or null if iteration +// has completed. +export fn next(it: *iterator, sz: size) nullable *opaque = { + for (it.i < it.table.nslots) { + const i = it.i; + it.i += 1; + if (it.table.table[i] != 0) { + return at(it.table, i, sz); + }; + }; + return null; +}; |