Files
c3c/lib/std/collections/hashset.c3
2026-01-17 15:12:31 +01:00

655 lines
15 KiB
Plaintext

<*
@require $defined((Value){}.hash()) : `No .hash function found on the value`
*>
module std::collections::set {Value};
import std::math;
import std::io @norecurse;
const uint DEFAULT_INITIAL_CAPACITY = 16;
const uint MAXIMUM_CAPACITY = 1u << 31;
const float DEFAULT_LOAD_FACTOR = 0.75;
const Allocator SET_HEAP_ALLOCATOR = (Allocator)&dummy;
<* Copy the ONHEAP allocator to initialize to a set that is heap allocated *>
const HashSet ONHEAP = { .allocator = SET_HEAP_ALLOCATOR };
struct Entry
{
uint hash;
Value value;
Entry* next;
}
struct HashSet (Printable)
{
Entry*[] table;
Allocator allocator;
<* Number of elements *>
usz count;
<* Resize limit *>
usz threshold;
float load_factor;
}
fn usz HashSet.len(&self) @operator(len) => self.count;
<*
@param [&inout] allocator : "The allocator to use"
@require capacity > 0 : "The capacity must be 1 or higher"
@require load_factor > 0.0 : "The load factor must be higher than 0"
@require !self.is_initialized() : "Set was already initialized"
@require capacity < MAXIMUM_CAPACITY : "Capacity cannot exceed maximum"
*>
fn HashSet* HashSet.init(&self, Allocator allocator, usz capacity = DEFAULT_INITIAL_CAPACITY, float load_factor = DEFAULT_LOAD_FACTOR)
{
capacity = math::next_power_of_2(capacity);
self.allocator = allocator;
self.threshold = (usz) (capacity * load_factor);
self.load_factor = load_factor;
self.table = allocator::new_array(allocator, Entry*, capacity);
return self;
}
<*
@require capacity > 0 : "The capacity must be 1 or higher"
@require load_factor > 0.0 : "The load factor must be higher than 0"
@require !self.is_initialized() : "Set was already initialized"
@require capacity < MAXIMUM_CAPACITY : "Capacity cannot exceed maximum"
*>
fn HashSet* HashSet.tinit(&self, uint capacity = DEFAULT_INITIAL_CAPACITY, float load_factor = DEFAULT_LOAD_FACTOR)
{
return self.init(tmem, capacity, load_factor) @inline;
}
<*
@param [&inout] allocator : "The allocator to use"
@require capacity > 0 : "The capacity must be 1 or higher"
@require load_factor > 0.0 : "The load factor must be higher than 0"
@require !self.is_initialized() : "Set was already initialized"
@require capacity < MAXIMUM_CAPACITY : "Capacity cannot exceed maximum"
*>
macro HashSet* HashSet.init_with_values(&self, Allocator allocator, ..., uint capacity = DEFAULT_INITIAL_CAPACITY, float load_factor = DEFAULT_LOAD_FACTOR)
{
self.init(allocator, capacity, load_factor);
$for var $i = 0; $i < $vacount; $i++:
self.add($vaarg[$i]);
$endfor
return self;
}
<*
@require capacity > 0 : "The capacity must be 1 or higher"
@require load_factor > 0.0 : "The load factor must be higher than 0"
@require !self.is_initialized() : "Set was already initialized"
@require capacity < MAXIMUM_CAPACITY : "Capacity cannot exceed maximum"
*>
macro HashSet* HashSet.tinit_with_values(&self, ..., uint capacity = DEFAULT_INITIAL_CAPACITY, float load_factor = DEFAULT_LOAD_FACTOR)
{
return self.init_with_values(tmem, $vasplat, capacity: capacity, load_factor: load_factor);
}
<*
@param [in] values : "The values for the HashSet"
@param [&inout] allocator : "The allocator to use"
@require capacity > 0 : "The capacity must be 1 or higher"
@require load_factor > 0.0 : "The load factor must be higher than 0"
@require !self.is_initialized() : "Set was already initialized"
@require capacity < MAXIMUM_CAPACITY : "Capacity cannot exceed maximum"
*>
fn HashSet* HashSet.init_from_values(&self, Allocator allocator, Value[] values, uint capacity = DEFAULT_INITIAL_CAPACITY, float load_factor = DEFAULT_LOAD_FACTOR)
{
self.init(allocator, capacity, load_factor);
foreach (v : values) self.add(v);
return self;
}
<*
@param [in] values : "The values for the HashSet entries"
@require capacity > 0 : "The capacity must be 1 or higher"
@require load_factor > 0.0 : "The load factor must be higher than 0"
@require !self.is_initialized() : "Set was already initialized"
@require capacity < MAXIMUM_CAPACITY : "Capacity cannot exceed maximum"
*>
fn HashSet* HashSet.tinit_from_values(&self, Value[] values, uint capacity = DEFAULT_INITIAL_CAPACITY, float load_factor = DEFAULT_LOAD_FACTOR)
{
return self.init_from_values(tmem, values, capacity, load_factor);
}
<*
Has this hash set been initialized yet?
@param [&in] set : "The hash set we are testing"
@return "Returns true if it has been initialized, false otherwise"
*>
fn bool HashSet.is_initialized(&set)
{
return set.allocator && set.allocator.ptr != &dummy;
}
<*
@param [&inout] allocator : "The allocator to use"
@param [&in] other_set : "The set to copy from."
@require !self.is_initialized() : "Set was already initialized"
*>
fn HashSet* HashSet.init_from_set(&self, Allocator allocator, HashSet* other_set)
{
self.init(allocator, other_set.table.len, other_set.load_factor);
self.put_all_for_create(other_set);
return self;
}
<*
@param [&in] other_set : "The set to copy from."
@require !set.is_initialized() : "Set was already initialized"
*>
fn HashSet* HashSet.tinit_from_set(&set, HashSet* other_set)
{
return set.init_from_set(tmem, other_set) @inline;
}
<*
Check if the set is empty
@return "true if it is empty"
@pure
*>
fn bool HashSet.is_empty(&set) @inline
{
return !set.count;
}
<*
Add all elements in the slice to the set.
@param [in] list
@return "The number of new elements added"
@ensure total <= list.len
*>
fn usz HashSet.add_all(&set, Value[] list)
{
usz total;
foreach (v : list)
{
if (set.add(v)) total++;
}
return total;
}
<*
@param [&in] other
@return "The number of new elements added"
@ensure return <= other.count
*>
fn usz HashSet.add_all_from(&set, HashSet* other)
{
usz total;
other.@each(;Value value)
{
if (set.add(value)) total++;
};
return total;
}
<*
@param value : "The value to add"
@return "true if the value didn't exist in the set"
*>
fn bool HashSet.add(&set, Value value)
{
// If the set isn't initialized, use the defaults to initialize it.
switch (set.allocator.ptr)
{
case &dummy:
set.init(mem);
case null:
set.tinit();
default:
break;
}
uint hash = rehash(value.hash());
uint index = index_for(hash, set.table.len);
for (Entry *e = set.table[index]; e != null; e = e.next)
{
if (e.hash == hash && equals(value, e.value)) return false;
}
set.add_entry(hash, value, index);
return true;
}
<*
Iterate over all the values in the set
*>
macro HashSet.@each(set; @body(value))
{
if (!set.count) return;
foreach (Entry* entry : set.table)
{
while (entry)
{
@body(entry.value);
entry = entry.next;
}
}
}
<*
Check if the set contains the given value.
@param value : "The value to check"
@return "true if it exists in the set"
*>
fn bool HashSet.contains(&set, Value value)
{
if (!set.count) return false;
uint hash = rehash(value.hash());
for (Entry *e = set.table[index_for(hash, set.table.len)]; e != null; e = e.next)
{
if (e.hash == hash && equals(value, e.value)) return true;
}
return false;
}
<*
Remove a single value from the set.
@param value : "The value to remove"
@return? NOT_FOUND : "If the entry is not found"
*>
fn void? HashSet.remove(&set, Value value) @maydiscard
{
if (!set.remove_entry_for_value(value)) return NOT_FOUND?;
}
fn usz HashSet.remove_all(&set, Value[] values)
{
usz total;
foreach (v : values)
{
if (set.remove_entry_for_value(v)) total++;
}
return total;
}
<*
@param [&in] other : "Other set"
*>
fn usz HashSet.remove_all_from(&set, HashSet* other)
{
usz total;
other.@each(;Value val)
{
if (set.remove_entry_for_value(val)) total++;
};
return total;
}
<*
Free all memory allocated by the hash set.
*>
fn void HashSet.free(&set)
{
if (!set.is_initialized()) return;
set.clear();
set.free_internal(set.table.ptr);
*set = {};
}
<*
Clear all elements from the set while keeping the underlying storage
@ensure set.count == 0
*>
fn void HashSet.clear(&set)
{
if (!set.count) return;
foreach (Entry** &entry_ref : set.table)
{
Entry* entry = *entry_ref;
if (!entry) continue;
Entry *next = entry.next;
while (next)
{
Entry *to_delete = next;
next = next.next;
set.free_entry(to_delete);
}
set.free_entry(entry);
*entry_ref = null;
}
set.count = 0;
}
fn void HashSet.reserve(&set, usz capacity)
{
if (capacity > set.threshold)
{
set.resize(math::next_power_of_2(capacity));
}
}
fn Value[] HashSet.tvalues(&self) => self.values(tmem) @inline;
fn Value[] HashSet.values(&self, Allocator allocator)
{
if (!self.count) return {};
Value[] list = allocator::alloc_array(allocator, Value, self.count);
usz index = 0;
foreach (Entry* entry : self.table)
{
while (entry)
{
list[index++] = entry.value;
entry = entry.next;
}
}
return list;
}
// --- Set Operations ---
<*
Returns the union of two sets (A | B)
@param [&in] other : "The other set to union with"
@param [&inout] allocator : "Allocator for the new set"
@return "A new set containing the union of both sets"
*>
fn HashSet HashSet.set_union(&self, Allocator allocator, HashSet* other)
{
usz new_capacity = math::next_power_of_2(self.count + other.count);
HashSet result;
result.init(allocator, new_capacity, self.load_factor);
result.add_all_from(self);
result.add_all_from(other);
return result;
}
fn HashSet HashSet.tset_union(&self, HashSet* other) => self.set_union(tmem, other);
<*
Returns the intersection of the two sets (A & B)
@param [&in] other : "The other set to intersect with"
@param [&inout] allocator : "Allocator for the new set"
@return "A new set containing the intersection of both sets"
*>
fn HashSet HashSet.intersection(&self, Allocator allocator, HashSet* other)
{
HashSet result;
result.init(allocator, math::min(self.table.len, other.table.len), self.load_factor);
// Iterate through the smaller set for efficiency
HashSet* smaller = self.count <= other.count ? self : other;
HashSet* larger = self.count > other.count ? self : other;
smaller.@each(;Value value)
{
if (larger.contains(value)) result.add(value);
};
return result;
}
fn HashSet HashSet.tintersection(&self, HashSet* other) => self.intersection(tmem, other);
<*
Return this set - other, so (A & ~B)
@param [&in] other : "The other set to compare with"
@param [&inout] allocator : "Allocator for the new set"
@return "A new set containing elements in this set but not in the other"
*>
fn HashSet HashSet.difference(&self, Allocator allocator, HashSet* other)
{
HashSet result;
result.init(allocator, self.table.len, self.load_factor);
self.@each(;Value value)
{
if (!other.contains(value))
{
result.add(value);
}
};
return result;
}
fn HashSet HashSet.tdifference(&self, HashSet* other) => self.difference(tmem, other) @inline;
<*
Return (A ^ B)
@param [&in] other : "The other set to compare with"
@param [&inout] allocator : "Allocator for the new set"
@return "A new set containing elements in this set or the other, but not both"
*>
fn HashSet HashSet.symmetric_difference(&self, Allocator allocator, HashSet* other)
{
HashSet result;
result.init(allocator, self.table.len, self.load_factor);
result.add_all_from(self);
other.@each(;Value value)
{
if (!result.add(value))
{
result.remove(value);
}
};
return result;
}
fn HashSet HashSet.tsymmetric_difference(&self, HashSet* other) => self.symmetric_difference(tmem, other) @inline;
<*
Check if this hash set is a subset of another set.
@param [&in] other : "The other set to check against"
@return "True if all elements of this set are in the other set"
*>
fn bool HashSet.is_subset(&self, HashSet* other)
{
if (self.count == 0) return true;
if (self.count > other.count) return false;
self.@each(;Value value)
{
if (!other.contains(value)) return false;
};
return true;
}
// --- private methods
fn void HashSet.add_entry(&set, uint hash, Value value, uint bucket_index) @private
{
Entry* entry = allocator::new(set.allocator, Entry, { .hash = hash, .value = value, .next = set.table[bucket_index] });
set.table[bucket_index] = entry;
if (set.count++ >= set.threshold)
{
set.resize(set.table.len * 2);
}
}
fn void HashSet.resize(&self, usz new_capacity) @private
{
Entry*[] old_table = self.table;
usz old_capacity = old_table.len;
if (old_capacity == MAXIMUM_CAPACITY)
{
self.threshold = uint.max;
return;
}
Entry*[] new_table = allocator::new_array(self.allocator, Entry*, new_capacity);
self.transfer(new_table);
self.table = new_table;
self.free_internal(old_table.ptr);
self.threshold = (uint)(new_capacity * self.load_factor);
}
fn usz? HashSet.to_format(&self, Formatter* f) @dynamic
{
usz len;
len += f.print("{ ")!;
self.@each(; Value value)
{
if (len > 2) len += f.print(", ")!;
len += f.printf("%s", value)!;
};
return len + f.print(" }");
}
fn void HashSet.transfer(&self, Entry*[] new_table) @private
{
Entry*[] src = self.table;
uint new_capacity = new_table.len;
foreach (uint j, Entry *e : src)
{
if (!e) continue;
do
{
Entry* next = e.next;
uint i = index_for(e.hash, new_capacity);
e.next = new_table[i];
new_table[i] = e;
e = next;
}
while (e);
}
}
fn void HashSet.put_all_for_create(&set, HashSet* other_set) @private
{
if (!other_set.count) return;
foreach (Entry *e : other_set.table)
{
while (e)
{
set.put_for_create(e.value);
e = e.next;
}
}
}
fn void HashSet.put_for_create(&set, Value value) @private
{
uint hash = rehash(value.hash());
uint i = index_for(hash, set.table.len);
for (Entry *e = set.table[i]; e != null; e = e.next)
{
if (e.hash == hash && equals(value, e.value))
{
// Value already exists, no need to do anything
return;
}
}
set.create_entry(hash, value, i);
}
fn void HashSet.free_internal(&self, void* ptr) @inline @private
{
allocator::free(self.allocator, ptr);
}
fn void HashSet.create_entry(&set, uint hash, Value value, int bucket_index) @private
{
Entry* entry = allocator::new(set.allocator, Entry, {
.hash = hash,
.value = value,
.next = set.table[bucket_index]
});
set.table[bucket_index] = entry;
set.count++;
}
<*
Removes the entry for the specified value if present
@return "true if found and removed, false otherwise"
*>
fn bool HashSet.remove_entry_for_value(&set, Value value) @private
{
if (!set.count) return false;
uint hash = rehash(value.hash());
uint i = index_for(hash, set.table.len);
Entry* prev = set.table[i];
Entry* e = prev;
while (e)
{
Entry *next = e.next;
if (e.hash == hash && equals(value, e.value))
{
set.count--;
if (prev == e)
{
set.table[i] = next;
}
else
{
prev.next = next;
}
set.free_entry(e);
return true;
}
prev = e;
e = next;
}
return false;
}
fn void HashSet.free_entry(&set, Entry *entry) @private
{
allocator::free(set.allocator, entry);
}
struct HashSetIterator
{
HashSet* set;
usz bucket_index;
Entry* current;
}
fn HashSetIterator HashSet.iter(&set) => { .set = set, .bucket_index = 0, .current = null };
fn Value? HashSetIterator.next(&self)
{
if (self.current)
{
Value value = self.current.value;
self.current = self.current.next;
return value;
}
while (self.bucket_index < self.set.table.len)
{
self.current = self.set.table[self.bucket_index++];
if (self.current)
{
Value value = self.current.value;
self.current = self.current.next;
return value;
}
}
return NOT_FOUND?;
}
fn usz HashSetIterator.len(&self) @operator(len)
{
return self.set.count;
}
<* @pure *>
fn uint rehash(uint hash) @inline @private
{
hash ^= (hash >> 20) ^ (hash >> 12);
return hash ^ ((hash >> 7) ^ (hash >> 4));
}
macro uint index_for(uint hash, uint capacity) @private => hash & (capacity - 1);
int dummy @local;