Updated sorting code.

This commit is contained in:
Christoffer Lerno
2024-07-09 01:04:11 +02:00
parent c50630989e
commit fba706f10b
8 changed files with 167 additions and 138 deletions

View File

@@ -346,6 +346,11 @@ macro uint String.hash(String c) => (uint)fnv32a::encode(c);
macro uint char[].hash(char[] c) => (uint)fnv32a::encode(c);
macro uint void*.hash(void* ptr) => ((ulong)(uptr)ptr).hash();
// Distinct pointer type used only to mark a macro argument as "not supplied".
distinct EmptySlot = void*;
// Sentinel value of type EmptySlot; the sort macros use it as the default
// for optional `cmp` / `context` / `key_fn` parameters.
const EmptySlot EMPTY_MACRO_SLOT @builtin = null;
// True when #arg has the EmptySlot type, i.e. the caller left the slot at its default.
// NOTE(review): relies on @typeis comparing the argument's type, not its value - confirm.
macro @is_empty_macro_slot(#arg) @builtin => @typeis(#arg, EmptySlot);
// Negation of @is_empty_macro_slot: true when #arg is anything other than an EmptySlot.
macro @is_valid_macro_slot(#arg) @builtin => !@typeis(#arg, EmptySlot);
const MAX_FRAMEADDRESS = 128;
/**
* @require n >= 0

View File

@@ -4,16 +4,19 @@ module std::sort;
* Perform a binary search over the sorted array and return the index
* in [0, array.len) where x would be inserted or cmp(i) is true and cmp(j) is true for j in [i, array.len).
* @require @is_sortable(list) "The list must be sortable"
* @require $or(@typeid(cmp) == void*.typeid, @is_cmp_fn(cmp, list)) "Expected a comparison function which compares values"
* @require @is_valid_cmp_fn(cmp, list, context) "Expected a comparison function which compares values"
* @require @is_valid_context(cmp, context) "Expected a valid context"
**/
macro usz binarysearch(list, x, cmp = null) @builtin
macro usz binarysearch(list, x, cmp = EMPTY_MACRO_SLOT, context = EMPTY_MACRO_SLOT) @builtin
{
usz i;
usz len = @len_from_list(list);
var $no_cmp = @is_empty_macro_slot(cmp);
var $has_context = @is_valid_macro_slot(context);
for (usz j = len; i < j;)
{
usz half = i + (j - i) / 2;
$if @typeid(cmp) == void*.typeid:
$if $no_cmp:
switch
{
case greater(list[half], x): j = half;
@@ -21,11 +24,20 @@ macro usz binarysearch(list, x, cmp = null) @builtin
default: return half;
}
$else
$switch
$case $typeof(cmp).params[0] == @typeid(list[0]):
$case $defined(cmp(list[0], list[0], context)):
int res = cmp(list[half], x, context);
$case $defined(cmp(list[0], list[0])):
assert(!$has_context);
int res = cmp(list[half], x);
$default:
$case $defined(cmp(&list[0], &list[0], context)):
int res = cmp(&list[half], &x, context);
$case $defined(cmp(&list[0], &list[0])):
assert(!$has_context);
int res = cmp(&list[half], &x);
$default:
assert(false, "Invalid comparison function");
$endswitch
switch
{

View File

@@ -5,104 +5,98 @@ import std::sort::qs;
/**
* Sort list using the counting sort algorithm.
* @require $defined(list[0]) && $defined(list.len) "The list must be indexable and support .len or .len()"
* @require $or(@typeid(key) == void*.typeid, @is_keyer(key, list)) "Expected a transformation function which returns a key-like type, think integral types"
* @require @is_sortable(list) "The list must be indexable and support .len or .len()"
* @require @is_cmp_key_fn(key_fn, list) "Expected a transformation function which returns an unsigned integer."
**/
macro countingsort(list, key = null) @builtin
macro countingsort(list, key_fn = EMPTY_MACRO_SLOT) @builtin
{
var $Type = $typeof(list);
var $KeyFuncType = $typeof(key);
usz len = sort::@len_from_list(list);
cs::csort(<$Type, $KeyFuncType>)(list, 0, len, key, ~((uint)0));
cs::csort(<$typeof(list), $typeof(key_fn)>)(list, 0, len, key_fn, ~((uint)0));
}
macro insertionsort_indexed(list, start, end, cmp = null) @builtin
macro insertionsort_indexed(list, start, end, cmp = EMPTY_MACRO_SLOT, context = EMPTY_MACRO_SLOT) @builtin
{
var $Type = $typeof(list);
var $CmpFuncType = $typeof(cmp);
is::isort(<$Type, $CmpFuncType>)(list, (usz)start, (usz)end, cmp);
is::isort(<$typeof(list), $typeof(cmp), $typeof(context)>)(list, (usz)start, (usz)end, cmp, context);
}
macro quicksort_indexed(list, start, end, cmp = null) @builtin
macro quicksort_indexed(list, start, end, cmp = EMPTY_MACRO_SLOT, context = EMPTY_MACRO_SLOT) @builtin
{
var $Type = $typeof(list);
var $CmpFuncType = $typeof(cmp);
qs::qsort(<$Type, $CmpFuncType>)(list, (isz)start, (isz)(end-1), cmp);
qs::qsort(<$typeof(list), $typeof(cmp), $typeof(context)>)(list, (isz)start, (isz)(end-1), cmp, context);
}
module std::sort::cs(<Type, Keyer>);
module std::sort::cs(<Type, KeyFn>);
def Counts = ulong[256] @private;
def Ranges = ulong[257] @private;
def Indexs = char[256] @private;
def ElementType = $typeof(Type{}[0]);
//def KeyerReturnTypeid = $typeof(Keyer.returns); //Keyer{}(Keyer.params[0]{})
def KeyerReturnType = $evaltype(Keyer.returns.qnameof);
def CmpCallback = fn bool(ElementType*, ElementType*);
fn void csort(Type list, usz low, usz high, Keyer key, uint byte_idx)
const bool NO_KEY_FN @private = KeyFn.typeid == void*.typeid;
const bool KEY_BY_VALUE @private = $or(NO_KEY_FN, $assignable(Type{}[0], $typefrom(KeyFn.params[0])));
const bool LIST_HAS_REF @private = $defined(&Type{}[0]);
def KeyFnReturnType = $typefrom(KeyFn.returns) @if(!NO_KEY_FN);
def KeyFnReturnType = ElementType @if(NO_KEY_FN);
def CmpCallback = fn int(ElementType, ElementType, KeyFn) @if(KEY_BY_VALUE);
def CmpCallback = fn int(ElementType*, ElementType*, KeyFn) @if(!KEY_BY_VALUE);
fn void csort(Type list, usz low, usz high, KeyFn key_fn, uint byte_idx)
{
var $no_key = Keyer.typeid == void*.typeid;
var $key_by_value = $and(!$no_key, Keyer.params[0] == @typeid(list[0]));
var $list_has_get_ref = $defined(list.get_ref(0));
if (high <= low) return;
CmpCallback compare_fn = fn (lhs, rhs, key_fn) {
$switch
$case NO_KEY_FN:
return compare_to(lhs, rhs);
$case KEY_BY_VALUE:
return compare_to(key_fn(lhs), key_fn(rhs));
$default:
return compare_to(key_fn(lhs), key_fn(rhs));
$endswitch;
};
byte_idx = byte_idx >= KeyerReturnType.sizeof ? KeyerReturnType.sizeof-1 : byte_idx;
if (high <= low) {
return;
}
byte_idx = byte_idx >= KeyFnReturnType.sizeof ? KeyFnReturnType.sizeof - 1 : byte_idx;
Counts counts;
Ranges ranges;
Indexs indexs;
KeyerReturnType mn = ~((KeyerReturnType)0);
KeyerReturnType mx = (KeyerReturnType)0;
KeyFnReturnType mn = ~(KeyFnReturnType)0;
KeyFnReturnType mx = 0;
char last_key = 0;
char keys_ordered = 1;
for (usz i = 0; i < 256; i++) {
counts[i] = 0;
}
for (usz i = low; i < high; i++)
{
$switch
$case NO_KEY_FN:
KeyFnReturnType k = list[i];
$case KEY_BY_VALUE:
KeyFnReturnType k = key_fn(list[i]);
$case LIST_HAS_REF:
KeyFnReturnType k = key_fn(&list[i]);
$default:
KeyFnReturnType k = key_fn(&&list[i]);
$endswitch;
for (usz i = low; i < high; i++) {
$if $list_has_get_ref:
$if $key_by_value:
KeyerReturnType k = key(*list.get_ref(i));
$else
KeyerReturnType k = key(list.get_ref(i));
$endif;
$else
$if $key_by_value:
KeyerReturnType k = key(list[i]);
$else
KeyerReturnType k = key(&list[i]);
$endif;
$endif
char key_byte = (char)((k >> (byte_idx*8)) & 0xff);
char key_byte = (char)((k >> (byte_idx * 8)) & 0xff);
++counts[key_byte];
mn = k < mn ? k : mn;
mx = k > mx ? k : mx;
keys_ordered = keys_ordered & (char)(key_byte >= last_key);
last_key = key_byte;
last_key = key_byte;
}
KeyerReturnType diff = mx-mn;
if (diff == 0) {
return;
}
KeyFnReturnType diff = mx - mn;
if (diff == 0) return;
ushort parition_count = 0;
usz total = 0;
for (usz i = 0; i < 256; i++) {
usz count = counts[i];
indexs[parition_count] = (char)i;
foreach (char i, count : counts)
{
indexs[parition_count] = i;
parition_count += (ushort)(count > 0);
counts[i] = total;
ranges[i] = total;
@@ -110,62 +104,55 @@ fn void csort(Type list, usz low, usz high, Keyer key, uint byte_idx)
}
ranges[256] = total;
if (!keys_ordered) {
if (!keys_ordered)
{
usz sorted_count = 0;
//ElementType* first = list.first();
do {
for (usz x = 0; x < 256; x++) {
usz s = counts[x];
do
{
foreach (x, s : counts)
{
usz e = ranges[x + 1];
sorted_count += (e - s);
for (; s < e; s++) {
$if $list_has_get_ref:
$if $key_by_value:
KeyerReturnType k = key(*list.get_ref(low+s));
$else
KeyerReturnType k = key(list.get_ref(low+s));
$endif;
char k_idx = (char)(k >> (byte_idx*8));
usz target_idx = counts[k_idx];
@swap(list[low+s], list[low+target_idx]);
counts[k_idx] += 1;
$else
$if $key_by_value:
KeyerReturnType k = key(list[low+s]);
$else
KeyerReturnType k = key(&list[low+s]);
$endif;
char k_idx = (char)(k >> (byte_idx*8));
usz target_idx = counts[k_idx];
@swap(list[low+s], list[low+target_idx]);
counts[k_idx] += 1;
$endif;
for (; s < e; s++)
{
$switch
$case NO_KEY_FN:
KeyFnReturnType k = list[low + s];
$case KEY_BY_VALUE:
KeyFnReturnType k = key_fn(list[low + s]);
$case LIST_HAS_REF:
KeyFnReturnType k = key_fn(&list[low + s]);
$default:
KeyFnReturnType k = key_fn(&&list[low + s]);
$endswitch;
char k_idx = (char)(k >> (byte_idx * 8));
usz target_idx = counts[k_idx];
@swap(list[low + s], list[low + target_idx]);
counts[k_idx]++;
}
}
} while (sorted_count < ranges[256]);
}
if (byte_idx) {
for (usz p = 0; p < parition_count; ++p) {
if (byte_idx)
{
for (usz p = 0; p < parition_count; p++)
{
usz i = indexs[p];
usz start_offset = ranges[i];
usz end_offset = ranges[i + 1];
usz end_offset = ranges[i + 1];
usz items = end_offset - start_offset;
usz items = end_offset - start_offset;
if (items <= 32) {
insertionsort_indexed(list, low+start_offset, low+end_offset, fn bool (ElementType* lhs, ElementType* rhs) {
return Keyer{}(lhs) < Keyer{}(rhs);
});
} else if (items <= 128) {
quicksort_indexed(list, low+start_offset, low+end_offset, fn long (ElementType* lhs, ElementType* rhs) {
return (long)(Keyer{}(lhs) - Keyer{}(rhs));
});
} else {
csort(list, low+start_offset, low+end_offset, key, byte_idx-1);
switch (items)
{
case 0..32:
insertionsort_indexed(list, low + start_offset, low + end_offset, compare_fn, key_fn);
case 33..128:
quicksort_indexed(list, low + start_offset, low + end_offset, compare_fn, key_fn);
default:
csort(list, low + start_offset, low + end_offset, key_fn, byte_idx - 1);
}
}
}

View File

@@ -4,24 +4,24 @@ import std::sort::is;
/**
* Sort list using the insertion sort algorithm.
* @require @is_sortable(list) "The list must be indexable and support .len or .len()"
* @require $or(@typeid(cmp) == void*.typeid, @is_cmp_fn(cmp, list)) "Expected a comparison function which compares values"
* @require @is_valid_cmp_fn(cmp, list, context) "Expected a comparison function which compares values"
**/
macro insertionsort(list, cmp = null) @builtin
macro insertionsort(list, cmp = EMPTY_MACRO_SLOT, context = EMPTY_MACRO_SLOT) @builtin
{
usz len = sort::@len_from_list(list);
is::isort(<$typeof(list), $typeof(cmp)>)(list, 0, (isz)len, cmp);
is::isort(<$typeof(list), $typeof(cmp), $typeof(context)>)(list, 0, (isz)len, cmp, context);
}
module std::sort::is(<Type, Comparer>);
module std::sort::is(<Type, CmpFn, Context>);
def ElementType = $typeof(Type{}[0]);
fn void isort(Type list, usz low, usz high, Comparer comp)
fn void isort(Type list, usz low, usz high, CmpFn comp, Context context)
{
var $no_cmp = Comparer.typeid == void*.typeid;
var $cmp_by_value = $and(!$no_cmp, Comparer.params[0] == @typeid(list[0]));
var $has_cmp = @is_valid_macro_slot(comp);
var $has_context = @is_valid_macro_slot(context);
var $cmp_by_value = $and($has_cmp, $assignable(list[0], $typefrom(CmpFn.params[0])));
var $has_get_ref = $defined(&list[0]);
assert(sort::@is_sortable(list));
for (usz i = low; i < high; ++i)
{
usz j = i;
@@ -31,9 +31,13 @@ fn void isort(Type list, usz low, usz high, Comparer comp)
ElementType *rhs = &list[j];
ElementType *lhs = &list[--j];
$switch
$case $cmp_by_value && $has_context:
if (comp(*rhs, *lhs, context) >= 0) break;
$case $cmp_by_value:
if (comp(*rhs, *lhs) >= 0) break;
$case !$no_cmp:
$case $has_cmp && $has_context:
if (comp(rhs, lhs, context) >= 0) break;
$case $has_cmp:
if (comp(rhs, lhs) >= 0) break;
$default:
if (!less(*rhs, *lhs)) break;
@@ -42,11 +46,14 @@ fn void isort(Type list, usz low, usz high, Comparer comp)
$else
usz r = j;
--j;
$switch
$case $cmp_by_value && $has_context:
if (comp(list[r], list[j], context) >= 0) break;
$case $cmp_by_value:
if (comp(list[r], list[j]) >= 0) break;
$case !$no_cmp:
$case $has_cmp && $has_context:
if (comp(&list[r], &list[j], context) >= 0) break;
$case $has_cmp:
if (comp(&list[r], &list[j]) >= 0) break;
$default:
if (!less(list[r], list[j])) break;

View File

@@ -4,15 +4,16 @@ import std::sort::qs;
/**
* Sort list using the quick sort algorithm.
* @require @is_sortable(list) "The list must be indexable and support .len or .len()"
* @require $or(@typeid(cmp) == void*.typeid, @is_cmp_fn(cmp, list)) "Expected a comparison function which compares values"
* @require @is_valid_cmp_fn(cmp, list, context) "Expected a comparison function which compares values"
* @require @is_valid_context(cmp, context) "Expected a valid context"
**/
macro quicksort(list, cmp = null) @builtin
macro quicksort(list, cmp = EMPTY_MACRO_SLOT, context = EMPTY_MACRO_SLOT) @builtin
{
usz len = sort::@len_from_list(list);
qs::qsort(<$typeof(list), $typeof(cmp)>)(list, 0, (isz)len - 1, cmp);
qs::qsort(<$typeof(list), $typeof(cmp), $typeof(context)>)(list, 0, (isz)len - 1, cmp, context);
}
module std::sort::qs(<Type, Comparer>);
module std::sort::qs(<Type, CmpFn, Context>);
def ElementType = $typeof(Type{}[0]);
@@ -26,10 +27,11 @@ def Stack = StackElementItem[64] @private;
// Based on https://alienryderflex.com/quicksort by Darel Rex Finley, Public Domain.
fn void qsort(Type list, isz low, isz high, Comparer cmp)
fn void qsort(Type list, isz low, isz high, CmpFn cmp, Context context)
{
var $no_cmp = Comparer.typeid == void*.typeid;
var $cmp_by_value = $and(!$no_cmp, Comparer.params[0] == @typeid(list[0]));
var $has_cmp = @is_valid_macro_slot(cmp);
var $has_context = @is_valid_macro_slot(context);
var $cmp_by_value = $and($has_cmp, $assignable(list[0], $typefrom(CmpFn.params[0])));
if (low >= 0 && high >= 0 && low < high)
{
Stack stack;
@@ -49,20 +51,25 @@ fn void qsort(Type list, isz low, isz high, Comparer cmp)
while (l < h)
{
$switch
$case $cmp_by_value && $has_context:
while (cmp(list[h], pivot, context) >= 0 && l < h) h--;
if (l < h) list[l++] = list[h];
while (cmp(list[l], pivot, context) <= 0 && l < h) l++;
$case $cmp_by_value:
while (cmp(list[h], pivot) >= 0 && l < h) h--;
$case !$no_cmp:
while (cmp(&list[h], &pivot) >= 0 && l < h) h--;
$default:
while (greater_eq(list[h], pivot) && l < h) h--;
$endswitch
if (l < h) list[l++] = list[h];
$switch
$case $cmp_by_value:
if (l < h) list[l++] = list[h];
while (cmp(list[l], pivot) <= 0 && l < h) l++;
$case !$no_cmp:
$case $has_cmp && $has_context:
while (cmp(&list[h], &pivot, context) >= 0 && l < h) h--;
if (l < h) list[l++] = list[h];
while (cmp(&list[l], &pivot, context) <= 0 && l < h) l++;
$case $has_cmp:
while (cmp(&list[h], &pivot) >= 0 && l < h) h--;
if (l < h) list[l++] = list[h];
while (cmp(&list[l], &pivot) <= 0 && l < h) l++;
$default:
while (greater_eq(list[h], pivot) && l < h) h--;
if (l < h) list[l++] = list[h];
while (less_eq(list[l], pivot) && l < h) l++;
$endswitch
if (l < h) list[h--] = list[l];

View File

@@ -1,6 +1,5 @@
module std::sort;
macro usz @len_from_list(&list)
{
$if $defined(list.len()):
@@ -24,13 +23,22 @@ macro bool @is_sortable(#list)
$endswitch;
}
macro bool @is_cmp_fn(#cmp, #list)
// Checks that a context argument is only passed together with a comparison function.
// Returns true when #cmp is a supplied (non-empty) macro slot, or when #context
// was left empty; the only rejected combination is "context given, cmp omitted".
macro bool @is_valid_context(#cmp, #context)
{
return @is_valid_macro_slot(#cmp) || @is_empty_macro_slot(#context);
}
macro bool @is_valid_cmp_fn(#cmp, #list, #context)
{
var $Type = $typeof(#cmp);
var $no_context = @is_empty_macro_slot(#context);
$switch
$case @is_empty_macro_slot(#cmp): return true;
$case $or($Type.kindof != FUNC, $Type.returns.kindof != SIGNED_INT): return false;
$case $defined(#cmp(#list[0], #list[0])): return true;
$case $defined(#cmp(&&(#list[0]), &&(#list[0]))): return true;
$case $defined(#cmp(#list[0], #list[0], #context)): return true;
$case $defined(#cmp(#list[0], #list[0])): return $no_context;
$case $defined(#cmp(&#list[0], &#list[0], #context)): return true;
$case $defined(#cmp(&#list[0], &#list[0])): return $no_context;
$default: return false;
$endswitch
}
@@ -38,6 +46,7 @@ macro bool @is_cmp_fn(#cmp, #list)
macro bool @is_cmp_key_fn(#key_fn, #list)
{
$switch
$case @is_empty_macro_slot(#key_fn): return true;
$case $typeof(#key_fn).kindof != FUNC: return false;
$case $typeof(#key_fn).returns.kindof != UNSIGNED_INT: return false;
$case $defined(#key_fn(#list[0])): return true;

View File

@@ -107,6 +107,8 @@
- Path normalization - fix possible null terminator out of bounds.
- Add 'zstr' variants for `string::new_format` / `string::tformat`.
- Fix mutex and wait signatures for Win32.
- Updated sorting API.
- Insertion sort and counting sort added.
## 0.5.5 Change list

View File

@@ -7931,7 +7931,7 @@ static inline bool sema_expr_analyse_lambda(SemaContext *context, Type *target_t
decl->resolve_status = RESOLVE_DONE;
return true;
FAIL_NO_INFER:
SEMA_ERROR(decl, "Inferred lambda expressions cannot be used unless the type can be determined.");
SEMA_ERROR(expr, "Inferred lambda expressions cannot be used unless the type can be determined.");
return false;
}