Add new builtins $$str_snakecase $$str_replace and $$str_pascalcase.

Added `@str_snakecase`, `@str_replace` and `@str_pascalcase` builtin compile time macros based on the `$$` builtins.
This commit is contained in:
Christoffer Lerno
2025-10-09 22:13:59 +02:00
parent 0d85caf21c
commit a000ae560a
8 changed files with 187 additions and 6 deletions

View File

@@ -547,6 +547,27 @@ macro isz @str_find(String $string, String $needle) @builtin => $$str_find($stri
macro String @str_upper(String $str) @builtin => $$str_upper($str);
macro String @str_lower(String $str) @builtin => $$str_lower($str);
macro uint @str_hash(String $str) @builtin => $$str_hash($str);
// Compile-time conversion of `$str` to PascalCase; forwards to the `$$str_pascalcase` builtin.
macro String @str_pascalcase(String $str) @builtin => $$str_pascalcase($str);
// Compile-time conversion of `$str` to snake_case; forwards to the `$$str_snakecase` builtin.
macro String @str_snakecase(String $str) @builtin => $$str_snakecase($str);
// Compile-time conversion of `$str` to camelCase: the PascalCase form with its first letter lower-cased.
macro String @str_camelcase(String $str) @builtin => @str_uncapitalize($$str_pascalcase($str));
// Compile-time conversion of `$str` to CONSTANT_CASE: the snake_case form passed through `@str_upper`.
macro String @str_constantcase(String $str) @builtin => @str_upper($$str_snakecase($str));
// Compile-time replacement of `$pattern` with `$replace` in `$str`; forwards to `$$str_replace`.
// `$limit` caps the number of replacements, the default 0 means "no limit".
macro String @str_replace(String $str, String $pattern, String $replace, uint $limit = 0) @builtin => $$str_replace($str, $pattern, $replace, $limit);
// Compile-time: return `$str` with its leading slice `$str[0:1]` upper-cased via `$$str_upper`.
// The rest of the string (`$str[1..]`) is appended unchanged; an empty string is returned as is.
macro String @str_capitalize(String $str) @builtin
{
$switch $str.len:
$case 0: return $str;
$case 1: return $$str_upper($str);
$default: return $$str_upper($str[0:1]) +++ $str[1..];
$endswitch
}
// Compile-time: return `$str` with its leading slice `$str[0:1]` lower-cased via `$$str_lower`.
// The rest of the string (`$str[1..]`) is appended unchanged; an empty string is returned as is.
macro String @str_uncapitalize(String $str) @builtin
{
$switch $str.len:
$case 0: return $str;
$case 1: return $$str_lower($str);
$default: return $$str_lower($str[0:1]) +++ $str[1..];
$endswitch
}
macro @generic_hash_core(h, value)
{

View File

@@ -5,6 +5,7 @@
### Changes / improvements
- Error when using $vaarg/$vacount/$vasplat and similar in a macro without vaargs #2510.
- Add splat defaults for designated initialization #2441.
- Add new builtins `$$str_snakecase`, `$$str_replace` and `$$str_pascalcase`.
### Fixes
- Bug in `io::write_using_write_byte`.
@@ -18,6 +19,7 @@
### Stdlib changes
- Sorting functions correctly took slices by value, but also other types by value. Now, only slices are accepted by value, other containers are always by ref.
- Added `@str_snakecase`, `@str_replace` and `@str_pascalcase` builtin compile-time macros based on the `$$` builtins.
## 0.7.6 Change list

View File

@@ -507,10 +507,13 @@ typedef enum
BUILTIN_SELECT,
BUILTIN_SET_ROUNDING_MODE,
BUILTIN_SPRINTF,
BUILTIN_STR_HASH,
BUILTIN_STR_UPPER,
BUILTIN_STR_LOWER,
BUILTIN_STR_FIND,
BUILTIN_STR_HASH,
BUILTIN_STR_LOWER,
BUILTIN_STR_PASCALCASE,
BUILTIN_STR_REPLACE,
BUILTIN_STR_UPPER,
BUILTIN_STR_SNAKECASE,
BUILTIN_SWIZZLE,
BUILTIN_SWIZZLE2,
BUILTIN_SIN,

View File

@@ -1101,6 +1101,9 @@ void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr)
case BUILTIN_STR_LOWER:
case BUILTIN_STR_UPPER:
case BUILTIN_STR_FIND:
case BUILTIN_STR_REPLACE:
case BUILTIN_STR_SNAKECASE:
case BUILTIN_STR_PASCALCASE:
case BUILTIN_WIDESTRING_16:
case BUILTIN_WIDESTRING_32:
case BUILTIN_RND:

View File

@@ -2,8 +2,8 @@
// Use of this source code is governed by a LGPLv3.0
// a copy of which can be found in the LICENSE file.
#include <math.h>
#include "sema_internal.h"
#include <ctype.h>
typedef enum
@@ -305,7 +305,60 @@ bool sema_expr_analyse_rnd(SemaContext *context UNUSED, Expr *expr)
return true;
}
bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr)
/**
 * Analyse a `$$str_replace` call and fold it into a constant string.
 *
 * All four arguments must evaluate to compile-time constants: three strings and
 * an integer limit. Occurrences of `pattern` are replaced from left to right.
 * A limit of 0 (or a limit larger than MAX_ARRAY_SIZE) means "no limit".
 *
 * @param context the semantic analysis context
 * @param expr the call expression, rewritten in place to the resulting constant string
 * @param arg the string to search in
 * @param pattern the substring to search for, must not be empty
 * @param replace the text inserted in place of each match
 * @param limit the maximum number of replacements, 0 for unlimited
 * @return true on success, false if an error was reported
 */
static bool sema_expr_analyse_str_replace(SemaContext *context, Expr *expr, Expr *arg, Expr *pattern, Expr *replace, Expr *limit)
{
	if (!sema_analyse_expr_rvalue(context, arg)) return false;
	if (!sema_cast_const(arg) || !expr_is_const_string(arg))
	{
		RETURN_SEMA_ERROR(arg, "Expected a constant string to replace a pattern in.");
	}
	if (!sema_analyse_expr_rvalue(context, pattern)) return false;
	if (!sema_cast_const(pattern) || !expr_is_const_string(pattern))
	{
		RETURN_SEMA_ERROR(pattern, "Expected a constant pattern to replace.");
	}
	if (!sema_analyse_expr_rvalue(context, replace)) return false;
	if (!sema_cast_const(replace) || !expr_is_const_string(replace))
	{
		RETURN_SEMA_ERROR(replace, "Expected a constant replacement string.");
	}
	if (!sema_analyse_expr_rvalue(context, limit)) return false;
	if (!sema_cast_const(limit) || !expr_is_const_int(limit))
	{
		RETURN_SEMA_ERROR(limit, "Expected a constant limit.");
	}
	const char *inner_str = arg->const_expr.bytes.ptr;
	ArraySize len = arg->const_expr.bytes.len;
	const char *pattern_str = pattern->const_expr.bytes.ptr;
	ArraySize pattern_len = pattern->const_expr.bytes.len;
	// An empty pattern matches at every position without consuming input,
	// so the scan below would never advance. Reject it up front.
	if (pattern_len == 0)
	{
		RETURN_SEMA_ERROR(pattern, "The pattern to replace may not be empty.");
	}
	const char *replace_str = replace->const_expr.bytes.ptr;
	ArraySize replace_len = replace->const_expr.bytes.len;
	ArraySize limit_int = int_ucomp(limit->const_expr.ixx, MAX_ARRAY_SIZE, BINARYOP_GT) ? 0 : limit->const_expr.ixx.i.low;
	// A limit of zero means "replace everything".
	if (limit_int == 0) limit_int = UINT64_MAX;
	scratch_buffer_clear();
	ArraySize index = 0;
	// Scan using the explicit lengths rather than relying on NUL termination.
	while (limit_int > 0 && len - index >= pattern_len)
	{
		ArraySize last_start = len - pattern_len;
		ArraySize match = index;
		while (match <= last_start && memcmp(inner_str + match, pattern_str, pattern_len) != 0) match++;
		// No further occurrence: the remainder is copied after the loop.
		if (match > last_start) break;
		scratch_buffer_append_len(inner_str + index, match - index);
		scratch_buffer_append_len(replace_str, replace_len);
		index = match + pattern_len;
		limit_int--;
	}
	// Copy whatever follows the last replacement (or the whole string if nothing matched).
	if (index < len) scratch_buffer_append_len(inner_str + index, len - index);
	expr_rewrite_const_string(expr, scratch_buffer_copy());
	return true;
}
static bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr)
{
Expr *inner = expr->call_expr.arguments[0];
if (!sema_analyse_expr_rvalue(context, inner)) return true;
@@ -354,7 +407,20 @@ bool sema_expr_analyse_str_conv(SemaContext *context, Expr *expr, BuiltinFunctio
expr_replace(expr, inner);
return true;
}
char *new_string = malloc_string(len + 1);
// Allocate the output buffer. Snake case may insert one '_' in front of each
// upper case letter, so reserve one extra byte per upper case letter found.
char *new_string;
if (func == BUILTIN_STR_SNAKECASE)
{
	int uppers = 0;
	for (ArrayIndex i = 0; i < len; i++)
	{
		// <ctype.h> functions need an unsigned char value; a plain char may be negative.
		if (isupper((unsigned char)string[i])) uppers++;
	}
	new_string = malloc_string(len + 1 + uppers);
}
else
{
	new_string = malloc_string(len + 1);
}
switch (func)
{
case BUILTIN_STR_LOWER:
@@ -364,6 +430,49 @@ bool sema_expr_analyse_str_conv(SemaContext *context, Expr *expr, BuiltinFunctio
new_string[i] = (char)(char_is_upper(c) ? (c | 0x20) : c);
}
break;
case BUILTIN_STR_SNAKECASE:
{
	// Lower-case every upper case letter. A '_' is inserted in front of it when it
	// follows a lower case letter or digit, or when it is followed by a lower case
	// letter (never at the very start of the string).
	size_t index = 0;
	for (ArraySize i = 0; i < len; i++)
	{
		// <ctype.h> functions need an unsigned char value; a plain char may be negative.
		unsigned char c = (unsigned char)string[i];
		if (!isupper(c))
		{
			new_string[index++] = (char)c;
			continue;
		}
		if (i > 0)
		{
			unsigned char prev = (unsigned char)string[i - 1];
			bool after_lower_or_digit = islower(prev) || isdigit(prev);
			bool before_lower = i < len - 1 && islower((unsigned char)string[i + 1]);
			if (after_lower_or_digit || before_lower) new_string[index++] = '_';
		}
		new_string[index++] = (char)tolower(c);
	}
	len = index;
	break;
}
case BUILTIN_STR_PASCALCASE:
{
	// Non-alphabetic characters are dropped and start a new word. The first
	// letter of each word is upper-cased, all other letters are lower-cased.
	bool capitalize = true;
	size_t j = 0;
	for (ArraySize i = 0; i < len; i++)
	{
		// <ctype.h> functions need an unsigned char value; a plain char may be negative.
		unsigned char c = (unsigned char)string[i];
		if (!isalpha(c))
		{
			capitalize = true;
			continue;
		}
		new_string[j++] = (char)(capitalize ? toupper(c) : tolower(c));
		capitalize = false;
	}
	len = j;
	break;
}
case BUILTIN_STR_UPPER:
for (ArraySize i = 0; i < len; i++)
{
@@ -538,8 +647,12 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr)
return sema_expr_analyse_rnd(context, expr);
case BUILTIN_STR_HASH:
return sema_expr_analyse_str_hash(context, expr);
case BUILTIN_STR_REPLACE:
return sema_expr_analyse_str_replace(context, expr, args[0], args[1], args[2], args[3]);
case BUILTIN_STR_UPPER:
case BUILTIN_STR_LOWER:
case BUILTIN_STR_PASCALCASE:
case BUILTIN_STR_SNAKECASE:
return sema_expr_analyse_str_conv(context, expr, func);
case BUILTIN_STR_FIND:
return sema_expr_analyse_str_find(context, expr);
@@ -592,7 +705,10 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr)
case BUILTIN_STR_HASH:
case BUILTIN_STR_UPPER:
case BUILTIN_STR_LOWER:
case BUILTIN_STR_PASCALCASE:
case BUILTIN_STR_SNAKECASE:
case BUILTIN_STR_FIND:
case BUILTIN_STR_REPLACE:
case BUILTIN_WIDESTRING_16:
case BUILTIN_WIDESTRING_32:
case BUILTIN_SPRINTF:
@@ -1274,6 +1390,8 @@ static inline int builtin_expected_args(BuiltinFunction func)
case BUILTIN_STR_HASH:
case BUILTIN_STR_UPPER:
case BUILTIN_STR_LOWER:
case BUILTIN_STR_SNAKECASE:
case BUILTIN_STR_PASCALCASE:
case BUILTIN_TRUNC:
case BUILTIN_VOLATILE_LOAD:
case BUILTIN_WASM_MEMORY_SIZE:
@@ -1326,6 +1444,7 @@ static inline int builtin_expected_args(BuiltinFunction func)
case BUILTIN_MASKED_LOAD:
case BUILTIN_GATHER:
case BUILTIN_SCATTER:
case BUILTIN_STR_REPLACE:
return 4;
case BUILTIN_ATOMIC_FETCH_EXCHANGE:
case BUILTIN_ATOMIC_FETCH_ADD:

View File

@@ -608,6 +608,7 @@ bool sema_expr_analyse_sprintf(SemaContext *context, Expr *expr, Expr *format_st
return true;
}
static bool sema_binary_is_expr_lvalue(SemaContext *context, Expr *top_expr, Expr *expr, bool *failed_ref)
{
if (expr->expr_kind == EXPR_CT_SUBSCRIPT) return true;

View File

@@ -280,6 +280,9 @@ void symtab_init(uint32_t capacity)
builtin_list[BUILTIN_STR_UPPER] = KW_DEF("str_upper");
builtin_list[BUILTIN_STR_LOWER] = KW_DEF("str_lower");
builtin_list[BUILTIN_STR_FIND] = KW_DEF("str_find");
builtin_list[BUILTIN_STR_PASCALCASE] = KW_DEF("str_pascalcase");
builtin_list[BUILTIN_STR_SNAKECASE] = KW_DEF("str_snakecase");
builtin_list[BUILTIN_STR_REPLACE] = KW_DEF("str_replace");
builtin_list[BUILTIN_SWIZZLE] = KW_DEF("swizzle");
builtin_list[BUILTIN_SWIZZLE2] = KW_DEF("swizzle2");
builtin_list[BUILTIN_SPRINTF] = KW_DEF("sprintf");

View File

@@ -0,0 +1,29 @@
// #target: macos-x64
module test;
import std;
// Prints the snake_case form of `$a` followed by its PascalCase form, one per line.
macro void @test($a)
{
io::printn(@str_snakecase($a));
io::printn(@str_pascalcase($a));
}
// Exercises the case conversion macros on several inputs and `@str_replace` with a limit of 1.
fn void main()
{
@test("abc deF");
@test("abc_def");
@test("AbcDef");
@test("parseHTTPResponse");
// Only the first "ea" is expected to be replaced because the limit is 1.
io::printn(@str_replace("hello dear! No fear, you are hear!", "ea", "ee", 1));
}
/* #expect: test.ll
@.str = private unnamed_addr constant [9 x i8] c"abc de_f\00", align 1
@.str.1 = private unnamed_addr constant [7 x i8] c"AbcDef\00", align 1
@.str.2 = private unnamed_addr constant [8 x i8] c"abc_def\00", align 1
@.str.3 = private unnamed_addr constant [7 x i8] c"AbcDef\00", align 1
@.str.4 = private unnamed_addr constant [8 x i8] c"abc_def\00", align 1
@.str.5 = private unnamed_addr constant [7 x i8] c"Abcdef\00", align 1
@.str.6 = private unnamed_addr constant [20 x i8] c"parse_http_response\00", align 1
@.str.7 = private unnamed_addr constant [18 x i8] c"Parsehttpresponse\00", align 1
@.str.8 = private unnamed_addr constant [35 x i8] c"hello deer! No fear, you are hear!\00", align 1