From a000ae560acd5a4742683a2f053b1d8f0e0d520c Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Thu, 9 Oct 2025 22:13:59 +0200 Subject: [PATCH] Add new builtins `$$str_snakecase` `$$str_replace` and `$$str_pascalcase`. Added `@str_snakecase`, `@str_replace` and `@str_pascalcase` builtin compile time macros based on the `$$` builtins. --- lib/std/core/builtin.c3 | 21 ++++ releasenotes.md | 2 + src/compiler/enums.h | 9 +- src/compiler/llvm_codegen_builtins.c | 3 + src/compiler/sema_builtins.c | 125 ++++++++++++++++++- src/compiler/sema_expr.c | 1 + src/compiler/symtab.c | 3 + test/test_suite/builtins/string_builtins.c3t | 29 +++++ 8 files changed, 187 insertions(+), 6 deletions(-) create mode 100644 test/test_suite/builtins/string_builtins.c3t diff --git a/lib/std/core/builtin.c3 b/lib/std/core/builtin.c3 index dc0d430c3..8c6e967bd 100644 --- a/lib/std/core/builtin.c3 +++ b/lib/std/core/builtin.c3 @@ -547,6 +547,27 @@ macro isz @str_find(String $string, String $needle) @builtin => $$str_find($stri macro String @str_upper(String $str) @builtin => $$str_upper($str); macro String @str_lower(String $str) @builtin => $$str_lower($str); macro uint @str_hash(String $str) @builtin => $$str_hash($str); +macro String @str_pascalcase(String $str) @builtin => $$str_pascalcase($str); +macro String @str_snakecase(String $str) @builtin => $$str_snakecase($str); +macro String @str_camelcase(String $str) @builtin => @str_capitalize($$str_pascalcase($str)); +macro String @str_constantcase(String $str) @builtin => @str_upper($$str_snakecase($str)); +macro String @str_replace(String $str, String $pattern, String $replace, uint $limit = 0) @builtin => $$str_replace($str, $pattern, $replace, $limit); +macro String @str_capitalize(String $str) @builtin +{ + $switch $str.len: + $case 0: return $str; + $case 1: return $$str_upper($str); + $default: return $$str_upper($str[0:1]) +++ $str[1..]; + $endswitch +} +macro String @str_uncapitalize(String $str) @builtin +{ + $switch $str.len: + $case 0: return $str; + $case 1: return $$str_lower($str); + $default: return $$str_lower($str[0:1]) +++ $str[1..]; + $endswitch +} macro @generic_hash_core(h, value) { diff --git a/releasenotes.md b/releasenotes.md index 24011fb10..1e2b6dd86 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -5,6 +5,7 @@ ### Changes / improvements - Error when using $vaarg/$vacount/$vasplat and similar in a macro without vaargs #2510. - Add splat defaults for designated initialization #2441. +- Add new builtins `$$str_snakecase` `$$str_replace` and `$$str_pascalcase`. ### Fixes - Bug in `io::write_using_write_byte`. @@ -18,6 +19,7 @@ ### Stdlib changes - Sorting functions correctly took slices by value, but also other types by value. Now, only slices are accepted by value, other containers are always by ref. +- Added `@str_snakecase`, `@str_replace` and `@str_pascalcase` builtin compile time macros based on the `$$` builtins. ## 0.7.6 Change list diff --git a/src/compiler/enums.h b/src/compiler/enums.h index ff43c35b9..9a572b20a 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -507,10 +507,13 @@ typedef enum BUILTIN_SELECT, BUILTIN_SET_ROUNDING_MODE, BUILTIN_SPRINTF, - BUILTIN_STR_HASH, - BUILTIN_STR_UPPER, - BUILTIN_STR_LOWER, BUILTIN_STR_FIND, + BUILTIN_STR_HASH, + BUILTIN_STR_LOWER, + BUILTIN_STR_PASCALCASE, + BUILTIN_STR_REPLACE, + BUILTIN_STR_UPPER, + BUILTIN_STR_SNAKECASE, BUILTIN_SWIZZLE, BUILTIN_SWIZZLE2, BUILTIN_SIN, diff --git a/src/compiler/llvm_codegen_builtins.c b/src/compiler/llvm_codegen_builtins.c index 4c8d2a92f..e49eb52e4 100644 --- a/src/compiler/llvm_codegen_builtins.c +++ b/src/compiler/llvm_codegen_builtins.c @@ -1101,6 +1101,9 @@ void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr) case BUILTIN_STR_LOWER: case BUILTIN_STR_UPPER: case BUILTIN_STR_FIND: + case BUILTIN_STR_REPLACE: + case BUILTIN_STR_SNAKECASE: + case BUILTIN_STR_PASCALCASE: case BUILTIN_WIDESTRING_16: case BUILTIN_WIDESTRING_32: case BUILTIN_RND: diff --git a/src/compiler/sema_builtins.c b/src/compiler/sema_builtins.c index 6fa062c09..fd16c1ea3 100644 --- a/src/compiler/sema_builtins.c +++ b/src/compiler/sema_builtins.c @@ -2,8 +2,8 @@ // Use of this source code is governed by a LGPLv3.0 // a copy of which can be found in the LICENSE file. #include - #include "sema_internal.h" +#include typedef enum @@ -305,7 +305,60 @@ bool sema_expr_analyse_rnd(SemaContext *context UNUSED, Expr *expr) return true; } -bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr) +static bool sema_expr_analyse_str_replace(SemaContext *context, Expr *expr, Expr *arg, Expr *pattern, Expr *replace, Expr *limit) +{ + if (!sema_analyse_expr_rvalue(context, arg)) return false; + if (!sema_cast_const(arg) || !expr_is_const_string(arg)) + { + RETURN_SEMA_ERROR(arg, "Expected a constant string replace a pattern in."); + } + if (!sema_analyse_expr_rvalue(context, pattern)) return false; + if (!sema_cast_const(pattern) || !expr_is_const_string(pattern)) + { + RETURN_SEMA_ERROR(pattern, "Expected a constant pattern to replace."); + } + if (!sema_analyse_expr_rvalue(context, replace)) return false; + if (!sema_cast_const(replace) || !expr_is_const_string(replace)) + { + RETURN_SEMA_ERROR(replace, "Expected a constant replacement string."); + } + if (!sema_analyse_expr_rvalue(context, limit)) return false; + if (!sema_cast_const(limit) || !expr_is_const_int(limit)) + { + RETURN_SEMA_ERROR(limit, "Expected a constant limit."); + } + const char *inner_str = arg->const_expr.bytes.ptr; + ArraySize len = arg->const_expr.bytes.len; + const char *pattern_str = pattern->const_expr.bytes.ptr; + ArraySize pattern_len = pattern->const_expr.bytes.len; + const char *replace_str = replace->const_expr.bytes.ptr; + ArraySize limit_int = int_ucomp(limit->const_expr.ixx, MAX_ARRAY_SIZE, BINARYOP_GT) ? 0 : limit->const_expr.ixx.i.low; + scratch_buffer_clear(); + ArrayIndex index = 0; + if (limit_int == 0) limit_int = UINT64_MAX; + while (index < len) + { + const char *end = strstr(inner_str + index, pattern_str); + if (end == NULL) + { + scratch_buffer_append(inner_str + index); + break; + } + scratch_buffer_append_len(inner_str + index, end - inner_str - index); + scratch_buffer_append(replace_str); + index = end - inner_str + pattern_len; + limit_int--; + if (limit_int == 0) + { + scratch_buffer_append(inner_str + index); + break; + } + } + expr_rewrite_const_string(expr, scratch_buffer_copy()); + return true; +} + +static bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr) { Expr *inner = expr->call_expr.arguments[0]; if (!sema_analyse_expr_rvalue(context, inner)) return true; @@ -354,7 +407,20 @@ bool sema_expr_analyse_str_conv(SemaContext *context, Expr *expr, BuiltinFunctio expr_replace(expr, inner); return true; } - char *new_string = malloc_string(len + 1); + char *new_string; + if (func == BUILTIN_STR_SNAKECASE) + { + int uppers = 0; + for (ArrayIndex i = 0; i < len; i++) + { + if (isupper(string[i])) uppers++; + } + new_string = malloc_string(len + 1 + uppers); + } + else + { + new_string = malloc_string(len + 1); + } switch (func) { case BUILTIN_STR_LOWER: @@ -364,6 +430,49 @@ bool sema_expr_analyse_str_conv(SemaContext *context, Expr *expr, BuiltinFunctio new_string[i] = (char)(char_is_upper(c) ? (c | 0x20) : c); } break; + case BUILTIN_STR_SNAKECASE: + { + size_t index = 0; + for (ArraySize i = 0; i < len; i++) + { + char c = string[i]; + if (isupper(c)) + { + if (i > 0 && ((islower(string[i - 1]) || isdigit(string[i - 1])) || (i < len - 1 && islower(string[i + 1])))) + { + new_string[index++] = '_'; + } + new_string[index++] = tolower(c); + continue; + } + new_string[index++] = c; + } + len = index; + break; + } + case BUILTIN_STR_PASCALCASE: + { + bool capitalize = true; + size_t j = 0; + for (ArraySize i = 0; i < len; i++) + { + char c = string[i]; + if (!isalpha(c)) + { + capitalize = true; + continue; + } + if (capitalize) + { + new_string[j++] = toupper(c); + capitalize = false; + continue; + } + new_string[j++] = tolower(c); + } + len = j; + break; + } case BUILTIN_STR_UPPER: for (ArraySize i = 0; i < len; i++) { @@ -538,8 +647,12 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) return sema_expr_analyse_rnd(context, expr); case BUILTIN_STR_HASH: return sema_expr_analyse_str_hash(context, expr); + case BUILTIN_STR_REPLACE: + return sema_expr_analyse_str_replace(context, expr, args[0], args[1], args[2], args[3]); case BUILTIN_STR_UPPER: case BUILTIN_STR_LOWER: + case BUILTIN_STR_PASCALCASE: + case BUILTIN_STR_SNAKECASE: return sema_expr_analyse_str_conv(context, expr, func); case BUILTIN_STR_FIND: return sema_expr_analyse_str_find(context, expr); @@ -592,7 +705,10 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) case BUILTIN_STR_HASH: case BUILTIN_STR_UPPER: case BUILTIN_STR_LOWER: + case BUILTIN_STR_PASCALCASE: + case BUILTIN_STR_SNAKECASE: case BUILTIN_STR_FIND: + case BUILTIN_STR_REPLACE: case BUILTIN_WIDESTRING_16: case BUILTIN_WIDESTRING_32: case BUILTIN_SPRINTF: @@ -1274,6 +1390,8 @@ static inline int builtin_expected_args(BuiltinFunction func) case BUILTIN_STR_HASH: case BUILTIN_STR_UPPER: case BUILTIN_STR_LOWER: + case BUILTIN_STR_SNAKECASE: + case BUILTIN_STR_PASCALCASE: case BUILTIN_TRUNC: case BUILTIN_VOLATILE_LOAD: case BUILTIN_WASM_MEMORY_SIZE: @@ -1326,6 +1444,7 @@ static inline int builtin_expected_args(BuiltinFunction func) case BUILTIN_MASKED_LOAD: case BUILTIN_GATHER: case BUILTIN_SCATTER: + case BUILTIN_STR_REPLACE: return 4; case BUILTIN_ATOMIC_FETCH_EXCHANGE: case BUILTIN_ATOMIC_FETCH_ADD: diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c index c9a92ac98..1009b22f6 100644 --- a/src/compiler/sema_expr.c +++ b/src/compiler/sema_expr.c @@ -608,6 +608,7 @@ bool sema_expr_analyse_sprintf(SemaContext *context, Expr *expr, Expr *format_st return true; } + static bool sema_binary_is_expr_lvalue(SemaContext *context, Expr *top_expr, Expr *expr, bool *failed_ref) { if (expr->expr_kind == EXPR_CT_SUBSCRIPT) return true; diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index aa13abca3..936a882fa 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -280,6 +280,9 @@ void symtab_init(uint32_t capacity) builtin_list[BUILTIN_STR_UPPER] = KW_DEF("str_upper"); builtin_list[BUILTIN_STR_LOWER] = KW_DEF("str_lower"); builtin_list[BUILTIN_STR_FIND] = KW_DEF("str_find"); + builtin_list[BUILTIN_STR_PASCALCASE] = KW_DEF("str_pascalcase"); + builtin_list[BUILTIN_STR_SNAKECASE] = KW_DEF("str_snakecase"); + builtin_list[BUILTIN_STR_REPLACE] = KW_DEF("str_replace"); builtin_list[BUILTIN_SWIZZLE] = KW_DEF("swizzle"); builtin_list[BUILTIN_SWIZZLE2] = KW_DEF("swizzle2"); builtin_list[BUILTIN_SPRINTF] = KW_DEF("sprintf"); diff --git a/test/test_suite/builtins/string_builtins.c3t b/test/test_suite/builtins/string_builtins.c3t new file mode 100644 index 000000000..4cdb1e19c --- /dev/null +++ b/test/test_suite/builtins/string_builtins.c3t @@ -0,0 +1,29 @@ +// #target: macos-x64 +module test; +import std; + +macro void @test($a) +{ + io::printn(@str_snakecase($a)); + io::printn(@str_pascalcase($a)); +} +fn void main() +{ + @test("abc deF"); + @test("abc_def"); + @test("AbcDef"); + @test("parseHTTPResponse"); + io::printn(@str_replace("hello dear! No fear, you are hear!", "ea", "ee", 1)); +} + +/* #expect: test.ll + +@.str = private unnamed_addr constant [9 x i8] c"abc de_f\00", align 1 +@.str.1 = private unnamed_addr constant [7 x i8] c"AbcDef\00", align 1 +@.str.2 = private unnamed_addr constant [8 x i8] c"abc_def\00", align 1 +@.str.3 = private unnamed_addr constant [7 x i8] c"AbcDef\00", align 1 +@.str.4 = private unnamed_addr constant [8 x i8] c"abc_def\00", align 1 +@.str.5 = private unnamed_addr constant [7 x i8] c"Abcdef\00", align 1 +@.str.6 = private unnamed_addr constant [20 x i8] c"parse_http_response\00", align 1 +@.str.7 = private unnamed_addr constant [18 x i8] c"Parsehttpresponse\00", align 1 +@.str.8 = private unnamed_addr constant [35 x i8] c"hello deer! No fear, you are hear!\00", align 1