From f2daf2e11e2975d21b87b71630c671f2fa5dc981 Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Fri, 6 Jun 2025 03:18:28 +0200 Subject: [PATCH] `@sprintf` macro (based on the `$$sprintf` builtin) allows compile time format strings #1874. --- lib/std/core/string.c3 | 4 ++ releasenotes.md | 1 + src/compiler/compiler_internal.h | 1 + src/compiler/enums.h | 9 ++- src/compiler/lexer.c | 11 +++- src/compiler/llvm_codegen_builtins.c | 1 + src/compiler/number.c | 63 ++++++++++++++++++++- src/compiler/parse_expr.c | 21 ++++--- src/compiler/parse_global.c | 6 -- src/compiler/parse_stmt.c | 2 - src/compiler/parser_internal.h | 1 - src/compiler/sema_builtins.c | 57 ++++++++++++++++++- src/compiler/sema_decls.c | 2 +- src/compiler/sema_stmts.c | 39 +------------ src/compiler/symtab.c | 1 + src/compiler/tokens.c | 4 -- test/test_suite/compile_time/ct_sprintf.c3t | 14 +++++ 17 files changed, 168 insertions(+), 69 deletions(-) create mode 100644 test/test_suite/compile_time/ct_sprintf.c3t diff --git a/lib/std/core/string.c3 b/lib/std/core/string.c3 index fd2e5f2d8..89dfd0bc7 100644 --- a/lib/std/core/string.c3 +++ b/lib/std/core/string.c3 @@ -78,6 +78,10 @@ macro Char16[] @char16(String $string) @builtin return $$wstr16($string)[..^2]; } +macro String @sprintf(String $format, ...) @builtin @const +{ + return $$sprintf($format, $vasplat); +} <* Return a temporary ZString created using the formatting function. diff --git a/releasenotes.md b/releasenotes.md index 84ac1b888..816ab5127 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -13,6 +13,7 @@ - Allow inference across `&&` #2172. - Added support for custom file extensions in project.json targets. - `$eval` now also works with `@foo`, `#foo`, `$Foo` and `$foo` parameters #2114. +- `@sprintf` macro (based on the `$$sprintf` builtin) allows compile time format strings #1874. ### Fixes - `-2147483648`, MIN literals work correctly. diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index 81fa8c16f..3da90478f 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -2273,6 +2273,7 @@ void expr_contract_array(ExprConst *expr_const, ConstKind contract_type); bool expr_const_will_overflow(const ExprConst *expr, TypeKind kind); const char *expr_const_to_error_string(const ExprConst *expr); bool expr_const_float_fits_type(const ExprConst *expr_const, TypeKind kind); +void expr_const_to_scratch_buffer(const ExprConst *expr); // --- Lexer functions diff --git a/src/compiler/enums.h b/src/compiler/enums.h index d15ad23cd..1420246fd 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -500,6 +500,7 @@ typedef enum BUILTIN_SCATTER, BUILTIN_SELECT, BUILTIN_SET_ROUNDING_MODE, + BUILTIN_SPRINTF, BUILTIN_STR_HASH, BUILTIN_STR_UPPER, BUILTIN_STR_LOWER, @@ -1133,8 +1134,6 @@ typedef enum // We want to parse #foo separately. TOKEN_HASH_IDENT, // #foobar - TOKEN_HASH_CONST_IDENT, // #FOOBAR - TOKEN_HASH_TYPE_IDENT, // #Foobar TOKEN_AT_IDENT, // @macro TOKEN_AT_CONST_IDENT, // @MACRO @@ -1259,11 +1258,11 @@ typedef enum TOKEN_CT_VAEXPR, // $vaexpr, TOKEN_CT_VASPLAT, // $vasplat, TOKEN_LAST_KEYWORD = TOKEN_CT_VASPLAT, - TOKEN_DOCS_START, // <* - TOKEN_DOCS_END, // *> + TOKEN_DOCS_START, // <* + TOKEN_DOCS_END, // *> TOKEN_DOCS_EOL, - TOKEN_EOF, // \n - SHOULD ALWAYS BE THE LAST TOKEN. + TOKEN_EOF, // \n - SHOULD ALWAYS BE THE LAST TOKEN. TOKEN_LAST = TOKEN_EOF, } TokenType; diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index f90f1f186..7305c0141 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -351,6 +351,7 @@ EXIT:; default: break; } + assert(type != TOKEN_INVALID_TOKEN); return new_token(lexer, type, interned_string); } @@ -1219,6 +1220,13 @@ EXIT:; return true; } +static bool next_is_ident(Lexer *lexer) +{ + size_t i = 0; + while (lexer->current[i] == '_') i++; + return char_is_lower(lexer->current[i]); +} + static bool lexer_scan_token_inner(Lexer *lexer) { // Now skip the whitespace. @@ -1249,7 +1257,8 @@ static bool lexer_scan_token_inner(Lexer *lexer) case '"': return scan_string(lexer); case '#': - return scan_ident(lexer, TOKEN_HASH_IDENT, TOKEN_HASH_CONST_IDENT, TOKEN_HASH_TYPE_IDENT, '#'); + if (!next_is_ident(lexer)) return new_token(lexer, TOKEN_HASH, "#"); + return scan_ident(lexer, TOKEN_HASH_IDENT, TOKEN_INVALID_TOKEN, TOKEN_INVALID_TOKEN, '#'); case '$': if (match(lexer, '$')) { diff --git a/src/compiler/llvm_codegen_builtins.c b/src/compiler/llvm_codegen_builtins.c index 8953421c6..99dfeac61 100644 --- a/src/compiler/llvm_codegen_builtins.c +++ b/src/compiler/llvm_codegen_builtins.c @@ -1052,6 +1052,7 @@ void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr) case BUILTIN_WIDESTRING_16: case BUILTIN_WIDESTRING_32: case BUILTIN_RND: + case BUILTIN_SPRINTF: UNREACHABLE case BUILTIN_NONE: UNREACHABLE diff --git a/src/compiler/number.c b/src/compiler/number.c index 464097386..4b9950728 100644 --- a/src/compiler/number.c +++ b/src/compiler/number.c @@ -292,7 +292,68 @@ bool expr_const_will_overflow(const ExprConst *expr, TypeKind kind) } - +void expr_const_to_scratch_buffer(const ExprConst *expr) +{ + switch (expr->const_kind) + { + case CONST_POINTER: + if (!expr->ptr) + { + scratch_buffer_append("null"); + } + else + { + scratch_buffer_printf("%p", (void *)(intptr_t)expr->ptr); + } + return; + case CONST_BOOL: + scratch_buffer_append(expr->b ? "true" : "false"); + return; + case CONST_INTEGER: + scratch_buffer_append(int_to_str(expr->ixx, 10, false)); + return; + case CONST_FLOAT: + scratch_buffer_printf("%g", expr->fxx.f); + return; + case CONST_STRING: + scratch_buffer_append_len(expr->bytes.ptr, expr->bytes.len); + return; + case CONST_BYTES: + scratch_buffer_append(""); + return; + case CONST_REF: + scratch_buffer_append(expr->global_ref->name); + return; + case CONST_FAULT: + scratch_buffer_append(expr->fault->name); + return; + case CONST_ENUM: + scratch_buffer_append(expr->enum_val->name); + return; + case CONST_TYPEID: + scratch_buffer_append(expr->typeid->name); + return; + case CONST_MEMBER: + scratch_buffer_append(expr->member.decl->name); + return; + case CONST_SLICE: + case CONST_INITIALIZER: + scratch_buffer_append("constant list"); + return; + case CONST_UNTYPED_LIST: + { + scratch_buffer_append("{"); + FOREACH_IDX(i, Expr *, e, expr->untyped_list) + { + if (i != 0) scratch_buffer_append(", "); + expr_const_to_scratch_buffer(&e->const_expr); + } + scratch_buffer_append("}"); + return; + } + } + UNREACHABLE +} const char *expr_const_to_error_string(const ExprConst *expr) { switch (expr->const_kind) diff --git a/src/compiler/parse_expr.c b/src/compiler/parse_expr.c index 651a64763..d93eacb8d 100644 --- a/src/compiler/parse_expr.c +++ b/src/compiler/parse_expr.c @@ -2071,6 +2071,7 @@ ParseRule rules[TOKEN_EOF + 1] = { [TOKEN_LPAREN] = { parse_grouping_expr, parse_call_expr, PREC_CALL }, [TOKEN_BANGBANG] = { parse_unary_expr, parse_force_unwrap_expr, PREC_CALL }, [TOKEN_LBRACKET] = { NULL, parse_subscript_expr, PREC_CALL }, + [TOKEN_LBRACE] = { parse_initializer_list, parse_generic_expr, PREC_PRIMARY }, [TOKEN_MINUS] = { parse_unary_expr, parse_binary, PREC_ADDITIVE }, [TOKEN_PLUS] = { parse_unary_expr, parse_binary, PREC_ADDITIVE }, [TOKEN_DIV] = { NULL, parse_binary, PREC_MULTIPLICATIVE }, @@ -2124,29 +2125,27 @@ ParseRule rules[TOKEN_EOF + 1] = { [TOKEN_CT_TYPE_IDENT] = { parse_type_identifier, NULL, PREC_NONE }, [TOKEN_HASH_IDENT] = { parse_hash_ident, NULL, PREC_NONE }, [TOKEN_AT_IDENT] = { parse_identifier, NULL, PREC_NONE }, - //[TOKEN_HASH_TYPE_IDENT] = { parse_type_identifier, NULL, PREC_NONE } [TOKEN_ELLIPSIS] = { parse_splat, NULL, PREC_NONE }, [TOKEN_FN] = { parse_lambda, NULL, PREC_NONE }, - [TOKEN_CT_SIZEOF] = { parse_ct_sizeof, NULL, PREC_NONE }, [TOKEN_CT_ALIGNOF] = { parse_ct_call, NULL, PREC_NONE }, [TOKEN_CT_ASSIGNABLE] = { parse_ct_castable, NULL, PREC_NONE }, [TOKEN_CT_DEFINED] = { parse_ct_defined, NULL, PREC_NONE }, - [TOKEN_CT_IS_CONST] = {parse_ct_is_const, NULL, PREC_NONE }, [TOKEN_CT_EMBED] = { parse_ct_embed, NULL, PREC_NONE }, + [TOKEN_CT_EVALTYPE] = { parse_type_expr, NULL, PREC_NONE }, [TOKEN_CT_EVAL] = { parse_ct_eval, NULL, PREC_NONE }, - [TOKEN_CT_FEATURE] = { parse_ct_call, NULL, PREC_NONE }, [TOKEN_CT_EXTNAMEOF] = { parse_ct_call, NULL, PREC_NONE }, - [TOKEN_CT_OFFSETOF] = { parse_ct_call, NULL, PREC_NONE }, + [TOKEN_CT_FEATURE] = { parse_ct_call, NULL, PREC_NONE }, + [TOKEN_CT_IS_CONST] = {parse_ct_is_const, NULL, PREC_NONE }, [TOKEN_CT_NAMEOF] = { parse_ct_call, NULL, PREC_NONE }, + [TOKEN_CT_OFFSETOF] = { parse_ct_call, NULL, PREC_NONE }, [TOKEN_CT_QNAMEOF] = { parse_ct_call, NULL, PREC_NONE }, + [TOKEN_CT_SIZEOF] = { parse_ct_sizeof, NULL, PREC_NONE }, + [TOKEN_CT_STRINGIFY] = { parse_ct_stringify, NULL, PREC_NONE }, [TOKEN_CT_TYPEFROM] = { parse_type_expr, NULL, PREC_NONE }, [TOKEN_CT_TYPEOF] = { parse_type_expr, NULL, PREC_NONE }, - [TOKEN_CT_STRINGIFY] = { parse_ct_stringify, NULL, PREC_NONE }, - [TOKEN_CT_EVALTYPE] = { parse_type_expr, NULL, PREC_NONE }, - [TOKEN_LBRACE] = { parse_initializer_list, parse_generic_expr, PREC_PRIMARY }, - [TOKEN_CT_VACOUNT] = { parse_ct_arg, NULL, PREC_NONE }, [TOKEN_CT_VAARG] = { parse_ct_arg, NULL, PREC_NONE }, - [TOKEN_CT_VATYPE] = { parse_type_expr, NULL, PREC_NONE }, - [TOKEN_CT_VAEXPR] = { parse_ct_arg, NULL, PREC_NONE }, [TOKEN_CT_VACONST] = { parse_ct_arg, NULL, PREC_NONE }, + [TOKEN_CT_VACOUNT] = { parse_ct_arg, NULL, PREC_NONE }, + [TOKEN_CT_VAEXPR] = { parse_ct_arg, NULL, PREC_NONE }, + [TOKEN_CT_VATYPE] = { parse_type_expr, NULL, PREC_NONE }, }; diff --git a/src/compiler/parse_global.c b/src/compiler/parse_global.c index 95347dcac..97277e95c 100644 --- a/src/compiler/parse_global.c +++ b/src/compiler/parse_global.c @@ -1574,10 +1574,6 @@ bool parse_parameters(ParseContext *c, Decl ***params_ref, Variadic *variadic, i ref = true; param_kind = VARDECL_PARAM; break; - case TOKEN_HASH_TYPE_IDENT: - // #Foo (not allowed) - PRINT_ERROR_HERE("An unevaluated expression can never be a type, did you mean to use $Type?"); - return false; case TOKEN_HASH_IDENT: // expression #foo name = symstr(c); @@ -2815,8 +2811,6 @@ static inline bool parse_contract_param(ParseContext *c, AstId *docs, AstId **do case TOKEN_CT_IDENT: case TOKEN_TYPE_IDENT: case TOKEN_CT_CONST_IDENT: - case TOKEN_HASH_CONST_IDENT: - case TOKEN_HASH_TYPE_IDENT: case TOKEN_CT_TYPE_IDENT: case TOKEN_CONST_IDENT: case TOKEN_HASH_IDENT: diff --git a/src/compiler/parse_stmt.c b/src/compiler/parse_stmt.c index a30e66883..be377dce8 100644 --- a/src/compiler/parse_stmt.c +++ b/src/compiler/parse_stmt.c @@ -1259,8 +1259,6 @@ Ast *parse_stmt(ParseContext *c) case TOKEN_LBRACE: return parse_compound_stmt(c); case TYPELIKE_TOKENS: - case TOKEN_HASH_TYPE_IDENT: - case TOKEN_HASH_CONST_IDENT: case TOKEN_HASH_IDENT: case TOKEN_IDENT: case TOKEN_CONST_IDENT: diff --git a/src/compiler/parser_internal.h b/src/compiler/parser_internal.h index 1c50ad4ee..b9febafac 100644 --- a/src/compiler/parser_internal.h +++ b/src/compiler/parser_internal.h @@ -179,7 +179,6 @@ static inline bool parse_next_may_be_type_or_ident(ParseContext *c) { case TOKEN_CONST_IDENT: case TOKEN_IDENT: - case TOKEN_HASH_CONST_IDENT: case TOKEN_HASH_IDENT: case TOKEN_CT_IDENT: case TOKEN_CT_CONST_IDENT: diff --git a/src/compiler/sema_builtins.c b/src/compiler/sema_builtins.c index f09fb7686..01a653ca8 100644 --- a/src/compiler/sema_builtins.c +++ b/src/compiler/sema_builtins.c @@ -38,6 +38,7 @@ static inline bool sema_expr_analyse_swizzle(SemaContext *context, Expr *expr, b static inline int builtin_expected_args(BuiltinFunction func); static inline bool is_valid_atomicity(SemaContext *context, Expr *expr); static bool sema_check_alignment_expression(SemaContext *context, Expr *align); +static bool sema_expr_analyse_sprintf(SemaContext *context, Expr *expr); static bool sema_expr_is_valid_mask_for_value(SemaContext *context, Expr *expr, Expr *value) { @@ -305,6 +306,56 @@ bool sema_expr_analyse_rnd(SemaContext *context UNUSED, Expr *expr) return true; } +static bool sema_expr_analyse_sprintf(SemaContext *context, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + FOREACH(Expr *, e, args) + { + if (!sema_analyse_expr(context, e)) return false; + if (!sema_cast_const(e)) + { + RETURN_SEMA_ERROR(e, "Expected a constant expression."); + } + } + Expr *format = args[0]; + if (!expr_is_const_string(format)) + { + RETURN_SEMA_ERROR(format, "Expected a constant format string."); + } + const char *inner_str = format->const_expr.bytes.ptr; + ArraySize len = format->const_expr.bytes.len; + scratch_buffer_clear(); + ArrayIndex current_index = 1; + ArraySize param_count = vec_size(args); + for (ArraySize i = 0; i < len; i++) + { + char c = inner_str[i]; + if (c == '%') + { + i++; + switch (inner_str[i]) + { + case 's': + if (current_index == param_count) RETURN_SEMA_ERROR(format, "Too many arguments in format string."); + expr_const_to_scratch_buffer(&(args[current_index++]->const_expr)); + continue; + case '%': + scratch_buffer_append_char('%'); + continue; + default: + RETURN_SEMA_ERROR(format, "Only '%%s' is supported for compile time sprintf."); + } + } + scratch_buffer_append_char(c); + } + if (current_index != param_count) + { + RETURN_SEMA_ERROR(format, "Too many arguments to sprintf."); + } + expr_rewrite_const_string(expr, scratch_buffer_copy()); + return true; +} + bool sema_expr_analyse_str_hash(SemaContext *context, Expr *expr) { Expr *inner = expr->call_expr.arguments[0]; @@ -532,6 +583,8 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) switch (func) { + case BUILTIN_SPRINTF: + return sema_expr_analyse_sprintf(context, expr); case BUILTIN_RND: return sema_expr_analyse_rnd(context, expr); case BUILTIN_STR_HASH: @@ -593,6 +646,7 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) case BUILTIN_STR_FIND: case BUILTIN_WIDESTRING_16: case BUILTIN_WIDESTRING_32: + case BUILTIN_SPRINTF: UNREACHABLE case BUILTIN_VECCOMPGE: case BUILTIN_VECCOMPEQ: @@ -1199,10 +1253,11 @@ static inline int builtin_expected_args(BuiltinFunction func) { switch (func) { + case BUILTIN_SPRINTF: case BUILTIN_SYSCALL: case BUILTIN_WIDESTRING_16: case BUILTIN_WIDESTRING_32: - return -1; + return -1; case BUILTIN_SWIZZLE: return -2; case BUILTIN_SWIZZLE2: diff --git a/src/compiler/sema_decls.c b/src/compiler/sema_decls.c index aa39444e5..4a178f3fb 100755 --- a/src/compiler/sema_decls.c +++ b/src/compiler/sema_decls.c @@ -1205,7 +1205,7 @@ static inline bool sema_analyse_signature(SemaContext *context, Signature *sig, { RETURN_SEMA_ERROR(decl, "The format '@format()' index was out of range."); } - if (sig->variadic != VARIADIC_ANY) + if (sig->variadic != VARIADIC_ANY && !is_macro) { RETURN_SEMA_ERROR(decl, "'@format()' is only valid for a function or macro with 'args...' style vaargs."); } diff --git a/src/compiler/sema_stmts.c b/src/compiler/sema_stmts.c index 5a0f32549..3b14f0cd3 100644 --- a/src/compiler/sema_stmts.c +++ b/src/compiler/sema_stmts.c @@ -2919,42 +2919,9 @@ bool sema_analyse_ct_echo_stmt(SemaContext *context, Ast *statement) return false; } printf("] "); - switch (message->const_expr.const_kind) - { - case CONST_FLOAT: - printf("%f\n", (double)message->const_expr.fxx.f); - break; - case CONST_INTEGER: - puts(int_to_str(message->const_expr.ixx, 10, false)); - break; - case CONST_BOOL: - puts(message->const_expr.b ? "true" : "false"); - break; - case CONST_REF: - puts(message->const_expr.global_ref->name); - break; - case CONST_FAULT: - puts(message->const_expr.fault->name); - break; - case CONST_ENUM: - puts(message->const_expr.enum_val->name); - break; - case CONST_STRING: - printf("%.*s\n", EXPAND_EXPR_STRING(message)); - break; - case CONST_POINTER: - printf("%p\n", (void*)(intptr_t)message->const_expr.ptr); - break; - case CONST_TYPEID: - puts(type_to_error_string(message->const_expr.typeid)); - break; - case CONST_BYTES: - case CONST_SLICE: - case CONST_INITIALIZER: - case CONST_UNTYPED_LIST: - case CONST_MEMBER: - RETURN_SEMA_ERROR(message, "Unsupported type for '$echo'"); - } + scratch_buffer_clear(); + expr_const_to_scratch_buffer(&message->const_expr); + puts(scratch_buffer_to_string()); statement->ast_kind = AST_NOP_STMT; return true; } diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index f2da0c806..f050f4422 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -280,6 +280,7 @@ void symtab_init(uint32_t capacity) builtin_list[BUILTIN_STR_FIND] = KW_DEF("str_find"); builtin_list[BUILTIN_SWIZZLE] = KW_DEF("swizzle"); builtin_list[BUILTIN_SWIZZLE2] = KW_DEF("swizzle2"); + builtin_list[BUILTIN_SPRINTF] = KW_DEF("sprintf"); builtin_list[BUILTIN_SQRT] = KW_DEF("sqrt"); builtin_list[BUILTIN_SYSCALL] = KW_DEF("syscall"); builtin_list[BUILTIN_SYSCLOCK] = KW_DEF("sysclock"); diff --git a/src/compiler/tokens.c b/src/compiler/tokens.c index 5a072e849..592b61c4a 100644 --- a/src/compiler/tokens.c +++ b/src/compiler/tokens.c @@ -152,10 +152,6 @@ const char *token_type_to_string(TokenType type) return "CT_TYPE_IDENT"; case TOKEN_HASH_IDENT: return "HASH_IDENT"; - case TOKEN_HASH_CONST_IDENT: - return "HASH_CONST_IDENT"; - case TOKEN_HASH_TYPE_IDENT: - return "HASH_TYPE_IDENT"; case TOKEN_CONST_IDENT: return "CONST_IDENT"; case TOKEN_TYPE_IDENT: diff --git a/test/test_suite/compile_time/ct_sprintf.c3t b/test/test_suite/compile_time/ct_sprintf.c3t new file mode 100644 index 000000000..eeb49cc6e --- /dev/null +++ b/test/test_suite/compile_time/ct_sprintf.c3t @@ -0,0 +1,14 @@ +// #target: macos-x64 +module test; +fn void main() +{ + var $foo = "hello"; + var $c = { 1, 3 }; + String x = $$sprintf("%s %s", $foo, $c); + String y = @sprintf("%s %s", $c, $foo); +} + +/* #expect: test.ll + +@.str = private unnamed_addr constant [13 x i8] c"hello {1, 3}\00", align 1 +@.str.1 = private unnamed_addr constant [13 x i8] c"{1, 3} hello\00", align 1 \ No newline at end of file