diff --git a/src/compiler/ast.c b/src/compiler/ast.c index 1bb1b1b0e..d918265b6 100644 --- a/src/compiler/ast.c +++ b/src/compiler/ast.c @@ -615,6 +615,9 @@ void fprint_expr_recursive(Context *context, FILE *file, Expr *expr, int indent) if (!expr) return; switch (expr->expr_kind) { + case EXPR_BYTES: + DUMP("(bytes"); + break; case EXPR_DECL: DUMP("(decl"); DUMPEXPC(expr); @@ -1111,9 +1114,13 @@ void fprint_decl_recursive(Context *context, FILE *file, Decl *decl, int indent) { DUMPI("(func)"); } - if (decl->attr.domains & ATTR_VAR) + if (decl->attr.domains & ATTR_GLOBAL) { - DUMPI("(var)"); + DUMPI("(global)"); + } + if (decl->attr.domains & ATTR_LOCAL) + { + DUMPI("(local)"); } if (decl->attr.domains & ATTR_ENUM) { diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index 6b2803088..c4923ed8b 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -98,6 +98,11 @@ typedef struct Real i; Real r; } complex; + struct + { + const char *ptr; + uint64_t len; + } bytes; }; // Valid type kinds: // bool, ints, floats, string @@ -924,7 +929,6 @@ typedef struct } ExprLen; - struct Expr_ { ExprKind expr_kind : 8; @@ -1369,6 +1373,11 @@ typedef union const char *string; Real value; struct + { + bool is_base64 : 1; + uint64_t len : 63; + }; + struct { union { @@ -1905,7 +1914,7 @@ bool sema_analyse_expr_of_required_type(Context *context, Type *to, Expr *expr, ArrayIndex sema_get_initializer_const_array_size(Context *context, Expr *initializer, bool *may_be_array, bool *is_const_size); bool sema_analyse_expr(Context *context, Type *to, Expr *expr); bool sema_analyse_decl(Context *context, Decl *decl); -bool sema_analyse_local_decl(Context *context, Decl *decl); +bool sema_analyse_var_decl(Context *context, Decl *decl); bool sema_analyse_ct_assert_stmt(Context *context, Ast *statement); bool sema_analyse_statement(Context *context, Ast *statement); bool sema_expr_analyse_assign_right_side(Context *context, Expr *expr, Type *left_type, Expr *right, ExprFailableStatus lhs_is_failable); diff --git a/src/compiler/copying.c b/src/compiler/copying.c index c03ca0a0e..4580ca2dc 100644 --- a/src/compiler/copying.c +++ b/src/compiler/copying.c @@ -76,6 +76,7 @@ Expr *copy_expr(Expr *source_expr) case EXPR_FLATPATH: case EXPR_UNDEF: case EXPR_NOP: + case EXPR_BYTES: return expr; case EXPR_TRY_DECL: MACRO_COPY_DECL(expr->try_decl_expr.decl); diff --git a/src/compiler/enums.h b/src/compiler/enums.h index 542ec00cd..856f6126f 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -180,6 +180,7 @@ typedef enum EXPR_POISONED, EXPR_ACCESS, EXPR_BINARY, + EXPR_BYTES, EXPR_MACRO_BODY_EXPANSION, EXPR_CALL, EXPR_CAST, @@ -406,6 +407,7 @@ typedef enum TOKEN_INTEGER, // 123 0x23 0b10010 0o327 TOKEN_CHAR_LITERAL, // 'a' 'FO' 'BARS' '\u1232' TOKEN_REAL, // 0x23.2p-2a 43.23e23 + TOKEN_BYTES, // Base64 or Hex TOKEN_COMMENT, // Comment TOKEN_DOC_COMMENT, // Doc Comment @@ -603,17 +605,18 @@ typedef enum typedef enum { ATTR_FUNC = 1 << 0, - ATTR_VAR = 1 << 1, - ATTR_ENUM = 1 << 2, - ATTR_STRUCT = 1 << 3, - ATTR_UNION = 1 << 4, - ATTR_CONST = 1 << 5, - ATTR_ERROR = 1 << 6, - ATTR_TYPEDEF = 1 << 7, - ATTR_MEMBER = 1 << 8, - ATTR_INTERFACE = 1 << 9, - ATTR_CALL = 1 << 10, - ATTR_BITSTRUCT = 1 << 11, + ATTR_GLOBAL = 1 << 1, + ATTR_LOCAL = 1 << 2, + ATTR_ENUM = 1 << 3, + ATTR_STRUCT = 1 << 4, + ATTR_UNION = 1 << 5, + ATTR_CONST = 1 << 6, + ATTR_ERROR = 1 << 7, + ATTR_TYPEDEF = 1 << 8, + ATTR_MEMBER = 1 << 9, + ATTR_INTERFACE = 1 << 10, + ATTR_CALL = 1 << 11, + ATTR_BITSTRUCT = 1 << 12, } AttributeDomain; typedef enum diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 459fb6898..4119f2046 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -709,6 +709,100 @@ static inline bool scan_string(Lexer *lexer) return add_token(lexer, TOKEN_STRING, lexer->lexing_start); } +static inline bool scan_hex_array(Lexer *lexer) +{ + char start_char = next(lexer); // Step past ' or " + const char *hexdata = lexer->current; + char c; + uint64_t len = 0; + while (1) + { + c = next(lexer); + if (c == start_char) break; + if (c == 0) + { + lexer->lexing_start = lexer->current - 1; + return add_error_token(lexer, "The hex string seems to be missing a terminating '%c'", start_char); + } + if (is_hex(c)) + { + len++; + continue; + } + if (!is_whitespace(c)) + { + lexer->lexing_start = hexdata - 1; + lexer->current = hexdata; + return add_error_token(lexer, + "'%c' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9.", + c); + } + } + if (len % 2) + { + return add_error_token(lexer, "The hexadecimal string is not an even length, did you miss a digit somewhere?"); + } + if (!add_token(lexer, TOKEN_BYTES, lexer->lexing_start)) return false; + lexer->latest_token_data->is_base64 = false; + lexer->latest_token_data->len = len / 2; + return true; +} + +static inline bool scan_base64(Lexer *lexer) +{ + next(lexer); // Step past 6 + next(lexer); // Step past 4 + char start_char = next(lexer); // Step past ' or " + const char *b64data = lexer->current; + char c; + unsigned end_len = 0; + uint64_t len = 0; + while (1) + { + c = next(lexer); + if (c == start_char) break; + if (c == 0) + { + lexer->lexing_start = lexer->current - 1; + return add_error_token(lexer, "The base64 string seems to be missing a terminating '%c'", start_char); + } + if (is_base64(c)) + { + if (end_len) + { + lexer->lexing_start = lexer->current - 1; + return add_error_token(lexer, "'%c' can't be placed after an ending '='", c); + } + len++; + continue; + } + if (c == '=') + { + if (end_len > 3) + { + lexer->lexing_start = b64data - 1; + lexer->current = b64data; + return add_error_token(lexer, "There cannot be more than 3 '=' at the end of a base64 string.", c); + } + end_len++; + continue; + } + if (!is_whitespace(c)) + { + lexer->lexing_start = b64data - 1; + lexer->current = b64data; + return add_error_token(lexer, "'%c' is not a valid base64 character.", c); + } + } + uint64_t decoded_len = (3 * len - end_len) / 4; + if (!add_token(lexer, TOKEN_BYTES, lexer->lexing_start)) return false; + lexer->latest_token_data->is_base64 = true; + lexer->latest_token_data->len = decoded_len; + return true; +} + + + #pragma mark --- Lexer doc lexing /** @@ -1116,7 +1210,17 @@ static bool lexer_scan_token_inner(Lexer *lexer, LexMode mode) if (match(lexer, '-')) return add_token(lexer, TOKEN_MINUSMINUS, "--"); if (match(lexer, '=')) return add_token(lexer, TOKEN_MINUS_ASSIGN, "-="); return add_token(lexer, TOKEN_MINUS, "-"); + case 'b': + if (peek(lexer) == '6' && peek_next(lexer) == '4' && (lexer->current[2] == '\'' || lexer->current[2] == '"')) + { + return scan_base64(lexer); + } + FALLTHROUGH; default: + if (c == 'x' && (peek(lexer) == '"' || peek(lexer) == '\'')) + { + return scan_hex_array(lexer); + } if (is_alphanum_(c)) { backtrack(lexer); diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c index 5495dab19..22fa16b50 100644 --- a/src/compiler/llvm_codegen.c +++ b/src/compiler/llvm_codegen.c @@ -293,7 +293,17 @@ static void gencontext_emit_global_variable_definition(GenContext *c, Decl *decl // Skip real constants. if (!decl->type) return; - decl->backend_ref = LLVMAddGlobal(c->module, llvm_get_type(c, decl->type), "tempglobal"); + if (decl->type != type_void) + { + decl->backend_ref = LLVMAddGlobal(c->module, llvm_get_type(c, decl->type), "tempglobal"); + } + if (decl->var.failable) + { + scratch_buffer_clear(); + scratch_buffer_append(decl->external_name); + scratch_buffer_append(".f"); + decl->var.failable_ref = LLVMAddGlobal(c->module, llvm_get_type(c, type_anyerr), scratch_buffer_to_string()); + } } @@ -479,6 +489,10 @@ LLVMValueRef llvm_emit_alloca_aligned(GenContext *c, Type *type, const char *nam void llvm_emit_and_set_decl_alloca(GenContext *c, Decl *decl) { + if (decl->type == type_void) + { + return; + } LLVMTypeRef type = llvm_get_type(c, decl->type); decl->backend_ref = llvm_emit_alloca(c, type, decl->alignment, decl->name ?: "anon"); } @@ -1137,6 +1151,7 @@ void llvm_store_bevalue_dest_aligned(GenContext *c, LLVMValueRef destination, BE void llvm_store_bevalue(GenContext *c, BEValue *destination, BEValue *value) { + if (value->type == type_void) return; assert(llvm_value_is_addr(destination)); llvm_store_bevalue_aligned(c, destination->value, value, destination->alignment); } diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index 0df9ecef1..e4e4731d6 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -2486,6 +2486,22 @@ static void llvm_emit_const_expr(GenContext *c, BEValue *be_value, Expr *expr) Type *type = type_reduced_from_expr(expr)->canonical; switch (expr->const_expr.kind) { + case TYPE_ARRAY: + assert(type->array.base == type_char); + { + LLVMValueRef global_name = LLVMAddGlobal(c->module, LLVMArrayType(llvm_get_type(c, type_char), expr->const_expr.bytes.len), ".bytes"); + LLVMSetLinkage(global_name, LLVMPrivateLinkage); + LLVMSetGlobalConstant(global_name, 1); + + LLVMSetInitializer(global_name, LLVMConstStringInContext(c->context, + expr->const_expr.bytes.ptr, + expr->const_expr.bytes.len, + 1)); + llvm_set_alignment(global_name, 1); + global_name = LLVMConstBitCast(global_name, LLVMPointerType(llvm_get_type(c, type_char), 0)); + llvm_value_set(be_value, global_name, type); + return; + } case ALL_INTS: if (type_is_unsigned(type)) { @@ -3623,6 +3639,7 @@ void llvm_emit_expr(GenContext *c, BEValue *value, Expr *expr) llvm_emit_try_assign_expr(c, value, expr); return; case EXPR_NOP: + case EXPR_BYTES: return; case EXPR_ELSE: gencontext_emit_else_expr(c, value, expr); diff --git a/src/compiler/number.c b/src/compiler/number.c index 647fe6d55..3831170a3 100644 --- a/src/compiler/number.c +++ b/src/compiler/number.c @@ -67,6 +67,9 @@ void expr_const_fprint(FILE *__restrict file, ExprConst *expr) case TYPE_STRLIT: fprintf(file, "%.*s", expr->string.len, expr->string.chars); break; + case TYPE_ARRAY: + fprintf(file, "[byte data]"); + break; default: UNREACHABLE } @@ -187,7 +190,20 @@ bool expr_const_compare(const ExprConst *left, const ExprConst *right, BinaryOp is_eq = true; break; } - is_eq = strncmp(left->string.chars, right->string.chars, left->string.len); + is_eq = !strncmp(left->string.chars, right->string.chars, left->string.len); + break; + case TYPE_ARRAY: + if (left->bytes.len != right->bytes.len) + { + is_eq = false; + break; + } + if (right->bytes.ptr == left->bytes.ptr) + { + is_eq = true; + break; + } + is_eq = !memcmp(left->bytes.ptr, right->bytes.ptr, left->bytes.len); break; default: UNREACHABLE @@ -265,6 +281,8 @@ const char *expr_const_to_error_string(const ExprConst *expr) case TYPE_STRLIT: asprintf(&buff, "\"%*.s\"", expr->string.len, expr->string.chars); return buff; + case TYPE_ARRAY: + return ""; default: UNREACHABLE } diff --git a/src/compiler/parse_expr.c b/src/compiler/parse_expr.c index 1ed0d93dd..e6bda79ba 100644 --- a/src/compiler/parse_expr.c +++ b/src/compiler/parse_expr.c @@ -980,6 +980,111 @@ static Expr *parse_integer(Context *context, Expr *left) return expr_int; } +/** + * Parse hex, skipping over invalid characters. + * @param result_pointer ref to place to put the data + * @param data start pointer + * @param end end pointer + */ +static void parse_hex(char **result_pointer, const char *data, const char *end) +{ + char *data_current = *result_pointer; + assert(data_current); + while (data < end) + { + int val; + int val2; + while ((val = char_to_nibble(*(data++))) < 0) if (data == end) goto DONE; + while ((val2 = char_to_nibble(*(data++))) < 0); + + *(data_current++) = (val << 4) | val2; + } + DONE: + *result_pointer = data_current; +} + +/** + * Slow base64 -> sextet + */ +static char base64_to_sextet(char c) +{ + if (c >= 'A' && c <= 'Z') return c - 'A'; + if (c >= 'a' && c <= 'z') return c - 'a' + 26; + if (c >= '0' && c <= '9') return c - '0' + 52; + if (c == '+') return 62; + if (c == '/') return 63; + if (c == '=') return 0; + return -1; +} +/** + * Parse hex, skipping over invalid characters. + * @param result_pointer ref to place to put the data + * @param data start pointer + * @param end end pointer + */ +static void parse_base64(char **result_pointer, char *result_pointer_end, const char *data, const char *end) +{ + char *data_current = *result_pointer; + assert(data_current); + while (data < end) + { + int val; + int val2; + int val3; + int val4; + while ((val = base64_to_sextet(*(data++))) < 0) if (data == end) goto DONE; + while ((val2 = base64_to_sextet(*(data++))) < 0); + while ((val3 = base64_to_sextet(*(data++))) < 0); + while ((val4 = base64_to_sextet(*(data++))) < 0); + uint32_t triplet = (val << 3 * 6) + (val2 << 2 * 6) + (val3 << 6) + val4; + if (data_current < result_pointer_end) *(data_current++) = (triplet >> 16) & 0xFF; + if (data_current < result_pointer_end) *(data_current++) = (triplet >> 8) & 0xFF; + if (data_current < result_pointer_end) *(data_current++) = triplet & 0xFF; + } + DONE: + *result_pointer = data_current; +} + +static Expr *parse_bytes_expr(Context *context, Expr *left) +{ + assert(!left && "Had left hand side"); + TokenId tok = context->tok.id; + uint64_t len = 0; + while (TOKTYPE(tok) == TOKEN_BYTES) + { + len += TOKDATA(tok)->len; + tok.index++; + } + char *data = len > 0 ? malloc_arena(len) : NULL; + char *data_current = data; + + Expr *expr_bytes = EXPR_NEW_TOKEN(EXPR_CONST, context->tok); + while (context->tok.type == TOKEN_BYTES) + { + TokenData *token_data = tokendata_from_token(context->tok); + SourceLocation *loc = TOKLOC(context->tok); + if (token_data->is_base64) + { + const char *base64data = &loc->file->contents[loc->start] + 4; + const char *end = base64data + loc->length - 1; + parse_base64(&data_current, data_current + token_data->len, base64data, end); + } + else + { + const char *hexdata = &loc->file->contents[loc->start] + 2; + const char *end = hexdata + loc->length - 1; + parse_hex(&data_current, hexdata, end); + } + advance(context); + } + expr_bytes->const_expr.bytes.ptr = data; + expr_bytes->const_expr.bytes.len = len; + expr_bytes->const_expr.kind = TYPE_ARRAY; + expr_set_type(expr_bytes, type_get_array(type_char, len)); + assert(data + len == data_current); + return expr_bytes; +} + static Expr *parse_char_lit(Context *context, Expr *left) { assert(!left && "Had left hand side"); @@ -1274,6 +1379,7 @@ ParseRule rules[TOKEN_EOF + 1] = { [TOKEN_STAR] = { parse_unary_expr, parse_binary, PREC_MULTIPLICATIVE }, [TOKEN_DOT] = { NULL, parse_access_expr, PREC_CALL }, [TOKEN_BANG] = { parse_unary_expr, parse_failable, PREC_UNARY }, + [TOKEN_BYTES] = { parse_bytes_expr, NULL, PREC_NONE }, [TOKEN_BIT_NOT] = { parse_unary_expr, NULL, PREC_UNARY }, [TOKEN_BIT_XOR] = { NULL, parse_binary, PREC_BIT }, [TOKEN_BIT_OR] = { NULL, parse_binary, PREC_BIT }, diff --git a/src/compiler/parse_global.c b/src/compiler/parse_global.c index a1b6c1d74..f6a47d695 100644 --- a/src/compiler/parse_global.c +++ b/src/compiler/parse_global.c @@ -981,14 +981,19 @@ static inline Decl *parse_global_declaration(Context *context, Visibility visibi { TypeInfo *type = TRY_TYPE_OR(parse_type(context), poisoned_decl); + bool failable = try_consume(context, TOKEN_BANG); + Decl *decl = decl_new_var(context->tok.id, type, VARDECL_GLOBAL, visibility); + decl->var.failable = failable; + if (TOKEN_IS(TOKEN_CONST_IDENT)) { SEMA_TOKEN_ERROR(context->tok, "This looks like a constant variable, did you forget 'const'?"); return poisoned_decl; } + if (!try_consume(context, TOKEN_IDENT)) { if (token_is_some_ident(context->tok.type)) @@ -996,7 +1001,8 @@ static inline Decl *parse_global_declaration(Context *context, Visibility visibi SEMA_TOKEN_ERROR(context->tok, "I expected a variable name here, but global variables need to start with lower case."); return poisoned_decl; } - CONSUME_OR(TOKEN_IDENT, poisoned_decl); + SEMA_TOKEN_ERROR(context->tok, "The name of a global variable was expected here"); + return poisoned_decl; } if (!parse_attributes(context, &decl->attributes)) return poisoned_decl; diff --git a/src/compiler/parse_stmt.c b/src/compiler/parse_stmt.c index f2fd93016..827143acb 100644 --- a/src/compiler/parse_stmt.c +++ b/src/compiler/parse_stmt.c @@ -1059,6 +1059,7 @@ Ast *parse_stmt(Context *context) case TOKEN_CT_NAMEOF: case TOKEN_TRY: case TOKEN_CATCH: + case TOKEN_BYTES: return parse_expr_stmt(context); case TOKEN_ASSERT: return parse_assert_stmt(context); diff --git a/src/compiler/sema_decls.c b/src/compiler/sema_decls.c index 19f3099ef..c641e77b3 100644 --- a/src/compiler/sema_decls.c +++ b/src/compiler/sema_decls.c @@ -227,7 +227,7 @@ static bool sema_analyse_struct_members(Context *context, Decl *decl, Decl **mem for (unsigned j = 0; j < count; j++) { Attr *attribute = attributes[j]; - if (!sema_analyse_attribute(context, attribute, ATTR_VAR)) return false; + if (!sema_analyse_attribute(context, attribute, ATTR_GLOBAL)) return false; if (TOKSTR(attribute->name) == kw_align) { member_alignment = attribute->alignment; @@ -821,6 +821,8 @@ static const char *attribute_domain_to_string(AttributeDomain domain) { switch (domain) { + case ATTR_LOCAL: + return "local variable"; case ATTR_BITSTRUCT: return "bitstruct"; case ATTR_INTERFACE: @@ -829,8 +831,8 @@ static const char *attribute_domain_to_string(AttributeDomain domain) return "member"; case ATTR_FUNC: return "function"; - case ATTR_VAR: - return "variable"; + case ATTR_GLOBAL: + return "global variable"; case ATTR_ENUM: return "enum"; case ATTR_STRUCT: @@ -857,12 +859,12 @@ AttributeType sema_analyse_attribute(Context *context, Attr *attr, AttributeDoma return ATTRIBUTE_NONE; } static AttributeDomain attribute_domain[NUMBER_OF_ATTRIBUTES] = { - [ATTRIBUTE_WEAK] = ATTR_FUNC | ATTR_CONST | ATTR_VAR, + [ATTRIBUTE_WEAK] = ATTR_FUNC | ATTR_CONST | ATTR_GLOBAL, [ATTRIBUTE_EXTNAME] = ~ATTR_CALL, - [ATTRIBUTE_SECTION] = ATTR_FUNC | ATTR_CONST | ATTR_VAR, + [ATTRIBUTE_SECTION] = ATTR_FUNC | ATTR_CONST | ATTR_GLOBAL, [ATTRIBUTE_PACKED] = ATTR_STRUCT | ATTR_UNION, [ATTRIBUTE_NORETURN] = ATTR_FUNC, - [ATTRIBUTE_ALIGN] = ATTR_FUNC | ATTR_CONST | ATTR_VAR | ATTR_STRUCT | ATTR_UNION | ATTR_MEMBER, + [ATTRIBUTE_ALIGN] = ATTR_FUNC | ATTR_CONST | ATTR_LOCAL | ATTR_GLOBAL | ATTR_STRUCT | ATTR_UNION | ATTR_MEMBER, [ATTRIBUTE_INLINE] = ATTR_FUNC | ATTR_CALL, [ATTRIBUTE_NOINLINE] = ATTR_FUNC | ATTR_CALL, [ATTRIBUTE_OPAQUE] = ATTR_STRUCT | ATTR_UNION, @@ -1233,20 +1235,22 @@ static inline bool sema_analyse_macro(Context *context, Decl *decl) } - -static inline bool sema_analyse_global(Context *context, Decl *decl) +bool sema_analyse_attributes_for_var(Context *context, Decl *decl) { - if (decl->var.type_info) + + AttributeDomain domain; + switch (decl->var.kind) { - if (!sema_resolve_type_info_maybe_inferred(context, decl->var.type_info, decl->var.init_expr != NULL)) return false; - decl->type = decl->var.type_info->type; + case VARDECL_CONST: + domain = ATTR_CONST; + break; + case VARDECL_GLOBAL: + domain = ATTR_GLOBAL; + break; + default: + domain = ATTR_LOCAL; + break; } - - // We expect a constant to actually be parsed correctly so that it has a value, so - // this should always be true. - assert(decl->type || decl->var.kind == VARDECL_CONST); - - AttributeDomain domain = decl->var.kind == VARDECL_CONST ? ATTR_CONST : ATTR_FUNC; VECEACH(decl->attributes, i) { Attr *attr = decl->attributes[i]; @@ -1270,7 +1274,8 @@ static inline bool sema_analyse_global(Context *context, Decl *decl) had = decl->alignment != 0; decl->alignment = attr->alignment; break; - case ATTRIBUTE_WEAK: SET_ATTR(attr_weak); + case ATTRIBUTE_WEAK: + SET_ATTR(attr_weak); default: UNREACHABLE } @@ -1281,25 +1286,116 @@ static inline bool sema_analyse_global(Context *context, Decl *decl) return decl_poison(decl); } } + return true; +} - // If we already have the type resolved then we can pretend to be done, - // this will help in case we otherwise would get circular references. - if (decl->type) +/** + * Analyse a regular global or local declaration, e.g. int x = 123 + */ +bool sema_analyse_var_decl(Context *context, Decl *decl) +{ + assert(decl->decl_kind == DECL_VAR && "Unexpected declaration type"); + + // We expect a constant to actually be parsed correctly so that it has a value, so + // this should always be true. + assert(decl->var.type_info || decl->var.kind == VARDECL_CONST); + + bool is_global = decl->var.kind == VARDECL_GLOBAL || decl->var.kind == VARDECL_CONST; + + if (!sema_analyse_attributes_for_var(context, decl)) return false; + + // TODO unify with global decl analysis + if (is_global) { - decl->resolve_status = RESOLVE_DONE; - if (!decl->alignment) decl->alignment = type_alloca_alignment(decl->type); + + } + else + { + // Add a local to the current context, will throw error on shadowing. + if (!sema_add_local(context, decl)) return decl_poison(decl); } - // Check the initializer. - if (decl->var.init_expr && decl->type) + // 1. Local constants: const int FOO = 123. + if (decl->var.kind == VARDECL_CONST) { + Expr *init_expr = decl->var.init_expr; + // 1a. We require an init expression. + if (!init_expr) + { + SEMA_ERROR(decl, "Constants need to have an initial value."); + return false; + } + // 1b. We require defined constants + if (init_expr->expr_kind == EXPR_UNDEF) + { + SEMA_ERROR(decl, "Constants cannot be undefined."); + return false; + } + if (!decl->var.type_info) + { + if (!sema_analyse_expr(context, NULL, init_expr)) return false; + decl->type = init_expr->type; + if (!decl->alignment) decl->alignment = type_alloca_alignment(decl->type); + + // Skip further evaluation. + goto EXIT_OK; + } + } + + if (!sema_resolve_type_info_maybe_inferred(context, decl->var.type_info, decl->var.init_expr != NULL)) return decl_poison(decl); + decl->type = decl->var.type_info->type; + + if (decl->var.is_static) + { + scratch_buffer_clear(); + scratch_buffer_append(context->active_function_for_analysis->name); + scratch_buffer_append_char('.'); + scratch_buffer_append(decl->name); + decl->external_name = scratch_buffer_interned(); + } + + if (decl->var.init_expr) + { + bool type_is_inferred = decl->type->type_kind == TYPE_INFERRED_ARRAY; + Expr *init = decl->var.init_expr; + + // Handle explicit undef + if (init->expr_kind == EXPR_UNDEF) + { + if (type_is_inferred) + { + SEMA_ERROR(decl->var.type_info, "Size of the array cannot be inferred with explicit undef."); + return false; + } + goto EXIT_OK; + } + + if (!type_is_inferred) + { + // Pre resolve to avoid problem with recursive definitions. + decl->resolve_status = RESOLVE_DONE; + if (!decl->alignment) decl->alignment = type_alloca_alignment(decl->type); + } + if (!sema_expr_analyse_assign_right_side(context, NULL, decl->type, init, decl->var.failable || decl->var.unwrap ? FAILABLE_YES : FAILABLE_NO)) return decl_poison(decl); + + if (type_is_inferred) + { + Type *right_side_type = init->type->canonical; + assert(right_side_type->type_kind == TYPE_ARRAY); + decl->type = type_get_array(decl->type->array.base, right_side_type->array.len); + } + else if (decl->type) + { + expr_set_type(decl->var.init_expr, decl->type); + } + Expr *init_expr = decl->var.init_expr; // 1. Check type. if (!sema_analyse_expr_of_required_type(context, decl->type, init_expr, false)) return false; // 2. Check const-ness - if (!init_expr->constant) + if ((is_global || decl->var.is_static) && !init_expr->constant) { // 3. Special case is when the init expression is the reference // to a constant global structure. @@ -1322,30 +1418,22 @@ static inline bool sema_analyse_global(Context *context, Decl *decl) return false; } } - - if (decl->type->type_kind == TYPE_INFERRED_ARRAY) + else { - assert(init_expr->type->canonical->type_kind == TYPE_ARRAY); - decl->type = type_get_array(decl->type->array.base, init_expr->type->canonical->array.len); + if (decl->var.unwrap && !init->failable) + { + SEMA_ERROR(decl->var.init_expr, "A failable expression was expected here."); + return decl_poison(decl); + } } } - - switch (decl->var.kind) - { - case VARDECL_CONST: - assert(decl->var.init_expr); - return true; - case VARDECL_GLOBAL: - return true; - default: - eprintf("Decl %s %d\n", decl->name, decl->var.kind); - UNREACHABLE - } + EXIT_OK: + if (!decl->alignment) decl->alignment = type_alloca_alignment(decl->type); + return true; } - static Context *copy_context(Module *module, Context *c) { Context *copy = context_create(c->file); @@ -1532,7 +1620,7 @@ bool sema_analyse_decl(Context *context, Decl *decl) if (!sema_analyse_macro(context, decl)) return decl_poison(decl); break; case DECL_VAR: - if (!sema_analyse_global(context, decl)) return decl_poison(decl); + if (!sema_analyse_var_decl(context, decl)) return decl_poison(decl); decl_set_external_name(decl); break; case DECL_DISTINCT: diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c index 5e7238be4..b47f9a327 100644 --- a/src/compiler/sema_expr.c +++ b/src/compiler/sema_expr.c @@ -4581,17 +4581,22 @@ static bool sema_expr_analyse_deref(Context *context, Expr *expr, Expr *inner) // 1. Check that we have a pointer, or dereference is not allowed. if (canonical->type_kind != TYPE_POINTER) { - SEMA_ERROR(inner, "Cannot dereference a value of type '%s'", type_to_error_string(inner->type)); + SEMA_ERROR(inner, "Cannot dereference a value of type '%s', it must be a pointer.", type_to_error_string(inner->type)); + return false; + } + if (canonical->pointer == type_void) + { + SEMA_ERROR(inner, "A 'void*' cannot be dereferenced, you need to first cast it to a concrete type."); return false; } // 2. This could be a constant, in which case it is a null which is an error. if (inner->expr_kind == EXPR_CONST) { - SEMA_ERROR(inner, "Dereferencing null is not allowed."); + SEMA_ERROR(inner, "Dereferencing null is not allowed, did you do it by mistake?"); return false; } // 3. Now the type might not be a pointer because of a typedef, - // otherwise we need to use the the canonical representation. + // otherwise we need to use the canonical representation. Type *deref_type = inner->type->type_kind != TYPE_POINTER ? inner->type : canonical; // 4. And... set the type. @@ -4608,11 +4613,30 @@ static inline bool sema_take_addr_of_var(Expr *expr, Decl *decl, bool *is_consta switch (decl->var.kind) { case VARDECL_GLOBAL: + if (decl->type == type_void) + { + SEMA_ERROR(expr, "You cannot take the address of a global of type '%s'.", + decl->var.failable ? "void!" : "void"); + return false; + } *is_constant = true; return true; case VARDECL_LOCAL: + if (decl->type == type_void) + { + SEMA_ERROR(expr, "You cannot take the address of a variable with type '%s'.", + decl->var.failable ? "void!" : "void"); + return false; + } + return true; case VARDECL_PARAM: case VARDECL_PARAM_REF: + if (decl->type == type_void) + { + SEMA_ERROR(expr, "You cannot take the address of a parameter with type '%s'.", + decl->var.failable ? "void!" : "void"); + return false; + } return true; case VARDECL_CONST: *is_constant = true; @@ -4622,6 +4646,7 @@ static inline bool sema_take_addr_of_var(Expr *expr, Decl *decl, bool *is_consta SEMA_PREV(decl, "The constant was defined here."); return false; } + assert(decl->type != type_void); return true; case VARDECL_PARAM_EXPR: SEMA_ERROR(expr, "It is not possible to take the address of a captured expression, but you can use && to take a reference to the temporary value."); @@ -4639,6 +4664,7 @@ static inline bool sema_take_addr_of_var(Expr *expr, Decl *decl, bool *is_consta UNREACHABLE } UNREACHABLE + } static inline bool sema_take_addr_of_ident(Expr *inner, bool *is_constant) @@ -4836,6 +4862,9 @@ static bool sema_expr_analyse_not(Expr *expr, Expr *inner) case TYPE_STRLIT: expr->const_expr.b = !inner->const_expr.string.len; break; + case TYPE_ARRAY: + expr->const_expr.b = !inner->const_expr.bytes.len; + break; case TYPE_ERRTYPE: case TYPE_ENUM: TODO @@ -6050,7 +6079,7 @@ static inline bool sema_expr_analyse_ct_call(Context *context, Type *to, Expr *e static inline bool sema_expr_analyse_decl(Context *context, Type *to, Expr *expr) { - if (!sema_analyse_local_decl(context, expr->decl_expr)) return false; + if (!sema_analyse_var_decl(context, expr->decl_expr)) return false; expr_set_type(expr, expr->decl_expr->type); expr->pure = !expr->decl_expr->var.init_expr || expr->decl_expr->var.init_expr->pure; expr->constant = expr->decl_expr->var.kind == VARDECL_CONST; @@ -6114,6 +6143,7 @@ static inline bool sema_analyse_expr_dispatch(Context *context, Type *to, Expr * case EXPR_GUARD: return sema_expr_analyse_guard(context, to, expr); case EXPR_CONST: + case EXPR_BYTES: return sema_expr_analyse_const(to, expr); case EXPR_BINARY: if (!sema_expr_analyse_binary(context, to, expr)) return false; diff --git a/src/compiler/sema_stmts.c b/src/compiler/sema_stmts.c index 0af5daae1..9e060867f 100644 --- a/src/compiler/sema_stmts.c +++ b/src/compiler/sema_stmts.c @@ -242,7 +242,7 @@ static inline bool sema_analyse_try_unwrap(Context *context, Expr *expr) Decl *decl = decl_new_var(ident_token, var_type, VARDECL_LOCAL, VISIBLE_LOCAL); // 4e. Analyse it - if (!sema_analyse_local_decl(context, decl)) return false; + if (!sema_analyse_var_decl(context, decl)) return false; expr->try_unwrap_expr.decl = decl; } @@ -351,7 +351,7 @@ static inline bool sema_analyse_catch_unwrap(Context *context, Expr *expr) decl->var.init_expr = expr_new(EXPR_UNDEF, decl->span); // 4e. Analyse it - if (!sema_analyse_local_decl(context, decl)) return false; + if (!sema_analyse_var_decl(context, decl)) return false; expr->catch_unwrap_expr.decl = decl; expr->catch_unwrap_expr.lhs = NULL; diff --git a/src/compiler/tokens.c b/src/compiler/tokens.c index a24c76ebd..e3b1bea11 100644 --- a/src/compiler/tokens.c +++ b/src/compiler/tokens.c @@ -166,6 +166,8 @@ const char *token_type_to_string(TokenType type) return "FLOAT"; case TOKEN_CHAR_LITERAL: return "CHAR_LITERAL"; + case TOKEN_BYTES: + return "BYTES"; // Comments case TOKEN_COMMENT: diff --git a/src/compiler/types.c b/src/compiler/types.c index 990f8ba1f..5013ec98b 100644 --- a/src/compiler/types.c +++ b/src/compiler/types.c @@ -655,7 +655,6 @@ AlignSize type_abi_alignment(Type *type) { case TYPE_POISONED: case TYPE_TYPEINFO: - case TYPE_INFERRED_ARRAY: UNREACHABLE; case TYPE_BITSTRUCT: return type_abi_alignment(type->decl->bitstruct.base_type->type); @@ -698,6 +697,7 @@ AlignSize type_abi_alignment(Type *type) case TYPE_STRLIT: return t.iptr.canonical->builtin.abi_alignment; case TYPE_ARRAY: + case TYPE_INFERRED_ARRAY: return type_abi_alignment(type->array.base); case TYPE_SUBARRAY: return alignment_subarray; diff --git a/src/utils/lib.h b/src/utils/lib.h index 2e0f73495..68aa7ab64 100644 --- a/src/utils/lib.h +++ b/src/utils/lib.h @@ -217,6 +217,14 @@ static inline signed char is_valid_escape(char c) } } +static inline bool is_base64(char c) +{ + return (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z') + || (c >= '0' && c <= '9') + || c == '+' || c == '/'; +} + static inline bool is_hex(char c) { switch (c) @@ -233,6 +241,20 @@ static inline bool is_hex(char c) } } +static inline bool is_whitespace(char c) +{ + switch (c) + { + case ' ': + case '\t': + case '\n': + case '\r': + return true; + default: + return false; + } +} + static inline bool is_alphanum_(char c) { switch (c) diff --git a/test/test_suite/constants/byte_literal_fail_base64.c3 b/test/test_suite/constants/byte_literal_fail_base64.c3 new file mode 100644 index 000000000..ce3837ea6 --- /dev/null +++ b/test/test_suite/constants/byte_literal_fail_base64.c3 @@ -0,0 +1 @@ +char[*] foo64 = b64"SGVsbG8g!V29ybGQ="; // #error: '!' is not a valid base64 character \ No newline at end of file diff --git a/test/test_suite/constants/byte_literal_fail_base64_2.c3 b/test/test_suite/constants/byte_literal_fail_base64_2.c3 new file mode 100644 index 000000000..443883c14 --- /dev/null +++ b/test/test_suite/constants/byte_literal_fail_base64_2.c3 @@ -0,0 +1 @@ +char[*] foo64 = b64"SGVsbG8gV29y=bGQ="; // #error: 'b' can't be placed after an ending '=' \ No newline at end of file diff --git a/test/test_suite/constants/byte_literal_fail_base64_4.c3 b/test/test_suite/constants/byte_literal_fail_base64_4.c3 new file mode 100644 index 000000000..c4771d466 --- /dev/null +++ b/test/test_suite/constants/byte_literal_fail_base64_4.c3 @@ -0,0 +1 @@ +char[*] foo64 = b64"SGVsbG8gV29ybGQ===="; \ No newline at end of file diff --git a/test/test_suite/constants/byte_literal_fail_hex.c3 b/test/test_suite/constants/byte_literal_fail_hex.c3 new file mode 100644 index 000000000..115e3d4ba --- /dev/null +++ b/test/test_suite/constants/byte_literal_fail_hex.c3 @@ -0,0 +1 @@ +char[*] foo64 = x"abc def ^"; // #error: '^' isn't a valid hexadecimal digit, all digits should be a-z, A-Z and 0-9. diff --git a/test/test_suite/constants/byte_literals.c3t b/test/test_suite/constants/byte_literals.c3t new file mode 100644 index 000000000..fa1647720 --- /dev/null +++ b/test/test_suite/constants/byte_literals.c3t @@ -0,0 +1,13 @@ +char[*] foob = x"a0"; +char[*] fooz = x"00aabbccddeeff"; +char[*] fooy = x"dead beef"; +char[*] foow = x"4549234d e d"; +char[*] foo64 = b64"SGVsbG8gV29ybGQ="; + +// #expect: byte_literals.ll + +@.bytes = private constant [1 x i8] c"\A0", align 1 +@.bytes.5 = private constant [7 x i8] c"\00\AA\BB\CC\DD\EE\FF", align 1 +@.bytes.6 = private constant [4 x i8] c"\DE\AD\BE\EF", align 1 +@.bytes.7 = private constant [5 x i8] c"EI#M\ED", align 1 +@.bytes.8 = private constant [11 x i8] c"Hello World", align 1