From ce68bda86c276bcb2d2989aac3029240aefe6489 Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Tue, 28 Jul 2020 17:28:20 +0200 Subject: [PATCH] Corrected ptr->bool conversion. Avoid checking function body if function is invalid. Switch defer test cases. Slices with test cases. --- resources/testfragments/super_simple.c3 | 23 +- src/compiler/ast.c | 23 +- src/compiler/compiler_internal.h | 26 ++ src/compiler/enums.h | 7 +- src/compiler/llvm_codegen.c | 2 - src/compiler/llvm_codegen_expr.c | 395 ++++++++++++++++-- src/compiler/llvm_codegen_internal.h | 6 +- src/compiler/llvm_codegen_stmt.c | 2 +- src/compiler/parse_expr.c | 84 ++-- src/compiler/sema_expr.c | 293 +++++++++---- src/compiler/sema_types.c | 6 + src/compiler/symtab.c | 3 +- .../subarrays/slice_negative_len.c3 | 65 +++ test/test_suite/subarrays/slice_offset.c3t | 22 + .../subarrays/slice_offset_neg_end.c3t | 22 + .../subarrays/slice_offset_neg_start.c3t | 22 + test/test_suite/subarrays/slice_start.c3t | 22 + test/test_suite/subarrays/slice_syntax.c3 | 24 ++ 18 files changed, 874 insertions(+), 173 deletions(-) create mode 100644 test/test_suite/subarrays/slice_negative_len.c3 create mode 100644 test/test_suite/subarrays/slice_offset.c3t create mode 100644 test/test_suite/subarrays/slice_offset_neg_end.c3t create mode 100644 test/test_suite/subarrays/slice_offset_neg_start.c3t create mode 100644 test/test_suite/subarrays/slice_start.c3t create mode 100644 test/test_suite/subarrays/slice_syntax.c3 diff --git a/resources/testfragments/super_simple.c3 b/resources/testfragments/super_simple.c3 index 2a0073f40..0f73da979 100644 --- a/resources/testfragments/super_simple.c3 +++ b/resources/testfragments/super_simple.c3 @@ -1218,11 +1218,30 @@ public func int! 
decode(char[] infile, byte[] out) func int main(int x) { - int[3] feok2 = { 1, 8, 100}; + int[6] feok2 = { 1, 8, 100, 293, 23982, 34}; int[] feok = &feok2; + int[] flok = feok2[3..6]; + int[] flak = flok[1..2]; + printf("HEllo\n"); printf("Feok: %d\n", feok[0]); printf("Feok: %d\n", feok[1]); - printf("Len: %d", feok.len); + printf("Len: %d\n", feok.len); + printf("Len: %d\n", feok2.len); + printf("Len: %d\n", flok.len); + printf("Flok: %d\n", flok[0]); + printf("Flak: %d\n", flak[0]); + printf("Len: %d\n", flak.len); + feok2[1..^2] = 0xAA; + printf("Flok[0]: %d\n", flok[0]); + printf("Flok[1]: %d\n", flok[1]); + printf("Flok[2]: %d\n", flok[2]); + printf("Feok[0]: %d\n", feok[0]); + printf("Feok[1]: %d\n", feok[1]); + printf("Feok[2]: %d\n", feok[2]); + for (int ef = 0; ef < 6; ef++) + { + printf("feok2[%d] = %d\n", ef, feok2[ef]); + } baz::runBaz(); show(); testMacros(); diff --git a/src/compiler/ast.c b/src/compiler/ast.c index 73fa7602e..accb42ff8 100644 --- a/src/compiler/ast.c +++ b/src/compiler/ast.c @@ -483,6 +483,17 @@ void fprint_expr_recursive(Context *context, FILE *file, Expr *expr, int indent) if (!expr) return; switch (expr->expr_kind) { + case EXPR_SLICE_ASSIGN: + DUMP("(sliceassign"); + DUMPEXPC(expr); + DUMPEXPR(expr->slice_assign_expr.left); + DUMPEXPR(expr->slice_assign_expr.right); + DUMPEND(); + case EXPR_LEN: + DUMP("(len"); + DUMPEXPC(expr); + DUMPEXPR(expr->len_expr.inner); + DUMPEND(); case EXPR_DECL_LIST: DUMP("(decllist"); DUMPASTS(expr->dexpr_list_expr); @@ -626,6 +637,13 @@ void fprint_expr_recursive(Context *context, FILE *file, Expr *expr, int indent) DUMPEXPR(expr->subscript_expr.expr); DUMPEXPC(expr->subscript_expr.index); DUMPEND(); + case EXPR_SLICE: + DUMP("(slice"); + DUMPEXPC(expr); + DUMPEXPR(expr->slice_expr.expr); + DUMPEXPR(expr->slice_expr.start); + DUMPEXPR(expr->slice_expr.end); + DUMPEND(); case EXPR_GUARD: DUMP("(guard"); DUMPEXPR(expr->guard_expr.inner); @@ -675,11 +693,6 @@ void fprint_expr_recursive(Context 
*context, FILE *file, Expr *expr, int indent) DUMPEXPR(expr->expr_scope.expr); // TODO defers. DUMPEND(); - case EXPR_RANGE: - DUMP("(range"); - DUMPEXPR(expr->range_expr.left); - DUMPEXPR(expr->range_expr.right); - DUMPEND(); case EXPR_DESIGNATED_INITIALIZER: DUMP("(designated-initializer"); // TODO path diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index b4b48d41c..c2eacc94e 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -524,10 +524,27 @@ typedef struct typedef struct { + bool from_back : 1; Expr *expr; Expr *index; } ExprSubscript; +typedef struct +{ + Expr *expr; + bool start_from_back : 1; + bool end_from_back : 1; + Expr *start; + Expr *end; +} ExprSlice; + +typedef struct +{ + Expr *left; + Expr *right; +} ExprSliceAssign; + + typedef struct { Expr *parent; @@ -621,6 +638,11 @@ typedef struct AstId defer; } ExprGuard; +typedef struct +{ + Expr *inner; +} ExprLen; + struct _Expr { ExprKind expr_kind : 8; @@ -631,6 +653,7 @@ struct _Expr union { ExprDesignatedInit designated_init_expr; Expr *group_expr; + ExprLen len_expr; ExprCast cast_expr; Expr *typeof_expr; ExprConst const_expr; @@ -640,11 +663,13 @@ struct _Expr ExprGuard guard_expr; Expr *trycatch_expr; ExprElse else_expr; + ExprSliceAssign slice_assign_expr; ExprBinary binary_expr; ExprTernary ternary_expr; ExprUnary unary_expr; ExprPostUnary post_expr; ExprCall call_expr; + ExprSlice slice_expr; ExprSubscript subscript_expr; ExprAccess access_expr; ExprIdentifier identifier_expr; @@ -1106,6 +1131,7 @@ extern const char *attribute_list[NUMBER_OF_ATTRIBUTES]; extern const char *kw_main; extern const char *kw_sizeof; extern const char *kw_offsetof; +extern const char *kw_len; #define AST_NEW_TOKEN(_kind, _token) new_ast(_kind, source_span_from_token_id(_token.id)) #define AST_NEW(_kind, _loc) new_ast(_kind, _loc) diff --git a/src/compiler/enums.h b/src/compiler/enums.h index d6918c71d..b30ab3426 100644 --- a/src/compiler/enums.h 
+++ b/src/compiler/enums.h @@ -177,6 +177,8 @@ typedef enum EXPR_CALL, EXPR_GROUP, EXPR_SUBSCRIPT, + EXPR_SLICE, + EXPR_SLICE_ASSIGN, EXPR_ACCESS, EXPR_INITIALIZER_LIST, EXPR_EXPRESSION_LIST, @@ -185,11 +187,11 @@ typedef enum EXPR_SCOPED_EXPR, EXPR_EXPR_BLOCK, EXPR_MACRO_BLOCK, - EXPR_RANGE, EXPR_DESIGNATED_INITIALIZER, EXPR_COMPOUND_LITERAL, EXPR_FAILABLE, - EXPR_DECL_LIST + EXPR_DECL_LIST, + EXPR_LEN, } ExprKind; typedef enum @@ -206,7 +208,6 @@ typedef enum PREC_ASSIGNMENT, // =, *=, /=, %=, +=, etc PREC_TRY_ELSE, // try and else PREC_TERNARY, // ?: - PREC_RANGE, // ... PREC_LOGICAL, // && || PREC_RELATIONAL, // < > <= >= == != PREC_ADDITIVE, // + - diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c index ffb886473..34f29e7fe 100644 --- a/src/compiler/llvm_codegen.c +++ b/src/compiler/llvm_codegen.c @@ -58,8 +58,6 @@ LLVMValueRef gencontext_emit_memclear(GenContext *context, LLVMValueRef ref, Typ } - - static void gencontext_emit_global_variable_definition(GenContext *context, Decl *decl) { assert(decl->var.kind == VARDECL_GLOBAL); diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index 79512c571..e0cdfadb4 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -6,6 +6,7 @@ #include "compiler_internal.h" #include "bigint.h" +static LLVMValueRef gencontext_emit_int_comparison(GenContext *context, Type *lhs_type, Type *rhs_type, LLVMValueRef lhs_value, LLVMValueRef rhs_value, BinaryOp binary_op); static inline LLVMValueRef gencontext_emit_add_int(GenContext *context, Type *type, bool use_mod, LLVMValueRef left, LLVMValueRef right) { @@ -17,17 +18,16 @@ static inline LLVMValueRef gencontext_emit_add_int(GenContext *context, Type *ty if (build_options.debug_mode) { LLVMTypeRef type_to_use = llvm_type(type->canonical); - LLVMTypeRef types[2] = { type_to_use, type_to_use }; LLVMValueRef args[2] = { left, right }; assert(type->canonical == type); LLVMValueRef add_res; if 
(type_is_unsigned(type)) { - add_res = gencontext_emit_call_intrinsic(context, uadd_overflow_intrinsic_id, types, args, 2); + add_res = gencontext_emit_call_intrinsic(context, uadd_overflow_intrinsic_id, &type_to_use, 1, args, 2); } else { - add_res = gencontext_emit_call_intrinsic(context, sadd_overflow_intrinsic_id, types, args, 2); + add_res = gencontext_emit_call_intrinsic(context, sadd_overflow_intrinsic_id, &type_to_use, 1, args, 2); } LLVMValueRef result = LLVMBuildExtractValue(context->builder, add_res, 0, ""); LLVMValueRef ok = LLVMBuildExtractValue(context->builder, add_res, 1, ""); @@ -49,17 +49,16 @@ static inline LLVMValueRef gencontext_emit_sub_int(GenContext *context, Type *ty if (build_options.debug_mode) { LLVMTypeRef type_to_use = llvm_type(type); - LLVMTypeRef types[2] = { type_to_use, type_to_use }; LLVMValueRef args[2] = { left, right }; assert(type->canonical == type); LLVMValueRef add_res; if (type_is_unsigned(type)) { - add_res = gencontext_emit_call_intrinsic(context, usub_overflow_intrinsic_id, types, args, 2); + add_res = gencontext_emit_call_intrinsic(context, usub_overflow_intrinsic_id, &type_to_use, 1, args, 2); } else { - add_res = gencontext_emit_call_intrinsic(context, ssub_overflow_intrinsic_id, types, args, 2); + add_res = gencontext_emit_call_intrinsic(context, ssub_overflow_intrinsic_id, &type_to_use, 1, args, 2); } LLVMValueRef result = LLVMBuildExtractValue(context->builder, add_res, 0, ""); LLVMValueRef ok = LLVMBuildExtractValue(context->builder, add_res, 1, ""); @@ -72,45 +71,25 @@ static inline LLVMValueRef gencontext_emit_sub_int(GenContext *context, Type *ty ? 
LLVMBuildNUWSub(context->builder, left, right, "usub") : LLVMBuildNSWSub(context->builder, left, right, "sub"); } -static inline LLVMValueRef gencontext_emit_subscript_addr(GenContext *context, Expr *expr) + +static inline LLVMValueRef gencontext_emit_subscript_addr_base(GenContext *context, Expr *parent) { - Expr *parent = expr->subscript_expr.expr; - Expr *index = expr->subscript_expr.index; - if (index->expr_kind == EXPR_RANGE) TODO; - LLVMValueRef index_value = gencontext_emit_expr(context, index); LLVMValueRef parent_value; Type *type = parent->type->canonical; switch (type->type_kind) { case TYPE_POINTER: - parent_value = gencontext_emit_expr(context, expr->subscript_expr.expr); - return LLVMBuildInBoundsGEP2(context->builder, - llvm_type(type->pointer), - parent_value, &index_value, 1, "ptridx"); + return gencontext_emit_expr(context, parent); case TYPE_ARRAY: - { - // TODO insert trap on overflow. - LLVMValueRef zero = llvm_int(type_int, 0); - LLVMValueRef indices[2] = { - zero, - index_value, - }; - parent_value = gencontext_emit_address(context, expr->subscript_expr.expr); - return LLVMBuildInBoundsGEP2(context->builder, - llvm_type(type), - parent_value, indices, 2, "arridx"); - } + return gencontext_emit_address(context, parent); case TYPE_SUBARRAY: { // TODO insert trap on overflow. 
LLVMTypeRef subarray_type = llvm_type(type); - parent_value = gencontext_emit_address(context, expr->subscript_expr.expr); + parent_value = gencontext_emit_address(context, parent); LLVMValueRef pointer_addr = LLVMBuildStructGEP2(context->builder, subarray_type, parent_value, 0, ""); LLVMTypeRef pointer_type = llvm_type(type_get_ptr(type->array.base)); - LLVMValueRef pointer = LLVMBuildLoad2(context->builder, pointer_type, pointer_addr, ""); - return LLVMBuildInBoundsGEP2(context->builder, - llvm_type(type->array.base), - pointer, &index_value, 1, "sarridx"); + return LLVMBuildLoad2(context->builder, pointer_type, pointer_addr, ""); } case TYPE_VARARRAY: case TYPE_STRING: @@ -121,6 +100,49 @@ static inline LLVMValueRef gencontext_emit_subscript_addr(GenContext *context, E } } +static inline LLVMValueRef gencontext_emit_subscript_addr_with_base(GenContext *context, Type *parent_type, LLVMValueRef parent_value, LLVMValueRef index_value) +{ + Type *type = parent_type; + switch (type->type_kind) + { + case TYPE_POINTER: + return LLVMBuildInBoundsGEP2(context->builder, + llvm_type(type->pointer), + parent_value, &index_value, 1, "ptridx"); + case TYPE_ARRAY: + { + // TODO insert trap on overflow. + LLVMValueRef zero = llvm_int(type_int, 0); + LLVMValueRef indices[2] = { + zero, + index_value, + }; + return LLVMBuildInBoundsGEP2(context->builder, + llvm_type(type), + parent_value, indices, 2, "arridx"); + } + case TYPE_SUBARRAY: + { + // TODO insert trap on overflow. 
+ return LLVMBuildInBoundsGEP2(context->builder, + llvm_type(type->array.base), + parent_value, &index_value, 1, "sarridx"); + } + case TYPE_VARARRAY: + case TYPE_STRING: + TODO + default: + UNREACHABLE + + } +} +static inline LLVMValueRef gencontext_emit_subscript_addr(GenContext *context, Expr *parent, LLVMValueRef index_value) +{ + LLVMValueRef parent_value = gencontext_emit_subscript_addr_base(context, parent); + return gencontext_emit_subscript_addr_with_base(context, parent->type->canonical, parent_value, index_value); +} + + static int find_member_index(Decl *parent, Decl *member) { VECEACH(parent->strukt.members, i) @@ -197,13 +219,15 @@ LLVMValueRef gencontext_emit_address(GenContext *context, Expr *expr) { switch (expr->expr_kind) { - case EXPR_RANGE: - TODO case EXPR_DESIGNATED_INITIALIZER: // Should only appear when generating designated initializers. UNREACHABLE case EXPR_MACRO_BLOCK: TODO + case EXPR_SLICE_ASSIGN: + case EXPR_SLICE: + // Should never be an lvalue + UNREACHABLE case EXPR_IDENTIFIER: return decl_ref(expr->identifier_expr.decl); case EXPR_UNARY: @@ -214,7 +238,7 @@ LLVMValueRef gencontext_emit_address(GenContext *context, Expr *expr) case EXPR_ACCESS: return gencontext_emit_access_addr(context, expr); case EXPR_SUBSCRIPT: - return gencontext_emit_subscript_addr(context, expr); + return gencontext_emit_subscript_addr(context, expr->subscript_expr.expr, gencontext_emit_expr(context, expr->subscript_expr.index)); case EXPR_SCOPED_EXPR: return gencontext_emit_scoped_expr_address(context, expr); case EXPR_GROUP: @@ -238,6 +262,7 @@ LLVMValueRef gencontext_emit_address(GenContext *context, Expr *expr) case EXPR_EXPR_BLOCK: case EXPR_DECL_LIST: case EXPR_ELSE: + case EXPR_LEN: UNREACHABLE } UNREACHABLE @@ -526,8 +551,9 @@ LLVMValueRef gencontext_emit_unary_expr(GenContext *context, Expr *expr) { LLVMTypeRef type_to_use = llvm_type(type->canonical); LLVMValueRef args[2] = { zero, to_negate }; - LLVMTypeRef types[2] = { type_to_use, type_to_use 
}; - LLVMValueRef call_res = gencontext_emit_call_intrinsic(context, ssub_overflow_intrinsic_id, types, args, 2); + LLVMValueRef call_res = gencontext_emit_call_intrinsic(context, + ssub_overflow_intrinsic_id, + &type_to_use, 1, args, 2); LLVMValueRef result = LLVMBuildExtractValue(context->builder, call_res, 0, ""); LLVMValueRef ok = LLVMBuildExtractValue(context->builder, call_res, 1, ""); gencontext_emit_panic_on_true(context, ok, "Signed negation overflow"); @@ -548,7 +574,275 @@ LLVMValueRef gencontext_emit_unary_expr(GenContext *context, Expr *expr) UNREACHABLE } +static LLVMValueRef gencontext_emit_len_value(GenContext *context, Expr *inner, LLVMValueRef inner_value) +{ + Type *type = inner->type; + switch (type->canonical->type_kind) + { + case TYPE_SUBARRAY: + { + LLVMTypeRef subarray_type = llvm_type(type); + LLVMValueRef len_addr = LLVMBuildStructGEP2(context->builder, subarray_type, inner_value, 1, "len"); + return gencontext_emit_load(context, type_usize, len_addr); + } + case TYPE_ARRAY: + return gencontext_emit_const_int(context, type_usize, type->array.len); + default: + UNREACHABLE + } +} + +static LLVMValueRef gencontext_emit_len(GenContext *context, Expr *expr) +{ + Expr *inner = expr->len_expr.inner; + LLVMValueRef value = gencontext_emit_address(context, inner); + return gencontext_emit_len_value(context, inner, value); +} + +static void gencontext_emit_trap_negative(GenContext *context, Expr *expr, LLVMValueRef value, const char *error) +{ + if (!build_options.debug_mode) return; + if (type_is_unsigned_integer(expr->type->canonical)) return; + + LLVMValueRef zero = gencontext_emit_const_int(context, expr->type, 0); + LLVMValueRef ok = LLVMBuildICmp(context->builder, LLVMIntSLT, value, zero, "underflow"); + gencontext_emit_panic_on_true(context, ok, error); +} + +static void +gencontext_emit_slice_values(GenContext *context, Expr *slice, Type **parent_type_ref, LLVMValueRef *parent_base_ref, + Type **start_type_ref, LLVMValueRef 
*start_index_ref, Type **end_type_ref, + LLVMValueRef *end_index_ref) +{ + assert(slice->expr_kind == EXPR_SLICE); + + Expr *parent_expr = slice->slice_expr.expr; + Type *parent_type = parent_expr->type->canonical; + LLVMValueRef parent_addr = gencontext_emit_address(context, parent_expr); + LLVMValueRef parent_load_value; + LLVMValueRef parent_base; + switch (parent_type->type_kind) + { + case TYPE_POINTER: + parent_load_value = parent_base = gencontext_emit_load(context, parent_type, parent_addr); + break; + case TYPE_SUBARRAY: + parent_load_value = gencontext_emit_load(context, parent_type, parent_addr); + parent_base = LLVMBuildExtractValue(context->builder, parent_load_value, 0, ""); + break; + case TYPE_ARRAY: + parent_base = parent_addr; + break; + case TYPE_VARARRAY: + case TYPE_STRING: + TODO + default: + UNREACHABLE + } + // Endpoints + Expr *start = slice->slice_expr.start; + Expr *end = slice->slice_expr.end; + + // Emit the start and end + Type *start_type = start->type->canonical; + LLVMValueRef start_index = gencontext_emit_expr(context, start); + + LLVMValueRef len; + if (!end || slice->slice_expr.start_from_back || slice->slice_expr.end_from_back || build_options.debug_mode) + { + switch (parent_type->type_kind) + { + case TYPE_POINTER: + len = NULL; + break; + case TYPE_SUBARRAY: + len = LLVMBuildExtractValue(context->builder, parent_load_value, 1, ""); + break; + case TYPE_ARRAY: + len = gencontext_emit_const_int(context, type_usize, parent_type->array.len); + break; + case TYPE_VARARRAY: + case TYPE_STRING: + TODO + default: + UNREACHABLE + } + } + + // Walk from end if it is slice from the back. + if (slice->slice_expr.start_from_back) + { + start_index = gencontext_emit_sub_int(context, start_type, false, len, start_index); + } + + // Check that index does not extend beyond the length. 
+ if (parent_type->type_kind != TYPE_POINTER && build_options.debug_mode) + { + LLVMValueRef exceeds_size = gencontext_emit_int_comparison(context, type_usize, start_type, len, start_index, BINARYOP_LE); + gencontext_emit_panic_on_true(context, exceeds_size, "Index exceeds array length."); + } + + // Insert trap for negative start offset for non pointers. + if (parent_type->type_kind != TYPE_POINTER) + { + gencontext_emit_trap_negative(context, start, start_index, "Negative index"); + } + + Type *end_type; + LLVMValueRef end_index; + + if (end) + { + // Get the index. + end_index = gencontext_emit_expr(context, end); + end_type = end->type->canonical; + + // Reverse if it is "from back" + if (slice->slice_expr.end_from_back) + { + end_index = gencontext_emit_sub_int(context, end_type, false, len, end_index); + } + + // This will trap any bad negative index, so we're fine. + if (build_options.debug_mode) + { + LLVMValueRef excess = gencontext_emit_int_comparison(context, start_type, end_type, start_index, end_index, BINARYOP_GT); + gencontext_emit_panic_on_true(context, excess, "Negative size"); + + if (len) + { + LLVMValueRef exceeds_size = gencontext_emit_int_comparison(context, type_usize, end_type, len, end_index, BINARYOP_LT); + gencontext_emit_panic_on_true(context, exceeds_size, "Size exceeds index"); + } + } + } + else + { + assert(len && "Pointer should never end up here."); + // Otherwise everything is fine and dandy. Our len is our end index. 
+ end_index = len; + end_type = type_usize; + } + + *end_index_ref = end_index; + *end_type_ref = end_type; + *start_index_ref = start_index; + *start_type_ref = start_type; + *parent_base_ref = parent_base; + *parent_type_ref = parent_type; +} + +static LLVMValueRef gencontext_emit_slice(GenContext *context, Expr *expr) +{ + Type *parent_type; + Type *end_type; + LLVMValueRef end_index; + LLVMValueRef parent_base; + Type *start_type; + LLVMValueRef start_index; + // Use general function to get all the values we need (a lot!) + gencontext_emit_slice_values(context, expr, &parent_type, + &parent_base, + &start_type, &start_index, &end_type, &end_index); + + + // Calculate the size + LLVMValueRef size = LLVMBuildSub(context->builder, end_index, start_index, "size"); + + LLVMValueRef start_pointer; + switch (parent_type->type_kind) + { + case TYPE_ARRAY: + { + Type *pointer_type = type_get_ptr(parent_type->array.base); + // Change pointer from Foo[x] to Foo* + parent_base = gencontext_emit_bitcast(context, parent_base, pointer_type); + // Move pointer + start_pointer = LLVMBuildInBoundsGEP2(context->builder, llvm_type(pointer_type->pointer), parent_base, &start_index, 1, "offset"); + break; + } + case TYPE_SUBARRAY: + { + start_pointer = LLVMBuildInBoundsGEP(context->builder, parent_base, &start_index, 1, "offsetsub"); + break; + } + default: + TODO + } + + // Create a new subarray type + LLVMValueRef result = LLVMGetUndef(llvm_type(expr->type)); + result = LLVMBuildInsertValue(context->builder, result, start_pointer, 0, ""); + return LLVMBuildInsertValue(context->builder, result, size, 1, ""); + +} + +static LLVMValueRef gencontext_emit_slice_assign(GenContext *context, Expr *expr) +{ + // We will be replacing the slice assign with code that roughly looks like this: + // size_t end = slice_end; + // size_t slice_current = slice_start; + // while (slice_current < end) pointer[slice_current++] = value; + + // First, find the value assigned. 
+ Expr *assigned_value = expr->slice_assign_expr.right; + LLVMValueRef value = gencontext_emit_expr(context, assigned_value); + + Type *parent_type; + Type *end_type; + LLVMValueRef end_index; + LLVMValueRef parent_base; + Type *start_type; + LLVMValueRef start_index; + // Use general function to get all the values we need (a lot!) + gencontext_emit_slice_values(context, expr->slice_assign_expr.left, &parent_type, + &parent_base, + &start_type, &start_index, &end_type, &end_index); + + // We will need to iterate for the general case. + LLVMBasicBlockRef start_block = context->current_block; + LLVMBasicBlockRef cond_block = gencontext_create_free_block(context, "cond"); + LLVMBasicBlockRef exit_block = gencontext_create_free_block(context, "exit"); + LLVMBasicBlockRef assign_block = gencontext_create_free_block(context, "assign"); + + // First jump to the cond block. + gencontext_emit_br(context, cond_block); + gencontext_emit_block(context, cond_block); + + // We emit a phi here: value is either the start value (start_offset) or the next value (next_offset) + // but we haven't generated the latter yet, so we defer that. + LLVMValueRef offset = LLVMBuildPhi(context->builder, llvm_type(start_type), ""); + + // Check if we're not at the end. + LLVMValueRef not_at_end = gencontext_emit_int_comparison(context, start_type, end_type, offset, end_index, BINARYOP_LT); + + // If jump to the assign block if we're not at the end index. + gencontext_emit_cond_br(context, not_at_end, assign_block, exit_block); + + // Emit the assign. + gencontext_emit_block(context, assign_block); + // Reuse this calculation + LLVMValueRef target = gencontext_emit_subscript_addr_with_base(context, parent_type, parent_base, offset); + // And store the value. 
+ LLVMBuildStore(context->builder, value, target); + + // Create the new offset + LLVMValueRef next_offset = gencontext_emit_add_int(context, start_type, false, offset, gencontext_emit_const_int(context, start_type, 1)); + + // And jump back + gencontext_emit_br(context, cond_block); + + // Finally set up our phi + LLVMValueRef logic_values[2] = { start_index, next_offset }; + LLVMBasicBlockRef blocks[2] = { start_block, assign_block }; + LLVMAddIncoming(offset, logic_values, blocks, 2); + + // And emit the exit block. + gencontext_emit_block(context, exit_block); + + return value; +} static LLVMValueRef gencontext_emit_logical_and_or(GenContext *context, Expr *expr, BinaryOp op) { @@ -762,7 +1056,12 @@ static LLVMValueRef gencontext_emit_binary(GenContext *context, Expr *expr, LLVM LLVMTypeRef type_to_use = llvm_type(lhs_type); LLVMValueRef args[2] = { lhs_value, rhs_value }; LLVMTypeRef types[2] = { type_to_use, type_to_use }; - LLVMValueRef call_res = gencontext_emit_call_intrinsic(context, umul_overflow_intrinsic_id, types, args, 2); + LLVMValueRef call_res = gencontext_emit_call_intrinsic(context, + umul_overflow_intrinsic_id, + types, + 1, + args, + 2); LLVMValueRef result = LLVMBuildExtractValue(context->builder, call_res, 0, ""); LLVMValueRef ok = LLVMBuildExtractValue(context->builder, call_res, 1, ""); gencontext_emit_panic_on_true(context, ok, "Unsigned multiplication overflow"); @@ -775,7 +1074,12 @@ static LLVMValueRef gencontext_emit_binary(GenContext *context, Expr *expr, LLVM LLVMTypeRef type_to_use = llvm_type(lhs_type); LLVMValueRef args[2] = { lhs_value, rhs_value }; LLVMTypeRef types[2] = { type_to_use, type_to_use }; - LLVMValueRef call_res = gencontext_emit_call_intrinsic(context, smul_overflow_intrinsic_id, types, args, 2); + LLVMValueRef call_res = gencontext_emit_call_intrinsic(context, + smul_overflow_intrinsic_id, + types, + 1, + args, + 2); LLVMValueRef result = LLVMBuildExtractValue(context->builder, call_res, 0, ""); LLVMValueRef ok = 
LLVMBuildExtractValue(context->builder, call_res, 1, ""); gencontext_emit_panic_on_true(context, ok, "Signed multiplication overflow"); @@ -1335,10 +1639,10 @@ static inline LLVMValueRef gencontext_emit_macro_block(GenContext *context, Expr return return_out ? gencontext_emit_load(context, expr->type, return_out) : NULL; } -LLVMValueRef gencontext_emit_call_intrinsic(GenContext *context, unsigned intrinsic_id, LLVMTypeRef *types, - LLVMValueRef *values, unsigned arg_count) +LLVMValueRef gencontext_emit_call_intrinsic(GenContext *context, unsigned intrinsic_id, LLVMTypeRef *types, unsigned type_count, + LLVMValueRef *values, unsigned arg_count) { - LLVMValueRef decl = LLVMGetIntrinsicDeclaration(context->module, intrinsic_id, types, arg_count); + LLVMValueRef decl = LLVMGetIntrinsicDeclaration(context->module, intrinsic_id, types, type_count); - LLVMTypeRef type = LLVMIntrinsicGetType(context->context, intrinsic_id, types, arg_count); + LLVMTypeRef type = LLVMIntrinsicGetType(context->context, intrinsic_id, types, type_count); return LLVMBuildCall2(context->builder, type, decl, values, arg_count, ""); } @@ -1436,13 +1740,18 @@ LLVMValueRef gencontext_emit_expr(GenContext *context, Expr *expr) NESTED_RETRY: switch (expr->expr_kind) { - case EXPR_RANGE: case EXPR_POISONED: case EXPR_DECL_LIST: UNREACHABLE case EXPR_DESIGNATED_INITIALIZER: // Should only appear when generating designated initializers. 
UNREACHABLE + case EXPR_SLICE_ASSIGN: + return gencontext_emit_slice_assign(context, expr); + case EXPR_SLICE: + return gencontext_emit_slice(context, expr); + case EXPR_LEN: + return gencontext_emit_len(context, expr); case EXPR_FAILABLE: return gencontext_emit_failable(context, expr); case EXPR_TRY: diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h index 76a09e7b9..ad49611aa 100644 --- a/src/compiler/llvm_codegen_internal.h +++ b/src/compiler/llvm_codegen_internal.h @@ -115,8 +115,9 @@ gencontext_add_attribute(GenContext *context, LLVMValueRef value_to_add_attribut void gencontext_emit_stmt(GenContext *context, Ast *ast); void gencontext_generate_catch_block_if_needed(GenContext *context, Ast *ast); -LLVMValueRef gencontext_emit_call_intrinsic(GenContext *context, unsigned intrinsic_id, LLVMTypeRef *types, - LLVMValueRef *values, unsigned arg_count); +LLVMValueRef +gencontext_emit_call_intrinsic(GenContext *context, unsigned intrinsic_id, LLVMTypeRef *types, unsigned type_count, + LLVMValueRef *values, unsigned arg_count); void gencontext_emit_panic_on_true(GenContext *context, LLVMValueRef value, const char *panic_name); void gencontext_emit_defer(GenContext *context, AstId defer_start, AstId defer_end); @@ -130,6 +131,7 @@ void gencontext_emit_compound_stmt(GenContext *context, Ast *ast); void gencontext_emit_block(GenContext *context, LLVMBasicBlockRef next_block); LLVMValueRef gencontext_emit_memclear_size_align(GenContext *context, LLVMValueRef ref, uint64_t size, unsigned align, bool bitcast); LLVMValueRef gencontext_emit_memclear(GenContext *context, LLVMValueRef ref, Type *type); + void gencontext_emit_br(GenContext *context, LLVMBasicBlockRef next_block); bool gencontext_check_block_branch_emit(GenContext *context); void gencontext_emit_cond_br(GenContext *context, LLVMValueRef value, LLVMBasicBlockRef thenBlock, LLVMBasicBlockRef elseBlock); diff --git a/src/compiler/llvm_codegen_stmt.c 
b/src/compiler/llvm_codegen_stmt.c index 235695b76..8f228ba76 100644 --- a/src/compiler/llvm_codegen_stmt.c +++ b/src/compiler/llvm_codegen_stmt.c @@ -874,7 +874,7 @@ void gencontext_emit_panic_on_true(GenContext *context, LLVMValueRef value, cons LLVMBasicBlockRef ok_block = gencontext_create_free_block(context, "checkok"); gencontext_emit_cond_br(context, value, panic_block, ok_block); gencontext_emit_block(context, panic_block); - gencontext_emit_call_intrinsic(context, trap_intrinsic_id, NULL, NULL, 0); + gencontext_emit_call_intrinsic(context, trap_intrinsic_id, NULL, 0, NULL, 0); gencontext_emit_br(context, ok_block); gencontext_emit_block(context, ok_block); } diff --git a/src/compiler/parse_expr.c b/src/compiler/parse_expr.c index 600fdf511..84e29ad46 100644 --- a/src/compiler/parse_expr.c +++ b/src/compiler/parse_expr.c @@ -197,32 +197,6 @@ static Expr *parse_post_unary(Context *context, Expr *left) return unary; } -static Expr *parse_range_expr(Context *context, Expr *left_side) -{ - assert(expr_ok(left_side)); - advance_and_verify(context, TOKEN_ELLIPSIS); - Expr *right = TRY_EXPR_OR(parse_precedence(context, PREC_RANGE + 1), poisoned_expr); - Expr *range = expr_new(EXPR_RANGE, left_side->span); - range->range_expr.left = left_side; - range->range_expr.right = right; - RANGE_EXTEND_PREV(range); - return range; -} - -static bool token_may_end_expression(TokenType type) -{ - switch (type) - { - case TOKEN_RPAREN: - case TOKEN_RBRACE: - case TOKEN_RBRACKET: - case TOKEN_EOS: - case TOKEN_COMMA: - return true; - default: - return false; - } -} @@ -369,18 +343,61 @@ static Expr *parse_call_expr(Context *context, Expr *left) } + static Expr *parse_subscript_expr(Context *context, Expr *left) { assert(left && expr_ok(left)); - advance_and_verify(context, TOKEN_LBRACKET); - Expr *index = TRY_EXPR_OR(parse_expr(context), poisoned_expr); + + Expr *subs_expr = EXPR_NEW_EXPR(EXPR_SUBSCRIPT, left); + Expr *index = NULL; + bool is_range = false; + bool from_back = 
false; + bool end_from_back = false; + Expr *end = NULL; + + // Not range with missing entry + if (!TOKEN_IS(TOKEN_DOTDOT)) + { + // Might be ^ prefix + from_back = try_consume(context, TOKEN_BIT_XOR); + index = TRY_EXPR_OR(parse_expr(context), poisoned_expr); + } + else + { + index = EXPR_NEW_TOKEN(EXPR_CONST, context->tok); + index->type = type_usize; + index->resolve_status = RESOLVE_DONE; + expr_const_set_int(&index->const_expr, 0, type_usize->canonical->type_kind); + } + if (try_consume(context, TOKEN_DOTDOT)) + { + is_range = true; + if (!TOKEN_IS(TOKEN_RBRACKET)) + { + end_from_back = try_consume(context, TOKEN_BIT_XOR); + end = TRY_EXPR_OR(parse_expr(context), poisoned_expr); + } + } CONSUME_OR(TOKEN_RBRACKET, poisoned_expr); - Expr *subscript_ast = EXPR_NEW_EXPR(EXPR_SUBSCRIPT, left); - subscript_ast->subscript_expr.expr = left; - subscript_ast->subscript_expr.index = index; - RANGE_EXTEND_PREV(subscript_ast); - return subscript_ast; + RANGE_EXTEND_PREV(subs_expr); + + if (is_range) + { + subs_expr->expr_kind = EXPR_SLICE; + subs_expr->slice_expr.expr = left; + subs_expr->slice_expr.start = index; + subs_expr->slice_expr.start_from_back = from_back; + subs_expr->slice_expr.end = end; + subs_expr->slice_expr.end_from_back = end_from_back; + } + else + { + subs_expr->subscript_expr.expr = left; + subs_expr->subscript_expr.index = index; + subs_expr->subscript_expr.from_back = from_back; + } + return subs_expr; } @@ -848,7 +865,6 @@ static Expr* parse_expr_block(Context *context, Expr *left) ParseRule rules[TOKEN_EOF + 1] = { [TOKEN_ELSE] = { NULL, parse_else_expr, PREC_TRY_ELSE }, - [TOKEN_ELLIPSIS] = { NULL, parse_range_expr, PREC_RANGE }, [TOKEN_QUESTION] = { NULL, parse_ternary_expr, PREC_TERNARY }, [TOKEN_ELVIS] = { NULL, parse_ternary_expr, PREC_TERNARY }, [TOKEN_PLUSPLUS] = { parse_unary_expr, parse_post_unary, PREC_CALL }, diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c index 4a16c2dbc..e5d4d5fcb 100644 --- a/src/compiler/sema_expr.c 
+++ b/src/compiler/sema_expr.c @@ -119,6 +119,7 @@ static bool expr_is_ltype(Expr *expr) case EXPR_GROUP: return expr_is_ltype(expr->group_expr); case EXPR_SUBSCRIPT: + case EXPR_SLICE: return true; default: return false; @@ -148,11 +149,7 @@ static inline bool sema_type_error_on_binop(Context *context, Expr *expr) static bool expr_cast_to_index(Context *context, Expr *index) { if (index->type->canonical->type_kind == type_usize->canonical->type_kind) return true; - if (index->expr_kind != EXPR_RANGE) return cast_implicit(context, index, type_isize); - if (!cast_implicit(context, index->range_expr.left, type_isize)) return false; - if (!cast_implicit(context, index->range_expr.right, type_isize)) return false; - index->type = type_isize; - return true; + return cast_implicit(context, index, type_isize); } static inline bool sema_expr_analyse_ternary(Context *context, Type *to, Expr *expr) @@ -266,6 +263,7 @@ static inline bool sema_expr_analyse_identifier(Context *context, Type *to, Expr { Decl *ambiguous_decl = NULL; Decl *private_symbol = NULL; + DEBUG_LOG("Now resolving %s", expr->identifier_expr.identifier); Decl *decl = sema_resolve_symbol(context, expr->identifier_expr.identifier, expr->identifier_expr.path, @@ -349,6 +347,7 @@ static inline bool sema_expr_analyse_identifier(Context *context, Type *to, Expr assert(decl->type); expr->identifier_expr.decl = decl; expr->type = decl->type; + DEBUG_LOG("Resolution successful of %s.", decl->name); return true; } @@ -695,10 +694,10 @@ static inline bool sema_expr_analyse_range(Context *context, Type *to, Expr *exp { Expr *left = expr->range_expr.left; Expr *right = expr->range_expr.right; - bool success = sema_analyse_expr(context, to, left) & sema_analyse_expr(context, to, right); + bool success = sema_analyse_expr(context, to, left) & (!right || sema_analyse_expr(context, to, right)); if (!success) return expr_poison(expr); Type *left_canonical = left->type->canonical; - Type *right_canonical = 
right->type->canonical; + Type *right_canonical = right ? right->type->canonical : left_canonical; if (!type_is_any_integer(left_canonical)) { SEMA_ERROR(left, "Expected an integer value in the range expression."); @@ -714,7 +713,7 @@ static inline bool sema_expr_analyse_range(Context *context, Type *to, Expr *exp Type *type = type_find_max_type(left_canonical, right_canonical); if (!cast_implicit(context, left, type) || !cast_implicit(context, right, type)) return expr_poison(expr); } - if (left->expr_kind == EXPR_CONST && right->expr_kind == EXPR_CONST) + if (left->expr_kind == EXPR_CONST && right && right->expr_kind == EXPR_CONST) { if (expr_const_compare(&left->const_expr, &right->const_expr, BINARYOP_GT)) { @@ -726,44 +725,67 @@ static inline bool sema_expr_analyse_range(Context *context, Type *to, Expr *exp return true; } -static bool expr_check_index_in_range(Context *context, Type *type, Expr *index) +static bool expr_check_index_in_range(Context *context, Type *type, Expr *index_expr, bool end_index, bool from_end) { - if (index->expr_kind == EXPR_RANGE) - { - return expr_check_index_in_range(context, type, index->range_expr.left) & expr_check_index_in_range(context, type, index->range_expr.right); - } assert(type == type->canonical); - if (index->expr_kind == EXPR_CONST) + if (index_expr->expr_kind != EXPR_CONST) return true; + if (!bigint_fits_in_bits(&index_expr->const_expr.i, 64, true)) { - switch (type->type_kind) + SEMA_ERROR(index_expr, "Index does not fit into an 64-signed integer."); + return false; + } + int64_t index = bigint_as_signed(&index_expr->const_expr.i); + if (from_end && index < 0) + { + SEMA_ERROR(index_expr, "Negative numbers are not allowed when indexing from the end."); + return false; + } + switch (type->type_kind) + { + case TYPE_POINTER: + assert(!from_end); + return true; + case TYPE_ARRAY: { - case TYPE_POINTER: - return true; - case TYPE_ARRAY: + int64_t len = (int64_t)type->array.len; + if (from_end) { - BigInt size; - 
bigint_init_unsigned(&size, type->array.len); - if (bigint_cmp(&size, &index->const_expr.i) != CMP_GT) - { - SEMA_ERROR(index, "Array index out of bounds, was %s, exceeding max index of %llu.", - bigint_to_error_string(&index->const_expr.i, 10), type->array.len - 1); - return false; - } - FALLTHROUGH; + index = len - index; } - case TYPE_VARARRAY: - case TYPE_SUBARRAY: - if (bigint_cmp_zero(&index->const_expr.i) == CMP_LT) - { - SEMA_ERROR(index, "Array index out of bounds, was %s.", bigint_to_error_string(&index->const_expr.i, 10)); - return false; - } - break; - case TYPE_STRING: - TODO - default: - UNREACHABLE + // Checking end can only be done for arrays. + if (end_index && index > len) + { + SEMA_ERROR(index_expr, "Array end index out of bounds, was %lld, exceeding array length %lld.", (long long)index, (long long)len); + return false; + } + if (!end_index && index >= len) + { + SEMA_ERROR(index_expr, "Array index out of bounds, was %lld, exceeding max array index %lld.", (long long)index, (long long)len - 1); + return false; + } + break; } + case TYPE_VARARRAY: + case TYPE_SUBARRAY: + case TYPE_STRING: + // If not from end, just check the negative values. + if (!from_end) break; + // From end we can only do sanity checks ^0 is invalid for non-end index. ^-1 and less is invalid for all. 
+ if (index == 0 && !end_index) + { + SEMA_ERROR(index_expr, + "Array index out of bounds, index from end (%lld) must be greater than zero or it will exceed the max array index.", + (long long) index); + return false; + } + return true; + default: + UNREACHABLE + } + if (index < 0) + { + SEMA_ERROR(index_expr, "Array index out of bounds, using a negative array index is only allowed with pointers."); + return false; } return true; } @@ -781,27 +803,15 @@ static inline bool sema_expr_analyse_subscript_after_parent_resolution(Context * return false; } - if (index->expr_kind == EXPR_RANGE) - { - if (!sema_expr_analyse_range(context, type_isize, index)) return false; - } - else - { - if (!sema_analyse_expr(context, type_isize, index)) return false; - } + if (!sema_analyse_expr(context, type_isize, index)) return false; // Unless we already have type_usize, cast to type_isize; if (!expr_cast_to_index(context, index)) return false; // Check range - if (!expr_check_index_in_range(context, type, index)) return false; + if (!expr_check_index_in_range(context, type, index, false, expr->subscript_expr.from_back)) return false; expr->failable |= index->failable; - if (index->expr_kind == EXPR_RANGE) - { - expr->type = type_get_subarray(inner_type); - return true; - } expr->type = inner_type; return true; } @@ -813,6 +823,76 @@ static inline bool sema_expr_analyse_subscript(Context *context, Expr *expr) return sema_expr_analyse_subscript_after_parent_resolution(context, NULL, expr); } +static inline bool sema_expr_analyse_slice_after_parent_resolution(Context *context, Type *parent, Expr *expr) +{ + assert(expr->expr_kind == EXPR_SLICE); + Expr *subscripted = expr->slice_expr.expr; + Type *type = parent ? parent->canonical : subscripted->type->canonical; + Expr *start = expr->slice_expr.start; + Expr *end = expr->slice_expr.end; + Type *inner_type = type_get_indexed_type(type); + if (!inner_type) + { + SEMA_ERROR((parent ? 
expr : subscripted), "Cannot slice '%s'.", type_to_error_string(type)); + return false; + } + + if (!sema_analyse_expr(context, type_isize, start)) return false; + if (end && !sema_analyse_expr(context, type_isize, end)) return false; + + // Unless we already have type_usize, cast to type_isize; + if (!expr_cast_to_index(context, start)) return false; + if (end && !expr_cast_to_index(context, end)) return false; + + // Check range + if (type->type_kind == TYPE_POINTER) + { + if (expr->slice_expr.start_from_back) + { + SEMA_ERROR(expr->slice_expr.start, "Indexing from the end is not allowed for pointers."); + return false; + } + if (end && expr->slice_expr.end_from_back) + { + SEMA_ERROR(expr->slice_expr.end, "Indexing from the end is not allowed for pointers."); + return false; + } + } + if (!expr_check_index_in_range(context, type, start, false, expr->slice_expr.start_from_back)) return false; + if (end && !expr_check_index_in_range(context, type, end, true, expr->slice_expr.end_from_back)) return false; + + if (start && end && start->expr_kind == EXPR_CONST && end->expr_kind == EXPR_CONST) + { + if (expr->slice_expr.start_from_back && expr->slice_expr.end_from_back) + { + if (expr_const_compare(&start->const_expr, &end->const_expr, BINARYOP_LT)) + { + SEMA_ERROR(start, "Start index greater than end index."); + return false; + } + } + else + { + if (expr_const_compare(&start->const_expr, &end->const_expr, BINARYOP_GT)) + { + SEMA_ERROR(start, "Start index greater than end index."); + return false; + } + } + } + + expr->failable |= start->failable; + expr->type = type_get_subarray(inner_type); + return true; +} + +static inline bool sema_expr_analyse_slice(Context *context, Expr *expr) +{ + if (!sema_analyse_expr(context, NULL, expr->slice_expr.expr)) return false; + expr->failable = expr->slice_expr.expr->failable; + return sema_expr_analyse_slice_after_parent_resolution(context, NULL, expr); +} + static inline void insert_access_deref(Expr *expr) { Expr *deref = 
expr_new(EXPR_UNARY, expr->span); @@ -886,7 +966,7 @@ static inline void expr_rewrite_to_int_const(Expr *expr_to_rewrite, Type *type, expr_to_rewrite->expr_kind = EXPR_CONST; expr_const_set_int(&expr_to_rewrite->const_expr, value, type->canonical->type_kind); expr_to_rewrite->type = type; - expr_to_rewrite->resolve_status = true; + expr_to_rewrite->resolve_status = RESOLVE_DONE; } static bool sema_expr_analyse_type_access(Context *context, Type *to, Expr *expr) @@ -1049,14 +1129,15 @@ static inline bool sema_expr_analyse_member_access(Context *context, Expr *expr) static inline bool sema_expr_analyse_access(Context *context, Expr *expr) { - if (!sema_analyse_expr(context, NULL, expr->access_expr.parent)) return false; + Expr *parent = expr->access_expr.parent; + if (!sema_analyse_expr(context, NULL, parent)) return false; - expr->failable = expr->access_expr.parent->failable; + expr->failable = parent->failable; assert(expr->expr_kind == EXPR_ACCESS); - assert(expr->access_expr.parent->resolve_status == RESOLVE_DONE); + assert(parent->resolve_status == RESOLVE_DONE); - Type *parent_type = expr->access_expr.parent->type; + Type *parent_type = parent->type; Type *type = parent_type->canonical; bool is_pointer = type->type_kind == TYPE_POINTER; @@ -1064,24 +1145,48 @@ static inline bool sema_expr_analyse_access(Context *context, Expr *expr) { type = type->pointer; } - - if (!type_may_have_sub_elements(type)) + const char *kw = TOKSTR(expr->access_expr.sub_element); + switch (type->type_kind) { - SEMA_ERROR(expr, "Cannot access '%s' on '%s'", TOKSTR(expr->access_expr.sub_element), type_to_error_string(parent_type)); - return false; - } - Decl *decl = type->decl; - switch (decl->decl_kind) - { - case DECL_ENUM: - return sema_expr_analyse_method(context, expr, decl, is_pointer); - case DECL_ERR: - case DECL_STRUCT: - case DECL_UNION: + case TYPE_SUBARRAY: + if (kw == kw_sizeof) + { + expr_rewrite_to_int_const(expr, type_usize, type_size(type)); + return true; + } + if 
(kw == kw_len) + { + expr->expr_kind = EXPR_LEN; + expr->len_expr.inner = parent; + expr->type = type_usize; + expr->resolve_status = RESOLVE_DONE; + return true; + } + goto NO_MATCH; + case TYPE_ARRAY: + if (kw == kw_sizeof) + { + expr_rewrite_to_int_const(expr, type_usize, type_size(type)); + return true; + } + if (kw == kw_len) + { + expr_rewrite_to_int_const(expr, type_usize, type->array.len); + return true; + } + goto NO_MATCH; + case TYPE_ENUM: + return sema_expr_analyse_method(context, expr, type->decl, is_pointer); + case TYPE_ERRTYPE: + case TYPE_STRUCT: + case TYPE_UNION: break; default: - UNREACHABLE + NO_MATCH: + SEMA_ERROR(expr, "Cannot access '%s' on '%s'", TOKSTR(expr->access_expr.sub_element), type_to_error_string(parent_type)); + return false; } + Decl *decl = type->decl; Decl *member = strukt_recursive_search_member(decl, TOKSTR(expr->access_expr.sub_element)); if (!member) { @@ -1190,7 +1295,7 @@ static DesignatedPath *sema_analyse_init_subscript(Context *context, DesignatedP } // Check range - if (!expr_check_index_in_range(context, type->canonical, index)) + if (!expr_check_index_in_range(context, type->canonical, index, false, false)) { *has_reported_error = true; return NULL; @@ -1452,8 +1557,26 @@ static inline bool sema_expr_analyse_cast(Context *context, Type *to, Expr *expr return true; } +static inline bool sema_expr_analyse_slice_assign(Context *context, Expr *expr, Type *left_type, Expr *right, ExprFailableStatus lhs_is_failable) +{ + // 1. Evaluate right side to required type. 
+ if (!sema_analyse_expr_of_required_type(context, left_type->array.base, right, lhs_is_failable != FAILABLE_NO)) return false; + + Expr *left = expr->binary_expr.left; + expr->type = right->type; + expr->expr_kind = EXPR_SLICE_ASSIGN; + expr->slice_assign_expr.left = left; + expr->slice_assign_expr.right = right; + + return true; +} + bool sema_expr_analyse_assign_right_side(Context *context, Expr *expr, Type *left_type, Expr *right, ExprFailableStatus lhs_is_failable) { + if (expr && expr->binary_expr.left->expr_kind == EXPR_SLICE) + { + return sema_expr_analyse_slice_assign(context, expr, left_type, right, lhs_is_failable); + } // 1. Evaluate right side to required type. if (!sema_analyse_expr_of_required_type(context, left_type, right, lhs_is_failable != FAILABLE_NO)) return false; @@ -2884,6 +3007,18 @@ static Expr *expr_copy_from_macro(Context *context, Expr *source_expr) Expr *expr = expr_shallow_copy(source_expr); switch (source_expr->expr_kind) { + case EXPR_SLICE_ASSIGN: + MACRO_COPY_EXPR(expr->slice_assign_expr.left); + MACRO_COPY_EXPR(expr->slice_assign_expr.right); + return expr; + case EXPR_SLICE: + MACRO_COPY_EXPR(expr->slice_expr.expr); + MACRO_COPY_EXPR(expr->slice_expr.start); + MACRO_COPY_EXPR(expr->slice_expr.end); + return expr; + case EXPR_LEN: + MACRO_COPY_EXPR(expr->len_expr.inner); + return expr; case EXPR_CATCH: case EXPR_TRY: MACRO_COPY_EXPR(expr->trycatch_expr); @@ -2917,10 +3052,6 @@ static Expr *expr_copy_from_macro(Context *context, Expr *source_expr) case EXPR_DESIGNATED_INITIALIZER: // Created during semantic analysis UNREACHABLE - case EXPR_RANGE: - MACRO_COPY_EXPR(expr->range_expr.left); - MACRO_COPY_EXPR(expr->range_expr.right); - return expr; case EXPR_EXPR_BLOCK: MACRO_COPY_AST_LIST(expr->expr_block.stmts); return expr; @@ -3249,6 +3380,7 @@ static inline bool sema_expr_analyse_typeof(Context *context, Expr *expr) return true; } + static inline bool sema_expr_analyse_failable(Context *context, Type *to, Expr *expr) { Expr 
*inner = expr->failable_expr; @@ -3289,12 +3421,16 @@ static inline bool sema_analyse_expr_dispatch(Context *context, Type *to, Expr * return sema_expr_analyse_failable(context, to, expr); case EXPR_POISONED: return false; + case EXPR_LEN: case EXPR_DESIGNATED_INITIALIZER: + case EXPR_SLICE_ASSIGN: // Created during semantic analysis UNREACHABLE case EXPR_MACRO_BLOCK: case EXPR_SCOPED_EXPR: UNREACHABLE + case EXPR_SLICE: + return sema_expr_analyse_slice(context, expr); case EXPR_CATCH: return sema_expr_analyse_catch(context, expr); case EXPR_TRY: @@ -3309,9 +3445,6 @@ static inline bool sema_analyse_expr_dispatch(Context *context, Type *to, Expr * return sema_expr_analyse_expr_block(context, to, expr); case EXPR_GUARD: return sema_expr_analyse_guard(context, to, expr); - case EXPR_RANGE: - SEMA_ERROR(expr, "Range expression was not expected here."); - return false; case EXPR_CONST: return true; case EXPR_BINARY: diff --git a/src/compiler/sema_types.c b/src/compiler/sema_types.c index f2ff3b34d..1d77de9f7 100644 --- a/src/compiler/sema_types.c +++ b/src/compiler/sema_types.c @@ -51,6 +51,12 @@ static inline bool sema_resolve_array_type(Context *context, TypeInfo *type) SEMA_ERROR(type->array.len, "An array may not have a negative size."); return type_info_poison(type); } + if (!bigint_fits_in_bits(&type->array.len->const_expr.i, 64, true)) + { + SEMA_ERROR(type->array.len, "An array may not exceed the max of an 64 bit signed int."); + return type_info_poison(type); + } + len = bigint_as_unsigned(&type->array.len->const_expr.i); type->type = type_get_array(type->array.base->type, len); break; diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index 01645cfec..d8124df4f 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -39,7 +39,7 @@ const char *attribute_list[NUMBER_OF_ATTRIBUTES]; const char *kw_main; const char *kw_sizeof; const char *kw_offsetof; - +const char *kw_len; void symtab_init(uint32_t capacity) { @@ -76,6 +76,7 @@ void 
symtab_init(uint32_t capacity) kw_main = KW_DEF("main"); kw_sizeof = KW_DEF("sizeof"); kw_offsetof = KW_DEF("offsetof"); + kw_len = KW_DEF("len"); attribute_list[ATTRIBUTE_INLINE] = KW_DEF("inline"); attribute_list[ATTRIBUTE_NOINLINE] = KW_DEF("noinline"); attribute_list[ATTRIBUTE_STDCALL] = KW_DEF("stdcall"); diff --git a/test/test_suite/subarrays/slice_negative_len.c3 b/test/test_suite/subarrays/slice_negative_len.c3 new file mode 100644 index 000000000..b1f9bc121 --- /dev/null +++ b/test/test_suite/subarrays/slice_negative_len.c3 @@ -0,0 +1,65 @@ +func void test() +{ + int[3] x = { 1, 2, 3}; + int[] z = x[2..2]; + z = x[2..1]; // #error: Start index greater than end index. +} + +func void test2() +{ + int[3] x = { 1, 2, 3}; + int[] z = x[^2..^2]; + z = x[^3..]; + z = x[^1..^2]; // #error: Start index greater than end index. +} + +func void test3() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[..4]; // #error: Array end index out of bounds, was 4, exceeding array length 3. +} + +func void test4() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[..^0]; + z = x[..^-1]; // #error: Negative numbers are not allowed when indexing from the end. +} + +func void test5() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[..^4]; // #error: Array index out of bounds, using a negative array index is only allowed with pointers. +} + +func void test6() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[3..]; // #error: Array index out of bounds, was 3, exceeding max array index 2. +} + +func void test7() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[-1..]; // #error: Array index out of bounds, using a negative array index is only allowed with pointers. +} + +func void test8() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[^4..]; // #error: Array index out of bounds, using a negative array index is only allowed with pointers. +} + +func void test9() +{ + int[3] x = { 1, 2, 3 }; + int[] z = x[^0..]; // #error: Array index out of bounds, was 3, exceeding max array index 2. 
+} + +func void test10() +{ + int* x = nil; + x[-10..-3]; + int[] z = x[^2..]; // #error: Indexing from the end is not allowed for pointers. + int[] y = x[..^2]; // #error: Indexing from the end is not allowed for pointers. +} \ No newline at end of file diff --git a/test/test_suite/subarrays/slice_offset.c3t b/test/test_suite/subarrays/slice_offset.c3t new file mode 100644 index 000000000..8f2bb14f6 --- /dev/null +++ b/test/test_suite/subarrays/slice_offset.c3t @@ -0,0 +1,22 @@ +func void test() +{ + int[3] x = { 1, 2, 3 }; + int[] y = x[1..2]; +} + +// #expect: slice_offset.ll + +%x = alloca [3 x i32] +%y = alloca %"int[]" +%0 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 0 +store i32 1, i32* %0 +%1 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 1 +store i32 2, i32* %1 +%2 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 2 +store i32 3, i32* %2 +%3 = load [3 x i32], [3 x i32]* %x +%4 = bitcast [3 x i32]* %x to i32* +%offset = getelementptr inbounds i32, i32* %4, i64 1 +%5 = insertvalue %"int[]" undef, i32* %offset, 0 +%6 = insertvalue %"int[]" %5, i64 1, 1 +store %"int[]" %6, %"int[]"* %y diff --git a/test/test_suite/subarrays/slice_offset_neg_end.c3t b/test/test_suite/subarrays/slice_offset_neg_end.c3t new file mode 100644 index 000000000..0346ca685 --- /dev/null +++ b/test/test_suite/subarrays/slice_offset_neg_end.c3t @@ -0,0 +1,22 @@ +func void test() +{ + int[3] x = { 1, 2, 3 }; + int[] y = x[1..^1]; +} + +// #expect: slice_offset_neg_end.ll + +%x = alloca [3 x i32] +%y = alloca %"int[]" +%0 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 0 +store i32 1, i32* %0 +%1 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 1 +store i32 2, i32* %1 +%2 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 2 +store i32 3, i32* %2 +%3 = load [3 x i32], [3 x i32]* %x +%4 = bitcast [3 x i32]* %x to i32* +%offset = getelementptr inbounds i32, i32* %4, i64 1 +%5 = insertvalue %"int[]" undef, i32* 
%offset, 0 +%6 = insertvalue %"int[]" %5, i64 1, 1 +store %"int[]" %6, %"int[]"* %y diff --git a/test/test_suite/subarrays/slice_offset_neg_start.c3t b/test/test_suite/subarrays/slice_offset_neg_start.c3t new file mode 100644 index 000000000..36fef5b24 --- /dev/null +++ b/test/test_suite/subarrays/slice_offset_neg_start.c3t @@ -0,0 +1,22 @@ +func void test() +{ + int[3] x = { 1, 2, 3 }; + int[] y = x[^2..2]; +} + +// #expect: slice_offset_neg_start.ll + +%x = alloca [3 x i32] +%y = alloca %"int[]" +%0 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 0 +store i32 1, i32* %0 +%1 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 1 +store i32 2, i32* %1 +%2 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 2 +store i32 3, i32* %2 +%3 = load [3 x i32], [3 x i32]* %x +%4 = bitcast [3 x i32]* %x to i32* +%offset = getelementptr inbounds i32, i32* %4, i64 1 +%5 = insertvalue %"int[]" undef, i32* %offset, 0 +%6 = insertvalue %"int[]" %5, i64 1, 1 +store %"int[]" %6, %"int[]"* %y diff --git a/test/test_suite/subarrays/slice_start.c3t b/test/test_suite/subarrays/slice_start.c3t new file mode 100644 index 000000000..bdf3bb25a --- /dev/null +++ b/test/test_suite/subarrays/slice_start.c3t @@ -0,0 +1,22 @@ +func void test() +{ + int[3] x = { 1, 2, 3 }; + int[] y = x[..]; +} + +// #expect: slice_start.ll + +%x = alloca [3 x i32] +%y = alloca %"int[]" +%0 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 0 +store i32 1, i32* %0 +%1 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 1 +store i32 2, i32* %1 +%2 = getelementptr inbounds [3 x i32], [3 x i32]* %x, i32 0, i32 2 +store i32 3, i32* %2 +%3 = load [3 x i32], [3 x i32]* %x +%4 = bitcast [3 x i32]* %x to i32* +%offset = getelementptr inbounds i32, i32* %4, i64 0 +%5 = insertvalue %"int[]" undef, i32* %offset, 0 +%6 = insertvalue %"int[]" %5, i64 3, 1 +store %"int[]" %6, %"int[]"* %y diff --git a/test/test_suite/subarrays/slice_syntax.c3 
b/test/test_suite/subarrays/slice_syntax.c3 new file mode 100644 index 000000000..7b542a6ef --- /dev/null +++ b/test/test_suite/subarrays/slice_syntax.c3 @@ -0,0 +1,24 @@ +func void test() +{ + int[6] feok2 = { 1, 8, 100, 293, 23982, 34}; + int[] feok = &feok2; + int[] flok = feok2[3..6]; + int[] flak = flok[1..2]; + flok = feok2[..6]; + flok = feok2[..^2]; + flok = feok2[..]; + flok = feok2[^3..]; + flok = feok2[^4..5]; + flok = feok2[2..^2]; + flok = feok2[^3..^1]; + flok = feok2[..]; + flak = flok[..6]; + flak = flok[..^2]; + flak = flok[..]; + flak = flok[^3..]; + flak = flok[^4..5]; + flak = flok[2..^2]; + flak = flok[^3..^1]; + int* p = nil; + // TODO p[-1..20]; +} \ No newline at end of file