From 407ed5a63de8088afbee6c733f48bf817767fb44 Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Sat, 8 Oct 2022 14:30:17 +0200 Subject: [PATCH] Fix vector init and float promotion. Fix of 1.0f. --- src/compiler/float.c | 2 +- src/compiler/llvm_codegen_expr.c | 29 +- src/version.h | 2 +- test/test_suite/constants/float_type.c3t | 11 + .../vector/vector_init_regression.c3t | 428 ++++++++++++++++++ test/test_suite2/constants/float_type.c3t | 11 + .../vector/vector_init_regression.c3t | 428 ++++++++++++++++++ 7 files changed, 908 insertions(+), 3 deletions(-) create mode 100644 test/test_suite/constants/float_type.c3t create mode 100644 test/test_suite/vector/vector_init_regression.c3t create mode 100644 test/test_suite2/constants/float_type.c3t create mode 100644 test/test_suite2/vector/vector_init_regression.c3t diff --git a/src/compiler/float.c b/src/compiler/float.c index 059d46f43..e6fbbeefc 100644 --- a/src/compiler/float.c +++ b/src/compiler/float.c @@ -150,6 +150,7 @@ Float float_from_string(const char *string, char **error) } switch (i) { + case 0: case 32: kind = TYPE_F32; break; @@ -157,7 +158,6 @@ Float float_from_string(const char *string, char **error) kind = TYPE_F16; break; case 64: - case 0: kind = TYPE_F64; break; case 128: diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index 459e5f99d..72e431a73 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -1533,6 +1533,19 @@ static inline void llvm_emit_initialize_reference_const(GenContext *c, BEValue * llvm_emit_inititialize_reference_const(c, ref, initializer); } +static inline void llvm_emit_initialize_reference_vector(GenContext *c, BEValue *ref, Type *real_type, Expr **elements) +{ + llvm_value_addr(c, ref); + LLVMTypeRef llvm_type = llvm_get_type(c, real_type); + LLVMValueRef vector_val = LLVMGetUndef(llvm_type); + BEValue element_val; + FOREACH_BEGIN_IDX(i, Expr *element, elements) + llvm_emit_expr(c, &element_val, element); + 
llvm_value_rvalue(c, &element_val); + vector_val = LLVMBuildInsertElement(c->builder, vector_val, element_val.value, llvm_const_int(c, type_usize, i), ""); + FOREACH_END(); + llvm_store_raw(c, ref, vector_val); +} static inline void llvm_emit_initialize_reference_list(GenContext *c, BEValue *ref, Expr *expr) { @@ -1540,10 +1553,17 @@ static inline void llvm_emit_initialize_reference_list(GenContext *c, BEValue *r Type *real_type = type_lowering(ref->type); Expr **elements = expr->initializer_list; + if (real_type->type_kind == TYPE_VECTOR) + { + llvm_emit_initialize_reference_vector(c, ref, real_type, elements); + return; + } + // Make sure we have an address. llvm_value_addr(c, ref); LLVMValueRef value = ref->value; + // If this is a union, we assume it's initializing the first element. if (real_type->type_kind == TYPE_UNION) { @@ -1557,6 +1577,8 @@ static inline void llvm_emit_initialize_reference_list(GenContext *c, BEValue *r LLVMTypeRef llvm_type = llvm_get_type(c, real_type); bool is_struct = type_is_union_or_strukt(real_type); bool is_array = real_type->type_kind == TYPE_ARRAY; + + bool is_vector = real_type->type_kind == TYPE_VECTOR; // Now walk through the elements. 
VECEACH(elements, i) { @@ -1865,7 +1887,12 @@ static inline void llvm_emit_const_initialize_reference(GenContext *c, BEValue * { assert(expr->expr_kind == EXPR_CONST && expr->const_expr.const_kind == CONST_INITIALIZER); ConstInitializer *initializer = expr->const_expr.initializer; - assert(!type_flat_is_vector(initializer->type) && "Vectors should be handled elsewhere."); + if (initializer->type->type_kind == TYPE_VECTOR) + { + LLVMValueRef val = llvm_emit_const_initializer(c, initializer); + llvm_store_raw(c, ref, val); + return; + } if (initializer->type->type_kind == TYPE_BITSTRUCT) { llvm_emit_const_initialize_bitstruct_ref(c, ref, initializer); diff --git a/src/version.h b/src/version.h index 8fef5002d..212a4e1e7 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define COMPILER_VERSION "0.3.75" \ No newline at end of file +#define COMPILER_VERSION "0.3.76" \ No newline at end of file diff --git a/test/test_suite/constants/float_type.c3t b/test/test_suite/constants/float_type.c3t new file mode 100644 index 000000000..5ee96b84e --- /dev/null +++ b/test/test_suite/constants/float_type.c3t @@ -0,0 +1,11 @@ +// #target: macos-x64 +module test; +uint f0 = $typeof(1.0f).sizeof; +uint f32 = $typeof(1.0f32).sizeof; +uint f64 = $typeof(1.0f64).sizeof; + +/* #expect: test.ll + +@test_f0 = local_unnamed_addr global i32 4, align 4 +@test_f32 = local_unnamed_addr global i32 4, align 4 +@test_f64 = local_unnamed_addr global i32 8, align 4 \ No newline at end of file diff --git a/test/test_suite/vector/vector_init_regression.c3t b/test/test_suite/vector/vector_init_regression.c3t new file mode 100644 index 000000000..2ce7a8a5a --- /dev/null +++ b/test/test_suite/vector/vector_init_regression.c3t @@ -0,0 +1,428 @@ +// #target: macos-x64 +module test; +extern fn int printf(char* format, ...); + +fn void main() { + float radians = 3.1415 / 4; + float[<3>] axis = {0.0, 0.0, 1.0}; + + float cosr = (float) $$cos(radians); + float sinr = (float) $$sin(radians); + float x 
= axis[0]; + float y = axis[1]; + float z = axis[2]; + + float[<4>][4] a = {}; + a[0] = { + cosr + (x * x) * (float) (1.0 - cosr), + (x * y) * (float) (1.0 - cosr) - (z * sinr), + (x * z) * (float) (1.0 - cosr) + (y * sinr), + 0.0 + }; + + a[1] = { + (y * x) * (float) (1.0 - cosr) + (z * sinr), + cosr + (y * y) * (float) (1.0 - cosr), + (y * z) * (float) (1.0 - cosr) - (x * sinr), + 0.0 + }; + + a[2] = { + (z * x) * (float) (1.0 - cosr) - (y * sinr), + (z * y) * (float) (1.0 - cosr) + (x * sinr), + cosr + (z * z) * (float) (1.0 - cosr), + 0.0 + }; + + a[3] = { + 0.0, + 0.0, + 0.0, + 1.0 + }; + + float[<4>][4] b = { + { + cosr + (x * x) * (float) (1.0 - cosr), + (x * y) * (float) (1.0 - cosr) - (z * sinr), + (x * z) * (float) (1.0 - cosr) + (y * sinr), + 0.0 + }, + { + (y * x) * (float) (1.0 - cosr) + (z * sinr), + cosr + (y * y) * (float) (1.0 - cosr), + (y * z) * (float) (1.0 - cosr) - (x * sinr), + 0.0 + }, + { + (z * x) * (float) (1.0 - cosr) - (y * sinr), + (z * y) * (float) (1.0 - cosr) + (x * sinr), + cosr + (z * z) * (float) (1.0 - cosr), + 0.0 + }, + { + 0.0, + 0.0, + 0.0, + 1.0 + } + }; + + foreach(v : a) { + printf("A: %f %f %f %f\n", v[0], v[1], v[2], v[3]); + } + + printf("\n"); + + foreach(v : b) { + printf("B: %f %f %f %f\n", v[0], v[1], v[2], v[3]); + } +} + +/* #expect: test.ll + +define void @test_main() #0 { +entry: + %radians = alloca float, align 4 + %axis = alloca <3 x float>, align 16 + %cosr = alloca float, align 4 + %sinr = alloca float, align 4 + %x = alloca float, align 4 + %y = alloca float, align 4 + %z = alloca float, align 4 + %a = alloca [4 x <4 x float>], align 16 + %b = alloca [4 x <4 x float>], align 16 + %.anon = alloca i64, align 8 + %v = alloca <4 x float>, align 16 + %.anon120 = alloca i64, align 8 + %v124 = alloca <4 x float>, align 16 + store float 0x3FE921CAC0000000, float* %radians, align 4 + store <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float>* %axis, align 16 + %0 = load float, float* %radians, align 4 + %1 = call float @llvm.cos.f32(float
%0) + store float %1, float* %cosr, align 4 + %2 = load float, float* %radians, align 4 + %3 = call float @llvm.sin.f32(float %2) + store float %3, float* %sinr, align 4 + %4 = load <3 x float>, <3 x float>* %axis, align 16 + %5 = extractelement <3 x float> %4, i64 0 + store float %5, float* %x, align 4 + %6 = load <3 x float>, <3 x float>* %axis, align 16 + %7 = extractelement <3 x float> %6, i64 1 + store float %7, float* %y, align 4 + %8 = load <3 x float>, <3 x float>* %axis, align 16 + %9 = extractelement <3 x float> %8, i64 2 + store float %9, float* %z, align 4 + %10 = bitcast [4 x <4 x float>]* %a to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %10, i8 0, i64 64, i1 false) + %11 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 0 + %12 = load float, float* %cosr, align 4 + %13 = load float, float* %x, align 4 + %14 = load float, float* %x, align 4 + %fmul = fmul float %13, %14 + %15 = load float, float* %cosr, align 4 + %fpfpext = fpext float %15 to double + %fsub = fsub double 1.000000e+00, %fpfpext + %fpfptrunc = fptrunc double %fsub to float + %fmul1 = fmul float %fmul, %fpfptrunc + %fadd = fadd float %12, %fmul1 + %16 = insertelement <4 x float> undef, float %fadd, i64 0 + %17 = load float, float* %x, align 4 + %18 = load float, float* %y, align 4 + %fmul2 = fmul float %17, %18 + %19 = load float, float* %cosr, align 4 + %fpfpext3 = fpext float %19 to double + %fsub4 = fsub double 1.000000e+00, %fpfpext3 + %fpfptrunc5 = fptrunc double %fsub4 to float + %fmul6 = fmul float %fmul2, %fpfptrunc5 + %20 = load float, float* %z, align 4 + %21 = load float, float* %sinr, align 4 + %fmul7 = fmul float %20, %21 + %fsub8 = fsub float %fmul6, %fmul7 + %22 = insertelement <4 x float> %16, float %fsub8, i64 1 + %23 = load float, float* %x, align 4 + %24 = load float, float* %z, align 4 + %fmul9 = fmul float %23, %24 + %25 = load float, float* %cosr, align 4 + %fpfpext10 = fpext float %25 to double + %fsub11 = fsub double 
1.000000e+00, %fpfpext10 + %fpfptrunc12 = fptrunc double %fsub11 to float + %fmul13 = fmul float %fmul9, %fpfptrunc12 + %26 = load float, float* %y, align 4 + %27 = load float, float* %sinr, align 4 + %fmul14 = fmul float %26, %27 + %fadd15 = fadd float %fmul13, %fmul14 + %28 = insertelement <4 x float> %22, float %fadd15, i64 2 + %29 = insertelement <4 x float> %28, float 0.000000e+00, i64 3 + store <4 x float> %29, <4 x float>* %11, align 16 + %30 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 1 + %31 = load float, float* %y, align 4 + %32 = load float, float* %x, align 4 + %fmul16 = fmul float %31, %32 + %33 = load float, float* %cosr, align 4 + %fpfpext17 = fpext float %33 to double + %fsub18 = fsub double 1.000000e+00, %fpfpext17 + %fpfptrunc19 = fptrunc double %fsub18 to float + %fmul20 = fmul float %fmul16, %fpfptrunc19 + %34 = load float, float* %z, align 4 + %35 = load float, float* %sinr, align 4 + %fmul21 = fmul float %34, %35 + %fadd22 = fadd float %fmul20, %fmul21 + %36 = insertelement <4 x float> undef, float %fadd22, i64 0 + %37 = load float, float* %cosr, align 4 + %38 = load float, float* %y, align 4 + %39 = load float, float* %y, align 4 + %fmul23 = fmul float %38, %39 + %40 = load float, float* %cosr, align 4 + %fpfpext24 = fpext float %40 to double + %fsub25 = fsub double 1.000000e+00, %fpfpext24 + %fpfptrunc26 = fptrunc double %fsub25 to float + %fmul27 = fmul float %fmul23, %fpfptrunc26 + %fadd28 = fadd float %37, %fmul27 + %41 = insertelement <4 x float> %36, float %fadd28, i64 1 + %42 = load float, float* %y, align 4 + %43 = load float, float* %z, align 4 + %fmul29 = fmul float %42, %43 + %44 = load float, float* %cosr, align 4 + %fpfpext30 = fpext float %44 to double + %fsub31 = fsub double 1.000000e+00, %fpfpext30 + %fpfptrunc32 = fptrunc double %fsub31 to float + %fmul33 = fmul float %fmul29, %fpfptrunc32 + %45 = load float, float* %x, align 4 + %46 = load float, float* %sinr, align 4 + %fmul34 = fmul float 
%45, %46 + %fsub35 = fsub float %fmul33, %fmul34 + %47 = insertelement <4 x float> %41, float %fsub35, i64 2 + %48 = insertelement <4 x float> %47, float 0.000000e+00, i64 3 + store <4 x float> %48, <4 x float>* %30, align 16 + %49 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 2 + %50 = load float, float* %z, align 4 + %51 = load float, float* %x, align 4 + %fmul36 = fmul float %50, %51 + %52 = load float, float* %cosr, align 4 + %fpfpext37 = fpext float %52 to double + %fsub38 = fsub double 1.000000e+00, %fpfpext37 + %fpfptrunc39 = fptrunc double %fsub38 to float + %fmul40 = fmul float %fmul36, %fpfptrunc39 + %53 = load float, float* %y, align 4 + %54 = load float, float* %sinr, align 4 + %fmul41 = fmul float %53, %54 + %fsub42 = fsub float %fmul40, %fmul41 + %55 = insertelement <4 x float> undef, float %fsub42, i64 0 + %56 = load float, float* %z, align 4 + %57 = load float, float* %y, align 4 + %fmul43 = fmul float %56, %57 + %58 = load float, float* %cosr, align 4 + %fpfpext44 = fpext float %58 to double + %fsub45 = fsub double 1.000000e+00, %fpfpext44 + %fpfptrunc46 = fptrunc double %fsub45 to float + %fmul47 = fmul float %fmul43, %fpfptrunc46 + %59 = load float, float* %x, align 4 + %60 = load float, float* %sinr, align 4 + %fmul48 = fmul float %59, %60 + %fadd49 = fadd float %fmul47, %fmul48 + %61 = insertelement <4 x float> %55, float %fadd49, i64 1 + %62 = load float, float* %cosr, align 4 + %63 = load float, float* %z, align 4 + %64 = load float, float* %z, align 4 + %fmul50 = fmul float %63, %64 + %65 = load float, float* %cosr, align 4 + %fpfpext51 = fpext float %65 to double + %fsub52 = fsub double 1.000000e+00, %fpfpext51 + %fpfptrunc53 = fptrunc double %fsub52 to float + %fmul54 = fmul float %fmul50, %fpfptrunc53 + %fadd55 = fadd float %62, %fmul54 + %66 = insertelement <4 x float> %61, float %fadd55, i64 2 + %67 = insertelement <4 x float> %66, float 0.000000e+00, i64 3 + store <4 x float> %67, <4 x float>* %49, align 
16 + %68 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 3 + store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x float>* %68, align 16 + %69 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 0 + %70 = load float, float* %cosr, align 4 + %71 = load float, float* %x, align 4 + %72 = load float, float* %x, align 4 + %fmul56 = fmul float %71, %72 + %73 = load float, float* %cosr, align 4 + %fpfpext57 = fpext float %73 to double + %fsub58 = fsub double 1.000000e+00, %fpfpext57 + %fpfptrunc59 = fptrunc double %fsub58 to float + %fmul60 = fmul float %fmul56, %fpfptrunc59 + %fadd61 = fadd float %70, %fmul60 + %74 = insertelement <4 x float> undef, float %fadd61, i64 0 + %75 = load float, float* %x, align 4 + %76 = load float, float* %y, align 4 + %fmul62 = fmul float %75, %76 + %77 = load float, float* %cosr, align 4 + %fpfpext63 = fpext float %77 to double + %fsub64 = fsub double 1.000000e+00, %fpfpext63 + %fpfptrunc65 = fptrunc double %fsub64 to float + %fmul66 = fmul float %fmul62, %fpfptrunc65 + %78 = load float, float* %z, align 4 + %79 = load float, float* %sinr, align 4 + %fmul67 = fmul float %78, %79 + %fsub68 = fsub float %fmul66, %fmul67 + %80 = insertelement <4 x float> %74, float %fsub68, i64 1 + %81 = load float, float* %x, align 4 + %82 = load float, float* %z, align 4 + %fmul69 = fmul float %81, %82 + %83 = load float, float* %cosr, align 4 + %fpfpext70 = fpext float %83 to double + %fsub71 = fsub double 1.000000e+00, %fpfpext70 + %fpfptrunc72 = fptrunc double %fsub71 to float + %fmul73 = fmul float %fmul69, %fpfptrunc72 + %84 = load float, float* %y, align 4 + %85 = load float, float* %sinr, align 4 + %fmul74 = fmul float %84, %85 + %fadd75 = fadd float %fmul73, %fmul74 + %86 = insertelement <4 x float> %80, float %fadd75, i64 2 + %87 = insertelement <4 x float> %86, float 0.000000e+00, i64 3 + store <4 x float> %87, <4 x float>* %69, align 16 + %88 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64
1 + %89 = load float, float* %y, align 4 + %90 = load float, float* %x, align 4 + %fmul76 = fmul float %89, %90 + %91 = load float, float* %cosr, align 4 + %fpfpext77 = fpext float %91 to double + %fsub78 = fsub double 1.000000e+00, %fpfpext77 + %fpfptrunc79 = fptrunc double %fsub78 to float + %fmul80 = fmul float %fmul76, %fpfptrunc79 + %92 = load float, float* %z, align 4 + %93 = load float, float* %sinr, align 4 + %fmul81 = fmul float %92, %93 + %fadd82 = fadd float %fmul80, %fmul81 + %94 = insertelement <4 x float> undef, float %fadd82, i64 0 + %95 = load float, float* %cosr, align 4 + %96 = load float, float* %y, align 4 + %97 = load float, float* %y, align 4 + %fmul83 = fmul float %96, %97 + %98 = load float, float* %cosr, align 4 + %fpfpext84 = fpext float %98 to double + %fsub85 = fsub double 1.000000e+00, %fpfpext84 + %fpfptrunc86 = fptrunc double %fsub85 to float + %fmul87 = fmul float %fmul83, %fpfptrunc86 + %fadd88 = fadd float %95, %fmul87 + %99 = insertelement <4 x float> %94, float %fadd88, i64 1 + %100 = load float, float* %y, align 4 + %101 = load float, float* %z, align 4 + %fmul89 = fmul float %100, %101 + %102 = load float, float* %cosr, align 4 + %fpfpext90 = fpext float %102 to double + %fsub91 = fsub double 1.000000e+00, %fpfpext90 + %fpfptrunc92 = fptrunc double %fsub91 to float + %fmul93 = fmul float %fmul89, %fpfptrunc92 + %103 = load float, float* %x, align 4 + %104 = load float, float* %sinr, align 4 + %fmul94 = fmul float %103, %104 + %fsub95 = fsub float %fmul93, %fmul94 + %105 = insertelement <4 x float> %99, float %fsub95, i64 2 + %106 = insertelement <4 x float> %105, float 0.000000e+00, i64 3 + store <4 x float> %106, <4 x float>* %88, align 16 + %107 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 2 + %108 = load float, float* %z, align 4 + %109 = load float, float* %x, align 4 + %fmul96 = fmul float %108, %109 + %110 = load float, float* %cosr, align 4 + %fpfpext97 = fpext float %110 to double + 
%fsub98 = fsub double 1.000000e+00, %fpfpext97 + %fpfptrunc99 = fptrunc double %fsub98 to float + %fmul100 = fmul float %fmul96, %fpfptrunc99 + %111 = load float, float* %y, align 4 + %112 = load float, float* %sinr, align 4 + %fmul101 = fmul float %111, %112 + %fsub102 = fsub float %fmul100, %fmul101 + %113 = insertelement <4 x float> undef, float %fsub102, i64 0 + %114 = load float, float* %z, align 4 + %115 = load float, float* %y, align 4 + %fmul103 = fmul float %114, %115 + %116 = load float, float* %cosr, align 4 + %fpfpext104 = fpext float %116 to double + %fsub105 = fsub double 1.000000e+00, %fpfpext104 + %fpfptrunc106 = fptrunc double %fsub105 to float + %fmul107 = fmul float %fmul103, %fpfptrunc106 + %117 = load float, float* %x, align 4 + %118 = load float, float* %sinr, align 4 + %fmul108 = fmul float %117, %118 + %fadd109 = fadd float %fmul107, %fmul108 + %119 = insertelement <4 x float> %113, float %fadd109, i64 1 + %120 = load float, float* %cosr, align 4 + %121 = load float, float* %z, align 4 + %122 = load float, float* %z, align 4 + %fmul110 = fmul float %121, %122 + %123 = load float, float* %cosr, align 4 + %fpfpext111 = fpext float %123 to double + %fsub112 = fsub double 1.000000e+00, %fpfpext111 + %fpfptrunc113 = fptrunc double %fsub112 to float + %fmul114 = fmul float %fmul110, %fpfptrunc113 + %fadd115 = fadd float %120, %fmul114 + %124 = insertelement <4 x float> %119, float %fadd115, i64 2 + %125 = insertelement <4 x float> %124, float 0.000000e+00, i64 3 + store <4 x float> %125, <4 x float>* %107, align 16 + %126 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 3 + store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x float>* %126, align 16 + store i64 0, i64* %.anon, align 8 + br label %loop.cond + +loop.cond: ; preds = %loop.body, %entry + %127 = load i64, i64* %.anon, align 8 + %gt = icmp ugt i64 4, %127 + br i1 %gt, label %loop.body, label %loop.exit + +loop.body: ; preds = %loop.cond + %128 = load i64, i64* %.anon, align 8 + %129 =
getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 %128 + %130 = load <4 x float>, <4 x float>* %129, align 16 + store <4 x float> %130, <4 x float>* %v, align 16 + %131 = load <4 x float>, <4 x float>* %v, align 16 + %132 = extractelement <4 x float> %131, i64 0 + %fpfpext116 = fpext float %132 to double + %133 = load <4 x float>, <4 x float>* %v, align 16 + %134 = extractelement <4 x float> %133, i64 1 + %fpfpext117 = fpext float %134 to double + %135 = load <4 x float>, <4 x float>* %v, align 16 + %136 = extractelement <4 x float> %135, i64 2 + %fpfpext118 = fpext float %136 to double + %137 = load <4 x float>, <4 x float>* %v, align 16 + %138 = extractelement <4 x float> %137, i64 3 + %fpfpext119 = fpext float %138 to double + %139 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i32 0, i32 0), double %fpfpext116, double %fpfpext117, double %fpfpext118, double %fpfpext119) + %140 = load i64, i64* %.anon, align 8 + %add = add i64 %140, 1 + store i64 %add, i64* %.anon, align 8 + br label %loop.cond + +loop.exit: ; preds = %loop.cond + %141 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) + store i64 0, i64* %.anon120, align 8 + br label %loop.cond121 + +loop.cond121: ; preds = %loop.body123, %loop.exit + %142 = load i64, i64* %.anon120, align 8 + %gt122 = icmp ugt i64 4, %142 + br i1 %gt122, label %loop.body123, label %loop.exit130 + +loop.body123: ; preds = %loop.cond121 + %143 = load i64, i64* %.anon120, align 8 + %144 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 %143 + %145 = load <4 x float>, <4 x float>* %144, align 16 + store <4 x float> %145, <4 x float>* %v124, align 16 + %146 = load <4 x float>, <4 x float>* %v124, align 16 + %147 = extractelement <4 x float> %146, i64 0 + %fpfpext125 = fpext float %147 to double + %148 = load <4 x float>, <4 x float>* %v124, align 16 + %149 = extractelement <4 x float> %148, i64 1 + %fpfpext126 = fpext float %149 to double + %150 = load <4 x float>, <4 x float>* %v124, align 16 + %151 = extractelement <4 x float> %150, i64 2 + %fpfpext127 = fpext float %151 to double + %152 = load <4 x float>, <4 x float>* %v124, align 16 + %153 = extractelement <4 x float> %152, i64 3 + %fpfpext128 = fpext float %153 to double + %154 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.2, i32 0, i32 0), double %fpfpext125, double %fpfpext126, double %fpfpext127, double %fpfpext128) + %155 = load i64, i64* %.anon120, align 8 + %add129 = add i64 %155, 1 + store i64 %add129, i64* %.anon120, align 8 + br label %loop.cond121 + +loop.exit130: ; preds = %loop.cond121 + ret void +} \ No newline at end of file diff --git a/test/test_suite2/constants/float_type.c3t b/test/test_suite2/constants/float_type.c3t new file mode 100644 index 000000000..5ee96b84e --- /dev/null +++ b/test/test_suite2/constants/float_type.c3t @@ -0,0 +1,11 @@ +// #target: macos-x64 +module test; +uint f0 = $typeof(1.0f).sizeof; +uint f32 = $typeof(1.0f32).sizeof; +uint f64 = $typeof(1.0f64).sizeof; + +/* #expect: test.ll + +@test_f0 = local_unnamed_addr global i32 4, align 4 +@test_f32 = local_unnamed_addr global i32 4, align 4 +@test_f64 = local_unnamed_addr global i32 8, align 4 \ No newline at end of file diff --git a/test/test_suite2/vector/vector_init_regression.c3t b/test/test_suite2/vector/vector_init_regression.c3t new file mode 100644 index 000000000..2ce7a8a5a --- /dev/null +++ b/test/test_suite2/vector/vector_init_regression.c3t @@ -0,0 +1,428 @@ +// #target: macos-x64 +module test; +extern fn int printf(char* format, ...); + +fn void main() { + float radians = 3.1415 / 4; + float[<3>] axis = {0.0, 0.0, 1.0}; + + float cosr = (float) $$cos(radians); + float sinr = (float) $$sin(radians); + float x = axis[0]; + float y = axis[1]; + float z = axis[2]; + + float[<4>][4] a = {}; + a[0] = { + cosr + (x * x) * (float) (1.0 - cosr), + (x * y) * (float) (1.0 - cosr) - (z * sinr), + (x * z) * (float) (1.0 - cosr) + (y * sinr), + 0.0 + }; + + a[1] = { + (y * x) * (float) (1.0 - cosr) + (z * sinr), + cosr + (y * y) * (float) (1.0 - cosr), + (y * z) * (float) (1.0 - cosr) - (x * sinr), + 0.0 + }; + + a[2] = { + (z * x) * (float) (1.0 - cosr) - (y * sinr), + (z * y) * (float) (1.0 - cosr) + (x * sinr), + cosr + (z * z) * 
(float) (1.0 - cosr), + 0.0 + }; + + a[3] = { + 0.0, + 0.0, + 0.0, + 1.0 + }; + + float[<4>][4] b = { + { + cosr + (x * x) * (float) (1.0 - cosr), + (x * y) * (float) (1.0 - cosr) - (z * sinr), + (x * z) * (float) (1.0 - cosr) + (y * sinr), + 0.0 + }, + { + (y * x) * (float) (1.0 - cosr) + (z * sinr), + cosr + (y * y) * (float) (1.0 - cosr), + (y * z) * (float) (1.0 - cosr) - (x * sinr), + 0.0 + }, + { + (z * x) * (float) (1.0 - cosr) - (y * sinr), + (z * y) * (float) (1.0 - cosr) + (x * sinr), + cosr + (z * z) * (float) (1.0 - cosr), + 0.0 + }, + { + 0.0, + 0.0, + 0.0, + 1.0 + } + }; + + foreach(v : a) { + printf("A: %f %f %f %f\n", v[0], v[1], v[2], v[3]); + } + + printf("\n"); + + foreach(v : b) { + printf("B: %f %f %f %f\n", v[0], v[1], v[2], v[3]); + } +} + +/* #expect: test.ll + +define void @test_main() #0 { +entry: + %radians = alloca float, align 4 + %axis = alloca <3 x float>, align 16 + %cosr = alloca float, align 4 + %sinr = alloca float, align 4 + %x = alloca float, align 4 + %y = alloca float, align 4 + %z = alloca float, align 4 + %a = alloca [4 x <4 x float>], align 16 + %b = alloca [4 x <4 x float>], align 16 + %.anon = alloca i64, align 8 + %v = alloca <4 x float>, align 16 + %.anon120 = alloca i64, align 8 + %v124 = alloca <4 x float>, align 16 + store float 0x3FE921CAC0000000, float* %radians, align 4 + store <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float>* %axis, align 16 + %0 = load float, float* %radians, align 4 + %1 = call float @llvm.cos.f32(float %0) + store float %1, float* %cosr, align 4 + %2 = load float, float* %radians, align 4 + %3 = call float @llvm.sin.f32(float %2) + store float %3, float* %sinr, align 4 + %4 = load <3 x float>, <3 x float>* %axis, align 16 + %5 = extractelement <3 x float> %4, i64 0 + store float %5, float* %x, align 4 + %6 = load <3 x float>, <3 x float>* %axis, align 16 + %7 = extractelement <3 x float> %6, i64 1 + store float %7, float* %y, align 4 + %8 = load <3 x float>, <3 x float>* %axis, align 16 + %9 = extractelement <3 x
float> %8, i64 2 + store float %9, float* %z, align 4 + %10 = bitcast [4 x <4 x float>]* %a to i8* + call void @llvm.memset.p0i8.i64(i8* align 16 %10, i8 0, i64 64, i1 false) + %11 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 0 + %12 = load float, float* %cosr, align 4 + %13 = load float, float* %x, align 4 + %14 = load float, float* %x, align 4 + %fmul = fmul float %13, %14 + %15 = load float, float* %cosr, align 4 + %fpfpext = fpext float %15 to double + %fsub = fsub double 1.000000e+00, %fpfpext + %fpfptrunc = fptrunc double %fsub to float + %fmul1 = fmul float %fmul, %fpfptrunc + %fadd = fadd float %12, %fmul1 + %16 = insertelement <4 x float> undef, float %fadd, i64 0 + %17 = load float, float* %x, align 4 + %18 = load float, float* %y, align 4 + %fmul2 = fmul float %17, %18 + %19 = load float, float* %cosr, align 4 + %fpfpext3 = fpext float %19 to double + %fsub4 = fsub double 1.000000e+00, %fpfpext3 + %fpfptrunc5 = fptrunc double %fsub4 to float + %fmul6 = fmul float %fmul2, %fpfptrunc5 + %20 = load float, float* %z, align 4 + %21 = load float, float* %sinr, align 4 + %fmul7 = fmul float %20, %21 + %fsub8 = fsub float %fmul6, %fmul7 + %22 = insertelement <4 x float> %16, float %fsub8, i64 1 + %23 = load float, float* %x, align 4 + %24 = load float, float* %z, align 4 + %fmul9 = fmul float %23, %24 + %25 = load float, float* %cosr, align 4 + %fpfpext10 = fpext float %25 to double + %fsub11 = fsub double 1.000000e+00, %fpfpext10 + %fpfptrunc12 = fptrunc double %fsub11 to float + %fmul13 = fmul float %fmul9, %fpfptrunc12 + %26 = load float, float* %y, align 4 + %27 = load float, float* %sinr, align 4 + %fmul14 = fmul float %26, %27 + %fadd15 = fadd float %fmul13, %fmul14 + %28 = insertelement <4 x float> %22, float %fadd15, i64 2 + %29 = insertelement <4 x float> %28, float 0.000000e+00, i64 3 + store <4 x float> %29, <4 x float>* %11, align 16 + %30 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 1 
+ %31 = load float, float* %y, align 4 + %32 = load float, float* %x, align 4 + %fmul16 = fmul float %31, %32 + %33 = load float, float* %cosr, align 4 + %fpfpext17 = fpext float %33 to double + %fsub18 = fsub double 1.000000e+00, %fpfpext17 + %fpfptrunc19 = fptrunc double %fsub18 to float + %fmul20 = fmul float %fmul16, %fpfptrunc19 + %34 = load float, float* %z, align 4 + %35 = load float, float* %sinr, align 4 + %fmul21 = fmul float %34, %35 + %fadd22 = fadd float %fmul20, %fmul21 + %36 = insertelement <4 x float> undef, float %fadd22, i64 0 + %37 = load float, float* %cosr, align 4 + %38 = load float, float* %y, align 4 + %39 = load float, float* %y, align 4 + %fmul23 = fmul float %38, %39 + %40 = load float, float* %cosr, align 4 + %fpfpext24 = fpext float %40 to double + %fsub25 = fsub double 1.000000e+00, %fpfpext24 + %fpfptrunc26 = fptrunc double %fsub25 to float + %fmul27 = fmul float %fmul23, %fpfptrunc26 + %fadd28 = fadd float %37, %fmul27 + %41 = insertelement <4 x float> %36, float %fadd28, i64 1 + %42 = load float, float* %y, align 4 + %43 = load float, float* %z, align 4 + %fmul29 = fmul float %42, %43 + %44 = load float, float* %cosr, align 4 + %fpfpext30 = fpext float %44 to double + %fsub31 = fsub double 1.000000e+00, %fpfpext30 + %fpfptrunc32 = fptrunc double %fsub31 to float + %fmul33 = fmul float %fmul29, %fpfptrunc32 + %45 = load float, float* %x, align 4 + %46 = load float, float* %sinr, align 4 + %fmul34 = fmul float %45, %46 + %fsub35 = fsub float %fmul33, %fmul34 + %47 = insertelement <4 x float> %41, float %fsub35, i64 2 + %48 = insertelement <4 x float> %47, float 0.000000e+00, i64 3 + store <4 x float> %48, <4 x float>* %30, align 16 + %49 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 2 + %50 = load float, float* %z, align 4 + %51 = load float, float* %x, align 4 + %fmul36 = fmul float %50, %51 + %52 = load float, float* %cosr, align 4 + %fpfpext37 = fpext float %52 to double + %fsub38 = fsub double 
1.000000e+00, %fpfpext37 + %fpfptrunc39 = fptrunc double %fsub38 to float + %fmul40 = fmul float %fmul36, %fpfptrunc39 + %53 = load float, float* %y, align 4 + %54 = load float, float* %sinr, align 4 + %fmul41 = fmul float %53, %54 + %fsub42 = fsub float %fmul40, %fmul41 + %55 = insertelement <4 x float> undef, float %fsub42, i64 0 + %56 = load float, float* %z, align 4 + %57 = load float, float* %y, align 4 + %fmul43 = fmul float %56, %57 + %58 = load float, float* %cosr, align 4 + %fpfpext44 = fpext float %58 to double + %fsub45 = fsub double 1.000000e+00, %fpfpext44 + %fpfptrunc46 = fptrunc double %fsub45 to float + %fmul47 = fmul float %fmul43, %fpfptrunc46 + %59 = load float, float* %x, align 4 + %60 = load float, float* %sinr, align 4 + %fmul48 = fmul float %59, %60 + %fadd49 = fadd float %fmul47, %fmul48 + %61 = insertelement <4 x float> %55, float %fadd49, i64 1 + %62 = load float, float* %cosr, align 4 + %63 = load float, float* %z, align 4 + %64 = load float, float* %z, align 4 + %fmul50 = fmul float %63, %64 + %65 = load float, float* %cosr, align 4 + %fpfpext51 = fpext float %65 to double + %fsub52 = fsub double 1.000000e+00, %fpfpext51 + %fpfptrunc53 = fptrunc double %fsub52 to float + %fmul54 = fmul float %fmul50, %fpfptrunc53 + %fadd55 = fadd float %62, %fmul54 + %66 = insertelement <4 x float> %61, float %fadd55, i64 2 + %67 = insertelement <4 x float> %66, float 0.000000e+00, i64 3 + store <4 x float> %67, <4 x float>* %49, align 16 + %68 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 3 + store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x float>* %68, align 16 + %69 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 0 + %70 = load float, float* %cosr, align 4 + %71 = load float, float* %x, align 4 + %72 = load float, float* %x, align 4 + %fmul56 = fmul float %71, %72 + %73 = load float, float* %cosr, align 4 + %fpfpext57 = fpext float %73 to double + %fsub58 = fsub double 1.000000e+00, %fpfpext57 + %fpfptrunc59 =
fptrunc double %fsub58 to float + %fmul60 = fmul float %fmul56, %fpfptrunc59 + %fadd61 = fadd float %70, %fmul60 + %74 = insertelement <4 x float> undef, float %fadd61, i64 0 + %75 = load float, float* %x, align 4 + %76 = load float, float* %y, align 4 + %fmul62 = fmul float %75, %76 + %77 = load float, float* %cosr, align 4 + %fpfpext63 = fpext float %77 to double + %fsub64 = fsub double 1.000000e+00, %fpfpext63 + %fpfptrunc65 = fptrunc double %fsub64 to float + %fmul66 = fmul float %fmul62, %fpfptrunc65 + %78 = load float, float* %z, align 4 + %79 = load float, float* %sinr, align 4 + %fmul67 = fmul float %78, %79 + %fsub68 = fsub float %fmul66, %fmul67 + %80 = insertelement <4 x float> %74, float %fsub68, i64 1 + %81 = load float, float* %x, align 4 + %82 = load float, float* %z, align 4 + %fmul69 = fmul float %81, %82 + %83 = load float, float* %cosr, align 4 + %fpfpext70 = fpext float %83 to double + %fsub71 = fsub double 1.000000e+00, %fpfpext70 + %fpfptrunc72 = fptrunc double %fsub71 to float + %fmul73 = fmul float %fmul69, %fpfptrunc72 + %84 = load float, float* %y, align 4 + %85 = load float, float* %sinr, align 4 + %fmul74 = fmul float %84, %85 + %fadd75 = fadd float %fmul73, %fmul74 + %86 = insertelement <4 x float> %80, float %fadd75, i64 2 + %87 = insertelement <4 x float> %86, float 0.000000e+00, i64 3 + store <4 x float> %87, <4 x float>* %69, align 16 + %88 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 1 + %89 = load float, float* %y, align 4 + %90 = load float, float* %x, align 4 + %fmul76 = fmul float %89, %90 + %91 = load float, float* %cosr, align 4 + %fpfpext77 = fpext float %91 to double + %fsub78 = fsub double 1.000000e+00, %fpfpext77 + %fpfptrunc79 = fptrunc double %fsub78 to float + %fmul80 = fmul float %fmul76, %fpfptrunc79 + %92 = load float, float* %z, align 4 + %93 = load float, float* %sinr, align 4 + %fmul81 = fmul float %92, %93 + %fadd82 = fadd float %fmul80, %fmul81 + %94 = insertelement <4 x float> 
undef, float %fadd82, i64 0 + %95 = load float, float* %cosr, align 4 + %96 = load float, float* %y, align 4 + %97 = load float, float* %y, align 4 + %fmul83 = fmul float %96, %97 + %98 = load float, float* %cosr, align 4 + %fpfpext84 = fpext float %98 to double + %fsub85 = fsub double 1.000000e+00, %fpfpext84 + %fpfptrunc86 = fptrunc double %fsub85 to float + %fmul87 = fmul float %fmul83, %fpfptrunc86 + %fadd88 = fadd float %95, %fmul87 + %99 = insertelement <4 x float> %94, float %fadd88, i64 1 + %100 = load float, float* %y, align 4 + %101 = load float, float* %z, align 4 + %fmul89 = fmul float %100, %101 + %102 = load float, float* %cosr, align 4 + %fpfpext90 = fpext float %102 to double + %fsub91 = fsub double 1.000000e+00, %fpfpext90 + %fpfptrunc92 = fptrunc double %fsub91 to float + %fmul93 = fmul float %fmul89, %fpfptrunc92 + %103 = load float, float* %x, align 4 + %104 = load float, float* %sinr, align 4 + %fmul94 = fmul float %103, %104 + %fsub95 = fsub float %fmul93, %fmul94 + %105 = insertelement <4 x float> %99, float %fsub95, i64 2 + %106 = insertelement <4 x float> %105, float 0.000000e+00, i64 3 + store <4 x float> %106, <4 x float>* %88, align 16 + %107 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 2 + %108 = load float, float* %z, align 4 + %109 = load float, float* %x, align 4 + %fmul96 = fmul float %108, %109 + %110 = load float, float* %cosr, align 4 + %fpfpext97 = fpext float %110 to double + %fsub98 = fsub double 1.000000e+00, %fpfpext97 + %fpfptrunc99 = fptrunc double %fsub98 to float + %fmul100 = fmul float %fmul96, %fpfptrunc99 + %111 = load float, float* %y, align 4 + %112 = load float, float* %sinr, align 4 + %fmul101 = fmul float %111, %112 + %fsub102 = fsub float %fmul100, %fmul101 + %113 = insertelement <4 x float> undef, float %fsub102, i64 0 + %114 = load float, float* %z, align 4 + %115 = load float, float* %y, align 4 + %fmul103 = fmul float %114, %115 + %116 = load float, float* %cosr, align 4 + 
%fpfpext104 = fpext float %116 to double + %fsub105 = fsub double 1.000000e+00, %fpfpext104 + %fpfptrunc106 = fptrunc double %fsub105 to float + %fmul107 = fmul float %fmul103, %fpfptrunc106 + %117 = load float, float* %x, align 4 + %118 = load float, float* %sinr, align 4 + %fmul108 = fmul float %117, %118 + %fadd109 = fadd float %fmul107, %fmul108 + %119 = insertelement <4 x float> %113, float %fadd109, i64 1 + %120 = load float, float* %cosr, align 4 + %121 = load float, float* %z, align 4 + %122 = load float, float* %z, align 4 + %fmul110 = fmul float %121, %122 + %123 = load float, float* %cosr, align 4 + %fpfpext111 = fpext float %123 to double + %fsub112 = fsub double 1.000000e+00, %fpfpext111 + %fpfptrunc113 = fptrunc double %fsub112 to float + %fmul114 = fmul float %fmul110, %fpfptrunc113 + %fadd115 = fadd float %120, %fmul114 + %124 = insertelement <4 x float> %119, float %fadd115, i64 2 + %125 = insertelement <4 x float> %124, float 0.000000e+00, i64 3 + store <4 x float> %125, <4 x float>* %107, align 16 + %126 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 3 + store <4 x float> , <4 x float>* %126, align 16 + store i64 0, i64* %.anon, align 8 + br label %loop.cond + +loop.cond: ; preds = %loop.body, %entry + %127 = load i64, i64* %.anon, align 8 + %gt = icmp ugt i64 4, %127 + br i1 %gt, label %loop.body, label %loop.exit + +loop.body: ; preds = %loop.cond + %128 = load i64, i64* %.anon, align 8 + %129 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %a, i64 0, i64 %128 + %130 = load <4 x float>, <4 x float>* %129, align 16 + store <4 x float> %130, <4 x float>* %v, align 16 + %131 = load <4 x float>, <4 x float>* %v, align 16 + %132 = extractelement <4 x float> %131, i64 0 + %fpfpext116 = fpext float %132 to double + %133 = load <4 x float>, <4 x float>* %v, align 16 + %134 = extractelement <4 x float> %133, i64 1 + %fpfpext117 = fpext float %134 to double + %135 = load <4 x float>, <4 x float>* %v, align 16 
+ %136 = extractelement <4 x float> %135, i64 2 + %fpfpext118 = fpext float %136 to double + %137 = load <4 x float>, <4 x float>* %v, align 16 + %138 = extractelement <4 x float> %137, i64 3 + %fpfpext119 = fpext float %138 to double + %139 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i32 0, i32 0), double %fpfpext116, double %fpfpext117, double %fpfpext118, double %fpfpext119) + %140 = load i64, i64* %.anon, align 8 + %add = add i64 %140, 1 + store i64 %add, i64* %.anon, align 8 + br label %loop.cond + +loop.exit: ; preds = %loop.cond + %141 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) + store i64 0, i64* %.anon120, align 8 + br label %loop.cond121 + +loop.cond121: ; preds = %loop.body123, %loop.exit + %142 = load i64, i64* %.anon120, align 8 + %gt122 = icmp ugt i64 4, %142 + br i1 %gt122, label %loop.body123, label %loop.exit130 + +loop.body123: ; preds = %loop.cond121 + %143 = load i64, i64* %.anon120, align 8 + %144 = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* %b, i64 0, i64 %143 + %145 = load <4 x float>, <4 x float>* %144, align 16 + store <4 x float> %145, <4 x float>* %v124, align 16 + %146 = load <4 x float>, <4 x float>* %v124, align 16 + %147 = extractelement <4 x float> %146, i64 0 + %fpfpext125 = fpext float %147 to double + %148 = load <4 x float>, <4 x float>* %v124, align 16 + %149 = extractelement <4 x float> %148, i64 1 + %fpfpext126 = fpext float %149 to double + %150 = load <4 x float>, <4 x float>* %v124, align 16 + %151 = extractelement <4 x float> %150, i64 2 + %fpfpext127 = fpext float %151 to double + %152 = load <4 x float>, <4 x float>* %v124, align 16 + %153 = extractelement <4 x float> %152, i64 3 + %fpfpext128 = fpext float %153 to double + %154 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.2, i32 0, i32 0), double %fpfpext125, double %fpfpext126, double %fpfpext127, double %fpfpext128) + %155 = load i64, i64* %.anon120, align 8 + %add129 = add i64 %155, 1 + store i64 %add129, i64* %.anon120, align 8 + br label %loop.cond121 + +loop.exit130: ; preds = %loop.cond121 + ret void +} \ No newline at end of file