From 0f4a43717ede10f9f2b6bec268a130bd3370ec5f Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Sat, 15 Jan 2022 01:37:43 +0100 Subject: [PATCH] Fixes to the aarch64 ABI. --- src/compiler/c_abi_internal.h | 1 + src/compiler/llvm_codegen_c_abi.c | 5 + src/compiler/llvm_codegen_c_abi_aarch64.c | 24 +++- src/compiler/llvm_codegen_c_abi_x86.c | 10 +- test/test_suite/abi/aarch64_hfa_args.c3t | 80 +------------- test/test_suite/abi/literal_load_aarch64.c3t | 8 +- test/test_suite/abi/vec2_aarch64.c3t | 103 +++++++----------- test/test_suite/functions/splat_aarch64.c3t | 90 ++++++++------- .../struct/struct_as_value_aarch64.c3t | 5 +- 9 files changed, 135 insertions(+), 191 deletions(-) diff --git a/src/compiler/c_abi_internal.h b/src/compiler/c_abi_internal.h index b21f8db61..e3ea16ffd 100644 --- a/src/compiler/c_abi_internal.h +++ b/src/compiler/c_abi_internal.h @@ -22,6 +22,7 @@ ABIArgInfo *abi_arg_new_direct_int_ext_by_reg(Type *int_to_extend, bool by_reg); ABIArgInfo *abi_arg_new_direct_coerce_bits(BitSize bits); ABIArgInfo *abi_arg_new_direct_coerce_type(Type *type); ABIArgInfo *abi_arg_new_direct_coerce_array_type(Type *type, int8_t elements); +ABIArgInfo *abi_arg_new_direct_coerce_to_struct_with_elements(Type *type, int8_t elements); ABIArgInfo *abi_arg_new_direct_coerce(AbiType type); ABIArgInfo *abi_arg_new_expand_coerce(AbiType target_type, unsigned offset); ABIArgInfo *abi_arg_new_expand_coerce_pair(AbiType first_element, unsigned initial_offset, AbiType second_element, unsigned padding, bool is_packed); diff --git a/src/compiler/llvm_codegen_c_abi.c b/src/compiler/llvm_codegen_c_abi.c index 718c25163..69a96fa8c 100644 --- a/src/compiler/llvm_codegen_c_abi.c +++ b/src/compiler/llvm_codegen_c_abi.c @@ -189,6 +189,11 @@ ABIArgInfo *abi_arg_new_direct_coerce_type(Type *type) return info; } +ABIArgInfo *abi_arg_new_direct_coerce_to_struct_with_elements(Type *type, int8_t elements) +{ + TODO +} + ABIArgInfo *abi_arg_new_direct_coerce_array_type(Type *type, 
int8_t elements) { assert(elements > 0); diff --git a/src/compiler/llvm_codegen_c_abi_aarch64.c b/src/compiler/llvm_codegen_c_abi_aarch64.c index 366f00b29..cb5d77c03 100644 --- a/src/compiler/llvm_codegen_c_abi_aarch64.c +++ b/src/compiler/llvm_codegen_c_abi_aarch64.c @@ -48,7 +48,11 @@ ABIArgInfo *aarch64_classify_argument_type(Type *type) if (type_is_homogenous_aggregate(type, &base, &members)) { assert(members < 128); - return abi_arg_new_direct_coerce_array_type(base, (int8_t)members); + if (members > 1) + { + return abi_arg_new_direct_coerce_type(type_get_array(base, members)); + } + return abi_arg_new_direct_coerce_type(base); } // Aggregates <= in registers @@ -71,8 +75,12 @@ ABIArgInfo *aarch64_classify_argument_type(Type *type) // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. assert(alignment == 8 || alignment == 16); - assert(size / alignment < 128); - return abi_arg_new_direct_coerce_array_type(alignment == 8 ? type_ulong : type_u128, (int8_t)(size / alignment)); + + if (alignment == 16) return abi_arg_new_direct_coerce_type(type_u128); + ArraySize m = size / alignment; + if (m > 1) return abi_arg_new_direct_coerce_type(type_get_array(type_ulong, m)); + return abi_arg_new_direct_coerce_type(type_ulong); + } return abi_arg_new_indirect_not_by_val(type); @@ -122,7 +130,15 @@ ABIArgInfo *aarch64_classify_return_type(Type *type, bool variadic) // Aggregates <= in registers if (size <= 16) { - // For RenderScript <= 16 needs to be coerced. + // For RenderScript <= 16 needs to be coerced to ints + // this case is ignored here but needs to be added + // in case it is to be supported. + + if (size <= 8 && !platform_target.big_endian) + { + return abi_arg_new_direct_coerce_type(type_int_unsigned_by_bitsize(size * 8)); + } + unsigned alignment = type_abi_alignment(type); // Align to multiple of 8. 
unsigned aligned_size = aligned_offset(size, 8); diff --git a/src/compiler/llvm_codegen_c_abi_x86.c b/src/compiler/llvm_codegen_c_abi_x86.c index e459f7168..985768e5e 100644 --- a/src/compiler/llvm_codegen_c_abi_x86.c +++ b/src/compiler/llvm_codegen_c_abi_x86.c @@ -491,7 +491,15 @@ static inline ABIArgInfo *x86_classify_aggregate(CallABI call, Regs *regs, Type // but we do not generate this struct immediately here. unsigned size_in_regs = (size + 3) / 4; assert(size_in_regs < 8); - ABIArgInfo *info = abi_arg_new_direct_coerce_array_type(type_uint, (int8_t)size_in_regs); + ABIArgInfo *info; + if (size_in_regs > 1) + { + info = abi_arg_new_direct_coerce_to_struct_with_elements(type_uint, (int8_t)size_in_regs); + } + else + { + info = abi_arg_new_direct_coerce_type(type_uint); + } // Not in reg on MCU if (!platform_target.x86.is_mcu_api) info->attributes.by_reg = true; return info; diff --git a/test/test_suite/abi/aarch64_hfa_args.c3t b/test/test_suite/abi/aarch64_hfa_args.c3t index 49e080dc6..2eec5e75c 100644 --- a/test/test_suite/abi/aarch64_hfa_args.c3t +++ b/test/test_suite/abi/aarch64_hfa_args.c3t @@ -1,4 +1,3 @@ -// #skipped // #target: aarch64-darwin module test; define Int8x16 = ichar[<16>]; @@ -15,12 +14,10 @@ struct MixedHFAv3 Int8x16 b; } -// CHECK: define{{.*}} %struct.HFAv3 @test([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}) fn HFAv3 test(HFAv3 a0, HFAv3 a1, HFAv3 a2) { return a2; } -// CHECK: define{{.*}} %struct.MixedHFAv3 @test_mixed([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}) fn MixedHFAv3 test_mixed(MixedHFAv3 a0, MixedHFAv3 a1, MixedHFAv3 a2) { return a2; } @@ -31,78 +28,5 @@ fn MixedHFAv3 test_mixed(MixedHFAv3 a0, MixedHFAv3 a1, MixedHFAv3 a2) { %HFAv3 = type { [4 x <3 x float>] } %MixedHFAv3 = type { [3 x <3 x float>], <16 x i8> } -// CHECK: define{{.*}} %struct.HFAv3 @test([4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}, [4 x <4 x float>] %{{.*}}) - -define %HFAv3 
@test.test(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5, <4 x float> %6, <4 x float> %7, <4 x float> %8, <4 x float> %9, <4 x float> %10, <4 x float> %11) #0 { -entry: - %a0 = alloca %HFAv3, align 16 - %a1 = alloca %HFAv3, align 16 - %a2 = alloca %HFAv3, align 16 - %coerce = bitcast %HFAv3* %a0 to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* - %12 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 0 - store <4 x float> %0, <4 x float>* %12, align 16 - %13 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 1 - store <4 x float> %1, <4 x float>* %13, align 16 - %14 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 2 - store <4 x float> %2, <4 x float>* %14, align 16 - %15 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 3 - store <4 x float> %3, <4 x float>* %15, align 16 - %coerce1 = bitcast %HFAv3* %a1 to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* - %16 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 0 - store <4 x float> %4, <4 x float>* %16, align 16 - %17 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 1 - store <4 x float> %5, <4 x float>* %17, align 16 - %18 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 2 - store <4 x float> %6, <4 x float>* 
%18, align 16 - %19 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 3 - store <4 x float> %7, <4 x float>* %19, align 16 - %coerce2 = bitcast %HFAv3* %a2 to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* - %20 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 0 - store <4 x float> %8, <4 x float>* %20, align 16 - %21 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 1 - store <4 x float> %9, <4 x float>* %21, align 16 - %22 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 2 - store <4 x float> %10, <4 x float>* %22, align 16 - %23 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 3 - store <4 x float> %11, <4 x float>* %23, align 16 - %24 = load %HFAv3, %HFAv3* %a2, align 16 - ret %HFAv3 %24 -} - -; Function Attrs: nounwind -define %MixedHFAv3 @test.test_mixed(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5, <4 x float> %6, <4 x float> %7, <4 x float> %8, <4 x float> %9, <4 x float> %10, <4 x float> %11) #0 { -entry: - %a0 = alloca %MixedHFAv3, align 16 - %a1 = alloca %MixedHFAv3, align 16 - %a2 = alloca %MixedHFAv3, align 16 - %coerce = bitcast %MixedHFAv3* %a0 to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* - %12 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 0 - store <4 x float> %0, <4 x float>* %12, align 16 - %13 = getelementptr inbounds { <4 x float>, 
<4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 1 - store <4 x float> %1, <4 x float>* %13, align 16 - %14 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 2 - store <4 x float> %2, <4 x float>* %14, align 16 - %15 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce, i32 0, i32 3 - store <4 x float> %3, <4 x float>* %15, align 16 - %coerce1 = bitcast %MixedHFAv3* %a1 to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* - %16 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 0 - store <4 x float> %4, <4 x float>* %16, align 16 - %17 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 1 - store <4 x float> %5, <4 x float>* %17, align 16 - %18 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 2 - store <4 x float> %6, <4 x float>* %18, align 16 - %19 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce1, i32 0, i32 3 - store <4 x float> %7, <4 x float>* %19, align 16 - %coerce2 = bitcast %MixedHFAv3* %a2 to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* - %20 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 0 - store <4 x float> %8, <4 x float>* %20, align 16 - %21 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 
x float>, <4 x float> }* %coerce2, i32 0, i32 1 - store <4 x float> %9, <4 x float>* %21, align 16 - %22 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 2 - store <4 x float> %10, <4 x float>* %22, align 16 - %23 = getelementptr inbounds { <4 x float>, <4 x float>, <4 x float>, <4 x float> }, { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* %coerce2, i32 0, i32 3 - store <4 x float> %11, <4 x float>* %23, align 16 - %24 = load %MixedHFAv3, %MixedHFAv3* %a2, align 16 - ret %MixedHFAv3 %24 -} - +define %HFAv3 @test.test([4 x <4 x float>] %0, [4 x <4 x float>] %1, [4 x <4 x float>] %2) +define %MixedHFAv3 @test.test_mixed([4 x <4 x float>] %0, [4 x <4 x float>] %1, [4 x <4 x float>] %2) #0 { diff --git a/test/test_suite/abi/literal_load_aarch64.c3t b/test/test_suite/abi/literal_load_aarch64.c3t index eced81adc..a633c84a1 100644 --- a/test/test_suite/abi/literal_load_aarch64.c3t +++ b/test/test_suite/abi/literal_load_aarch64.c3t @@ -18,8 +18,10 @@ fn Test creator() // #expect: literal_load.ll -declare void @blorg(i64) +declare void @blorg(i64) #0 +define i32 @literal_load.creator() #0 { +entry: %literal = alloca %Test, align 4 %literal1 = alloca %Test, align 4 %0 = bitcast %Test* %literal to i32* @@ -32,5 +34,5 @@ declare void @blorg(i64) store i32 0, i32* %4, align 4 %5 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0 %6 = load i32, i32* %5, align 4 - %7 = zext i32 %6 to i64 - ret i64 %7 \ No newline at end of file + ret i32 %6 +} \ No newline at end of file diff --git a/test/test_suite/abi/vec2_aarch64.c3t b/test/test_suite/abi/vec2_aarch64.c3t index 24f350614..20a1a25a6 100644 --- a/test/test_suite/abi/vec2_aarch64.c3t +++ b/test/test_suite/abi/vec2_aarch64.c3t @@ -15,7 +15,7 @@ extern fn Vector2 vector2_subtract_value(Vector2 v, float sub) { return Vector2 // #expect: abi.ll %Vector2 = type { float, float } -define %Vector2 
@vector2_zero() +define %Vector2 @vector2_zero() #0 { entry: %literal = alloca %Vector2, align 4 %0 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 @@ -26,7 +26,7 @@ entry: ret %Vector2 %2 } -define %Vector2 @vector2_one() +define %Vector2 @vector2_one() #0 { entry: %literal = alloca %Vector2, align 4 %0 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 @@ -37,83 +37,64 @@ entry: ret %Vector2 %2 } - -define %Vector2 @vector2_add(float %0, float %1, float %2, float %3) +define %Vector2 @vector2_add([2 x float] %0, [2 x float] %1) #0 { entry: %v1 = alloca %Vector2, align 4 %v2 = alloca %Vector2, align 4 %literal = alloca %Vector2, align 4 - %coerce = bitcast %Vector2* %v1 to { float, float }* - %4 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 0 - store float %0, float* %4, align 4 - %5 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 1 - store float %1, float* %5, align 4 - %coerce1 = bitcast %Vector2* %v2 to { float, float }* - %6 = getelementptr inbounds { float, float }, { float, float }* %coerce1, i32 0, i32 0 - store float %2, float* %6, align 4 - %7 = getelementptr inbounds { float, float }, { float, float }* %coerce1, i32 0, i32 1 - store float %3, float* %7, align 4 - %8 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 - store float 0.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 - store float 0.000000e+00, float* %9, align 4 - %10 = load %Vector2, %Vector2* %literal, align 4 - ret %Vector2 %10 + %2 = bitcast %Vector2* %v1 to [2 x float]* + store [2 x float] %0, [2 x float]* %2, align 4 + %3 = bitcast %Vector2* %v2 to [2 x float]* + store [2 x float] %1, [2 x float]* %3, align 4 + %4 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 + store float 0.000000e+00, float* %4, align 4 + %5 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 + store 
float 0.000000e+00, float* %5, align 4 + %6 = load %Vector2, %Vector2* %literal, align 4 + ret %Vector2 %6 } -define %Vector2 @vector2_add_value(float %0, float %1, float %2) +define %Vector2 @vector2_add_value([2 x float] %0, float %1) #0 { entry: %v = alloca %Vector2, align 4 %literal = alloca %Vector2, align 4 - %coerce = bitcast %Vector2* %v to { float, float }* - %3 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 0 - store float %0, float* %3, align 4 - %4 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 1 - store float %1, float* %4, align 4 - %5 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 - store float 0.000000e+00, float* %5, align 4 - %6 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 - store float 0.000000e+00, float* %6, align 4 - %7 = load %Vector2, %Vector2* %literal, align 4 - ret %Vector2 %7 + %2 = bitcast %Vector2* %v to [2 x float]* + store [2 x float] %0, [2 x float]* %2, align 4 + %3 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 + store float 0.000000e+00, float* %3, align 4 + %4 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 + store float 0.000000e+00, float* %4, align 4 + %5 = load %Vector2, %Vector2* %literal, align 4 + ret %Vector2 %5 } -define %Vector2 @vector2_subtract(float %0, float %1, float %2, float %3) +define %Vector2 @vector2_subtract([2 x float] %0, [2 x float] %1) #0 { entry: %v1 = alloca %Vector2, align 4 %v2 = alloca %Vector2, align 4 %literal = alloca %Vector2, align 4 - %coerce = bitcast %Vector2* %v1 to { float, float }* - %4 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 0 - store float %0, float* %4, align 4 - %5 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 1 - store float %1, float* %5, align 4 - %coerce1 = bitcast %Vector2* %v2 to { float, float }* - %6 = getelementptr inbounds { float, float }, { float, 
float }* %coerce1, i32 0, i32 0 - store float %2, float* %6, align 4 - %7 = getelementptr inbounds { float, float }, { float, float }* %coerce1, i32 0, i32 1 - store float %3, float* %7, align 4 - %8 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 - store float 0.000000e+00, float* %8, align 4 - %9 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 - store float 0.000000e+00, float* %9, align 4 - %10 = load %Vector2, %Vector2* %literal, align 4 - ret %Vector2 %10 + %2 = bitcast %Vector2* %v1 to [2 x float]* + store [2 x float] %0, [2 x float]* %2, align 4 + %3 = bitcast %Vector2* %v2 to [2 x float]* + store [2 x float] %1, [2 x float]* %3, align 4 + %4 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 + store float 0.000000e+00, float* %4, align 4 + %5 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 + store float 0.000000e+00, float* %5, align 4 + %6 = load %Vector2, %Vector2* %literal, align 4 + ret %Vector2 %6 } -define %Vector2 @vector2_subtract_value(float %0, float %1, float %2) +define %Vector2 @vector2_subtract_value([2 x float] %0, float %1) #0 { entry: %v = alloca %Vector2, align 4 %literal = alloca %Vector2, align 4 - %coerce = bitcast %Vector2* %v to { float, float }* - %3 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 0 - store float %0, float* %3, align 4 - %4 = getelementptr inbounds { float, float }, { float, float }* %coerce, i32 0, i32 1 - store float %1, float* %4, align 4 - %5 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 - store float 0.000000e+00, float* %5, align 4 - %6 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 - store float 0.000000e+00, float* %6, align 4 - %7 = load %Vector2, %Vector2* %literal, align 4 - ret %Vector2 %7 + %2 = bitcast %Vector2* %v to [2 x float]* + store [2 x float] %0, [2 x float]* %2, align 4 + %3 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 0 + 
store float 0.000000e+00, float* %3, align 4 + %4 = getelementptr inbounds %Vector2, %Vector2* %literal, i32 0, i32 1 + store float 0.000000e+00, float* %4, align 4 + %5 = load %Vector2, %Vector2* %literal, align 4 + ret %Vector2 %5 } diff --git a/test/test_suite/functions/splat_aarch64.c3t b/test/test_suite/functions/splat_aarch64.c3t index 9262b0504..b069960f9 100644 --- a/test/test_suite/functions/splat_aarch64.c3t +++ b/test/test_suite/functions/splat_aarch64.c3t @@ -15,13 +15,23 @@ fn void test() // #expect: splat.ll -%vararg = alloca %"int[]", align 8 +%"int[]" = type { i32*, i64 } + +declare i32 @sum_us([2 x i64]) #0 + +define void @splat.test() #0 { +entry: + %vararg = alloca %"int[]", align 8 %varargslots = alloca [3 x i32], align 4 + %tempcoerce = alloca [2 x i64], align 8 %x = alloca [3 x i32], align 4 %z = alloca %"int[]", align 8 %vararg1 = alloca %"int[]", align 8 - %vararg2 = alloca %"int[]", align 8 + %tempcoerce2 = alloca [2 x i64], align 8 %vararg3 = alloca %"int[]", align 8 + %tempcoerce4 = alloca [2 x i64], align 8 + %vararg5 = alloca %"int[]", align 8 + %tempcoerce6 = alloca [2 x i64], align 8 %0 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 0 store i32 1, i32* %0, align 4 %1 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 1 @@ -33,42 +43,40 @@ fn void test() %4 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0 %5 = bitcast [3 x i32]* %varargslots to i32* store i32* %5, i32** %4, align 8 - %6 = bitcast %"int[]"* %vararg to { i64, i64 }* - %7 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %6, i32 0, i32 0 - %8 = load i64, i64* %7, align 8 - %9 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %6, i32 0, i32 1 - %10 = load i64, i64* %9, align 8 - %11 = call i32 @sum_us(i64 %8, i64 %10) - %12 = bitcast [3 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %12, i8* align 4 bitcast ([3 x i32]* @.__const to i8*), i32 12, i1 false) - %13 = bitcast [3 x i32]* 
%x to i32* - %14 = insertvalue %"int[]" undef, i32* %13, 0 - %15 = insertvalue %"int[]" %14, i64 3, 1 - store %"int[]" %15, %"int[]"* %z, align 8 - %16 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 1 - %17 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 0 - store i64 3, i64* %16, align 8 - %18 = bitcast [3 x i32]* %x to i32* - store i32* %18, i32** %17, align 8 - %19 = bitcast %"int[]"* %vararg1 to { i64, i64 }* - %20 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %19, i32 0, i32 0 - %21 = load i64, i64* %20, align 8 - %22 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %19, i32 0, i32 1 - %23 = load i64, i64* %22, align 8 - %24 = call i32 @sum_us(i64 %21, i64 %23) - %25 = getelementptr inbounds %"int[]", %"int[]"* %vararg2, i32 0, i32 1 - %26 = getelementptr inbounds %"int[]", %"int[]"* %vararg2, i32 0, i32 0 - %27 = bitcast %"int[]"* %z to { i64, i64 }* - %28 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %27, i32 0, i32 0 - %29 = load i64, i64* %28, align 8 - %30 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %27, i32 0, i32 1 - %31 = load i64, i64* %30, align 8 - %32 = call i32 @sum_us(i64 %29, i64 %31) - %33 = getelementptr inbounds %"int[]", %"int[]"* %vararg3, i32 0, i32 1 - store i64 0, i64* %33, align 8 - %34 = bitcast %"int[]"* %vararg3 to { i64, i64 }* - %35 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %34, i32 0, i32 0 - %36 = load i64, i64* %35, align 8 - %37 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %34, i32 0, i32 1 - %38 = load i64, i64* %37, align 8 - %39 = call i32 @sum_us(i64 %36, i64 %38) + %6 = bitcast [2 x i64]* %tempcoerce to i8* + %7 = bitcast %"int[]"* %vararg to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %6, i8* align 8 %7, i32 16, i1 false) + %8 = load [2 x i64], [2 x i64]* %tempcoerce, align 8 + %9 = call i32 @sum_us([2 x i64] %8) + %10 = bitcast [3 x i32]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %10, i8* align 4 bitcast ([3 x 
i32]* @.__const to i8*), i32 12, i1 false) + %11 = bitcast [3 x i32]* %x to i32* + %12 = insertvalue %"int[]" undef, i32* %11, 0 + %13 = insertvalue %"int[]" %12, i64 3, 1 + store %"int[]" %13, %"int[]"* %z, align 8 + %14 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 1 + %15 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 0 + store i64 3, i64* %14, align 8 + %16 = bitcast [3 x i32]* %x to i32* + store i32* %16, i32** %15, align 8 + %17 = bitcast [2 x i64]* %tempcoerce2 to i8* + %18 = bitcast %"int[]"* %vararg1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %17, i8* align 8 %18, i32 16, i1 false) + %19 = load [2 x i64], [2 x i64]* %tempcoerce2, align 8 + %20 = call i32 @sum_us([2 x i64] %19) + %21 = getelementptr inbounds %"int[]", %"int[]"* %vararg3, i32 0, i32 1 + %22 = getelementptr inbounds %"int[]", %"int[]"* %vararg3, i32 0, i32 0 + %23 = bitcast [2 x i64]* %tempcoerce4 to i8* + %24 = bitcast %"int[]"* %z to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %23, i8* align 8 %24, i32 16, i1 false) + %25 = load [2 x i64], [2 x i64]* %tempcoerce4, align 8 + %26 = call i32 @sum_us([2 x i64] %25) + %27 = getelementptr inbounds %"int[]", %"int[]"* %vararg5, i32 0, i32 1 + store i64 0, i64* %27, align 8 + %28 = bitcast [2 x i64]* %tempcoerce6 to i8* + %29 = bitcast %"int[]"* %vararg5 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %28, i8* align 8 %29, i32 16, i1 false) + %30 = load [2 x i64], [2 x i64]* %tempcoerce6, align 8 + %31 = call i32 @sum_us([2 x i64] %30) + ret void +} diff --git a/test/test_suite/struct/struct_as_value_aarch64.c3t b/test/test_suite/struct/struct_as_value_aarch64.c3t index e3c6ab4af..22c83ccb3 100644 --- a/test/test_suite/struct/struct_as_value_aarch64.c3t +++ b/test/test_suite/struct/struct_as_value_aarch64.c3t @@ -15,7 +15,7 @@ fn Event test(int x) // #expect: test.ll -define i64 @test.test(i32 %0) #0 { +define i32 @test.test(i32 %0) #0 { entry: %foo = alloca %Event, align 4 
%bar = alloca %Event, align 4 @@ -40,6 +40,5 @@ cond.phi: ; preds = %cond.rhs, %cond.lhs store %Event %val, %Event* %taddr, align 4 %5 = getelementptr inbounds %Event, %Event* %taddr, i32 0, i32 0 %6 = load i32, i32* %5, align 4 - %7 = zext i32 %6 to i64 - ret i64 %7 + ret i32 %6 }