diff --git a/releasenotes.md b/releasenotes.md index f8b2cc771..d3724f558 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -7,6 +7,7 @@ ### Fixes - List.remove_at would incorrectly trigger ASAN. +- With avx512, passing a 512-bit vector in a union would be lowered incorrectly, causing an assert. #2362 ### Stdlib changes diff --git a/src/compiler/abi/c_abi_x64.c b/src/compiler/abi/c_abi_x64.c index 744c3d799..719aebcf2 100644 --- a/src/compiler/abi/c_abi_x64.c +++ b/src/compiler/abi/c_abi_x64.c @@ -207,17 +207,9 @@ void x64_classify_post_merge(ByteSize size, X64Class *lo_class, X64Class *hi_cla // If X87UP is not before X87 => mem // If size > 16 && first isn't SSE or any other is not SSEUP => mem // If SSEUP is not preceded by SSE/SSEUP => convert to SSE. - if (*hi_class == CLASS_MEMORY) goto DEFAULT_TO_MEMORY; - if (size > 16 && (*lo_class != CLASS_SSE || *hi_class != CLASS_SSEUP)) goto DEFAULT_TO_MEMORY; - if (*hi_class == CLASS_SSEUP && *lo_class != CLASS_SSE && *lo_class != CLASS_SSEUP) - { - // This can happen for unions for example - *hi_class = CLASS_SSE; - } - return; - - DEFAULT_TO_MEMORY: - *lo_class = CLASS_MEMORY; + if (*hi_class == CLASS_MEMORY) *lo_class = CLASS_MEMORY; + if (size > 16 && (*lo_class != CLASS_SSE || *hi_class != CLASS_SSEUP)) *lo_class = CLASS_MEMORY; + if (*hi_class == CLASS_SSEUP && *lo_class != CLASS_SSE) *hi_class = CLASS_SSE; } void x64_classify_struct_union(Type *type, ByteSize offset_base, X64Class *current, X64Class *lo_class, X64Class *hi_class, NamedArgument named_arg) diff --git a/test/test_suite/abi/merge_union_bool_avx512.c3t b/test/test_suite/abi/merge_union_bool_avx512.c3t new file mode 100644 index 000000000..2534ba66e --- /dev/null +++ b/test/test_suite/abi/merge_union_bool_avx512.c3t @@ -0,0 +1,41 @@ +// #target: macos-x64 +// #opt: --x86cpu=avx512 +module abi; + +union Mask64 +{ + bool[<64>] m; + ulong ul; +} + +fn Mask64 make_mask(ulong n) +{ + Mask64 mask; + mask.ul = n; + return mask; +} + +fn int main() +{ + 
make_mask(20); + return 0; +} + +/* #expect: abi.ll + +define void @abi.make_mask(ptr noalias sret(%Mask64) align 64 %0, i64 %1) #0 { +entry: + %mask = alloca %Mask64, align 64 + call void @llvm.memset.p0.i64(ptr align 64 %mask, i8 0, i64 64, i1 false) + store i64 %1, ptr %mask, align 64 + call void @llvm.memcpy.p0.p0.i32(ptr align 64 %0, ptr align 64 %mask, i32 64, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + %sretparam = alloca %Mask64, align 64 + call void @abi.make_mask(ptr sret(%Mask64) align 64 %sretparam, i64 20) + ret i32 0 +}