mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
* Optimize vector load / store. Fixes to alignment. Support typedef with `@simd` and `@align` #2543. Update vector ABI #2542 * Fix alignment issue with indirect arguments.
142 lines
3.7 KiB
Plaintext
142 lines
3.7 KiB
Plaintext
// #target: macos-x64
|
|
// #opt: --x86cpu=avx512
|
|
module test;
|
|
|
|
// Distinct SIMD vector types used throughout this test:
// Mm256 is 8 floats, Mm512 is 16 floats, both declared with @simd.
typedef Mm256 = float[<8>] @simd;
|
|
typedef Mm512 = float[<16>] @simd;
|
|
// Struct wrapping a single 512-bit vector.
// The expected IR declares f55 as taking `<16 x float>`, i.e. the struct is
// passed the same way as a bare Mm512.
struct St512
|
|
{
|
|
Mm512 m;
|
|
}
|
|
|
|
// Globals used as the call arguments in f57.
St512 x55;
|
|
Mm512 x56;
|
|
|
|
extern fn void f55(St512 x);
|
|
|
|
extern fn void f56(Mm512 x);
|
|
// Passes the wrapped vector (x55) and the bare vector (x56) by value.
// Expected IR: each global is loaded as `<16 x float>` with align 64 and
// handed directly to the callee.
fn void f57() { f55(x55); f56(x56); }
|
|
|
|
// Struct holding two 256-bit vectors.
// The expected IR passes it as `ptr byval(%Two256) align 32`.
struct Two256
|
|
{
|
|
Mm256 m;
|
|
Mm256 n;
|
|
}
|
|
|
|
extern fn void f58(Two256 s);
|
|
// Forwards its Two256 parameter to f58.
// Expected IR: the incoming byval pointer (%0) is passed straight to the call
// as `ptr byval(%Two256) align 32`.
fn void f59(Two256 s)
|
|
{
|
|
f58(s);
|
|
}
|
|
|
|
// Struct holding an array of two 256-bit vectors; used as the member of SAtwo256.
struct Atwo256
|
|
{
|
|
Mm256[2] array;
|
|
}
|
|
|
|
// Struct nesting Atwo256 (an array of two Mm256).
// The expected IR passes it as `ptr byval(%SAtwo256) align 32`.
struct SAtwo256
|
|
{
|
|
Atwo256 x;
|
|
}
|
|
|
|
extern fn void f60(SAtwo256 s);
|
|
// Forwards its SAtwo256 parameter to f60.
// Expected IR: the incoming byval pointer (%0) is passed straight to the call
// as `ptr byval(%SAtwo256) align 32`.
fn void f61(SAtwo256 s)
|
|
{
|
|
f60(s);
|
|
}
|
|
|
|
// Pair of doubles used as the trailing vararg in f62 and f64.
// Depending on the call, the expected IR passes it either as two separate
// doubles (%lo, %hi) or as `ptr byval(%Complex) align 8`.
struct Complex { double i; double c; }
|
|
|
|
// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
|
|
extern fn void f62_helper(int, ...);
|
|
Mm512 x62;
|
|
// Variadic call mixing an int, a 512-bit vector, six double literals and a
// Complex literal.
// Expected IR: x62 is passed as `<16 x float>` and the Complex literal is
// split into two doubles (%lo, %hi) loaded from a stack temporary.
fn void f62()
|
|
{
|
|
f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
|
|
}
|
|
|
|
extern fn void f64_helper(Mm512, ...);
|
|
Mm512 x64;
|
|
// Two variadic calls that differ only in the number of double literals
// preceding the Complex literal (five, then six).
// Expected IR: in the first call the Complex is passed as two doubles
// (%lo, %hi); in the second it is passed as `ptr byval(%Complex) align 8`.
// NOTE(review): presumably the extra double exhausts the available vector
// registers and forces the struct to memory - confirm against the ABI rules.
fn void f64()
|
|
{
|
|
f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
|
|
f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
|
|
}
|
|
|
|
|
|
/* #expect: test.ll
|
|
|
|
|
|
declare void @f55(<16 x float>) #0
|
|
|
|
; Function Attrs:
|
|
declare void @f56(<16 x float>) #0
|
|
|
|
; Function Attrs:
|
|
define void @test.f57() #0 {
|
|
entry:
|
|
%0 = load <16 x float>, ptr @test.x55, align 64
|
|
call void @f55(<16 x float> %0)
|
|
%1 = load <16 x float>, ptr @test.x56, align 64
|
|
call void @f56(<16 x float> %1)
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
declare void @f58(ptr byval(%Two256) align 32) #0
|
|
|
|
; Function Attrs:
|
|
define void @test.f59(ptr byval(%Two256) align 32 %0) #0 {
|
|
entry:
|
|
call void @f58(ptr byval(%Two256) align 32 %0)
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
declare void @f60(ptr byval(%SAtwo256) align 32) #0
|
|
|
|
; Function Attrs:
|
|
define void @test.f61(ptr byval(%SAtwo256) align 32 %0) #0 {
|
|
entry:
|
|
call void @f60(ptr byval(%SAtwo256) align 32 %0)
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
declare void @f62_helper(i32, ...) #0
|
|
|
|
; Function Attrs:
|
|
define void @test.f62() #0 {
|
|
entry:
|
|
%literal = alloca %Complex, align 8
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %literal, ptr align 8 @.__const, i32 16, i1 false)
|
|
%0 = load <16 x float>, ptr @test.x62, align 64
|
|
%lo = load double, ptr %literal, align 8
|
|
%ptradd = getelementptr inbounds i8, ptr %literal, i64 8
|
|
%hi = load double, ptr %ptradd, align 8
|
|
call void (i32, ...) @f62_helper(i32 0, <16 x float> %0, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double %lo, double %hi)
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
declare void @f64_helper(<16 x float>, ...) #0
|
|
|
|
; Function Attrs:
|
|
define void @test.f64() #0 {
|
|
entry:
|
|
%literal = alloca %Complex, align 8
|
|
%literal1 = alloca %Complex, align 8
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %literal, ptr align 8 @.__const.4, i32 16, i1 false)
|
|
%0 = load <16 x float>, ptr @test.x64, align 64
|
|
%1 = load <16 x float>, ptr @test.x64, align 64
|
|
%lo = load double, ptr %literal, align 8
|
|
%ptradd = getelementptr inbounds i8, ptr %literal, i64 8
|
|
%hi = load double, ptr %ptradd, align 8
|
|
call void (<16 x float>, ...) @f64_helper(<16 x float> %0, <16 x float> %1, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double %lo, double %hi)
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %literal1, ptr align 8 @.__const.5, i32 16, i1 false)
|
|
%2 = load <16 x float>, ptr @test.x64, align 64
|
|
%3 = load <16 x float>, ptr @test.x64, align 64
|
|
call void (<16 x float>, ...) @f64_helper(<16 x float> %2, <16 x float> %3, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, ptr byval(%Complex) align 8 %literal1)
|
|
ret void
|
|
}
|