mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
* Optimize vector load / store. Fixes to alignment. Support typedef with `@simd` and `@align` #2543. Update vector ABI #2542 * Fix alignment issue with indirect arguments.
92 lines
2.3 KiB
Plaintext
92 lines
2.3 KiB
Plaintext
// #target: macos-x64
|
|
|
|
module test;
|
|
import std::io;
|
|
|
|
// Named 2-lane float vector type carrying the @simd attribute.
// The expected IR at the end of this file materialises values of this type
// as <2 x float> with align 8.
typedef Vc2 = float[<2>] @simd;
|
|
// Named 4-lane float vector type carrying the @simd attribute; used below
// as the whole-matrix view of Matrix2x2.
typedef Vc4 = float[<4>] @simd;
|
|
// 2x2 float matrix whose storage is exposed through three overlapping views
// inside one anonymous union: four scalars, two Vc2 values, or one Vc4.
// The expected IR at the end of this file depends on this layout (m1 is read
// at byte offset 8), so member order and types must not change without
// updating the expectations.
struct Matrix2x2
|
|
{
|
|
union
|
|
{
|
|
// View 1: individual scalar elements, read by apply1.
struct
|
|
{
|
|
float m00, m01;
|
|
float m10, m11;
|
|
}
|
|
// View 2: two 2-lane vectors, read by apply2 and apply3.
struct
|
|
{
|
|
Vc2 m0;
|
|
Vc2 m1;
|
|
}
|
|
// View 3: the whole matrix as a single 4-lane vector.
// NOTE(review): presumably this member is what gives the struct the
// 16-byte alignment seen on the load of m0 in the expected IR - confirm.
Vc4 m;
|
|
}
|
|
}
|
|
|
|
// Multiplies `mat` by `vec`, reading the matrix through its scalar
// fields (m00, m01, m10, m11) and the vector lane by lane.
// Returns { m00*v0 + m01*v1, m10*v0 + m11*v1 }.
fn Vc2 apply1(Matrix2x2* mat, Vc2 vec)
{
    float v0 = vec[0];
    float v1 = vec[1];
    // Each output lane is the dot product of one matrix row with the vector.
    float lane0 = mat.m00 * v0 + mat.m01 * v1;
    float lane1 = mat.m10 * v0 + mat.m11 * v1;
    return (Vc2) { lane0, lane1 };
}
|
|
|
|
// Multiplies `mat` by `vec`, reading the matrix through its two Vc2
// members (m0, m1) and indexing their lanes individually.
// Returns { m0[0]*v0 + m0[1]*v1, m1[0]*v0 + m1[1]*v1 }.
fn Vc2 apply2(Matrix2x2* mat, Vc2 vec)
{
    Vc2 upper = mat.m0;
    Vc2 lower = mat.m1;
    // Same per-lane arithmetic and operand order as the scalar-field variant.
    float lane0 = upper[0] * vec[0] + upper[1] * vec[1];
    float lane1 = lower[0] * vec[0] + lower[1] * vec[1];
    return (Vc2) { lane0, lane1 };
}
|
|
|
|
// Multiplies `mat` by `vec` using whole-vector operations only.
// The code generated for this function is pinned line-by-line by the
// expected IR at the end of this file; any change to these statements
// requires regenerating those expectations.
fn Vc2 apply3(Matrix2x2* mat, Vc2 vec)
|
|
{
|
|
// Lanes 0 and 3 of the pair (m0, m1): m0[0] and m1[1].
Vc2 a = (Vc2)$$swizzle2(mat.m0, mat.m1, 0, 3);
|
|
// Lanes 1 and 2 of the pair (m0, m1): m0[1] and m1[0].
Vc2 b = (Vc2)$$swizzle2(mat.m0, mat.m1, 1, 2);
|
|
// The input vector with its two lanes swapped: { vec[1], vec[0] }.
Vc2 flip = (Vc2)$$swizzle(vec, 1, 0);
|
|
// { m0[0]*v0 + m0[1]*v1, m1[1]*v1 + m1[0]*v0 }
return a * vec + b * flip;
|
|
}
|
|
|
|
|
|
// Runs the three apply variants on the same matrix and vector and prints
// each result. For these inputs every variant computes
// { 1*11 + -3*13, 5*11 + -7*13 } = { -28, -36 }.
fn void main()
{
    Matrix2x2 a = { 1, -3, 5, -7 };
    // Shared input vector, passed by value to every variant.
    Vc2 input = (Vc2) { 11, 13 };
    io::printfn("1: %s", apply1(&a, input));
    io::printfn("2: %s", apply2(&a, input));
    io::printfn("3: %s", apply3(&a, input));
}
|
|
|
|
/* #expect: test.ll
|
|
|
|
define double @test.apply3(ptr %0, double %1) #0 {
|
|
entry:
|
|
%vec = alloca <2 x float>, align 8
|
|
%a = alloca <2 x float>, align 8
|
|
%b = alloca <2 x float>, align 8
|
|
%flip = alloca <2 x float>, align 8
|
|
%taddr = alloca <2 x float>, align 8
|
|
store double %1, ptr %vec, align 8
|
|
%2 = load <2 x float>, ptr %0, align 16
|
|
%ptradd = getelementptr inbounds i8, ptr %0, i64 8
|
|
%3 = load <2 x float>, ptr %ptradd, align 8
|
|
%shuffle = shufflevector <2 x float> %2, <2 x float> %3, <2 x i32> <i32 0, i32 3>
|
|
store <2 x float> %shuffle, ptr %a, align 8
|
|
%4 = load <2 x float>, ptr %0, align 16
|
|
%ptradd1 = getelementptr inbounds i8, ptr %0, i64 8
|
|
%5 = load <2 x float>, ptr %ptradd1, align 8
|
|
%shuffle2 = shufflevector <2 x float> %4, <2 x float> %5, <2 x i32> <i32 1, i32 2>
|
|
store <2 x float> %shuffle2, ptr %b, align 8
|
|
%6 = load <2 x float>, ptr %vec, align 8
|
|
%shuffle3 = shufflevector <2 x float> %6, <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
|
store <2 x float> %shuffle3, ptr %flip, align 8
|
|
%7 = load <2 x float>, ptr %a, align 8
|
|
%8 = load <2 x float>, ptr %vec, align 8
|
|
%fmul = fmul <2 x float> %7, %8
|
|
%9 = load <2 x float>, ptr %b, align 8
|
|
%10 = load <2 x float>, ptr %flip, align 8
|
|
%fmul4 = fmul <2 x float> %9, %10
|
|
%fadd = fadd <2 x float> %fmul, %fmul4
|
|
store <2 x float> %fadd, ptr %taddr, align 8
|
|
%11 = load double, ptr %taddr, align 8
|
|
ret double %11
|
|
} |