c3c/test/test_suite/builtins/shufflevector.c3t

// #target: macos-x64

module test;
import std::io;

// Float vector types used by this test: Vc2 has 2 lanes, Vc4 has 4 lanes.
// Both are declared with the @simd attribute.
typedef Vc2 = float[<2>] @simd;
typedef Vc4 = float[<4>] @simd;
// 2x2 float matrix. The anonymous union exposes the same storage through
// three views: four named scalars, two 2-lane vectors, or one 4-lane vector.
struct Matrix2x2
{
	union
	{
		// Scalar view, read by apply1.
		struct
		{
			float m00, m01;
			float m10, m11;
		}
		// Two-vector view, read by apply2 and apply3. The expected IR
		// below loads m0 at offset 0 and m1 at offset 8.
		struct
		{
			Vc2 m0;
			Vc2 m1;
		}
		// Whole matrix as a single 4-lane vector (not read by the
		// functions visible in this file).
		Vc4 m;
	}
}

// Multiplies `mat` by `vec` using the scalar fields m00..m11.
// Result lane 0 is m00 * vec[0] + m01 * vec[1];
// result lane 1 is m10 * vec[0] + m11 * vec[1].
fn Vc2 apply1(Matrix2x2* mat, Vc2 vec)
{
	return (Vc2) {
		mat.m00 * vec[0] + mat.m01 * vec[1],
		mat.m10 * vec[0] + mat.m11 * vec[1],
	};
}

// Same computation as apply1, but the matrix elements are read by indexing
// the vector members m0 and m1 instead of the named scalar fields.
fn Vc2 apply2(Matrix2x2* mat, Vc2 vec)
{
	return (Vc2) {
		mat.m0[0] * vec[0] + mat.m0[1] * vec[1],
		mat.m1[0] * vec[0] + mat.m1[1] * vec[1],
	};
}

// Same matrix-vector product as apply1/apply2, built from the $$swizzle2 and
// $$swizzle builtins plus whole-vector arithmetic. This is the function whose
// generated IR is checked in the #expect section below, so the statement
// order and local names here must stay in sync with that IR.
fn Vc2 apply3(Matrix2x2* mat, Vc2 vec)
{
	// Indices 0 and 3 over (m0, m1): lane 0 of m0 and lane 1 of m1,
	// i.e. the elements that multiply vec lane-for-lane.
	Vc2 a = (Vc2)$$swizzle2(mat.m0, mat.m1, 0, 3);
	// Indices 1 and 2 over (m0, m1): lane 1 of m0 and lane 0 of m1,
	// i.e. the elements that multiply the lane-swapped vec.
	Vc2 b = (Vc2)$$swizzle2(mat.m0, mat.m1, 1, 2);
	// vec with its two lanes exchanged.
	Vc2 flip = (Vc2)$$swizzle(vec, 1, 0);
	// Lane 0: m0[0] * vec[0] + m0[1] * vec[1]
	// Lane 1: m1[1] * vec[1] + m1[0] * vec[0]
	return a * vec + b * flip;
}


// Runs the three apply variants on the same matrix and vector and prints
// each result with a numbered prefix.
fn void main()
{
	// Presumably fills m00, m01, m10, m11 in order (the first union view);
	// confirm against C3 initializer rules. Under that reading each variant
	// computes (1*11 - 3*13, 5*11 - 7*13) = (-28, -36).
	Matrix2x2 a = { 1, -3, 5, -7 };
	io::printfn("1: %s", apply1(&a, (Vc2) { 11, 13 }));
	io::printfn("2: %s", apply2(&a, (Vc2) { 11, 13 }));
	io::printfn("3: %s", apply3(&a, (Vc2) { 11, 13 }));
}

/* #expect: test.ll

define double @test.apply3(ptr %0, double %1) #0 {
entry:
  %vec = alloca <2 x float>, align 8
  %a = alloca <2 x float>, align 8
  %b = alloca <2 x float>, align 8
  %flip = alloca <2 x float>, align 8
  %taddr = alloca <2 x float>, align 8
  store double %1, ptr %vec, align 8
  %2 = load <2 x float>, ptr %0, align 16
  %ptradd = getelementptr inbounds i8, ptr %0, i64 8
  %3 = load <2 x float>, ptr %ptradd, align 8
  %shuffle = shufflevector <2 x float> %2, <2 x float> %3, <2 x i32> <i32 0, i32 3>
  store <2 x float> %shuffle, ptr %a, align 8
  %4 = load <2 x float>, ptr %0, align 16
  %ptradd1 = getelementptr inbounds i8, ptr %0, i64 8
  %5 = load <2 x float>, ptr %ptradd1, align 8
  %shuffle2 = shufflevector <2 x float> %4, <2 x float> %5, <2 x i32> <i32 1, i32 2>
  store <2 x float> %shuffle2, ptr %b, align 8
  %6 = load <2 x float>, ptr %vec, align 8
  %shuffle3 = shufflevector <2 x float> %6, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  store <2 x float> %shuffle3, ptr %flip, align 8
  %7 = load <2 x float>, ptr %a, align 8
  %8 = load <2 x float>, ptr %vec, align 8
  %fmul = fmul <2 x float> %7, %8
  %9 = load <2 x float>, ptr %b, align 8
  %10 = load <2 x float>, ptr %flip, align 8
  %fmul4 = fmul <2 x float> %9, %10
  %fadd = fadd <2 x float> %fmul, %fmul4
  store <2 x float> %fadd, ptr %taddr, align 8
  %11 = load double, ptr %taddr, align 8
  ret double %11
}