mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 03:51:18 +00:00
* Optimize vector load / store. Fixes to alignment. Support typedef with `@simd` and `@align` #2543. Update vector ABI #2542 * Fix alignment issue with indirect arguments.
540 lines
14 KiB
Plaintext
540 lines
14 KiB
Plaintext
// #target: linux-riscv32
|
|
module test;
|
|
|
|
|
|
// Baseline case: no parameters and no return value; the expected IR is a bare `ret void`.
fn void f_void() {}
|
|
|
|
// Scalar arguments and return values smaller than the word size are extended
|
|
// according to the sign of their type, up to 32 bits
|
|
|
|
// Identity on bool; the expected IR passes and returns it as `zeroext i8`.
fn bool f_scalar_0(bool x) { return x; }
|
|
|
|
// Identity on ichar; the expected IR passes and returns it as `signext i8`.
fn ichar f_scalar_1(ichar x) { return x; }
|
|
|
|
// Identity on char; the expected IR passes and returns it as `zeroext i8`.
fn char f_scalar_2(char x) { return x; }
|
|
|
|
// Identity on int; the expected IR uses a plain `i32` with no extension attribute.
fn int f_scalar_3(int x) { return x; }
|
|
|
|
// Identity on long; the expected IR passes and returns it as `i64`.
fn long f_scalar_4(long x) { return x; }
|
|
|
|
// Identity on int128; the expected IR passes and returns it as `i128`.
fn int128 f_scalar_5(int128 x) { return x; }
|
|
|
|
// Identity on float; the expected IR passes and returns it as `float`.
fn float f_fp_scalar_1(float x) { return x; }
|
|
|
|
// Identity on double; the expected IR passes and returns it as `double`.
fn double f_fp_scalar_2(double x) { return x; }
|
|
|
|
// Scalars larger than 2*xlen are passed/returned indirectly. However, the
|
|
// RISC-V LLVM backend can handle this fine, so the function doesn't need to
|
|
// be modified.
|
|
|
|
// Identity on float128; the expected IR keeps it as a direct `fp128` parameter and return value.
fn float128 f_fp_scalar_3(float128 x) { return x; }
|
|
|
|
// Aggregates <= 2*xlen may be passed in registers, so will be coerced to
|
|
// integer arguments. The rules for return are the same.
|
|
|
|
// Four char fields (the expected IR copies it as 4 bytes, align 1).
// In the expected IR it is passed and returned as a single `i32`.
struct Tiny
|
|
{
|
|
char a, b, c, d;
|
|
}
|
|
|
|
// Takes a Tiny by value and updates only that local copy (a += b, c += d).
// In the expected IR the argument arrives as `i32` and is stored into a local %Tiny.
fn void f_agg_tiny(Tiny x) {
|
|
x.a += x.b;
|
|
x.c += x.d;
|
|
}
|
|
|
|
// Returns the Tiny literal {1, 2, 3, 4}; the expected IR returns it as an `i32`
// loaded from the literal's storage.
fn Tiny f_agg_tiny_ret() {
|
|
return {1, 2, 3, 4};
|
|
}
|
|
|
|
// Char4 names a 4-element char vector declared with @simd; the expected IR uses `<4 x i8>` with align 4.
typedef Char4 = char[<4>] @simd;
|
|
// Copies element 1 into element 0 and element 3 into element 2 of the by-value vector.
// In the expected IR the argument arrives as `i32` and is stored into a local `<4 x i8>`.
fn void f_vec_tiny_v4i8(Char4 x)
|
|
{
|
|
x[0] = x[1];
|
|
x[2] = x[3];
|
|
}
|
|
|
|
// Returns the vector {1, 2, 3, 4}; the expected IR stores it to a temporary and returns it as `i32`.
fn Char4 f_vec_tiny_v4i8_ret()
|
|
{
|
|
return {1, 2, 3, 4};
|
|
}
|
|
|
|
// Int1 names a 1-element int vector declared with @simd; the expected IR uses `<1 x i32>` with align 4.
typedef Int1 = int[<1>] @simd;
|
|
// Writes 114 into the only element of the by-value vector.
// In the expected IR the argument arrives as `i32` and is stored into a local `<1 x i32>`.
fn void f_vec_tiny_v1i32(Int1 x)
|
|
{
|
|
x[0] = 114;
|
|
}
|
|
|
|
// Returns the single-element vector {1}; the expected IR returns it as `i32`.
fn Int1 f_vec_tiny_v1i32_ret()
|
|
{
|
|
return {1};
|
|
}
|
|
|
|
// An int plus an int pointer (the expected IR copies it as 8 bytes, align 4).
// In the expected IR it is passed and returned as `[2 x i32]`.
struct Small
|
|
{
|
|
int a;
|
|
int* b;
|
|
}
|
|
|
|
// Adds the int that x.b points at to x.a, then re-points x.b at x.a; only the local copy changes.
// In the expected IR the argument arrives as `[2 x i32]` and is stored into a local %Small.
fn void f_agg_small(Small x)
|
|
{
|
|
x.a += *x.b;
|
|
x.b = &x.a;
|
|
}
|
|
|
|
// Returns the Small literal {1, null}; the expected IR returns it as `[2 x i32]`.
fn Small f_agg_small_ret()
|
|
{
|
|
return {1, null};
|
|
}
|
|
|
|
// Char8 names an 8-element char vector declared with @simd; the expected IR uses `<8 x i8>` with align 8.
typedef Char8 = char[<8>] @simd;
|
|
|
|
// Copies the last element (index 7) into element 0 of the by-value vector.
// In the expected IR the argument arrives as `i64` and is stored into a local `<8 x i8>`.
fn void f_vec_small_v8i8(Char8 x)
|
|
{
|
|
x[0] = x[7];
|
|
}
|
|
|
|
// Returns the vector {1, ..., 8}; the expected IR returns it as `i64`.
fn Char8 f_vec_small_v8i8_ret() => {1, 2, 3, 4, 5, 6, 7, 8};
|
|
|
|
// Long1 names a 1-element long vector declared with @simd; the expected IR uses `<1 x i64>` with align 8.
typedef Long1 = long[<1>] @simd;
|
|
// Writes 114 into the only element of the by-value vector.
// In the expected IR the argument arrives as `i64` and is stored into a local `<1 x i64>`.
fn void f_vec_small_v1i64(Long1 x)
|
|
{
|
|
x[0] = 114;
|
|
}
|
|
|
|
// Returns the single-element vector {1}; the expected IR returns it as `i64`.
fn Long1 f_vec_small_v1i64_ret() => {1};
|
|
|
|
// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
|
|
// single 2*xlen-sized argument, to ensure that alignment can be maintained if
|
|
// passed on the stack.
|
|
|
|
// A single long field; the expected IR allocates it with align 8 and passes/returns it as one `i64`.
struct Small_aligned
|
|
{
|
|
long a;
|
|
}
|
|
|
|
// Doubles x.a on the by-value copy.
// In the expected IR the argument arrives as `i64` and is stored into a local %Small_aligned.
fn void f_agg_small_aligned(Small_aligned x)
|
|
{
|
|
x.a += x.a;
|
|
}
|
|
|
|
// Ignores its parameter and returns the literal {10}.
// In the expected IR both the parameter and the return value are `i64`.
fn Small_aligned f_agg_small_aligned_ret(Small_aligned x)
|
|
{
|
|
return {10};
|
|
}
|
|
|
|
// Aggregates larger than 2*xlen will be passed and returned indirectly
|
|
// Four int fields (the expected IR copies it as 16 bytes, align 4).
// In the expected IR it is passed as `ptr align 4` and returned through an sret pointer.
struct Large {
|
|
int a, b, c, d;
|
|
}
|
|
|
|
// Sets x.a to the sum of the other three fields.
// In the expected IR the argument is a `ptr align 4` and the loads/stores go through that pointer.
fn void f_agg_large(Large x) {
|
|
x.a = x.b + x.c + x.d;
|
|
}
|
|
|
|
// The address where the struct should be written to will be the first
|
|
// argument
|
|
// Returns the Large literal {1, 2, 3, 4}; parameters i and j are not used in the body.
// In the expected IR the sret pointer comes first, followed by `i32` and `i8 signext`.
fn Large f_agg_large_ret(int i, ichar j) {
|
|
return {1, 2, 3, 4};
|
|
}
|
|
|
|
// CharV16 names a 16-element char vector declared with @simd; the expected IR uses `<16 x i8>` with align 16.
typedef CharV16 = char[<16>] @simd;
|
|
|
|
// Copies element 7 into element 0 of the vector argument.
// In the expected IR the argument is a `ptr align 16` and the vector is loaded/stored through it.
fn void f_vec_large_v16i8(CharV16 x)
|
|
{
|
|
x[0] = x[7];
|
|
}
|
|
|
|
// Returns a 16-element constant vector; the expected IR writes it through an `sret(<16 x i8>) align 16` pointer.
fn CharV16 f_vec_large_v16i8_ret() => {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
|
|
|
|
// Scalars passed on the stack should not have signext/zeroext attributes
|
|
// (they are anyext).
|
|
|
|
// Returns g + h; the other parameters are only there to occupy argument slots.
// In the expected IR e and f carry zeroext/signext while g and h are plain `i8`,
// and the body widens g with zext and h with sext before adding.
fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
|
|
Large d, char e, ichar f, char g, ichar h) {
|
|
return g + h;
|
|
}
|
|
|
|
// Ensure that scalars passed on the stack are still determined correctly in
|
|
// the presence of large return values that consume a register due to the need
|
|
// to pass a pointer.
|
|
|
|
// Builds and returns a Large from a, e, f and g; b, c and d are unused in the body.
// In the expected IR the sret pointer is the first parameter, e is `i8 zeroext`,
// and f and g are plain `i8` with no extension attribute.
fn Large f_scalar_stack_2(int a, long b, long c, float128 d,
|
|
char e, ichar f, char g)
|
|
{
|
|
return {a, e, f, g};
|
|
}
|
|
|
|
// Returns d unchanged; the remaining parameters are unused in the body.
// In the expected IR the return is a direct `fp128`, e is `i8 zeroext`,
// and f and g are plain `i8` with no extension attribute.
fn float128 f_scalar_stack_4(int a, long b, long c, float128 d,
|
|
char e, ichar f, char g) {
|
|
return d;
|
|
}
|
|
|
|
// Aggregates and >=XLen scalars passed on the stack should be lowered just as
|
|
// they would be if passed via registers.
|
|
|
|
// Empty body; only the signature matters. The expected IR keeps every scalar
// as its own type (double, i64, i32, float, fp128) with no attributes.
fn void f_scalar_stack_5(double a, long b, double c, long d, int e,
|
|
long f, float g, double h, float128 i) {}
|
|
|
|
// Empty body; only the signature matters. In the expected IR Tiny becomes `i32`,
// Small becomes `[2 x i32]`, Small_aligned becomes `i64` and Large becomes `ptr align 4`.
fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
|
|
Small f, Small_aligned g, Large h) {}
|
|
|
|
// Ensure that ABI lowering happens as expected for vararg calls. For RV32
|
|
// with the base integer calling convention there will be no observable
|
|
// differences in the lowered IR for a call with varargs vs without.
|
|
|
|
// External variadic function with one fixed int parameter; the expected IR
// declares it as `i32 @f_va_callee(i32, ...)`.
extern fn int f_va_callee(int, ...);
|
|
|
|
// Calls f_va_callee with integer and floating-point literals plus one literal of each aggregate type.
// In the expected IR 4.0 and 5.0 are passed as double, Tiny as `i32`, Small as `[2 x i32]`,
// Small_aligned as `i64`, and Large as a `ptr align 4` to a copy named %indirectarg.
fn void f_va_caller() {
|
|
f_va_callee(1, 2, 3, 4.0, 5.0, (Tiny){6, 7, 8, 9},
|
|
(Small){10, null}, (Small_aligned){11},
|
|
(Large){12, 13, 14, 15});
|
|
}
|
|
|
|
|
|
/* #expect: test.ll
|
|
|
|
define void @test.f_void() #0 {
|
|
entry:
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define zeroext i8 @test.f_scalar_0(i8 zeroext %0) #0 {
|
|
entry:
|
|
ret i8 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define signext i8 @test.f_scalar_1(i8 signext %0) #0 {
|
|
entry:
|
|
ret i8 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define zeroext i8 @test.f_scalar_2(i8 zeroext %0) #0 {
|
|
entry:
|
|
ret i8 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i32 @test.f_scalar_3(i32 %0) #0 {
|
|
entry:
|
|
ret i32 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i64 @test.f_scalar_4(i64 %0) #0 {
|
|
entry:
|
|
ret i64 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i128 @test.f_scalar_5(i128 %0) #0 {
|
|
entry:
|
|
ret i128 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define float @test.f_fp_scalar_1(float %0) #0 {
|
|
entry:
|
|
ret float %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define double @test.f_fp_scalar_2(double %0) #0 {
|
|
entry:
|
|
ret double %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define fp128 @test.f_fp_scalar_3(fp128 %0) #0 {
|
|
entry:
|
|
ret fp128 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_agg_tiny(i32 %0) #0 {
|
|
entry:
|
|
%x = alloca %Tiny, align 1
|
|
store i32 %0, ptr %x, align 1
|
|
%1 = load i8, ptr %x, align 1
|
|
%ptradd = getelementptr inbounds i8, ptr %x, i32 1
|
|
%2 = load i8, ptr %ptradd, align 1
|
|
%add = add i8 %1, %2
|
|
store i8 %add, ptr %x, align 1
|
|
%ptradd1 = getelementptr inbounds i8, ptr %x, i32 2
|
|
%3 = load i8, ptr %ptradd1, align 1
|
|
%ptradd2 = getelementptr inbounds i8, ptr %x, i32 3
|
|
%4 = load i8, ptr %ptradd2, align 1
|
|
%add3 = add i8 %3, %4
|
|
store i8 %add3, ptr %ptradd1, align 1
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i32 @test.f_agg_tiny_ret() #0 {
|
|
entry:
|
|
%literal = alloca %Tiny, align 1
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 1 %literal, ptr align 1 @.__const, i32 4, i1 false)
|
|
%0 = load i32, ptr %literal, align 1
|
|
ret i32 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_vec_tiny_v4i8(i32 %0) #0 {
|
|
entry:
|
|
%x = alloca <4 x i8>, align 4
|
|
store i32 %0, ptr %x, align 4
|
|
%1 = load <4 x i8>, ptr %x, align 4
|
|
%2 = load <4 x i8>, ptr %x, align 4
|
|
%3 = extractelement <4 x i8> %2, i32 1
|
|
%elemset = insertelement <4 x i8> %1, i8 %3, i32 0
|
|
store <4 x i8> %elemset, ptr %x, align 4
|
|
%4 = load <4 x i8>, ptr %x, align 4
|
|
%5 = load <4 x i8>, ptr %x, align 4
|
|
%6 = extractelement <4 x i8> %5, i32 3
|
|
%elemset1 = insertelement <4 x i8> %4, i8 %6, i32 2
|
|
store <4 x i8> %elemset1, ptr %x, align 4
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i32 @test.f_vec_tiny_v4i8_ret() #0 {
|
|
entry:
|
|
%taddr = alloca <4 x i8>, align 4
|
|
store <4 x i8> <i8 1, i8 2, i8 3, i8 4>, ptr %taddr, align 4
|
|
%0 = load i32, ptr %taddr, align 4
|
|
ret i32 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_vec_tiny_v1i32(i32 %0) #0 {
|
|
entry:
|
|
%x = alloca <1 x i32>, align 4
|
|
store i32 %0, ptr %x, align 4
|
|
%1 = load <1 x i32>, ptr %x, align 4
|
|
%elemset = insertelement <1 x i32> %1, i32 114, i32 0
|
|
store <1 x i32> %elemset, ptr %x, align 4
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i32 @test.f_vec_tiny_v1i32_ret() #0 {
|
|
entry:
|
|
%taddr = alloca <1 x i32>, align 4
|
|
store <1 x i32>
|
|
%0 = load i32, ptr %taddr, align 4
|
|
ret i32 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_agg_small([2 x i32] %0) #0 {
|
|
entry:
|
|
%x = alloca %Small, align 4
|
|
store [2 x i32] %0, ptr %x, align 4
|
|
%1 = load i32, ptr %x, align 4
|
|
%ptradd = getelementptr inbounds i8, ptr %x, i32 4
|
|
%2 = load ptr, ptr %ptradd, align 4
|
|
%3 = load i32, ptr %2, align 4
|
|
%add = add i32 %1, %3
|
|
store i32 %add, ptr %x, align 4
|
|
%ptradd1 = getelementptr inbounds i8, ptr %x, i32 4
|
|
store ptr %x, ptr %ptradd1, align 4
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define [2 x i32] @test.f_agg_small_ret() #0 {
|
|
entry:
|
|
%literal = alloca %Small, align 4
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %literal, ptr align 4 @.__const.1, i32 8, i1 false)
|
|
%0 = load [2 x i32], ptr %literal, align 4
|
|
ret [2 x i32] %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_vec_small_v8i8(i64 %0) #0 {
|
|
entry:
|
|
%x = alloca <8 x i8>, align 8
|
|
store i64 %0, ptr %x, align 8
|
|
%1 = load <8 x i8>, ptr %x, align 8
|
|
%2 = load <8 x i8>, ptr %x, align 8
|
|
%3 = extractelement <8 x i8> %2, i32 7
|
|
%elemset = insertelement <8 x i8> %1, i8 %3, i32 0
|
|
store <8 x i8> %elemset, ptr %x, align 8
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i64 @test.f_vec_small_v8i8_ret() #0 {
|
|
entry:
|
|
%taddr = alloca <8 x i8>, align 8
|
|
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %taddr, align 8
|
|
%0 = load i64, ptr %taddr, align 8
|
|
ret i64 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_vec_small_v1i64(i64 %0) #0 {
|
|
entry:
|
|
%x = alloca <1 x i64>, align 8
|
|
store i64 %0, ptr %x, align 8
|
|
%1 = load <1 x i64>, ptr %x, align 8
|
|
%elemset = insertelement <1 x i64> %1, i64 114, i32 0
|
|
store <1 x i64> %elemset, ptr %x, align 8
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i64 @test.f_vec_small_v1i64_ret() #0 {
|
|
entry:
|
|
%taddr = alloca <1 x i64>, align 8
|
|
store <1 x i64>
|
|
%0 = load i64, ptr %taddr, align 8
|
|
ret i64 %0
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_agg_small_aligned(i64 %0) #0 {
|
|
entry:
|
|
%x = alloca %Small_aligned, align 8
|
|
store i64 %0, ptr %x, align 8
|
|
%1 = load i64, ptr %x, align 8
|
|
%2 = load i64, ptr %x, align 8
|
|
%add = add i64 %1, %2
|
|
store i64 %add, ptr %x, align 8
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i64 @test.f_agg_small_aligned_ret(i64 %0) #0 {
|
|
entry:
|
|
%x = alloca %Small_aligned, align 8
|
|
%literal = alloca %Small_aligned, align 8
|
|
store i64 %0, ptr %x, align 8
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %literal, ptr align 8 @.__const.2, i32 8, i1 false)
|
|
%1 = load i64, ptr %literal, align 8
|
|
ret i64 %1
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_agg_large(ptr align 4 %0) #0 {
|
|
entry:
|
|
%ptradd = getelementptr inbounds i8, ptr %0, i32 4
|
|
%1 = load i32, ptr %ptradd, align 4
|
|
%ptradd1 = getelementptr inbounds i8, ptr %0, i32 8
|
|
%2 = load i32, ptr %ptradd1, align 4
|
|
%add = add i32 %1, %2
|
|
%ptradd2 = getelementptr inbounds i8, ptr %0, i32 12
|
|
%3 = load i32, ptr %ptradd2, align 4
|
|
%add3 = add i32 %add, %3
|
|
store i32 %add3, ptr %0, align 4
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_agg_large_ret(ptr noalias sret(%Large) align 4 %0, i32 %1, i8 signext %2) #0 {
|
|
entry:
|
|
%literal = alloca %Large, align 4
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %literal, ptr align 4 @.__const.3, i32 16, i1 false)
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_vec_large_v16i8(ptr align 16 %0) #0 {
|
|
entry:
|
|
%1 = load <16 x i8>, ptr %0, align 16
|
|
%2 = load <16 x i8>, ptr %0, align 16
|
|
%3 = extractelement <16 x i8> %2, i32 7
|
|
%elemset = insertelement <16 x i8> %1, i8 %3, i32 0
|
|
store <16 x i8> %elemset, ptr %0, align 16
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_vec_large_v16i8_ret(ptr noalias sret(<16 x i8>) align 16 %0) #0 {
|
|
entry:
|
|
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %0, align 16
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define i32 @test.f_scalar_stack_1(i32 %0, [2 x i32] %1, i64 %2, ptr align 4 %3, i8 zeroext %4, i8 signext %5, i8 %6, i8 %7) #0 {
|
|
entry:
|
|
%a = alloca %Tiny, align 1
|
|
%b = alloca %Small, align 4
|
|
%c = alloca %Small_aligned, align 8
|
|
store i32 %0, ptr %a, align 1
|
|
store [2 x i32] %1, ptr %b, align 4
|
|
store i64 %2, ptr %c, align 8
|
|
%zext = zext i8 %6 to i32
|
|
%sext = sext i8 %7 to i32
|
|
%add = add i32 %zext, %sext
|
|
ret i32 %add
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_scalar_stack_2(ptr noalias sret(%Large) align 4 %0, i32 %1, i64 %2, i64 %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
|
|
entry:
|
|
%literal = alloca %Large, align 4
|
|
store i32 %1, ptr %literal, align 4
|
|
%ptradd = getelementptr inbounds i8, ptr %literal, i32 4
|
|
%zext = zext i8 %5 to i32
|
|
store i32 %zext, ptr %ptradd, align 4
|
|
%ptradd1 = getelementptr inbounds i8, ptr %literal, i32 8
|
|
%sext = sext i8 %6 to i32
|
|
store i32 %sext, ptr %ptradd1, align 4
|
|
%ptradd2 = getelementptr inbounds i8, ptr %literal, i32 12
|
|
%zext3 = zext i8 %7 to i32
|
|
store i32 %zext3, ptr %ptradd2, align 4
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 16, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define fp128 @test.f_scalar_stack_4(i32 %0, i64 %1, i64 %2, fp128 %3, i8 zeroext %4, i8 %5, i8 %6) #0 {
|
|
entry:
|
|
ret fp128 %3
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_scalar_stack_5(double %0, i64 %1, double %2, i64 %3, i32 %4, i64 %5, float %6, double %7, fp128 %8) #0 {
|
|
entry:
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
define void @test.f_agg_stack(double %0, i64 %1, double %2, i64 %3, i32 %4, [2 x i32] %5, i64 %6, ptr align 4 %7) #0 {
|
|
entry:
|
|
%e = alloca %Tiny, align 1
|
|
%f = alloca %Small, align 4
|
|
%g = alloca %Small_aligned, align 8
|
|
store i32 %4, ptr %e, align 1
|
|
store [2 x i32] %5, ptr %f, align 4
|
|
store i64 %6, ptr %g, align 8
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs:
|
|
declare i32 @f_va_callee(i32, ...) #0
|
|
|
|
; Function Attrs:
|
|
define void @test.f_va_caller() #0 {
|
|
entry:
|
|
%literal = alloca %Tiny, align 1
|
|
%literal1 = alloca %Small, align 4
|
|
%literal2 = alloca %Small_aligned, align 8
|
|
%literal3 = alloca %Large, align 4
|
|
%indirectarg = alloca %Large, align 4
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 1 %literal, ptr align 1 @.__const.4, i32 4, i1 false)
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %literal1, ptr align 4 @.__const.5, i32 8, i1 false)
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %literal2, ptr align 8 @.__const.6, i32 8, i1 false)
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %literal3, ptr align 4 @.__const.7, i32 16, i1 false)
|
|
%0 = load i32, ptr %literal, align 1
|
|
%1 = load [2 x i32], ptr %literal1, align 4
|
|
%2 = load i64, ptr %literal2, align 8
|
|
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %indirectarg, ptr align 4 %literal3, i32 16, i1 false)
|
|
%3 = call i32 (i32, ...) @f_va_callee(i32 1, i32 2, i32 3, double 4.000000e+00, double 5.000000e+00, i32 %0, [2 x i32] %1, i64 %2, ptr align 4 %indirectarg)
|
|
ret void
|
|
} |