diff --git a/src/compiler/enums.h b/src/compiler/enums.h index 473b01cb6..7473c9bfb 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -826,6 +826,7 @@ typedef enum BUILTIN_MEMSET, BUILTIN_MEMMOVE, BUILTIN_MIN, + BUILTIN_MULADD, BUILTIN_NEARBYINT, BUILTIN_POPCOUNT, BUILTIN_POW, diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c index f169a0db1..5d3981dec 100644 --- a/src/compiler/llvm_codegen.c +++ b/src/compiler/llvm_codegen.c @@ -654,6 +654,7 @@ static void llvm_codegen_setup() intrinsic_id.memmove = lookup_intrinsic("llvm.memmove"); intrinsic_id.minimum = lookup_intrinsic("llvm.minimum"); intrinsic_id.minnum = lookup_intrinsic("llvm.minnum"); + intrinsic_id.muladd = lookup_intrinsic("llvm.fmuladd"); intrinsic_id.nearbyint = lookup_intrinsic("llvm.nearbyint"); intrinsic_id.pow = lookup_intrinsic("llvm.pow"); intrinsic_id.powi = lookup_intrinsic("llvm.powi"); diff --git a/src/compiler/llvm_codegen_builtins.c b/src/compiler/llvm_codegen_builtins.c index e01e41377..63dd4c858 100644 --- a/src/compiler/llvm_codegen_builtins.c +++ b/src/compiler/llvm_codegen_builtins.c @@ -507,6 +507,9 @@ void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr) case BUILTIN_LOG10: llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.log10); return; + case BUILTIN_MULADD: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.muladd); + return; case BUILTIN_POW: llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.pow); return; diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h index dd94f1331..6693c68c9 100644 --- a/src/compiler/llvm_codegen_internal.h +++ b/src/compiler/llvm_codegen_internal.h @@ -149,6 +149,7 @@ typedef struct unsigned memmove; unsigned minimum; unsigned minnum; + unsigned muladd; unsigned nearbyint; unsigned pow; unsigned powi; diff --git a/src/compiler/sema_builtins.c b/src/compiler/sema_builtins.c index 530b99ed7..25ac6927f 100644 --- a/src/compiler/sema_builtins.c +++ b/src/compiler/sema_builtins.c @@ -463,6 +463,13 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) if (!sema_check_builtin_args_match(args, arg_count)) return false; rtype = args[0]->type; break; + case BUILTIN_MULADD: + if (!sema_check_builtin_args(args, + (BuiltinArg[]) { BA_FLOAT, BA_FLOAT, BA_FLOAT }, + arg_count)) return false; + if (!sema_check_builtin_args_match(args, arg_count)) return false; + rtype = args[0]->type; + break; case BUILTIN_VOLATILE_LOAD: { if (!sema_check_builtin_args(args, (BuiltinArg[]) { BA_POINTER }, 1)) return false; @@ -553,6 +560,7 @@ static inline unsigned builtin_expected_args(BuiltinFunction func) case BUILTIN_FMA: case BUILTIN_FSHL: case BUILTIN_FSHR: + case BUILTIN_MULADD: case BUILTIN_PREFETCH: return 3; case BUILTIN_MEMSET: diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index 21d4d0c27..9edd3b949 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -200,6 +200,7 @@ void symtab_init(uint32_t capacity) builtin_list[BUILTIN_MEMCOPY] = KW_DEF("memcpy"); builtin_list[BUILTIN_MEMSET] = KW_DEF("memset"); builtin_list[BUILTIN_MEMMOVE] = KW_DEF("memmove"); + builtin_list[BUILTIN_MULADD] = KW_DEF("muladd"); builtin_list[BUILTIN_NEARBYINT] = KW_DEF("nearbyint"); builtin_list[BUILTIN_POPCOUNT] = KW_DEF("popcount"); builtin_list[BUILTIN_POW] = KW_DEF("pow"); diff --git a/test/test_suite/builtins/simple_builtins.c3t b/test/test_suite/builtins/simple_builtins.c3t index b22cbbaeb..6d586c9e0 100644 --- a/test/test_suite/builtins/simple_builtins.c3t +++ b/test/test_suite/builtins/simple_builtins.c3t @@ -6,6 +6,7 @@ fn int foo(double b) double d = $$ceil(b); double e = $$max(1.0, d); double f = $$fma(d, 2.0, 3.0); + double m = $$muladd(d, 2.0, 3.0); int xeb = 13; int[3] abcd; @@ -16,13 +17,14 @@ fn int foo(double b) return 1; } -// #expect: foo.ll +/* #expect: foo.ll define i32 @foo_foo(double %0) #0 { entry: %d = alloca double, align 8 %e = alloca double, align 8 %f = alloca double, align 8 + %m = alloca double, align 8 %xeb = alloca i32, align 4 %abcd = alloca [3 x i32], align 4 %sy = alloca i32, align 4 @@ -34,24 +36,27 @@ entry: %4 = load double, double* %d, align 8 %5 = call double @llvm.fma.f64(double %4, double 2.000000e+00, double 3.000000e+00) store double %5, double* %f, align 8 + %6 = load double, double* %d, align 8 + %7 = call double @llvm.fmuladd.f64(double %6, double 2.000000e+00, double 3.000000e+00) + store double %7, double* %m, align 8 store i32 13, i32* %xeb, align 4 - %6 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 0 - store i32 0, i32* %6, align 4 - %7 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 1 - store i32 0, i32* %7, align 4 - %8 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 2 + %8 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 0 store i32 0, i32* %8, align 4 - %9 = load volatile i32, i32* %xeb, align 4 - store i32 %9, i32* %sy, align 4 - %10 = load i32, i32* %sy, align 4 - %add = add i32 %10, 1 - store volatile i32 %add, i32* %xeb, align 4 - %11 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 2 + %9 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 1 + store i32 0, i32* %9, align 4 + %10 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 2 + store i32 0, i32* %10, align 4 + %11 = load volatile i32, i32* %xeb, align 4 + store i32 %11, i32* %sy, align 4 %12 = load i32, i32* %sy, align 4 - %add1 = add i32 %12, 2 - store volatile i32 %add1, i32* %11, align 4 + %add = add i32 %12, 1 + store volatile i32 %add, i32* %xeb, align 4 %13 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 2 - %14 = load volatile i32, i32* %13, align 4 - store i32 %14, i32* %sy, align 4 + %14 = load i32, i32* %sy, align 4 + %add1 = add i32 %14, 2 + store volatile i32 %add1, i32* %13, align 4 + %15 = getelementptr inbounds [3 x i32], [3 x i32]* %abcd, i64 0, i64 2 + %16 = load volatile i32, i32* %15, align 4 + store i32 %16, i32* %sy, align 4 ret i32 1 } diff --git a/test/test_suite2/builtins/simple_builtins.c3t b/test/test_suite2/builtins/simple_builtins.c3t index 0f0278cb1..3cb43eba7 100644 --- a/test/test_suite2/builtins/simple_builtins.c3t +++ b/test/test_suite2/builtins/simple_builtins.c3t @@ -6,6 +6,7 @@ fn int foo(double b) double d = $$ceil(b); double e = $$max(1.0, d); double f = $$fma(d, 2.0, 3.0); + double m = $$muladd(d, 2.0, 3.0); int xeb = 13; int[3] abcd; @@ -23,6 +24,7 @@ entry: %d = alloca double, align 8 %e = alloca double, align 8 %f = alloca double, align 8 + %m = alloca double, align 8 %xeb = alloca i32, align 4 %abcd = alloca [3 x i32], align 4 %sy = alloca i32, align 4 @@ -34,24 +36,27 @@ entry: %4 = load double, ptr %d, align 8 %5 = call double @llvm.fma.f64(double %4, double 2.000000e+00, double 3.000000e+00) store double %5, ptr %f, align 8 + %6 = load double, ptr %d, align 8 + %7 = call double @llvm.fmuladd.f64(double %6, double 2.000000e+00, double 3.000000e+00) + store double %7, ptr %m, align 8 store i32 13, ptr %xeb, align 4 - %6 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 0 - store i32 0, ptr %6, align 4 - %7 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 1 - store i32 0, ptr %7, align 4 - %8 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 2 + %8 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 0 store i32 0, ptr %8, align 4 - %9 = load volatile i32, ptr %xeb, align 4 - store i32 %9, ptr %sy, align 4 - %10 = load i32, ptr %sy, align 4 - %add = add i32 %10, 1 - store volatile i32 %add, ptr %xeb, align 4 - %11 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 2 + %9 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 1 + store i32 0, ptr %9, align 4 + %10 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 2 + store i32 0, ptr %10, align 4 + %11 = load volatile i32, ptr %xeb, align 4 + store i32 %11, ptr %sy, align 4 %12 = load i32, ptr %sy, align 4 - %add1 = add i32 %12, 2 - store volatile i32 %add1, ptr %11, align 4 + %add = add i32 %12, 1 + store volatile i32 %add, ptr %xeb, align 4 %13 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 2 - %14 = load volatile i32, ptr %13, align 4 - store i32 %14, ptr %sy, align 4 + %14 = load i32, ptr %sy, align 4 + %add1 = add i32 %14, 2 + store volatile i32 %add1, ptr %13, align 4 + %15 = getelementptr inbounds [3 x i32], ptr %abcd, i64 0, i64 2 + %16 = load volatile i32, ptr %15, align 4 + store i32 %16, ptr %sy, align 4 ret i32 1 }