From 4fa4b2a63103ed8a8657b641c9e79f1396b0e20f Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Mon, 19 Sep 2022 14:49:01 +0200 Subject: [PATCH] Refactored builtins. Added reduce operations and powi. Version bump. --- CMakeLists.txt | 2 +- lib/std/core/builtin.c3 | 4 + src/compiler/enums.h | 10 + src/compiler/llvm_codegen.c | 7 + src/compiler/llvm_codegen_builtins.c | 526 ++++++++++++++++++ src/compiler/llvm_codegen_expr.c | 405 +------------- src/compiler/llvm_codegen_internal.h | 8 + src/compiler/sema_expr.c | 57 +- src/compiler/symtab.c | 10 + src/version.h | 2 +- .../builtins/reduce_arithmetics.c3t | 43 ++ .../builtins/reduce_arithmetics.c3t | 43 ++ 12 files changed, 710 insertions(+), 407 deletions(-) create mode 100644 src/compiler/llvm_codegen_builtins.c create mode 100644 test/test_suite/builtins/reduce_arithmetics.c3t create mode 100644 test/test_suite2/builtins/reduce_arithmetics.c3t diff --git a/CMakeLists.txt b/CMakeLists.txt index b2fadeb08..c943a2972 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -254,7 +254,7 @@ add_executable(c3c src/compiler/tilde_codegen_type.c src/compiler/windows_support.c src/compiler/codegen_asm.c - src/compiler/asm_target.c) + src/compiler/asm_target.c src/compiler/llvm_codegen_builtins.c) target_include_directories(c3c PRIVATE diff --git a/lib/std/core/builtin.c3 b/lib/std/core/builtin.c3 index 244a8f877..826aff83a 100644 --- a/lib/std/core/builtin.c3 +++ b/lib/std/core/builtin.c3 @@ -114,6 +114,10 @@ macro enum_by_name($Type, char[] enum_name) @builtin return SearchResult.MISSING!; } +/** + * Locality for prefetch, levels 0 - 3, corresponding + * to "no locality" to "extremely local" + **/ enum PrefetchLocality { NO_LOCALITY, diff --git a/src/compiler/enums.h b/src/compiler/enums.h index 41e107c4d..c3bf2e653 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -816,7 +816,17 @@ typedef enum BUILTIN_NEARBYINT, BUILTIN_POPCOUNT, BUILTIN_POW, + BUILTIN_POW_INT, BUILTIN_PREFETCH, + BUILTIN_REDUCE_ADD, +
BUILTIN_REDUCE_AND, + BUILTIN_REDUCE_FADD, + BUILTIN_REDUCE_FMUL, + BUILTIN_REDUCE_MUL, + BUILTIN_REDUCE_OR, + BUILTIN_REDUCE_XOR, + BUILTIN_REDUCE_MAX, + BUILTIN_REDUCE_MIN, BUILTIN_REVERSE, BUILTIN_RINT, BUILTIN_ROUND, diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c index 544b63069..455da5319 100644 --- a/src/compiler/llvm_codegen.c +++ b/src/compiler/llvm_codegen.c @@ -661,6 +661,13 @@ void llvm_codegen_setup() intrinsic_id.vector_reduce_smin = lookup_intrinsic("llvm.vector.reduce.smin"); intrinsic_id.vector_reduce_umax = lookup_intrinsic("llvm.vector.reduce.umax"); intrinsic_id.vector_reduce_umin = lookup_intrinsic("llvm.vector.reduce.umin"); + intrinsic_id.vector_reduce_add = lookup_intrinsic("llvm.vector.reduce.add"); + intrinsic_id.vector_reduce_fadd = lookup_intrinsic("llvm.vector.reduce.fadd"); + intrinsic_id.vector_reduce_mul = lookup_intrinsic("llvm.vector.reduce.mul"); + intrinsic_id.vector_reduce_fmul = lookup_intrinsic("llvm.vector.reduce.fmul"); + intrinsic_id.vector_reduce_and = lookup_intrinsic("llvm.vector.reduce.and"); + intrinsic_id.vector_reduce_or = lookup_intrinsic("llvm.vector.reduce.or"); + intrinsic_id.vector_reduce_xor = lookup_intrinsic("llvm.vector.reduce.xor"); attribute_id.align = lookup_attribute("align"); attribute_id.alwaysinline = lookup_attribute("alwaysinline"); diff --git a/src/compiler/llvm_codegen_builtins.c b/src/compiler/llvm_codegen_builtins.c new file mode 100644 index 000000000..51c4b065d --- /dev/null +++ b/src/compiler/llvm_codegen_builtins.c @@ -0,0 +1,526 @@ +// Copyright (c) 2022 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a LGPLv3.0 +// a copy of which can be found in the LICENSE file. 
+
+#include "llvm_codegen_internal.h"
+
+INLINE void llvm_emit_reverse(GenContext *c, BEValue *result_value, Expr *expr) // Lowers the reverse builtin: shuffles args[0] with a descending constant index mask.
+{
+	Expr **args = expr->call_expr.arguments;
+	llvm_emit_expr(c, result_value, args[0]);
+	llvm_value_rvalue(c, result_value);
+	Type *rtype = result_value->type; // The result keeps the type of the input vector.
+	LLVMValueRef arg1 = result_value->value;
+	LLVMValueRef arg2 = LLVMGetPoison(LLVMTypeOf(arg1)); // Second shuffle operand; the mask below only indexes into arg1.
+	LLVMValueRef buff[128]; // Stack storage for masks of up to 128 lanes.
+	unsigned elements = rtype->array.len;
+	LLVMValueRef *mask_element = elements > 128 ? MALLOC(sizeof(LLVMValueRef) * elements) : buff; // Heap fallback needs one slot per lane; a single slot would be overrun by the loop below.
+	LLVMTypeRef mask_element_type = llvm_get_type(c, type_int);
+	for (unsigned i = 0; i < elements; i++)
+	{
+		mask_element[i] = LLVMConstInt(mask_element_type, elements - i - 1, false); // Lane i takes source lane (elements - 1 - i).
+	}
+	LLVMValueRef mask = LLVMConstVector(mask_element, elements);
+	llvm_value_set(result_value, LLVMBuildShuffleVector(c->builder, arg1, arg2, mask, "reverse"), rtype);
+}
+
+INLINE void llvm_emit_shufflevector(GenContext *c, BEValue *result_value, Expr *expr) // Lowers the shufflevector builtin: args[0] is the first vector, the last argument is the mask, args[1] (if present) is the second vector.
+{
+	Expr **args = expr->call_expr.arguments;
+	unsigned count = vec_size(args);
+	LLVMValueRef arg1;
+	LLVMValueRef arg2;
+	LLVMValueRef mask;
+	llvm_emit_expr(c, result_value, args[0]);
+	llvm_value_rvalue(c, result_value);
+	Type *rtype = result_value->type; // The result is given the type of the first vector.
+	arg1 = result_value->value;
+	llvm_emit_expr(c, result_value, args[count - 1]); // The mask is always the last argument.
+	llvm_value_rvalue(c, result_value);
+	mask = result_value->value;
+	assert(LLVMIsConstant(mask));
+	if (count == 2)
+	{
+		arg2 = LLVMGetPoison(LLVMTypeOf(arg1)); // Two-argument form: no second vector, so use poison.
+	}
+	else
+	{
+		llvm_emit_expr(c, result_value, args[1]);
+		llvm_value_rvalue(c, result_value);
+		arg2 = result_value->value;
+	}
+	LLVMValueRef val = LLVMBuildShuffleVector(c->builder, arg1, arg2, mask, "shuffle");
+	llvm_value_set(result_value, val, rtype);
+	return;
+}
+
+INLINE void llvm_emit_unreachable(GenContext *c, BEValue *result_value, Expr *expr) +{ + llvm_value_set(result_value, LLVMBuildUnreachable(c->builder), type_void); + c->current_block = NULL; + c->current_block_is_target = false; +
LLVMBasicBlockRef after_unreachable = llvm_basic_block_new(c, "after.unreachable"); + llvm_emit_block(c, after_unreachable); +} + +INLINE void llvm_emit_stacktrace(GenContext *c, BEValue *result_value, Expr *expr) +{ + if (!c->debug.enable_stacktrace) + { + llvm_value_set(result_value, llvm_get_zero(c, type_voidptr), type_voidptr); + return; + } + llvm_value_set(result_value, llvm_emit_bitcast(c, c->debug.stack_slot, type_voidptr), type_voidptr); +} + +INLINE void llvm_emit_volatile_store(GenContext *c, BEValue *result_value, Expr *expr) +{ + BEValue value; + llvm_emit_expr(c, &value, expr->call_expr.arguments[0]); + llvm_emit_expr(c, result_value, expr->call_expr.arguments[1]); + llvm_value_rvalue(c, &value); + value.kind = BE_ADDRESS; + BEValue store_value = *result_value; + LLVMValueRef store = llvm_store(c, &value, &store_value); + if (store) LLVMSetVolatile(store, true); +} + +INLINE void llvm_emit_volatile_load(GenContext *c, BEValue *result_value, Expr *expr) +{ + llvm_emit_expr(c, result_value, expr->call_expr.arguments[0]); + llvm_value_rvalue(c, result_value); + result_value->kind = BE_ADDRESS; + result_value->type = type_lowering(result_value->type->pointer); + llvm_value_rvalue(c, result_value); + LLVMSetVolatile(result_value->value, true); +} + +static inline LLVMValueRef llvm_syscall_asm(GenContext *c, LLVMTypeRef func_type, char *call) +{ + return LLVMGetInlineAsm(func_type, call, strlen(call), + scratch_buffer_to_string(), scratch_buffer.len, + true, true, LLVMInlineAsmDialectATT, /* can throw */ false); +} + +static inline void llvm_syscall_write_regs_to_scratch(const char** registers, unsigned args) +{ + for (unsigned i = 0; i < args; i++) + { + scratch_buffer_append(",{"); + scratch_buffer_append(registers[i]); + scratch_buffer_append("}"); + } +} + +static inline void llvm_emit_syscall(GenContext *c, BEValue *be_value, Expr *expr) +{ + unsigned arguments = vec_size(expr->call_expr.arguments); + assert(arguments < 10 && "Only has room for 10"); + 
LLVMValueRef arg_results[10]; + LLVMTypeRef arg_types[10]; + Expr **args = expr->call_expr.arguments; + LLVMTypeRef type = llvm_get_type(c, type_uptr); + for (unsigned i = 0; i < arguments; i++) + { + llvm_emit_expr(c, be_value, args[i]); + llvm_value_rvalue(c, be_value); + arg_results[i] = be_value->value; + arg_types[i] = type; + } + LLVMTypeRef func_type = LLVMFunctionType(type, arg_types, arguments, false); + scratch_buffer_clear(); + LLVMValueRef inline_asm; + switch (platform_target.arch) + { + case ARCH_TYPE_AARCH64: + case ARCH_TYPE_AARCH64_BE: + scratch_buffer_append("={x0}"); + assert(arguments < 8); + if (os_is_apple(platform_target.os)) + { + static char const *regs[] = { "x16", "x0", "x1", "x2", "x3", "x4", "x5" }; + llvm_syscall_write_regs_to_scratch(regs, arguments); + } + else + { + static char const *regs[] = { "x8", "x0", "x1", "x2", "x3", "x4", "x5" }; + llvm_syscall_write_regs_to_scratch(regs, arguments); + } + inline_asm = llvm_syscall_asm(c, func_type, "svc #0x80"); + break; + case ARCH_TYPE_X86: + { + scratch_buffer_append("={eax}"); + assert(arguments < 8); + static char const *regs[] = { "eax", "ebx", "ecx", "edx", "esi", "edi" }; + llvm_syscall_write_regs_to_scratch(regs, arguments < 6 ? 
arguments : 6); + if (arguments == 7) + { + scratch_buffer_append(",rm"); + char *asm_str = "push %[arg6]\npush %%ebp\nmov 4(%%esp), %%ebp\nint $0x80\npop %%ebp\nadd $4, %%esp"; + inline_asm = llvm_syscall_asm(c, func_type, asm_str); + break; + } + inline_asm = llvm_syscall_asm(c, func_type, "int $0x80"); + break; + } + case ARCH_TYPE_X86_64: + scratch_buffer_append("={rax}"); + assert(arguments < 8); + { + static char const *regs[] = { "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9" }; + llvm_syscall_write_regs_to_scratch(regs, arguments); + } + // Check clobbers on different OSes + scratch_buffer_append(",~{rcx},~{r11},~{memory}"); + inline_asm = llvm_syscall_asm(c, func_type, "syscall"); + break; + case ARCH_UNSUPPORTED: + default: + UNREACHABLE + } + LLVMValueRef result = LLVMBuildCall2(c->builder, func_type, inline_asm, arg_results, arguments, "syscall"); + llvm_value_set(be_value, result, type_uptr); +} + +INLINE unsigned llvm_intrinsic_by_type(Type *type, unsigned int_intrinsic, unsigned uint_intrinsic, unsigned float_intrinsic) +{ + type = type_flatten(type); + RETRY: + switch (type->type_kind) + { + case ALL_SIGNED_INTS: + return int_intrinsic; + case TYPE_BOOL: + case ALL_UNSIGNED_INTS: + return uint_intrinsic; + case ALL_FLOATS: + return float_intrinsic; + case TYPE_VECTOR: + type = type->array.base; + goto RETRY; + default: + UNREACHABLE + } +} + +INLINE void llvm_emit_intrinsic_args(GenContext *c, Expr **args, LLVMValueRef *slots, unsigned count) +{ + BEValue be_value; + for (unsigned i = 0; i < count; i++) + { + llvm_emit_expr(c, &be_value, args[i]); + llvm_value_rvalue(c, &be_value); + slots[i] = be_value.value; + } +} + +INLINE void llvm_emit_memcpy_builtin(GenContext *c, BEValue *be_value, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + LLVMValueRef arg_slots[4]; + llvm_emit_intrinsic_args(c, args, arg_slots, 4); + LLVMTypeRef call_type[3]; + call_type[0] = call_type[1] = llvm_get_type(c, type_voidptr); + call_type[2] = llvm_get_type(c, 
type_usize); + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic_id.memcpy, call_type, 3, arg_slots, 4); + assert(args[4]->const_expr.const_kind == CONST_INTEGER); + assert(args[5]->const_expr.const_kind == CONST_INTEGER); + uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); + uint64_t src_align = int_to_u64(args[5]->const_expr.ixx); + if (dst_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 1, dst_align); + if (src_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 2, src_align); + llvm_value_set(be_value, result, type_void); +} + +INLINE void llvm_emit_memset_builtin(GenContext *c, BEValue *be_value, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + LLVMValueRef arg_slots[4]; + llvm_emit_intrinsic_args(c, args, arg_slots, 4); + LLVMTypeRef call_type[2] = { llvm_get_type(c, type_voidptr), llvm_get_type(c, type_usize) }; + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic_id.memset, call_type, 2, arg_slots, 4); + assert(args[4]->const_expr.const_kind == CONST_INTEGER); + uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); + if (dst_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 1, dst_align); + llvm_value_set(be_value, result, type_void); +} + +INLINE void llvm_emit_prefetch(GenContext *c, BEValue *be_value, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + LLVMValueRef arg_slots[4]; + llvm_emit_intrinsic_args(c, args, arg_slots, 3); + arg_slots[3] = llvm_const_int(c, type_int, 1); + LLVMTypeRef call_type[1] = { llvm_get_type(c, type_voidptr) }; + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic_id.prefetch, call_type, 1, arg_slots, 4); + llvm_value_set(be_value, result, type_void); +} + +void llvm_emit_reduce_int_builtin(GenContext *c, unsigned intrinsic, BEValue *be_value, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + LLVMValueRef arg_slots[1]; + llvm_emit_intrinsic_args(c, args, arg_slots, 1); + LLVMTypeRef call_type[1] = { 
LLVMTypeOf(arg_slots[0]) }; + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, 1); + llvm_value_set(be_value, result, expr->type); +}
+
+void llvm_emit_reduce_float_builtin(GenContext *c, unsigned intrinsic, BEValue *be_value, Expr *expr) // Emits a two-argument reduction intrinsic call; the dispatcher uses it for the fadd / fmul vector reductions.
+{
+	Expr **args = expr->call_expr.arguments;
+	LLVMValueRef arg_slots[2];
+	llvm_emit_intrinsic_args(c, args, arg_slots, 2);
+	LLVMTypeRef call_type[1] = { LLVMTypeOf(arg_slots[1]) }; // The overload is selected by the type of the second argument.
+	LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, 2);
+	llvm_value_set(be_value, result, expr->type);
+}
+
+void llvm_emit_int_with_bool_builtin(GenContext *c, unsigned intrinsic, BEValue *be_value, Expr *expr, bool bool_val) // Emits intrinsic(args[0], bool_val) for intrinsics taking an integer plus a constant boolean flag (cttz / ctlz in the dispatcher).
+{
+	Expr **args = expr->call_expr.arguments;
+	LLVMValueRef arg_slots[2];
+	llvm_emit_intrinsic_args(c, args, arg_slots, 1);
+	arg_slots[1] = LLVMConstInt(c->bool_type, bool_val ? 1 : 0, false); // Pass the requested flag; it was previously ignored and always emitted as false.
+	LLVMTypeRef call_type[1] = { LLVMTypeOf(arg_slots[0]) }; // Overload on the integer operand, not on the boolean flag in slot 1.
+	LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, 2);
+	llvm_value_set(be_value, result, expr->type);
+}
+
+void llvm_emit_pow_int_builtin(GenContext *c, BEValue *be_value, Expr *expr) // Emits the powi intrinsic with args[0] as base and args[1] as exponent.
+{
+	Expr **args = expr->call_expr.arguments;
+	LLVMValueRef arg_slots[2];
+	llvm_emit_intrinsic_args(c, args, arg_slots, 2);
+	LLVMTypeRef call_type[2] = { LLVMTypeOf(arg_slots[0]), LLVMTypeOf(arg_slots[1]) }; // The overload is keyed on both the base and the exponent type.
+	LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic_id.powi, call_type, 2, arg_slots, 2);
+	llvm_value_set(be_value, result, expr->type);
+}
+
+void llvm_emit_3_variant_builtin(GenContext *c, BEValue *be_value, Expr *expr, unsigned sid, unsigned uid, unsigned fid) +{ + Expr **args = expr->call_expr.arguments; + unsigned count = vec_size(args); + assert(count <= 3); + LLVMValueRef arg_slots[3]; + unsigned intrinsic = llvm_intrinsic_by_type(args[0]->type, sid, uid, fid); + llvm_emit_intrinsic_args(c, args, arg_slots, count); + LLVMTypeRef call_type[1] = { LLVMTypeOf(arg_slots[0]) }; +
LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, count); + llvm_value_set(be_value, result, expr->type); +} + +void llvm_emit_abs_builtin(GenContext *c, BEValue *be_value, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + LLVMValueRef arg_slots[2]; + llvm_emit_intrinsic_args(c, args, arg_slots, 1); + unsigned intrinsic = llvm_intrinsic_by_type(args[0]->type, intrinsic_id.abs, intrinsic_id.abs, intrinsic_id.fabs); + LLVMTypeRef call_type[1] = { LLVMTypeOf(arg_slots[0]) }; + LLVMValueRef result; + if (intrinsic == intrinsic_id.abs) + { + arg_slots[1] = llvm_get_zero_raw(c->bool_type); + result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, 2); + } + else + { + result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, 1); + } + llvm_value_set(be_value, result, expr->type); +} + +void llvm_emit_simple_builtin(GenContext *c, BEValue *be_value, Expr *expr, unsigned intrinsic) +{ + Expr **args = expr->call_expr.arguments; + unsigned count = vec_size(args); + assert(count <= 3); + LLVMValueRef arg_slots[3]; + llvm_emit_intrinsic_args(c, args, arg_slots, count); + LLVMTypeRef call_type[1] = { LLVMTypeOf(arg_slots[0]) }; + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 1, arg_slots, count); + llvm_value_set(be_value, result, expr->type); +} + +void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr) +{ + BuiltinFunction func = exprptr(expr->call_expr.function)->builtin_expr.builtin; + unsigned intrinsic; + LLVMValueRef val = NULL; + switch (func) + { + case BUILTIN_UNREACHABLE: + llvm_emit_unreachable(c, result_value, expr); + return; + case BUILTIN_SHUFFLEVECTOR: + llvm_emit_shufflevector(c, result_value, expr); + return; + case BUILTIN_REVERSE: + llvm_emit_reverse(c, result_value, expr); + return; + case BUILTIN_STACKTRACE: + llvm_emit_stacktrace(c, result_value, expr); + return; + case BUILTIN_VOLATILE_STORE: + llvm_emit_volatile_store(c, 
result_value, expr); + return; + case BUILTIN_VOLATILE_LOAD: + llvm_emit_volatile_load(c, result_value, expr); + return; + case BUILTIN_SYSCALL: + llvm_emit_syscall(c, result_value, expr); + return; + case BUILTIN_MEMCOPY: + llvm_emit_memcpy_builtin(c, result_value, expr); + return; + case BUILTIN_MEMSET: + llvm_emit_memset_builtin(c, result_value, expr); + return; + case BUILTIN_SYSCLOCK: + llvm_value_set(result_value, llvm_emit_call_intrinsic(c, intrinsic_id.readcyclecounter, NULL, 0, NULL, 0), expr->type); + return; + case BUILTIN_TRAP: + llvm_value_set(result_value, llvm_emit_call_intrinsic(c, intrinsic_id.trap, NULL, 0, NULL, 0), type_void); + return; + case BUILTIN_PREFETCH: + llvm_emit_prefetch(c, result_value, expr); + return; + case BUILTIN_REDUCE_AND: + llvm_emit_reduce_int_builtin(c, intrinsic_id.vector_reduce_and, result_value, expr); + return; + case BUILTIN_REDUCE_OR: + llvm_emit_reduce_int_builtin(c, intrinsic_id.vector_reduce_or, result_value, expr); + return; + case BUILTIN_REDUCE_MIN: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.vector_reduce_smin, intrinsic_id.vector_reduce_umin, intrinsic_id.vector_reduce_fmin); + return; + case BUILTIN_REDUCE_MAX: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.vector_reduce_smax, intrinsic_id.vector_reduce_umax, intrinsic_id.vector_reduce_fmax); + return; + case BUILTIN_REDUCE_XOR: + llvm_emit_reduce_int_builtin(c, intrinsic_id.vector_reduce_xor, result_value, expr); + return; + case BUILTIN_REDUCE_ADD: + llvm_emit_reduce_int_builtin(c, intrinsic_id.vector_reduce_add, result_value, expr); + return; + case BUILTIN_REDUCE_MUL: + llvm_emit_reduce_int_builtin(c, intrinsic_id.vector_reduce_mul, result_value, expr); + return; + case BUILTIN_REDUCE_FADD: + llvm_emit_reduce_float_builtin(c, intrinsic_id.vector_reduce_fadd, result_value, expr); + return; + case BUILTIN_REDUCE_FMUL: + llvm_emit_reduce_float_builtin(c, intrinsic_id.vector_reduce_fmul, result_value, expr); + return; 
+ case BUILTIN_CTTZ: + llvm_emit_int_with_bool_builtin(c, intrinsic_id.cttz, result_value, expr, false); + return; + case BUILTIN_CTLZ: + llvm_emit_int_with_bool_builtin(c, intrinsic_id.ctlz, result_value, expr, false); + return; + case BUILTIN_MAX: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.smax, intrinsic_id.umax, intrinsic_id.maxnum); + return; + case BUILTIN_MIN: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.smin, intrinsic_id.umin, intrinsic_id.minnum); + return; + case BUILTIN_SAT_SHL: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.sshl_sat, intrinsic_id.ushl_sat, 0); + return; + case BUILTIN_SAT_ADD: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.sadd_sat, intrinsic_id.uadd_sat, 0); + return; + case BUILTIN_SAT_SUB: + llvm_emit_3_variant_builtin(c, result_value, expr, intrinsic_id.ssub_sat, intrinsic_id.usub_sat, 0); + return; + case BUILTIN_ABS: + llvm_emit_abs_builtin(c, result_value, expr); + return; + case BUILTIN_POW_INT: + llvm_emit_pow_int_builtin(c, result_value, expr); + return; + case BUILTIN_BITREVERSE: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.bitreverse); + return; + case BUILTIN_BSWAP: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.bswap); + return; + case BUILTIN_CEIL: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.ceil); + return; + case BUILTIN_COS: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.cos); + return; + case BUILTIN_COPYSIGN: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.copysign); + return; + case BUILTIN_FLOOR: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.floor); + return; + case BUILTIN_EXP: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.exp); + return; + case BUILTIN_EXP2: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.exp2); + return; + case BUILTIN_FMA: + llvm_emit_simple_builtin(c, result_value, expr, 
intrinsic_id.fma); + return; + case BUILTIN_FSHL: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.fshl); + return; + case BUILTIN_FSHR: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.fshr); + return; + case BUILTIN_LOG: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.log); + return; + case BUILTIN_LOG2: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.log2); + return; + case BUILTIN_LOG10: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.log10); + return; + case BUILTIN_POW: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.pow); + return; + case BUILTIN_NEARBYINT: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.nearbyint); + return; + case BUILTIN_POPCOUNT: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.ctpop); + return; + case BUILTIN_RINT: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.rint); + return; + case BUILTIN_ROUND: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.round); + return; + case BUILTIN_ROUNDEVEN: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.roundeven); + return; + case BUILTIN_SIN: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.sin); + return; + case BUILTIN_SQRT: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.sqrt); + return; + case BUILTIN_TRUNC: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.trunc); + return; + case BUILTIN_LRINT: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.lrint); + return; + case BUILTIN_LROUND: + llvm_emit_simple_builtin(c, result_value, expr, intrinsic_id.lround); + return; + case BUILTIN_LLRINT: + TODO + case BUILTIN_LLROUND: + TODO + case BUILTIN_NONE: + UNREACHABLE + } + UNREACHABLE +} diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index 89802e3ab..eeeaabc9a 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -4129,71 
+4129,6 @@ void llvm_value_struct_gep(GenContext *c, BEValue *element, BEValue *struct_poin element->alignment = alignment; } -static void llvm_emit_intrinsic_expr(GenContext *c, unsigned intrinsic, BEValue *be_value, Expr *expr) -{ - unsigned arguments = vec_size(expr->call_expr.arguments); - assert(arguments < 10 && "Only has room for 10"); - LLVMValueRef arg_results[10]; - if (intrinsic == intrinsic_id.memcpy) arguments -= 2; - if (intrinsic == intrinsic_id.memset) arguments--; - - Expr **args = expr->call_expr.arguments; - LLVMTypeRef call_type[3]; - int call_args = 0; - for (unsigned i = 0; i < arguments; i++) - { - llvm_emit_expr(c, be_value, args[i]); - llvm_value_rvalue(c, be_value); - arg_results[i] = be_value->value; - } - if (intrinsic == intrinsic_id.ctlz || intrinsic == intrinsic_id.cttz || intrinsic == intrinsic_id.abs) - { - arg_results[1] = llvm_get_zero_raw(c->bool_type); - arguments++; - } - else if (intrinsic == intrinsic_id.prefetch) - { - arg_results[arguments++] = llvm_const_int(c, type_int, 1); - call_args = 1; - call_type[0] = llvm_get_type(c, type_voidptr); - } - if (expr->type != type_void) - { - call_args = 1; - call_type[0] = llvm_get_type(c, expr->type); - if (intrinsic == intrinsic_id.readcyclecounter) call_args = 0; - } - else if (intrinsic == intrinsic_id.memcpy) - { - call_type[0] = call_type[1] = llvm_get_type(c, type_voidptr); - call_type[2] = llvm_get_type(c, type_usize); - call_args = 3; - } - else if (intrinsic == intrinsic_id.memset) - { - call_type[0] = llvm_get_type(c, type_voidptr); - call_type[1] = llvm_get_type(c, type_usize); - call_args = 2; - } - LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, call_args, arg_results, arguments); - llvm_value_set(be_value, result, expr->type); - if (intrinsic == intrinsic_id.memcpy) - { - assert(args[4]->const_expr.const_kind == CONST_INTEGER); - assert(args[5]->const_expr.const_kind == CONST_INTEGER); - uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); - 
uint64_t src_align = int_to_u64(args[5]->const_expr.ixx); - if (dst_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 1, dst_align); - if (src_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 2, src_align); - } - else if (intrinsic == intrinsic_id.memset) - { - assert(args[4]->const_expr.const_kind == CONST_INTEGER); - uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); - if (dst_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 1, dst_align); - } -} - void llvm_emit_parameter(GenContext *c, LLVMValueRef **args, ABIArgInfo *info, BEValue *be_value, Type *type) { @@ -4343,99 +4278,6 @@ static void llvm_emit_splatted_variadic_arg(GenContext *c, Expr *expr, BEValue * } } -unsigned llvm_get_intrinsic(BuiltinFunction func) -{ - switch (func) - { - case BUILTIN_NONE: - case BUILTIN_UNREACHABLE: - case BUILTIN_STACKTRACE: - case BUILTIN_ABS: - case BUILTIN_SHUFFLEVECTOR: - case BUILTIN_REVERSE: - case BUILTIN_SAT_ADD: - case BUILTIN_SAT_SHL: - case BUILTIN_SAT_SUB: - UNREACHABLE - case BUILTIN_SYSCLOCK: - return intrinsic_id.readcyclecounter; - case BUILTIN_TRAP: - return intrinsic_id.trap; - case BUILTIN_CEIL: - return intrinsic_id.ceil; - case BUILTIN_TRUNC: - return intrinsic_id.trunc; - case BUILTIN_SQRT: - return intrinsic_id.sqrt; - case BUILTIN_COS: - return intrinsic_id.cos; - case BUILTIN_SIN: - return intrinsic_id.sin; - case BUILTIN_LOG: - return intrinsic_id.log; - case BUILTIN_LOG10: - return intrinsic_id.log10; - case BUILTIN_MAX: - return intrinsic_id.maxnum; - case BUILTIN_MIN: - return intrinsic_id.minnum; - case BUILTIN_FMA: - return intrinsic_id.fma; - case BUILTIN_FSHL: - return intrinsic_id.fshl; - case BUILTIN_FSHR: - return intrinsic_id.fshr; - case BUILTIN_BITREVERSE: - return intrinsic_id.bitreverse; - case BUILTIN_BSWAP: - return intrinsic_id.bswap; - case BUILTIN_CTLZ: - return intrinsic_id.ctlz; - case BUILTIN_CTTZ: - return intrinsic_id.cttz; - case BUILTIN_POPCOUNT: - return 
intrinsic_id.ctpop; - case BUILTIN_LOG2: - return intrinsic_id.log2; - case BUILTIN_POW: - return intrinsic_id.pow; - case BUILTIN_PREFETCH: - return intrinsic_id.prefetch; - case BUILTIN_EXP: - return intrinsic_id.exp; - case BUILTIN_MEMCOPY: - return intrinsic_id.memcpy; - case BUILTIN_MEMSET: - return intrinsic_id.memset; - case BUILTIN_COPYSIGN: - return intrinsic_id.copysign; - case BUILTIN_EXP2: - return intrinsic_id.exp2; - case BUILTIN_FLOOR: - return intrinsic_id.floor; - case BUILTIN_LLRINT: - return intrinsic_id.llrint; - case BUILTIN_LLROUND: - return intrinsic_id.llround; - case BUILTIN_LRINT: - return intrinsic_id.lrint; - case BUILTIN_LROUND: - return intrinsic_id.lround; - case BUILTIN_NEARBYINT: - return intrinsic_id.nearbyint; - case BUILTIN_RINT: - return intrinsic_id.rint; - case BUILTIN_ROUND: - return intrinsic_id.round; - case BUILTIN_ROUNDEVEN: - return intrinsic_id.roundeven; - case BUILTIN_VOLATILE_STORE: - case BUILTIN_VOLATILE_LOAD: - case BUILTIN_SYSCALL: - UNREACHABLE - } - UNREACHABLE -} LLVMAtomicOrdering llvm_atomic_ordering(Atomicity atomicity) { @@ -4452,254 +4294,8 @@ LLVMAtomicOrdering llvm_atomic_ordering(Atomicity atomicity) UNREACHABLE } -static inline void llvm_syscall_write_regs_to_scratch(const char** registers, unsigned args) -{ - for (unsigned i = 0; i < args; i++) - { - scratch_buffer_append(",{"); - scratch_buffer_append(registers[i]); - scratch_buffer_append("}"); - } -} -static inline LLVMValueRef llvm_syscall_asm(GenContext *c, LLVMTypeRef func_type, char *call) -{ - return LLVMGetInlineAsm(func_type, call, strlen(call), - scratch_buffer_to_string(), scratch_buffer.len, - true, true, LLVMInlineAsmDialectATT, /* can throw */ false); -} -static inline void llvm_emit_syscall(GenContext *c, BEValue *be_value, Expr *expr) -{ - unsigned arguments = vec_size(expr->call_expr.arguments); - assert(arguments < 10 && "Only has room for 10"); - LLVMValueRef arg_results[10]; - LLVMTypeRef arg_types[10]; - Expr **args = 
expr->call_expr.arguments; - LLVMTypeRef type = llvm_get_type(c, type_uptr); - for (unsigned i = 0; i < arguments; i++) - { - llvm_emit_expr(c, be_value, args[i]); - llvm_value_rvalue(c, be_value); - arg_results[i] = be_value->value; - arg_types[i] = type; - } - LLVMTypeRef func_type = LLVMFunctionType(type, arg_types, arguments, false); - scratch_buffer_clear(); - LLVMValueRef inline_asm; - switch (platform_target.arch) - { - case ARCH_TYPE_AARCH64: - case ARCH_TYPE_AARCH64_BE: - scratch_buffer_append("={x0}"); - assert(arguments < 8); - if (os_is_apple(platform_target.os)) - { - static char const *regs[] = { "x16", "x0", "x1", "x2", "x3", "x4", "x5" }; - llvm_syscall_write_regs_to_scratch(regs, arguments); - } - else - { - static char const *regs[] = { "x8", "x0", "x1", "x2", "x3", "x4", "x5" }; - llvm_syscall_write_regs_to_scratch(regs, arguments); - } - inline_asm = llvm_syscall_asm(c, func_type, "svc #0x80"); - break; - case ARCH_TYPE_X86: - { - scratch_buffer_append("={eax}"); - assert(arguments < 8); - static char const *regs[] = { "eax", "ebx", "ecx", "edx", "esi", "edi" }; - llvm_syscall_write_regs_to_scratch(regs, arguments < 6 ? 
arguments : 6); - if (arguments == 7) - { - scratch_buffer_append(",rm"); - char *asm_str = "push %[arg6]\npush %%ebp\nmov 4(%%esp), %%ebp\nint $0x80\npop %%ebp\nadd $4, %%esp"; - inline_asm = llvm_syscall_asm(c, func_type, asm_str); - break; - } - inline_asm = llvm_syscall_asm(c, func_type, "int $0x80"); - break; - } - case ARCH_TYPE_X86_64: - scratch_buffer_append("={rax}"); - assert(arguments < 8); - { - static char const *regs[] = { "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9" }; - llvm_syscall_write_regs_to_scratch(regs, arguments); - } - // Check clobbers on different OSes - scratch_buffer_append(",~{rcx},~{r11},~{memory}"); - inline_asm = llvm_syscall_asm(c, func_type, "syscall"); - break; - case ARCH_UNSUPPORTED: - default: - UNREACHABLE - } - LLVMValueRef result = LLVMBuildCall2(c->builder, func_type, inline_asm, arg_results, arguments, "syscall"); - llvm_value_set(be_value, result, type_uptr); -} - -INLINE void llvm_emit_shufflevector(GenContext *c, BEValue *result_value, Expr *expr) -{ - Expr **args = expr->call_expr.arguments; - unsigned count = vec_size(args); - LLVMValueRef arg1; - LLVMValueRef arg2; - LLVMValueRef mask; - llvm_emit_expr(c, result_value, args[0]); - llvm_value_rvalue(c, result_value); - Type *rtype = result_value->type; - arg1 = result_value->value; - llvm_emit_expr(c, result_value, args[count - 1]); - llvm_value_rvalue(c, result_value); - mask = result_value->value; - assert(LLVMIsConstant(mask)); - if (count == 2) - { - arg2 = LLVMGetPoison(LLVMTypeOf(arg1)); - } - else - { - llvm_emit_expr(c, result_value, args[1]); - llvm_value_rvalue(c, result_value); - arg2 = result_value->value; - } - LLVMValueRef val = LLVMBuildShuffleVector(c->builder, arg1, arg2, mask, "shuffle"); - llvm_value_set(result_value, val, rtype); - return; -} - -INLINE void llvm_emit_reverse(GenContext *c, BEValue *result_value, Expr *expr) -{ - Expr **args = expr->call_expr.arguments; - llvm_emit_expr(c, result_value, args[0]); - llvm_value_rvalue(c, 
result_value); - Type *rtype = result_value->type; - LLVMValueRef arg1 = result_value->value; - LLVMValueRef arg2 = LLVMGetPoison(LLVMTypeOf(arg1)); - LLVMValueRef buff[128]; - unsigned elements = rtype->array.len; - LLVMValueRef *mask_element = elements > 128 ? MALLOC(sizeof(LLVMValueRef)) : buff; - LLVMTypeRef mask_element_type = llvm_get_type(c, type_int); - for (unsigned i = 0; i < elements; i++) - { - mask_element[i] = LLVMConstInt(mask_element_type, elements - i - 1, false); - } - LLVMValueRef mask = LLVMConstVector(mask_element, elements); - llvm_value_set(result_value, LLVMBuildShuffleVector(c->builder, arg1, arg2, mask, "reverse"), rtype); -} - -INLINE unsigned llvm_intrinsic_by_type(Type *type, unsigned int_intrinsic, unsigned uint_intrinsic, unsigned float_intrinsic) -{ - type = type_flatten(type); - RETRY: - switch (type->type_kind) - { - case ALL_SIGNED_INTS: - return int_intrinsic; - case TYPE_BOOL: - case ALL_UNSIGNED_INTS: - return uint_intrinsic; - case ALL_FLOATS: - return float_intrinsic; - case TYPE_VECTOR: - type = type->array.base; - goto RETRY; - default: - UNREACHABLE - } -} -void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr) -{ - BuiltinFunction func = exprptr(expr->call_expr.function)->builtin_expr.builtin; - unsigned intrinsic; - switch (func) - { - case BUILTIN_UNREACHABLE: - llvm_value_set(result_value, LLVMBuildUnreachable(c->builder), type_void); - c->current_block = NULL; - c->current_block_is_target = false; - LLVMBasicBlockRef after_unreachable = llvm_basic_block_new(c, "after.unreachable"); - llvm_emit_block(c, after_unreachable); - return; - case BUILTIN_SHUFFLEVECTOR: - llvm_emit_shufflevector(c, result_value, expr); - return; - case BUILTIN_REVERSE: - llvm_emit_reverse(c, result_value, expr); - return; - case BUILTIN_STACKTRACE: - if (!c->debug.enable_stacktrace) - { - llvm_value_set(result_value, llvm_get_zero(c, type_voidptr), type_voidptr); - return; - } - llvm_value_set(result_value, 
llvm_emit_bitcast(c, c->debug.stack_slot, type_voidptr), type_voidptr); - case BUILTIN_VOLATILE_STORE: - { - BEValue value; - llvm_emit_expr(c, &value, expr->call_expr.arguments[0]); - llvm_emit_expr(c, result_value, expr->call_expr.arguments[1]); - llvm_value_rvalue(c, &value); - value.kind = BE_ADDRESS; - BEValue store_value = *result_value; - LLVMValueRef store = llvm_store(c, &value, &store_value); - if (store) LLVMSetVolatile(store, true); - return; - } - case BUILTIN_VOLATILE_LOAD: - { - llvm_emit_expr(c, result_value, expr->call_expr.arguments[0]); - llvm_value_rvalue(c, result_value); - result_value->kind = BE_ADDRESS; - result_value->type = type_lowering(result_value->type->pointer); - llvm_value_rvalue(c, result_value); - LLVMSetVolatile(result_value->value, true); - return; - } - case BUILTIN_SYSCALL: - llvm_emit_syscall(c, result_value, expr); - return; - case BUILTIN_MAX: - intrinsic = llvm_intrinsic_by_type(expr->call_expr.arguments[0]->type, - intrinsic_id.smax, - intrinsic_id.umax, - intrinsic_id.maxnum); - break; - case BUILTIN_MIN: - intrinsic = llvm_intrinsic_by_type(expr->call_expr.arguments[0]->type, - intrinsic_id.smin, - intrinsic_id.umin, - intrinsic_id.minnum); - break; - case BUILTIN_ABS: - intrinsic = llvm_intrinsic_by_type(expr->call_expr.arguments[0]->type, - intrinsic_id.abs, - intrinsic_id.abs, - intrinsic_id.fabs); - break; - case BUILTIN_SAT_SHL: - intrinsic = llvm_intrinsic_by_type(expr->call_expr.arguments[0]->type, - intrinsic_id.sshl_sat, - intrinsic_id.ushl_sat, 0); - break; - case BUILTIN_SAT_ADD: - intrinsic = llvm_intrinsic_by_type(expr->call_expr.arguments[0]->type, - intrinsic_id.sadd_sat, - intrinsic_id.uadd_sat, 0); - break; - case BUILTIN_SAT_SUB: - intrinsic = llvm_intrinsic_by_type(expr->call_expr.arguments[0]->type, - intrinsic_id.ssub_sat, - intrinsic_id.usub_sat, 0); - break; - default: - intrinsic = llvm_get_intrinsic(func); - break; - } - llvm_emit_intrinsic_expr(c, intrinsic, result_value, expr); -} void 
llvm_add_abi_call_attributes(GenContext *c, LLVMValueRef call_value, int count, ABIArgInfo **infos) { @@ -4774,6 +4370,7 @@ static inline void llvm_emit_vararg_parameter(GenContext *c, BEValue *value, Typ llvm_store_raw(c, &pointer_addr, llvm_emit_bitcast_ptr(c, array_ref, pointee_type)); } + void llvm_emit_call_expr(GenContext *c, BEValue *result_value, Expr *expr) { diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h index 78dd68868..79aead5f5 100644 --- a/src/compiler/llvm_codegen_internal.h +++ b/src/compiler/llvm_codegen_internal.h @@ -175,6 +175,13 @@ typedef struct unsigned vector_reduce_smin; unsigned vector_reduce_umax; unsigned vector_reduce_umin; + unsigned vector_reduce_add; + unsigned vector_reduce_fadd; + unsigned vector_reduce_mul; + unsigned vector_reduce_fmul; + unsigned vector_reduce_and; + unsigned vector_reduce_or; + unsigned vector_reduce_xor; } LLVMIntrinsics; extern LLVMIntrinsics intrinsic_id; @@ -423,6 +430,7 @@ LLVMValueRef llvm_emit_call_intrinsic(GenContext *c, unsigned intrinsic, LLVMTyp void llvm_emit_cast(GenContext *c, CastKind cast_kind, Expr *expr, BEValue *value, Type *to_type, Type *from_type); void llvm_emit_local_var_alloca(GenContext *c, Decl *decl); void llvm_emit_local_decl(GenContext *c, Decl *decl, BEValue *value); +void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr); // -- Optional -- LLVMValueRef llvm_emit_is_no_opt(GenContext *c, LLVMValueRef error_value); diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c index 47837e094..3c614c968 100644 --- a/src/compiler/sema_expr.c +++ b/src/compiler/sema_expr.c @@ -2557,15 +2557,25 @@ static inline unsigned builtin_expected_args(BuiltinFunction func) case BUILTIN_SYSCALL: case BUILTIN_TRUNC: case BUILTIN_VOLATILE_LOAD: + case BUILTIN_REDUCE_MUL: + case BUILTIN_REDUCE_AND: + case BUILTIN_REDUCE_ADD: + case BUILTIN_REDUCE_OR: + case BUILTIN_REDUCE_XOR: + case BUILTIN_REDUCE_MAX: + case BUILTIN_REDUCE_MIN: 
return 1; case BUILTIN_COPYSIGN: case BUILTIN_MAX: case BUILTIN_MIN: case BUILTIN_POW: + case BUILTIN_POW_INT: case BUILTIN_VOLATILE_STORE: case BUILTIN_SAT_ADD: case BUILTIN_SAT_SUB: case BUILTIN_SAT_SHL: + case BUILTIN_REDUCE_FMUL: + case BUILTIN_REDUCE_FADD: return 2; case BUILTIN_FMA: case BUILTIN_FSHL: @@ -2591,6 +2601,7 @@ typedef enum BA_CHAR, BA_FLOATLIKE, BA_INTEGER, + BA_FLOAT, BA_INTLIKE, BA_NUMLIKE, BA_INTVEC, @@ -2708,6 +2719,13 @@ static bool sema_check_builtin_args(Expr **args, BuiltinArg *arg_type, size_t ar return false; } break; + case BA_FLOAT: + if (!type_is_float(type)) + { + SEMA_ERROR(args[i], "Expected a float or double."); + return false; + } + break; } } return true; @@ -2964,6 +2982,43 @@ static inline bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *ex if (!sema_check_builtin_args_match(args, arg_count)) return false; rtype = args[0]->type; break; + case BUILTIN_POW_INT: + if (!sema_check_builtin_args(args, + (BuiltinArg[]) { BA_FLOATLIKE, BA_INTLIKE }, + arg_count)) return false; + if (!cast_implicit(args[1], type_cint)) return false; + rtype = args[0]->type; + break; + case BUILTIN_REDUCE_FMUL: + case BUILTIN_REDUCE_FADD: + if (!sema_check_builtin_args(args, + (BuiltinArg[]) { BA_FLOATVEC, BA_FLOAT }, + arg_count)) return false; + if (!cast_implicit(args[1], args[0]->type->canonical->array.base)) return false; + { + Expr *arg = args[0]; + args[0] = args[1]; + args[1] = arg; + } + rtype = args[0]->type; + break; + case BUILTIN_REDUCE_MAX: + case BUILTIN_REDUCE_MIN: + if (!sema_check_builtin_args(args, + (BuiltinArg[]) { BA_VEC }, + arg_count)) return false; + rtype = args[0]->type->canonical->array.base; + break; + case BUILTIN_REDUCE_ADD: + case BUILTIN_REDUCE_AND: + case BUILTIN_REDUCE_OR: + case BUILTIN_REDUCE_XOR: + case BUILTIN_REDUCE_MUL: + if (!sema_check_builtin_args(args, + (BuiltinArg[]) { BA_INTVEC }, + arg_count)) return false; + rtype = args[0]->type->canonical->array.base; + break; case BUILTIN_ABS: if 
(!sema_check_builtin_args(args, (BuiltinArg[]) { BA_NUMLIKE }, arg_count)) return false; if (!sema_check_builtin_args_match(args, arg_count)) return false; @@ -8022,7 +8077,7 @@ RETRY: } default: SEMA_ERROR(main_var, "Expected an identifier here."); - break; + return false; } VECEACH(flat_path, i) diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index b8c65d9a3..f3d605abc 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -194,7 +194,17 @@ void symtab_init(uint32_t capacity) builtin_list[BUILTIN_NEARBYINT] = KW_DEF("nearbyint"); builtin_list[BUILTIN_POPCOUNT] = KW_DEF("popcount"); builtin_list[BUILTIN_POW] = KW_DEF("pow"); + builtin_list[BUILTIN_POW_INT] = KW_DEF("pow_int"); builtin_list[BUILTIN_PREFETCH] = KW_DEF("prefetch"); + builtin_list[BUILTIN_REDUCE_ADD] = KW_DEF("reduce_add"); + builtin_list[BUILTIN_REDUCE_AND] = KW_DEF("reduce_and"); + builtin_list[BUILTIN_REDUCE_FADD] = KW_DEF("reduce_fadd"); + builtin_list[BUILTIN_REDUCE_FMUL] = KW_DEF("reduce_fmul"); + builtin_list[BUILTIN_REDUCE_MAX] = KW_DEF("reduce_max"); + builtin_list[BUILTIN_REDUCE_MIN] = KW_DEF("reduce_min"); + builtin_list[BUILTIN_REDUCE_MUL] = KW_DEF("reduce_mul"); + builtin_list[BUILTIN_REDUCE_OR] = KW_DEF("reduce_or"); + builtin_list[BUILTIN_REDUCE_XOR] = KW_DEF("reduce_xor"); builtin_list[BUILTIN_REVERSE] = KW_DEF("reverse"); builtin_list[BUILTIN_RINT] = KW_DEF("rint"); builtin_list[BUILTIN_ROUND] = KW_DEF("round"); diff --git a/src/version.h b/src/version.h index f2a1ae266..7e069e1cb 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define COMPILER_VERSION "0.3.51" \ No newline at end of file +#define COMPILER_VERSION "0.3.52" \ No newline at end of file diff --git a/test/test_suite/builtins/reduce_arithmetics.c3t b/test/test_suite/builtins/reduce_arithmetics.c3t new file mode 100644 index 000000000..e3dde3378 --- /dev/null +++ b/test/test_suite/builtins/reduce_arithmetics.c3t @@ -0,0 +1,43 @@ +// #target: macos-x64 +module test; +import std::io; + +fn 
void main() +{ + int a; + io::printfln("%s", $$pow_int(double[<2>] { 23.3, 2.1 }, 3)); + io::printfln("%s", $$reduce_add(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_fadd(double[<2>] { 3, 10 }, -0.0)); + io::printfln("%s", $$reduce_fadd(double[<2>] { 3, 10 }, 3.2)); + io::printfln("%s", $$reduce_mul(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_fmul(double[<2>] { 3, 10 }, 3.4)); + io::printfln("%s", $$reduce_and(int[<2>] { 3, 11 })); + io::printfln("%s", $$reduce_or(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_xor(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_max(double[<2>] { 23.2, 23.3 })); + io::printfln("%s", $$reduce_max(int[<2>] { -23, 32 })); + io::printfln("%s", $$reduce_max(char[<2>] { 4, 253 })); + io::printfln("%s", $$reduce_min(double[<2>] { 23.2, 23.3 })); + io::printfln("%s", $$reduce_min(int[<2>] { -23, 32 })); + io::printfln("%s", $$reduce_min(char[<2>] { 4, 253 })); + +} + +/* #expect: test.ll + + + call <2 x double> @llvm.powi.v2f64.i32(<2 x double> , i32 3) + call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> ) + call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> ) + call double @llvm.vector.reduce.fadd.v2f64(double 3.200000e+00, <2 x double> ) + call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> ) + call double @llvm.vector.reduce.fmul.v2f64(double 3.400000e+00, <2 x double> ) + call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> ) + call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> ) + call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> ) + call double @llvm.vector.reduce.fmax.v2f64(<2 x double> ) + call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> ) + call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> ) + call double @llvm.vector.reduce.fmin.v2f64(<2 x double> ) + call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> ) + call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> ) diff --git a/test/test_suite2/builtins/reduce_arithmetics.c3t b/test/test_suite2/builtins/reduce_arithmetics.c3t new file mode 100644 index 
000000000..e3dde3378 --- /dev/null +++ b/test/test_suite2/builtins/reduce_arithmetics.c3t @@ -0,0 +1,43 @@ +// #target: macos-x64 +module test; +import std::io; + +fn void main() +{ + int a; + io::printfln("%s", $$pow_int(double[<2>] { 23.3, 2.1 }, 3)); + io::printfln("%s", $$reduce_add(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_fadd(double[<2>] { 3, 10 }, -0.0)); + io::printfln("%s", $$reduce_fadd(double[<2>] { 3, 10 }, 3.2)); + io::printfln("%s", $$reduce_mul(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_fmul(double[<2>] { 3, 10 }, 3.4)); + io::printfln("%s", $$reduce_and(int[<2>] { 3, 11 })); + io::printfln("%s", $$reduce_or(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_xor(int[<2>] { 3, 10 })); + io::printfln("%s", $$reduce_max(double[<2>] { 23.2, 23.3 })); + io::printfln("%s", $$reduce_max(int[<2>] { -23, 32 })); + io::printfln("%s", $$reduce_max(char[<2>] { 4, 253 })); + io::printfln("%s", $$reduce_min(double[<2>] { 23.2, 23.3 })); + io::printfln("%s", $$reduce_min(int[<2>] { -23, 32 })); + io::printfln("%s", $$reduce_min(char[<2>] { 4, 253 })); + +} + +/* #expect: test.ll + + + call <2 x double> @llvm.powi.v2f64.i32(<2 x double> , i32 3) + call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> ) + call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> ) + call double @llvm.vector.reduce.fadd.v2f64(double 3.200000e+00, <2 x double> ) + call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> ) + call double @llvm.vector.reduce.fmul.v2f64(double 3.400000e+00, <2 x double> ) + call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> ) + call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> ) + call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> ) + call double @llvm.vector.reduce.fmax.v2f64(<2 x double> ) + call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> ) + call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> ) + call double @llvm.vector.reduce.fmin.v2f64(<2 x double> ) + call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> ) + call i8 
@llvm.vector.reduce.umin.v2i8(<2 x i8> )