diff --git a/lib/std/core/mem.c3 b/lib/std/core/mem.c3 index 398db9f68..51d0eaa56 100644 --- a/lib/std/core/mem.c3 +++ b/lib/std/core/mem.c3 @@ -26,7 +26,6 @@ macro void* aligned_pointer(void* ptr, usz alignment) return (void*)(uptr)aligned_offset((uptr)ptr, alignment); } - /** * @require math::is_power_of_2(alignment) **/ @@ -35,14 +34,22 @@ fn bool ptr_is_aligned(void* ptr, usz alignment) @inline return (uptr)ptr & ((uptr)alignment - 1) == 0; } -macro void copy(void* dst, void* src, usz len, usz $dst_align = 0, usz $src_align = 0, bool $is_volatile = false) +macro void clear(void* dst, usz len, usz $dst_align = 0, bool $is_volatile = false, bool $inlined = false) { - $$memcpy(dst, src, len, $is_volatile, $dst_align, $src_align); + $if ($inlined): + $$memset_inline(dst, (char)0, len, $is_volatile, $dst_align); + $else: + $$memset(dst, (char)0, len, $is_volatile, $dst_align); + $endif; } -macro void set(void* dst, char val, usz len, usz $dst_align = 0, bool $is_volatile = false) +macro void copy(void* dst, void* src, usz len, usz $dst_align = 0, usz $src_align = 0, bool $is_volatile = false, bool $inlined = false) { - $$memset(dst, val, len, $is_volatile, $dst_align); + $if ($inlined): + $$memcpy_inline(dst, src, len, $is_volatile, $dst_align, $src_align); + $else: + $$memcpy(dst, src, len, $is_volatile, $dst_align, $src_align); + $endif; } macro void move(void* dst, void* src, usz len, usz $dst_align = 0, usz $src_align = 0, bool $is_volatile = false) @@ -50,9 +57,13 @@ macro void move(void* dst, void* src, usz len, usz $dst_align = 0, usz $src_alig $$memmove(dst, src, len, $is_volatile, $dst_align, $src_align); } -macro void clear(void* dst, usz len, usz $dst_align = 0, bool $is_volatile = false) +macro void set(void* dst, char val, usz len, usz $dst_align = 0, bool $is_volatile = false, bool $inlined = false) { - $$memset(dst, (char)0, len, $is_volatile, $dst_align); + $if ($inlined): + $$memset_inline(dst, val, len, $is_volatile, $dst_align); + $else: + $$memset(dst, val, len, $is_volatile, $dst_align); + $endif; } /** diff --git a/src/compiler/enums.h b/src/compiler/enums.h index 2c1689b90..052e04f9f 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -850,8 +850,10 @@ typedef enum BUILTIN_LOG2, BUILTIN_MAX, BUILTIN_MEMCOPY, + BUILTIN_MEMCOPY_INLINE, BUILTIN_MEMMOVE, BUILTIN_MEMSET, + BUILTIN_MEMSET_INLINE, BUILTIN_MIN, BUILTIN_NEARBYINT, BUILTIN_OVERFLOW_ADD, diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c index 53dfd6fa3..1ea946b06 100644 --- a/src/compiler/llvm_codegen.c +++ b/src/compiler/llvm_codegen.c @@ -658,8 +658,10 @@ static void llvm_codegen_setup() intrinsic_id.maximum = lookup_intrinsic("llvm.maximum"); intrinsic_id.maxnum = lookup_intrinsic("llvm.maxnum"); intrinsic_id.memcpy = lookup_intrinsic("llvm.memcpy"); - intrinsic_id.memset = lookup_intrinsic("llvm.memset"); + intrinsic_id.memcpy_inline = lookup_intrinsic("llvm.memcpy.inline"); intrinsic_id.memmove = lookup_intrinsic("llvm.memmove"); + intrinsic_id.memset = lookup_intrinsic("llvm.memset"); + intrinsic_id.memset_inline = lookup_intrinsic("llvm.memset.inline"); intrinsic_id.minimum = lookup_intrinsic("llvm.minimum"); intrinsic_id.minnum = lookup_intrinsic("llvm.minnum"); intrinsic_id.fmuladd = lookup_intrinsic("llvm.fmuladd"); diff --git a/src/compiler/llvm_codegen_builtins.c b/src/compiler/llvm_codegen_builtins.c index 169dc0431..82e3ffcd9 100644 --- a/src/compiler/llvm_codegen_builtins.c +++ b/src/compiler/llvm_codegen_builtins.c @@ -215,7 +215,7 @@ INLINE void llvm_emit_intrinsic_args(GenContext *c, Expr **args, LLVMValueRef *s } } -INLINE void llvm_emit_memcpy_builtin(GenContext *c, BEValue *be_value, Expr *expr) +INLINE void llvm_emit_memcpy_builtin(GenContext *c, unsigned intrinsic, BEValue *be_value, Expr *expr) { Expr **args = expr->call_expr.arguments; LLVMValueRef arg_slots[4]; @@ -225,7 +225,7 @@ INLINE void llvm_emit_memcpy_builtin(GenContext *c, BEValue *be_value, Expr *exp LLVMTypeRef call_type[3]; call_type[0] = call_type[1] = llvm_get_type(c, type_voidptr); call_type[2] = llvm_get_type(c, type_usize); - LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic_id.memcpy, call_type, 3, arg_slots, 4); + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 3, arg_slots, 4); assert(args[4]->const_expr.const_kind == CONST_INTEGER); assert(args[5]->const_expr.const_kind == CONST_INTEGER); uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); @@ -235,20 +235,6 @@ INLINE void llvm_emit_memcpy_builtin(GenContext *c, BEValue *be_value, Expr *exp llvm_value_set(be_value, result, type_void); } -INLINE void llvm_emit_memset_builtin(GenContext *c, BEValue *be_value, Expr *expr) -{ - Expr **args = expr->call_expr.arguments; - LLVMValueRef arg_slots[4]; - llvm_emit_intrinsic_args(c, args, arg_slots, 4); - arg_slots[0] = llvm_emit_bitcast(c, arg_slots[0], type_voidptr); - LLVMTypeRef call_type[2] = { llvm_get_type(c, type_voidptr), llvm_get_type(c, type_usize) }; - LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic_id.memset, call_type, 2, arg_slots, 4); - assert(args[4]->const_expr.const_kind == CONST_INTEGER); - uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); - if (dst_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 1, dst_align); - llvm_value_set(be_value, result, type_void); -} - INLINE void llvm_emit_memmove_builtin(GenContext *c, BEValue *be_value, Expr *expr) { Expr **args = expr->call_expr.arguments; @@ -268,6 +254,21 @@ INLINE void llvm_emit_memmove_builtin(GenContext *c, BEValue *be_value, Expr *ex if (src_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 2, src_align); llvm_value_set(be_value, result, type_void); } + +INLINE void llvm_emit_memset_builtin(GenContext *c, unsigned intrinsic, BEValue *be_value, Expr *expr) +{ + Expr **args = expr->call_expr.arguments; + LLVMValueRef arg_slots[4]; + llvm_emit_intrinsic_args(c, args, arg_slots, 4); + arg_slots[0] = llvm_emit_bitcast(c, arg_slots[0], type_voidptr); + LLVMTypeRef call_type[2] = { llvm_get_type(c, type_voidptr), llvm_get_type(c, type_usize) }; + LLVMValueRef result = llvm_emit_call_intrinsic(c, intrinsic, call_type, 2, arg_slots, 4); + assert(args[4]->const_expr.const_kind == CONST_INTEGER); + uint64_t dst_align = int_to_u64(args[4]->const_expr.ixx); + if (dst_align > 0) llvm_attribute_add_call(c, result, attribute_id.align, 1, dst_align); + llvm_value_set(be_value, result, type_void); +} + INLINE void llvm_emit_prefetch(GenContext *c, BEValue *be_value, Expr *expr) { Expr **args = expr->call_expr.arguments; @@ -473,14 +474,20 @@ void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr) llvm_emit_syscall(c, result_value, expr); return; case BUILTIN_MEMCOPY: - llvm_emit_memcpy_builtin(c, result_value, expr); + llvm_emit_memcpy_builtin(c, intrinsic_id.memcpy, result_value, expr); return; - case BUILTIN_MEMSET: - llvm_emit_memset_builtin(c, result_value, expr); + case BUILTIN_MEMCOPY_INLINE: + llvm_emit_memcpy_builtin(c, intrinsic_id.memcpy_inline, result_value, expr); return; case BUILTIN_MEMMOVE: llvm_emit_memmove_builtin(c, result_value, expr); return; + case BUILTIN_MEMSET: + llvm_emit_memset_builtin(c, intrinsic_id.memset, result_value, expr); + return; + case BUILTIN_MEMSET_INLINE: + llvm_emit_memset_builtin(c, intrinsic_id.memset_inline, result_value, expr); + return; case BUILTIN_SYSCLOCK: llvm_value_set(result_value, llvm_emit_call_intrinsic(c, intrinsic_id.readcyclecounter, NULL, 0, NULL, 0), expr->type); return; diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h index 9f2477cab..96db89d81 100644 --- a/src/compiler/llvm_codegen_internal.h +++ b/src/compiler/llvm_codegen_internal.h @@ -134,6 +134,7 @@ typedef struct unsigned floor; unsigned flt_rounds; unsigned fma; + unsigned fmuladd; unsigned frameaddress; unsigned fshl; unsigned fshr; @@ -149,11 +150,12 @@ typedef struct unsigned maximum; unsigned maxnum; unsigned memcpy; - unsigned memset; + unsigned memcpy_inline; unsigned memmove; + unsigned memset; + unsigned memset_inline; unsigned minimum; unsigned minnum; - unsigned fmuladd; unsigned nearbyint; unsigned pow; unsigned powi; diff --git a/src/compiler/sema_builtins.c b/src/compiler/sema_builtins.c index 9773212dc..adbf856cc 100644 --- a/src/compiler/sema_builtins.c +++ b/src/compiler/sema_builtins.c @@ -349,6 +349,7 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) rtype = type_no_optional(args[0]->type->canonical); break; case BUILTIN_MEMCOPY: + case BUILTIN_MEMCOPY_INLINE: case BUILTIN_MEMMOVE: if (!sema_check_builtin_args(args, (BuiltinArg[]) { BA_POINTER, BA_POINTER, BA_SIZE, BA_BOOL, BA_SIZE, BA_SIZE }, @@ -357,6 +358,7 @@ bool sema_expr_analyse_builtin_call(SemaContext *context, Expr *expr) rtype = type_void; break; case BUILTIN_MEMSET: + case BUILTIN_MEMSET_INLINE: if (!sema_check_builtin_args(args, (BuiltinArg[]) { BA_POINTER, BA_CHAR, BA_SIZE, BA_BOOL, BA_SIZE }, arg_count)) return false; @@ -620,11 +622,13 @@ static inline unsigned builtin_expected_args(BuiltinFunction func) case BUILTIN_OVERFLOW_SUB: case BUILTIN_PREFETCH: return 3; - case BUILTIN_MEMSET: - return 5; case BUILTIN_MEMCOPY: + case BUILTIN_MEMCOPY_INLINE: case BUILTIN_MEMMOVE: return 6; + case BUILTIN_MEMSET: + case BUILTIN_MEMSET_INLINE: + return 5; case BUILTIN_SHUFFLEVECTOR: case BUILTIN_NONE: UNREACHABLE diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c index b3ef2a6d1..21fa69f69 100644 --- a/src/compiler/symtab.c +++ b/src/compiler/symtab.c @@ -211,8 +211,10 @@ void symtab_init(uint32_t capacity) builtin_list[BUILTIN_LOG2] = KW_DEF("log2"); builtin_list[BUILTIN_LOG10] = KW_DEF("log10"); builtin_list[BUILTIN_MEMCOPY] = KW_DEF("memcpy"); - builtin_list[BUILTIN_MEMSET] = KW_DEF("memset"); + builtin_list[BUILTIN_MEMCOPY_INLINE] = KW_DEF("memcpy_inline"); builtin_list[BUILTIN_MEMMOVE] = KW_DEF("memmove"); + builtin_list[BUILTIN_MEMSET] = KW_DEF("memset"); + builtin_list[BUILTIN_MEMSET_INLINE] = KW_DEF("memset_inline"); builtin_list[BUILTIN_NEARBYINT] = KW_DEF("nearbyint"); builtin_list[BUILTIN_OVERFLOW_ADD] = KW_DEF("overflow_add"); builtin_list[BUILTIN_OVERFLOW_SUB] = KW_DEF("overflow_sub"); diff --git a/test/test_suite/builtins/mem.c3t b/test/test_suite/builtins/mem.c3t index 3770931f0..416fd4196 100644 --- a/test/test_suite/builtins/mem.c3t +++ b/test/test_suite/builtins/mem.c3t @@ -9,10 +9,14 @@ fn void main() $$memcpy(&dst, &src, ushort.sizeof, false, $alignof(dst), $alignof(src)); $$memmove(&dst, &src, ushort.sizeof, false, $alignof(dst), $alignof(src)); $$memset(&dst, 0u8, ushort.sizeof, false, $alignof(dst)); + + $$memcpy_inline(&dst, &src, ushort.sizeof, false, $alignof(dst), $alignof(src)); + $$memset_inline(&dst, 0u8, ushort.sizeof, false, $alignof(dst)); } /* #expect: test.ll call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 %0, i8* align 2 %1, i64 2, i1 false) call void @llvm.memmove.p0i8.p0i8.i64(i8* align 2 %2, i8* align 2 %3, i64 2, i1 false) - call void @llvm.memset.p0i8.i64(i8* align 2 %4, i8 0, i64 2, i1 false) \ No newline at end of file + call void @llvm.memset.p0i8.i64(i8* align 2 %4, i8 0, i64 2, i1 false) + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 2 %5, i8* align 2 %6, i64 2, i1 false) diff --git a/test/test_suite/stdlib/map.c3t b/test/test_suite/stdlib/map.c3t index 9863ce5b7..2cf149c0c 100644 --- a/test/test_suite/stdlib/map.c3t +++ b/test/test_suite/stdlib/map.c3t @@ -508,7 +508,7 @@ after_check100: ; preds = %if.then panic_block: ; preds = %assign_optional %160 = load void (i8*, i64, i8*, i64, i8*, i64, i32)*, void (i8*, i64, i8*, i64, i8*, i64, i32)** @std_core_builtin_panic, align 8 - call void %160(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.panic_msg, i64 0, i64 0), i64 27, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.file, i64 0, i64 0), i64 6, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.func, i64 0, i64 0), i64 4, i32 246) + call void %160(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.panic_msg, i64 0, i64 0), i64 27, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.file, i64 0, i64 0), i64 6, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.func, i64 0, i64 0), i64 4, i32 257) unreachable noerr_block: ; preds = %after_check100 diff --git a/test/test_suite2/builtins/mem.c3t b/test/test_suite2/builtins/mem.c3t index 2acaf19bb..08c1ea6e1 100644 --- a/test/test_suite2/builtins/mem.c3t +++ b/test/test_suite2/builtins/mem.c3t @@ -9,6 +9,9 @@ fn void main() $$memcpy(&dst, &src, ushort.sizeof, false, $alignof(dst), $alignof(src)); $$memmove(&dst, &src, ushort.sizeof, false, $alignof(dst), $alignof(src)); $$memset(&dst, 0u8, ushort.sizeof, false, $alignof(dst)); + + $$memcpy_inline(&dst, &src, ushort.sizeof, false, $alignof(dst), $alignof(src)); + $$memset_inline(&dst, 0u8, ushort.sizeof, false, $alignof(dst)); } /* #expect: test.ll @@ -16,3 +19,5 @@ fn void main() call void @llvm.memcpy.p0.p0.i64(ptr align 2 %dst, ptr align 2 %src, i64 2, i1 false) call void @llvm.memmove.p0.p0.i64(ptr align 2 %dst, ptr align 2 %src, i64 2, i1 false) call void @llvm.memset.p0.i64(ptr align 2 %dst, i8 0, i64 2, i1 false) + call void @llvm.memcpy.inline.p0.p0.i64(ptr align 2 %dst, ptr align 2 %src, i64 2, i1 false) + call void @llvm.memset.inline.p0.i64(ptr align 2 %dst, i8 0, i64 2, i1 false) diff --git a/test/test_suite2/stdlib/map.c3t b/test/test_suite2/stdlib/map.c3t index 77f9bc6cb..55c0e3a2b 100644 --- a/test/test_suite2/stdlib/map.c3t +++ b/test/test_suite2/stdlib/map.c3t @@ -364,7 +364,7 @@ after_check67: ; preds = %if.then panic_block: ; preds = %assign_optional %81 = load ptr, ptr @std_core_builtin_panic, align 8 - call void %81(ptr @.panic_msg, i64 27, ptr @.file, i64 6, ptr @.func, i64 4, i32 246) + call void %81(ptr @.panic_msg, i64 27, ptr @.file, i64 6, ptr @.func, i64 4, i32 257) unreachable noerr_block: ; preds = %after_check67