diff --git a/releasenotes.md b/releasenotes.md index 4bded3aa6..a6ea24d3f 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -17,6 +17,7 @@ - Allow using $defined(&a[1]) to check if the operation is supported. - Max number of members in a struct is limited to 65535. - The maximum number of parameters in a call is now 255, up from 127. +- Array comparison now uses built-in memcmp on LLVM to enable optimizations. ### Fixes - Error with unsigned compare in `@ensure` when early returning 0 #1207. diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index ecdc217f9..01f1245c7 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -33,7 +33,6 @@ static inline void llvm_emit_try_unwrap(GenContext *c, BEValue *value, Expr *exp static inline void llvm_emit_vector_initializer_list(GenContext *c, BEValue *value, Expr *expr); static inline void llvm_extract_bitvalue_from_array(GenContext *c, BEValue *be_value, Decl *member, Decl *parent_decl); static inline void llvm_emit_type_from_any(GenContext *c, BEValue *be_value); -static inline void llvm_emit_memcmp(GenContext *c, BEValue *be_value, LLVMValueRef ptr, LLVMValueRef other_ptr, BinaryOp binary_op, AlignSize lhs_align, AlignSize rhs_align, ByteSize size); static void llvm_convert_vector_comparison(GenContext *c, BEValue *be_value, LLVMValueRef val, Type *vector_type, bool is_equals); static void llvm_emit_any_pointer(GenContext *c, BEValue *any, BEValue *pointer); @@ -44,6 +43,7 @@ static void llvm_emit_initialize_designated_element(GenContext *c, BEValue *ref, static void llvm_emit_macro_body_expansion(GenContext *c, BEValue *value, Expr *body_expr); static void llvm_emit_post_unary_expr(GenContext *context, BEValue *be_value, Expr *expr); static void llvm_emit_unary_expr(GenContext *c, BEValue *value, Expr *expr); +static inline void llvm_emit_memcmp(GenContext *c, BEValue *be_value, LLVMValueRef ptr, LLVMValueRef other_ptr, LLVMValueRef size); static LLVMTypeRef llvm_find_inner_struct_type_for_coerce(GenContext *c, LLVMTypeRef struct_type, ByteSize dest_size); static void llvm_expand_type_to_args(GenContext *context, Type *param_type, LLVMValueRef expand_ptr, LLVMValueRef *args, unsigned *arg_count_ref, AlignSize alignment); static inline void llvm_emit_initialize_reference_designated_bitstruct(GenContext *c, BEValue *ref, Decl *bitstruct, Expr **elements); @@ -3564,8 +3564,8 @@ static void llvm_emit_struct_comparison(GenContext *c, BEValue *result, BEValue llvm_value_fold_optional(c, rhs); llvm_value_addr(c, lhs); llvm_value_addr(c, rhs); - llvm_emit_memcmp(c, result, lhs->value, rhs->value, binary_op, lhs->alignment, rhs->alignment, - type_size(lhs->type)); + llvm_emit_memcmp(c, result, lhs->value, rhs->value, llvm_const_int(c, type_usz, type_size(lhs->type))); + llvm_emit_int_comp_zero(c, result, result, binary_op); } static inline LLVMValueRef llvm_emit_mult_int(GenContext *c, Type *type, LLVMValueRef left, LLVMValueRef right, SourceSpan loc) @@ -3687,116 +3687,74 @@ INLINE bool should_inline_array_comp(ArraySize len, Type *base_type_lowered) } } -static void llvm_emit_memcmp_inline(GenContext *c, BEValue *be_value, LLVMValueRef lhs, - LLVMValueRef rhs, ByteSize element_size, - AlignSize lhs_align, AlignSize rhs_align, int len, bool want_match) +static inline void llvm_emit_memcmp(GenContext *c, BEValue *be_value, LLVMValueRef ptr, LLVMValueRef other_ptr, LLVMValueRef size) { - assert(element_size <= platform_target.width_register / 8); - lhs_align = type_min_alignment(element_size, lhs_align); - rhs_align = type_min_alignment(element_size, rhs_align); - LLVMTypeRef element_type = LLVMIntTypeInContext(c->context, element_size * 8); - LLVMBasicBlockRef exit = llvm_basic_block_new(c, "array_cmp_exit"); - LLVMBasicBlockRef loop_begin = llvm_basic_block_new(c, "array_loop_start"); - LLVMBasicBlockRef comparison = llvm_basic_block_new(c, "array_loop_comparison"); - LLVMBasicBlockRef comparison_phi; - LLVMBasicBlockRef loop_begin_phi; - LLVMValueRef len_val = llvm_const_int(c, type_usz, len); - LLVMValueRef one = llvm_const_int(c, type_usz, 1); - BEValue index_var; - llvm_value_set_address_abi_aligned(&index_var, llvm_emit_alloca_aligned(c, type_usz, "cmp.idx"), type_usz); - llvm_store_raw(c, &index_var, llvm_get_zero(c, type_usz)); - - llvm_emit_br(c, loop_begin); - llvm_emit_block(c, loop_begin); - - AlignSize align_lhs; - BEValue lhs_v; - BEValue index_copy = index_var; - llvm_value_rvalue(c, &index_copy); - - LLVMValueRef index_val = index_copy.value; - LLVMValueRef lhs_ptr = llvm_emit_pointer_inbounds_gep_raw(c, element_type, lhs, index_val); - LLVMValueRef rhs_ptr = llvm_emit_pointer_inbounds_gep_raw(c, element_type, rhs, index_val); - LLVMValueRef lhs_value = llvm_load(c, element_type, lhs_ptr, lhs_align, "lhs"); - LLVMValueRef rhs_value = llvm_load(c, element_type, rhs_ptr, rhs_align, "rhs"); - LLVMValueRef comp_val = LLVMBuildICmp(c->builder, LLVMIntEQ, lhs_value, rhs_value, "cmp"); - loop_begin_phi = c->current_block; - llvm_emit_cond_br_raw(c, comp_val, comparison, exit); - llvm_emit_block(c, comparison); - - LLVMValueRef new_index = LLVMBuildAdd(c->builder, index_copy.value, one, "inc"); - llvm_store_raw(c, &index_var, new_index); - BEValue comp; - llvm_emit_int_comp_raw(c, &comp, type_usz, type_usz, new_index, len_val, BINARYOP_LT); - comparison_phi = c->current_block; - llvm_emit_cond_br(c, &comp, loop_begin, exit); - llvm_emit_block(c, exit); - LLVMValueRef success = LLVMConstInt(c->bool_type, want_match ? 1 : 0, false); - LLVMValueRef failure = LLVMConstInt(c->bool_type, want_match ? 0 : 1, false); - llvm_new_phi(c, be_value, "array_cmp_phi", type_bool, success, comparison_phi, failure, loop_begin_phi); - -} -static void llvm_emit_memcmp_unrolled(GenContext *c, BEValue *be_value, LLVMValueRef lhs_ptr, - LLVMValueRef rhs_ptr, ByteSize element_size, - AlignSize lhs_align, AlignSize rhs_align, int len, bool want_match) -{ - assert(len < 17); - assert(element_size <= platform_target.width_register / 8); - LLVMTypeRef element_type = LLVMIntTypeInContext(c->context, element_size * 8); - LLVMBasicBlockRef blocks[17]; - LLVMValueRef value_block[17]; - LLVMBasicBlockRef ok_block = llvm_basic_block_new(c, "match"); - LLVMBasicBlockRef exit_block = llvm_basic_block_new(c, "exit"); - LLVMValueRef success = LLVMConstInt(c->bool_type, want_match ? 1 : 0, false); - LLVMValueRef failure = LLVMConstInt(c->bool_type, want_match ? 0 : 1, false); - LLVMValueRef one = llvm_const_int(c, type_usz, 1); - for (unsigned i = 0; i < len; i++) + if (!c->memcmp_function) { - value_block[i] = failure; - if (i > 0) + c->memcmp_function = LLVMGetNamedFunction(c->module, "memcmp"); + if (!c->memcmp_function) { - lhs_ptr = llvm_emit_pointer_inbounds_gep_raw(c, element_type, lhs_ptr, one); - rhs_ptr = llvm_emit_pointer_inbounds_gep_raw(c, element_type, rhs_ptr, one); + c->memcmp_function = LLVMAddFunction(c->module, "memcmp", c->memcmp_function_type); } - AlignSize lhs_align_current = type_min_alignment(lhs_align + i * element_size, lhs_align); - AlignSize rhs_align_current = type_min_alignment(rhs_align + i * element_size, rhs_align); - LLVMValueRef lhs_value = llvm_load(c, element_type, lhs_ptr, lhs_align_current, "lhs"); - LLVMValueRef rhs_value = llvm_load(c, element_type, rhs_ptr, rhs_align_current, "rhs"); - LLVMValueRef comp = LLVMBuildICmp(c->builder, LLVMIntEQ, lhs_value, rhs_value, "cmp"); - blocks[i] = c->current_block; - LLVMBasicBlockRef block = ok_block; - block = i < len - 1 ? llvm_basic_block_new(c, "next_check") : block; - llvm_emit_cond_br_raw(c, comp, block, exit_block); - llvm_emit_block(c, block); } - llvm_emit_br(c, exit_block); - llvm_emit_block(c, exit_block); - value_block[len] = success; - blocks[len] = ok_block; - LLVMValueRef phi = LLVMBuildPhi(c->builder, c->bool_type, "memcmp_phi"); - LLVMAddIncoming(phi, value_block, blocks, len + 1); - llvm_value_set(be_value, phi, type_bool); + LLVMValueRef args[3] = { ptr, other_ptr, size }; + LLVMValueRef function = LLVMBuildCall2(c->builder, c->memcmp_function_type, c->memcmp_function, args, 3, "cmp"); + llvm_value_set(be_value, function, type_cint); } -static inline void llvm_emit_memcmp(GenContext *c, BEValue *be_value, LLVMValueRef ptr, LLVMValueRef other_ptr, BinaryOp binary_op, AlignSize lhs_align, AlignSize rhs_align, ByteSize size) -{ - ByteSize element_size = lhs_align > platform_target.width_register / 8 ? platform_target.width_register / 8 : lhs_align; - if (element_size > rhs_align) element_size = rhs_align; - if (element_size > size) element_size = size; - ByteSize repeats = size / element_size; - assert(size % element_size == 0 && "Expected size padded to alignment"); - if (repeats <= MEMCMP_INLINE_REGS) - { - llvm_emit_memcmp_unrolled(c, be_value, ptr, other_ptr, element_size, lhs_align, rhs_align, repeats, binary_op == BINARYOP_EQ); - return; - } - llvm_emit_memcmp_inline(c, be_value, ptr, other_ptr, element_size, lhs_align, rhs_align, repeats, binary_op == BINARYOP_EQ); -} static void llvm_emit_array_comp(GenContext *c, BEValue *be_value, BEValue *lhs, BEValue *rhs, BinaryOp binary_op) { + Type *array_base = type_flatten(lhs->type->array.base); + switch (array_base->type_kind) + { + + case ALL_INTS: + case TYPE_POINTER: + case TYPE_ENUM: + case TYPE_FUNC_PTR: + case TYPE_INTERFACE: + case TYPE_ANY: + case TYPE_ANYFAULT: + case TYPE_FAULTTYPE: + case TYPE_TYPEID: +MEMCMP: + llvm_value_addr(c, lhs); + llvm_value_addr(c, rhs); + llvm_emit_memcmp(c, be_value, lhs->value, rhs->value, llvm_const_int(c, type_usz, type_size(lhs->type))); + llvm_emit_int_comp_zero(c, be_value, be_value, binary_op); + return; + case TYPE_VECTOR: + if (is_power_of_two(array_base->array.len)) goto MEMCMP; + break; + case TYPE_UNION: + case TYPE_STRUCT: + case TYPE_BITSTRUCT: + if (array_base->decl->attr_compact) goto MEMCMP; + break; + case TYPE_POISONED: + case TYPE_VOID: + case TYPE_DISTINCT: + case TYPE_FUNC_RAW: + case TYPE_TYPEDEF: + case TYPE_INFERRED_ARRAY: + case TYPE_INFERRED_VECTOR: + case TYPE_UNTYPED_LIST: + case TYPE_OPTIONAL: + case TYPE_WILDCARD: + case TYPE_TYPEINFO: + case TYPE_MEMBER: + UNREACHABLE + case ALL_FLOATS: + case TYPE_SLICE: + case TYPE_ARRAY: + case TYPE_FLEXIBLE_ARRAY: + case TYPE_BOOL: + break; + } + bool want_match = binary_op == BINARYOP_EQ; ArraySize len = lhs->type->array.len; - Type *array_base_type = type_lowering(lhs->type->array.base); + Type *array_base_type = type_lowering(array_base); LLVMTypeRef array_type = llvm_get_type(c, lhs->type); if (should_inline_array_comp(len, array_base_type)) { diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h index 2055619c6..152f46cfa 100644 --- a/src/compiler/llvm_codegen_internal.h +++ b/src/compiler/llvm_codegen_internal.h @@ -146,6 +146,8 @@ typedef struct GenContext_ LLVMValueRef dyn_find_function; // The type of the find function. LLVMTypeRef dyn_find_function_type; + LLVMValueRef memcmp_function; + LLVMTypeRef memcmp_function_type; } GenContext; // LLVM Intrinsics diff --git a/src/compiler/llvm_codegen_module.c b/src/compiler/llvm_codegen_module.c index 95fa9f936..fe82b69c7 100644 --- a/src/compiler/llvm_codegen_module.c +++ b/src/compiler/llvm_codegen_module.c @@ -139,6 +139,9 @@ void gencontext_begin_module(GenContext *c) c->xtor_func_type = LLVMFunctionType(LLVMVoidTypeInContext(c->context), NULL, 0, false); c->introspect_type = create_introspection_type(c); c->fault_type = create_fault_type(c); + c->memcmp_function = NULL; + LLVMTypeRef memcmp_types[3] = {c->ptr_type, c->ptr_type, c->size_type }; + c->memcmp_function_type = LLVMFunctionType(llvm_get_type(c, type_cint), memcmp_types, 3, false); if (c->panic_var) c->panic_var->backend_ref = NULL; if (c->panicf) c->panicf->backend_ref = NULL; diff --git a/test/test_suite/arrays/array_comparison.c3t b/test/test_suite/arrays/array_comparison.c3t index 38b1f4359..be9f2991a 100644 --- a/test/test_suite/arrays/array_comparison.c3t +++ b/test/test_suite/arrays/array_comparison.c3t @@ -82,7 +82,6 @@ entry: %1 = load double, ptr %b, align 8 %eq = fcmp oeq double %0, %1 br i1 %eq, label %next_check, label %exit - next_check: ; preds = %entry %ptradd = getelementptr inbounds i8, ptr %a, i64 8 %ptradd1 = getelementptr inbounds i8, ptr %b, i64 8 @@ -90,10 +89,8 @@ next_check: ; preds = %entry %3 = load double, ptr %ptradd1, align 8 %eq2 = fcmp oeq double %2, %3 br i1 %eq2, label %match, label %exit - match: ; preds = %next_check br label %exit - exit: ; preds = %match, %next_check, %entry %array_cmp_phi = phi i1 [ false, %entry ], [ false, %next_check ], [ true, %match ] %4 = zext i1 %array_cmp_phi to i8 @@ -102,7 +99,6 @@ exit: ; preds = %match, %next_check, %6 = load double, ptr %b, align 8 %eq3 = fcmp oeq double %5, %6 br i1 %eq3, label %next_check4, label %exit9 - next_check4: ; preds = %exit %ptradd5 = getelementptr inbounds i8, ptr %a, i64 8 %ptradd6 = getelementptr inbounds i8, ptr %b, i64 8 @@ -110,106 +106,66 @@ next_check4: ; preds = %exit %8 = load double, ptr %ptradd6, align 8 %eq7 = fcmp oeq double %7, %8 br i1 %eq7, label %match8, label %exit9 - match8: ; preds = %next_check4 br label %exit9 - exit9: ; preds = %match8, %next_check4, %exit %array_cmp_phi10 = phi i1 [ true, %exit ], [ true, %next_check4 ], [ false, %match8 ] %9 = zext i1 %array_cmp_phi10 to i8 store i8 %9, ptr %y, align 1 call void @get2(ptr %a2) call void @get2(ptr %b2) - %10 = load i32, ptr %a2, align 4 - %11 = load i32, ptr %b2, align 4 - %eq11 = icmp eq i32 %10, %11 - br i1 %eq11, label %next_check12, label %exit17 - -next_check12: ; preds = %exit9 - %ptradd13 = getelementptr inbounds i8, ptr %a2, i64 4 - %ptradd14 = getelementptr inbounds i8, ptr %b2, i64 4 - %12 = load i32, ptr %ptradd13, align 4 - %13 = load i32, ptr %ptradd14, align 4 - %eq15 = icmp eq i32 %12, %13 - br i1 %eq15, label %match16, label %exit17 - -match16: ; preds = %next_check12 - br label %exit17 - -exit17: ; preds = %match16, %next_check12, %exit9 - %array_cmp_phi18 = phi i1 [ false, %exit9 ], [ false, %next_check12 ], [ true, %match16 ] - %14 = zext i1 %array_cmp_phi18 to i8 - store i8 %14, ptr %x2, align 1 - %15 = load i32, ptr %a2, align 4 - %16 = load i32, ptr %b2, align 4 - %eq19 = icmp eq i32 %15, %16 - br i1 %eq19, label %next_check20, label %exit25 - -next_check20: ; preds = %exit17 - %ptradd21 = getelementptr inbounds i8, ptr %a2, i64 4 - %ptradd22 = getelementptr inbounds i8, ptr %b2, i64 4 - %17 = load i32, ptr %ptradd21, align 4 - %18 = load i32, ptr %ptradd22, align 4 - %eq23 = icmp eq i32 %17, %18 - br i1 %eq23, label %match24, label %exit25 - -match24: ; preds = %next_check20 - br label %exit25 - -exit25: ; preds = %match24, %next_check20, %exit17 - %array_cmp_phi26 = phi i1 [ true, %exit17 ], [ true, %next_check20 ], [ false, %match24 ] - %19 = zext i1 %array_cmp_phi26 to i8 - store i8 %19, ptr %y2, align 1 + %cmp = call i32 @memcmp(ptr %a2, ptr %b2, i64 8) + %eq11 = icmp eq i32 %cmp, 0 + %10 = zext i1 %eq11 to i8 + store i8 %10, ptr %x2, align 1 + %cmp12 = call i32 @memcmp(ptr %a2, ptr %b2, i64 8) + %neq = icmp ne i32 %cmp12, 0 + %11 = zext i1 %neq to i8 + store i8 %11, ptr %y2, align 1 call void @get3(ptr %a3) call void @get3(ptr %b3) - %20 = load i8, ptr %a3, align 1 - %21 = trunc i8 %20 to i1 - %22 = load i8, ptr %b3, align 1 - %23 = trunc i8 %22 to i1 - %eq27 = icmp eq i1 %21, %23 - br i1 %eq27, label %next_check28, label %exit33 - -next_check28: ; preds = %exit25 - %ptradd29 = getelementptr inbounds i8, ptr %a3, i64 1 - %ptradd30 = getelementptr inbounds i8, ptr %b3, i64 1 - %24 = load i8, ptr %ptradd29, align 1 - %25 = trunc i8 %24 to i1 - %26 = load i8, ptr %ptradd30, align 1 - %27 = trunc i8 %26 to i1 - %eq31 = icmp eq i1 %25, %27 - br i1 %eq31, label %match32, label %exit33 - -match32: ; preds = %next_check28 - br label %exit33 - -exit33: ; preds = %match32, %next_check28, %exit25 - %array_cmp_phi34 = phi i1 [ false, %exit25 ], [ false, %next_check28 ], [ true, %match32 ] - %28 = zext i1 %array_cmp_phi34 to i8 - store i8 %28, ptr %x3, align 1 - %29 = load i8, ptr %a3, align 1 - %30 = trunc i8 %29 to i1 - %31 = load i8, ptr %b3, align 1 - %32 = trunc i8 %31 to i1 - %eq35 = icmp eq i1 %30, %32 - br i1 %eq35, label %next_check36, label %exit41 - -next_check36: ; preds = %exit33 - %ptradd37 = getelementptr inbounds i8, ptr %a3, i64 1 - %ptradd38 = getelementptr inbounds i8, ptr %b3, i64 1 - %33 = load i8, ptr %ptradd37, align 1 - %34 = trunc i8 %33 to i1 - %35 = load i8, ptr %ptradd38, align 1 - %36 = trunc i8 %35 to i1 - %eq39 = icmp eq i1 %34, %36 - br i1 %eq39, label %match40, label %exit41 - -match40: ; preds = %next_check36 - br label %exit41 - -exit41: ; preds = %match40, %next_check36, %exit33 - %array_cmp_phi42 = phi i1 [ true, %exit33 ], [ true, %next_check36 ], [ false, %match40 ] - %37 = zext i1 %array_cmp_phi42 to i8 - store i8 %37, ptr %y3, align 1 + %12 = load i8, ptr %a3, align 1 + %13 = trunc i8 %12 to i1 + %14 = load i8, ptr %b3, align 1 + %15 = trunc i8 %14 to i1 + %eq13 = icmp eq i1 %13, %15 + br i1 %eq13, label %next_check14, label %exit19 +next_check14: ; preds = %exit9 + %ptradd15 = getelementptr inbounds i8, ptr %a3, i64 1 + %ptradd16 = getelementptr inbounds i8, ptr %b3, i64 1 + %16 = load i8, ptr %ptradd15, align 1 + %17 = trunc i8 %16 to i1 + %18 = load i8, ptr %ptradd16, align 1 + %19 = trunc i8 %18 to i1 + %eq17 = icmp eq i1 %17, %19 + br i1 %eq17, label %match18, label %exit19 +match18: ; preds = %next_check14 + br label %exit19 +exit19: ; preds = %match18, %next_check14, %exit9 + %array_cmp_phi20 = phi i1 [ false, %exit9 ], [ false, %next_check14 ], [ true, %match18 ] + %20 = zext i1 %array_cmp_phi20 to i8 + store i8 %20, ptr %x3, align 1 + %21 = load i8, ptr %a3, align 1 + %22 = trunc i8 %21 to i1 + %23 = load i8, ptr %b3, align 1 + %24 = trunc i8 %23 to i1 + %eq21 = icmp eq i1 %22, %24 + br i1 %eq21, label %next_check22, label %exit27 +next_check22: ; preds = %exit19 + %ptradd23 = getelementptr inbounds i8, ptr %a3, i64 1 + %ptradd24 = getelementptr inbounds i8, ptr %b3, i64 1 + %25 = load i8, ptr %ptradd23, align 1 + %26 = trunc i8 %25 to i1 + %27 = load i8, ptr %ptradd24, align 1 + %28 = trunc i8 %27 to i1 + %eq25 = icmp eq i1 %26, %28 + br i1 %eq25, label %match26, label %exit27 +match26: ; preds = %next_check22 + br label %exit27 +exit27: ; preds = %match26, %next_check22, %exit19 + %array_cmp_phi28 = phi i1 [ true, %exit19 ], [ true, %next_check22 ], [ false, %match26 ] + %29 = zext i1 %array_cmp_phi28 to i8 + store i8 %29, ptr %y3, align 1 ret void } define void @test.test2() #0 { @@ -223,20 +179,17 @@ entry: %a2 = alloca [200 x i32], align 16 %b2 = alloca [200 x i32], align 16 %x2 = alloca i8, align 1 - %cmp.idx12 = alloca i64, align 8 %y2 = alloca i8, align 1 - %cmp.idx22 = alloca i64, align 8 %a3 = alloca [200 x i8], align 16 %b3 = alloca [200 x i8], align 16 %x3 = alloca i8, align 1 - %cmp.idx32 = alloca i64, align 8 + %cmp.idx14 = alloca i64, align 8 %y3 = alloca i8, align 1 - %cmp.idx41 = alloca i64, align 8 + %cmp.idx23 = alloca i64, align 8 call void @aget(ptr %a) call void @aget(ptr %b) store i64 0, ptr %cmp.idx, align 8 br label %array_loop_start - array_loop_start: ; preds = %array_loop_comparison, %entry %0 = load i64, ptr %cmp.idx, align 8 %ptroffset = getelementptr inbounds [8 x i8], ptr %a, i64 %0 @@ -245,20 +198,17 @@ array_loop_start: ; preds = %array_loop_comparis %2 = load double, ptr %ptroffset1, align 8 %eq = fcmp oeq double %1, %2 br i1 %eq, label %array_loop_comparison, label %array_cmp_exit - array_loop_comparison: ; preds = %array_loop_start %inc = add i64 %0, 1 store i64 %inc, ptr %cmp.idx, align 8 %lt = icmp ult i64 %inc, 200 br i1 %lt, label %array_loop_start, label %array_cmp_exit - array_cmp_exit: ; preds = %array_loop_comparison, %array_loop_start %array_cmp_phi = phi i1 [ true, %array_loop_comparison ], [ false, %array_loop_start ] %3 = zext i1 %array_cmp_phi to i8 store i8 %3, ptr %x, align 1 store i64 0, ptr %cmp.idx2, align 8 br label %array_loop_start3 - array_loop_start3: ; preds = %array_loop_comparison7, %array_cmp_exit %4 = load i64, ptr %cmp.idx2, align 8 %ptroffset4 = getelementptr inbounds [8 x i8], ptr %a, i64 %4 @@ -267,112 +217,68 @@ array_loop_start3: ; preds = %array_loop_comparis %6 = load double, ptr %ptroffset5, align 8 %eq6 = fcmp oeq double %5, %6 br i1 %eq6, label %array_loop_comparison7, label %array_cmp_exit10 - array_loop_comparison7: ; preds = %array_loop_start3 %inc8 = add i64 %4, 1 store i64 %inc8, ptr %cmp.idx2, align 8 %lt9 = icmp ult i64 %inc8, 200 br i1 %lt9, label %array_loop_start3, label %array_cmp_exit10 - array_cmp_exit10: ; preds = %array_loop_comparison7, %array_loop_start3 %array_cmp_phi11 = phi i1 [ false, %array_loop_comparison7 ], [ true, %array_loop_start3 ] %7 = zext i1 %array_cmp_phi11 to i8 store i8 %7, ptr %y, align 1 call void @aget2(ptr %a2) call void @aget2(ptr %b2) - store i64 0, ptr %cmp.idx12, align 8 - br label %array_loop_start13 - -array_loop_start13: ; preds = %array_loop_comparison17, %array_cmp_exit10 - %8 = load i64, ptr %cmp.idx12, align 8 - %ptroffset14 = getelementptr inbounds [4 x i8], ptr %a2, i64 %8 - %ptroffset15 = getelementptr inbounds [4 x i8], ptr %b2, i64 %8 - %9 = load i32, ptr %ptroffset14, align 4 - %10 = load i32, ptr %ptroffset15, align 4 - %eq16 = icmp eq i32 %9, %10 - br i1 %eq16, label %array_loop_comparison17, label %array_cmp_exit20 - -array_loop_comparison17: ; preds = %array_loop_start13 - %inc18 = add i64 %8, 1 - store i64 %inc18, ptr %cmp.idx12, align 8 - %lt19 = icmp ult i64 %inc18, 200 - br i1 %lt19, label %array_loop_start13, label %array_cmp_exit20 - -array_cmp_exit20: ; preds = %array_loop_comparison17, %array_loop_start13 - %array_cmp_phi21 = phi i1 [ true, %array_loop_comparison17 ], [ false, %array_loop_start13 ] - %11 = zext i1 %array_cmp_phi21 to i8 - store i8 %11, ptr %x2, align 1 - store i64 0, ptr %cmp.idx22, align 8 - br label %array_loop_start23 - -array_loop_start23: ; preds = %array_loop_comparison27, %array_cmp_exit20 - %12 = load i64, ptr %cmp.idx22, align 8 - %ptroffset24 = getelementptr inbounds [4 x i8], ptr %a2, i64 %12 - %ptroffset25 = getelementptr inbounds [4 x i8], ptr %b2, i64 %12 - %13 = load i32, ptr %ptroffset24, align 4 - %14 = load i32, ptr %ptroffset25, align 4 - %eq26 = icmp eq i32 %13, %14 - br i1 %eq26, label %array_loop_comparison27, label %array_cmp_exit30 - -array_loop_comparison27: ; preds = %array_loop_start23 - %inc28 = add i64 %12, 1 - store i64 %inc28, ptr %cmp.idx22, align 8 - %lt29 = icmp ult i64 %inc28, 200 - br i1 %lt29, label %array_loop_start23, label %array_cmp_exit30 - -array_cmp_exit30: ; preds = %array_loop_comparison27, %array_loop_start23 - %array_cmp_phi31 = phi i1 [ false, %array_loop_comparison27 ], [ true, %array_loop_start23 ] - %15 = zext i1 %array_cmp_phi31 to i8 - store i8 %15, ptr %y2, align 1 + %cmp = call i32 @memcmp(ptr %a2, ptr %b2, i64 800) + %eq12 = icmp eq i32 %cmp, 0 + %8 = zext i1 %eq12 to i8 + store i8 %8, ptr %x2, align 1 + %cmp13 = call i32 @memcmp(ptr %a2, ptr %b2, i64 800) + %neq = icmp ne i32 %cmp13, 0 + %9 = zext i1 %neq to i8 + store i8 %9, ptr %y2, align 1 call void @aget3(ptr %a3) call void @aget3(ptr %b3) - store i64 0, ptr %cmp.idx32, align 8 - br label %array_loop_start33 - -array_loop_start33: ; preds = %array_loop_comparison36, %array_cmp_exit30 - %16 = load i64, ptr %cmp.idx32, align 8 - %ptradd = getelementptr inbounds i8, ptr %a3, i64 %16 - %ptradd34 = getelementptr inbounds i8, ptr %b3, i64 %16 - %17 = load i8, ptr %ptradd, align 1 + store i64 0, ptr %cmp.idx14, align 8 + br label %array_loop_start15 +array_loop_start15: ; preds = %array_loop_comparison18, %array_cmp_exit10 + %10 = load i64, ptr %cmp.idx14, align 8 + %ptradd = getelementptr inbounds i8, ptr %a3, i64 %10 + %ptradd16 = getelementptr inbounds i8, ptr %b3, i64 %10 + %11 = load i8, ptr %ptradd, align 1 + %12 = trunc i8 %11 to i1 + %13 = load i8, ptr %ptradd16, align 1 + %14 = trunc i8 %13 to i1 + %eq17 = icmp eq i1 %12, %14 + br i1 %eq17, label %array_loop_comparison18, label %array_cmp_exit21 +array_loop_comparison18: ; preds = %array_loop_start15 + %inc19 = add i64 %10, 1 + store i64 %inc19, ptr %cmp.idx14, align 8 + %lt20 = icmp ult i64 %inc19, 200 + br i1 %lt20, label %array_loop_start15, label %array_cmp_exit21 +array_cmp_exit21: ; preds = %array_loop_comparison18, %array_loop_start15 + %array_cmp_phi22 = phi i1 [ true, %array_loop_comparison18 ], [ false, %array_loop_start15 ] + %15 = zext i1 %array_cmp_phi22 to i8 + store i8 %15, ptr %x3, align 1 + store i64 0, ptr %cmp.idx23, align 8 + br label %array_loop_start24 +array_loop_start24: ; preds = %array_loop_comparison28, %array_cmp_exit21 + %16 = load i64, ptr %cmp.idx23, align 8 + %ptradd25 = getelementptr inbounds i8, ptr %a3, i64 %16 + %ptradd26 = getelementptr inbounds i8, ptr %b3, i64 %16 + %17 = load i8, ptr %ptradd25, align 1 %18 = trunc i8 %17 to i1 - %19 = load i8, ptr %ptradd34, align 1 + %19 = load i8, ptr %ptradd26, align 1 %20 = trunc i8 %19 to i1 - %eq35 = icmp eq i1 %18, %20 - br i1 %eq35, label %array_loop_comparison36, label %array_cmp_exit39 - -array_loop_comparison36: ; preds = %array_loop_start33 - %inc37 = add i64 %16, 1 - store i64 %inc37, ptr %cmp.idx32, align 8 - %lt38 = icmp ult i64 %inc37, 200 - br i1 %lt38, label %array_loop_start33, label %array_cmp_exit39 - -array_cmp_exit39: ; preds = %array_loop_comparison36, %array_loop_start33 - %array_cmp_phi40 = phi i1 [ true, %array_loop_comparison36 ], [ false, %array_loop_start33 ] - %21 = zext i1 %array_cmp_phi40 to i8 - store i8 %21, ptr %x3, align 1 - store i64 0, ptr %cmp.idx41, align 8 - br label %array_loop_start42 - -array_loop_start42: ; preds = %array_loop_comparison46, %array_cmp_exit39 - %22 = load i64, ptr %cmp.idx41, align 8 - %ptradd43 = getelementptr inbounds i8, ptr %a3, i64 %22 - %ptradd44 = getelementptr inbounds i8, ptr %b3, i64 %22 - %23 = load i8, ptr %ptradd43, align 1 - %24 = trunc i8 %23 to i1 - %25 = load i8, ptr %ptradd44, align 1 - %26 = trunc i8 %25 to i1 - %eq45 = icmp eq i1 %24, %26 - br i1 %eq45, label %array_loop_comparison46, label %array_cmp_exit49 - -array_loop_comparison46: ; preds = %array_loop_start42 - %inc47 = add i64 %22, 1 - store i64 %inc47, ptr %cmp.idx41, align 8 - %lt48 = icmp ult i64 %inc47, 200 - br i1 %lt48, label %array_loop_start42, label %array_cmp_exit49 - -array_cmp_exit49: ; preds = %array_loop_comparison46, %array_loop_start42 - %array_cmp_phi50 = phi i1 [ false, %array_loop_comparison46 ], [ true, %array_loop_start42 ] - %27 = zext i1 %array_cmp_phi50 to i8 - store i8 %27, ptr %y3, align 1 + %eq27 = icmp eq i1 %18, %20 + br i1 %eq27, label %array_loop_comparison28, label %array_cmp_exit31 +array_loop_comparison28: ; preds = %array_loop_start24 + %inc29 = add i64 %16, 1 + store i64 %inc29, ptr %cmp.idx23, align 8 + %lt30 = icmp ult i64 %inc29, 200 + br i1 %lt30, label %array_loop_start24, label %array_cmp_exit31 +array_cmp_exit31: ; preds = %array_loop_comparison28, %array_loop_start24 + %array_cmp_phi32 = phi i1 [ false, %array_loop_comparison28 ], [ true, %array_loop_start24 ] + %21 = zext i1 %array_cmp_phi32 to i8 + store i8 %21, ptr %y3, align 1 ret void } diff --git a/test/test_suite/arrays/array_comparison_2.c3t b/test/test_suite/arrays/array_comparison_2.c3t index 673f16b87..23370b44a 100644 --- a/test/test_suite/arrays/array_comparison_2.c3t +++ b/test/test_suite/arrays/array_comparison_2.c3t @@ -35,125 +35,52 @@ entry: store i32 0, ptr %ptradd4, align 4 %ptradd5 = getelementptr inbounds i8, ptr %ptradd4, i64 4 store i32 0, ptr %ptradd5, align 4 - %0 = load i32, ptr %x, align 4 - %1 = load i32, ptr %y, align 4 - %eq = icmp eq i32 %0, %1 + %cmp = call i32 @memcmp(ptr %x, ptr %y, i64 8) + %eq = icmp eq i32 %cmp, 0 br i1 %eq, label %next_check, label %exit - next_check: ; preds = %entry - %ptradd6 = getelementptr inbounds i8, ptr %x, i64 4 - %ptradd7 = getelementptr inbounds i8, ptr %y, i64 4 - %2 = load i32, ptr %ptradd6, align 4 - %3 = load i32, ptr %ptradd7, align 4 - %eq8 = icmp eq i32 %2, %3 - br i1 %eq8, label %match9, label %exit - -match9: ; preds = %next_check + %ptradd6 = getelementptr inbounds i8, ptr %x, i64 8 + %ptradd7 = getelementptr inbounds i8, ptr %y, i64 8 + %cmp8 = call i32 @memcmp(ptr %ptradd6, ptr %ptradd7, i64 8) + %eq9 = icmp eq i32 %cmp8, 0 + br i1 %eq9, label %match10, label %exit +match10: ; preds = %next_check br label %exit - -exit: ; preds = %match9, %next_check, %entry - %array_cmp_phi = phi i1 [ false, %entry ], [ false, %next_check ], [ true, %match9 ] - br i1 %array_cmp_phi, label %next_check10, label %exit22 - -next_check10: ; preds = %exit - %ptradd11 = getelementptr inbounds i8, ptr %x, i64 8 - %ptradd12 = getelementptr inbounds i8, ptr %y, i64 8 - %4 = load i32, ptr %ptradd11, align 4 - %5 = load i32, ptr %ptradd12, align 4 - %eq13 = icmp eq i32 %4, %5 - br i1 %eq13, label %next_check14, label %exit19 - -next_check14: ; preds = %next_check10 - %ptradd15 = getelementptr inbounds i8, ptr %ptradd11, i64 4 - %ptradd16 = getelementptr inbounds i8, ptr %ptradd12, i64 4 - %6 = load i32, ptr %ptradd15, align 4 - %7 = load i32, ptr %ptradd16, align 4 - %eq17 = icmp eq i32 %6, %7 - br i1 %eq17, label %match18, label %exit19 - -match18: ; preds = %next_check14 - br label %exit19 - -exit19: ; preds = %match18, %next_check14, %next_check10 - %array_cmp_phi20 = phi i1 [ false, %next_check10 ], [ false, %next_check14 ], [ true, %match18 ] - br i1 %array_cmp_phi20, label %match21, label %exit22 - -match21: ; preds = %exit19 - br label %exit22 - -exit22: ; preds = %match21, %exit19, %exit - %array_cmp_phi23 = phi i1 [ false, %exit ], [ false, %exit19 ], [ true, %match21 ] - %8 = zext i1 %array_cmp_phi23 to i8 - store i8 %8, ptr %match, align 1 +exit: ; preds = %match10, %next_check, %entry + %array_cmp_phi = phi i1 [ false, %entry ], [ false, %next_check ], [ true, %match10 ] + %0 = zext i1 %array_cmp_phi to i8 + store i8 %0, ptr %match, align 1 call void @llvm.memset.p0.i64(ptr align 16 %z, i8 0, i64 128, i1 false) call void @llvm.memset.p0.i64(ptr align 16 %w, i8 0, i64 128, i1 false) store i64 0, ptr %cmp.idx, align 8 br label %array_loop_start - -array_loop_start: ; preds = %array_loop_comparison, %exit22 - %9 = load i64, ptr %cmp.idx, align 8 - %ptroffset = getelementptr inbounds [16 x i8], ptr %z, i64 %9 - %ptroffset24 = getelementptr inbounds [16 x i8], ptr %w, i64 %9 - %10 = load i32, ptr %ptroffset, align 4 - %11 = load i32, ptr %ptroffset24, align 4 - %eq25 = icmp eq i32 %10, %11 - br i1 %eq25, label %next_check26, label %exit31 - -next_check26: ; preds = %array_loop_start - %ptradd27 = getelementptr inbounds i8, ptr %ptroffset, i64 4 - %ptradd28 = getelementptr inbounds i8, ptr %ptroffset24, i64 4 - %12 = load i32, ptr %ptradd27, align 4 - %13 = load i32, ptr %ptradd28, align 4 - %eq29 = icmp eq i32 %12, %13 - br i1 %eq29, label %match30, label %exit31 - -match30: ; preds = %next_check26 - br label %exit31 - -exit31: ; preds = %match30, %next_check26, %array_loop_start - %array_cmp_phi32 = phi i1 [ false, %array_loop_start ], [ false, %next_check26 ], [ true, %match30 ] - br i1 %array_cmp_phi32, label %next_check33, label %exit45 - -next_check33: ; preds = %exit31 - %ptradd34 = getelementptr inbounds i8, ptr %ptroffset, i64 8 - %ptradd35 = getelementptr inbounds i8, ptr %ptroffset24, i64 8 - %14 = load i32, ptr %ptradd34, align 4 - %15 = load i32, ptr %ptradd35, align 4 - %eq36 = icmp eq i32 %14, %15 - br i1 %eq36, label %next_check37, label %exit42 - -next_check37: ; preds = %next_check33 - %ptradd38 = getelementptr inbounds i8, ptr %ptradd34, i64 4 - %ptradd39 = getelementptr inbounds i8, ptr %ptradd35, i64 4 - %16 = load i32, ptr %ptradd38, align 4 - %17 = load i32, ptr %ptradd39, align 4 - %eq40 = icmp eq i32 %16, %17 - br i1 %eq40, label %match41, label %exit42 - -match41: ; preds = %next_check37 - br label %exit42 - -exit42: ; preds = %match41, %next_check37, %next_check33 - %array_cmp_phi43 = phi i1 [ false, %next_check33 ], [ false, %next_check37 ], [ true, %match41 ] - br i1 %array_cmp_phi43, label %match44, label %exit45 - -match44: ; preds = %exit42 - br label %exit45 - -exit45: ; preds = %match44, %exit42, %exit31 - %array_cmp_phi46 = phi i1 [ false, %exit31 ], [ false, %exit42 ], [ true, %match44 ] - br i1 %array_cmp_phi46, label %array_loop_comparison, label %array_cmp_exit - -array_loop_comparison: ; preds = %exit45 - %inc = add i64 %9, 1 +array_loop_start: ; preds = %array_loop_comparison, %exit + %1 = load i64, ptr %cmp.idx, align 8 + %ptroffset = getelementptr inbounds [16 x i8], ptr %z, i64 %1 + %ptroffset11 = getelementptr inbounds [16 x i8], ptr %w, i64 %1 + %cmp12 = call i32 @memcmp(ptr %ptroffset, ptr %ptroffset11, i64 8) + %eq13 = icmp eq i32 %cmp12, 0 + br i1 %eq13, label %next_check14, label %exit20 +next_check14: ; preds = %array_loop_start + %ptradd15 = getelementptr inbounds i8, ptr %ptroffset, i64 8 + %ptradd16 = getelementptr inbounds i8, ptr %ptroffset11, i64 8 + %cmp17 = call i32 @memcmp(ptr %ptradd15, ptr %ptradd16, i64 8) + %eq18 = icmp eq i32 %cmp17, 0 + br i1 %eq18, label %match19, label %exit20 +match19: ; preds = %next_check14 + br label %exit20 +exit20: ; preds = %match19, %next_check14, %array_loop_start + %array_cmp_phi21 = phi i1 [ false, %array_loop_start ], [ false, %next_check14 ], [ true, %match19 ] + br i1 %array_cmp_phi21, label %array_loop_comparison, label %array_cmp_exit +array_loop_comparison: ; preds = %exit20 + %inc = add i64 %1, 1 store i64 %inc, ptr %cmp.idx, align 8 %lt = icmp ult i64 %inc, 8 br i1 %lt, label %array_loop_start, label %array_cmp_exit - -array_cmp_exit: ; preds = %array_loop_comparison, %exit45 - %array_cmp_phi47 = phi i1 [ true, %array_loop_comparison ], [ false, %exit45 ] - %18 = zext i1 %array_cmp_phi47 to i8 - store i8 %18, ptr %match, align 1 +array_cmp_exit: ; preds = %array_loop_comparison, %exit20 + %array_cmp_phi22 = phi i1 [ true, %array_loop_comparison ], [ false, %exit20 ] + %2 = zext i1 %array_cmp_phi22 to i8 + store i8 %2, ptr %match, align 1 ret void } diff --git a/test/test_suite/vector/gather_scatter.c3t b/test/test_suite/vector/gather_scatter.c3t index 6b229cbed..6093ff84e 100644 --- a/test/test_suite/vector/gather_scatter.c3t +++ b/test/test_suite/vector/gather_scatter.c3t @@ -26,6 +26,5 @@ fn void main() %14 = load <2 x ptr>, ptr %ptrvec3, align 16 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> , <2 x ptr> %14, i32 4, <2 x i1> ) - - %26 = load <2 x ptr>, ptr %ptrvec20, align 16 - call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> , <2 x ptr> %26, i32 4, <2 x i1> ) + %16 = load <2 x ptr>, ptr %ptrvec5, align 16 + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> , <2 x ptr> %16, i32 4, <2 x i1> )