From 9942be54dc0815bd7fb94bf3e1f56dcfc8d1f3ad Mon Sep 17 00:00:00 2001
From: Christoffer Lerno <christoffer@aegik.com>
Date: Thu, 21 Oct 2021 23:37:14 +0200
Subject: [PATCH] More optimized memclear. Added helper function on array gep.

---
 src/compiler/copying.c                        |   4 +-
 src/compiler/float.c                          |  14 ++
 src/compiler/lexer.c                          |   2 -
 src/compiler/llvm_codegen.c                   |  20 ++-
 src/compiler/llvm_codegen_expr.c              |  19 ++-
 src/compiler/llvm_codegen_internal.h          |   3 +-
 src/compiler/llvm_codegen_stmt.c              |   4 +-
 src/compiler/llvm_codegen_type.c              |   2 +
 test/test_suite/abi/literal_load.c3t          |  24 +--
 test/test_suite/abi/literal_load_aarch64.c3t  |  28 ++--
 test/test_suite/abi/literal_load_mingw.c3t    |  22 +--
 test/test_suite/abi/pass_large_aarch.c3t      |   2 +-
 .../arrays/global_array_non_const.c3          |   9 +-
 test/test_suite/methods/extension_method.c3t  |   4 +-
 .../struct/struct_codegen_empty.c3t           |  21 +--
 test/test_suite/union/union_codegen_empty.c3t |  37 ++---
 .../union/union_codegen_overwrite_call.c3t    |  16 +-
 test/test_suite/vector/vector_bit.c3t         | 153 +++++++++---------
 18 files changed, 214 insertions(+), 170 deletions(-)

diff --git a/src/compiler/copying.c b/src/compiler/copying.c
index 3c313360b..efad697b3 100644
--- a/src/compiler/copying.c
+++ b/src/compiler/copying.c
@@ -98,8 +98,8 @@ Expr *copy_expr(Expr *source_expr)
 		case EXPR_PLACEHOLDER:
 		case EXPR_CONST_IDENTIFIER:
 		case EXPR_CT_IDENT:
+		case EXPR_IDENTIFIER:
 		case EXPR_HASH_IDENT:
-			// TODO
 			return expr;
 		case EXPR_MACRO_EXPANSION:
 			MACRO_COPY_EXPR(expr->macro_expansion_expr.inner);
@@ -182,8 +182,6 @@ Expr *copy_expr(Expr *source_expr)
 		case EXPR_TYPEID:
 			MACRO_COPY_TYPE(expr->typeid_expr);
 			return expr;
-		case EXPR_IDENTIFIER:
-			return expr;
 		case EXPR_CALL:
 			if (expr->resolve_status != RESOLVE_DONE || expr->call_expr.is_pointer_call)
 			{
diff --git a/src/compiler/float.c b/src/compiler/float.c
index ce8f3518c..b2e1442cc 100644
--- a/src/compiler/float.c
+++ b/src/compiler/float.c
@@ -95,6 +95,13 @@ static char *err_invalid_float_width = "The float width is not valid, it must be
 static char *err_float_out_of_range = "The float value is out of range.";
 static char *err_float_format_invalid = "The float format is invalid.";
 
+/**
+ * This parses a float from a string. Unfortunately it is limited to parsing doubles as of now.
+ *
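+ * @param string the text to parse; scanning starts at its first character.
+ * @param error out-parameter for an error message. NOTE(review): presumably set to one of the err_* strings on failure - confirm.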
+ * @return the resulting Float, with type = TYPE_POISONED on error.
+ */
 Float float_from_string(const char *string, char **error)
 {
 	const char *index = string;
@@ -180,6 +187,13 @@ Float float_from_string(const char *string, char **error)
 	return (Float){ d, kind };
 }
 
+/**
+ * This parses a float from hex. Unfortunately it is limited to parsing doubles as of now.
+ *
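+ * @param string the text to parse; the first two characters are skipped (presumably the "0x" prefix - confirm).
+ * @param error out-parameter for an error message. NOTE(review): presumably set to one of the err_* strings on failure - confirm.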
+ * @return the resulting Float, with type = TYPE_POISONED on error.
+ */
 Float float_from_hex(const char *string, char **error)
 {
 	const char *index = string + 2;
diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c
index abda788e0..8ea04c938 100644
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -476,7 +476,6 @@ static inline bool scan_hex(Lexer *lexer)
 		if (!scan_exponent(lexer)) return false;
 	}
 	if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
-	// TODO this does not currently work.
 	if (!scan_number_suffix(lexer, &is_float)) return false;
 	if (is_float)
 	{
@@ -537,7 +536,6 @@ static inline bool scan_dec(Lexer *lexer)
 	}
 
 	if (prev(lexer) == '_') return add_error_token(lexer, "The number ended with '_', but that character needs to be between, not after, digits.");
-	// TODO this does not currently work.
 	if (!scan_number_suffix(lexer, &is_float)) return false;
 	if (is_float)
 	{
diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c
index 929d9fe45..f0fac26ce 100644
--- a/src/compiler/llvm_codegen.c
+++ b/src/compiler/llvm_codegen.c
@@ -64,11 +64,25 @@ LLVMValueRef llvm_emit_memclear_size_align(GenContext *c, LLVMValueRef ref, uint
 
 }
 
-LLVMValueRef llvm_emit_memclear(GenContext *c, BEValue *ref)
+void llvm_emit_memclear(GenContext *c, BEValue *ref)
 {
-	// TODO avoid bitcast on those that do not need them.
 	llvm_value_addr(c, ref);
-	return llvm_emit_memclear_size_align(c, ref->value, type_size(ref->type), ref->alignment, true);
+	Type *type = ref->type;
+	if (!type_is_abi_aggregate(type))
+	{
+		llvm_store_bevalue_raw(c, ref, llvm_get_zero(c, type));
+		return;
+	}
+	Type *single_type = type_abi_find_single_struct_element(type);
+
+	if (single_type && !type_is_abi_aggregate(single_type))
+	{
+		BEValue element;
+		llvm_value_set_address_align(&element, llvm_emit_bitcast(c, ref->value, type_get_ptr(single_type)), single_type, ref->alignment);
+		llvm_emit_memclear(c, &element);
+		return;
+	}
+	llvm_emit_memclear_size_align(c, ref->value, type_size(ref->type), ref->alignment, true);
 }
 
 LLVMValueRef llvm_emit_const_array_padding(LLVMTypeRef element_type, IndexDiff diff, bool *modified)
diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c
index 22fdfabb6..0c609197e 100644
--- a/src/compiler/llvm_codegen_expr.c
+++ b/src/compiler/llvm_codegen_expr.c
@@ -3163,8 +3163,23 @@ static void llvm_expand_type_to_args(GenContext *context, Type *param_type, LLVM
 LLVMValueRef llvm_emit_struct_gep_raw(GenContext *context, LLVMValueRef ptr, LLVMTypeRef struct_type, unsigned index, unsigned struct_alignment, unsigned offset, unsigned *alignment)
 {
 	*alignment = type_min_alignment(offset, struct_alignment);
-	LLVMValueRef addr = LLVMBuildStructGEP2(context->builder, struct_type, ptr, index, "");
-	return addr;
+	if (LLVMIsConstant(ptr))
+	{
+		LLVMValueRef idx[2] = { llvm_get_zero(context, type_int), llvm_const_int(context, type_int, index) };
+		return LLVMConstInBoundsGEP(ptr, idx, 2);
+	}
+	return LLVMBuildStructGEP2(context->builder, struct_type, ptr, index, "");
+}
+
+LLVMValueRef llvm_emit_array_gep_raw(GenContext *c, LLVMValueRef ptr, LLVMTypeRef array_type, unsigned index, unsigned array_alignment, unsigned *alignment)
+{
+	*alignment = type_min_alignment(llvm_store_size(c, LLVMGetElementType(array_type)), array_alignment);
+	LLVMValueRef idx[2] = { llvm_get_zero(c, type_int), llvm_const_int(c, type_int, index) };
+	if (LLVMIsConstant(ptr))
+	{
+		return LLVMConstInBoundsGEP(ptr, idx, 2);
+	}
+	return LLVMBuildInBoundsGEP2(c->builder, array_type, ptr, idx, 2, "");
 }
 
 void llvm_emit_subarray_len(GenContext *c, BEValue *subarray, BEValue *len)
diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h
index 739189184..ccbb83a4b 100644
--- a/src/compiler/llvm_codegen_internal.h
+++ b/src/compiler/llvm_codegen_internal.h
@@ -254,7 +254,7 @@ void llvm_emit_local_var_alloca(GenContext *c, Decl *decl);
 LLVMValueRef llvm_emit_local_decl(GenContext *c, Decl *decl);
 LLVMValueRef llvm_emit_aggregate_value(GenContext *c, Type *type, ...);
 LLVMValueRef llvm_emit_memclear_size_align(GenContext *c, LLVMValueRef ref, uint64_t size, unsigned align, bool bitcast);
-LLVMValueRef llvm_emit_memclear(GenContext *c, BEValue *ref);
+void llvm_emit_memclear(GenContext *c, BEValue *ref);
 void llvm_emit_memcpy(GenContext *c, LLVMValueRef dest, unsigned dest_align, LLVMValueRef source, unsigned src_align, uint64_t len);
 void llvm_emit_memcpy_to_decl(GenContext *c, Decl *decl, LLVMValueRef source, unsigned source_alignment);
 void llvm_emit_stmt(GenContext *c, Ast *ast);
@@ -266,6 +266,7 @@ void llvm_emit_debug_output(GenContext *c, const char *message, const char *file
 void llvm_emit_return_abi(GenContext *c, BEValue *return_value, BEValue *failable);
 void llvm_emit_return_implicit(GenContext *c);
 LLVMValueRef llvm_emit_struct_gep_raw(GenContext *context, LLVMValueRef ptr, LLVMTypeRef struct_type, unsigned index, unsigned struct_alignment, unsigned offset, unsigned *alignment);
+LLVMValueRef llvm_emit_array_gep_raw(GenContext *c, LLVMValueRef ptr, LLVMTypeRef array_type, unsigned index, unsigned array_alignment, unsigned *alignment);
 void llvm_emit_subarray_len(GenContext *context, BEValue *subarray, BEValue *len);
 void llvm_emit_subarray_pointer(GenContext *context, BEValue *subarray, BEValue *pointer);
 LLVMValueRef llvm_get_next_param(GenContext *context, unsigned *index);
diff --git a/src/compiler/llvm_codegen_stmt.c b/src/compiler/llvm_codegen_stmt.c
index 86ff99428..9d1eb45eb 100644
--- a/src/compiler/llvm_codegen_stmt.c
+++ b/src/compiler/llvm_codegen_stmt.c
@@ -105,7 +105,9 @@ LLVMValueRef llvm_emit_local_decl(GenContext *c, Decl *decl)
 		}
 		else
 		{
-			llvm_emit_memclear_size_align(c, decl->backend_ref, type_size(decl->type), decl->alignment, true);
+			BEValue value;
+			llvm_value_set_decl_address(&value, decl);
+			llvm_emit_memclear(c, &value);
 		}
 	}
 	return decl->backend_ref;
diff --git a/src/compiler/llvm_codegen_type.c b/src/compiler/llvm_codegen_type.c
index d8c1ed9a0..2dbd9f634 100644
--- a/src/compiler/llvm_codegen_type.c
+++ b/src/compiler/llvm_codegen_type.c
@@ -93,6 +93,7 @@ static inline LLVMTypeRef llvm_type_from_decl(GenContext *c, Decl *decl)
 	}
 	UNREACHABLE
 }
+
 static inline LLVMTypeRef llvm_type_from_ptr(GenContext *context, Type *type)
 {
 	if (type->canonical != type)
@@ -106,6 +107,7 @@ static inline LLVMTypeRef llvm_type_from_ptr(GenContext *context, Type *type)
 	return type->backend_type = LLVMPointerType(llvm_get_type(context, type->pointer), /** TODO **/0);
 }
 
+
 static inline LLVMTypeRef llvm_type_from_array(GenContext *context, Type *type)
 {
 	if (type->canonical != type)
diff --git a/test/test_suite/abi/literal_load.c3t b/test/test_suite/abi/literal_load.c3t
index 888fdf2eb..9ee9ec911 100644
--- a/test/test_suite/abi/literal_load.c3t
+++ b/test/test_suite/abi/literal_load.c3t
@@ -17,15 +17,15 @@ func Test creator()
 
 // #expect: literal_load.ll
 
-  %literal = alloca %Test, align 4
-  %literal1 = alloca %Test, align 4
-  %0 = bitcast %Test* %literal to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
-  %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
-  %1 = load i32, i32* %dive, align 4
-  call void @blorg(i32 %1)
-  %2 = bitcast %Test* %literal1 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 4, i1 false)
-  %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
-  %3 = load i32, i32* %dive2, align 4
-  ret i32 %3
\ No newline at end of file
+    %literal = alloca %Test, align 4
+    %literal1 = alloca %Test, align 4
+    %0 = bitcast %Test* %literal to i32*
+    store i32 0, i32* %0, align 4
+    %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
+    %1 = load i32, i32* %dive, align 4
+    call void @blorg(i32 %1)
+    %2 = bitcast %Test* %literal1 to i32*
+    store i32 0, i32* %2, align 4
+    %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
+    %3 = load i32, i32* %dive2, align 4
+    ret i32 %3
\ No newline at end of file
diff --git a/test/test_suite/abi/literal_load_aarch64.c3t b/test/test_suite/abi/literal_load_aarch64.c3t
index 4bea52ddc..95a0994e4 100644
--- a/test/test_suite/abi/literal_load_aarch64.c3t
+++ b/test/test_suite/abi/literal_load_aarch64.c3t
@@ -20,17 +20,17 @@ func Test creator()
 
 declare void @blorg(i64)
 
-  %literal = alloca %Test, align 4
-  %literal1 = alloca %Test, align 4
-  %0 = bitcast %Test* %literal to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
-  %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
-  %1 = load i32, i32* %dive, align 4
-  %2 = zext i32 %1 to i64
-  call void @blorg(i64 %2)
-  %3 = bitcast %Test* %literal1 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %3, i8 0, i64 4, i1 false)
-  %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
-  %4 = load i32, i32* %dive2, align 4
-  %5 = zext i32 %4 to i64
-  ret i64 %5
\ No newline at end of file
+    %literal = alloca %Test, align 4
+    %literal1 = alloca %Test, align 4
+    %0 = bitcast %Test* %literal to i32*
+    store i32 0, i32* %0, align 4
+    %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
+    %1 = load i32, i32* %dive, align 4
+    %2 = zext i32 %1 to i64
+    call void @blorg(i64 %2)
+    %3 = bitcast %Test* %literal1 to i32*
+    store i32 0, i32* %3, align 4
+    %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
+    %4 = load i32, i32* %dive2, align 4
+    %5 = zext i32 %4 to i64
+    ret i64 %5
\ No newline at end of file
diff --git a/test/test_suite/abi/literal_load_mingw.c3t b/test/test_suite/abi/literal_load_mingw.c3t
index 6bfad3154..aecadf19d 100644
--- a/test/test_suite/abi/literal_load_mingw.c3t
+++ b/test/test_suite/abi/literal_load_mingw.c3t
@@ -19,14 +19,14 @@ func Test creator()
 // #expect: literal_load.ll
 
   %literal = alloca %Test, align 4
-  %literal1 = alloca %Test, align 4
-  %0 = bitcast %Test* %literal to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
-  %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
-  %1 = load i32, i32* %dive, align 4
-  call void @blorg(i32 %1)
-  %2 = bitcast %Test* %literal1 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 4, i1 false)
-  %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
-  %3 = load i32, i32* %dive2, align 4
-  ret i32 %3
\ No newline at end of file
+    %literal1 = alloca %Test, align 4
+    %0 = bitcast %Test* %literal to i32*
+    store i32 0, i32* %0, align 4
+    %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
+    %1 = load i32, i32* %dive, align 4
+    call void @blorg(i32 %1)
+    %2 = bitcast %Test* %literal1 to i32*
+    store i32 0, i32* %2, align 4
+    %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
+    %3 = load i32, i32* %dive2, align 4
+    ret i32 %3
\ No newline at end of file
diff --git a/test/test_suite/abi/pass_large_aarch.c3t b/test/test_suite/abi/pass_large_aarch.c3t
index 75fbf237a..300f94132 100644
--- a/test/test_suite/abi/pass_large_aarch.c3t
+++ b/test/test_suite/abi/pass_large_aarch.c3t
@@ -17,7 +17,7 @@ func void example()
     pass_large(l);
 }
 
-// #expect: pass_large.ll
+/* #expect: pass_large.ll
 
 define void @pass_large.example()
 entry:
diff --git a/test/test_suite/arrays/global_array_non_const.c3 b/test/test_suite/arrays/global_array_non_const.c3
index 406bc9f87..f31803328 100644
--- a/test/test_suite/arrays/global_array_non_const.c3
+++ b/test/test_suite/arrays/global_array_non_const.c3
@@ -1,10 +1,9 @@
 
-// TODO
-//const int CONSTANT = 1;
-//int[CONSTANT] a;
+const int CONSTANT = 1;
+int[CONSTANT] a2;
 
-//const bool B = true;
-//i32[B] c;      // @error{size of array has non-integer type 'bool'}
+const bool B = true;
+int[B] c2;      // #error: Expected an integer size.
 
 int non_constant = 10;
 int[non_constant] b;        // #error: Expected a constant value as
diff --git a/test/test_suite/methods/extension_method.c3t b/test/test_suite/methods/extension_method.c3t
index 2e8922da0..c88790923 100644
--- a/test/test_suite/methods/extension_method.c3t
+++ b/test/test_suite/methods/extension_method.c3t
@@ -31,7 +31,7 @@ declare void @foo.Bar__test(%Bar*)
 define void @main()
 entry:
   %bar = alloca %Bar, align 4
-  %0 = bitcast %Bar* %bar to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
+  %0 = bitcast %Bar* %bar to i32*
+  store i32 0, i32* %0, align 4
   call void @foo.Bar__test(%Bar* %bar)
   ret void
diff --git a/test/test_suite/struct/struct_codegen_empty.c3t b/test/test_suite/struct/struct_codegen_empty.c3t
index bf1318cc3..cc2a738c5 100644
--- a/test/test_suite/struct/struct_codegen_empty.c3t
+++ b/test/test_suite/struct/struct_codegen_empty.c3t
@@ -27,13 +27,14 @@ func void test()
   %b = alloca %StructB, align 4
   %b2 = alloca %StructB, align 4
   %b3 = alloca %StructB, align 4
-  %0 = bitcast %StructA* %a to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
-  %1 = bitcast %StructA* %a2 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 4, i1 false)
-  %2 = bitcast %StructB* %b to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 4, i1 false)
-  %3 = bitcast %StructB* %b2 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %3, i8 0, i64 4, i1 false)
-  %4 = bitcast %StructB* %b3 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %4, i8 0, i64 4, i1 false)
\ No newline at end of file
+  %0 = bitcast %StructA* %a to i32*
+  store i32 0, i32* %0, align 4
+  %1 = bitcast %StructA* %a2 to i32*
+  store i32 0, i32* %1, align 4
+  %2 = bitcast %StructB* %b to i32*
+  store i32 0, i32* %2, align 4
+  %3 = bitcast %StructB* %b2 to i32*
+  store i32 0, i32* %3, align 4
+  %4 = bitcast %StructB* %b3 to i32*
+  store i32 0, i32* %4, align 4
+  ret void
\ No newline at end of file
diff --git a/test/test_suite/union/union_codegen_empty.c3t b/test/test_suite/union/union_codegen_empty.c3t
index 53384beab..75cac11ff 100644
--- a/test/test_suite/union/union_codegen_empty.c3t
+++ b/test/test_suite/union/union_codegen_empty.c3t
@@ -25,21 +25,22 @@ func void test()
 
 // #expect: union_codegen_empty.ll
 
-%a = alloca %UnionA, align 4
-%a2 = alloca %UnionA, align 4
-%b = alloca %UnionB, align 8
-%b2 = alloca %UnionB, align 8
-%b3 = alloca %UnionB, align 8
-%b4 = alloca %UnionB, align 8
-%0 = bitcast %UnionA* %a to i8*
-call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
-%1 = bitcast %UnionA* %a2 to i8*
-call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 4, i1 false)
-%2 = bitcast %UnionB* %b to i8*
-call void @llvm.memset.p0i8.i64(i8* align 8 %2, i8 0, i64 8, i1 false)
- %3 = bitcast %UnionB* %b2 to i8*
-call void @llvm.memset.p0i8.i64(i8* align 8 %3, i8 0, i64 8, i1 false)
-%4 = bitcast %UnionB* %b3 to i8*
-call void @llvm.memset.p0i8.i64(i8* align 8 %4, i8 0, i64 8, i1 false)
-%5 = bitcast %UnionB* %b4 to i8*
-call void @llvm.memset.p0i8.i64(i8* align 8 %5, i8 0, i64 8, i1 false)
+  %a = alloca %UnionA, align 4
+  %a2 = alloca %UnionA, align 4
+  %b = alloca %UnionB, align 8
+  %b2 = alloca %UnionB, align 8
+  %b3 = alloca %UnionB, align 8
+  %b4 = alloca %UnionB, align 8
+  %0 = bitcast %UnionA* %a to i32*
+  store i32 0, i32* %0, align 4
+  %1 = bitcast %UnionA* %a2 to i32*
+  store i32 0, i32* %1, align 4
+  %2 = bitcast %UnionB* %b to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %2, i8 0, i64 8, i1 false)
+  %3 = bitcast %UnionB* %b2 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %3, i8 0, i64 8, i1 false)
+  %4 = bitcast %UnionB* %b3 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %4, i8 0, i64 8, i1 false)
+  %5 = bitcast %UnionB* %b4 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %5, i8 0, i64 8, i1 false)
+  ret void
diff --git a/test/test_suite/union/union_codegen_overwrite_call.c3t b/test/test_suite/union/union_codegen_overwrite_call.c3t
index 16a76194c..5e82f38c7 100644
--- a/test/test_suite/union/union_codegen_overwrite_call.c3t
+++ b/test/test_suite/union/union_codegen_overwrite_call.c3t
@@ -20,12 +20,12 @@ func void test()
 // #expect: test.ll
 
 entry:
-%b = alloca %UnionB, align 8
-%0 = bitcast %UnionB* %b to i32*
-%1 = call i32 @bar()
-store i32 %1, i32* %0, align 8
-%2 = bitcast %UnionB* %b to %b*
-%3 = bitcast %b* %2 to i8*
-call void @llvm.memset.p0i8.i64(i8* align 8 %3, i8 0, i64 4, i1 false)
-ret void
+  %b = alloca %UnionB, align 8
+  %0 = bitcast %UnionB* %b to i32*
+  %1 = call i32 @bar()
+  store i32 %1, i32* %0, align 8
+  %2 = bitcast %UnionB* %b to %b*
+  %3 = bitcast %b* %2 to i32*
+  store i32 0, i32* %3, align 8
+  ret void
 
diff --git a/test/test_suite/vector/vector_bit.c3t b/test/test_suite/vector/vector_bit.c3t
index 2de0c7840..3f6efa448 100644
--- a/test/test_suite/vector/vector_bit.c3t
+++ b/test/test_suite/vector/vector_bit.c3t
@@ -68,90 +68,89 @@ entry:
   %w = alloca <4 x i32>, align 16
   store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %y, align 16
   store <4 x i32> <i32 2, i32 3, i32 13, i32 -100>, <4 x i32>* %z, align 16
-  %0 = bitcast <4 x i32>* %w to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 16, i1 false)
-  %1 = load <4 x i32>, <4 x i32>* %y, align 16
-  %2 = load <4 x i32>, <4 x i32>* %z, align 16
-  %and = and <4 x i32> %1, %2
+  store <4 x i32> zeroinitializer, <4 x i32>* %w, align 16
+  %0 = load <4 x i32>, <4 x i32>* %y, align 16
+  %1 = load <4 x i32>, <4 x i32>* %z, align 16
+  %and = and <4 x i32> %0, %1
   store <4 x i32> %and, <4 x i32>* %w, align 16
-  %3 = load <4 x i32>, <4 x i32>* %w, align 16
-  %4 = extractelement <4 x i32> %3, i64 0
-  %5 = load <4 x i32>, <4 x i32>* %w, align 16
-  %6 = extractelement <4 x i32> %5, i64 1
-  %7 = load <4 x i32>, <4 x i32>* %w, align 16
-  %8 = extractelement <4 x i32> %7, i64 2
-  %9 = load <4 x i32>, <4 x i32>* %w, align 16
-  %10 = extractelement <4 x i32> %9, i64 3
-  %11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.1, i32 0, i32 0), i32 %4, i32 %6, i32 %8, i32 %10)
-  %12 = load <4 x i32>, <4 x i32>* %y, align 16
-  %13 = load <4 x i32>, <4 x i32>* %z, align 16
-  %or = or <4 x i32> %12, %13
+  %2 = load <4 x i32>, <4 x i32>* %w, align 16
+  %3 = extractelement <4 x i32> %2, i64 0
+  %4 = load <4 x i32>, <4 x i32>* %w, align 16
+  %5 = extractelement <4 x i32> %4, i64 1
+  %6 = load <4 x i32>, <4 x i32>* %w, align 16
+  %7 = extractelement <4 x i32> %6, i64 2
+  %8 = load <4 x i32>, <4 x i32>* %w, align 16
+  %9 = extractelement <4 x i32> %8, i64 3
+  %10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.1, i32 0, i32 0), i32 %3, i32 %5, i32 %7, i32 %9)
+  %11 = load <4 x i32>, <4 x i32>* %y, align 16
+  %12 = load <4 x i32>, <4 x i32>* %z, align 16
+  %or = or <4 x i32> %11, %12
   store <4 x i32> %or, <4 x i32>* %w, align 16
-  %14 = load <4 x i32>, <4 x i32>* %w, align 16
-  %15 = extractelement <4 x i32> %14, i64 0
-  %16 = load <4 x i32>, <4 x i32>* %w, align 16
-  %17 = extractelement <4 x i32> %16, i64 1
-  %18 = load <4 x i32>, <4 x i32>* %w, align 16
-  %19 = extractelement <4 x i32> %18, i64 2
-  %20 = load <4 x i32>, <4 x i32>* %w, align 16
-  %21 = extractelement <4 x i32> %20, i64 3
-  %22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.2, i32 0, i32 0), i32 %15, i32 %17, i32 %19, i32 %21)
-  %23 = load <4 x i32>, <4 x i32>* %y, align 16
-  %24 = load <4 x i32>, <4 x i32>* %z, align 16
-  %xor = xor <4 x i32> %23, %24
+  %13 = load <4 x i32>, <4 x i32>* %w, align 16
+  %14 = extractelement <4 x i32> %13, i64 0
+  %15 = load <4 x i32>, <4 x i32>* %w, align 16
+  %16 = extractelement <4 x i32> %15, i64 1
+  %17 = load <4 x i32>, <4 x i32>* %w, align 16
+  %18 = extractelement <4 x i32> %17, i64 2
+  %19 = load <4 x i32>, <4 x i32>* %w, align 16
+  %20 = extractelement <4 x i32> %19, i64 3
+  %21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.2, i32 0, i32 0), i32 %14, i32 %16, i32 %18, i32 %20)
+  %22 = load <4 x i32>, <4 x i32>* %y, align 16
+  %23 = load <4 x i32>, <4 x i32>* %z, align 16
+  %xor = xor <4 x i32> %22, %23
   store <4 x i32> %xor, <4 x i32>* %w, align 16
-  %25 = load <4 x i32>, <4 x i32>* %w, align 16
-  %26 = extractelement <4 x i32> %25, i64 0
-  %27 = load <4 x i32>, <4 x i32>* %w, align 16
-  %28 = extractelement <4 x i32> %27, i64 1
-  %29 = load <4 x i32>, <4 x i32>* %w, align 16
-  %30 = extractelement <4 x i32> %29, i64 2
-  %31 = load <4 x i32>, <4 x i32>* %w, align 16
-  %32 = extractelement <4 x i32> %31, i64 3
-  %33 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.3, i32 0, i32 0), i32 %26, i32 %28, i32 %30, i32 %32)
-  %34 = load <4 x i32>, <4 x i32>* %y, align 16
-  %bnot = xor <4 x i32> %34, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %24 = load <4 x i32>, <4 x i32>* %w, align 16
+  %25 = extractelement <4 x i32> %24, i64 0
+  %26 = load <4 x i32>, <4 x i32>* %w, align 16
+  %27 = extractelement <4 x i32> %26, i64 1
+  %28 = load <4 x i32>, <4 x i32>* %w, align 16
+  %29 = extractelement <4 x i32> %28, i64 2
+  %30 = load <4 x i32>, <4 x i32>* %w, align 16
+  %31 = extractelement <4 x i32> %30, i64 3
+  %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.3, i32 0, i32 0), i32 %25, i32 %27, i32 %29, i32 %31)
+  %33 = load <4 x i32>, <4 x i32>* %y, align 16
+  %bnot = xor <4 x i32> %33, <i32 -1, i32 -1, i32 -1, i32 -1>
   store <4 x i32> %bnot, <4 x i32>* %w, align 16
-  %35 = load <4 x i32>, <4 x i32>* %w, align 16
-  %36 = extractelement <4 x i32> %35, i64 0
-  %37 = load <4 x i32>, <4 x i32>* %w, align 16
-  %38 = extractelement <4 x i32> %37, i64 1
-  %39 = load <4 x i32>, <4 x i32>* %w, align 16
-  %40 = extractelement <4 x i32> %39, i64 2
-  %41 = load <4 x i32>, <4 x i32>* %w, align 16
-  %42 = extractelement <4 x i32> %41, i64 3
-  %43 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.4, i32 0, i32 0), i32 %36, i32 %38, i32 %40, i32 %42)
+  %34 = load <4 x i32>, <4 x i32>* %w, align 16
+  %35 = extractelement <4 x i32> %34, i64 0
+  %36 = load <4 x i32>, <4 x i32>* %w, align 16
+  %37 = extractelement <4 x i32> %36, i64 1
+  %38 = load <4 x i32>, <4 x i32>* %w, align 16
+  %39 = extractelement <4 x i32> %38, i64 2
+  %40 = load <4 x i32>, <4 x i32>* %w, align 16
+  %41 = extractelement <4 x i32> %40, i64 3
+  %42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.4, i32 0, i32 0), i32 %35, i32 %37, i32 %39, i32 %41)
+  %43 = load <4 x i32>, <4 x i32>* %y, align 16
   %44 = load <4 x i32>, <4 x i32>* %y, align 16
-  %45 = load <4 x i32>, <4 x i32>* %y, align 16
-  %neg = sub <4 x i32> zeroinitializer, %45
+  %neg = sub <4 x i32> zeroinitializer, %44
   store <4 x i32> %neg, <4 x i32>* %w, align 16
-  %46 = load <4 x i32>, <4 x i32>* %w, align 16
-  %47 = extractelement <4 x i32> %46, i64 0
-  %48 = load <4 x i32>, <4 x i32>* %w, align 16
-  %49 = extractelement <4 x i32> %48, i64 1
-  %50 = load <4 x i32>, <4 x i32>* %w, align 16
-  %51 = extractelement <4 x i32> %50, i64 2
-  %52 = load <4 x i32>, <4 x i32>* %w, align 16
-  %53 = extractelement <4 x i32> %52, i64 3
-  %54 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.5, i32 0, i32 0), i32 %47, i32 %49, i32 %51, i32 %53)
-  %55 = load <4 x i32>, <4 x i32>* %y, align 16
-  %not = icmp eq <4 x i32> %55, zeroinitializer
-  %56 = sext <4 x i1> %not to <4 x i32>
-  store <4 x i32> %56, <4 x i32>* %w, align 16
+  %45 = load <4 x i32>, <4 x i32>* %w, align 16
+  %46 = extractelement <4 x i32> %45, i64 0
+  %47 = load <4 x i32>, <4 x i32>* %w, align 16
+  %48 = extractelement <4 x i32> %47, i64 1
+  %49 = load <4 x i32>, <4 x i32>* %w, align 16
+  %50 = extractelement <4 x i32> %49, i64 2
+  %51 = load <4 x i32>, <4 x i32>* %w, align 16
+  %52 = extractelement <4 x i32> %51, i64 3
+  %53 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.5, i32 0, i32 0), i32 %46, i32 %48, i32 %50, i32 %52)
+  %54 = load <4 x i32>, <4 x i32>* %y, align 16
+  %not = icmp eq <4 x i32> %54, zeroinitializer
+  %55 = sext <4 x i1> %not to <4 x i32>
+  store <4 x i32> %55, <4 x i32>* %w, align 16
   store <4 x i32> <i32 -1, i32 13, i32 0, i32 0>, <4 x i32>* %w, align 16
-  %57 = load <4 x i32>, <4 x i32>* %w, align 16
-  %not1 = icmp eq <4 x i32> %57, zeroinitializer
-  %58 = sext <4 x i1> %not1 to <4 x i32>
-  store <4 x i32> %58, <4 x i32>* %w, align 16
-  %59 = load <4 x i32>, <4 x i32>* %w, align 16
-  %60 = extractelement <4 x i32> %59, i64 0
-  %61 = load <4 x i32>, <4 x i32>* %w, align 16
-  %62 = extractelement <4 x i32> %61, i64 1
-  %63 = load <4 x i32>, <4 x i32>* %w, align 16
-  %64 = extractelement <4 x i32> %63, i64 2
-  %65 = load <4 x i32>, <4 x i32>* %w, align 16
-  %66 = extractelement <4 x i32> %65, i64 3
-  %67 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.6, i32 0, i32 0), i32 %60, i32 %62, i32 %64, i32 %66)
+  %56 = load <4 x i32>, <4 x i32>* %w, align 16
+  %not1 = icmp eq <4 x i32> %56, zeroinitializer
+  %57 = sext <4 x i1> %not1 to <4 x i32>
+  store <4 x i32> %57, <4 x i32>* %w, align 16
+  %58 = load <4 x i32>, <4 x i32>* %w, align 16
+  %59 = extractelement <4 x i32> %58, i64 0
+  %60 = load <4 x i32>, <4 x i32>* %w, align 16
+  %61 = extractelement <4 x i32> %60, i64 1
+  %62 = load <4 x i32>, <4 x i32>* %w, align 16
+  %63 = extractelement <4 x i32> %62, i64 2
+  %64 = load <4 x i32>, <4 x i32>* %w, align 16
+  %65 = extractelement <4 x i32> %64, i64 3
+  %66 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.6, i32 0, i32 0), i32 %59, i32 %61, i32 %63, i32 %65)
   ret void
 }