From efe4f8c74573fba558a513e429a6f61efdb9a7ac Mon Sep 17 00:00:00 2001
From: Christoffer Lerno <christoffer@aegik.com>
Date: Thu, 29 Jul 2021 03:04:31 +0200
Subject: [PATCH] Coerce lowering for the C ABI updated. Updated aarch64 fixes.

---
 src/compiler/llvm_codegen_expr.c              | 205 ++++++++++++++++--
 src/compiler/llvm_codegen_function.c          |  10 +-
 src/compiler/llvm_codegen_internal.h          |   1 +
 src/compiler/types.c                          |   2 +-
 test/test_suite/abi/literal_load.c3t          |  12 +-
 test/test_suite/abi/literal_load_aarch64.c3t  |  36 +++
 test/test_suite/abi/literal_load_mingw.c3t    |  32 +++
 test/test_suite/errors/rethrow.c3t            |  29 ++-
 test/test_suite/functions/splat.c3t           |   3 +-
 test/test_suite/functions/splat_aarch64.c3t   |  74 +++++++
 test/test_suite/functions/splat_mingw.c3t     |  71 ++++++
 test/test_suite/functions/test_regression.c3t | 156 ++++++-------
 .../functions/test_regression_mingw.c3t       | 197 +++++++++--------
 test/test_suite/struct/struct_as_value.c3t    |  31 ++-
 .../struct/struct_as_value_aarch64.c3t        |  45 ++++
 15 files changed, 687 insertions(+), 217 deletions(-)
 create mode 100644 test/test_suite/abi/literal_load_aarch64.c3t
 create mode 100644 test/test_suite/abi/literal_load_mingw.c3t
 create mode 100644 test/test_suite/functions/splat_aarch64.c3t
 create mode 100644 test/test_suite/functions/splat_mingw.c3t
 create mode 100644 test/test_suite/struct/struct_as_value_aarch64.c3t

diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c
index ae6bd6f63..3a6d23e09 100644
--- a/src/compiler/llvm_codegen_expr.c
+++ b/src/compiler/llvm_codegen_expr.c
@@ -58,44 +58,206 @@ static inline LLVMValueRef llvm_emit_add_int(GenContext *c, Type *type, LLVMValu
 	return LLVMBuildAdd(c->builder, left, right, "add");
 }
 
+void llvm_enter_struct_for_coerce(GenContext *c, LLVMValueRef *struct_ptr, LLVMTypeRef *type, ByteSize dest_size)
+{
+	// Dive through first members; *struct_ptr and *type are updated in place for each level entered.
+	while (LLVMGetTypeKind(*type) == LLVMStructTypeKind && LLVMCountStructElementTypes(*type))
+	{
+		LLVMTypeRef first_element = LLVMStructGetTypeAtIndex(*type, 0);
+		ByteSize first_element_size = llvm_store_size(c, first_element);
+		// If the first element is smaller than both the enclosing struct and the destination
+		// size, then we're done: stay at the current level.
+		if (first_element_size < dest_size && first_element_size < llvm_store_size(c, *type))
+		{
+			return;
+		}
+		// After this step *type may be a non-struct; the kind test in the loop condition guards the struct-only queries.
+		*struct_ptr = LLVMBuildStructGEP(c->builder, *struct_ptr, 0, "dive");
+		*type = first_element;
+	}
+}
+
+LLVMValueRef llvm_int_resize(GenContext *c, LLVMValueRef value, LLVMTypeRef from, LLVMTypeRef to)
+{
+	if (llvm_store_size(c, from) >= llvm_store_size(c, to)) // Source store size is at least the target's.
+	{
+		return LLVMBuildTruncOrBitCast(c->builder, value, to, "trunc");
+	}
+	return LLVMBuildZExt(c->builder, value, to, "ext"); // Widening is always a zero extension here.
+}
+
+/**
+ * Convert a value between integer and pointer types: int <-> int, ptr <-> int and ptr <-> ptr.
+ */
+LLVMValueRef llvm_coerce_int_ptr(GenContext *c, LLVMValueRef value, LLVMTypeRef from, LLVMTypeRef to)
+{
+	// 1. Are they the same?
+	if (from == to) return value;
+
+	// 2. If the source is a pointer, either bitcast it (ptr -> ptr) or turn it into an integer first.
+	bool to_is_pointer = LLVMGetTypeKind(to) == LLVMPointerTypeKind;
+	if (LLVMGetTypeKind(from) == LLVMPointerTypeKind)
+	{
+		// 2a. Destination is a pointer, perform a bitcast.
+		if (to_is_pointer)
+		{
+			return LLVMBuildBitCast(c->builder, value, to, "coerce.val");
+		}
+		// 2b. Otherwise perform ptr -> int
+		from = llvm_get_type(c, type_iptr);
+		value = LLVMBuildPtrToInt(c->builder, value, from, "");
+	}
+
+	// 3. Find the int type to convert to (the iptr type when the destination is a pointer).
+	LLVMTypeRef to_int_type = to_is_pointer ? llvm_get_type(c, type_iptr) : to;
+
+	// 4. Are int types not matching?
+	if (to_int_type != from)
+	{
+		if (platform_target.little_endian)
+		{
+			// Little-endian targets preserve the low bits. No shifts required.
+			value = LLVMBuildIntCast2(c->builder, value, to_int_type, false, "");
+		}
+		else
+		{
+			// Big endian, preserve the high bits. Each shift constant must have the type of the value it shifts.
+			ByteSize to_size = llvm_abi_size(c, to_int_type);
+			ByteSize from_size = llvm_abi_size(c, from);
+			if (from_size > to_size)
+			{
+				value = LLVMBuildLShr(c->builder, value, LLVMConstInt(from, (from_size - to_size) * 8, false), "");
+				value = LLVMBuildTrunc(c->builder, value, to_int_type, "");
+			}
+			else
+			{
+				value = LLVMBuildZExt(c->builder, value, to_int_type, "");
+				value = LLVMBuildShl(c->builder, value, LLVMConstInt(to_int_type, (to_size - from_size) * 8, false), "");
+			}
+		}
+	}
+	if (to_is_pointer)
+	{
+		value = LLVMBuildIntToPtr(c->builder, value, to, "");
+	}
+	return value;
+}
+
 LLVMValueRef llvm_emit_coerce(GenContext *c, LLVMTypeRef coerced, BEValue *value, Type *original_type)
 {
-	LLVMValueRef cast;
-	AlignSize target_alignment = llvm_abi_alignment(c, coerced);
+	assert(original_type->canonical == value->type->canonical);
+	LLVMTypeRef llvm_source_type = llvm_get_type(c, value->type);
+
+	// 1. If the types match then we're done, just load.
+	if (llvm_source_type == coerced)
+	{
+		llvm_value_rvalue_store(c, value);
+		return value->value;
+	}
+
+	// 2. Both types are integers and the value is not an address: just truncate / extend.
+	if (!llvm_value_is_addr(value)
+		&& LLVMGetTypeKind(coerced) == LLVMIntegerTypeKind
+		&& LLVMGetTypeKind(llvm_source_type) == LLVMIntegerTypeKind)
+	{
+		return llvm_int_resize(c, value->value, llvm_source_type, coerced);
+	}
+
+	// 3. From now on we need the address.
+	llvm_value_addr(c, value);
+	LLVMValueRef addr = value->value;
+
+	ByteSize target_size = llvm_store_size(c, coerced);
+
+	// 4. If this is a struct, we index into it.
+	if (LLVMGetTypeKind(llvm_source_type) == LLVMStructTypeKind)
+	{
+		llvm_enter_struct_for_coerce(c, &addr, &llvm_source_type, target_size);
+	}
+	// --> from now on we only use LLVM types (llvm_source_type may have been replaced above).
+
+	ByteSize source_size = llvm_store_size(c, llvm_source_type);
+
+	LLVMTypeKind source_type_kind = LLVMGetTypeKind(llvm_source_type);
+	LLVMTypeKind coerced_type_kind = LLVMGetTypeKind(coerced);
+
+	if ((coerced_type_kind == LLVMPointerTypeKind || coerced_type_kind == LLVMIntegerTypeKind)
+		&& (source_type_kind == LLVMPointerTypeKind || source_type_kind == LLVMIntegerTypeKind))
+	{
+		LLVMValueRef val = llvm_emit_load_aligned(c, llvm_source_type, addr, value->alignment, "");
+		return llvm_coerce_int_ptr(c, val, llvm_source_type, coerced);
+	}
+
+	// A larger source is loaded directly through a cast pointer. TODO for scalable vectors this is not true.
+	if (source_size > target_size)
+	{
+		LLVMValueRef val = LLVMBuildBitCast(c->builder, addr, LLVMPointerType(coerced, 0), "");
+		return llvm_emit_load_aligned(c, coerced, val, value->alignment, "");
+	}
+
+	// Otherwise, do it through memory: copy source_size bytes into a temporary of the coerced type and load that.
 	AlignSize max_align = MAX(value->alignment, llvm_abi_alignment(c, coerced));
 
-	// If we are loading something with greater alignment than what we have, we cannot directly memcpy.
-	if (llvm_value_is_addr(value) && value->alignment < target_alignment)
+	LLVMValueRef temp = llvm_emit_alloca(c, coerced, max_align, "tempcoerce");
+	llvm_emit_memcpy(c, temp, max_align, addr, value->alignment, source_size);
+	return llvm_emit_load_aligned(c, coerced, temp, max_align, "");
+}
+
+
+void llvm_emit_coerce_store(GenContext *c, LLVMValueRef addr, AlignSize alignment, LLVMTypeRef coerced, LLVMValueRef value, LLVMTypeRef target_type)
+{
+
+	// 1. Simplest case, the underlying types match: store the value as-is.
+	if (coerced == target_type)
 	{
-		// So load it instead.
-		llvm_value_rvalue(c, value);
+		llvm_store_aligned(c, addr, value, alignment);
+		return;
 	}
 
-	// In this case we have something nicely aligned, so we just do a cast.
-	if (llvm_value_is_addr(value))
+	ByteSize src_size = llvm_store_size(c, coerced);
+
+	// 2. Enter into a struct in case the result is a struct (addr and target_type may be replaced).
+	if (LLVMGetTypeKind(target_type) == LLVMStructTypeKind)
 	{
-		cast = LLVMBuildBitCast(c->builder, value->value, LLVMPointerType(coerced, 0), "");
+		llvm_enter_struct_for_coerce(c, &addr, &target_type, src_size);
 	}
-	else
+
+	// 3. If we are going from int/ptr <-> ptr/int, convert the value and store it directly.
+	LLVMTypeKind source_type_kind = LLVMGetTypeKind(target_type);
+	LLVMTypeKind coerced_type_kind = LLVMGetTypeKind(coerced);
+	if ((coerced_type_kind == LLVMPointerTypeKind || coerced_type_kind == LLVMIntegerTypeKind)
+	    && (source_type_kind == LLVMPointerTypeKind || source_type_kind == LLVMIntegerTypeKind))
 	{
-		cast = llvm_emit_alloca(c, coerced, max_align, "coerce");
-		LLVMValueRef target = LLVMBuildBitCast(c->builder, cast, llvm_get_ptr_type(c, value->type), "");
-		llvm_store_bevalue_aligned(c, target, value, max_align);
+		value = llvm_coerce_int_ptr(c, value, coerced, target_type);
+		llvm_store_aligned(c, addr, value, alignment);
+		return;
 	}
-	return llvm_emit_load_aligned(c, coerced, cast, max_align, "coerced");
+
+	// 4. The value fits in the target: store through a cast pointer. TODO for scalable vectors this is not true.
+	ByteSize target_size = llvm_store_size(c, target_type);
+	if (src_size <= target_size)
+	{
+		LLVMValueRef val = LLVMBuildBitCast(c->builder, addr, LLVMPointerType(coerced, 0), "");
+		llvm_store_aligned(c, val, value, alignment);
+		return;
+	}
+
+	// 5. Otherwise, do it through memory: store to a temporary and copy only target_size bytes to addr.
+	AlignSize coerce_align = llvm_abi_alignment(c, coerced);
+	LLVMValueRef temp = llvm_emit_alloca(c, coerced, coerce_align, "tempcoerce");
+	llvm_store_aligned(c, temp, value, coerce_align);
+	llvm_emit_memcpy(c, addr, alignment, temp, coerce_align, target_size);
 }
 
 void llvm_emit_convert_value_from_coerced(GenContext *c, BEValue *result, LLVMTypeRef coerced, LLVMValueRef value, Type *original_type)
 {
-	unsigned max_align = MAX(llvm_abi_alignment(c, coerced), type_abi_alignment(original_type));
-	LLVMValueRef temp = llvm_emit_alloca(c, coerced, max_align, "coerce_temp");
-	llvm_store_aligned(c, temp, value, max_align);
-	temp = LLVMBuildBitCast(c->builder, temp, llvm_get_type(c, type_get_ptr(original_type)), "");
-	llvm_value_set_address_align(result, temp, original_type, max_align);
+	LLVMTypeRef target_type = llvm_get_type(c, original_type); // LLVM type of the uncoerced value.
+	LLVMValueRef addr = llvm_emit_alloca(c, target_type, type_abi_alignment(original_type), "result"); // Slot typed and aligned as the original type.
+	llvm_emit_coerce_store(c, addr, type_abi_alignment(original_type), coerced, value, target_type); // Write the coerced value into the slot.
+	llvm_value_set_address(result, addr, original_type); // The result refers to the slot by address.
 }
 
-static inline LLVMValueRef
-llvm_emit_sub_int(GenContext *c, Type *type, LLVMValueRef left, LLVMValueRef right)
+static inline LLVMValueRef llvm_emit_sub_int(GenContext *c, Type *type, LLVMValueRef left, LLVMValueRef right)
 {
 	if (active_target.feature.trap_on_wrap)
 	{
@@ -2409,6 +2571,7 @@ void gencontext_emit_call_intrinsic_expr(GenContext *c, BEValue *be_value, Expr
 
 void llvm_emit_parameter(GenContext *c, LLVMValueRef **args, ABIArgInfo *info, BEValue *be_value, Type *type)
 {
+	assert(be_value->type->canonical == type->canonical);
 	switch (info->kind)
 	{
 		case ABI_ARG_IGNORE:
diff --git a/src/compiler/llvm_codegen_function.c b/src/compiler/llvm_codegen_function.c
index ab1b61225..22e34809f 100644
--- a/src/compiler/llvm_codegen_function.c
+++ b/src/compiler/llvm_codegen_function.c
@@ -178,20 +178,22 @@ static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, unsig
 				llvm_store_aligned_decl(c, decl, llvm_get_next_param(c, index));
 				return;
 			}
-			// Cast to the coerce type.
-			LLVMValueRef cast = LLVMBuildBitCast(c->builder, decl->backend_ref, LLVMPointerType(coerce_type, 0), "coerce");
 
 			// If we're not flattening, we simply do a store.
 			if (!abi_info_should_flatten(info))
 			{
 				LLVMValueRef param = llvm_get_next_param(c, index);
 				// Store it with the alignment of the decl.
-				llvm_store_aligned(c, cast, param, decl->alignment);
+				llvm_emit_coerce_store(c, decl->backend_ref, decl->alignment, coerce_type, param, llvm_get_type(c, decl->type));
 				return;
 			}
 
 			// In this case we've been flattening the parameter into multiple registers.
 			LLVMTypeRef element_type = llvm_abi_type(c, info->direct_coerce.type);
+
+			// Cast to the coerce type.
+			LLVMValueRef cast = LLVMBuildBitCast(c->builder, decl->backend_ref, LLVMPointerType(coerce_type, 0), "coerce");
+
 			// Store each expanded parameter.
 			for (unsigned idx = 0; idx < info->direct_coerce.elements; idx++)
 			{
@@ -469,7 +471,7 @@ void llvm_emit_function_body(GenContext *context, Decl *decl)
 static void llvm_emit_param_attributes(GenContext *context, LLVMValueRef function, ABIArgInfo *info, bool is_return, int index, int last_index)
 {
 	assert(last_index == index || info->kind == ABI_ARG_DIRECT_PAIR || info->kind == ABI_ARG_IGNORE
-	       || info->kind == ABI_ARG_EXPAND);
+	       || info->kind == ABI_ARG_EXPAND || info->kind == ABI_ARG_DIRECT_COERCE);
 
 	if (info->attributes.zeroext)
 	{
diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h
index 4bbbe8856..3de6d6329 100644
--- a/src/compiler/llvm_codegen_internal.h
+++ b/src/compiler/llvm_codegen_internal.h
@@ -218,6 +218,7 @@ void llvm_emit_br(GenContext *c, LLVMBasicBlockRef next_block);
 void llvm_emit_compound_stmt(GenContext *context, Ast *ast);
 void llvm_emit_and_set_decl_alloca(GenContext *c, Decl *decl);
 void llvm_emit_convert_value_from_coerced(GenContext *c, BEValue *result, LLVMTypeRef coerced, LLVMValueRef value, Type *original_type);
+void llvm_emit_coerce_store(GenContext *c, LLVMValueRef addr, AlignSize alignment, LLVMTypeRef coerced, LLVMValueRef value, LLVMTypeRef target_type);
 void llvm_emit_function_body(GenContext *context, Decl *decl);
 void llvm_emit_function_decl(GenContext *c, Decl *decl);
 LLVMValueRef llvm_emit_call_intrinsic(GenContext *c, unsigned intrinsic_id, LLVMTypeRef *types, unsigned type_count, LLVMValueRef *values, unsigned arg_count);
diff --git a/src/compiler/types.c b/src/compiler/types.c
index 2561f2129..07ce8c876 100644
--- a/src/compiler/types.c
+++ b/src/compiler/types.c
@@ -571,7 +571,7 @@ bool type_is_homogenous_aggregate(Type *type, Type **base, unsigned *elements)
 			break;
 		case ALL_SIGNED_INTS:
 			// Lower signed to unsigned
-			type = type_int_unsigned_by_bitsize(type->builtin.bytesize);
+			type = type_int_unsigned_by_bitsize(type->builtin.bitsize);
 			break;
 		case ALL_UNSIGNED_INTS:
 		case ALL_REAL_FLOATS:
diff --git a/test/test_suite/abi/literal_load.c3t b/test/test_suite/abi/literal_load.c3t
index 951fe4fc0..b06591eef 100644
--- a/test/test_suite/abi/literal_load.c3t
+++ b/test/test_suite/abi/literal_load.c3t
@@ -21,11 +21,11 @@ func Test creator()
   %literal1 = alloca %Test, align 4
   %0 = bitcast %Test* %literal to i8*
   call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
-  %1 = bitcast %Test* %literal to i32*
-  %coerced = load i32, i32* %1, align 4
-  call void @blorg(i32 %coerced)
+  %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
+  %1 = load i32, i32* %dive, align 4
+  call void @blorg(i32 %1)
   %2 = bitcast %Test* %literal1 to i8*
   call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 4, i1 false)
-  %3 = bitcast %Test* %literal1 to i32*
-  %coerced2 = load i32, i32* %3, align 4
-  ret i32 %coerced2
\ No newline at end of file
+  %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
+  %3 = load i32, i32* %dive2, align 4
+  ret i32 %3
\ No newline at end of file
diff --git a/test/test_suite/abi/literal_load_aarch64.c3t b/test/test_suite/abi/literal_load_aarch64.c3t
new file mode 100644
index 000000000..390e32a2e
--- /dev/null
+++ b/test/test_suite/abi/literal_load_aarch64.c3t
@@ -0,0 +1,36 @@
+// #target: aarch64_linux
+module literal_load;
+
+struct Test
+{
+    int x;
+}
+
+Test foo = {};
+
+extern func void blorg(Test t);
+
+func Test creator()
+{
+    blorg(Test({}));
+    return Test({});
+}
+
+// #expect: literal_load.ll
+
+declare void @blorg(i64)
+
+  %literal = alloca %Test, align 4
+  %literal1 = alloca %Test, align 4
+  %0 = bitcast %Test* %literal to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
+  %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
+  %1 = load i32, i32* %dive, align 4
+  %2 = zext i32 %1 to i64
+  call void @blorg(i64 %2)
+  %3 = bitcast %Test* %literal1 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %3, i8 0, i64 4, i1 false)
+  %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
+  %4 = load i32, i32* %dive2, align 4
+  %5 = zext i32 %4 to i64
+  ret i64 %5
\ No newline at end of file
diff --git a/test/test_suite/abi/literal_load_mingw.c3t b/test/test_suite/abi/literal_load_mingw.c3t
new file mode 100644
index 000000000..3f5900bdd
--- /dev/null
+++ b/test/test_suite/abi/literal_load_mingw.c3t
@@ -0,0 +1,32 @@
+// #target: x64_mingw
+module literal_load;
+
+struct Test
+{
+    int x;
+}
+
+Test foo = {};
+
+extern func void blorg(Test t);
+
+func Test creator()
+{
+    blorg(Test({}));
+    return Test({});
+}
+
+// #expect: literal_load.ll
+
+  %literal = alloca %Test, align 4
+  %literal1 = alloca %Test, align 4
+  %0 = bitcast %Test* %literal to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false)
+  %dive = getelementptr inbounds %Test, %Test* %literal, i32 0, i32 0
+  %1 = load i32, i32* %dive, align 4
+  call void @blorg(i32 %1)
+  %2 = bitcast %Test* %literal1 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 4, i1 false)
+  %dive2 = getelementptr inbounds %Test, %Test* %literal1, i32 0, i32 0
+  %3 = load i32, i32* %dive2, align 4
+  ret i32 %3
\ No newline at end of file
diff --git a/test/test_suite/errors/rethrow.c3t b/test/test_suite/errors/rethrow.c3t
index 0b3390961..aa9059162 100644
--- a/test/test_suite/errors/rethrow.c3t
+++ b/test/test_suite/errors/rethrow.c3t
@@ -9,10 +9,12 @@ func void! test()
 // #expect: rethrow.ll
 
 entry:
-  %i = alloca i32, align 4
+ %i = alloca i32, align 4
   %i.f = alloca %error_union, align 8
   %error_var = alloca %error_union, align 8
-  %coerce = alloca { i64, i64 }, align 8
+  %tempcoerce = alloca { i64, i64 }, align 8
+  %tempaddr = alloca %error_union, align 8
+  %tempcoerce1 = alloca { i64, i64 }, align 8
   store %error_union zeroinitializer, %error_union* %i.f, align 8
   store i32 0, i32* %i, align 4
   %err_domain = getelementptr inbounds %error_union, %error_union* %i.f, i32 0, i32 0
@@ -30,13 +32,18 @@ after_check:
   %3 = load i32, i32* %i, align 4
   br label %noerr_block
 
-guard_block:
-  %4 = bitcast %error_union* %error_var to { i64, i64 }*
-  %coerced = load { i64, i64 }, { i64, i64 }* %4, align 8
-  ret { i64, i64 } %coerced
+guard_block:                                      ; preds = %error
+  %4 = bitcast { i64, i64 }* %tempcoerce to i8*
+  %5 = bitcast %error_union* %error_var to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %4, i8* align 8 %5, i32 16, i1 false)
+  %6 = load { i64, i64 }, { i64, i64 }* %tempcoerce, align 8
+  ret { i64, i64 } %6
 
-noerr_block:
-  %5 = bitcast { i64, i64 }* %coerce to %error_union*
-  store %error_union zeroinitializer, %error_union* %5, align 8
-  %coerced1 = load { i64, i64 }, { i64, i64 }* %coerce, align 8
-  ret { i64, i64 } %coerced1
+noerr_block:                                      ; preds = %after_check
+  store %error_union zeroinitializer, %error_union* %tempaddr, align 8
+  %7 = bitcast { i64, i64 }* %tempcoerce1 to i8*
+  %8 = bitcast %error_union* %tempaddr to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %7, i8* align 8 %8, i32 16, i1 false)
+  %9 = load { i64, i64 }, { i64, i64 }* %tempcoerce1, align 8
+  ret { i64, i64 } %9
+}
\ No newline at end of file
diff --git a/test/test_suite/functions/splat.c3t b/test/test_suite/functions/splat.c3t
index 81c382ec2..84d5b75a3 100644
--- a/test/test_suite/functions/splat.c3t
+++ b/test/test_suite/functions/splat.c3t
@@ -1,3 +1,4 @@
+// #target: x64_darwin
 module splat;
 
 extern func int sum_us(int... x);
@@ -14,7 +15,7 @@ func void test()
 
 // #expect: splat.ll
 
- %vararg = alloca %"int[]", align 8
+  %vararg = alloca %"int[]", align 8
   %varargslots = alloca [3 x i32], align 4
   %x = alloca [3 x i32], align 4
   %z = alloca %"int[]", align 8
diff --git a/test/test_suite/functions/splat_aarch64.c3t b/test/test_suite/functions/splat_aarch64.c3t
new file mode 100644
index 000000000..41177c633
--- /dev/null
+++ b/test/test_suite/functions/splat_aarch64.c3t
@@ -0,0 +1,74 @@
+// #target: aarch64_linux
+module splat;
+
+extern func int sum_us(int... x);
+
+func void test()
+{
+    sum_us(1, 2, 3);
+    int[3] x = { 1, 2, 3 };
+    int[] z = &x;
+    sum_us(...x);
+    sum_us(...z);
+    sum_us();
+}
+
+// #expect: splat.ll
+
+%vararg = alloca %"int[]", align 8
+  %varargslots = alloca [3 x i32], align 4
+  %x = alloca [3 x i32], align 4
+  %z = alloca %"int[]", align 8
+  %vararg1 = alloca %"int[]", align 8
+  %vararg2 = alloca %"int[]", align 8
+  %vararg3 = alloca %"int[]", align 8
+  %0 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 0
+  store i32 1, i32* %0, align 4
+  %1 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 1
+  store i32 2, i32* %1, align 4
+  %2 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 2
+  store i32 3, i32* %2, align 4
+  %3 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 1
+  store i64 3, i64* %3, align 8
+  %4 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0
+  %5 = bitcast [3 x i32]* %varargslots to i32*
+  store i32* %5, i32** %4, align 8
+  %6 = bitcast %"int[]"* %vararg to { i64, i64 }*
+  %7 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %6, i32 0, i32 0
+  %8 = load i64, i64* %7, align 8
+  %9 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %6, i32 0, i32 1
+  %10 = load i64, i64* %9, align 8
+  %11 = call i32 @sum_us(i64 %8, i64 %10)
+  %12 = bitcast [3 x i32]* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %12, i8* align 4 bitcast ([3 x i32]* @.__const to i8*), i32 12, i1 false)
+  %13 = bitcast [3 x i32]* %x to i32*
+  %14 = insertvalue %"int[]" undef, i32* %13, 0
+  %15 = insertvalue %"int[]" %14, i64 3, 1
+  store %"int[]" %15, %"int[]"* %z, align 8
+  %16 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 1
+  %17 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 0
+  store i64 3, i64* %16, align 8
+  %18 = bitcast [3 x i32]* %x to i32*
+  store i32* %18, i32** %17, align 8
+  %19 = bitcast %"int[]"* %vararg1 to { i64, i64 }*
+  %20 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %19, i32 0, i32 0
+  %21 = load i64, i64* %20, align 8
+  %22 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %19, i32 0, i32 1
+  %23 = load i64, i64* %22, align 8
+  %24 = call i32 @sum_us(i64 %21, i64 %23)
+  %25 = getelementptr inbounds %"int[]", %"int[]"* %vararg2, i32 0, i32 1
+  %26 = getelementptr inbounds %"int[]", %"int[]"* %vararg2, i32 0, i32 0
+  %27 = bitcast %"int[]"* %z to { i64, i64 }*
+  %28 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %27, i32 0, i32 0
+  %29 = load i64, i64* %28, align 8
+  %30 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %27, i32 0, i32 1
+  %31 = load i64, i64* %30, align 8
+  %32 = call i32 @sum_us(i64 %29, i64 %31)
+  %33 = getelementptr inbounds %"int[]", %"int[]"* %vararg3, i32 0, i32 1
+  store i64 0, i64* %33, align 8
+  %34 = bitcast %"int[]"* %vararg3 to { i64, i64 }*
+  %35 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %34, i32 0, i32 0
+  %36 = load i64, i64* %35, align 8
+  %37 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %34, i32 0, i32 1
+  %38 = load i64, i64* %37, align 8
+  %39 = call i32 @sum_us(i64 %36, i64 %38)
diff --git a/test/test_suite/functions/splat_mingw.c3t b/test/test_suite/functions/splat_mingw.c3t
new file mode 100644
index 000000000..68b59dd36
--- /dev/null
+++ b/test/test_suite/functions/splat_mingw.c3t
@@ -0,0 +1,71 @@
+// #target: x64_mingw
+module splat;
+
+extern func int sum_us(int... x);
+
+func void test()
+{
+    sum_us(1, 2, 3);
+    int[3] x = { 1, 2, 3 };
+    int[] z = &x;
+    sum_us(...x);
+    sum_us(...z);
+    sum_us();
+}
+
+// #expect: splat.ll
+
+   %vararg = alloca %"int[]", align 8
+   %varargslots = alloca [3 x i32], align 4
+   %indirectarg = alloca %"int[]", align 8
+   %x = alloca [3 x i32], align 4
+   %z = alloca %"int[]", align 8
+   %vararg1 = alloca %"int[]", align 8
+   %indirectarg2 = alloca %"int[]", align 8
+   %vararg3 = alloca %"int[]", align 8
+   %indirectarg4 = alloca %"int[]", align 8
+   %vararg5 = alloca %"int[]", align 8
+   %indirectarg6 = alloca %"int[]", align 8
+   %0 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 0
+   store i32 1, i32* %0, align 4
+   %1 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 1
+   store i32 2, i32* %1, align 4
+   %2 = getelementptr inbounds [3 x i32], [3 x i32]* %varargslots, i64 0, i64 2
+   store i32 3, i32* %2, align 4
+   %3 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 1
+   store i64 3, i64* %3, align 8
+   %4 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0
+   %5 = bitcast [3 x i32]* %varargslots to i32*
+   store i32* %5, i32** %4, align 8
+   %6 = bitcast %"int[]"* %indirectarg to i8*
+   %7 = bitcast %"int[]"* %vararg to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %6, i8* align 8 %7, i32 16, i1 false)
+   %8 = call i32 @sum_us(%"int[]"* %indirectarg)
+   %9 = bitcast [3 x i32]* %x to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %9, i8* align 4 bitcast ([3 x i32]* @.__const to i8*), i32 12, i1 false)
+   %10 = bitcast [3 x i32]* %x to i32*
+   %11 = insertvalue %"int[]" undef, i32* %10, 0
+   %12 = insertvalue %"int[]" %11, i64 3, 1
+   store %"int[]" %12, %"int[]"* %z, align 8
+   %13 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 1
+   %14 = getelementptr inbounds %"int[]", %"int[]"* %vararg1, i32 0, i32 0
+   store i64 3, i64* %13, align 8
+   %15 = bitcast [3 x i32]* %x to i32*
+   store i32* %15, i32** %14, align 8
+   %16 = bitcast %"int[]"* %indirectarg2 to i8*
+   %17 = bitcast %"int[]"* %vararg1 to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %16, i8* align 8 %17, i32 16, i1 false)
+   %18 = call i32 @sum_us(%"int[]"* %indirectarg2)
+   %19 = getelementptr inbounds %"int[]", %"int[]"* %vararg3, i32 0, i32 1
+   %20 = getelementptr inbounds %"int[]", %"int[]"* %vararg3, i32 0, i32 0
+   %21 = bitcast %"int[]"* %indirectarg4 to i8*
+   %22 = bitcast %"int[]"* %z to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %21, i8* align 8 %22, i32 16, i1 false)
+   %23 = call i32 @sum_us(%"int[]"* %indirectarg4)
+   %24 = getelementptr inbounds %"int[]", %"int[]"* %vararg5, i32 0, i32 1
+   store i64 0, i64* %24, align 8
+   %25 = bitcast %"int[]"* %indirectarg6 to i8*
+   %26 = bitcast %"int[]"* %vararg5 to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %25, i8* align 8 %26, i32 16, i1 false)
+   %27 = call i32 @sum_us(%"int[]"* %indirectarg6)
+   ret void
diff --git a/test/test_suite/functions/test_regression.c3t b/test/test_suite/functions/test_regression.c3t
index d6e6a41a5..64b5a2cee 100644
--- a/test/test_suite/functions/test_regression.c3t
+++ b/test/test_suite/functions/test_regression.c3t
@@ -445,6 +445,7 @@ entry:
   %i1 = alloca i32, align 4
   %a = alloca %Blob, align 4
   %b = alloca %Blob.0, align 8
+  %tempcoerce = alloca double, align 8
   %ddx = alloca %Foo, align 4
   %fro = alloca i32, align 4
   %x = alloca [4 x i32], align 16
@@ -529,111 +530,114 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %21, i8* align 4 bitcast (%Blob* @.__const.6 to i8*), i32 4, i1 false)
   %22 = bitcast %Blob.0* %b to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %22, i8* align 8 bitcast (%Blob.0* @.__const.7 to i8*), i32 8, i1 false)
-  %23 = bitcast %Blob* %a to i32*
-  %coerced = load i32, i32* %23, align 4
-  %24 = call i32 @test2.int.getValue(i32 %coerced)
+  %dive = getelementptr inbounds %Blob, %Blob* %a, i32 0, i32 0
+  %23 = load i32, i32* %dive, align 4
+  %24 = call i32 @test2.int.getValue(i32 %23)
   %25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.8, i32 0, i32 0), i32 %24)
-  %26 = bitcast %Blob.0* %b to double*
-  %coerced10 = load double, double* %26, align 8
-  %27 = call double @test2.double.getValue(double %coerced10)
-  %28 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.9, i32 0, i32 0), double %27)
-  %29 = call i32 @test2.int.getMult(i32 25)
-  %30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.10, i32 0, i32 0), i32 %29)
-  %31 = call double @test2.double.getMult(double 3.300000e+00)
-  %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.11, i32 0, i32 0), double %31)
+  %dive10 = getelementptr inbounds %Blob.0, %Blob.0* %b, i32 0, i32 0
+  %26 = bitcast double* %tempcoerce to i8*
+  %27 = bitcast double* %dive10 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %26, i8* align 8 %27, i32 8, i1 false)
+  %28 = load double, double* %tempcoerce, align 8
+  %29 = call double @test2.double.getValue(double %28)
+  %30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.9, i32 0, i32 0), double %29)
+  %31 = call i32 @test2.int.getMult(i32 25)
+  %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.10, i32 0, i32 0), i32 %31)
+  %33 = call double @test2.double.getMult(double 3.300000e+00)
+  %34 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.11, i32 0, i32 0), double %33)
   call void @test.helloWorld()
-  %33 = bitcast %Foo* %ddx to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %33, i8 0, i64 8, i1 false)
+  %35 = bitcast %Foo* %ddx to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %35, i8 0, i64 8, i1 false)
   store i32 3, i32* %fro, align 4
-  %34 = bitcast [4 x i32]* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %34, i8* align 16 bitcast ([4 x i32]* @.__const.12 to i8*), i32 16, i1 false)
-  %35 = load i32, i32* %fro, align 4
-  %36 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 1
-  %37 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0
-  store i64 4, i64* %36, align 8
-  %38 = bitcast [4 x i32]* %x to i32*
-  store i32* %38, i32** %37, align 8
+  %36 = bitcast [4 x i32]* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %36, i8* align 16 bitcast ([4 x i32]* @.__const.12 to i8*), i32 16, i1 false)
+  %37 = load i32, i32* %fro, align 4
+  %38 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 1
+  %39 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0
+  store i64 4, i64* %38, align 8
+  %40 = bitcast [4 x i32]* %x to i32*
+  store i32* %40, i32** %39, align 8
   %casttemp = bitcast %"int[]"* %vararg to { i64, i8* }*
   %lo = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp, i32 0, i32 0
   %lo11 = load i64, i64* %lo, align 8
   %hi = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp, i32 0, i32 1
   %hi12 = load i8*, i8** %hi, align 8
-  %39 = call i32 @test.sum_us(i64 %lo11, i8* %hi12)
-  %40 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.13, i32 0, i32 0), i32 %39)
-  %add13 = add i32 %35, %40
+  %41 = call i32 @test.sum_us(i64 %lo11, i8* %hi12)
+  %42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.13, i32 0, i32 0), i32 %41)
+  %add13 = add i32 %37, %42
   store i32 %add13, i32* %fro, align 4
-  %41 = load i32, i32* %fro, align 4
-  %42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.14, i32 0, i32 0), i32 %41)
-  %43 = bitcast [4 x i32]* %x to i32*
-  %44 = insertvalue %"int[]" undef, i32* %43, 0
-  %45 = insertvalue %"int[]" %44, i64 4, 1
-  store %"int[]" %45, %"int[]"* %z, align 8
-  %46 = bitcast [3 x i32]* %de to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %46, i8* align 4 bitcast ([3 x i32]* @.__const.15 to i8*), i32 12, i1 false)
-  %47 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 1
-  %48 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 0
-  store i64 4, i64* %47, align 8
-  %49 = bitcast [4 x i32]* %x to i32*
-  store i32* %49, i32** %48, align 8
+  %43 = load i32, i32* %fro, align 4
+  %44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.14, i32 0, i32 0), i32 %43)
+  %45 = bitcast [4 x i32]* %x to i32*
+  %46 = insertvalue %"int[]" undef, i32* %45, 0
+  %47 = insertvalue %"int[]" %46, i64 4, 1
+  store %"int[]" %47, %"int[]"* %z, align 8
+  %48 = bitcast [3 x i32]* %de to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %48, i8* align 4 bitcast ([3 x i32]* @.__const.15 to i8*), i32 12, i1 false)
+  %49 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 1
+  %50 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 0
+  store i64 4, i64* %49, align 8
+  %51 = bitcast [4 x i32]* %x to i32*
+  store i32* %51, i32** %50, align 8
   %casttemp15 = bitcast %"int[]"* %vararg14 to { i64, i8* }*
   %lo16 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp15, i32 0, i32 0
   %lo17 = load i64, i64* %lo16, align 8
   %hi18 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp15, i32 0, i32 1
   %hi19 = load i8*, i8** %hi18, align 8
-  %50 = call i32 @test.sum_us(i64 %lo17, i8* %hi19)
-  %51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.16, i32 0, i32 0), i32 %50)
-  %52 = getelementptr inbounds %"int[]", %"int[]"* %vararg20, i32 0, i32 1
-  %53 = getelementptr inbounds %"int[]", %"int[]"* %vararg20, i32 0, i32 0
+  %52 = call i32 @test.sum_us(i64 %lo17, i8* %hi19)
+  %53 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.16, i32 0, i32 0), i32 %52)
+  %54 = getelementptr inbounds %"int[]", %"int[]"* %vararg20, i32 0, i32 1
+  %55 = getelementptr inbounds %"int[]", %"int[]"* %vararg20, i32 0, i32 0
   %casttemp21 = bitcast %"int[]"* %z to { i64, i8* }*
   %lo22 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp21, i32 0, i32 0
   %lo23 = load i64, i64* %lo22, align 8
   %hi24 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp21, i32 0, i32 1
   %hi25 = load i8*, i8** %hi24, align 8
-  %54 = call i32 @test.sum_us(i64 %lo23, i8* %hi25)
-  %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i32 0, i32 0), i32 %54)
-  %56 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 0
-  store i32 1, i32* %56, align 4
-  %57 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 1
-  store i32 2, i32* %57, align 4
-  %58 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 2
-  store i32 4, i32* %58, align 4
-  %59 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 3
-  store i32 5, i32* %59, align 4
-  %60 = getelementptr inbounds %"int[]", %"int[]"* %vararg26, i32 0, i32 1
-  store i64 4, i64* %60, align 8
-  %61 = getelementptr inbounds %"int[]", %"int[]"* %vararg26, i32 0, i32 0
-  %62 = bitcast [4 x i32]* %varargslots to i32*
-  store i32* %62, i32** %61, align 8
+  %56 = call i32 @test.sum_us(i64 %lo23, i8* %hi25)
+  %57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i32 0, i32 0), i32 %56)
+  %58 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 0
+  store i32 1, i32* %58, align 4
+  %59 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 1
+  store i32 2, i32* %59, align 4
+  %60 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 2
+  store i32 4, i32* %60, align 4
+  %61 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 3
+  store i32 5, i32* %61, align 4
+  %62 = getelementptr inbounds %"int[]", %"int[]"* %vararg26, i32 0, i32 1
+  store i64 4, i64* %62, align 8
+  %63 = getelementptr inbounds %"int[]", %"int[]"* %vararg26, i32 0, i32 0
+  %64 = bitcast [4 x i32]* %varargslots to i32*
+  store i32* %64, i32** %63, align 8
   %casttemp27 = bitcast %"int[]"* %vararg26 to { i64, i8* }*
   %lo28 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp27, i32 0, i32 0
   %lo29 = load i64, i64* %lo28, align 8
   %hi30 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp27, i32 0, i32 1
   %hi31 = load i8*, i8** %hi30, align 8
-  %63 = call i32 @test.sum_us(i64 %lo29, i8* %hi31)
-  %64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.18, i32 0, i32 0), i32 %63)
-  %65 = getelementptr inbounds [1 x i32], [1 x i32]* %varargslots33, i64 0, i64 0
-  store i32 1, i32* %65, align 4
-  %66 = getelementptr inbounds %"int[]", %"int[]"* %vararg32, i32 0, i32 1
-  store i64 1, i64* %66, align 8
-  %67 = getelementptr inbounds %"int[]", %"int[]"* %vararg32, i32 0, i32 0
-  %68 = bitcast [1 x i32]* %varargslots33 to i32*
-  store i32* %68, i32** %67, align 8
+  %65 = call i32 @test.sum_us(i64 %lo29, i8* %hi31)
+  %66 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.18, i32 0, i32 0), i32 %65)
+  %67 = getelementptr inbounds [1 x i32], [1 x i32]* %varargslots33, i64 0, i64 0
+  store i32 1, i32* %67, align 4
+  %68 = getelementptr inbounds %"int[]", %"int[]"* %vararg32, i32 0, i32 1
+  store i64 1, i64* %68, align 8
+  %69 = getelementptr inbounds %"int[]", %"int[]"* %vararg32, i32 0, i32 0
+  %70 = bitcast [1 x i32]* %varargslots33 to i32*
+  store i32* %70, i32** %69, align 8
   %casttemp34 = bitcast %"int[]"* %vararg32 to { i64, i8* }*
   %lo35 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp34, i32 0, i32 0
   %lo36 = load i64, i64* %lo35, align 8
   %hi37 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp34, i32 0, i32 1
   %hi38 = load i8*, i8** %hi37, align 8
-  %69 = call i32 @test.sum_us(i64 %lo36, i8* %hi38)
-  %70 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.19, i32 0, i32 0), i32 %69)
-  %71 = getelementptr inbounds %"int[]", %"int[]"* %vararg39, i32 0, i32 1
-  store i64 0, i64* %71, align 8
+  %71 = call i32 @test.sum_us(i64 %lo36, i8* %hi38)
+  %72 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.19, i32 0, i32 0), i32 %71)
+  %73 = getelementptr inbounds %"int[]", %"int[]"* %vararg39, i32 0, i32 1
+  store i64 0, i64* %73, align 8
   %casttemp40 = bitcast %"int[]"* %vararg39 to { i64, i8* }*
   %lo41 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp40, i32 0, i32 0
   %lo42 = load i64, i64* %lo41, align 8
   %hi43 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %casttemp40, i32 0, i32 1
   %hi44 = load i8*, i8** %hi43, align 8
-  %72 = call i32 @test.sum_us(i64 %lo42, i8* %hi44)
-  %73 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.20, i32 0, i32 0), i32 %72)
+  %74 = call i32 @test.sum_us(i64 %lo42, i8* %hi44)
+  %75 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.20, i32 0, i32 0), i32 %74)
   store i32 (double, %Bobo*)* null, i32 (double, %Bobo*)** %a1, align 8
   store i32 (double, %Bobo*)* null, i32 (double, %Bobo*)** %b2, align 8
   ret void
@@ -683,8 +687,8 @@ entry:
 define i32 @test2.int.getValue(i32 %0)
 entry:
   %blob = alloca %Blob, align 4
-  %coerce = bitcast %Blob* %blob to i32*
-  store i32 %0, i32* %coerce, align 4
+  %dive = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
+  store i32 %0, i32* %dive, align 4
   %1 = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
   %2 = load i32, i32* %1, align 4
   ret i32 %2
@@ -711,8 +715,8 @@ entry:
 define double @test2.double.getValue(double %0)
 entry:
   %blob = alloca %Blob, align 8
-  %coerce = bitcast %Blob* %blob to double*
-  store double %0, double* %coerce, align 8
+  %dive = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
+  store double %0, double* %dive, align 8
   %1 = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
   %2 = load double, double* %1, align 8
-  ret double %2
+  ret double %2
\ No newline at end of file
diff --git a/test/test_suite/functions/test_regression_mingw.c3t b/test/test_suite/functions/test_regression_mingw.c3t
index e7ac06de8..abfc977cd 100644
--- a/test/test_suite/functions/test_regression_mingw.c3t
+++ b/test/test_suite/functions/test_regression_mingw.c3t
@@ -445,6 +445,7 @@ entry:
   %i1 = alloca i32, align 4
   %a = alloca %Blob, align 4
   %b = alloca %Blob.0, align 8
+  %tempcoerce = alloca i64, align 8
   %ddx = alloca %Foo, align 4
   %fro = alloca i32, align 4
   %x = alloca [4 x i32], align 4
@@ -543,99 +544,102 @@ for.exit9:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %21, i8* align 4 bitcast (%Blob* @.__const.6 to i8*), i32 4, i1 false)
   %22 = bitcast %Blob.0* %b to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %22, i8* align 8 bitcast (%Blob.0* @.__const.7 to i8*), i32 8, i1 false)
-  %23 = bitcast %Blob* %a to i32*
-  %coerced = load i32, i32* %23, align 4
-  %24 = call i32 @test2.int.getValue(i32 %coerced)
+  %dive = getelementptr inbounds %Blob, %Blob* %a, i32 0, i32 0
+  %23 = load i32, i32* %dive, align 4
+  %24 = call i32 @test2.int.getValue(i32 %23)
   %25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.8, i32 0, i32 0), i32 %24)
-  %26 = bitcast %Blob.0* %b to i64*
-  %coerced10 = load i64, i64* %26, align 8
-  %27 = call double @test2.double.getValue(i64 %coerced10)
-  %28 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.9, i32 0, i32 0), double %27)
-  %29 = call i32 @test2.int.getMult(i32 25)
-  %30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.10, i32 0, i32 0), i32 %29)
-  %31 = call double @test2.double.getMult(double 3.300000e+00)
-  %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.11, i32 0, i32 0), double %31)
+  %dive10 = getelementptr inbounds %Blob.0, %Blob.0* %b, i32 0, i32 0
+  %26 = bitcast i64* %tempcoerce to i8*
+  %27 = bitcast double* %dive10 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %26, i8* align 8 %27, i32 8, i1 false)
+  %28 = load i64, i64* %tempcoerce, align 8
+  %29 = call double @test2.double.getValue(i64 %28)
+  %30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.9, i32 0, i32 0), double %29)
+  %31 = call i32 @test2.int.getMult(i32 25)
+  %32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.10, i32 0, i32 0), i32 %31)
+  %33 = call double @test2.double.getMult(double 3.300000e+00)
+  %34 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.11, i32 0, i32 0), double %33)
   call void @test.helloWorld()
-  %33 = bitcast %Foo* %ddx to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 4 %33, i8 0, i64 8, i1 false)
+  %35 = bitcast %Foo* %ddx to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %35, i8 0, i64 8, i1 false)
   store i32 3, i32* %fro, align 4
-  %34 = bitcast [4 x i32]* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %34, i8* align 4 bitcast ([4 x i32]* @.__const.12 to i8*), i32 16, i1 false)
-  %35 = load i32, i32* %fro, align 4
-  %36 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 1
-  %37 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0
-  store i64 4, i64* %36, align 8
-  %38 = bitcast [4 x i32]* %x to i32*
-  store i32* %38, i32** %37, align 8
-  %39 = bitcast %"int[]"* %indirectarg to i8*
-  %40 = bitcast %"int[]"* %vararg to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %39, i8* align 8 %40, i32 16, i1 false)
-  %41 = call i32 @test.sum_us(%"int[]"* %indirectarg)
-  %42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.13, i32 0, i32 0), i32 %41)
-  %add11 = add i32 %35, %42
+  %36 = bitcast [4 x i32]* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %36, i8* align 4 bitcast ([4 x i32]* @.__const.12 to i8*), i32 16, i1 false)
+  %37 = load i32, i32* %fro, align 4
+  %38 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 1
+  %39 = getelementptr inbounds %"int[]", %"int[]"* %vararg, i32 0, i32 0
+  store i64 4, i64* %38, align 8
+  %40 = bitcast [4 x i32]* %x to i32*
+  store i32* %40, i32** %39, align 8
+  %41 = bitcast %"int[]"* %indirectarg to i8*
+  %42 = bitcast %"int[]"* %vararg to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %41, i8* align 8 %42, i32 16, i1 false)
+  %43 = call i32 @test.sum_us(%"int[]"* %indirectarg)
+  %44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.13, i32 0, i32 0), i32 %43)
+  %add11 = add i32 %37, %44
   store i32 %add11, i32* %fro, align 4
-  %43 = load i32, i32* %fro, align 4
-  %44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.14, i32 0, i32 0), i32 %43)
-  %45 = bitcast [4 x i32]* %x to i32*
-  %46 = insertvalue %"int[]" undef, i32* %45, 0
-  %47 = insertvalue %"int[]" %46, i64 4, 1
-  store %"int[]" %47, %"int[]"* %z, align 8
-  %48 = bitcast [3 x i32]* %de to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %48, i8* align 4 bitcast ([3 x i32]* @.__const.15 to i8*), i32 12, i1 false)
-  %49 = getelementptr inbounds %"int[]", %"int[]"* %vararg12, i32 0, i32 1
-  %50 = getelementptr inbounds %"int[]", %"int[]"* %vararg12, i32 0, i32 0
-  store i64 4, i64* %49, align 8
-  %51 = bitcast [4 x i32]* %x to i32*
-  store i32* %51, i32** %50, align 8
-  %52 = bitcast %"int[]"* %indirectarg13 to i8*
-  %53 = bitcast %"int[]"* %vararg12 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %52, i8* align 8 %53, i32 16, i1 false)
-  %54 = call i32 @test.sum_us(%"int[]"* %indirectarg13)
-  %55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.16, i32 0, i32 0), i32 %54)
-  %56 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 1
-  %57 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 0
-  %58 = bitcast %"int[]"* %indirectarg15 to i8*
-  %59 = bitcast %"int[]"* %z to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %58, i8* align 8 %59, i32 16, i1 false)
-  %60 = call i32 @test.sum_us(%"int[]"* %indirectarg15)
-  %61 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i32 0, i32 0), i32 %60)
-  %62 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 0
-  store i32 1, i32* %62, align 4
-  %63 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 1
-  store i32 2, i32* %63, align 4
-  %64 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 2
-  store i32 4, i32* %64, align 4
-  %65 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 3
-  store i32 5, i32* %65, align 4
-  %66 = getelementptr inbounds %"int[]", %"int[]"* %vararg16, i32 0, i32 1
-  store i64 4, i64* %66, align 8
-  %67 = getelementptr inbounds %"int[]", %"int[]"* %vararg16, i32 0, i32 0
-  %68 = bitcast [4 x i32]* %varargslots to i32*
-  store i32* %68, i32** %67, align 8
-  %69 = bitcast %"int[]"* %indirectarg17 to i8*
-  %70 = bitcast %"int[]"* %vararg16 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %69, i8* align 8 %70, i32 16, i1 false)
-  %71 = call i32 @test.sum_us(%"int[]"* %indirectarg17)
-  %72 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.18, i32 0, i32 0), i32 %71)
-  %73 = getelementptr inbounds [1 x i32], [1 x i32]* %varargslots19, i64 0, i64 0
-  store i32 1, i32* %73, align 4
-  %74 = getelementptr inbounds %"int[]", %"int[]"* %vararg18, i32 0, i32 1
-  store i64 1, i64* %74, align 8
-  %75 = getelementptr inbounds %"int[]", %"int[]"* %vararg18, i32 0, i32 0
-  %76 = bitcast [1 x i32]* %varargslots19 to i32*
-  store i32* %76, i32** %75, align 8
-  %77 = bitcast %"int[]"* %indirectarg20 to i8*
-  %78 = bitcast %"int[]"* %vararg18 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %77, i8* align 8 %78, i32 16, i1 false)
-  %79 = call i32 @test.sum_us(%"int[]"* %indirectarg20)
-  %80 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.19, i32 0, i32 0), i32 %79)
-  %81 = getelementptr inbounds %"int[]", %"int[]"* %vararg21, i32 0, i32 1
-  store i64 0, i64* %81, align 8
-  %82 = bitcast %"int[]"* %indirectarg22 to i8*
-  %83 = bitcast %"int[]"* %vararg21 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %82, i8* align 8 %83, i32 16, i1 false)
-  %84 = call i32 @test.sum_us(%"int[]"* %indirectarg22)
-  %85 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.20, i32 0, i32 0), i32 %84)
+  %45 = load i32, i32* %fro, align 4
+  %46 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.14, i32 0, i32 0), i32 %45)
+  %47 = bitcast [4 x i32]* %x to i32*
+  %48 = insertvalue %"int[]" undef, i32* %47, 0
+  %49 = insertvalue %"int[]" %48, i64 4, 1
+  store %"int[]" %49, %"int[]"* %z, align 8
+  %50 = bitcast [3 x i32]* %de to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %50, i8* align 4 bitcast ([3 x i32]* @.__const.15 to i8*), i32 12, i1 false)
+  %51 = getelementptr inbounds %"int[]", %"int[]"* %vararg12, i32 0, i32 1
+  %52 = getelementptr inbounds %"int[]", %"int[]"* %vararg12, i32 0, i32 0
+  store i64 4, i64* %51, align 8
+  %53 = bitcast [4 x i32]* %x to i32*
+  store i32* %53, i32** %52, align 8
+  %54 = bitcast %"int[]"* %indirectarg13 to i8*
+  %55 = bitcast %"int[]"* %vararg12 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %54, i8* align 8 %55, i32 16, i1 false)
+  %56 = call i32 @test.sum_us(%"int[]"* %indirectarg13)
+  %57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.16, i32 0, i32 0), i32 %56)
+  %58 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 1
+  %59 = getelementptr inbounds %"int[]", %"int[]"* %vararg14, i32 0, i32 0
+  %60 = bitcast %"int[]"* %indirectarg15 to i8*
+  %61 = bitcast %"int[]"* %z to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %60, i8* align 8 %61, i32 16, i1 false)
+  %62 = call i32 @test.sum_us(%"int[]"* %indirectarg15)
+  %63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i32 0, i32 0), i32 %62)
+  %64 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 0
+  store i32 1, i32* %64, align 4
+  %65 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 1
+  store i32 2, i32* %65, align 4
+  %66 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 2
+  store i32 4, i32* %66, align 4
+  %67 = getelementptr inbounds [4 x i32], [4 x i32]* %varargslots, i64 0, i64 3
+  store i32 5, i32* %67, align 4
+  %68 = getelementptr inbounds %"int[]", %"int[]"* %vararg16, i32 0, i32 1
+  store i64 4, i64* %68, align 8
+  %69 = getelementptr inbounds %"int[]", %"int[]"* %vararg16, i32 0, i32 0
+  %70 = bitcast [4 x i32]* %varargslots to i32*
+  store i32* %70, i32** %69, align 8
+  %71 = bitcast %"int[]"* %indirectarg17 to i8*
+  %72 = bitcast %"int[]"* %vararg16 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %71, i8* align 8 %72, i32 16, i1 false)
+  %73 = call i32 @test.sum_us(%"int[]"* %indirectarg17)
+  %74 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.18, i32 0, i32 0), i32 %73)
+  %75 = getelementptr inbounds [1 x i32], [1 x i32]* %varargslots19, i64 0, i64 0
+  store i32 1, i32* %75, align 4
+  %76 = getelementptr inbounds %"int[]", %"int[]"* %vararg18, i32 0, i32 1
+  store i64 1, i64* %76, align 8
+  %77 = getelementptr inbounds %"int[]", %"int[]"* %vararg18, i32 0, i32 0
+  %78 = bitcast [1 x i32]* %varargslots19 to i32*
+  store i32* %78, i32** %77, align 8
+  %79 = bitcast %"int[]"* %indirectarg20 to i8*
+  %80 = bitcast %"int[]"* %vararg18 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %79, i8* align 8 %80, i32 16, i1 false)
+  %81 = call i32 @test.sum_us(%"int[]"* %indirectarg20)
+  %82 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.19, i32 0, i32 0), i32 %81)
+  %83 = getelementptr inbounds %"int[]", %"int[]"* %vararg21, i32 0, i32 1
+  store i64 0, i64* %83, align 8
+  %84 = bitcast %"int[]"* %indirectarg22 to i8*
+  %85 = bitcast %"int[]"* %vararg21 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %84, i8* align 8 %85, i32 16, i1 false)
+  %86 = call i32 @test.sum_us(%"int[]"* %indirectarg22)
+  %87 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.20, i32 0, i32 0), i32 %86)
   store i32 (double, %Bobo*)* null, i32 (double, %Bobo*)** %a1, align 8
   store i32 (double, %Bobo*)* null, i32 (double, %Bobo*)** %b2, align 8
   ret void
@@ -684,8 +688,8 @@ entry:
 define i32 @test2.int.getValue(i32 %0)
 entry:
   %blob = alloca %Blob, align 4
-  %coerce = bitcast %Blob* %blob to i32*
-  store i32 %0, i32* %coerce, align 4
+  %dive = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
+  store i32 %0, i32* %dive, align 4
   %1 = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
   %2 = load i32, i32* %1, align 4
   ret i32 %2
@@ -713,9 +717,10 @@ entry:
 define double @test2.double.getValue(i64 %0)
 entry:
   %blob = alloca %Blob, align 8
-  %coerce = bitcast %Blob* %blob to i64*
-  store i64 %0, i64* %coerce, align 8
-  %1 = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
-  %2 = load double, double* %1, align 8
-  ret double %2
+  %dive = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
+  %1 = bitcast double* %dive to i64*
+  store i64 %0, i64* %1, align 8
+  %2 = getelementptr inbounds %Blob, %Blob* %blob, i32 0, i32 0
+  %3 = load double, double* %2, align 8
+  ret double %3
 
diff --git a/test/test_suite/struct/struct_as_value.c3t b/test/test_suite/struct/struct_as_value.c3t
index 7d2db133e..0ea6840bc 100644
--- a/test/test_suite/struct/struct_as_value.c3t
+++ b/test/test_suite/struct/struct_as_value.c3t
@@ -1,3 +1,4 @@
+// #target: x64_darwin
 module test;
 
 struct Event
@@ -12,4 +13,32 @@ func Event test(int x)
   return x ? foo : bar;
 }
 
-// TODO possibly look at the IR
\ No newline at end of file
+// #expect: test.ll
+
+   %x = alloca i32, align 4
+   %foo = alloca %Event, align 4
+   %bar = alloca %Event, align 4
+   %tempaddr = alloca %Event, align 4
+   store i32 %0, i32* %x, align 4
+   %1 = bitcast %Event* %foo to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 bitcast (%Event* @.__const to i8*), i32 4, i1 false)
+   %2 = bitcast %Event* %bar to i8*
+   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %2, i8* align 4 bitcast (%Event* @.__const.1 to i8*), i32 4, i1 false)
+   %3 = load i32, i32* %x, align 4
+   %intbool = icmp ne i32 %3, 0
+   br i1 %intbool, label %cond.lhs, label %cond.rhs
+
+ cond.lhs:
+   %4 = load %Event, %Event* %foo, align 4
+   br label %cond.phi
+
+ cond.rhs:
+   %5 = load %Event, %Event* %bar, align 4
+   br label %cond.phi
+
+ cond.phi:
+   %val = phi %Event [ %4, %cond.lhs ], [ %5, %cond.rhs ]
+   store %Event %val, %Event* %tempaddr, align 4
+   %dive = getelementptr inbounds %Event, %Event* %tempaddr, i32 0, i32 0
+   %6 = load i32, i32* %dive, align 4
+   ret i32 %6
\ No newline at end of file
diff --git a/test/test_suite/struct/struct_as_value_aarch64.c3t b/test/test_suite/struct/struct_as_value_aarch64.c3t
new file mode 100644
index 000000000..6126f1ba5
--- /dev/null
+++ b/test/test_suite/struct/struct_as_value_aarch64.c3t
@@ -0,0 +1,45 @@
+// #target: aarch64_linux
+module test;
+
+struct Event
+{
+    int op;
+}
+
+func Event test(int x)
+{
+  Event foo = { 1 };
+  Event bar = { 2 };
+  return x ? foo : bar;
+}
+
+// #expect: test.ll
+
+  %x = alloca i32, align 4
+  %foo = alloca %Event, align 4
+  %bar = alloca %Event, align 4
+  %tempaddr = alloca %Event, align 4
+  store i32 %0, i32* %x, align 4
+  %1 = bitcast %Event* %foo to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 bitcast (%Event* @.__const to i8*), i32 4, i1 false)
+  %2 = bitcast %Event* %bar to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %2, i8* align 4 bitcast (%Event* @.__const.1 to i8*), i32 4, i1 false)
+  %3 = load i32, i32* %x, align 4
+  %intbool = icmp ne i32 %3, 0
+  br i1 %intbool, label %cond.lhs, label %cond.rhs
+
+cond.lhs:
+  %4 = load %Event, %Event* %foo, align 4
+  br label %cond.phi
+
+cond.rhs:
+  %5 = load %Event, %Event* %bar, align 4
+  br label %cond.phi
+
+cond.phi:
+  %val = phi %Event [ %4, %cond.lhs ], [ %5, %cond.rhs ]
+  store %Event %val, %Event* %tempaddr, align 4
+  %dive = getelementptr inbounds %Event, %Event* %tempaddr, i32 0, i32 0
+  %6 = load i32, i32* %dive, align 4
+  %7 = zext i32 %6 to i64
+  ret i64 %7
\ No newline at end of file