From 4ea81fe636c88f86bf16442a91af56af3ccb9f50 Mon Sep 17 00:00:00 2001
From: Christoffer Lerno <christoffer@aegik.com>
Date: Tue, 31 Jan 2023 10:04:53 +0100
Subject: [PATCH] More riscv tests. Fix missing zero/signext attributes on
 calls. Fixes and simplification to riscv ABI.

---
 src/compiler/c_abi_internal.h                 |   2 +-
 src/compiler/compiler_internal.h              |  10 +-
 src/compiler/llvm_codegen_c_abi.c             |  21 +-
 src/compiler/llvm_codegen_c_abi_riscv.c       |  28 +-
 src/compiler/llvm_codegen_expr.c              |  84 ++-
 src/compiler/llvm_codegen_function.c          |  60 +--
 src/compiler/llvm_codegen_internal.h          |   2 +
 src/compiler/llvm_codegen_type.c              |  39 +-
 test/test_suite/abi/riscv32-ilp32-abi.c3t     |  41 ++
 .../abi/riscv32-ilp32-ilp32f-abi-1.c3t        |  43 +-
 .../abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t | 383 ++++++++++++--
 test/test_suite/abi/riscv32-ilp32d-abi.c3t    | 382 ++++++++++++--
 test/test_suite/abi/riscv32-ilp32f-abi.c3t    |  56 +-
 .../abi/riscv32-ilp32f-ilp32d-abi-2.c3t       | 306 ++++++++++-
 .../abi/riscv64-lp64-lp64f-abi-1.c3t          |  51 ++
 .../abi/riscv64-lp64-lp64f-abi-2.c3t          |  52 ++
 .../abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t    | 208 ++++++++
 .../abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t    | 208 ++++++++
 .../abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t    | 208 ++++++++
 test/test_suite/abi/riscv64-lp64d-abi.c3t     | 494 ++++++++++++++++++
 .../abi/riscv64-lp64f-lp64d-abi-1.c3t         | 184 +++++++
 .../abi/riscv64-lp64f-lp64d-abi-2.c3t         | 429 +++++++++++++++
 test/test_suite/clang/2002-04.c3t             |   2 +-
 test/test_suite/clang/2002-07.c3t             |   4 +-
 test/test_suite/errors/error_regression_2.c3t |   2 +-
 25 files changed, 3017 insertions(+), 282 deletions(-)
 create mode 100644 test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64d-abi.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64f-lp64d-abi-1.c3t
 create mode 100644 test/test_suite/abi/riscv64-lp64f-lp64d-abi-2.c3t

diff --git a/src/compiler/c_abi_internal.h b/src/compiler/c_abi_internal.h
index 0342159ea..7205c50bb 100644
--- a/src/compiler/c_abi_internal.h
+++ b/src/compiler/c_abi_internal.h
@@ -25,7 +25,7 @@ ABIArgInfo *abi_arg_new_direct_coerce_int(void);
 ABIArgInfo *abi_arg_new_direct_coerce_type(Type *type);
 ABIArgInfo *abi_arg_new_direct_struct_expand_i32(uint8_t elements);
 ABIArgInfo *abi_arg_new_expand_coerce(AbiType target_type, unsigned offset);
-ABIArgInfo *abi_arg_new_expand_coerce_pair(AbiType first_element, unsigned initial_offset, AbiType second_element, unsigned padding, bool is_packed);
+ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_element, unsigned hi_offset, bool packed);
 ABIArgInfo *abi_arg_new_indirect_realigned(AlignSize alignment, Type *by_val_type);
 ABIArgInfo *abi_arg_new_indirect_by_val(Type *by_val_type);
 ABIArgInfo *abi_arg_new_indirect_not_by_val(Type *type);
diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h
index 5bf90ad58..1847904e7 100644
--- a/src/compiler/compiler_internal.h
+++ b/src/compiler/compiler_internal.h
@@ -1753,14 +1753,10 @@ typedef struct ABIArgInfo_
 		} direct_pair;
 		struct
 		{
-			uint8_t offset_lo;
-			uint8_t padding_hi;
-			uint8_t lo_index;
-			uint8_t hi_index;
 			uint8_t offset_hi;
-			bool packed : 1;
-			AbiType lo;
-			AbiType hi;
+			bool packed;
+			Type *lo;
+			Type *hi;
 		} coerce_expand;
 		Type *direct_coerce_type;
 		uint8_t direct_struct_expand;
diff --git a/src/compiler/llvm_codegen_c_abi.c b/src/compiler/llvm_codegen_c_abi.c
index ea513d9e5..eddba7b71 100644
--- a/src/compiler/llvm_codegen_c_abi.c
+++ b/src/compiler/llvm_codegen_c_abi.c
@@ -159,29 +159,14 @@ ABIArgInfo *abi_arg_new_expand(void)
 	return abi_arg_new(ABI_ARG_EXPAND);
 }
 
-ABIArgInfo *abi_arg_new_expand_coerce(AbiType target_type, unsigned offset)
-{
-	ABIArgInfo *arg = abi_arg_new(ABI_ARG_EXPAND_COERCE);
-	arg->coerce_expand.packed = offset > 0;
-	assert(offset <= 0xFF);
-	arg->coerce_expand.offset_lo = (unsigned char)offset;
-	arg->coerce_expand.lo_index = offset > 0 ? 1 : 0;
-	arg->coerce_expand.lo = target_type;
-	return arg;
-}
 
-ABIArgInfo *abi_arg_new_expand_coerce_pair(AbiType first_element, unsigned initial_offset, AbiType second_element, unsigned padding, bool is_packed)
+ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_element, unsigned hi_offset, bool packed)
 {
 	ABIArgInfo *arg = abi_arg_new(ABI_ARG_EXPAND_COERCE);
-	arg->coerce_expand.packed = is_packed;
-	assert(initial_offset <= 0xFF && padding <= 0xFF);
-	arg->coerce_expand.offset_lo = (unsigned char)initial_offset;
-	arg->coerce_expand.lo_index = initial_offset > 0 ? 1 : 0;
 	arg->coerce_expand.lo = first_element;
 	arg->coerce_expand.hi = second_element;
-	arg->coerce_expand.padding_hi = (uint8_t)padding;
-	arg->coerce_expand.offset_hi = (uint8_t)(padding + initial_offset + abi_type_size(first_element));
-	arg->coerce_expand.hi_index = arg->coerce_expand.lo_index + (padding > 0 ? 1U : 0U);
+	arg->coerce_expand.offset_hi = hi_offset; // NOTE(review): narrowed into a uint8_t field; the removed code asserted <= 0xFF first
+	arg->coerce_expand.packed = packed; // true: offset_hi is a byte offset; false: it counts hi-sized elements
 	return arg;
 }
 
diff --git a/src/compiler/llvm_codegen_c_abi_riscv.c b/src/compiler/llvm_codegen_c_abi_riscv.c
index fd5874a3d..6f3498c92 100644
--- a/src/compiler/llvm_codegen_c_abi_riscv.c
+++ b/src/compiler/llvm_codegen_c_abi_riscv.c
@@ -7,34 +7,26 @@
 
 static ABIArgInfo *riscv_coerce_and_expand_fpcc_struct(AbiType field1, unsigned field1_offset, AbiType field2, unsigned field2_offset)
 {
+	assert(abi_type_is_type(field1));
 	if (!abi_type_is_valid(field2))
 	{
-		return abi_arg_new_expand_coerce(field1, field1_offset);
+		return abi_arg_new_direct_coerce_type(field1.type);
 	}
 
-	unsigned field2_alignment = abi_type_abi_alignment(field2);
-	unsigned field1_size = abi_type_size(field1);
-	unsigned field2_offset_no_pad = aligned_offset(field1_size, field2_alignment);
-
-	unsigned padding = 0;
-
-	if (field2_offset > field2_offset_no_pad)
+	assert(abi_type_is_type(field2));
+	Type *type2 = field2.type;
+	ByteSize hi_size = type_size(type2); // named hi_size so it does not shadow the abi_type_size() function
+	// Field 2 is not at a multiple of its own size: pass its byte offset and use packed semantics.
+	if (field2_offset % hi_size != 0)
 	{
-		padding = field2_offset - field2_offset_no_pad;
+		return abi_arg_new_expand_coerce_pair(field1.type, type2, field2_offset, true);
 	}
-	else if (field2_offset != field2_alignment && field2_offset > field1_size)
-	{
-		padding = field2_offset - field1_size;
-	}
-
-	bool is_packed = field2_offset % field2_alignment != 0;
-
-	return abi_arg_new_expand_coerce_pair(field1, field1_offset, field2, padding, is_packed);
+	return abi_arg_new_expand_coerce_pair(field1.type, type2, field2_offset / hi_size, false); // offset in hi_size units
 }
 
 static bool riscv_detect_fpcc_struct_internal(Type *type, unsigned current_offset, AbiType *field1_ref, unsigned *field1_offset, AbiType *field2_ref, unsigned *field2_offset)
 {
-	bool is_int = type_is_integer(type);
+	bool is_int = type_is_integer_or_bool_kind(type);
 	bool is_float = type_is_float(type);
 	unsigned flen = platform_target.riscv.flen;
 	ByteSize size = type_size(type);
diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c
index d472dbb42..f9ab4b122 100644
--- a/src/compiler/llvm_codegen_expr.c
+++ b/src/compiler/llvm_codegen_expr.c
@@ -303,6 +303,19 @@ static LLVMTypeRef llvm_find_inner_struct_type_for_coerce(GenContext *c, LLVMTyp
 	return type;
 }
 
+LLVMTypeRef llvm_coerce_expand_hi_offset(GenContext *c, LLVMValueRef *addr, ABIArgInfo *info, AlignSize *align)
+{ // Advances *addr and *align from the lo element to the hi element of an expand-coerce pair; returns the hi LLVM type.
+	LLVMTypeRef type2 = llvm_get_type(c, info->coerce_expand.hi);
+	if (info->coerce_expand.packed) // packed: offset_hi is in bytes; otherwise it counts hi-type elements
+	{
+		*align = type_min_alignment(*align, *align + info->coerce_expand.offset_hi);
+		*addr = llvm_emit_pointer_inbounds_gep_raw_index(c, c->byte_type, *addr, info->coerce_expand.offset_hi); // callers read/write hi through *addr
+		return type2;
+	}
+	*align = type_min_alignment(*align, *align + llvm_store_size(c, type2) * info->coerce_expand.offset_hi);
+	*addr = llvm_emit_pointer_inbounds_gep_raw_index(c, type2, *addr, info->coerce_expand.offset_hi); // the GEP result must be kept, or hi aliases lo
+	return type2;
+}
 /**
  * General functionality to convert ptr <-> int
  */
@@ -4498,6 +4511,12 @@ LLVMValueRef llvm_emit_pointer_inbounds_gep_raw(GenContext *c, LLVMTypeRef point
 	return LLVMBuildInBoundsGEP2(c->builder, pointee_type, ptr, &offset, 1, "ptroffset");
 }
 
+LLVMValueRef llvm_emit_pointer_inbounds_gep_raw_index(GenContext *c, LLVMTypeRef pointee_type, LLVMValueRef ptr, ByteSize offset)
+{ // Returns an inbounds GEP of `ptr` by the constant index `offset`, counted in `pointee_type` elements.
+	LLVMValueRef offset_val = LLVMConstInt(c->size_type, offset, false); // false: the index is not sign-extended
+	return LLVMBuildInBoundsGEP2(c->builder, pointee_type, ptr, &offset_val, 1, "ptroffset");
+}
+
 void llvm_emit_subarray_len(GenContext *c, BEValue *subarray, BEValue *len)
 {
 	llvm_value_addr(c, subarray);
@@ -4696,18 +4715,11 @@ void llvm_emit_parameter(GenContext *c, LLVMValueRef *args, unsigned *arg_count_
 		{
 			// Move this to an address (if needed)
 			llvm_value_addr(c, be_value);
-			LLVMTypeRef coerce_type = llvm_get_coerce_type(c, info);
-			AlignSize alignment;
-			LLVMValueRef temp = llvm_emit_coerce_alignment(c, be_value, coerce_type, llvm_abi_alignment(c, coerce_type), &alignment);
-
-			AlignSize align;
-			LLVMValueRef gep_first = llvm_emit_struct_gep_raw(c, temp, coerce_type, info->coerce_expand.lo_index, alignment, &align);
-			args[(*arg_count_ref)++] = llvm_load(c, llvm_abi_type(c, info->coerce_expand.lo), gep_first, align, "");
-			if (abi_type_is_valid(info->coerce_expand.hi))
-			{
-				LLVMValueRef gep_second = llvm_emit_struct_gep_raw(c, temp, coerce_type, info->coerce_expand.hi_index, alignment, &align);
-				args[(*arg_count_ref)++] = llvm_load(c, llvm_abi_type(c, info->coerce_expand.hi), gep_second, align, "");
-			}
+			LLVMValueRef addr = be_value->value;
+			AlignSize align = be_value->alignment;
+			args[(*arg_count_ref)++] = llvm_load(c, llvm_get_type(c, info->coerce_expand.lo), addr, align, "");
+			LLVMTypeRef type2 = llvm_coerce_expand_hi_offset(c, &addr, info, &align);
+			args[(*arg_count_ref)++] = llvm_load(c, type2, addr, align, "");
 			return;
 		}
 		case ABI_ARG_EXPAND:
@@ -4769,6 +4781,14 @@ void llvm_add_abi_call_attributes(GenContext *c, LLVMValueRef call_value, int co
 	for (unsigned i = 0; i < count; i++)
 	{
 		ABIArgInfo *info = infos[i];
+		if (info->attributes.signext)
+		{
+			llvm_attribute_add_call(c, call_value, attribute_id.sext, (int)info->param_index_start + 1, 0);
+		}
+		if (info->attributes.zeroext)
+		{
+			llvm_attribute_add_call(c, call_value, attribute_id.zext, (int)info->param_index_start + 1, 0);
+		}
 		switch (info->kind)
 		{
 			case ABI_ARG_INDIRECT:
@@ -4913,41 +4933,17 @@ void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype
 
 			// 15a. Create memory to hold the return type.
 			// COERCE UPDATE bitcast removed, check for ways to optimize
-			LLVMValueRef coerce = llvm_emit_alloca_aligned(c, call_return_type, "");
-			llvm_value_set_address_abi_aligned(result_value, coerce, call_return_type);
 
-			// COERCE UPDATE bitcast removed, check for ways to optimize
-			// 15b. Construct our coerce type which is { pad, lo, pad, hi }
-			LLVMTypeRef coerce_type = llvm_get_coerce_type(c, ret_info);
+			LLVMValueRef addr = llvm_emit_alloca_aligned(c, call_return_type, "");
+			llvm_value_set_address_abi_aligned(result_value, addr, call_return_type);
 
-			// 15d. Find the address to the low value
-			AlignSize alignment;
-			LLVMValueRef lo = llvm_emit_struct_gep_raw(c, coerce, coerce_type, ret_info->coerce_expand.lo_index,
-			                                           type_abi_alignment(call_return_type), &alignment);
-
-			// 15e. If there is only a single field, we simply store the value,
-			//      so { lo } set into { pad, lo, pad } -> original type.
-			if (!abi_type_is_valid(ret_info->coerce_expand.hi))
-			{
-				// Here we do a store to call -> lo (leaving the rest undefined)
-				llvm_store_to_ptr_raw_aligned(c, lo, call_value, alignment);
-				break;
-			}
-
-			// 15g. We can now extract { lo, hi } to lo_value and hi_value.
-			LLVMValueRef lo_value = llvm_emit_extract_value(c, call_value, 0);
-			LLVMValueRef hi_value = llvm_emit_extract_value(c, call_value, 1);
-
-			// 15h. Store lo_value into the { pad, lo, pad, hi } struct.
-			llvm_store_to_ptr_raw_aligned(c, lo, lo_value, alignment);
-
-			// 15i. Calculate the address to the high value (like for the low in 15d.
-			LLVMValueRef hi = llvm_emit_struct_gep_raw(c, coerce, coerce_type, ret_info->coerce_expand.hi_index,
-			                                           type_abi_alignment(call_return_type), &alignment);
-
-			// 15h. Store the high value.
-			llvm_store_to_ptr_raw_aligned(c, hi, hi_value, alignment);
+			// Store lower
+			AlignSize align = result_value->alignment;
+			llvm_store_to_ptr_raw_aligned(c, addr, llvm_emit_extract_value(c, call_value, 0), align);
 
+			// Store upper
+			(void)llvm_coerce_expand_hi_offset(c, &addr, ret_info, &align);
+			llvm_store_to_ptr_raw_aligned(c, addr, llvm_emit_extract_value(c, call_value, 1), align);
 			break;
 		}
 		case ABI_ARG_DIRECT:
diff --git a/src/compiler/llvm_codegen_function.c b/src/compiler/llvm_codegen_function.c
index b4c865953..8efb28d0b 100644
--- a/src/compiler/llvm_codegen_function.c
+++ b/src/compiler/llvm_codegen_function.c
@@ -14,6 +14,7 @@ static inline void llvm_emit_body(GenContext *c, LLVMValueRef function, const ch
                                   const char *function_name,
                                   FileId file_id, FunctionPrototype *prototype, Signature *signature, Ast *body);
 
+
 bool llvm_emit_check_block_branch(GenContext *context)
 {
 	if (!context->current_block) return false;
@@ -126,21 +127,12 @@ static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, ABIAr
 			return;
 		case ABI_ARG_EXPAND_COERCE:
 		{
-			// Create the expand type:
-			LLVMTypeRef coerce_type = llvm_get_coerce_type(c, info);
-			// COERCE UPDATE bitcast removed, check for ways to optimize
 			llvm_emit_and_set_decl_alloca(c, decl);
-			LLVMValueRef temp = decl->backend_ref;
-
-			AlignSize alignment = decl->alignment;
-			AlignSize element_align;
-			LLVMValueRef gep_first = llvm_emit_struct_gep_raw(c, temp, coerce_type, info->coerce_expand.lo_index, alignment, &element_align);
-			llvm_store_to_ptr_raw_aligned(c, gep_first, llvm_get_next_param(c, index), element_align);
-			if (abi_type_is_valid(info->coerce_expand.hi))
-			{
-				LLVMValueRef gep_second = llvm_emit_struct_gep_raw(c, temp, coerce_type, info->coerce_expand.hi_index, alignment, &element_align);
-				llvm_store_to_ptr_raw_aligned(c, gep_second, llvm_get_next_param(c, index), element_align);
-			}
+			LLVMValueRef addr = decl->backend_ref;
+			AlignSize align = decl->alignment;
+			llvm_store_to_ptr_raw_aligned(c, addr, llvm_get_next_param(c, index), align);
+			(void)llvm_coerce_expand_hi_offset(c, &addr, info, &align);
+			llvm_store_to_ptr_raw_aligned(c, addr, llvm_get_next_param(c, index), align);
 			break;
 		}
 		case ABI_ARG_DIRECT_PAIR:
@@ -325,38 +317,16 @@ void llvm_emit_return_abi(GenContext *c, BEValue *return_value, BEValue *optiona
 		{
 			// Pick the return as an address.
 			llvm_value_addr(c, return_value);
-			// Get the coerce type.
-			LLVMTypeRef coerce_type = llvm_get_coerce_type(c, info);
-			// Create the new pointer
-			assert(return_value);
-			// COERCE UPDATE bitcast removed, check for ways to optimize
-			LLVMValueRef coerce = return_value->value;
-			// We might have only one value, in that case, build a GEP to that one.
-			LLVMValueRef lo_val;
-			AlignSize alignment;
-			LLVMValueRef lo = llvm_emit_struct_gep_raw(c, coerce, coerce_type, info->coerce_expand.lo_index,
-			                                           return_value->alignment, &alignment);
-			LLVMTypeRef lo_type = llvm_abi_type(c, info->coerce_expand.lo);
-			lo_val = llvm_load(c, lo_type, lo, alignment, "");
+			LLVMValueRef addr = return_value->value;
+			AlignSize align = return_value->alignment;
+			LLVMValueRef lo = llvm_load(c, llvm_get_type(c, info->coerce_expand.lo), addr, align, "");
+			LLVMTypeRef type2 = llvm_coerce_expand_hi_offset(c, &addr, info, &align);
+			LLVMValueRef hi = llvm_load(c, type2, addr, align, "");
+			LLVMTypeRef type = llvm_get_twostruct(c, LLVMTypeOf(lo), LLVMTypeOf(hi));
+			LLVMValueRef composite = llvm_get_undef_raw(type);
 
-			// We're done if there's a single field.
-			if (!abi_type_is_valid(info->coerce_expand.hi))
-			{
-				llvm_emit_return_value(c, lo_val);
-				return;
-			}
-
-			// Let's make a first class aggregate
-			LLVMValueRef hi = llvm_emit_struct_gep_raw(c, coerce, coerce_type, info->coerce_expand.hi_index,
-			                                           return_value->alignment, &alignment);
-			LLVMTypeRef hi_type = llvm_abi_type(c, info->coerce_expand.hi);
-			LLVMValueRef hi_val = llvm_load(c, hi_type, hi, alignment, "");
-
-			LLVMTypeRef unpadded_type = llvm_get_twostruct(c, lo_type, hi_type);
-			LLVMValueRef composite = llvm_get_undef_raw(unpadded_type);
-
-			composite = llvm_emit_insert_value(c, composite, lo_val, 0);
-			composite = llvm_emit_insert_value(c, composite, hi_val, 1);
+			composite = llvm_emit_insert_value(c, composite, lo, 0);
+			composite = llvm_emit_insert_value(c, composite, hi, 1);
 
 			// And return that unpadded result
 			llvm_emit_return_value(c, composite);
diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h
index 7ab6626a8..e4e1bdf77 100644
--- a/src/compiler/llvm_codegen_internal.h
+++ b/src/compiler/llvm_codegen_internal.h
@@ -395,6 +395,8 @@ LLVMValueRef llvm_emit_array_gep_raw(GenContext *c, LLVMValueRef ptr, LLVMTypeRe
 LLVMValueRef llvm_emit_array_gep_raw_index(GenContext *c, LLVMValueRef ptr, LLVMTypeRef array_type, LLVMValueRef index, AlignSize array_alignment, AlignSize *alignment);
 LLVMValueRef llvm_emit_pointer_gep_raw(GenContext *c, LLVMTypeRef pointee_type, LLVMValueRef ptr, LLVMValueRef offset);
 LLVMValueRef llvm_emit_pointer_inbounds_gep_raw(GenContext *c, LLVMTypeRef pointee_type, LLVMValueRef ptr, LLVMValueRef offset);
+LLVMValueRef llvm_emit_pointer_inbounds_gep_raw_index(GenContext *c, LLVMTypeRef pointee_type, LLVMValueRef ptr, ByteSize offset);
+LLVMTypeRef llvm_coerce_expand_hi_offset(GenContext *c, LLVMValueRef *addr, ABIArgInfo *info, AlignSize *align);
 void llvm_emit_ptr_from_array(GenContext *c, BEValue *value);
 void llvm_emit_struct_member_ref(GenContext *c, BEValue *struct_ref, BEValue *member_ref, unsigned member_id);
 INLINE LLVMValueRef llvm_emit_extract_value(GenContext *c, LLVMValueRef agg, unsigned index);
diff --git a/src/compiler/llvm_codegen_type.c b/src/compiler/llvm_codegen_type.c
index af61a6cbc..ab49076cc 100644
--- a/src/compiler/llvm_codegen_type.c
+++ b/src/compiler/llvm_codegen_type.c
@@ -163,11 +163,8 @@ static inline void add_func_type_param(GenContext *c, Type *param_type, ABIArgIn
 			vec_add(*params, c->ptr_type);
 			break;
 		case ABI_ARG_EXPAND_COERCE:
-			vec_add(*params, llvm_abi_type(c, arg_info->coerce_expand.lo));
-			if (abi_type_is_valid(arg_info->coerce_expand.hi))
-			{
-				vec_add(*params, llvm_abi_type(c, arg_info->coerce_expand.hi));
-			}
+			vec_add(*params, llvm_get_type(c, arg_info->coerce_expand.lo));
+			vec_add(*params, llvm_get_type(c, arg_info->coerce_expand.hi));
 			break;
 		case ABI_ARG_EXPAND:
 			// Expanding a structs
@@ -226,13 +223,8 @@ LLVMTypeRef llvm_update_prototype_abi(GenContext *c, FunctionPrototype *prototyp
 			break;
 		case ABI_ARG_EXPAND_COERCE:
 		{
-			LLVMTypeRef lo = llvm_abi_type(c, ret_arg_info->coerce_expand.lo);
-			if (!abi_type_is_valid(ret_arg_info->coerce_expand.hi))
-			{
-				retval = lo;
-				break;
-			}
-			LLVMTypeRef hi = llvm_abi_type(c, ret_arg_info->coerce_expand.hi);
+			LLVMTypeRef lo = llvm_get_type(c, ret_arg_info->coerce_expand.lo);
+			LLVMTypeRef hi = llvm_get_type(c, ret_arg_info->coerce_expand.hi);
 			retval = llvm_get_twostruct(c, lo, hi);
 			break;
 		}
@@ -393,28 +385,6 @@ LLVMTypeRef llvm_get_coerce_type(GenContext *c, ABIArgInfo *arg_info)
 {
 	switch (arg_info->kind)
 	{
-		case ABI_ARG_EXPAND_COERCE:
-		{
-			unsigned element_index = 0;
-			LLVMTypeRef elements[4];
-			// Add optional padding to make the data appear at the correct offset.
-			if (arg_info->coerce_expand.offset_lo)
-			{
-				elements[element_index++] = llvm_const_padding_type(c, arg_info->coerce_expand.offset_lo);
-			}
-			elements[element_index++] = llvm_abi_type(c, arg_info->coerce_expand.lo);
-			// Add optional padding to make the high field appear at the correct off.
-			if (arg_info->coerce_expand.padding_hi)
-			{
-				elements[element_index++] = LLVMArrayType(llvm_get_type(c, type_char), arg_info->coerce_expand.padding_hi);
-			}
-			// Check if there is a top type as well.
-			if (abi_type_is_valid(arg_info->coerce_expand.hi))
-			{
-				elements[element_index++] = llvm_abi_type(c, arg_info->coerce_expand.hi);
-			}
-			return LLVMStructTypeInContext(c->context, elements, element_index, arg_info->coerce_expand.packed);
-		}
 		case ABI_ARG_DIRECT_SPLIT_STRUCT_I32:
 		{
 			LLVMTypeRef coerce_type = llvm_get_type(c, type_uint);
@@ -438,6 +408,7 @@ LLVMTypeRef llvm_get_coerce_type(GenContext *c, ABIArgInfo *arg_info)
 		case ABI_ARG_INDIRECT:
 		case ABI_ARG_EXPAND:
 		case ABI_ARG_DIRECT_COERCE_INT:
+		case ABI_ARG_EXPAND_COERCE:
 			UNREACHABLE
 	}
 	UNREACHABLE
diff --git a/test/test_suite/abi/riscv32-ilp32-abi.c3t b/test/test_suite/abi/riscv32-ilp32-abi.c3t
index 9ed4eb37e..84f6606b3 100644
--- a/test/test_suite/abi/riscv32-ilp32-abi.c3t
+++ b/test/test_suite/abi/riscv32-ilp32-abi.c3t
@@ -49,6 +49,47 @@ fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
 
 
 define i32 @test_f_scalar_stack_1(i32 %0, i64 %1, float %2, double %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %sisiext = sext i8 %6 to i32
+  %uisiext = zext i8 %7 to i32
+  %add = add i32 %sisiext, %uisiext
+  ret i32 %add
+}
+
+
 define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 4 %0, float %1, i64 %2, double %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %literal = alloca %Large, align 4
+  %8 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 0
+  %fpsi = fptosi float %1 to i32
+  store i32 %fpsi, ptr %8, align 4
+  %9 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 1
+  %uisiext = zext i8 %5 to i32
+  store i32 %uisiext, ptr %9, align 4
+  %10 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 2
+  %sisiext = sext i8 %6 to i32
+  store i32 %sisiext, ptr %10, align 4
+  %11 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 3
+  %uisiext1 = zext i8 %7 to i32
+  store i32 %uisiext1, ptr %11, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 16, i1 false)
+  ret void
+}
+
+
 define void @test_f_scalar_stack_3(double %0, i64 %1, double %2, i64 %3, i32 %4, i64 %5, float %6, double %7, fp128 %8) #0 {
+entry:
+  ret void
+}
+
+
 define void @test_f_agg_stack(double %0, i64 %1, double %2, i64 %3, i32 %4, [2 x i32] %5, i64 %6, ptr align 4 %7) #0 {
+entry:
+  %e = alloca %Tiny, align 1
+  %f = alloca %Small, align 4
+  %g = alloca %Small_aligned, align 8
+  store i32 %4, ptr %e, align 1
+  store [2 x i32] %5, ptr %f, align 4
+  store i64 %6, ptr %g, align 8
+  ret void
+}
diff --git a/test/test_suite/abi/riscv32-ilp32-ilp32f-abi-1.c3t b/test/test_suite/abi/riscv32-ilp32-ilp32f-abi-1.c3t
index a4de893e9..99ef18073 100644
--- a/test/test_suite/abi/riscv32-ilp32-ilp32f-abi-1.c3t
+++ b/test/test_suite/abi/riscv32-ilp32-ilp32f-abi-1.c3t
@@ -45,12 +45,45 @@ fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
 
 /* #expect: test.ll
 
-define i32 @test_f_scalar_stack_1(i32 %0, i64 %1, i32 %2, double %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7)
+define i32 @test_f_scalar_stack_1(i32 %0, i64 %1, i32 %2, double %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %sisiext = sext i8 %6 to i32
+  %uisiext = zext i8 %7 to i32
+  %add = add i32 %sisiext, %uisiext
+  ret i32 %add
+}
 
-define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 4 %0, i32 %1, i64 %2, double %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7)
+define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 4 %0, i32 %1, i64 %2, double %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %literal = alloca %Large, align 4
+  %8 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 0
+  store i32 %1, ptr %8, align 4
+  %9 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 1
+  %uisiext = zext i8 %5 to i32
+  store i32 %uisiext, ptr %9, align 4
+  %10 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 2
+  %sisiext = sext i8 %6 to i32
+  store i32 %sisiext, ptr %10, align 4
+  %11 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 3
+  %uisiext1 = zext i8 %7 to i32
+  store i32 %uisiext1, ptr %11, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 16, i1 false)
+  ret void
+}
 
-define void @test_f_scalar_stack_3(double %0, i64 %1, double %2, i64 %3, i32 %4, i64 %5, i32 %6, double %7, fp128 %8)
-
-define void @test_f_agg_stack(double %0, i64 %1, double %2, i64 %3, i32 %4, [2 x i32] %5, i64 %6, ptr align 4 %7)
+define void @test_f_scalar_stack_3(double %0, i64 %1, double %2, i64 %3, i32 %4, i64 %5, i32 %6, double %7, fp128 %8) #0 {
+entry:
+  ret void
+}
 
+define void @test_f_agg_stack(double %0, i64 %1, double %2, i64 %3, i32 %4, [2 x i32] %5, i64 %6, ptr align 4 %7) #0 {
+entry:
+  %e = alloca %Tiny, align 1
+  %f = alloca %Small, align 4
+  %g = alloca %Small_aligned, align 8
+  store i32 %4, ptr %e, align 1
+  store [2 x i32] %5, ptr %f, align 4
+  store i64 %6, ptr %g, align 8
+  ret void
+}
 
diff --git a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t
index d3ce71e8f..ff6ee5383 100644
--- a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t
+++ b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t
@@ -181,71 +181,386 @@ fn void f_va_caller() {
 
 /* #expect: test.ll
 
-define void @test_f_void()
+define void @test_f_void() #0 {
+entry:
+  ret void
+}
+
 
 define zeroext i8 @test_f_scalar_0(i8 zeroext %0) #0 {
+entry:
+  ret i8 %0
+}
 
-define signext i8 @test_f_scalar_1(i8 signext %0)
 
-define zeroext i8 @test_f_scalar_2(i8 zeroext %0)
+define signext i8 @test_f_scalar_1(i8 signext %0) #0 {
+entry:
+  ret i8 %0
+}
 
-define i32 @test_f_scalar_3(i32 %0)
 
-define i64 @test_f_scalar_4(i64 %0)
+define zeroext i8 @test_f_scalar_2(i8 zeroext %0) #0 {
+entry:
+  ret i8 %0
+}
 
-define i128 @test_f_scalar_5(i128 %0)
 
-define float @test_f_fp_scalar_1(float %0)
+define i32 @test_f_scalar_3(i32 %0) #0 {
+entry:
+  ret i32 %0
+}
 
-define double @test_f_fp_scalar_2(double %0)
 
-define fp128 @test_f_fp_scalar_3(fp128 %0)
+define i64 @test_f_scalar_4(i64 %0) #0 {
+entry:
+  ret i64 %0
+}
 
-define void @test_f_agg_tiny(i32 %0)
 
-define i32 @test_f_agg_tiny_ret()
+define i128 @test_f_scalar_5(i128 %0) #0 {
+entry:
+  ret i128 %0
+}
 
-define void @test_f_vec_tiny_v4i8(i32 %0)
 
-define i32 @test_f_vec_tiny_v4i8_ret()
+define float @test_f_fp_scalar_1(float %0) #0 {
+entry:
+  ret float %0
+}
 
-define void @test_f_vec_tiny_v1i32(i32 %0)
 
-define i32 @test_f_vec_tiny_v1i32_ret()
+define double @test_f_fp_scalar_2(double %0) #0 {
+entry:
+  ret double %0
+}
 
-define void @test_f_agg_small([2 x i32] %0)
 
-define [2 x i32] @test_f_agg_small_ret()
+define fp128 @test_f_fp_scalar_3(fp128 %0) #0 {
+entry:
+  ret fp128 %0
+}
 
-define void @test_f_vec_small_v8i8(i64 %0)
 
-define i64 @test_f_vec_small_v8i8_ret()
+define void @test_f_agg_tiny(i32 %0) #0 {
+entry:
+  %x = alloca %Tiny, align 1
+  store i32 %0, ptr %x, align 1
+  %1 = getelementptr inbounds %Tiny, ptr %x, i32 0, i32 0
+  %2 = load i8, ptr %1, align 1
+  %3 = getelementptr inbounds %Tiny, ptr %x, i32 0, i32 1
+  %4 = load i8, ptr %3, align 1
+  %add = add i8 %2, %4
+  store i8 %add, ptr %1, align 1
+  %5 = getelementptr inbounds %Tiny, ptr %x, i32 0, i32 2
+  %6 = load i8, ptr %5, align 1
+  %7 = getelementptr inbounds %Tiny, ptr %x, i32 0, i32 3
+  %8 = load i8, ptr %7, align 1
+  %add1 = add i8 %6, %8
+  store i8 %add1, ptr %5, align 1
+  ret void
+}
 
-define void @test_f_vec_small_v1i64(i64 %0)
 
-define i64 @test_f_vec_small_v1i64_ret()
+define i32 @test_f_agg_tiny_ret() #0 {
+entry:
+  %literal = alloca %Tiny, align 1
+  %0 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 0
+  store i8 1, ptr %0, align 1
+  %1 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 1
+  %2 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 2
+  store i8 3, ptr %2, align 1
+  %3 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 3
+  store i8 4, ptr %3, align 1
+  %4 = load i32, ptr %literal, align 1
+  ret i32 %4
+}
 
-define void @test_f_agg_small_aligned(i64 %0)
 
-define i64 @test_f_agg_small_aligned_ret(i64 %0)
+define void @test_f_vec_tiny_v4i8(i32 %0) #0 {
+entry:
+  %x = alloca <4 x i8>, align 4
+  store i32 %0, ptr %x, align 4
+  %1 = load <4 x i8>, ptr %x, align 4
+  %2 = load <4 x i8>, ptr %x, align 4
+  %3 = extractelement <4 x i8> %2, i32 1
+  %elemset = insertelement <4 x i8> %1, i8 %3, i32 0
+  store <4 x i8> %elemset, ptr %x, align 4
+  %4 = load <4 x i8>, ptr %x, align 4
+  %5 = load <4 x i8>, ptr %x, align 4
+  %6 = extractelement <4 x i8> %5, i32 3
+  %elemset1 = insertelement <4 x i8> %4, i8 %6, i32 2
+  store <4 x i8> %elemset1, ptr %x, align 4
+  ret void
+}
 
-define void @test_f_agg_large(ptr align 4 %0)
 
-define void @test_f_agg_large_ret(ptr noalias sret(%Large) align 4 %0, i32 %1, i8 signext %2)
+define i32 @test_f_vec_tiny_v4i8_ret() #0 {
+entry:
+  %taddr = alloca <4 x i8>, align 4
+  store <4 x i8> <i8 1, i8 2, i8 3, i8 4>, ptr %taddr, align 4
+  %0 = load i32, ptr %taddr, align 4
+  ret i32 %0
+}
 
-define void @test_f_vec_large_v16i8(ptr align 16 %0)
 
-define void @test_f_vec_large_v16i8_ret(ptr noalias sret(<16 x i8>) align 16 %0)
+define void @test_f_vec_tiny_v1i32(i32 %0) #0 {
+entry:
+  %x = alloca <1 x i32>, align 4
+  store i32 %0, ptr %x, align 4
+  %1 = load <1 x i32>, ptr %x, align 4
+  %elemset = insertelement <1 x i32> %1, i32 114, i32 0
+  store <1 x i32> %elemset, ptr %x, align 4
+  ret void
+}
 
-define i32 @test_f_scalar_stack_1(i32 %0, [2 x i32] %1, i64 %2, ptr align 4 %3, i8 zeroext %4, i8 signext %5, i8 %6, i8 %7)
 
-define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 4 %0, i32 %1, i64 %2, i64 %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7)
+define i32 @test_f_vec_tiny_v1i32_ret() #0 {
+entry:
+  %taddr = alloca <1 x i32>, align 4
+  store <1 x i32> <i32 1>, ptr %taddr, align 4
+  %0 = load i32, ptr %taddr, align 4
+  ret i32 %0
+}
 
-define fp128 @test_f_scalar_stack_4(i32 %0, i64 %1, i64 %2, fp128 %3, i8 zeroext %4, i8 %5, i8 %6)
 
-define void @test_f_scalar_stack_5(double %0, i64 %1, double %2, i64 %3, i32 %4, i64 %5, float %6, double %7, fp128 %8)
+define void @test_f_agg_small([2 x i32] %0) #0 {
+entry:
+  %x = alloca %Small, align 4
+  store [2 x i32] %0, ptr %x, align 4
+  %1 = getelementptr inbounds %Small, ptr %x, i32 0, i32 0
+  %2 = load i32, ptr %1, align 4
+  %3 = getelementptr inbounds %Small, ptr %x, i32 0, i32 1
+  %4 = load ptr, ptr %3, align 4
+  %5 = load i32, ptr %4, align 4
+  %add = add i32 %2, %5
+  store i32 %add, ptr %1, align 4
+  %6 = getelementptr inbounds %Small, ptr %x, i32 0, i32 1
+  %7 = getelementptr inbounds %Small, ptr %x, i32 0, i32 0
+  store ptr %7, ptr %6, align 4
+  ret void
+}
 
-define void @test_f_agg_stack(double %0, i64 %1, double %2, i64 %3, i32 %4, [2 x i32] %5, i64 %6, ptr align 4 %7)
 
-define void @test_f_va_caller()
-call i32 (i32, ...) @f_va_callee(i32 1, i32 2, i32 3, double 4.000000e+00, double 5.000000e+00, i32 %4, [2 x i32] %7, i64 %9, ptr align 4 %indirectarg)
+define [2 x i32] @test_f_agg_small_ret() #0 {
+entry:
+  %literal = alloca %Small, align 4
+  %0 = getelementptr inbounds %Small, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %0, align 4
+  %1 = getelementptr inbounds %Small, ptr %literal, i32 0, i32 1
+  store ptr null, ptr %1, align 4
+  %2 = load [2 x i32], ptr %literal, align 4
+  ret [2 x i32] %2
+}
+
+
+define void @test_f_vec_small_v8i8(i64 %0) #0 {
+entry:
+  %x = alloca <8 x i8>, align 8
+  store i64 %0, ptr %x, align 8
+  %1 = load <8 x i8>, ptr %x, align 8
+  %2 = load <8 x i8>, ptr %x, align 8
+  %3 = extractelement <8 x i8> %2, i32 7
+  %elemset = insertelement <8 x i8> %1, i8 %3, i32 0
+  store <8 x i8> %elemset, ptr %x, align 8
+  ret void
+}
+
+
+define i64 @test_f_vec_small_v8i8_ret() #0 {
+entry:
+  %taddr = alloca <8 x i8>, align 8
+  store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %taddr, align 8
+  %0 = load i64, ptr %taddr, align 8
+  ret i64 %0
+}
+
+
+define void @test_f_vec_small_v1i64(i64 %0) #0 {
+entry:
+  %x = alloca <1 x i64>, align 8
+  store i64 %0, ptr %x, align 8
+  %1 = load <1 x i64>, ptr %x, align 8
+  %elemset = insertelement <1 x i64> %1, i64 114, i32 0
+  store <1 x i64> %elemset, ptr %x, align 8
+  ret void
+}
+
+
+define i64 @test_f_vec_small_v1i64_ret() #0 {
+entry:
+  %taddr = alloca <1 x i64>, align 8
+  store <1 x i64> <i64 1>, ptr %taddr, align 8
+  %0 = load i64, ptr %taddr, align 8
+  ret i64 %0
+}
+
+
+define void @test_f_agg_small_aligned(i64 %0) #0 {
+entry:
+  %x = alloca %Small_aligned, align 8
+  store i64 %0, ptr %x, align 8
+  %1 = getelementptr inbounds %Small_aligned, ptr %x, i32 0, i32 0
+  %2 = load i64, ptr %1, align 8
+  %3 = getelementptr inbounds %Small_aligned, ptr %x, i32 0, i32 0
+  %4 = load i64, ptr %3, align 8
+  %add = add i64 %2, %4
+  store i64 %add, ptr %1, align 8
+  ret void
+}
+
+
+define i64 @test_f_agg_small_aligned_ret(i64 %0) #0 {
+entry:
+  %x = alloca %Small_aligned, align 8
+  %literal = alloca %Small_aligned, align 8
+  store i64 %0, ptr %x, align 8
+  %1 = getelementptr inbounds %Small_aligned, ptr %literal, i32 0, i32 0
+  store i64 10, ptr %1, align 8
+  %2 = load i64, ptr %literal, align 8
+  ret i64 %2
+}
+
+
+define void @test_f_agg_large(ptr align 4 %0) #0 {
+entry:
+  %1 = getelementptr inbounds %Large, ptr %0, i32 0, i32 0
+  %2 = getelementptr inbounds %Large, ptr %0, i32 0, i32 1
+  %3 = load i32, ptr %2, align 4
+  %4 = getelementptr inbounds %Large, ptr %0, i32 0, i32 2
+  %5 = load i32, ptr %4, align 4
+  %add = add i32 %3, %5
+  %6 = getelementptr inbounds %Large, ptr %0, i32 0, i32 3
+  %7 = load i32, ptr %6, align 4
+  %add1 = add i32 %add, %7
+  store i32 %add1, ptr %1, align 4
+  ret void
+}
+
+
+define void @test_f_agg_large_ret(ptr noalias sret(%Large) align 4 %0, i32 %1, i8 signext %2) #0 {
+entry:
+  %literal = alloca %Large, align 4
+  %3 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %3, align 4
+  %4 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 1
+  store i32 2, ptr %4, align 4
+  %5 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 2
+  store i32 3, ptr %5, align 4
+  %6 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 3
+  store i32 4, ptr %6, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 16, i1 false)
+  ret void
+}
+
+
+define void @test_f_vec_large_v16i8(ptr align 16 %0) #0 {
+entry:
+  %1 = load <16 x i8>, ptr %0, align 16
+  %2 = load <16 x i8>, ptr %0, align 16
+  %3 = extractelement <16 x i8> %2, i32 7
+  %elemset = insertelement <16 x i8> %1, i8 %3, i32 0
+  store <16 x i8> %elemset, ptr %0, align 16
+  ret void
+}
+
+
+define void @test_f_vec_large_v16i8_ret(ptr noalias sret(<16 x i8>) align 16 %0) #0 {
+entry:
+  store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %0, align 16
+  ret void
+}
+
+
+define i32 @test_f_scalar_stack_1(i32 %0, [2 x i32] %1, i64 %2, ptr align 4 %3, i8 zeroext %4, i8 signext %5, i8 %6, i8 %7) #0 {
+entry:
+  %a = alloca %Tiny, align 1
+  %b = alloca %Small, align 4
+  %c = alloca %Small_aligned, align 8
+  store i32 %0, ptr %a, align 1
+  store [2 x i32] %1, ptr %b, align 4
+  store i64 %2, ptr %c, align 8
+  %uisiext = zext i8 %6 to i32
+  %sisiext = sext i8 %7 to i32
+  %add = add i32 %uisiext, %sisiext
+  ret i32 %add
+}
+
+
+define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 4 %0, i32 %1, i64 %2, i64 %3, fp128 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %literal = alloca %Large, align 4
+  %8 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 0
+  store i32 %1, ptr %8, align 4
+  %9 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 1
+  %uisiext = zext i8 %5 to i32
+  store i32 %uisiext, ptr %9, align 4
+  %10 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 2
+  %sisiext = sext i8 %6 to i32
+  store i32 %sisiext, ptr %10, align 4
+  %11 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 3
+  %uisiext1 = zext i8 %7 to i32
+  store i32 %uisiext1, ptr %11, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 16, i1 false)
+  ret void
+}
+
+
+define fp128 @test_f_scalar_stack_4(i32 %0, i64 %1, i64 %2, fp128 %3, i8 zeroext %4, i8 %5, i8 %6) #0 {
+entry:
+  ret fp128 %3
+}
+
+define void @test_f_scalar_stack_5(double %0, i64 %1, double %2, i64 %3, i32 %4, i64 %5, float %6, double %7, fp128 %8) #0 {
+entry:
+  ret void
+}
+
+define void @test_f_agg_stack(double %0, i64 %1, double %2, i64 %3, i32 %4, [2 x i32] %5, i64 %6, ptr align 4 %7) #0 {
+entry:
+  %e = alloca %Tiny, align 1
+  %f = alloca %Small, align 4
+  %g = alloca %Small_aligned, align 8
+  store i32 %4, ptr %e, align 1
+  store [2 x i32] %5, ptr %f, align 4
+  store i64 %6, ptr %g, align 8
+  ret void
+}
+
+
+define void @test_f_va_caller() #0 {
+entry:
+  %literal = alloca %Tiny, align 1
+  %literal1 = alloca %Small, align 4
+  %literal2 = alloca %Small_aligned, align 8
+  %literal3 = alloca %Large, align 4
+  %indirectarg = alloca %Large, align 4
+  %0 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 0
+  store i8 6, ptr %0, align 1
+  %1 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 1
+  store i8 7, ptr %1, align 1
+  %2 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 2
+  store i8 8, ptr %2, align 1
+  %3 = getelementptr inbounds %Tiny, ptr %literal, i32 0, i32 3
+  store i8 9, ptr %3, align 1
+  %4 = load i32, ptr %literal, align 1
+  %5 = getelementptr inbounds %Small, ptr %literal1, i32 0, i32 0
+  store i32 10, ptr %5, align 4
+  %6 = getelementptr inbounds %Small, ptr %literal1, i32 0, i32 1
+  store ptr null, ptr %6, align 4
+  %7 = load [2 x i32], ptr %literal1, align 4
+  %8 = getelementptr inbounds %Small_aligned, ptr %literal2, i32 0, i32 0
+  store i64 11, ptr %8, align 8
+  %9 = load i64, ptr %literal2, align 8
+  %10 = getelementptr inbounds %Large, ptr %literal3, i32 0, i32 0
+  store i32 12, ptr %10, align 4
+  %11 = getelementptr inbounds %Large, ptr %literal3, i32 0, i32 1
+  store i32 13, ptr %11, align 4
+  %12 = getelementptr inbounds %Large, ptr %literal3, i32 0, i32 2
+  store i32 14, ptr %12, align 4
+  %13 = getelementptr inbounds %Large, ptr %literal3, i32 0, i32 3
+  store i32 15, ptr %13, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %indirectarg, ptr align 4 %literal3, i32 16, i1 false)
+  %14 = call i32 (i32, ...) @f_va_callee(i32 1, i32 2, i32 3, double 4.000000e+00, double 5.000000e+00, i32 %4, [2 x i32] %7, i64 %9, ptr align 4 %indirectarg)
+  ret void
+}
diff --git a/test/test_suite/abi/riscv32-ilp32d-abi.c3t b/test/test_suite/abi/riscv32-ilp32d-abi.c3t
index 64a55973c..30bf3384a 100644
--- a/test/test_suite/abi/riscv32-ilp32d-abi.c3t
+++ b/test/test_suite/abi/riscv32-ilp32d-abi.c3t
@@ -175,77 +175,383 @@ fn Double_double_s f_ret_double_double_s_double_int32_s_just_sufficient_gprs(
 
 /* #expect: test.ll
 
-define void @test_f_fpr_tracking(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, i8 zeroext %8)
-
-define void @test_f_double_s_arg(double %0)
-
-define double @test_f_ret_double_s()
-
-define void @test_f_double_double_s_arg(double %0, double %1)
-
-define { double, double } @test_f_ret_double_double_s()
-
-define void @test_f_double_float_s_arg(double %0, float %1)
-
-define { double, float } @test_f_ret_double_float_s()
+define void @test_f_fpr_tracking(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, i8 zeroext %8) #0 {
+entry:
+  ret void
+}
 
 
-define void @test_f_double_double_s_arg_insufficient_fprs(float %0, double %1, double %2, double %3, double %4, double %5, double %6, ptr align 8 %7)
+define void @test_f_double_s_arg(double %0) #0 {
+entry:
+  %a = alloca %Double_s, align 8
+  store double %0, ptr %a, align 8
+  ret void
+}
 
-define void @test_f_double_int8_s_arg(double %0, i8 %1)
 
-define { double, i8 } @test_f_ret_double_int8_s()
+define double @test_f_ret_double_s() #0 {
+entry:
+  %literal = alloca %Double_s, align 8
+  %0 = getelementptr inbounds %Double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = load double, ptr %literal, align 8
+  ret double %1
+}
 
-define void @test_f_double_uint8_s_arg(double %0, i8 %1)
 
-define { double, i8 } @test_f_ret_double_uint8_s()
+define void @test_f_double_double_s_arg(double %0, double %1) #0 {
+entry:
+  %a = alloca %Double_double_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds double, ptr %a, i32 1
+  store double %1, ptr %a, align 8
+  ret void
+}
 
-define void @test_f_double_int32_s_arg(double %0, i32 %1)
 
-define { double, i32 } @test_f_ret_double_int32_s()
+define { double, double } @test_f_ret_double_double_s() #0 {
+entry:
+  %literal = alloca %Double_double_s, align 8
+  %0 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds double, ptr %literal, i32 1
+  %3 = load double, ptr %literal, align 8
+  %4 = insertvalue { double, double } undef, double %2, 0
+  %5 = insertvalue { double, double } %4, double %3, 1
+  ret { double, double } %5
+}
+
+
+define void @test_f_double_float_s_arg(double %0, float %1) #0 {
+entry:
+  %a = alloca %Double_float_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds float, ptr %a, i32 2
+  store float %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, float } @test_f_ret_double_float_s() #0 {
+entry:
+  %literal = alloca %Double_float_s, align 8
+  %0 = getelementptr inbounds %Double_float_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_float_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds float, ptr %literal, i32 2
+  %3 = load float, ptr %literal, align 8
+  %4 = insertvalue { double, float } undef, double %2, 0
+  %5 = insertvalue { double, float } %4, float %3, 1
+  ret { double, float } %5
+}
+
+
+define void @test_f_double_double_s_arg_insufficient_fprs(float %0, double %1, double %2, double %3, double %4, double %5, double %6, ptr align 8 %7) #0 {
+entry:
+  ret void
+}
+
+
+define void @test_f_double_int8_s_arg(double %0, i8 %1) #0 {
+entry:
+  %a = alloca %Double_int8_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %a, i32 8
+  store i8 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i8 } @test_f_ret_double_int8_s() #0 {
+entry:
+  %literal = alloca %Double_int8_s, align 8
+  %0 = getelementptr inbounds %Double_int8_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_int8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i32 8
+  %3 = load i8, ptr %literal, align 8
+  %4 = insertvalue { double, i8 } undef, double %2, 0
+  %5 = insertvalue { double, i8 } %4, i8 %3, 1
+  ret { double, i8 } %5
+}
+
+
+define void @test_f_double_uint8_s_arg(double %0, i8 %1) #0 {
+entry:
+  %a = alloca %Double_uint8_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %a, i32 8
+  store i8 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i8 } @test_f_ret_double_uint8_s() #0 {
+entry:
+  %literal = alloca %Double_uint8_s, align 8
+  %0 = getelementptr inbounds %Double_uint8_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_uint8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i32 8
+  %3 = load i8, ptr %literal, align 8
+  %4 = insertvalue { double, i8 } undef, double %2, 0
+  %5 = insertvalue { double, i8 } %4, i8 %3, 1
+  ret { double, i8 } %5
+}
+
+
+define void @test_f_double_int32_s_arg(double %0, i32 %1) #0 {
+entry:
+  %a = alloca %Double_int32_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i32, ptr %a, i32 2
+  store i32 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i32 } @test_f_ret_double_int32_s() #0 {
+entry:
+  %literal = alloca %Double_int32_s, align 8
+  %0 = getelementptr inbounds %Double_int32_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_int32_s, ptr %literal, i32 0, i32 1
+  store i32 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i32, ptr %literal, i32 2
+  %3 = load i32, ptr %literal, align 8
+  %4 = insertvalue { double, i32 } undef, double %2, 0
+  %5 = insertvalue { double, i32 } %4, i32 %3, 1
+  ret { double, i32 } %5
+}
+
 
-; Function Attrs: nounwind
 define void @test_f_double_int64_s_arg(ptr align 8 %0) #0 {
 entry:
   ret void
 }
 
-define void @test_f_ret_double_int64_s(ptr noalias sret(%Double_int64_s) align 8 %0)
+
+define void @test_f_ret_double_int64_s(ptr noalias sret(%Double_int64_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Double_int64_s, align 8
+  %1 = getelementptr inbounds %Double_int64_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %1, align 8
+  %2 = getelementptr inbounds %Double_int64_s, ptr %literal, i32 0, i32 1
+  store i64 2, ptr %2, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
 
 
-define void @test_f_double_int8_s_arg_insufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, ptr align 8 %8)
+define void @test_f_double_int8_s_arg_insufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, ptr align 8 %8) #0 {
+entry:
+  ret void
+}
 
-define void @test_f_struct_double_int8_insufficient_fprs(float %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, ptr align 8 %8)
 
-define void @test_f_doublearr1_s_arg(double %0)
+define void @test_f_struct_double_int8_insufficient_fprs(float %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, ptr align 8 %8) #0 {
+entry:
+  ret void
+}
 
-define double @test_f_ret_doublearr1_s()
 
-define void @test_f_doublearr2_s_arg(double %0, double %1)
+define void @test_f_doublearr1_s_arg(double %0) #0 {
+entry:
+  %a = alloca %Doublearr1_s, align 8
+  store double %0, ptr %a, align 8
+  ret void
+}
 
-define { double, double } @test_f_ret_doublearr2_s()
 
-define void @test_f_doublearr2_tricky1_s_arg(double %0, double %1)
+define double @test_f_ret_doublearr1_s() #0 {
+entry:
+  %literal = alloca %Doublearr1_s, align 8
+  %0 = getelementptr inbounds %Doublearr1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [1 x double], ptr %0, i32 0, i32 0
+  store double 1.000000e+00, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  ret double %2
+}
+
+
+define void @test_f_doublearr2_s_arg(double %0, double %1) #0 {
+entry:
+  %a = alloca %Doublearr2_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds double, ptr %a, i32 1
+  store double %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, double } @test_f_ret_doublearr2_s() #0 {
+entry:
+  %literal = alloca %Doublearr2_s, align 8
+  %0 = getelementptr inbounds %Doublearr2_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x double], ptr %0, i32 0, i32 0
+  store double 1.000000e+00, ptr %1, align 8
+  %2 = getelementptr inbounds [2 x double], ptr %0, i32 0, i32 1
+  store double 2.000000e+00, ptr %2, align 8
+  %3 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds double, ptr %literal, i32 1
+  %4 = load double, ptr %literal, align 8
+  %5 = insertvalue { double, double } undef, double %3, 0
+  %6 = insertvalue { double, double } %5, double %4, 1
+  ret { double, double } %6
+}
+
+
+define void @test_f_doublearr2_tricky1_s_arg(double %0, double %1) #0 {
+entry:
+  %a = alloca %Doublearr2_tricky1_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds double, ptr %a, i32 1
+  store double %1, ptr %a, align 8
+  ret void
+}
+
 
 define { double, double } @test_f_ret_doublearr2_tricky1_s() #0 {
+entry:
+  %literal = alloca %Doublearr2_tricky1_s, align 8
+  %0 = getelementptr inbounds %Doublearr2_tricky1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x %Inner], ptr %0, i32 0, i32 0
+  %2 = getelementptr inbounds %Inner, ptr %1, i32 0, i32 0
+  %3 = getelementptr inbounds [1 x double], ptr %2, i32 0, i32 0
+  store double 1.000000e+00, ptr %3, align 8
+  %4 = getelementptr inbounds [2 x %Inner], ptr %0, i32 0, i32 1
+  %5 = getelementptr inbounds %Inner, ptr %4, i32 0, i32 0
+  %6 = getelementptr inbounds [1 x double], ptr %5, i32 0, i32 0
+  store double 2.000000e+00, ptr %6, align 8
+  %7 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds double, ptr %literal, i32 1
+  %8 = load double, ptr %literal, align 8
+  %9 = insertvalue { double, double } undef, double %7, 0
+  %10 = insertvalue { double, double } %9, double %8, 1
+  ret { double, double } %10
+}
+
+
+define void @test_f_int_double_int_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
 
-define void @test_f_int_double_int_s_arg(ptr align 8 %0) #0
 
 define void @test_f_ret_int_double_int_s(ptr noalias sret(%Int_double_int_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Int_double_int_s, align 8
+  %1 = getelementptr inbounds %Int_double_int_s, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %1, align 8
+  %2 = getelementptr inbounds %Int_double_int_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %2, align 8
+  %3 = getelementptr inbounds %Int_double_int_s, ptr %literal, i32 0, i32 2
+  store i32 3, ptr %3, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 24, i1 false)
+  ret void
+}
 
-define void @test_f_int64_double_s_arg(ptr align 8 %0)
 
-define void @test_f_ret_int64_double_s(ptr noalias sret(%Int64_double_s) align 8 %0)
+define void @test_f_int64_double_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
 
-define void @test_f_char_char_double_s_arg(ptr align 8 %0)
 
-define void @test_f_ret_char_char_double_s(ptr noalias sret(%Char_char_double_s) align 8 %0)
+define void @test_f_ret_int64_double_s(ptr noalias sret(%Int64_double_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Int64_double_s, align 8
+  %1 = getelementptr inbounds %Int64_double_s, ptr %literal, i32 0, i32 0
+  store i64 1, ptr %1, align 8
+  %2 = getelementptr inbounds %Int64_double_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %2, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
 
-define void @test_f_double_u_arg(i64 %0)
 
-define i64 @test_f_ret_double_u()
+define void @test_f_char_char_double_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
+
+
+define void @test_f_ret_char_char_double_s(ptr noalias sret(%Char_char_double_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Char_char_double_s, align 8
+  %1 = getelementptr inbounds %Char_char_double_s, ptr %literal, i32 0, i32 0
+  store i8 1, ptr %1, align 8
+  %2 = getelementptr inbounds %Char_char_double_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %2, align 1
+  %3 = getelementptr inbounds %Char_char_double_s, ptr %literal, i32 0, i32 2
+  store double 3.000000e+00, ptr %3, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
+
+
+define void @test_f_double_u_arg(i64 %0) #0 {
+entry:
+  %a = alloca %Double_u, align 8
+  store i64 %0, ptr %a, align 8
+  ret void
+}
+
+
+define i64 @test_f_ret_double_u() #0 {
+entry:
+  %literal = alloca %Double_u, align 8
+  store double 1.000000e+00, ptr %literal, align 8
+  %0 = load i64, ptr %literal, align 8
+  ret i64 %0
+}
+
 
-; Function Attrs: nounwind
 define { double, i32 } @test_f_ret_double_int32_s_double_int32_s_just_sufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, double %7, i32 %8) #0 {
+entry:
+  %h = alloca %Double_int32_s, align 8
+  %literal = alloca %Double_int32_s, align 8
+  store double %7, ptr %h, align 8
+  %ptroffset = getelementptr inbounds i32, ptr %h, i32 2
+  store i32 %8, ptr %h, align 8
+  %9 = getelementptr inbounds %Double_int32_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %9, align 8
+  %10 = getelementptr inbounds %Double_int32_s, ptr %literal, i32 0, i32 1
+  store i32 2, ptr %10, align 8
+  %11 = load double, ptr %literal, align 8
+  %ptroffset1 = getelementptr inbounds i32, ptr %literal, i32 2
+  %12 = load i32, ptr %literal, align 8
+  %13 = insertvalue { double, i32 } undef, double %11, 0
+  %14 = insertvalue { double, i32 } %13, i32 %12, 1
+  ret { double, i32 } %14
+}
 
-define { double, double } @test_f_ret_double_double_s_double_int32_s_just_sufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, double %7, i32 %8)
+
+define { double, double } @test_f_ret_double_double_s_double_int32_s_just_sufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, double %7, i32 %8) #0 {
+entry:
+  %h = alloca %Double_int32_s, align 8
+  %literal = alloca %Double_double_s, align 8
+  store double %7, ptr %h, align 8
+  %ptroffset = getelementptr inbounds i32, ptr %h, i32 2
+  store i32 %8, ptr %h, align 8
+  %9 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %9, align 8
+  %10 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %10, align 8
+  %11 = load double, ptr %literal, align 8
+  %ptroffset1 = getelementptr inbounds double, ptr %literal, i32 1
+  %12 = load double, ptr %literal, align 8
+  %13 = insertvalue { double, double } undef, double %11, 0
+  %14 = insertvalue { double, double } %13, double %12, 1
+  ret { double, double } %14
+}
diff --git a/test/test_suite/abi/riscv32-ilp32f-abi.c3t b/test/test_suite/abi/riscv32-ilp32f-abi.c3t
index 3acb39c1e..06658dbc8 100644
--- a/test/test_suite/abi/riscv32-ilp32f-abi.c3t
+++ b/test/test_suite/abi/riscv32-ilp32f-abi.c3t
@@ -34,17 +34,63 @@ fn Int_double_s f_ret_int_double_s() {
 
 /* #expect: test.ll
 
-define void @test_f_fpr_tracking(double %0, double %1, double %2, double %3, i8 %4)
 
-define void @test_f_double_s_arg(i64 %0)
+define void @test_f_fpr_tracking(double %0, double %1, double %2, double %3, i8 %4) #0 {
+entry:
+  ret void
+}
 
-define i64 @test_f_ret_double_s()
 
-define void @test_f_double_double_s_arg(ptr align 8 %0)
+define void @test_f_double_s_arg(i64 %0) #0 {
+entry:
+  %a = alloca %Double_s, align 8
+  store i64 %0, ptr %a, align 8
+  ret void
+}
+
+
+define i64 @test_f_ret_double_s() #0 {
+entry:
+  %literal = alloca %Double_s, align 8
+  %0 = getelementptr inbounds %Double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = load i64, ptr %literal, align 8
+  ret i64 %1
+}
+
+
+define void @test_f_double_double_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
+
+
+define void @test_f_ret_double_double_s(ptr noalias sret(%Double_double_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Double_double_s, align 8
+  %1 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %1, align 8
+  %2 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %2, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
 
-define void @test_f_ret_double_double_s(ptr noalias sret(%Double_double_s) align 8 %0)
 
 define void @test_f_int_double_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
+
 
 define void @test_f_ret_int_double_s(ptr noalias sret(%Int_double_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Int_double_s, align 8
+  %1 = getelementptr inbounds %Int_double_s, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %1, align 8
+  %2 = getelementptr inbounds %Int_double_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %2, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
 
diff --git a/test/test_suite/abi/riscv32-ilp32f-ilp32d-abi-2.c3t b/test/test_suite/abi/riscv32-ilp32f-ilp32d-abi-2.c3t
index a1143e772..fd606ed09 100644
--- a/test/test_suite/abi/riscv32-ilp32f-ilp32d-abi-2.c3t
+++ b/test/test_suite/abi/riscv32-ilp32f-ilp32d-abi-2.c3t
@@ -153,48 +153,296 @@ fn Float_u f_ret_float_u() {
 
 /* #expect: test.ll
 
-define void @test_f_fpr_tracking(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, i8 zeroext %8)
+define void @test_f_fpr_tracking(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, i8 zeroext %8) #0 {
+entry:
+  ret void
+}
 
-define void @test_f_float_s_arg(float %0)
-define float @test_f_ret_float_s()
+define void @test_f_float_s_arg(float %0) #0 {
+entry:
+  %a = alloca %Float_s, align 4
+  store float %0, ptr %a, align 4
+  ret void
+}
 
-define void @test_f_float_float_s_arg(float %0, float %1)
-define { float, float } @test_f_ret_float_float_s()
+define float @test_f_ret_float_s() #0 {
+entry:
+  %literal = alloca %Float_s, align 4
+  %0 = getelementptr inbounds %Float_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = load float, ptr %literal, align 4
+  ret float %1
+}
+
+define void @test_f_float_float_s_arg(float %0, float %1) #0 {
+entry:
+  %a = alloca %Float_float_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds float, ptr %a, i32 1
+  store float %1, ptr %a, align 4
+  ret void
+}
+
+define { float, float } @test_f_ret_float_float_s() #0 {
+entry:
+  %literal = alloca %Float_float_s, align 4
+  %0 = getelementptr inbounds %Float_float_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_float_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds float, ptr %literal, i32 1
+  %3 = load float, ptr %literal, align 4
+  %4 = insertvalue { float, float } undef, float %2, 0
+  %5 = insertvalue { float, float } %4, float %3, 1
+  ret { float, float } %5
+}
 
 define void @test_f_float_float_s_arg_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, [2 x i32] %7) #0 {
+entry:
+  %h = alloca %Float_float_s, align 4
+  store [2 x i32] %7, ptr %h, align 4
+  ret void
+}
 
-define void @test_f_float_int8_s_arg(float %0, i8 %1)
-define { float, i8 } @test_f_ret_float_int8_s()
+define void @test_f_float_int8_s_arg(float %0, i8 %1) #0 {
+entry:
+  %a = alloca %Float_int8_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %a, i32 4
+  store i8 %1, ptr %a, align 4
+  ret void
+}
 
-define void @test_f_float_uint8_s_arg(float %0, i8 %1)
-define { float, i8 } @test_f_ret_float_uint8_s()
+define { float, i8 } @test_f_ret_float_int8_s() #0 {
+entry:
+  %literal = alloca %Float_int8_s, align 4
+  %0 = getelementptr inbounds %Float_int8_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_int8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i32 4
+  %3 = load i8, ptr %literal, align 4
+  %4 = insertvalue { float, i8 } undef, float %2, 0
+  %5 = insertvalue { float, i8 } %4, i8 %3, 1
+  ret { float, i8 } %5
+}
 
-define void @test_f_float_int32_s_arg(float %0, i32 %1)
-define { float, i32 } @test_f_ret_float_int32_s()
+define void @test_f_float_uint8_s_arg(float %0, i8 %1) #0 {
+entry:
+  %a = alloca %Float_uint8_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %a, i32 4
+  store i8 %1, ptr %a, align 4
+  ret void
+}
 
-define void @test_f_float_int64_s_arg(ptr align 8 %0)
-define void @test_f_ret_float_int64_s(ptr noalias sret(%Float_int64_s) align 8 %0)
+define { float, i8 } @test_f_ret_float_uint8_s() #0 {
+entry:
+  %literal = alloca %Float_uint8_s, align 4
+  %0 = getelementptr inbounds %Float_uint8_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_uint8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i32 4
+  %3 = load i8, ptr %literal, align 4
+  %4 = insertvalue { float, i8 } undef, float %2, 0
+  %5 = insertvalue { float, i8 } %4, i8 %3, 1
+  ret { float, i8 } %5
+}
 
-define void @test_f_float_int8_s_arg_insufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, [2 x i32] %8)
-define void @test_f_struct_float_int8_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, [2 x i32] %8)
+define void @test_f_float_int32_s_arg(float %0, i32 %1) #0 {
+entry:
+  %a = alloca %Float_int32_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds i32, ptr %a, i32 1
+  store i32 %1, ptr %a, align 4
+  ret void
+}
 
-define void @test_f_floatarr1_s_arg(float %0)
-define float @test_f_ret_floatarr1_s()
+define { float, i32 } @test_f_ret_float_int32_s() #0 {
+entry:
+  %literal = alloca %Float_int32_s, align 4
+  %0 = getelementptr inbounds %Float_int32_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_int32_s, ptr %literal, i32 0, i32 1
+  store i32 2, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds i32, ptr %literal, i32 1
+  %3 = load i32, ptr %literal, align 4
+  %4 = insertvalue { float, i32 } undef, float %2, 0
+  %5 = insertvalue { float, i32 } %4, i32 %3, 1
+  ret { float, i32 } %5
+}
 
-define void @test_f_floatarr2_s_arg(float %0, float %1)
-define { float, float } @test_f_ret_floatarr2_s()
+define void @test_f_float_int64_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
 
-define void @test_f_floatarr2_tricky1_s_arg(float %0, float %1)
-define { float, float } @test_f_ret_floatarr2_tricky1_s()
+define void @test_f_ret_float_int64_s(ptr noalias sret(%Float_int64_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Float_int64_s, align 8
+  %1 = getelementptr inbounds %Float_int64_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %1, align 8
+  %2 = getelementptr inbounds %Float_int64_s, ptr %literal, i32 0, i32 1
+  store i64 2, ptr %2, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
 
-define void @test_f_int_float_int_s_arg(ptr align 4 %0)
-define void @test_f_ret_int_float_int_s(ptr noalias sret(%Int_float_int_s) align 4 %0)
+define void @test_f_float_int8_s_arg_insufficient_gprs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, [2 x i32] %8) #0 {
+entry:
+  %i = alloca %Float_int8_s, align 4
+  store [2 x i32] %8, ptr %i, align 4
+  ret void
+}
 
-define void @test_f_int64_float_s_arg(ptr align 8 %0)
-define void @test_f_ret_int64_float_s(ptr noalias sret(%Int64_float_s) align 8 %0)
+define void @test_f_struct_float_int8_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, [2 x i32] %8) #0 {
+entry:
+  %i = alloca %Float_int8_s, align 4
+  store [2 x i32] %8, ptr %i, align 4
+  ret void
+}
 
-define void @test_f_char_char_float_s_arg([2 x i32] %0)
-define [2 x i32] @test_f_ret_char_char_float_s()
+define void @test_f_floatarr1_s_arg(float %0) #0 {
+entry:
+  %a = alloca %Floatarr1_s, align 4
+  store float %0, ptr %a, align 4
+  ret void
+}
 
-define void @test_f_float_u_arg(i32 %0)
-define i32 @test_f_ret_float_u()
\ No newline at end of file
+define float @test_f_ret_floatarr1_s() #0 {
+entry:
+  %literal = alloca %Floatarr1_s, align 4
+  %0 = getelementptr inbounds %Floatarr1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [1 x float], ptr %0, i32 0, i32 0
+  store float 1.000000e+00, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  ret float %2
+}
+
+define void @test_f_floatarr2_s_arg(float %0, float %1) #0 {
+entry:
+  %a = alloca %Floatarr2_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds float, ptr %a, i32 1
+  store float %1, ptr %a, align 4
+  ret void
+}
+
+define { float, float } @test_f_ret_floatarr2_s() #0 {
+entry:
+  %literal = alloca %Floatarr2_s, align 4
+  %0 = getelementptr inbounds %Floatarr2_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x float], ptr %0, i32 0, i32 0
+  store float 1.000000e+00, ptr %1, align 4
+  %2 = getelementptr inbounds [2 x float], ptr %0, i32 0, i32 1
+  store float 2.000000e+00, ptr %2, align 4
+  %3 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds float, ptr %literal, i32 1
+  %4 = load float, ptr %literal, align 4
+  %5 = insertvalue { float, float } undef, float %3, 0
+  %6 = insertvalue { float, float } %5, float %4, 1
+  ret { float, float } %6
+}
+
+define void @test_f_floatarr2_tricky1_s_arg(float %0, float %1) #0 {
+entry:
+  %a = alloca %Floatarr2_tricky1_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds float, ptr %a, i32 1
+  store float %1, ptr %a, align 4
+  ret void
+}
+
+define { float, float } @test_f_ret_floatarr2_tricky1_s() #0 {
+entry:
+  %literal = alloca %Floatarr2_tricky1_s, align 4
+  %0 = getelementptr inbounds %Floatarr2_tricky1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x %Inner], ptr %0, i32 0, i32 0
+  %2 = getelementptr inbounds %Inner, ptr %1, i32 0, i32 0
+  %3 = getelementptr inbounds [1 x float], ptr %2, i32 0, i32 0
+  store float 1.000000e+00, ptr %3, align 4
+  %4 = getelementptr inbounds [2 x %Inner], ptr %0, i32 0, i32 1
+  %5 = getelementptr inbounds %Inner, ptr %4, i32 0, i32 0
+  %6 = getelementptr inbounds [1 x float], ptr %5, i32 0, i32 0
+  store float 2.000000e+00, ptr %6, align 4
+  %7 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds float, ptr %literal, i32 1
+  %8 = load float, ptr %literal, align 4
+  %9 = insertvalue { float, float } undef, float %7, 0
+  %10 = insertvalue { float, float } %9, float %8, 1
+  ret { float, float } %10
+}
+
+define void @test_f_int_float_int_s_arg(ptr align 4 %0) #0 {
+entry:
+  ret void
+}
+
+define void @test_f_ret_int_float_int_s(ptr noalias sret(%Int_float_int_s) align 4 %0) #0 {
+entry:
+  %literal = alloca %Int_float_int_s, align 4
+  %1 = getelementptr inbounds %Int_float_int_s, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %1, align 4
+  %2 = getelementptr inbounds %Int_float_int_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %2, align 4
+  %3 = getelementptr inbounds %Int_float_int_s, ptr %literal, i32 0, i32 2
+  store i32 3, ptr %3, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %0, ptr align 4 %literal, i32 12, i1 false)
+  ret void
+}
+
+define void @test_f_int64_float_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
+
+define void @test_f_ret_int64_float_s(ptr noalias sret(%Int64_float_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Int64_float_s, align 8
+  %1 = getelementptr inbounds %Int64_float_s, ptr %literal, i32 0, i32 0
+  store i64 1, ptr %1, align 8
+  %2 = getelementptr inbounds %Int64_float_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %2, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 16, i1 false)
+  ret void
+}
+
+define void @test_f_char_char_float_s_arg([2 x i32] %0) #0 {
+entry:
+  %a = alloca %Char_char_float_s, align 4
+  store [2 x i32] %0, ptr %a, align 4
+  ret void
+}
+
+define [2 x i32] @test_f_ret_char_char_float_s() #0 {
+entry:
+  %literal = alloca %Char_char_float_s, align 4
+  %0 = getelementptr inbounds %Char_char_float_s, ptr %literal, i32 0, i32 0
+  store i8 1, ptr %0, align 4
+  %1 = getelementptr inbounds %Char_char_float_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 1
+  %2 = getelementptr inbounds %Char_char_float_s, ptr %literal, i32 0, i32 2
+  store float 3.000000e+00, ptr %2, align 4
+  %3 = load [2 x i32], ptr %literal, align 4
+  ret [2 x i32] %3
+}
+
+define void @test_f_float_u_arg(i32 %0) #0 {
+entry:
+  %a = alloca %Float_u, align 4
+  store i32 %0, ptr %a, align 4
+  ret void
+}
+
+define i32 @test_f_ret_float_u() #0 {
+entry:
+  %literal = alloca %Float_u, align 4
+  store float 1.000000e+00, ptr %literal, align 4
+  %0 = load i32, ptr %literal, align 4
+  ret i32 %0
+}
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t
new file mode 100644
index 000000000..38f20c9b1
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t
@@ -0,0 +1,51 @@
+// #target: linux-riscv64
+module test;
+
+struct Large {
+  long a, b, c, d;
+}
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+fn int f_scalar_stack_1(int a, int128 b, double c, float128 d, char[<32>] e,
+                     char f, ichar g, char h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+fn Large f_scalar_stack_2(double a, int128 b, float128 c, char[<32>] d,
+                              char e, ichar f, char g) {
+  return {(long)a, e, f, g};
+}
+
+/* #expect: test.ll
+
+define signext i32 @test_f_scalar_stack_1(i32 signext %0, i128 %1, double %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %sisiext = sext i8 %6 to i32
+  %uisiext = zext i8 %7 to i32
+  %add = add i32 %sisiext, %uisiext
+  ret i32 %add
+}
+
+define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 8 %0, double %1, i128 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %literal = alloca %Large, align 8
+  %8 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 0
+  %fpsi = fptosi double %1 to i64
+  store i64 %fpsi, ptr %8, align 8
+  %9 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 1
+  %uisiext = zext i8 %5 to i64
+  store i64 %uisiext, ptr %9, align 8
+  %10 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 2
+  %sisiext = sext i8 %6 to i64
+  store i64 %sisiext, ptr %10, align 8
+  %11 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 3
+  %uisiext1 = zext i8 %7 to i64
+  store i64 %uisiext1, ptr %11, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 32, i1 false)
+  ret void
+}
\ No newline at end of file
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t
new file mode 100644
index 000000000..753ecdefa
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t
@@ -0,0 +1,52 @@
+// #target: linux-riscv64
+// #opt: --riscvfloat=float
+module test;
+
+struct Large {
+  long a, b, c, d;
+}
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+fn int f_scalar_stack_1(int a, int128 b, double c, float128 d, char[<32>] e,
+                     char f, ichar g, char h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+fn Large f_scalar_stack_2(double a, int128 b, float128 c, char[<32>] d,
+                              char e, ichar f, char g) {
+  return {(long)a, e, f, g};
+}
+
+/* #expect: test.ll
+
+define signext i32 @test_f_scalar_stack_1(i32 signext %0, i128 %1, double %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %sisiext = sext i8 %6 to i32
+  %uisiext = zext i8 %7 to i32
+  %add = add i32 %sisiext, %uisiext
+  ret i32 %add
+}
+
+define void @test_f_scalar_stack_2(ptr noalias sret(%Large) align 8 %0, double %1, i128 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+entry:
+  %literal = alloca %Large, align 8
+  %8 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 0
+  %fpsi = fptosi double %1 to i64
+  store i64 %fpsi, ptr %8, align 8
+  %9 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 1
+  %uisiext = zext i8 %5 to i64
+  store i64 %uisiext, ptr %9, align 8
+  %10 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 2
+  %sisiext = sext i8 %6 to i64
+  store i64 %sisiext, ptr %10, align 8
+  %11 = getelementptr inbounds %Large, ptr %literal, i32 0, i32 3
+  %uisiext1 = zext i8 %7 to i64
+  store i64 %uisiext1, ptr %11, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 32, i1 false)
+  ret void
+}
\ No newline at end of file
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t
new file mode 100644
index 000000000..4c63b6fb6
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t
@@ -0,0 +1,208 @@
+// #target: linux-riscv64
+module test;
+
+fn void f_void() {}
+
+fn bool f_scalar_0(bool x) { return x; }
+fn int f_scalar_1(ichar x) { return x; }
+fn char f_scalar_2(char x) { return x; }
+fn uint f_scalar_3(int x) { return x; }
+fn long f_scalar_4(long x) { return x; }
+fn float f_fp_scalar_1(float x) { return x; }
+fn double f_fp_scalar_2(double x) { return x; }
+fn float128 f_fp_scalar_3(float128 x) { return x; }
+
+// Aggregates <= 2*xlen may be passed in registers, so will be coerced to
+// integer arguments. The rules for return are the same.
+
+struct Tiny {
+  ushort a, b, c, d;
+}
+
+fn void f_agg_tiny(Tiny x) {
+  x.a += x.b;
+  x.c += x.d;
+}
+
+fn Tiny f_agg_tiny_ret() {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_tiny_v4i16(short[<4>] x) {
+  x[0] = x[1];
+  x[2] = x[3];
+}
+
+fn short[<4>] f_vec_tiny_v4i16_ret() {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_tiny_v1i64(long[<1>] x) {
+  x[0] = 114;
+}
+
+fn long[<1>] f_vec_tiny_v1i64_ret() {
+  return {1};
+}
+
+struct Small {
+  long a;
+  long *b;
+}
+
+fn void f_agg_small(Small x) {
+  x.a += *x.b;
+  x.b = &x.a;
+}
+
+fn Small f_agg_small_ret() {
+  return {1, null};
+}
+
+fn void f_vec_small_v8i16(short[<8>] x) {
+  x[0] = x[7];
+}
+
+fn short[<8>] f_vec_small_v8i16_ret() {
+  return {1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+fn void f_vec_small_v1i128(int128[<1>] x) {
+  x[0] = 114;
+}
+
+fn int128[<1>] f_vec_small_v1i128_ret() {
+  return {1};
+}
+
+// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
+// single 2*xlen-sized argument, to ensure that alignment can be maintained if
+// passed on the stack.
+
+struct Small_aligned {
+  int128 a;
+}
+
+fn void f_agg_small_aligned(Small_aligned x) {
+  x.a += x.a;
+}
+
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
+  return {10};
+}
+
+// Aggregates larger than 2*xlen will be passed and returned indirectly
+struct Large {
+  long a, b, c, d;
+}
+
+fn void f_agg_large(Large x) {
+  x.a = x.b + x.c + x.d;
+}
+
+// The address where the struct should be written to will be the first
+// argument
+fn Large f_agg_large_ret(int i, ichar j) {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_large_v32i8(char[<32>] x) {
+  x[0] = x[7];
+}
+
+fn char[<32>] f_vec_large_v32i8_ret() {
+  return { [1] = 1, [31] = 31 };
+}
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
+                     Large d, char e, ichar f, char g, ichar h) {
+  return g + h;
+}
+
+fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
+                     char f, ichar g, char h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
+                              char e, ichar f, char g) {
+  return {a, e, f, g};
+}
+
+// Ensure that ABI lowering happens as expected for vararg calls.
+// Specifically, ensure that signext is emitted for varargs that will be
+// passed in registers but not on the stack. Ensure this takes into account
+// the use of "aligned" register pairs for varargs with 2*xlen alignment.
+
+fn int f_va_callee(int, ...);
+
+// Each group of calls moves the 16-byte argument (fq / Small_aligned / Small) one position later.
+fn void f_va_caller() {
+	float128 fq;
+  f_va_callee(1, 2, 3i64, 4.0f, 5.0, Tiny{6, 7, 8, 9},
+              Small{10, null}, Small_aligned{11},
+              Large{12, 13, 14, 15});
+  f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, Small_aligned{5}, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, Small{5,null}, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, Small_aligned{6}, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, Small{6, null}, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, Small_aligned{7}, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, Small{7, null}, 8, 9);
+}
+
+
+/* #expect: test.ll
+
+define void @test_f_void()
+define zeroext i8 @test_f_scalar_0(i8 zeroext %0)
+define signext i32 @test_f_scalar_1(i8 signext %0)
+define zeroext i8 @test_f_scalar_2(i8 zeroext %0)
+define signext i32 @test_f_scalar_3(i32 signext %0)
+define i64 @test_f_scalar_4(i64 %0)
+define float @test_f_fp_scalar_1(float %0)
+define double @test_f_fp_scalar_2(double %0)
+define fp128 @test_f_fp_scalar_3(fp128 %0)
+define void @test_f_agg_tiny(i64 %0)
+define i64 @test_f_agg_tiny_ret()
+define void @test_f_vec_tiny_v4i16(i64 %0)
+define i64 @test_f_vec_tiny_v4i16_ret()
+define void @test_f_vec_tiny_v1i64(i64 %0)
+define i64 @test_f_vec_tiny_v1i64_ret()
+define void @test_f_agg_small([2 x i64]
+define [2 x i64] @test_f_agg_small_ret()
+define void @test_f_vec_small_v8i16(i128 %0)
+define i128 @test_f_vec_small_v8i16_ret()
+define void @test_f_vec_small_v1i128(i128 %0)
+define i128 @test_f_vec_small_v1i128_ret()
+define void @test_f_agg_small_aligned(i128 %0)
+define i128 @test_f_agg_small_aligned_ret(i128 %0)
+define void @test_f_agg_large(ptr align 8 %0)
+define void @test_f_agg_large_ret(ptr noalias sret(%Large) align 8 %0, i32 signext %1, i8 signext %2)
+define void @test_f_vec_large_v32i8(ptr align 32 %0)
+define void @test_f_vec_large_v32i8_ret(ptr noalias sret(<32 x i8>) align 32 %0)
+define signext i32 @test_f_scalar_stack_1(i64 %0, [2 x i64] %1, i128 %2, ptr align 8 %3, i8 zeroext %4, i8 signext %5, i8 %6, i8 %7)
+define signext i32 @test_f_scalar_stack_2(i32 signext %0, i128 %1, i64 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+define void @test_f_scalar_stack_3(ptr noalias sret(%Large) align 8 %0, i32 signext %1, i128 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7)
+declare signext i32 @test_f_va_callee(i32 signext, ...) #0
+call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg, ptr align 8 %literal3, i32 32, i1 false)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i64 3, double 4.000000e+00, double 5.000000e+00, i64 %4, [2 x i64] %7, i128 %9, ptr align 8 %indirectarg)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, fp128 %15, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i128 %18, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, [2 x i64] %22, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, fp128 %24, i32 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i128 %27, i32 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, [2 x i64] %31, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, fp128 %33, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, i128 %36, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, [2 x i64] %40, i32 8, i32 9)
+
\ No newline at end of file
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t
new file mode 100644
index 000000000..53eb8134e
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t
@@ -0,0 +1,208 @@
+// #target: linux-riscv64
+// #opt: --riscvfloat=float
+module test;
+
+fn void f_void() {}
+
+fn bool f_scalar_0(bool x) { return x; }
+fn int f_scalar_1(ichar x) { return x; }
+fn char f_scalar_2(char x) { return x; }
+fn uint f_scalar_3(int x) { return x; }
+fn long f_scalar_4(long x) { return x; }
+fn float f_fp_scalar_1(float x) { return x; }
+fn double f_fp_scalar_2(double x) { return x; }
+fn float128 f_fp_scalar_3(float128 x) { return x; }
+
+// Aggregates <= 2*xlen may be passed in registers, so will be coerced to
+// integer arguments. The rules for return are the same.
+
+struct Tiny {
+  ushort a, b, c, d;
+}
+
+fn void f_agg_tiny(Tiny x) {
+  x.a += x.b;
+  x.c += x.d;
+}
+
+fn Tiny f_agg_tiny_ret() {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_tiny_v4i16(short[<4>] x) {
+  x[0] = x[1];
+  x[2] = x[3];
+}
+
+fn short[<4>] f_vec_tiny_v4i16_ret() {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_tiny_v1i64(long[<1>] x) {
+  x[0] = 114;
+}
+
+fn long[<1>] f_vec_tiny_v1i64_ret() {
+  return {1};
+}
+
+struct Small {
+  long a;
+  long *b;
+}
+
+fn void f_agg_small(Small x) {
+  x.a += *x.b;
+  x.b = &x.a;
+}
+
+fn Small f_agg_small_ret() {
+  return {1, null};
+}
+
+fn void f_vec_small_v8i16(short[<8>] x) {
+  x[0] = x[7];
+}
+
+fn short[<8>] f_vec_small_v8i16_ret() {
+  return {1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+fn void f_vec_small_v1i128(int128[<1>] x) {
+  x[0] = 114;
+}
+
+fn int128[<1>] f_vec_small_v1i128_ret() {
+  return {1};
+}
+
+// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
+// single 2*xlen-sized argument, to ensure that alignment can be maintained if
+// passed on the stack.
+
+struct Small_aligned {
+  int128 a;
+}
+
+fn void f_agg_small_aligned(Small_aligned x) {
+  x.a += x.a;
+}
+
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
+  return {10};
+}
+
+// Aggregates larger than 2*xlen will be passed and returned indirectly
+struct Large {
+  long a, b, c, d;
+}
+
+fn void f_agg_large(Large x) {
+  x.a = x.b + x.c + x.d;
+}
+
+// The address where the struct should be written to will be the first
+// argument
+fn Large f_agg_large_ret(int i, ichar j) {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_large_v32i8(char[<32>] x) {
+  x[0] = x[7];
+}
+
+fn char[<32>] f_vec_large_v32i8_ret() {
+  return { [1] = 1, [31] = 31 };
+}
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
+                     Large d, char e, ichar f, char g, ichar h) {
+  return g + h;
+}
+
+fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
+                     char f, ichar g, char h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
+                              char e, ichar f, char g) {
+  return {a, e, f, g};
+}
+
+// Ensure that ABI lowering happens as expected for vararg calls.
+// Specifically, ensure that signext is emitted for varargs that will be
+// passed in registers but not on the stack. Ensure this takes into account
+// the use of "aligned" register pairs for varargs with 2*xlen alignment.
+
+fn int f_va_callee(int, ...);
+
+// Each group of calls moves the 16-byte argument (fq / Small_aligned / Small) one position later.
+fn void f_va_caller() {
+	float128 fq;
+  f_va_callee(1, 2, 3i64, 4.0f, 5.0, Tiny{6, 7, 8, 9},
+              Small{10, null}, Small_aligned{11},
+              Large{12, 13, 14, 15});
+  f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, Small_aligned{5}, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, Small{5,null}, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, Small_aligned{6}, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, Small{6, null}, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, Small_aligned{7}, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, Small{7, null}, 8, 9);
+}
+
+
+/* #expect: test.ll
+
+define void @test_f_void()
+define zeroext i8 @test_f_scalar_0(i8 zeroext %0)
+define signext i32 @test_f_scalar_1(i8 signext %0)
+define zeroext i8 @test_f_scalar_2(i8 zeroext %0)
+define signext i32 @test_f_scalar_3(i32 signext %0)
+define i64 @test_f_scalar_4(i64 %0)
+define float @test_f_fp_scalar_1(float %0)
+define double @test_f_fp_scalar_2(double %0)
+define fp128 @test_f_fp_scalar_3(fp128 %0)
+define void @test_f_agg_tiny(i64 %0)
+define i64 @test_f_agg_tiny_ret()
+define void @test_f_vec_tiny_v4i16(i64 %0)
+define i64 @test_f_vec_tiny_v4i16_ret()
+define void @test_f_vec_tiny_v1i64(i64 %0)
+define i64 @test_f_vec_tiny_v1i64_ret()
+define void @test_f_agg_small([2 x i64]
+define [2 x i64] @test_f_agg_small_ret()
+define void @test_f_vec_small_v8i16(i128 %0)
+define i128 @test_f_vec_small_v8i16_ret()
+define void @test_f_vec_small_v1i128(i128 %0)
+define i128 @test_f_vec_small_v1i128_ret()
+define void @test_f_agg_small_aligned(i128 %0)
+define i128 @test_f_agg_small_aligned_ret(i128 %0)
+define void @test_f_agg_large(ptr align 8 %0)
+define void @test_f_agg_large_ret(ptr noalias sret(%Large) align 8 %0, i32 signext %1, i8 signext %2)
+define void @test_f_vec_large_v32i8(ptr align 32 %0)
+define void @test_f_vec_large_v32i8_ret(ptr noalias sret(<32 x i8>) align 32 %0)
+define signext i32 @test_f_scalar_stack_1(i64 %0, [2 x i64] %1, i128 %2, ptr align 8 %3, i8 zeroext %4, i8 signext %5, i8 %6, i8 %7)
+define signext i32 @test_f_scalar_stack_2(i32 signext %0, i128 %1, i64 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+define void @test_f_scalar_stack_3(ptr noalias sret(%Large) align 8 %0, i32 signext %1, i128 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7)
+declare signext i32 @test_f_va_callee(i32 signext, ...) #0
+call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg, ptr align 8 %literal3, i32 32, i1 false)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i64 3, double 4.000000e+00, double 5.000000e+00, i64 %4, [2 x i64] %7, i128 %9, ptr align 8 %indirectarg)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, fp128 %15, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i128 %18, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, [2 x i64] %22, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, fp128 %24, i32 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i128 %27, i32 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, [2 x i64] %31, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, fp128 %33, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, i128 %36, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, [2 x i64] %40, i32 8, i32 9)
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t
new file mode 100644
index 000000000..eea2a9ed9
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t
@@ -0,0 +1,208 @@
+// #target: linux-riscv64
+// #opt: --riscvfloat=double
+module test;
+
+fn void f_void() {}
+
+fn bool f_scalar_0(bool x) { return x; }
+fn int f_scalar_1(ichar x) { return x; }
+fn char f_scalar_2(char x) { return x; }
+fn uint f_scalar_3(int x) { return x; }
+fn long f_scalar_4(long x) { return x; }
+fn float f_fp_scalar_1(float x) { return x; }
+fn double f_fp_scalar_2(double x) { return x; }
+fn float128 f_fp_scalar_3(float128 x) { return x; }
+
+// Aggregates <= 2*xlen may be passed in registers, so will be coerced to
+// integer arguments. The rules for return are the same.
+
+struct Tiny {
+  ushort a, b, c, d;
+}
+
+fn void f_agg_tiny(Tiny x) {
+  x.a += x.b;
+  x.c += x.d;
+}
+
+fn Tiny f_agg_tiny_ret() {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_tiny_v4i16(short[<4>] x) {
+  x[0] = x[1];
+  x[2] = x[3];
+}
+
+fn short[<4>] f_vec_tiny_v4i16_ret() {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_tiny_v1i64(long[<1>] x) {
+  x[0] = 114;
+}
+
+fn long[<1>] f_vec_tiny_v1i64_ret() {
+  return {1};
+}
+
+struct Small {
+  long a;
+  long *b;
+}
+
+fn void f_agg_small(Small x) {
+  x.a += *x.b;
+  x.b = &x.a;
+}
+
+fn Small f_agg_small_ret() {
+  return {1, null};
+}
+
+fn void f_vec_small_v8i16(short[<8>] x) {
+  x[0] = x[7];
+}
+
+fn short[<8>] f_vec_small_v8i16_ret() {
+  return {1, 2, 3, 4, 5, 6, 7, 8};
+}
+
+fn void f_vec_small_v1i128(int128[<1>] x) {
+  x[0] = 114;
+}
+
+fn int128[<1>] f_vec_small_v1i128_ret() {
+  return {1};
+}
+
+// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
+// single 2*xlen-sized argument, to ensure that alignment can be maintained if
+// passed on the stack.
+
+struct Small_aligned {
+  int128 a;
+}
+
+fn void f_agg_small_aligned(Small_aligned x) {
+  x.a += x.a;
+}
+
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
+  return {10};
+}
+
+// Aggregates greater than 2*xlen will be passed and returned indirectly.
+struct Large {
+  long a, b, c, d;
+}
+
+fn void f_agg_large(Large x) {
+  x.a = x.b + x.c + x.d;
+}
+
+// The address where the struct should be written to will be the first
+// argument
+fn Large f_agg_large_ret(int i, ichar j) {
+  return {1, 2, 3, 4};
+}
+
+fn void f_vec_large_v32i8(char[<32>] x) {
+  x[0] = x[7];
+}
+
+fn char[<32>] f_vec_large_v32i8_ret() {
+  return { [1] = 1, [31] = 31 };
+}
+
+// Scalars passed on the stack should not have signext/zeroext attributes
+// (they are anyext).
+
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
+                     Large d, char e, ichar f, char g, ichar h) {
+  return g + h;
+}
+
+fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
+                     char f, ichar g, char h) {
+  return g + h;
+}
+
+// Ensure that scalars passed on the stack are still determined correctly in
+// the presence of large return values that consume a register due to the need
+// to pass a pointer.
+
+fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
+                              char e, ichar f, char g) {
+  return {a, e, f, g};
+}
+
+// Ensure that ABI lowering happens as expected for vararg calls.
+// Specifically, ensure that signext is emitted for varargs that will be
+// passed in registers but not on the stack. Ensure this takes into account
+// the use of "aligned" register pairs for varargs with 2*xlen alignment.
+
+fn int f_va_callee(int, ...);
+
+// CHECK-LABEL: define{{.*}} void @f_va_caller()
+fn void f_va_caller() {
+	float128 fq;
+  f_va_callee(1, 2, 3i64, 4.0f, 5.0, Tiny{6, 7, 8, 9},
+              Small{10, null}, Small_aligned{11},
+              Large{12, 13, 14, 15});
+  f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, Small_aligned{5}, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, Small{5,null}, 6, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, Small_aligned{6}, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, Small{6, null}, 7, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, Small_aligned{7}, 8, 9);
+  f_va_callee(1, 2, 3, 4, 5, 6, Small{7, null}, 8, 9);
+}
+
+
+/* #expect: test.ll
+
+define void @test_f_void()
+define zeroext i8 @test_f_scalar_0(i8 zeroext %0)
+define signext i32 @test_f_scalar_1(i8 signext %0)
+define zeroext i8 @test_f_scalar_2(i8 zeroext %0)
+define signext i32 @test_f_scalar_3(i32 signext %0)
+define i64 @test_f_scalar_4(i64 %0)
+define float @test_f_fp_scalar_1(float %0)
+define double @test_f_fp_scalar_2(double %0)
+define fp128 @test_f_fp_scalar_3(fp128 %0)
+define void @test_f_agg_tiny(i64 %0)
+define i64 @test_f_agg_tiny_ret()
+define void @test_f_vec_tiny_v4i16(i64 %0)
+define i64 @test_f_vec_tiny_v4i16_ret()
+define void @test_f_vec_tiny_v1i64(i64 %0)
+define i64 @test_f_vec_tiny_v1i64_ret()
+define void @test_f_agg_small([2 x i64]
+define [2 x i64] @test_f_agg_small_ret()
+define void @test_f_vec_small_v8i16(i128 %0)
+define i128 @test_f_vec_small_v8i16_ret()
+define void @test_f_vec_small_v1i128(i128 %0)
+define i128 @test_f_vec_small_v1i128_ret()
+define void @test_f_agg_small_aligned(i128 %0)
+define i128 @test_f_agg_small_aligned_ret(i128 %0)
+define void @test_f_agg_large(ptr align 8 %0)
+define void @test_f_agg_large_ret(ptr noalias sret(%Large) align 8 %0, i32 signext %1, i8 signext %2)
+define void @test_f_vec_large_v32i8(ptr align 32 %0)
+define void @test_f_vec_large_v32i8_ret(ptr noalias sret(<32 x i8>) align 32 %0)
+define signext i32 @test_f_scalar_stack_1(i64 %0, [2 x i64] %1, i128 %2, ptr align 8 %3, i8 zeroext %4, i8 signext %5, i8 %6, i8 %7)
+define signext i32 @test_f_scalar_stack_2(i32 signext %0, i128 %1, i64 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7) #0 {
+define void @test_f_scalar_stack_3(ptr noalias sret(%Large) align 8 %0, i32 signext %1, i128 %2, fp128 %3, ptr align 32 %4, i8 zeroext %5, i8 %6, i8 %7)
+declare signext i32 @test_f_va_callee(i32 signext, ...) #0
+call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg, ptr align 8 %literal3, i32 32, i1 false)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i64 3, double 4.000000e+00, double 5.000000e+00, i64 %4, [2 x i64] %7, i128 %9, ptr align 8 %indirectarg)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, fp128 %15, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i128 %18, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, [2 x i64] %22, i32 signext 6, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, fp128 %24, i32 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i128 %27, i32 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, [2 x i64] %31, i32 signext 7, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, fp128 %33, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, i128 %36, i32 8, i32 9)
+call i32 (i32, ...) @test_f_va_callee(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, [2 x i64] %40, i32 8, i32 9)
diff --git a/test/test_suite/abi/riscv64-lp64d-abi.c3t b/test/test_suite/abi/riscv64-lp64d-abi.c3t
new file mode 100644
index 000000000..b61b64508
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64d-abi.c3t
@@ -0,0 +1,494 @@
+// #target: linux-riscv64
+// #opt: --riscvfloat=double
+module test;
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Doubles are passed in FPRs, so argument 'i' will be passed zero-extended
+// because it will be passed in a GPR.
+
+fn void f_fpr_tracking(double a, double b, double c, double d, double e, double f,
+                    double g, double h, char i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it
+// were a standalone floating-point real.
+
+struct Double_s { double f; }
+
+fn void f_double_s_arg(Double_s a) {}
+
+fn Double_s f_ret_double_s() {
+  return {1.0};
+}
+
+// A struct containing a double and any number of zero-width bitfields is
+// passed as though it were a standalone floating-point real.
+
+// Check that structs containing two floating point values (FLEN <= width) are
+// expanded provided sufficient FPRs are available.
+
+struct Double_double_s { double f; double g; }
+struct Double_float_s { double f; float g; }
+
+fn void f_double_double_s_arg(Double_double_s a) {}
+
+fn Double_double_s f_ret_double_double_s() {
+  return {1.0, 2.0};
+}
+
+fn void f_double_float_s_arg(Double_float_s a) {}
+fn Double_float_s f_ret_double_float_s() {
+  return {1.0, 2.0};
+}
+
+fn void f_double_double_s_arg_insufficient_fprs(float a, double b, double c, double d,
+    double e, double f, double g, Double_double_s h) {}
+
+// Check that structs containing int+double values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct Double_int8_s { double f; ichar i; }
+struct Double_uint8_s { double f; char i; }
+struct Double_int32_s { double f; int i; }
+struct Double_int64_s { double f; long i; }
+
+fn void f_double_int8_s_arg(Double_int8_s a) {}
+
+fn Double_int8_s f_ret_double_int8_s() {
+  return {1.0, 2};
+}
+
+fn void f_double_uint8_s_arg(Double_uint8_s a) {}
+
+fn Double_uint8_s f_ret_double_uint8_s() {
+  return {1.0, 2};
+}
+
+fn void f_double_int32_s_arg(Double_int32_s a) {}
+
+fn Double_int32_s f_ret_double_int32_s() {
+  return {1.0, 2};
+}
+
+fn void f_double_int64_s_arg(Double_int64_s a) {}
+
+fn Double_int64_s f_ret_double_int64_s() {
+  return {1.0, 2};
+}
+
+
+fn void f_double_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, Double_int8_s i) {}
+
+fn void f_struct_double_int8_insufficient_fprs(float a, double b, double c, double d,
+                                           double e, double f, double g, double h, Double_int8_s i) {}
+
+
+// Test single or two-element structs that need flattening. e.g. those
+// containing nested structs, doubles in small arrays, zero-length structs etc.
+
+struct Doublearr1_s { double[1] a; }
+
+fn void f_doublearr1_s_arg(Doublearr1_s a) {}
+
+fn Doublearr1_s f_ret_doublearr1_s() {
+  return {{1.0}};
+}
+
+struct Doublearr2_s { double[2] a; }
+
+fn void f_doublearr2_s_arg(Doublearr2_s a) {}
+
+fn Doublearr2_s f_ret_doublearr2_s() {
+  return {{1.0, 2.0}};
+}
+
+struct Inner { double[1] f; }
+struct Doublearr2_tricky1_s { Inner[2] g; }
+
+fn void f_doublearr2_tricky1_s_arg(Doublearr2_tricky1_s a) {}
+
+fn Doublearr2_tricky1_s f_ret_doublearr2_tricky1_s() {
+  return {{{{1.0}}, {{2.0}}}};
+}
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct Int_double_int_s { int a; double b; int c; }
+
+fn void f_int_double_int_s_arg(Int_double_int_s a) {}
+
+fn Int_double_int_s f_ret_int_double_int_s() {
+  return {1, 2.0, 3};
+}
+
+struct Char_char_double_s { char a; char b; double c; }
+
+fn void f_char_char_double_s_arg(Char_char_double_s a) {}
+
+fn Char_char_double_s f_ret_char_char_double_s() {
+  return {1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even
+// if they can only contain a double.
+
+union Double_u { double a; }
+
+fn void f_double_u_arg(Double_u a) {}
+
+fn Double_u f_ret_double_u() {
+  return {1.0};
+}
+
+/* #expect: test.ll
+
+define void @test_f_fpr_tracking(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, i8 zeroext %8) #0 {
+entry:
+  ret void
+}
+
+
+define void @test_f_double_s_arg(double %0) #0 {
+entry:
+  %a = alloca %Double_s, align 8
+  store double %0, ptr %a, align 8
+  ret void
+}
+
+
+define double @test_f_ret_double_s() #0 {
+entry:
+  %literal = alloca %Double_s, align 8
+  %0 = getelementptr inbounds %Double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = load double, ptr %literal, align 8
+  ret double %1
+}
+
+
+define void @test_f_double_double_s_arg(double %0, double %1) #0 {
+entry:
+  %a = alloca %Double_double_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds double, ptr %a, i64 1
+  store double %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, double } @test_f_ret_double_double_s() #0 {
+entry:
+  %literal = alloca %Double_double_s, align 8
+  %0 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_double_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds double, ptr %literal, i64 1
+  %3 = load double, ptr %literal, align 8
+  %4 = insertvalue { double, double } undef, double %2, 0
+  %5 = insertvalue { double, double } %4, double %3, 1
+  ret { double, double } %5
+}
+
+
+define void @test_f_double_float_s_arg(double %0, float %1) #0 {
+entry:
+  %a = alloca %Double_float_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds float, ptr %a, i64 2
+  store float %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, float } @test_f_ret_double_float_s() #0 {
+entry:
+  %literal = alloca %Double_float_s, align 8
+  %0 = getelementptr inbounds %Double_float_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_float_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds float, ptr %literal, i64 2
+  %3 = load float, ptr %literal, align 8
+  %4 = insertvalue { double, float } undef, double %2, 0
+  %5 = insertvalue { double, float } %4, float %3, 1
+  ret { double, float } %5
+}
+
+
+define void @test_f_double_double_s_arg_insufficient_fprs(float %0, double %1, double %2, double %3, double %4, double %5, double %6, [2 x i64] %7) #0 {
+entry:
+  %h = alloca %Double_double_s, align 8
+  store [2 x i64] %7, ptr %h, align 8
+  ret void
+}
+
+
+define void @test_f_double_int8_s_arg(double %0, i8 %1) #0 {
+entry:
+  %a = alloca %Double_int8_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %a, i64 8
+  store i8 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i8 } @test_f_ret_double_int8_s() #0 {
+entry:
+  %literal = alloca %Double_int8_s, align 8
+  %0 = getelementptr inbounds %Double_int8_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_int8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i64 8
+  %3 = load i8, ptr %literal, align 8
+  %4 = insertvalue { double, i8 } undef, double %2, 0
+  %5 = insertvalue { double, i8 } %4, i8 %3, 1
+  ret { double, i8 } %5
+}
+
+
+define void @test_f_double_uint8_s_arg(double %0, i8 %1) #0 {
+entry:
+  %a = alloca %Double_uint8_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %a, i64 8
+  store i8 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i8 } @test_f_ret_double_uint8_s() #0 {
+entry:
+  %literal = alloca %Double_uint8_s, align 8
+  %0 = getelementptr inbounds %Double_uint8_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_uint8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i64 8
+  %3 = load i8, ptr %literal, align 8
+  %4 = insertvalue { double, i8 } undef, double %2, 0
+  %5 = insertvalue { double, i8 } %4, i8 %3, 1
+  ret { double, i8 } %5
+}
+
+
+define void @test_f_double_int32_s_arg(double %0, i32 %1) #0 {
+entry:
+  %a = alloca %Double_int32_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i32 } @test_f_ret_double_int32_s() #0 {
+entry:
+  %literal = alloca %Double_int32_s, align 8
+  %0 = getelementptr inbounds %Double_int32_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_int32_s, ptr %literal, i32 0, i32 1
+  store i32 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i32, ptr %literal, i64 2
+  %3 = load i32, ptr %literal, align 8
+  %4 = insertvalue { double, i32 } undef, double %2, 0
+  %5 = insertvalue { double, i32 } %4, i32 %3, 1
+  ret { double, i32 } %5
+}
+
+
+define void @test_f_double_int64_s_arg(double %0, i64 %1) #0 {
+entry:
+  %a = alloca %Double_int64_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i64, ptr %a, i64 1
+  store i64 %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, i64 } @test_f_ret_double_int64_s() #0 {
+entry:
+  %literal = alloca %Double_int64_s, align 8
+  %0 = getelementptr inbounds %Double_int64_s, ptr %literal, i32 0, i32 0
+  store double 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Double_int64_s, ptr %literal, i32 0, i32 1
+  store i64 2, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i64, ptr %literal, i64 1
+  %3 = load i64, ptr %literal, align 8
+  %4 = insertvalue { double, i64 } undef, double %2, 0
+  %5 = insertvalue { double, i64 } %4, i64 %3, 1
+  ret { double, i64 } %5
+}
+
+
+define void @test_f_double_int8_s_arg_insufficient_gprs(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3, i32 signext %4, i32 signext %5, i32 signext %6, i32 signext %7, [2 x i64] %8) #0 {
+entry:
+  %i = alloca %Double_int8_s, align 8
+  store [2 x i64] %8, ptr %i, align 8
+  ret void
+}
+
+
+define void @test_f_struct_double_int8_insufficient_fprs(float %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, [2 x i64] %8) #0 {
+entry:
+  %i = alloca %Double_int8_s, align 8
+  store [2 x i64] %8, ptr %i, align 8
+  ret void
+}
+
+
+define void @test_f_doublearr1_s_arg(double %0) #0 {
+entry:
+  %a = alloca %Doublearr1_s, align 8
+  store double %0, ptr %a, align 8
+  ret void
+}
+
+
+define double @test_f_ret_doublearr1_s() #0 {
+entry:
+  %literal = alloca %Doublearr1_s, align 8
+  %0 = getelementptr inbounds %Doublearr1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [1 x double], ptr %0, i64 0, i64 0
+  store double 1.000000e+00, ptr %1, align 8
+  %2 = load double, ptr %literal, align 8
+  ret double %2
+}
+
+
+define void @test_f_doublearr2_s_arg(double %0, double %1) #0 {
+entry:
+  %a = alloca %Doublearr2_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds double, ptr %a, i64 1
+  store double %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, double } @test_f_ret_doublearr2_s() #0 {
+entry:
+  %literal = alloca %Doublearr2_s, align 8
+  %0 = getelementptr inbounds %Doublearr2_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x double], ptr %0, i64 0, i64 0
+  store double 1.000000e+00, ptr %1, align 8
+  %2 = getelementptr inbounds [2 x double], ptr %0, i64 0, i64 1
+  store double 2.000000e+00, ptr %2, align 8
+  %3 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds double, ptr %literal, i64 1
+  %4 = load double, ptr %literal, align 8
+  %5 = insertvalue { double, double } undef, double %3, 0
+  %6 = insertvalue { double, double } %5, double %4, 1
+  ret { double, double } %6
+}
+
+
+define void @test_f_doublearr2_tricky1_s_arg(double %0, double %1) #0 {
+entry:
+  %a = alloca %Doublearr2_tricky1_s, align 8
+  store double %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds double, ptr %a, i64 1
+  store double %1, ptr %a, align 8
+  ret void
+}
+
+
+define { double, double } @test_f_ret_doublearr2_tricky1_s() #0 {
+entry:
+  %literal = alloca %Doublearr2_tricky1_s, align 8
+  %0 = getelementptr inbounds %Doublearr2_tricky1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x %Inner], ptr %0, i64 0, i64 0
+  %2 = getelementptr inbounds %Inner, ptr %1, i32 0, i32 0
+  %3 = getelementptr inbounds [1 x double], ptr %2, i64 0, i64 0
+  store double 1.000000e+00, ptr %3, align 8
+  %4 = getelementptr inbounds [2 x %Inner], ptr %0, i64 0, i64 1
+  %5 = getelementptr inbounds %Inner, ptr %4, i32 0, i32 0
+  %6 = getelementptr inbounds [1 x double], ptr %5, i64 0, i64 0
+  store double 2.000000e+00, ptr %6, align 8
+  %7 = load double, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds double, ptr %literal, i64 1
+  %8 = load double, ptr %literal, align 8
+  %9 = insertvalue { double, double } undef, double %7, 0
+  %10 = insertvalue { double, double } %9, double %8, 1
+  ret { double, double } %10
+}
+
+
+define void @test_f_int_double_int_s_arg(ptr align 8 %0) #0 {
+entry:
+  ret void
+}
+
+
+define void @test_f_ret_int_double_int_s(ptr noalias sret(%Int_double_int_s) align 8 %0) #0 {
+entry:
+  %literal = alloca %Int_double_int_s, align 8
+  %1 = getelementptr inbounds %Int_double_int_s, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %1, align 8
+  %2 = getelementptr inbounds %Int_double_int_s, ptr %literal, i32 0, i32 1
+  store double 2.000000e+00, ptr %2, align 8
+  %3 = getelementptr inbounds %Int_double_int_s, ptr %literal, i32 0, i32 2
+  store i32 3, ptr %3, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %literal, i32 24, i1 false)
+  ret void
+}
+
+
+define void @test_f_char_char_double_s_arg([2 x i64] %0) #0 {
+entry:
+  %a = alloca %Char_char_double_s, align 8
+  store [2 x i64] %0, ptr %a, align 8
+  ret void
+}
+
+
+define [2 x i64] @test_f_ret_char_char_double_s() #0 {
+entry:
+  %literal = alloca %Char_char_double_s, align 8
+  %0 = getelementptr inbounds %Char_char_double_s, ptr %literal, i32 0, i32 0
+  store i8 1, ptr %0, align 8
+  %1 = getelementptr inbounds %Char_char_double_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 1
+  %2 = getelementptr inbounds %Char_char_double_s, ptr %literal, i32 0, i32 2
+  store double 3.000000e+00, ptr %2, align 8
+  %3 = load [2 x i64], ptr %literal, align 8
+  ret [2 x i64] %3
+}
+
+
+define void @test_f_double_u_arg(i64 %0) #0 {
+entry:
+  %a = alloca %Double_u, align 8
+  store i64 %0, ptr %a, align 8
+  ret void
+}
+
+
+define i64 @test_f_ret_double_u() #0 {
+entry:
+  %literal = alloca %Double_u, align 8
+  store double 1.000000e+00, ptr %literal, align 8
+  %0 = load i64, ptr %literal, align 8
+  ret i64 %0
+}
+
diff --git a/test/test_suite/abi/riscv64-lp64f-lp64d-abi-1.c3t b/test/test_suite/abi/riscv64-lp64f-lp64d-abi-1.c3t
new file mode 100644
index 000000000..bf0c793fe
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64f-lp64d-abi-1.c3t
@@ -0,0 +1,184 @@
+// #target: linux-riscv64
+// #opt: --riscvfloat=float
+module test;
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Floats are passed in FPRs, so argument 'i' will be passed zero-extended
+// because it will be passed in a GPR.
+
+fn void f_fpr_tracking(float a, float b, float c, float d, float e, float f,
+                    float g, float h, char i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it
+// were a standalone floating-point real.
+
+struct Float_s { float f; }
+
+fn void f_float_s_arg(Float_s a) {}
+fn Float_s f_ret_float_s() {
+  return {1.0};
+}
+
+
+// Check that structs containing two float values (FLEN <= width) are expanded
+// provided sufficient FPRs are available.
+
+struct Float_float_s { float f; float g; }
+
+fn void f_float_float_s_arg(Float_float_s a) {}
+
+fn Float_float_s f_ret_float_float_s() {
+  return {1.0, 2.0};
+}
+
+fn void f_float_float_s_arg_insufficient_fprs(float a, float b, float c, float d,
+    float e, float f, float g, Float_float_s h) {}
+
+// Check that structs containing int+float values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct Float_int8_s { float f; ichar i; }
+struct Float_uint8_s { float f; char i; }
+struct Float_int32_s { float f; int i; }
+struct Float_int64_s { float f; long i; }
+
+fn void f_float_int8_s_arg(Float_int8_s a) {}
+fn Float_int8_s f_ret_float_int8_s() {
+  return {1.0, 2};
+}
+
+fn void f_float_uint8_s_arg(Float_uint8_s a) {}
+fn Float_uint8_s f_ret_float_uint8_s() {
+  return {1.0, 2};
+}
+
+fn void f_float_int32_s_arg(Float_int32_s a) {}
+fn Float_int32_s f_ret_float_int32_s() {
+  return {1.0, 2};
+}
+
+fn void f_float_int64_s_arg(Float_int64_s a) {}
+fn Float_int64_s f_ret_float_int64_s() {
+  return {1.0, 2};
+}
+
+
+fn void f_float_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, Float_int8_s i) {}
+
+fn void f_struct_float_int8_insufficient_fprs(float a, float b, float c, float d,
+                                           float e, float f, float g, float h, Float_int8_s i) {}
+
+// Test single or two-element structs that need flattening. e.g. those
+// containing nested structs, floats in small arrays, zero-length structs etc.
+
+struct Floatarr1_s { float[1] a; }
+
+fn void f_floatarr1_s_arg(Floatarr1_s a) {}
+fn Floatarr1_s f_ret_floatarr1_s() {
+  return {{1.0}};
+}
+
+struct Floatarr2_s { float[2] a; }
+fn void f_floatarr2_s_arg(Floatarr2_s a) {}
+
+fn Floatarr2_s f_ret_floatarr2_s() {
+  return {{1.0, 2.0}};
+}
+
+struct Inner { float[1] f; }
+struct Floatarr2_tricky1_s { Inner[2] g; }
+
+fn void f_floatarr2_tricky1_s_arg(Floatarr2_tricky1_s a) {}
+fn Floatarr2_tricky1_s f_ret_floatarr2_tricky1_s() {
+  return {{{{1.0}}, {{2.0}}}};
+}
+
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct Int_float_int_s { int a; float b; int c; }
+
+fn void f_int_float_int_s_arg(Int_float_int_s a) {}
+
+fn Int_float_int_s f_ret_int_float_int_s() {
+  return {1, 2.0, 3};
+}
+
+struct Char_char_float_s { char a; char b; float c; }
+
+fn void f_char_char_float_s_arg(Char_char_float_s a) {}
+fn Char_char_float_s f_ret_char_char_float_s() {
+  return {1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even
+// if they can only contain a float.
+
+union Float_u { float a; }
+
+fn void f_float_u_arg(Float_u a) {}
+fn Float_u f_ret_float_u() {
+  return {1.0};
+}
+
+/* #expect: test.ll
+
+define void @test_f_fpr_tracking(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, i8 zeroext %8) #0 {
+
+define void @test_f_float_s_arg(float %0) #0 {
+define float @test_f_ret_float_s() #0 {
+
+define void @test_f_float_float_s_arg(float %0, float %1) #0 {
+define { float, float } @test_f_ret_float_float_s() #0 {
+define void @test_f_float_float_s_arg_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, i64 %7) #0 {
+define void @test_f_float_int8_s_arg(float %0, i8 %1) #0 {
+define { float, i8 } @test_f_ret_float_int8_s() #0 {
+
+define void @test_f_float_uint8_s_arg(float %0, i8 %1) #0 {
+
+define { float, i8 } @test_f_ret_float_uint8_s() #0 {
+
+define void @test_f_float_int32_s_arg(float %0, i32 %1) #0 {
+
+define { float, i32 } @test_f_ret_float_int32_s() #0 {
+define void @test_f_float_int64_s_arg(float %0, i64 %1) #0 {
+
+define { float, i64 } @test_f_ret_float_int64_s() #0 {
+
+define void @test_f_float_int8_s_arg_insufficient_gprs(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3, i32 signext %4, i32 signext %5, i32 signext %6, i32 signext %7, i64 %8) #0 {
+
+define void @test_f_struct_float_int8_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, i64 %8) #0 {
+
+define void @test_f_floatarr1_s_arg(float %0) #0 {
+
+define float @test_f_ret_floatarr1_s() #0 {
+
+define void @test_f_floatarr2_s_arg(float %0, float %1) #0 {
+
+define { float, float } @test_f_ret_floatarr2_s() #0 {
+
+define void @test_f_floatarr2_tricky1_s_arg(float %0, float %1) #0 {
+
+define { float, float } @test_f_ret_floatarr2_tricky1_s() #0 {
+
+define void @test_f_int_float_int_s_arg([2 x i64] %0) #0 {
+
+define [2 x i64] @test_f_ret_int_float_int_s() #0 {
+
+define void @test_f_char_char_float_s_arg(i64 %0) #0 {
+define i64 @test_f_ret_char_char_float_s() #0 {
+
+define void @test_f_float_u_arg(i64 %0) #0 {
+define i64 @test_f_ret_float_u() #0 {
diff --git a/test/test_suite/abi/riscv64-lp64f-lp64d-abi-2.c3t b/test/test_suite/abi/riscv64-lp64f-lp64d-abi-2.c3t
new file mode 100644
index 000000000..c32a0e727
--- /dev/null
+++ b/test/test_suite/abi/riscv64-lp64f-lp64d-abi-2.c3t
@@ -0,0 +1,429 @@
+// #target: linux-riscv64
+// #opt: --riscvfloat=double
+module test;
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Floats are passed in FPRs, so argument 'i' will be passed zero-extended
+// because it will be passed in a GPR.
+
+fn void f_fpr_tracking(float a, float b, float c, float d, float e, float f,
+                    float g, float h, char i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it
+// were a standalone floating-point real.
+
+struct Float_s { float f; }
+
+fn void f_float_s_arg(Float_s a) {}
+fn Float_s f_ret_float_s() {
+  return {1.0};
+}
+
+
+// Check that structs containing two float values (width <= FLEN) are expanded
+// provided sufficient FPRs are available.
+
+struct Float_float_s { float f; float g; }
+
+fn void f_float_float_s_arg(Float_float_s a) {}
+
+fn Float_float_s f_ret_float_float_s() {
+  return {1.0, 2.0};
+}
+
+fn void f_float_float_s_arg_insufficient_fprs(float a, float b, float c, float d,
+    float e, float f, float g, Float_float_s h) {}
+
+// Check that structs containing int+float values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct Float_int8_s { float f; ichar i; }
+struct Float_uint8_s { float f; char i; }
+struct Float_int32_s { float f; int i; }
+struct Float_int64_s { float f; long i; }
+
+fn void f_float_int8_s_arg(Float_int8_s a) {}
+fn Float_int8_s f_ret_float_int8_s() {
+  return {1.0, 2};
+}
+
+fn void f_float_uint8_s_arg(Float_uint8_s a) {}
+fn Float_uint8_s f_ret_float_uint8_s() {
+  return {1.0, 2};
+}
+
+fn void f_float_int32_s_arg(Float_int32_s a) {}
+fn Float_int32_s f_ret_float_int32_s() {
+  return {1.0, 2};
+}
+
+fn void f_float_int64_s_arg(Float_int64_s a) {}
+fn Float_int64_s f_ret_float_int64_s() {
+  return {1.0, 2};
+}
+
+
+fn void f_float_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, Float_int8_s i) {}
+
+fn void f_struct_float_int8_insufficient_fprs(float a, float b, float c, float d,
+                                           float e, float f, float g, float h, Float_int8_s i) {}
+
+// Test single- or two-element structs that need flattening, e.g. those
+// containing nested structs, floats in small arrays, zero-length structs etc.
+
+struct Floatarr1_s { float[1] a; }
+
+fn void f_floatarr1_s_arg(Floatarr1_s a) {}
+fn Floatarr1_s f_ret_floatarr1_s() {
+  return {{1.0}};
+}
+
+struct Floatarr2_s { float[2] a; }
+fn void f_floatarr2_s_arg(Floatarr2_s a) {}
+
+fn Floatarr2_s f_ret_floatarr2_s() {
+  return {{1.0, 2.0}};
+}
+
+struct Inner { float[1] f; }
+struct Floatarr2_tricky1_s { Inner[2] g; }
+
+fn void f_floatarr2_tricky1_s_arg(Floatarr2_tricky1_s a) {}
+fn Floatarr2_tricky1_s f_ret_floatarr2_tricky1_s() {
+  return {{{{1.0}}, {{2.0}}}};
+}
+
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct Int_float_int_s { int a; float b; int c; }
+
+fn void f_int_float_int_s_arg(Int_float_int_s a) {}
+
+fn Int_float_int_s f_ret_int_float_int_s() {
+  return {1, 2.0, 3};
+}
+
+struct Char_char_float_s { char a; char b; float c; }
+
+fn void f_char_char_float_s_arg(Char_char_float_s a) {}
+fn Char_char_float_s f_ret_char_char_float_s() {
+  return {1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even
+// if they can only contain a float.
+
+union Float_u { float a; }
+
+fn void f_float_u_arg(Float_u a) {}
+fn Float_u f_ret_float_u() {
+  return {1.0};
+}
+
+/* #expect: test.ll
+
+define void @test_f_fpr_tracking(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, i8 zeroext %8) #0 {
+
+define void @test_f_float_s_arg(float %0) #0 {
+entry:
+  %a = alloca %Float_s, align 4
+  store float %0, ptr %a, align 4
+  ret void
+}
+
+define float @test_f_ret_float_s() #0 {
+entry:
+  %literal = alloca %Float_s, align 4
+  %0 = getelementptr inbounds %Float_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = load float, ptr %literal, align 4
+  ret float %1
+}
+
+define void @test_f_float_float_s_arg(float %0, float %1) #0 {
+entry:
+  %a = alloca %Float_float_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds float, ptr %a, i64 1
+  store float %1, ptr %a, align 4
+  ret void
+}
+
+define { float, float } @test_f_ret_float_float_s() #0 {
+entry:
+  %literal = alloca %Float_float_s, align 4
+  %0 = getelementptr inbounds %Float_float_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_float_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds float, ptr %literal, i64 1
+  %3 = load float, ptr %literal, align 4
+  %4 = insertvalue { float, float } undef, float %2, 0
+  %5 = insertvalue { float, float } %4, float %3, 1
+  ret { float, float } %5
+}
+
+define void @test_f_float_float_s_arg_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, i64 %7) #0 {
+entry:
+  %h = alloca %Float_float_s, align 4
+  store i64 %7, ptr %h, align 4
+  ret void
+}
+
+define void @test_f_float_int8_s_arg(float %0, i8 %1) #0 {
+entry:
+  %a = alloca %Float_int8_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %a, i64 4
+  store i8 %1, ptr %a, align 4
+  ret void
+}
+
+define { float, i8 } @test_f_ret_float_int8_s() #0 {
+entry:
+  %literal = alloca %Float_int8_s, align 4
+  %0 = getelementptr inbounds %Float_int8_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_int8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i64 4
+  %3 = load i8, ptr %literal, align 4
+  %4 = insertvalue { float, i8 } undef, float %2, 0
+  %5 = insertvalue { float, i8 } %4, i8 %3, 1
+  ret { float, i8 } %5
+}
+
+define void @test_f_float_uint8_s_arg(float %0, i8 %1) #0 {
+entry:
+  %a = alloca %Float_uint8_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %a, i64 4
+  store i8 %1, ptr %a, align 4
+  ret void
+}
+
+define { float, i8 } @test_f_ret_float_uint8_s() #0 {
+entry:
+  %literal = alloca %Float_uint8_s, align 4
+  %0 = getelementptr inbounds %Float_uint8_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_uint8_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds i8, ptr %literal, i64 4
+  %3 = load i8, ptr %literal, align 4
+  %4 = insertvalue { float, i8 } undef, float %2, 0
+  %5 = insertvalue { float, i8 } %4, i8 %3, 1
+  ret { float, i8 } %5
+}
+
+define void @test_f_float_int32_s_arg(float %0, i32 %1) #0 {
+entry:
+  %a = alloca %Float_int32_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %1, ptr %a, align 4
+  ret void
+}
+
+define { float, i32 } @test_f_ret_float_int32_s() #0 {
+entry:
+  %literal = alloca %Float_int32_s, align 4
+  %0 = getelementptr inbounds %Float_int32_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 4
+  %1 = getelementptr inbounds %Float_int32_s, ptr %literal, i32 0, i32 1
+  store i32 2, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds i32, ptr %literal, i64 1
+  %3 = load i32, ptr %literal, align 4
+  %4 = insertvalue { float, i32 } undef, float %2, 0
+  %5 = insertvalue { float, i32 } %4, i32 %3, 1
+  ret { float, i32 } %5
+}
+
+define void @test_f_float_int64_s_arg(float %0, i64 %1) #0 {
+entry:
+  %a = alloca %Float_int64_s, align 8
+  store float %0, ptr %a, align 8
+  %ptroffset = getelementptr inbounds i64, ptr %a, i64 1
+  store i64 %1, ptr %a, align 8
+  ret void
+}
+
+define { float, i64 } @test_f_ret_float_int64_s() #0 {
+entry:
+  %literal = alloca %Float_int64_s, align 8
+  %0 = getelementptr inbounds %Float_int64_s, ptr %literal, i32 0, i32 0
+  store float 1.000000e+00, ptr %0, align 8
+  %1 = getelementptr inbounds %Float_int64_s, ptr %literal, i32 0, i32 1
+  store i64 2, ptr %1, align 8
+  %2 = load float, ptr %literal, align 8
+  %ptroffset = getelementptr inbounds i64, ptr %literal, i64 1
+  %3 = load i64, ptr %literal, align 8
+  %4 = insertvalue { float, i64 } undef, float %2, 0
+  %5 = insertvalue { float, i64 } %4, i64 %3, 1
+  ret { float, i64 } %5
+}
+
+define void @test_f_float_int8_s_arg_insufficient_gprs(i32 signext %0, i32 signext %1, i32 signext %2, i32 signext %3, i32 signext %4, i32 signext %5, i32 signext %6, i32 signext %7, i64 %8) #0 {
+entry:
+  %i = alloca %Float_int8_s, align 4
+  store i64 %8, ptr %i, align 4
+  ret void
+}
+
+define void @test_f_struct_float_int8_insufficient_fprs(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, i64 %8) #0 {
+entry:
+  %i = alloca %Float_int8_s, align 4
+  store i64 %8, ptr %i, align 4
+  ret void
+}
+
+define void @test_f_floatarr1_s_arg(float %0) #0 {
+entry:
+  %a = alloca %Floatarr1_s, align 4
+  store float %0, ptr %a, align 4
+  ret void
+}
+
+define float @test_f_ret_floatarr1_s() #0 {
+entry:
+  %literal = alloca %Floatarr1_s, align 4
+  %0 = getelementptr inbounds %Floatarr1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [1 x float], ptr %0, i64 0, i64 0
+  store float 1.000000e+00, ptr %1, align 4
+  %2 = load float, ptr %literal, align 4
+  ret float %2
+}
+
+define void @test_f_floatarr2_s_arg(float %0, float %1) #0 {
+entry:
+  %a = alloca %Floatarr2_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds float, ptr %a, i64 1
+  store float %1, ptr %a, align 4
+  ret void
+}
+
+define { float, float } @test_f_ret_floatarr2_s() #0 {
+entry:
+  %literal = alloca %Floatarr2_s, align 4
+  %0 = getelementptr inbounds %Floatarr2_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x float], ptr %0, i64 0, i64 0
+  store float 1.000000e+00, ptr %1, align 4
+  %2 = getelementptr inbounds [2 x float], ptr %0, i64 0, i64 1
+  store float 2.000000e+00, ptr %2, align 4
+  %3 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds float, ptr %literal, i64 1
+  %4 = load float, ptr %literal, align 4
+  %5 = insertvalue { float, float } undef, float %3, 0
+  %6 = insertvalue { float, float } %5, float %4, 1
+  ret { float, float } %6
+}
+
+define void @test_f_floatarr2_tricky1_s_arg(float %0, float %1) #0 {
+entry:
+  %a = alloca %Floatarr2_tricky1_s, align 4
+  store float %0, ptr %a, align 4
+  %ptroffset = getelementptr inbounds float, ptr %a, i64 1
+  store float %1, ptr %a, align 4
+  ret void
+}
+
+define { float, float } @test_f_ret_floatarr2_tricky1_s() #0 {
+entry:
+  %literal = alloca %Floatarr2_tricky1_s, align 4
+  %0 = getelementptr inbounds %Floatarr2_tricky1_s, ptr %literal, i32 0, i32 0
+  %1 = getelementptr inbounds [2 x %Inner], ptr %0, i64 0, i64 0
+  %2 = getelementptr inbounds %Inner, ptr %1, i32 0, i32 0
+  %3 = getelementptr inbounds [1 x float], ptr %2, i64 0, i64 0
+  store float 1.000000e+00, ptr %3, align 4
+  %4 = getelementptr inbounds [2 x %Inner], ptr %0, i64 0, i64 1
+  %5 = getelementptr inbounds %Inner, ptr %4, i32 0, i32 0
+  %6 = getelementptr inbounds [1 x float], ptr %5, i64 0, i64 0
+  store float 2.000000e+00, ptr %6, align 4
+  %7 = load float, ptr %literal, align 4
+  %ptroffset = getelementptr inbounds float, ptr %literal, i64 1
+  %8 = load float, ptr %literal, align 4
+  %9 = insertvalue { float, float } undef, float %7, 0
+  %10 = insertvalue { float, float } %9, float %8, 1
+  ret { float, float } %10
+}
+
+define void @test_f_int_float_int_s_arg([2 x i64] %0) #0 {
+entry:
+  %a = alloca %Int_float_int_s, align 4
+  %tempcoerce = alloca [2 x i64], align 8
+  store [2 x i64] %0, ptr %tempcoerce, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 8 %tempcoerce, i32 12, i1 false)
+  ret void
+}
+
+define [2 x i64] @test_f_ret_int_float_int_s() #0 {
+entry:
+  %literal = alloca %Int_float_int_s, align 4
+  %tempcoerce = alloca [2 x i64], align 8
+  %0 = getelementptr inbounds %Int_float_int_s, ptr %literal, i32 0, i32 0
+  store i32 1, ptr %0, align 4
+  %1 = getelementptr inbounds %Int_float_int_s, ptr %literal, i32 0, i32 1
+  store float 2.000000e+00, ptr %1, align 4
+  %2 = getelementptr inbounds %Int_float_int_s, ptr %literal, i32 0, i32 2
+  store i32 3, ptr %2, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %tempcoerce, ptr align 4 %literal, i32 12, i1 false)
+  %3 = load [2 x i64], ptr %tempcoerce, align 8
+  ret [2 x i64] %3
+}
+
+define void @test_f_char_char_float_s_arg(i64 %0) #0 {
+entry:
+  %a = alloca %Char_char_float_s, align 4
+  store i64 %0, ptr %a, align 4
+  ret void
+}
+
+define i64 @test_f_ret_char_char_float_s() #0 {
+entry:
+  %literal = alloca %Char_char_float_s, align 4
+  %0 = getelementptr inbounds %Char_char_float_s, ptr %literal, i32 0, i32 0
+  store i8 1, ptr %0, align 4
+  %1 = getelementptr inbounds %Char_char_float_s, ptr %literal, i32 0, i32 1
+  store i8 2, ptr %1, align 1
+  %2 = getelementptr inbounds %Char_char_float_s, ptr %literal, i32 0, i32 2
+  store float 3.000000e+00, ptr %2, align 4
+  %3 = load i64, ptr %literal, align 4
+  ret i64 %3
+}
+
+define void @test_f_float_u_arg(i64 %0) #0 {
+entry:
+  %a = alloca %Float_u, align 4
+  %tempcoerce = alloca i64, align 8
+  store i64 %0, ptr %tempcoerce, align 8
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 8 %tempcoerce, i32 4, i1 false)
+  ret void
+}
+
+define i64 @test_f_ret_float_u() #0 {
+entry:
+  %literal = alloca %Float_u, align 4
+  %tempcoerce = alloca i64, align 8
+  store float 1.000000e+00, ptr %literal, align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %tempcoerce, ptr align 4 %literal, i32 4, i1 false)
+  %0 = load i64, ptr %tempcoerce, align 8
+  ret i64 %0
+}
diff --git a/test/test_suite/clang/2002-04.c3t b/test/test_suite/clang/2002-04.c3t
index 14dea515b..d135417e6 100644
--- a/test/test_suite/clang/2002-04.c3t
+++ b/test/test_suite/clang/2002-04.c3t
@@ -154,7 +154,7 @@ entry:
   %9 = getelementptr inbounds %FooSt, ptr %0, i32 0, i32 4
   %10 = load i16, ptr %9, align 4
   %sisiext = sext i16 %10 to i32
-  %11 = call i32 @testE(i8 %2, i16 %4, i8 %6, i32 %8, i32 %sisiext, float 0x3FB99999A0000000)
+  %11 = call i32 @testE(i8 zeroext %2, i16 signext %4, i8 zeroext %6, i32 %8, i32 %sisiext, float 0x3FB99999A0000000)
   call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg, ptr align 4 %0, i32 20, i1 false)
   %12 = call i32 @testF(ptr byval(%FooSt) align 8 %indirectarg, float 0x3FB99999A0000000)
   call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg1, ptr align 4 %0, i32 20, i1 false)
diff --git a/test/test_suite/clang/2002-07.c3t b/test/test_suite/clang/2002-07.c3t
index 34e9a9020..327eb8701 100644
--- a/test/test_suite/clang/2002-07.c3t
+++ b/test/test_suite/clang/2002-07.c3t
@@ -571,7 +571,7 @@ entry:
   %add = add i32 %uisiext1, 1
   %siuitrunc2 = trunc i32 %add to i8
   %uisiext3 = zext i8 %2 to i64
-  %5 = call i8 @smallArgs2(i8 %siuitrunc, i8 %siuitrunc2, i64 %uisiext3, i8 %3, i8 %0)
+  %5 = call i8 @smallArgs2(i8 zeroext %siuitrunc, i8 zeroext %siuitrunc2, i64 %uisiext3, i8 zeroext %3, i8 zeroext %0)
   ret i8 %5
 }
 
@@ -813,7 +813,7 @@ entry:
   %i = alloca i32, align 4
   %2 = call i32 @puts(ptr @.str.10)
   %sisitrunc = trunc i32 %0 to i16
-  %3 = call i32 @foo_externFunc(i64 -1, ptr null, i16 %sisitrunc, i8 2)
+  %3 = call i32 @foo_externFunc(i64 -1, ptr null, i16 signext %sisitrunc, i8 zeroext 2)
   store i32 0, ptr %i, align 4
   br label %loop.cond
 
diff --git a/test/test_suite/errors/error_regression_2.c3t b/test/test_suite/errors/error_regression_2.c3t
index 368217384..af63c964c 100644
--- a/test/test_suite/errors/error_regression_2.c3t
+++ b/test/test_suite/errors/error_regression_2.c3t
@@ -825,7 +825,7 @@ after_assign:                                     ; preds = %after_check, %assig
 
 after_check6:                                     ; preds = %after_assign
   %39 = load i8, ptr %has_title, align 1
-  %40 = call ptr @test_bool_to_string(i8 %39)
+  %40 = call ptr @test_bool_to_string(i8 zeroext %39)
   br label %phi_block
 
 else_block:                                       ; preds = %after_assign