From 423152202fb56c4c264df58e1ebc7701fa5150a9 Mon Sep 17 00:00:00 2001
From: Christoffer Lerno <christoffer@aegik.com>
Date: Sat, 25 Oct 2025 12:31:06 +0200
Subject: [PATCH] Dev (#2545)

* Optimize vector load / store. Fixes to alignment. Support typedef with `@simd` and `@align` #2543. Update vector ABI #2542
* Fix alignment issue with indirect arguments.
---
 CMakeLists.txt                                |   4 +-
 releasenotes.md                               |   4 +-
 src/compiler/abi/c_abi.c                      | 184 +++++++----
 src/compiler/abi/c_abi_aarch64.c              |  78 ++---
 src/compiler/abi/c_abi_riscv.c                |  62 ++--
 src/compiler/abi/c_abi_wasm.c                 |  37 +--
 src/compiler/abi/c_abi_win64.c                |  60 ++--
 src/compiler/abi/c_abi_x64.c                  | 134 ++++----
 src/compiler/abi/c_abi_x86.c                  | 121 ++++---
 src/compiler/c_abi_internal.h                 |  53 +--
 src/compiler/c_codegen.c                      |  16 +-
 src/compiler/codegen_general.c                |  13 +-
 src/compiler/codegen_internal.h               |  85 +++++
 src/compiler/compiler_internal.h              |  53 ++-
 src/compiler/enums.h                          |  13 +
 src/compiler/expr.c                           |   3 +-
 src/compiler/llvm_codegen.c                   |  27 +-
 src/compiler/llvm_codegen_debug_info.c        |  20 +-
 src/compiler/llvm_codegen_expr.c              | 173 ++++++----
 src/compiler/llvm_codegen_function.c          |  98 ++++--
 src/compiler/llvm_codegen_internal.h          |   5 +-
 src/compiler/llvm_codegen_internal_impl.h     |   2 +-
 src/compiler/llvm_codegen_stmt.c              |  11 +-
 src/compiler/llvm_codegen_storeload.c         |  64 +++-
 src/compiler/llvm_codegen_type.c              |  30 +-
 src/compiler/parse_global.c                   |  20 ++
 src/compiler/sema_decls.c                     | 122 ++++---
 src/compiler/sema_expr.c                      |  22 +-
 src/compiler/sema_internal.h                  |  18 +-
 src/compiler/sema_stmts.c                     |   4 +-
 src/compiler/sema_types.c                     |  50 +--
 src/compiler/symtab.c                         |   7 +-
 src/compiler/types.c                          | 152 +++++----
 test/test_suite/abi/aarch64_hfa_args.c3t      |  32 +-
 test/test_suite/abi/aarch64_hfa_args_no.c3t   |  77 +++++
 test/test_suite/abi/darwin64_avx.c3t          |  65 ++--
 test/test_suite/abi/darwin64_avx512.c3t       |  48 +--
 test/test_suite/abi/darwin64_sret.c3t         |   5 +-
 test/test_suite/abi/darwin64_sse.c3t          |  36 +-
 test/test_suite/abi/darwinx64_2.c3t           | 118 +++----
 .../abi/merge_union_bool_avx512.c3t           |  16 +-
 .../abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t |  96 +++---
 .../abi/riscv32-ilp32-ilp32f-ilp32d-abi-2.c3t | 160 +++++----
 .../abi/riscv32-ilp32-ilp32f-ilp32d-abi-3.c3t | 174 +++++-----
 test/test_suite/abi/riscv64-lp64-abi.c3t      |  19 +-
 .../abi/riscv64-lp64-lp64f-abi-1.c3t          |  13 +-
 .../abi/riscv64-lp64-lp64f-abi-2.c3t          |  13 +-
 .../abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t    | 172 +++++-----
 .../abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t    | 166 ++++++----
 .../abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t    | 167 +++++-----
 test/test_suite/abi/sysv_abi_avx.c3t          |  14 +-
 test/test_suite/abi/sysv_abi_noavx.c3t        |  14 +-
 .../abi/sysv_vec_array_indirect.c3t           |  32 ++
 test/test_suite/abi/vec_update_align.c3t      |  60 ++++
 test/test_suite/arrays/index_from_back.c3t    |  38 +--
 test/test_suite/builtins/matrix_builtin.c3t   |  18 +-
 test/test_suite/builtins/shufflevector.c3t    |  30 +-
 .../cast/implicit_infer_len_cast.c3t          |  17 +-
 test/test_suite/clang/2002-04.c3t             |  12 +-
 .../compile_time_access_subscript.c3t         |   2 +-
 .../compile_time/untyped_conversions.c3t      |  10 +-
 test/test_suite/debug_symbols/defer_macro.c3t |   2 +-
 test/test_suite/expressions/ternary_infer.c3t |   8 +-
 test/test_suite/functions/test_regression.c3t |   8 +-
 .../slices/slice_to_slice_vector_assign.c3t   |   4 +-
 test/test_suite/statements/foreach_common.c3t | 120 +++----
 .../statements/foreach_r_common.c3t           | 116 +++----
 .../union/designated_union_zeroing.c3t        |  15 +-
 test/test_suite/vector/vector_consts.c3t      |   3 +-
 .../vector/vector_init_regression.c3t         | 309 +++++++++---------
 test/test_suite/vector/vector_param.c3t       |   4 +-
 test/unit/stdlib/math/matrix.c3               | 163 +++++----
 72 files changed, 2403 insertions(+), 1718 deletions(-)
 create mode 100644 test/test_suite/abi/aarch64_hfa_args_no.c3t
 create mode 100644 test/test_suite/abi/sysv_vec_array_indirect.c3t
 create mode 100644 test/test_suite/abi/vec_update_align.c3t

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f5453b7e7..24fff428a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,8 +65,8 @@ if(MSVC)
 else()
     add_compile_options(-gdwarf-3 -fno-exceptions)
 
-    # add_compile_options(-fsanitize=address,undefined)
-    # add_link_options(-fsanitize=address,undefined)
+    #add_compile_options(-fsanitize=address,undefined)
+    #add_link_options(-fsanitize=address,undefined)
 endif()
 
 # Options
diff --git a/releasenotes.md b/releasenotes.md
index b9eb9a590..95ecf59c4 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -12,7 +12,9 @@
 - Add `--max-macro-iterations` to set macro iteration limit.
 - Improved generic inference in initializers #2541. 
 - "Maybe-deref" subscripting `foo.[i] += 1` #2540.
-
+- ABI change for vectors: store and pass them as arrays #2542.
+- Add `@simd` and `@align` attributes to typedef #2543.
+
 ### Fixes
 - Bug in `io::write_using_write_byte`.
 - Bitstruct value cannot be used to index a const array in compile time. #2512
diff --git a/src/compiler/abi/c_abi.c b/src/compiler/abi/c_abi.c
index 1170c3f42..9caf03cc8 100644
--- a/src/compiler/abi/c_abi.c
+++ b/src/compiler/abi/c_abi.c
@@ -5,10 +5,12 @@
 #include "compiler/c_abi_internal.h"
 
 
-static ABIArgInfo *abi_arg_new(ABIKind kind)
+static ABIArgInfo *abi_arg_new(ABIKind kind, ParamInfo param)
 {
 	ABIArgInfo *info = CALLOCS(ABIArgInfo);
 	info->kind = kind;
+	info->original_type = param.type;
+	info->rewrite = param.rewrite;
 	return info;
 }
 
@@ -97,10 +99,10 @@ bool abi_arg_is_indirect(ABIArgInfo *info)
 	UNREACHABLE
 }
 
-ABIArgInfo *abi_arg_new_indirect_realigned(AlignSize alignment, Type *by_val_type)
+ABIArgInfo *abi_arg_new_indirect_realigned(AlignSize alignment, Type *by_val_type, ParamInfo param)
 {
 	ASSERT(alignment > 0);
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_INDIRECT);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_INDIRECT, param);
 	info->indirect.alignment = alignment;
 	ASSERT(info->indirect.alignment);
 	info->attributes.realign = true;
@@ -109,9 +111,9 @@ ABIArgInfo *abi_arg_new_indirect_realigned(AlignSize alignment, Type *by_val_typ
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_indirect_by_val(Type *by_val_type)
+ABIArgInfo *abi_arg_new_indirect_by_val(Type *by_val_type, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_INDIRECT);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_INDIRECT, param);
 	info->indirect.alignment = type_abi_alignment(by_val_type);
 	info->indirect.type = by_val_type;
 	info->attributes.by_val = true;
@@ -119,9 +121,9 @@ ABIArgInfo *abi_arg_new_indirect_by_val(Type *by_val_type)
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_indirect_not_by_val(Type *type)
+ABIArgInfo *abi_arg_new_indirect_not_by_val(Type *type, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_INDIRECT);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_INDIRECT, param);
 	info->indirect.alignment = type_abi_alignment(type);
 	ASSERT(info->indirect.alignment);
 	info->indirect.type = type;
@@ -129,19 +131,19 @@ ABIArgInfo *abi_arg_new_indirect_not_by_val(Type *type)
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct_int_ext(Type *int_to_extend)
+ABIArgInfo *abi_arg_new_direct_int_ext(Type *int_to_extend, ParamInfo param)
 {
-	return abi_arg_new_direct_int_ext_by_reg(int_to_extend, false);
+	return abi_arg_new_direct_int_ext_by_reg(int_to_extend, false, param);
 }
 
-ABIArgInfo *abi_arg_new_direct_coerce_int_ext(Type *int_to_extend)
+ABIArgInfo *abi_arg_new_direct_coerce_int_ext(Type *int_to_extend, ParamInfo param)
 {
-	return abi_arg_new_direct_coerce_int_ext_by_reg(int_to_extend, false);
+	return abi_arg_new_direct_coerce_int_ext_by_reg(int_to_extend, false, param);
 }
 
-ABIArgInfo *abi_arg_new_direct_coerce_int_ext_by_reg(Type *int_to_extend, bool by_reg)
+ABIArgInfo *abi_arg_new_direct_coerce_int_ext_by_reg(Type *int_to_extend, bool by_reg, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new_direct_coerce_type(abi_type_get(int_to_extend));
+	ABIArgInfo *info = abi_arg_new_direct_coerce_type(abi_type_get(int_to_extend), param);
 	if (type_is_signed(int_to_extend))
 	{
 		info->attributes.signext = true;
@@ -154,9 +156,9 @@ ABIArgInfo *abi_arg_new_direct_coerce_int_ext_by_reg(Type *int_to_extend, bool b
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct_int_ext_by_reg(Type *int_to_extend, bool by_reg)
+ABIArgInfo *abi_arg_new_direct_int_ext_by_reg(Type *int_to_extend, bool by_reg, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT, param);
 	if (type_is_signed(int_to_extend))
 	{
 		info->attributes.signext = true;
@@ -169,36 +171,36 @@ ABIArgInfo *abi_arg_new_direct_int_ext_by_reg(Type *int_to_extend, bool by_reg)
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct_pair(AbiType low_type, AbiType high_type)
+ABIArgInfo *abi_arg_new_direct_pair(AbiType low_type, AbiType high_type, ParamInfo param)
 {
-	ABIArgInfo *arg_info = abi_arg_new(ABI_ARG_DIRECT_PAIR);
+	ABIArgInfo *arg_info = abi_arg_new(ABI_ARG_DIRECT_PAIR, param);
 	arg_info->direct_pair.hi = high_type;
 	arg_info->direct_pair.lo = low_type;
 	return arg_info;
 }
 
-ABIArgInfo *abi_arg_new_direct_by_reg(bool by_reg)
+ABIArgInfo *abi_arg_new_direct_by_reg(bool by_reg, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT, param);
 	info->attributes.by_reg = by_reg;
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct(void)
+ABIArgInfo *abi_arg_new_direct(ParamInfo param)
 {
-	return abi_arg_new_direct_by_reg(false);
+	return abi_arg_new_direct_by_reg(false, param);
 }
 
 
-ABIArgInfo *abi_arg_new_expand(void)
+ABIArgInfo *abi_arg_new_expand(ParamInfo param)
 {
-	return abi_arg_new(ABI_ARG_EXPAND);
+	return abi_arg_new(ABI_ARG_EXPAND, param);
 }
 
 
-ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_element, unsigned hi_offset, bool packed)
+ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_element, unsigned hi_offset, bool packed, ParamInfo param)
 {
-	ABIArgInfo *arg = abi_arg_new(ABI_ARG_EXPAND_COERCE);
+	ABIArgInfo *arg = abi_arg_new(ABI_ARG_EXPAND_COERCE, param);
 	arg->coerce_expand.lo = first_element;
 	arg->coerce_expand.hi = second_element;
 	arg->coerce_expand.offset_hi = hi_offset;
@@ -206,66 +208,120 @@ ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_ele
 	return arg;
 }
 
-ABIArgInfo *abi_arg_new_direct_coerce_int(void)
+ABIArgInfo *abi_arg_new_direct_coerce_int(ParamInfo param)
 {
-	return abi_arg_new(ABI_ARG_DIRECT_COERCE_INT);
+	return abi_arg_new(ABI_ARG_DIRECT_COERCE_INT, param);
 }
 
-ABIArgInfo *abi_arg_new_direct_coerce_type_spec(AbiSpecType type)
+ABIArgInfo *abi_arg_new_direct_coerce_type_spec(AbiSpecType type, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_COERCE);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_COERCE, param);
 	info->direct_coerce_type = (AbiType){ .abi_type = type };
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct_coerce_type(AbiType type)
+ABIArgInfo *abi_arg_new_direct_coerce_type(AbiType type, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_COERCE);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_COERCE, param);
 	info->direct_coerce_type = type;
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct_coerce_type_bits(int bits)
+ABIArgInfo *abi_arg_new_direct_coerce_type_bits(int bits, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_COERCE);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_COERCE, param);
 	info->direct_coerce_type = abi_type_get_int_bits(bits);
 	return info;
 }
 
-ABIArgInfo *abi_arg_new_direct_struct_expand_i32(uint8_t elements)
+ABIArgInfo *abi_arg_new_direct_struct_expand_i32(uint8_t elements, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_SPLIT_STRUCT_I32);
+	ABIArgInfo *info = abi_arg_new(ABI_ARG_DIRECT_SPLIT_STRUCT_I32, param);
 	info->direct_struct_expand = elements;
 	return info;
 }
 
-
-void c_abi_func_create(FunctionPrototype *proto)
+// Fully set up the prototype correctly
+void c_abi_func_create(Signature *sig, FunctionPrototype *proto, Expr **vaargs)
 {
 	ASSERT(!proto->is_resolved);
+	ParamInfo vaarg_params[512];
+	ParamInfo params[512];
+	proto->raw_variadic = sig->variadic == VARIADIC_RAW;
+	proto->vararg_index = sig->vararg_index;
+	Type *rtype = type_infoptr(sig->rtype)->type;
+	Type *rtype_flat = type_flatten(rtype);
+	unsigned param_count = 0;
+	if (rtype_flat->type_kind == TYPE_VECTOR && !type_is_simd(rtype))
+	{
+		rtype_flat = type_array_from_vector(rtype_flat);
+		proto->return_rewrite = PARAM_RW_VEC_TO_ARRAY;
+	}
+	if (type_is_optional(rtype))
+	{
+		proto->return_info = (ParamInfo){ .type = type_fault };
+		proto->return_result = type_no_optional(rtype);
+		if (type_is_void(rtype_flat))
+		{
+			proto->ret_rewrite = RET_OPTIONAL_VOID;
+		}
+		else
+		{
+			proto->ret_rewrite = RET_OPTIONAL_VALUE;
+			params[param_count++] = (ParamInfo){ .type = type_get_ptr(rtype_flat) };
+		}
+	}
+	else
+	{
+		proto->return_info = (ParamInfo){ .type = rtype_flat };
+		proto->return_result = rtype;
+		proto->ret_rewrite = RET_NORMAL;
+	}
+	proto->call_abi = sig->abi;
+
+	unsigned param_decl_count = vec_size(sig->params);
+	for (unsigned i = 0; i < param_decl_count; i++)
+	{
+		Decl *decl = sig->params[i];
+		Type *flat_type = type_flatten(decl->type);
+		ParamInfo param_info = (ParamInfo) { .type = flat_type };
+		if (flat_type->type_kind == TYPE_VECTOR && !type_is_simd(decl->type))
+		{
+			param_info.rewrite = PARAM_RW_VEC_TO_ARRAY;
+			param_info.type = type_array_from_vector(flat_type);
+		}
+		params[param_count++] = param_info;
+	}
+	unsigned vaarg_count = 0;
+	FOREACH(Expr *, val, vaargs)
+	{
+		vaarg_params[vaarg_count++] = (ParamInfo) { .type = type_flatten(val->type) };
+	}
+	proto->param_vacount = vaarg_count;
+	proto->param_count = param_count;
 	proto->is_resolved = true;
 	switch (compiler.platform.abi)
 	{
 		case ABI_X64:
-			c_abi_func_create_x64(proto);
+			c_abi_func_create_x64(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_X86:
-			c_abi_func_create_x86(proto);
+			c_abi_func_create_x86(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_WIN64:
-			c_abi_func_create_win64(proto);
+			c_abi_func_create_win64(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_AARCH64:
-			c_abi_func_create_aarch64(proto);
+			c_abi_func_create_aarch64(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_RISCV:
-			c_abi_func_create_riscv(proto);
+			c_abi_func_create_riscv(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_WASM:
-			c_abi_func_create_wasm(proto);
+			c_abi_func_create_wasm(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_XTENSA:
-			c_abi_func_create_default(proto);
+			c_abi_func_create_default(proto, params, param_count, vaarg_params, vaarg_count);
 			return;
 		case ABI_UNKNOWN:
 		case ABI_ARM:
@@ -277,39 +333,34 @@ void c_abi_func_create(FunctionPrototype *proto)
 }
 
 
-ABIArgInfo *c_abi_classify_return_type_default(Type *type)
+ABIArgInfo *c_abi_classify_return_type_default(ParamInfo param)
 {
+	Type *type = type_lowering(param.type);
 	if (type_is_void(type)) return abi_arg_ignore();
-	return c_abi_classify_argument_type_default(type);
+	return c_abi_classify_argument_type_default(param);
 }
 
-ABIArgInfo *c_abi_classify_argument_type_default(Type *type)
+ABIArgInfo *c_abi_classify_argument_type_default(ParamInfo param)
 {
 	// Perform general lowering.
-	type = type_lowering(type);
+	Type *type = type_lowering(param.type);
 
 	// Struct-likes are returned by sret
-	if (type_is_abi_aggregate(type)) return abi_arg_new_indirect_by_val(type);
+	if (type_is_abi_aggregate(type)) return abi_arg_new_indirect_by_val(type, param);
 
-	if (type_is_int128(type) && !compiler.platform.int128) return abi_arg_new_indirect_by_val(type);
+	if (type_is_int128(type) && !compiler.platform.int128) return abi_arg_new_indirect_by_val(type, param);
 
 	// Otherwise do we have a type that needs promotion?
-	if (type_is_promotable_int_bool(type)) return abi_arg_new_direct_int_ext(type);
+	if (type_is_promotable_int_bool(type)) return abi_arg_new_direct_int_ext(type, param);
 
 	// No, then do a direct pass.
-	return abi_arg_new_direct();
+	return abi_arg_new_direct(param);
 }
 
-void c_abi_func_create_default(FunctionPrototype *prototype)
+void c_abi_func_create_default(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
-	prototype->ret_abi_info = c_abi_classify_return_type_default(prototype->abi_ret_type);
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = c_abi_classify_return_type_default(type_get_ptr(type_flatten(prototype->ret_by_ref_type)));
-	}
+	prototype->ret_abi_info = c_abi_classify_return_type_default(prototype->return_info);
 
-	Type **params = prototype->param_types;
-	unsigned param_count = vec_size(prototype->param_types);
 	if (param_count)
 	{
 		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
@@ -319,15 +370,14 @@ void c_abi_func_create_default(FunctionPrototype *prototype)
 		}
 		prototype->abi_args = args;
 	}
-	Type **va_params = prototype->varargs;
-	unsigned va_param_count = vec_size(va_params);
-	if (va_param_count)
+	if (vaarg_count)
 	{
-		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * va_param_count);
-		for (unsigned i = 0; i < va_param_count; i++)
+		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * vaarg_count);
+		for (unsigned i = 0; i < vaarg_count; i++)
 		{
-			args[i] = c_abi_classify_argument_type_default(va_params[i]);
+			args[i] = c_abi_classify_argument_type_default(vaargs[i]);
 		}
 		prototype->abi_varargs = args;
 	}
-}
\ No newline at end of file
+}
+
diff --git a/src/compiler/abi/c_abi_aarch64.c b/src/compiler/abi/c_abi_aarch64.c
index 93489d799..36e9f0087 100644
--- a/src/compiler/abi/c_abi_aarch64.c
+++ b/src/compiler/abi/c_abi_aarch64.c
@@ -12,7 +12,7 @@ INLINE bool is_aarch64_illegal_vector(Type *type)
 		return false;
 	}
 	ArraySize len = type->array.len;
-	if (!is_power_of_two(len)) return true;
+	ASSERT(is_power_of_two(len) && "@simd is enforced to pot sizes, otherwise this would be 'illegal' and handled.");
 	switch (type_size(type))
 	{
 		case 8:
@@ -24,7 +24,7 @@ INLINE bool is_aarch64_illegal_vector(Type *type)
 	}
 }
 
-ABIArgInfo *aarch64_coerce_illegal_vector(Type *type)
+ABIArgInfo *aarch64_coerce_illegal_vector(Type *type, ParamInfo param)
 {
 	if (false /*type->type_kind == TYPE_SCALED_VECTOR*/)
 	{
@@ -63,27 +63,27 @@ ABIArgInfo *aarch64_coerce_illegal_vector(Type *type)
 	// CLANG: Android promotes char[<2>] to ushort, not uint
 	if ((compiler.platform.environment_type == ENV_TYPE_ANDROID || compiler.platform.os == OS_TYPE_ANDROID) && size <= 2)
 	{
-		return abi_arg_new_direct_coerce_type_bits(16);
+		return abi_arg_new_direct_coerce_type_bits(16, param);
 	}
 	// 32 bits or fewer? Put in int.
-	if (size <= 4) return abi_arg_new_direct_coerce_type_bits(32);
+	if (size <= 4) return abi_arg_new_direct_coerce_type_bits(32, param);
 
 	// 64 bits or less? Put in uint[<2>]
-	if (size <= 8) return abi_arg_new_direct_coerce_type((AbiType) { .abi_type = ABI_TYPE_INT_VEC_2 });
+	if (size <= 8) return abi_arg_new_direct_coerce_type(abi_type_spec(ABI_TYPE_INT_VEC_2), param);
 	// 128 bits in a single val? Put in uint[<4>]
-	if (size == 128) return abi_arg_new_direct_coerce_type((AbiType) { .abi_type = ABI_TYPE_INT_VEC_4 });
-	return abi_arg_new_indirect_not_by_val(type);
+	if (size == 128) return abi_arg_new_direct_coerce_type(abi_type_spec(ABI_TYPE_INT_VEC_4), param);
+	return abi_arg_new_indirect_not_by_val(type, param);
 }
 
-ABIArgInfo *aarch64_classify_argument_type(Type *type)
+static ABIArgInfo *aarch64_classify_argument_type(ParamInfo param)
 {
-	type = type_lowering(type);
+	Type *type = type_lowering(param.type);
 
 	if (type_is_void(type)) return abi_arg_ignore();
 
 	if (is_aarch64_illegal_vector(type))
 	{
-		return aarch64_coerce_illegal_vector(type);
+		return aarch64_coerce_illegal_vector(type, param);
 	}
 
 	TypeSize size = type_size(type);
@@ -94,9 +94,9 @@ ABIArgInfo *aarch64_classify_argument_type(Type *type)
 		// we don't have that (yet?)
 		if (type_is_promotable_int_bool(type) && compiler.platform.aarch64.is_darwin_pcs)
 		{
-			return abi_arg_new_direct_int_ext(type);
+			return abi_arg_new_direct_int_ext(type, param);
 		}
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// Is empty
@@ -110,9 +110,9 @@ ABIArgInfo *aarch64_classify_argument_type(Type *type)
 		ASSERT(members < 128);
 		if (members > 1)
 		{
-			return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(base, members)));
+			return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(base, members)), param);
 		}
-		return abi_arg_new_direct_coerce_type(abi_type_get(base));
+		return abi_arg_new_direct_coerce_type(abi_type_get(base), param);
 	}
 
 	// Aggregates <= in registers
@@ -136,25 +136,25 @@ ABIArgInfo *aarch64_classify_argument_type(Type *type)
 		// For aggregates with 16-byte alignment, we use i128.
 		ASSERT(alignment == 8 || alignment == 16);
 
-		if (alignment == 16) return abi_arg_new_direct_coerce_type_bits(128);
+		if (alignment == 16) return abi_arg_new_direct_coerce_type_bits(128, param);
 		ArraySize m = size / alignment;
-		if (m > 1) return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(type_ulong, m)));
-		return abi_arg_new_direct_coerce_type_bits(64);
+		if (m > 1) return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(type_ulong, m)), param);
+		return abi_arg_new_direct_coerce_type_bits(64, param);
 
 	}
 
-	return abi_arg_new_indirect_not_by_val(type);
+	return abi_arg_new_indirect_not_by_val(type, param);
 }
 
-ABIArgInfo *aarch64_classify_return_type(Type *type, bool variadic)
+ABIArgInfo *aarch64_classify_return_type(ParamInfo param, bool variadic)
 {
-	type = type_lowering(type);
+	Type *type = type_lowering(param.type);
 
 	if (type_is_void(type)) return abi_arg_ignore();
 
 	if (is_aarch64_illegal_vector(type))
 	{
-		return aarch64_coerce_illegal_vector(type);
+		return aarch64_coerce_illegal_vector(type, param);
 	}
 
 	TypeSize size = type_size(type);
@@ -162,16 +162,16 @@ ABIArgInfo *aarch64_classify_return_type(Type *type, bool variadic)
 	// Large vectors by mem.
 	if (type->type_kind == TYPE_VECTOR && size > 16)
 	{
-		return abi_arg_new_direct_coerce_type(abi_type_get(type));
+		return abi_arg_new_direct_coerce_type(abi_type_get(type), param);
 	}
 
 	if (!type_is_abi_aggregate(type))
 	{
 		if (type_is_promotable_int_bool(type) && compiler.platform.aarch64.is_darwin_pcs)
 		{
-			return abi_arg_new_direct_int_ext(type);
+			return abi_arg_new_direct_int_ext(type, param);
 		}
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// Abi aggregate:
@@ -184,7 +184,7 @@ ABIArgInfo *aarch64_classify_return_type(Type *type, bool variadic)
 	if (type_is_homogenous_aggregate(type, &base, &members) &&
 		!(compiler.platform.arch == ARCH_TYPE_AARCH64_32 && variadic))
 	{
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// Aggregates <= in registers
@@ -196,7 +196,7 @@ ABIArgInfo *aarch64_classify_return_type(Type *type, bool variadic)
 
 		if (size <= 8 && !compiler.platform.big_endian)
 		{
-			return abi_arg_new_direct_coerce_int();
+			return abi_arg_new_direct_coerce_int(param);
 		}
 
 		unsigned alignment = type_abi_alignment(type);
@@ -204,26 +204,20 @@ ABIArgInfo *aarch64_classify_return_type(Type *type, bool variadic)
 		size = aligned_offset(size, 8);
 		if (alignment < 16 && size == 16)
 		{
-			return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(type_ulong, size / 8)));
+			return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(type_ulong, size / 8)), param);
 		}
-		return abi_arg_new_direct_coerce_type_bits(size * 8);
+		return abi_arg_new_direct_coerce_type_bits(size * 8, param);
 	}
 
-	return abi_arg_new_indirect_by_val(type);
+	return abi_arg_new_indirect_by_val(type, param);
 }
 
 
-void c_abi_func_create_aarch64(FunctionPrototype *prototype)
+void c_abi_func_create_aarch64(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
 
-	prototype->ret_abi_info = aarch64_classify_return_type(prototype->abi_ret_type, prototype->raw_variadic);
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = aarch64_classify_argument_type(type_get_ptr(type_flatten(prototype->ret_by_ref_type)));
-	}
+	prototype->ret_abi_info = aarch64_classify_return_type(prototype->return_info, prototype->raw_variadic);
 
-	Type **params = prototype->param_types;
-	unsigned param_count = vec_size(prototype->param_types);
 	if (param_count)
 	{
 		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
@@ -233,14 +227,12 @@ void c_abi_func_create_aarch64(FunctionPrototype *prototype)
 		}
 		prototype->abi_args = args;
 	}
-	Type **va_params = prototype->varargs;
-	unsigned va_param_count = vec_size(va_params);
-	if (va_param_count)
+	if (vaarg_count)
 	{
-		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * va_param_count);
-		for (unsigned i = 0; i < va_param_count; i++)
+		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * vaarg_count);
+		for (unsigned i = 0; i < vaarg_count; i++)
 		{
-			args[i] = aarch64_classify_argument_type(va_params[i]);
+			args[i] = aarch64_classify_argument_type(vaargs[i]);
 		}
 		prototype->abi_varargs = args;
 	}
diff --git a/src/compiler/abi/c_abi_riscv.c b/src/compiler/abi/c_abi_riscv.c
index caf542980..18e6ea038 100644
--- a/src/compiler/abi/c_abi_riscv.c
+++ b/src/compiler/abi/c_abi_riscv.c
@@ -1,24 +1,24 @@
-// Copyright (c) 2020 Christoffer Lerno. All rights reserved.
+// Copyright (c) 2020-2025 Christoffer Lerno. All rights reserved.
 // Use of this source code is governed by a LGPLv3.0
 // a copy of which can be found in the LICENSE file.
 
 #include "compiler/c_abi_internal.h"
 
 
-static ABIArgInfo *riscv_coerce_and_expand_fpcc_struct(AbiType field1, unsigned field1_offset, AbiType field2, unsigned field2_offset)
+static ABIArgInfo *riscv_coerce_and_expand_fpcc_struct(AbiType field1, unsigned field1_offset, AbiType field2, unsigned field2_offset, ParamInfo param)
 {
 	if (!abi_type_is_valid(field2))
 	{
-		return abi_arg_new_direct_coerce_type(field1);
+		return abi_arg_new_direct_coerce_type(field1, param);
 	}
 
 	ByteSize abi_size = abi_type_size(field2);
 	// Not on even offset, use packed semantics.
 	if (field2_offset % abi_size != 0)
 	{
-		return abi_arg_new_expand_coerce_pair(field1.type, field2.type, field2_offset, true);
+		return abi_arg_new_expand_coerce_pair(field1.type, field2.type, field2_offset, true, param);
 	}
-	return abi_arg_new_expand_coerce_pair(field1.type, field2.type, field2_offset / abi_size, false);
+	return abi_arg_new_expand_coerce_pair(field1.type, field2.type, field2_offset / abi_size, false, param);
 }
 
 static bool riscv_detect_fpcc_struct_internal(Type *type, unsigned current_offset, AbiType *field1_ref, unsigned *field1_offset, AbiType *field2_ref, unsigned *field2_offset)
@@ -55,7 +55,7 @@ static bool riscv_detect_fpcc_struct_internal(Type *type, unsigned current_offse
 	if (type->type_kind == TYPE_ARRAY)
 	{
 		ByteSize array_len = type->array.len;
-		Type *element_type = type->array.base;
+		Type *element_type = lowered_array_element_type(type);
 		ByteSize element_size = type_size(element_type);
 		for (ByteSize i = 0; i < array_len; i++)
 		{
@@ -76,7 +76,7 @@ static bool riscv_detect_fpcc_struct_internal(Type *type, unsigned current_offse
 		if (type->type_kind == TYPE_UNION) return false;
 		FOREACH(Decl *, member, type->decl->strukt.members)
 		{
-			if (!riscv_detect_fpcc_struct_internal(member->type,
+			if (!riscv_detect_fpcc_struct_internal(lowered_member_type(member),
 												   (unsigned)(current_offset + member->offset),
 												   field1_ref,
 												   field1_offset,
@@ -126,10 +126,10 @@ static bool riscv_detect_fpcc_struct(Type *type, AbiType *field1_ref, unsigned *
 	return true;
 }
 
-static ABIArgInfo *riscv_classify_argument_type(Type *type, bool is_fixed, unsigned *gprs, unsigned *fprs)
+static ABIArgInfo *riscv_classify_argument_type(ParamInfo param, bool is_fixed, unsigned *gprs, unsigned *fprs)
 {
 
-	ASSERT(type == type->canonical);
+	Type *type = type_lowering(param.type);
 
 	unsigned xlen = compiler.platform.riscv.xlen;
 	ASSERT(is_power_of_two(xlen));
@@ -140,7 +140,7 @@ static ABIArgInfo *riscv_classify_argument_type(Type *type, bool is_fixed, unsig
 	if (is_fixed && type_is_float(type) && compiler.platform.riscv.flen >= size && *fprs)
 	{
 		(*fprs)--;
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	if (is_fixed && compiler.platform.riscv.flen && type->type_kind == TYPE_STRUCT)
@@ -161,7 +161,7 @@ static ABIArgInfo *riscv_classify_argument_type(Type *type, bool is_fixed, unsig
 		{
 			*gprs -= needed_gprs;
 			*fprs -= needed_fprs;
-			return riscv_coerce_and_expand_fpcc_struct(field1, offset1, field2, offset2);
+			return riscv_coerce_and_expand_fpcc_struct(field1, offset1, field2, offset2, param);
 		}
 	}
 
@@ -196,11 +196,11 @@ static ABIArgInfo *riscv_classify_argument_type(Type *type, bool is_fixed, unsig
 			// Clang: RV64 ABI requires unsigned 32-bit integers to be sign extended.
 			if (xlen == 8 && type == type_uint)
 			{
-				return abi_arg_new_direct_int_ext(type_int);
+				return abi_arg_new_direct_int_ext(type_int, param);
 			}
-			return abi_arg_new_direct_int_ext(type);
+			return abi_arg_new_direct_int_ext(type, param);
 		}
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// Aggregates which are <= 2*XLen will be passed in registers if possible,
@@ -211,20 +211,21 @@ static ABIArgInfo *riscv_classify_argument_type(Type *type, bool is_fixed, unsig
 		// required, and a 2-field XLen array if only XLen alignment is required.
 		if (size <= xlen)
 		{
-			return abi_arg_new_direct_coerce_type_bits(xlen * 8);
+			return abi_arg_new_direct_coerce_type_bits(xlen * 8, param);
 		}
 		if (alignment == 2 * compiler.platform.riscv.xlen)
 		{
-			return abi_arg_new_direct_coerce_type_bits(xlen * 16);
+			return abi_arg_new_direct_coerce_type_bits(xlen * 16, param);
 		}
 		Type *ret_type = type_int_unsigned_by_bitsize(xlen * 8);
-		return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(ret_type, 2)));
+		return abi_arg_new_direct_coerce_type(abi_type_get(type_get_array(ret_type, 2)), param);
 	}
-	return abi_arg_new_indirect_not_by_val(type);
+	return abi_arg_new_indirect_not_by_val(type, param);
 }
 
-static ABIArgInfo *riscv_classify_return(Type *return_type)
+static ABIArgInfo *riscv_classify_return(ParamInfo param)
 {
+	Type *return_type = type_lowering(param.type);
 	if (type_is_void(return_type)) return abi_arg_ignore();
 
 	unsigned arg_gpr_left = 2;
@@ -232,27 +233,26 @@ static ABIArgInfo *riscv_classify_return(Type *return_type)
 
 	// The rules for return and argument types are the same, so defer to
 	// classifyArgumentType.
-	return riscv_classify_argument_type(return_type, true, &arg_gpr_left, &arg_fpr_left);
+	return riscv_classify_argument_type(param, true, &arg_gpr_left, &arg_fpr_left);
 }
-ABIArgInfo **riscv_create_params(Type** params, bool is_fixed, unsigned *arg_gprs_left, unsigned *arg_fprs_left)
+ABIArgInfo **riscv_create_params(ParamInfo* params, unsigned param_count, bool is_fixed, unsigned *arg_gprs_left, unsigned *arg_fprs_left)
 {
-	unsigned param_count = vec_size(params);
 	if (!param_count) return NULL;
 	ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
 	for (unsigned i = 0; i < param_count; i++)
 	{
-		args[i] = riscv_classify_argument_type(type_lowering(params[i]), is_fixed, arg_gprs_left, arg_fprs_left);
+		args[i] = riscv_classify_argument_type(params[i], is_fixed, arg_gprs_left, arg_fprs_left);
 	}
 	return args;
 }
-void c_abi_func_create_riscv(FunctionPrototype *prototype)
+void c_abi_func_create_riscv(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
 	// Registers
 	unsigned gpr = 8;
 	unsigned fpr = 8;
 
-	Type *ret_type = type_lowering(prototype->abi_ret_type);
-	ABIArgInfo *ret_abi = prototype->ret_abi_info = riscv_classify_return(ret_type);
+	Type *ret_type = type_lowering(prototype->return_info.type);
+	ABIArgInfo *ret_abi = prototype->ret_abi_info = riscv_classify_return(prototype->return_info);
 
 	// IsRetIndirect is true if classifyArgumentType indicated the value should
 	// be passed indirect, or if the type size is a scalar greater than 2*XLen
@@ -273,13 +273,7 @@ void c_abi_func_create_riscv(FunctionPrototype *prototype)
 	unsigned arg_gprs_left = is_ret_indirect ? gpr - 1 : gpr;
 	unsigned arg_fprs_left = compiler.platform.riscv.flen ? fpr : 0;
 
-	// If we have an optional, then the return type is a parameter.
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = riscv_classify_argument_type(type_get_ptr(type_lowering(prototype->ret_by_ref_type)),
-																	  true, &arg_gprs_left, &arg_fprs_left);
-	}
 
-	prototype->abi_args = riscv_create_params(prototype->param_types, true, &arg_gprs_left, &arg_fprs_left);
-	prototype->abi_varargs = riscv_create_params(prototype->varargs, false, &arg_gprs_left, &arg_fprs_left);
+	prototype->abi_args = riscv_create_params(params, param_count, true, &arg_gprs_left, &arg_fprs_left);
+	prototype->abi_varargs = riscv_create_params(vaargs, vaarg_count, false, &arg_gprs_left, &arg_fprs_left);
 }
diff --git a/src/compiler/abi/c_abi_wasm.c b/src/compiler/abi/c_abi_wasm.c
index c13c9addc..4dd49917a 100644
--- a/src/compiler/abi/c_abi_wasm.c
+++ b/src/compiler/abi/c_abi_wasm.c
@@ -4,16 +4,16 @@
 
 #include "compiler/c_abi_internal.h"
 
-static ABIArgInfo *wasm_classify_argument_type(Type *type)
+static ABIArgInfo *wasm_classify_argument_type(ParamInfo param)
 {
-	type = type_lowering(type);
+	Type *type = type_lowering(param.type);
 	if (type_is_abi_aggregate(type))
 	{
 		// Clang: Lower single-field structs to just pass a regular value. TODO: We
 		// could do reasonable-size multiple-field structs too, using getExpand(),
 		// though watch out for things like bitfields.
-		Type *single_type = type_abi_find_single_struct_element(type);
-		if (single_type) return abi_arg_new_direct_coerce_type(abi_type_get(single_type));
+		Type *single_type = type_abi_find_single_struct_element(type, true);
+		if (single_type) return abi_arg_new_direct_coerce_type(abi_type_get(single_type), param);
 
 		// For the experimental multivalue ABI, fully expand all other aggregates
 		/*if (Kind == ABIKind::ExperimentalMV) {
@@ -32,15 +32,16 @@ static ABIArgInfo *wasm_classify_argument_type(Type *type)
 	}
 
 	// Otherwise just do the default thing.
-	return c_abi_classify_argument_type_default(type);
+	return c_abi_classify_argument_type_default(param);
 }
 
-static ABIArgInfo *wasm_classify_return(Type *type)
+static ABIArgInfo *wasm_classify_return(ParamInfo param)
 {
+	Type *type = type_lowering(param.type);
 	if (type_is_abi_aggregate(type))
 	{
-		Type *single_type = type_abi_find_single_struct_element(type);
-		if (single_type) return abi_arg_new_direct_coerce_type(abi_type_get(single_type));
+		Type *single_type = type_abi_find_single_struct_element(type, true);
+		if (single_type) return abi_arg_new_direct_coerce_type(abi_type_get(single_type), param);
 		/*
 		 * 			// For the experimental multivalue ABI, return all other aggregates
 			if (Kind == ABIKind::ExperimentalMV)
@@ -48,29 +49,23 @@ static ABIArgInfo *wasm_classify_return(Type *type)
 		 */
 	}
 	// Use default classification
-	return c_abi_classify_return_type_default(type);
+	return c_abi_classify_return_type_default(param);
 }
 
-ABIArgInfo **wasm_create_params(Type **params)
+ABIArgInfo **wasm_create_params(ParamInfo *params, unsigned param_count)
 {
-	unsigned param_count = vec_size(params);
 	if (!param_count) return NULL;
 	ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
 	for (unsigned i = 0; i < param_count; i++)
 	{
-		args[i] = wasm_classify_argument_type(type_lowering(params[i]));
+		args[i] = wasm_classify_argument_type(params[i]);
 	}
 	return args;
 }
 
-void c_abi_func_create_wasm(FunctionPrototype *prototype)
+void c_abi_func_create_wasm(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
-	prototype->ret_abi_info = wasm_classify_return(type_lowering(prototype->abi_ret_type));
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = wasm_classify_argument_type(type_get_ptr(prototype->ret_by_ref_type));
-	}
-
-	prototype->abi_args = wasm_create_params(prototype->param_types);
-	prototype->abi_varargs = wasm_create_params(prototype->varargs);
+	prototype->ret_abi_info = wasm_classify_return(prototype->return_info);
+	prototype->abi_args = wasm_create_params(params, param_count);
+	prototype->abi_varargs = wasm_create_params(vaargs, vaarg_count);
 }
\ No newline at end of file
diff --git a/src/compiler/abi/c_abi_win64.c b/src/compiler/abi/c_abi_win64.c
index 32e4a0397..a02bfc57a 100644
--- a/src/compiler/abi/c_abi_win64.c
+++ b/src/compiler/abi/c_abi_win64.c
@@ -4,8 +4,9 @@
 
 #include "compiler/c_abi_internal.h"
 
-ABIArgInfo *win64_classify(Regs *regs, Type *type, bool is_return, bool is_vector_call)
+ABIArgInfo *win64_classify(Regs *regs, ParamInfo param, bool is_return, bool is_vector_call)
 {
+	Type *type = param.type;
 	if (type_is_void(type)) return abi_arg_ignore();
 	
 	// Lower enums etc.
@@ -14,7 +15,7 @@ ABIArgInfo *win64_classify(Regs *regs, Type *type, bool is_return, bool is_vecto
 	// Variable array has to be passed indirectly.
 	if (type_is_union_or_strukt(type) && type->decl->has_variable_array)
 	{
-		return abi_arg_new_indirect_not_by_val(type);
+		return abi_arg_new_indirect_not_by_val(type, param);
 	}
 
 	Type *base = NULL;
@@ -26,90 +27,87 @@ ABIArgInfo *win64_classify(Regs *regs, Type *type, bool is_return, bool is_vecto
 			(is_return || type_is_builtin(type->type_kind) || type->type_kind == TYPE_VECTOR))
 		{
 			regs->float_regs -= elements;
-			return abi_arg_new_direct();
+			return abi_arg_new_direct(param);
 		}
 		// HVAs are handled later.
 		if (is_return || (!type_is_builtin(type->type_kind) && type->type_kind != TYPE_VECTOR))
 		{
-			return abi_arg_new_indirect_not_by_val(type);
+			return abi_arg_new_indirect_not_by_val(type, param);
 		}
 		// => to main handling.
 	}
 	ByteSize size = type_size(type);
-	bool type_is_vector_to_pass_as_array = compiler.build.feature.pass_win64_simd_as_arrays && type_flat_is_vector(type);
+	bool type_is_vector_to_pass_as_array = compiler.build.feature.pass_win64_simd_as_arrays && type->type_kind == TYPE_VECTOR;
 	if (type_is_vector_to_pass_as_array || type_is_abi_aggregate(type))
 	{
 		// Not 1, 2, 4, 8? Pass indirect.
 		if (size > 8 || !is_power_of_two(size))
 		{
-			return abi_arg_new_indirect_not_by_val(type);
+			return abi_arg_new_indirect_not_by_val(type, param);
 		}
 		// Coerce to integer.
-		return abi_arg_new_direct_coerce_type_bits(size * 8);
+		return abi_arg_new_direct_coerce_type_bits(size * 8, param);
 	}
 	if (type_is_builtin(type->type_kind))
 	{
 		switch (type->type_kind)
 		{
 			case TYPE_BOOL:
-				return abi_arg_new_direct_int_ext(type_bool);
+				return abi_arg_new_direct_int_ext(type_bool, param);
 			case TYPE_U128:
 			case TYPE_I128:
 				// Pass by val since greater than 8 bytes.
-				if (!is_return) return abi_arg_new_indirect_not_by_val(type);
+				if (!is_return) return abi_arg_new_indirect_not_by_val(type, param);
 				// Make i128 return in XMM0
-				return abi_arg_new_direct_coerce_type_spec(ABI_TYPE_LONG_VEC_2);
+				return abi_arg_new_direct_coerce_type_spec(ABI_TYPE_LONG_VEC_2, param);
 			default:
 				break;
 		}
 	}
 	if (size > 8)
 	{
-		return abi_arg_new_indirect_not_by_val(type);
+		return abi_arg_new_indirect_not_by_val(type, param);
 	}
-	return abi_arg_new_direct();
+	return abi_arg_new_direct(param);
 }
 
-ABIArgInfo *win64_reclassify_hva_arg(Regs *regs, Type *type, ABIArgInfo *info)
+ABIArgInfo *win64_reclassify_hva_arg(Regs *regs, ParamInfo param, ABIArgInfo *info)
 {
 	// Assumes vectorCall calling convention.
 	Type *base = NULL;
 	unsigned elements = 0;
-	type = type_lowering(type);
+	Type *type = type_lowering(param.type);
 	if (!type_is_builtin(type->type_kind) && type->type_kind != TYPE_VECTOR && type_is_homogenous_aggregate(type, &base, &elements))
 	{
 		if (regs->float_regs >= elements)
 		{
 			regs->float_regs -= elements;
-			ABIArgInfo *new_info = abi_arg_new_direct_by_reg(true);
+			ABIArgInfo *new_info = abi_arg_new_direct_by_reg(true, param);
 			return new_info;
 		}
 	}
 	return info;
 }
 
-static void win64_vector_call_args(Regs *regs, FunctionPrototype *prototype, bool is_vector)
+static void win64_vector_call_args(Regs *regs, FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, bool is_vector)
 {
 	static const unsigned max_param_vector_calls_as_reg = 6;
 	unsigned count = 0;
-	Type **params = prototype->param_types;
-	unsigned param_count = vec_size(prototype->param_types);
 	if (param_count)
 	{
 		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
 		for (unsigned i = 0; i < param_count; i++)
 		{
-			Type *type = params[i];
 			if (count < max_param_vector_calls_as_reg)
 			{
-				args[i] = win64_classify(regs, type, false, is_vector);
+				args[i] = win64_classify(regs, params[i], false, is_vector);
 			}
 			else
 			{
 				// Cannot be passed in registers pretend no registers.
 				unsigned float_regs = regs->float_regs;
 				regs->float_regs = 0;
-				args[i] = win64_classify(regs, type, false, is_vector);
+				args[i] = win64_classify(regs, params[i], false, is_vector);
 				regs->float_regs = float_regs;
 			}
 			count++;
@@ -122,9 +120,8 @@ static void win64_vector_call_args(Regs *regs, FunctionPrototype *prototype, boo
 	}
 }
 
-ABIArgInfo **win64_create_params(Type **params, Regs *regs, bool is_vector_call)
+ABIArgInfo **win64_create_params(ParamInfo *params, unsigned param_count, Regs *regs,bool is_vector_call)
 {
-	unsigned param_count = vec_size(params);
 	if (!param_count) return NULL;
 	ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
 	for (unsigned i = 0; i < param_count; i++)
@@ -134,7 +131,7 @@ ABIArgInfo **win64_create_params(Type **params, Regs *regs, bool is_vector_call)
 	return args;
 }
 
-void c_abi_func_create_win64(FunctionPrototype *prototype)
+void c_abi_func_create_win64(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
 	// allow calling sysv?
 
@@ -152,14 +149,7 @@ void c_abi_func_create_win64(FunctionPrototype *prototype)
 			break;
 	}
 
-	prototype->ret_abi_info = win64_classify(&regs, prototype->abi_ret_type, true, is_vector_call);
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = win64_classify(&regs,
-														type_get_ptr(type_lowering(prototype->ret_by_ref_type)),
-														false,
-														is_vector_call);
-	}
+	prototype->ret_abi_info = win64_classify(&regs, prototype->return_info, true, is_vector_call);
 
 	// Set up parameter registers.
 	switch (prototype->call_abi)
@@ -174,10 +164,10 @@ void c_abi_func_create_win64(FunctionPrototype *prototype)
 	}
 	if (is_vector_call)
 	{
-		win64_vector_call_args(&regs, prototype, is_vector_call);
+		win64_vector_call_args(&regs, prototype, params, param_count, is_vector_call);
 		return;
 	}
 
-	prototype->abi_args = win64_create_params(prototype->param_types, &regs, is_vector_call);
-	prototype->abi_varargs = win64_create_params(prototype->varargs, &regs, is_vector_call);
+	prototype->abi_args = win64_create_params(params, param_count, &regs, is_vector_call);
+	prototype->abi_varargs = win64_create_params(vaargs, vaarg_count, &regs, is_vector_call);
 }
\ No newline at end of file
diff --git a/src/compiler/abi/c_abi_x64.c b/src/compiler/abi/c_abi_x64.c
index 2c496c177..03f422848 100644
--- a/src/compiler/abi/c_abi_x64.c
+++ b/src/compiler/abi/c_abi_x64.c
@@ -35,22 +35,22 @@ typedef enum
 	CLASS_SSEUP,
 } X64Class;
 
-static ABIArgInfo *x64_classify_argument_type(Type *type, unsigned free_int_regs, Registers *needed_registers, NamedArgument is_named);
+static ABIArgInfo *x64_classify_argument_type(Type *type, unsigned free_int_regs, Registers *needed_registers, NamedArgument is_named, ParamInfo param);
 static bool x64_type_is_structure(Type *type);
 static void x64_classify(Type *type, ByteSize offset_base, X64Class *lo_class, X64Class *hi_class, NamedArgument named);
 
-ABIArgInfo *x64_indirect_return_result(Type *type)
+ABIArgInfo *x64_indirect_return_result(Type *type, ParamInfo param)
 {
 	if (type_is_abi_aggregate(type))
 	{
-		return abi_arg_new_indirect_not_by_val(type);
+		return abi_arg_new_indirect_not_by_val(type, param);
 	}
 	type = type_lowering(type);
 	if (type_is_promotable_int_bool(type))
 	{
-		return abi_arg_new_direct_int_ext(type);
+		return abi_arg_new_direct_int_ext(type, param);
 	}
-	return abi_arg_new_direct();
+	return abi_arg_new_direct(param);
 }
 
 
@@ -72,7 +72,7 @@ static bool x64_type_is_illegal_vector(Type *type)
 	return false;
 }
 
-ABIArgInfo *x64_indirect_result(Type *type, unsigned free_int_regs)
+ABIArgInfo *x64_indirect_result(Type *type, unsigned free_int_regs, ParamInfo param)
 {
 	// If this is a scalar LLVM value then assume LLVM will pass it in the right
 	// place naturally.
@@ -87,10 +87,10 @@ ABIArgInfo *x64_indirect_result(Type *type, unsigned free_int_regs)
 	{
 		if (type_is_promotable_int_bool(type))
 		{
-			return abi_arg_new_direct_int_ext(type);
+			return abi_arg_new_direct_int_ext(type, param);
 		}
 		// No change, just put it on the stack
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// The byval alignment
@@ -103,14 +103,14 @@ ABIArgInfo *x64_indirect_result(Type *type, unsigned free_int_regs)
 		ByteSize size = type_size(type);
 		if (align <= 8 && size <= 8)
 		{
-			return abi_arg_new_direct_coerce_int();
+			return abi_arg_new_direct_coerce_int(param);
 		}
 	}
 	if (align < 8)
 	{
-		return abi_arg_new_indirect_realigned(8, type);
+		return abi_arg_new_indirect_realigned(8, type, param);
 	}
-	return abi_arg_new_indirect_by_val(type);
+	return abi_arg_new_indirect_by_val(type, param);
 }
 
 
@@ -120,7 +120,7 @@ ABIArgInfo *x64_indirect_result(Type *type, unsigned free_int_regs)
  * @param needed_registers
  * @return
  */
-ABIArgInfo *x64_classify_reg_call_struct_type_check(Type *type, Registers *needed_registers)
+ABIArgInfo *x64_classify_reg_call_struct_type_check(Type *type, Registers *needed_registers, ParamInfo param)
 {
 	ASSERT(x64_type_is_structure(type));
 
@@ -128,40 +128,40 @@ ABIArgInfo *x64_classify_reg_call_struct_type_check(Type *type, Registers *neede
 	if (type->type_kind == TYPE_SLICE || type->type_kind == TYPE_ANY)
 	{
 		needed_registers->int_registers += 2;
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// Struct, err type handled =>
 	ASSERT(type->type_kind == TYPE_STRUCT);
 
 	// Variable array structs are always passed by pointer.
-	if (type->decl->has_variable_array) return x64_indirect_return_result(type);
+	if (type->decl->has_variable_array) return x64_indirect_return_result(type, param);
 
 	FOREACH(Decl *, member, type->decl->strukt.members)
 	{
-		Type *member_type = type_lowering(member->type->canonical);
+		Type *member_type = lowered_member_type(member);
 		ABIArgInfo *member_info;
 		Registers temp_needed_registers = { 0, 0 };
 		if (x64_type_is_structure(member_type))
 		{
 			// Recursively check the structure.
-			member_info = x64_classify_reg_call_struct_type_check(member_type, &temp_needed_registers);
+			member_info = x64_classify_reg_call_struct_type_check(member_type, &temp_needed_registers, param);
 		}
 		else
 		{
 			// Pass as single argument.
-			member_info = x64_classify_argument_type(member_type, ~(0U), &temp_needed_registers, NAMED);
+			member_info = x64_classify_argument_type(member_type, ~(0U), &temp_needed_registers, NAMED, param);
 		}
 		if (abi_arg_is_indirect(member_info))
 		{
 			*needed_registers = (Registers) { 0, 0 };
-			return x64_indirect_return_result(type);
+			return x64_indirect_return_result(type, param);
 		}
 		needed_registers->sse_registers += temp_needed_registers.sse_registers;
 		needed_registers->int_registers += temp_needed_registers.int_registers;
 	}
 	// Send as direct.
-	return abi_arg_new_direct();
+	return abi_arg_new_direct(param);
 }
 
 
@@ -232,8 +232,9 @@ void x64_classify_struct_union(Type *type, ByteSize offset_base, X64Class *curre
 		// The only case a 256-bit or a 512-bit wide vector could be used is when
 		// the struct contains a single 256-bit or 512-bit field. Early check
 		// and fallback to memory.
+		Type *member_type = lowered_member_type(member);
 		if (size > 16 &&
-			((!is_union && size != type_size(member->type))
+			((!is_union && size != type_size(member_type))
 			|| size > compiler.platform.x64.native_vector_size_avx))
 		{
 			*lo_class = CLASS_MEMORY;
@@ -241,7 +242,7 @@ void x64_classify_struct_union(Type *type, ByteSize offset_base, X64Class *curre
 			return;
 		}
 		// Not aligned?
-		if (offset % type_abi_alignment(member->type))
+		if (offset % type_abi_alignment(member_type))
 		{
 			*lo_class = CLASS_MEMORY;
 			x64_classify_post_merge(size, lo_class, hi_class);
@@ -250,7 +251,7 @@ void x64_classify_struct_union(Type *type, ByteSize offset_base, X64Class *curre
 
 		X64Class field_lo;
 		X64Class field_hi;
-		x64_classify(member->type, offset, &field_lo, &field_hi, named_arg);
+		x64_classify(member_type, offset, &field_lo, &field_hi, named_arg);
 		*lo_class = x64_merge(*lo_class, field_lo);
 		*hi_class = x64_merge(*hi_class, field_hi);
 		if (*lo_class == CLASS_MEMORY || *hi_class == CLASS_MEMORY) break;
@@ -262,7 +263,7 @@ void x64_classify_struct_union(Type *type, ByteSize offset_base, X64Class *curre
 void x64_classify_array(Type *type, ByteSize offset_base, X64Class *current, X64Class *lo_class, X64Class *hi_class, NamedArgument named_arg)
 {
 	ByteSize size = type_size(type);
-	Type *element = type->array.base;
+	Type *element = type_lowering(type->array.base);
 	ByteSize element_size = type_size(element);
 	// Bigger than 64 bytes => MEM
 	if (size > 64) return;
@@ -432,14 +433,15 @@ bool x64_bits_contain_no_user_data(Type *type, unsigned start, unsigned end)
 	if (type->type_kind == TYPE_ARRAY)
 	{
 		// Check each field to see if the field overlaps with the queried range.
-		TypeSize element_size = type_size(type->array.base);
+		Type *element_type = lowered_array_element_type(type);
+		TypeSize element_size = type_size(element_type);
 		for (unsigned i = 0; i < type->array.len; i++)
 		{
 			// If the field is after the span we care about, then we're done..
 			TypeSize offset = i * element_size;
 			if (offset >= end) break;
 			unsigned element_start = offset < start ? start - offset : 0;
-			if (!x64_bits_contain_no_user_data(type->array.base, element_start, end - offset)) return false;
+			if (!x64_bits_contain_no_user_data(element_type, element_start, end - offset)) return false;
 		}
 		// No overlap
 		return true;
@@ -451,7 +453,7 @@ bool x64_bits_contain_no_user_data(Type *type, unsigned start, unsigned end)
 			unsigned offset = member->offset;
 			if (offset >= end) break;
 			unsigned field_start = offset < start ? start - offset : 0;
-			if (!x64_bits_contain_no_user_data(member->type, field_start, end - offset)) return false;
+			if (!x64_bits_contain_no_user_data(lowered_member_type(member), field_start, end - offset)) return false;
 		}
 		// No overlap
 		return true;
@@ -459,7 +461,7 @@ bool x64_bits_contain_no_user_data(Type *type, unsigned start, unsigned end)
 	return false;
 }
 
-bool x64_contains_float_at_offset(Type *type, unsigned offset)
+bool x64_contains_float_at_offset(LoweredType *type, unsigned offset)
 {
 	if (offset == 0 && type->type_kind == TYPE_F32) return true;
 
@@ -468,11 +470,11 @@ bool x64_contains_float_at_offset(Type *type, unsigned offset)
 	{
 		Decl *member = x64_get_member_at_offset(type->decl, offset);
 		offset -= member->offset;
-		return x64_contains_float_at_offset(member->type, offset);
+		return x64_contains_float_at_offset(lowered_member_type(member), offset);
 	}
 	if (type->type_kind == TYPE_ARRAY)
 	{
-		Type *element_type = type->array.base;
+		Type *element_type = lowered_array_element_type(type);
 		unsigned element_size = type_size(element_type);
 		offset -= (offset / element_size) * element_size;
 		return x64_contains_float_at_offset(element_type, offset);
@@ -490,7 +492,7 @@ static Type *x64_get_fp_type_at_offset(Type *type, unsigned ir_offset)
 	}
 	if (type->type_kind == TYPE_ARRAY)
 	{
-		Type *element_type = type_lowering(type->array.base);
+		Type *element_type = lowered_array_element_type(type);
 		ByteSize size = type_size(element_type);
 		return x64_get_fp_type_at_offset(element_type, ir_offset - size * (ir_offset / size));
 	}
@@ -584,7 +586,7 @@ AbiType x64_get_int_type_at_offset(Type *type, unsigned offset, Type *source_typ
 			Decl *member = x64_get_member_at_offset(type->decl, offset);
 			if (member)
 			{
-				return x64_get_int_type_at_offset(member->type, offset - member->offset, source_type, source_offset);
+				return x64_get_int_type_at_offset(lowered_member_type(member), offset - member->offset, source_type, source_offset);
 			}
 			break;
 		}
@@ -600,7 +602,7 @@ AbiType x64_get_int_type_at_offset(Type *type, unsigned offset, Type *source_typ
 			UNREACHABLE_VOID
 		case TYPE_ARRAY:
 		{
-			Type *element = type->array.base;
+			Type *element = lowered_array_element_type(type);
 			TypeSize element_size = type_size(element);
 			TypeSize element_offset = (offset / element_size) * element_size;
 			return x64_get_int_type_at_offset(element, offset - element_offset, source_type, source_offset);
@@ -626,7 +628,7 @@ static AbiType x64_get_byte_vector_type(Type *type)
 {
 	// Wrapper structs/arrays that only contain vectors are passed just like
 	// vectors; strip them off if present.
-	Type *inner_type = type_abi_find_single_struct_element(type);
+	Type *inner_type = type_abi_find_single_struct_element(type, true);
 	if (inner_type) type = inner_type;
 	type = type_lowering(type);
 
@@ -660,17 +662,18 @@ static AbiType x64_get_byte_vector_type(Type *type)
 	}
 }
 
-static ABIArgInfo *x64_get_argument_pair_return(AbiType low_type, AbiType high_type)
+static ABIArgInfo *x64_get_argument_pair_return(AbiType low_type, AbiType high_type, ParamInfo param)
 {
 	TypeSize low_size = abi_type_size(low_type);
 	unsigned hi_start = aligned_offset(low_size, abi_type_abi_alignment(high_type));
 	ASSERT(hi_start == 8 && "Expected aligned with C-style structs.");
-	return abi_arg_new_direct_pair(low_type, high_type);
+	return abi_arg_new_direct_pair(low_type, high_type, param);
 }
 
 
-ABIArgInfo *x64_classify_return(Type *return_type)
+ABIArgInfo *x64_classify_return(ParamInfo param)
 {
+	Type *return_type = type_lowering(param.type);
 	// AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
 	// classification algorithm.
 	X64Class hi_class;
@@ -697,14 +700,14 @@ ABIArgInfo *x64_classify_return(Type *return_type)
 		case CLASS_MEMORY:
 			// AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
 			// hidden argument.
-			return x64_indirect_return_result(return_type);
+			return x64_indirect_return_result(return_type, param);
 		case CLASS_INTEGER:
 			// AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
 			// available register of the sequence %rax, %rdx is used.
 			result_type = x64_get_int_type_at_offset(return_type, 0, return_type, 0);
 			if (hi_class == CLASS_NO_CLASS && type_is_promotable_int_bool(return_type))
 			{
-				return abi_arg_new_direct_coerce_int_ext(return_type);
+				return abi_arg_new_direct_coerce_int_ext(return_type, param);
 			}
 			break;
 		case CLASS_SSE:
@@ -743,13 +746,13 @@ ABIArgInfo *x64_classify_return(Type *return_type)
 	// If a high part was specified, merge it together with the low part.  It is
 	// known to pass in the high eightbyte of the result.  We do this by forming a
 	// first class struct aggregate with the high and low part: {low, high}
-	if (abi_type_is_valid(high_part)) return x64_get_argument_pair_return(result_type, high_part);
+	if (abi_type_is_valid(high_part)) return x64_get_argument_pair_return(result_type, high_part, param);
 
 	if (abi_type_match(result_type, return_type))
 	{
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
-	return abi_arg_new_direct_coerce_type(result_type);
+	return abi_arg_new_direct_coerce_type(result_type, param);
 }
 
 /**
@@ -762,7 +765,7 @@ ABIArgInfo *x64_classify_return(Type *return_type)
  * @param is_named
  * @return
  */
-static ABIArgInfo *x64_classify_argument_type(Type *type, unsigned free_int_regs, Registers *needed_registers, NamedArgument is_named)
+static ABIArgInfo *x64_classify_argument_type(Type *type, unsigned free_int_regs, Registers *needed_registers, NamedArgument is_named, ParamInfo param)
 {
 	ASSERT(type == type_lowering(type));
 	X64Class hi_class;
@@ -786,14 +789,14 @@ static ABIArgInfo *x64_classify_argument_type(Type *type, unsigned free_int_regs
 		case CLASS_SSEUP:
 			UNREACHABLE
 		case CLASS_MEMORY:
-			return x64_indirect_result(type, free_int_regs);
+			return x64_indirect_result(type, free_int_regs, param);
 		case CLASS_INTEGER:
 			needed_registers->int_registers++;
 			result_type = x64_get_int_type_at_offset(type, 0, type, 0);
 			if (hi_class == CLASS_NO_CLASS && type_is_promotable_int_bool(type))
 			{
 				ASSERT(abi_type_is_type(result_type));
-				return abi_arg_new_direct_coerce_int_ext(result_type.type);
+				return abi_arg_new_direct_coerce_int_ext(result_type.type, param);
 			}
 			break;
 		case CLASS_SSE:
@@ -830,21 +833,21 @@ static ABIArgInfo *x64_classify_argument_type(Type *type, unsigned free_int_regs
 	// If a high part was specified, merge it together with the low part.  It is
 	// known to pass in the high eightbyte of the result.  We do this by forming a
 	// first class struct aggregate with the high and low part: {low, high}
-	if (abi_type_is_valid(high_part)) return x64_get_argument_pair_return(result_type, high_part);
+	if (abi_type_is_valid(high_part)) return x64_get_argument_pair_return(result_type, high_part, param);
 
 	if (abi_type_match(result_type, type))
 	{
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 	if (abi_type_is_type(result_type))
 	{
 		Type *result = result_type.type->canonical;
 		if (type_is_integer(type) && type_is_integer(result) && type->builtin.bytesize == result->builtin.bytesize)
 		{
-			return abi_arg_new_direct();
+			return abi_arg_new_direct(param);
 		}
 	}
-	return abi_arg_new_direct_coerce_type(result_type);
+	return abi_arg_new_direct_coerce_type(result_type, param);
 }
 
 bool x64_type_is_structure(Type *type)
@@ -862,31 +865,30 @@ bool x64_type_is_structure(Type *type)
 
 /**
  * This code is based on the loop operations in X86_64ABIInfo::computeInfo in Clang
- * @param type
+ * @param param the parameter info; its type is lowered and then classified
  * @param available_registers to update
- * @param is_regcall true if this is a regcall
  * @param named whether this is a named (non-vararg) parameter or not.
  * @return the calculated ABI
  */
-static ABIArgInfo *x64_classify_parameter(Type *type, Registers *available_registers, NamedArgument named)
+static ABIArgInfo *x64_classify_parameter(ParamInfo param, Registers *available_registers, NamedArgument named)
 {
 	Registers needed_registers = { 0, 0 };
-	type = type_lowering(type);
-	ABIArgInfo *info = x64_classify_argument_type(type, available_registers->int_registers, &needed_registers, named);
+	Type *type = type_lowering(param.type);
+	ABIArgInfo *info = x64_classify_argument_type(type, available_registers->int_registers, &needed_registers, named, param);
 
 	// Check if we can fit in a register, we're golden.
 	if (try_use_registers(available_registers, &needed_registers)) return info;
 
 	// The rest needs to be passed indirectly.
-	return x64_indirect_result(type, available_registers->int_registers);
+	return x64_indirect_result(type, available_registers->int_registers, param);
 
 }
 
-void c_abi_func_create_x64(FunctionPrototype *prototype)
+void c_abi_func_create_x64(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
 	if (prototype->use_win64)
 	{
-		c_abi_func_create_win64(prototype);
+		c_abi_func_create_win64(prototype, params, param_count, vaargs, vaarg_count);
 		return;
 	}
 
@@ -895,18 +897,9 @@ void c_abi_func_create_x64(FunctionPrototype *prototype)
 			.sse_registers = 8
 	};
 
-	prototype->ret_abi_info = x64_classify_return(type_lowering(prototype->abi_ret_type));
+	prototype->ret_abi_info = x64_classify_return(prototype->return_info);
 	if (abi_arg_is_indirect(prototype->ret_abi_info)) available_registers.int_registers--;
 
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = x64_classify_parameter(type_get_ptr(type_lowering(prototype->ret_by_ref_type)),
-																&available_registers,
-																NAMED);
-	}
-
-	Type **params = prototype->param_types;
-	unsigned param_count = vec_size(prototype->param_types);
 	if (param_count)
 	{
 		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
@@ -916,13 +909,12 @@ void c_abi_func_create_x64(FunctionPrototype *prototype)
 		}
 		prototype->abi_args = args;
 	}
-	unsigned vararg_count = vec_size(prototype->varargs);
-	if (vararg_count)
+	if (vaarg_count)
 	{
-		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * vararg_count);
-		for (unsigned i = 0; i < vararg_count; i++)
+		ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * vaarg_count);
+		for (unsigned i = 0; i < vaarg_count; i++)
 		{
-			args[i] = x64_classify_parameter(prototype->varargs[i], &available_registers, UNNAMED);
+			args[i] = x64_classify_parameter(vaargs[i], &available_registers, UNNAMED);
 		}
 		prototype->abi_varargs = args;
 	}
diff --git a/src/compiler/abi/c_abi_x86.c b/src/compiler/abi/c_abi_x86.c
index e43250db3..0869c73de 100644
--- a/src/compiler/abi/c_abi_x86.c
+++ b/src/compiler/abi/c_abi_x86.c
@@ -8,7 +8,7 @@
 
 static bool x86_try_use_free_regs(Regs *regs, Type *type);
 
-static ABIArgInfo **x86_create_params(CallABI abi, Type **p_type, Regs *ptr);
+static ABIArgInfo **x86_create_params(CallABI abi, ParamInfo *params, unsigned param_count, Regs *ptr);
 
 static inline bool type_is_simd_vector(Type *type)
 {
@@ -23,7 +23,7 @@ static bool type_is_union_struct_with_simd_vector(Type *type)
 	Decl **members = type->decl->strukt.members;
 	FOREACH(Decl *, member, members)
 	{
-		Type *member_type = type_lowering(member->type);
+		Type *member_type = lowered_member_type(member);
 		if (type_is_simd_vector(member_type)) return true;
 		if (type_is_union_struct_with_simd_vector(member_type)) return true;
 	}
@@ -45,11 +45,11 @@ static unsigned x86_stack_alignment(Type *type, unsigned alignment)
 }
 
 
-static ABIArgInfo *x86_create_indirect_result(Regs *regs, Type *type, ByVal by_val)
+static ABIArgInfo *x86_create_indirect_result(Regs *regs, Type *type, ByVal by_val, ParamInfo param)
 {
 	if (by_val != BY_VAL)
 	{
-		ABIArgInfo *info = abi_arg_new_indirect_not_by_val(type);
+		ABIArgInfo *info = abi_arg_new_indirect_not_by_val(type, param);
 
 		if (regs->int_regs)
 		{
@@ -71,16 +71,16 @@ static ABIArgInfo *x86_create_indirect_result(Regs *regs, Type *type, ByVal by_v
 	// Realign if alignment is greater.
 	if (alignment > stack_alignment)
 	{
-		return abi_arg_new_indirect_realigned(stack_alignment, type);
+		return abi_arg_new_indirect_realigned(stack_alignment, type, param);
 	}
 
-	return abi_arg_new_indirect_by_val(type);
+	return abi_arg_new_indirect_by_val(type, param);
 }
 
 
-static ABIArgInfo *create_indirect_return_x86(Type *type, Regs *regs)
+static ABIArgInfo *create_indirect_return_x86(Type *type, Regs *regs, ParamInfo param)
 {
-	ABIArgInfo *info = abi_arg_new_indirect_not_by_val(type);
+	ABIArgInfo *info = abi_arg_new_indirect_not_by_val(type, param);
 	if (!regs->int_regs) return info;
 	// Consume a register for the return.
 	regs->int_regs--;
@@ -132,7 +132,7 @@ static bool x86_should_return_type_in_reg(Type *type)
 			return true;
 		case TYPE_ARRAY:
 			// Small arrays <= 8 bytes.
-			return x86_should_return_type_in_reg(type->array.base);
+			return x86_should_return_type_in_reg(lowered_array_element_type(type));
 		case TYPE_STRUCT:
 		case TYPE_UNION:
 			// Handle below
@@ -143,8 +143,7 @@ static bool x86_should_return_type_in_reg(Type *type)
 	Decl** members = type->decl->strukt.members;
 	FOREACH(Decl *, member, members)
 	{
-		Type *member_type = member->type->canonical;
-		if (!x86_should_return_type_in_reg(member_type)) return false;
+		if (!x86_should_return_type_in_reg(lowered_member_type(member))) return false;
 	}
 	return true;
 }
@@ -153,13 +152,13 @@ static bool x86_should_return_type_in_reg(Type *type)
  * This code is based on X86_32ABIInfo::classifyReturnType in Clang.
  * @param call convention used.
  * @param regs registers available
- * @param type type of the return.
+ * @param param the return type together with its rewrite setting; classification uses param.type.
  * @return
  */
-ABIArgInfo *x86_classify_return(CallABI call, Regs *regs, Type *type)
+ABIArgInfo *x86_classify_return(CallABI call, Regs *regs, ParamInfo param)
 {
 	// 1. Lower any type like enum etc.
-	type = type_lowering(type);
+	Type *type = type_lowering(param.type);
 
 	// 2. Void is ignored
 	if (type_is_void(type)) return abi_arg_ignore();
@@ -169,49 +168,49 @@ ABIArgInfo *x86_classify_return(CallABI call, Regs *regs, Type *type)
 	Type *base = NULL;
 	unsigned elements = 0;
 
-	if (type->type_kind == TYPE_VECTOR) return abi_arg_new_direct();
+	if (type->type_kind == TYPE_VECTOR) return abi_arg_new_direct(param);
 
 	if (type_is_abi_aggregate(type))
 	{
 		// Structs with variable arrays are always indirect.
 		if (type_is_union_or_strukt(type) && type->decl->has_variable_array)
 		{
-			return create_indirect_return_x86(type, regs);
+			return create_indirect_return_x86(type, regs, param);
 		}
 
 		// Check if we can return it in a register.
 		if (x86_should_return_type_in_reg(type))
 		{
 			// Special case is floats and pointers in single field structs (except for MSVC)
-			Type *single_element = type_abi_find_single_struct_element(type);
+			Type *single_element = type_abi_find_single_struct_element(type, true);
 			if (single_element)
 			{
 				if (type_is_float(single_element))
 				{
-					return abi_arg_new_expand();
+					return abi_arg_new_expand(param);
 				}
 				if (type_is_pointer_type(type))
 				{
-					return abi_arg_new_expand();
+					return abi_arg_new_expand(param);
 				}
 			}
 			// This is not a single field struct, so we wrap it in an int.
-			return abi_arg_new_direct_coerce_int();
+			return abi_arg_new_direct_coerce_int(param);
 		}
-		return create_indirect_return_x86(type, regs);
+		return create_indirect_return_x86(type, regs, param);
 	}
 
 	// Is this small enough to need to be extended?
 	if (type_is_promotable_int_bool(type))
 	{
-		return abi_arg_new_direct_int_ext(type);
+		return abi_arg_new_direct_int_ext(type, param);
 	}
 
 	// If we support something like int128, then this is an indirect return.
-	if (type_is_integer(type) && type_size(type) > 8) return create_indirect_return_x86(type, regs);
+	if (type_is_integer(type) && type_size(type) > 8) return create_indirect_return_x86(type, regs, param);
 
 	// Otherwise we expect to just pass this nicely in the return.
-	return abi_arg_new_direct();
+	return abi_arg_new_direct(param);
 
 }
 
@@ -219,8 +218,9 @@ static inline bool x86_is_mmxtype(Type *type)
 {
 	// Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
 	if (type->type_kind != TYPE_VECTOR) return false;
-	if (type_size(type->array.base) >= 8) return false;
-	if (!type_is_integer(type->array.base)) return false;
+	Type *element = lowered_array_element_type(type);
+	if (type_size(element) >= 8) return false;
+	if (!type_is_integer(element)) return false;
 	return type_size(type) == 8;
 }
 
@@ -239,7 +239,7 @@ static inline bool x86_can_expand_indirect_aggregate_arg(Type *type)
 	Decl **members = type->decl->strukt.members;
 	FOREACH(Decl *, member, members)
 	{
-		Type *member_type = type_lowering(member->type);
+		Type *member_type = lowered_member_type(member);
 		switch (member_type->type_kind)
 		{
 			case TYPE_I32:
@@ -320,7 +320,7 @@ static bool x86_try_put_primitive_in_reg(CallABI call, Regs *regs, Type *type)
 /**
  * Handle the vector/regcalls with HVAs.
  */
-UNUSED static inline ABIArgInfo *x86_classify_homogenous_aggregate(Regs *regs, Type *type, unsigned elements, bool is_vec_call)
+UNUSED static inline ABIArgInfo *x86_classify_homogenous_aggregate(Regs *regs, Type *type, unsigned elements, bool is_vec_call, ParamInfo param)
 {
 	// We now know it's a float/double or a vector,
 	// since only those are valid for x86
@@ -330,7 +330,7 @@ UNUSED static inline ABIArgInfo *x86_classify_homogenous_aggregate(Regs *regs, T
 	// just send this by pointer.
 	if (regs->float_regs < elements)
 	{
-		return x86_create_indirect_result(regs, type, BY_VAL_SKIP);
+		return x86_create_indirect_result(regs, type, BY_VAL_SKIP, param);
 	}
 
 	// Use the SSE registers.
@@ -340,29 +340,29 @@ UNUSED static inline ABIArgInfo *x86_classify_homogenous_aggregate(Regs *regs, T
 	// don't flatten.
 	if (is_vec_call)
 	{
-		return abi_arg_new_direct_by_reg(true);
+		return abi_arg_new_direct_by_reg(true, param);
 	}
 
 	// If it is a builtin, then expansion is not needed.
 	if (type_is_builtin(type->type_kind) || type->type_kind == TYPE_VECTOR)
 	{
-		return abi_arg_new_direct();
+		return abi_arg_new_direct(param);
 	}
 
 	// Otherwise just a normal expand.
-	return abi_arg_new_expand();
+	return abi_arg_new_expand(param);
 }
 
-static inline ABIArgInfo *x86_classify_vector(Regs *regs, Type *type)
+static inline ABIArgInfo *x86_classify_vector(Regs *regs, Type *type, ParamInfo param) // Classifies a vector argument; regs is not read or modified in this body, param is forwarded to the ABIArgInfo constructor.
 {
 	// MMX passed as i64
 	if (x86_is_mmxtype(type))
 	{
-		return abi_arg_new_direct_coerce_type_bits(64);
+		return abi_arg_new_direct_coerce_type_bits(64, param); // direct, coerced to a 64 bit type
 	}
 
 	// Send as a normal parameter
-	return abi_arg_new_direct();
+	return abi_arg_new_direct(param); // every vector that is not an MMX type
 }
 
 /**
@@ -370,7 +370,7 @@ static inline ABIArgInfo *x86_classify_vector(Regs *regs, Type *type)
  * error type, struct, union, slice,
  * string, array, error union, complex.
  */
-static inline ABIArgInfo *x86_classify_aggregate(CallABI call, Regs *regs, Type *type)
+static inline ABIArgInfo *x86_classify_aggregate(CallABI call, Regs *regs, Type *type, ParamInfo param)
 {
 	// Only called for aggregates.
 	ASSERT(type_is_abi_aggregate(type));
@@ -378,7 +378,7 @@ static inline ABIArgInfo *x86_classify_aggregate(CallABI call, Regs *regs, Type
 	if (type_is_union_or_strukt(type) && type->decl->has_variable_array)
 	{
 		// TODO, check why this should not be by_val
-		return x86_create_indirect_result(regs, type, BY_VAL);
+		return x86_create_indirect_result(regs, type, BY_VAL, param);
 	}
 
 	unsigned size = type_size(type);
@@ -394,11 +394,11 @@ static inline ABIArgInfo *x86_classify_aggregate(CallABI call, Regs *regs, Type
 		ABIArgInfo *info;
 		if (size_in_regs > 1)
 		{
-			info = abi_arg_new_direct_struct_expand_i32((uint8_t)size_in_regs);
+			info = abi_arg_new_direct_struct_expand_i32((uint8_t)size_in_regs, param);
 		}
 		else
 		{
-			info = abi_arg_new_direct_coerce_type_bits(32);
+			info = abi_arg_new_direct_coerce_type_bits(32, param);
 		}
 		// Not in reg on MCU
 		if (!compiler.platform.x86.is_mcu_api) info->attributes.by_reg = true;
@@ -414,44 +414,38 @@ static inline ABIArgInfo *x86_classify_aggregate(CallABI call, Regs *regs, Type
 	if (size <= 16 && (!compiler.platform.x86.is_mcu_api || !regs->int_regs) &&
 		x86_can_expand_indirect_aggregate_arg(type))
 	{
-		return abi_arg_new_expand();
+		return abi_arg_new_expand(param);
 	}
-	return x86_create_indirect_result(regs, type, BY_VAL);
+	return x86_create_indirect_result(regs, type, BY_VAL, param);
 }
 
 /**
  * Pointer / Vararray / int / float / bool
- * @param context
- * @param type
- * @return
  */
-static ABIArgInfo *x86_classify_primitives(CallABI call, Regs *regs, Type *type)
+static ABIArgInfo *x86_classify_primitives(CallABI call, Regs *regs, Type *type, ParamInfo param) // type is what the caller got from type_lowering(param.type); param is forwarded to whichever ABIArgInfo is created.
 {
 	// f128 i128 u128 on stack.
-	if (type_size(type) > 8) return x86_create_indirect_result(regs, type, BY_VAL_SKIP);
+	if (type_size(type) > 8) return x86_create_indirect_result(regs, type, BY_VAL_SKIP, param); // returns before any register is requested below
 
 	bool in_reg = x86_try_put_primitive_in_reg(call, regs, type);
 
 	if (type_is_promotable_int_bool(type))
 	{
-		return abi_arg_new_direct_int_ext_by_reg(type, in_reg);
+		return abi_arg_new_direct_int_ext_by_reg(type, in_reg, param); // promotable int / bool: direct with extension, by_reg taken from in_reg
 	}
 
-	return abi_arg_new_direct_by_reg(in_reg);
+	return abi_arg_new_direct_by_reg(in_reg, param); // everything else: plain direct, by_reg taken from in_reg
 }
 
 /**
  * Classify an argument to an x86 function.
  */
-static ABIArgInfo *x86_classify_argument(CallABI call, Regs *regs, Type *type)
+static ABIArgInfo *x86_classify_argument(CallABI call, Regs *regs, ParamInfo param)
 {
 	// FIXME: Set alignment on indirect arguments.
 
 	// We lower all types here first to avoid enums and typedefs.
-	type = type_lowering(type);
-
-	Type *base = NULL;
-	unsigned elements = 0;
+	Type *type = type_lowering(param.type);
 
 	switch (type->type_kind)
 	{
@@ -465,23 +459,22 @@ static ABIArgInfo *x86_classify_argument(CallABI call, Regs *regs, Type *type)
 		case TYPE_BOOL:
 		case TYPE_FUNC_PTR:
 		case TYPE_POINTER:
-			return x86_classify_primitives(call, regs, type);
+			return x86_classify_primitives(call, regs, type, param);
 		case TYPE_VECTOR:
-			return x86_classify_vector(regs, type);
+			return x86_classify_vector(regs, type, param);
 		case TYPE_STRUCT:
 		case TYPE_UNION:
 		case TYPE_SLICE:
 		case TYPE_ANY:
 		case TYPE_ARRAY:
-			return x86_classify_aggregate(call, regs, type);
+			return x86_classify_aggregate(call, regs, type, param);
 			UNREACHABLE
 	}
 	UNREACHABLE
 }
 
-static ABIArgInfo **x86_create_params(CallABI abi, Type **params, Regs *regs)
+static ABIArgInfo **x86_create_params(CallABI abi, ParamInfo *params, unsigned param_count, Regs *regs)
 {
-	unsigned param_count = vec_size(params);
 	if (!param_count) return NULL;
 	ABIArgInfo **args = MALLOC(sizeof(ABIArgInfo) * param_count);
 	for (unsigned i = 0; i < param_count; i++)
@@ -491,7 +484,7 @@ static ABIArgInfo **x86_create_params(CallABI abi, Type **params, Regs *regs)
 	return args;
 }
 
-void c_abi_func_create_x86(FunctionPrototype *prototype)
+void c_abi_func_create_x86(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count)
 {
 	// 1. Calculate the registers we have available
 	//    Normal: 0 / 0 (3 on win32 struct ABI)
@@ -516,11 +509,7 @@ void c_abi_func_create_x86(FunctionPrototype *prototype)
 
 	// 4. Classify the return type. In the case of optional, we need to classify the optional itself as the
 	//    return type.
-	prototype->ret_abi_info = x86_classify_return(prototype->call_abi, &regs, prototype->abi_ret_type);
-	if (prototype->ret_by_ref)
-	{
-		prototype->ret_by_ref_abi_info = x86_classify_argument(prototype->call_abi, &regs, type_get_ptr(type_lowering(prototype->ret_by_ref_type)));
-	}
+	prototype->ret_abi_info = x86_classify_return(prototype->call_abi, &regs, prototype->return_info);
 
 	/*
 	 * // The chain argument effectively gives us another free register.
@@ -533,8 +522,8 @@ void c_abi_func_create_x86(FunctionPrototype *prototype)
 	runVectorCallFirstPass(FI, State);
 	 */
 
-	prototype->abi_args = x86_create_params(prototype->call_abi, prototype->param_types, &regs);
-	prototype->abi_varargs = x86_create_params(prototype->call_abi, prototype->varargs, &regs);
+	prototype->abi_args = x86_create_params(prototype->call_abi, params, param_count, &regs);
+	prototype->abi_varargs = x86_create_params(prototype->call_abi, vaargs, vaarg_count, &regs);
 }
 
 
diff --git a/src/compiler/c_abi_internal.h b/src/compiler/c_abi_internal.h
index edd0b9930..16698a168 100644
--- a/src/compiler/c_abi_internal.h
+++ b/src/compiler/c_abi_internal.h
@@ -13,23 +13,23 @@ typedef enum
 
 bool abi_arg_is_indirect(ABIArgInfo *info);
 ABIArgInfo *abi_arg_ignore(void);
-ABIArgInfo *abi_arg_new_direct_pair(AbiType low_type, AbiType high_type);
-ABIArgInfo *abi_arg_new_direct(void);
-ABIArgInfo *abi_arg_new_direct_by_reg(bool by_reg);
-ABIArgInfo *abi_arg_new_expand(void);
-ABIArgInfo *abi_arg_new_direct_int_ext(Type *type_to_extend);
-ABIArgInfo *abi_arg_new_direct_int_ext_by_reg(Type *int_to_extend, bool by_reg);
-ABIArgInfo *abi_arg_new_direct_coerce_int_ext_by_reg(Type *int_to_extend, bool by_reg);
-ABIArgInfo *abi_arg_new_direct_coerce_int_ext(Type *int_to_extend);
-ABIArgInfo *abi_arg_new_direct_coerce_int(void);
-ABIArgInfo *abi_arg_new_direct_coerce_type(AbiType type);
-ABIArgInfo *abi_arg_new_direct_coerce_type_spec(AbiSpecType type);
-ABIArgInfo *abi_arg_new_direct_coerce_type_bits(int bits);
-ABIArgInfo *abi_arg_new_direct_struct_expand_i32(uint8_t elements);
-ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_element, unsigned hi_offset, bool packed);
-ABIArgInfo *abi_arg_new_indirect_realigned(AlignSize alignment, Type *by_val_type);
-ABIArgInfo *abi_arg_new_indirect_by_val(Type *by_val_type);
-ABIArgInfo *abi_arg_new_indirect_not_by_val(Type *type);
+ABIArgInfo *abi_arg_new_direct_pair(AbiType low_type, AbiType high_type, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct(ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_by_reg(bool by_reg, ParamInfo param);
+ABIArgInfo *abi_arg_new_expand(ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_int_ext(Type *type_to_extend, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_int_ext_by_reg(Type *int_to_extend, bool by_reg, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_coerce_int_ext_by_reg(Type *int_to_extend, bool by_reg, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_coerce_int_ext(Type *int_to_extend, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_coerce_int(ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_coerce_type(AbiType type, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_coerce_type_spec(AbiSpecType type, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_coerce_type_bits(int bits, ParamInfo param);
+ABIArgInfo *abi_arg_new_direct_struct_expand_i32(uint8_t elements, ParamInfo param);
+ABIArgInfo *abi_arg_new_expand_coerce_pair(Type *first_element, Type *second_element, unsigned hi_offset, bool packed, ParamInfo param);
+ABIArgInfo *abi_arg_new_indirect_realigned(AlignSize alignment, Type *by_val_type, ParamInfo param);
+ABIArgInfo *abi_arg_new_indirect_by_val(Type *by_val_type, ParamInfo param);
+ABIArgInfo *abi_arg_new_indirect_not_by_val(Type *type, ParamInfo param);
 
 AlignSize abi_type_abi_alignment(AbiType type);
 bool abi_type_is_integer(AbiType type);
@@ -47,15 +47,16 @@ typedef struct
 
 
 
-ABIArgInfo *c_abi_classify_return_type_default(Type *type);
-ABIArgInfo *c_abi_classify_argument_type_default(Type *type);
-void c_abi_func_create_win64(FunctionPrototype *prototype);
-void c_abi_func_create_x86(FunctionPrototype *prototype);
-void c_abi_func_create_x64(FunctionPrototype *prototype);
-void c_abi_func_create_aarch64(FunctionPrototype *prototype);
-void c_abi_func_create_riscv(FunctionPrototype *prototype);
-void c_abi_func_create_wasm(FunctionPrototype *prototype);
-void c_abi_func_create_default(FunctionPrototype *prototype);
+ABIArgInfo *c_abi_classify_return_type_default(ParamInfo param);
+ABIArgInfo *c_abi_classify_argument_type_default(ParamInfo param);
+void c_abi_func_create_win64(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+void c_abi_func_create_x86(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+void c_abi_func_create_x64(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+void c_abi_func_create_aarch64(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+void c_abi_func_create_riscv(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+void c_abi_func_create_wasm(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+void c_abi_func_create_default(FunctionPrototype *prototype, ParamInfo *params, unsigned param_count, ParamInfo *vaargs, unsigned vaarg_count);
+
 
 static inline AbiType abi_type_get(Type *type)
 {
diff --git a/src/compiler/c_codegen.c b/src/compiler/c_codegen.c
index 1be39325b..acbf1029d 100644
--- a/src/compiler/c_codegen.c
+++ b/src/compiler/c_codegen.c
@@ -174,23 +174,25 @@ static bool c_emit_type_decl(GenContext *c, Type *type)
 			if (prev) return false;
 			Type *base = type;
 			type = type->pointer;
+			TODO
+			/*
 			FunctionPrototype *proto = type->function.prototype;
-			c_emit_type_decl(c, proto->rtype);
-			FOREACH (Type *, t, proto->param_types)
+			c_emit_type_decl(c, proto->param_infos->type);
+			FOREACH (ParamInfo, t, proto->param_infos)
 			{
-				c_emit_type_decl(c, t);
+				c_emit_type_decl(c, t.type);
 			}
 			int id = ++c->typename;
-			PRINTF("typedef %s(*__c3_fn%d)(", c_type_name(c, proto->rtype), id);
-			FOREACH_IDX(i, Type *, t, proto->param_types)
+			PRINTF("typedef %s(*__c3_fn%d)(", c_type_name(c, proto->return_info.type), id);
+			FOREACH_IDX(i, ParamInfo, t, proto->param_infos)
 			{
 				if (i != 0) PRINT(",");
-				PRINT(c_type_name(c, t));
+				PRINT(c_type_name(c, t.type));
 			}
 			PRINT(");\n");
 			scratch_buffer_clear();
 			scratch_buffer_printf("__c3_fn%d", id);
-			htable_set(&c->gen_decl, base, scratch_buffer_copy());
+			htable_set(&c->gen_decl, base, scratch_buffer_copy());*/
 			return true;
 		}
 		case TYPE_STRUCT:
diff --git a/src/compiler/codegen_general.c b/src/compiler/codegen_general.c
index 6a40e6dbb..99ff0485e 100644
--- a/src/compiler/codegen_general.c
+++ b/src/compiler/codegen_general.c
@@ -8,7 +8,7 @@ const char * const test_names_var_name = "__$C3_TEST_NAMES_LIST";
 /**
  * Based on isSingleElementStruct in Clang
  */
-Type *type_abi_find_single_struct_element(Type *type)
+Type *type_abi_find_single_struct_element(Type *type, bool in_abi)
 {
 	if (!type_is_union_or_strukt(type)) return NULL;
 
@@ -32,7 +32,7 @@ Type *type_abi_find_single_struct_element(Type *type)
 
 		if (type_is_union_or_strukt(field_type))
 		{
-			field_type = type_abi_find_single_struct_element(field_type);
+			field_type = type_abi_find_single_struct_element(field_type, in_abi);
 			if (!field_type) return NULL;
 		}
 		found = field_type;
@@ -166,10 +166,9 @@ bool type_homogenous_aggregate_small_enough(Type *type, unsigned members)
  * @param elements the elements found
  * @return true if it is an aggregate, false otherwise.
  */
-bool type_is_homogenous_aggregate(Type *type, Type **base, unsigned *elements)
+bool type_is_homogenous_aggregate(LoweredType *type, Type **base, unsigned *elements)
 {
 	ASSERT(base && type && elements);
-	ASSERT(type_lowering(type) == type);
 	*elements = 0;
 	switch (type->type_kind)
 	{
@@ -192,7 +191,7 @@ bool type_is_homogenous_aggregate(Type *type, Type **base, unsigned *elements)
 				{
 					unsigned member_mult = 1;
 					// Flatten the type.
-					Type *member_type = type_lowering(member->type);
+					LoweredType *member_type = lowered_member_type(member);
 					// Go down deep into  a nester array.
 					while (member_type->type_kind == TYPE_ARRAY)
 					{
@@ -203,7 +202,7 @@ bool type_is_homogenous_aggregate(Type *type, Type **base, unsigned *elements)
 					unsigned member_members = 0;
 
 					// Check recursively if the field member is homogenous
-					if (!type_is_homogenous_aggregate(type_lowering(member_type), base, &member_members)) return false;
+					if (!type_is_homogenous_aggregate(member_type, base, &member_members)) return false;
 					member_members *= member_mult;
 					// In the case of a union, grab the bigger set of elements.
 					if (type->type_kind == TYPE_UNION)
@@ -229,7 +228,7 @@ bool type_is_homogenous_aggregate(Type *type, Type **base, unsigned *elements)
 			// Empty arrays? Not homogenous.
 			if (type->array.len == 0) return false;
 			// Check the underlying type and multiply by length.
-			if (!type_is_homogenous_aggregate(type_lowering(type->array.base), base, elements)) return false;
+			if (!type_is_homogenous_aggregate(lowered_array_element_type(type), base, elements)) return false;
 			*elements *= type->array.len;
 			goto TYPECHECK;
 		case TYPE_BOOL:
diff --git a/src/compiler/codegen_internal.h b/src/compiler/codegen_internal.h
index 350dc3179..48f7e0836 100644
--- a/src/compiler/codegen_internal.h
+++ b/src/compiler/codegen_internal.h
@@ -17,6 +17,7 @@ static inline bool abi_type_is_type(AbiType type);
 
 static inline bool abi_type_is_valid(AbiType type);
 
+
 static inline LoweredType *type_lowering(Type *type)
 {
 	while (1)
@@ -89,6 +90,80 @@ static inline LoweredType *type_lowering(Type *type)
 	}
 }
 
+static inline LoweredType *type_lowering_abi(Type *type) // Lowers a type for ABI classification; called through lowered_member_type() and lowered_array_element_type().
+{
+	while (1) // each `continue` below re-enters here with a replaced type
+	{
+		type = type->canonical; // always switch on the canonical type
+		switch (type->type_kind)
+		{
+			case TYPE_ALIAS:
+				UNREACHABLE // not expected once canonical has been taken
+			case TYPE_OPTIONAL:
+				type = type->optional; // unwrap the optional and lower what it holds
+				continue;
+			case TYPE_TYPEDEF:
+				if (type->decl->attr_simd) // typedef whose decl has attr_simd set
+				{
+					type = type->decl->distinct->type; // NOTE(review): read without ->canonical, unlike the loop head; confirm this can never be an alias before .array is accessed
+					return type_get_vector(type_lowering(type->array.base), type->array.len); // the only path here that calls type_get_vector; element goes through type_lowering, not type_lowering_abi
+				}
+				type = type->decl->distinct->type; // other typedefs: continue with the underlying type
+				continue;
+			case TYPE_CONST_ENUM:
+			case TYPE_ENUM:
+				type = enum_inner_type(type); // continue with the type returned by enum_inner_type
+				continue;
+			case TYPE_FUNC_PTR:
+			{
+				Type *raw_func = type->pointer;
+				if (raw_func->function.prototype && raw_func->function.prototype->raw_type == raw_func) return type; // prototype already points back at this function type: keep the pointer as is
+				FunctionPrototype *proto = type_get_resolved_prototype(raw_func);
+				return type_get_func_ptr(proto->raw_type); // otherwise build the pointer from the resolved prototype's raw_type
+			}
+			case TYPE_INTERFACE:
+				return type_any; // interfaces are replaced by type_any
+			case TYPE_ANYFAULT:
+			case TYPE_TYPEID:
+				return type_iptr->canonical; // both are replaced by the canonical type_iptr
+			case TYPE_BITSTRUCT:
+				type = type->decl->strukt.container_type->type; // continue with the bitstruct's container type
+				continue;
+			case TYPE_WILDCARD:
+				type = type_void;
+				break; // leaves only the switch; the while loop then runs again on type_void
+			case TYPE_POINTER:
+			{
+				Type *pointer = type->pointer;
+				Type *flat = type_lowering_abi(pointer); // lower the pointee recursively
+				if (flat == pointer) return type; // pointee unchanged: reuse the existing pointer type
+				return type_get_ptr(flat);
+			}
+			case TYPE_SLICE:
+			case TYPE_ARRAY:
+			case TYPE_VECTOR:
+			case TYPE_FLEXIBLE_ARRAY:
+			{
+				Type *flat = type_lowering_abi(type->array.base); // lower the element type first, then rebuild the container around it
+				switch (type->type_kind)
+				{
+					case TYPE_SLICE:
+						return type_get_slice(flat);
+					case TYPE_ARRAY:
+					case TYPE_VECTOR:
+						return type_get_array(flat, type->array.len); // a TYPE_VECTOR reaching this point is rebuilt with type_get_array, exactly like TYPE_ARRAY
+					case TYPE_FLEXIBLE_ARRAY:
+						return type_get_flexible_array(flat);
+					default:
+						UNREACHABLE // the outer case labels admit only the four kinds handled above
+				}
+			}
+			default:
+				return type; // any other kind is returned unchanged
+		}
+	}
+}
+
 static inline bool abi_type_match(AbiType type, Type *other_type)
 {
 	other_type = other_type->canonical;
@@ -153,3 +228,13 @@ extern const char * const benchmark_fns_var_name;
 extern const char * const benchmark_names_var_name;
 extern const char * const test_fns_var_name;
 extern const char * const test_names_var_name;
+
+INLINE Type *lowered_member_type(Decl *member) // Returns type_lowering_abi() applied to member->type.
+{
+	return type_lowering_abi(member->type); // NOTE(review): type_lowering_abi is declared to return LoweredType * while this returns Type *; presumably interchangeable, confirm
+}
+
+INLINE Type *lowered_array_element_type(Type *array_type) // Returns type_lowering_abi() applied to array_type->array.base.
+{
+	return type_lowering_abi(array_type->array.base); // reads the .array member directly; presumably the caller guarantees an array-like type kind, confirm
+}
diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h
index 5e5c5e023..5eebb1d04 100644
--- a/src/compiler/compiler_internal.h
+++ b/src/compiler/compiler_internal.h
@@ -43,6 +43,7 @@ typedef uint16_t FileId;
 #define INITIAL_GENERIC_SYMBOL_MAP 0x1000
 #define MAX_INCLUDE_DIRECTIVES 2048
 #define MAX_PARAMS 255
+#define MAX_VAARGS 512
 #define MAX_BITSTRUCT 0x1000
 #define MAX_MEMBERS ((StructIndex)1) << 15
 #define MAX_ALIGNMENT ((ArrayIndex)(((uint64_t)2) << 28))
@@ -304,6 +305,7 @@ typedef struct
 	bool is_pure : 1;
 	bool noreturn : 1;
 	bool always_const : 1;
+	bool is_simd : 1;
 	uint8_t format : 8;
 } CalleeAttributes;
 
@@ -330,6 +332,7 @@ struct Type_
 			uint16_t tb_type;
 		};
 	};
+	ByteSize size;
 	void *backend_typeid;
 	void *backend_debug_type;
 	union
@@ -686,6 +689,7 @@ typedef struct Decl_
 	bool resolved_attributes : 1;
 	bool allow_deprecated : 1;
 	bool attr_structlike : 1;
+	bool attr_simd : 1;
 	union
 	{
 		void *backend_ref;
@@ -716,7 +720,11 @@ typedef struct Decl_
 			{
 				// Enums and Fault
 				EnumDecl enums;
-				TypeInfo *distinct;
+				struct
+				{
+					TypeInfo *distinct;
+					Expr *distinct_align;
+				};
 				// Unions, Struct, Bitstruct use strukt
 				StructDecl strukt;
 				Decl **interface_methods;
@@ -803,8 +811,8 @@ typedef struct
 {
 	Expr* expr;
 	UnaryOp operator : 8;
-	bool no_wrap;
-	bool no_read;
+	bool no_wrap : 1;
+	bool no_read : 1;
 } ExprUnary;
 
 
@@ -1887,26 +1895,30 @@ typedef struct ABIArgInfo_
 			Type *type;
 		} indirect;
 	};
-
+	Type *original_type;
+	ParamRewrite rewrite;
 } ABIArgInfo;
 
+typedef struct ParamInfo // A type paired with a ParamRewrite; handed by value to the ABI classifiers and abi_arg_new_* constructors.
+{
+	Type *type; // not lowered yet: x86_classify_return / x86_classify_argument call type_lowering(param.type) themselves
+	ParamRewrite rewrite; // one of the PARAM_RW_* values
+} ParamInfo;
+
 typedef struct FunctionPrototype_
 {
 	CallABI call_abi : 4;
 	bool raw_variadic : 1;
 	bool use_win64 : 1;
-	bool is_optional : 1;
-	bool ret_by_ref : 1;
 	bool is_resolved : 1;
 	unsigned short vararg_index;
-	Type *rtype;
-	Type **param_types;
-	Decl **param_copy;
-	Type **varargs;
-	Type *ret_by_ref_type;
-	Type *abi_ret_type;
+	RetValType ret_rewrite : 8;
+	ParamRewrite return_rewrite : 3;
+	ParamInfo return_info;
+	Type *return_result;
+	unsigned param_count;
+	unsigned short param_vacount;
 	ABIArgInfo *ret_abi_info;
-	ABIArgInfo *ret_by_ref_abi_info;
 	ABIArgInfo **abi_args;
 	ABIArgInfo **abi_varargs;
 	Type *raw_type;
@@ -2032,14 +2044,16 @@ extern const char *kw_typekind;
 extern const char *kw_FILE_NOT_FOUND;
 extern const char *kw_IoError;
 
+extern const char *kw_at_align;
 extern const char *kw_at_deprecated;
 extern const char *kw_at_ensure;
 extern const char *kw_at_enum_lookup;
+extern const char *kw_at_jump;
 extern const char *kw_at_param;
 extern const char *kw_at_pure;
 extern const char *kw_at_require;
 extern const char *kw_at_return;
-extern const char *kw_at_jump;
+extern const char *kw_at_simd;
 extern const char *kw_in;
 extern const char *kw_inout;
 extern const char *kw_len;
@@ -2575,7 +2589,7 @@ MacSDK *macos_sysroot_sdk_information(const char *sdk_path);
 WindowsSDK *windows_get_sdk(void);
 const char *windows_cross_compile_library(void);
 
-void c_abi_func_create(FunctionPrototype *proto);
+void c_abi_func_create(Signature *sig, FunctionPrototype *proto, Expr **vaargs);
 
 bool token_is_any_type(TokenType type);
 const char *token_type_to_string(TokenType type);
@@ -2587,15 +2601,18 @@ bool type_is_ordered(Type *type);
 unsigned type_get_introspection_kind(TypeKind kind);
 void type_mangle_introspect_name_to_buffer(Type *type);
 AlignSize type_abi_alignment(Type *type);
+AlignSize type_simd_alignment(CanonicalType *type);
 bool type_func_match(Type *fn_type, Type *rtype, unsigned arg_count, ...);
 Type *type_find_largest_union_element(Type *type);
 Type *type_find_max_type(Type *type, Type *other, Expr *first, Expr *second);
 Type *type_find_max_type_may_fail(Type *type, Type *other);
-Type *type_abi_find_single_struct_element(Type *type);
+Type *type_abi_find_single_struct_element(Type *type, bool in_abi);
 Module *type_base_module(Type *type);
 bool type_is_valid_for_vector(Type *type);
 bool type_is_valid_for_array(Type *type);
 Type *type_get_array(Type *arr_type, ArraySize len);
+Type *type_array_from_vector(Type *vec_type);
+Type *type_vector_from_array(Type *arr_type); // NOTE(review): presumably the array -> vector counterpart of type_array_from_vector; confirm against the definition
 Type *type_get_indexed_type(Type *type);
 Type *type_get_ptr(Type *ptr_type);
 Type *type_get_func_ptr(Type *func_type);
@@ -2607,7 +2624,6 @@ Type *type_get_flexible_array(Type *arr_type);
 AlignSize type_alloca_alignment(Type *type);
 Type *type_get_optional(Type *optional_type);
 Type *type_get_vector(Type *vector_type, unsigned len);
-Type *type_get_simd(Type *vector_type, unsigned len);
 Type *type_get_vector_bool(Type *original_type);
 Type *type_int_signed_by_bitsize(BitSize bitsize);
 Type *type_int_unsigned_by_bitsize(BitSize bit_size);
@@ -2619,6 +2635,8 @@ void type_func_prototype_init(uint32_t capacity);
 Type *type_find_parent_type(Type *type);
 bool type_is_subtype(Type *type, Type *possible_subtype);
 bool type_is_abi_aggregate(Type *type);
+bool type_is_simd(Type *type);
+bool type_is_aggregate(Type *type);
 bool type_is_int128(Type *type);
 
 Type *type_from_token(TokenType type);
@@ -3063,6 +3081,7 @@ INLINE Type *type_new(TypeKind kind, const char *name)
 {
 	Type *type = CALLOCS(Type);
 	type->type_kind = kind;
+	type->size = ~(ByteSize)0;
 	ASSERT(name);
 	type->name = name;
 	global_context_add_type(type);
diff --git a/src/compiler/enums.h b/src/compiler/enums.h
index b9b503957..9bb36c880 100644
--- a/src/compiler/enums.h
+++ b/src/compiler/enums.h
@@ -309,6 +309,7 @@ typedef enum
 	ATTRIBUTE_REFLECT,
 	ATTRIBUTE_SAFEINFER,
 	ATTRIBUTE_SAFEMACRO,
+	ATTRIBUTE_SIMD,
 	ATTRIBUTE_SECTION,
 	ATTRIBUTE_STRUCTLIKE,
 	ATTRIBUTE_TAG,
@@ -1651,6 +1652,18 @@ typedef enum
 	X86_FEAT_XSAVES,
 	X86_FEATURE_LAST = X86_FEAT_XSAVES,
 } X86Feature;
+typedef enum // Return kind stored in FunctionPrototype.ret_rewrite.
+{
+	RET_NORMAL, // NOTE(review): presumably a return that is not optional; confirm
+	RET_OPTIONAL_VOID, // NOTE(review): presumably an optional return that carries no value; confirm
+	RET_OPTIONAL_VALUE, // llvm_append_function_attributes starts its abi_args loop at index 1 for this kind; presumably abi_args[0] then holds the value slot, confirm
+} RetValType;
+typedef enum // Rewrite kind stored in ParamInfo.rewrite, ABIArgInfo.rewrite and the 3 bit field FunctionPrototype.return_rewrite.
+{
+	PARAM_RW_NONE, // NOTE(review): presumably "leave the parameter as it is"; confirm
+	PARAM_RW_VEC_TO_ARRAY, // NOTE(review): name suggests a vector passed as an array; confirm against the code that sets it
+	PARAM_RW_EXPAND_ELEMENTS, // NOTE(review): name suggests passing the elements individually; confirm against the code that sets it
+} ParamRewrite;
 
 // -- Arch helper macros
 #define ARCH_UNSUPPORTED ARCH_TYPE_AARCH64_32: case ARCH_TYPE_BPFEL: case ARCH_TYPE_BPFEB: case ARCH_TYPE_SPARCEL: \
diff --git a/src/compiler/expr.c b/src/compiler/expr.c
index 86498cbb7..7ec04168f 100644
--- a/src/compiler/expr.c
+++ b/src/compiler/expr.c
@@ -590,8 +590,7 @@ void expr_insert_addr(Expr *original)
 	Expr *inner = expr_copy(original);
 	original->expr_kind = EXPR_UNARY;
 	original->type = new_type;
-	original->unary_expr.operator = UNARYOP_ADDR;
-	original->unary_expr.expr = inner;
+	original->unary_expr = (ExprUnary) { .operator = UNARYOP_ADDR, .expr = inner };
 }
 
 Expr *expr_generated_local(Expr *assign, Decl **decl_ref)
diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c
index 7d8c5b238..d479b6c8a 100644
--- a/src/compiler/llvm_codegen.c
+++ b/src/compiler/llvm_codegen.c
@@ -282,6 +282,7 @@ LLVMValueRef llvm_emit_const_initializer(GenContext *c, ConstInitializer *const_
 			unsigned alignment = 0;
 			LLVMValueRef *parts = NULL;
 			bool pack = false;
+			bool is_vec = type_flat_is_vector(array_type);
 			FOREACH(ConstInitializer *, element, elements)
 			{
 				ASSERT(element->kind == CONST_INIT_ARRAY_VALUE);
@@ -295,7 +296,17 @@ LLVMValueRef llvm_emit_const_initializer(GenContext *c, ConstInitializer *const_
 				// Add zeroes
 				if (diff > 0)
 				{
-					vec_add(parts, llvm_emit_const_array_padding(element_type_llvm, diff, &was_modified));
+					if (is_vec)
+					{
+						for (int i = 0; i < diff; i++)
+						{
+							vec_add(parts, llvm_get_zero_raw(element_type_llvm));
+						}
+					}
+					else
+					{
+						vec_add(parts, llvm_emit_const_array_padding(element_type_llvm, diff, &was_modified));
+					}
 				}
 				LLVMValueRef value = llvm_emit_const_initializer(c, element->init_array_value.element);
 				if (LLVMTypeOf(value) != element_type_llvm) was_modified = true;
@@ -1205,7 +1216,6 @@ void llvm_append_function_attributes(GenContext *c, Decl *decl)
 	LLVMValueRef function = decl->backend_ref;
 	ABIArgInfo *ret_abi_info = prototype->ret_abi_info;
 	llvm_emit_param_attributes(c, function, ret_abi_info, true, 0, 0, NULL);
-	unsigned params = vec_size(prototype->param_types);
 	if (c->debug.enable_stacktrace)
 	{
 		llvm_attribute_add_string(c, function, "frame-pointer", "all", -1);
@@ -1213,17 +1223,14 @@ void llvm_append_function_attributes(GenContext *c, Decl *decl)
 	}
 	llvm_attribute_add_string(c, function, "stack-protector-buffer-size", "8", -1);
 	llvm_attribute_add_string(c, function, "no-trapping-math", "true", -1);
+	int offset = prototype->ret_rewrite == RET_OPTIONAL_VALUE ? 1 : 0;
 
-	if (prototype->ret_by_ref)
-	{
-		ABIArgInfo *info = prototype->ret_by_ref_abi_info;
-		llvm_emit_param_attributes(c, function, prototype->ret_by_ref_abi_info, false, info->param_index_start + 1,
-		                           info->param_index_end, NULL);
-	}
-	for (unsigned i = 0; i < params; i++)
+	Signature *sig = prototype->raw_type->function.signature;
+	for (unsigned i = offset; i < prototype->param_count; i++)
 	{
 		ABIArgInfo *info = prototype->abi_args[i];
-		llvm_emit_param_attributes(c, function, info, false, info->param_index_start + 1, info->param_index_end, decl->func_decl.signature.params[i]);
+		Decl *param_decl = sig->params[i - offset];
+		llvm_emit_param_attributes(c, function, info, false, info->param_index_start + 1, info->param_index_end, param_decl);
 	}
 	// We ignore decl->func_decl.attr_inline and place it in every call instead.
 	if (decl->func_decl.attr_noinline)
diff --git a/src/compiler/llvm_codegen_debug_info.c b/src/compiler/llvm_codegen_debug_info.c
index c452475ca..a3dd22e7f 100644
--- a/src/compiler/llvm_codegen_debug_info.c
+++ b/src/compiler/llvm_codegen_debug_info.c
@@ -577,10 +577,11 @@ static LLVMMetadataRef llvm_debug_vector_type(GenContext *c, Type *type)
 static LLVMMetadataRef llvm_debug_func_type(GenContext *c, Type *type)
 {
 	FunctionPrototype *prototype = type_get_resolved_prototype(type);
+	Signature *sig = prototype->raw_type->function.signature;
 	// 1. Generate all the parameter types, this may cause this function to be called again!
-	FOREACH(Type *, param_type, prototype->param_types)
+	FOREACH(Decl *, param, sig->params)
 	{
-		llvm_get_debug_type(c, param_type);
+		llvm_get_debug_type(c, param->type);
 	}
 	// 2. We might be done!
 	if (type->backend_debug_type) return type->backend_debug_type;
@@ -588,19 +589,10 @@ static LLVMMetadataRef llvm_debug_func_type(GenContext *c, Type *type)
 	// 3. Otherwise generate:
 	static LLVMMetadataRef *buffer = NULL;
 	vec_resize(buffer, 0);
-	Type *return_type = prototype->rtype;
-	if (!type_is_optional(return_type))
+	vec_add(buffer, llvm_get_debug_type(c, typeget(sig->rtype)));
+	FOREACH(Decl *, param, sig->params)
 	{
-		vec_add(buffer, llvm_get_debug_type(c, return_type));
-	}
-	else
-	{
-		vec_add(buffer, llvm_get_debug_type(c, type_fault));
-		vec_add(buffer, llvm_get_debug_type(c, type_get_ptr(type_no_optional(return_type))));
-	}
-	FOREACH(Type *, param_type, prototype->param_types)
-	{
-		vec_add(buffer, llvm_get_debug_type(c, param_type));
+		vec_add(buffer, llvm_get_debug_type(c, param->type));
 	}
 	if (prototype->raw_variadic)
 	{
diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c
index aabd24490..c7a926783 100644
--- a/src/compiler/llvm_codegen_expr.c
+++ b/src/compiler/llvm_codegen_expr.c
@@ -17,7 +17,6 @@ static inline void llvm_emit_bitassign_array(GenContext *c, LLVMValueRef result,
 static inline void llvm_emit_builtin_access(GenContext *c, BEValue *be_value, Expr *expr);
 static inline void llvm_emit_const_initialize_reference(GenContext *c, BEValue *ref, Expr *expr);
 static void llvm_emit_swizzle_from_value(GenContext *c, LLVMValueRef vector_value, BEValue *value, Expr *expr);
-
 static inline void llvm_emit_optional(GenContext *c, BEValue *be_value, Expr *expr);
 static inline void llvm_emit_inc_dec_change(GenContext *c, BEValue *addr, BEValue *after, BEValue *before, Expr *expr, int diff,
                          bool allow_wrap);
@@ -4183,7 +4182,7 @@ void llvm_emit_binary(GenContext *c, BEValue *be_value, Expr *expr, BEValue *lhs
 			if (type_is_pointer_vector(lhs_type))
 			{
 				Type *element_type = lhs_type->array.base->pointer;
-				unsigned len = lhs_type->array.len;
+				unsigned len = LLVMGetVectorSize(LLVMTypeOf(lhs_value));
 				LLVMTypeRef int_vec_type = llvm_get_type(c, type_get_vector(type_isz, len));
 				if (lhs_type == rhs_type)
 				{
@@ -5067,19 +5066,29 @@ LLVMValueRef llvm_emit_ptradd_inbounds_raw(GenContext *c, LLVMValueRef ptr, LLVM
 	return LLVMBuildInBoundsGEP2(c->builder, LLVMArrayType(c->byte_type, mult), ptr, &offset, 1, "ptroffset");
 }
 
-LLVMValueRef llvm_emit_const_vector(LLVMValueRef value, ArraySize len)
+static LLVMValueRef vec_slots[MAX_VECTOR_WIDTH]; // Scratch storage shared by both builders below. NOTE(review): neither checks len/npot against MAX_VECTOR_WIDTH - confirm callers guarantee it.
+
+LLVMValueRef llvm_emit_const_vector_pot(LLVMValueRef value, ArraySize len) // Constant vector with `value` in the first len lanes, padded to next_highest_power_of_2(len) lanes.
 {
-	LLVMValueRef slots[256];
-	LLVMValueRef *ptr = slots;
-	if (len > 256)
-	{
-		ptr = MALLOC(len * sizeof(LLVMValueRef));
-	}
+	ArraySize npot = next_highest_power_of_2(len);
 	for (ArraySize i = 0; i < len; i++)
 	{
-		ptr[i] = value;
+		vec_slots[i] = value;
 	}
-	return LLVMConstVector(ptr, len);
+	for (ArraySize i = len; i < npot; i++)
+	{
+		vec_slots[i] = LLVMGetUndef(LLVMTypeOf(value)); // Padding lanes are undef.
+	}
+	return LLVMConstVector(vec_slots, npot);
+}
+
+LLVMValueRef llvm_emit_const_vector(LLVMValueRef value, ArraySize len) // Constant vector of exactly len lanes, each set to `value`.
+{
+	for (ArraySize i = 0; i < len; i++)
+	{
+		vec_slots[i] = value;
+	}
+	return LLVMConstVector(vec_slots, len);
 }
 
 
@@ -5184,9 +5193,19 @@ void llvm_value_struct_gep(GenContext *c, BEValue *element, BEValue *struct_poin
 }
 
 
-void llvm_emit_parameter(GenContext *c, LLVMValueRef *args, unsigned *arg_count_ref, ABIArgInfo *info, BEValue *be_value, Type *type)
+void llvm_emit_parameter(GenContext *c, LLVMValueRef *args, unsigned *arg_count_ref, ABIArgInfo *info, BEValue *be_value)
 {
-	type = type_lowering(type);
+	Type *type = type_lowering(info->original_type);
+	switch (info->rewrite)
+	{
+		case PARAM_RW_NONE:
+			break;
+		case PARAM_RW_VEC_TO_ARRAY:
+			llvm_emit_vec_to_array(c, be_value, type);
+			break;
+		case PARAM_RW_EXPAND_ELEMENTS:
+			TODO
+	}
 	ASSERT(be_value->type->canonical == type);
 	switch (info->kind)
 	{
@@ -5374,7 +5393,7 @@ void llvm_add_abi_call_attributes(GenContext *c, LLVMValueRef call_value, int co
 void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype *prototype, LLVMTypeRef func_type, LLVMValueRef func, LLVMValueRef *args, unsigned arg_count, int inline_flag, LLVMValueRef error_var, bool sret_return, BEValue *synthetic_return_param, bool no_return)
 {
 	ABIArgInfo *ret_info = prototype->ret_abi_info;
-	Type *call_return_type = prototype->abi_ret_type;
+	Type *call_return_type = prototype->return_info.type;
 
 	LLVMValueRef call_value = LLVMBuildCall2(c->builder, func_type, func, args, arg_count, "");
 	if (prototype->call_abi)
@@ -5396,14 +5415,12 @@ void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype
 		default:
 			break;
 	}
-	ASSERT(!prototype->ret_by_ref || prototype->ret_by_ref_abi_info->kind != ABI_ARG_INDIRECT);
-
-	llvm_add_abi_call_attributes(c, call_value, vec_size(prototype->param_types), prototype->abi_args);
+	llvm_add_abi_call_attributes(c, call_value, prototype->param_count, prototype->abi_args);
 	if (prototype->abi_varargs)
 	{
 		llvm_add_abi_call_attributes(c,
 									 call_value,
-									 vec_size(prototype->varargs),
+									 prototype->param_vacount,
 									 prototype->abi_varargs);
 	}
 
@@ -5416,7 +5433,7 @@ void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype
 		case ABI_ARG_IGNORE:
 			// 12. Basically void returns or empty structs.
 			//     Here we know we don't have an optional or any return value that can be used.
-			ASSERT(!prototype->is_optional && "Optional should have produced a return value.");
+			ASSERT(prototype->ret_rewrite == RET_NORMAL && "Optional should have produced a return value.");
 			*result_value = (BEValue) { .type = type_void, .kind = BE_VALUE };
 			return;
 		case ABI_ARG_INDIRECT:
@@ -5517,7 +5534,7 @@ void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype
 		*result_value = (BEValue) { .type = type_void, .kind = BE_VALUE };
 		return;
 	}
-	if (prototype->is_optional)
+	if (prototype->ret_rewrite != RET_NORMAL)
 	{
 		// 17a. If we used the error var as the indirect recipient, then that will hold the error.
 		//      otherwise it's whatever value in be_value.
@@ -5542,7 +5559,7 @@ void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype
 
 
 		// 17g. If void, be_value contents should be skipped.
-		if (!prototype->ret_by_ref)
+		if (prototype->ret_rewrite != RET_OPTIONAL_VALUE)
 		{
 			*result_value = (BEValue) { .type = type_void, .kind = BE_VALUE };
 			return;
@@ -5550,11 +5567,18 @@ void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype
 
 		// 17h. Assign the return param to be_value.
 		*result_value = *synthetic_return_param;
-		return;
 	}
 
-	// 17i. The simple case here is where there is a normal return.
-	//      In this case be_value already holds the result
+	switch (prototype->return_rewrite) // Labelled with ParamRewrite constants, matching the switch on return_rewrite in llvm_emit_return_abi.
+	{
+		case PARAM_RW_NONE:
+			break;
+		case PARAM_RW_VEC_TO_ARRAY:
+			if (result_value->value) llvm_emit_array_to_vector(c, result_value, type_vector_from_array(result_value->type)); // The callee returned an array: convert the result back to a vector.
+			break;
+		case PARAM_RW_EXPAND_ELEMENTS:
+			UNREACHABLE_VOID;
+	}
 }
 
 static LLVMValueRef llvm_emit_dynamic_search(GenContext *c, LLVMValueRef type_id_ptr, LLVMValueRef selector)
@@ -5707,46 +5731,37 @@ static LLVMValueRef llvm_emit_dynamic_search(GenContext *c, LLVMValueRef type_id
  * We assume all optionals are already folded for the arguments.
  */
 INLINE void llvm_emit_call_invocation(GenContext *c, BEValue *result_value,
-									  BEValue *target,
-									  SourceSpan span,
-									  FunctionPrototype *prototype,
-									  Expr **args,
+                                      BEValue *target,
+                                      SourceSpan span,
+                                      FunctionPrototype *prototype,
 									  BEValue *values,
-									  int inline_flag,
-									  bool no_return,
-									  LLVMValueRef func,
-									  LLVMTypeRef func_type,
-									  Expr **varargs)
+                                      int inline_flag,
+                                      bool no_return,
+                                      LLVMValueRef func,
+                                      LLVMTypeRef func_type,
+                                      Expr **vaargs)
 {
 	LLVMValueRef arg_values[512];
 	unsigned arg_count = 0;
-	Type **params = prototype->param_types;
 	ABIArgInfo **abi_args = prototype->abi_args;
-	unsigned param_count = vec_size(params);
+	unsigned param_count = prototype->param_count;
 	FunctionPrototype copy;
 	if (prototype->raw_variadic)
 	{
-		if (varargs)
+		if (vaargs)
 		{
 			copy = *prototype;
-			copy.varargs = NULL;
-
-			FOREACH(Expr *, val, varargs)
-			{
-				vec_add(copy.varargs, type_flatten(val->type));
-			}
 			copy.is_resolved = false;
 			copy.ret_abi_info = NULL;
-			copy.ret_by_ref_abi_info = NULL;
 			copy.abi_args = NULL;
-			c_abi_func_create(&copy);
+			c_abi_func_create(prototype->raw_type->function.signature, &copy, vaargs);
 			prototype = &copy;
 			LLVMTypeRef *params_type = NULL;
 			llvm_update_prototype_abi(c, prototype, &params_type);
 		}
 	}
 	ABIArgInfo *ret_info = prototype->ret_abi_info;
-	Type *call_return_type = prototype->abi_ret_type;
+	Type *call_return_type = prototype->return_info.type;
 
 	// 5. In the case of an optional, the error is replacing the regular return abi.
 	LLVMValueRef error_var = NULL;
@@ -5758,7 +5773,7 @@ INLINE void llvm_emit_call_invocation(GenContext *c, BEValue *result_value,
 	{
 		case ABI_ARG_INDIRECT:
 			// 6a. We can use the stored error var if there is no redirect.
-			if (prototype->is_optional && c->catch.fault && !ret_info->attributes.realign)
+			if (prototype->ret_rewrite != RET_NORMAL && c->catch.fault && !ret_info->attributes.realign)
 			{
 				error_var = c->catch.fault;
 				arg_values[arg_count++] = error_var;
@@ -5799,50 +5814,52 @@ INLINE void llvm_emit_call_invocation(GenContext *c, BEValue *result_value,
 	// 7. We might have an optional indirect return and a normal return.
 	//    In this case we need to add it by hand.
 	BEValue synthetic_return_param = { 0 };
-	if (prototype->ret_by_ref)
+	int start = 0;
+	if (prototype->ret_rewrite == RET_OPTIONAL_VALUE)
 	{
 		// 7b. Create the address to hold the return.
-		Type *actual_return_type = type_lowering(prototype->ret_by_ref_type);
-		llvm_value_set(&synthetic_return_param, llvm_emit_alloca_aligned(c, actual_return_type, "retparam"), type_get_ptr(actual_return_type));
+		Type *actual_return_type_ptr = abi_args[0]->original_type;
+		Type *actual_return_type = actual_return_type_ptr->pointer;
+		llvm_value_set(&synthetic_return_param, llvm_emit_alloca_aligned(c, actual_return_type, "retparam"), actual_return_type_ptr);
 		// 7c. Emit it as a parameter as a pointer (will implicitly add it to the value list)
-		llvm_emit_parameter(c, arg_values, &arg_count, prototype->ret_by_ref_abi_info, &synthetic_return_param, synthetic_return_param.type);
+		llvm_emit_parameter(c, arg_values, &arg_count, abi_args[0], &synthetic_return_param);
 		// 7d. Update the be_value to actually be an address.
 		llvm_value_set_address_abi_aligned(c, &synthetic_return_param, synthetic_return_param.value, actual_return_type);
+		start = 1;
 	}
 
 	// 8. Add all other arguments.
-	for (unsigned i = 0; i < param_count; i++)
+	for (unsigned i = start; i < param_count; i++)
 	{
 		// 8a. Evaluate the expression.
-		Type *param = params[i];
 		ABIArgInfo *info = abi_args[i];
 
 		// 8b. Emit the parameter according to ABI rules.
-		BEValue value_copy = values[i];
-		llvm_emit_parameter(c, arg_values, &arg_count, info, &value_copy, param);
+		BEValue value_copy = values[i - start];
+		llvm_emit_parameter(c, arg_values, &arg_count, info, &value_copy);
 	}
 
-	// 9. Typed varargs
+	// 9. Typed vaargs
 
 	if (prototype->raw_variadic)
 	{
-		unsigned vararg_count = vec_size(varargs);
+		unsigned vararg_count = vec_size(vaargs);
 		if (prototype->abi_varargs)
 		{
-			// 9. Emit varargs.
+			// 9. Emit vaargs.
 			unsigned index = 0;
 			ABIArgInfo **abi_varargs = prototype->abi_varargs;
 			for (unsigned i = 0; i < vararg_count; i++)
 			{
 				ABIArgInfo *info = abi_varargs[index];
-				BEValue value_copy = values[i + param_count];
-				llvm_emit_parameter(c, arg_values, &arg_count, info, &value_copy, prototype->varargs[index]);
+				BEValue value_copy = values[i + param_count - start]; // param_count includes the synthetic return slot when start == 1, but `values` has no entry for it.
+				llvm_emit_parameter(c, arg_values, &arg_count, info, &value_copy);
 				index++;
 			}
 		}
 		else
 		{
-			// 9. Emit varargs.
+			// 9. Emit vaargs.
 			for (unsigned i = 0; i < vararg_count; i++)
 			{
 				REMINDER("Varargs should be expanded correctly");
@@ -5995,6 +6012,7 @@ static void llvm_emit_call_expr(GenContext *c, BEValue *result_value, Expr *expr
 		varargs = expr->call_expr.varargs;
 	}
 
+	Signature *sig = prototype->raw_type->function.signature;
 	for (unsigned i = 0; i < arg_count; i++)
 	{
 		BEValue *value_ref = &values[i];
@@ -6005,7 +6023,8 @@ static void llvm_emit_call_expr(GenContext *c, BEValue *result_value, Expr *expr
 			llvm_value_fold_optional(c, value_ref);
 			continue;
 		}
-		Type *param = prototype->param_types[i];
+		Decl *decl = sig->params[i];
+		Type *param = decl->type;
 		if (vararg_splat)
 		{
 			llvm_emit_vasplat_expr(c, value_ref, vararg_splat, param);
@@ -6062,7 +6081,7 @@ static void llvm_emit_call_expr(GenContext *c, BEValue *result_value, Expr *expr
 			LLVMBasicBlockRef after = llvm_basic_block_new(c, "after_call");
 			FunctionPrototype *default_prototype = type_get_resolved_prototype(default_method->type);
 			BEValue default_res;
-			llvm_emit_call_invocation(c, &default_res, target, expr->span, default_prototype, args, values, inline_flag, no_return,
+			llvm_emit_call_invocation(c, &default_res, target, expr->span, default_prototype, values, inline_flag, no_return,
 			                          llvm_get_ref(c, default_method),
 			                          llvm_get_type(c, default_method->type),
 			                          varargs);
@@ -6074,7 +6093,7 @@ static void llvm_emit_call_expr(GenContext *c, BEValue *result_value, Expr *expr
 			func_type = llvm_get_type(c, dyn_fn->type);
 			BEValue normal_res;
 			values[0] = result;
-			llvm_emit_call_invocation(c, &normal_res, target, expr->span, prototype, args, values, inline_flag, no_return, func, func_type,
+			llvm_emit_call_invocation(c, &normal_res, target, expr->span, prototype, values, inline_flag, no_return, func, func_type,
 			                          varargs);
 			LLVMValueRef normal_val = llvm_load_value(c, &normal_res);
 			LLVMBasicBlockRef normal_block = c->current_block;
@@ -6099,7 +6118,7 @@ static void llvm_emit_call_expr(GenContext *c, BEValue *result_value, Expr *expr
 
 	}
 
-	llvm_emit_call_invocation(c, result_value, target, expr->span, prototype, args, values, inline_flag, no_return, func, func_type,
+	llvm_emit_call_invocation(c, result_value, target, expr->span, prototype, values, inline_flag, no_return, func, func_type,
 							  varargs);
 }
 
@@ -6978,13 +6997,9 @@ static void llvm_emit_int_to_bool(GenContext *c, BEValue *value, Expr *expr)
 				   expr->type);
 }
 
-static void llvm_emit_vector_from_array(GenContext *c, BEValue *value, Expr *expr)
+void llvm_emit_array_to_vector(GenContext *c, BEValue *value, Type *to)
 {
-	Expr *inner = expr->inner_expr;
-	llvm_emit_expr(c, value, inner);
-	llvm_value_fold_optional(c, value);
-
-	Type *to_type = type_lowering(expr->type);
+	Type *to_type = type_lowering(to);
 	if (llvm_value_is_addr(value))
 	{
 		// Unaligned load
@@ -7000,6 +7015,15 @@ static void llvm_emit_vector_from_array(GenContext *c, BEValue *value, Expr *exp
 	}
 	llvm_value_set(value, vector, to_type);
 }
+
+// Emit expr->inner_expr, fold any optional, then convert the result to the vector type expr->type.
+static void llvm_emit_vector_from_array(GenContext *c, BEValue *value, Expr *expr)
+{
+	llvm_emit_expr(c, value, expr->inner_expr);
+	llvm_value_fold_optional(c, value);
+	llvm_emit_array_to_vector(c, value, expr->type);
+}
+
 static void llvm_emit_ptr_access(GenContext *c, BEValue *value, Expr *expr)
 {
 	llvm_emit_expr(c, value, expr->inner_expr);
@@ -7095,12 +7119,12 @@ void llvm_emit_scalar_to_vector(GenContext *c, BEValue *value, Expr *expr)
 	llvm_value_set(value, res, expr->type);
 }
 
-static inline void llvm_emit_vector_to_array(GenContext *c, BEValue *value, Expr *expr)
+void llvm_emit_vec_to_array(GenContext *c, BEValue *value, Type *type)
 {
-	llvm_emit_expr(c, value, expr->inner_expr);
 	llvm_value_rvalue(c, value);
-	Type *to_type = type_lowering(expr->type);
+	Type *to_type = type_lowering(type);
 	LLVMValueRef array = llvm_get_undef(c, to_type);
+
 	for (unsigned i = 0; i < to_type->array.len; i++)
 	{
 		LLVMValueRef element = llvm_emit_extract_value(c, value->value, i);
@@ -7108,6 +7132,11 @@ static inline void llvm_emit_vector_to_array(GenContext *c, BEValue *value, Expr
 	}
 	llvm_value_set(value, array, to_type);
 }
+static inline void llvm_emit_vector_to_array(GenContext *c, BEValue *value, Expr *expr) // Expression-level wrapper around llvm_emit_vec_to_array.
+{
+	llvm_emit_expr(c, value, expr->inner_expr); // Emit the inner expression into `value` first.
+	llvm_emit_vec_to_array(c, value, expr->type); // Then convert `value` in place to expr->type.
+}
 
 void llvm_emit_slice_to_vec_array(GenContext *c, BEValue *value, Expr *expr)
 {
diff --git a/src/compiler/llvm_codegen_function.c b/src/compiler/llvm_codegen_function.c
index b3b96cc17..0a1e852a8 100644
--- a/src/compiler/llvm_codegen_function.c
+++ b/src/compiler/llvm_codegen_function.c
@@ -9,7 +9,7 @@ static void llvm_append_xxlizer(GenContext *c, unsigned  priority, bool is_initi
 static inline void llvm_emit_return_value(GenContext *context, LLVMValueRef value);
 static void llvm_expand_from_args(GenContext *c, Type *type, LLVMValueRef ref, unsigned *index, AlignSize alignment);
 static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, ABIArgInfo *info, unsigned *index);
-static inline void llvm_emit_func_parameter(GenContext *context, Decl *decl, ABIArgInfo *abi_info, unsigned *index, unsigned real_index);
+static inline void llvm_emit_func_parameter(GenContext *context, Decl *decl, ABIArgInfo ***abi_info_ref, unsigned *index, unsigned real_index);
 static inline void llvm_emit_body(GenContext *c, LLVMValueRef function, FunctionPrototype *prototype, Signature *signature, Ast *body, Decl *decl, bool is_naked);
 
 
@@ -77,6 +77,7 @@ static void llvm_expand_from_args(GenContext *c, Type *type, LLVMValueRef ref, u
 	switch (type->type_kind)
 	{
 		case TYPE_ARRAY:
+		case TYPE_VECTOR:
 		{
 			LLVMTypeRef array_type = llvm_get_type(c, type);
 			for (unsigned i = 0; i < type->array.len; i++)
@@ -94,7 +95,7 @@ static void llvm_expand_from_args(GenContext *c, Type *type, LLVMValueRef ref, u
 			{
 				AlignSize element_align;
 				LLVMValueRef target = llvm_emit_struct_gep_raw(c, ref, struct_type, i, alignment, &element_align);
-				llvm_expand_from_args(c, member->type, target, index, element_align);
+				llvm_expand_from_args(c, type_lowering(member->type), target, index, element_align);
 			}
 			break;
 		}
@@ -117,7 +118,7 @@ LLVMValueRef llvm_get_next_param(GenContext *c, unsigned *index)
 }
 
 
-static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, ABIArgInfo *info, unsigned *index)
+static inline void llvm_process_parameter_value_inner(GenContext *c, Decl *decl, ABIArgInfo *info, unsigned *index)
 {
 	switch (info->kind)
 	{
@@ -126,6 +127,7 @@ static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, ABIAr
 		case ABI_ARG_INDIRECT:
 			// Indirect is caller copied.
 			decl->backend_ref = llvm_get_next_param(c, index);
+			decl->alignment = info->indirect.alignment;
 			return;
 		case ABI_ARG_EXPAND_COERCE:
 		{
@@ -257,12 +259,45 @@ static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, ABIAr
 		}
 	}
 }
-static inline void llvm_emit_func_parameter(GenContext *context, Decl *decl, ABIArgInfo *abi_info, unsigned *index, unsigned real_index)
+
+// Bind the incoming ABI argument(s) described by `info` to `decl`, undoing a vector-to-array rewrite when one was applied.
+static inline void llvm_process_parameter_value(GenContext *c, Decl *decl, ABIArgInfo *info, unsigned *index)
+{
+	switch (info->rewrite)
+	{
+		case PARAM_RW_NONE:
+			llvm_process_parameter_value_inner(c, decl, info, index);
+			return;
+		case PARAM_RW_VEC_TO_ARRAY:
+		{
+			// Receive the argument through a generated parameter typed as info->original_type, then convert it to decl->type.
+			Decl *abi_param = decl_new_generated_var(info->original_type, VARDECL_PARAM, decl->span);
+			llvm_process_parameter_value_inner(c, abi_param, info, index);
+			BEValue converted;
+			llvm_value_set_decl(c, &converted, abi_param);
+			llvm_emit_array_to_vector(c, &converted, decl->type);
+			if (!decl->is_value)
+			{
+				BEValue slot;
+				llvm_emit_and_set_decl_alloca(c, decl);
+				llvm_value_set_decl(c, &slot, decl);
+				llvm_store(c, &slot, &converted);
+				return;
+			}
+			llvm_value_rvalue(c, &converted);
+			decl->backend_value = converted.value;
+			return;
+		}
+		case PARAM_RW_EXPAND_ELEMENTS:
+			TODO;
+	}
+}
+static inline void llvm_emit_func_parameter(GenContext *context, Decl *decl, ABIArgInfo ***abi_info_ref, unsigned *index, unsigned real_index)
 {
 	ASSERT(decl->decl_kind == DECL_VAR && decl->var.kind == VARDECL_PARAM);
 
-	// Allocate room on stack, but do not copy.
-	llvm_process_parameter_value(context, decl, abi_info, index);
+	ABIArgInfo *info = *((*abi_info_ref)++);
+	llvm_process_parameter_value(context, decl, info, index);
 	if (llvm_use_debug(context))
 	{
 		llvm_emit_debug_parameter(context, decl, real_index);
@@ -299,14 +334,27 @@ void llvm_emit_return_abi(GenContext *c, BEValue *return_value, BEValue *optiona
 	// If we have an optional it's always the return argument, so we need to copy
 	// the return value into the return value holder.
 	LLVMValueRef return_out = c->return_out;
-	Type *call_return_type = prototype->abi_ret_type;
+	Type *call_return_type = prototype->return_info.type;
 
 	BEValue no_fail;
 
 	// In this case we use the optional as the actual return.
-	if (prototype->is_optional)
+	switch (prototype->return_rewrite)
 	{
-		if (return_value && return_value->type != type_void)
+		case PARAM_RW_NONE:
+			break;
+		case PARAM_RW_VEC_TO_ARRAY:
+			if (return_value)
+			{
+				llvm_emit_vec_to_array(c, return_value, type_array_from_vector(return_value->type));
+			}
+			break;
+		case PARAM_RW_EXPAND_ELEMENTS:
+			UNREACHABLE_VOID;
+	}
+	if (prototype->ret_rewrite != RET_NORMAL)
+	{
+		if (return_value && prototype->ret_rewrite == RET_OPTIONAL_VALUE)
 		{
 			ASSERT(return_value->value);
 			llvm_store_to_ptr_aligned(c, c->return_out, return_value, type_alloca_alignment(return_value->type));
@@ -388,17 +436,23 @@ DIRECT_RETURN:
 
 void llvm_emit_return_implicit(GenContext *c)
 {
-	Type *rtype_real = c->cur_func.prototype ? c->cur_func.prototype->rtype : type_void;
-	if (type_lowering(type_no_optional(rtype_real)) != type_void)
+	if (!c->cur_func.prototype) goto VOID;
+	Type *rtype_real = c->cur_func.prototype->return_info.type;
+	switch (c->cur_func.prototype->ret_rewrite)
 	{
-		LLVMBuildUnreachable(c->builder);
-		return;
-	}
-	if (type_is_optional(rtype_real))
-	{
-		llvm_emit_return_abi(c, NULL, NULL);
-		return;
+		case RET_NORMAL:
+			if (type_is_void(type_flatten(rtype_real))) goto VOID;
+			FALLTHROUGH;
+		case RET_OPTIONAL_VALUE:
+			LLVMBuildUnreachable(c->builder);
+			return;
+		case RET_OPTIONAL_VOID:
+			llvm_emit_return_abi(c, NULL, NULL);
+			return;
+		default:
+			UNREACHABLE_VOID;
 	}
+VOID:;
 	BEValue value;
 	llvm_value_set(&value, llvm_get_zero(c, type_fault), type_fault);
 	llvm_emit_return_abi(c, NULL, &value);
@@ -479,7 +533,7 @@ void llvm_emit_body(GenContext *c, LLVMValueRef function, FunctionPrototype *pro
 	c->return_out = NULL;
 	if (prototype->ret_abi_info->kind == ABI_ARG_INDIRECT)
 	{
-		if (prototype->is_optional)
+		if (prototype->ret_rewrite != RET_NORMAL)
 		{
 			c->optional_out = llvm_get_next_param(c, &arg);
 		}
@@ -488,9 +542,11 @@ void llvm_emit_body(GenContext *c, LLVMValueRef function, FunctionPrototype *pro
 			c->return_out = llvm_get_next_param(c, &arg);
 		}
 	}
-	if (prototype->ret_by_ref_abi_info)
+	ABIArgInfo **abi_args = prototype->abi_args;
+	if (prototype->ret_rewrite == RET_OPTIONAL_VALUE)
 	{
 		ASSERT(!c->return_out);
+		abi_args++;
 		c->return_out = llvm_get_next_param(c, &arg);
 	}
 
@@ -500,7 +556,7 @@ void llvm_emit_body(GenContext *c, LLVMValueRef function, FunctionPrototype *pro
 	{
 		FOREACH_IDX(i, Decl *, param, signature->params)
 		{
-			llvm_emit_func_parameter(c, param, prototype->abi_args[i], &arg, i);
+			llvm_emit_func_parameter(c, param, &abi_args, &arg, i);
 		}
 	}
 
diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h
index 9bb71f56d..a036afb4b 100644
--- a/src/compiler/llvm_codegen_internal.h
+++ b/src/compiler/llvm_codegen_internal.h
@@ -474,6 +474,7 @@ bool llvm_temp_as_address(Type *type);
 INLINE LLVMValueRef llvm_emit_insert_value(GenContext *c, LLVMValueRef agg, LLVMValueRef new_value, ArraySize index);
 LLVMValueRef llvm_emit_aggregate_two(GenContext *c, Type *type, LLVMValueRef value1, LLVMValueRef value2);
 LLVMValueRef llvm_emit_const_vector(LLVMValueRef value, ArraySize len);
+LLVMValueRef llvm_emit_const_vector_pot(LLVMValueRef value, ArraySize len);
 LLVMValueRef llvm_emit_struct_gep_raw(GenContext *c, LLVMValueRef ptr, LLVMTypeRef struct_type, unsigned index,
                                       unsigned struct_alignment, AlignSize *alignment);
 LLVMValueRef llvm_emit_array_gep_raw(GenContext *c, LLVMValueRef ptr, LLVMTypeRef array_type, unsigned index, AlignSize array_alignment, AlignSize *alignment);
@@ -515,7 +516,7 @@ LLVMValueRef llvm_emit_coerce(GenContext *c, LLVMTypeRef coerced, BEValue *value
 
 static inline LLVMCallConv llvm_call_convention_from_call(CallABI abi);
 void llvm_emit_raw_call(GenContext *c, BEValue *result_value, FunctionPrototype *prototype, LLVMTypeRef func_type, LLVMValueRef func, LLVMValueRef *args, unsigned arg_count, int inline_flag, LLVMValueRef error_var, bool sret_return, BEValue *synthetic_return_param, bool no_return);
-void llvm_emit_parameter(GenContext *c, LLVMValueRef *args, unsigned *arg_count_ref, ABIArgInfo *info, BEValue *be_value, Type *type);
+void llvm_emit_parameter(GenContext *c, LLVMValueRef *args, unsigned *arg_count_ref, ABIArgInfo *info, BEValue *be_value);
 
 // -- Dynamic interface --
 LLVMValueRef llvm_get_selector(GenContext *c, const char *name);
@@ -554,6 +555,8 @@ void llvm_emit_local_var_alloca(GenContext *c, Decl *decl);
 void llvm_emit_local_decl(GenContext *c, Decl *decl, BEValue *value);
 void llvm_emit_builtin_call(GenContext *c, BEValue *result_value, Expr *expr);
 LLVMMetadataRef llvm_debug_create_macro(GenContext *c, Decl *macro);
+void llvm_emit_array_to_vector(GenContext *c, BEValue *value, Type *to);
+void llvm_emit_vec_to_array(GenContext *c, BEValue *value, Type *type);
 
 // -- Optional --
 LLVMValueRef llvm_emit_is_no_opt(GenContext *c, LLVMValueRef error_value);
diff --git a/src/compiler/llvm_codegen_internal_impl.h b/src/compiler/llvm_codegen_internal_impl.h
index 7e7d3a474..e89795c81 100644
--- a/src/compiler/llvm_codegen_internal_impl.h
+++ b/src/compiler/llvm_codegen_internal_impl.h
@@ -154,7 +154,7 @@ INLINE LLVMValueRef llvm_emit_trunc_bool(GenContext *c, LLVMValueRef value)
 
 INLINE LLVMValueRef llvm_emit_extract_value(GenContext *c, LLVMValueRef agg, unsigned index)
 {
-	if (LLVMGetTypeKind(LLVMTypeOf(agg)) == LLVMVectorTypeKind)
+	if (LLVMGetTypeKind(LLVMTypeOf(agg)) == LLVMVectorTypeKind )
 	{
 		return LLVMBuildExtractElement(c->builder, agg, llvm_const_int(c, type_usz, index), "");
 	}
diff --git a/src/compiler/llvm_codegen_stmt.c b/src/compiler/llvm_codegen_stmt.c
index b577f6cb8..18d104c5c 100644
--- a/src/compiler/llvm_codegen_stmt.c
+++ b/src/compiler/llvm_codegen_stmt.c
@@ -248,7 +248,7 @@ static inline void llvm_emit_return(GenContext *c, Ast *ast)
 
 	LLVMBasicBlockRef error_return_block = NULL;
 	LLVMValueRef error_out = NULL;
-	if (c->cur_func.prototype && type_is_optional(c->cur_func.prototype->rtype))
+	if (c->cur_func.prototype && c->cur_func.prototype->ret_rewrite != RET_NORMAL)
 	{
 		error_return_block = llvm_basic_block_new(c, "err_retblock");
 		error_out = llvm_emit_alloca_aligned(c, type_fault, "reterr");
@@ -259,7 +259,7 @@ static inline void llvm_emit_return(GenContext *c, Ast *ast)
 	BEValue return_value = { 0 };
 	if (has_return_value)
 	{
-		llvm_emit_expr(c, &return_value, ast->return_stmt.expr);
+		llvm_emit_expr(c, &return_value, expr);
 		llvm_value_fold_optional(c, &return_value);
 		c->retval = return_value;
 	}
@@ -1543,12 +1543,11 @@ void llvm_emit_panic(GenContext *c, const char *message, SourceSpan loc, const c
 	LLVMValueRef actual_args[16];
 	unsigned count = 0;
 	ABIArgInfo **abi_args = prototype->abi_args;
-	Type **types = prototype->param_types;
 	for (unsigned i = 0; i < 4; i++)
 	{
-		Type *type = type_lowering(types[i]);
+		Type *type = type_lowering(abi_args[i]->original_type);
 		BEValue value = { .value = panic_args[i], .type = type };
-		llvm_emit_parameter(c, actual_args, &count, abi_args[i], &value, type);
+		llvm_emit_parameter(c, actual_args, &count, abi_args[i], &value);
 	}
 
 	if (panicf)
@@ -1575,7 +1574,7 @@ void llvm_emit_panic(GenContext *c, const char *message, SourceSpan loc, const c
 		llvm_value_aggregate_two(c, &value, any_slice, array_ref, llvm_const_int(c, type_usz, elements));
 		LLVMSetValueName2(value.value, temp_name, 6);
 
-		llvm_emit_parameter(c, actual_args, &count, abi_args[4], &value, any_slice);
+		llvm_emit_parameter(c, actual_args, &count, abi_args[4], &value);
 
 		BEValue res;
 		if (c->debug.builder) llvm_emit_debug_location(c, loc);
diff --git a/src/compiler/llvm_codegen_storeload.c b/src/compiler/llvm_codegen_storeload.c
index f8c08a2e2..6d45b8bfe 100644
--- a/src/compiler/llvm_codegen_storeload.c
+++ b/src/compiler/llvm_codegen_storeload.c
@@ -7,7 +7,31 @@
 LLVMValueRef llvm_store_to_ptr_raw_aligned(GenContext *c, LLVMValueRef pointer, LLVMValueRef value, AlignSize alignment)
 {
 	ASSERT(alignment > 0);
-	assert(LLVMTypeOf(value) != c->bool_type);
+	LLVMTypeRef type = LLVMTypeOf(value);
+	ASSERT(type != c->bool_type);
+	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+	{
+		unsigned len = LLVMGetVectorSize(LLVMTypeOf(value));
+		if (!is_power_of_two(len))
+		{
+			ByteSize size = llvm_store_size(c, type);
+			if (size < aligned_offset(alignment, size))
+			{
+				unsigned npot = next_highest_power_of_2(len);
+				static LLVMValueRef vec[MAX_VECTOR_WIDTH];
+				LLVMTypeRef mask_type = llvm_get_type(c, type_uint);
+				for (unsigned i = 0; i < len; i++)
+				{
+					vec[i] = LLVMConstInt(mask_type, i, 0);
+				}
+				for (unsigned i = len; i < npot; i++)
+				{
+					vec[i] = LLVMGetPoison(mask_type);
+				}
+				value = LLVMBuildShuffleVector(c->builder, value, LLVMGetPoison(type), LLVMConstVector(vec, npot), "expandvec");
+			}
+		}
+	}
 	LLVMValueRef ref = LLVMBuildStore(c->builder, value, pointer);
 	llvm_set_alignment(ref, alignment);
 	return ref;
@@ -30,7 +54,7 @@ bool llvm_temp_as_address(Type *type)
 			// Ok by value.
 			return false;
 		default:
-			return type_is_abi_aggregate(type);
+			return type_is_aggregate(type);
 	}
 }
 
@@ -39,7 +63,7 @@ LLVMValueRef llvm_store_to_ptr_aligned(GenContext *c, LLVMValueRef destination,
 	// If we have an address but not an aggregate, do a load.
 	ASSERT(alignment);
 	llvm_value_fold_optional(c, value);
-	if (value->kind == BE_ADDRESS && !type_is_abi_aggregate(value->type))
+	if (value->kind == BE_ADDRESS && !type_is_aggregate(value->type))
 	{
 		value->value = llvm_load_value_store(c, value);
 		value->kind = BE_VALUE;
@@ -77,6 +101,25 @@ LLVMValueRef llvm_load(GenContext *c, LLVMTypeRef type, LLVMValueRef pointer, Al
 	ASSERT(alignment > 0);
 	ASSERT(!llvm_is_global_eval(c));
 	ASSERT(LLVMGetTypeContext(type) == c->context);
+	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+	{
+		unsigned len = LLVMGetVectorSize(type);
+		if (!is_power_of_two(len) && alignment > llvm_store_size(c, type))
+		{
+			unsigned npot = next_highest_power_of_2(len);
+			LLVMTypeRef t = LLVMVectorType(LLVMGetElementType(type), npot);
+			LLVMValueRef value = LLVMBuildLoad2(c->builder, t, pointer, name);
+			llvm_set_alignment(value, alignment);
+			LLVMValueRef poison = LLVMGetPoison(t);
+			static LLVMValueRef vec[MAX_VECTOR_WIDTH];
+			for (int i = 0; i < len; i++)
+			{
+				vec[i] = llvm_const_int(c, type_uint, i);
+			}
+			value = LLVMBuildShuffleVector(c->builder, value, poison, LLVMConstVector(vec, len), "extractvec");
+			return value;
+		}
+	}
 	LLVMValueRef value = LLVMBuildLoad2(c->builder, type, pointer, name);
 	llvm_set_alignment(value, alignment);
 	return value;
@@ -121,13 +164,22 @@ LLVMValueRef llvm_store_zero(GenContext *c, BEValue *ref)
 {
 	llvm_value_addr(c, ref);
 	Type *type = ref->type;
-	if (!type_is_abi_aggregate(type) || type_is_builtin(type->type_kind))
+	if (!type_is_aggregate(type) || type_is_builtin(type->type_kind))
 	{
+		if (type->type_kind == TYPE_VECTOR)
+		{
+			unsigned len = type->array.len;
+			if (!is_power_of_two(len))
+			{
+				return llvm_store_raw(c, ref, llvm_emit_const_vector_pot(llvm_get_zero(c, type->array.base), len));
+			}
+		}
+
 		return llvm_store_raw(c, ref, llvm_get_zero(c, type));
 	}
-	Type *single_type = type_abi_find_single_struct_element(type);
+	Type *single_type = type_abi_find_single_struct_element(type, false);
 
-	if (single_type && !type_is_abi_aggregate(single_type))
+	if (single_type && !type_is_aggregate(single_type))
 	{
 		BEValue element = *ref;
 		llvm_value_bitcast(c, &element, single_type);
diff --git a/src/compiler/llvm_codegen_type.c b/src/compiler/llvm_codegen_type.c
index 4529ef0d4..d9b8e0b44 100644
--- a/src/compiler/llvm_codegen_type.c
+++ b/src/compiler/llvm_codegen_type.c
@@ -8,7 +8,7 @@ static inline LLVMTypeRef llvm_type_from_decl(GenContext *c, Decl *decl);
 
 static inline LLVMTypeRef llvm_type_from_array(GenContext *context, Type *type);
 static void param_expand(GenContext *context, LLVMTypeRef** params_ref, Type *type);
-static inline void add_func_type_param(GenContext *c, Type *param_type, ABIArgInfo *arg_info, LLVMTypeRef **params);
+static inline void add_func_type_param(GenContext *c, ABIArgInfo *arg_info, LLVMTypeRef **params);
 
 static inline LLVMTypeRef llvm_type_from_decl(GenContext *c, Decl *decl)
 {
@@ -42,7 +42,7 @@ static inline LLVMTypeRef llvm_type_from_decl(GenContext *c, Decl *decl)
 				{
 					vec_add(types, llvm_const_padding_type(c, member->padding));
 				}
-				vec_add(types, llvm_get_type(c, member->type));
+				vec_add(types, llvm_get_type(c, lowered_member_type(member)));
 			}
 			if (decl->strukt.padding)
 			{
@@ -103,6 +103,7 @@ static void param_expand(GenContext *context, LLVMTypeRef** params_ref, Type *ty
 		case TYPE_ALIAS:
 			UNREACHABLE_VOID
 		case TYPE_ARRAY:
+		case TYPE_VECTOR:
 			for (ArraySize i = type->array.len; i > 0; i--)
 			{
 				param_expand(context, params_ref, type->array.base);
@@ -112,7 +113,7 @@ static void param_expand(GenContext *context, LLVMTypeRef** params_ref, Type *ty
 		{
 			FOREACH(Decl *, member, type->decl->strukt.members)
 			{
-				param_expand(context, params_ref, member->type);
+				param_expand(context, params_ref, lowered_member_type(member));
 			}
 			return;
 		}
@@ -128,11 +129,11 @@ static void param_expand(GenContext *context, LLVMTypeRef** params_ref, Type *ty
 			// after flattening. Thus we have to use the "largest" field.
 			FOREACH(Decl *, member, type->decl->strukt.members)
 			{
-				Type *member_type = member->type;
+				Type *member_type = lowered_member_type(member);
 				if (type_size(member_type) > largest)
 				{
 					largest = type_size(member_type);
-					largest_type = type_flatten(member_type);
+					largest_type = member_type;
 				}
 			}
 			if (!largest) return;
@@ -147,8 +148,9 @@ static void param_expand(GenContext *context, LLVMTypeRef** params_ref, Type *ty
 	UNREACHABLE_VOID
 }
 
-static inline void add_func_type_param(GenContext *c, Type *param_type, ABIArgInfo *arg_info, LLVMTypeRef **params)
+static inline void add_func_type_param(GenContext *c, ABIArgInfo *arg_info, LLVMTypeRef **params)
 {
+	Type *param_type = arg_info->original_type;
 	arg_info->param_index_start = (ArrayIndex)vec_size(*params);
 	switch (arg_info->kind)
 	{
@@ -202,7 +204,7 @@ static inline void add_func_type_param(GenContext *c, Type *param_type, ABIArgIn
 LLVMTypeRef llvm_update_prototype_abi(GenContext *c, FunctionPrototype *prototype, LLVMTypeRef **params)
 {
 	LLVMTypeRef retval = NULL;
-	Type *call_return_type = prototype->abi_ret_type;
+	Type *call_return_type = prototype->return_info.type;
 	ABIArgInfo *ret_arg_info = prototype->ret_abi_info;
 
 	ret_arg_info->param_index_end = 0;
@@ -246,21 +248,15 @@ LLVMTypeRef llvm_update_prototype_abi(GenContext *c, FunctionPrototype *prototyp
 			break;
 	}
 
-	// If it's optional and it's not void (meaning ret_abi_info will be NULL)
-	if (prototype->ret_by_ref)
-	{
-		add_func_type_param(c, type_get_ptr(type_lowering(prototype->ret_by_ref_type)), prototype->ret_by_ref_abi_info, params);
-	}
-
 	// Add in all of the required arguments.
-	FOREACH_IDX(i, Type *, type, prototype->param_types)
+	for (unsigned i = 0; i < prototype->param_count; i++)
 	{
-		add_func_type_param(c, type, prototype->abi_args[i], params);
+		add_func_type_param(c, prototype->abi_args[i], params);
 	}
 
-	FOREACH_IDX(j, Type *, type, prototype->varargs)
+	for (unsigned i = 0; i < prototype->param_vacount; i++)
 	{
-		add_func_type_param(c, type, prototype->abi_varargs[j], params);
+		add_func_type_param(c, prototype->abi_varargs[i], params);
 	}
 	return retval;
 }
diff --git a/src/compiler/parse_global.c b/src/compiler/parse_global.c
index 0dec7d2c5..8fa3f3297 100644
--- a/src/compiler/parse_global.c
+++ b/src/compiler/parse_global.c
@@ -1924,6 +1924,26 @@ static inline Decl *parse_typedef_declaration(ParseContext *c)
 
 	ASSERT(!tok_is(c, TOKEN_LBRACE));
 
+	while (tok_is(c, TOKEN_AT_IDENT))
+	{
+		const char *name = symstr(c);
+		if (name == kw_at_align)
+		{
+			advance_and_verify(c, TOKEN_AT_IDENT);
+			CONSUME_OR_RET(TOKEN_LPAREN, poisoned_decl);
+			ASSIGN_EXPR_OR_RET(decl->distinct_align, parse_expr(c), poisoned_decl);
+			CONSUME_OR_RET(TOKEN_RPAREN, poisoned_decl);
+		}
+		else if (name == kw_at_simd)
+		{
+			advance_and_verify(c, TOKEN_AT_IDENT);
+			decl->attr_simd = true;
+		}
+		else
+		{
+			RETURN_PRINT_ERROR_HERE("Expected only attributes '@align' and '@simd'.");
+		}
+	}
 	RANGE_EXTEND_PREV(decl);
 	CONSUME_EOS_OR_RET(poisoned_decl);
 	return decl;
diff --git a/src/compiler/sema_decls.c b/src/compiler/sema_decls.c
index 38f89aebc..e4beb359f 100755
--- a/src/compiler/sema_decls.c
+++ b/src/compiler/sema_decls.c
@@ -48,6 +48,29 @@ static inline bool sema_analyse_typedef(SemaContext *context, Decl *decl, bool *
 
 static CompilationUnit *unit_copy(Module *module, CompilationUnit *unit);
 
+static inline bool sema_resolve_align_expr(SemaContext *context, Expr *expr, AlignSize *result) // Analyse `expr` as an alignment argument; on success store it in `*result` and return true.
+{
+	if (!sema_analyse_expr_rvalue(context, expr)) return false;
+	if (!expr_is_const_int(expr)) // Only a constant integer expression is accepted.
+	{
+		RETURN_SEMA_ERROR(expr, "Expected a constant integer value as argument.");
+	}
+	if (int_ucomp(expr->const_expr.ixx, MAX_ALIGNMENT, BINARYOP_GT)) // Reject values above MAX_ALIGNMENT.
+	{
+		RETURN_SEMA_ERROR(expr, "Alignment must be less or equal to %llu.", (unsigned long long)MAX_ALIGNMENT); // %llu expects an unsigned long long argument, hence the cast.
+	}
+	if (int_ucomp(expr->const_expr.ixx, 0, BINARYOP_LE)) // Reject zero.
+	{
+		RETURN_SEMA_ERROR(expr, "Alignment must be greater than zero.");
+	}
+	uint64_t align = int_to_u64(expr->const_expr.ixx);
+	if (!is_power_of_two(align)) // Alignments must be powers of two.
+	{
+		RETURN_SEMA_ERROR(expr, "Alignment must be a power of two.");
+	}
+	*result = (AlignSize)align; // Only written once every check above has passed.
+	return true;
+}
 static Module *module_instantiate_generic(SemaContext *context, Module *module, Path *path, Expr **params, SourceSpan from_span);
 
 static inline bool sema_analyse_enum_param(SemaContext *context, Decl *param);
@@ -259,7 +282,8 @@ static inline bool sema_analyse_struct_member(SemaContext *context, Decl *parent
 			// Set the nested type as export if this one is exported.
 			decl->is_export = is_export;
 			// Perform the analysis
-			return sema_analyse_decl(context, decl);
+			if (!sema_analyse_decl(context, decl)) return false;
+			return true;
 		default:
 			UNREACHABLE
 	}
@@ -290,6 +314,7 @@ static inline bool sema_check_struct_holes(SemaContext *context, Decl *decl, Dec
 	return true;
 }
 
+
 /**
  * Analyse union members, calculating alignment.
  */
@@ -335,7 +360,7 @@ static bool sema_analyse_union_members(SemaContext *context, Decl *decl)
 			RETURN_SEMA_ERROR(member, "Flexible array members not allowed in unions.");
 		}
 		AlignSize member_alignment;
-		if (!sema_set_abi_alignment(context, member->type, &member_alignment)) return false;
+		if (!sema_set_abi_alignment(context, member->type, &member_alignment, true)) return false;
 		if (!sema_check_struct_holes(context, decl, member)) return false;
 
 		ByteSize member_size = type_size(member->type);
@@ -402,17 +427,23 @@ static bool sema_analyse_union_members(SemaContext *context, Decl *decl)
 	return true;
 }
 
-AlignSize sema_get_max_natural_alignment(Type *type)
+AlignSize sema_get_max_natural_alignment_as_member(Type *type)
 {
 RETRY:;
-	type = type_flatten(type);
 	switch (type->type_kind)
 	{
-		case TYPE_TYPEDEF:
-		case TYPE_POISONED:
-		case TYPE_ALIAS:
-		case TYPE_UNTYPED_LIST:
 		case TYPE_OPTIONAL:
+			type = type->optional;
+			goto RETRY;
+		case TYPE_TYPEDEF:
+			if (type->decl->attr_simd) return type_abi_alignment(type);
+			type = type->decl->distinct->type;
+			goto RETRY;
+		case TYPE_ALIAS:
+			type = type->canonical;
+			goto RETRY;
+		case TYPE_POISONED:
+		case TYPE_UNTYPED_LIST:
 		case TYPE_WILDCARD:
 		case TYPE_TYPEINFO:
 		case TYPE_MEMBER:
@@ -443,7 +474,7 @@ RETRY:;
 			AlignSize max = 0;
 			FOREACH(Decl *, member, type->decl->strukt.members)
 			{
-				AlignSize member_max = sema_get_max_natural_alignment(member->type);
+				AlignSize member_max = sema_get_max_natural_alignment_as_member(member->type);
 				if (member_max > max) max = member_max;
 			}
 			return max;
@@ -454,11 +485,10 @@ RETRY:;
 		case TYPE_ARRAY:
 		case TYPE_FLEXIBLE_ARRAY:
 		case TYPE_INFERRED_ARRAY:
-			type = type->array.base;
-			goto RETRY;
 		case TYPE_VECTOR:
 		case TYPE_INFERRED_VECTOR:
-			return type_abi_alignment(type);
+			type = type->array.base;
+			goto RETRY;
 	}
 	UNREACHABLE
 }
@@ -544,9 +574,9 @@ static bool sema_analyse_struct_members(SemaContext *context, Decl *decl)
 			SEMA_ERROR(member, "Recursive definition of %s.", type_quoted_error_string(member_type));
 			return decl_poison(decl);
 		}
-		if (!sema_set_abi_alignment(context, member->type, &member_type_alignment)) return decl_poison(decl);
+		if (!sema_set_abi_alignment(context, member->type, &member_type_alignment, true)) return decl_poison(decl);
 		// And get the natural alignment
-		AlignSize member_natural_alignment = sema_get_max_natural_alignment(member->type);
+		AlignSize member_natural_alignment = sema_get_max_natural_alignment_as_member(member->type);
 
 		// If packed, then the alignment is 1
 		AlignSize member_alignment = is_packed ? 1 : member_type_alignment;
@@ -1102,6 +1132,7 @@ static inline bool sema_analyse_signature(SemaContext *context, Signature *sig,
 {
 	Variadic variadic_type = sig->variadic;
 	Decl **params = sig->params;
+
 	unsigned param_count = vec_size(params);
 	unsigned vararg_index = sig->vararg_index;
 	bool is_macro = sig->is_macro;
@@ -1406,7 +1437,7 @@ static inline bool sema_analyse_signature(SemaContext *context, Signature *sig,
 		{
 			if (!sema_deep_resolve_function_ptr(context, type_info)) return false;
 			param->type = type_info->type;
-			if (!sema_set_abi_alignment(context, param->type, &param->alignment)) return false;
+			if (!sema_set_abi_alignment(context, param->type, &param->alignment, false)) return false;
 		}
 
 		if (param->var.init_expr)
@@ -1524,10 +1555,11 @@ static inline bool sema_analyse_typedef(SemaContext *context, Decl *decl, bool *
 	TypeInfo *info = decl->distinct;
 	info->in_def = true;
 	if (!sema_resolve_type_info(context, info, RESOLVE_TYPE_DEFAULT)) return false;
-
+	if (!sema_resolve_type_decl(context, info->type)) return false;
+	Type *inner_type = info->type;
 	// Optional isn't allowed of course.
-	if (type_is_optional(info->type)) RETURN_SEMA_ERROR(decl, "You cannot create a distinct type from an optional.");
-	switch (sema_resolve_storage_type(context, info->type))
+	if (type_is_optional(inner_type)) RETURN_SEMA_ERROR(decl, "You cannot create a distinct type from an optional.");
+	switch (sema_resolve_storage_type(context, inner_type))
 	{
 		case STORAGE_ERROR:
 			return false;
@@ -1539,9 +1571,29 @@ static inline bool sema_analyse_typedef(SemaContext *context, Decl *decl, bool *
 			RETURN_SEMA_ERROR(info, "You cannot create a distinct type from the wildcard type.");
 		case STORAGE_COMPILE_TIME:
 			RETURN_SEMA_ERROR(info, "You cannot create a distinct type for %s as it is a compile time type.",
-							  type_invalid_storage_type_name(info->type));
+							  type_invalid_storage_type_name(inner_type));
 	}
 
+	if (decl->distinct_align)
+	{
+		if (!sema_resolve_align_expr(context, decl->distinct_align, &decl->alignment)) return false;
+		AlignSize default_size = type_abi_alignment(inner_type);
+		// The requested alignment equals the type's default ABI alignment, so drop the redundant expression.
+		if (default_size == decl->alignment) decl->distinct_align = NULL;
+	}
+	if (decl->attr_simd)
+	{
+		if (decl->distinct_align) RETURN_SEMA_ERROR(decl, "You cannot set both @simd and @align on a distinct type.");
+		inner_type = inner_type->canonical;
+		if (inner_type->type_kind != TYPE_VECTOR) RETURN_SEMA_ERROR(decl, "You cannot set @simd on a non-vector type.");
+		ArraySize len = inner_type->array.len;
+		if (!is_power_of_two(len)) RETURN_SEMA_ERROR(decl, "The length of a @simd vector must be a power of two.");
+		decl->alignment = type_simd_alignment(inner_type);
+	}
+	if (!decl->alignment)
+	{
+		decl->alignment = type_abi_alignment(inner_type);
+	}
 	// Distinct types drop the canonical part.
 	info->type = info->type->canonical;
 	return true;
@@ -1686,7 +1738,7 @@ static inline bool sema_analyse_enum(SemaContext *context, Decl *decl, bool *era
 	for (unsigned i = 0; i < associated_value_count; i++)
 	{
 		Decl *param = associated_values[i];
-		if (!sema_set_abi_alignment(context, param->type, &param->alignment)) return false;
+		if (!sema_set_abi_alignment(context, param->type, &param->alignment, false)) return false;
 		param->resolve_status = RESOLVE_DONE;
 	}
 	for (unsigned i = 0; i < enums; i++)
@@ -3093,6 +3145,7 @@ static bool sema_analyse_attribute(SemaContext *context, ResolvedAttrData *attr_
 			[ATTRIBUTE_SAFEMACRO] = ATTR_MACRO,
 			[ATTRIBUTE_SAFEINFER] = ATTR_GLOBAL | ATTR_LOCAL,
 			[ATTRIBUTE_SECTION] = ATTR_FUNC | ATTR_CONST | ATTR_GLOBAL,
+			[ATTRIBUTE_SIMD] = 0,
 			[ATTRIBUTE_STRUCTLIKE] = ATTR_TYPEDEF,
 			[ATTRIBUTE_TAG] = ATTR_BITSTRUCT_MEMBER | ATTR_MEMBER | USER_DEFINED_TYPES | CALLABLE_TYPE,
 			[ATTRIBUTE_TEST] = ATTR_FUNC,
@@ -3251,28 +3304,7 @@ static bool sema_analyse_attribute(SemaContext *context, ResolvedAttrData *attr_
 			{
 				RETURN_SEMA_ERROR(attr, "'align' requires an power-of-2 argument, e.g. align(8).");
 			}
-			if (!sema_analyse_expr_rvalue(context, expr)) return false;
-			if (!expr_is_const_int(expr))
-			{
-				RETURN_SEMA_ERROR(expr, "Expected a constant integer value as argument.");
-			}
-			{
-				if (int_ucomp(expr->const_expr.ixx, MAX_ALIGNMENT, BINARYOP_GT))
-				{
-					RETURN_SEMA_ERROR(expr, "Alignment must be less or equal to %ull.", MAX_ALIGNMENT);
-				}
-				if (int_ucomp(expr->const_expr.ixx, 0, BINARYOP_LE))
-				{
-					RETURN_SEMA_ERROR(expr, "Alignment must be greater than zero.");
-				}
-				uint64_t align = int_to_u64(expr->const_expr.ixx);
-				if (!is_power_of_two(align))
-				{
-					RETURN_SEMA_ERROR(expr, "Alignment must be a power of two.");
-				}
-				decl->alignment = (AlignSize)align;
-				return true;
-			}
+			return sema_resolve_align_expr(context, expr, &decl->alignment);
 		case ATTRIBUTE_WASM:
 			if (args > 2) RETURN_SEMA_ERROR(attr->exprs[2], "Too many arguments to '@wasm', expected 0, 1 or 2 arguments");
 			decl->is_export = true;
@@ -3418,6 +3450,8 @@ static bool sema_analyse_attribute(SemaContext *context, ResolvedAttrData *attr_
 		case ATTRIBUTE_STRUCTLIKE:
 			decl->attr_structlike = true;
 			return true;
+		case ATTRIBUTE_SIMD:
+			RETURN_SEMA_ERROR(attr, "'@simd' is only allowed on typedef types.");
 		case ATTRIBUTE_SECTION:
 		case ATTRIBUTE_EXTERN:
 			if (context->unit->module->is_generic)
@@ -4714,7 +4748,7 @@ bool sema_analyse_var_decl(SemaContext *context, Decl *decl, bool local, bool *c
 			return decl_poison(decl);
 		}
 		if (!sema_analyse_expr_rvalue(context, init_expr)) return decl_poison(decl);
-		if (check_defined || global_level_var || !type_is_abi_aggregate(init_expr->type)) sema_cast_const(init_expr);
+		if (check_defined || global_level_var || !type_is_aggregate(init_expr->type)) sema_cast_const(init_expr);
 		if (global_level_var && !expr_is_runtime_const(init_expr))
 		{
 			if (check_defined) return *check_defined = true, false;
@@ -4843,7 +4877,7 @@ bool sema_analyse_var_decl(SemaContext *context, Decl *decl, bool local, bool *c
 			}
 		}
 		if (!success) goto EXIT_OK;
-		if (global_level_var || !type_is_abi_aggregate(init->type)) sema_cast_const(init);
+		if (global_level_var || !type_is_aggregate(init->type)) sema_cast_const(init);
 		if (expr_is_const(init))
 		{
 			init->const_expr.is_hex = false;
diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c
index e356b7e40..f25e4b09f 100644
--- a/src/compiler/sema_expr.c
+++ b/src/compiler/sema_expr.c
@@ -1005,7 +1005,7 @@ static inline bool sema_cast_ident_rvalue(SemaContext *context, Expr *expr)
 	{
 		case VARDECL_CONST:
 			if (decl->is_extern) return true;
-			if (type_is_abi_aggregate(decl->type)) return true;
+			if (type_is_aggregate(decl->type)) return true;
 			expr_replace(expr, copy_expr_single(decl->var.init_expr));
 			if (!sema_analyse_expr_rvalue(context, expr)) return false;
 			if (!sema_cast_const(expr) && !expr_is_runtime_const(expr))
@@ -2456,7 +2456,6 @@ static inline bool sema_call_analyse_func_invocation(SemaContext *context, Decl
 
 	if (!sema_call_evaluate_arguments(context, &callee, expr, &optional, no_match_ref)) return false;
 
-	Type *rtype = type->function.prototype->rtype;
 	if (expr->call_expr.is_dynamic_dispatch)
 	{
 		Expr *any_val = expr->call_expr.arguments[0];
@@ -2534,6 +2533,7 @@ END_CONTRACT:
 SKIP_CONTRACTS:
 	expr->call_expr.has_optional_arg = optional;
 
+	Type *rtype = typeget(type->function.signature->rtype);
 	if (!type_is_void(rtype))
 	{
 		bool is_optional_return = type_is_optional(rtype);
@@ -3971,14 +3971,11 @@ static inline bool sema_expr_analyse_subscript_lvalue(SemaContext *context, Expr
 		{
 			Expr *inner = expr_copy(subscripted);
 			subscripted->expr_kind = EXPR_UNARY;
-			subscripted->unary_expr.operator = UNARYOP_ADDR;
-			subscripted->unary_expr.expr = inner;
+			subscripted->unary_expr = (ExprUnary) { .operator = UNARYOP_ADDR, .expr = inner };
 
 			inner = expr_copy(subscripted);
 			subscripted->expr_kind = EXPR_UNARY;
-			subscripted->unary_expr.operator = UNARYOP_DEREF;
-			subscripted->unary_expr.expr = inner;
-			subscripted->unary_expr.no_read = true;
+			subscripted->unary_expr = (ExprUnary) { .operator = UNARYOP_DEREF, .expr = inner, .no_read = true };
 			FALLTHROUGH;
 		}
 		default:
@@ -4878,7 +4875,7 @@ static inline bool sema_expr_analyse_type_access(SemaContext *context, Expr *exp
 		expr->expr_kind = EXPR_CONST;
 		expr->resolve_status = RESOLVE_DONE;
 		AlignSize align;
-		if (!sema_set_abi_alignment(context, decl->type, &align)) return false;
+		if (!sema_set_abi_alignment(context, decl->type, &align, true)) return false;
 		expr->const_expr = (ExprConst) {
 			.member.decl = member,
 			.member.align = align,
@@ -5826,7 +5823,7 @@ static bool sema_expr_rewrite_to_type_property(SemaContext *context, Expr *expr,
 		case TYPE_PROPERTY_MEMBERSOF:
 		{
 			AlignSize align;
-			if (!sema_set_abi_alignment(context, parent_type, &align)) return false;
+			if (!sema_set_abi_alignment(context, parent_type, &align, true)) return false;
 			sema_create_const_membersof(expr, flat, align, 0);
 			return true;
 		}
@@ -5856,7 +5853,7 @@ static bool sema_expr_rewrite_to_type_property(SemaContext *context, Expr *expr,
 		case TYPE_PROPERTY_ALIGNOF:
 		{
 			AlignSize align;
-			if (!sema_set_abi_alignment(context, type, &align)) return false;
+			if (!sema_set_abi_alignment(context, type, &align, false)) return false;
 			expr_rewrite_const_int(expr, type_usz, align);
 			return true;
 		}
@@ -5923,8 +5920,7 @@ bool sema_expr_rewrite_insert_deref(SemaContext *context, Expr *original)
 	Expr *inner = expr_copy(original);
 	original->expr_kind = EXPR_UNARY;
 	original->type = NULL;
-	original->unary_expr.operator = UNARYOP_DEREF;
-	original->unary_expr.expr = inner;
+	original->unary_expr = (ExprUnary) { .operator = UNARYOP_DEREF, .expr = inner };
 
 	// In the case the original is already resolved, we want to resolve the deref as well.
 	if (original->resolve_status == RESOLVE_DONE)
@@ -10042,7 +10038,7 @@ static inline bool sema_expr_analyse_ct_alignof(SemaContext *context, Expr *expr
 	}
 	else
 	{
-		if (!sema_set_abi_alignment(context, type, &align)) return false;
+		if (!sema_set_abi_alignment(context, type, &align, false)) return false;
 	}
 	FOREACH_IDX(i, DesignatorElement *, element, path)
 	{
diff --git a/src/compiler/sema_internal.h b/src/compiler/sema_internal.h
index 5d0bde043..fb200492c 100644
--- a/src/compiler/sema_internal.h
+++ b/src/compiler/sema_internal.h
@@ -130,7 +130,7 @@ Decl *sema_analyse_parameterized_identifier(SemaContext *c, Path *decl_path, con
                                             Expr **params, bool *was_recursive_ref, SourceSpan invocation_span);
 bool sema_parameterized_type_is_found(SemaContext *context, Path *decl_path, const char *name, SourceSpan span);
 Type *sema_resolve_type_get_func(Signature *signature, CallABI abi);
-INLINE bool sema_set_abi_alignment(SemaContext *context, Type *type, AlignSize *result);
+INLINE bool sema_set_abi_alignment(SemaContext *context, Type *type, AlignSize *result, bool as_member);
 INLINE bool sema_set_alloca_alignment(SemaContext *context, Type *type, AlignSize *result);
 INLINE void sema_display_deprecated_warning_on_use(SemaContext *context, Decl *decl, SourceSpan span);
 bool sema_expr_analyse_ct_concat(SemaContext *context, Expr *concat_expr, Expr *left, Expr *right, bool *failed_ref);
@@ -170,14 +170,28 @@ INLINE bool sema_check_left_right_const(SemaContext *context, Expr *left, Expr *
 	return true;
 }
 
-INLINE bool sema_set_abi_alignment(SemaContext *context, Type *type, AlignSize *result)
+INLINE bool sema_set_abi_alignment(SemaContext *context, Type *type, AlignSize *result, bool as_member) // Store the ABI alignment of `type` in `*result`; returns false if resolving the type's declaration fails. `as_member` selects the member-specific rules applied below.
 {
+	type = type->canonical; // All checks below operate on the canonical type.
 	if (type_is_func_ptr(type))
 	{
 		*result = type_abi_alignment(type_voidptr);
 		return true;
 	}
 	if (!sema_resolve_type_decl(context, type)) return false;
+	if (as_member)
+	{
+		while (type->type_kind == TYPE_TYPEDEF) // Unwrap typedef layers one at a time.
+		{
+			if (type_is_simd(type)) goto DONE; // Stop unwrapping: this typedef's own alignment is used as-is.
+			type = type->decl->distinct->type->canonical;
+		}
+		if (type_kind_is_any_vector(type->type_kind))
+		{
+			type = type->array.base; // For a vector reached here, the element type's alignment is used.
+		}
+	}
+DONE:;
 	*result = type_abi_alignment(type);
 	return true;
 }
diff --git a/src/compiler/sema_stmts.c b/src/compiler/sema_stmts.c
index 1d8c6e124..19bc7f118 100644
--- a/src/compiler/sema_stmts.c
+++ b/src/compiler/sema_stmts.c
@@ -3371,7 +3371,7 @@ bool sema_analyse_function_body(SemaContext *context, Decl *func)
 		.ignore_deprecation = func->allow_deprecated || decl_is_deprecated(func)
 	};
 
-	context->rtype = prototype->rtype;
+	Type *rtype = context->rtype = typeget(signature->rtype);
 	context->macro_call_depth = 0;
 	context->active_scope = (DynamicScope) {
 			.depth = 0,
@@ -3411,7 +3411,7 @@ bool sema_analyse_function_body(SemaContext *context, Decl *func)
 		context->call_env.ensures = has_ensures;
 		bool is_naked = func->func_decl.attr_naked;
 		if (!is_naked) sema_append_contract_asserts(assert_first, body);
-		Type *canonical_rtype = type_no_optional(prototype->rtype)->canonical;
+		Type *canonical_rtype = type_no_optional(rtype)->canonical;
 		if (!is_naked && has_ensures && type_is_void(canonical_rtype))
 		{
 			AstId* append_pos = &body->compound_stmt.first_stmt;
diff --git a/src/compiler/sema_types.c b/src/compiler/sema_types.c
index cc0821da9..4aad62188 100644
--- a/src/compiler/sema_types.c
+++ b/src/compiler/sema_types.c
@@ -87,10 +87,7 @@ bool sema_resolve_array_like_len(SemaContext *context, TypeInfo *type_info, Arra
 		{
 			RETURN_VAL_SEMA_ERROR(type_info_poison(type_info), len_expr, "A vector may not exceed %d in bit width.", compiler.build.max_vector_size);
 		}
-		else
-		{
-			RETURN_VAL_SEMA_ERROR(type_info_poison(type_info), len_expr, "The array length may not exceed %lld.", MAX_ARRAY_SIZE);
-		}
+		RETURN_VAL_SEMA_ERROR(type_info_poison(type_info), len_expr, "The array length may not exceed %lld.", MAX_ARRAY_SIZE);
 	}
 	// We're done, return the size and mark it as a success.
 	*len_ref = (ArraySize)len.i.low;
@@ -662,7 +659,9 @@ static Type *flatten_raw_function_type(Type *type)
 static uint32_t hash_function(Signature *sig)
 {
 	uintptr_t hash = sig->variadic == VARIADIC_RAW ? 0 : 1;
-	hash = hash * 31 + (uintptr_t)flatten_raw_function_type(type_infoptr(sig->rtype)->type);
+	Type *rtype = typeget(sig->rtype);
+	hash = hash * 31 + (uintptr_t)flatten_raw_function_type(rtype);
+	if (sig->attrs.is_simd && type_flat_is_vector(rtype)) hash++;
 	Decl **params = sig->params;
 	FOREACH(Decl *, param, params)
 	{
@@ -675,28 +674,11 @@ static inline Type *func_create_new_func_proto(Signature *sig, CallABI abi, uint
 {
 	unsigned param_count = vec_size(sig->params);
 	FunctionPrototype *proto = CALLOCS(FunctionPrototype);
-	proto->raw_variadic = sig->variadic == VARIADIC_RAW;
-	proto->vararg_index = sig->vararg_index;
 	Type *rtype = type_infoptr(sig->rtype)->type;
-	proto->rtype = rtype;
-	if (type_is_optional(rtype))
-	{
-		proto->is_optional = true;
-		Type *real_return_type = rtype->optional;
-		proto->ret_by_ref_type = rtype->optional;
-		proto->ret_by_ref = !type_is_void(real_return_type);
-		proto->abi_ret_type = type_fault;
-	}
-	else
-	{
-		proto->ret_by_ref_type = proto->abi_ret_type = rtype;
-	}
-	proto->call_abi = abi;
-
+	Decl **param_copy = NULL;
 	if (param_count)
 	{
-		Type **param_types = VECNEW(Type*, param_count);
-		Decl **param_copy = VECNEW(Decl*, param_count);
+		param_copy = VECNEW(Decl*, param_count);
 		for (unsigned i = 0; i < param_count; i++)
 		{
 			Decl *decl = decl_copy(sig->params[i]);
@@ -704,28 +686,25 @@ static inline Type *func_create_new_func_proto(Signature *sig, CallABI abi, uint
 			decl->var.type_info = 0;
 			decl->var.init_expr = NULL;
 			decl->name = NULL;
-			vec_add(param_types, decl->type);
 			vec_add(param_copy, decl);
 		}
-		proto->param_types = param_types;
-		proto->param_copy = param_copy;
 	}
 
 	scratch_buffer_clear();
 	scratch_buffer_append("fn ");
-	type_append_name_to_scratch(proto->rtype);
+	type_append_name_to_scratch(rtype->canonical);
 	scratch_buffer_append_char('(');
-	FOREACH_IDX(idx, Type *, val, proto->param_types)
+	FOREACH_IDX(idx, Decl *, val, sig->params)
 	{
 		if (idx != 0) scratch_buffer_append(", ");
-		type_append_name_to_scratch(val);
+		type_append_name_to_scratch(val->type->canonical);
 	}
 	scratch_buffer_append_char(')');
 	Type *type = type_new(TYPE_FUNC_RAW, scratch_buffer_interned());
 	Signature *copy_sig = CALLOCS(Signature);
 	*copy_sig = *sig;
 	copy_sig->attrs = (CalleeAttributes) { .nodiscard = false };
-	copy_sig->params = proto->param_copy;
+	copy_sig->params = param_copy;
 	proto->raw_type = type;
 	type->function.prototype = proto;
 	type->function.decl = NULL;
@@ -798,14 +777,15 @@ static int compare_function(Signature *sig, FunctionPrototype *proto)
 	bool is_raw_variadic = sig->variadic == VARIADIC_RAW;
 	if (is_raw_variadic != proto->raw_variadic) return -1;
 	Decl **params = sig->params;
-	Type **other_params = proto->param_types;
+	Signature *raw_sig = proto->raw_type->function.signature;
+	Decl **other_params = raw_sig->params;
 	unsigned param_count = vec_size(params);
 	if (param_count != vec_size(other_params)) return -1;
-	if (!compare_func_param(type_infoptr(sig->rtype)->type, proto->rtype)) return -1;
+	if (!compare_func_param(typeget(sig->rtype), typeget(proto->raw_type->function.signature->rtype))) return -1;
 	FOREACH_IDX(i, Decl *, param, params)
 	{
-		Type *other_param = other_params[i];
-		if (!compare_func_param(param->type, other_param->canonical)) return -1;
+		Type *other_param = other_params[i]->type;
+		if (!compare_func_param(param->type->canonical, other_param->canonical)) return -1;
 	}
 	return 0;
 }
diff --git a/src/compiler/symtab.c b/src/compiler/symtab.c
index 936a882fa..92e0a50b0 100644
--- a/src/compiler/symtab.c
+++ b/src/compiler/symtab.c
@@ -38,6 +38,7 @@ const char *attribute_list[NUMBER_OF_ATTRIBUTES];
 const char *builtin_list[NUMBER_OF_BUILTINS];
 const char *builtin_defines[NUMBER_OF_BUILTIN_DEFINES];
 const char *type_property_list[NUMBER_OF_TYPE_PROPERTIES];
+const char *kw_at_align;
 const char *kw_at_deprecated;
 const char *kw_at_ensure;
 const char *kw_at_enum_lookup;
@@ -46,6 +47,7 @@ const char *kw_at_param;
 const char *kw_at_pure;
 const char *kw_at_require;
 const char *kw_at_return;
+const char *kw_at_simd;
 const char *kw_in;
 const char *kw_inout;
 const char *kw_len;
@@ -328,11 +330,13 @@ void symtab_init(uint32_t capacity)
 	kw_at_ensure = KW_DEF("@ensure");
 	kw_at_enum_lookup = KW_DEF("@enum_lookup_new");
 	kw_at_jump = KW_DEF("@jump");
+	kw_at_align = KW_DEF("@align");
+	kw_at_simd = KW_DEF("@simd");
 	kw_at_param = KW_DEF("@param");
 	kw_at_pure = KW_DEF("@pure");
 	kw_at_require = KW_DEF("@require");
 	kw_at_return = KW_DEF("@return");
-	attribute_list[ATTRIBUTE_ALIGN] = KW_DEF("@align");
+	attribute_list[ATTRIBUTE_ALIGN] = kw_at_align;
 	attribute_list[ATTRIBUTE_ALLOW_DEPRECATED] = KW_DEF("@allow_deprecated");
 	attribute_list[ATTRIBUTE_BENCHMARK] = KW_DEF("@benchmark");
 	attribute_list[ATTRIBUTE_BIGENDIAN] = KW_DEF("@bigendian");
@@ -378,6 +382,7 @@ void symtab_init(uint32_t capacity)
 	attribute_list[ATTRIBUTE_SAFEINFER] = KW_DEF("@safeinfer");
 	attribute_list[ATTRIBUTE_SAFEMACRO] = KW_DEF("@safemacro");
 	attribute_list[ATTRIBUTE_SECTION] = KW_DEF("@section");
+	attribute_list[ATTRIBUTE_SIMD] = kw_at_simd;
 	attribute_list[ATTRIBUTE_STRUCTLIKE] = KW_DEF("@structlike");
 	attribute_list[ATTRIBUTE_TEST] = KW_DEF("@test");
 	attribute_list[ATTRIBUTE_TAG] = KW_DEF("@tag");
diff --git a/src/compiler/types.c b/src/compiler/types.c
index c337964f4..30e0be4e4 100644
--- a/src/compiler/types.c
+++ b/src/compiler/types.c
@@ -67,7 +67,7 @@ static AlignSize max_alignment_vector;
 #define OPTIONAL_OFFSET 5
 #define ARRAY_OFFSET 6
 
-static void type_append_func_to_scratch(FunctionPrototype *prototype);
+static void type_append_func_to_scratch(Signature *signature);
 
 void type_init_cint(void)
 {
@@ -194,7 +194,7 @@ void type_append_name_to_scratch(Type *type)
 			type = type->pointer;
 			FALLTHROUGH;
 		case TYPE_FUNC_RAW:
-			type_append_func_to_scratch(type->function.prototype);
+			type_append_func_to_scratch(type->function.signature);
 			break;
 		case TYPE_ARRAY:
 			type_append_name_to_scratch(type->array.base);
@@ -205,22 +205,23 @@ void type_append_name_to_scratch(Type *type)
 	}
 }
 
-static void type_append_func_to_scratch(FunctionPrototype *prototype)
+static void type_append_func_to_scratch(Signature *signature) // Appends "<rtype>(<param types>)" for the signature to the scratch buffer.
 {
-	type_append_name_to_scratch(prototype->rtype);
+	type_append_name_to_scratch(typeget(signature->rtype)); // the return type name is emitted first, before the '('
 	scratch_buffer_append_char('(');
-	unsigned elements = vec_size(prototype->param_types);
+	unsigned elements = vec_size(signature->params);
 	for (unsigned i = 0; i < elements; i++)
 	{
 		if (i > 0)
 		{
 			scratch_buffer_append_char(',');
 		}
-		type_append_name_to_scratch(prototype->param_types[i]);
+		type_append_name_to_scratch(signature->params[i]->type); // each parameter contributes the name of its declared type
 	}
-	if (prototype->raw_variadic && elements > 0)
+	if (signature->variadic == VARIADIC_RAW) // raw variadic signatures end with "..."
 	{
-		scratch_buffer_append_char(',');
+		if (elements > 0) scratch_buffer_append_char(','); // comma only when a parameter precedes the ellipsis
+		scratch_buffer_append("...");
 	}
 	scratch_buffer_append_char(')');
 }
@@ -300,7 +301,7 @@ const char *type_to_error_string(Type *type)
 			if (!type->function.prototype) return type->name;
 			scratch_buffer_clear();
 			scratch_buffer_append("fn ");
-			type_append_func_to_scratch(type->function.prototype);
+			type_append_func_to_scratch(type->function.signature);
 			return scratch_buffer_copy();
 		case TYPE_INFERRED_VECTOR:
 			return str_printf("%s[<*>]", type_to_error_string(type->array.base));
@@ -371,7 +372,7 @@ static const char *type_to_error_string_with_path(Type *type)
 			if (!type->function.prototype) return type->name;
 			scratch_buffer_clear();
 			scratch_buffer_append("fn ");
-			type_append_func_to_scratch(type->function.prototype);
+			type_append_func_to_scratch(type->function.signature);
 			return scratch_buffer_copy();
 		case TYPE_INFERRED_VECTOR:
 			return str_printf("%s[<*>]", type_to_error_string_with_path(type->array.base));
@@ -411,61 +412,56 @@ bool type_is_matching_int(CanonicalType *type1, CanonicalType *type2)
 
 TypeSize type_size(Type *type)
 {
-RETRY:
+	if (type->size != ~(ByteSize)0) // all-ones is the "not computed yet" marker; any other value is the cached size
+	{
+		ASSERT(type->size != 0 || type_flatten(type)->type_kind == TYPE_FLEXIBLE_ARRAY); // a cached 0 is only expected for flexible arrays
+		return type->size;
+	}
 	switch (type->type_kind)
 	{
 		case TYPE_BITSTRUCT:
 			ASSERT(type->decl->resolve_status == RESOLVE_DONE);
-			type = type->decl->strukt.container_type->type;
-			goto RETRY;
+			return type->size = type_size(type->decl->strukt.container_type->type); // every branch stores its result in type->size before returning
 		case TYPE_TYPEDEF:
 			ASSERT(type->decl->resolve_status == RESOLVE_DONE);
-			type = type->decl->distinct->type;
-			goto RETRY;
-		case TYPE_VECTOR:
-		{
-			TypeSize width = type_size(type->array.base) * type->array.len;
-			if (!is_power_of_two(width)) return next_highest_power_of_2(width);
-			return width;
-		}
+			return type->size = type_size(type->decl->distinct->type);
 		case CT_TYPES:
 		case TYPE_FUNC_RAW:
 			UNREACHABLE;
 		case TYPE_FLEXIBLE_ARRAY:
-			return 0;
+			return type->size = 0;
 		case TYPE_OPTIONAL:
-			type = type->optional;
-			goto RETRY;
+			return type->size = type_size(type->optional);
 		case TYPE_ALIAS:
-			type = type->canonical;
-			goto RETRY;
+			return type->size = type_size(type->canonical);
 		case TYPE_ENUM:
 		case TYPE_CONST_ENUM:
 			ASSERT(type->decl->enums.type_info->resolve_status == RESOLVE_DONE);
-			type = enum_inner_type(type)->canonical;
-			goto RETRY;
+			return type->size = type_size(enum_inner_type(type)->canonical);
 		case TYPE_STRUCT:
 		case TYPE_UNION:
 			ASSERT(type->decl->resolve_status == RESOLVE_DONE);
-			return type->decl->strukt.size;
+			return type->size = type->decl->strukt.size;
 		case TYPE_VOID:
-			return 1;
+			return type->size = 1;
 		case TYPE_BOOL:
 		case TYPE_TYPEID:
 		case ALL_INTS:
 		case ALL_FLOATS:
 		case TYPE_ANYFAULT:
-			return type->builtin.bytesize;
+			// Always cached: these kinds are expected to have .size assigned when the type is created, so the early return above handles them.
+			UNREACHABLE
 		case TYPE_INTERFACE:
 		case TYPE_ANY:
-			return t.iptr.canonical->builtin.bytesize * 2;
+			return type->size = t.iptr.canonical->builtin.bytesize * 2;
 		case TYPE_FUNC_PTR:
 		case TYPE_POINTER:
-			return t.iptr.canonical->builtin.bytesize;
+			return type->size = t.iptr.canonical->builtin.bytesize;
 		case TYPE_ARRAY:
-			return type_size(type->array.base) * type->array.len;
+		case TYPE_VECTOR:
+			return type->size = type_size(type->array.base) * type->array.len; // vectors are now sized like arrays: element size * length, no rounding up to a power of two
 		case TYPE_SLICE:
-			return size_slice;
+			return type->size = size_slice;
 	}
 	UNREACHABLE
 }
@@ -474,7 +470,10 @@ FunctionPrototype *type_get_resolved_prototype(Type *type)
 {
 	ASSERT(type->type_kind == TYPE_FUNC_RAW);
 	FunctionPrototype *prototype = type->function.prototype;
-	if (!prototype->is_resolved) c_abi_func_create(prototype);
+	if (!prototype->is_resolved)
+	{
+		c_abi_func_create(type->function.signature, prototype, NULL /* no vaargs */);
+	}
 	return prototype;
 }
 
@@ -518,6 +517,18 @@ bool type_is_int128(Type *type)
 }
 
 bool type_is_abi_aggregate(Type *type)
+{
+	return type_is_aggregate(type);
+}
+
+bool type_is_simd(Type *type) // Reports whether the canonical form of `type` is a typedef whose declaration has attr_simd set.
+{
+	type = type->canonical; // look at the canonical type, not the type as written
+	if (type->type_kind != TYPE_TYPEDEF) return false; // the flag is only looked up on typedef declarations
+	return type->decl->attr_simd; // NOTE(review): presumably set when the typedef is declared with @simd - confirm in sema
+}
+
+bool type_is_aggregate(Type *type)
 {
 	RETRY:
 	switch (type->type_kind)
@@ -559,6 +570,7 @@ bool type_is_abi_aggregate(Type *type)
 	UNREACHABLE
 }
 
+
 Type *type_find_largest_union_element(Type *type)
 {
 	ASSERT(type->type_kind == TYPE_UNION);
@@ -767,6 +779,16 @@ bool type_func_match(Type *fn_type, Type *rtype, unsigned arg_count, ...)
 	return true;
 }
 
+AlignSize type_simd_alignment(CanonicalType *type) // Alignment for a vector: its total byte width, capped by max_alignment_vector.
+{
+	ASSERT(type->type_kind == TYPE_VECTOR); // only valid for the vector type itself
+	ByteSize width = type_size(type->array.base) * type->array.len; // total byte width: element size * element count
+	AlignSize alignment = (AlignSize)(int32_t)width;
+	if (max_alignment_vector && alignment > max_alignment_vector) return max_alignment_vector; // the cap applies only when max_alignment_vector is non-zero
+	ASSERT(is_power_of_two(alignment)); // NOTE(review): a 12-byte width (e.g. float[<3>]) would trip this unless capped above - presumably callers only pass power-of-two widths; confirm
+	return alignment;
+}
+
 AlignSize type_abi_alignment(Type *type)
 {
 	RETRY:
@@ -802,8 +824,8 @@ AlignSize type_abi_alignment(Type *type)
 			type = type->optional;
 			goto RETRY;
 		case TYPE_TYPEDEF:
-			type = type->decl->distinct->type;
-			goto RETRY;
+			ASSERT(type->decl->alignment);
+			return type->decl->alignment;
 		case TYPE_ALIAS:
 			type = type->canonical;
 			goto RETRY;
@@ -831,7 +853,11 @@ AlignSize type_abi_alignment(Type *type)
 		case TYPE_ARRAY:
 		case TYPE_INFERRED_ARRAY:
 		case TYPE_FLEXIBLE_ARRAY:
-			type = type->array.base;
+			type = type->array.base->canonical;
+			if (type->type_kind == TYPE_VECTOR)
+			{
+				type = type->array.base;
+			}
 			goto RETRY;
 		case TYPE_SLICE:
 			return alignment_slice;
@@ -1195,7 +1221,7 @@ Type *type_get_indexed_type(Type *type)
 	}
 }
 
-static Type *type_create_array(Type *element_type, ArraySize len, bool vector, bool canonical)
+static Type *type_create_array(Type *element_type, ArraySize len, TypeKind kind, bool canonical)
 {
 	if (canonical) element_type = element_type->canonical;
 	if (!element_type->type_cache)
@@ -1206,27 +1232,19 @@ static Type *type_create_array(Type *element_type, ArraySize len, bool vector, b
 	for (int i = ARRAY_OFFSET; i < entries; i++)
 	{
 		Type *ptr_vec = element_type->type_cache[i];
-		if (vector)
-		{
-			if (ptr_vec->type_kind != TYPE_VECTOR) continue;
-			if (ptr_vec->array.len == len) return ptr_vec;
-		}
-		else
-		{
-			if (ptr_vec->type_kind == TYPE_VECTOR) continue;
-			if (ptr_vec->array.len == len) return ptr_vec;
-		}
+		if (ptr_vec->type_kind != kind) continue;
+		if (ptr_vec->array.len == len) return ptr_vec;
 	}
 	Type *vec_arr;
-	if (vector)
+	if (kind == TYPE_ARRAY)
 	{
-		vec_arr = type_new(TYPE_VECTOR, str_printf("%s[<%llu>]", element_type->name, (unsigned long long)len));
+		vec_arr = type_new(TYPE_ARRAY, str_printf("%s[%llu]", element_type->name, (unsigned long long)len));
 		vec_arr->array.base = element_type;
 		vec_arr->array.len = len;
 	}
 	else
 	{
-		vec_arr = type_new(TYPE_ARRAY, str_printf("%s[%llu]", element_type->name, (unsigned long long)len));
+		vec_arr = type_new(kind, str_printf("%s[<%llu>]", element_type->name, (unsigned long long)len));
 		vec_arr->array.base = element_type;
 		vec_arr->array.len = len;
 	}
@@ -1236,17 +1254,29 @@ static Type *type_create_array(Type *element_type, ArraySize len, bool vector, b
 	}
 	else
 	{
-		vec_arr->canonical = type_create_array(element_type, len, vector, true);
+		vec_arr->canonical = type_create_array(element_type, len, kind, true);
 	}
 	vec_add(element_type->type_cache, vec_arr);
 	return vec_arr;
 }
 
+Type *type_array_from_vector(Type *vec_type) // Returns the array type matching a vector type.
+{
+	ASSERT(vec_type->type_kind == TYPE_VECTOR); // the argument must itself be a vector; nothing is flattened here
+	return type_get_array(vec_type->array.base, vec_type->array.len); // same element type and length, requested as an array
+}
+
+Type *type_vector_from_array(Type *vec_type) // Returns the vector type matching an array type.
+{
+	ASSERT(vec_type->type_kind == TYPE_ARRAY); // despite its name, vec_type is the source array here
+	return type_get_vector(vec_type->array.base, vec_type->array.len); // same element type and length, requested as a vector
+}
+
 Type *type_get_array(Type *arr_type, ArraySize len)
 {
 	ASSERT(len > 0 && "Created a zero length array");
 	ASSERT(type_is_valid_for_array(arr_type));
-	return type_create_array(arr_type, len, false, false);
+	return type_create_array(arr_type, len, TYPE_ARRAY, false); // the kind is now passed explicitly; the final false leaves the element type un-canonicalized for this call
 }
 
 bool type_is_valid_for_vector(Type *type)
@@ -1330,17 +1360,13 @@ Type *type_get_vector_bool(Type *original_type)
 	return type_get_vector(type_int_signed_by_bitsize((unsigned)size * 8), (unsigned)original_type->array.len);
 }
 
-Type *type_get_simd(Type *vector_type, unsigned len)
-{
-	return type_get_vector(vector_type, len);
-}
-
 Type *type_get_vector(Type *vector_type, unsigned len)
 {
 	ASSERT(type_is_valid_for_vector(vector_type));
-	return type_create_array(vector_type, len, true, false);
+	return type_create_array(vector_type, len, TYPE_VECTOR, false); // the kind is now passed explicitly; the final false leaves the element type un-canonicalized for this call
 }
 
+
 static void type_create(const char *name, Type *location, TypeKind kind, unsigned bitsize,
 						unsigned align, unsigned pref_align)
 {
@@ -1348,6 +1374,7 @@ static void type_create(const char *name, Type *location, TypeKind kind, unsigne
 	unsigned byte_size = (bitsize + 7) / 8;
 	*location = (Type) {
 		.type_kind = kind,
+		.size = byte_size,
 		.builtin.bytesize = byte_size,
 		.builtin.bitsize = bitsize,
 		.builtin.abi_alignment = align,
@@ -1366,6 +1393,7 @@ static void type_init(const char *name, Type *location, TypeKind kind, unsigned
 	unsigned byte_size = (bitsize + 7) / 8;
 	*location = (Type) {
 		.type_kind = kind,
+		.size = byte_size,
 		.builtin.bytesize = byte_size,
 		.builtin.bitsize = bitsize,
 		.builtin.abi_alignment = align.align / 8,
@@ -1387,6 +1415,7 @@ static void type_create_alias(const char *name, Type *location, Type *canonical)
 	decl->is_export = true;
 	*location = (Type) {
 		.decl = decl,
+		.size = ~(ByteSize)0,
 		.type_kind = TYPE_ALIAS,
 		.name = name,
 		.canonical = canonical
@@ -1497,6 +1526,7 @@ void type_setup(PlatformTarget *target)
 	string_decl->extname = string_decl->name;
 	string_decl->is_substruct = true;
 	string_decl->distinct = type_info_new_base(type_chars, INVALID_SPAN);
+	string_decl->alignment = target->align_pointer.align / 8;
 	string_decl->resolve_status = RESOLVE_DONE;
 	type_string = string_decl->type;
 
diff --git a/test/test_suite/abi/aarch64_hfa_args.c3t b/test/test_suite/abi/aarch64_hfa_args.c3t
index 72f033206..eabef5055 100644
--- a/test/test_suite/abi/aarch64_hfa_args.c3t
+++ b/test/test_suite/abi/aarch64_hfa_args.c3t
@@ -1,32 +1,34 @@
 // #target: macos-aarch64
 module test;
-alias Int8x16 = ichar[<16>];
-alias Float32x3 = float[<3>];
+typedef Int8x16 = ichar[<16>] @simd;
+typedef Float32x4 = float[<4>] @simd;
 
-struct HFAv3
+struct HFAv4
 {
-    Float32x3[4] arr;
+    Float32x4[4] arr;
 }
 
-struct MixedHFAv3
+struct MixedHFAv4
 {
-    Float32x3[3] arr;
+    Float32x4[3] arr;
     Int8x16 b;
 }
 
-fn HFAv3 test(HFAv3 a0, HFAv3 a1, HFAv3 a2) {
-  return a2;
+fn HFAv4 test(HFAv4 a0, HFAv4 a1, HFAv4 a2)
+{
+	return a2;
 }
 
-fn MixedHFAv3 test_mixed(MixedHFAv3 a0, MixedHFAv3 a1, MixedHFAv3 a2) {
-  return a2;
+fn MixedHFAv4 test_mixed(MixedHFAv4 a0, MixedHFAv4 a1, MixedHFAv4 a2)
+{
+	return a2;
 }
 
 /* #expect: test.ll
 
+%.introspect = type { i8, i64, ptr, i64, i64, i64, [0 x i64] }
+%HFAv4 = type { [4 x <4 x float>] }
+%MixedHFAv4 = type { [3 x <4 x float>], <16 x i8> }
 
-%HFAv3 = type { [4 x <3 x float>] }
-%MixedHFAv3 = type { [3 x <3 x float>], <16 x i8> }
-
-define %HFAv3 @test.test([4 x <4 x float>] %0, [4 x <4 x float>] %1, [4 x <4 x float>] %2)
-define %MixedHFAv3 @test.test_mixed([4 x <4 x float>] %0, [4 x <4 x float>] %1, [4 x <4 x float>] %2) #0 {
+define %HFAv4 @test.test([4 x <4 x float>] %0, [4 x <4 x float>] %1, [4 x <4 x float>] %2) #0 {
+define %MixedHFAv4 @test.test_mixed([4 x <4 x float>] %0, [4 x <4 x float>] %1, [4 x <4 x float>] %2) #0 {
diff --git a/test/test_suite/abi/aarch64_hfa_args_no.c3t b/test/test_suite/abi/aarch64_hfa_args_no.c3t
new file mode 100644
index 000000000..610131072
--- /dev/null
+++ b/test/test_suite/abi/aarch64_hfa_args_no.c3t
@@ -0,0 +1,77 @@
+// #target: macos-aarch64
+module test;
+alias Int8x16 = ichar[<16>];
+alias Float32x3 = float[<3>];
+
+struct HFAv3
+{
+    Float32x3[4] arr;
+}
+
+struct HFAv3arr
+{
+    float[3][4] arr;
+}
+
+struct MixedHFAv3
+{
+    Float32x3[3] arr;
+    Int8x16 b;
+}
+
+struct MixedHFAv3arr
+{
+    float[<3>][3] arr;
+    ichar[16] b;
+}
+
+fn HFAv3 test(HFAv3 a0, HFAv3 a1, HFAv3 a2)
+{
+	return a2;
+}
+
+fn HFAv3arr test_arr(HFAv3arr a0, HFAv3arr a1, HFAv3arr a2)
+{
+	return a2;
+}
+
+fn MixedHFAv3 test_mixed(MixedHFAv3 a0, MixedHFAv3 a1, MixedHFAv3 a2)
+{
+	return a2;
+}
+
+fn MixedHFAv3arr test_mixed_arr(MixedHFAv3arr a0, MixedHFAv3arr a1, MixedHFAv3arr a2)
+{
+	return a2;
+}
+
+fn int main()
+{
+	MixedHFAv3 a;
+	MixedHFAv3arr b;
+	Int8x16 c;
+	Float32x3 d;
+	return 0;
+}
+
+/* #expect: test.ll
+
+
+%HFAv3 = type { [4 x [3 x float]] }
+%HFAv3arr = type { [4 x [3 x float]] }
+%MixedHFAv3 = type { [3 x [3 x float]], [16 x i8] }
+%MixedHFAv3arr = type { [3 x [3 x float]], [16 x i8] }
+
+define void @test.test(ptr noalias sret(%HFAv3) align 4 %0, ptr align 4 %1, ptr align 4 %2, ptr align 4 %3) #0 {
+define void @test.test_arr(ptr noalias sret(%HFAv3arr) align 4 %0, ptr align 4 %1, ptr align 4 %2, ptr align 4 %3)
+define void @test.test_mixed(ptr noalias sret(%MixedHFAv3) align 4 %0, ptr align 4 %1, ptr align 4 %2, ptr align 4 %3)
+define void @test.test_mixed_arr(ptr noalias sret(%MixedHFAv3arr) align 4 %0, ptr align 4 %1, ptr align 4 %2, ptr align 4 %3)
+
+  %a = alloca %MixedHFAv3, align 4
+  %b = alloca %MixedHFAv3arr, align 4
+  %c = alloca <16 x i8>, align 16
+  %d = alloca <3 x float>, align 16
+  call void @llvm.memset.p0.i64(ptr align 4 %a, i8 0, i64 52, i1 false)
+  call void @llvm.memset.p0.i64(ptr align 4 %b, i8 0, i64 52, i1 false)
+  store <16 x i8> zeroinitializer, ptr %c, align 16
+  store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr %d, align 16
\ No newline at end of file
diff --git a/test/test_suite/abi/darwin64_avx.c3t b/test/test_suite/abi/darwin64_avx.c3t
index a027c0dee..8feaa78d7 100644
--- a/test/test_suite/abi/darwin64_avx.c3t
+++ b/test/test_suite/abi/darwin64_avx.c3t
@@ -2,7 +2,7 @@
 // #opt: --x86cpu=avx1
 module test;
 
-alias Mm256 = float[<8>];
+typedef Mm256 = float[<8>] @simd;
 struct St256
 {
     Mm256 m;
@@ -20,7 +20,7 @@ fn void f39() { f38(x38); f37(x37); }
 
 // CHECK: declare void @func40(%struct.t128* byval(%struct.t128) align 16)
 
-alias Mm128 = float[<4>];
+typedef Mm128 = float[<4>] @simd;
 struct Two128
 {
 	Mm128 m;
@@ -32,24 +32,25 @@ fn void func41(Two128 s)
 {
     func40(s);
 }
-
 struct Atwo128
 {
 	Mm128[2] array;
 }
 
-struct Sa {
-  Atwo128 x;
+struct Sa
+{
+	Atwo128 x;
 }
 
 extern fn void func42(Sa s);
-fn void func43(Sa s) {
-  func42(s);
+fn void func43(Sa s)
+{
+	func42(s);
 }
 
 
-alias Vec46 = float[<2>];
-extern fn void f46(Vec46,Vec46,Vec46,Vec46,Vec46,Vec46,Vec46,Vec46,Vec46,Vec46);
+typedef Vec46 = float[<2>] @simd;
+extern fn void f46(Vec46 a, Vec46 b, Vec46 c, Vec46 d, Vec46 e, Vec46 f, Vec46 g, Vec46 h, Vec46 i, Vec46 j);
 fn void test46() { Vec46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
 
 struct Vec47 { uint a; }
@@ -62,20 +63,23 @@ fn void test49(double d, double e) { test49_helper(d, e); }
 struct Complex { double i; double c; }
 extern fn void test52_helper(int, ...);
 Mm256 x52;
-fn void test52() {
-  test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 0, 1.0 });
+fn void test52()
+{
+	test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 0, 1.0 });
 }
 
 extern fn void test54_helper(Mm256, ...);
 Mm256 x54;
-fn void test54() {
-  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 0, 1.0 });
-  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 0, 1.0 });
+fn void test54()
+{
+	test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 0, 1.0 });
+	test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 0, 1.0 });
 }
 
-alias Mm512 = float[<16>];
-struct St512 {
-  Mm512 m;
+typedef Mm512 = float[<16>] @simd;
+struct St512
+{
+	Mm512 m;
 }
 
 St512 x55;
@@ -86,27 +90,32 @@ extern fn void f55(St512 x);
 extern fn void f56(Mm512 x);
 fn void f57() { f55(x55); f56(x56); }
 
-struct Two256 {
-  Mm256 m;
-  Mm256 n;
+struct Two256
+{
+	Mm256 m;
+	Mm256 n;
 }
 
 extern fn void f58(Two256 s);
-fn void f59(Two256 s) {
-  f58(s);
+fn void f59(Two256 s)
+{
+	f58(s);
 }
 
-struct Atwo256 {
-  Mm256[2] array;
+struct Atwo256
+{
+	Mm256[2] array;
 }
 
-struct SAtwo256 {
-  Atwo256 x;
+struct SAtwo256
+{
+	Atwo256 x;
 }
 
 extern fn void f60(SAtwo256 s);
-fn void f61(SAtwo256 s) {
-  f60(s);
+fn void f61(SAtwo256 s)
+{
+	f60(s);
 }
 
 
diff --git a/test/test_suite/abi/darwin64_avx512.c3t b/test/test_suite/abi/darwin64_avx512.c3t
index 9bc158a92..04d356cb5 100644
--- a/test/test_suite/abi/darwin64_avx512.c3t
+++ b/test/test_suite/abi/darwin64_avx512.c3t
@@ -2,10 +2,11 @@
 // #opt: --x86cpu=avx512
 module test;
 
-alias Mm256 = float[<8>];
-alias Mm512 = float[<16>];
-struct St512 {
-  Mm512 m;
+typedef Mm256 = float[<8>] @simd;
+typedef Mm512 = float[<16>] @simd;
+struct St512
+{
+	Mm512 m;
 }
 
 St512 x55;
@@ -16,27 +17,32 @@ extern fn void f55(St512 x);
 extern fn void f56(Mm512 x);
 fn void f57() { f55(x55); f56(x56); }
 
-struct Two256 {
-  Mm256 m;
-  Mm256 n;
+struct Two256
+{
+	Mm256 m;
+	Mm256 n;
 }
 
 extern fn void f58(Two256 s);
-fn void f59(Two256 s) {
-  f58(s);
+fn void f59(Two256 s)
+{
+	f58(s);
 }
 
-struct Atwo256 {
-  Mm256[2] array;
+struct Atwo256
+{
+	Mm256[2] array;
 }
 
-struct SAtwo256 {
-  Atwo256 x;
+struct SAtwo256
+{
+	Atwo256 x;
 }
 
 extern fn void f60(SAtwo256 s);
-fn void f61(SAtwo256 s) {
-  f60(s);
+fn void f61(SAtwo256 s)
+{
+	f60(s);
 }
 
 struct Complex { double i; double c; }
@@ -44,15 +50,17 @@ struct Complex { double i; double c; }
 // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
 extern fn void f62_helper(int, ...);
 Mm512 x62;
-fn void f62() {
-  f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
+fn void f62()
+{
+	f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
 }
 
 extern fn void f64_helper(Mm512, ...);
 Mm512 x64;
-fn void f64() {
-  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
-  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
+fn void f64()
+{
+	f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
+	f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, (Complex) { 1.0, 1.0 });
 }
 
 
diff --git a/test/test_suite/abi/darwin64_sret.c3t b/test/test_suite/abi/darwin64_sret.c3t
index 29bd6d7a6..edfe096d5 100644
--- a/test/test_suite/abi/darwin64_sret.c3t
+++ b/test/test_suite/abi/darwin64_sret.c3t
@@ -12,8 +12,9 @@ fn SimdDouble4x4 ident(SimdDouble4x4 x) {
 
 /* #expect: foo.ll
 
-define void @foo.ident(ptr noalias sret(%SimdDouble4x4) align 32 %0, ptr byval(%SimdDouble4x4) align 32 %1) #0 {
+
+define void @foo.ident(ptr noalias sret(%SimdDouble4x4) align 8 %0, ptr byval(%SimdDouble4x4) align 8 %1) #0 {
 entry:
-  call void @llvm.memcpy.p0.p0.i32(ptr align 32 %0, ptr align 32 %1, i32 128, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %0, ptr align 8 %1, i32 128, i1 false)
   ret void
 }
\ No newline at end of file
diff --git a/test/test_suite/abi/darwin64_sse.c3t b/test/test_suite/abi/darwin64_sse.c3t
index 68277b69b..efaf9abfa 100644
--- a/test/test_suite/abi/darwin64_sse.c3t
+++ b/test/test_suite/abi/darwin64_sse.c3t
@@ -2,9 +2,10 @@
 // #opt: --x86cpu=sse4
 module test;
 
-alias Mm256 = float[<8>];
-struct St256 {
-  Mm256 m;
+typedef Mm256 = float[<8>] @simd;
+struct St256
+{
+	Mm256 m;
 }
 
 St256 x38;
@@ -19,28 +20,33 @@ fn void f39() { f38(x38); f37(x37); }
 
 // CHECK: declare void @func40(%struct.t128* byval(%struct.t128) align 16)
 
-alias Mm128 = float[<4>];
-struct Two128 {
-  Mm128 m;
-  Mm128 n;
+typedef Mm128 = float[<4>] @simd;
+struct Two128
+{
+	Mm128 m;
+	Mm128 n;
 }
 
 extern fn void func40(Two128 s);
-fn void func41(Two128 s) {
-  func40(s);
+fn void func41(Two128 s)
+{
+	func40(s);
 }
 
-struct Atwo128 {
-  Mm128[2] array;
+struct Atwo128
+{
+	Mm128[2] array;
 }
 
-struct Sa {
-  Atwo128 x;
+struct Sa
+{
+	Atwo128 x;
 }
 
 extern fn void func42(Sa s);
-fn void func43(Sa s) {
-  func42(s);
+fn void func43(Sa s)
+{
+	func42(s);
 }
 
 
diff --git a/test/test_suite/abi/darwinx64_2.c3t b/test/test_suite/abi/darwinx64_2.c3t
index 6481fe395..f297e3455 100644
--- a/test/test_suite/abi/darwinx64_2.c3t
+++ b/test/test_suite/abi/darwinx64_2.c3t
@@ -11,29 +11,28 @@ fn void f12_1(St12 a0) {}
 
 struct St13_0 { long[3] f0; }
 struct St13_1 { long[2] f0; }
-fn St13_0 f13(int a, int b, int c, int d,
-                 St13_1 e, int f) { while (1) {} }
+fn St13_0 f13(int a, int b, int c, int d, St13_1 e, int f) { while (1) {} }
 
 fn void f14(int a, int b, int c, int d, int e, int f, ichar x) {}
 
 fn void f15(int a, int b, int c, int d, int e, int f, void *x) {}
 
-fn void f16(float a, float b, float c, float d, float e, float f, float g, float h,
-         float x) {}
+fn void f16(float a, float b, float c, float d, float e, float f, float g, float h, float x) {}
 
 struct Fl18_s0 { int f0; }
 fn void fl18(int a, Fl18_s0 f18_arg1) { while (1) {} }
 
-struct St20 @align(32) {
-  int x;
-  int y;
+struct St20 @align(32)
+{
+	int x;
+	int y;
 }
 fn void f20(St20 x) {}
 
 struct StringRef
 {
-  int x;
-  char* ptr;
+	int x;
+	char* ptr;
 }
 fn char *f21(StringRef s) { return s.x+s.ptr; }
 
@@ -43,105 +42,114 @@ fn void f22(St22s x, St22s y) { }
 
 
 
-struct St23S {
-  short f0;
-  uint f1;
-  int f2;
+struct St23S
+{
+	short f0;
+	uint f1;
+	int f2;
 }
 
 
-fn void f23(int a, St23S b) {
-}
+fn void f23(int a, St23S b)  {}
 
 struct St24s { int a; int b; }
 
 fn St23S f24(St23S *x, St24s *p2)
 {
-  return *x;
-
+	return *x;
 }
 
-fn float[<4>] f25(float[<4>] x) {
-  return x+x;
+typedef Float4v = float[<4>] @simd;
+fn Float4v f25(Float4v x)
+{
+	return x+x;
 }
 
-struct Foo26 {
-  int *x;
-  float *y;
+struct Foo26
+{
+	int *x;
+	float *y;
 }
 
-fn Foo26 f26(Foo26 *p) {
-  return *p;
+fn Foo26 f26(Foo26 *p)
+{
+	return *p;
 }
 
 
-struct V4f32wrapper {
-  float[<4>] v;
+struct V4f32wrapper
+{
+	Float4v v;
 }
 
-fn V4f32wrapper f27(V4f32wrapper x) {
-  return x;
+fn V4f32wrapper f27(V4f32wrapper x)
+{
+	return x;
 }
 
 // PR22563 - We should unwrap simple structs and arrays to pass
 // and return them in the appropriate vector registers if possible.
 
-alias V8f32 = float[<8>];
-struct V8f32wrapper {
-  V8f32 v;
+typedef V8f32 = float[<8>] @simd;
+struct V8f32wrapper
+{
+	V8f32 v;
 }
 
-fn V8f32wrapper f27a(V8f32wrapper x) {
-  return x;
+fn V8f32wrapper f27a(V8f32wrapper x)
+{
+	return x;
 }
 
-struct V8f32wrapper_wrapper {
-  V8f32[1] v;
+struct V8f32wrapper_wrapper
+{
+	V8f32[1] v;
 }
 
-fn V8f32wrapper_wrapper f27b(V8f32wrapper_wrapper x) {
-  return x;
+fn V8f32wrapper_wrapper f27b(V8f32wrapper_wrapper x)
+{
+	return x;
 }
 
-struct F28c {
-  double x;
-  int y;
+struct F28c
+{
+	double x;
+	int y;
 }
-fn void f28(F28c c) {
+fn void f28(F28c c)
+{
 }
 
 struct Inner
 {
-    double x;
-    int y;
+	double x;
+	int y;
 }
 struct F29a
 {
-  Inner[1] c;
+	Inner[1] c;
 }
 
-fn void f29a(F29a a) {
-}
+fn void f29a(F29a a) {}
 
-struct St0 {
-    char[8] f0; char f2; char f3; char f4; }
-fn void f30(St0 p_4) {
-}
+struct St0  { char[8] f0; char f2; char f3; char f4; }
+fn void f30(St0 p_4) {}
 
 struct F31foo { float a, b, c; }
-fn float f31(F31foo x) {
-  return x.c;
+fn float f31(F31foo x)
+{
+	return x.c;
 }
 
-alias V1i64 = ulong[<1>];
+typedef V1i64 = ulong[<1>] @simd;
 
 fn V1i64 f34(V1i64 arg) { return arg; }
 
 
-alias V1i64_2 = uint[<2>];
+typedef V1i64_2 = uint[<2>] @simd;
 fn V1i64_2 f35(V1i64_2 arg) { return arg+arg; }
 
-alias V2i32 = float[<2>];
+typedef V2i32 = float[<2>] @simd;
 fn V2i32 f36(V2i32 arg) { return arg; }
 
 
diff --git a/test/test_suite/abi/merge_union_bool_avx512.c3t b/test/test_suite/abi/merge_union_bool_avx512.c3t
index 2534ba66e..918de1c5f 100644
--- a/test/test_suite/abi/merge_union_bool_avx512.c3t
+++ b/test/test_suite/abi/merge_union_bool_avx512.c3t
@@ -2,23 +2,25 @@
 // #opt: --x86cpu=avx512
 module abi;
 
+typedef Bool64v = bool[<64>] @simd;
+
 union Mask64
 {
-    bool[<64>] m;
-    ulong ul;
+	Bool64v m;
+	ulong ul;
 }
 
 fn Mask64 make_mask(ulong n)
 {
-    Mask64 mask;
-    mask.ul = n;
-    return mask;
+	Mask64 mask;
+	mask.ul = n;
+	return mask;
 }
 
 fn int main()
 {
-    make_mask(20);
-    return 0;
+	make_mask(20);
+	return 0;
 }
 
 /* #expect: abi.ll
diff --git a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t
index 6bdf8968d..798c627b7 100644
--- a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t
+++ b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-1.c3t
@@ -32,8 +32,9 @@ fn float128 f_fp_scalar_3(float128 x) { return x; }
 // Aggregates <= 2*xlen may be passed in registers, so will be coerced to
 // integer arguments. The rules for return are the same.
 
-struct Tiny {
-  char a, b, c, d;
+struct Tiny
+{
+	char a, b, c, d;
 }
 
 fn void f_agg_tiny(Tiny x) {
@@ -45,68 +46,80 @@ fn Tiny f_agg_tiny_ret() {
   return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v4i8(char[<4>] x) {
-  x[0] = x[1];
-  x[2] = x[3];
+typedef Char4 = char[<4>] @simd;
+fn void f_vec_tiny_v4i8(Char4 x)
+{
+    x[0] = x[1];
+    x[2] = x[3];
 }
 
-fn char[<4>] f_vec_tiny_v4i8_ret() {
-  return {1, 2, 3, 4};
+fn Char4 f_vec_tiny_v4i8_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v1i32(int[<1>] x) {
-  x[0] = 114;
+typedef Int1 = int[<1>] @simd;
+fn void f_vec_tiny_v1i32(Int1 x)
+{
+	x[0] = 114;
 }
 
-fn int[<1>] f_vec_tiny_v1i32_ret() {
-  return {1};
+fn Int1 f_vec_tiny_v1i32_ret()
+{
+	return {1};
 }
 
-struct Small {
-  int a;
-   int* b;
+struct Small
+{
+    int a;
+	int* b;
 }
 
-fn void f_agg_small(Small x) {
-  x.a += *x.b;
-  x.b = &x.a;
+fn void f_agg_small(Small x)
+{
+	x.a += *x.b;
+	x.b = &x.a;
 }
 
-fn Small f_agg_small_ret() {
-  return {1, null};
+fn Small f_agg_small_ret()
+{
+	return {1, null};
 }
 
+typedef Char8 = char[<8>] @simd;
 
-fn void f_vec_small_v8i8(char[<8>] x) {
-  x[0] = x[7];
+fn void f_vec_small_v8i8(Char8 x)
+{
+	x[0] = x[7];
 }
 
-fn char[<8>] f_vec_small_v8i8_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8};
+fn Char8 f_vec_small_v8i8_ret() => {1, 2, 3, 4, 5, 6, 7, 8};
+
+typedef Long1 = long[<1>] @simd;
+fn void f_vec_small_v1i64(Long1 x)
+{
+	x[0] = 114;
 }
 
-fn void f_vec_small_v1i64(long[<1>] x) {
-  x[0] = 114;
-}
-
-fn long[<1>] f_vec_small_v1i64_ret() {
-  return {1};
-}
+fn Long1 f_vec_small_v1i64_ret() => {1};
 
 // Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
 // single 2*xlen-sized argument, to ensure that alignment can be maintained if
 // passed on the stack.
 
-struct Small_aligned {
-  long a;
+struct Small_aligned
+{
+	long a;
 }
 
-fn void f_agg_small_aligned(Small_aligned x) {
-  x.a += x.a;
+fn void f_agg_small_aligned(Small_aligned x)
+{
+	x.a += x.a;
 }
 
-fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
-  return {10};
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x)
+{
+	return {10};
 }
 
 // Aggregates greater > 2*xlen will be passed and returned indirectly
@@ -124,13 +137,14 @@ fn Large f_agg_large_ret(int i, ichar j) {
   return {1, 2, 3, 4};
 }
 
-fn void f_vec_large_v16i8(char[<16>] x) {
-  x[0] = x[7];
+typedef CharV16 = char[<16>] @simd;
+
+fn void f_vec_large_v16i8(CharV16 x)
+{
+	x[0] = x[7];
 }
 
-fn char[<16>] f_vec_large_v16i8_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
-}
+fn CharV16 f_vec_large_v16i8_ret() => {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
 
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
diff --git a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-2.c3t b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-2.c3t
index bfec66560..192cc8a76 100644
--- a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-2.c3t
+++ b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-2.c3t
@@ -33,137 +33,156 @@ fn float128 f_fp_scalar_3(float128 x) { return x; }
 // Aggregates <= 2*xlen may be passed in registers, so will be coerced to
 // integer arguments. The rules for return are the same.
 
-struct Tiny {
-  char a, b, c, d;
+struct Tiny { char a, b, c, d; }
+
+fn void f_agg_tiny(Tiny x)
+{
+	x.a += x.b;
+	x.c += x.d;
 }
 
-fn void f_agg_tiny(Tiny x) {
-  x.a += x.b;
-  x.c += x.d;
+fn Tiny f_agg_tiny_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn Tiny f_agg_tiny_ret() {
-  return {1, 2, 3, 4};
+typedef Char4 = char[<4>] @simd;
+
+fn void f_vec_tiny_v4i8(Char4 x)
+{
+	x[0] = x[1];
+	x[2] = x[3];
 }
 
-fn void f_vec_tiny_v4i8(char[<4>] x) {
-  x[0] = x[1];
-  x[2] = x[3];
+fn Char4 f_vec_tiny_v4i8_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn char[<4>] f_vec_tiny_v4i8_ret() {
-  return {1, 2, 3, 4};
+typedef Int1 = int[<1>] @simd;
+
+fn void f_vec_tiny_v1i32(Int1 x)
+{
+	x[0] = 114;
 }
 
-fn void f_vec_tiny_v1i32(int[<1>] x) {
-  x[0] = 114;
+fn Int1 f_vec_tiny_v1i32_ret()
+{
+	return {1};
 }
 
-fn int[<1>] f_vec_tiny_v1i32_ret() {
-  return {1};
+struct Small
+{
+	int a;
+	int* b;
 }
 
-struct Small {
-  int a;
-   int* b;
+fn void f_agg_small(Small x)
+{
+	x.a += *x.b;
+	x.b = &x.a;
 }
 
-fn void f_agg_small(Small x) {
-  x.a += *x.b;
-  x.b = &x.a;
+fn Small f_agg_small_ret() { return {1, null}; }
+
+typedef Char8 = char[<8>] @simd;
+fn void f_vec_small_v8i8(Char8 x)
+{
+	x[0] = x[7];
 }
 
-fn Small f_agg_small_ret() {
-  return {1, null};
+fn Char8 f_vec_small_v8i8_ret()
+{
+	return {1, 2, 3, 4, 5, 6, 7, 8};
 }
 
-
-fn void f_vec_small_v8i8(char[<8>] x) {
-  x[0] = x[7];
+typedef Long1 = long[<1>] @simd;
+fn void f_vec_small_v1i64(Long1 x)
+{
+	x[0] = 114;
 }
 
-fn char[<8>] f_vec_small_v8i8_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8};
-}
-
-fn void f_vec_small_v1i64(long[<1>] x) {
-  x[0] = 114;
-}
-
-fn long[<1>] f_vec_small_v1i64_ret() {
-  return {1};
+fn Long1 f_vec_small_v1i64_ret()
+{
+	return {1};
 }
 
 // Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
 // single 2*xlen-sized argument, to ensure that alignment can be maintained if
 // passed on the stack.
 
-struct Small_aligned {
-  long a;
+struct Small_aligned
+{
+	long a;
 }
 
-fn void f_agg_small_aligned(Small_aligned x) {
-  x.a += x.a;
+fn void f_agg_small_aligned(Small_aligned x)
+{
+	x.a += x.a;
 }
 
-fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
-  return {10};
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x)
+{
+	return {10};
 }
 
 // Aggregates greater > 2*xlen will be passed and returned indirectly
-struct Large {
-  int a, b, c, d;
+struct Large
+{
+	int a, b, c, d;
 }
 
-fn void f_agg_large(Large x) {
-  x.a = x.b + x.c + x.d;
+fn void f_agg_large(Large x)
+{
+	x.a = x.b + x.c + x.d;
 }
 
 // The address where the struct should be written to will be the first
 // argument
-fn Large f_agg_large_ret(int i, ichar j) {
-  return {1, 2, 3, 4};
+fn Large f_agg_large_ret(int i, ichar j)
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_large_v16i8(char[<16>] x) {
-  x[0] = x[7];
+typedef Char16v = char[<16>] @simd;
+fn void f_vec_large_v16i8(Char16v x)
+{
+	x[0] = x[7];
 }
 
-fn char[<16>] f_vec_large_v16i8_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
+fn Char16v f_vec_large_v16i8_ret()
+{
+	return {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
 }
 
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
 
-fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
-                     Large d, char e, ichar f, char g, ichar h) {
-  return g + h;
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c, Large d, char e, ichar f, char g, ichar h)
+{
+	return g + h;
 }
 
 // Ensure that scalars passed on the stack are still determined correctly in
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_2(int a, long b, long c, float128 d,
-                              char e, ichar f, char g)
+fn Large f_scalar_stack_2(int a, long b, long c, float128 d, char e, ichar f, char g)
 {
-  return {a, e, f, g};
+	return {a, e, f, g};
 }
 
-fn float128 f_scalar_stack_4(int a, long b, long c, float128 d,
-                             char e, ichar f, char g) {
-  return d;
+fn float128 f_scalar_stack_4(int a, long b, long c, float128 d, char e, ichar f, char g)
+{
+	return d;
 }
 
 // Aggregates and >=XLen scalars passed on the stack should be lowered just as
 // they would be if passed via registers.
 
-fn void f_scalar_stack_5(double a, long b, double c, long d, int e,
-                      long f, float g, double h, float128 i) {}
+fn void f_scalar_stack_5(double a, long b, double c, long d, int e, long f, float g, double h, float128 i) {}
 
-fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
-                 Small f, Small_aligned g, Large h) {}
+fn void f_agg_stack(double a, long b, double c, long d, Tiny e, Small f, Small_aligned g, Large h) {}
 
 // Ensure that ABI lowering happens as expected for vararg calls. For RV32
 // with the base integer calling convention there will be no observable
@@ -171,10 +190,11 @@ fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
 
 extern fn int f_va_callee(int, ...);
 
-fn void f_va_caller() {
-  f_va_callee(1, 2, 3, 4.0, 5.0, (Tiny){6, 7, 8, 9},
-              (Small){10, null}, (Small_aligned){11},
-              (Large){12, 13, 14, 15});
+fn void f_va_caller()
+{
+	f_va_callee(1, 2, 3, 4.0, 5.0, (Tiny){6, 7, 8, 9},
+	          (Small){10, null}, (Small_aligned){11},
+	          (Large){12, 13, 14, 15});
 }
 
 
diff --git a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-3.c3t b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-3.c3t
index 8bb9b31a8..1bdd85118 100644
--- a/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-3.c3t
+++ b/test/test_suite/abi/riscv32-ilp32-ilp32f-ilp32d-abi-3.c3t
@@ -8,162 +8,187 @@ fn void f_void() {}
 // Scalar arguments and return values smaller than the word size are extended
 // according to the sign of their type, up to 32 bits
 
-fn bool f_scalar_0(bool x) { return x; }
+fn bool f_scalar_0(bool x) => x;
 
-fn ichar f_scalar_1(ichar x) { return x; }
+fn ichar f_scalar_1(ichar x) => x;
 
-fn char f_scalar_2(char x) { return x; }
+fn char f_scalar_2(char x) => x;
 
-fn int f_scalar_3(int x) { return x; }
+fn int f_scalar_3(int x) => x;
 
-fn long f_scalar_4(long x) { return x; }
+fn long f_scalar_4(long x) => x;
 
-fn int128 f_scalar_5(int128 x) { return x; }
+fn int128 f_scalar_5(int128 x) => x;
 
-fn float f_fp_scalar_1(float x) { return x; }
+fn float f_fp_scalar_1(float x) => x;
 
-fn double f_fp_scalar_2(double x) { return x; }
+fn double f_fp_scalar_2(double x) => x;
 
 // Scalars larger than 2*xlen are passed/returned indirect. However, the
 // RISC-V LLVM backend can handle this fine, so the function doesn't need to
 // be modified.
 
-fn float128 f_fp_scalar_3(float128 x) { return x; }
+fn float128 f_fp_scalar_3(float128 x)  => x;
 
 // Aggregates <= 2*xlen may be passed in registers, so will be coerced to
 // integer arguments. The rules for return are the same.
 
-struct Tiny {
-  char a, b, c, d;
+struct Tiny
+{
+	char a, b, c, d;
 }
 
-fn void f_agg_tiny(Tiny x) {
-  x.a += x.b;
-  x.c += x.d;
+fn void f_agg_tiny(Tiny x)
+{
+	x.a += x.b;
+	x.c += x.d;
 }
 
-fn Tiny f_agg_tiny_ret() {
-  return {1, 2, 3, 4};
+fn Tiny f_agg_tiny_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v4i8(char[<4>] x) {
-  x[0] = x[1];
-  x[2] = x[3];
+typedef Char4v = char[<4>] @simd;
+typedef Char8v = char[<8>] @simd;
+typedef Char16v = char[<16>] @simd;
+typedef Int1v = int[<1>] @simd;
+typedef Long1v = long[<1>] @simd;
+
+fn void f_vec_tiny_v4i8(Char4v x)
+{
+	x[0] = x[1];
+	x[2] = x[3];
 }
 
-fn char[<4>] f_vec_tiny_v4i8_ret() {
-  return {1, 2, 3, 4};
+fn Char4v f_vec_tiny_v4i8_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v1i32(int[<1>] x) {
-  x[0] = 114;
+fn void f_vec_tiny_v1i32(Int1v x)
+{
+	x[0] = 114;
 }
 
-fn int[<1>] f_vec_tiny_v1i32_ret() {
-  return {1};
+fn Int1v f_vec_tiny_v1i32_ret()
+{
+	return {1};
 }
 
-struct Small {
-  int a;
-   int* b;
+struct Small
+{
+	int a;
+	int* b;
 }
 
-fn void f_agg_small(Small x) {
-  x.a += *x.b;
-  x.b = &x.a;
+fn void f_agg_small(Small x)
+{
+	x.a += *x.b;
+	x.b = &x.a;
 }
 
-fn Small f_agg_small_ret() {
-  return {1, null};
+fn Small f_agg_small_ret()
+{
+	return {1, null};
 }
 
 
-fn void f_vec_small_v8i8(char[<8>] x) {
-  x[0] = x[7];
+fn void f_vec_small_v8i8(Char8v x)
+{
+	x[0] = x[7];
 }
 
-fn char[<8>] f_vec_small_v8i8_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8};
+fn Char8v f_vec_small_v8i8_ret()
+{
+	return {1, 2, 3, 4, 5, 6, 7, 8};
 }
 
-fn void f_vec_small_v1i64(long[<1>] x) {
-  x[0] = 114;
+fn void f_vec_small_v1i64(Long1v x)
+{
+	x[0] = 114;
 }
 
-fn long[<1>] f_vec_small_v1i64_ret() {
-  return {1};
+fn Long1v f_vec_small_v1i64_ret()
+{
+	return {1};
 }
 
 // Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
 // single 2*xlen-sized argument, to ensure that alignment can be maintained if
 // passed on the stack.
 
-struct Small_aligned {
-  long a;
+struct Small_aligned
+{
+	long a;
 }
 
-fn void f_agg_small_aligned(Small_aligned x) {
-  x.a += x.a;
+fn void f_agg_small_aligned(Small_aligned x)
+{
+	x.a += x.a;
 }
 
-fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
-  return {10};
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x)
+{
+	return {10};
 }
 
 // Aggregates greater > 2*xlen will be passed and returned indirectly
-struct Large {
-  int a, b, c, d;
+struct Large
+{
+	int a, b, c, d;
 }
 
-fn void f_agg_large(Large x) {
-  x.a = x.b + x.c + x.d;
+fn void f_agg_large(Large x)
+{
+	x.a = x.b + x.c + x.d;
 }
 
 // The address where the struct should be written to will be the first
 // argument
-fn Large f_agg_large_ret(int i, ichar j) {
-  return {1, 2, 3, 4};
+fn Large f_agg_large_ret(int i, ichar j)
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_large_v16i8(char[<16>] x) {
-  x[0] = x[7];
+fn void f_vec_large_v16i8(Char16v x)
+{
+	x[0] = x[7];
 }
 
-fn char[<16>] f_vec_large_v16i8_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
+fn Char16v f_vec_large_v16i8_ret()
+{
+	return {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8};
 }
 
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
 
-fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
-                     Large d, char e, ichar f, char g, ichar h) {
-  return g + h;
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c, Large d, char e, ichar f, char g, ichar h)
+{
+	return g + h;
 }
 
 // Ensure that scalars passed on the stack are still determined correctly in
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_2(int a, long b, long c, float128 d,
-                              char e, ichar f, char g)
+fn Large f_scalar_stack_2(int a, long b, long c, float128 d, char e, ichar f, char g)
 {
-  return {a, e, f, g};
+	return {a, e, f, g};
 }
 
-fn float128 f_scalar_stack_4(int a, long b, long c, float128 d,
-                             char e, ichar f, char g) {
-  return d;
+fn float128 f_scalar_stack_4(int a, long b, long c, float128 d, char e, ichar f, char g)
+{
+	return d;
 }
 
 // Aggregates and >=XLen scalars passed on the stack should be lowered just as
 // they would be if passed via registers.
 
-fn void f_scalar_stack_5(double a, long b, double c, long d, int e,
-                      long f, float g, double h, float128 i) {}
+fn void f_scalar_stack_5(double a, long b, double c, long d, int e, long f, float g, double h, float128 i) {}
 
-fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
-                 Small f, Small_aligned g, Large h) {}
+fn void f_agg_stack(double a, long b, double c, long d, Tiny e, Small f, Small_aligned g, Large h) {}
 
 // Ensure that ABI lowering happens as expected for vararg calls. For RV32
 // with the base integer calling convention there will be no observable
@@ -171,10 +196,11 @@ fn void f_agg_stack(double a, long b, double c, long d, Tiny e,
 
 extern fn int f_va_callee(int, ...);
 
-fn void f_va_caller() {
-  f_va_callee(1, 2, 3, 4.0, 5.0, (Tiny){6, 7, 8, 9},
-              (Small){10, null}, (Small_aligned){11},
-              (Large){12, 13, 14, 15});
+fn void f_va_caller()
+{
+	f_va_callee(1, 2, 3, 4.0, 5.0, (Tiny){6, 7, 8, 9},
+			  (Small){10, null}, (Small_aligned){11},
+			  (Large){12, 13, 14, 15});
 }
 
 
diff --git a/test/test_suite/abi/riscv64-lp64-abi.c3t b/test/test_suite/abi/riscv64-lp64-abi.c3t
index 8006de77f..06bd4d66a 100644
--- a/test/test_suite/abi/riscv64-lp64-abi.c3t
+++ b/test/test_suite/abi/riscv64-lp64-abi.c3t
@@ -1,20 +1,21 @@
 // #target: linux-riscv64
 module test;
 
-struct Large {
-  long a, b, c, d;
+struct Large
+{
+	long a, b, c, d;
 }
 
-alias V32i8 = char[<32>];
+typedef V32i8 = char[<32>] @simd;
 
-fn int f_scalar_stack_1(int a, int128 b, float c, float128 d, V32i8 e,
-                     char f, char g, char h) {
-  return g + h;
+fn int f_scalar_stack_1(int a, int128 b, float c, float128 d, V32i8 e, char f, char g, char h)
+{
+	return g + h;
 }
 
-fn Large f_scalar_stack_2(double a, int128 b, float128 c, V32i8 d,
-                              char e, ichar f, char g) {
-  return (Large) {(long)(a), e, f, g};
+fn Large f_scalar_stack_2(double a, int128 b, float128 c, V32i8 d, char e, ichar f, char g)
+{
+	return {(long)(a), e, f, g};
 }
 
 /* #expect: test.ll
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t
index bd0ea226d..5766e5531 100644
--- a/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-abi-1.c3t
@@ -6,19 +6,20 @@ struct Large {
 }
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
+typedef Char32V = char[<32>] @simd;
 
-fn int f_scalar_stack_1(int a, int128 b, double c, float128 d, char[<32>] e,
-                     char f, ichar g, char h) {
-  return g + h;
+fn int f_scalar_stack_1(int a, int128 b, double c, float128 d, Char32V e, char f, ichar g, char h)
+{
+	return g + h;
 }
 
 // Ensure that scalars passed on the stack are still determined correctly in
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_2(double a, int128 b, float128 c, char[<32>] d,
-                              char e, ichar f, char g) {
-  return {(long)a, e, f, g};
+fn Large f_scalar_stack_2(double a, int128 b, float128 c, Char32V d, char e, ichar f, char g)
+{
+	return {(long)a, e, f, g};
 }
 
 /* #expect: test.ll
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t
index d77e6f5bd..7886d6522 100644
--- a/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-abi-2.c3t
@@ -8,18 +8,19 @@ struct Large {
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
 
-fn int f_scalar_stack_1(int a, int128 b, double c, float128 d, char[<32>] e,
-                     char f, ichar g, char h) {
-  return g + h;
+typedef Char32V = char[<32>] @simd;
+fn int f_scalar_stack_1(int a, int128 b, double c, float128 d, Char32V e, char f, ichar g, char h)
+{
+	return g + h;
 }
 
 // Ensure that scalars passed on the stack are still determined correctly in
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_2(double a, int128 b, float128 c, char[<32>] d,
-                              char e, ichar f, char g) {
-  return {(long)a, e, f, g};
+fn Large f_scalar_stack_2(double a, int128 b, float128 c, Char32V d, char e, ichar f, char g)
+{
+	return {(long)a, e, f, g};
 }
 
 /* #expect: test.ll
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t
index a52103b3b..5ecef46c7 100644
--- a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-1.c3t
@@ -15,125 +15,132 @@ fn float128 f_fp_scalar_3(float128 x) { return x; }
 // Aggregates <= 2*xlen may be passed in registers, so will be coerced to
 // integer arguments. The rules for return are the same.
 
-struct Tiny {
-  ushort a, b, c, d;
+struct Tiny
+{
+	ushort a, b, c, d;
 }
 
-fn void f_agg_tiny(Tiny x) {
-  x.a += x.b;
-  x.c += x.d;
+fn void f_agg_tiny(Tiny x)
+{
+	x.a += x.b;
+	x.c += x.d;
 }
 
-fn Tiny f_agg_tiny_ret() {
-  return {1, 2, 3, 4};
+fn Tiny f_agg_tiny_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v4i16(short[<4>] x) {
-  x[0] = x[1];
-  x[2] = x[3];
+typedef Short4 = short[<4>] @simd;
+fn void f_vec_tiny_v4i16(Short4 x)
+{
+	x[0] = x[1];
+	x[2] = x[3];
 }
 
-fn short[<4>] f_vec_tiny_v4i16_ret() {
-  return {1, 2, 3, 4};
+fn Short4 f_vec_tiny_v4i16_ret() => {1, 2, 3, 4};
+
+typedef Long1 = long[<1>] @simd;
+
+fn void f_vec_tiny_v1i64(Long1 x)
+{
+	x[0] = 114;
 }
 
-fn void f_vec_tiny_v1i64(long[<1>] x) {
-  x[0] = 114;
+fn Long1 f_vec_tiny_v1i64_ret() => {1};
+
+
+struct Small
+{
+	long a;
+	long *b;
 }
 
-fn long[<1>] f_vec_tiny_v1i64_ret() {
-  return {1};
+fn void f_agg_small(Small x)
+{
+	x.a += *x.b;
+	x.b = &x.a;
 }
 
-struct Small {
-  long a;
-  long *b;
+fn Small f_agg_small_ret() => {1, null};
+
+typedef Short8 = short[<8>] @simd;
+fn void f_vec_small_v8i16(Short8 x)
+{
+	x[0] = x[7];
 }
 
-fn void f_agg_small(Small x) {
-  x.a += *x.b;
-  x.b = &x.a;
+fn Short8 f_vec_small_v8i16_ret() => {1, 2, 3, 4, 5, 6, 7, 8};
+
+typedef Int128_1 = int128[<1>] @simd;
+fn void f_vec_small_v1i128(Int128_1 x)
+{
+	x[0] = 114;
 }
 
-fn Small f_agg_small_ret() {
-  return {1, null};
-}
-
-fn void f_vec_small_v8i16(short[<8>] x) {
-  x[0] = x[7];
-}
-
-fn short[<8>] f_vec_small_v8i16_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8};
-}
-
-fn void f_vec_small_v1i128(int128[<1>] x) {
-  x[0] = 114;
-}
-
-fn int128[<1>] f_vec_small_v1i128_ret() {
-  return {1};
-}
+fn Int128_1 f_vec_small_v1i128_ret() => {1};
 
 // Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
 // single 2*xlen-sized argument, to ensure that alignment can be maintained if
 // passed on the stack.
 
-struct Small_aligned {
-  int128 a;
+struct Small_aligned
+{
+	int128 a;
 }
 
-fn void f_agg_small_aligned(Small_aligned x) {
-  x.a += x.a;
+fn void f_agg_small_aligned(Small_aligned x)
+{
+	x.a += x.a;
 }
 
-fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
-  return {10};
-}
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) => {10};
 
 // Aggregates greater > 2*xlen will be passed and returned indirectly
-struct Large {
-  long a, b, c, d;
+struct Large
+{
+	long a, b, c, d;
 }
 
-fn void f_agg_large(Large x) {
-  x.a = x.b + x.c + x.d;
+fn void f_agg_large(Large x)
+{
+	x.a = x.b + x.c + x.d;
 }
 
 // The address where the struct should be written to will be the first
 // argument
-fn Large f_agg_large_ret(int i, ichar j) {
-  return {1, 2, 3, 4};
+fn Large f_agg_large_ret(int i, ichar j) => {1, 2, 3, 4};
+
+typedef Char32V = char[<32>] @simd;
+
+fn void f_vec_large_v32i8(Char32V x)
+{
+	x[0] = x[7];
 }
 
-fn void f_vec_large_v32i8(char[<32>] x) {
-  x[0] = x[7];
-}
-
-fn char[<32>] f_vec_large_v32i8_ret() {
-  return { [1] = 1, [31] = 31 };
-}
+fn Char32V f_vec_large_v32i8_ret() => { [1] = 1, [31] = 31 };
 
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
 
 fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
-                     Large d, char e, ichar f, char g, ichar h) {
-  return g + h;
+                     Large d, char e, ichar f, char g, ichar h)
+{
+	return g + h;
 }
 
-fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
-                     char f, ichar g, char h) {
-  return g + h;
+fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, Char32V e, char f, ichar g, char h)
+{
+	return g + h;
 }
 
 // Ensure that scalars passed on the stack are still determined correctly in
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
-                              char e, ichar f, char g) {
-  return {a, e, f, g};
+fn Large f_scalar_stack_3(uint a, int128 b, float128 c, Char32V d, char e, ichar f, char g)
+{
+	return {a, e, f, g};
 }
 
 // Ensure that ABI lowering happens as expected for vararg calls.
@@ -143,20 +150,21 @@ fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
 
 extern fn int f_va_callee(int, ...);
 
-fn void f_va_caller() {
+fn void f_va_caller()
+{
 	float128 fq;
-  f_va_callee(1, 2, 3L, 4.0f, 5.0, (Tiny){6, 7, 8, 9},
-              (Small){10, null}, (Small_aligned){11},
-              (Large){12, 13, 14, 15});
-  f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, (Small_aligned){5}, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, (Small){5,null}, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, (Small_aligned){6}, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, (Small){6, null}, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, (Small_aligned){7}, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, (Small){7, null}, 8, 9);
+	f_va_callee(1, 2, 3L, 4.0f, 5.0, (Tiny){6, 7, 8, 9},
+	           (Small){10, null}, (Small_aligned){11},
+	           (Large){12, 13, 14, 15});
+	f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, (Small_aligned){5}, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, (Small){5,null}, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, (Small_aligned){6}, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, (Small){6, null}, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, (Small_aligned){7}, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, (Small){7, null}, 8, 9);
 }
 
 
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t
index 3eed13a11..29eebba87 100644
--- a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-2.c3t
@@ -16,115 +16,143 @@ fn float128 f_fp_scalar_3(float128 x) { return x; }
 // Aggregates <= 2*xlen may be passed in registers, so will be coerced to
 // integer arguments. The rules for return are the same.
 
-struct Tiny {
-  ushort a, b, c, d;
+struct Tiny
+{
+	ushort a, b, c, d;
 }
 
-fn void f_agg_tiny(Tiny x) {
-  x.a += x.b;
-  x.c += x.d;
+fn void f_agg_tiny(Tiny x)
+{
+	x.a += x.b;
+	x.c += x.d;
 }
 
-fn Tiny f_agg_tiny_ret() {
-  return {1, 2, 3, 4};
+fn Tiny f_agg_tiny_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v4i16(short[<4>] x) {
-  x[0] = x[1];
-  x[2] = x[3];
+typedef Short4 = short[<4>] @simd;
+fn void f_vec_tiny_v4i16(Short4 x)
+{
+	x[0] = x[1];
+	x[2] = x[3];
 }
 
-fn short[<4>] f_vec_tiny_v4i16_ret() {
-  return {1, 2, 3, 4};
+fn Short4 f_vec_tiny_v4i16_ret()
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v1i64(long[<1>] x) {
-  x[0] = 114;
+typedef Long1 = long[<1>] @simd;
+fn void f_vec_tiny_v1i64(Long1 x)
+{
+	x[0] = 114;
 }
 
-fn long[<1>] f_vec_tiny_v1i64_ret() {
-  return {1};
+fn Long1 f_vec_tiny_v1i64_ret()
+{
+	return {1};
 }
 
-struct Small {
-  long a;
-  long *b;
+struct Small
+{
+	long a;
+	long *b;
 }
 
-fn void f_agg_small(Small x) {
-  x.a += *x.b;
-  x.b = &x.a;
+fn void f_agg_small(Small x)
+{
+	x.a += *x.b;
+	x.b = &x.a;
 }
 
-fn Small f_agg_small_ret() {
-  return {1, null};
+fn Small f_agg_small_ret()
+{
+	return {1, null};
 }
 
-fn void f_vec_small_v8i16(short[<8>] x) {
-  x[0] = x[7];
+typedef Short8 = short[<8>] @simd;
+fn void f_vec_small_v8i16(Short8 x)
+{
+	x[0] = x[7];
 }
 
-fn short[<8>] f_vec_small_v8i16_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8};
+fn Short8 f_vec_small_v8i16_ret()
+{
+	return {1, 2, 3, 4, 5, 6, 7, 8};
 }
 
-fn void f_vec_small_v1i128(int128[<1>] x) {
-  x[0] = 114;
+typedef Int128_1 = int128[<1>] @simd;
+
+fn void f_vec_small_v1i128(Int128_1 x)
+{
+	x[0] = 114;
 }
 
-fn int128[<1>] f_vec_small_v1i128_ret() {
-  return {1};
+fn Int128_1 f_vec_small_v1i128_ret()
+{
+	return {1};
 }
 
 // Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
 // single 2*xlen-sized argument, to ensure that alignment can be maintained if
 // passed on the stack.
 
-struct Small_aligned {
-  int128 a;
+struct Small_aligned
+{
+	int128 a;
 }
 
-fn void f_agg_small_aligned(Small_aligned x) {
-  x.a += x.a;
+fn void f_agg_small_aligned(Small_aligned x)
+{
+	x.a += x.a;
 }
 
-fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
-  return {10};
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x)
+{
+	return {10};
 }
 
 // Aggregates greater > 2*xlen will be passed and returned indirectly
-struct Large {
-  long a, b, c, d;
+struct Large
+{
+	long a, b, c, d;
 }
 
-fn void f_agg_large(Large x) {
-  x.a = x.b + x.c + x.d;
+fn void f_agg_large(Large x)
+{
+	x.a = x.b + x.c + x.d;
 }
 
 // The address where the struct should be written to will be the first
 // argument
-fn Large f_agg_large_ret(int i, ichar j) {
-  return {1, 2, 3, 4};
+fn Large f_agg_large_ret(int i, ichar j)
+{
+	return {1, 2, 3, 4};
 }
 
-fn void f_vec_large_v32i8(char[<32>] x) {
-  x[0] = x[7];
+typedef Char32V = char[<32>] @simd;
+fn void f_vec_large_v32i8(Char32V x)
+{
+	x[0] = x[7];
 }
 
-fn char[<32>] f_vec_large_v32i8_ret() {
-  return { [1] = 1, [31] = 31 };
+fn Char32V f_vec_large_v32i8_ret()
+{
+	return { [1] = 1, [31] = 31 };
 }
 
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
 
-fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
-                     Large d, char e, ichar f, char g, ichar h) {
-  return g + h;
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c, Large d, char e, ichar f, char g, ichar h)
+{
+	return g + h;
 }
 
-fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
-                     char f, ichar g, char h) {
+fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, Char32V e, char f, ichar g, char h)
+{
   return g + h;
 }
 
@@ -132,9 +160,9 @@ fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
-                              char e, ichar f, char g) {
-  return {a, e, f, g};
+fn Large f_scalar_stack_3(uint a, int128 b, float128 c, Char32V d, char e, ichar f, char g)
+{
+	return {a, e, f, g};
 }
 
 // Ensure that ABI lowering happens as expected for vararg calls.
@@ -145,19 +173,19 @@ fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
 extern fn int f_va_callee(int, ...);
 
 fn void f_va_caller() {
-	float128 fq;
-  f_va_callee(1, 2, 3L, 4.0f, 5.0, (Tiny){6, 7, 8, 9},
-              (Small){10, null}, (Small_aligned){11},
-              (Large){12, 13, 14, 15});
-  f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, (Small_aligned){5}, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, (Small){5,null}, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, (Small_aligned){6}, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, (Small){6, null}, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, (Small_aligned){7}, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, (Small){7, null}, 8, 9);
+		float128 fq;
+	f_va_callee(1, 2, 3L, 4.0f, 5.0, (Tiny){6, 7, 8, 9},
+	            (Small){10, null}, (Small_aligned){11},
+	            (Large){12, 13, 14, 15});
+	f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, (Small_aligned){5}, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, (Small){5,null}, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, (Small_aligned){6}, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, (Small){6, null}, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, (Small_aligned){7}, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, (Small){7, null}, 8, 9);
 }
 
 
diff --git a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t
index 6f522a482..407743c70 100644
--- a/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t
+++ b/test/test_suite/abi/riscv64-lp64-lp64f-lp64d-abi-3.c3t
@@ -16,125 +16,143 @@ fn float128 f_fp_scalar_3(float128 x) { return x; }
 // Aggregates <= 2*xlen may be passed in registers, so will be coerced to
 // integer arguments. The rules for return are the same.
 
-struct Tiny {
-  ushort a, b, c, d;
+struct Tiny
+{
+	ushort a, b, c, d;
 }
 
-fn void f_agg_tiny(Tiny x) {
-  x.a += x.b;
-  x.c += x.d;
+fn void f_agg_tiny(Tiny x)
+{
+	x.a += x.b;
+	x.c += x.d;
 }
 
 fn Tiny f_agg_tiny_ret() {
   return {1, 2, 3, 4};
 }
 
-fn void f_vec_tiny_v4i16(short[<4>] x) {
-  x[0] = x[1];
-  x[2] = x[3];
+typedef Short4 = short[<4>] @simd;
+typedef Short8 = short[<8>] @simd;
+typedef Long1 = long[<1>] @simd;
+typedef Int128_1 = int128[<1>] @simd;
+typedef Char32v = char[<32>] @simd;
+
+fn void f_vec_tiny_v4i16(Short4 x)
+{
+	x[0] = x[1];
+	x[2] = x[3];
 }
 
-fn short[<4>] f_vec_tiny_v4i16_ret() {
-  return {1, 2, 3, 4};
+fn Short4 f_vec_tiny_v4i16_ret() => {1, 2, 3, 4};
+
+fn void f_vec_tiny_v1i64(Long1 x)
+{
+	x[0] = 114;
 }
 
-fn void f_vec_tiny_v1i64(long[<1>] x) {
-  x[0] = 114;
+fn Long1 f_vec_tiny_v1i64_ret()
+{
+	return {1};
 }
 
-fn long[<1>] f_vec_tiny_v1i64_ret() {
-  return {1};
+struct Small
+{
+	long a;
+	long *b;
 }
 
-struct Small {
-  long a;
-  long *b;
+fn void f_agg_small(Small x)
+{
+	x.a += *x.b;
+	x.b = &x.a;
 }
 
-fn void f_agg_small(Small x) {
-  x.a += *x.b;
-  x.b = &x.a;
+fn Small f_agg_small_ret()
+{
+	return {1, null};
 }
 
-fn Small f_agg_small_ret() {
-  return {1, null};
+fn void f_vec_small_v8i16(Short8 x)
+{
+	x[0] = x[7];
 }
 
-fn void f_vec_small_v8i16(short[<8>] x) {
-  x[0] = x[7];
+fn Short8 f_vec_small_v8i16_ret() => {1, 2, 3, 4, 5, 6, 7, 8};
+
+fn void f_vec_small_v1i128(Int128_1 x)
+{
+	x[0] = 114;
 }
 
-fn short[<8>] f_vec_small_v8i16_ret() {
-  return {1, 2, 3, 4, 5, 6, 7, 8};
-}
-
-fn void f_vec_small_v1i128(int128[<1>] x) {
-  x[0] = 114;
-}
-
-fn int128[<1>] f_vec_small_v1i128_ret() {
-  return {1};
+fn Int128_1 f_vec_small_v1i128_ret()
+{
+	return {1};
 }
 
 // Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
 // single 2*xlen-sized argument, to ensure that alignment can be maintained if
 // passed on the stack.
 
-struct Small_aligned {
-  int128 a;
+struct Small_aligned
+{
+	int128 a;
 }
 
-fn void f_agg_small_aligned(Small_aligned x) {
-  x.a += x.a;
+fn void f_agg_small_aligned(Small_aligned x)
+{
+	x.a += x.a;
 }
 
-fn Small_aligned f_agg_small_aligned_ret(Small_aligned x) {
-  return {10};
+fn Small_aligned f_agg_small_aligned_ret(Small_aligned x)
+{
+	return {10};
 }
 
 // Aggregates greater > 2*xlen will be passed and returned indirectly
-struct Large {
-  long a, b, c, d;
+struct Large
+{
+	long a, b, c, d;
 }
 
-fn void f_agg_large(Large x) {
-  x.a = x.b + x.c + x.d;
+fn void f_agg_large(Large x)
+{
+	x.a = x.b + x.c + x.d;
 }
 
 // The address where the struct should be written to will be the first
 // argument
-fn Large f_agg_large_ret(int i, ichar j) {
-  return {1, 2, 3, 4};
+fn Large f_agg_large_ret(int i, ichar j) => {1, 2, 3, 4};
+
+fn void f_vec_large_v32i8(Char32v x)
+{
+	x[0] = x[7];
 }
 
-fn void f_vec_large_v32i8(char[<32>] x) {
-  x[0] = x[7];
-}
-
-fn char[<32>] f_vec_large_v32i8_ret() {
-  return { [1] = 1, [31] = 31 };
+fn Char32v f_vec_large_v32i8_ret()
+{
+	return { [1] = 1, [31] = 31 };
 }
 
 // Scalars passed on the stack should not have signext/zeroext attributes
 // (they are anyext).
 
-fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c,
-                     Large d, char e, ichar f, char g, ichar h) {
-  return g + h;
+fn int f_scalar_stack_1(Tiny a, Small b, Small_aligned c, Large d, char e, ichar f, char g, ichar h)
+{
+	return g + h;
 }
 
-fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, char[<32>] e,
-                     char f, ichar g, char h) {
-  return g + h;
+fn int f_scalar_stack_2(int a, int128 b, long c, float128 d, Char32v e, char f, ichar g, char h)
+{
+	return g + h;
 }
 
 // Ensure that scalars passed on the stack are still determined correctly in
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
-                              char e, ichar f, char g) {
-  return {a, e, f, g};
+fn Large f_scalar_stack_3(uint a, int128 b, float128 c, Char32v d, char e, ichar f, char g)
+{
+	return {a, e, f, g};
 }
 
 // Ensure that ABI lowering happens as expected for vararg calls.
@@ -144,20 +162,21 @@ fn Large f_scalar_stack_3(uint a, int128 b, float128 c, char[<32>] d,
 
 extern fn int f_va_callee(int, ...);
 
-fn void f_va_caller() {
+fn void f_va_caller()
+{
 	float128 fq;
-  f_va_callee(1, 2, 3L, 4.0f, 5.0, (Tiny){6, 7, 8, 9},
-              (Small){10, null}, (Small_aligned){11},
-              (Large){12, 13, 14, 15});
-  f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, (Small_aligned){5}, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, (Small){5,null}, 6, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, (Small_aligned){6}, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, (Small){6, null}, 7, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, (Small_aligned){7}, 8, 9);
-  f_va_callee(1, 2, 3, 4, 5, 6, (Small){7, null}, 8, 9);
+	f_va_callee(1, 2, 3L, 4.0f, 5.0, (Tiny){6, 7, 8, 9},
+	            (Small){10, null}, (Small_aligned){11},
+	            (Large){12, 13, 14, 15});
+	f_va_callee(1, 2, 3, 4, fq, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, (Small_aligned){5}, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, (Small){5,null}, 6, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, fq, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, (Small_aligned){6}, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, (Small){6, null}, 7, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, fq, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, (Small_aligned){7}, 8, 9);
+	f_va_callee(1, 2, 3, 4, 5, 6, (Small){7, null}, 8, 9);
 }
 
 
diff --git a/test/test_suite/abi/sysv_abi_avx.c3t b/test/test_suite/abi/sysv_abi_avx.c3t
index 95f579afb..127f36f71 100644
--- a/test/test_suite/abi/sysv_abi_avx.c3t
+++ b/test/test_suite/abi/sysv_abi_avx.c3t
@@ -16,16 +16,18 @@ fn void callit()
 	take_stringref(s);
 }
 
-extern fn float[<8>] get_m256();
-extern fn void take_m256(float[<8>] x);
-extern fn float[<16>] get_m512();
-extern fn void take_m512(float[<16>] x);
+typedef Mm256 = float[<8>] @simd;
+typedef Mm512 = float[<16>] @simd;
+extern fn Mm256 get_m256();
+extern fn void take_m256(Mm256 x);
+extern fn Mm512 get_m512();
+extern fn void take_m512(Mm512 x);
 
 fn void use_vectors()
 {
-  float[<8>] v1 = get_m256();
+  Mm256 v1 = get_m256();
   take_m256(v1);
-  float[<16>] v2 = get_m512();
+  Mm512 v2 = get_m512();
   take_m512(v2);
 }
 
diff --git a/test/test_suite/abi/sysv_abi_noavx.c3t b/test/test_suite/abi/sysv_abi_noavx.c3t
index a32ded346..f26fe45de 100644
--- a/test/test_suite/abi/sysv_abi_noavx.c3t
+++ b/test/test_suite/abi/sysv_abi_noavx.c3t
@@ -16,16 +16,18 @@ fn void callit()
 	take_stringref(s);
 }
 
-extern fn float[<8>] get_m256();
-extern fn void take_m256(float[<8>] x);
-extern fn float[<16>] get_m512();
-extern fn void take_m512(float[<16>] x);
+typedef Mv256 = float[<8>] @simd;
+typedef Mv512 = float[<16>] @simd;
+extern fn Mv256 get_m256();
+extern fn void take_m256(Mv256 x);
+extern fn Mv512 get_m512();
+extern fn void take_m512(Mv512 x);
 
 fn void use_vectors()
 {
-  float[<8>] v1 = get_m256();
+  Mv256 v1 = get_m256();
   take_m256(v1);
-  float[<16>] v2 = get_m512();
+  Mv512 v2 = get_m512();
   take_m512(v2);
 }
 
diff --git a/test/test_suite/abi/sysv_vec_array_indirect.c3t b/test/test_suite/abi/sysv_vec_array_indirect.c3t
new file mode 100644
index 000000000..dd343c2f1
--- /dev/null
+++ b/test/test_suite/abi/sysv_vec_array_indirect.c3t
@@ -0,0 +1,32 @@
+// #target: linux-x64
+module test;
+import std;
+fn void test(double[<3>] x)
+{
+	double[<3>]* y = &x;
+}
+
+fn int main()
+{
+	test({ 1, 2, 3 });
+	return 0;
+}
+/* #expect: test.ll
+
+define void @test.test(ptr byval([3 x double]) align 8 %0) #0 {
+entry:
+  %x = alloca <3 x double>, align 32
+  %y = alloca ptr, align 8
+  %1 = load <3 x double>, ptr %0, align 8
+  %expandvec = shufflevector <3 x double> %1, <3 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x double> %expandvec, ptr %x, align 32
+  store ptr %x, ptr %y, align 8
+  ret void
+}
+
+entry:
+  %indirectarg = alloca [3 x double], align 8
+  store [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], ptr %indirectarg, align 8
+  call void @test.test(ptr byval([3 x double]) align 8 %indirectarg)
+  ret i32 0
+}
diff --git a/test/test_suite/abi/vec_update_align.c3t b/test/test_suite/abi/vec_update_align.c3t
new file mode 100644
index 000000000..a1789dd1f
--- /dev/null
+++ b/test/test_suite/abi/vec_update_align.c3t
@@ -0,0 +1,60 @@
+// #target: macos-x64
+// #opt: --x86cpu=avx512
+module test;
+alias Int8x16 = ichar[<16>];
+alias Float32x3 = float[<3>];
+
+struct HFAv3
+{
+    Float32x3[4] arr;
+}
+
+struct HFAv3arr
+{
+    float[3][4] arr;
+}
+
+struct MixedHFAv3
+{
+    Float32x3[3] arr;
+    Int8x16 b;
+}
+
+struct MixedHFAv3arr
+{
+    float[<3>][3] arr;
+    ichar[16] b;
+}
+
+fn int main()
+{
+	MixedHFAv3 a;
+	MixedHFAv3arr b;
+	b.arr[1].x++;
+	float x1 = b.arr[0].y;
+	return 0;
+}
+
+/* #expect: test.ll
+
+%MixedHFAv3 = type { [3 x [3 x float]], [16 x i8] }
+%MixedHFAv3arr = type { [3 x [3 x float]], [16 x i8] }
+
+define i32 @main() #0 {
+entry:
+  %a = alloca %MixedHFAv3, align 4
+  %b = alloca %MixedHFAv3arr, align 4
+  %x1 = alloca float, align 4
+  call void @llvm.memset.p0.i64(ptr align 4 %a, i8 0, i64 52, i1 false)
+  call void @llvm.memset.p0.i64(ptr align 4 %b, i8 0, i64 52, i1 false)
+  %ptradd = getelementptr inbounds i8, ptr %b, i64 16
+  %0 = load <3 x float>, ptr %ptradd, align 4
+  %1 = extractelement <3 x float> %0, i64 0
+  %fincdec = fadd float %1, 1.000000e+00
+  %2 = insertelement <3 x float> %0, float %fincdec, i64 0
+  store <3 x float> %2, ptr %ptradd, align 4
+  %3 = load <3 x float>, ptr %b, align 4
+  %4 = extractelement <3 x float> %3, i64 1
+  store float %4, ptr %x1, align 4
+  ret i32 0
+}
diff --git a/test/test_suite/arrays/index_from_back.c3t b/test/test_suite/arrays/index_from_back.c3t
index 8b83536e4..137524521 100644
--- a/test/test_suite/arrays/index_from_back.c3t
+++ b/test/test_suite/arrays/index_from_back.c3t
@@ -2,21 +2,21 @@
 
 module test;
 
-fn void test(int[10] x, int[<10>] y)
+typedef Int16 = int[<16>] @simd;
+fn void test(int[16] x, Int16 y)
 {
-    int a = x[4];
-    int b = x[^2];
-    int c = y[4];
-    int d = y[^2];
-    int j = 3;
-    int e = y[^j];
-    int f = x[^j];
+	int a = x[4];
+	int b = x[^2];
+	int c = y[4];
+	int d = y[^2];
+	int j = 3;
+	int e = y[^j];
+	int f = x[^j];
 }
 
 /* #expect: test.ll
 
-; Function Attrs:
-define void @test.test(ptr byval([10 x i32]) align 8 %0, ptr byval(<10 x i32>) align 64 %1) #0 {
+define void @test.test(ptr byval([16 x i32]) align 8 %0, ptr byval(<16 x i32>) align 64 %1) #0 {
 entry:
   %a = alloca i32, align 4
   %b = alloca i32, align 4
@@ -28,25 +28,25 @@ entry:
   %ptradd = getelementptr inbounds i8, ptr %0, i64 16
   %2 = load i32, ptr %ptradd, align 4
   store i32 %2, ptr %a, align 4
-  %ptradd1 = getelementptr inbounds i8, ptr %0, i64 32
+  %ptradd1 = getelementptr inbounds i8, ptr %0, i64 56
   %3 = load i32, ptr %ptradd1, align 4
   store i32 %3, ptr %b, align 4
-  %4 = load <10 x i32>, ptr %1, align 64
-  %5 = extractelement <10 x i32> %4, i64 4
+  %4 = load <16 x i32>, ptr %1, align 64
+  %5 = extractelement <16 x i32> %4, i64 4
   store i32 %5, ptr %c, align 4
-  %6 = load <10 x i32>, ptr %1, align 64
-  %7 = extractelement <10 x i32> %6, i64 8
+  %6 = load <16 x i32>, ptr %1, align 64
+  %7 = extractelement <16 x i32> %6, i64 14
   store i32 %7, ptr %d, align 4
   store i32 3, ptr %j, align 4
-  %8 = load <10 x i32>, ptr %1, align 64
+  %8 = load <16 x i32>, ptr %1, align 64
   %9 = load i32, ptr %j, align 4
   %sext = sext i32 %9 to i64
-  %10 = sub nuw i64 10, %sext
-  %11 = extractelement <10 x i32> %8, i64 %10
+  %10 = sub nuw i64 16, %sext
+  %11 = extractelement <16 x i32> %8, i64 %10
   store i32 %11, ptr %e, align 4
   %12 = load i32, ptr %j, align 4
   %sext2 = sext i32 %12 to i64
-  %13 = sub nuw i64 10, %sext2
+  %13 = sub nuw i64 16, %sext2
   %ptroffset = getelementptr inbounds [4 x i8], ptr %0, i64 %13
   %14 = load i32, ptr %ptroffset, align 4
   store i32 %14, ptr %f, align 4
diff --git a/test/test_suite/builtins/matrix_builtin.c3t b/test/test_suite/builtins/matrix_builtin.c3t
index 165e4fc37..9bd88a213 100644
--- a/test/test_suite/builtins/matrix_builtin.c3t
+++ b/test/test_suite/builtins/matrix_builtin.c3t
@@ -26,13 +26,17 @@ entry:
   %2 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %0, <4 x i32> %1, i32 2, i32 2, i32 2)
   store <4 x i32> %2, ptr %z, align 16
   store <2 x i32> <i32 1, i32 2>, ptr %a, align 8
-  store <3 x i32> <i32 1, i32 2, i32 3>, ptr %b, align 16
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 undef>, ptr %b, align 16
   %3 = load <2 x i32>, ptr %a, align 8
-  %4 = load <3 x i32>, ptr %b, align 16
-  %5 = call <6 x i32> @llvm.matrix.multiply.v6i32.v2i32.v3i32(<2 x i32> %3, <3 x i32> %4, i32 2, i32 1, i32 3)
-  store <6 x i32> %5, ptr %c, align 32
-  %6 = load <6 x i32>, ptr %c, align 32
-  %7 = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> %6, i32 2, i32 3)
-  store <6 x i32> %7, ptr %c, align 32
+  %4 = load <4 x i32>, ptr %b, align 16
+  %extractvec = shufflevector <4 x i32> %4, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %5 = call <6 x i32> @llvm.matrix.multiply.v6i32.v2i32.v3i32(<2 x i32> %3, <3 x i32> %extractvec, i32 2, i32 1, i32 3)
+  %expandvec = shufflevector <6 x i32> %5, <6 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
+  store <8 x i32> %expandvec, ptr %c, align 32
+  %6 = load <8 x i32>, ptr %c, align 32
+  %extractvec1 = shufflevector <8 x i32> %6, <8 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+  %7 = call <6 x i32> @llvm.matrix.transpose.v6i32(<6 x i32> %extractvec1, i32 2, i32 3)
+  %expandvec2 = shufflevector <6 x i32> %7, <6 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
+  store <8 x i32> %expandvec2, ptr %c, align 32
   ret i32 0
 }
\ No newline at end of file
diff --git a/test/test_suite/builtins/shufflevector.c3t b/test/test_suite/builtins/shufflevector.c3t
index 4faa45d87..9181826df 100644
--- a/test/test_suite/builtins/shufflevector.c3t
+++ b/test/test_suite/builtins/shufflevector.c3t
@@ -3,6 +3,8 @@
 module test;
 import std::io;
 
+typedef Vc2 = float[<2>] @simd;
+typedef Vc4 = float[<4>] @simd;
 struct Matrix2x2
 {
 	union
@@ -14,34 +16,34 @@ struct Matrix2x2
 		}
 		struct
 		{
-			float[<2>] m0;
-			float[<2>] m1;
+			Vc2 m0;
+			Vc2 m1;
 		}
-		float[<4>] m;
+		Vc4 m;
 	}
 }
 
-fn float[<2>] apply1(Matrix2x2* mat, float[<2>] vec)
+fn Vc2 apply1(Matrix2x2* mat, Vc2 vec)
 {
-	return (float[<2>]) {
+	return (Vc2) {
 		mat.m00 * vec[0] + mat.m01 * vec[1],
 		mat.m10 * vec[0] + mat.m11 * vec[1],
 	};
 }
 
-fn float[<2>] apply2(Matrix2x2* mat, float[<2>] vec)
+fn Vc2 apply2(Matrix2x2* mat, Vc2 vec)
 {
-	return (float[<2>]) {
+	return (Vc2) {
 		mat.m0[0] * vec[0] + mat.m0[1] * vec[1],
 		mat.m1[0] * vec[0] + mat.m1[1] * vec[1],
 	};
 }
 
-fn float[<2>] apply3(Matrix2x2* mat, float[<2>] vec)
+fn Vc2 apply3(Matrix2x2* mat, Vc2 vec)
 {
-	float[<2>] a = $$swizzle2(mat.m0, mat.m1, 0, 3);
-	float[<2>] b = $$swizzle2(mat.m0, mat.m1, 1, 2);
-	float[<2>] flip = $$swizzle(vec, 1, 0);
+	Vc2 a = (Vc2)$$swizzle2(mat.m0, mat.m1, 0, 3);
+	Vc2 b = (Vc2)$$swizzle2(mat.m0, mat.m1, 1, 2);
+	Vc2 flip = (Vc2)$$swizzle(vec, 1, 0);
 	return a * vec + b * flip;
 }
 
@@ -49,9 +51,9 @@ fn float[<2>] apply3(Matrix2x2* mat, float[<2>] vec)
 fn void main()
 {
 	Matrix2x2 a = { 1, -3, 5, -7 };
-	io::printfn("1: %s", apply1(&a, (float[<2>]) { 11, 13 }));
-	io::printfn("2: %s", apply2(&a, (float[<2>]) { 11, 13 }));
-	io::printfn("3: %s", apply3(&a, (float[<2>]) { 11, 13 }));
+	io::printfn("1: %s", apply1(&a, (Vc2) { 11, 13 }));
+	io::printfn("2: %s", apply2(&a, (Vc2) { 11, 13 }));
+	io::printfn("3: %s", apply3(&a, (Vc2) { 11, 13 }));
 }
 
 /* #expect: test.ll
diff --git a/test/test_suite/cast/implicit_infer_len_cast.c3t b/test/test_suite/cast/implicit_infer_len_cast.c3t
index 39eee4cc6..9392224b3 100644
--- a/test/test_suite/cast/implicit_infer_len_cast.c3t
+++ b/test/test_suite/cast/implicit_infer_len_cast.c3t
@@ -6,6 +6,7 @@ macro int test(int[*][*]* y)
 	$typeof(*y) z = *y;
 	return z[1][1];
 }
+
 fn void main()
 {
 	int[2][*] x = { { 2, 3}, { 5, 6 }};
@@ -19,19 +20,19 @@ fn void main()
 /* #expect: test.ll
 
   %x = alloca [2 x [2 x i32]], align 16
-  %y = alloca [1 x <2 x i32>], align 8
-  %z = alloca [1 x <2 x i32>], align 8
-  %w = alloca [1 x <2 x i32>], align 8
+  %y = alloca [1 x <2 x i32>], align 4
+  %z = alloca [1 x <2 x i32>], align 4
+  %w = alloca [1 x <2 x i32>], align 4
   %aa = alloca %"int[<2>][]", align 8
-  %literal = alloca [1 x <2 x i32>], align 8
+  %literal = alloca [1 x <2 x i32>], align 4
   %bb = alloca [1 x %"int[]"], align 16
   %literal1 = alloca [2 x i32], align 4
   %z2 = alloca [2 x [2 x i32]], align 16
   call void @llvm.memcpy.p0.p0.i32(ptr align 16 %x, ptr align 16 @.__const, i32 16, i1 false)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %y, ptr align 8 @.__const.1, i32 8, i1 false)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %z, ptr align 8 %y, i32 8, i1 false)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %w, ptr align 8 %z, i32 8, i1 false)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %literal, ptr align 8 @.__const.2, i32 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %y, ptr align 4 @.__const.1, i32 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %z, ptr align 4 %y, i32 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %w, ptr align 4 %z, i32 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %literal, ptr align 4 @.__const.2, i32 8, i1 false)
   %0 = insertvalue %"int[<2>][]" undef, ptr %literal, 0
   %1 = insertvalue %"int[<2>][]" %0, i64 1, 1
   store %"int[<2>][]" %1, ptr %aa, align 8
diff --git a/test/test_suite/clang/2002-04.c3t b/test/test_suite/clang/2002-04.c3t
index 864a3264d..5fcf596c6 100644
--- a/test/test_suite/clang/2002-04.c3t
+++ b/test/test_suite/clang/2002-04.c3t
@@ -146,23 +146,19 @@ entry:
 ; Function Attrs:
 define void @test.test2(ptr byval(%FooSt) align 8 %0) #0 {
 entry:
-  %indirectarg = alloca %FooSt, align 8
-  %indirectarg4 = alloca %FooSt, align 8
   %ptradd = getelementptr inbounds i8, ptr %0, i64 2
   %ptradd1 = getelementptr inbounds i8, ptr %0, i64 4
   %ptradd2 = getelementptr inbounds i8, ptr %0, i64 8
   %ptradd3 = getelementptr inbounds i8, ptr %0, i64 12
   %1 = load i16, ptr %ptradd3, align 4
   %sext = sext i16 %1 to i32
-  %2 = load i8, ptr %0, align 4
+  %2 = load i8, ptr %0, align 8
   %3 = load i16, ptr %ptradd, align 2
   %4 = load i8, ptr %ptradd1, align 4
-  %5 = load i32, ptr %ptradd2, align 4
+  %5 = load i32, ptr %ptradd2, align 8
   %6 = call i32 @testE(i8 zeroext %2, i16 signext %3, i8 zeroext %4, i32 %5, i32 %sext, float 0x3FB99999A0000000)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg, ptr align 4 %0, i32 20, i1 false)
-  %7 = call i32 @testF(ptr byval(%FooSt) align 8 %indirectarg, float 0x3FB99999A0000000)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg4, ptr align 4 %0, i32 20, i1 false)
-  call void @test.test2(ptr byval(%FooSt) align 8 %indirectarg4)
+  %7 = call i32 @testF(ptr byval(%FooSt) align 8 %0, float 0x3FB99999A0000000)
+  call void @test.test2(ptr byval(%FooSt) align 8 %0)
   call void @test.test3(ptr %0)
   ret void
 }
diff --git a/test/test_suite/compile_time/compile_time_access_subscript.c3t b/test/test_suite/compile_time/compile_time_access_subscript.c3t
index 3171bf79b..e5e97dac6 100644
--- a/test/test_suite/compile_time/compile_time_access_subscript.c3t
+++ b/test/test_suite/compile_time/compile_time_access_subscript.c3t
@@ -71,7 +71,7 @@ fn void main()
   store ptr null, ptr %z6, align 8
   store i8 0, ptr %z7, align 1
   store i64 0, ptr %z8, align 8
-  store <3 x i32> zeroinitializer, ptr %z9, align 16
+  store <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr %z9, align 16
   store float 0.000000e+00, ptr %z10, align 4
   %ptradd11 = getelementptr inbounds i8, ptr %z10, i64 4
   store float 0.000000e+00, ptr %ptradd11, align 4
diff --git a/test/test_suite/compile_time/untyped_conversions.c3t b/test/test_suite/compile_time/untyped_conversions.c3t
index 050787316..37eefc4f9 100644
--- a/test/test_suite/compile_time/untyped_conversions.c3t
+++ b/test/test_suite/compile_time/untyped_conversions.c3t
@@ -5,7 +5,8 @@ struct Foo
 {
 	int a; int b;
 }
-fn void test(int[2] a, int[] b, int[<2>] c)
+typedef Int2V = int[<2>] @simd;
+fn void test(int[2] a, int[] b, Int2V c)
 {
 	io::printfn("%s %s %s", a, b, c);
 }
@@ -30,11 +31,12 @@ fn void main()
 %any = type { ptr, i64 }
 
 @"$ct.test.Foo" = linkonce global %.introspect { i8 10, i64 0, ptr null, i64 8, i64 0, i64 2, [0 x i64] zeroinitializer }, align 8
+@"$ct.test.Int2V" = linkonce global %.introspect { i8 18, i64 0, ptr null, i64 8, i64 ptrtoint (ptr @"$ct.v2$int" to i64), i64 0, [0 x i64] zeroinitializer }, align 8
+@"$ct.v2$int" = linkonce global %.introspect { i8 17, i64 0, ptr null, i64 8, i64 ptrtoint (ptr @"$ct.int" to i64), i64 2, [0 x i64] zeroinitializer }, align 8
+@"$ct.int" = linkonce global %.introspect { i8 2, i64 0, ptr null, i64 4, i64 0, i64 0, [0 x i64] zeroinitializer }, align 8
 @.str = private unnamed_addr constant [9 x i8] c"%s %s %s\00", align 1
 @"$ct.a2$int" = linkonce global %.introspect { i8 15, i64 0, ptr null, i64 8, i64 ptrtoint (ptr @"$ct.int" to i64), i64 2, [0 x i64] zeroinitializer }, align 8
-@"$ct.int" = linkonce global %.introspect { i8 2, i64 0, ptr null, i64 4, i64 0, i64 0, [0 x i64] zeroinitializer }, align 8
 @"$ct.sa$int" = linkonce global %.introspect { i8 16, i64 0, ptr null, i64 16, i64 ptrtoint (ptr @"$ct.int" to i64), i64 0, [0 x i64] zeroinitializer }, align 8
-@"$ct.v2$int" = linkonce global %.introspect { i8 17, i64 0, ptr null, i64 8, i64 ptrtoint (ptr @"$ct.int" to i64), i64 2, [0 x i64] zeroinitializer }, align 8
 @.__const = private unnamed_addr constant [1 x %Foo] [%Foo { i32 1, i32 2 }], align 4
 @.__const.1 = private unnamed_addr constant %Foo { i32 1, i32 2 }, align 4
 @.__const.2 = private unnamed_addr constant [1 x [2 x i32]] [[2 x i32] [i32 1, i32 2]], align 4
@@ -70,7 +72,7 @@ entry:
   %ptradd1 = getelementptr inbounds i8, ptr %varargslots, i64 16
   store %any %7, ptr %ptradd1, align 16
   %8 = insertvalue %any undef, ptr %c, 0
-  %9 = insertvalue %any %8, i64 ptrtoint (ptr @"$ct.v2$int" to i64), 1
+  %9 = insertvalue %any %8, i64 ptrtoint (ptr @"$ct.test.Int2V" to i64), 1
   %ptradd2 = getelementptr inbounds i8, ptr %varargslots, i64 32
   store %any %9, ptr %ptradd2, align 16
   %10 = call i64 @std.io.printfn(ptr %retparam, ptr @.str, i64 8, ptr %varargslots, i64 3)
diff --git a/test/test_suite/debug_symbols/defer_macro.c3t b/test/test_suite/debug_symbols/defer_macro.c3t
index 6313c2d73..6de671826 100644
--- a/test/test_suite/debug_symbols/defer_macro.c3t
+++ b/test/test_suite/debug_symbols/defer_macro.c3t
@@ -86,7 +86,7 @@ macro Id unique()
 }
 
 
-typedef Color = float[<4>];
+typedef Color = float[<4>] @simd;
 
 const Color BLACK = {0, 0, 0, 1};
 const Color WHITE = {1, 1, 1, 1};
diff --git a/test/test_suite/expressions/ternary_infer.c3t b/test/test_suite/expressions/ternary_infer.c3t
index f29b7dde6..44e85f4cb 100644
--- a/test/test_suite/expressions/ternary_infer.c3t
+++ b/test/test_suite/expressions/ternary_infer.c3t
@@ -1,14 +1,16 @@
 // #target: macos-x64
 module test;
-fn int[<2>] foo(int x)
+typedef Int2V = int[<2>] @simd;
+
+fn Int2V foo(int x)
 {
-  return x > 0 ? {0, 0} : {255, 255};
+	return x > 0 ? {0, 0} : {255, 255};
 }
 
 
 fn int main()
 {
-  return 0;
+	return 0;
 }
 /* #expect: test.ll
 
diff --git a/test/test_suite/functions/test_regression.c3t b/test/test_suite/functions/test_regression.c3t
index b37ba81ac..f0f66118f 100644
--- a/test/test_suite/functions/test_regression.c3t
+++ b/test/test_suite/functions/test_regression.c3t
@@ -292,7 +292,7 @@ entry:
   %c = alloca %Bobo, align 4
   %indirectarg = alloca %Bobo, align 8
   call void @llvm.memcpy.p0.p0.i32(ptr align 4 %de, ptr align 4 @.__const, i32 12, i1 false)
-  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %c, ptr align 4 %1, i32 20, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %c, ptr align 8 %1, i32 20, i1 false)
   call void @llvm.memcpy.p0.p0.i32(ptr align 8 %indirectarg, ptr align 4 %c, i32 20, i1 false)
   %2 = call i32 @test.helo(double 1.000000e+00, ptr byval(%Bobo) align 8 %indirectarg)
   ret i32 1
@@ -452,9 +452,9 @@ loop.exit:                                        ; preds = %loop.cond
   store i32 3, ptr %elements, align 4
   %11 = call i32 (ptr, ...) @printf(ptr @.str.4)
   call void @llvm.memset.p0.i64(ptr align 8 %array, i8 0, i64 40, i1 false)
-  call void @"std_collections_list$int$.List.push"(ptr %array, i32 100)
-  call void @"std_collections_list$int$.List.push"(ptr %array, i32 200)
-  call void @"std_collections_list$int$.List.push"(ptr %array, i32 400)
+  call void @"std_collections_list$int$.List.push"(ptr %array, i32 100) #3
+  call void @"std_collections_list$int$.List.push"(ptr %array, i32 200) #3
+  call void @"std_collections_list$int$.List.push"(ptr %array, i32 400) #3
   call void @"std_collections_list$int$.List.push"(ptr %array, i32 600) #3
   call void @"std_collections_list$int$.List.insert_at"(ptr %array, i64 2, i32 300)
   store i32 0, ptr %i1, align 4
diff --git a/test/test_suite/slices/slice_to_slice_vector_assign.c3t b/test/test_suite/slices/slice_to_slice_vector_assign.c3t
index a1e46acae..8d6227b84 100644
--- a/test/test_suite/slices/slice_to_slice_vector_assign.c3t
+++ b/test/test_suite/slices/slice_to_slice_vector_assign.c3t
@@ -41,8 +41,8 @@ entry:
   %retparam14 = alloca i64, align 8
   %varargslots16 = alloca [1 x %any], align 16
   %retparam17 = alloca i64, align 8
-  store <7 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, ptr %z, align 32
-  store <6 x i32> zeroinitializer, ptr %y, align 32
+  store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>, ptr %z, align 32
+  store <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef>, ptr %y, align 32
   %ptradd = getelementptr inbounds i8, ptr %z, i64 12
   %0 = insertvalue %"int[]" undef, ptr %ptradd, 0
   %1 = insertvalue %"int[]" %0, i64 3, 1
diff --git a/test/test_suite/statements/foreach_common.c3t b/test/test_suite/statements/foreach_common.c3t
index 66ec250d1..6a7b763e0 100644
--- a/test/test_suite/statements/foreach_common.c3t
+++ b/test/test_suite/statements/foreach_common.c3t
@@ -82,13 +82,13 @@ entry:
   %.anon65 = alloca i64, align 8
   %i69 = alloca i64, align 8
   %a70 = alloca float, align 4
-  %.anon74 = alloca i64, align 8
-  %i78 = alloca i8, align 1
-  %a80 = alloca double, align 8
-  %.anon85 = alloca i64, align 8
-  %a89 = alloca double, align 8
+  %.anon75 = alloca i64, align 8
+  %i79 = alloca i8, align 1
+  %a81 = alloca double, align 8
+  %.anon87 = alloca i64, align 8
+  %a91 = alloca double, align 8
   call void @llvm.memcpy.p0.p0.i32(ptr align 4 %foo, ptr align 4 @.__const, i32 12, i1 false)
-  store <3 x float> <float 2.000000e+00, float 4.500000e+00, float 8.000000e+00>, ptr %foo2, align 16
+  store <4 x float> <float 2.000000e+00, float 4.500000e+00, float 8.000000e+00, float undef>, ptr %foo2, align 16
   store i64 0, ptr %.anon, align 8
   br label %loop.cond
 
@@ -242,9 +242,10 @@ loop.cond48:                                      ; preds = %loop.body50, %loop.
   br i1 %gt49, label %loop.body50, label %loop.exit54
 
 loop.body50:                                      ; preds = %loop.cond48
-  %37 = load <3 x float>, ptr %foo2, align 16
+  %37 = load <4 x float>, ptr %foo2, align 16
+  %extractvec = shufflevector <4 x float> %37, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   %38 = load i64, ptr %.anon47, align 8
-  %39 = extractelement <3 x float> %37, i64 %38
+  %39 = extractelement <3 x float> %extractvec, i64 %38
   store float %39, ptr %a51, align 4
   %40 = load float, ptr %a51, align 4
   %fpfpext52 = fpext float %40 to double
@@ -287,73 +288,76 @@ loop.exit64:                                      ; preds = %loop.cond56
 loop.cond66:                                      ; preds = %loop.body68, %loop.exit64
   %49 = load i64, ptr %.anon65, align 8
   %gt67 = icmp ugt i64 3, %49
-  br i1 %gt67, label %loop.body68, label %loop.exit73
+  br i1 %gt67, label %loop.body68, label %loop.exit74
 
 loop.body68:                                      ; preds = %loop.cond66
   %50 = load i64, ptr %.anon65, align 8
   store i64 %50, ptr %i69, align 8
-  %51 = load <3 x float>, ptr %foo2, align 16
+  %51 = load <4 x float>, ptr %foo2, align 16
+  %extractvec71 = shufflevector <4 x float> %51, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   %52 = load i64, ptr %.anon65, align 8
-  %53 = extractelement <3 x float> %51, i64 %52
+  %53 = extractelement <3 x float> %extractvec71, i64 %52
   store float %53, ptr %a70, align 4
   %54 = load float, ptr %a70, align 4
-  %fpfpext71 = fpext float %54 to double
+  %fpfpext72 = fpext float %54 to double
   %55 = load i64, ptr %i69, align 8
-  call void (ptr, ...) @printf(ptr @.str.8, i64 %55, double %fpfpext71)
+  call void (ptr, ...) @printf(ptr @.str.8, i64 %55, double %fpfpext72)
   %56 = load i64, ptr %.anon65, align 8
-  %addnuw72 = add nuw i64 %56, 1
-  store i64 %addnuw72, ptr %.anon65, align 8
+  %addnuw73 = add nuw i64 %56, 1
+  store i64 %addnuw73, ptr %.anon65, align 8
   br label %loop.cond66
 
-loop.exit73:                                      ; preds = %loop.cond66
-  store i64 0, ptr %.anon74, align 8
-  br label %loop.cond75
+loop.exit74:                                      ; preds = %loop.cond66
+  store i64 0, ptr %.anon75, align 8
+  br label %loop.cond76
 
-loop.cond75:                                      ; preds = %loop.body77, %loop.exit73
-  %57 = load i64, ptr %.anon74, align 8
-  %gt76 = icmp ugt i64 3, %57
-  br i1 %gt76, label %loop.body77, label %loop.exit84
+loop.cond76:                                      ; preds = %loop.body78, %loop.exit74
+  %57 = load i64, ptr %.anon75, align 8
+  %gt77 = icmp ugt i64 3, %57
+  br i1 %gt77, label %loop.body78, label %loop.exit86
 
-loop.body77:                                      ; preds = %loop.cond75
-  %58 = load i64, ptr %.anon74, align 8
-  %trunc79 = trunc i64 %58 to i8
-  store i8 %trunc79, ptr %i78, align 1
-  %59 = load <3 x float>, ptr %foo2, align 16
-  %60 = load i64, ptr %.anon74, align 8
-  %61 = extractelement <3 x float> %59, i64 %60
-  %fpfpext81 = fpext float %61 to double
-  store double %fpfpext81, ptr %a80, align 8
-  %62 = load i8, ptr %i78, align 1
-  %zext82 = zext i8 %62 to i32
-  %63 = load double, ptr %a80, align 8
-  call void (ptr, ...) @printf(ptr @.str.9, i32 %zext82, double %63)
-  %64 = load i64, ptr %.anon74, align 8
-  %addnuw83 = add nuw i64 %64, 1
-  store i64 %addnuw83, ptr %.anon74, align 8
-  br label %loop.cond75
+loop.body78:                                      ; preds = %loop.cond76
+  %58 = load i64, ptr %.anon75, align 8
+  %trunc80 = trunc i64 %58 to i8
+  store i8 %trunc80, ptr %i79, align 1
+  %59 = load <4 x float>, ptr %foo2, align 16
+  %extractvec82 = shufflevector <4 x float> %59, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %60 = load i64, ptr %.anon75, align 8
+  %61 = extractelement <3 x float> %extractvec82, i64 %60
+  %fpfpext83 = fpext float %61 to double
+  store double %fpfpext83, ptr %a81, align 8
+  %62 = load i8, ptr %i79, align 1
+  %zext84 = zext i8 %62 to i32
+  %63 = load double, ptr %a81, align 8
+  call void (ptr, ...) @printf(ptr @.str.9, i32 %zext84, double %63)
+  %64 = load i64, ptr %.anon75, align 8
+  %addnuw85 = add nuw i64 %64, 1
+  store i64 %addnuw85, ptr %.anon75, align 8
+  br label %loop.cond76
 
-loop.exit84:                                      ; preds = %loop.cond75
-  store i64 0, ptr %.anon85, align 8
-  br label %loop.cond86
+loop.exit86:                                      ; preds = %loop.cond76
+  store i64 0, ptr %.anon87, align 8
+  br label %loop.cond88
 
-loop.cond86:                                      ; preds = %loop.body88, %loop.exit84
-  %65 = load i64, ptr %.anon85, align 8
-  %gt87 = icmp ugt i64 3, %65
-  br i1 %gt87, label %loop.body88, label %loop.exit92
+loop.cond88:                                      ; preds = %loop.body90, %loop.exit86
+  %65 = load i64, ptr %.anon87, align 8
+  %gt89 = icmp ugt i64 3, %65
+  br i1 %gt89, label %loop.body90, label %loop.exit95
 
-loop.body88:                                      ; preds = %loop.cond86
-  %66 = load <3 x float>, ptr %foo2, align 16
-  %67 = load i64, ptr %.anon85, align 8
-  %68 = extractelement <3 x float> %66, i64 %67
-  %fpfpext90 = fpext float %68 to double
-  store double %fpfpext90, ptr %a89, align 8
-  %69 = load double, ptr %a89, align 8
+loop.body90:                                      ; preds = %loop.cond88
+  %66 = load <4 x float>, ptr %foo2, align 16
+  %extractvec92 = shufflevector <4 x float> %66, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %67 = load i64, ptr %.anon87, align 8
+  %68 = extractelement <3 x float> %extractvec92, i64 %67
+  %fpfpext93 = fpext float %68 to double
+  store double %fpfpext93, ptr %a91, align 8
+  %69 = load double, ptr %a91, align 8
   call void (ptr, ...) @printf(ptr @.str.10, double %69)
-  %70 = load i64, ptr %.anon85, align 8
-  %addnuw91 = add nuw i64 %70, 1
-  store i64 %addnuw91, ptr %.anon85, align 8
-  br label %loop.cond86
+  %70 = load i64, ptr %.anon87, align 8
+  %addnuw94 = add nuw i64 %70, 1
+  store i64 %addnuw94, ptr %.anon87, align 8
+  br label %loop.cond88
 
-loop.exit92:                                      ; preds = %loop.cond86
+loop.exit95:                                      ; preds = %loop.cond88
   ret void
 }
diff --git a/test/test_suite/statements/foreach_r_common.c3t b/test/test_suite/statements/foreach_r_common.c3t
index 32e301478..b9b555c26 100644
--- a/test/test_suite/statements/foreach_r_common.c3t
+++ b/test/test_suite/statements/foreach_r_common.c3t
@@ -82,13 +82,13 @@ entry:
   %.anon65 = alloca i64, align 8
   %i70 = alloca i64, align 8
   %a71 = alloca float, align 4
-  %.anon74 = alloca i64, align 8
-  %i79 = alloca i8, align 1
-  %a81 = alloca double, align 8
-  %.anon85 = alloca i64, align 8
-  %a90 = alloca double, align 8
+  %.anon75 = alloca i64, align 8
+  %i80 = alloca i8, align 1
+  %a82 = alloca double, align 8
+  %.anon87 = alloca i64, align 8
+  %a92 = alloca double, align 8
   call void @llvm.memcpy.p0.p0.i32(ptr align 4 %foo, ptr align 4 @.__const, i32 12, i1 false)
-  store <3 x float> <float 2.000000e+00, float 4.500000e+00, float 8.000000e+00>, ptr %foo2, align 16
+  store <4 x float> <float 2.000000e+00, float 4.500000e+00, float 8.000000e+00, float undef>, ptr %foo2, align 16
   store i64 3, ptr %.anon, align 8
   br label %loop.cond
 
@@ -245,9 +245,10 @@ loop.body50:                                      ; preds = %loop.cond48
   %37 = load i64, ptr %.anon47, align 8
   %subnuw51 = sub nuw i64 %37, 1
   store i64 %subnuw51, ptr %.anon47, align 8
-  %38 = load <3 x float>, ptr %foo2, align 16
+  %38 = load <4 x float>, ptr %foo2, align 16
+  %extractvec = shufflevector <4 x float> %38, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   %39 = load i64, ptr %.anon47, align 8
-  %40 = extractelement <3 x float> %38, i64 %39
+  %40 = extractelement <3 x float> %extractvec, i64 %39
   store float %40, ptr %a52, align 4
   %41 = load float, ptr %a52, align 4
   %fpfpext53 = fpext float %41 to double
@@ -287,7 +288,7 @@ loop.exit64:                                      ; preds = %loop.cond56
 loop.cond66:                                      ; preds = %loop.body68, %loop.exit64
   %49 = load i64, ptr %.anon65, align 8
   %gt67 = icmp ugt i64 %49, 0
-  br i1 %gt67, label %loop.body68, label %loop.exit73
+  br i1 %gt67, label %loop.body68, label %loop.exit74
 
 loop.body68:                                      ; preds = %loop.cond66
   %50 = load i64, ptr %.anon65, align 8
@@ -295,65 +296,68 @@ loop.body68:                                      ; preds = %loop.cond66
   store i64 %subnuw69, ptr %.anon65, align 8
   %51 = load i64, ptr %.anon65, align 8
   store i64 %51, ptr %i70, align 8
-  %52 = load <3 x float>, ptr %foo2, align 16
+  %52 = load <4 x float>, ptr %foo2, align 16
+  %extractvec72 = shufflevector <4 x float> %52, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   %53 = load i64, ptr %.anon65, align 8
-  %54 = extractelement <3 x float> %52, i64 %53
+  %54 = extractelement <3 x float> %extractvec72, i64 %53
   store float %54, ptr %a71, align 4
   %55 = load float, ptr %a71, align 4
-  %fpfpext72 = fpext float %55 to double
+  %fpfpext73 = fpext float %55 to double
   %56 = load i64, ptr %i70, align 8
-  call void (ptr, ...) @printf(ptr @.str.8, i64 %56, double %fpfpext72)
+  call void (ptr, ...) @printf(ptr @.str.8, i64 %56, double %fpfpext73)
   br label %loop.cond66
 
-loop.exit73:                                      ; preds = %loop.cond66
-  store i64 3, ptr %.anon74, align 8
-  br label %loop.cond75
+loop.exit74:                                      ; preds = %loop.cond66
+  store i64 3, ptr %.anon75, align 8
+  br label %loop.cond76
 
-loop.cond75:                                      ; preds = %loop.body77, %loop.exit73
-  %57 = load i64, ptr %.anon74, align 8
-  %gt76 = icmp ugt i64 %57, 0
-  br i1 %gt76, label %loop.body77, label %loop.exit84
+loop.cond76:                                      ; preds = %loop.body78, %loop.exit74
+  %57 = load i64, ptr %.anon75, align 8
+  %gt77 = icmp ugt i64 %57, 0
+  br i1 %gt77, label %loop.body78, label %loop.exit86
 
-loop.body77:                                      ; preds = %loop.cond75
-  %58 = load i64, ptr %.anon74, align 8
-  %subnuw78 = sub nuw i64 %58, 1
-  store i64 %subnuw78, ptr %.anon74, align 8
-  %59 = load i64, ptr %.anon74, align 8
-  %trunc80 = trunc i64 %59 to i8
-  store i8 %trunc80, ptr %i79, align 1
-  %60 = load <3 x float>, ptr %foo2, align 16
-  %61 = load i64, ptr %.anon74, align 8
-  %62 = extractelement <3 x float> %60, i64 %61
-  %fpfpext82 = fpext float %62 to double
-  store double %fpfpext82, ptr %a81, align 8
-  %63 = load i8, ptr %i79, align 1
-  %zext83 = zext i8 %63 to i32
-  %64 = load double, ptr %a81, align 8
-  call void (ptr, ...) @printf(ptr @.str.9, i32 %zext83, double %64)
-  br label %loop.cond75
+loop.body78:                                      ; preds = %loop.cond76
+  %58 = load i64, ptr %.anon75, align 8
+  %subnuw79 = sub nuw i64 %58, 1
+  store i64 %subnuw79, ptr %.anon75, align 8
+  %59 = load i64, ptr %.anon75, align 8
+  %trunc81 = trunc i64 %59 to i8
+  store i8 %trunc81, ptr %i80, align 1
+  %60 = load <4 x float>, ptr %foo2, align 16
+  %extractvec83 = shufflevector <4 x float> %60, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %61 = load i64, ptr %.anon75, align 8
+  %62 = extractelement <3 x float> %extractvec83, i64 %61
+  %fpfpext84 = fpext float %62 to double
+  store double %fpfpext84, ptr %a82, align 8
+  %63 = load i8, ptr %i80, align 1
+  %zext85 = zext i8 %63 to i32
+  %64 = load double, ptr %a82, align 8
+  call void (ptr, ...) @printf(ptr @.str.9, i32 %zext85, double %64)
+  br label %loop.cond76
 
-loop.exit84:                                      ; preds = %loop.cond75
-  store i64 3, ptr %.anon85, align 8
-  br label %loop.cond86
+loop.exit86:                                      ; preds = %loop.cond76
+  store i64 3, ptr %.anon87, align 8
+  br label %loop.cond88
 
-loop.cond86:                                      ; preds = %loop.body88, %loop.exit84
-  %65 = load i64, ptr %.anon85, align 8
-  %gt87 = icmp ugt i64 %65, 0
-  br i1 %gt87, label %loop.body88, label %loop.exit92
+loop.cond88:                                      ; preds = %loop.body90, %loop.exit86
+  %65 = load i64, ptr %.anon87, align 8
+  %gt89 = icmp ugt i64 %65, 0
+  br i1 %gt89, label %loop.body90, label %loop.exit95
 
-loop.body88:                                      ; preds = %loop.cond86
-  %66 = load i64, ptr %.anon85, align 8
-  %subnuw89 = sub nuw i64 %66, 1
-  store i64 %subnuw89, ptr %.anon85, align 8
-  %67 = load <3 x float>, ptr %foo2, align 16
-  %68 = load i64, ptr %.anon85, align 8
-  %69 = extractelement <3 x float> %67, i64 %68
-  %fpfpext91 = fpext float %69 to double
-  store double %fpfpext91, ptr %a90, align 8
-  %70 = load double, ptr %a90, align 8
+loop.body90:                                      ; preds = %loop.cond88
+  %66 = load i64, ptr %.anon87, align 8
+  %subnuw91 = sub nuw i64 %66, 1
+  store i64 %subnuw91, ptr %.anon87, align 8
+  %67 = load <4 x float>, ptr %foo2, align 16
+  %extractvec93 = shufflevector <4 x float> %67, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %68 = load i64, ptr %.anon87, align 8
+  %69 = extractelement <3 x float> %extractvec93, i64 %68
+  %fpfpext94 = fpext float %69 to double
+  store double %fpfpext94, ptr %a92, align 8
+  %70 = load double, ptr %a92, align 8
   call void (ptr, ...) @printf(ptr @.str.10, double %70)
-  br label %loop.cond86
+  br label %loop.cond88
 
-loop.exit92:                                      ; preds = %loop.cond86
+loop.exit95:                                      ; preds = %loop.cond88
   ret void
 }
diff --git a/test/test_suite/union/designated_union_zeroing.c3t b/test/test_suite/union/designated_union_zeroing.c3t
index 1b3fdf78b..18ad570f4 100644
--- a/test/test_suite/union/designated_union_zeroing.c3t
+++ b/test/test_suite/union/designated_union_zeroing.c3t
@@ -1,13 +1,16 @@
 // #target: macos-x64
 module test;
-union Rect {
-  struct { float[<2>] min, max; }
+typedef Float2 = float[<2>] @simd;
+union Rect
+{
+	struct { Float2 min, max; }
 }
 
-fn Rect test_rect(float[<2>] max) {
-  Rect rect = {.max = max};
-  assert(rect.min == {});
-  return rect;
+fn Rect test_rect(Float2 max)
+{
+	Rect rect = { .max = max };
+	assert(rect.min == {});
+	return rect;
 }
 
 /* #expect: test.ll
diff --git a/test/test_suite/vector/vector_consts.c3t b/test/test_suite/vector/vector_consts.c3t
index 1a2c76710..9e6b1cd84 100644
--- a/test/test_suite/vector/vector_consts.c3t
+++ b/test/test_suite/vector/vector_consts.c3t
@@ -1,7 +1,8 @@
 // #target: macos-x64
 module foo;
 import std::math;
-fn int x(char[<8>] a, char[<8>] b)
+typedef Char8 = inline char[<8>] @simd;
+fn int x(Char8 a, Char8 b)
 {
 	bool[<8>] z = a.comp_eq(b);
 	return ((char[<8>]) { [0..7] = 255 } & (char[<8>])z + ~(char[<8>])z & (char[<8>]) { 0, 1, 2, 3, 4, 5, 6, 7 }).min();
diff --git a/test/test_suite/vector/vector_init_regression.c3t b/test/test_suite/vector/vector_init_regression.c3t
index ed616477d..5b8c66b86 100644
--- a/test/test_suite/vector/vector_init_regression.c3t
+++ b/test/test_suite/vector/vector_init_regression.c3t
@@ -95,24 +95,27 @@ entry:
   %b = alloca [4 x <4 x float>], align 16
   %.anon = alloca i64, align 8
   %v = alloca <4 x float>, align 16
-  %.anon90 = alloca i64, align 8
-  %v94 = alloca <4 x float>, align 16
+  %.anon92 = alloca i64, align 8
+  %v96 = alloca <4 x float>, align 16
   store float 0x3FE921CAC0000000, ptr %radians, align 4
-  store <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, ptr %axis, align 16
+  store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float undef>, ptr %axis, align 16
   %0 = load float, ptr %radians, align 4
   %1 = call reassoc arcp contract float @llvm.cos.f32(float %0)
   store float %1, ptr %cosr, align 4
   %2 = load float, ptr %radians, align 4
   %3 = call reassoc arcp contract float @llvm.sin.f32(float %2)
   store float %3, ptr %sinr, align 4
-  %4 = load <3 x float>, ptr %axis, align 16
-  %5 = extractelement <3 x float> %4, i64 0
+  %4 = load <4 x float>, ptr %axis, align 16
+  %extractvec = shufflevector <4 x float> %4, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %5 = extractelement <3 x float> %extractvec, i64 0
   store float %5, ptr %x, align 4
-  %6 = load <3 x float>, ptr %axis, align 16
-  %7 = extractelement <3 x float> %6, i64 1
+  %6 = load <4 x float>, ptr %axis, align 16
+  %extractvec1 = shufflevector <4 x float> %6, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %7 = extractelement <3 x float> %extractvec1, i64 1
   store float %7, ptr %y, align 4
-  %8 = load <3 x float>, ptr %axis, align 16
-  %9 = extractelement <3 x float> %8, i64 2
+  %8 = load <4 x float>, ptr %axis, align 16
+  %extractvec2 = shufflevector <4 x float> %8, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  %9 = extractelement <3 x float> %extractvec2, i64 2
   store float %9, ptr %z, align 4
   call void @llvm.memset.p0.i64(ptr align 16 %a, i8 0, i64 64, i1 false)
   %10 = load float, ptr %cosr, align 4
@@ -127,224 +130,224 @@ entry:
   %15 = insertelement <4 x float> undef, float %14, i64 0
   %16 = load float, ptr %x, align 4
   %17 = load float, ptr %y, align 4
-  %fmul1 = fmul reassoc arcp contract float %16, %17
+  %fmul3 = fmul reassoc arcp contract float %16, %17
   %18 = load float, ptr %cosr, align 4
-  %fpfpext2 = fpext
-  %fsub3 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext2
-  %fpfptrunc4 = fptrunc
+  %fpfpext4 = fpext
+  %fsub5 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext4
+  %fpfptrunc6 = fptrunc
   %19 = load float, ptr %z, align 4
   %20 = load float, ptr %sinr, align 4
-  %fmul5 = fmul reassoc arcp contract float %19, %20
-  %21 = fneg reassoc arcp contract float %fmul5
-  %22 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul1, float %fpfptrunc4, float %21)
+  %fmul7 = fmul reassoc arcp contract float %19, %20
+  %21 = fneg reassoc arcp contract float %fmul7
+  %22 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul3, float %fpfptrunc6, float %21)
   %23 = insertelement <4 x float> %15, float %22, i64 1
   %24 = load float, ptr %x, align 4
   %25 = load float, ptr %z, align 4
-  %fmul6 = fmul reassoc arcp contract float %24, %25
+  %fmul8 = fmul reassoc arcp contract float %24, %25
   %26 = load float, ptr %cosr, align 4
-  %fpfpext7 = fpext
-  %fsub8 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext7
-  %fpfptrunc9 = fptrunc
+  %fpfpext9 = fpext
+  %fsub10 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext9
+  %fpfptrunc11 = fptrunc
   %27 = load float, ptr %y, align 4
   %28 = load float, ptr %sinr, align 4
-  %fmul10 = fmul reassoc arcp contract float %27, %28
-  %29 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul6, float %fpfptrunc9, float %fmul10)
+  %fmul12 = fmul reassoc arcp contract float %27, %28
+  %29 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul8, float %fpfptrunc11, float %fmul12)
   %30 = insertelement <4 x float> %23, float %29, i64 2
   %31 = insertelement <4 x float> %30, float 0.000000e+00, i64 3
   store <4 x float> %31, ptr %a, align 16
   %32 = load float, ptr %y, align 4
   %33 = load float, ptr %x, align 4
-  %fmul11 = fmul reassoc arcp contract float %32, %33
+  %fmul13 = fmul reassoc arcp contract float %32, %33
   %34 = load float, ptr %cosr, align 4
-  %fpfpext12 = fpext
-  %fsub13 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext12
-  %fpfptrunc14 = fptrunc
+  %fpfpext14 = fpext
+  %fsub15 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext14
+  %fpfptrunc16 = fptrunc
   %35 = load float, ptr %z, align 4
   %36 = load float, ptr %sinr, align 4
-  %fmul15 = fmul reassoc arcp contract float %35, %36
-  %37 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul11, float %fpfptrunc14, float %fmul15)
+  %fmul17 = fmul reassoc arcp contract float %35, %36
+  %37 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul13, float %fpfptrunc16, float %fmul17)
   %38 = insertelement <4 x float> undef, float %37, i64 0
   %39 = load float, ptr %cosr, align 4
   %40 = load float, ptr %y, align 4
   %41 = load float, ptr %y, align 4
-  %fmul16 = fmul reassoc arcp contract float %40, %41
+  %fmul18 = fmul reassoc arcp contract float %40, %41
   %42 = load float, ptr %cosr, align 4
-  %fpfpext17 = fpext
-  %fsub18 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext17
-  %fpfptrunc19 = fptrunc
-  %43 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul16, float %fpfptrunc19, float %39)
+  %fpfpext19 = fpext
+  %fsub20 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext19
+  %fpfptrunc21 = fptrunc
+  %43 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul18, float %fpfptrunc21, float %39)
   %44 = insertelement <4 x float> %38, float %43, i64 1
   %45 = load float, ptr %y, align 4
   %46 = load float, ptr %z, align 4
-  %fmul20 = fmul reassoc arcp contract float %45, %46
+  %fmul22 = fmul reassoc arcp contract float %45, %46
   %47 = load float, ptr %cosr, align 4
-  %fpfpext21 = fpext
-  %fsub22 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext21
-  %fpfptrunc23 = fptrunc
+  %fpfpext23 = fpext
+  %fsub24 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext23
+  %fpfptrunc25 = fptrunc
   %48 = load float, ptr %x, align 4
   %49 = load float, ptr %sinr, align 4
-  %fmul24 = fmul reassoc arcp contract float %48, %49
-  %50 = fneg reassoc arcp contract float %fmul24
-  %51 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul20, float %fpfptrunc23, float %50)
+  %fmul26 = fmul reassoc arcp contract float %48, %49
+  %50 = fneg reassoc arcp contract float %fmul26
+  %51 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul22, float %fpfptrunc25, float %50)
   %52 = insertelement <4 x float> %44, float %51, i64 2
   %53 = insertelement <4 x float> %52, float 0.000000e+00, i64 3
   %ptradd = getelementptr inbounds i8, ptr %a, i64 16
   store <4 x float> %53, ptr %ptradd, align 16
   %54 = load float, ptr %z, align 4
   %55 = load float, ptr %x, align 4
-  %fmul25 = fmul reassoc arcp contract float %54, %55
+  %fmul27 = fmul reassoc arcp contract float %54, %55
   %56 = load float, ptr %cosr, align 4
-  %fpfpext26 = fpext
-  %fsub27 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext26
-  %fpfptrunc28 = fptrunc
+  %fpfpext28 = fpext
+  %fsub29 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext28
+  %fpfptrunc30 = fptrunc
   %57 = load float, ptr %y, align 4
   %58 = load float, ptr %sinr, align 4
-  %fmul29 = fmul reassoc arcp contract float %57, %58
-  %59 = fneg reassoc arcp contract float %fmul29
-  %60 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul25, float %fpfptrunc28, float %59)
+  %fmul31 = fmul reassoc arcp contract float %57, %58
+  %59 = fneg reassoc arcp contract float %fmul31
+  %60 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul27, float %fpfptrunc30, float %59)
   %61 = insertelement <4 x float> undef, float %60, i64 0
   %62 = load float, ptr %z, align 4
   %63 = load float, ptr %y, align 4
-  %fmul30 = fmul reassoc arcp contract float %62, %63
+  %fmul32 = fmul reassoc arcp contract float %62, %63
   %64 = load float, ptr %cosr, align 4
-  %fpfpext31 = fpext
-  %fsub32 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext31
-  %fpfptrunc33 = fptrunc
+  %fpfpext33 = fpext
+  %fsub34 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext33
+  %fpfptrunc35 = fptrunc
   %65 = load float, ptr %x, align 4
   %66 = load float, ptr %sinr, align 4
-  %fmul34 = fmul reassoc arcp contract float %65, %66
-  %67 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul30, float %fpfptrunc33, float %fmul34)
+  %fmul36 = fmul reassoc arcp contract float %65, %66
+  %67 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul32, float %fpfptrunc35, float %fmul36)
   %68 = insertelement <4 x float> %61, float %67, i64 1
   %69 = load float, ptr %cosr, align 4
   %70 = load float, ptr %z, align 4
   %71 = load float, ptr %z, align 4
-  %fmul35 = fmul reassoc arcp contract float %70, %71
+  %fmul37 = fmul reassoc arcp contract float %70, %71
   %72 = load float, ptr %cosr, align 4
-  %fpfpext36 = fpext
-  %fsub37 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext36
-  %fpfptrunc38 = fptrunc
-  %73 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul35, float %fpfptrunc38, float %69)
+  %fpfpext38 = fpext
+  %fsub39 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext38
+  %fpfptrunc40 = fptrunc
+  %73 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul37, float %fpfptrunc40, float %69)
   %74 = insertelement <4 x float> %68, float %73, i64 2
   %75 = insertelement <4 x float> %74, float 0.000000e+00, i64 3
-  %ptradd39 = getelementptr inbounds i8, ptr %a, i64 32
-  store <4 x float> %75, ptr %ptradd39, align 16
-  %ptradd40 = getelementptr inbounds i8, ptr %a, i64 48
-  store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, ptr %ptradd40, align 16
+  %ptradd41 = getelementptr inbounds i8, ptr %a, i64 32
+  store <4 x float> %75, ptr %ptradd41, align 16
+  %ptradd42 = getelementptr inbounds i8, ptr %a, i64 48
+  store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, ptr %ptradd42, align 16
   %76 = load float, ptr %cosr, align 4
   %77 = load float, ptr %x, align 4
   %78 = load float, ptr %x, align 4
-  %fmul41 = fmul reassoc arcp contract float %77, %78
+  %fmul43 = fmul reassoc arcp contract float %77, %78
   %79 = load float, ptr %cosr, align 4
-  %fpfpext42 = fpext
-  %fsub43 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext42
-  %fpfptrunc44 = fptrunc
-  %80 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul41, float %fpfptrunc44, float %76)
+  %fpfpext44 = fpext
+  %fsub45 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext44
+  %fpfptrunc46 = fptrunc
+  %80 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul43, float %fpfptrunc46, float %76)
   %81 = insertelement <4 x float> undef, float %80, i64 0
   %82 = load float, ptr %x, align 4
   %83 = load float, ptr %y, align 4
-  %fmul45 = fmul reassoc arcp contract float %82, %83
+  %fmul47 = fmul reassoc arcp contract float %82, %83
   %84 = load float, ptr %cosr, align 4
-  %fpfpext46 = fpext
-  %fsub47 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext46
-  %fpfptrunc48 = fptrunc
+  %fpfpext48 = fpext
+  %fsub49 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext48
+  %fpfptrunc50 = fptrunc
   %85 = load float, ptr %z, align 4
   %86 = load float, ptr %sinr, align 4
-  %fmul49 = fmul reassoc arcp contract float %85, %86
-  %87 = fneg reassoc arcp contract float %fmul49
-  %88 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul45, float %fpfptrunc48, float %87)
+  %fmul51 = fmul reassoc arcp contract float %85, %86
+  %87 = fneg reassoc arcp contract float %fmul51
+  %88 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul47, float %fpfptrunc50, float %87)
   %89 = insertelement <4 x float> %81, float %88, i64 1
   %90 = load float, ptr %x, align 4
   %91 = load float, ptr %z, align 4
-  %fmul50 = fmul reassoc arcp contract float %90, %91
+  %fmul52 = fmul reassoc arcp contract float %90, %91
   %92 = load float, ptr %cosr, align 4
-  %fpfpext51 = fpext
-  %fsub52 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext51
-  %fpfptrunc53 = fptrunc
+  %fpfpext53 = fpext
+  %fsub54 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext53
+  %fpfptrunc55 = fptrunc
   %93 = load float, ptr %y, align 4
   %94 = load float, ptr %sinr, align 4
-  %fmul54 = fmul reassoc arcp contract float %93, %94
-  %95 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul50, float %fpfptrunc53, float %fmul54)
+  %fmul56 = fmul reassoc arcp contract float %93, %94
+  %95 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul52, float %fpfptrunc55, float %fmul56)
   %96 = insertelement <4 x float> %89, float %95, i64 2
   %97 = insertelement <4 x float> %96, float 0.000000e+00, i64 3
   store <4 x float> %97, ptr %b, align 16
-  %ptradd55 = getelementptr inbounds i8, ptr %b, i64 16
+  %ptradd57 = getelementptr inbounds i8, ptr %b, i64 16
   %98 = load float, ptr %y, align 4
   %99 = load float, ptr %x, align 4
-  %fmul56 = fmul reassoc arcp contract float %98, %99
+  %fmul58 = fmul reassoc arcp contract float %98, %99
   %100 = load float, ptr %cosr, align 4
-  %fpfpext57 = fpext
-  %fsub58 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext57
-  %fpfptrunc59 = fptrunc
+  %fpfpext59 = fpext
+  %fsub60 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext59
+  %fpfptrunc61 = fptrunc
   %101 = load float, ptr %z, align 4
   %102 = load float, ptr %sinr, align 4
-  %fmul60 = fmul reassoc arcp contract float %101, %102
-  %103 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul56, float %fpfptrunc59, float %fmul60)
+  %fmul62 = fmul reassoc arcp contract float %101, %102
+  %103 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul58, float %fpfptrunc61, float %fmul62)
   %104 = insertelement <4 x float> undef, float %103, i64 0
   %105 = load float, ptr %cosr, align 4
   %106 = load float, ptr %y, align 4
   %107 = load float, ptr %y, align 4
-  %fmul61 = fmul reassoc arcp contract float %106, %107
+  %fmul63 = fmul reassoc arcp contract float %106, %107
   %108 = load float, ptr %cosr, align 4
-  %fpfpext62 = fpext
-  %fsub63 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext62
-  %fpfptrunc64 = fptrunc
-  %109 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul61, float %fpfptrunc64, float %105)
+  %fpfpext64 = fpext
+  %fsub65 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext64
+  %fpfptrunc66 = fptrunc
+  %109 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul63, float %fpfptrunc66, float %105)
   %110 = insertelement <4 x float> %104, float %109, i64 1
   %111 = load float, ptr %y, align 4
   %112 = load float, ptr %z, align 4
-  %fmul65 = fmul reassoc arcp contract float %111, %112
+  %fmul67 = fmul reassoc arcp contract float %111, %112
   %113 = load float, ptr %cosr, align 4
-  %fpfpext66 = fpext
-  %fsub67 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext66
-  %fpfptrunc68 = fptrunc
+  %fpfpext68 = fpext
+  %fsub69 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext68
+  %fpfptrunc70 = fptrunc
   %114 = load float, ptr %x, align 4
   %115 = load float, ptr %sinr, align 4
-  %fmul69 = fmul reassoc arcp contract float %114, %115
-  %116 = fneg reassoc arcp contract float %fmul69
-  %117 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul65, float %fpfptrunc68, float %116)
+  %fmul71 = fmul reassoc arcp contract float %114, %115
+  %116 = fneg reassoc arcp contract float %fmul71
+  %117 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul67, float %fpfptrunc70, float %116)
   %118 = insertelement <4 x float> %110, float %117, i64 2
   %119 = insertelement <4 x float> %118, float 0.000000e+00, i64 3
-  store <4 x float> %119, ptr %ptradd55, align 16
-  %ptradd70 = getelementptr inbounds i8, ptr %b, i64 32
+  store <4 x float> %119, ptr %ptradd57, align 16
+  %ptradd72 = getelementptr inbounds i8, ptr %b, i64 32
   %120 = load float, ptr %z, align 4
   %121 = load float, ptr %x, align 4
-  %fmul71 = fmul reassoc arcp contract float %120, %121
+  %fmul73 = fmul reassoc arcp contract float %120, %121
   %122 = load float, ptr %cosr, align 4
-  %fpfpext72 = fpext
-  %fsub73 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext72
-  %fpfptrunc74 = fptrunc
+  %fpfpext74 = fpext
+  %fsub75 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext74
+  %fpfptrunc76 = fptrunc
   %123 = load float, ptr %y, align 4
   %124 = load float, ptr %sinr, align 4
-  %fmul75 = fmul reassoc arcp contract float %123, %124
-  %125 = fneg reassoc arcp contract float %fmul75
-  %126 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul71, float %fpfptrunc74, float %125)
+  %fmul77 = fmul reassoc arcp contract float %123, %124
+  %125 = fneg reassoc arcp contract float %fmul77
+  %126 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul73, float %fpfptrunc76, float %125)
   %127 = insertelement <4 x float> undef, float %126, i64 0
   %128 = load float, ptr %z, align 4
   %129 = load float, ptr %y, align 4
-  %fmul76 = fmul reassoc arcp contract float %128, %129
+  %fmul78 = fmul reassoc arcp contract float %128, %129
   %130 = load float, ptr %cosr, align 4
-  %fpfpext77 = fpext
-  %fsub78 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext77
-  %fpfptrunc79 = fptrunc
+  %fpfpext79 = fpext
+  %fsub80 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext79
+  %fpfptrunc81 = fptrunc
   %131 = load float, ptr %x, align 4
   %132 = load float, ptr %sinr, align 4
-  %fmul80 = fmul reassoc arcp contract float %131, %132
-  %133 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul76, float %fpfptrunc79, float %fmul80)
+  %fmul82 = fmul reassoc arcp contract float %131, %132
+  %133 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul78, float %fpfptrunc81, float %fmul82)
   %134 = insertelement <4 x float> %127, float %133, i64 1
   %135 = load float, ptr %cosr, align 4
   %136 = load float, ptr %z, align 4
   %137 = load float, ptr %z, align 4
-  %fmul81 = fmul reassoc arcp contract float %136, %137
+  %fmul83 = fmul reassoc arcp contract float %136, %137
   %138 = load float, ptr %cosr, align 4
-  %fpfpext82 = fpext
-  %fsub83 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext82
-  %fpfptrunc84 = fptrunc
-  %139 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul81, float %fpfptrunc84, float %135)
+  %fpfpext84 = fpext
+  %fsub85 = fsub reassoc arcp contract double 1.000000e+00, %fpfpext84
+  %fpfptrunc86 = fptrunc
+  %139 = call reassoc arcp contract float @llvm.fmuladd.f32(float %fmul83, float %fpfptrunc86, float %135)
   %140 = insertelement <4 x float> %134, float %139, i64 2
   %141 = insertelement <4 x float> %140, float 0.000000e+00, i64 3
-  store <4 x float> %141, ptr %ptradd70, align 16
-  %ptradd85 = getelementptr inbounds i8, ptr %b, i64 48
-  store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, ptr %ptradd85, align 16
+  store <4 x float> %141, ptr %ptradd72, align 16
+  %ptradd87 = getelementptr inbounds i8, ptr %b, i64 48
+  store <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, ptr %ptradd87, align 16
   store i64 0, ptr %.anon, align 8
   br label %loop.cond
 
@@ -360,17 +363,17 @@ loop.body:                                        ; preds = %loop.cond
   store <4 x float> %144, ptr %v, align 16
   %145 = load <4 x float>, ptr %v, align 16
   %146 = extractelement <4 x float> %145, i64 0
-  %fpfpext86 = fpext
+  %fpfpext88 = fpext
   %147 = load <4 x float>, ptr %v, align 16
   %148 = extractelement <4 x float> %147, i64 1
-  %fpfpext87 = fpext
+  %fpfpext89 = fpext
   %149 = load <4 x float>, ptr %v, align 16
   %150 = extractelement <4 x float> %149, i64 2
-  %fpfpext88 = fpext
+  %fpfpext90 = fpext
   %151 = load <4 x float>, ptr %v, align 16
   %152 = extractelement <4 x float> %151, i64 3
-  %fpfpext89 = fpext
-  %153 = call i32 (ptr, ...) @printf(ptr @.str, double %fpfpext86, double %fpfpext87, double %fpfpext88, double %fpfpext89)
+  %fpfpext91 = fpext
+  %153 = call i32 (ptr, ...) @printf(ptr @.str, double %fpfpext88, double %fpfpext89, double %fpfpext90, double %fpfpext91)
   %154 = load i64, ptr %.anon, align 8
   %addnuw = add nuw i64 %154, 1
   store i64 %addnuw, ptr %.anon, align 8
@@ -378,37 +381,37 @@ loop.body:                                        ; preds = %loop.cond
 
 loop.exit:                                        ; preds = %loop.cond
   %155 = call i32 (ptr, ...) @printf(ptr @.str.1)
-  store i64 0, ptr %.anon90, align 8
-  br label %loop.cond91
+  store i64 0, ptr %.anon92, align 8
+  br label %loop.cond93
 
-loop.cond91:                                      ; preds = %loop.body93, %loop.exit
-  %156 = load i64, ptr %.anon90, align 8
-  %gt92 = icmp ugt i64 4, %156
-  br i1 %gt92, label %loop.body93, label %loop.exit101
+loop.cond93:                                      ; preds = %loop.body95, %loop.exit
+  %156 = load i64, ptr %.anon92, align 8
+  %gt94 = icmp ugt i64 4, %156
+  br i1 %gt94, label %loop.body95, label %loop.exit103
 
-loop.body93:                                      ; preds = %loop.cond91
-  %157 = load i64, ptr %.anon90, align 8
-  %ptroffset95 = getelementptr inbounds [16 x i8], ptr %b, i64 %157
-  %158 = load <4 x float>, ptr %ptroffset95, align 16
-  store <4 x float> %158, ptr %v94, align 16
-  %159 = load <4 x float>, ptr %v94, align 16
+loop.body95:                                      ; preds = %loop.cond93
+  %157 = load i64, ptr %.anon92, align 8
+  %ptroffset97 = getelementptr inbounds [16 x i8], ptr %b, i64 %157
+  %158 = load <4 x float>, ptr %ptroffset97, align 16
+  store <4 x float> %158, ptr %v96, align 16
+  %159 = load <4 x float>, ptr %v96, align 16
   %160 = extractelement <4 x float> %159, i64 0
-  %fpfpext96 = fpext
-  %161 = load <4 x float>, ptr %v94, align 16
-  %162 = extractelement <4 x float> %161, i64 1
-  %fpfpext97 = fpext
-  %163 = load <4 x float>, ptr %v94, align 16
-  %164 = extractelement <4 x float> %163, i64 2
   %fpfpext98 = fpext
-  %165 = load <4 x float>, ptr %v94, align 16
-  %166 = extractelement <4 x float> %165, i64 3
+  %161 = load <4 x float>, ptr %v96, align 16
+  %162 = extractelement <4 x float> %161, i64 1
   %fpfpext99 = fpext
-  %167 = call i32 (ptr, ...) @printf(ptr @.str.2, double %fpfpext96, double %fpfpext97, double %fpfpext98, double %fpfpext99)
-  %168 = load i64, ptr %.anon90, align 8
-  %addnuw100 = add nuw i64 %168, 1
-  store i64 %addnuw100, ptr %.anon90, align 8
-  br label %loop.cond91
+  %163 = load <4 x float>, ptr %v96, align 16
+  %164 = extractelement <4 x float> %163, i64 2
+  %fpfpext100 = fpext
+  %165 = load <4 x float>, ptr %v96, align 16
+  %166 = extractelement <4 x float> %165, i64 3
+  %fpfpext101 = fpext
+  %167 = call i32 (ptr, ...) @printf(ptr @.str.2, double %fpfpext98, double %fpfpext99, double %fpfpext100, double %fpfpext101)
+  %168 = load i64, ptr %.anon92, align 8
+  %addnuw102 = add nuw i64 %168, 1
+  store i64 %addnuw102, ptr %.anon92, align 8
+  br label %loop.cond93
 
-loop.exit101:                                     ; preds = %loop.cond91
+loop.exit103:                                     ; preds = %loop.cond93
   ret void
 }
\ No newline at end of file
diff --git a/test/test_suite/vector/vector_param.c3t b/test/test_suite/vector/vector_param.c3t
index 2cb143223..bb281ccfa 100644
--- a/test/test_suite/vector/vector_param.c3t
+++ b/test/test_suite/vector/vector_param.c3t
@@ -1,7 +1,9 @@
 // #target: macos-x64
 module test;
 
-fn void test(int[<4>] x)
+typedef Int4V = int[<4>] @simd;
+
+fn void test(Int4V x)
 {
 	x[1] = 123;
 	int y = x[1];
diff --git a/test/unit/stdlib/math/matrix.c3 b/test/unit/stdlib/math/matrix.c3
index 6b817e294..3bc4ed771 100644
--- a/test/unit/stdlib/math/matrix.c3
+++ b/test/unit/stdlib/math/matrix.c3
@@ -1,92 +1,91 @@
 module math_matrix @test;
 import std::math;
 
-fn void test_mat4()
+fn void test_mat4_translate()
 {
-	{
-		Matrix4 mat = MATRIX4_IDENTITY;
-		Matrix4 mat2 = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 };
-		Matrix4 calc = mat.mul(mat2);
-		assert(calc.m == mat.m);
-		assert(mat * mat2 == mat);
+	Matrix4 mat = MATRIX4_IDENTITY;
+	Matrix4 mat2 = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 };
+	Matrix4 calc = mat.mul(mat2);
+	assert(calc.m == mat.m);
+	assert(mat * mat2 == mat);
 
-		Matrix4 translated = mat.translate({0.0, 0.0, 0.0});
-		assert(translated.m == mat.m);
-	};
-
-	{
-		Matrix4 mat = { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
-		Matrix4 mat2 = { 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 };
-		Matrix4 calc = mat.mul(mat2);
-		Matrix4 value = { 56, 46, 36, 26, 152, 126, 100, 74, 56, 46, 36, 26, 152, 126, 100, 74 };
-		assert(calc.m == value.m);
-		assert(mat * mat2 == value);
-	};
-
-	{
-		Matrix4 result = {
-			0.988936, 0.000000, -0.148340, -0.988936, 
-			-0.014599, 0.995146, -0.097325, -2.970838, 
-			0.147620, 0.098414, 0.984136, -20.765262, 
-			0.000000, 0.000000, 0.000000, 1.000000
-		};
-
-		Matrix4f result_f = {
-			0.988936, 0.000000, -0.148340, -0.988936, 
-			-0.014599, 0.995146, -0.097325, -2.970838, 
-			0.147620, 0.098414, 0.984136, -20.765262, 
-			0.000000, 0.000000, 0.000000, 1.000000
-		};
-
-		Matrix4 result_transposed = {
-			0.988936, -0.014599, 0.147620, 0.000000, 
-			0.000000, 0.995146, 0.098414, 0.000000, 
-			-0.148340, -0.097325, 0.984136, 0.000000, 
-			-0.988936, -2.970838, -20.765262, 1.000000
-		};
-
-		Matrix4f result_transposed_f = {
-			0.988936, -0.014599, 0.147620, 0.000000, 
-			0.000000, 0.995146, 0.098414, 0.000000, 
-			-0.148340, -0.097325, 0.984136, 0.000000, 
-			-0.988936, -2.970838, -20.765262, 1.000000
-		};
-
-		Matrix4 look_at = matrix::look_at{double}({4.0, 5.0, 20.0}, {1.0, 3.0, 0.0}, {0.0, 1.0, 0.0});
-		Matrix4f look_at_f = matrix::look_at{float}({4.0, 5.0, 20.0}, {1.0, 3.0, 0.0}, {0.0, 1.0, 0.0});
-
-		assert(math::round_to_decimals((double[<16>])look_at.m, 4) == math::round_to_decimals((double[<16>])result.m, 4));
-		assert(math::round_to_decimals((float[<16>])look_at_f.m, 4) == math::round_to_decimals((float[<16>])result_f.m, 4));
-
-		assert(math::round_to_decimals((double[<16>])result_transposed.m, 4) == math::round_to_decimals((double[<16>])look_at.transpose().m, 4));
-		assert(math::round_to_decimals((float[<16>])result_transposed_f.m, 4) == math::round_to_decimals((float[<16>])look_at_f.transpose().m, 4));
-	};
-
-	{
-		Matrix4 result = {
-			1.857087, 0.000000, 0.000000, 
-			0.000000, 0.000000, 2.414214, 
-			0.000000, 0.000000, 0.000000, 0.000000, 
-			-1.000200, -0.200020, 0.000000, 0.000000, 
-			-1.000000, 0.000000
-		};
-
-		Matrix4f result_f = {
-			1.857087, 0.000000, 0.000000, 
-			0.000000, 0.000000, 2.414214, 
-			0.000000, 0.000000, 0.000000, 0.000000, 
-			-1.000200, -0.200020, 0.000000, 0.000000, 
-			-1.000000, 0.000000
-		};
-
-		Matrix4 perspective = matrix4_perspective(math::deg_to_rad(45), 1.3, 0.1, 1000);
-		Matrix4f perspective_f = matrix4f_perspective((float)math::deg_to_rad(45), 1.3, 0.1, 1000);
-
-		assert(math::round_to_decimals((double[<16>])result.m, 4) == math::round_to_decimals((double[<16>])perspective.m, 4));
-		assert(math::round_to_decimals((float[<16>])result_f.m, 4) == math::round_to_decimals((float[<16>])perspective_f.m, 4));
-	};
+	Matrix4 translated = mat.translate({0.0, 0.0, 0.0});
+	assert(translated.m == mat.m);
 }
 
+fn void test_mat4_mul()
+{
+	Matrix4 mat = { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
+	Matrix4 mat2 = { 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 };
+	Matrix4 calc = mat.mul(mat2);
+	Matrix4 value = { 56, 46, 36, 26, 152, 126, 100, 74, 56, 46, 36, 26, 152, 126, 100, 74 };
+	assert(calc.m == value.m);
+	assert(mat * mat2 == value);
+}
+fn void test_mat4_lookat()
+{
+	Matrix4 result = {
+		0.988936, 0.000000, -0.148340, -0.988936,
+		-0.014599, 0.995146, -0.097325, -2.970838,
+		0.147620, 0.098414, 0.984136, -20.765262,
+		0.000000, 0.000000, 0.000000, 1.000000
+	};
+
+	Matrix4f result_f = {
+		0.988936, 0.000000, -0.148340, -0.988936,
+		-0.014599, 0.995146, -0.097325, -2.970838,
+		0.147620, 0.098414, 0.984136, -20.765262,
+		0.000000, 0.000000, 0.000000, 1.000000
+	};
+
+	Matrix4 result_transposed = {
+		0.988936, -0.014599, 0.147620, 0.000000,
+		0.000000, 0.995146, 0.098414, 0.000000,
+		-0.148340, -0.097325, 0.984136, 0.000000,
+		-0.988936, -2.970838, -20.765262, 1.000000
+	};
+
+	Matrix4f result_transposed_f = {
+		0.988936, -0.014599, 0.147620, 0.000000,
+		0.000000, 0.995146, 0.098414, 0.000000,
+		-0.148340, -0.097325, 0.984136, 0.000000,
+		-0.988936, -2.970838, -20.765262, 1.000000
+	};
+
+	Matrix4 look_at = matrix::look_at{double}({4.0, 5.0, 20.0}, {1.0, 3.0, 0.0}, {0.0, 1.0, 0.0});
+	Matrix4f look_at_f = matrix::look_at{float}({4.0, 5.0, 20.0}, {1.0, 3.0, 0.0}, {0.0, 1.0, 0.0});
+
+	assert(math::round_to_decimals((double[<16>])look_at.m, 4) == math::round_to_decimals((double[<16>])result.m, 4));
+	assert(math::round_to_decimals((float[<16>])look_at_f.m, 4) == math::round_to_decimals((float[<16>])result_f.m, 4));
+
+	assert(math::round_to_decimals((double[<16>])result_transposed.m, 4) == math::round_to_decimals((double[<16>])look_at.transpose().m, 4));
+	assert(math::round_to_decimals((float[<16>])result_transposed_f.m, 4) == math::round_to_decimals((float[<16>])look_at_f.transpose().m, 4));
+}
+
+fn void test_mat4_perspective()
+{
+	Matrix4 result = {
+		1.857087, 0.000000, 0.000000,
+		0.000000, 0.000000, 2.414214,
+		0.000000, 0.000000, 0.000000, 0.000000,
+		-1.000200, -0.200020, 0.000000, 0.000000,
+		-1.000000, 0.000000
+	};
+
+	Matrix4f result_f = {
+		1.857087, 0.000000, 0.000000,
+		0.000000, 0.000000, 2.414214,
+		0.000000, 0.000000, 0.000000, 0.000000,
+		-1.000200, -0.200020, 0.000000, 0.000000,
+		-1.000000, 0.000000
+	};
+
+	Matrix4 perspective = matrix4_perspective(math::deg_to_rad(45), 1.3, 0.1, 1000);
+	Matrix4f perspective_f = matrix4f_perspective((float)math::deg_to_rad(45), 1.3, 0.1, 1000);
+
+	assert(math::round_to_decimals((double[<16>])result.m, 4) == math::round_to_decimals((double[<16>])perspective.m, 4));
+	assert(math::round_to_decimals((float[<16>])result_f.m, 4) == math::round_to_decimals((float[<16>])perspective_f.m, 4));
+}
 
 fn void test_mat3()
 {