diff --git a/src/compiler/asm/x86.h b/src/compiler/asm/x86.h index a98201581..c1255d130 100644 --- a/src/compiler/asm/x86.h +++ b/src/compiler/asm/x86.h @@ -35,38 +35,6 @@ typedef enum X86_XMM13, X86_XMM14, X86_XMM15, - X86_YMM0, - X86_YMM1, - X86_YMM2, - X86_YMM3, - X86_YMM4, - X86_YMM5, - X86_YMM6, - X86_YMM7, - X86_YMM8, - X86_YMM9, - X86_YMM10, - X86_YMM11, - X86_YMM12, - X86_YMM13, - X86_YMM14, - X86_YMM15, - X86_ZMM0, - X86_ZMM1, - X86_ZMM2, - X86_ZMM3, - X86_ZMM4, - X86_ZMM5, - X86_ZMM6, - X86_ZMM7, - X86_ZMM8, - X86_ZMM9, - X86_ZMM10, - X86_ZMM11, - X86_ZMM12, - X86_ZMM13, - X86_ZMM14, - X86_ZMM15, X86_K0, X86_K1, X86_K2, @@ -136,38 +104,6 @@ static const char *X86ClobberNames[] = { [X86_XMM13] = "xmm13", [X86_XMM14] = "xmm14", [X86_XMM15] = "xmm15", - [X86_YMM0] = "ymm0", - [X86_YMM1] = "ymm1", - [X86_YMM2] = "ymm2", - [X86_YMM3] = "ymm3", - [X86_YMM4] = "ymm4", - [X86_YMM5] = "ymm5", - [X86_YMM6] = "ymm6", - [X86_YMM7] = "ymm7", - [X86_YMM8] = "ymm8", - [X86_YMM9] = "ymm9", - [X86_YMM10] = "ymm10", - [X86_YMM11] = "ymm11", - [X86_YMM12] = "ymm12", - [X86_YMM13] = "ymm13", - [X86_YMM14] = "ymm14", - [X86_YMM15] = "ymm15", - [X86_ZMM0] = "zmm0", - [X86_ZMM1] = "zmm1", - [X86_ZMM2] = "zmm2", - [X86_ZMM3] = "zmm3", - [X86_ZMM4] = "zmm4", - [X86_ZMM5] = "zmm5", - [X86_ZMM6] = "zmm6", - [X86_ZMM7] = "zmm7", - [X86_ZMM8] = "zmm8", - [X86_ZMM9] = "zmm9", - [X86_ZMM10] = "zmm10", - [X86_ZMM11] = "zmm11", - [X86_ZMM12] = "zmm12", - [X86_ZMM13] = "zmm13", - [X86_ZMM14] = "zmm14", - [X86_ZMM15] = "zmm15", [X86_K0] = "k0", [X86_K1] = "k1", [X86_K2] = "k2", @@ -224,3 +160,5 @@ static const char *x86_xmm_regs[] = { "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4 static const char *x86_ymm_regs[] = { "$ymm0", "$ymm1", "$ymm2", "$ymm3", "$ymm4", "$ymm5", "$ymm6", "$ymm7", "$ymm8", "$ymm9", "$ymm10", "$ymm11", "$ymm12", "$ymm13", "$ymm14", "$ymm15" }; +static const char *x86_zmm_regs[] = { "$zmm0", "$zmm1", "$zmm2", "$zmm3", "$zmm4", "$zmm5", "$zmm6", "$zmm7", + "$zmm8", "$zmm9", "$zmm10", "$zmm11", "$zmm12", "$zmm13", "$zmm14", "$zmm15" }; diff --git a/src/compiler/asm_target.c b/src/compiler/asm_target.c index 765d28ed1..c00f36897 100644 --- a/src/compiler/asm_target.c +++ b/src/compiler/asm_target.c @@ -58,9 +58,19 @@ INLINE AsmArgBits parse_bits(const char **desc) } if (memcmp("128", *desc, 3) == 0) { - *desc += 2; + *desc += 3; return ARG_BITS_128; } + if (memcmp("256", *desc, 3) == 0) + { + *desc += 3; + return ARG_BITS_256; + } + if (memcmp("512", *desc, 3) == 0) + { + *desc += 3; + return ARG_BITS_512; + } error_exit("Invalid bits: %s.", *desc); } @@ -95,7 +105,10 @@ INLINE AsmArgType decode_arg_type(const char **desc) *desc += 2; goto NEXT; } - error_exit("Unexpected string %s", &desc[-1]); + error_exit("Unexpected string %s", &(*desc)[-1]); + case 'v': + arg_type.vec_bits |= parse_bits(desc); + goto NEXT; case 'i': if (memcmp("mm", *desc, 2) == 0) { @@ -119,7 +132,7 @@ INLINE AsmArgType decode_arg_type(const char **desc) goto NEXT; } default: - error_exit("Unexpected string '%s'.", &desc[-1]); + error_exit("Unexpected string '%s'.", &(*desc)[-1]); } NEXT: switch (**desc) @@ -236,22 +249,22 @@ static void init_asm_aarch64(void) reg_instr("strh", "r32/r64, w:mem"); reg_instr("stp", "r32/r64, r32/r64, w:mem"); reg_instr("mov", "w:r32/r64, mem"); - reg_register_list(aarch64_quad_regs, 32, ASM_REG_INT, 64, AARCH64_R0); - reg_register_list(aarch64_long_regs, 32, ASM_REG_INT, 32, AARCH64_R0); - reg_register_list(aarch64_f128_regs, 32, ASM_REG_FLOAT, 128, AARCH64_Q0); - reg_register_list(aarch64_double_regs, 32, ASM_REG_FLOAT, 64, AARCH64_Q0); - reg_register_list(aarch64_float_regs, 32, ASM_REG_FLOAT, 32, AARCH64_Q0); - reg_register_list(aarch64_f16_regs, 32, ASM_REG_FLOAT, 16, AARCH64_Q0); - reg_register_list(aarch64_f8_regs, 32, ASM_REG_FLOAT, 8, AARCH64_Q0); - reg_register_list(aarch64_v8b_regs, 32, ASM_REG_IVEC, 64, AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v16b_regs, 32, ASM_REG_IVEC, 128,AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v4h_regs, 32, ASM_REG_IVEC, 64, AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v8h_regs, 32, ASM_REG_IVEC, 128, AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v2s_regs, 32, ASM_REG_IVEC, 64, AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v4s_regs, 32, ASM_REG_IVEC, 128, AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v1d_regs, 32, ASM_REG_IVEC, 64, AARCH64_FIRST_RV_CLOBBER); - reg_register_list(aarch64_v2d_regs, 32, ASM_REG_IVEC, 128, AARCH64_FIRST_RV_CLOBBER); - reg_register("$sp", ASM_REG_INT, 64, AARCH64_R31); + reg_register_list(aarch64_quad_regs, 32, ASM_REG_INT, ARG_BITS_64, AARCH64_R0); + reg_register_list(aarch64_long_regs, 32, ASM_REG_INT, ARG_BITS_32, AARCH64_R0); + reg_register_list(aarch64_f128_regs, 32, ASM_REG_FLOAT, ARG_BITS_128, AARCH64_Q0); + reg_register_list(aarch64_double_regs, 32, ASM_REG_FLOAT, ARG_BITS_64, AARCH64_Q0); + reg_register_list(aarch64_float_regs, 32, ASM_REG_FLOAT, ARG_BITS_32, AARCH64_Q0); + reg_register_list(aarch64_f16_regs, 32, ASM_REG_FLOAT, ARG_BITS_16, AARCH64_Q0); + reg_register_list(aarch64_f8_regs, 32, ASM_REG_FLOAT, ARG_BITS_8, AARCH64_Q0); + reg_register_list(aarch64_v8b_regs, 32, ASM_REG_IVEC, ARG_BITS_64, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v16b_regs, 32, ASM_REG_IVEC, ARG_BITS_128, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v4h_regs, 32, ASM_REG_IVEC, ARG_BITS_64, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v8h_regs, 32, ASM_REG_IVEC, ARG_BITS_128, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v2s_regs, 32, ASM_REG_IVEC, ARG_BITS_64, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v4s_regs, 32, ASM_REG_IVEC, ARG_BITS_128, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v1d_regs, 32, ASM_REG_IVEC, ARG_BITS_64, AARCH64_FIRST_RV_CLOBBER); + reg_register_list(aarch64_v2d_regs, 32, ASM_REG_IVEC, ARG_BITS_128, AARCH64_FIRST_RV_CLOBBER); + reg_register("$sp", ASM_REG_INT, ARG_BITS_64, AARCH64_R31); } static void init_asm_wasm(void) @@ -294,11 +307,24 @@ static void init_asm_x86(void) reg_instr_clob("adcw", cc_flag_mask, "rw:r16/mem, r16/mem/imm16/immi8"); reg_instr_clob("adcl", cc_flag_mask, "rw:r32/mem, r32/mem/imm32/immi8"); reg_instr_clob("adcq", cc_flag_mask, "rw:r64/mem, r64/mem/immi32/immi8"); + + reg_instr_clob("adcxl", cc_flag_mask, "r32, rw:r32/mem"); + reg_instr_clob("adcxq", cc_flag_mask, "r64, rw:r64/mem"); + reg_instr_clob("addb", cc_flag_mask, "rw:r8/mem, r8/mem/imm8"); reg_instr_clob("addw", cc_flag_mask, "rw:r16/mem, r16/mem/imm16/immi8"); reg_instr_clob("addl", cc_flag_mask, "rw:r32/mem, r32/mem/imm32/immi8"); reg_instr_clob("addq", cc_flag_mask, "rw:r64/mem, r64/mem/immi32/immi8"); + reg_instr("addpd", "rw:v128, v128/mem"); + reg_instr("addps", "rw:v128, v128/mem"); + reg_instr("addsd", "rw:v128, v128/mem"); + reg_instr("addss", "rw:v128, v128/mem"); + reg_instr("vaddpd", "w:v128/v256/v512, v128/v256/v512, v128/v256/v512/mem"); + reg_instr("vaddps", "w:v128/v256/v512, v128/v256/v512, v128/v256/v512/mem"); + reg_instr("vaddsd", "w:v128, v128, v128/mem"); + reg_instr("vaddss", "w:v128, v128, v128/mem"); + reg_instr_clob("cbtw", rax_mask, NULL); reg_instr_clob("cwtl", rax_mask, NULL); reg_instr_clob("cltq", rax_mask, NULL); @@ -380,21 +406,22 @@ static void init_asm_x86(void) asm_target.extra_clobbers = "~{flags},~{dirflag},~{fspr}"; if (platform_target.arch == ARCH_TYPE_X86) { - reg_register_list(x86_long_regs, 8, ASM_REG_INT, 32, X86_RAX); - reg_register_list(x86_word_regs, 8, ASM_REG_INT, 16, X86_RAX); - reg_register_list(x86_low_byte_regs, 8, ASM_REG_INT, 8, X86_RAX); - reg_register_list(x86_float_regs, 8, ASM_REG_FLOAT, 80, X86_ST0); - reg_register_list(x86_xmm_regs, 8, ASM_REF_MMX, 128, X86_MM0); + reg_register_list(x86_long_regs, 8, ASM_REG_INT, ARG_BITS_32, X86_RAX); + reg_register_list(x86_word_regs, 8, ASM_REG_INT, ARG_BITS_16, X86_RAX); + reg_register_list(x86_low_byte_regs, 8, ASM_REG_INT, ARG_BITS_8, X86_RAX); + reg_register_list(x86_float_regs, 8, ASM_REG_FLOAT, ARG_BITS_80, X86_ST0); + reg_register_list(x86_xmm_regs, 8, ASM_REF_FVEC, ARG_BITS_128, X86_MM0); } else { - reg_register_list(x64_quad_regs, 15, ASM_REG_INT, 64, X86_RAX); - reg_register_list(x86_long_regs, 15, ASM_REG_INT, 32, X86_RAX); - reg_register_list(x86_word_regs, 15, ASM_REG_INT, 16, X86_RAX); - reg_register_list(x86_low_byte_regs, 15, ASM_REG_INT, 8, X86_RAX); - reg_register_list(x86_high_byte_regs, 4, ASM_REG_INT, 8, X86_RAX); - reg_register_list(x86_xmm_regs, 16, ASM_REF_MMX, 128, X86_XMM0); - reg_register_list(x86_ymm_regs, 16, ASM_REF_MMX, 128, X86_YMM0); + reg_register_list(x64_quad_regs, 15, ASM_REG_INT, ARG_BITS_64, X86_RAX); + reg_register_list(x86_long_regs, 15, ASM_REG_INT, ARG_BITS_32, X86_RAX); + reg_register_list(x86_word_regs, 15, ASM_REG_INT, ARG_BITS_16, X86_RAX); + reg_register_list(x86_low_byte_regs, 15, ASM_REG_INT, ARG_BITS_8, X86_RAX); + reg_register_list(x86_high_byte_regs, 4, ASM_REG_INT, ARG_BITS_8, X86_RAX); + reg_register_list(x86_xmm_regs, 16, ASM_REF_FVEC, ARG_BITS_128, X86_XMM0); + reg_register_list(x86_ymm_regs, 16, ASM_REF_FVEC, ARG_BITS_256, X86_XMM0); + reg_register_list(x86_zmm_regs, 16, ASM_REF_FVEC, ARG_BITS_512, X86_XMM0); } } void init_asm(void) diff --git a/src/compiler/codegen_asm.c b/src/compiler/codegen_asm.c index d9378f447..561b39a2d 100644 --- a/src/compiler/codegen_asm.c +++ b/src/compiler/codegen_asm.c @@ -126,14 +126,10 @@ static inline char *codegen_create_x86_att_asm(AsmInlineBlock *block) Expr** args = ast->asm_stmt.args; unsigned arg_count = vec_size(args); scratch_buffer_append_char(' '); - if (arg_count > 1) + for (unsigned i = arg_count; i > 0; i--) { - codegen_create_x86att_arg(block, input_arg_offset, args[1]); - scratch_buffer_append(", "); - } - if (arg_count) - { - codegen_create_x86att_arg(block, input_arg_offset, args[0]); + if (i != arg_count) scratch_buffer_append(", "); + codegen_create_x86att_arg(block, input_arg_offset, args[i - 1]); } scratch_buffer_append_char('\n'); } diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index f43666089..566fc2117 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -133,10 +133,11 @@ typedef struct bool is_write : 1; bool is_readwrite : 1; bool is_address : 1; - AsmArgBits imm_arg_ubits : 8; - AsmArgBits imm_arg_ibits : 8; - AsmArgBits ireg_bits : 8; - AsmArgBits float_bits : 8; + AsmArgBits imm_arg_ubits : 16; + AsmArgBits imm_arg_ibits : 16; + AsmArgBits ireg_bits : 16; + AsmArgBits float_bits : 16; + AsmArgBits vec_bits : 16; } AsmArgType; typedef struct diff --git a/src/compiler/enums.h b/src/compiler/enums.h index da3f1de48..64de7490d 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -273,8 +273,6 @@ typedef enum ASM_REG_FLOAT, ASM_REG_IVEC, ASM_REF_FVEC, - ASM_REF_SSE, - ASM_REF_MMX, } AsmRegisterType; typedef enum @@ -284,7 +282,9 @@ typedef enum ARG_BITS_32 = 1 << 2, ARG_BITS_64 = 1 << 3, ARG_BITS_128 = 1 << 4, - ARG_BITS_80 = 1 << 5, + ARG_BITS_256 = 1 << 5, + ARG_BITS_512 = 1 << 6, + ARG_BITS_80 = 1 << 7, } AsmArgBits; typedef enum diff --git a/src/compiler/sema_asm.c b/src/compiler/sema_asm.c index 5b6828abe..e03a869f2 100644 --- a/src/compiler/sema_asm.c +++ b/src/compiler/sema_asm.c @@ -64,11 +64,9 @@ INLINE bool sema_reg_is_valid_in_slot(AsmRegister *reg, AsmArgType arg_type) return (arg_type.ireg_bits & reg->bits) != 0; case ASM_REG_FLOAT: return (arg_type.float_bits & reg->bits) != 0; - case ASM_REG_IVEC: case ASM_REF_FVEC: - case ASM_REF_SSE: - case ASM_REF_MMX: - TODO + case ASM_REG_IVEC: + return (arg_type.vec_bits & reg->bits) != 0; } UNREACHABLE } @@ -190,7 +188,7 @@ static inline bool sema_check_asm_arg_reg(SemaContext *context, AsmInlineBlock * SEMA_ERROR(expr, "Expected a valid register name."); return false; } - if (sema_reg_is_valid_in_slot(reg, arg_type)) + if (!sema_reg_is_valid_in_slot(reg, arg_type)) { SEMA_ERROR(expr, "'%s' is not valid in this slot.", reg->name); return false; diff --git a/src/version.h b/src/version.h index a08f06f97..e9b1ec502 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define COMPILER_VERSION "0.3.42" \ No newline at end of file +#define COMPILER_VERSION "0.3.43" \ No newline at end of file diff --git a/test/test_suite/asm/asm_ops_x64_2.c3t b/test/test_suite/asm/asm_ops_x64_2.c3t new file mode 100644 index 000000000..a9c3db917 --- /dev/null +++ b/test/test_suite/asm/asm_ops_x64_2.c3t @@ -0,0 +1,30 @@ +// #target: macos-x64 +module test; + +fn void main(char[][] args) +{ + int foo; + asm + { + adcxl $eax, $ecx; + adcxq $rax, $rcx; + addpd $xmm1, $xmm2; + addps $xmm1, $xmm2; + addsd $xmm1, $xmm2; + addss $xmm1, $xmm2; + vaddpd $xmm1, $xmm2, $xmm3; + vaddpd $ymm1, $ymm2, $ymm3; + vaddpd $xmm1, $xmm2, [&foo]; + vaddps $xmm1, $xmm2, $xmm3; + vaddps $ymm1, $ymm2, $ymm3; + vaddps $xmm1, $xmm2, [&foo]; + vaddsd $xmm1, $xmm2, $xmm3; + vaddsd $xmm1, $xmm2, [&foo]; + vaddss $xmm1, $xmm2, $xmm3; + vaddss $xmm1, $xmm2, [&foo]; + } +} + +/* #expect: test.ll + +"adcxl %ecx, %eax\0Aadcxq %rcx, %rax\0Aaddpd %xmm2, %xmm1\0Aaddps %xmm2, %xmm1\0Aaddsd %xmm2, %xmm1\0Aaddss %xmm2, %xmm1\0Avaddpd %xmm3, %xmm2, %xmm1\0Avaddpd %ymm3, %ymm2, %ymm1\0Avaddpd $0, %xmm2, %xmm1\0Avaddps %xmm3, %xmm2, %xmm1\0Avaddps %ymm3, %ymm2, %ymm1\0Avaddps $0, %xmm2, %xmm1\0Avaddsd %xmm3, %xmm2, %xmm1\0Avaddsd $0, %xmm2, %xmm1\0Avaddss %xmm3, %xmm2, %xmm1\0Avaddss $0, %xmm2, %xmm1\0A", "*m,~{cc},~{rcx},~{xmm1},~{flags},~{dirflag},~{fspr}" diff --git a/test/test_suite2/asm/asm_ops_x64_2.c3t b/test/test_suite2/asm/asm_ops_x64_2.c3t new file mode 100644 index 000000000..a9c3db917 --- /dev/null +++ b/test/test_suite2/asm/asm_ops_x64_2.c3t @@ -0,0 +1,30 @@ +// #target: macos-x64 +module test; + +fn void main(char[][] args) +{ + int foo; + asm + { + adcxl $eax, $ecx; + adcxq $rax, $rcx; + addpd $xmm1, $xmm2; + addps $xmm1, $xmm2; + addsd $xmm1, $xmm2; + addss $xmm1, $xmm2; + vaddpd $xmm1, $xmm2, $xmm3; + vaddpd $ymm1, $ymm2, $ymm3; + vaddpd $xmm1, $xmm2, [&foo]; + vaddps $xmm1, $xmm2, $xmm3; + vaddps $ymm1, $ymm2, $ymm3; + vaddps $xmm1, $xmm2, [&foo]; + vaddsd $xmm1, $xmm2, $xmm3; + vaddsd $xmm1, $xmm2, [&foo]; + vaddss $xmm1, $xmm2, $xmm3; + vaddss $xmm1, $xmm2, [&foo]; + } +} + +/* #expect: test.ll + +"adcxl %ecx, %eax\0Aadcxq %rcx, %rax\0Aaddpd %xmm2, %xmm1\0Aaddps %xmm2, %xmm1\0Aaddsd %xmm2, %xmm1\0Aaddss %xmm2, %xmm1\0Avaddpd %xmm3, %xmm2, %xmm1\0Avaddpd %ymm3, %ymm2, %ymm1\0Avaddpd $0, %xmm2, %xmm1\0Avaddps %xmm3, %xmm2, %xmm1\0Avaddps %ymm3, %ymm2, %ymm1\0Avaddps $0, %xmm2, %xmm1\0Avaddsd %xmm3, %xmm2, %xmm1\0Avaddsd $0, %xmm2, %xmm1\0Avaddss %xmm3, %xmm2, %xmm1\0Avaddss $0, %xmm2, %xmm1\0A", "*m,~{cc},~{rcx},~{xmm1},~{flags},~{dirflag},~{fspr}"