diff options
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/c/configure.ac | 2 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/asm-common.h | 30 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/core-common.h | 6 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/asm-32.h | 125 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/asm-64.h | 430 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/asm.h | 2004 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/core-32.h | 174 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/core-64.h | 498 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/core.h | 345 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/fp-32.h | 356 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/fp-64.h | 325 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/fp.h | 330 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/funcs.h | 13 |
13 files changed, 3373 insertions, 1265 deletions
diff --git a/src/runtime/c/configure.ac b/src/runtime/c/configure.ac index 82f941cb5..2ead33e3d 100644 --- a/src/runtime/c/configure.ac +++ b/src/runtime/c/configure.ac @@ -43,6 +43,8 @@ AC_C_ASCII case "$target_cpu" in i?86) cpu=i386; AC_DEFINE(LIGHTNING_I386, 1, [Define if lightning is targeting the x86 architecture]) ;; + x86_64) cpu=i386; AC_DEFINE(LIGHTNING_I386, 1, + [Define if lightning is targeting the x86 architecture]) ;; sparc*) cpu=sparc; AC_DEFINE(LIGHTNING_SPARC, 1, [Define if lightning is targeting the x86 architecture]) ;; powerpc) cpu=ppc; AC_DEFINE(LIGHTNING_PPC, 1, diff --git a/src/runtime/c/pgf/lightning/asm-common.h b/src/runtime/c/pgf/lightning/asm-common.h index 42c8814a9..bdaa6c2ea 100644 --- a/src/runtime/c/pgf/lightning/asm-common.h +++ b/src/runtime/c/pgf/lightning/asm-common.h @@ -14,7 +14,7 @@ * * GNU lightning is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1, or (at your option) + * by the Free Software Foundation; either version 3, or (at your option) * any later version. * * GNU lightning is distributed in the hope that it will be useful, but @@ -24,8 +24,8 @@ * * You should have received a copy of the GNU Lesser General Public License * along with GNU lightning; see the file COPYING.LESSER; if not, write to the - * Free Software Foundation, 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. + * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. * ***********************************************************************/ @@ -37,21 +37,25 @@ #ifndef _ASM_SAFETY #define JITFAIL(MSG) 0 #else -#if defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3) +#if (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) || (defined __GNUC__ && (__GNUC__ == 3 ? 
__GNUC_MINOR__ >= 2 : __GNUC__ > 3)) #define JITFAIL(MSG) jit_fail(MSG, __FILE__, __LINE__, __func__) -#else +#elif defined __GNUC__ #define JITFAIL(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) +#else +#define JITFAIL(MSG) jit_fail(MSG, __FILE__, __LINE__, "(unknown)") #endif #endif -#if defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3) +#if (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) || (defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3)) #define JITSORRY(MSG) jit_fail("sorry, unimplemented: " MSG, __FILE__, __LINE__, __func__) -#else +#elif defined __GNUC__ #define JITSORRY(MSG) jit_fail("sorry, unimplemented: " MSG, __FILE__, __LINE__, __FUNCTION__) +#else +#define JITSORRY(MSG) jit_fail("sorry, unimplemented: " MSG, __FILE__, __LINE__, "(unknown)") #endif #ifdef __GNUC__ -#define JIT_UNUSED __attribute__((unused)) +#define JIT_UNUSED __attribute__((__unused__)) #else #define JIT_UNUSED #endif @@ -91,11 +95,13 @@ typedef unsigned long _ul; #define _jit_UC(X) ((_uc )(X)) #define _jit_US(X) ((_us )(X)) #define _jit_UI(X) ((_ui )(X)) +#define _jit_SI(X) ((int )(X)) #define _jit_SL(X) ((_sl )(X)) #define _jit_UL(X) ((_ul )(X)) # define _PUC(X) ((_uc *)(X)) # define _PUS(X) ((_us *)(X)) # define _PUI(X) ((_ui *)(X)) +# define _PSI(X) ((int *)(X)) # define _PSL(X) ((_sl *)(X)) # define _PUL(X) ((_ul *)(X)) @@ -105,9 +111,9 @@ typedef unsigned long _ul; #define _jit_L(L) _jit_UL(((*_jit.x.ul_pc++)= _jit_UL((L) ))) #define _jit_I_noinc(I) _jit_UL(((*_jit.x.ui_pc)= _jit_UI((I) ))) -#define _MASK(N) ((unsigned)((1<<(N)))-1) -#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) -#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N))) +#define _MASK(N) ((unsigned long)((1L<<(N)))-1L) +#define _siP(N,I) (!((((unsigned long)(I))^(((unsigned long)(I))<<1))&~_MASK(N))) +#define _uiP(N,I) (!(((unsigned long)(I))&~_MASK(N))) #define _suiP(N,I) (_siP(N,I) | _uiP(N,I)) #ifndef _ASM_SAFETY @@ -125,8 
+131,10 @@ typedef unsigned long _ul; #define _s0P(I) ((I)==0) #define _s8P(I) _siP(8,I) #define _s16P(I) _siP(16,I) +#define _s32P(I) _siP(32,I) #define _u8P(I) _uiP(8,I) #define _u16P(I) _uiP(16,I) +#define _u32P(I) _uiP(32,I) #define _su8(I) _ck_su(8,I) #define _su16(I) _ck_su(16,I) diff --git a/src/runtime/c/pgf/lightning/core-common.h b/src/runtime/c/pgf/lightning/core-common.h index 9310ee23f..9e29250e1 100644 --- a/src/runtime/c/pgf/lightning/core-common.h +++ b/src/runtime/c/pgf/lightning/core-common.h @@ -45,12 +45,6 @@ typedef struct { struct jit_local_state jitl; } jit_state; -#ifdef jit_init -static jit_state _jit = jit_init (); -#else -static jit_state _jit; -#endif - #define JIT_NOREG (-1) #define JIT_R0 JIT_R(0) #define JIT_R1 JIT_R(1) diff --git a/src/runtime/c/pgf/lightning/i386/asm-32.h b/src/runtime/c/pgf/lightning/i386/asm-32.h new file mode 100644 index 000000000..c5c0f802d --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/asm-32.h @@ -0,0 +1,125 @@ +/******************************** -*- C -*- **************************** + * + * Run-time assembler for the i386 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2003 Gwenole Beauchesne + * Copyright 2006 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + ***********************************************************************/ + + + + +#ifndef __lightning_asm_h +#define __lightning_asm_h + +#ifndef LIGHTNING_DEBUG + +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + */ + +#if !_ASM_SAFETY +# define _r1(R) _rN(R) +# define _r2(R) _rN(R) +# define _r4(R) _rN(R) +# define _r8(R) _rN(R) +# define _rM(R) _rN(R) +# define _rX(R) _rN(R) +#else +/* _r1() used to check only for _AL and _AH but there is + * usage of _CL and _DL when _*AX is already an operand */ +# define _r1(R) \ + /* Valid 32 bit register? */ \ + ((!((R) & ~0x77) \ + /* 32, 16 or 8 bit register? */ \ + && (((_rC(R) == 0x40 || _rC(R) == 0x30 || _rC(R) == 0x10) \ + /* Yes. Register is _AL, _CL or _DL? */ \ + && ( (_rN(R) | 0x10) == _AL \ + || (_rN(R) | 0x10) == _CL \ + || (_rN(R) | 0x10) == _DL)) \ + /* No. Register is _AH? */ \ + || ((_rC(R) == 0x20 && (_rN(R) | 0x20) == _AH)))) \ + ? _rN(R) : JITFAIL("bad 8-bit register " #R)) +# define _r2(R) \ + /* Valid 32 bit register? */ \ + ((!((R) & ~0x77) \ + /* 32, 16 or 8 bit register? */ \ + && (_rC(R) == 0x40 || _rC(R) == 0x30 || _rC(R) == 0x10)) \ + ? _rN(R) : JITFAIL("bad 16-bit register " #R)) +# define _r4(R) \ + /* Valid 32 bit register? */ \ + ((!((R) & ~0x77) \ + /* 32, 16 or 8 bit register? */ \ + && (_rC(R) == 0x40 || _rC(R) == 0x30 || _rC(R) == 0x10)) \ + ? _rN(R) : JITFAIL("bad 32-bit register " #R)) +# define _r8(R) \ + JITFAIL("bad 64-bit register " #R) +# define _rM(R) \ + /* Valid MMX register? */ \ + ((!((R) & ~0x67) && _rC(R) == 0x60) \ + ? _rN(R) : JITFAIL("bad MMX register " #R)) +# define _rX(R) \ + /* Valid SSE register? 
*/ \ + ((!((R) & ~0x77) && _rC(R) == 0x70) \ + ? _rN(R) : JITFAIL("bad SSE register " #R)) +#endif + +#define _rA(R) _r4(R) + +#define jit_check8(rs) ((_rN(rs) | _AL) == _AL) +#define jit_reg8(rs) \ + ((jit_reg16(rs) == _SI || jit_reg16(rs) == _DI) \ + ? _AL : (_rN(rs) | _AL)) +#define jit_reg16(rs) (_rN(rs) | _AX) + +/* Use RIP-addressing in 64-bit mode, if possible */ +#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? _r_D (R,D ) : \ + (_rsp12P(B) ? _r_DBIS(R,D,_ESP,_ESP,1) : \ + _r_DB (R,D, B ))) : \ + (_r0P(B) ? _r_4IS (R,D, I,S) : \ + (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \ + JITFAIL("illegal index register: %esp")))) +#define _m32only(X) (X) +#define _m64only(X) JITFAIL("invalid instruction in 32-bit mode") +#define _m64(X) ((void)0) + +#define _AH 0x24 +#define _CH 0x25 +#define _DH 0x26 +#define _BH 0x27 + +#define CALLsr(R) CALLLsr(R) +#define JMPsr(R) JMPLsr(R) + +#define DECWr(RD) (_d16(), _Or (0x48,_r2(RD) )) +#define DECLr(RD) _Or (0x48,_r4(RD) ) +#define INCWr(RD) (_d16(), _Or (0x40,_r2(RD) )) +#define INCLr(RD) _Or (0x40,_r4(RD) ) + +#endif +#endif /* __lightning_asm_h */ diff --git a/src/runtime/c/pgf/lightning/i386/asm-64.h b/src/runtime/c/pgf/lightning/i386/asm-64.h new file mode 100644 index 000000000..e1ce99bae --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/asm-64.h @@ -0,0 +1,430 @@ +/******************************** -*- C -*- **************************** + * + * Run-time assembler for the x86-64 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2003 Gwenole Beauchesne + * Copyright 2006 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + ***********************************************************************/ + + + + +#ifndef __lightning_asm_h +#define __lightning_asm_h + +#ifndef LIGHTNING_DEBUG + +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + */ + +#if !_ASM_SAFETY +# define _r1(R) _rN(R) +# define _r2(R) _rN(R) +# define _r4(R) _rN(R) +# define _r8(R) _rN(R) +# define _rM(R) _rN(R) +# define _rX(R) _rN(R) +#else +# define _r1(R) \ + /* Valid 64 bit register? */ \ + ((!((R) & ~0xff) \ + /* 64, 32, 16 or 8 bit register? */ \ + && (_rC(R) == 0x50 || _rC(R) == 0x40 \ + || _rC(R) == 0x30 || _rC(R) == 0x10)) \ + ? _rN(R) : JITFAIL("bad 8-bit register " #R)) +# define _r2(R) \ + /* Valid 64 bit register? */ \ + ((!((R) & ~0xff) \ + /* 64, 32, 16 or 8 bit register? */ \ + && (_rC(R) == 0x50 || _rC(R) == 0x40 \ + || _rC(R) == 0x30 || _rC(R) == 0x10)) \ + ? _rN(R) : JITFAIL("bad 16-bit register " #R)) +# define _r4(R) \ + /* Valid 64 bit register? */ \ + ((!((R) & ~0xff) \ + /* 64, 32, 16 or 8 bit register? */ \ + && (_rC(R) == 0x50 || _rC(R) == 0x40 \ + || _rC(R) == 0x30 || _rC(R) == 0x10)) \ + ? _rN(R) : JITFAIL("bad 32-bit register " #R)) +# define _r8(R) \ + /* Valid 64 bit register? */ \ + ((!((R) & ~0xff) \ + /* 64, 32, 16 or 8 bit register? */ \ + && (_rC(R) == 0x50 || _rC(R) == 0x40 \ + || _rC(R) == 0x30 || _rC(R) == 0x10)) \ + ? 
_rN(R) : JITFAIL("bad 64-bit register " #R)) +# define _rM(R) \ + /* Valid MMX* register? */ \ + ((!((R) & ~0x6f) && _rC(R) == 0x60) \ + ? _rN(R) : JITFAIL("bad MMX register " #R)) +# define _rX(R) \ + /* Valid SSE2 register? */ \ + ((!((R) & ~0x7f) && _rC(R) == 0x70) \ + ? _rN(R) : JITFAIL("bad SSE2 register " #R)) +#endif + +#define _rA(R) _r8(R) + +#define jit_check8(rs) 1 +#define jit_reg8(rs) (_rR(rs) | _AL) +#define jit_reg16(rs) (_rR(rs) | _AX) + +/* Use RIP-addressing in 64-bit mode, if possible */ +#if 0 +#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \ + ((unsigned long)x86_get_target() + 4 + (O) - (D) <= 0xffffffff)) + +#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \ + (_x86_RIP_addressing_possible(D, O) ? \ + _r_D(R, (D) - ((unsigned long)x86_get_target() + 4 + (O))) : \ + _r_DSIB(R,D))) : \ + _r_DSIB(R,D )) : \ + (_rIP(B) ? _r_D (R,D ) : \ + (_rsp12P(B) ? _r_DBIS(R,D,_RSP,_RSP,1) : \ + _r_DB (R,D, B )))) : \ + (_r0P(B) ? _r_4IS (R,D, I,S) : \ + (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \ + JITFAIL("illegal index register: %esp")))) +#else +#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? _r_DSIB(R,D ) : \ + (_rIP(B) ? _r_D (R,D ) : \ + (_rsp12P(B) ? _r_DBIS(R,D,_RSP,_RSP,1) : \ + _r_DB (R,D, B )))) : \ + (_r0P(B) ? _r_4IS (R,D, I,S) : \ + (!_rspP(I) ? 
_r_DBIS(R,D, B, I,S) : \ + JITFAIL("illegal index register: %esp")))) +#endif + + +#define _m32only(X) (JITFAIL("invalid instruction in 64-bit mode")) +#define _m64only(X) (X) +#define _m64(X) (X) + +#define _SPL 0x14 +#define _BPL 0x15 +#define _SIL 0x16 +#define _DIL 0x17 +#define _R8B 0x18 +#define _R9B 0x19 +#define _R10B 0x1A +#define _R11B 0x1B +#define _R12B 0x1C +#define _R13B 0x1D +#define _R14B 0x1E +#define _R15B 0x1F + +#define _R8W 0x38 +#define _R9W 0x39 +#define _R10W 0x3A +#define _R11W 0x3B +#define _R12W 0x3C +#define _R13W 0x3D +#define _R14W 0x3E +#define _R15W 0x3F +#define _R8D 0x48 +#define _R9D 0x49 +#define _R10D 0x4A +#define _R11D 0x4B +#define _R12D 0x4C +#define _R13D 0x4D +#define _R14D 0x4E +#define _R15D 0x4F + +#define _RAX 0x50 +#define _RCX 0x51 +#define _RDX 0x52 +#define _RBX 0x53 +#define _RSP 0x54 +#define _RBP 0x55 +#define _RSI 0x56 +#define _RDI 0x57 +#define _R8 0x58 +#define _R9 0x59 +#define _R10 0x5A +#define _R11 0x5B +#define _R12 0x5C +#define _R13 0x5D +#define _R14 0x5E +#define _R15 0x5F +#define _RIP -2 + +#define _r1e8lP(R) ((int)(R) >= _SPL && (int)(R) <= _DIL) + +#define DECWr(RD) (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) )) +#define DECLr(RD) (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) )) +#define INCWr(RD) (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) +#define INCLr(RD) (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) )) + +#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD) +#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD) +#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD) +#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDQir(IM, RD) _ALUQir(X86_ADD, 
IM, RD) +#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS) + +#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD) +#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD) +#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS) + +#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD) +#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD) +#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS) + +#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD) +#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD) +#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS) +#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD) +#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS) + +#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD) +#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD) +#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS) + +#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD) +#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD) +#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS) + +#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD) +#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD) +#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS) + +#define 
ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD) +#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD) +#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS) + +#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD) +#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS) +#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD) +#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD) +#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD) +#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD) +#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD) +#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS) + +#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD) +#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD) +#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD) +#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD) +#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SALQir SHLQir +#define SALQim SHLQim +#define SALQrr SHLQrr +#define SALQrm SHLQrm + +#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD) +#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS) +#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD) +#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS) + +#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD) +#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, 
MI, MS) +#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD) +#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD) +#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD) +#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD) +#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD) +#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD) +#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD) +#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS) + +#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) )) +#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS )) +#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM )) +#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + +#define NOTQr(RS) _UNARYQr(X86_NOT, RS) +#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS) + +#define NEGQr(RS) _UNARYQr(X86_NEG, RS) +#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS) + +#define MULQr(RS) _UNARYQr(X86_MUL, RS) +#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS) + +#define IMULQr(RS) _UNARYQr(X86_IMUL, RS) +#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS) + +#define DIVQr(RS) _UNARYQr(X86_DIV, RS) +#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS) + +#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS) +#define IDIVQm(MD, MB, MI, MS) 
_UNARYQm(X86_IDIV, MD, MB, MI, MS) + +#define IMULQir(IM, RD) IMULQirr(IM, RD, RD) +#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) )) +#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS )) +#define IMULQirr(IM,RS,RD) (_REXQrr(RD, RS), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM )) +#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM )) + +#define CALLQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) )) +#define JMPQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) )) + +#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) )) +#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS )) + +#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) ))) +#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))) + +#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) ))) +#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))) +#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM )) + +#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) )) +#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS )) +#define TESTQir(IM, RD) \ + /* Immediate fits in 32 bits? */ \ + (_s32P((long)(IM)) \ + /* Yes. Immediate does not fit in 8 bits and reg is %rax? */ \ + ? (!_s8P(IM) && (RD) == _RAX \ + ? (_REXQrr(0, RD), _O_L(0xa9, IM)) \ + : (_REXQrr(0, RD), _O_Mrm_L(0xf7, _b11, _b000, _r8(RD), IM))) \ + /* No. 
Need immediate in a register */ \ + : (MOVQir(IM, JIT_REXTMP), TESTQrr(JIT_REXTMP, RD))) +#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + +#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) )) +#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) )) +#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) )) +#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS )) + +#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) )) +#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) )) + +#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) )) +#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS )) + +#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) )) +#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) )) +#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) )) +#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVSWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )) +#define MOVSWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS 
)) + +#define MOVZWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )) +#define MOVZWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) ))) +#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS ))) + +#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) )) + + + +#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __SSEQ1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA)) +#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS)) +#define _SSEQ1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQ1rm(OP, RS, RSA, MD, MB, MI, MS)) + +#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSI, RS,_rX, RD,_r8) +#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r8) +#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSI, RS,_rX, RD,_r8) +#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r8) + +#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) +#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) + +#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, 
X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) +#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXQrr(RS, RD) _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8) +#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM) +#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM) +#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8) +#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS) + + + +#define CALLsr(R) CALLQsr(R) +#define JMPsr(R) JMPQsr(R) + +#endif +#endif /* __lightning_asm_h */ diff --git a/src/runtime/c/pgf/lightning/i386/asm.h b/src/runtime/c/pgf/lightning/i386/asm.h index fcc364c05..3d1a1a19b 100644 --- a/src/runtime/c/pgf/lightning/i386/asm.h +++ b/src/runtime/c/pgf/lightning/i386/asm.h @@ -8,12 +8,14 @@ /*********************************************************************** * * Copyright 1999, 2000, 2001, 2002 Ian Piumarta + * Copyright 2003 Gwenole Beauchesne + * Copyright 2006 Free Software Foundation * * This file is part of GNU lightning. * * GNU lightning is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1, or (at your option) + * by the Free Software Foundation; either version 3, or (at your option) * any later version. 
* * GNU lightning is distributed in the hope that it will be useful, but @@ -23,16 +25,16 @@ * * You should have received a copy of the GNU Lesser General Public License * along with GNU lightning; see the file COPYING.LESSER; if not, write to the - * Free Software Foundation, 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. + * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. * ***********************************************************************/ -#ifndef __lightning_asm_h -#define __lightning_asm_h +#ifndef __lightning_asm_i386_h +#define __lightning_asm_i386_h /* OPCODE + i = immediate operand * + r = register operand @@ -40,7 +42,6 @@ * + sr/sm = a star preceding a register or memory */ - typedef _uc jit_insn; #ifndef LIGHTNING_DEBUG @@ -60,24 +61,21 @@ typedef _uc jit_insn; /*** REGISTERS ***/ /* [size,,number] */ +#define _NOREG 0 #define _AL 0x10 #define _CL 0x11 #define _DL 0x12 #define _BL 0x13 -#define _AH 0x14 -#define _CH 0x15 -#define _DH 0x16 -#define _BH 0x17 - -#define _AX 0x20 -#define _CX 0x21 -#define _DX 0x22 -#define _BX 0x23 -#define _SP 0x24 -#define _BP 0x25 -#define _SI 0x26 -#define _DI 0x27 + +#define _AX 0x30 +#define _CX 0x31 +#define _DX 0x32 +#define _BX 0x33 +#define _SP 0x34 +#define _BP 0x35 +#define _SI 0x36 +#define _DI 0x37 #define _EAX 0x40 #define _ECX 0x41 @@ -88,6 +86,32 @@ typedef _uc jit_insn; #define _ESI 0x46 #define _EDI 0x47 +#define _MM0 0x60 +#define _MM1 0x61 +#define _MM2 0x62 +#define _MM3 0x63 +#define _MM4 0x64 +#define _MM5 0x65 +#define _MM6 0x66 +#define _MM7 0x67 + +#define _XMM0 0x70 +#define _XMM1 0x71 +#define _XMM2 0x72 +#define _XMM3 0x73 +#define _XMM4 0x74 +#define _XMM5 0x75 +#define _XMM6 0x76 +#define _XMM7 0x77 +#define _XMM8 0x78 +#define _XMM9 0x79 +#define _XMM10 0x7a +#define _XMM11 0x7b +#define _XMM12 0x7c +#define _XMM13 0x7d +#define _XMM14 0x7e +#define _XMM15 0x7f + #define _ST0 0 #define _ST1 1 #define _ST2 2 @@ -97,19 +121,18 @@ 
typedef _uc jit_insn; #define _ST6 6 #define _ST7 7 -#define _rS(R) ((R)>>4) -#define _rN(R) ((R)&0x7) -#define _r0P(R) ((R)==0) +#define _r0P(R) ((int)(R) == (int)_NOREG) +#define _rIP(R) ((int)(R) == (int)_RIP) -#ifndef _ASM_SAFETY -#define _r1(R) _rN(R) -#define _r2(R) _rN(R) -#define _r4(R) _rN(R) -#else -#define _r1(R) ((_rS(R)==1) ? _rN(R) : JITFAIL( "8-bit register required")) -#define _r2(R) ((_rS(R)==2) ? _rN(R) : JITFAIL("16-bit register required")) -#define _r4(R) ((_rS(R)==4) ? _rN(R) : JITFAIL("32-bit register required")) -#endif +#define _rC(R) ((R) & 0xf0) +#define _rR(R) ((R) & 0x0f) +#define _rN(R) ((R) & 0x07) +#define _rXP(R) ((R) > 0 && _rR(R) > 7) + +#define _rbpP(R) (_rR(R) == _rR(_EBP)) +#define _rspP(R) (_rR(R) == _rR(_ESP)) +#define _rbp13P(R) (_rN(R) == _rN(_EBP)) +#define _rsp12P(R) (_rN(R) == _rN(_ESP)) /*** ASSEMBLER ***/ @@ -117,7 +140,7 @@ typedef _uc jit_insn; #define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) #define _D8(D) (_jit_B(0), ((*(_PUC(_jit.x.pc)-1))= _CKD8(D))) -#define _D32(D) (_jit_L(0), ((*(_PUL(_jit.x.pc)-1))= _OFF4(D))) +#define _D32(D) (_jit_I(0), ((*(_PUI(_jit.x.pc)-1))= _OFF4(D))) #ifndef _ASM_SAFETY # define _M(M) (M) @@ -147,27 +170,23 @@ typedef _uc jit_insn; /* memory subformats - urgh! 
*/ -#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_jit_L((long)(D))) -#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_r4(B)) ) -#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)) ) -#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_r4(B)) ,_jit_B((long)(D))) -#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_B((long)(D))) -#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_r4(B)) ,_jit_L((long)(D))) -#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_b101 ),_jit_L((long)(D))) -#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_L((long)(D))) +/* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */ +#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_jit_I((long)(D))) +#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_jit_I((long)(D))) +#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) ) +#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) ) +#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_jit_B((long)(D))) +#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_jit_B((long)(D))) +#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_jit_I((long)(D))) +#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_jit_I((long)(D))) +#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_jit_I((long)(D))) -#define _r_DB( R, D,B ) ((_s0P(D) && (B != _EBP) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) -#define _r_DBIS(R, D,B,I,S) ((_s0P(D) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) - -#define _r_X( R, D,B,I,S) (_r0P(I) ? (_r0P(B) ? _r_D (R,D ) : \ - (_ESP==(B) ? _r_DBIS(R,D,_ESP,_ESP,1) : \ - _r_DB (R,D, B ))) : \ - (_r0P(B) ? _r_4IS (R,D, I,S) : \ - (((I)!=_ESP) ? 
_r_DBIS(R,D, B, I,S) : \ - JITFAIL("illegal index register: %esp")))) +#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) +#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) -/* instruction formats */ + +/* --- Instruction formats ------------------------------------------------- */ /* _format Opcd ModR/M dN(rB,rI,Sc) imm... */ @@ -178,8 +197,12 @@ typedef _uc jit_insn; #define _OOr( OP,R ) ( _jit_B((OP)>>8), _jit_B( (OP)|_r(R)) ) #define _Os( OP,B ) ( _s8P(B) ? _jit_B(((OP)|_b10)) : _jit_B(OP) ) #define _sW( W ) ( _s8P(W) ? _jit_B(W):_jit_W(W) ) -#define _sL( L ) ( _s8P(L) ? _jit_B(L):_jit_L(L) ) +#define _sL( L ) ( _s8P(L) ? _jit_B(L):_jit_I(L) ) +#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) +#define _sLO( L ) ( _s8P(L) ? 1 : 4 ) +#define _O_B( OP ,B ) ( _O ( OP ) ,_jit_B(B) ) #define _O_W( OP ,W ) ( _O ( OP ) ,_jit_W(W) ) +#define _O_L( OP ,L ) ( _O ( OP ) ,_jit_I(L) ) #define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) ) #define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) ) #define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) ) @@ -188,802 +211,1036 @@ typedef _uc jit_insn; #define _O_W_B( OP ,W,B) ( _O ( OP ) ,_jit_W(W),_jit_B(B)) #define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_jit_B(B) ) #define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_jit_W(W) ) -#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_jit_L(L) ) +#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_jit_I(L) ) +#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_jit_L(Q) ) #define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) ) #define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) ) #define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_B(B) ) #define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_W(W) ) -#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_L(L) ) +#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_I(L) ) #define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP 
),_Mrm(MO,R,M ) ,_jit_B(B) ) #define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) ) #define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) ) -#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ) -#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS) ) -#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_B(B) ) -#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_W(W) ) -#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_L(L) ) -#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_B(B) ) -#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS),_sW(W) ) -#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS),_sL(L) ) +#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_jit_B(B) ) +#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_jit_W(W) ) +#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_jit_I(L) ) +#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_jit_B(B) ) +#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W)) +#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L)) #define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) #define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) ) #define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) ) -#define _wO( OP ) (_d16(), _O( OP ) ) -#define _wOr( OP,R ) (_d16(), _Or( OP,R ) ) -#define _wOr_W( OP,R ,W ) (_d16(), _Or_W( OP,R ,W) ) -#define _wOs_sW( OP ,W ) (_d16(), _Os_sW( OP ,W) ) -#define _wO_Mrm( OP 
,MO,R,M ) (_d16(), _O_Mrm( OP ,MO,R,M ) ) -#define _wOO_Mrm( OP ,MO,R,M ) (_d16(),_OO_Mrm( OP ,MO,R,M ) ) -#define _wO_Mrm_B( OP ,MO,R,M ,B ) (_d16(), _O_Mrm_B( OP ,MO,R,M ,B) ) -#define _wOO_Mrm_B( OP ,MO,R,M ,B ) (_d16(),_OO_Mrm_B( OP ,MO,R,M ,B) ) -#define _wO_Mrm_W( OP ,MO,R,M ,W ) (_d16(), _O_Mrm_W( OP ,MO,R,M ,W) ) -#define _wOs_Mrm_sW(OP ,MO,R,M ,W ) (_d16(), _Os_Mrm_sW(OP ,MO,R,M ,W) ) -#define _wO_X_W( OP ,MD,MB,MI,MS,W ) (_d16(), _O_X_W( OP ,MD,MB,MI,MS ,W) ) -#define _wO_r_X( OP ,R ,MD,MB,MI,MS ) (_d16(), _O_r_X( OP ,R ,MD,MB,MI,MS ) ) -#define _wOO_r_X( OP ,R ,MD,MB,MI,MS ) (_d16(),_OO_r_X( OP ,R ,MD,MB,MI,MS ) ) -#define _wO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) (_d16(), _O_r_X_B( OP ,R ,MD,MB,MI,MS ,B) ) -#define _wOO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) (_d16(),_OO_r_X_B( OP ,R ,MD,MB,MI,MS ,B) ) -#define _wO_r_X_W( OP ,R ,MD,MB,MI,MS,W ) (_d16(), _O_r_X_W( OP ,R ,MD,MB,MI,MS ,W) ) -#define _wOs_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) (_d16(), _Os_r_X_sW(OP ,R ,MD,MB,MI,MS ,W) ) - -/* +++ fully-qualified intrinsic instructions */ - -/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ - -#define ADCBrr(RS, RD) _O_Mrm (0x10 ,_b11,_r1(RS),_r1(RD) ) -#define ADCBmr(MD, MB, MI, MS, RD) _O_r_X (0x12 ,_r1(RD) ,MD,MB,MI,MS ) -#define ADCBrm(RS, MD, MB, MI, MS) _O_r_X (0x10 ,_r1(RS) ,MD,MB,MI,MS ) -#define ADCBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b010 ,_r1(RD) ,_su8(IM)) -#define ADCBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b010 ,MD,MB,MI,MS ,_su8(IM)) - -#define ADCWrr(RS, RD) _wO_Mrm (0x11 ,_b11,_r2(RS),_r2(RD) ) -#define ADCWmr(MD, MB, MI, MS, RD) _wO_r_X (0x13 ,_r2(RD) ,MD,MB,MI,MS ) -#define ADCWrm(RS, MD, MB, MI, MS) _wO_r_X (0x11 ,_r2(RS) ,MD,MB,MI,MS ) -#define ADCWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b010 ,_r2(RD) ,_su16(IM)) -#define ADCWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b010 ,MD,MB,MI,MS ,_su16(IM)) - -#define ADCLrr(RS, RD) _O_Mrm (0x11 ,_b11,_r4(RS),_r4(RD) ) -#define ADCLmr(MD, MB, MI, MS, RD) _O_r_X (0x13 ,_r4(RD) ,MD,MB,MI,MS ) -#define ADCLrm(RS, MD, MB, MI, MS) _O_r_X (0x11 ,_r4(RS) ,MD,MB,MI,MS ) -#define ADCLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b010 ,_r4(RD) ,IM ) -#define ADCLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b010 ,MD,MB,MI,MS ,IM ) - - -#define ADDBrr(RS, RD) _O_Mrm (0x00 ,_b11,_r1(RS),_r1(RD) ) -#define ADDBmr(MD, MB, MI, MS, RD) _O_r_X (0x02 ,_r1(RD) ,MD,MB,MI,MS ) -#define ADDBrm(RS, MD, MB, MI, MS) _O_r_X (0x00 ,_r1(RS) ,MD,MB,MI,MS ) -#define ADDBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b000 ,_r1(RD) ,_su8(IM)) -#define ADDBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b000 ,MD,MB,MI,MS ,_su8(IM)) - -#define ADDWrr(RS, RD) _wO_Mrm (0x01 ,_b11,_r2(RS),_r2(RD) ) -#define ADDWmr(MD, MB, MI, MS, RD) _wO_r_X (0x03 ,_r2(RD) ,MD,MB,MI,MS ) -#define ADDWrm(RS, MD, MB, MI, MS) _wO_r_X (0x01 ,_r2(RS) ,MD,MB,MI,MS ) -#define ADDWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b000 ,_r2(RD) ,_su16(IM)) -#define ADDWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b000 ,MD,MB,MI,MS ,_su16(IM)) - -#define ADDLrr(RS, RD) _O_Mrm (0x01 ,_b11,_r4(RS),_r4(RD) ) -#define ADDLmr(MD, MB, MI, MS, RD) _O_r_X (0x03 ,_r4(RD) ,MD,MB,MI,MS ) -#define ADDLrm(RS, MD, MB, MI, 
MS) _O_r_X (0x01 ,_r4(RS) ,MD,MB,MI,MS ) -#define ADDLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b000 ,_r4(RD) ,IM ) -#define ADDLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b000 ,MD,MB,MI,MS ,IM ) - - -#define ANDBrr(RS, RD) _O_Mrm (0x20 ,_b11,_r1(RS),_r1(RD) ) -#define ANDBmr(MD, MB, MI, MS, RD) _O_r_X (0x22 ,_r1(RD) ,MD,MB,MI,MS ) -#define ANDBrm(RS, MD, MB, MI, MS) _O_r_X (0x20 ,_r1(RS) ,MD,MB,MI,MS ) -#define ANDBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b100 ,_r1(RD) ,_su8(IM)) -#define ANDBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b100 ,MD,MB,MI,MS ,_su8(IM)) - -#define ANDWrr(RS, RD) _wO_Mrm (0x21 ,_b11,_r2(RS),_r2(RD) ) -#define ANDWmr(MD, MB, MI, MS, RD) _wO_r_X (0x23 ,_r2(RD) ,MD,MB,MI,MS ) -#define ANDWrm(RS, MD, MB, MI, MS) _wO_r_X (0x21 ,_r2(RS) ,MD,MB,MI,MS ) -#define ANDWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b100 ,_r2(RD) ,_su16(IM)) -#define ANDWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b100 ,MD,MB,MI,MS ,_su16(IM)) -#define ANDLrr(RS, RD) _O_Mrm (0x21 ,_b11,_r4(RS),_r4(RD) ) -#define ANDLmr(MD, MB, MI, MS, RD) _O_r_X (0x23 ,_r4(RD) ,MD,MB,MI,MS ) -#define ANDLrm(RS, MD, MB, MI, MS) _O_r_X (0x21 ,_r4(RS) ,MD,MB,MI,MS ) -#define ANDLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b100 ,_r4(RD) ,IM ) -#define ANDLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b100 ,MD,MB,MI,MS ,IM ) +/* --- REX prefixes -------------------------------------------------------- */ -#define BSWAPLr(R) _OOr (0x0fc8,_r4(R) ) +#define _BIT(X) (!!(X)) +#define _d64(W,R,X,B) (_jit_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) -#define BTWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b100 ,_r2(RD) ,_u8(IM)) -#define BTWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b100 ,MD,MB,MI,MS ,_u8(IM)) -#define BTWrr(RS,RD) _wOO_Mrm (0x0fa3 ,_b11,_r2(RS),_r2(RD) ) -#define BTWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fa3 ,_r2(RS) ,MD,MB,MI,MS ) +#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? 
(void)_d64(W,R,X,B) : ((void)0)) +#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) +#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) +#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR)))) +#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB)))) -#define BTLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b100 ,_r4(RD) ,_u8(IM)) -#define BTLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b100 ,MD,MB,MI,MS ,_u8(IM)) -#define BTLrr(RS,RD) _OO_Mrm (0x0fa3 ,_b11,_r4(RS),_r4(RD) ) -#define BTLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fa3 ,_r4(RS) ,MD,MB,MI,MS ) +/* FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH) */ +#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR)) +#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB)) +#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS) +#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR)) +#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR)) +#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB)) +#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS) +#define _REXLr(RR) _m64(__REX_reg(RR)) +#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI)) -#define BTCWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b111 ,_r2(RD) ,_u8(IM)) -#define BTCWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b111 ,MD,MB,MI,MS ,_u8(IM)) -#define BTCWrr(RS,RD) _wOO_Mrm (0x0fbb ,_b11,_r2(RS),_r2(RD) ) -#define BTCWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fbb ,_r2(RS) ,MD,MB,MI,MS ) +#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR)) +#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB)) +#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS) +#define _REXQr(RR) _m64only(__REX_reg(RR)) +#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI)) -#define BTCLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b111 ,_r4(RD) ,_u8(IM)) -#define BTCLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b111 ,MD,MB,MI,MS ,_u8(IM)) -#define BTCLrr(RS,RD) _OO_Mrm (0x0fbb ,_b11,_r4(RS),_r4(RD) ) -#define BTCLrm(RS,MD,MB,MI,MS) 
_OO_r_X (0x0fbb ,_r4(RS) ,MD,MB,MI,MS ) +/* ========================================================================= */ +/* --- Fully-qualified intrinsic instructions ------------------------------ */ +/* ========================================================================= */ -#define BTRWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b110 ,_r2(RD) ,_u8(IM)) -#define BTRWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b110 ,MD,MB,MI,MS ,_u8(IM)) -#define BTRWrr(RS,RD) _wOO_Mrm (0x0fb3 ,_b11,_r2(RS),_r2(RD) ) -#define BTRWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fb3 ,_r2(RS) ,MD,MB,MI,MS ) +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + * + 0 = top of stack register (for FPU instructions) + * + * NOTE in x86-64 mode: a memory operand with only a valid + * displacement value will lead to the expected absolute mode. If + * RIP addressing is necessary, X86_RIP shall be used as the base + * register argument. + */ -#define BTRLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b110 ,_r4(RD) ,_u8(IM)) -#define BTRLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b110 ,MD,MB,MI,MS ,_u8(IM)) -#define BTRLrr(RS,RD) _OO_Mrm (0x0fb3 ,_b11,_r4(RS),_r4(RD) ) -#define BTRLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb3 ,_r4(RS) ,MD,MB,MI,MS ) +/* --- ALU instructions ---------------------------------------------------- */ + +enum { + X86_ADD = 0, + X86_OR = 1, + X86_ADC = 2, + X86_SBB = 3, + X86_AND = 4, + X86_SUB = 5, + X86_XOR = 6, + X86_CMP = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) )) +#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2,_r1(RD) ,MD,MB,MI,MS )) +#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) , ,_r1(RS) ,MD,MB,MI,MS )) +#define _ALUBir(OP, IM, RD) ((RD) == _AL ? 
\ + (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) ) +#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM))) + +#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) )) +#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS )) +#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS )) +#define _ALUWir(OP, IM, RD) ((RD) == _AX ? \ + (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \ + (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) ) +#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM))) + +#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) )) +#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS )) +#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS )) +#define _ALULir(OP, IM, RD) (!_s8P(IM) && (RD) == _EAX ? \ + (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) ) +#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) )) +#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS )) +#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS )) +#define _ALUQir(OP, IM, RD) \ + /* Immediate fits in 32 bits? */ \ + (_s32P((long)(IM)) \ + /* Yes. Immediate does not fit in 8 bits and reg is %rax? */ \ + ? (!_s8P(IM) && (RD) == _RAX \ + ? 
(_REXQrr(0, RD), _O_L(((OP) << 3) + 5, IM)) \ + : (_REXQrr(0, RD), _Os_Mrm_sL(0x81, _b11, OP, _r8(RD), IM))) \ + /* No. Need immediate in a register */ \ + : (MOVQir(IM, JIT_REXTMP), _ALUQrr(OP, JIT_REXTMP, RD))) +#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD) +#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD) +#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD) +#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD) +#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD) +#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD) +#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS) + + +#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD) +#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD) +#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD) +#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD) +#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD) +#define ADDLmr(MD, MB, MI, MS, RD) 
_ALULmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD) +#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS) + + +#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD) +#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD) +#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD) +#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD) +#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD) +#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD) +#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS) + + +#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD) +#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD) +#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD) +#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD) +#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD) +#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPLrm(RS, MD, MB, MI, MS) 
_ALULrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD) +#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS) + + +#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD) +#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD) +#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS) +#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD) +#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS) + +#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD) +#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD) +#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS) +#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD) +#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS) + +#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD) +#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD) +#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS) +#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD) +#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS) + + +#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD) +#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD) +#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD) +#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD) +#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD) +#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD) +#define SBBLim(IM, MD, 
MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS) + + +#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD) +#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD) +#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD) +#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD) +#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD) +#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD) +#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS) + + +#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD) +#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD) +#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD) +#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD) +#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD) +#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD) +#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS) + + + +/* --- Shift/Rotate 
instructions ------------------------------------------- */ + +enum { + X86_ROL = 0, + X86_ROR = 1, + X86_RCL = 2, + X86_RCR = 3, + X86_SHL = 4, + X86_SHR = 5, + X86_SAR = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ROTSHIBir(OP,IM,RD) ((IM) == 1 ? \ + (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) ) +#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \ + (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \ + (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIBrr(OP,RS,RD) (((RS) == _CL) ? \ + (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \ + JITFAIL("source register must be CL" ) ) +#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \ + (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \ + JITFAIL("source register must be CL" ) ) + +#define _ROTSHIWir(OP,IM,RD) ((IM) == 1 ? \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) ) +#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIWrr(OP,RS,RD) (((RS) == _CL) ? \ + (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \ + JITFAIL("source register must be CL" ) ) +#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \ + (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + JITFAIL("source register must be CL" ) ) + +#define _ROTSHILir(OP,IM,RD) ((IM) == 1 ? \ + (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \ + (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) ) +#define _ROTSHILim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \ + (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHILrr(OP,RS,RD) (((RS) == _CL) ? 
\ + (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \ + JITFAIL("source register must be CL" ) ) +#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \ + (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + JITFAIL("source register must be CL" ) ) + +#define _ROTSHIQir(OP,IM,RD) ((IM) == 1 ? \ + (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \ + (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) ) +#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \ + (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIQrr(OP,RS,RD) (((RS) == _CL) ? \ + (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \ + JITFAIL("source register must be CL" ) ) +#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \ + (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + JITFAIL("source register must be CL" ) ) + +#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD) +#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD) +#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD) +#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD) +#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD) +#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD) +#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS) + + +#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD) +#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS) +#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD) +#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORWir(IM, RD) 
_ROTSHIWir(X86_ROR, IM, RD) +#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS) +#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD) +#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD) +#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS) +#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD) +#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS) + + +#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD) +#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD) +#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD) +#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD) +#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD) +#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD) +#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS) + + +#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD) +#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD) +#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD) +#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD) +#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD) +#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD) +#define RCRLrm(RS, MD, MB, MI, 
MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS) + + +#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD) +#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD) +#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD) +#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD) +#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD) +#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD) +#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS) + + +#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD) +#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD) +#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD) +#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD) +#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD) +#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD) +#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS) -#define BTSWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b101 ,_r2(RD) ,_u8(IM)) -#define BTSWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b101 ,MD,MB,MI,MS ,_u8(IM)) -#define BTSWrr(RS,RD) _wOO_Mrm (0x0fab ,_b11,_r2(RS),_r2(RD) ) -#define BTSWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fab ,_r2(RS) ,MD,MB,MI,MS ) +#define SALBir SHLBir +#define SALBim SHLBim +#define SALBrr SHLBrr +#define SALBrm SHLBrm -#define 
BTSLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b101 ,_r4(RD) ,_u8(IM)) -#define BTSLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b101 ,MD,MB,MI,MS ,_u8(IM)) -#define BTSLrr(RS,RD) _OO_Mrm (0x0fab ,_b11,_r4(RS),_r4(RD) ) -#define BTSLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fab ,_r4(RS) ,MD,MB,MI,MS ) +#define SALWir SHLWir +#define SALWim SHLWim +#define SALWrr SHLWrr +#define SALWrm SHLWrm +#define SALLir SHLLir +#define SALLim SHLLim +#define SALLrr SHLLrr +#define SALLrm SHLLrm -#define CALLm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D32 (0xe8 ,(int)(D) ) : \ - JITFAIL("illegal mode in direct jump")) -#define CALLsr(R) _O_Mrm (0xff ,_b11,_b010,_r4(R) ) +#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD) +#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS) +#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD) +#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS) -#define CALLsm(D,B,I,S) _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ) +#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD) +#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS) +#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD) +#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS) -#define CBW_() _O (0x98 ) -#define CLC_() _O (0xf8 ) -#define CLTD_() _O (0x99 ) -#define CMC_() _O (0xf5 ) +#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD) +#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS) +#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD) +#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS) -#define CMPBrr(RS, RD) _O_Mrm (0x38 ,_b11,_r1(RS),_r1(RD) ) -#define CMPBmr(MD, MB, MI, MS, RD) _O_r_X (0x3a ,_r1(RD) ,MD,MB,MI,MS ) -#define CMPBrm(RS, MD, MB, MI, MS) _O_r_X (0x38 ,_r1(RS) ,MD,MB,MI,MS ) -#define CMPBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b111 ,_r1(RD) ,_su8(IM)) -#define CMPBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b111 ,MD,MB,MI,MS ,_su8(IM)) -#define CMPWrr(RS, RD) _wO_Mrm (0x39 ,_b11,_r2(RS),_r2(RD) ) 
-#define CMPWmr(MD, MB, MI, MS, RD) _wO_r_X (0x3b ,_r2(RD) ,MD,MB,MI,MS ) -#define CMPWrm(RS, MD, MB, MI, MS) _wO_r_X (0x39 ,_r2(RS) ,MD,MB,MI,MS ) -#define CMPWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b111 ,_r2(RD) ,_su16(IM)) -#define CMPWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b111 ,MD,MB,MI,MS ,_su16(IM)) +/* --- Bit test instructions ----------------------------------------------- */ -#define CMPLrr(RS, RD) _O_Mrm (0x39 ,_b11,_r4(RS),_r4(RD) ) -#define CMPLmr(MD, MB, MI, MS, RD) _O_r_X (0x3b ,_r4(RD) ,MD,MB,MI,MS ) -#define CMPLrm(RS, MD, MB, MI, MS) _O_r_X (0x39 ,_r4(RS) ,MD,MB,MI,MS ) -#define CMPLir(IM, RD) _O_Mrm_L (0x81 ,_b11,_b111 ,_r4(RD) ,IM ) -#define CMPLim(IM, MD, MB, MI, MS) _O_r_X_L (0x81 ,_b111 ,MD,MB,MI,MS ,IM ) +enum { + X86_BT = 4, + X86_BTS = 5, + X86_BTR = 6, + X86_BTC = 7, +}; +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ -#define CWD_() _O (0x99 ) +#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM))) +#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) )) +#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS )) +#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM))) +#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) )) +#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS )) -#define CMPXCHGBrr(RS,RD) _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ) -#define CMPXCHGBrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS ) +#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM))) +#define _BTQim(OP, IM, 
MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) )) +#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS )) -#define CMPXCHGWrr(RS,RD) _wOO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) ) -#define CMPXCHGWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS ) +#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD) +#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS) /* _BTWim takes (OP, IM, MD, MB, MI, MS): the base register MB must be forwarded */ +#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD) +#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS) -#define CMPXCHGLrr(RS,RD) _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) ) -#define CMPXCHGLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS ) +#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD) +#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS) +#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD) +#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS) + + +#define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD) +#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS) /* MB forwarded, matching BTCLim */ +#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD) +#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD) +#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD) +#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS) + + +#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD) +#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS) /* MB forwarded, matching BTRLim */ +#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD) +#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD) +#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD) +#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, 
MS) + + +#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD) +#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS) /* MB forwarded, matching BTSLim */ +#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD) +#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD) +#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD) +#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS) + + + +/* --- Move instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) )) +#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS )) +#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS )) +#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM))) +#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM))) -#define DECBr(RD) _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) ) -#define DECBm(MD,MB,MI,MS) _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS ) +#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) )) +#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS )) +#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM))) +#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM))) -#define DECWr(RD) _wOr (0x48,_r2(RD) ) -#define DECWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b001 ,MD,MB,MI,MS ) +#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) )) +#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), 
_O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS )) +#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM )) +#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + + + +/* --- Unary and Multiply/Divide instructions ------------------------------ */ + +enum { + X86_NOT = 2, + X86_NEG = 3, + X86_MUL = 4, + X86_IMUL = 5, + X86_DIV = 6, + X86_IDIV = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) )) +#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS )) +#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) )) +#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) )) +#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) )) +#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) + +#define NOTBr(RS) _UNARYBr(X86_NOT, RS) +#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS) +#define NOTWr(RS) _UNARYWr(X86_NOT, RS) +#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS) +#define NOTLr(RS) _UNARYLr(X86_NOT, RS) +#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS) + +#define NEGBr(RS) _UNARYBr(X86_NEG, RS) +#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS) +#define NEGWr(RS) _UNARYWr(X86_NEG, RS) +#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS) +#define NEGLr(RS) _UNARYLr(X86_NEG, RS) +#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS) + +#define MULBr(RS) _UNARYBr(X86_MUL, RS) +#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS) +#define MULWr(RS) _UNARYWr(X86_MUL, RS) +#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS) +#define 
MULLr(RS) _UNARYLr(X86_MUL, RS) +#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS) + +#define IMULBr(RS) _UNARYBr(X86_IMUL, RS) +#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS) +#define IMULWr(RS) _UNARYWr(X86_IMUL, RS) +#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS) +#define IMULLr(RS) _UNARYLr(X86_IMUL, RS) +#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS) + +#define DIVBr(RS) _UNARYBr(X86_DIV, RS) +#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS) +#define DIVWr(RS) _UNARYWr(X86_DIV, RS) +#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS) +#define DIVLr(RS) _UNARYLr(X86_DIV, RS) +#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS) + +#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS) +#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS) +#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS) +#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS) +#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS) +#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) )) +#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS )) + +#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) )) +#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) )) + +#define IMULLir(IM, RD) (_REXLrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM )) +#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) )) +#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS )) + + +#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM )) +#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM )) + + + +/* --- Control Flow related instructions ----------------------------------- */ + +enum { + X86_CC_O = 0x0, + X86_CC_NO = 0x1, + X86_CC_NAE = 0x2, + X86_CC_B = 0x2, + X86_CC_C = 0x2, + X86_CC_AE = 0x3, + X86_CC_NB = 0x3, + X86_CC_NC = 0x3, + X86_CC_E = 0x4, + X86_CC_Z = 0x4, + X86_CC_NE = 0x5, + X86_CC_NZ = 0x5, + X86_CC_BE = 0x6, + X86_CC_NA = 0x6, + X86_CC_A = 0x7, + X86_CC_NBE = 0x7, + X86_CC_S = 0x8, + X86_CC_NS = 0x9, + X86_CC_P = 0xa, + X86_CC_PE = 0xa, + X86_CC_NP = 0xb, + X86_CC_PO = 0xb, + X86_CC_L = 0xc, + X86_CC_NGE = 0xc, + X86_CC_GE = 0xd, + X86_CC_NL = 0xd, + X86_CC_LE = 0xe, + X86_CC_NG = 0xe, + X86_CC_G = 0xf, + X86_CC_NLE = 0xf, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +/* FIXME: no prefix is available to encode a 32-bit operand size in 64-bit + mode */ +#define CALLm(M) _O_D32 (0xe8 ,(int)(M) ) +#define CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) )) +#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S )) + +/* FIXME: no prefix is available to encode a 32-bit operand size in 64-bit + mode */ +#define JMPSm(M) _O_D8 (0xeb ,(int)(M) ) +#define JMPm(M) _O_D32 (0xe9 ,(int)(M) ) +#define JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) )) +#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) ) +#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) ) +#define JOSm(D) JCCSim(0x0, D) +#define JNOSm(D) JCCSim(0x1, D) +#define JBSm(D) JCCSim(0x2, D) +#define JCSm(D) JCCSim(0x2, D) +#define JNAESm(D) JCCSim(0x2, D) +#define JNBSm(D) JCCSim(0x3, D) +#define JNCSm(D) JCCSim(0x3, D) +#define JAESm(D) JCCSim(0x3, D) +#define JESm(D) JCCSim(0x4, D) +#define JZSm(D) JCCSim(0x4, D) +#define JNESm(D) JCCSim(0x5, D) +#define JNZSm(D) JCCSim(0x5, D) +#define JBESm(D) JCCSim(0x6, D) +#define JNASm(D) JCCSim(0x6, D) +#define JNBESm(D) JCCSim(0x7, D) +#define JASm(D) JCCSim(0x7, D) +#define JSSm(D) JCCSim(0x8, D) +#define JNSSm(D) JCCSim(0x9, D) +#define JPSm(D) JCCSim(0xa, D) +#define JPESm(D) JCCSim(0xa, D) +#define JNPSm(D) JCCSim(0xb, D) +#define JPOSm(D) JCCSim(0xb, D) +#define JLSm(D) JCCSim(0xc, D) +#define JNGESm(D) JCCSim(0xc, D) +#define JNLSm(D) JCCSim(0xd, D) +#define JGESm(D) JCCSim(0xd, D) +#define JLESm(D) JCCSim(0xe, D) +#define JNGSm(D) JCCSim(0xe, D) +#define JNLESm(D) JCCSim(0xf, D) +#define JGSm(D) JCCSim(0xf, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(long)(D) ) +#define JOm(D) JCCim(0x0, D) +#define JNOm(D) JCCim(0x1, D) +#define JBm(D) JCCim(0x2, D) +#define JCm(D) JCCim(0x2, D) +#define JNAEm(D) JCCim(0x2, D) +#define JNBm(D) JCCim(0x3, D) +#define JNCm(D) JCCim(0x3, D) +#define JAEm(D) JCCim(0x3, D) +#define JEm(D) JCCim(0x4, D) +#define JZm(D) JCCim(0x4, D) +#define JNEm(D) JCCim(0x5, D) +#define JNZm(D) JCCim(0x5, D) +#define JBEm(D) JCCim(0x6, D) +#define JNAm(D) JCCim(0x6, D) +#define JNBEm(D) JCCim(0x7, D) +#define JAm(D) JCCim(0x7, D) +#define JSm(D) JCCim(0x8, D) +#define JNSm(D) JCCim(0x9, D) +#define JPm(D) JCCim(0xa, D) +#define JPEm(D) JCCim(0xa, D) +#define JNPm(D) JCCim(0xb, D) +#define JPOm(D) JCCim(0xb, D) +#define JLm(D) JCCim(0xc, D) +#define JNGEm(D) JCCim(0xc, D) +#define JNLm(D) JCCim(0xd, D) +#define JGEm(D) JCCim(0xd, D) +#define JLEm(D) JCCim(0xe, D) +#define JNGm(D) JCCim(0xe, D) +#define JNLEm(D) JCCim(0xf, D) +#define JGm(D) JCCim(0xf, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) )) +#define SETOr(RD) SETCCir(0x0,RD) +#define SETNOr(RD) SETCCir(0x1,RD) +#define SETBr(RD) SETCCir(0x2,RD) +#define SETNAEr(RD) SETCCir(0x2,RD) +#define SETNBr(RD) SETCCir(0x3,RD) +#define SETAEr(RD) SETCCir(0x3,RD) +#define SETEr(RD) SETCCir(0x4,RD) +#define SETZr(RD) SETCCir(0x4,RD) +#define SETNEr(RD) SETCCir(0x5,RD) +#define SETNZr(RD) SETCCir(0x5,RD) +#define SETBEr(RD) SETCCir(0x6,RD) +#define SETNAr(RD) SETCCir(0x6,RD) +#define SETNBEr(RD) SETCCir(0x7,RD) +#define SETAr(RD) SETCCir(0x7,RD) +#define SETSr(RD) SETCCir(0x8,RD) +#define SETNSr(RD) SETCCir(0x9,RD) +#define SETPr(RD) SETCCir(0xa,RD) +#define SETPEr(RD) SETCCir(0xa,RD) +#define SETNPr(RD) SETCCir(0xb,RD) +#define SETPOr(RD) SETCCir(0xb,RD) +#define SETLr(RD) SETCCir(0xc,RD) +#define SETNGEr(RD) SETCCir(0xc,RD) +#define SETNLr(RD) SETCCir(0xd,RD) +#define SETGEr(RD) SETCCir(0xd,RD) +#define SETLEr(RD) SETCCir(0xe,RD) +#define SETNGr(RD) SETCCir(0xe,RD) +#define SETNLEr(RD) SETCCir(0xf,RD) +#define SETGr(RD) SETCCir(0xf,RD) -#define DECLr(RD) _Or (0x48,_r4(RD) ) -#define DECLm(MD,MB,MI,MS) _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS )) +#define SETOm(D, B, I, S) SETCCim(0x0, D, B, I, S) +#define SETNOm(D, B, I, S) SETCCim(0x1, D, B, I, S) +#define SETBm(D, B, I, S) SETCCim(0x2, D, B, I, S) +#define SETNAEm(D, B, I, S) SETCCim(0x2, D, B, I, S) +#define SETNBm(D, B, I, S) SETCCim(0x3, D, B, I, S) +#define SETAEm(D, B, I, S) SETCCim(0x3, D, B, I, S) +#define SETEm(D, B, I, S) SETCCim(0x4, D, B, I, S) +#define SETZm(D, B, I, S) SETCCim(0x4, D, B, I, S) +#define SETNEm(D, B, I, S) SETCCim(0x5, D, B, I, S) +#define SETNZm(D, B, I, S) SETCCim(0x5, D, B, I, S) +#define SETBEm(D, B, I, S) SETCCim(0x6, D, B, I, S) +#define SETNAm(D, B, I, S) SETCCim(0x6, D, B, I, S) +#define SETNBEm(D, B, I, S) SETCCim(0x7, D, B, I, S) +#define SETAm(D, B, I, S) SETCCim(0x7, D, B, I, S) +#define SETSm(D, B, I, S) SETCCim(0x8, D, B, I, S) +#define SETNSm(D, B, I, S) SETCCim(0x9, D, B, I, S) +#define SETPm(D, B, I, S) SETCCim(0xa, D, B, I, S) +#define SETPEm(D, B, I, S) SETCCim(0xa, D, B, I, S) +#define SETNPm(D, B, I, S) SETCCim(0xb, D, B, I, S) +#define SETPOm(D, B, I, S) SETCCim(0xb, D, B, I, S) +#define SETLm(D, B, I, S) SETCCim(0xc, D, B, I, S) +#define SETNGEm(D, B, I, S) SETCCim(0xc, D, B, I, S) +#define SETNLm(D, B, I, S) SETCCim(0xd, D, B, I, S) +#define SETGEm(D, B, I, S) SETCCim(0xd, D, B, I, S) +#define SETLEm(D, B, I, S) SETCCim(0xe, D, B, I, S) +#define SETNGm(D, B, I, S) SETCCim(0xe, D, B, I, S) +#define SETNLEm(D, B, I, S) SETCCim(0xf, D, B, I, S) +#define SETGm(D, B, I, S) SETCCim(0xf, D, B, I, S) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) )) +#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS )) +#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) )) +#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS )) -#define DIVBr(RS) _O_Mrm (0xf6 ,_b11,_b110 ,_r1(RS) ) -#define DIVBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b110 ,MD,MB,MI,MS ) -#define DIVWr(RS) _wO_Mrm (0xf7 ,_b11,_b110 ,_r2(RS) ) -#define DIVWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b110 ,MD,MB,MI,MS ) +/* --- Push/Pop instructions ----------------------------------------------- */ -#define DIVLr(RS) _O_Mrm (0xf7 ,_b11,_b110 ,_r4(RS) ) -#define DIVLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b110 ,MD,MB,MI,MS ) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) ))) +#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))) -#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B)) -#define HLT_() _O (0xf4 ) +#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) )) +#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )) -#define IDIVBr(RS) _O_Mrm (0xf6 ,_b11,_b111 ,_r1(RS) ) -#define IDIVBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b111 ,MD,MB,MI,MS ) +#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) ))) +#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))) /* same _O_r_X arguments as PUSHLm, preceded by _d16() */ +#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM ))) -#define IDIVWr(RS) _wO_Mrm (0xf7 ,_b11,_b111 ,_r2(RS) ) -#define IDIVWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b111 ,MD,MB,MI,MS ) +#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) )) +#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )) +#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM )) -#define IDIVLr(RS) _O_Mrm (0xf7 ,_b11,_b111 ,_r4(RS) ) -#define IDIVLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b111 ,MD,MB,MI,MS ) 
-#define IMULBr(RS) _O_Mrm (0xf6 ,_b11,_b101 ,_r1(RS) ) -#define IMULBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b101 ,MD,MB,MI,MS ) +#define POPA_() (_d16(), _O (0x61 )) +#define POPAD_() _O (0x61 ) -#define IMULWr(RS) _wO_Mrm (0xf7 ,_b11,_b101 ,_r2(RS) ) -#define IMULWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b101 ,MD,MB,MI,MS ) +#define PUSHA_() (_d16(), _O (0x60 )) +#define PUSHAD_() _O (0x60 ) -#define IMULLr(RS) _O_Mrm (0xf7 ,_b11,_b101 ,_r4(RS) ) -#define IMULLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b101 ,MD,MB,MI,MS ) +#define POPF_() _O (0x9d ) +#define PUSHF_() _O (0x9c ) -#define IMULWrr(RS,RD) _wOO_Mrm (0x0faf ,_b11,_r2(RS),_r2(RD) ) -#define IMULWmr(MD,MB,MI,MS,RD) _wOO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS ) -#define IMULWirr(IM,RS,RD) _wOs_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) ) -#define IMULWimr(IM,MD,MB,MI,MS,RD) _wOs_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) ) +/* --- Test instructions --------------------------------------------------- */ -#define IMULLir(IM,RD) _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM ) -#define IMULLrr(RS,RD) _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) ) -#define IMULLmr(MD,MB,MI,MS,RD) _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS ) -#define IMULLirr(IM,RS,RD) _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM ) -#define IMULLimr(IM,MD,MB,MI,MS,RD) _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM ) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) )) +#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS )) +#define TESTBir(IM, RD) ((RD) == _AL ? 
\ + (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) ) +#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM))) -#define INCBr(RD) _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) ) -#define INCBm(MD,MB,MI,MS) _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS ) +#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) )) +#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS )) +#define TESTWir(IM, RD) ((RD) == _AX ? \ + (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) ) +#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM))) -#define INCWr(RD) _wOr (0x40,_r2(RD) ) -#define INCWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b000 ,MD,MB,MI,MS ) +#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) )) +#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS )) +#define TESTLir(IM, RD) (!_s8P(IM) && (RD) == _EAX ? \ + (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) ) +#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) -#define INCLr(RD) _Or (0x40,_r4(RD) ) -#define INCLm(MD,MB,MI,MS) _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ) -#define INVD_() _OO (0x0f08 ) -#define INVLPGm(MD, MB, MI, MS) _OO_r_X (0x0f01 ,_b111 ,MD,MB,MI,MS ) +/* --- Exchange instructions ----------------------------------------------- */ +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ -#define JCCSim(CC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? 
_O_D8 (0x70|(CC) ,(int)(D) ) : \ - JITFAIL("illegal mode in conditional jump")) +#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) )) +#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS )) -#define JOSm(D,B,I,S) JCCSim(0x0,D,B,I,S) -#define JNOSm(D,B,I,S) JCCSim(0x1,D,B,I,S) -#define JBSm(D,B,I,S) JCCSim(0x2,D,B,I,S) -#define JNAESm(D,B,I,S) JCCSim(0x2,D,B,I,S) -#define JNBSm(D,B,I,S) JCCSim(0x3,D,B,I,S) -#define JAESm(D,B,I,S) JCCSim(0x3,D,B,I,S) -#define JESm(D,B,I,S) JCCSim(0x4,D,B,I,S) -#define JZSm(D,B,I,S) JCCSim(0x4,D,B,I,S) -#define JNESm(D,B,I,S) JCCSim(0x5,D,B,I,S) -#define JNZSm(D,B,I,S) JCCSim(0x5,D,B,I,S) -#define JBESm(D,B,I,S) JCCSim(0x6,D,B,I,S) -#define JNASm(D,B,I,S) JCCSim(0x6,D,B,I,S) -#define JNBESm(D,B,I,S) JCCSim(0x7,D,B,I,S) -#define JASm(D,B,I,S) JCCSim(0x7,D,B,I,S) -#define JSSm(D,B,I,S) JCCSim(0x8,D,B,I,S) -#define JNSSm(D,B,I,S) JCCSim(0x9,D,B,I,S) -#define JPSm(D,B,I,S) JCCSim(0xa,D,B,I,S) -#define JPESm(D,B,I,S) JCCSim(0xa,D,B,I,S) -#define JNPSm(D,B,I,S) JCCSim(0xb,D,B,I,S) -#define JPOSm(D,B,I,S) JCCSim(0xb,D,B,I,S) -#define JLSm(D,B,I,S) JCCSim(0xc,D,B,I,S) -#define JNGESm(D,B,I,S) JCCSim(0xc,D,B,I,S) -#define JNLSm(D,B,I,S) JCCSim(0xd,D,B,I,S) -#define JGESm(D,B,I,S) JCCSim(0xd,D,B,I,S) -#define JLESm(D,B,I,S) JCCSim(0xe,D,B,I,S) -#define JNGSm(D,B,I,S) JCCSim(0xe,D,B,I,S) -#define JNLESm(D,B,I,S) JCCSim(0xf,D,B,I,S) -#define JGSm(D,B,I,S) JCCSim(0xf,D,B,I,S) +#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) )) +#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS )) -#define JCCim(CC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? 
_OO_D32 (0x0f80|(CC) ,(int)(D) ) : \ - JITFAIL("illegal mode in conditional jump")) +#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) )) +#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS )) -#define JOm(D,B,I,S) JCCim(0x0,D,B,I,S) -#define JNOm(D,B,I,S) JCCim(0x1,D,B,I,S) -#define JBm(D,B,I,S) JCCim(0x2,D,B,I,S) -#define JNAEm(D,B,I,S) JCCim(0x2,D,B,I,S) -#define JNBm(D,B,I,S) JCCim(0x3,D,B,I,S) -#define JAEm(D,B,I,S) JCCim(0x3,D,B,I,S) -#define JEm(D,B,I,S) JCCim(0x4,D,B,I,S) -#define JZm(D,B,I,S) JCCim(0x4,D,B,I,S) -#define JNEm(D,B,I,S) JCCim(0x5,D,B,I,S) -#define JNZm(D,B,I,S) JCCim(0x5,D,B,I,S) -#define JBEm(D,B,I,S) JCCim(0x6,D,B,I,S) -#define JNAm(D,B,I,S) JCCim(0x6,D,B,I,S) -#define JNBEm(D,B,I,S) JCCim(0x7,D,B,I,S) -#define JAm(D,B,I,S) JCCim(0x7,D,B,I,S) -#define JSm(D,B,I,S) JCCim(0x8,D,B,I,S) -#define JNSm(D,B,I,S) JCCim(0x9,D,B,I,S) -#define JPm(D,B,I,S) JCCim(0xa,D,B,I,S) -#define JPEm(D,B,I,S) JCCim(0xa,D,B,I,S) -#define JNPm(D,B,I,S) JCCim(0xb,D,B,I,S) -#define JPOm(D,B,I,S) JCCim(0xb,D,B,I,S) -#define JLm(D,B,I,S) JCCim(0xc,D,B,I,S) -#define JNGEm(D,B,I,S) JCCim(0xc,D,B,I,S) -#define JNLm(D,B,I,S) JCCim(0xd,D,B,I,S) -#define JGEm(D,B,I,S) JCCim(0xd,D,B,I,S) -#define JLEm(D,B,I,S) JCCim(0xe,D,B,I,S) -#define JNGm(D,B,I,S) JCCim(0xe,D,B,I,S) -#define JNLEm(D,B,I,S) JCCim(0xf,D,B,I,S) -#define JGm(D,B,I,S) JCCim(0xf,D,B,I,S) +#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) )) +#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS )) -#define JMPSm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D8 (0xeb ,(int)(D) ) : \ - JITFAIL("illegal mode in short jump")) +#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) )) +#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS )) -#define JMPm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? 
_O_D32 (0xe9 ,(int)(D) ) : \ - JITFAIL("illegal mode in direct jump")) +#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) )) +#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS )) -#define JMPsr(R) _O_Mrm (0xff ,_b11,_b100,_r4(R) ) -#define JMPsm(D,B,I,S) _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ) - - -#define LAHF_() _O (0x9f ) -#define LEALmr(MD, MB, MI, MS, RD) _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS ) -#define LEAVE_() _O (0xc9 ) - - -#define LMSWr(RS) _OO_Mrm (0x0f01 ,_b11,_b110,_r4(RS) ) -#define LMSWm(MD,MB,MI,MS) _OO_r_X (0x0f01 ,_b110 ,MD,MB,MI,MS ) +#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) )) +#define XCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS )) -#define LOOPm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe2 ,MD ) : \ - JITFAIL("illegal mode in loop")) - -#define LOOPEm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe1 ,MD ) : \ - JITFAIL("illegal mode in loope")) - -#define LOOPZm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe1 ,MD ) : \ - JITFAIL("illegal mode in loopz")) +#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) )) +#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS )) -#define LOOPNEm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe0 ,MD ) : \ - JITFAIL("illegal mode in loopne")) +#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) )) +#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS )) -#define LOOPNZm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? 
_O_D8 (0xe0 ,MD ) : \ - JITFAIL("illegal mode in loopnz")) -#define MOVBrr(RS, RD) _O_Mrm (0x80 ,_b11,_r1(RS),_r1(RD) ) -#define MOVBmr(MD, MB, MI, MS, RD) _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS ) -#define MOVBrm(RS, MD, MB, MI, MS) _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS ) -#define MOVBir(IM, R) _Or_B (0xb0,_r1(R) ,_su8(IM)) -#define MOVBim(IM, MD, MB, MI, MS) _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM)) - -#define MOVWrr(RS, RD) _wO_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) ) -#define MOVWmr(MD, MB, MI, MS, RD) _wO_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS ) -#define MOVWrm(RS, MD, MB, MI, MS) _wO_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS ) -#define MOVWir(IM, R) _wOr_W (0xb8,_r2(R) ,_su16(IM)) -#define MOVWim(IM, MD, MB, MI, MS) _wO_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM)) +/* --- Increment/Decrement instructions ------------------------------------ */ -#define MOVLrr(RS, RD) _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) ) -#define MOVLmr(MD, MB, MI, MS, RD) _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS ) -#define MOVLrm(RS, MD, MB, MI, MS) _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS ) -#define MOVLir(IM, R) _Or_L (0xb8,_r4(R) ,IM ) -#define MOVLim(IM, MD, MB, MI, MS) _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ -#define MOVZBLrr(RS, RD) _OO_Mrm (0x0fb6 ,_b11,_r1(RD),_r1(RS) ) -#define MOVZBLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fb6 ,_r1(RD) ,MD,MB,MI,MS ) -#define MOVZBWrr(RS, RD) _wOO_Mrm (0x0fb6 ,_b11,_r2(RD),_r2(RS) ) -#define MOVZBWmr(MD, MB, MI, MS, RD) _wOO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS ) -#define MOVZWLrr(RS, RD) _OO_Mrm (0x0fb7 ,_b11,_r1(RD),_r1(RS) ) -#define MOVZWLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fb7 ,_r1(RD) ,MD,MB,MI,MS ) +#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS )) +#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) )) -#define MOVSBLrr(RS, RD) _OO_Mrm (0x0fbe ,_b11,_r1(RD),_r1(RS) ) -#define MOVSBLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fbe ,_r1(RD) ,MD,MB,MI,MS ) -#define MOVSBWrr(RS, RD) _wOO_Mrm (0x0fbe ,_b11,_r2(RD),_r2(RS) ) -#define MOVSBWmr(MD, MB, MI, MS, RD) _wOO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS ) -#define MOVSWLrr(RS, RD) _OO_Mrm (0x0fbf ,_b11,_r1(RD),_r1(RS) ) -#define MOVSWLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fbf ,_r1(RD) ,MD,MB,MI,MS ) +#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) -#define MULBr(RS) _O_Mrm (0xf6 ,_b11,_b100 ,_r1(RS) ) -#define MULBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b100 ,MD,MB,MI,MS ) -#define MULWr(RS) _wO_Mrm (0xf7 ,_b11,_b100 ,_r2(RS) ) -#define MULWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b100 ,MD,MB,MI,MS ) +#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS )) +#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) )) -#define MULLr(RS) _O_Mrm (0xf7 ,_b11,_b100 ,_r4(RS) ) -#define MULLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b100 ,MD,MB,MI,MS ) +#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) -#define NEGBr(RD) _O_Mrm (0xf6 ,_b11,_b011 ,_r1(RD) ) -#define NEGBm(MD,MB,MI,MS) 
_O_r_X (0xf6 ,_b011 ,MD,MB,MI,MS ) -#define NEGWr(RD) _wO_Mrm (0xf7 ,_b11,_b011 ,_r2(RD) ) -#define NEGWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b011 ,MD,MB,MI,MS ) -#define NEGLr(RD) _O_Mrm (0xf7 ,_b11,_b011 ,_r4(RD) ) -#define NEGLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b011 ,MD,MB,MI,MS ) +/* --- Misc instructions --------------------------------------------------- */ +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ -#define NOP_() _O (0x90 ) +#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) )) +#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS )) +#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) )) +#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS )) +#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) )) +#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS )) +#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) )) +#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS )) -#define NOTBr(RD) _O_Mrm (0xf6 ,_b11,_b010 ,_r1(RD) ) -#define NOTBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b010 ,MD,MB,MI,MS ) -#define NOTWr(RD) _wO_Mrm (0xf7 ,_b11,_b010 ,_r2(RD) ) -#define NOTWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b010 ,MD,MB,MI,MS ) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ -#define NOTLr(RD) _O_Mrm (0xf7 ,_b11,_b010 ,_r4(RD) ) -#define NOTLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b010 ,MD,MB,MI,MS ) +#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) )) +#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) )) +#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) )) +#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) )) +#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS )) -#define ORBrr(RS, RD) _O_Mrm (0x08 ,_b11,_r1(RS),_r1(RD) ) -#define ORBmr(MD, MB, MI, MS, RD) _O_r_X (0x0a ,_r1(RD) ,MD,MB,MI,MS ) -#define ORBrm(RS, MD, MB, MI, MS) _O_r_X (0x08 ,_r1(RS) ,MD,MB,MI,MS ) -#define ORBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b001 ,_r1(RD) ,_su8(IM)) -#define ORBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b001 ,MD,MB,MI,MS ,_su8(IM)) -#define ORWrr(RS, RD) _wO_Mrm (0x09 ,_b11,_r2(RS),_r2(RD) ) -#define ORWmr(MD, MB, MI, MS, RD) _wO_r_X (0x0b ,_r2(RD) ,MD,MB,MI,MS ) -#define ORWrm(RS, MD, MB, MI, MS) _wO_r_X (0x09 ,_r2(RS) ,MD,MB,MI,MS ) -#define ORWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b001 ,_r2(RD) ,_su16(IM)) -#define ORWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b001 ,MD,MB,MI,MS ,_su16(IM)) +#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) )) +#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) )) +#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS )) 
-#define ORLrr(RS, RD) _O_Mrm (0x09 ,_b11,_r4(RS),_r4(RD) ) -#define ORLmr(MD, MB, MI, MS, RD) _O_r_X (0x0b ,_r4(RD) ,MD,MB,MI,MS ) -#define ORLrm(RS, MD, MB, MI, MS) _O_r_X (0x09 ,_r4(RS) ,MD,MB,MI,MS ) -#define ORLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b001 ,_r4(RD) ,IM ) -#define ORLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b001 ,MD,MB,MI,MS ,IM ) -#define POPWr(RD) _wOr (0x58,_r2(RD) ) -#define POPWm(MD,MB,MI,MS) _wO_r_X (0x8f ,_b000 ,MD,MB,MI,MS ) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ -#define POPLr(RD) _Or (0x58,_r4(RD) ) -#define POPLm(MD,MB,MI,MS) _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ) +#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )) +#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) )) -#define POPA_() _wO (0x61 ) -#define POPAD_() _O (0x61 ) +#define CLC_() _O (0xf8 ) +#define STC_() _O (0xf9 ) -#define POPF_() _wO (0x9d ) -#define POPFD_() _O (0x9d ) +#define CMC_() _O (0xf5 ) +#define CLD_() _O (0xfc ) +#define STD_() _O (0xfd ) +#define CBTW_() (_d16(), _O (0x98 )) +#define CWTL_() _O (0x98 ) +#define CLTQ_() _m64only(_REXQrr(0, 0), _O (0x98 )) -#define PUSHWr(R) _wOr (0x50,_r2(R) ) -#define PUSHWm(MD,MB,MI,MS) _wO_r_X (0xff, ,_b110 ,MD,MB,MI,MS ) -#define PUSHWi(IM) _wOs_sW (0x68 ,IM ) +#define CBW_() CBTW_() +#define CWDE_() CWTL_() +#define CDQE_() CLTQ_() -#define PUSHLr(R) _Or (0x50,_r4(R) ) -#define PUSHLm(MD,MB,MI,MS) _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ) -#define PUSHLi(IM) _Os_sL (0x68 ,IM ) - - -#define PUSHA_() _wO (0x60 ) -#define PUSHAD_() _O (0x60 ) - -#define PUSHF_() _O (0x9c ) -#define PUSHFD_() _wO (0x9c ) - -#define RET_() _O (0xc3 ) -#define RETi(IM) _O_W (0xc2 ,_su16(IM)) +#define CWTD_() (_d16(), _O (0x99 )) +#define CLTD_() _O (0x99 ) +#define CQTO_() _m64only(_REXQrr(0, 0), _O (0x99 )) +#define CWD_() CWTD_() +#define CDQ_() CLTD_() +#define CQO_() CQTO_() -#define ROLBir(IM,RD) (((IM)==1) ? 
_O_Mrm (0xd0 ,_b11,_b000,_r1(RD) ) : \ - _O_Mrm_B (0xc0 ,_b11,_b000,_r1(RD) ,_u8(IM) ) ) -#define ROLBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b000 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc0 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define ROLBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b000,_r1(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define ROLBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b000 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define ROLWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b000,_r2(RD) ) : \ - _wO_Mrm_B (0xc1 ,_b11,_b000,_r2(RD) ,_u8(IM) ) ) -#define ROLWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b000 ,MD,MB,MI,MS ) : \ - _wO_r_X_B (0xc1 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define ROLWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b000,_r2(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define ROLWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b000 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define ROLLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b000,_r4(RD) ) : \ - _O_Mrm_B (0xc1 ,_b11,_b000,_r4(RD) ,_u8(IM) ) ) -#define ROLLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b000 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc1 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define ROLLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b000,_r4(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define ROLLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b000 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) +#define LAHF_() _m32only( _O (0x9f )) +#define SAHF_() _m32only( _O (0x9e )) +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ -#define RORBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b001,_r1(RD) ) : \ - _O_Mrm_B (0xc0 ,_b11,_b001,_r1(RD) ,_u8(IM) ) ) -#define RORBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b001 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc0 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define RORBrr(RS,RD) (((RS)==_CL) ? 
_O_Mrm (0xd2 ,_b11,_b001,_r1(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define RORBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b001 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define RORWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b001,_r2(RD) ) : \ - _wO_Mrm_B (0xc1 ,_b11,_b001,_r2(RD) ,_u8(IM) ) ) -#define RORWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b001 ,MD,MB,MI,MS ) : \ - _wO_r_X_B (0xc1 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define RORWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b001,_r2(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define RORWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b001 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define RORLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b001,_r4(RD) ) : \ - _O_Mrm_B (0xc1 ,_b11,_b001,_r4(RD) ,_u8(IM) ) ) -#define RORLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b001 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc1 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define RORLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b001,_r4(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define RORLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b001 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - - -#define SAHF_() _O (0x9e ) - - -#define SALBir SHLBir -#define SALBim SHLBim -#define SALBrr SHLBrr -#define SALBrm SHLBrm -#define SALWir SHLWir -#define SALWim SHLWim -#define SALWrr SHLWrr -#define SALWrm SHLWrm -#define SALLir SHLLir -#define SALLim SHLLim -#define SALLrr SHLLrr -#define SALLrm SHLLrm - - -#define SARBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b111,_r1(RD) ) : \ - _O_Mrm_B (0xc0 ,_b11,_b111,_r1(RD) ,_u8(IM) ) ) -#define SARBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b111 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc0 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SARBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b111,_r1(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SARBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? 
_O_r_X (0xd2 ,_b111 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define SARWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b111,_r2(RD) ) : \ - _wO_Mrm_B (0xc1 ,_b11,_b111,_r2(RD) ,_u8(IM) ) ) -#define SARWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b111 ,MD,MB,MI,MS ) : \ - _wO_r_X_B (0xc1 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SARWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b111,_r2(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SARWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b111 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define SARLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b111,_r4(RD) ) : \ - _O_Mrm_B (0xc1 ,_b11,_b111,_r4(RD) ,_u8(IM) ) ) -#define SARLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b111 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc1 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SARLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b111,_r4(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SARLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? 
_O_r_X (0xd3 ,_b111 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - - -#define SBBBrr(RS, RD) _O_Mrm (0x18 ,_b11,_r1(RS),_r1(RD) ) -#define SBBBmr(MD, MB, MI, MS, RD) _O_r_X (0x1a ,_r1(RD) ,MD,MB,MI,MS ) -#define SBBBrm(RS, MD, MB, MI, MS) _O_r_X (0x18 ,_r1(RS) ,MD,MB,MI,MS ) -#define SBBBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b011 ,_r1(RD) ,_su8(IM)) -#define SBBBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b011 ,MD,MB,MI,MS ,_su8(IM)) - -#define SBBWrr(RS, RD) _wO_Mrm (0x19 ,_b11,_r2(RS),_r2(RD) ) -#define SBBWmr(MD, MB, MI, MS, RD) _wO_r_X (0x1b ,_r2(RD) ,MD,MB,MI,MS ) -#define SBBWrm(RS, MD, MB, MI, MS) _wO_r_X (0x19 ,_r2(RS) ,MD,MB,MI,MS ) -#define SBBWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b011 ,_r2(RD) ,_su16(IM)) -#define SBBWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b011 ,MD,MB,MI,MS ,_su16(IM)) - -#define SBBLrr(RS, RD) _O_Mrm (0x19 ,_b11,_r4(RS),_r4(RD) ) -#define SBBLmr(MD, MB, MI, MS, RD) _O_r_X (0x1b ,_r4(RD) ,MD,MB,MI,MS ) -#define SBBLrm(RS, MD, MB, MI, MS) _O_r_X (0x19 ,_r4(RS) ,MD,MB,MI,MS ) -#define SBBLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b011 ,_r4(RD) ,IM ) -#define SBBLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b011 ,MD,MB,MI,MS ,IM ) - - -#define SETCCir(CC,RD) _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) ) +#define CPUID_() _OO (0x0fa2 ) +#define RDTSC_() _OO (0x0f31 ) /* rdtsc is the two-byte opcode 0F 31 */ -#define SETOr(RD) SETCCir(0x0,RD) -#define SETNOr(RD) SETCCir(0x1,RD) -#define SETBr(RD) SETCCir(0x2,RD) -#define SETNAEr(RD) SETCCir(0x2,RD) -#define SETNBr(RD) SETCCir(0x3,RD) -#define SETAEr(RD) SETCCir(0x3,RD) -#define SETEr(RD) SETCCir(0x4,RD) -#define SETZr(RD) SETCCir(0x4,RD) -#define SETNEr(RD) SETCCir(0x5,RD) -#define SETNZr(RD) SETCCir(0x5,RD) -#define SETBEr(RD) SETCCir(0x6,RD) -#define SETNAr(RD) SETCCir(0x6,RD) -#define SETNBEr(RD) SETCCir(0x7,RD) -#define SETAr(RD) SETCCir(0x7,RD) -#define SETSr(RD) SETCCir(0x8,RD) -#define SETNSr(RD) SETCCir(0x9,RD) -#define SETPr(RD) SETCCir(0xa,RD) -#define SETPEr(RD) SETCCir(0xa,RD) -#define SETNPr(RD) SETCCir(0xb,RD) -#define 
SETPOr(RD) SETCCir(0xb,RD) -#define SETLr(RD) SETCCir(0xc,RD) -#define SETNGEr(RD) SETCCir(0xc,RD) -#define SETNLr(RD) SETCCir(0xd,RD) -#define SETGEr(RD) SETCCir(0xd,RD) -#define SETLEr(RD) SETCCir(0xe,RD) -#define SETNGr(RD) SETCCir(0xe,RD) -#define SETNLEr(RD) SETCCir(0xf,RD) -#define SETGr(RD) SETCCir(0xf,RD) +#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B)) -#define SETCCim(CC,MD,MB,MI,MS) _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS ) - -#define SETOm(D,B,I,S) SETCCim(0x0,D,B,I,S) -#define SETNOm(D,B,I,S) SETCCim(0x1,D,B,I,S) -#define SETBm(D,B,I,S) SETCCim(0x2,D,B,I,S) -#define SETNAEm(D,B,I,S) SETCCim(0x2,D,B,I,S) -#define SETNBm(D,B,I,S) SETCCim(0x3,D,B,I,S) -#define SETAEm(D,B,I,S) SETCCim(0x3,D,B,I,S) -#define SETEm(D,B,I,S) SETCCim(0x4,D,B,I,S) -#define SETZm(D,B,I,S) SETCCim(0x4,D,B,I,S) -#define SETNEm(D,B,I,S) SETCCim(0x5,D,B,I,S) -#define SETNZm(D,B,I,S) SETCCim(0x5,D,B,I,S) -#define SETBEm(D,B,I,S) SETCCim(0x6,D,B,I,S) -#define SETNAm(D,B,I,S) SETCCim(0x6,D,B,I,S) -#define SETNBEm(D,B,I,S) SETCCim(0x7,D,B,I,S) -#define SETAm(D,B,I,S) SETCCim(0x7,D,B,I,S) -#define SETSm(D,B,I,S) SETCCim(0x8,D,B,I,S) -#define SETNSm(D,B,I,S) SETCCim(0x9,D,B,I,S) -#define SETPm(D,B,I,S) SETCCim(0xa,D,B,I,S) -#define SETPEm(D,B,I,S) SETCCim(0xa,D,B,I,S) -#define SETNPm(D,B,I,S) SETCCim(0xb,D,B,I,S) -#define SETPOm(D,B,I,S) SETCCim(0xb,D,B,I,S) -#define SETLm(D,B,I,S) SETCCim(0xc,D,B,I,S) -#define SETNGEm(D,B,I,S) SETCCim(0xc,D,B,I,S) -#define SETNLm(D,B,I,S) SETCCim(0xd,D,B,I,S) -#define SETGEm(D,B,I,S) SETCCim(0xd,D,B,I,S) -#define SETLEm(D,B,I,S) SETCCim(0xe,D,B,I,S) -#define SETNGm(D,B,I,S) SETCCim(0xe,D,B,I,S) -#define SETNLEm(D,B,I,S) SETCCim(0xf,D,B,I,S) -#define SETGm(D,B,I,S) SETCCim(0xf,D,B,I,S) - - -#define SHLBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b100,_r1(RD) ) : \ - _O_Mrm_B (0xc0 ,_b11,_b100,_r1(RD) ,_u8(IM) ) ) -#define SHLBim(IM,MD,MB,MS,MI) (((IM)==1) ? 
_O_r_X (0xd0 ,_b100 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc0 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SHLBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b100,_r1(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SHLBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b100 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define SHLWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b100,_r2(RD) ) : \ - _wO_Mrm_B (0xc1 ,_b11,_b100,_r2(RD) ,_u8(IM) ) ) -#define SHLWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b100 ,MD,MB,MI,MS ) : \ - _wO_r_X_B (0xc1 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SHLWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b100,_r2(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SHLWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b100 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define SHLLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b100,_r4(RD) ) : \ - _O_Mrm_B (0xc1 ,_b11,_b100,_r4(RD) ,_u8(IM) ) ) -#define SHLLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b100 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc1 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SHLLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b100,_r4(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SHLLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b100 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - - -#define SHRBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b101,_r1(RD) ) : \ - _O_Mrm_B (0xc0 ,_b11,_b101,_r1(RD) ,_u8(IM) ) ) -#define SHRBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b101 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc0 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SHRBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b101,_r1(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SHRBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b101 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define SHRWir(IM,RD) (((IM)==1) ? 
_wO_Mrm (0xd1 ,_b11,_b101,_r2(RD) ) : \ - _wO_Mrm_B (0xc1 ,_b11,_b101,_r2(RD) ,_u8(IM) ) ) -#define SHRWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b101 ,MD,MB,MI,MS ) : \ - _wO_r_X_B (0xc1 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SHRWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b101,_r2(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SHRWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b101 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - -#define SHRLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b101,_r4(RD) ) : \ - _O_Mrm_B (0xc1 ,_b11,_b101,_r4(RD) ,_u8(IM) ) ) -#define SHRLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b101 ,MD,MB,MI,MS ) : \ - _O_r_X_B (0xc1 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) ) -#define SHRLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b101,_r4(RD) ) : \ - JITFAIL ("source register must be CL" ) ) -#define SHRLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b101 ,MD,MB,MI,MS ) : \ - JITFAIL ("source register must be CL" ) ) - - -#define STC_() _O (0xf9 ) - - -#define SUBBrr(RS, RD) _O_Mrm (0x28 ,_b11,_r1(RS),_r1(RD) ) -#define SUBBmr(MD, MB, MI, MS, RD) _O_r_X (0x2a ,_r1(RD) ,MD,MB,MI,MS ) -#define SUBBrm(RS, MD, MB, MI, MS) _O_r_X (0x28 ,_r1(RS) ,MD,MB,MI,MS ) -#define SUBBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b101 ,_r1(RD) ,_su8(IM)) -#define SUBBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b101 ,MD,MB,MI,MS ,_su8(IM)) - -#define SUBWrr(RS, RD) _wO_Mrm (0x29 ,_b11,_r2(RS),_r2(RD) ) -#define SUBWmr(MD, MB, MI, MS, RD) _wO_r_X (0x2b ,_r2(RD) ,MD,MB,MI,MS ) -#define SUBWrm(RS, MD, MB, MI, MS) _wO_r_X (0x29 ,_r2(RS) ,MD,MB,MI,MS ) -#define SUBWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b101 ,_r2(RD) ,_su16(IM)) -#define SUBWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b101 ,MD,MB,MI,MS ,_su16(IM)) - -#define SUBLrr(RS, RD) _O_Mrm (0x29 ,_b11,_r4(RS),_r4(RD) ) -#define SUBLmr(MD, MB, MI, MS, RD) _O_r_X (0x2b ,_r4(RD) ,MD,MB,MI,MS ) -#define SUBLrm(RS, MD, MB, MI, MS) _O_r_X (0x29 ,_r4(RS) ,MD,MB,MI,MS ) -#define SUBLir(IM, RD) 
_Os_Mrm_sL (0x81 ,_b11,_b101 ,_r4(RD) ,IM ) -#define SUBLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b101 ,MD,MB,MI,MS ,IM ) - - -#define TESTBrr(RS, RD) _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) ) -#define TESTBrm(RS, MD, MB, MI, MS) _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS ) -#define TESTBir(IM, RD) _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM)) -#define TESTBim(IM, MD, MB, MI, MS) _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)) - -#define TESTWrr(RS, RD) _wO_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ) -#define TESTWrm(RS, MD, MB, MI, MS) _wO_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ) -#define TESTWir(IM, RD) _wO_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM)) -#define TESTWim(IM, MD, MB, MI, MS) _wO_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)) - -#define TESTLrr(RS, RD) _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) ) -#define TESTLrm(RS, MD, MB, MI, MS) _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS ) -#define TESTLir(IM, RD) _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM ) -#define TESTLim(IM, MD, MB, MI, MS) _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ) - - -#define XADDBrr(RS,RD) _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) ) -#define XADDBrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS ) - -#define XADDWrr(RS,RD) _wOO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) ) -#define XADDWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS ) - -#define XADDLrr(RS,RD) _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) ) -#define XADDLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS ) - - -#define XCHGBrr(RS,RD) _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) ) -#define XCHGBrm(RS,MD,MB,MI,MS) _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS ) - -#define XCHGWrr(RS,RD) _wO_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) ) -#define XCHGWrm(RS,MD,MB,MI,MS) _wO_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS ) - -#define XCHGLrr(RS,RD) _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) ) -#define XCHGLrm(RS,MD,MB,MI,MS) _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS ) - - -#define XORBrr(RS, RD) _O_Mrm (0x30 ,_b11,_r1(RS),_r1(RD) ) -#define XORBmr(MD, MB, MI, MS, RD) _O_r_X (0x32 ,_r1(RD) ,MD,MB,MI,MS ) -#define XORBrm(RS, MD, MB, MI, MS) 
_O_r_X (0x30 ,_r1(RS) ,MD,MB,MI,MS ) -#define XORBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b110 ,_r1(RD) ,_su8(IM)) -#define XORBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b110 ,MD,MB,MI,MS ,_su8(IM)) - -#define XORWrr(RS, RD) _wO_Mrm (0x31 ,_b11,_r2(RS),_r2(RD) ) -#define XORWmr(MD, MB, MI, MS, RD) _wO_r_X (0x33 ,_r2(RD) ,MD,MB,MI,MS ) -#define XORWrm(RS, MD, MB, MI, MS) _wO_r_X (0x31 ,_r2(RS) ,MD,MB,MI,MS ) -#define XORWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b110 ,_r2(RD) ,_su16(IM)) -#define XORWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b110 ,MD,MB,MI,MS ,_su16(IM)) +#define LEAVE_() _O (0xc9 ) +#define RET_() _O (0xc3 ) +#define RETi(IM) _O_W (0xc2 ,_su16(IM)) -#define XORLrr(RS, RD) _O_Mrm (0x31 ,_b11,_r4(RS),_r4(RD) ) -#define XORLmr(MD, MB, MI, MS, RD) _O_r_X (0x33 ,_r4(RD) ,MD,MB,MI,MS ) -#define XORLrm(RS, MD, MB, MI, MS) _O_r_X (0x31 ,_r4(RS) ,MD,MB,MI,MS ) -#define XORLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b110 ,_r4(RD) ,IM ) -#define XORLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b110 ,MD,MB,MI,MS ,IM ) +#define NOP_() _O (0x90 ) /* x87 instructions -- yay, we found a use for octal constants :-) */ -#define ESCmi(D,B,I,S,OP) _O_r_X(0xd8|(OP >> 3), (OP & 7), D,B,I,S) +#define ESCmi(D,B,I,S,OP) (_REXLrm(0,B,I), _O_r_X(0xd8|(OP >> 3), (OP & 7), D,B,I,S)) #define ESCri(RD,OP) _O_Mrm(0xd8|(OP >> 3), _b11, (OP & 7), RD) #define ESCrri(RS,RD,OP) ((RS) == _ST0 ? ESCri(RD,(OP|040)) \ @@ -1037,9 +1294,9 @@ typedef _uc jit_insn; #define FNSTSWr(RD) ((RD == _AX || RD == _EAX) ? _OO (0xdfe0) \ : JITFAIL ("AX or EAX expected")) /* N byte NOPs */ -#define NOPi(N) ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), \ - (( ((N)&7) == 7) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00)) : \ - ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_L(0x00)) : \ +#define NOPi(N) ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00),_jit_B(0x90)) : (void) 0), \ + (( ((N)&7) == 7) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00)) : \ + ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_I(0x00)) : \ ( ((N)&7) == 5) ? (_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \ /* leal 0(,%esi), %esi */ ( ((N)&7) == 4) ? (_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \ /* leal (,%esi), %esi */ ( ((N)&7) == 3) ? (_jit_B(0x8d),_jit_B(0x76),_jit_B(0x00)) : \ @@ -1049,6 +1306,286 @@ typedef _uc jit_insn; JITFAIL(".align argument too large"))) +/* --- Media 128-bit instructions ------------------------------------------ */ + +enum { + X86_SSE_MOV = 0x10, + X86_SSE_MOVLP = 0x12, + X86_SSE_MOVHP = 0x16, + X86_SSE_MOVA = 0x28, + X86_SSE_CVTIS = 0x2a, + X86_SSE_CVTTSI = 0x2c, + X86_SSE_CVTSI = 0x2d, + X86_SSE_UCOMI = 0x2e, + X86_SSE_COMI = 0x2f, + X86_SSE_SQRT = 0x51, + X86_SSE_RSQRT = 0x52, + X86_SSE_RCP = 0x53, + X86_SSE_AND = 0x54, + X86_SSE_ANDN = 0x55, + X86_SSE_OR = 0x56, + X86_SSE_XOR = 0x57, + X86_SSE_ADD = 0x58, + X86_SSE_MUL = 0x59, + X86_SSE_CVTSD = 0x5a, + X86_SSE_CVTDT = 0x5b, + X86_SSE_SUB = 0x5c, + X86_SSE_MIN = 0x5d, + X86_SSE_DIV = 0x5e, + X86_SSE_MAX = 0x5f, + X86_SSE_MOV2 = 0xd6 +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __SSEL1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) +#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) +#define _SSEL1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEL1rm(OP, RS, RSA, MD, MB, MI, MS)) + +#define _SSEPSrr(OP,RS,RD) __SSELrr ( OP, RS,_rX, RD,_rX) +#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr ( OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm ( OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __SSEL1rm( OP, RS,_rX, MD, MB, MI, MS) + +#define _SSEPDrr(OP,RS,RD) _SSELrr (0x66, OP, RS,_rX, RD,_rX) +#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0x66, OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0x66, OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0x66, OP, RS,_rX, MD, MB, MI, MS) + +#define _SSESSrr(OP,RS,RD) _SSELrr (0xf3, OP, RS,_rX, RD,_rX) +#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf3, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf3, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf3, OP, RS,_rX, MD, MB, MI, MS) + +#define _SSESDrr(OP,RS,RD) _SSELrr (0xf2, OP, RS,_rX, RD,_rX) +#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf2, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf2, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf2, OP, RS,_rX, 
MD, MB, MI, MS) + +#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD) +#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD) +#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD) +#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD) +#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD) +#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) +#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD) +#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) + +#define ANDNSSrr ANDNPSrr /* scalar names alias the packed forms; rr -> rr, mr -> mr */ +#define ANDNSSmr ANDNPSmr +#define ANDNSDrr ANDNPDrr +#define ANDNSDmr ANDNPDmr + +#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD) +#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD) +#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD) +#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD) + +#define ANDSSrr ANDPSrr /* scalar names alias the packed forms; rr -> rr, mr -> mr */ +#define ANDSSmr ANDPSmr +#define ANDSDrr ANDPDrr +#define ANDSDmr ANDPDmr + +#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) +#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD) +#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD) +#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD) +#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD) +#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) 
+#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD) +#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD) +#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD) +#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD) +#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD) +#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD) +#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD) +#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD) +#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD) +#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD) +#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD) +#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD) +#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD) +#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD) +#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD) + +#define ORSSrr ORPSrr /* scalar names alias the packed forms; rr -> rr, mr -> mr */ +#define ORSSmr ORPSmr +#define ORSDrr ORPDrr +#define ORSDmr ORPDmr + +#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD) +#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) +#define RCPSSrr(RS, 
RD) _SSESSrr(X86_SSE_RCP, RS, RD) +#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) + +#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) +#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) + +#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD) +#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD) +#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD) +#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD) +#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD) +#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD) +#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD) +#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD) +#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD) +#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD) +#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD) +#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD) + +#define XORSSrr XORPSrr /* scalar names alias the packed forms; rr -> rr, mr -> mr */ +#define XORSSmr XORPSmr +#define XORSDrr XORPDrr +#define XORSDmr XORPDmr + +/* No prefixes here. 
*/ +#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD) +#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD) +#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) + +/* No prefixes here. */ +#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) + +#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD) +#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD) +#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS) + +#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD) +#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD) +#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS) + +#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD) +#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD) +#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS) + +#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD) +#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD) +#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS) + +#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) +#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) + +#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PDrr(RS, RD) 
_SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSI, RS,_rX, RD,_r4) +#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r4) +#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSI, RS,_rX, RD,_r4) +#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r4) + +#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) +#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) + +#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) +#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXLrr(RS, RD) 
_SSELrr(0x66, 0x7e, RS,_rX, RD,_r4) +#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) + +#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM) +#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM) + +#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4) +#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS) + +#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, X86_SSE_MOV2, RS,_rX, RD,_rM) +#define MOVQ2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_MOV2, RS,_rM, RD,_rX) +#define MOVHLPSrr(RS, RD) __SSELrr( X86_SSE_MOVLP, RS,_rX, RD,_rX) +#define MOVLHPSrr(RS, RD) __SSELrr( X86_SSE_MOVHP, RS,_rX, RD,_rX) + +#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX) +#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX) +#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX) +#define MOVHPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS) +#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX) +#define MOVHPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS) + +#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX) +#define MOVLPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS) +#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX) +#define MOVLPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS) + /*** References: */ /* */ /* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */ @@ -1057,6 +1594,13 @@ 
typedef _uc jit_insn; /* [2] "Intel Architecture Software Developer's Manual Volume 2: Instruction Set Reference", */ /* Intel Corporation 1997. */ +#if LIGHTNING_CROSS \ + ? LIGHTNING_TARGET == LIGHTNING_X86_64 \ + : defined (__x86_64__) +#include "i386/asm-64.h" +#else +#include "i386/asm-32.h" #endif -#endif /* __lightning_asm_h */ +#endif +#endif /* __lightning_asm_i386_h */ diff --git a/src/runtime/c/pgf/lightning/i386/core-32.h b/src/runtime/c/pgf/lightning/i386/core-32.h new file mode 100644 index 000000000..48117ddb9 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/core-32.h @@ -0,0 +1,174 @@ +/******************************** -*- C -*- **************************** + * + * Platform-independent layer (i386 version) + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc. + * Written by Paolo Bonzini and Matthew Flatt. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. 
+ * + ***********************************************************************/ + + + +#ifndef __lightning_core_h +#define __lightning_core_h + +#define JIT_CAN_16 1 +#define JIT_AP _EBP + +#define JIT_R_NUM 3 +#define JIT_R(i) (_EAX + (i)) +#define JIT_V_NUM 3 +#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1) + +struct jit_local_state { + int framesize; + int argssize; + int alloca_offset; + int alloca_slack; + jit_insn *finish_ref; +}; + +/* Whether a register is used for the user-accessible registers. */ +#define jit_save(reg) 1 + +#define jit_base_prolog() (_jitl.framesize = 20, _jitl.alloca_offset = _jitl.alloca_slack = 0, \ + PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI), PUSHLr(_EBP), MOVLrr(_ESP, _EBP)) +#define jit_base_ret(ofs) \ + (((ofs) < 0 ? LEAVE_() : POPLr(_EBP)), \ + POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), RET_()) + +/* Used internally. SLACK is used by the Darwin ABI which keeps the stack + aligned to 16-bytes. */ + +#define jit_allocai_internal(amount, slack) \ + (((amount) < _jitl.alloca_slack \ + ? (void)0 \ + : (void)(_jitl.alloca_slack += (amount) + (slack), \ + ((amount) + (slack) == sizeof (int) \ + ? PUSHLr(_EAX) \ + : SUBLir((amount) + (slack), _ESP)))), \ + _jitl.alloca_slack -= (amount), \ + _jitl.alloca_offset -= (amount)) + +/* Stack */ +#define jit_pushr_i(rs) PUSHLr(rs) +#define jit_popr_i(rs) POPLr(rs) + +/* The += in argssize allows for stack pollution */ + +#ifdef __APPLE__ +/* Stack must stay 16-byte aligned: */ +# define jit_prepare_i(ni) (((ni & 0x3) \ + ? 
(void)SUBLir(4 * ((((ni) + 3) & ~(0x3)) - (ni)), JIT_SP) \ + : (void)0), \ + _jitl.argssize += (((ni) + 3) & ~(0x3))) + +#define jit_allocai(n) \ + jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15) + +#define jit_prolog(n) (jit_base_prolog(), jit_subi_i (JIT_SP, JIT_SP, 12)) +#define jit_ret() jit_base_ret (-12) + +#else +# define jit_prepare_i(ni) (_jitl.argssize += (ni)) + +#define jit_allocai(n) \ + jit_allocai_internal ((n), 0) + +#define jit_prolog(n) jit_base_prolog() +#define jit_ret() jit_base_ret (_jitl.alloca_offset) +#endif + +#define jit_calli(label) (CALLm( ((unsigned long) (label))), _jit.x.pc) +#define jit_callr(reg) CALLsr(reg) + +#define jit_pusharg_i(rs) PUSHLr(rs) +#define jit_finish(sub) (_jitl.finish_ref = jit_calli((sub)), ADDLir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0, _jitl.finish_ref) +/* Register-indirect counterpart of jit_finish: call through reg, then pop the pushed arguments off the stack and reset argssize. */ +#define jit_finishr(reg) (jit_callr((reg)), ADDLir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) + +#define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_s() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_us() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_i() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_ui() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_l() ((_jitl.framesize += sizeof(long)) - sizeof(long)) +#define jit_arg_ul() ((_jitl.framesize += sizeof(long)) - sizeof(long)) +#define jit_arg_p() ((_jitl.framesize += sizeof(long)) - sizeof(long)) + +#define jit_movi_p(d, is) (MOVLir (((long)(is)), (d)), _jit.x.pc) +#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v) - (jump_pc))) +#define jit_patch_at(jump_pc,v) jit_patch_long_at(jump_pc, v) + +/* Memory */ +#define jit_replace(s, rep, op) \ + (jit_pushr_i(rep), \ + MOVLrr((s), (rep)), \ + op, jit_popr_i(rep)) + +#define jit_movbrm(rs, dd, db, di, ds) \ + (jit_check8(rs) \ + ?
MOVBrm(jit_reg8(rs), dd, db, di, ds) \ + : jit_replace(rs, \ + ((dd != _EAX && db != _EAX && di != _EAX) ? _EAX : \ + ((dd != _ECX && db != _ECX && di != _ECX) ? _ECX : _EDX)), \ + MOVBrm(((dd != _EAX && db != _EAX && di != _EAX) ? _AL : \ + ((dd != _ECX && db != _ECX && di != _ECX) ? _CL : _DL)), \ + dd, db, di, ds))) + +#define jit_ldr_c(d, rs) MOVSBLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_c(d, s1, s2) MOVSBLmr(0, (s1), (s2), 1, (d)) + +#define jit_ldr_s(d, rs) MOVSWLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_s(d, s1, s2) MOVSWLmr(0, (s1), (s2), 1, (d)) + +#define jit_ldi_c(d, is) MOVSBLmr((is), 0, 0, 0, (d)) +#define jit_ldxi_c(d, rs, is) MOVSBLmr((is), (rs), 0, 0, (d)) + +#define jit_ldi_uc(d, is) MOVZBLmr((is), 0, 0, 0, (d)) +#define jit_ldxi_uc(d, rs, is) MOVZBLmr((is), (rs), 0, 0, (d)) + +#define jit_sti_c(id, rs) jit_movbrm((rs), (id), 0, 0, 0) +#define jit_stxi_c(id, rd, rs) jit_movbrm((rs), (id), (rd), 0, 0) + +#define jit_ldi_s(d, is) MOVSWLmr((is), 0, 0, 0, (d)) +#define jit_ldxi_s(d, rs, is) MOVSWLmr((is), (rs), 0, 0, (d)) + +#define jit_ldi_us(d, is) MOVZWLmr((is), 0, 0, 0, (d)) +#define jit_ldxi_us(d, rs, is) MOVZWLmr((is), (rs), 0, 0, (d)) + +#define jit_sti_s(id, rs) MOVWrm(jit_reg16(rs), (id), 0, 0, 0) +#define jit_stxi_s(id, rd, rs) MOVWrm(jit_reg16(rs), (id), (rd), 0, 0) + +#define jit_ldi_i(d, is) MOVLmr((is), 0, 0, 0, (d)) +#define jit_ldxi_i(d, rs, is) MOVLmr((is), (rs), 0, 0, (d)) + +#define jit_ldr_i(d, rs) MOVLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_i(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d)) + +#define jit_sti_i(id, rs) MOVLrm((rs), (id), 0, 0, 0) +#define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0) + +#endif /* __lightning_core_h */ diff --git a/src/runtime/c/pgf/lightning/i386/core-64.h b/src/runtime/c/pgf/lightning/i386/core-64.h new file mode 100644 index 000000000..46f2daf02 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/core-64.h @@ -0,0 +1,498 @@ +/******************************** -*- C -*- 
**************************** + * + * Platform-independent layer (i386 version) + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc. + * Written by Paolo Bonzini and Matthew Flatt. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + ***********************************************************************/ + + + +#ifndef __lightning_core_h +#define __lightning_core_h + +/* Used to implement ldc, stc, ... */ +#define JIT_CAN_16 0 +#define JIT_REXTMP _R12 + +/* Number of integer argument registers */ +#define JIT_ARG_MAX 6 + +/* Number of float argument registers */ +#define JIT_FP_ARG_MAX 8 + +#define JIT_R_NUM 3 +#define JIT_R(i) ((i) == 0 ? _EAX : _R9 + (i)) +#define JIT_V_NUM 3 +#define JIT_V(i) ((i) == 0 ?
_EBX : _R12 + (i)) + +struct jit_local_state { + int long_jumps; + int nextarg_getfp; + int nextarg_putfp; + int nextarg_geti; + int nextarg_puti; + int framesize; + int argssize; + int fprssize; + int alloca_offset; + int alloca_slack; + jit_insn *finish_ref; +}; + +/* Whether a register in the "low" bank is used for the user-accessible + registers. */ +#define jit_save(reg) ((reg) == _EAX || (reg) == _EBX) + +/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way. */ +#define jit_allocai_internal(amount, slack) \ + (((amount) < _jitl.alloca_slack \ + ? 0 \ + : (_jitl.alloca_slack += (amount) + (slack), \ + SUBQir((amount) + (slack), _ESP))), \ + _jitl.alloca_slack -= (amount), \ + _jitl.alloca_offset -= (amount)) + +#define jit_allocai(n) \ + jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15) + +/* 3-parameter operation */ +#define jit_qopr_(d, s1, s2, op1d, op2d) \ + ( ((s2) == (d)) ? op1d : \ + ( (((s1) == (d)) ? (void)0 : (void)MOVQrr((s1), (d))), op2d ) \ + ) + +/* 3-parameter operation, with immediate. TODO: fix the case where immediate + does not fit! */ +#define jit_qop_small(d, s1, op2d) \ + (((s1) == (d)) ? op2d : (MOVQrr((s1), (d)), op2d)) +/* Emits op2d when is passes the _s32P check; otherwise loads is into JIT_REXTMP and emits op2i. Both paths first copy s1 into d when they differ. */ +#define jit_qop_(d, s1, is, op2d, op2i) \ + (_s32P((long)(is)) \ + ? jit_qop_small ((d), (s1), (op2d)) \ + : (MOVQir ((is), JIT_REXTMP), jit_qop_small ((d), (s1), (op2i)))) + +#define jit_bra_qr(s1, s2, op) (CMPQrr(s2, s1), op, _jit.x.pc) +#define _jit_bra_l(rs, is, op) (CMPQir(is, rs), op, _jit.x.pc) + +#define jit_bra_l(rs, is, op) (_s32P((long)(is)) \ + ? _jit_bra_l(rs, is, op) \ + : (MOVQir(is, JIT_REXTMP), jit_bra_qr(rs, JIT_REXTMP, op))) + +/* When CMP with 0 can be replaced with TEST */ +#define jit_bra_l0(rs, is, op, op0) \ + ( (is) == 0 ? (TESTQrr(rs, rs), op0, _jit.x.pc) : jit_bra_l(rs, is, op)) + +#define jit_reduceQ(op, is, rs) \ + (_u8P(is) ? jit_reduce_(op##Bir(is, jit_reg8(rs))) : \ + jit_reduce_(op##Qir(is, rs)) ) + +#define jit_addi_l(d, rs, is) \ + /* Value is not zero?
*/ \ + ((is) \ + /* Yes. Value is unsigned and fits in signed 32 bits? */ \ + ? (_uiP(31, is) \ + /* Yes. d == rs? */ \ + ? jit_opi_((d), (rs), \ + /* Yes. Use add opcode */ \ + ADDQir((is), (d)), \ + /* No. Use lea opcode */ \ + LEAQmr((is), (rs), 0, 0, (d))) \ + /* No. Need value in a register */ \ + : (jit_movi_l(JIT_REXTMP, is), \ + jit_addr_l(d, rs, JIT_REXTMP))) \ + /* No. Do nothing. */ \ + : 0) +#define jit_addr_l(d, s1, s2) jit_opo_((d), (s1), (s2), ADDQrr((s2), (d)), ADDQrr((s1), (d)), LEAQmr(0, (s1), (s2), 1, (d)) ) +#define jit_addci_l(d, rs, is) jit_qop_ ((d), (rs), (is), ADCQir((is), (d)), ADCQrr(JIT_REXTMP, (d))) +#define jit_addcr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ADCQrr((s1), (d)), ADCQrr((s2), (d)) ) +#define jit_addxi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ADDQir((is), (d)), ADDQrr(JIT_REXTMP, (d))) +#define jit_addxr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ADDQrr((s1), (d)), ADDQrr((s2), (d)) ) +#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ANDQir((is), (d)), ANDQrr(JIT_REXTMP, (d))) +#define jit_andr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ANDQrr((s1), (d)), ANDQrr((s2), (d)) ) +#define jit_orr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ORQrr((s1), (d)), ORQrr((s2), (d)) ) +#define jit_subr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), (SUBQrr((s1), (d)), NEGQr(d)), SUBQrr((s2), (d)) ) +#define jit_xorr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), XORQrr((s1), (d)), XORQrr((s2), (d)) ) + +/* These can sometimes use byte or word versions! */ +#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), (is), jit_reduceQ(OR, (is), (d)), ORQrr(JIT_REXTMP, (d)) ) +#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), (is), jit_reduceQ(XOR, (is), (d)), XORQrr(JIT_REXTMP, (d)) ) + +#define jit_lshi_l(d, rs, is) ((is) <= 3 ? 
LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_small ((d), (rs), SHLQir((is), (d)) )) +#define jit_rshi_l(d, rs, is) jit_qop_small ((d), (rs), SARQir((is), (d)) ) +#define jit_rshi_ul(d, rs, is) jit_qop_small ((d), (rs), SHRQir((is), (d)) ) +#define jit_lshr_l(d, r1, r2) jit_shift((d), (r1), (r2), SHLQrr) +#define jit_rshr_l(d, r1, r2) jit_shift((d), (r1), (r2), SARQrr) +#define jit_rshr_ul(d, r1, r2) jit_shift((d), (r1), (r2), SHRQrr) + + +/* Stack */ +#define jit_pushr_i(rs) PUSHQr(rs) +#define jit_popr_i(rs) POPQr(rs) + +/* A return address is 8 bytes, plus 5 registers = 40 bytes, total = 48 bytes. */ +#define jit_prolog(n) (_jitl.framesize = ((n) & 1) ? 56 : 48, _jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, \ + PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_R14), PUSHQr(_EBP), MOVQrr(_ESP, _EBP)) + +#define jit_calli(sub) (MOVQir((long) (sub), JIT_REXTMP), CALLsr(JIT_REXTMP)) +#define jit_callr(reg) CALLsr((reg)) + +#define jit_prepare_i(ni) (_jitl.nextarg_puti = (ni), \ + _jitl.argssize = _jitl.nextarg_puti > JIT_ARG_MAX \ + ? _jitl.nextarg_puti - JIT_ARG_MAX : 0) +#define jit_pusharg_i(rs) (--_jitl.nextarg_puti >= JIT_ARG_MAX \ + ? PUSHQr(rs) : MOVQrr(rs, jit_arg_reg_order[_jitl.nextarg_puti])) + +#define jit_finish(sub) (_jitl.fprssize \ + ? (MOVBir(_jitl.fprssize, _AL), _jitl.fprssize = 0) \ + : MOVBir(0, _AL), \ + ((_jitl.argssize & 1) \ + ? (PUSHQr(_EAX), ++_jitl.argssize) : 0), \ + _jitl.finish_ref = jit_calli(sub), \ + (_jitl.argssize \ + ? (ADDQir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) \ + : 0), \ + _jitl.finish_ref) +#define jit_reg_is_arg(reg) ((reg) == _ECX || (reg) == _EDX) + +#define jit_finishr(reg) (_jitl.fprssize \ + ? (MOVBir(_jitl.fprssize, _AL), _jitl.fprssize = 0) \ + : MOVBir(0, _AL), \ + ((_jitl.argssize & 1) \ + ? (PUSHQr(_EAX), ++_jitl.argssize) : 0), \ + (jit_reg_is_arg((reg)) \ + ? (MOVQrr(reg, JIT_REXTMP), \ + jit_callr(JIT_REXTMP)) \ + : jit_callr(reg)), \ + (_jitl.argssize \ + ? 
(ADDQir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) \ + : 0)) + +#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX)) +#define jit_arg_i() (_jitl.nextarg_geti < JIT_ARG_MAX \ + ? _jitl.nextarg_geti++ \ + : ((_jitl.framesize += sizeof(long)) - sizeof(long))) +#define jit_arg_c() jit_arg_i() +#define jit_arg_uc() jit_arg_i() +#define jit_arg_s() jit_arg_i() +#define jit_arg_us() jit_arg_i() +#define jit_arg_ui() jit_arg_i() +#define jit_arg_l() jit_arg_i() +#define jit_arg_ul() jit_arg_i() +#define jit_arg_p() jit_arg_i() + +#define jit_getarg_c(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_c_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_c((reg), JIT_FP, (ofs))) +#define jit_getarg_uc(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_uc_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_uc((reg), JIT_FP, (ofs))) +#define jit_getarg_s(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_s_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_s((reg), JIT_FP, (ofs))) +#define jit_getarg_us(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_us_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_us((reg), JIT_FP, (ofs))) +#define jit_getarg_i(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_i((reg), JIT_FP, (ofs))) +#define jit_getarg_ui(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_ui((reg), JIT_FP, (ofs))) +#define jit_getarg_l(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_l((reg), JIT_FP, (ofs))) +#define jit_getarg_ul(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_ul((reg), JIT_FP, ofs)) +#define jit_getarg_p(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? 
jit_movr_p((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_p((reg), JIT_FP, (ofs))) + +static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D }; + +#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGQr(d), (XORQrr((d), (d)), SUBQrr((rs), (d))) ) +#define jit_movr_l(d, rs) ((void)((rs) == (d) ? 0 : MOVQrr((rs), (d)))) +#define jit_movi_p(d, is) (MOVQir(((long)(is)), (d)), _jit.x.pc) +#define jit_movi_l(d, is) \ + /* Value is not zero? */ \ + ((is) \ + /* Yes. Value is unsigned and fits in signed 32 bits? */ \ + ? (_uiP(31, is) \ + /* Yes. Use 32 bits opcode */ \ + ? MOVLir(is, (d)) \ + /* No. Use 64 bits opcode */ \ + : MOVQir(is, (d))) \ + /* No. Set register to zero. */ \ + : XORQrr ((d), (d))) + +#define jit_bmsr_l(label, s1, s2) (TESTQrr((s1), (s2)), JNZm(label), _jit.x.pc) +#define jit_bmcr_l(label, s1, s2) (TESTQrr((s1), (s2)), JZm(label), _jit.x.pc) +#define jit_boaddr_l(label, s1, s2) (ADDQrr((s2), (s1)), JOm(label), _jit.x.pc) +#define jit_bosubr_l(label, s1, s2) (SUBQrr((s2), (s1)), JOm(label), _jit.x.pc) +#define jit_boaddr_ul(label, s1, s2) (ADDQrr((s2), (s1)), JCm(label), _jit.x.pc) +#define jit_bosubr_ul(label, s1, s2) (SUBQrr((s2), (s1)), JCm(label), _jit.x.pc) + +#define jit_boaddi_l(label, rs, is) (ADDQir((is), (rs)), JOm(label), _jit.x.pc) +#define jit_bosubi_l(label, rs, is) (SUBQir((is), (rs)), JOm(label), _jit.x.pc) +#define jit_boaddi_ul(label, rs, is) (ADDQir((is), (rs)), JCm(label), _jit.x.pc) +#define jit_bosubi_ul(label, rs, is) (SUBQir((is), (rs)), JCm(label), _jit.x.pc) + +#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v))) +#define jit_patch_short_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc))) +#define jit_patch_at(jump_pc,v) (_jitl.long_jumps ? 
jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v)) +#define jit_ret() (LEAVE_(), POPQr(_R14), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_()) + +/* Memory */ + +/* Used to implement ldc, stc, ... We have SIL and friends which simplify it all. */ +#define jit_movbrm(rs, dd, db, di, ds) MOVBrm(jit_reg8(rs), dd, db, di, ds) + +#define jit_ldr_c(d, rs) MOVSBQmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_c(d, s1, s2) MOVSBQmr(0, (s1), (s2), 1, (d)) + +#define jit_ldr_s(d, rs) MOVSWQmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_s(d, s1, s2) MOVSWQmr(0, (s1), (s2), 1, (d)) + +#define jit_ldi_c(d, is) (_u32P((long)(is)) ? MOVSBQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_c(d, JIT_REXTMP))) +#define jit_ldxi_c(d, rs, is) (_u32P((long)(is)) ? MOVSBQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_c(d, rs, JIT_REXTMP))) + +#define jit_ldi_uc(d, is) (_u32P((long)(is)) ? MOVZBLmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_uc(d, JIT_REXTMP))) +#define jit_ldxi_uc(d, rs, is) (_u32P((long)(is)) ? MOVZBLmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_uc(d, rs, JIT_REXTMP))) + +#define jit_sti_c(id, rs) (_u32P((long)(id)) ? MOVBrm(jit_reg8(rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_c(JIT_REXTMP, rs))) +#define jit_stxi_c(id, rd, rs) (_u32P((long)(id)) ? MOVBrm(jit_reg8(rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_c(JIT_REXTMP, rd, rs))) + +#define jit_ldi_s(d, is) (_u32P((long)(is)) ? MOVSWQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_s(d, JIT_REXTMP))) +#define jit_ldxi_s(d, rs, is) (_u32P((long)(is)) ? MOVSWQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_s(d, rs, JIT_REXTMP))) + +#define jit_ldi_us(d, is) (_u32P((long)(is)) ? MOVZWLmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_us(d, JIT_REXTMP))) +#define jit_ldxi_us(d, rs, is) (_u32P((long)(is)) ? 
MOVZWLmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_us(d, rs, JIT_REXTMP))) + +#define jit_sti_s(id, rs) (_u32P((long)(id)) ? MOVWrm(jit_reg16(rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_s(JIT_REXTMP, rs))) +#define jit_stxi_s(id, rd, rs) (_u32P((long)(id)) ? MOVWrm(jit_reg16(rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_s(JIT_REXTMP, rd, rs))) + +#define jit_ldi_ui(d, is) (_u32P((long)(is)) ? MOVLmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_ui(d, JIT_REXTMP))) +#define jit_ldxi_ui(d, rs, is) (_u32P((long)(is)) ? MOVLmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_ui(d, rs, JIT_REXTMP))) + +#define jit_ldi_i(d, is) (_u32P((long)(is)) ? MOVSLQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_i(d, JIT_REXTMP))) +#define jit_ldxi_i(d, rs, is) (_u32P((long)(is)) ? MOVSLQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_i(d, rs, JIT_REXTMP))) + +#define jit_sti_i(id, rs) (_u32P((long)(id)) ? MOVLrm((rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_i(JIT_REXTMP, rs))) +#define jit_stxi_i(id, rd, rs) (_u32P((long)(id)) ? MOVLrm((rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_i(JIT_REXTMP, rd, rs))) + +#define jit_ldi_l(d, is) (_u32P((long)(is)) ? MOVQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_l(d, JIT_REXTMP))) +#define jit_ldxi_l(d, rs, is) (_u32P((long)(is)) ? MOVQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_l(d, rs, JIT_REXTMP))) + +#define jit_sti_l(id, rs) (_u32P((long)(id)) ? MOVQrm((rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_l(JIT_REXTMP, rs))) +#define jit_stxi_l(id, rd, rs) (_u32P((long)(id)) ? 
MOVQrm((rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_l(JIT_REXTMP, rd, rs))) + +#define jit_ldr_ui(d, rs) MOVLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_ui(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d)) + +#define jit_ldr_i(d, rs) MOVSLQmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_i(d, s1, s2) MOVSLQmr(0, (s1), (s2), 1, (d)) + +#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_l(d, s1, s2) MOVQmr(0, (s1), (s2), 1, (d)) + +#define jit_str_l(rd, rs) MOVQrm((rs), 0, (rd), 0, 0) +#define jit_stxr_l(d1, d2, rs) MOVQrm((rs), 0, (d1), (d2), 1) + +#define jit_blti_l(label, rs, is) jit_bra_l0((rs), (is), JLm(label), JSm(label) ) +#define jit_blei_l(label, rs, is) jit_bra_l ((rs), (is), JLEm(label) ) +#define jit_bgti_l(label, rs, is) jit_bra_l ((rs), (is), JGm(label) ) +#define jit_bgei_l(label, rs, is) jit_bra_l0((rs), (is), JGEm(label), JNSm(label) ) +#define jit_beqi_l(label, rs, is) jit_bra_l0((rs), (is), JEm(label), JEm(label) ) +#define jit_bnei_l(label, rs, is) jit_bra_l0((rs), (is), JNEm(label), JNEm(label) ) +#define jit_blti_ul(label, rs, is) jit_bra_l ((rs), (is), JBm(label) ) +#define jit_blei_ul(label, rs, is) jit_bra_l0((rs), (is), JBEm(label), JEm(label) ) +#define jit_bgti_ul(label, rs, is) jit_bra_l0((rs), (is), JAm(label), JNEm(label) ) +#define jit_bgei_ul(label, rs, is) jit_bra_l ((rs), (is), JAEm(label) ) +#define jit_bmsi_l(label, rs, is) (jit_reduceQ(TEST, (is), (rs)), JNZm(label), _jit.x.pc) +#define jit_bmci_l(label, rs, is) (jit_reduceQ(TEST, (is), (rs)), JZm(label), _jit.x.pc) + +#define jit_pushr_l(rs) jit_pushr_i(rs) +#define jit_popr_l(rs) jit_popr_i(rs) + +#define jit_pusharg_l(rs) jit_pusharg_i(rs) +#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX)) +#define jit_bltr_l(label, s1, s2) jit_bra_qr((s1), (s2), JLm(label) ) +#define jit_bler_l(label, s1, s2) jit_bra_qr((s1), (s2), JLEm(label) ) +#define jit_bgtr_l(label, s1, s2) jit_bra_qr((s1), (s2), JGm(label) ) +#define jit_bger_l(label, s1, s2) jit_bra_qr((s1), 
(s2), JGEm(label) ) +#define jit_beqr_l(label, s1, s2) jit_bra_qr((s1), (s2), JEm(label) ) +#define jit_bner_l(label, s1, s2) jit_bra_qr((s1), (s2), JNEm(label) ) +#define jit_bltr_ul(label, s1, s2) jit_bra_qr((s1), (s2), JBm(label) ) +#define jit_bler_ul(label, s1, s2) jit_bra_qr((s1), (s2), JBEm(label) ) +#define jit_bgtr_ul(label, s1, s2) jit_bra_qr((s1), (s2), JAm(label) ) +#define jit_bger_ul(label, s1, s2) jit_bra_qr((s1), (s2), JAEm(label) ) + +/* Bool operations. */ +#define jit_bool_qr(d, s1, s2, op) \ + (jit_replace8(d, CMPQrr(s2, s1), op)) + +#define jit_bool_qi(d, rs, is, op) \ + (jit_replace8(d, CMPQir(is, rs), op)) + +/* When CMP with 0 can be replaced with TEST */ +#define jit_bool_qi0(d, rs, is, op, op0) \ + ((is) != 0 \ + ? (jit_replace8(d, CMPQir(is, rs), op)) \ + : (jit_replace8(d, TESTQrr(rs, rs), op0))) + +#define jit_ltr_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETLr ) +#define jit_ler_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETLEr ) +#define jit_gtr_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETGr ) +#define jit_ger_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETGEr ) +#define jit_eqr_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETEr ) +#define jit_ner_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETNEr ) +#define jit_ltr_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETBr ) +#define jit_ler_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETBEr ) +#define jit_gtr_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETAr ) +#define jit_ger_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETAEr ) + +#define jit_lti_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETLr, SETSr ) +#define jit_lei_l(d, rs, is) jit_bool_qi ((d), (rs), (is), SETLEr ) +#define jit_gti_l(d, rs, is) jit_bool_qi ((d), (rs), (is), SETGr ) +#define jit_gei_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETGEr, SETNSr ) +#define jit_eqi_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETEr, SETEr ) +#define jit_nei_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETNEr, SETNEr ) +#define jit_lti_ul(d, rs, is) 
jit_bool_qi ((d), (rs), (is), SETBr ) +#define jit_lei_ul(d, rs, is) jit_bool_qi0((d), (rs), (is), SETBEr, SETEr ) +#define jit_gti_ul(d, rs, is) jit_bool_qi0((d), (rs), (is), SETAr, SETNEr ) +#define jit_gei_ul(d, rs, is) jit_bool_qi0((d), (rs), (is), SETAEr, INCLr ) + +/* Multiplication/division. */ +#define jit_mulr_ul_(s1, s2) \ + jit_qopr_(_RAX, s1, s2, MULQr(s1), MULQr(s2)) + +#define jit_mulr_l_(s1, s2) \ + jit_qopr_(_RAX, s1, s2, IMULQr(s1), IMULQr(s2)) + +#define jit_muli_l_(is, rs) \ + (MOVQir(is, rs == _RAX ? _RDX : _RAX), \ + IMULQr(rs == _RAX ? _RDX : rs)) + +/* Unsigned variant of jit_muli_l_: same operand setup, but emits MULQr like jit_mulr_ul_ (the signed variant above emits IMULQr). */ +#define jit_muli_ul_(is, rs) \ + (MOVQir(is, rs == _RAX ? _RDX : _RAX), \ + MULQr(rs == _RAX ? _RDX : rs)) + +#define jit_divi_l_(result, d, rs, is) \ + (jit_might (d, _RAX, jit_pushr_l(_RAX)), \ + jit_might (d, _RCX, jit_pushr_l(_RCX)), \ + jit_might (d, _RDX, jit_pushr_l(_RDX)), \ + jit_might (rs, _RAX, MOVQrr(rs, _RAX)), \ + jit_might (rs, _RDX, MOVQrr(rs, _RDX)), \ + MOVQir(is, _RCX), \ + SARQir(63, _RDX), \ + IDIVQr(_RCX), \ + jit_might(d, result, MOVQrr(result, d)), \ + jit_might(d, _RDX, jit_popr_l(_RDX)), \ + jit_might(d, _RCX, jit_popr_l(_RCX)), \ + jit_might(d, _RAX, jit_popr_l(_RAX))) + +#define jit_divr_l_(result, d, s1, s2) \ + (jit_might (d, _RAX, jit_pushr_l(_RAX)), \ + jit_might (d, _RCX, jit_pushr_l(_RCX)), \ + jit_might (d, _RDX, jit_pushr_l(_RDX)), \ + ((s1 == _RCX) ? jit_pushr_l(_RCX) : 0), \ + jit_might (s2, _RCX, MOVQrr(s2, _RCX)), \ + ((s1 == _RCX) ?
jit_popr_l(_RDX) : \ + jit_might (s1, _RDX, MOVQrr(s1, _RDX))), \ + MOVQrr(_RDX, _RAX), \ + SARQir(63, _RDX), \ + IDIVQr(_RCX), \ + jit_might(d, result, MOVQrr(result, d)), \ + jit_might(d, _RDX, jit_popr_l(_RDX)), \ + jit_might(d, _RCX, jit_popr_l(_RCX)), \ + jit_might(d, _RAX, jit_popr_l(_RAX))) + +#define jit_divi_ul_(result, d, rs, is) \ + (jit_might (d, _RAX, jit_pushr_l(_RAX)), \ + jit_might (d, _RCX, jit_pushr_l(_RCX)), \ + jit_might (d, _RDX, jit_pushr_l(_RDX)), \ + jit_might (rs, _RAX, MOVQrr(rs, _RAX)), \ + MOVQir(is, _RCX), \ + XORQrr(_RDX, _RDX), \ + DIVQr(_RCX), \ + jit_might(d, result, MOVQrr(result, d)), \ + jit_might(d, _RDX, jit_popr_l(_RDX)), \ + jit_might(d, _RCX, jit_popr_l(_RCX)), \ + jit_might(d, _RAX, jit_popr_l(_RAX))) + +#define jit_divr_ul_(result, d, s1, s2) \ + (jit_might (d, _RAX, jit_pushr_l(_RAX)), \ + jit_might (d, _RCX, jit_pushr_l(_RCX)), \ + jit_might (d, _RDX, jit_pushr_l(_RDX)), \ + ((s1 == _RCX) ? jit_pushr_l(_RCX) : 0), \ + jit_might (s2, _RCX, MOVQrr(s2, _RCX)), \ + ((s1 == _RCX) ? jit_popr_l(_RAX) : \ + jit_might (s1, _RAX, MOVQrr(s1, _RAX))), \ + XORQrr(_RDX, _RDX), \ + DIVQr(_RCX), \ + jit_might(d, result, MOVQrr(result, d)), \ + jit_might(d, _RDX, jit_popr_l(_RDX)), \ + jit_might(d, _RCX, jit_popr_l(_RCX)), \ + jit_might(d, _RAX, jit_popr_l(_RAX))) + +#define jit_muli_l(d, rs, is) jit_qop_ ((d), (rs), (is), IMULQir((is), (d)), IMULQrr(JIT_REXTMP, (d)) ) +#define jit_mulr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), IMULQrr((s1), (d)), IMULQrr((s2), (d)) ) + +/* As far as low bits are concerned, signed and unsigned multiplies are + exactly the same. */ +#define jit_muli_ul(d, rs, is) jit_qop_ ((d), (rs), (is), IMULQir((is), (d)), IMULQrr(JIT_REXTMP, (d)) ) +#define jit_mulr_ul(d, s1, s2) jit_qopr_((d), (s1), (s2), IMULQrr((s1), (d)), IMULQrr((s2), (d)) ) + +#define jit_hmuli_l(d, rs, is) \ + ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_muli_l_((is), (rs)), jit_popr_l(_RAX) ) : \ + ((d) == _RAX ? 
(jit_pushr_l(_RDX), jit_muli_l_((is), (rs)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \ + (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_muli_l_((is), (rs)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) ))) + +#define jit_hmulr_l(d, s1, s2) \ + ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_mulr_l_((s1), (s2)), jit_popr_l(_RAX) ) : \ + ((d) == _RAX ? (jit_pushr_l(_RDX), jit_mulr_l_((s1), (s2)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \ + (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_mulr_l_((s1), (s2)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) ))) + +#define jit_hmuli_ul(d, rs, is) \ + ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_muli_ul_((is), (rs)), jit_popr_l(_RAX) ) : \ + ((d) == _RAX ? (jit_pushr_l(_RDX), jit_muli_ul_((is), (rs)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \ + (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_muli_ul_((is), (rs)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) ))) + +#define jit_hmulr_ul(d, s1, s2) \ + ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_mulr_ul_((s1), (s2)), jit_popr_l(_RAX) ) : \ + ((d) == _RAX ? 
(jit_pushr_l(_RDX), jit_mulr_ul_((s1), (s2)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \ + (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_mulr_ul_((s1), (s2)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) ))) + +#define jit_divi_l(d, rs, is) jit_divi_l_(_RAX, (d), (rs), (is)) +#define jit_divi_ul(d, rs, is) jit_divi_ul_(_RAX, (d), (rs), (is)) +#define jit_modi_l(d, rs, is) jit_divi_l_(_RDX, (d), (rs), (is)) +#define jit_modi_ul(d, rs, is) jit_divi_ul_(_RDX, (d), (rs), (is)) +#define jit_divr_l(d, s1, s2) jit_divr_l_(_RAX, (d), (s1), (s2)) +#define jit_divr_ul(d, s1, s2) jit_divr_ul_(_RAX, (d), (s1), (s2)) +#define jit_modr_l(d, s1, s2) jit_divr_l_(_RDX, (d), (s1), (s2)) +#define jit_modr_ul(d, s1, s2) jit_divr_ul_(_RDX, (d), (s1), (s2)) + +#endif /* __lightning_core_h */ diff --git a/src/runtime/c/pgf/lightning/i386/core.h b/src/runtime/c/pgf/lightning/i386/core.h index dd9d58a87..e6eb7c8a8 100644 --- a/src/runtime/c/pgf/lightning/i386/core.h +++ b/src/runtime/c/pgf/lightning/i386/core.h @@ -7,21 +7,21 @@ /*********************************************************************** * - * Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc. - * Written by Paolo Bonzini. + * Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc. + * Written by Paolo Bonzini and Matthew Flatt. * * This file is part of GNU lightning. * * GNU lightning is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1, or (at your option) + * by the Free Software Foundation; either version 3, or (at your option) * any later version. - * - * GNU lightning is distributed in the hope that it will be useful, but + * + * GNU lightning is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public * License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with GNU lightning; see the file COPYING.LESSER; if not, write to the * Free Software Foundation, 59 Temple Place - Suite 330, Boston, @@ -31,22 +31,13 @@ -#ifndef __lightning_core_h -#define __lightning_core_h +#ifndef __lightning_core_i386_h +#define __lightning_core_i386_h #define JIT_FP _EBP #define JIT_SP _ESP #define JIT_RET _EAX -#define JIT_R_NUM 3 -#define JIT_V_NUM 3 -#define JIT_R(i) (_EAX + (i)) -#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1) - -struct jit_local_state { - int framesize; - int argssize; -}; /* 3-parameter operation */ #define jit_opr_(d, s1, s2, op1d, op2d) \ @@ -56,7 +47,7 @@ struct jit_local_state { /* 3-parameter operation, with immediate */ #define jit_op_(d, s1, op2d) \ - ((s1 == d) ? op2d : (MOVLrr(s1, d), op2d)) \ + ((s1 == d) ? op2d : (MOVLrr(s1, d), op2d)) /* 3-parameter operation, optimizable */ #define jit_opo_(d, s1, s2, op1d, op2d, op12d) \ @@ -67,28 +58,27 @@ struct jit_local_state { #define jit_opi_(d, rs, opdi, opdri) \ ((rs == d) ? opdi : opdri) -/* An operand is forced into a register */ -#define jit_replace(rd, rs, forced, op) \ - ((rd == forced) ? JITSORRY("Register conflict for " # op) : \ - (rs == forced) ? op : (PUSHLr(forced), MOVLrr(rs, forced), op, POPLr(forced))) - /* For LT, LE, ... */ -#define jit_replace8(d, op) \ - (jit_check8(d) \ - ? (MOVLir(0, d), op(d)) \ - : (PUSHLr(_EAX), MOVLir(0, _EAX), op(_EAX), MOVLrr(_EAX, (d)), POPLr(_EAX))) +#define jit_replace8(d, cmp, op) \ + (jit_check8(d) \ + ? 
((cmp), \ + MOVLir(0, (d)), \ + op(_rR(d) | _AL)) \ + : (jit_pushr_i(_EAX), (cmp), \ + MOVLir(0, _EAX), \ + op(_AL), MOVLrr(_EAX, (d)), jit_popr_i(_EAX))) #define jit_bool_r(d, s1, s2, op) \ - (CMPLrr(s2, s1), jit_replace8(d, op)) + (jit_replace8(d, CMPLrr(s2, s1), op)) #define jit_bool_i(d, rs, is, op) \ - (CMPLir(is, rs), jit_replace8(d, op)) + (jit_replace8(d, CMPLir(is, rs), op)) /* When CMP with 0 can be replaced with TEST */ #define jit_bool_i0(d, rs, is, op, op0) \ ((is) != 0 \ - ? (CMPLir(is, rs), jit_replace8(d, op)) \ - : (TESTLrr(rs, rs), jit_replace8(d, op0))) + ? (jit_replace8(d, CMPLir(is, rs), op)) \ + : (jit_replace8(d, TESTLrr(rs, rs), op0))) /* For BLT, BLE, ... */ #define jit_bra_r(s1, s2, op) (CMPLrr(s2, s1), op, _jit.x.pc) @@ -98,22 +88,11 @@ struct jit_local_state { #define jit_bra_i0(rs, is, op, op0) \ ( (is) == 0 ? (TESTLrr(rs, rs), op0, _jit.x.pc) : (CMPLir(is, rs), op, _jit.x.pc)) -/* Used to implement ldc, stc, ... */ -#define jit_check8(rs) ( (rs) <= _EBX ) -#define jit_reg8(rs) ( ((rs) == _SI || (rs) == _DI) ? _AL : ((rs) & _BH) | _AL ) -#define jit_reg16(rs) ( ((rs) & _BH) | _AX ) - -/* In jit_replace below, _EBX is dummy */ -#define jit_movbrm(rs, dd, db, di, ds) \ - (jit_check8(rs) \ - ? MOVBrm(jit_reg8(rs), dd, db, di, ds) \ - : jit_replace(_EBX, rs, _EAX, MOVBrm(_AL, dd, db, di, ds))) - /* Reduce arguments of XOR/OR/TEST */ #define jit_reduce_(op) op #define jit_reduce(op, is, rs) \ (_u8P(is) && jit_check8(rs) ? jit_reduce_(op##Bir(is, jit_reg8(rs))) : \ - (_u16P(is) ? jit_reduce_(op##Wir(is, jit_reg16(rs))) : \ + (_u16P(is) && JIT_CAN_16 ? jit_reduce_(op##Wir(is, jit_reg16(rs))) : \ jit_reduce_(op##Lir(is, rs)) )) /* Helper macros for MUL/DIV/IDIV */ @@ -133,62 +112,62 @@ struct jit_local_state { IMULLr(rs == _EAX ? 
_EDX : rs)) #define jit_divi_i_(result, d, rs, is) \ - (jit_might (d, _EAX, PUSHLr(_EAX)), \ - jit_might (d, _ECX, PUSHLr(_ECX)), \ - jit_might (d, _EDX, PUSHLr(_EDX)), \ + (jit_might (d, _EAX, jit_pushr_i(_EAX)), \ + jit_might (d, _ECX, jit_pushr_i(_ECX)), \ + jit_might (d, _EDX, jit_pushr_i(_EDX)), \ jit_might (rs, _EAX, MOVLrr(rs, _EAX)), \ jit_might (rs, _EDX, MOVLrr(rs, _EDX)), \ MOVLir(is, _ECX), \ SARLir(31, _EDX), \ IDIVLr(_ECX), \ jit_might(d, result, MOVLrr(result, d)), \ - jit_might(d, _EDX, POPLr(_EDX)), \ - jit_might(d, _ECX, POPLr(_ECX)), \ - jit_might(d, _EAX, POPLr(_EAX))) + jit_might(d, _EDX, jit_popr_i(_EDX)), \ + jit_might(d, _ECX, jit_popr_i(_ECX)), \ + jit_might(d, _EAX, jit_popr_i(_EAX))) #define jit_divr_i_(result, d, s1, s2) \ - (jit_might (d, _EAX, PUSHLr(_EAX)), \ - jit_might (d, _ECX, PUSHLr(_ECX)), \ - jit_might (d, _EDX, PUSHLr(_EDX)), \ - ((s1 == _ECX) ? PUSHLr(_ECX) : 0), \ + (jit_might (d, _EAX, jit_pushr_i(_EAX)), \ + jit_might (d, _ECX, jit_pushr_i(_ECX)), \ + jit_might (d, _EDX, jit_pushr_i(_EDX)), \ + ((s1 == _ECX) ? jit_pushr_i(_ECX) : 0), \ jit_might (s2, _ECX, MOVLrr(s2, _ECX)), \ - ((s1 == _ECX) ? POPLr(_EDX) : \ + ((s1 == _ECX) ? 
jit_popr_i(_EDX) : \ jit_might (s1, _EDX, MOVLrr(s1, _EDX))), \ MOVLrr(_EDX, _EAX), \ SARLir(31, _EDX), \ IDIVLr(_ECX), \ jit_might(d, result, MOVLrr(result, d)), \ - jit_might(d, _EDX, POPLr(_EDX)), \ - jit_might(d, _ECX, POPLr(_ECX)), \ - jit_might(d, _EAX, POPLr(_EAX))) + jit_might(d, _EDX, jit_popr_i(_EDX)), \ + jit_might(d, _ECX, jit_popr_i(_ECX)), \ + jit_might(d, _EAX, jit_popr_i(_EAX))) #define jit_divi_ui_(result, d, rs, is) \ - (jit_might (d, _EAX, PUSHLr(_EAX)), \ - jit_might (d, _ECX, PUSHLr(_ECX)), \ - jit_might (d, _EDX, PUSHLr(_EDX)), \ + (jit_might (d, _EAX, jit_pushr_i(_EAX)), \ + jit_might (d, _ECX, jit_pushr_i(_ECX)), \ + jit_might (d, _EDX, jit_pushr_i(_EDX)), \ jit_might (rs, _EAX, MOVLrr(rs, _EAX)), \ MOVLir(is, _ECX), \ XORLrr(_EDX, _EDX), \ DIVLr(_ECX), \ jit_might(d, result, MOVLrr(result, d)), \ - jit_might(d, _EDX, POPLr(_EDX)), \ - jit_might(d, _ECX, POPLr(_ECX)), \ - jit_might(d, _EAX, POPLr(_EAX))) + jit_might(d, _EDX, jit_popr_i(_EDX)), \ + jit_might(d, _ECX, jit_popr_i(_ECX)), \ + jit_might(d, _EAX, jit_popr_i(_EAX))) #define jit_divr_ui_(result, d, s1, s2) \ - (jit_might (d, _EAX, PUSHLr(_EAX)), \ - jit_might (d, _ECX, PUSHLr(_ECX)), \ - jit_might (d, _EDX, PUSHLr(_EDX)), \ - ((s1 == _ECX) ? PUSHLr(_ECX) : 0), \ + (jit_might (d, _EAX, jit_pushr_i(_EAX)), \ + jit_might (d, _ECX, jit_pushr_i(_ECX)), \ + jit_might (d, _EDX, jit_pushr_i(_EDX)), \ + ((s1 == _ECX) ? jit_pushr_i(_ECX) : 0), \ jit_might (s2, _ECX, MOVLrr(s2, _ECX)), \ - ((s1 == _ECX) ? POPLr(_EAX) : \ + ((s1 == _ECX) ? 
jit_popr_i(_EAX) : \ jit_might (s1, _EAX, MOVLrr(s1, _EAX))), \ XORLrr(_EDX, _EDX), \ DIVLr(_ECX), \ jit_might(d, result, MOVLrr(result, d)), \ - jit_might(d, _EDX, POPLr(_EDX)), \ - jit_might(d, _ECX, POPLr(_ECX)), \ - jit_might(d, _EAX, POPLr(_EAX))) + jit_might(d, _EDX, jit_popr_i(_EDX)), \ + jit_might(d, _ECX, jit_popr_i(_ECX)), \ + jit_might(d, _EAX, jit_popr_i(_EAX))) /* ALU */ @@ -207,6 +186,7 @@ struct jit_local_state { #define jit_subxi_i(d, rs, is) jit_op_ ((d), (rs), SBBLir((is), (d)) ) #define jit_xorr_i(d, s1, s2) jit_opr_((d), (s1), (s2), XORLrr((s1), (d)), XORLrr((s2), (d)) ) + /* These can sometimes use byte or word versions! */ #define jit_ori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(OR, (is), (d)) ) #define jit_xori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(XOR, (is), (d)) ) @@ -220,24 +200,24 @@ struct jit_local_state { #define jit_mulr_ui(d, s1, s2) jit_opr_((d), (s1), (s2), IMULLrr((s1), (d)), IMULLrr((s2), (d)) ) #define jit_hmuli_i(d, rs, is) \ - ((d) == _EDX ? ( PUSHLr(_EAX), jit_muli_i_((is), (rs)), POPLr(_EAX) ) : \ - ((d) == _EAX ? (PUSHLr(_EDX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ - (PUSHLr(_EDX), PUSHLr(_EAX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_muli_i_((is), (rs)), jit_popr_i(_EAX) ) : \ + ((d) == _EAX ? (jit_pushr_i(_EDX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \ + (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) ))) #define jit_hmulr_i(d, s1, s2) \ - ((d) == _EDX ? ( PUSHLr(_EAX), jit_mulr_i_((s1), (s2)), POPLr(_EAX) ) : \ - ((d) == _EAX ? (PUSHLr(_EDX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ - (PUSHLr(_EDX), PUSHLr(_EAX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_mulr_i_((s1), (s2)), jit_popr_i(_EAX) ) : \ + ((d) == _EAX ? 
(jit_pushr_i(_EDX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \ + (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) ))) #define jit_hmuli_ui(d, rs, is) \ - ((d) == _EDX ? ( PUSHLr(_EAX), jit_muli_ui_((is), (rs)), POPLr(_EAX) ) : \ - ((d) == _EAX ? (PUSHLr(_EDX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ - (PUSHLr(_EDX), PUSHLr(_EAX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_muli_ui_((is), (rs)), jit_popr_i(_EAX) ) : \ + ((d) == _EAX ? (jit_pushr_i(_EDX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \ + (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) ))) #define jit_hmulr_ui(d, s1, s2) \ - ((d) == _EDX ? ( PUSHLr(_EAX), jit_mulr_ui_((s1), (s2)), POPLr(_EAX) ) : \ - ((d) == _EAX ? (PUSHLr(_EDX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ - (PUSHLr(_EDX), PUSHLr(_EAX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_mulr_ui_((s1), (s2)), jit_popr_i(_EAX) ) : \ + ((d) == _EAX ? (jit_pushr_i(_EDX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \ + (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) ))) #define jit_divi_i(d, rs, is) jit_divi_i_(_EAX, (d), (rs), (is)) #define jit_divi_ui(d, rs, is) jit_divi_ui_(_EAX, (d), (rs), (is)) @@ -250,49 +230,42 @@ struct jit_local_state { /* Shifts */ +#define jit_shift(d, s1, s2, m) \ + ((d) == _ECX || (d) == (s2) \ + ? ((s2) == _EAX \ + ? 
jit_fixd(d, _EDX, jit_shift2(_EDX, s1, s2, m)) \ + : jit_fixd(d, _EAX, jit_shift2(_EAX, s1, s2, m))) \ + : jit_shift2(d, s1, s2, m)) + +/* Shift operation, assuming d != s2 or ECX */ +#define jit_shift2(d, s1, s2, m) \ + jit_op_(d, s1, jit_cfixs(s2, _ECX, m(_CL, d))) + +/* Substitute x for destination register d */ +#define jit_fixd(d, x, op) \ + (jit_pushr_i(x), op, jit_movr_i(d, x), jit_popr_i(x)) + +/* Conditionally substitute y for source register s */ +#define jit_cfixs(s, y, op) \ + ((s) == (y) ? op : \ + (jit_pushr_i(y), jit_movr_i(y, s), op, jit_popr_i(y))) + #define jit_lshi_i(d, rs, is) ((is) <= 3 ? LEALmr(0, 0, (rs), 1 << (is), (d)) : jit_op_ ((d), (rs), SHLLir((is), (d)) )) #define jit_rshi_i(d, rs, is) jit_op_ ((d), (rs), SARLir((is), (d)) ) #define jit_rshi_ui(d, rs, is) jit_op_ ((d), (rs), SHRLir((is), (d)) ) -#define jit_lshr_i(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHLLrr(_CL, (d)) )) -#define jit_rshr_i(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SARLrr(_CL, (d)) )) -#define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) )) +#define jit_lshr_i(d, r1, r2) jit_shift((d), (r1), (r2), SHLLrr) +#define jit_rshr_i(d, r1, r2) jit_shift((d), (r1), (r2), SARLrr) +#define jit_rshr_ui(d, r1, r2) jit_shift((d), (r1), (r2), SHRLrr) /* Stack */ -#define jit_pushr_i(rs) PUSHLr(rs) -#define jit_popr_i(rs) POPLr(rs) -#define jit_prolog(n) (_jitl.framesize = 8, PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI)) - -/* The += allows for stack pollution */ - -#define jit_prepare_i(ni) (_jitl.argssize += (ni)) -#define jit_prepare_f(nf) (_jitl.argssize += (nf)) -#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) -#define jit_pusharg_i(rs) PUSHLr(rs) -#define jit_finish(sub) (jit_calli((sub)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0) -#define jit_finishr(reg) (jit_callr((reg)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0) -#define 
jit_retval_i(rd) jit_movr_i ((rd), _EAX) - -#define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int)) -#define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int)) -#define jit_arg_s() ((_jitl.framesize += sizeof(int)) - sizeof(int)) -#define jit_arg_us() ((_jitl.framesize += sizeof(int)) - sizeof(int)) -#define jit_arg_i() ((_jitl.framesize += sizeof(int)) - sizeof(int)) -#define jit_arg_ui() ((_jitl.framesize += sizeof(int)) - sizeof(int)) -#define jit_arg_l() ((_jitl.framesize += sizeof(long)) - sizeof(long)) -#define jit_arg_ul() ((_jitl.framesize += sizeof(long)) - sizeof(long)) -#define jit_arg_p() ((_jitl.framesize += sizeof(long)) - sizeof(long)) - -#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float)) -#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double)) +#define jit_retval_i(rd) ((void)jit_movr_i ((rd), _EAX)) /* Unary */ #define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) ) -#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) ) -#define jit_movr_i(d, rs) ((rs) == (d) ? 0 : MOVLrr((rs), (d))) +#define jit_movr_i(d, rs) ((void)((rs) == (d) ? 0 : MOVLrr((rs), (d)))) #define jit_movi_i(d, is) ((is) ? 
MOVLir((is), (d)) : XORLrr ((d), (d)) ) -#define jit_movi_p(d, is) (MOVLir((is), (d)), _jit.x.pc) -#define jit_patch_movi(pa,pv) (*_PSL((pa) - 4) = _jit_SL((pv))) +#define jit_patch_movi(pa,pv) (*_PSL((pa) - sizeof(long)) = _jit_SL((pv))) #define jit_ntoh_ui(d, rs) jit_op_((d), (rs), BSWAPLr(d)) #define jit_ntoh_us(d, rs) jit_op_((d), (rs), RORWir(8, d)) @@ -321,93 +294,73 @@ struct jit_local_state { #define jit_gei_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETAEr, INCLr ) /* Jump */ -#define jit_bltr_i(label, s1, s2) jit_bra_r((s1), (s2), JLm(label, 0,0,0) ) -#define jit_bler_i(label, s1, s2) jit_bra_r((s1), (s2), JLEm(label,0,0,0) ) -#define jit_bgtr_i(label, s1, s2) jit_bra_r((s1), (s2), JGm(label, 0,0,0) ) -#define jit_bger_i(label, s1, s2) jit_bra_r((s1), (s2), JGEm(label,0,0,0) ) -#define jit_beqr_i(label, s1, s2) jit_bra_r((s1), (s2), JEm(label, 0,0,0) ) -#define jit_bner_i(label, s1, s2) jit_bra_r((s1), (s2), JNEm(label,0,0,0) ) -#define jit_bltr_ui(label, s1, s2) jit_bra_r((s1), (s2), JBm(label, 0,0,0) ) -#define jit_bler_ui(label, s1, s2) jit_bra_r((s1), (s2), JBEm(label,0,0,0) ) -#define jit_bgtr_ui(label, s1, s2) jit_bra_r((s1), (s2), JAm(label, 0,0,0) ) -#define jit_bger_ui(label, s1, s2) jit_bra_r((s1), (s2), JAEm(label,0,0,0) ) -#define jit_bmsr_i(label, s1, s2) (TESTLrr((s1), (s2)), JNZm(label,0,0,0), _jit.x.pc) -#define jit_bmcr_i(label, s1, s2) (TESTLrr((s1), (s2)), JZm(label,0,0,0), _jit.x.pc) -#define jit_boaddr_i(label, s1, s2) (ADDLrr((s2), (s1)), JOm(label,0,0,0), _jit.x.pc) -#define jit_bosubr_i(label, s1, s2) (SUBLrr((s2), (s1)), JOm(label,0,0,0), _jit.x.pc) -#define jit_boaddr_ui(label, s1, s2) (ADDLrr((s2), (s1)), JCm(label,0,0,0), _jit.x.pc) -#define jit_bosubr_ui(label, s1, s2) (SUBLrr((s2), (s1)), JCm(label,0,0,0), _jit.x.pc) - -#define jit_blti_i(label, rs, is) jit_bra_i0((rs), (is), JLm(label, 0,0,0), JSm(label, 0,0,0) ) -#define jit_blei_i(label, rs, is) jit_bra_i ((rs), (is), JLEm(label,0,0,0) ) -#define jit_bgti_i(label, rs, is) 
jit_bra_i ((rs), (is), JGm(label, 0,0,0) ) -#define jit_bgei_i(label, rs, is) jit_bra_i0((rs), (is), JGEm(label,0,0,0), JNSm(label,0,0,0) ) -#define jit_beqi_i(label, rs, is) jit_bra_i0((rs), (is), JEm(label, 0,0,0), JEm(label, 0,0,0) ) -#define jit_bnei_i(label, rs, is) jit_bra_i0((rs), (is), JNEm(label,0,0,0), JNEm(label,0,0,0) ) -#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JBm(label, 0,0,0) ) -#define jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JBEm(label,0,0,0), JEm(label, 0,0,0) ) -#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JAm(label, 0,0,0), JNEm(label,0,0,0) ) -#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JAEm(label,0,0,0) ) -#define jit_boaddi_i(label, rs, is) (ADDLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc) -#define jit_bosubi_i(label, rs, is) (SUBLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc) -#define jit_boaddi_ui(label, rs, is) (ADDLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc) -#define jit_bosubi_ui(label, rs, is) (SUBLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc) - -#define jit_bmsi_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JNZm(label,0,0,0), _jit.x.pc) -#define jit_bmci_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JZm(label,0,0,0), _jit.x.pc) - -#define jit_jmpi(label) (JMPm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc) -#define jit_calli(label) (CALLm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc) -#define jit_callr(reg) (CALLsr(reg)) -#define jit_jmpr(reg) JMPsr(reg) -#define jit_patch_at(jump_pc,v) (*_PSL((jump_pc) - 4) = _jit_SL((v) - (jump_pc))) -#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_()) +#define jit_bltr_i(label, s1, s2) jit_bra_r((s1), (s2), JLm(label) ) +#define jit_bler_i(label, s1, s2) jit_bra_r((s1), (s2), JLEm(label) ) +#define jit_bgtr_i(label, s1, s2) jit_bra_r((s1), (s2), JGm(label) ) +#define jit_bger_i(label, s1, s2) jit_bra_r((s1), (s2), JGEm(label) ) +#define jit_beqr_i(label, s1, s2) jit_bra_r((s1), (s2), JEm(label) ) +#define 
jit_bner_i(label, s1, s2) jit_bra_r((s1), (s2), JNEm(label) ) +#define jit_bltr_ui(label, s1, s2) jit_bra_r((s1), (s2), JBm(label) ) +#define jit_bler_ui(label, s1, s2) jit_bra_r((s1), (s2), JBEm(label) ) +#define jit_bgtr_ui(label, s1, s2) jit_bra_r((s1), (s2), JAm(label) ) +#define jit_bger_ui(label, s1, s2) jit_bra_r((s1), (s2), JAEm(label) ) +#define jit_bmsr_i(label, s1, s2) (TESTLrr((s1), (s2)), JNZm(label), _jit.x.pc) +#define jit_bmcr_i(label, s1, s2) (TESTLrr((s1), (s2)), JZm(label), _jit.x.pc) +#define jit_boaddr_i(label, s1, s2) (ADDLrr((s2), (s1)), JOm(label), _jit.x.pc) +#define jit_bosubr_i(label, s1, s2) (SUBLrr((s2), (s1)), JOm(label), _jit.x.pc) +#define jit_boaddr_ui(label, s1, s2) (ADDLrr((s2), (s1)), JCm(label), _jit.x.pc) +#define jit_bosubr_ui(label, s1, s2) (SUBLrr((s2), (s1)), JCm(label), _jit.x.pc) + +#define jit_blti_i(label, rs, is) jit_bra_i0((rs), (is), JLm(label), JSm(label) ) +#define jit_blei_i(label, rs, is) jit_bra_i ((rs), (is), JLEm(label) ) +#define jit_bgti_i(label, rs, is) jit_bra_i ((rs), (is), JGm(label) ) +#define jit_bgei_i(label, rs, is) jit_bra_i0((rs), (is), JGEm(label), JNSm(label) ) +#define jit_beqi_i(label, rs, is) jit_bra_i0((rs), (is), JEm(label), JEm(label) ) +#define jit_bnei_i(label, rs, is) jit_bra_i0((rs), (is), JNEm(label), JNEm(label) ) +#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JBm(label) ) +#define jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JBEm(label), JEm(label) ) +#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JAm(label), JNEm(label) ) +#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JAEm(label) ) +#define jit_boaddi_i(label, rs, is) (ADDLir((is), (rs)), JOm(label), _jit.x.pc) +#define jit_bosubi_i(label, rs, is) (SUBLir((is), (rs)), JOm(label), _jit.x.pc) +#define jit_boaddi_ui(label, rs, is) (ADDLir((is), (rs)), JCm(label), _jit.x.pc) +#define jit_bosubi_ui(label, rs, is) (SUBLir((is), (rs)), JCm(label), _jit.x.pc) + +#define jit_bmsi_i(label, rs, is) 
(jit_reduce(TEST, (is), (rs)), JNZm(label), _jit.x.pc) +#define jit_bmci_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JZm(label), _jit.x.pc) + +#define jit_jmpi(label) (JMPm( ((unsigned long) (label))), _jit.x.pc) +#define jit_jmpr(reg) JMPsr(reg) /* Memory */ -#define jit_ldi_c(d, is) MOVSBLmr((is), 0, 0, 0, (d)) -#define jit_ldr_c(d, rs) MOVSBLmr(0, (rs), 0, 0, (d)) -#define jit_ldxr_c(d, s1, s2) MOVSBLmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_c(d, rs, is) MOVSBLmr((is), (rs), 0, 0, (d)) - -#define jit_ldi_uc(d, is) MOVZBLmr((is), 0, 0, 0, (d)) -#define jit_ldr_uc(d, rs) MOVZBLmr(0, (rs), 0, 0, (d)) -#define jit_ldxr_uc(d, s1, s2) MOVZBLmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_uc(d, rs, is) MOVZBLmr((is), (rs), 0, 0, (d)) - -#define jit_sti_c(id, rs) jit_movbrm((rs), (id), 0, 0, 0) +#define jit_ldr_uc(d, rs) MOVZBLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_uc(d, s1, s2) MOVZBLmr(0, (s1), (s2), 1, (d)) + #define jit_str_c(rd, rs) jit_movbrm((rs), 0, (rd), 0, 0) #define jit_stxr_c(d1, d2, rs) jit_movbrm((rs), 0, (d1), (d2), 1) -#define jit_stxi_c(id, rd, rs) jit_movbrm((rs), (id), (rd), 0, 0) - -#define jit_ldi_s(d, is) MOVSWLmr((is), 0, 0, 0, (d)) -#define jit_ldr_s(d, rs) MOVSWLmr(0, (rs), 0, 0, (d)) -#define jit_ldxr_s(d, s1, s2) MOVSWLmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_s(d, rs, is) MOVSWLmr((is), (rs), 0, 0, (d)) - -#define jit_ldi_us(d, is) MOVZWLmr((is), 0, 0, 0, (d)) -#define jit_ldr_us(d, rs) MOVZWLmr(0, (rs), 0, 0, (d)) -#define jit_ldxr_us(d, s1, s2) MOVZWLmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_us(d, rs, is) MOVZWLmr((is), (rs), 0, 0, (d)) - -#define jit_sti_s(id, rs) MOVWrm(jit_reg16(rs), (id), 0, 0, 0) -#define jit_str_s(rd, rs) MOVWrm(jit_reg16(rs), 0, (rd), 0, 0) -#define jit_stxr_s(d1, d2, rs) MOVWrm(jit_reg16(rs), 0, (d1), (d2), 1) -#define jit_stxi_s(id, rd, rs) MOVWrm(jit_reg16(rs), (id), (rd), 0, 0) - -#define jit_ldi_i(d, is) MOVLmr((is), 0, 0, 0, (d)) -#define jit_ldr_i(d, rs) MOVLmr(0, (rs), 0, 0, (d)) -#define 
jit_ldxr_i(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_i(d, rs, is) MOVLmr((is), (rs), 0, 0, (d)) - -#define jit_sti_i(id, rs) MOVLrm((rs), (id), 0, 0, 0) -#define jit_str_i(rd, rs) MOVLrm((rs), 0, (rd), 0, 0) -#define jit_stxr_i(d1, d2, rs) MOVLrm((rs), 0, (d1), (d2), 1) -#define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0) - + +#define jit_ldr_us(d, rs) MOVZWLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_us(d, s1, s2) MOVZWLmr(0, (s1), (s2), 1, (d)) + +#define jit_str_s(rd, rs) MOVWrm(jit_reg16(rs), 0, (rd), 0, 0) +#define jit_stxr_s(d1, d2, rs) MOVWrm(jit_reg16(rs), 0, (d1), (d2), 1) + +#define jit_str_i(rd, rs) MOVLrm((rs), 0, (rd), 0, 0) +#define jit_stxr_i(d1, d2, rs) MOVLrm((rs), 0, (d1), (d2), 1) + /* Extra */ #define jit_nop() NOP_() #define _jit_alignment(pc, n) (((pc ^ _MASK(4)) + 1) & _MASK(n)) #define jit_align(n) NOPi(_jit_alignment(_jit_UL(_jit.x.pc), (n))) -#endif /* __lightning_core_h */ + +#if LIGHTNING_CROSS \ + ? LIGHTNING_TARGET == LIGHTNING_X86_64 \ + : defined (__x86_64__) +#include "i386/core-64.h" +#else +#include "i386/core-32.h" +#endif + +#endif /* __lightning_core_i386_h */ diff --git a/src/runtime/c/pgf/lightning/i386/fp-32.h b/src/runtime/c/pgf/lightning/i386/fp-32.h new file mode 100644 index 000000000..009afc649 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/fp-32.h @@ -0,0 +1,356 @@ +/******************************** -*- C -*- **************************** + * + * Support macros for the i386 math coprocessor + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc. + * Written by Paolo Bonzini. + * + * This file is part of GNU lightning. 
+ * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + * + ***********************************************************************/ + + +#ifndef __lightning_fp_h +#define __lightning_fp_h + +/* We really must map the x87 stack onto a flat register file. In practice, + we can provide something sensible and make it work on the x86 using the + stack like a file of eight registers. + + We use six or seven registers so as to have some freedom + for floor, ceil, round, (and log, tan, atn and exp). + + Not hard at all, basically play with FXCH. FXCH is mostly free, + so the generated code is not bad. Of course we special case when one + of the operands turns out to be ST0. + + Here are the macros that actually do the trick. */ + +#define JIT_FPR_NUM 6 +#define JIT_FPRET 0 +#define JIT_FPR(i) (i) + +#define jit_fxch(rs, op) (((rs) != 0 ? FXCHr(rs) : 0), \ + op, ((rs) != 0 ? FXCHr(rs) : 0)) + +#define jit_fp_unary(rd, s1, op) \ + ((rd) == (s1) ? jit_fxch ((rd), op) \ + : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op) \ + : (FLDr ((s1)), op, FSTPr ((rd) + 1))) + +#define jit_fp_binary(rd, s1, s2, op, opr) \ + ((rd) == (s1) ? \ + ((s2) == 0 ? opr(0, (rd)) \ + : (s2) == (s1) ? jit_fxch((rd), op(0, 0)) \ + : jit_fxch((rd), op((s2), 0))) \ + : (rd) == (s2) ? \ + ((s1) == 0 ? 
op(0, (rd)) \ + : jit_fxch((rd), opr((s1), 0))) \ + : (FLDr (s1), op((s2)+1, 0), FSTPr((rd)+1))) + +#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr) +#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr) +#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr) +#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr) + +#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1)) +#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0)) +#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa)) + +/* - moves: + + move FPR0 to FPR3 + FST ST3 + + move FPR3 to FPR0 + FXCH ST3 + FST ST3 + + move FPR3 to FPR1 + FLD ST3 + FSTP ST2 Stack is rotated, so FPRn becomes STn+1 */ + +#define jit_movr_d(rd,s1) \ + ((s1) == (rd) ? 0 \ + : (s1) == 0 ? FSTr ((rd)) \ + : (rd) == 0 ? (FXCHr ((s1)), FSTr ((s1))) \ + : (FLDr ((s1)), FSTPr ((rd)+1))) + +/* - loads: + + load into FPR0 + FSTP ST0 + FLD [FUBAR] + + load into FPR3 + FSTP ST3 Save old st0 into destination register + FLD [FUBAR] + FXCH ST3 Get back old st0 + + (and similarly for immediates, using the stack) */ + +#define jit_movi_f(rd,immf) \ + (_O (0x68), \ + *((float *) _jit.x.pc) = (float) immf, \ + _jit.x.uc_pc += sizeof (float), \ + jit_ldr_f((rd), _ESP), \ + ADDLir(4, _ESP)) + +union jit_double_imm { + double d; + int i[2]; +}; + +#define jit_movi_d(rd,immd) \ + (_O (0x68), \ + _jit.x.uc_pc[4] = 0x68, \ + ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \ + *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \ + _jit.x.uc_pc += 9, \ + jit_ldr_d((rd), _ESP), \ + ADDLir(8, _ESP)) + +#define jit_ldi_f(rd, is) \ + ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \ + : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldi_d(rd, is) \ + ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \ + : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldr_f(rd, rs) \ + ((rd) == 0 ? 
(FSTPr (0), FLDSm(0, (rs), 0, 0)) \ + : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldr_d(rd, rs) \ + ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \ + : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldxi_f(rd, rs, is) \ + ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \ + : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldxi_d(rd, rs, is) \ + ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \ + : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldxr_f(rd, s1, s2) \ + ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \ + : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) + +#define jit_ldxr_d(rd, s1, s2) \ + ((rd) == 0 ? (FSTPr (0), FLDLm(0, (s1), (s2), 1)) \ + : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) + +#define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \ + ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0)) \ + : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \ + POPLr((rs))) + +#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0)) +#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1)) +#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0)) +#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1)) +#define jit_sti_f(id, rs) jit_fxch ((rs), FSTSm((id), 0, 0, 0)) +#define jit_str_f(rd, rs) jit_fxch ((rs), FSTSm(0, (rd), 0, 0)) +#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0)) +#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0)) + +/* ABI */ +#define jit_retval_d(rd) FSTPr((rd) + 1) + +/* Assume round to near mode */ +#define jit_floorr_d_i(rd, rs) \ + (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX))) + +#define jit_ceilr_d_i(rd, rs) \ + (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX))) + +#define jit_truncr_d_i(rd, rs) \ + (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? 
_EAX : _EDX))) + +#define jit_calc_diff(ofs) \ + FISTLm(ofs, _ESP, 0, 0), \ + FILDLm(ofs, _ESP, 0, 0), \ + FSUBRPr(1), \ + FSTPSm(4+ofs, _ESP, 0, 0) \ + +/* The real meat */ +#define jit_floor2(rd, aux) \ + (PUSHLr(aux), \ + SUBLir(8, _ESP), \ + jit_calc_diff(0), \ + POPLr(rd), /* floor in rd */ \ + POPLr(aux), /* x-round(x) in aux */ \ + ADDLir(0x7FFFFFFF, aux), /* carry if x-round(x) < -0 */ \ + SBBLir(0, rd), /* subtract 1 if carry */ \ + POPLr(aux)) + +#define jit_ceil2(rd, aux) \ + (PUSHLr(aux), \ + SUBLir(8, _ESP), \ + jit_calc_diff(0), \ + POPLr(rd), /* floor in rd */ \ + POPLr(aux), /* x-round(x) in aux */ \ + TESTLrr(aux, aux), \ + SETGr(jit_reg8(aux)), \ + SHRLir(1, aux), \ + ADCLir(0, rd), \ + POPLr(aux)) + +/* a mingling of the two above */ +#define jit_trunc2(rd, aux) \ + (PUSHLr(aux), \ + SUBLir(12, _ESP), \ + FSTSm(0, _ESP, 0, 0), \ + jit_calc_diff(4), \ + POPLr(aux), \ + POPLr(rd), \ + TESTLrr(aux, aux), \ + POPLr(aux), \ + JSSm(_jit.x.pc + 11), \ + ADDLir(0x7FFFFFFF, aux), /* 6 */ \ + SBBLir(0, rd), /* 3 */ \ + JMPSm(_jit.x.pc + 10), /* 2 */ \ + TESTLrr(aux, aux), /* 2 */ \ + SETGr(jit_reg8(aux)), /* 3 */ \ + SHRLir(1, aux), /* 2 */ \ + ADCLir(0, rd), /* 3 */ \ + POPLr(aux)) + +/* the easy one */ +#define jit_roundr_d_i(rd, rs) \ + (PUSHLr(_EAX), \ + jit_fxch ((rs), FISTLm(0, _ESP, 0, 0)), \ + POPLr((rd))) + +#define jit_fp_test(d, s1, s2, n, _and, res) \ + (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ + ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \ + FNSTSWr(_EAX), \ + SHRLir(n, _EAX), \ + ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \ + res, \ + ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */ + +#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \ + (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ + PUSHLr(_EAX), \ + FNSTSWr(_EAX), \ + SHRLir(n, _EAX), \ + ((_and) ? ANDLir ((_and), _EAX) : 0), \ + ((cmp) ? 
CMPLir ((cmp), _EAX) : 0), \ + POPLr(_EAX), \ + res ((d)), \ + _jit.x.pc) + +#define jit_nothing_needed(x) + +/* After FNSTSW we have 1 if <, 40 if =, 0 if >, 45 if unordered. Here + is how to map the values of the status word's high byte to the + conditions. + + < = > unord valid values condition + gt no no yes no 0 STSW & 45 == 0 + lt yes no no no 1 STSW & 45 == 1 + eq no yes no no 40 STSW & 45 == 40 + unord no no no yes 45 bit 2 == 1 + + ge no yes no no 0, 40 bit 0 == 0 + unlt yes no no yes 1, 45 bit 0 == 1 + ltgt yes no yes no 0, 1 bit 6 == 0 + uneq no yes no yes 40, 45 bit 6 == 1 + le yes yes no no 1, 40 odd parity for STSW & 41 + ungt no no yes yes 0, 45 even parity for STSW & 41 + + unle yes yes no yes 1, 40, 45 STSW & 45 != 0 + unge no yes yes yes 0, 40, 45 STSW & 45 != 1 + ne yes no yes yes 0, 1, 45 STSW & 45 != 40 + ord yes yes yes no 0, 1, 40 bit 2 == 0 + + lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with + the operands swapped; it is more efficient this way. 
*/ + +#define jit_gtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETZr (_AL)) +#define jit_ger_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, SBBBir (-1, _AL)) +#define jit_unler_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETNZr (_AL)) +#define jit_unltr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, ADCBir (0, _AL)) +#define jit_ltr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETZr (_AL)) +#define jit_ler_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, SBBBir (-1, _AL)) +#define jit_unger_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETNZr (_AL)) +#define jit_ungtr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, ADCBir (0, _AL)) +#define jit_eqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETEr (_AL))) +#define jit_ner_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETNEr (_AL))) +#define jit_ltgtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, SBBBir (-1, _AL)) +#define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL)) +#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL)) +#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL)) + +#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm) +#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm) +#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm) +#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm) +#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm) +#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm) +#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm) +#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm) +#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm) +#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0x40, JNZm) +#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm) +#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm) +#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm) +#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm) + +#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs))) +#define jit_pusharg_f(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jit_str_f(JIT_SP,(rs))) + + +#if 0 +#define jit_sin() _OO(0xd9fe) /* fsin */ +#define jit_cos() _OO(0xd9ff) /* fcos */ +#define jit_tan() (_OO(0xd9f2), /* fptan */ \ + FSTPr(0)) /* fstp st */ +#define jit_atn() (_OO(0xd9e8), /* fld1 */ \ + _OO(0xd9f3)) /* fpatan */ +#define jit_exp() (_OO(0xd9ea), /* fldl2e */ \ + FMULPr(1), /* fmulp */ \ + _OO(0xd9c0), /* fld st */ \ + _OO(0xd9fc), /* frndint */ \ + _OO(0xdce9), /* fsubr */ \ + FXCHr(1), /* fxch st(1) */ \ + _OO(0xd9f0), /* f2xm1 */ \ + _OO(0xd9e8), /* fld1 */ \ + _OO(0xdec1), /* faddp */ \ + _OO(0xd9fd), /* fscale */ \ + FSTPr(1)) /* fstp st(1) */ +#define jit_log() (_OO(0xd9ed), /* fldln2 */ \ + FXCHr(1), /* fxch st(1) */ \ + _OO(0xd9f1)) /* fyl2x */ +#endif + +#define jit_prepare_f(nf) (_jitl.argssize += (nf)) +#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) +#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float)) +#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double)) + +#endif /* __lightning_fp_h */ diff --git a/src/runtime/c/pgf/lightning/i386/fp-64.h b/src/runtime/c/pgf/lightning/i386/fp-64.h new file mode 100644 index 000000000..22308a7f1 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/fp-64.h @@ -0,0 +1,325 @@ +/******************************** -*- C -*- **************************** + * + * Support macros for SSE floating-point math + * + ***********************************************************************/ + + 
+/*********************************************************************** + * + * Copyright 2006 Free Software Foundation, Inc. + * Written by Paolo Bonzini. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + * + ***********************************************************************/ + + +#ifndef __lightning_fp_h +#define __lightning_fp_h + +#include <float.h> + +#define JIT_FPR_NUM 7 +#define JIT_FPRET _XMM0 +#define JIT_FPR(i) (_XMM8 + (i)) +#define JIT_FPTMP _XMM15 + +/* Either use a temporary register that is finally AND/OR/XORed with RS = RD, + or use RD as the temporary register and do the AND/OR/XOR with RS. + OP is invoked with three arguments: the destination, the register that + holds the mask, and the register combined into the destination. */ +#define jit_unop_tmp(rd, rs, op) \ + ( (rs) == (rd) \ + ? op((rd), JIT_FPTMP, JIT_FPTMP) \ + : op((rd), (rd), (rs))) + +#define jit_unop_f(rd, rs, op) \ + ((rs) == (rd) ? op((rd)) : (MOVSSrr ((rs), (rd)), op((rd)))) + +#define jit_unop_d(rd, rs, op) \ + ((rs) == (rd) ? op((rd)) : (MOVSDrr ((rs), (rd)), op((rd)))) + +#define jit_3opc_f(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) ? op((s1), (rd)) \ + : (MOVSSrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_3opc_d(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) ? 
op((s1), (rd)) \ + : (MOVSDrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_3op_f(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) \ + ? (MOVSSrr ((rd), JIT_FPTMP), MOVSSrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \ + : (MOVSSrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_3op_d(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) \ + ? (MOVSDrr ((rd), JIT_FPTMP), MOVSDrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \ + : (MOVSDrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_addr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), ADDSSrr) +#define jit_subr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), SUBSSrr) +#define jit_mulr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), MULSSrr) +#define jit_divr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), DIVSSrr) + +#define jit_addr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), ADDSDrr) +#define jit_subr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), SUBSDrr) +#define jit_mulr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), MULSDrr) +#define jit_divr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), DIVSDrr) + +#define jit_movr_f(rd,rs) MOVSSrr((rs), (rd)) +#define jit_movr_d(rd,rs) MOVSDrr((rs), (rd)) + +/* either pcmpeqd %xmm7, %xmm7 / psrld $1, %xmm7 / andps %xmm7, %RD (if RS = RD) + or pcmpeqd %RD, %RD / psrld $1, %RD / andps %RS, %RD (if RS != RD) */ +#define _jit_abs_f(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSRLDir (1, (cnst)), ANDPSrr ((rs), (rd))) +#define _jit_neg_f(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSLLDir (31, (cnst)), XORPSrr ((rs), (rd))) +#define jit_abs_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_f) +#define jit_neg_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_f) + +#define _jit_abs_d(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSRLQir (1, (cnst)), ANDPDrr ((rs), (rd))) +#define _jit_neg_d(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSLLQir (63, (cnst)), XORPDrr ((rs), (rd))) +#define jit_abs_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_d) +#define jit_neg_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_d) + +#define 
jit_sqrt_d(rd,rs) SQRTSDrr((rs), (rd)) /* double precision: SQRTSD */ +#define jit_sqrt_f(rd,rs) SQRTSSrr((rs), (rd)) /* single precision: SQRTSS */ + +#define _jit_ldi_f(d, is) MOVSSmr((is), 0, 0, 0, (d)) +#define _jit_ldxi_f(d, rs, is) MOVSSmr((is), (rs), 0, 0, (d)) +#define jit_ldr_f(d, rs) MOVSSmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_f(d, s1, s2) MOVSSmr(0, (s1), (s2), 1, (d)) + +#define _jit_sti_f(id, rs) MOVSSrm((rs), (id), 0, 0, 0) +#define _jit_stxi_f(id, rd, rs) MOVSSrm((rs), (id), (rd), 0, 0) +#define jit_str_f(rd, rs) MOVSSrm((rs), 0, (rd), 0, 0) +#define jit_stxr_f(d1, d2, rs) MOVSSrm((rs), 0, (d1), (d2), 1) + +#define jit_ldi_f(d, is) (_u32P((long)(is)) ? _jit_ldi_f((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_f((d), JIT_REXTMP))) +#define jit_sti_f(id, rs) (_u32P((long)(id)) ? _jit_sti_f((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_f (JIT_REXTMP, (rs)))) +#define jit_ldxi_f(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_f((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_f((d), (rs), JIT_REXTMP))) +#define jit_stxi_f(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_f((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_f (JIT_REXTMP, (rd), (rs)))) + +#define _jit_ldi_d(d, is) MOVSDmr((is), 0, 0, 0, (d)) +#define _jit_ldxi_d(d, rs, is) MOVSDmr((is), (rs), 0, 0, (d)) +#define jit_ldr_d(d, rs) MOVSDmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_d(d, s1, s2) MOVSDmr(0, (s1), (s2), 1, (d)) + +#define _jit_sti_d(id, rs) MOVSDrm((rs), (id), 0, 0, 0) +#define _jit_stxi_d(id, rd, rs) MOVSDrm((rs), (id), (rd), 0, 0) +#define jit_str_d(rd, rs) MOVSDrm((rs), 0, (rd), 0, 0) +#define jit_stxr_d(d1, d2, rs) MOVSDrm((rs), 0, (d1), (d2), 1) + +#define jit_ldi_d(d, is) (_u32P((long)(is)) ? _jit_ldi_d((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_d((d), JIT_REXTMP))) +#define jit_sti_d(id, rs) (_u32P((long)(id)) ? _jit_sti_d((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_d (JIT_REXTMP, (rs)))) +#define jit_ldxi_d(d, rs, is) (_u32P((long)(is)) ? 
_jit_ldxi_d((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_d((d), (rs), JIT_REXTMP))) +#define jit_stxi_d(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_d((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_d (JIT_REXTMP, (rd), (rs)))) + + +#define jit_movi_f(rd,immf) \ + ((immf) == 0.0 ? XORSSrr ((rd), (rd)) : \ + (PUSHQi (0x12345678L), \ + *((float *) (_jit.x.uc_pc - 4)) = (float) immf, \ + jit_ldr_f((rd), _ESP), \ + ADDQir(8, _ESP))) + +union jit_double_imm { + double d; + long l; +}; + +#define jit_movi_d(rd,immd) \ + ((immd) == 0.0 ? XORSDrr ((rd), (rd)) : \ + (_O (0x50), \ + MOVQir (0x123456789abcdef0L, _EAX), \ + ((union jit_double_imm *) (_jit.x.uc_pc - 8))->d = (double) immd, \ + _O (0x50), jit_ldr_d((rd), _ESP), \ + _O (0x58), _O (0x58))) + +#define jit_extr_i_d(rd, rs) CVTSI2SDLrr((rs), (rd)) +#define jit_extr_i_f(rd, rs) CVTSI2SSLrr((rs), (rd)) +#define jit_extr_l_d(rd, rs) CVTSI2SDQrr((rs), (rd)) +#define jit_extr_l_f(rd, rs) CVTSI2SSQrr((rs), (rd)) +#define jit_extr_f_d(rd, rs) CVTSS2SDrr((rs), (rd)) +#define jit_extr_d_f(rd, rs) CVTSD2SSrr((rs), (rd)) +#define jit_roundr_d_i(rd, rs) CVTSD2SILrr((rs), (rd)) +#define jit_roundr_f_i(rd, rs) CVTSS2SILrr((rs), (rd)) +#define jit_roundr_d_l(rd, rs) CVTSD2SIQrr((rs), (rd)) +#define jit_roundr_f_l(rd, rs) CVTSS2SIQrr((rs), (rd)) +#define jit_truncr_d_i(rd, rs) CVTTSD2SILrr((rs), (rd)) +#define jit_truncr_f_i(rd, rs) CVTTSS2SILrr((rs), (rd)) +#define jit_truncr_d_l(rd, rs) CVTTSD2SIQrr((rs), (rd)) +#define jit_truncr_f_l(rd, rs) CVTTSS2SIQrr((rs), (rd)) + + +#define jit_ceilr_f_i(rd, rs) do { \ + jit_roundr_f_i ((rd), (rs)); \ + jit_extr_i_f (JIT_FPTMP, (rd)); \ + UCOMISSrr ((rs), JIT_FPTMP); \ + ADCLir (0, (rd)); \ + } while (0) + +#define jit_ceilr_d_i(rd, rs) do { \ + jit_roundr_d_i ((rd), (rs)); \ + jit_extr_i_d (JIT_FPTMP, (rd)); \ + UCOMISDrr ((rs), JIT_FPTMP); \ + ADCLir (0, (rd)); \ + } while (0) + +#define jit_ceilr_f_l(rd, rs) do { \ + jit_roundr_f_l ((rd), (rs)); \ + jit_extr_l_f 
(JIT_FPTMP, (rd)); \ + UCOMISSrr ((rs), JIT_FPTMP); \ + ADCQir (0, (rd)); \ + } while (0) + +/* The *_l conversions leave a 64-bit integer in RD, so the carry/borrow + adjustment uses the 64-bit ADCQ/SBBQ forms; the *_i conversions keep + the 32-bit ADCL/SBBL forms. */ +#define jit_ceilr_d_l(rd, rs) do { \ + jit_roundr_d_l ((rd), (rs)); \ + jit_extr_l_d (JIT_FPTMP, (rd)); \ + UCOMISDrr ((rs), JIT_FPTMP); \ + ADCQir (0, (rd)); \ + } while (0) + +#define jit_floorr_f_i(rd, rs) do { \ + jit_roundr_f_i ((rd), (rs)); \ + jit_extr_i_f (JIT_FPTMP, (rd)); \ + UCOMISSrr (JIT_FPTMP, (rs)); \ + SBBLir (0, (rd)); \ + } while (0) + +#define jit_floorr_d_i(rd, rs) do { \ + jit_roundr_d_i ((rd), (rs)); \ + jit_extr_i_d (JIT_FPTMP, (rd)); \ + UCOMISDrr (JIT_FPTMP, (rs)); \ + SBBLir (0, (rd)); \ + } while (0) + +#define jit_floorr_f_l(rd, rs) do { \ + jit_roundr_f_l ((rd), (rs)); \ + jit_extr_l_f (JIT_FPTMP, (rd)); \ + UCOMISSrr (JIT_FPTMP, (rs)); \ + SBBQir (0, (rd)); \ + } while (0) + +#define jit_floorr_d_l(rd, rs) do { \ + jit_roundr_d_l ((rd), (rs)); \ + jit_extr_l_d (JIT_FPTMP, (rd)); \ + UCOMISDrr (JIT_FPTMP, (rs)); \ + SBBQir (0, (rd)); \ + } while (0) + +#define jit_bltr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAm ((d)), _jit.x.pc) +#define jit_bler_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAEm ((d)), _jit.x.pc) +#define jit_beqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a06), JEm ((d)), _jit.x.pc) +#define jit_bner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d))), _jit.x.pc) /* JP to JMP, JZ past JMP */ +#define jit_bger_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAEm ((d)), _jit.x.pc) +#define jit_bgtr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAm ((d)), _jit.x.pc) +#define jit_bunltr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAEm ((d)), _jit.x.pc) +#define jit_bunler_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAm ((d)), _jit.x.pc) +#define jit_buneqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JEm ((d)), _jit.x.pc) +#define jit_bltgtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNEm ((d)), _jit.x.pc) +#define jit_bunger_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAm ((d)), _jit.x.pc) +#define jit_bungtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAEm 
((d)), _jit.x.pc) +#define jit_bordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNPm ((d)), _jit.x.pc) +#define jit_bunordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JPm ((d)), _jit.x.pc) + +#define jit_bltr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAm ((d)), _jit.x.pc) +#define jit_bler_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAEm ((d)), _jit.x.pc) +#define jit_beqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a06), JEm ((d)), _jit.x.pc) +#define jit_bner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d))), _jit.x.pc) /* JP to JMP, JZ past JMP */ +#define jit_bger_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAEm ((d)), _jit.x.pc) +#define jit_bgtr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAm ((d)), _jit.x.pc) +#define jit_bunltr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAEm ((d)), _jit.x.pc) +#define jit_bunler_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAm ((d)), _jit.x.pc) +#define jit_buneqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JEm ((d)), _jit.x.pc) +#define jit_bltgtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNEm ((d)), _jit.x.pc) +#define jit_bunger_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAm ((d)), _jit.x.pc) +#define jit_bungtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAEm ((d)), _jit.x.pc) +#define jit_bordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNPm ((d)), _jit.x.pc) +#define jit_bunordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JPm ((d)), _jit.x.pc) + +/* Comparisons that materialize a 0/1 result in the integer register D. */ +#define jit_ltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAr (jit_reg8((d)))) +#define jit_ler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAEr (jit_reg8((d)))) +#define jit_eqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d)))) +#define jit_ner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d)))) +#define jit_ger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAEr (jit_reg8((d)))) +#define jit_gtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAr (jit_reg8((d)))) 
+#define jit_unltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAEr (jit_reg8((d)))) +#define jit_unler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAr (jit_reg8((d)))) +#define jit_uneqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETEr (jit_reg8((d)))) +#define jit_ltgtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNEr (jit_reg8((d)))) +#define jit_unger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAr (jit_reg8((d)))) +#define jit_ungtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAEr (jit_reg8((d)))) +#define jit_ordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNPr (jit_reg8((d)))) +#define jit_unordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETPr (jit_reg8((d)))) + +#define jit_ltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAr (jit_reg8((d)))) +#define jit_ler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAEr (jit_reg8((d)))) +#define jit_eqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d)))) +#define jit_ner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d)))) +#define jit_ger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAEr (jit_reg8((d)))) +#define jit_gtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAr (jit_reg8((d)))) +#define jit_unltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAEr (jit_reg8((d)))) +#define jit_unler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAr (jit_reg8((d)))) +#define jit_uneqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETEr (jit_reg8((d)))) +#define jit_ltgtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNEr (jit_reg8((d)))) +#define jit_unger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAr (jit_reg8((d)))) +#define jit_ungtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAEr 
(jit_reg8((d)))) +#define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d)))) +#define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d)))) + +#define jit_prepare_f(num) ((_jitl.nextarg_putfp + (num) > JIT_FP_ARG_MAX \ + ? (_jitl.argssize += _jitl.nextarg_putfp + (num) - JIT_FP_ARG_MAX, \ + _jitl.fprssize = JIT_FP_ARG_MAX) \ + : (_jitl.fprssize += (num))), \ + _jitl.nextarg_putfp += (num)) +#define jit_prepare_d(num) ((_jitl.nextarg_putfp + (num) > JIT_FP_ARG_MAX \ + ? (_jitl.argssize += _jitl.nextarg_putfp + (num) - JIT_FP_ARG_MAX, \ + _jitl.fprssize = JIT_FP_ARG_MAX) \ + : (_jitl.fprssize += (num))), \ + _jitl.nextarg_putfp += (num)) + +#define jit_arg_f() (_jitl.nextarg_getfp < JIT_FP_ARG_MAX \ + ? _jitl.nextarg_getfp++ \ + : ((_jitl.framesize += sizeof(double)) - sizeof(double))) +#define jit_arg_d() (_jitl.nextarg_getfp < JIT_FP_ARG_MAX \ + ? _jitl.nextarg_getfp++ \ + : ((_jitl.framesize += sizeof(double)) - sizeof(double))) + +#define jit_getarg_f(reg, ofs) ((ofs) < JIT_FP_ARG_MAX \ + ? jit_movr_f((reg), _XMM0 + (ofs)) \ + : jit_ldxi_f((reg), JIT_FP, (ofs))) +#define jit_getarg_d(reg, ofs) ((ofs) < JIT_FP_ARG_MAX \ + ? jit_movr_d((reg), _XMM0 + (ofs)) \ + : jit_ldxi_d((reg), JIT_FP, (ofs))) + +#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp >= JIT_FP_ARG_MAX \ + ? (SUBQir(sizeof(double), JIT_SP), jit_str_f(JIT_SP,(rs))) \ + : jit_movr_f(_XMM0 + _jitl.nextarg_putfp, (rs))) +#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp >= JIT_FP_ARG_MAX \ + ? 
(SUBQir(sizeof(double), JIT_SP), jit_str_d(JIT_SP,(rs))) \ + : jit_movr_d(_XMM0 + _jitl.nextarg_putfp, (rs))) + +#endif /* __lightning_fp_h */ diff --git a/src/runtime/c/pgf/lightning/i386/fp.h b/src/runtime/c/pgf/lightning/i386/fp.h index 0d2725563..a4942fcf6 100644 --- a/src/runtime/c/pgf/lightning/i386/fp.h +++ b/src/runtime/c/pgf/lightning/i386/fp.h @@ -1,20 +1,20 @@ /******************************** -*- C -*- **************************** * - * Run-time assembler & support macros for the i386 math coprocessor + * Floating-point support (i386) * ***********************************************************************/ /*********************************************************************** * - * Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. + * Copyright 2008 Free Software Foundation, Inc. * Written by Paolo Bonzini. * * This file is part of GNU lightning. * * GNU lightning is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1, or (at your option) + * by the Free Software Foundation; either version 3, or (at your option) * any later version. * * GNU lightning is distributed in the hope that it will be useful, but @@ -24,324 +24,22 @@ * * You should have received a copy of the GNU Lesser General Public License * along with GNU lightning; see the file COPYING.LESSER; if not, write to the - * Free Software Foundation, 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. + * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. * ***********************************************************************/ -#ifndef __lightning_asm_fp_h -#define __lightning_asm_fp_h -/* We really must map the x87 stack onto a flat register file. In practice, - we can provide something sensible and make it work on the x86 using the - stack like a file of eight registers. 
+#ifndef __lightning_fp_i386_h +#define __lightning_fp_i386_h - We use six or seven registers so as to have some freedom - for floor, ceil, round, (and log, tan, atn and exp). - - Not hard at all, basically play with FXCH. FXCH is mostly free, - so the generated code is not bad. Of course we special case when one - of the operands turns out to be ST0. - - Here are the macros that actually do the trick. */ - -#define JIT_FPR_NUM 6 -#define JIT_FPR(i) (i) - -#define jit_fxch(rs, op) (((rs) != 0 ? FXCHr(rs) : 0), \ - op, ((rs) != 0 ? FXCHr(rs) : 0)) - -#define jit_fp_unary(rd, s1, op) \ - ((rd) == (s1) ? jit_fxch ((rd), op) \ - : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op) \ - : (FLDr ((s1)), op, FSTPr ((rd)))) - -#define jit_fp_binary(rd, s1, s2, op, opr) \ - ((rd) == (s1) ? \ - ((s2) == 0 ? opr(0, (rd)) \ - : (s2) == (s1) ? jit_fxch((rd), op(0, 0)) \ - : jit_fxch((rd), op((s2), 0))) \ - : (rd) == (s2) ? jit_fxch((s1), opr(0, (rd) == 0 ? (s1) : (rd))) \ - : (FLDr (s1), op(0, (s2)+1), FSTPr((rd)+1))) - -#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr) -#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr) -#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr) -#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr) - -#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1)) -#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0)) -#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa)) - -/* - moves: - - move FPR0 to FPR3 - FST ST3 - - move FPR3 to FPR0 - FXCH ST3 - FST ST3 - - move FPR3 to FPR1 - FLD ST1 - FST ST4 Stack is rotated, so FPRn becomes STn+1 */ - -#define jit_movr_d(rd,s1) \ - ((s1) == (rd) ? 0 \ - : (s1) == 0 ? FSTr ((rd)) \ - : (rd) == 0 ? 
(FXCHr ((s1)), FSTr ((s1))) \ - : (FLDr ((s1)), FSTr ((rd)+1))) - -/* - loads: - - load into FPR0 - FSTP ST0 - FLD [FUBAR] - - load into FPR3 - FSTP ST3 Save old st0 into destination register - FLD [FUBAR] - FXCH ST3 Get back old st0 - - (and similarly for immediates, using the stack) */ - -#define jit_movi_f(rd,immf) \ - (_O (0x68), \ - *((float *) _jit.x.pc) = (float) immf, \ - _jit.x.uc_pc += sizeof (float), \ - jit_ldr_f((rd), _ESP), \ - ADDLir(4, _ESP)) - -union jit_double_imm { - double d; - int i[2]; -}; - -#define jit_movi_d(rd,immd) \ - (_O (0x68), \ - _jit.x.uc_pc[4] = 0x68, \ - ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \ - *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \ - _jit.x.uc_pc += 9, \ - jit_ldr_d((rd), _ESP), \ - ADDLir(8, _ESP)) - -#define jit_ldi_f(rd, is) \ - ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \ - : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1))) - -#define jit_ldi_d(rd, is) \ - ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \ - : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1))) - -#define jit_ldr_f(rd, rs) \ - ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \ - : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1))) - -#define jit_ldr_d(rd, rs) \ - ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \ - : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1))) - -#define jit_ldxi_f(rd, rs, is) \ - ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \ - : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1))) - -#define jit_ldxi_d(rd, rs, is) \ - ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \ - : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1))) - -#define jit_ldxr_f(rd, s1, s2) \ - ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \ - : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) - -#define jit_ldxr_d(rd, s1, s2) \ - ((rd) == 0 ? (FSTPr (0), FLDLm(0, (s1), (s2), 1)) \ - : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) - -#define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \ - ((rd) == 0 ? 
(FSTPr (0), FILDLm(0, _ESP, 0, 0)) \ - : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \ - POPLr((rs))) - -#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0)) -#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1)) -#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0)) -#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1)) -#define jit_sti_f(id, rs) jit_fxch ((rs), FSTSm((id), 0, 0, 0)) -#define jit_str_f(rd, rs) jit_fxch ((rs), FSTSm(0, (rd), 0, 0)) -#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0)) -#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0)) - -/* Assume round to near mode */ -#define jit_floorr_d_i(rd, rs) \ - (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX))) - -#define jit_ceilr_d_i(rd, rs) \ - (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX))) - -#define jit_truncr_d_i(rd, rs) \ - (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX))) - -#define jit_calc_diff(ofs) \ - FISTLm(ofs, _ESP, 0, 0), \ - FILDLm(ofs, _ESP, 0, 0), \ - FSUBRPr(1), \ - FSTPSm(4+ofs, _ESP, 0, 0) \ - -/* The real meat */ -#define jit_floor2(rd, aux) \ - (PUSHLr(aux), \ - SUBLir(8, _ESP), \ - jit_calc_diff(0), \ - POPLr(rd), /* floor in rd */ \ - POPLr(aux), /* x-round(x) in aux */ \ - ADDLir(0x7FFFFFFF, aux), /* carry if x-round(x) < -0 */ \ - SBBLir(0, rd), /* subtract 1 if carry */ \ - POPLr(aux)) - -#define jit_ceil2(rd, aux) \ - (PUSHLr(aux), \ - SUBLir(8, _ESP), \ - jit_calc_diff(0), \ - POPLr(rd), /* floor in rd */ \ - POPLr(aux), /* x-round(x) in aux */ \ - TESTLrr(aux, aux), \ - SETGr(jit_reg8(aux)), \ - SHRLir(1, aux), \ - ADCLir(0, rd), \ - POPLr(aux)) - -/* a mingling of the two above */ -#define jit_trunc2(rd, aux) \ - (PUSHLr(aux), \ - SUBLir(12, _ESP), \ - FSTSm(0, _ESP, 0, 0), \ - jit_calc_diff(4), \ - POPLr(aux), \ - POPLr(rd), \ - TESTLrr(aux, aux), \ - POPLr(aux), \ - JSSm(_jit.x.pc + 11, 0, 0, 0), \ - ADDLir(0x7FFFFFFF, aux), /* 6 */ \ - 
SBBLir(0, rd), /* 3 */ \ - JMPSm(_jit.x.pc + 10, 0, 0, 0), /* 2 */ \ - TESTLrr(aux, aux), /* 2 */ \ - SETGr(jit_reg8(aux)), /* 3 */ \ - SHRLir(1, aux), /* 2 */ \ - ADCLir(0, rd), /* 3 */ \ - POPLr(aux)) - -/* the easy one */ -#define jit_roundr_d_i(rd, rs) \ - (PUSHLr(_EAX), \ - jit_fxch ((rs), FISTPLm(0, _ESP, 0, 0)), \ - POPLr((rd))) - -#define jit_fp_test(d, s1, s2, n, _and, res) \ - (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ - ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \ - FNSTSWr(_EAX), \ - SHRLir(n, _EAX), \ - ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \ - res, \ - ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */ - -#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \ - (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ - PUSHLr(_EAX), \ - FNSTSWr(_EAX), \ - SHRLir(n, _EAX), \ - ((_and) ? ANDLir ((_and), _EAX) : 0), \ - ((cmp) ? CMPLir ((cmp), _AL) : 0), \ - POPLr(_EAX), \ - res ((d), 0, 0, 0)) - -#define jit_nothing_needed(x) - -/* After FNSTSW we have 1 if <, 40 if =, 0 if >, 45 if unordered. Here - is how to map the values of the status word's high byte to the - conditions. - - < = > unord valid values condition - gt no no yes no 0 STSW & 45 == 0 - lt yes no no no 1 STSW & 45 == 1 - eq no yes no no 40 STSW & 45 == 40 - unord no no no yes 45 bit 2 == 1 - - ge no yes no no 0, 40 bit 0 == 0 - unlt yes no no yes 1, 45 bit 0 == 1 - ltgt yes no yes no 0, 1 bit 6 == 0 - uneq no yes no yes 40, 45 bit 6 == 1 - le yes yes no no 1, 40 odd parity for STSW & 41 - ungt no no yes yes 0, 45 even parity for STSW & 41 - - unle yes yes no yes 1, 40, 45 STSW & 45 != 0 - unge no yes yes yes 0, 40, 45 STSW & 45 != 1 - ne yes no yes yes 0, 1, 45 STSW & 45 != 40 - ord yes yes yes no 0, 1, 40 bit 2 == 0 - - lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with - the operands swapped; it is more efficient this way. 
*/ - -#define jit_gtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETZr (_AL)) -#define jit_ger_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, SBBBir (-1, _AL)) -#define jit_unler_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETNZr (_AL)) -#define jit_unltr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, ADCBir (0, _AL)) -#define jit_ltr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETZr (_AL)) -#define jit_ler_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, SBBBir (-1, _AL)) -#define jit_unger_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETNZr (_AL)) -#define jit_ungtr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, ADCBir (0, _AL)) -#define jit_eqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETEr (_AL))) -#define jit_ner_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETNEr (_AL))) -#define jit_ltgtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, SBBBir (-1, _AL)) -#define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL)) -#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL)) -#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL)) - -#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm) -#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm) -#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm) -#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm) -#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm) -#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm) -#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm) -#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm) -#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm) -#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0x40, JNZm) -#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm) -#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm) -#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm) -#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm) - -#define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs)) -#define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs)) -#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs))) -#define jit_pusharg_f(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jit_str_f(JIT_SP,(rs))) -#define jit_retval_d(op1) jit_movr_d(0, (op1)) - - -#if 0 -#define jit_sin() _OO(0xd9fe) /* fsin */ -#define jit_cos() _OO(0xd9ff) /* fcos */ -#define jit_tan() (_OO(0xd9f2), /* fptan */ \ - FSTPr(0)) /* fstp st */ -#define jit_atn() (_OO(0xd9e8), /* fld1 */ \ - _OO(0xd9f3)) /* fpatan */ -#define jit_exp() (_OO(0xd9ea), /* fldl2e */ \ - FMULPr(1), /* fmulp */ \ - _OO(0xd9c0), /* fld st */ \ - _OO(0xd9fc), /* frndint */ \ - _OO(0xdce9), /* fsubr */ \ - FXCHr(1), /* fxch st(1) */ \ - _OO(0xd9f0), /* f2xm1 */ \ - _OO(0xd9e8), /* fld1 */ \ - _OO(0xdec1), /* faddp */ \ - _OO(0xd9fd), /* fscale */ \ - FSTPr(1)) /* fstp st(1) */ -#define jit_log() (_OO(0xd9ed), /* fldln2 */ \ - FXCHr(1), /* fxch st(1) */ \ - _OO(0xd9f1)) /* fyl2x */ +#if LIGHTNING_CROSS \ + ? LIGHTNING_TARGET == LIGHTNING_X86_64 \ + : defined (__x86_64__) +#include "i386/fp-64.h" +#else +#include "i386/fp-32.h" #endif -#endif /* __lightning_asm_h */ +#endif /* __lightning_fp_i386_h */ diff --git a/src/runtime/c/pgf/lightning/i386/funcs.h b/src/runtime/c/pgf/lightning/i386/funcs.h index e90cfa430..c35849956 100644 --- a/src/runtime/c/pgf/lightning/i386/funcs.h +++ b/src/runtime/c/pgf/lightning/i386/funcs.h @@ -7,14 +7,14 @@ /*********************************************************************** * - * Copyright 2000, 2001, 2002 Free Software Foundation, Inc. 
+ * Copyright 2000, 2001, 2002, 2006 Free Software Foundation, Inc. * Written by Paolo Bonzini. * * This file is part of GNU lightning. * * GNU lightning is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1, or (at your option) + * by the Free Software Foundation; either version 3, or (at your option) * any later version. * * GNU lightning is distributed in the hope that it will be useful, but @@ -24,8 +24,8 @@ * * You should have received a copy of the GNU Lesser General Public License * along with GNU lightning; see the file COPYING.LESSER; if not, write to the - * Free Software Foundation, 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. + * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. * ***********************************************************************/ @@ -62,7 +62,7 @@ jit_flush_code(void *dest, void *end) page_size = sysconf (_SC_PAGESIZE); #endif - page = (unsigned long) dest & ~(page_size - 1); + page = (long) dest & ~(page_size - 1); length = ((char *) end - (char *) page + page_size - 1) & ~(page_size - 1); /* Simple-minded attempt at optimizing the common case where a single @@ -79,7 +79,8 @@ jit_flush_code(void *dest, void *end) /* See if we can extend the previously mprotect'ed memory area towards lower addresses: the highest address remains the same as before. */ - else if (page < prev_page && page + length <= prev_page + prev_length) + else if (page < prev_page && page + length >= prev_page + && page + length <= prev_page + prev_length) prev_length += prev_page - page, prev_page = page; /* Nothing to do, replace the area. */ |
