summary refs log tree commit diff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/c/configure.ac 2
-rw-r--r-- src/runtime/c/pgf/lightning/asm-common.h 30
-rw-r--r-- src/runtime/c/pgf/lightning/core-common.h 6
-rw-r--r-- src/runtime/c/pgf/lightning/i386/asm-32.h 125
-rw-r--r-- src/runtime/c/pgf/lightning/i386/asm-64.h 430
-rw-r--r-- src/runtime/c/pgf/lightning/i386/asm.h 2004
-rw-r--r-- src/runtime/c/pgf/lightning/i386/core-32.h 174
-rw-r--r-- src/runtime/c/pgf/lightning/i386/core-64.h 498
-rw-r--r-- src/runtime/c/pgf/lightning/i386/core.h 345
-rw-r--r-- src/runtime/c/pgf/lightning/i386/fp-32.h 356
-rw-r--r-- src/runtime/c/pgf/lightning/i386/fp-64.h 325
-rw-r--r-- src/runtime/c/pgf/lightning/i386/fp.h 330
-rw-r--r-- src/runtime/c/pgf/lightning/i386/funcs.h 13
13 files changed, 3373 insertions, 1265 deletions
diff --git a/src/runtime/c/configure.ac b/src/runtime/c/configure.ac
index 82f941cb5..2ead33e3d 100644
--- a/src/runtime/c/configure.ac
+++ b/src/runtime/c/configure.ac
@@ -43,6 +43,8 @@ AC_C_ASCII
case "$target_cpu" in
i?86) cpu=i386; AC_DEFINE(LIGHTNING_I386, 1,
[Define if lightning is targeting the x86 architecture]) ;;
+ x86_64) cpu=i386; AC_DEFINE(LIGHTNING_I386, 1,
+ [Define if lightning is targeting the x86 architecture]) ;;
sparc*) cpu=sparc; AC_DEFINE(LIGHTNING_SPARC, 1,
[Define if lightning is targeting the x86 architecture]) ;;
powerpc) cpu=ppc; AC_DEFINE(LIGHTNING_PPC, 1,
diff --git a/src/runtime/c/pgf/lightning/asm-common.h b/src/runtime/c/pgf/lightning/asm-common.h
index 42c8814a9..bdaa6c2ea 100644
--- a/src/runtime/c/pgf/lightning/asm-common.h
+++ b/src/runtime/c/pgf/lightning/asm-common.h
@@ -14,7 +14,7 @@
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1, or (at your option)
+ * by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
@@ -24,8 +24,8 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
- * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
*
***********************************************************************/
@@ -37,21 +37,25 @@
#ifndef _ASM_SAFETY
#define JITFAIL(MSG) 0
#else
-#if defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3)
+#if (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) || (defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3))
#define JITFAIL(MSG) jit_fail(MSG, __FILE__, __LINE__, __func__)
-#else
+#elif defined __GNUC__
#define JITFAIL(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
+#else
+#define JITFAIL(MSG) jit_fail(MSG, __FILE__, __LINE__, "(unknown)")
#endif
#endif
-#if defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3)
+#if (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) || (defined __GNUC__ && (__GNUC__ == 3 ? __GNUC_MINOR__ >= 2 : __GNUC__ > 3))
#define JITSORRY(MSG) jit_fail("sorry, unimplemented: " MSG, __FILE__, __LINE__, __func__)
-#else
+#elif defined __GNUC__
#define JITSORRY(MSG) jit_fail("sorry, unimplemented: " MSG, __FILE__, __LINE__, __FUNCTION__)
+#else
+#define JITSORRY(MSG) jit_fail("sorry, unimplemented: " MSG, __FILE__, __LINE__, "(unknown)")
#endif
#ifdef __GNUC__
-#define JIT_UNUSED __attribute__((unused))
+#define JIT_UNUSED __attribute__((__unused__))
#else
#define JIT_UNUSED
#endif
@@ -91,11 +95,13 @@ typedef unsigned long _ul;
#define _jit_UC(X) ((_uc )(X))
#define _jit_US(X) ((_us )(X))
#define _jit_UI(X) ((_ui )(X))
+#define _jit_SI(X) ((int )(X))
#define _jit_SL(X) ((_sl )(X))
#define _jit_UL(X) ((_ul )(X))
# define _PUC(X) ((_uc *)(X))
# define _PUS(X) ((_us *)(X))
# define _PUI(X) ((_ui *)(X))
+# define _PSI(X) ((int *)(X))
# define _PSL(X) ((_sl *)(X))
# define _PUL(X) ((_ul *)(X))
@@ -105,9 +111,9 @@ typedef unsigned long _ul;
#define _jit_L(L) _jit_UL(((*_jit.x.ul_pc++)= _jit_UL((L) )))
#define _jit_I_noinc(I) _jit_UL(((*_jit.x.ui_pc)= _jit_UI((I) )))
-#define _MASK(N) ((unsigned)((1<<(N)))-1)
-#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N)))
-#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N)))
+#define _MASK(N) ((unsigned long)((1L<<(N)))-1L)
+#define _siP(N,I) (!((((unsigned long)(I))^(((unsigned long)(I))<<1))&~_MASK(N)))
+#define _uiP(N,I) (!(((unsigned long)(I))&~_MASK(N)))
#define _suiP(N,I) (_siP(N,I) | _uiP(N,I))
#ifndef _ASM_SAFETY
@@ -125,8 +131,10 @@ typedef unsigned long _ul;
#define _s0P(I) ((I)==0)
#define _s8P(I) _siP(8,I)
#define _s16P(I) _siP(16,I)
+#define _s32P(I) _siP(32,I)
#define _u8P(I) _uiP(8,I)
#define _u16P(I) _uiP(16,I)
+#define _u32P(I) _uiP(32,I)
#define _su8(I) _ck_su(8,I)
#define _su16(I) _ck_su(16,I)
diff --git a/src/runtime/c/pgf/lightning/core-common.h b/src/runtime/c/pgf/lightning/core-common.h
index 9310ee23f..9e29250e1 100644
--- a/src/runtime/c/pgf/lightning/core-common.h
+++ b/src/runtime/c/pgf/lightning/core-common.h
@@ -45,12 +45,6 @@ typedef struct {
struct jit_local_state jitl;
} jit_state;
-#ifdef jit_init
-static jit_state _jit = jit_init ();
-#else
-static jit_state _jit;
-#endif
-
#define JIT_NOREG (-1)
#define JIT_R0 JIT_R(0)
#define JIT_R1 JIT_R(1)
diff --git a/src/runtime/c/pgf/lightning/i386/asm-32.h b/src/runtime/c/pgf/lightning/i386/asm-32.h
new file mode 100644
index 000000000..c5c0f802d
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/asm-32.h
@@ -0,0 +1,125 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Run-time assembler for the i386
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2003 Gwenole Beauchesne
+ * Copyright 2006 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ ***********************************************************************/
+
+
+
+
+#ifndef __lightning_asm_h
+#define __lightning_asm_h
+
+#ifndef LIGHTNING_DEBUG
+
+/* OPCODE + i = immediate operand
+ * + r = register operand
+ * + m = memory operand (disp,base,index,scale)
+ * + sr/sm = a star preceding a register or memory
+ */
+
+#if !_ASM_SAFETY
+# define _r1(R) _rN(R)
+# define _r2(R) _rN(R)
+# define _r4(R) _rN(R)
+# define _r8(R) _rN(R)
+# define _rM(R) _rN(R)
+# define _rX(R) _rN(R)
+#else
+/* _r1() used to accept only _AL and _AH, but _CL and _DL are also
+ * used as operands when _*AX is already taken by another operand */
+# define _r1(R) \
+ /* Valid 32 bit register? */ \
+ ((!((R) & ~0x77) \
+ /* 32, 16 or 8 bit register? */ \
+ && (((_rC(R) == 0x40 || _rC(R) == 0x30 || _rC(R) == 0x10) \
+ /* Yes. Register is _AL, _CL or _DL? */ \
+ && ( (_rN(R) | 0x10) == _AL \
+ || (_rN(R) | 0x10) == _CL \
+ || (_rN(R) | 0x10) == _DL)) \
+ /* No. Register is _AH? */ \
+ || ((_rC(R) == 0x20 && (_rN(R) | 0x20) == _AH)))) \
+ ? _rN(R) : JITFAIL("bad 8-bit register " #R))
+# define _r2(R) \
+ /* Valid 32 bit register? */ \
+ ((!((R) & ~0x77) \
+ /* 32, 16 or 8 bit register? */ \
+ && (_rC(R) == 0x40 || _rC(R) == 0x30 || _rC(R) == 0x10)) \
+ ? _rN(R) : JITFAIL("bad 16-bit register " #R))
+# define _r4(R) \
+ /* Valid 32 bit register? */ \
+ ((!((R) & ~0x77) \
+ /* 32, 16 or 8 bit register? */ \
+ && (_rC(R) == 0x40 || _rC(R) == 0x30 || _rC(R) == 0x10)) \
+ ? _rN(R) : JITFAIL("bad 32-bit register " #R))
+# define _r8(R) \
+ JITFAIL("bad 64-bit register " #R)
+# define _rM(R) \
+ /* Valid MMX register? */ \
+ ((!((R) & ~0x67) && _rC(R) == 0x60) \
+ ? _rN(R) : JITFAIL("bad MMX register " #R))
+# define _rX(R) \
+ /* Valid SSE register? */ \
+ ((!((R) & ~0x77) && _rC(R) == 0x70) \
+ ? _rN(R) : JITFAIL("bad SSE register " #R))
+#endif
+
+#define _rA(R) _r4(R)
+
+#define jit_check8(rs) ((_rN(rs) | _AL) == _AL)
+#define jit_reg8(rs) \
+ ((jit_reg16(rs) == _SI || jit_reg16(rs) == _DI) \
+ ? _AL : (_rN(rs) | _AL))
+#define jit_reg16(rs) (_rN(rs) | _AX)
+
+/* No RIP-relative addressing in 32-bit mode: a displacement with no base or index is encoded as plain disp32 (_r_D) */
+#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? _r_D (R,D ) : \
+ (_rsp12P(B) ? _r_DBIS(R,D,_ESP,_ESP,1) : \
+ _r_DB (R,D, B ))) : \
+ (_r0P(B) ? _r_4IS (R,D, I,S) : \
+ (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \
+ JITFAIL("illegal index register: %esp"))))
+#define _m32only(X) (X)
+#define _m64only(X) JITFAIL("invalid instruction in 32-bit mode")
+#define _m64(X) ((void)0)
+
+#define _AH 0x24
+#define _CH 0x25
+#define _DH 0x26
+#define _BH 0x27
+
+#define CALLsr(R) CALLLsr(R)
+#define JMPsr(R) JMPLsr(R)
+
+#define DECWr(RD) (_d16(), _Or (0x48,_r2(RD) ))
+#define DECLr(RD) _Or (0x48,_r4(RD) )
+#define INCWr(RD) (_d16(), _Or (0x40,_r2(RD) ))
+#define INCLr(RD) _Or (0x40,_r4(RD) )
+
+#endif
+#endif /* __lightning_asm_h */
diff --git a/src/runtime/c/pgf/lightning/i386/asm-64.h b/src/runtime/c/pgf/lightning/i386/asm-64.h
new file mode 100644
index 000000000..e1ce99bae
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/asm-64.h
@@ -0,0 +1,430 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Run-time assembler for the x86-64
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2003 Gwenole Beauchesne
+ * Copyright 2006 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ ***********************************************************************/
+
+
+
+
+#ifndef __lightning_asm_h
+#define __lightning_asm_h
+
+#ifndef LIGHTNING_DEBUG
+
+/* OPCODE + i = immediate operand
+ * + r = register operand
+ * + m = memory operand (disp,base,index,scale)
+ * + sr/sm = a star preceding a register or memory
+ */
+
+#if !_ASM_SAFETY
+# define _r1(R) _rN(R)
+# define _r2(R) _rN(R)
+# define _r4(R) _rN(R)
+# define _r8(R) _rN(R)
+# define _rM(R) _rN(R)
+# define _rX(R) _rN(R)
+#else
+# define _r1(R) \
+ /* Valid 64 bit register? */ \
+ ((!((R) & ~0xff) \
+ /* 64, 32, 16 or 8 bit register? */ \
+ && (_rC(R) == 0x50 || _rC(R) == 0x40 \
+ || _rC(R) == 0x30 || _rC(R) == 0x10)) \
+ ? _rN(R) : JITFAIL("bad 8-bit register " #R))
+# define _r2(R) \
+ /* Valid 64 bit register? */ \
+ ((!((R) & ~0xff) \
+ /* 64, 32, 16 or 8 bit register? */ \
+ && (_rC(R) == 0x50 || _rC(R) == 0x40 \
+ || _rC(R) == 0x30 || _rC(R) == 0x10)) \
+ ? _rN(R) : JITFAIL("bad 16-bit register " #R))
+# define _r4(R) \
+ /* Valid 64 bit register? */ \
+ ((!((R) & ~0xff) \
+ /* 64, 32, 16 or 8 bit register? */ \
+ && (_rC(R) == 0x50 || _rC(R) == 0x40 \
+ || _rC(R) == 0x30 || _rC(R) == 0x10)) \
+ ? _rN(R) : JITFAIL("bad 32-bit register " #R))
+# define _r8(R) \
+ /* Valid 64 bit register? */ \
+ ((!((R) & ~0xff) \
+ /* 64, 32, 16 or 8 bit register? */ \
+ && (_rC(R) == 0x50 || _rC(R) == 0x40 \
+ || _rC(R) == 0x30 || _rC(R) == 0x10)) \
+ ? _rN(R) : JITFAIL("bad 64-bit register " #R))
+# define _rM(R) \
+ /* Valid MMX* register? */ \
+ ((!((R) & ~0x6f) && _rC(R) == 0x60) \
+ ? _rN(R) : JITFAIL("bad MMX register " #R))
+# define _rX(R) \
+ /* Valid SSE2 register? */ \
+ ((!((R) & ~0x7f) && _rC(R) == 0x70) \
+ ? _rN(R) : JITFAIL("bad SSE2 register " #R))
+#endif
+
+#define _rA(R) _r8(R)
+
+#define jit_check8(rs) 1
+#define jit_reg8(rs) (_rR(rs) | _AL)
+#define jit_reg16(rs) (_rR(rs) | _AX)
+
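+/* Automatic RIP-relative addressing is compiled out (#if 0); the active _r_X emits an absolute disp32 via SIB (_r_DSIB) and uses _r_D only when the base is _RIP */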
+#if 0
+#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \
+ ((unsigned long)x86_get_target() + 4 + (O) - (D) <= 0xffffffff))
+
+#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \
+ (_x86_RIP_addressing_possible(D, O) ? \
+ _r_D(R, (D) - ((unsigned long)x86_get_target() + 4 + (O))) : \
+ _r_DSIB(R,D))) : \
+ _r_DSIB(R,D )) : \
+ (_rIP(B) ? _r_D (R,D ) : \
+ (_rsp12P(B) ? _r_DBIS(R,D,_RSP,_RSP,1) : \
+ _r_DB (R,D, B )))) : \
+ (_r0P(B) ? _r_4IS (R,D, I,S) : \
+ (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \
+ JITFAIL("illegal index register: %esp"))))
+#else
+#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? _r_DSIB(R,D ) : \
+ (_rIP(B) ? _r_D (R,D ) : \
+ (_rsp12P(B) ? _r_DBIS(R,D,_RSP,_RSP,1) : \
+ _r_DB (R,D, B )))) : \
+ (_r0P(B) ? _r_4IS (R,D, I,S) : \
+ (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \
+ JITFAIL("illegal index register: %esp"))))
+#endif
+
+
+#define _m32only(X) (JITFAIL("invalid instruction in 64-bit mode"))
+#define _m64only(X) (X)
+#define _m64(X) (X)
+
+#define _SPL 0x14
+#define _BPL 0x15
+#define _SIL 0x16
+#define _DIL 0x17
+#define _R8B 0x18
+#define _R9B 0x19
+#define _R10B 0x1A
+#define _R11B 0x1B
+#define _R12B 0x1C
+#define _R13B 0x1D
+#define _R14B 0x1E
+#define _R15B 0x1F
+
+#define _R8W 0x38
+#define _R9W 0x39
+#define _R10W 0x3A
+#define _R11W 0x3B
+#define _R12W 0x3C
+#define _R13W 0x3D
+#define _R14W 0x3E
+#define _R15W 0x3F
+#define _R8D 0x48
+#define _R9D 0x49
+#define _R10D 0x4A
+#define _R11D 0x4B
+#define _R12D 0x4C
+#define _R13D 0x4D
+#define _R14D 0x4E
+#define _R15D 0x4F
+
+#define _RAX 0x50
+#define _RCX 0x51
+#define _RDX 0x52
+#define _RBX 0x53
+#define _RSP 0x54
+#define _RBP 0x55
+#define _RSI 0x56
+#define _RDI 0x57
+#define _R8 0x58
+#define _R9 0x59
+#define _R10 0x5A
+#define _R11 0x5B
+#define _R12 0x5C
+#define _R13 0x5D
+#define _R14 0x5E
+#define _R15 0x5F
+#define _RIP -2
+
+#define _r1e8lP(R) ((int)(R) >= _SPL && (int)(R) <= _DIL)
+
+#define DECWr(RD) (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) ))
+#define DECLr(RD) (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) ))
+#define INCWr(RD) (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) ))
+#define INCLr(RD) (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) ))
+
+#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD)
+#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD)
+#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD)
+#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD)
+#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD)
+#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD)
+#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS)
+
+#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD)
+#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD)
+#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD)
+#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD)
+#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS)
+
+#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD)
+#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD)
+#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD)
+#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD)
+#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD)
+#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD)
+#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS)
+
+#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD)
+#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD)
+#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD)
+#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD)
+#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD)
+#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD)
+#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD)
+#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD)
+#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD)
+#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD)
+#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD)
+#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD)
+#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SALQir SHLQir
+#define SALQim SHLQim
+#define SALQrr SHLQrr
+#define SALQrm SHLQrm
+
+#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD)
+#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD)
+#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS)
+
+#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD)
+#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS)
+#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD)
+#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS)
+
+#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD)
+#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS)
+#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD)
+#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS)
+
+#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD)
+#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS)
+#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD)
+#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS)
+
+#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD)
+#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS)
+#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD)
+#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS)
+
+#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) ))
+#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS ))
+#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS ))
+#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM ))
+#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
+
+#define NOTQr(RS) _UNARYQr(X86_NOT, RS)
+#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS)
+
+#define NEGQr(RS) _UNARYQr(X86_NEG, RS)
+#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS)
+
+#define MULQr(RS) _UNARYQr(X86_MUL, RS)
+#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS)
+
+#define IMULQr(RS) _UNARYQr(X86_IMUL, RS)
+#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS)
+
+#define DIVQr(RS) _UNARYQr(X86_DIV, RS)
+#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS)
+
+#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS)
+#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS)
+
+#define IMULQir(IM, RD) IMULQirr(IM, RD, RD)
+#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) ))
+#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS ))
+#define IMULQirr(IM,RS,RD) (_REXQrr(RD, RS), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM ))
+#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM ))
+
+#define CALLQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) ))
+#define JMPQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) ))
+
+#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) ))
+#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) )))
+#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
+
+#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) )))
+#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
+#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM ))
+
+#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) ))
+#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS ))
+#define TESTQir(IM, RD) \
+ /* Immediate fits in 32 bits? */ \
+ (_s32P((long)(IM)) \
+ /* Yes. Immediate does not fit in 8 bits and reg is %rax? */ \
+ ? (!_s8P(IM) && (RD) == _RAX \
+ ? (_REXQrr(0, RD), _O_L(0xa9, IM)) \
+ : (_REXQrr(0, RD), _O_Mrm_L(0xf7, _b11, _b000, _r8(RD), IM))) \
+ /* No. Need immediate in a register */ \
+ : (MOVQir(IM, JIT_REXTMP), TESTQrr(JIT_REXTMP, RD)))
+#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
+
+#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) ))
+#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS ))
+
+#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) ))
+#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS ))
+
+#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) ))
+#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS ))
+
+#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
+#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) ))
+#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
+#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) ))
+
+#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) ))
+#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) ))
+#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) ))
+#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) ))
+#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVSWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) ))
+#define MOVSWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVZWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) ))
+#define MOVZWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) )))
+#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS )))
+
+#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
+
+
+
+#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __SSEQ1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+
+#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
+#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
+#define _SSEQ1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQ1rm(OP, RS, RSA, MD, MB, MI, MS))
+
+#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSI, RS,_rX, RD,_r8)
+#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r8)
+#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSI, RS,_rX, RD,_r8)
+#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r8)
+
+#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8)
+#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
+#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8)
+#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
+
+#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX)
+#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX)
+#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
+#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDXQrr(RS, RD) _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8)
+#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
+#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM)
+#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM)
+#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8)
+#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS)
+
+
+
+#define CALLsr(R) CALLQsr(R)
+#define JMPsr(R) JMPQsr(R)
+
+#endif
+#endif /* __lightning_asm_h */
diff --git a/src/runtime/c/pgf/lightning/i386/asm.h b/src/runtime/c/pgf/lightning/i386/asm.h
index fcc364c05..3d1a1a19b 100644
--- a/src/runtime/c/pgf/lightning/i386/asm.h
+++ b/src/runtime/c/pgf/lightning/i386/asm.h
@@ -8,12 +8,14 @@
/***********************************************************************
*
* Copyright 1999, 2000, 2001, 2002 Ian Piumarta
+ * Copyright 2003 Gwenole Beauchesne
+ * Copyright 2006 Free Software Foundation
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1, or (at your option)
+ * by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
@@ -23,16 +25,16 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
- * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
*
***********************************************************************/
-#ifndef __lightning_asm_h
-#define __lightning_asm_h
+#ifndef __lightning_asm_i386_h
+#define __lightning_asm_i386_h
/* OPCODE + i = immediate operand
* + r = register operand
@@ -40,7 +42,6 @@
* + sr/sm = a star preceding a register or memory
*/
-
typedef _uc jit_insn;
#ifndef LIGHTNING_DEBUG
@@ -60,24 +61,21 @@ typedef _uc jit_insn;
/*** REGISTERS ***/ /* [size,,number] */
+#define _NOREG 0
#define _AL 0x10
#define _CL 0x11
#define _DL 0x12
#define _BL 0x13
-#define _AH 0x14
-#define _CH 0x15
-#define _DH 0x16
-#define _BH 0x17
-
-#define _AX 0x20
-#define _CX 0x21
-#define _DX 0x22
-#define _BX 0x23
-#define _SP 0x24
-#define _BP 0x25
-#define _SI 0x26
-#define _DI 0x27
+
+#define _AX 0x30
+#define _CX 0x31
+#define _DX 0x32
+#define _BX 0x33
+#define _SP 0x34
+#define _BP 0x35
+#define _SI 0x36
+#define _DI 0x37
#define _EAX 0x40
#define _ECX 0x41
@@ -88,6 +86,32 @@ typedef _uc jit_insn;
#define _ESI 0x46
#define _EDI 0x47
+#define _MM0 0x60
+#define _MM1 0x61
+#define _MM2 0x62
+#define _MM3 0x63
+#define _MM4 0x64
+#define _MM5 0x65
+#define _MM6 0x66
+#define _MM7 0x67
+
+#define _XMM0 0x70
+#define _XMM1 0x71
+#define _XMM2 0x72
+#define _XMM3 0x73
+#define _XMM4 0x74
+#define _XMM5 0x75
+#define _XMM6 0x76
+#define _XMM7 0x77
+#define _XMM8 0x78
+#define _XMM9 0x79
+#define _XMM10 0x7a
+#define _XMM11 0x7b
+#define _XMM12 0x7c
+#define _XMM13 0x7d
+#define _XMM14 0x7e
+#define _XMM15 0x7f
+
#define _ST0 0
#define _ST1 1
#define _ST2 2
@@ -97,19 +121,18 @@ typedef _uc jit_insn;
#define _ST6 6
#define _ST7 7
-#define _rS(R) ((R)>>4)
-#define _rN(R) ((R)&0x7)
-#define _r0P(R) ((R)==0)
+#define _r0P(R) ((int)(R) == (int)_NOREG)
+#define _rIP(R) ((int)(R) == (int)_RIP)
-#ifndef _ASM_SAFETY
-#define _r1(R) _rN(R)
-#define _r2(R) _rN(R)
-#define _r4(R) _rN(R)
-#else
-#define _r1(R) ((_rS(R)==1) ? _rN(R) : JITFAIL( "8-bit register required"))
-#define _r2(R) ((_rS(R)==2) ? _rN(R) : JITFAIL("16-bit register required"))
-#define _r4(R) ((_rS(R)==4) ? _rN(R) : JITFAIL("32-bit register required"))
-#endif
+#define _rC(R) ((R) & 0xf0)
+#define _rR(R) ((R) & 0x0f)
+#define _rN(R) ((R) & 0x07)
+#define _rXP(R) ((R) > 0 && _rR(R) > 7)
+
+#define _rbpP(R) (_rR(R) == _rR(_EBP))
+#define _rspP(R) (_rR(R) == _rR(_ESP))
+#define _rbp13P(R) (_rN(R) == _rN(_EBP))
+#define _rsp12P(R) (_rN(R) == _rN(_ESP))
/*** ASSEMBLER ***/
@@ -117,7 +140,7 @@ typedef _uc jit_insn;
#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) )
#define _D8(D) (_jit_B(0), ((*(_PUC(_jit.x.pc)-1))= _CKD8(D)))
-#define _D32(D) (_jit_L(0), ((*(_PUL(_jit.x.pc)-1))= _OFF4(D)))
+#define _D32(D) (_jit_I(0), ((*(_PUI(_jit.x.pc)-1))= _OFF4(D)))
#ifndef _ASM_SAFETY
# define _M(M) (M)
@@ -147,27 +170,23 @@ typedef _uc jit_insn;
/* memory subformats - urgh! */
-#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_jit_L((long)(D)))
-#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_r4(B)) )
-#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)) )
-#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_r4(B)) ,_jit_B((long)(D)))
-#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_B((long)(D)))
-#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_r4(B)) ,_jit_L((long)(D)))
-#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_b101 ),_jit_L((long)(D)))
-#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_L((long)(D)))
+/* With X86_TARGET_64BIT the _r_D() encoding means RIP-relative addressing; use _r_DSIB() for an absolute disp32 */
+#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_jit_I((long)(D)))
+#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_jit_I((long)(D)))
+#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) )
+#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) )
+#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_jit_B((long)(D)))
+#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_jit_B((long)(D)))
+#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_jit_I((long)(D)))
+#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_jit_I((long)(D)))
+#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_jit_I((long)(D)))
-#define _r_DB( R, D,B ) ((_s0P(D) && (B != _EBP) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B ))))
-#define _r_DBIS(R, D,B,I,S) ((_s0P(D) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))
-
-#define _r_X( R, D,B,I,S) (_r0P(I) ? (_r0P(B) ? _r_D (R,D ) : \
- (_ESP==(B) ? _r_DBIS(R,D,_ESP,_ESP,1) : \
- _r_DB (R,D, B ))) : \
- (_r0P(B) ? _r_4IS (R,D, I,S) : \
- (((I)!=_ESP) ? _r_DBIS(R,D, B, I,S) : \
- JITFAIL("illegal index register: %esp"))))
+#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B ))))
+#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))
-/* instruction formats */
+
+/* --- Instruction formats ------------------------------------------------- */
/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */
@@ -178,8 +197,12 @@ typedef _uc jit_insn;
#define _OOr( OP,R ) ( _jit_B((OP)>>8), _jit_B( (OP)|_r(R)) )
#define _Os( OP,B ) ( _s8P(B) ? _jit_B(((OP)|_b10)) : _jit_B(OP) )
#define _sW( W ) ( _s8P(W) ? _jit_B(W):_jit_W(W) )
-#define _sL( L ) ( _s8P(L) ? _jit_B(L):_jit_L(L) )
+#define _sL( L ) ( _s8P(L) ? _jit_B(L):_jit_I(L) )
+#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) /* mirrors _sW(): 1 when W passes _s8P, else 2 */
+#define _sLO( L ) ( _s8P(L) ? 1 : 4 ) /* mirrors _sL(): 1 when L passes _s8P, else 4 */
+#define _O_B( OP ,B ) ( _O ( OP ) ,_jit_B(B) )
#define _O_W( OP ,W ) ( _O ( OP ) ,_jit_W(W) )
+#define _O_L( OP ,L ) ( _O ( OP ) ,_jit_I(L) )
#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) )
#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) )
#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) )
@@ -188,802 +211,1036 @@ typedef _uc jit_insn;
#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_jit_W(W),_jit_B(B))
#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_jit_B(B) )
#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_jit_W(W) )
-#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_jit_L(L) )
+#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_jit_I(L) )
+#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_jit_L(Q) )
#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) )
#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) )
#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_B(B) )
#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_W(W) )
-#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_L(L) )
+#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_I(L) )
#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_jit_B(B) )
#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) )
#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) )
-#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) )
-#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS) )
-#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_B(B) )
-#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_W(W) )
-#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_L(L) )
-#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_B(B) )
-#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS),_sW(W) )
-#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS),_sL(L) )
+#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) /* last _r_X argument tracks the immediate emitted after the operand: 0 none, 1 with _jit_B, 2 with _jit_W, 4 with _jit_I */
+#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) )
+#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_jit_B(B) )
+#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_jit_W(W) )
+#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_jit_I(L) )
+#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_jit_B(B) )
+#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W))
+#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L))
#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) )
#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) )
#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) )
-#define _wO( OP ) (_d16(), _O( OP ) )
-#define _wOr( OP,R ) (_d16(), _Or( OP,R ) )
-#define _wOr_W( OP,R ,W ) (_d16(), _Or_W( OP,R ,W) )
-#define _wOs_sW( OP ,W ) (_d16(), _Os_sW( OP ,W) )
-#define _wO_Mrm( OP ,MO,R,M ) (_d16(), _O_Mrm( OP ,MO,R,M ) )
-#define _wOO_Mrm( OP ,MO,R,M ) (_d16(),_OO_Mrm( OP ,MO,R,M ) )
-#define _wO_Mrm_B( OP ,MO,R,M ,B ) (_d16(), _O_Mrm_B( OP ,MO,R,M ,B) )
-#define _wOO_Mrm_B( OP ,MO,R,M ,B ) (_d16(),_OO_Mrm_B( OP ,MO,R,M ,B) )
-#define _wO_Mrm_W( OP ,MO,R,M ,W ) (_d16(), _O_Mrm_W( OP ,MO,R,M ,W) )
-#define _wOs_Mrm_sW(OP ,MO,R,M ,W ) (_d16(), _Os_Mrm_sW(OP ,MO,R,M ,W) )
-#define _wO_X_W( OP ,MD,MB,MI,MS,W ) (_d16(), _O_X_W( OP ,MD,MB,MI,MS ,W) )
-#define _wO_r_X( OP ,R ,MD,MB,MI,MS ) (_d16(), _O_r_X( OP ,R ,MD,MB,MI,MS ) )
-#define _wOO_r_X( OP ,R ,MD,MB,MI,MS ) (_d16(),_OO_r_X( OP ,R ,MD,MB,MI,MS ) )
-#define _wO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) (_d16(), _O_r_X_B( OP ,R ,MD,MB,MI,MS ,B) )
-#define _wOO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) (_d16(),_OO_r_X_B( OP ,R ,MD,MB,MI,MS ,B) )
-#define _wO_r_X_W( OP ,R ,MD,MB,MI,MS,W ) (_d16(), _O_r_X_W( OP ,R ,MD,MB,MI,MS ,W) )
-#define _wOs_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) (_d16(), _Os_r_X_sW(OP ,R ,MD,MB,MI,MS ,W) )
-
-/* +++ fully-qualified intrinsic instructions */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define ADCBrr(RS, RD) _O_Mrm (0x10 ,_b11,_r1(RS),_r1(RD) )
-#define ADCBmr(MD, MB, MI, MS, RD) _O_r_X (0x12 ,_r1(RD) ,MD,MB,MI,MS )
-#define ADCBrm(RS, MD, MB, MI, MS) _O_r_X (0x10 ,_r1(RS) ,MD,MB,MI,MS )
-#define ADCBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b010 ,_r1(RD) ,_su8(IM))
-#define ADCBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b010 ,MD,MB,MI,MS ,_su8(IM))
-
-#define ADCWrr(RS, RD) _wO_Mrm (0x11 ,_b11,_r2(RS),_r2(RD) )
-#define ADCWmr(MD, MB, MI, MS, RD) _wO_r_X (0x13 ,_r2(RD) ,MD,MB,MI,MS )
-#define ADCWrm(RS, MD, MB, MI, MS) _wO_r_X (0x11 ,_r2(RS) ,MD,MB,MI,MS )
-#define ADCWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b010 ,_r2(RD) ,_su16(IM))
-#define ADCWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b010 ,MD,MB,MI,MS ,_su16(IM))
-
-#define ADCLrr(RS, RD) _O_Mrm (0x11 ,_b11,_r4(RS),_r4(RD) )
-#define ADCLmr(MD, MB, MI, MS, RD) _O_r_X (0x13 ,_r4(RD) ,MD,MB,MI,MS )
-#define ADCLrm(RS, MD, MB, MI, MS) _O_r_X (0x11 ,_r4(RS) ,MD,MB,MI,MS )
-#define ADCLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b010 ,_r4(RD) ,IM )
-#define ADCLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b010 ,MD,MB,MI,MS ,IM )
-
-
-#define ADDBrr(RS, RD) _O_Mrm (0x00 ,_b11,_r1(RS),_r1(RD) )
-#define ADDBmr(MD, MB, MI, MS, RD) _O_r_X (0x02 ,_r1(RD) ,MD,MB,MI,MS )
-#define ADDBrm(RS, MD, MB, MI, MS) _O_r_X (0x00 ,_r1(RS) ,MD,MB,MI,MS )
-#define ADDBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b000 ,_r1(RD) ,_su8(IM))
-#define ADDBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b000 ,MD,MB,MI,MS ,_su8(IM))
-
-#define ADDWrr(RS, RD) _wO_Mrm (0x01 ,_b11,_r2(RS),_r2(RD) )
-#define ADDWmr(MD, MB, MI, MS, RD) _wO_r_X (0x03 ,_r2(RD) ,MD,MB,MI,MS )
-#define ADDWrm(RS, MD, MB, MI, MS) _wO_r_X (0x01 ,_r2(RS) ,MD,MB,MI,MS )
-#define ADDWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b000 ,_r2(RD) ,_su16(IM))
-#define ADDWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b000 ,MD,MB,MI,MS ,_su16(IM))
-
-#define ADDLrr(RS, RD) _O_Mrm (0x01 ,_b11,_r4(RS),_r4(RD) )
-#define ADDLmr(MD, MB, MI, MS, RD) _O_r_X (0x03 ,_r4(RD) ,MD,MB,MI,MS )
-#define ADDLrm(RS, MD, MB, MI, MS) _O_r_X (0x01 ,_r4(RS) ,MD,MB,MI,MS )
-#define ADDLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b000 ,_r4(RD) ,IM )
-#define ADDLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b000 ,MD,MB,MI,MS ,IM )
-
-
-#define ANDBrr(RS, RD) _O_Mrm (0x20 ,_b11,_r1(RS),_r1(RD) )
-#define ANDBmr(MD, MB, MI, MS, RD) _O_r_X (0x22 ,_r1(RD) ,MD,MB,MI,MS )
-#define ANDBrm(RS, MD, MB, MI, MS) _O_r_X (0x20 ,_r1(RS) ,MD,MB,MI,MS )
-#define ANDBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b100 ,_r1(RD) ,_su8(IM))
-#define ANDBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b100 ,MD,MB,MI,MS ,_su8(IM))
-
-#define ANDWrr(RS, RD) _wO_Mrm (0x21 ,_b11,_r2(RS),_r2(RD) )
-#define ANDWmr(MD, MB, MI, MS, RD) _wO_r_X (0x23 ,_r2(RD) ,MD,MB,MI,MS )
-#define ANDWrm(RS, MD, MB, MI, MS) _wO_r_X (0x21 ,_r2(RS) ,MD,MB,MI,MS )
-#define ANDWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b100 ,_r2(RD) ,_su16(IM))
-#define ANDWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b100 ,MD,MB,MI,MS ,_su16(IM))
-#define ANDLrr(RS, RD) _O_Mrm (0x21 ,_b11,_r4(RS),_r4(RD) )
-#define ANDLmr(MD, MB, MI, MS, RD) _O_r_X (0x23 ,_r4(RD) ,MD,MB,MI,MS )
-#define ANDLrm(RS, MD, MB, MI, MS) _O_r_X (0x21 ,_r4(RS) ,MD,MB,MI,MS )
-#define ANDLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b100 ,_r4(RD) ,IM )
-#define ANDLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b100 ,MD,MB,MI,MS ,IM )
+/* --- REX prefixes -------------------------------------------------------- */
-#define BSWAPLr(R) _OOr (0x0fc8,_r4(R) )
+#define _BIT(X) (!!(X)) /* normalize any nonzero value to 1 */
+#define _d64(W,R,X,B) (_jit_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) /* emit one REX prefix byte, bit layout 0100WRXB */
-#define BTWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b100 ,_r2(RD) ,_u8(IM))
-#define BTWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b100 ,MD,MB,MI,MS ,_u8(IM))
-#define BTWrr(RS,RD) _wOO_Mrm (0x0fa3 ,_b11,_r2(RS),_r2(RD) )
-#define BTWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fa3 ,_r2(RS) ,MD,MB,MI,MS )
+#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? (void)_d64(W,R,X,B) : ((void)0)) /* emit the prefix only if some bit is set or L forces it */
+#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) /* B bit derived from MR; always 0 when MR is _RIP */
+#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) /* R bit derived from register operand R */
+#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR)))) /* prefix for a lone register operand: only B can be set */
+#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB)))) /* prefix for a memory operand: X from index MI, B from base MB */
-#define BTLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b100 ,_r4(RD) ,_u8(IM))
-#define BTLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b100 ,MD,MB,MI,MS ,_u8(IM))
-#define BTLrr(RS,RD) _OO_Mrm (0x0fa3 ,_b11,_r4(RS),_r4(RD) )
-#define BTLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fa3 ,_r4(RS) ,MD,MB,MI,MS )
+/* FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH) */
+#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR))
+#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB))
+#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS)
+#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR))
+#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR))
+#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB))
+#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS)
+#define _REXLr(RR) _m64(__REX_reg(RR))
+#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI))
-#define BTCWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b111 ,_r2(RD) ,_u8(IM))
-#define BTCWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b111 ,MD,MB,MI,MS ,_u8(IM))
-#define BTCWrr(RS,RD) _wOO_Mrm (0x0fbb ,_b11,_r2(RS),_r2(RD) )
-#define BTCWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fbb ,_r2(RS) ,MD,MB,MI,MS )
+#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR))
+#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB))
+#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS)
+#define _REXQr(RR) _m64only(__REX_reg(RR))
+#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI))
-#define BTCLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b111 ,_r4(RD) ,_u8(IM))
-#define BTCLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b111 ,MD,MB,MI,MS ,_u8(IM))
-#define BTCLrr(RS,RD) _OO_Mrm (0x0fbb ,_b11,_r4(RS),_r4(RD) )
-#define BTCLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fbb ,_r4(RS) ,MD,MB,MI,MS )
+/* ========================================================================= */
+/* --- Fully-qualified intrinsic instructions ------------------------------ */
+/* ========================================================================= */
-#define BTRWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b110 ,_r2(RD) ,_u8(IM))
-#define BTRWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b110 ,MD,MB,MI,MS ,_u8(IM))
-#define BTRWrr(RS,RD) _wOO_Mrm (0x0fb3 ,_b11,_r2(RS),_r2(RD) )
-#define BTRWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fb3 ,_r2(RS) ,MD,MB,MI,MS )
+/* OPCODE + i = immediate operand
+ * + r = register operand
+ * + m = memory operand (disp,base,index,scale)
+ * + sr/sm = a star preceding a register or memory
+ * + 0 = top of stack register (for FPU instructions)
+ *
+ * NOTE in x86-64 mode: a memory operand with only a valid
+ * displacement value will lead to the expected absolute mode. If
+ * RIP addressing is necessary, _RIP shall be used as the base
+ * register argument.
+ */
-#define BTRLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b110 ,_r4(RD) ,_u8(IM))
-#define BTRLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b110 ,MD,MB,MI,MS ,_u8(IM))
-#define BTRLrr(RS,RD) _OO_Mrm (0x0fb3 ,_b11,_r4(RS),_r4(RD) )
-#define BTRLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb3 ,_r4(RS) ,MD,MB,MI,MS )
+/* --- ALU instructions ---------------------------------------------------- */
+
+enum { /* ALU operation selectors: the _ALU* macros shift them left by 3 to form the opcode, or pass them as the ModRM reg field with 0x80/0x81 */
+ X86_ADD = 0,
+ X86_OR = 1,
+ X86_ADC = 2,
+ X86_SBB = 3,
+ X86_AND = 4,
+ X86_SUB = 5,
+ X86_XOR = 6,
+ X86_CMP = 7,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) ))
+#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2,_r1(RD) ,MD,MB,MI,MS ))
+#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS )) /* byte ALU op, register RS into memory: opcode (OP<<3), reg field = RS */
+#define _ALUBir(OP, IM, RD) ((RD) == _AL ? \
+ (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \
+ (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) )
+#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM)))
+
+#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) ))
+#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS ))
+#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS ))
+#define _ALUWir(OP, IM, RD) ((RD) == _AX ? \
+ (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
+ (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) )
+#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM)))
+
+#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) ))
+#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS ))
+#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS ))
+#define _ALULir(OP, IM, RD) (!_s8P(IM) && (RD) == _EAX ? \
+ (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
+ (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) )
+#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM ))
+
+#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) ))
+#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS ))
+#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS ))
+#define _ALUQir(OP, IM, RD) \
+ /* Immediate fits in 32 bits? */ \
+ (_s32P((long)(IM)) \
+ /* Yes. Immediate does not fit in 8 bits and reg is %rax? */ \
+ ? (!_s8P(IM) && (RD) == _RAX \
+ ? (_REXQrr(0, RD), _O_L(((OP) << 3) + 5, IM)) \
+ : (_REXQrr(0, RD), _Os_Mrm_sL(0x81, _b11, OP, _r8(RD), IM))) \
+ /* No. Need immediate in a register */ \
+ : (MOVQir(IM, JIT_REXTMP), _ALUQrr(OP, JIT_REXTMP, RD)))
+#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM ))
+
+#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD)
+#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD)
+#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD)
+#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD)
+#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD)
+#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD)
+#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS)
+
+
+#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD)
+#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD)
+#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD)
+#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD)
+#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD)
+#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD)
+#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS)
+
+
+#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD)
+#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD)
+#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS)
+
+#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD)
+#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD)
+#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS)
+
+#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD)
+#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD)
+#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS)
+
+
+#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD)
+#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD)
+#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD)
+#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD)
+#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD)
+#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD)
+#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS)
+
+
+#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD)
+#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD)
+#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS)
+
+#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD)
+#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD)
+#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS)
+
+#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD)
+#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD)
+#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS)
+
+
+#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD)
+#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD)
+#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD)
+#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD)
+#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD)
+#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD)
+#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS)
+
+
+#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD)
+#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD)
+#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD)
+#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD)
+#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD)
+#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD)
+#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS)
+
+
+#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD)
+#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD)
+#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS)
+
+#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD)
+#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD)
+#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS)
+
+#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD)
+#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD)
+#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS)
+
+
+
+/* --- Shift/Rotate instructions ------------------------------------------- */
+
+enum { /* shift/rotate selectors: the _ROTSHI* macros pass them as the ModRM reg field; value 6 is not assigned here */
+ X86_ROL = 0,
+ X86_ROR = 1,
+ X86_RCL = 2,
+ X86_RCR = 3,
+ X86_SHL = 4,
+ X86_SHR = 5,
+ X86_SAR = 7,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define _ROTSHIBir(OP,IM,RD) ((IM) == 1 ? \
+ (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \
+ (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) )
+#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \
+ (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \
+ (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
+#define _ROTSHIBrr(OP,RS,RD) (((RS) == _CL) ? \
+ (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \
+ JITFAIL("source register must be CL" ) )
+#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \
+ (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \
+ JITFAIL("source register must be CL" ) )
+
+#define _ROTSHIWir(OP,IM,RD) ((IM) == 1 ? \
+ (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \
+ (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) )
+#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \
+ (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
+ (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
+#define _ROTSHIWrr(OP,RS,RD) (((RS) == _CL) ? \
+ (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \
+ JITFAIL("source register must be CL" ) )
+#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \
+ (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
+ JITFAIL("source register must be CL" ) )
+
+#define _ROTSHILir(OP,IM,RD) ((IM) == 1 ? \
+ (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \
+ (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) )
+#define _ROTSHILim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \
+ (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
+ (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
+#define _ROTSHILrr(OP,RS,RD) (((RS) == _CL) ? \
+ (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \
+ JITFAIL("source register must be CL" ) )
+#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \
+ (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
+ JITFAIL("source register must be CL" ) )
+
+#define _ROTSHIQir(OP,IM,RD) ((IM) == 1 ? \
+ (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \
+ (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) )
+#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) ((IM) == 1 ? \
+ (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
+ (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
+#define _ROTSHIQrr(OP,RS,RD) (((RS) == _CL) ? \
+ (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \
+ JITFAIL("source register must be CL" ) )
+#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == _CL) ? \
+ (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
+ JITFAIL("source register must be CL" ) )
+
+#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD)
+#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD)
+#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD)
+#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD)
+#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD)
+#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD)
+#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS)
+
+
+#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD)
+#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD)
+#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD)
+#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD)
+#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD)
+#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD)
+#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS)
+
+
+#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD)
+#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD)
+#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD)
+#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD)
+#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD)
+#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD)
+#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS)
+
+
+#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD)
+#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD)
+#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD)
+#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD)
+#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD)
+#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD)
+#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS)
+
+
+#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD)
+#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD)
+#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD)
+#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD)
+#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD)
+#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD)
+#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS)
+
+
+#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD)
+#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD)
+#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD)
+#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD)
+#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD)
+#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD)
+#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS)
-#define BTSWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b101 ,_r2(RD) ,_u8(IM))
-#define BTSWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b101 ,MD,MB,MI,MS ,_u8(IM))
-#define BTSWrr(RS,RD) _wOO_Mrm (0x0fab ,_b11,_r2(RS),_r2(RD) )
-#define BTSWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fab ,_r2(RS) ,MD,MB,MI,MS )
+#define SALBir SHLBir
+#define SALBim SHLBim
+#define SALBrr SHLBrr
+#define SALBrm SHLBrm
-#define BTSLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b101 ,_r4(RD) ,_u8(IM))
-#define BTSLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b101 ,MD,MB,MI,MS ,_u8(IM))
-#define BTSLrr(RS,RD) _OO_Mrm (0x0fab ,_b11,_r4(RS),_r4(RD) )
-#define BTSLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fab ,_r4(RS) ,MD,MB,MI,MS )
+#define SALWir SHLWir
+#define SALWim SHLWim
+#define SALWrr SHLWrr
+#define SALWrm SHLWrm
+#define SALLir SHLLir
+#define SALLim SHLLim
+#define SALLrr SHLLrr
+#define SALLrm SHLLrm
-#define CALLm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D32 (0xe8 ,(int)(D) ) : \
- JITFAIL("illegal mode in direct jump"))
-#define CALLsr(R) _O_Mrm (0xff ,_b11,_b010,_r4(R) )
+#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD)
+#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD)
+#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS)
-#define CALLsm(D,B,I,S) _O_r_X (0xff ,_b010 ,(int)(D),B,I,S )
+#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD)
+#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD)
+#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS)
-#define CBW_() _O (0x98 )
-#define CLC_() _O (0xf8 )
-#define CLTD_() _O (0x99 )
-#define CMC_() _O (0xf5 )
+#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD)
+#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD)
+#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS)
-#define CMPBrr(RS, RD) _O_Mrm (0x38 ,_b11,_r1(RS),_r1(RD) )
-#define CMPBmr(MD, MB, MI, MS, RD) _O_r_X (0x3a ,_r1(RD) ,MD,MB,MI,MS )
-#define CMPBrm(RS, MD, MB, MI, MS) _O_r_X (0x38 ,_r1(RS) ,MD,MB,MI,MS )
-#define CMPBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b111 ,_r1(RD) ,_su8(IM))
-#define CMPBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b111 ,MD,MB,MI,MS ,_su8(IM))
-#define CMPWrr(RS, RD) _wO_Mrm (0x39 ,_b11,_r2(RS),_r2(RD) )
-#define CMPWmr(MD, MB, MI, MS, RD) _wO_r_X (0x3b ,_r2(RD) ,MD,MB,MI,MS )
-#define CMPWrm(RS, MD, MB, MI, MS) _wO_r_X (0x39 ,_r2(RS) ,MD,MB,MI,MS )
-#define CMPWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b111 ,_r2(RD) ,_su16(IM))
-#define CMPWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b111 ,MD,MB,MI,MS ,_su16(IM))
+/* --- Bit test instructions ----------------------------------------------- */
-#define CMPLrr(RS, RD) _O_Mrm (0x39 ,_b11,_r4(RS),_r4(RD) )
-#define CMPLmr(MD, MB, MI, MS, RD) _O_r_X (0x3b ,_r4(RD) ,MD,MB,MI,MS )
-#define CMPLrm(RS, MD, MB, MI, MS) _O_r_X (0x39 ,_r4(RS) ,MD,MB,MI,MS )
-#define CMPLir(IM, RD) _O_Mrm_L (0x81 ,_b11,_b111 ,_r4(RD) ,IM )
-#define CMPLim(IM, MD, MB, MI, MS) _O_r_X_L (0x81 ,_b111 ,MD,MB,MI,MS ,IM )
+enum {
+ X86_BT = 4,
+ X86_BTS = 5,
+ X86_BTR = 6,
+ X86_BTC = 7,
+};
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define CWD_() _O (0x99 )
+#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM)))
+#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
+#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) ))
+#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS ))
+#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM)))
+#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
+#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) ))
+#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS ))
-#define CMPXCHGBrr(RS,RD) _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) )
-#define CMPXCHGBrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS )
+#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM)))
+#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
+#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) ))
+#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS ))
-#define CMPXCHGWrr(RS,RD) _wOO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) )
-#define CMPXCHGWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS )
+/* 16-bit BT wrappers: each forwards its complete operand list to the matching _BTW* encoder with the X86_BT sub-opcode. */
+#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD)
+#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS)
+#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD)
+#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS)
-#define CMPXCHGLrr(RS,RD) _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) )
-#define CMPXCHGLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS )
+#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD)
+#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS)
+#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD)
+#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS)
+
+
+/* 16-bit BTC wrappers: each forwards its complete operand list to the matching _BTW* encoder with the X86_BTC sub-opcode. */
+#define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD)
+#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS)
+#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD)
+#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS)
+
+#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD)
+#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS)
+#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD)
+#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS)
+
+
+/* 16-bit BTR wrappers: each forwards its complete operand list to the matching _BTW* encoder with the X86_BTR sub-opcode. */
+#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD)
+#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS)
+#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD)
+#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS)
+
+#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD)
+#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS)
+#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD)
+#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS)
+
+
+/* 16-bit BTS wrappers: each forwards its complete operand list to the matching _BTW* encoder with the X86_BTS sub-opcode. */
+#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD)
+#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS)
+#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD)
+#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS)
+
+#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD)
+#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS)
+#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD)
+#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS)
+
+
+
+/* --- Move instructions --------------------------------------------------- */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) ))
+#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS ))
+#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS ))
+#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM)))
+#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM)))
-#define DECBr(RD) _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) )
-#define DECBm(MD,MB,MI,MS) _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS )
+#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) ))
+#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS ))
+#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS ))
+#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM)))
+#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM)))
-#define DECWr(RD) _wOr (0x48,_r2(RD) )
-#define DECWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b001 ,MD,MB,MI,MS )
+#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) ))
+#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS ))
+#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM ))
+#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
+
+
+
+/* --- Unary and Multiply/Divide instructions ------------------------------ */
+
+enum {
+ X86_NOT = 2,
+ X86_NEG = 3,
+ X86_MUL = 4,
+ X86_IMUL = 5,
+ X86_DIV = 6,
+ X86_IDIV = 7,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) ))
+#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS ))
+#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) ))
+#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
+#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) ))
+#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
+#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) ))
+#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
+
+#define NOTBr(RS) _UNARYBr(X86_NOT, RS)
+#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS)
+#define NOTWr(RS) _UNARYWr(X86_NOT, RS)
+#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS)
+#define NOTLr(RS) _UNARYLr(X86_NOT, RS)
+#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS)
+
+#define NEGBr(RS) _UNARYBr(X86_NEG, RS)
+#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS)
+#define NEGWr(RS) _UNARYWr(X86_NEG, RS)
+#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS)
+#define NEGLr(RS) _UNARYLr(X86_NEG, RS)
+#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS)
+
+#define MULBr(RS) _UNARYBr(X86_MUL, RS)
+#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS)
+#define MULWr(RS) _UNARYWr(X86_MUL, RS)
+#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS)
+#define MULLr(RS) _UNARYLr(X86_MUL, RS)
+#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS)
+
+#define IMULBr(RS) _UNARYBr(X86_IMUL, RS)
+#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS)
+#define IMULWr(RS) _UNARYWr(X86_IMUL, RS)
+#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS)
+#define IMULLr(RS) _UNARYLr(X86_IMUL, RS)
+#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS)
+
+#define DIVBr(RS) _UNARYBr(X86_DIV, RS)
+#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS)
+#define DIVWr(RS) _UNARYWr(X86_DIV, RS)
+#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS)
+#define DIVLr(RS) _UNARYLr(X86_DIV, RS)
+#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS)
+
+#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS)
+#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS)
+#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS)
+#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS)
+#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS)
+#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) ))
+#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS ))
+
+#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) ))
+#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) ))
+
+/* IMULLir: RD fills both the reg and r/m ModRM slots, so it is passed in both positions of _REXLrr, mirroring the other rr forms. */
+#define IMULLir(IM, RD) (_REXLrr(RD, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM ))
+#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) ))
+#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS ))
+
+
+#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM ))
+#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM ))
+
+
+
+/* --- Control Flow related instructions ----------------------------------- */
+
+enum {
+ X86_CC_O = 0x0,
+ X86_CC_NO = 0x1,
+ X86_CC_NAE = 0x2,
+ X86_CC_B = 0x2,
+ X86_CC_C = 0x2,
+ X86_CC_AE = 0x3,
+ X86_CC_NB = 0x3,
+ X86_CC_NC = 0x3,
+ X86_CC_E = 0x4,
+ X86_CC_Z = 0x4,
+ X86_CC_NE = 0x5,
+ X86_CC_NZ = 0x5,
+ X86_CC_BE = 0x6,
+ X86_CC_NA = 0x6,
+ X86_CC_A = 0x7,
+ X86_CC_NBE = 0x7,
+ X86_CC_S = 0x8,
+ X86_CC_NS = 0x9,
+ X86_CC_P = 0xa,
+ X86_CC_PE = 0xa,
+ X86_CC_NP = 0xb,
+ X86_CC_PO = 0xb,
+ X86_CC_L = 0xc,
+ X86_CC_NGE = 0xc,
+ X86_CC_GE = 0xd,
+ X86_CC_NL = 0xd,
+ X86_CC_LE = 0xe,
+ X86_CC_NG = 0xe,
+ X86_CC_G = 0xf,
+ X86_CC_NLE = 0xf,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* FIXME: no prefix is available to encode a 32-bit operand size in 64-bit
+ mode */
+#define CALLm(M) _O_D32 (0xe8 ,(int)(M) )
+#define CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) ))
+#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ))
+
+/* FIXME: no prefix is available to encode a 32-bit operand size in 64-bit
+ mode */
+#define JMPSm(M) _O_D8 (0xeb ,(int)(M) )
+#define JMPm(M) _O_D32 (0xe9 ,(int)(M) )
+#define JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) ))
+#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ))
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) )
+#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) )
+#define JOSm(D) JCCSim(0x0, D)
+#define JNOSm(D) JCCSim(0x1, D)
+#define JBSm(D) JCCSim(0x2, D)
+#define JCSm(D) JCCSim(0x2, D)
+#define JNAESm(D) JCCSim(0x2, D)
+#define JNBSm(D) JCCSim(0x3, D)
+#define JNCSm(D) JCCSim(0x3, D)
+#define JAESm(D) JCCSim(0x3, D)
+#define JESm(D) JCCSim(0x4, D)
+#define JZSm(D) JCCSim(0x4, D)
+#define JNESm(D) JCCSim(0x5, D)
+#define JNZSm(D) JCCSim(0x5, D)
+#define JBESm(D) JCCSim(0x6, D)
+#define JNASm(D) JCCSim(0x6, D)
+#define JNBESm(D) JCCSim(0x7, D)
+#define JASm(D) JCCSim(0x7, D)
+#define JSSm(D) JCCSim(0x8, D)
+#define JNSSm(D) JCCSim(0x9, D)
+#define JPSm(D) JCCSim(0xa, D)
+#define JPESm(D) JCCSim(0xa, D)
+#define JNPSm(D) JCCSim(0xb, D)
+#define JPOSm(D) JCCSim(0xb, D)
+#define JLSm(D) JCCSim(0xc, D)
+#define JNGESm(D) JCCSim(0xc, D)
+#define JNLSm(D) JCCSim(0xd, D)
+#define JGESm(D) JCCSim(0xd, D)
+#define JLESm(D) JCCSim(0xe, D)
+#define JNGSm(D) JCCSim(0xe, D)
+#define JNLESm(D) JCCSim(0xf, D)
+#define JGSm(D) JCCSim(0xf, D)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(long)(D) )
+#define JOm(D) JCCim(0x0, D)
+#define JNOm(D) JCCim(0x1, D)
+#define JBm(D) JCCim(0x2, D)
+#define JCm(D) JCCim(0x2, D)
+#define JNAEm(D) JCCim(0x2, D)
+#define JNBm(D) JCCim(0x3, D)
+#define JNCm(D) JCCim(0x3, D)
+#define JAEm(D) JCCim(0x3, D)
+#define JEm(D) JCCim(0x4, D)
+#define JZm(D) JCCim(0x4, D)
+#define JNEm(D) JCCim(0x5, D)
+#define JNZm(D) JCCim(0x5, D)
+#define JBEm(D) JCCim(0x6, D)
+#define JNAm(D) JCCim(0x6, D)
+#define JNBEm(D) JCCim(0x7, D)
+#define JAm(D) JCCim(0x7, D)
+#define JSm(D) JCCim(0x8, D)
+#define JNSm(D) JCCim(0x9, D)
+#define JPm(D) JCCim(0xa, D)
+#define JPEm(D) JCCim(0xa, D)
+#define JNPm(D) JCCim(0xb, D)
+#define JPOm(D) JCCim(0xb, D)
+#define JLm(D) JCCim(0xc, D)
+#define JNGEm(D) JCCim(0xc, D)
+#define JNLm(D) JCCim(0xd, D)
+#define JGEm(D) JCCim(0xd, D)
+#define JLEm(D) JCCim(0xe, D)
+#define JNGm(D) JCCim(0xe, D)
+#define JNLEm(D) JCCim(0xf, D)
+#define JGm(D) JCCim(0xf, D)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) ))
+#define SETOr(RD) SETCCir(0x0,RD)
+#define SETNOr(RD) SETCCir(0x1,RD)
+#define SETBr(RD) SETCCir(0x2,RD)
+#define SETNAEr(RD) SETCCir(0x2,RD)
+#define SETNBr(RD) SETCCir(0x3,RD)
+#define SETAEr(RD) SETCCir(0x3,RD)
+#define SETEr(RD) SETCCir(0x4,RD)
+#define SETZr(RD) SETCCir(0x4,RD)
+#define SETNEr(RD) SETCCir(0x5,RD)
+#define SETNZr(RD) SETCCir(0x5,RD)
+#define SETBEr(RD) SETCCir(0x6,RD)
+#define SETNAr(RD) SETCCir(0x6,RD)
+#define SETNBEr(RD) SETCCir(0x7,RD)
+#define SETAr(RD) SETCCir(0x7,RD)
+#define SETSr(RD) SETCCir(0x8,RD)
+#define SETNSr(RD) SETCCir(0x9,RD)
+#define SETPr(RD) SETCCir(0xa,RD)
+#define SETPEr(RD) SETCCir(0xa,RD)
+#define SETNPr(RD) SETCCir(0xb,RD)
+#define SETPOr(RD) SETCCir(0xb,RD)
+#define SETLr(RD) SETCCir(0xc,RD)
+#define SETNGEr(RD) SETCCir(0xc,RD)
+#define SETNLr(RD) SETCCir(0xd,RD)
+#define SETGEr(RD) SETCCir(0xd,RD)
+#define SETLEr(RD) SETCCir(0xe,RD)
+#define SETNGr(RD) SETCCir(0xe,RD)
+#define SETNLEr(RD) SETCCir(0xf,RD)
+#define SETGr(RD) SETCCir(0xf,RD)
-#define DECLr(RD) _Or (0x48,_r4(RD) )
-#define DECLm(MD,MB,MI,MS) _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS ))
+#define SETOm(D, B, I, S) SETCCim(0x0, D, B, I, S)
+#define SETNOm(D, B, I, S) SETCCim(0x1, D, B, I, S)
+#define SETBm(D, B, I, S) SETCCim(0x2, D, B, I, S)
+#define SETNAEm(D, B, I, S) SETCCim(0x2, D, B, I, S)
+#define SETNBm(D, B, I, S) SETCCim(0x3, D, B, I, S)
+#define SETAEm(D, B, I, S) SETCCim(0x3, D, B, I, S)
+#define SETEm(D, B, I, S) SETCCim(0x4, D, B, I, S)
+#define SETZm(D, B, I, S) SETCCim(0x4, D, B, I, S)
+#define SETNEm(D, B, I, S) SETCCim(0x5, D, B, I, S)
+#define SETNZm(D, B, I, S) SETCCim(0x5, D, B, I, S)
+#define SETBEm(D, B, I, S) SETCCim(0x6, D, B, I, S)
+#define SETNAm(D, B, I, S) SETCCim(0x6, D, B, I, S)
+#define SETNBEm(D, B, I, S) SETCCim(0x7, D, B, I, S)
+#define SETAm(D, B, I, S) SETCCim(0x7, D, B, I, S)
+#define SETSm(D, B, I, S) SETCCim(0x8, D, B, I, S)
+#define SETNSm(D, B, I, S) SETCCim(0x9, D, B, I, S)
+#define SETPm(D, B, I, S) SETCCim(0xa, D, B, I, S)
+#define SETPEm(D, B, I, S) SETCCim(0xa, D, B, I, S)
+#define SETNPm(D, B, I, S) SETCCim(0xb, D, B, I, S)
+#define SETPOm(D, B, I, S) SETCCim(0xb, D, B, I, S)
+#define SETLm(D, B, I, S) SETCCim(0xc, D, B, I, S)
+#define SETNGEm(D, B, I, S) SETCCim(0xc, D, B, I, S)
+#define SETNLm(D, B, I, S) SETCCim(0xd, D, B, I, S)
+#define SETGEm(D, B, I, S) SETCCim(0xd, D, B, I, S)
+#define SETLEm(D, B, I, S) SETCCim(0xe, D, B, I, S)
+#define SETNGm(D, B, I, S) SETCCim(0xe, D, B, I, S)
+#define SETNLEm(D, B, I, S) SETCCim(0xf, D, B, I, S)
+#define SETGm(D, B, I, S) SETCCim(0xf, D, B, I, S)
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) ))
+#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS ))
+#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) ))
+#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS ))
-#define DIVBr(RS) _O_Mrm (0xf6 ,_b11,_b110 ,_r1(RS) )
-#define DIVBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b110 ,MD,MB,MI,MS )
-#define DIVWr(RS) _wO_Mrm (0xf7 ,_b11,_b110 ,_r2(RS) )
-#define DIVWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b110 ,MD,MB,MI,MS )
+/* --- Push/Pop instructions ----------------------------------------------- */
-#define DIVLr(RS) _O_Mrm (0xf7 ,_b11,_b110 ,_r4(RS) )
-#define DIVLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b110 ,MD,MB,MI,MS )
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) )))
+#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
-#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B))
-#define HLT_() _O (0xf4 )
+#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) ))
+#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))
-#define IDIVBr(RS) _O_Mrm (0xf6 ,_b11,_b111 ,_r1(RS) )
-#define IDIVBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b111 ,MD,MB,MI,MS )
+/* 16-bit pushes, all wrapped in _m32only; PUSHWm emits opcode 0xff with _b110 in the ModRM reg slot. */
+#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) )))
+#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
+#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM )))
-#define IDIVWr(RS) _wO_Mrm (0xf7 ,_b11,_b111 ,_r2(RS) )
-#define IDIVWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b111 ,MD,MB,MI,MS )
+#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) ))
+#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))
+#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM ))
-#define IDIVLr(RS) _O_Mrm (0xf7 ,_b11,_b111 ,_r4(RS) )
-#define IDIVLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b111 ,MD,MB,MI,MS )
-#define IMULBr(RS) _O_Mrm (0xf6 ,_b11,_b101 ,_r1(RS) )
-#define IMULBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b101 ,MD,MB,MI,MS )
+#define POPA_() (_d16(), _O (0x61 ))
+#define POPAD_() _O (0x61 )
-#define IMULWr(RS) _wO_Mrm (0xf7 ,_b11,_b101 ,_r2(RS) )
-#define IMULWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b101 ,MD,MB,MI,MS )
+#define PUSHA_() (_d16(), _O (0x60 ))
+#define PUSHAD_() _O (0x60 )
-#define IMULLr(RS) _O_Mrm (0xf7 ,_b11,_b101 ,_r4(RS) )
-#define IMULLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b101 ,MD,MB,MI,MS )
+#define POPF_() _O (0x9d )
+#define PUSHF_() _O (0x9c )
-#define IMULWrr(RS,RD) _wOO_Mrm (0x0faf ,_b11,_r2(RS),_r2(RD) )
-#define IMULWmr(MD,MB,MI,MS,RD) _wOO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS )
-#define IMULWirr(IM,RS,RD) _wOs_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) )
-#define IMULWimr(IM,MD,MB,MI,MS,RD) _wOs_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) )
+/* --- Test instructions --------------------------------------------------- */
-#define IMULLir(IM,RD) _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM )
-#define IMULLrr(RS,RD) _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) )
-#define IMULLmr(MD,MB,MI,MS,RD) _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS )
-#define IMULLirr(IM,RS,RD) _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM )
-#define IMULLimr(IM,MD,MB,MI,MS,RD) _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM )
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) ))
+#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS ))
+#define TESTBir(IM, RD) ((RD) == _AL ? \
+ (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \
+ (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) )
+#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)))
-#define INCBr(RD) _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) )
-#define INCBm(MD,MB,MI,MS) _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS )
+#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
+#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
+#define TESTWir(IM, RD) ((RD) == _AX ? \
+ (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
+ (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
+#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
-#define INCWr(RD) _wOr (0x40,_r2(RD) )
-#define INCWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b000 ,MD,MB,MI,MS )
+#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) ))
+#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS ))
+#define TESTLir(IM, RD) (!_s8P(IM) && (RD) == _EAX ? \
+ (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \
+ (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) )
+#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
-#define INCLr(RD) _Or (0x40,_r4(RD) )
-#define INCLm(MD,MB,MI,MS) _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )
-#define INVD_() _OO (0x0f08 )
-#define INVLPGm(MD, MB, MI, MS) _OO_r_X (0x0f01 ,_b111 ,MD,MB,MI,MS )
+/* --- Exchange instructions ----------------------------------------------- */
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define JCCSim(CC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D8 (0x70|(CC) ,(int)(D) ) : \
- JITFAIL("illegal mode in conditional jump"))
+#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ))
+#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS ))
-#define JOSm(D,B,I,S) JCCSim(0x0,D,B,I,S)
-#define JNOSm(D,B,I,S) JCCSim(0x1,D,B,I,S)
-#define JBSm(D,B,I,S) JCCSim(0x2,D,B,I,S)
-#define JNAESm(D,B,I,S) JCCSim(0x2,D,B,I,S)
-#define JNBSm(D,B,I,S) JCCSim(0x3,D,B,I,S)
-#define JAESm(D,B,I,S) JCCSim(0x3,D,B,I,S)
-#define JESm(D,B,I,S) JCCSim(0x4,D,B,I,S)
-#define JZSm(D,B,I,S) JCCSim(0x4,D,B,I,S)
-#define JNESm(D,B,I,S) JCCSim(0x5,D,B,I,S)
-#define JNZSm(D,B,I,S) JCCSim(0x5,D,B,I,S)
-#define JBESm(D,B,I,S) JCCSim(0x6,D,B,I,S)
-#define JNASm(D,B,I,S) JCCSim(0x6,D,B,I,S)
-#define JNBESm(D,B,I,S) JCCSim(0x7,D,B,I,S)
-#define JASm(D,B,I,S) JCCSim(0x7,D,B,I,S)
-#define JSSm(D,B,I,S) JCCSim(0x8,D,B,I,S)
-#define JNSSm(D,B,I,S) JCCSim(0x9,D,B,I,S)
-#define JPSm(D,B,I,S) JCCSim(0xa,D,B,I,S)
-#define JPESm(D,B,I,S) JCCSim(0xa,D,B,I,S)
-#define JNPSm(D,B,I,S) JCCSim(0xb,D,B,I,S)
-#define JPOSm(D,B,I,S) JCCSim(0xb,D,B,I,S)
-#define JLSm(D,B,I,S) JCCSim(0xc,D,B,I,S)
-#define JNGESm(D,B,I,S) JCCSim(0xc,D,B,I,S)
-#define JNLSm(D,B,I,S) JCCSim(0xd,D,B,I,S)
-#define JGESm(D,B,I,S) JCCSim(0xd,D,B,I,S)
-#define JLESm(D,B,I,S) JCCSim(0xe,D,B,I,S)
-#define JNGSm(D,B,I,S) JCCSim(0xe,D,B,I,S)
-#define JNLESm(D,B,I,S) JCCSim(0xf,D,B,I,S)
-#define JGSm(D,B,I,S) JCCSim(0xf,D,B,I,S)
+#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) ))
+#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS ))
-#define JCCim(CC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? _OO_D32 (0x0f80|(CC) ,(int)(D) ) : \
- JITFAIL("illegal mode in conditional jump"))
+#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) ))
+#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS ))
-#define JOm(D,B,I,S) JCCim(0x0,D,B,I,S)
-#define JNOm(D,B,I,S) JCCim(0x1,D,B,I,S)
-#define JBm(D,B,I,S) JCCim(0x2,D,B,I,S)
-#define JNAEm(D,B,I,S) JCCim(0x2,D,B,I,S)
-#define JNBm(D,B,I,S) JCCim(0x3,D,B,I,S)
-#define JAEm(D,B,I,S) JCCim(0x3,D,B,I,S)
-#define JEm(D,B,I,S) JCCim(0x4,D,B,I,S)
-#define JZm(D,B,I,S) JCCim(0x4,D,B,I,S)
-#define JNEm(D,B,I,S) JCCim(0x5,D,B,I,S)
-#define JNZm(D,B,I,S) JCCim(0x5,D,B,I,S)
-#define JBEm(D,B,I,S) JCCim(0x6,D,B,I,S)
-#define JNAm(D,B,I,S) JCCim(0x6,D,B,I,S)
-#define JNBEm(D,B,I,S) JCCim(0x7,D,B,I,S)
-#define JAm(D,B,I,S) JCCim(0x7,D,B,I,S)
-#define JSm(D,B,I,S) JCCim(0x8,D,B,I,S)
-#define JNSm(D,B,I,S) JCCim(0x9,D,B,I,S)
-#define JPm(D,B,I,S) JCCim(0xa,D,B,I,S)
-#define JPEm(D,B,I,S) JCCim(0xa,D,B,I,S)
-#define JNPm(D,B,I,S) JCCim(0xb,D,B,I,S)
-#define JPOm(D,B,I,S) JCCim(0xb,D,B,I,S)
-#define JLm(D,B,I,S) JCCim(0xc,D,B,I,S)
-#define JNGEm(D,B,I,S) JCCim(0xc,D,B,I,S)
-#define JNLm(D,B,I,S) JCCim(0xd,D,B,I,S)
-#define JGEm(D,B,I,S) JCCim(0xd,D,B,I,S)
-#define JLEm(D,B,I,S) JCCim(0xe,D,B,I,S)
-#define JNGm(D,B,I,S) JCCim(0xe,D,B,I,S)
-#define JNLEm(D,B,I,S) JCCim(0xf,D,B,I,S)
-#define JGm(D,B,I,S) JCCim(0xf,D,B,I,S)
+#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) ))
+#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS ))
-#define JMPSm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D8 (0xeb ,(int)(D) ) : \
- JITFAIL("illegal mode in short jump"))
+#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) ))
+#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS ))
-#define JMPm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D32 (0xe9 ,(int)(D) ) : \
- JITFAIL("illegal mode in direct jump"))
+#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) ))
+#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS ))
-#define JMPsr(R) _O_Mrm (0xff ,_b11,_b100,_r4(R) )
-#define JMPsm(D,B,I,S) _O_r_X (0xff ,_b100 ,(int)(D),B,I,S )
-
-
-#define LAHF_() _O (0x9f )
-#define LEALmr(MD, MB, MI, MS, RD) _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )
-#define LEAVE_() _O (0xc9 )
-
-
-#define LMSWr(RS) _OO_Mrm (0x0f01 ,_b11,_b110,_r4(RS) )
-#define LMSWm(MD,MB,MI,MS) _OO_r_X (0x0f01 ,_b110 ,MD,MB,MI,MS )
+#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) ))
+#define XCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS ))
-#define LOOPm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe2 ,MD ) : \
- JITFAIL("illegal mode in loop"))
-
-#define LOOPEm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe1 ,MD ) : \
- JITFAIL("illegal mode in loope"))
-
-#define LOOPZm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe1 ,MD ) : \
- JITFAIL("illegal mode in loopz"))
+#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) ))
+#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS ))
-#define LOOPNEm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe0 ,MD ) : \
- JITFAIL("illegal mode in loopne"))
+#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) ))
+#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS ))
-#define LOOPNZm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe0 ,MD ) : \
- JITFAIL("illegal mode in loopnz"))
-#define MOVBrr(RS, RD) _O_Mrm (0x80 ,_b11,_r1(RS),_r1(RD) )
-#define MOVBmr(MD, MB, MI, MS, RD) _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS )
-#define MOVBrm(RS, MD, MB, MI, MS) _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS )
-#define MOVBir(IM, R) _Or_B (0xb0,_r1(R) ,_su8(IM))
-#define MOVBim(IM, MD, MB, MI, MS) _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM))
-
-#define MOVWrr(RS, RD) _wO_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) )
-#define MOVWmr(MD, MB, MI, MS, RD) _wO_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS )
-#define MOVWrm(RS, MD, MB, MI, MS) _wO_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS )
-#define MOVWir(IM, R) _wOr_W (0xb8,_r2(R) ,_su16(IM))
-#define MOVWim(IM, MD, MB, MI, MS) _wO_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM))
+/* --- Increment/Decrement instructions ------------------------------------ */
-#define MOVLrr(RS, RD) _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) )
-#define MOVLmr(MD, MB, MI, MS, RD) _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS )
-#define MOVLrm(RS, MD, MB, MI, MS) _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS )
-#define MOVLir(IM, R) _Or_L (0xb8,_r4(R) ,IM )
-#define MOVLim(IM, MD, MB, MI, MS) _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define MOVZBLrr(RS, RD) _OO_Mrm (0x0fb6 ,_b11,_r1(RD),_r1(RS) )
-#define MOVZBLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fb6 ,_r1(RD) ,MD,MB,MI,MS )
-#define MOVZBWrr(RS, RD) _wOO_Mrm (0x0fb6 ,_b11,_r2(RD),_r2(RS) )
-#define MOVZBWmr(MD, MB, MI, MS, RD) _wOO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS )
-#define MOVZWLrr(RS, RD) _OO_Mrm (0x0fb7 ,_b11,_r1(RD),_r1(RS) )
-#define MOVZWLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fb7 ,_r1(RD) ,MD,MB,MI,MS )
+#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS ))
+#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) ))
-#define MOVSBLrr(RS, RD) _OO_Mrm (0x0fbe ,_b11,_r1(RD),_r1(RS) )
-#define MOVSBLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fbe ,_r1(RD) ,MD,MB,MI,MS )
-#define MOVSBWrr(RS, RD) _wOO_Mrm (0x0fbe ,_b11,_r2(RD),_r2(RS) )
-#define MOVSBWmr(MD, MB, MI, MS, RD) _wOO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS )
-#define MOVSWLrr(RS, RD) _OO_Mrm (0x0fbf ,_b11,_r1(RD),_r1(RS) )
-#define MOVSWLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fbf ,_r1(RD) ,MD,MB,MI,MS )
+#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) /* DECLm encoding preceded by _d16() (presumably the 16-bit operand-size prefix -- confirm against its definition) */
+#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) /* opcode 0xff with _b001 in the r field, memory operand */
-#define MULBr(RS) _O_Mrm (0xf6 ,_b11,_b100 ,_r1(RS) )
-#define MULBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b100 ,MD,MB,MI,MS )
-#define MULWr(RS) _wO_Mrm (0xf7 ,_b11,_b100 ,_r2(RS) )
-#define MULWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b100 ,MD,MB,MI,MS )
+#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS )) /* opcode 0xfe with _b000 in the r field, memory operand */
+#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) )) /* same opcode and r field, register-direct (mod=_b11) on byte register RD */
-#define MULLr(RS) _O_Mrm (0xf7 ,_b11,_b100 ,_r4(RS) )
-#define MULLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b100 ,MD,MB,MI,MS )
+#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) /* INCLm encoding preceded by _d16() (presumably the 16-bit operand-size prefix -- confirm against its definition) */
+#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) /* opcode 0xff with _b000 in the r field, memory operand */
-#define NEGBr(RD) _O_Mrm (0xf6 ,_b11,_b011 ,_r1(RD) )
-#define NEGBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b011 ,MD,MB,MI,MS )
-#define NEGWr(RD) _wO_Mrm (0xf7 ,_b11,_b011 ,_r2(RD) )
-#define NEGWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b011 ,MD,MB,MI,MS )
-#define NEGLr(RD) _O_Mrm (0xf7 ,_b11,_b011 ,_r4(RD) )
-#define NEGLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b011 ,MD,MB,MI,MS )
+/* --- Misc instructions --------------------------------------------------- */
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define NOP_() _O (0x90 )
+#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) )) /* bit scans: the destination RD is encoded in the r field, the source RS in the m field */
+#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS ))
+#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) ))
+#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS ))
+#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) )) /* L forms: same opcodes as the W forms, without the leading _d16() and with _r4 register accessors */
+#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS ))
+#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) ))
+#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS ))
-#define NOTBr(RD) _O_Mrm (0xf6 ,_b11,_b010 ,_r1(RD) )
-#define NOTBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b010 ,MD,MB,MI,MS )
-#define NOTWr(RD) _wO_Mrm (0xf7 ,_b11,_b010 ,_r2(RD) )
-#define NOTWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b010 ,MD,MB,MI,MS )
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define NOTLr(RD) _O_Mrm (0xf7 ,_b11,_b010 ,_r4(RD) )
-#define NOTLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b010 ,MD,MB,MI,MS )
+#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) )) /* byte source: RS is passed through _r1, the destination RD through _r2 (W) or _r4 (L) */
+#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS ))
+#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) )) /* MOVZ* use opcode 0x0fb6 where MOVS* use 0x0fbe */
+#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS ))
+#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) ))
+#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) ))
+#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS ))
-#define ORBrr(RS, RD) _O_Mrm (0x08 ,_b11,_r1(RS),_r1(RD) )
-#define ORBmr(MD, MB, MI, MS, RD) _O_r_X (0x0a ,_r1(RD) ,MD,MB,MI,MS )
-#define ORBrm(RS, MD, MB, MI, MS) _O_r_X (0x08 ,_r1(RS) ,MD,MB,MI,MS )
-#define ORBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b001 ,_r1(RD) ,_su8(IM))
-#define ORBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b001 ,MD,MB,MI,MS ,_su8(IM))
-#define ORWrr(RS, RD) _wO_Mrm (0x09 ,_b11,_r2(RS),_r2(RD) )
-#define ORWmr(MD, MB, MI, MS, RD) _wO_r_X (0x0b ,_r2(RD) ,MD,MB,MI,MS )
-#define ORWrm(RS, MD, MB, MI, MS) _wO_r_X (0x09 ,_r2(RS) ,MD,MB,MI,MS )
-#define ORWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b001 ,_r2(RD) ,_su16(IM))
-#define ORWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b001 ,MD,MB,MI,MS ,_su16(IM))
+#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) )) /* word source: RS is passed through _r2, the destination RD through _r4 */
+#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) )) /* MOVZWL uses opcode 0x0fb7 where MOVSWL uses 0x0fbf */
+#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS ))
-#define ORLrr(RS, RD) _O_Mrm (0x09 ,_b11,_r4(RS),_r4(RD) )
-#define ORLmr(MD, MB, MI, MS, RD) _O_r_X (0x0b ,_r4(RD) ,MD,MB,MI,MS )
-#define ORLrm(RS, MD, MB, MI, MS) _O_r_X (0x09 ,_r4(RS) ,MD,MB,MI,MS )
-#define ORLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b001 ,_r4(RD) ,IM )
-#define ORLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b001 ,MD,MB,MI,MS ,IM )
-#define POPWr(RD) _wOr (0x58,_r2(RD) )
-#define POPWm(MD,MB,MI,MS) _wO_r_X (0x8f ,_b000 ,MD,MB,MI,MS )
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define POPLr(RD) _Or (0x58,_r4(RD) )
-#define POPLm(MD,MB,MI,MS) _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )
+#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )) /* opcode 0x8d: RD in the r field, memory operand MD,MB,MI,MS */
+#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) )) /* register R is handed to _OOr together with the base opcode 0x0fc8 */
-#define POPA_() _wO (0x61 )
-#define POPAD_() _O (0x61 )
+#define CLC_() _O (0xf8 ) /* single opcode byte 0xf8, no operands */
+#define STC_() _O (0xf9 ) /* single opcode byte 0xf9, no operands */
-#define POPF_() _wO (0x9d )
-#define POPFD_() _O (0x9d )
+#define CMC_() _O (0xf5 ) /* single opcode byte 0xf5, no operands */
+#define CLD_() _O (0xfc ) /* single opcode byte 0xfc, no operands */
+#define STD_() _O (0xfd ) /* single opcode byte 0xfd, no operands */
+#define CBTW_() (_d16(), _O (0x98 )) /* opcode 0x98 preceded by _d16() */
+#define CWTL_() _O (0x98 ) /* opcode 0x98 alone */
+#define CLTQ_() _m64only((_REXQrr(0, 0), _O (0x98 ))) /* opcode 0x98 after _REXQrr(0, 0); the inner parentheses make the comma expression a single macro argument */
-#define PUSHWr(R) _wOr (0x50,_r2(R) )
-#define PUSHWm(MD,MB,MI,MS) _wO_r_X (0xff, ,_b110 ,MD,MB,MI,MS )
-#define PUSHWi(IM) _wOs_sW (0x68 ,IM )
+#define CBW_() CBTW_() /* alternate name, forwards to CBTW_ */
+#define CWDE_() CWTL_() /* alternate name, forwards to CWTL_ */
+#define CDQE_() CLTQ_() /* alternate name, forwards to CLTQ_ */
-#define PUSHLr(R) _Or (0x50,_r4(R) )
-#define PUSHLm(MD,MB,MI,MS) _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )
-#define PUSHLi(IM) _Os_sL (0x68 ,IM )
-
-
-#define PUSHA_() _wO (0x60 )
-#define PUSHAD_() _O (0x60 )
-
-#define PUSHF_() _O (0x9c )
-#define PUSHFD_() _wO (0x9c )
-
-#define RET_() _O (0xc3 )
-#define RETi(IM) _O_W (0xc2 ,_su16(IM))
+#define CWTD_() (_d16(), _O (0x99 )) /* opcode 0x99 preceded by _d16() */
+#define CLTD_() _O (0x99 ) /* opcode 0x99 alone */
+#define CQTO_() _m64only((_REXQrr(0, 0), _O (0x99 ))) /* opcode 0x99 after _REXQrr(0, 0); the inner parentheses make the comma expression a single macro argument */
+#define CWD_() CWTD_() /* alternate name, forwards to CWTD_ */
+#define CDQ_() CLTD_() /* alternate name, forwards to CLTD_ */
+#define CQO_() CQTO_() /* alternate name, forwards to CQTO_ */
-#define ROLBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b000,_r1(RD) ) : \
- _O_Mrm_B (0xc0 ,_b11,_b000,_r1(RD) ,_u8(IM) ) )
-#define ROLBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b000 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc0 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define ROLBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b000,_r1(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define ROLBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b000 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define ROLWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b000,_r2(RD) ) : \
- _wO_Mrm_B (0xc1 ,_b11,_b000,_r2(RD) ,_u8(IM) ) )
-#define ROLWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b000 ,MD,MB,MI,MS ) : \
- _wO_r_X_B (0xc1 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define ROLWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b000,_r2(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define ROLWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b000 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define ROLLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b000,_r4(RD) ) : \
- _O_Mrm_B (0xc1 ,_b11,_b000,_r4(RD) ,_u8(IM) ) )
-#define ROLLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b000 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc1 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define ROLLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b000,_r4(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define ROLLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b000 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
+#define LAHF_() _m32only( _O (0x9f )) /* opcode 0x9f, wrapped in _m32only */
+#define SAHF_() _m32only( _O (0x9e )) /* opcode 0x9e, wrapped in _m32only */
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-#define RORBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b001,_r1(RD) ) : \
- _O_Mrm_B (0xc0 ,_b11,_b001,_r1(RD) ,_u8(IM) ) )
-#define RORBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b001 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc0 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define RORBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b001,_r1(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define RORBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b001 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define RORWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b001,_r2(RD) ) : \
- _wO_Mrm_B (0xc1 ,_b11,_b001,_r2(RD) ,_u8(IM) ) )
-#define RORWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b001 ,MD,MB,MI,MS ) : \
- _wO_r_X_B (0xc1 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define RORWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b001,_r2(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define RORWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b001 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define RORLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b001,_r4(RD) ) : \
- _O_Mrm_B (0xc1 ,_b11,_b001,_r4(RD) ,_u8(IM) ) )
-#define RORLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b001 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc1 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define RORLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b001,_r4(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define RORLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b001 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-
-#define SAHF_() _O (0x9e )
-
-
-#define SALBir SHLBir
-#define SALBim SHLBim
-#define SALBrr SHLBrr
-#define SALBrm SHLBrm
-#define SALWir SHLWir
-#define SALWim SHLWim
-#define SALWrr SHLWrr
-#define SALWrm SHLWrm
-#define SALLir SHLLir
-#define SALLim SHLLim
-#define SALLrr SHLLrr
-#define SALLrm SHLLrm
-
-
-#define SARBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b111,_r1(RD) ) : \
- _O_Mrm_B (0xc0 ,_b11,_b111,_r1(RD) ,_u8(IM) ) )
-#define SARBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b111 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc0 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SARBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b111,_r1(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SARBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b111 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define SARWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b111,_r2(RD) ) : \
- _wO_Mrm_B (0xc1 ,_b11,_b111,_r2(RD) ,_u8(IM) ) )
-#define SARWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b111 ,MD,MB,MI,MS ) : \
- _wO_r_X_B (0xc1 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SARWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b111,_r2(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SARWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b111 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define SARLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b111,_r4(RD) ) : \
- _O_Mrm_B (0xc1 ,_b11,_b111,_r4(RD) ,_u8(IM) ) )
-#define SARLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b111 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc1 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SARLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b111,_r4(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SARLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b111 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-
-#define SBBBrr(RS, RD) _O_Mrm (0x18 ,_b11,_r1(RS),_r1(RD) )
-#define SBBBmr(MD, MB, MI, MS, RD) _O_r_X (0x1a ,_r1(RD) ,MD,MB,MI,MS )
-#define SBBBrm(RS, MD, MB, MI, MS) _O_r_X (0x18 ,_r1(RS) ,MD,MB,MI,MS )
-#define SBBBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b011 ,_r1(RD) ,_su8(IM))
-#define SBBBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b011 ,MD,MB,MI,MS ,_su8(IM))
-
-#define SBBWrr(RS, RD) _wO_Mrm (0x19 ,_b11,_r2(RS),_r2(RD) )
-#define SBBWmr(MD, MB, MI, MS, RD) _wO_r_X (0x1b ,_r2(RD) ,MD,MB,MI,MS )
-#define SBBWrm(RS, MD, MB, MI, MS) _wO_r_X (0x19 ,_r2(RS) ,MD,MB,MI,MS )
-#define SBBWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b011 ,_r2(RD) ,_su16(IM))
-#define SBBWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b011 ,MD,MB,MI,MS ,_su16(IM))
-
-#define SBBLrr(RS, RD) _O_Mrm (0x19 ,_b11,_r4(RS),_r4(RD) )
-#define SBBLmr(MD, MB, MI, MS, RD) _O_r_X (0x1b ,_r4(RD) ,MD,MB,MI,MS )
-#define SBBLrm(RS, MD, MB, MI, MS) _O_r_X (0x19 ,_r4(RS) ,MD,MB,MI,MS )
-#define SBBLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b011 ,_r4(RD) ,IM )
-#define SBBLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b011 ,MD,MB,MI,MS ,IM )
-
-
-#define SETCCir(CC,RD) _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) )
+#define CPUID_() _OO (0x0fa2 ) /* two-byte opcode 0f a2 */
+#define RDTSC_() _OO (0x0f31 ) /* two-byte opcode 0f 31: the first byte is the 0x0f escape, as in CPUID_ */
-#define SETOr(RD) SETCCir(0x0,RD)
-#define SETNOr(RD) SETCCir(0x1,RD)
-#define SETBr(RD) SETCCir(0x2,RD)
-#define SETNAEr(RD) SETCCir(0x2,RD)
-#define SETNBr(RD) SETCCir(0x3,RD)
-#define SETAEr(RD) SETCCir(0x3,RD)
-#define SETEr(RD) SETCCir(0x4,RD)
-#define SETZr(RD) SETCCir(0x4,RD)
-#define SETNEr(RD) SETCCir(0x5,RD)
-#define SETNZr(RD) SETCCir(0x5,RD)
-#define SETBEr(RD) SETCCir(0x6,RD)
-#define SETNAr(RD) SETCCir(0x6,RD)
-#define SETNBEr(RD) SETCCir(0x7,RD)
-#define SETAr(RD) SETCCir(0x7,RD)
-#define SETSr(RD) SETCCir(0x8,RD)
-#define SETNSr(RD) SETCCir(0x9,RD)
-#define SETPr(RD) SETCCir(0xa,RD)
-#define SETPEr(RD) SETCCir(0xa,RD)
-#define SETNPr(RD) SETCCir(0xb,RD)
-#define SETPOr(RD) SETCCir(0xb,RD)
-#define SETLr(RD) SETCCir(0xc,RD)
-#define SETNGEr(RD) SETCCir(0xc,RD)
-#define SETNLr(RD) SETCCir(0xd,RD)
-#define SETGEr(RD) SETCCir(0xd,RD)
-#define SETLEr(RD) SETCCir(0xe,RD)
-#define SETNGr(RD) SETCCir(0xe,RD)
-#define SETNLEr(RD) SETCCir(0xf,RD)
-#define SETGr(RD) SETCCir(0xf,RD)
+#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B)) /* opcode 0xc8 followed by W checked through _su16 and B checked through _su8 */
-#define SETCCim(CC,MD,MB,MI,MS) _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS )
-
-#define SETOm(D,B,I,S) SETCCim(0x0,D,B,I,S)
-#define SETNOm(D,B,I,S) SETCCim(0x1,D,B,I,S)
-#define SETBm(D,B,I,S) SETCCim(0x2,D,B,I,S)
-#define SETNAEm(D,B,I,S) SETCCim(0x2,D,B,I,S)
-#define SETNBm(D,B,I,S) SETCCim(0x3,D,B,I,S)
-#define SETAEm(D,B,I,S) SETCCim(0x3,D,B,I,S)
-#define SETEm(D,B,I,S) SETCCim(0x4,D,B,I,S)
-#define SETZm(D,B,I,S) SETCCim(0x4,D,B,I,S)
-#define SETNEm(D,B,I,S) SETCCim(0x5,D,B,I,S)
-#define SETNZm(D,B,I,S) SETCCim(0x5,D,B,I,S)
-#define SETBEm(D,B,I,S) SETCCim(0x6,D,B,I,S)
-#define SETNAm(D,B,I,S) SETCCim(0x6,D,B,I,S)
-#define SETNBEm(D,B,I,S) SETCCim(0x7,D,B,I,S)
-#define SETAm(D,B,I,S) SETCCim(0x7,D,B,I,S)
-#define SETSm(D,B,I,S) SETCCim(0x8,D,B,I,S)
-#define SETNSm(D,B,I,S) SETCCim(0x9,D,B,I,S)
-#define SETPm(D,B,I,S) SETCCim(0xa,D,B,I,S)
-#define SETPEm(D,B,I,S) SETCCim(0xa,D,B,I,S)
-#define SETNPm(D,B,I,S) SETCCim(0xb,D,B,I,S)
-#define SETPOm(D,B,I,S) SETCCim(0xb,D,B,I,S)
-#define SETLm(D,B,I,S) SETCCim(0xc,D,B,I,S)
-#define SETNGEm(D,B,I,S) SETCCim(0xc,D,B,I,S)
-#define SETNLm(D,B,I,S) SETCCim(0xd,D,B,I,S)
-#define SETGEm(D,B,I,S) SETCCim(0xd,D,B,I,S)
-#define SETLEm(D,B,I,S) SETCCim(0xe,D,B,I,S)
-#define SETNGm(D,B,I,S) SETCCim(0xe,D,B,I,S)
-#define SETNLEm(D,B,I,S) SETCCim(0xf,D,B,I,S)
-#define SETGm(D,B,I,S) SETCCim(0xf,D,B,I,S)
-
-
-#define SHLBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b100,_r1(RD) ) : \
- _O_Mrm_B (0xc0 ,_b11,_b100,_r1(RD) ,_u8(IM) ) )
-#define SHLBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b100 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc0 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SHLBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b100,_r1(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SHLBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b100 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define SHLWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b100,_r2(RD) ) : \
- _wO_Mrm_B (0xc1 ,_b11,_b100,_r2(RD) ,_u8(IM) ) )
-#define SHLWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b100 ,MD,MB,MI,MS ) : \
- _wO_r_X_B (0xc1 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SHLWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b100,_r2(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SHLWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b100 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define SHLLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b100,_r4(RD) ) : \
- _O_Mrm_B (0xc1 ,_b11,_b100,_r4(RD) ,_u8(IM) ) )
-#define SHLLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b100 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc1 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SHLLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b100,_r4(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SHLLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b100 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-
-#define SHRBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b101,_r1(RD) ) : \
- _O_Mrm_B (0xc0 ,_b11,_b101,_r1(RD) ,_u8(IM) ) )
-#define SHRBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b101 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc0 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SHRBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b101,_r1(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SHRBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b101 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define SHRWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b101,_r2(RD) ) : \
- _wO_Mrm_B (0xc1 ,_b11,_b101,_r2(RD) ,_u8(IM) ) )
-#define SHRWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b101 ,MD,MB,MI,MS ) : \
- _wO_r_X_B (0xc1 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SHRWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b101,_r2(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SHRWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b101 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-#define SHRLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b101,_r4(RD) ) : \
- _O_Mrm_B (0xc1 ,_b11,_b101,_r4(RD) ,_u8(IM) ) )
-#define SHRLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b101 ,MD,MB,MI,MS ) : \
- _O_r_X_B (0xc1 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) )
-#define SHRLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b101,_r4(RD) ) : \
- JITFAIL ("source register must be CL" ) )
-#define SHRLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b101 ,MD,MB,MI,MS ) : \
- JITFAIL ("source register must be CL" ) )
-
-
-#define STC_() _O (0xf9 )
-
-
-#define SUBBrr(RS, RD) _O_Mrm (0x28 ,_b11,_r1(RS),_r1(RD) )
-#define SUBBmr(MD, MB, MI, MS, RD) _O_r_X (0x2a ,_r1(RD) ,MD,MB,MI,MS )
-#define SUBBrm(RS, MD, MB, MI, MS) _O_r_X (0x28 ,_r1(RS) ,MD,MB,MI,MS )
-#define SUBBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b101 ,_r1(RD) ,_su8(IM))
-#define SUBBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b101 ,MD,MB,MI,MS ,_su8(IM))
-
-#define SUBWrr(RS, RD) _wO_Mrm (0x29 ,_b11,_r2(RS),_r2(RD) )
-#define SUBWmr(MD, MB, MI, MS, RD) _wO_r_X (0x2b ,_r2(RD) ,MD,MB,MI,MS )
-#define SUBWrm(RS, MD, MB, MI, MS) _wO_r_X (0x29 ,_r2(RS) ,MD,MB,MI,MS )
-#define SUBWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b101 ,_r2(RD) ,_su16(IM))
-#define SUBWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b101 ,MD,MB,MI,MS ,_su16(IM))
-
-#define SUBLrr(RS, RD) _O_Mrm (0x29 ,_b11,_r4(RS),_r4(RD) )
-#define SUBLmr(MD, MB, MI, MS, RD) _O_r_X (0x2b ,_r4(RD) ,MD,MB,MI,MS )
-#define SUBLrm(RS, MD, MB, MI, MS) _O_r_X (0x29 ,_r4(RS) ,MD,MB,MI,MS )
-#define SUBLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b101 ,_r4(RD) ,IM )
-#define SUBLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b101 ,MD,MB,MI,MS ,IM )
-
-
-#define TESTBrr(RS, RD) _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) )
-#define TESTBrm(RS, MD, MB, MI, MS) _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS )
-#define TESTBir(IM, RD) _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))
-#define TESTBim(IM, MD, MB, MI, MS) _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM))
-
-#define TESTWrr(RS, RD) _wO_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) )
-#define TESTWrm(RS, MD, MB, MI, MS) _wO_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS )
-#define TESTWir(IM, RD) _wO_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))
-#define TESTWim(IM, MD, MB, MI, MS) _wO_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM))
-
-#define TESTLrr(RS, RD) _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) )
-#define TESTLrm(RS, MD, MB, MI, MS) _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS )
-#define TESTLir(IM, RD) _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )
-#define TESTLim(IM, MD, MB, MI, MS) _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )
-
-
-#define XADDBrr(RS,RD) _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) )
-#define XADDBrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS )
-
-#define XADDWrr(RS,RD) _wOO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) )
-#define XADDWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS )
-
-#define XADDLrr(RS,RD) _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) )
-#define XADDLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS )
-
-
-#define XCHGBrr(RS,RD) _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) )
-#define XCHGBrm(RS,MD,MB,MI,MS) _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS )
-
-#define XCHGWrr(RS,RD) _wO_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) )
-#define XCHGWrm(RS,MD,MB,MI,MS) _wO_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS )
-
-#define XCHGLrr(RS,RD) _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) )
-#define XCHGLrm(RS,MD,MB,MI,MS) _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS )
-
-
-#define XORBrr(RS, RD) _O_Mrm (0x30 ,_b11,_r1(RS),_r1(RD) )
-#define XORBmr(MD, MB, MI, MS, RD) _O_r_X (0x32 ,_r1(RD) ,MD,MB,MI,MS )
-#define XORBrm(RS, MD, MB, MI, MS) _O_r_X (0x30 ,_r1(RS) ,MD,MB,MI,MS )
-#define XORBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b110 ,_r1(RD) ,_su8(IM))
-#define XORBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b110 ,MD,MB,MI,MS ,_su8(IM))
-
-#define XORWrr(RS, RD) _wO_Mrm (0x31 ,_b11,_r2(RS),_r2(RD) )
-#define XORWmr(MD, MB, MI, MS, RD) _wO_r_X (0x33 ,_r2(RD) ,MD,MB,MI,MS )
-#define XORWrm(RS, MD, MB, MI, MS) _wO_r_X (0x31 ,_r2(RS) ,MD,MB,MI,MS )
-#define XORWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b110 ,_r2(RD) ,_su16(IM))
-#define XORWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b110 ,MD,MB,MI,MS ,_su16(IM))
+#define LEAVE_() _O (0xc9 ) /* single opcode byte 0xc9 */
+#define RET_() _O (0xc3 ) /* single opcode byte 0xc3 */
+#define RETi(IM) _O_W (0xc2 ,_su16(IM)) /* opcode 0xc2 followed by IM passed through _su16 */
-#define XORLrr(RS, RD) _O_Mrm (0x31 ,_b11,_r4(RS),_r4(RD) )
-#define XORLmr(MD, MB, MI, MS, RD) _O_r_X (0x33 ,_r4(RD) ,MD,MB,MI,MS )
-#define XORLrm(RS, MD, MB, MI, MS) _O_r_X (0x31 ,_r4(RS) ,MD,MB,MI,MS )
-#define XORLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b110 ,_r4(RD) ,IM )
-#define XORLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b110 ,MD,MB,MI,MS ,IM )
+#define NOP_() _O (0x90 ) /* single opcode byte 0x90 */
/* x87 instructions -- yay, we found a use for octal constants :-) */
-#define ESCmi(D,B,I,S,OP) _O_r_X(0xd8|(OP >> 3), (OP & 7), D,B,I,S)
+#define ESCmi(D,B,I,S,OP) (_REXLrm(0,B,I), _O_r_X(0xd8|(OP >> 3), (OP & 7), D,B,I,S)) /* OP is split: OP >> 3 is OR-ed into opcode 0xd8, OP & 7 goes in the r field. OP is not parenthesised here, so pass a single token or a parenthesised expression */
#define ESCri(RD,OP) _O_Mrm(0xd8|(OP >> 3), _b11, (OP & 7), RD)
#define ESCrri(RS,RD,OP) ((RS) == _ST0 ? ESCri(RD,(OP|040)) \
@@ -1037,9 +1294,9 @@ typedef _uc jit_insn;
#define FNSTSWr(RD) ((RD == _AX || RD == _EAX) ? _OO (0xdfe0) \
: JITFAIL ("AX or EAX expected"))
/* N byte NOPs */
-#define NOPi(N) ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), \
- (( ((N)&7) == 7) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00)) : \
- ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_L(0x00)) : \
+#define NOPi(N) ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00),_jit_B(0x90)) : (void) 0), \
+ (( ((N)&7) == 7) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00)) : \
+ ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_I(0x00)) : \
( ((N)&7) == 5) ? (_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
/* leal 0(,%esi), %esi */ ( ((N)&7) == 4) ? (_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
/* leal (,%esi), %esi */ ( ((N)&7) == 3) ? (_jit_B(0x8d),_jit_B(0x76),_jit_B(0x00)) : \
@@ -1049,6 +1306,286 @@ typedef _uc jit_insn;
JITFAIL(".align argument too large")))
+/* --- Media 128-bit instructions ------------------------------------------ */
+
+enum { /* second opcode byte of each SSE operation; the __SSEL* emitters OR it into 0x0f00 (or 0x0f01) */
+ X86_SSE_MOV = 0x10, /* used by the MOVSS and MOVSD macros */
+ X86_SSE_MOVLP = 0x12,
+ X86_SSE_MOVHP = 0x16,
+ X86_SSE_MOVA = 0x28, /* used by the MOVAPS and MOVAPD macros */
+ X86_SSE_CVTIS = 0x2a,
+ X86_SSE_CVTTSI = 0x2c,
+ X86_SSE_CVTSI = 0x2d,
+ X86_SSE_UCOMI = 0x2e,
+ X86_SSE_COMI = 0x2f,
+ X86_SSE_SQRT = 0x51,
+ X86_SSE_RSQRT = 0x52,
+ X86_SSE_RCP = 0x53,
+ X86_SSE_AND = 0x54,
+ X86_SSE_ANDN = 0x55,
+ X86_SSE_OR = 0x56,
+ X86_SSE_XOR = 0x57,
+ X86_SSE_ADD = 0x58,
+ X86_SSE_MUL = 0x59,
+ X86_SSE_CVTSD = 0x5a,
+ X86_SSE_CVTDT = 0x5b,
+ X86_SSE_SUB = 0x5c,
+ X86_SSE_MIN = 0x5d,
+ X86_SSE_DIV = 0x5e,
+ X86_SSE_MAX = 0x5f,
+ X86_SSE_MOV2 = 0xd6
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) /* reg-reg: opcode 0x0f OP, RD in the r field and RS in the m field; RSA/RDA are the register-accessor macros applied to RS/RD */
+#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) /* memory source: opcode 0x0f OP, RD in the r field */
+#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) /* memory destination: opcode 0x0f OP, RS in the r field */
+#define __SSEL1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS )) /* as __SSELrm but with bit 0 of the opcode byte set (0x0f01|OP); reached from the MOV*rm macros */
+
+#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) /* emits the byte PX first, then the unprefixed __SSELrr sequence */
+#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) /* byte PX, then __SSELmr */
+#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) /* byte PX, then __SSELrm */
+#define _SSEL1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEL1rm(OP, RS, RSA, MD, MB, MI, MS)) /* byte PX, then __SSEL1rm */
+
+#define _SSEPSrr(OP,RS,RD) __SSELrr ( OP, RS,_rX, RD,_rX) /* PS family: no prefix byte, registers accessed through _rX */
+#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr ( OP, MD, MB, MI, MS, RD,_rX)
+#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm ( OP, RS,_rX, MD, MB, MI, MS)
+#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __SSEL1rm( OP, RS,_rX, MD, MB, MI, MS)
+
+#define _SSEPDrr(OP,RS,RD) _SSELrr (0x66, OP, RS,_rX, RD,_rX) /* PD family: prefix byte 0x66 */
+#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0x66, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0x66, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0x66, OP, RS,_rX, MD, MB, MI, MS)
+
+#define _SSESSrr(OP,RS,RD) _SSELrr (0xf3, OP, RS,_rX, RD,_rX) /* SS family: prefix byte 0xf3 */
+#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf3, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf3, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
+
+#define _SSESDrr(OP,RS,RD) _SSELrr (0xf2, OP, RS,_rX, RD,_rX) /* SD family: prefix byte 0xf2 */
+#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf2, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf2, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
+
+#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD) /* each operation is one opcode constant routed through the PS/PD/SS/SD emitter family named in the macro; rr = register source, mr = memory source */
+#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
+#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+
+#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
+#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
+#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+
+#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD) /* only PS and PD forms are defined directly for ANDN */
+#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
+#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
+
+#define ANDNSSrr ANDNPSrr /* scalar names forward to the packed macros */
+#define ANDNSSmr ANDNPSmr /* memory-source alias: takes (MD, MB, MI, MS, RD) like ANDNPSmr */
+#define ANDNSDrr ANDNPDrr
+#define ANDNSDmr ANDNPDmr /* memory-source alias: takes (MD, MB, MI, MS, RD) like ANDNPDmr */
+
+#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD) /* only PS and PD forms are defined directly for AND */
+#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
+#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
+#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
+
+#define ANDSSrr ANDPSrr /* scalar names forward to the packed macros */
+#define ANDSSmr ANDPSmr /* memory-source alias: takes (MD, MB, MI, MS, RD) like ANDPSmr */
+#define ANDSDrr ANDPDrr
+#define ANDSDmr ANDPDmr /* memory-source alias: takes (MD, MB, MI, MS, RD) like ANDPDmr */
+
+#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) /* DIV, MAX, MIN and MUL each get PS, PD, SS and SD forms, in rr and mr variants */
+#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
+#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+
+#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
+#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
+#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+
+#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
+#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
+#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+
+#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
+#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
+#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+
+#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
+#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
+#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+
+#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
+#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
+#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+
+#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
+#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
+#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+
+#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
+#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
+#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+
+#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD) /* only PS and PD forms are defined directly for OR */
+#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
+#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+
+#define ORSSrr ORPSrr /* scalar names forward to the packed macros */
+#define ORSSmr ORPSmr /* memory-source alias: takes (MD, MB, MI, MS, RD) like ORPSmr */
+#define ORSDrr ORPDrr
+#define ORSDmr ORPDmr /* memory-source alias: takes (MD, MB, MI, MS, RD) like ORPDmr */
+
+#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD) /* RCP and RSQRT are defined in PS and SS forms only */
+#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
+#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
+#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
+
+#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
+#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
+
+#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD) /* SQRT and SUB get all four PS, PD, SS and SD forms */
+#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+
+#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+
+#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
+#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
+#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+
+#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
+#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
+#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+
+#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD) /* only PS and PD forms are defined directly for XOR */
+#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
+#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
+#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
+
+#define XORSSrr XORPSrr /* scalar XOR names alias the packed forms */
+#define XORSSmr XORPSmr /* memory-source alias must take the 5-argument mr form */
+#define XORSDrr XORPDrr
+#define XORSDmr XORPDmr
+
+/* No prefixes here. */
+#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
+#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
+#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
+#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
+
+/* No prefixes here. */
+#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
+#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
+
+#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD)
+#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
+#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
+
+#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD)
+#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
+#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
+
+#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD)
+#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
+#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
+
+#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD)
+#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
+#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
+
+#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM)
+#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
+#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM)
+#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
+
+#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX)
+#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX)
+#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+
+#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+
+#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+
+#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSI, RS,_rX, RD,_r4)
+#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r4)
+#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSI, RS,_rX, RD,_r4)
+#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD,_r4)
+
+#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4)
+#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
+#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4)
+#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
+
+#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX)
+#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX)
+#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
+#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDXLrr(RS, RD) _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4)
+#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
+
+#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM)
+#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM)
+
+#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4)
+#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS)
+
+#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, X86_SSE_MOV2, RS,_rX, RD,_rM)
+#define MOVQ2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_MOV2, RS,_rM, RD,_rX)
+#define MOVHLPSrr(RS, RD) __SSELrr( X86_SSE_MOVLP, RS,_rX, RD,_rX)
+#define MOVLHPSrr(RS, RD) __SSELrr( X86_SSE_MOVHP, RS,_rX, RD,_rX)
+
+#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
+#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)
+
+#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
+#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)
+
+#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX)
+#define MOVHPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS)
+#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX)
+#define MOVHPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS)
+
+#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX)
+#define MOVLPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS)
+#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX)
+#define MOVLPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS)
+
/*** References: */
/* */
/* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */
@@ -1057,6 +1594,13 @@ typedef _uc jit_insn;
/* [2] "Intel Architecture Software Developer's Manual Volume 2: Instruction Set Reference", */
/* Intel Corporation 1997. */
+#if LIGHTNING_CROSS \
+ ? LIGHTNING_TARGET == LIGHTNING_X86_64 \
+ : defined (__x86_64__)
+#include "i386/asm-64.h"
+#else
+#include "i386/asm-32.h"
#endif
-#endif /* __lightning_asm_h */
+#endif
+#endif /* __lightning_asm_i386_h */
diff --git a/src/runtime/c/pgf/lightning/i386/core-32.h b/src/runtime/c/pgf/lightning/i386/core-32.h
new file mode 100644
index 000000000..48117ddb9
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/core-32.h
@@ -0,0 +1,174 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Platform-independent layer (i386 version)
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini and Matthew Flatt.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ ***********************************************************************/
+
+
+
+#ifndef __lightning_core_h
+#define __lightning_core_h
+
+#define JIT_CAN_16 1
+#define JIT_AP _EBP
+
+#define JIT_R_NUM 3
+#define JIT_R(i) (_EAX + (i))
+#define JIT_V_NUM 3
+#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1)
+
+struct jit_local_state {
+ int framesize; /* set to 20 by jit_base_prolog; grown by each jit_arg_*() */
+ int argssize; /* accumulated by jit_prepare_i; jit_finish pops sizeof(long) * argssize bytes and zeroes it */
+ int alloca_offset; /* zeroed by jit_base_prolog; decremented by jit_allocai_internal; passed to jit_base_ret by the non-Apple jit_ret */
+ int alloca_slack; /* zeroed by jit_base_prolog; adjusted by jit_allocai_internal */
+ jit_insn *finish_ref; /* value of the jit_calli issued by the last jit_finish */
+};
+
+/* Whether a register is used for the user-accessible registers. */
+#define jit_save(reg) 1
+
+#define jit_base_prolog() (_jitl.framesize = 20, _jitl.alloca_offset = _jitl.alloca_slack = 0, \
+ PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI), PUSHLr(_EBP), MOVLrr(_ESP, _EBP))
+#define jit_base_ret(ofs) \
+ (((ofs) < 0 ? LEAVE_() : POPLr(_EBP)), \
+ POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), RET_())
+
+/* Used internally. SLACK is used by the Darwin ABI which keeps the stack
+ aligned to 16-bytes. */
+
+#define jit_allocai_internal(amount, slack) \
+ (((amount) < _jitl.alloca_slack \
+ ? (void)0 /* request fits in the slack already reserved */ \
+ : (void)(_jitl.alloca_slack += (amount) + (slack), \
+ ((amount) + (slack) == sizeof (int) \
+ ? PUSHLr(_EAX) /* one-word grow: push instead of adjusting ESP */ \
+ : SUBLir((amount) + (slack), _ESP)))), \
+ _jitl.alloca_slack -= (amount), \
+ _jitl.alloca_offset -= (amount)) /* value of the macro: the updated alloca_offset */
+
+/* Stack */
+#define jit_pushr_i(rs) PUSHLr(rs)
+#define jit_popr_i(rs) POPLr(rs)
+
+/* The += in argssize allows for stack pollution */
+
+#ifdef __APPLE__
+/* Stack must stay 16-byte aligned: */
+# define jit_prepare_i(ni) ((((ni) & 0x3) /* pad only when ni is not a multiple of 4 words */ \
+ ? (void)SUBLir(4 * ((((ni) + 3) & ~(0x3)) - (ni)), JIT_SP) \
+ : (void)0), \
+ _jitl.argssize += (((ni) + 3) & ~(0x3))) /* account for ni rounded up to a multiple of 4 */
+
+#define jit_allocai(n) \
+ jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
+
+#define jit_prolog(n) (jit_base_prolog(), jit_subi_i (JIT_SP, JIT_SP, 12))
+#define jit_ret() jit_base_ret (-12)
+
+#else
+# define jit_prepare_i(ni) (_jitl.argssize += (ni))
+
+#define jit_allocai(n) \
+ jit_allocai_internal ((n), 0)
+
+#define jit_prolog(n) jit_base_prolog()
+#define jit_ret() jit_base_ret (_jitl.alloca_offset)
+#endif
+
+#define jit_calli(label) (CALLm( ((unsigned long) (label))), _jit.x.pc)
+#define jit_callr(reg) CALLsr(reg)
+
+#define jit_pusharg_i(rs) PUSHLr(rs)
+#define jit_finish(sub) (_jitl.finish_ref = jit_calli((sub)), ADDLir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0, _jitl.finish_ref)
+#define jit_finishr(reg) (jit_callr((reg)), ADDLir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) /* call through reg, then drop the pushed arguments as jit_finish does */
+
+#define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int))
+#define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int))
+#define jit_arg_s() ((_jitl.framesize += sizeof(int)) - sizeof(int))
+#define jit_arg_us() ((_jitl.framesize += sizeof(int)) - sizeof(int))
+#define jit_arg_i() ((_jitl.framesize += sizeof(int)) - sizeof(int))
+#define jit_arg_ui() ((_jitl.framesize += sizeof(int)) - sizeof(int))
+#define jit_arg_l() ((_jitl.framesize += sizeof(long)) - sizeof(long))
+#define jit_arg_ul() ((_jitl.framesize += sizeof(long)) - sizeof(long))
+#define jit_arg_p() ((_jitl.framesize += sizeof(long)) - sizeof(long))
+
+#define jit_movi_p(d, is) (MOVLir (((long)(is)), (d)), _jit.x.pc)
+#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v) - (jump_pc)))
+#define jit_patch_at(jump_pc,v) jit_patch_long_at(jump_pc, v)
+
+/* Memory */
+#define jit_replace(s, rep, op) \
+ (jit_pushr_i(rep), \
+ MOVLrr((s), (rep)), \
+ op, jit_popr_i(rep))
+
+#define jit_movbrm(rs, dd, db, di, ds) \
+ (jit_check8(rs) \
+ ? MOVBrm(jit_reg8(rs), dd, db, di, ds) \
+ : jit_replace(rs, \
+ ((dd != _EAX && db != _EAX && di != _EAX) ? _EAX : \
+ ((dd != _ECX && db != _ECX && di != _ECX) ? _ECX : _EDX)), \
+ MOVBrm(((dd != _EAX && db != _EAX && di != _EAX) ? _AL : \
+ ((dd != _ECX && db != _ECX && di != _ECX) ? _CL : _DL)), \
+ dd, db, di, ds)))
+
+#define jit_ldr_c(d, rs) MOVSBLmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_c(d, s1, s2) MOVSBLmr(0, (s1), (s2), 1, (d))
+
+#define jit_ldr_s(d, rs) MOVSWLmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_s(d, s1, s2) MOVSWLmr(0, (s1), (s2), 1, (d))
+
+#define jit_ldi_c(d, is) MOVSBLmr((is), 0, 0, 0, (d))
+#define jit_ldxi_c(d, rs, is) MOVSBLmr((is), (rs), 0, 0, (d))
+
+#define jit_ldi_uc(d, is) MOVZBLmr((is), 0, 0, 0, (d))
+#define jit_ldxi_uc(d, rs, is) MOVZBLmr((is), (rs), 0, 0, (d))
+
+#define jit_sti_c(id, rs) jit_movbrm((rs), (id), 0, 0, 0)
+#define jit_stxi_c(id, rd, rs) jit_movbrm((rs), (id), (rd), 0, 0)
+
+#define jit_ldi_s(d, is) MOVSWLmr((is), 0, 0, 0, (d))
+#define jit_ldxi_s(d, rs, is) MOVSWLmr((is), (rs), 0, 0, (d))
+
+#define jit_ldi_us(d, is) MOVZWLmr((is), 0, 0, 0, (d))
+#define jit_ldxi_us(d, rs, is) MOVZWLmr((is), (rs), 0, 0, (d))
+
+#define jit_sti_s(id, rs) MOVWrm(jit_reg16(rs), (id), 0, 0, 0)
+#define jit_stxi_s(id, rd, rs) MOVWrm(jit_reg16(rs), (id), (rd), 0, 0)
+
+#define jit_ldi_i(d, is) MOVLmr((is), 0, 0, 0, (d))
+#define jit_ldxi_i(d, rs, is) MOVLmr((is), (rs), 0, 0, (d))
+
+#define jit_ldr_i(d, rs) MOVLmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_i(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d))
+
+#define jit_sti_i(id, rs) MOVLrm((rs), (id), 0, 0, 0)
+#define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0)
+
+#endif /* __lightning_core_h */
diff --git a/src/runtime/c/pgf/lightning/i386/core-64.h b/src/runtime/c/pgf/lightning/i386/core-64.h
new file mode 100644
index 000000000..46f2daf02
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/core-64.h
@@ -0,0 +1,498 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Platform-independent layer (i386 version)
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini and Matthew Flatt.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ *
+ ***********************************************************************/
+
+
+
+#ifndef __lightning_core_h
+#define __lightning_core_h
+
+/* Used to implement ldc, stc, ... */
+#define JIT_CAN_16 0
+#define JIT_REXTMP _R12
+
+/* Number of integer argument registers */
+#define JIT_ARG_MAX 6
+
+/* Number of float argument registers */
+#define JIT_FP_ARG_MAX 8
+
+#define JIT_R_NUM 3
+#define JIT_R(i) ((i) == 0 ? _EAX : _R9 + (i))
+#define JIT_V_NUM 3
+#define JIT_V(i) ((i) == 0 ? _EBX : _R12 + (i))
+
+struct jit_local_state {
+ int long_jumps; /* nonzero makes jit_patch_at use jit_patch_long_at instead of jit_patch_short_at */
+ int nextarg_getfp; /* reset to 0 by jit_prolog */
+ int nextarg_putfp;
+ int nextarg_geti; /* reset to 0 by jit_prolog; bumped by jit_arg_i while below JIT_ARG_MAX */
+ int nextarg_puti; /* set by jit_prepare_i; pre-decremented by jit_pusharg_i */
+ int framesize; /* set to 48 or 56 by jit_prolog; grown by jit_arg_i once the register slots are used up */
+ int argssize; /* count of arguments beyond JIT_ARG_MAX (jit_prepare_i); jit_finish pads it to even and pops it */
+ int fprssize; /* loaded into AL by jit_finish/jit_finishr, then zeroed */
+ int alloca_offset; /* reset to 0 by jit_prolog; decremented by jit_allocai_internal */
+ int alloca_slack; /* adjusted by jit_allocai_internal */
+ jit_insn *finish_ref; /* value of jit_calli(sub) stored by jit_finish */
+};
+
+/* Whether a register in the "low" bank is used for the user-accessible
+ registers. */
+#define jit_save(reg) ((reg) == _EAX || (reg) == _EBX)
+
+/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way. */
+#define jit_allocai_internal(amount, slack) \
+ (((amount) < _jitl.alloca_slack \
+ ? 0 \
+ : (_jitl.alloca_slack += (amount) + (slack), \
+ SUBQir((amount) + (slack), _ESP))), \
+ _jitl.alloca_slack -= (amount), \
+ _jitl.alloca_offset -= (amount))
+
+#define jit_allocai(n) \
+ jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
+
+/* 3-parameter operation */
+#define jit_qopr_(d, s1, s2, op1d, op2d) \
+ ( ((s2) == (d)) ? op1d : \
+ ( (((s1) == (d)) ? (void)0 : (void)MOVQrr((s1), (d))), op2d ) \
+ )
+
+/* 3-parameter operation, with immediate. TODO: fix the case where the
+ immediate does not fit! */
+#define jit_qop_small(d, s1, op2d) \
+ (((s1) == (d)) ? op2d : (MOVQrr((s1), (d)), op2d))
+#define jit_qop_(d, s1, is, op2d, op2i) \
+ (_s32P((long)(is)) \
+ ? jit_qop_small ((d), (s1), (op2d)) \
+ : (MOVQir ((is), JIT_REXTMP), jit_qop_small ((d), (s1), (op2i))))
+
+#define jit_bra_qr(s1, s2, op) (CMPQrr(s2, s1), op, _jit.x.pc)
+#define _jit_bra_l(rs, is, op) (CMPQir(is, rs), op, _jit.x.pc)
+
+#define jit_bra_l(rs, is, op) (_s32P((long)(is)) \
+ ? _jit_bra_l(rs, is, op) \
+ : (MOVQir(is, JIT_REXTMP), jit_bra_qr(rs, JIT_REXTMP, op)))
+
+/* When CMP with 0 can be replaced with TEST */
+#define jit_bra_l0(rs, is, op, op0) \
+ ( (is) == 0 ? (TESTQrr(rs, rs), op0, _jit.x.pc) : jit_bra_l(rs, is, op))
+
+#define jit_reduceQ(op, is, rs) \
+ (_u8P(is) ? jit_reduce_(op##Bir(is, jit_reg8(rs))) : \
+ jit_reduce_(op##Qir(is, rs)) )
+
+#define jit_addi_l(d, rs, is) \
+ /* Value is not zero? */ \
+ ((is) \
+ /* Yes. Value is unsigned and fits in signed 32 bits? */ \
+ ? (_uiP(31, is) \
+ /* Yes. d == rs? */ \
+ ? jit_opi_((d), (rs), \
+ /* Yes. Use add opcode */ \
+ ADDQir((is), (d)), \
+ /* No. Use lea opcode */ \
+ LEAQmr((is), (rs), 0, 0, (d))) \
+ /* No. Need value in a register */ \
+ : (jit_movi_l(JIT_REXTMP, is), \
+ jit_addr_l(d, rs, JIT_REXTMP))) \
+ /* No. Do nothing. */ \
+ : 0)
+#define jit_addr_l(d, s1, s2) jit_opo_((d), (s1), (s2), ADDQrr((s2), (d)), ADDQrr((s1), (d)), LEAQmr(0, (s1), (s2), 1, (d)) )
+#define jit_addci_l(d, rs, is) jit_qop_ ((d), (rs), (is), ADCQir((is), (d)), ADCQrr(JIT_REXTMP, (d)))
+#define jit_addcr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ADCQrr((s1), (d)), ADCQrr((s2), (d)) )
+#define jit_addxi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ADDQir((is), (d)), ADDQrr(JIT_REXTMP, (d)))
+#define jit_addxr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ADDQrr((s1), (d)), ADDQrr((s2), (d)) )
+#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ANDQir((is), (d)), ANDQrr(JIT_REXTMP, (d)))
+#define jit_andr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ANDQrr((s1), (d)), ANDQrr((s2), (d)) )
+#define jit_orr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ORQrr((s1), (d)), ORQrr((s2), (d)) )
+#define jit_subr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), (SUBQrr((s1), (d)), NEGQr(d)), SUBQrr((s2), (d)) )
+#define jit_xorr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), XORQrr((s1), (d)), XORQrr((s2), (d)) )
+
+/* These can sometimes use byte or word versions! */
+#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), (is), jit_reduceQ(OR, (is), (d)), ORQrr(JIT_REXTMP, (d)) )
+#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), (is), jit_reduceQ(XOR, (is), (d)), XORQrr(JIT_REXTMP, (d)) )
+
+#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_small ((d), (rs), SHLQir((is), (d)) ))
+#define jit_rshi_l(d, rs, is) jit_qop_small ((d), (rs), SARQir((is), (d)) )
+#define jit_rshi_ul(d, rs, is) jit_qop_small ((d), (rs), SHRQir((is), (d)) )
+#define jit_lshr_l(d, r1, r2) jit_shift((d), (r1), (r2), SHLQrr)
+#define jit_rshr_l(d, r1, r2) jit_shift((d), (r1), (r2), SARQrr)
+#define jit_rshr_ul(d, r1, r2) jit_shift((d), (r1), (r2), SHRQrr)
+
+
+/* Stack */
+#define jit_pushr_i(rs) PUSHQr(rs)
+#define jit_popr_i(rs) POPQr(rs)
+
+/* A return address is 8 bytes, plus 5 registers = 40 bytes, total = 48 bytes. */
+#define jit_prolog(n) (_jitl.framesize = ((n) & 1) ? 56 : 48, _jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, \
+ PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_R14), PUSHQr(_EBP), MOVQrr(_ESP, _EBP))
+
+#define jit_calli(sub) (MOVQir((long) (sub), JIT_REXTMP), CALLsr(JIT_REXTMP))
+#define jit_callr(reg) CALLsr((reg))
+
+#define jit_prepare_i(ni) (_jitl.nextarg_puti = (ni), \
+ _jitl.argssize = _jitl.nextarg_puti > JIT_ARG_MAX \
+ ? _jitl.nextarg_puti - JIT_ARG_MAX : 0)
+#define jit_pusharg_i(rs) (--_jitl.nextarg_puti >= JIT_ARG_MAX \
+ ? PUSHQr(rs) : MOVQrr(rs, jit_arg_reg_order[_jitl.nextarg_puti]))
+
+#define jit_finish(sub) (_jitl.fprssize \
+ ? (MOVBir(_jitl.fprssize, _AL), _jitl.fprssize = 0) \
+ : MOVBir(0, _AL), \
+ ((_jitl.argssize & 1) \
+ ? (PUSHQr(_EAX), ++_jitl.argssize) : 0), \
+ _jitl.finish_ref = jit_calli(sub), \
+ (_jitl.argssize \
+ ? (ADDQir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) \
+ : 0), \
+ _jitl.finish_ref)
+#define jit_reg_is_arg(reg) ((reg) == _ECX || (reg) == _EDX)
+
+#define jit_finishr(reg) (_jitl.fprssize \
+ ? (MOVBir(_jitl.fprssize, _AL), _jitl.fprssize = 0) \
+ : MOVBir(0, _AL), \
+ ((_jitl.argssize & 1) \
+ ? (PUSHQr(_EAX), ++_jitl.argssize) : 0), \
+ (jit_reg_is_arg((reg)) \
+ ? (MOVQrr(reg, JIT_REXTMP), \
+ jit_callr(JIT_REXTMP)) \
+ : jit_callr(reg)), \
+ (_jitl.argssize \
+ ? (ADDQir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) \
+ : 0))
+
+#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX))
+#define jit_arg_i() (_jitl.nextarg_geti < JIT_ARG_MAX \
+ ? _jitl.nextarg_geti++ \
+ : ((_jitl.framesize += sizeof(long)) - sizeof(long)))
+#define jit_arg_c() jit_arg_i()
+#define jit_arg_uc() jit_arg_i()
+#define jit_arg_s() jit_arg_i()
+#define jit_arg_us() jit_arg_i()
+#define jit_arg_ui() jit_arg_i()
+#define jit_arg_l() jit_arg_i()
+#define jit_arg_ul() jit_arg_i()
+#define jit_arg_p() jit_arg_i()
+
+#define jit_getarg_c(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_extr_c_l((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_c((reg), JIT_FP, (ofs)))
+#define jit_getarg_uc(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_extr_uc_ul((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_uc((reg), JIT_FP, (ofs)))
+#define jit_getarg_s(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_extr_s_l((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_s((reg), JIT_FP, (ofs)))
+#define jit_getarg_us(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_extr_us_ul((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_us((reg), JIT_FP, (ofs)))
+#define jit_getarg_i(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_movr_l((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_i((reg), JIT_FP, (ofs)))
+#define jit_getarg_ui(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_ui((reg), JIT_FP, (ofs)))
+#define jit_getarg_l(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_movr_l((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_l((reg), JIT_FP, (ofs)))
+#define jit_getarg_ul(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_ul((reg), JIT_FP, ofs))
+#define jit_getarg_p(reg, ofs) ((ofs) < JIT_ARG_MAX \
+ ? jit_movr_p((reg), jit_arg_reg_order[(ofs)]) \
+ : jit_ldxi_p((reg), JIT_FP, (ofs)))
+
+static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D }; /* indexed by argument number; used by jit_pusharg_i and the jit_getarg_* macros for the first JIT_ARG_MAX integer arguments */
+
+#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGQr(d), (XORQrr((d), (d)), SUBQrr((rs), (d))) )
+#define jit_movr_l(d, rs) ((void)((rs) == (d) ? 0 : MOVQrr((rs), (d))))
+#define jit_movi_p(d, is) (MOVQir(((long)(is)), (d)), _jit.x.pc)
+#define jit_movi_l(d, is) \
+ /* Value is not zero? */ \
+ ((is) \
+ /* Yes. Value is unsigned and fits in signed 32 bits? */ \
+ ? (_uiP(31, is) \
+ /* Yes. Use 32 bits opcode */ \
+ ? MOVLir(is, (d)) \
+ /* No. Use 64 bits opcode */ \
+ : MOVQir(is, (d))) \
+ /* No. Set register to zero. */ \
+ : XORQrr ((d), (d)))
+
+#define jit_bmsr_l(label, s1, s2) (TESTQrr((s1), (s2)), JNZm(label), _jit.x.pc)
+#define jit_bmcr_l(label, s1, s2) (TESTQrr((s1), (s2)), JZm(label), _jit.x.pc)
+#define jit_boaddr_l(label, s1, s2) (ADDQrr((s2), (s1)), JOm(label), _jit.x.pc)
+#define jit_bosubr_l(label, s1, s2) (SUBQrr((s2), (s1)), JOm(label), _jit.x.pc)
+#define jit_boaddr_ul(label, s1, s2) (ADDQrr((s2), (s1)), JCm(label), _jit.x.pc)
+#define jit_bosubr_ul(label, s1, s2) (SUBQrr((s2), (s1)), JCm(label), _jit.x.pc)
+
+#define jit_boaddi_l(label, rs, is) (ADDQir((is), (rs)), JOm(label), _jit.x.pc)
+#define jit_bosubi_l(label, rs, is) (SUBQir((is), (rs)), JOm(label), _jit.x.pc)
+#define jit_boaddi_ul(label, rs, is) (ADDQir((is), (rs)), JCm(label), _jit.x.pc)
+#define jit_bosubi_ul(label, rs, is) (SUBQir((is), (rs)), JCm(label), _jit.x.pc)
+
+#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v)))
+#define jit_patch_short_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))
+#define jit_patch_at(jump_pc,v) (_jitl.long_jumps ? jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v))
+#define jit_ret() (LEAVE_(), POPQr(_R14), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_())
+
+/* Memory */
+
+/* Used to implement ldc, stc, ... We have SIL and friends which simplify it all. */
+#define jit_movbrm(rs, dd, db, di, ds) MOVBrm(jit_reg8(rs), dd, db, di, ds)
+
+#define jit_ldr_c(d, rs) MOVSBQmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_c(d, s1, s2) MOVSBQmr(0, (s1), (s2), 1, (d))
+
+#define jit_ldr_s(d, rs) MOVSWQmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_s(d, s1, s2) MOVSWQmr(0, (s1), (s2), 1, (d))
+
+#define jit_ldi_c(d, is) (_u32P((long)(is)) ? MOVSBQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_c(d, JIT_REXTMP)))
+#define jit_ldxi_c(d, rs, is) (_u32P((long)(is)) ? MOVSBQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_c(d, rs, JIT_REXTMP)))
+
+#define jit_ldi_uc(d, is) (_u32P((long)(is)) ? MOVZBLmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_uc(d, JIT_REXTMP)))
+#define jit_ldxi_uc(d, rs, is) (_u32P((long)(is)) ? MOVZBLmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_uc(d, rs, JIT_REXTMP)))
+
+#define jit_sti_c(id, rs) (_u32P((long)(id)) ? MOVBrm(jit_reg8(rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_c(JIT_REXTMP, rs)))
+#define jit_stxi_c(id, rd, rs) (_u32P((long)(id)) ? MOVBrm(jit_reg8(rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_c(JIT_REXTMP, rd, rs)))
+
+#define jit_ldi_s(d, is) (_u32P((long)(is)) ? MOVSWQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_s(d, JIT_REXTMP)))
+#define jit_ldxi_s(d, rs, is) (_u32P((long)(is)) ? MOVSWQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_s(d, rs, JIT_REXTMP)))
+
+#define jit_ldi_us(d, is) (_u32P((long)(is)) ? MOVZWLmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_us(d, JIT_REXTMP)))
+#define jit_ldxi_us(d, rs, is) (_u32P((long)(is)) ? MOVZWLmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_us(d, rs, JIT_REXTMP)))
+
+#define jit_sti_s(id, rs) (_u32P((long)(id)) ? MOVWrm(jit_reg16(rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_s(JIT_REXTMP, rs)))
+#define jit_stxi_s(id, rd, rs) (_u32P((long)(id)) ? MOVWrm(jit_reg16(rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_s(JIT_REXTMP, rd, rs)))
+
+#define jit_ldi_ui(d, is) (_u32P((long)(is)) ? MOVLmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_ui(d, JIT_REXTMP)))
+#define jit_ldxi_ui(d, rs, is) (_u32P((long)(is)) ? MOVLmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_ui(d, rs, JIT_REXTMP)))
+
+#define jit_ldi_i(d, is) (_u32P((long)(is)) ? MOVSLQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_i(d, JIT_REXTMP)))
+#define jit_ldxi_i(d, rs, is) (_u32P((long)(is)) ? MOVSLQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_i(d, rs, JIT_REXTMP)))
+
+#define jit_sti_i(id, rs) (_u32P((long)(id)) ? MOVLrm((rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_i(JIT_REXTMP, rs)))
+#define jit_stxi_i(id, rd, rs) (_u32P((long)(id)) ? MOVLrm((rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_i(JIT_REXTMP, rd, rs)))
+
+#define jit_ldi_l(d, is) (_u32P((long)(is)) ? MOVQmr((is), 0, 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldr_l(d, JIT_REXTMP)))
+#define jit_ldxi_l(d, rs, is) (_u32P((long)(is)) ? MOVQmr((is), (rs), 0, 0, (d)) : (jit_movi_l(JIT_REXTMP, is), jit_ldxr_l(d, rs, JIT_REXTMP)))
+
+#define jit_sti_l(id, rs) (_u32P((long)(id)) ? MOVQrm((rs), (id), 0, 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_str_l(JIT_REXTMP, rs)))
+#define jit_stxi_l(id, rd, rs) (_u32P((long)(id)) ? MOVQrm((rs), (id), (rd), 0, 0) : (jit_movi_l(JIT_REXTMP, id), jit_stxr_l(JIT_REXTMP, rd, rs)))
+
+#define jit_ldr_ui(d, rs) MOVLmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_ui(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d))
+
+#define jit_ldr_i(d, rs) MOVSLQmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_i(d, s1, s2) MOVSLQmr(0, (s1), (s2), 1, (d))
+
+#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_l(d, s1, s2) MOVQmr(0, (s1), (s2), 1, (d))
+
+#define jit_str_l(rd, rs) MOVQrm((rs), 0, (rd), 0, 0)
+#define jit_stxr_l(d1, d2, rs) MOVQrm((rs), 0, (d1), (d2), 1)
+
+#define jit_blti_l(label, rs, is) jit_bra_l0((rs), (is), JLm(label), JSm(label) )
+#define jit_blei_l(label, rs, is) jit_bra_l ((rs), (is), JLEm(label) )
+#define jit_bgti_l(label, rs, is) jit_bra_l ((rs), (is), JGm(label) )
+#define jit_bgei_l(label, rs, is) jit_bra_l0((rs), (is), JGEm(label), JNSm(label) )
+#define jit_beqi_l(label, rs, is) jit_bra_l0((rs), (is), JEm(label), JEm(label) )
+#define jit_bnei_l(label, rs, is) jit_bra_l0((rs), (is), JNEm(label), JNEm(label) )
+#define jit_blti_ul(label, rs, is) jit_bra_l ((rs), (is), JBm(label) )
+#define jit_blei_ul(label, rs, is) jit_bra_l0((rs), (is), JBEm(label), JEm(label) )
+#define jit_bgti_ul(label, rs, is) jit_bra_l0((rs), (is), JAm(label), JNEm(label) )
+#define jit_bgei_ul(label, rs, is) jit_bra_l ((rs), (is), JAEm(label) )
+#define jit_bmsi_l(label, rs, is) (jit_reduceQ(TEST, (is), (rs)), JNZm(label), _jit.x.pc)
+#define jit_bmci_l(label, rs, is) (jit_reduceQ(TEST, (is), (rs)), JZm(label), _jit.x.pc)
+
+#define jit_pushr_l(rs) jit_pushr_i(rs)
+#define jit_popr_l(rs) jit_popr_i(rs)
+
+#define jit_pusharg_l(rs) jit_pusharg_i(rs)
+#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX))
+#define jit_bltr_l(label, s1, s2) jit_bra_qr((s1), (s2), JLm(label) )
+#define jit_bler_l(label, s1, s2) jit_bra_qr((s1), (s2), JLEm(label) )
+#define jit_bgtr_l(label, s1, s2) jit_bra_qr((s1), (s2), JGm(label) )
+#define jit_bger_l(label, s1, s2) jit_bra_qr((s1), (s2), JGEm(label) )
+#define jit_beqr_l(label, s1, s2) jit_bra_qr((s1), (s2), JEm(label) )
+#define jit_bner_l(label, s1, s2) jit_bra_qr((s1), (s2), JNEm(label) )
+#define jit_bltr_ul(label, s1, s2) jit_bra_qr((s1), (s2), JBm(label) )
+#define jit_bler_ul(label, s1, s2) jit_bra_qr((s1), (s2), JBEm(label) )
+#define jit_bgtr_ul(label, s1, s2) jit_bra_qr((s1), (s2), JAm(label) )
+#define jit_bger_ul(label, s1, s2) jit_bra_qr((s1), (s2), JAEm(label) )
+
+/* Bool operations. */
+#define jit_bool_qr(d, s1, s2, op) \
+ (jit_replace8(d, CMPQrr(s2, s1), op))
+
+#define jit_bool_qi(d, rs, is, op) \
+ (jit_replace8(d, CMPQir(is, rs), op))
+
+/* When CMP with 0 can be replaced with TEST */
+#define jit_bool_qi0(d, rs, is, op, op0) \
+ ((is) != 0 \
+ ? (jit_replace8(d, CMPQir(is, rs), op)) \
+ : (jit_replace8(d, TESTQrr(rs, rs), op0)))
+
+#define jit_ltr_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETLr ) /* register/register forms, signed conditions */
+#define jit_ler_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETLEr )
+#define jit_gtr_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETGr )
+#define jit_ger_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETGEr )
+#define jit_eqr_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETEr )
+#define jit_ner_l(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETNEr )
+#define jit_ltr_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETBr ) /* unsigned forms use the below/above conditions */
+#define jit_ler_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETBEr )
+#define jit_gtr_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETAr )
+#define jit_ger_ul(d, s1, s2) jit_bool_qr((d), (s1), (s2), SETAEr )
+
+#define jit_lti_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETLr, SETSr ) /* immediate forms; with jit_bool_qi0 the last argument is the op used when is == 0 */
+#define jit_lei_l(d, rs, is) jit_bool_qi ((d), (rs), (is), SETLEr )
+#define jit_gti_l(d, rs, is) jit_bool_qi ((d), (rs), (is), SETGr )
+#define jit_gei_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETGEr, SETNSr )
+#define jit_eqi_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETEr, SETEr )
+#define jit_nei_l(d, rs, is) jit_bool_qi0((d), (rs), (is), SETNEr, SETNEr )
+#define jit_lti_ul(d, rs, is) jit_bool_qi ((d), (rs), (is), SETBr )
+#define jit_lei_ul(d, rs, is) jit_bool_qi0((d), (rs), (is), SETBEr, SETEr )
+#define jit_gti_ul(d, rs, is) jit_bool_qi0((d), (rs), (is), SETAr, SETNEr )
+#define jit_gei_ul(d, rs, is) jit_bool_qi0((d), (rs), (is), SETAEr, INCLr ) /* is == 0: unsigned >= 0 always holds; INCLr presumably bumps the zeroed register to 1 -- NOTE(review): confirm INCLr accepts the 8-bit register code jit_replace8 passes on x86-64 */
+
+/* Multiplication/division. The trailing-underscore helpers use the one-operand multiply/divide instructions, so their operands are routed through RAX/RDX (and RCX for divisors). */
+#define jit_mulr_ul_(s1, s2) \
+ jit_qopr_(_RAX, s1, s2, MULQr(s1), MULQr(s2))
+
+#define jit_mulr_l_(s1, s2) \
+ jit_qopr_(_RAX, s1, s2, IMULQr(s1), IMULQr(s2))
+
+#define jit_muli_l_(is, rs) /* the immediate is loaded into RAX, or into RDX when rs already is RAX */ \
+ (MOVQir(is, rs == _RAX ? _RDX : _RAX), \
+ IMULQr(rs == _RAX ? _RDX : rs))
+
+#define jit_muli_ul_(is, rs) /* unsigned one-operand multiply of rs by immediate is; is goes into RAX, or into RDX when rs already is RAX */ \
+ (MOVQir(is, rs == _RAX ? _RDX : _RAX), \
+ MULQr(rs == _RAX ? _RDX : rs)) /* MULQr, not IMULQr: jit_hmuli_ul reads the high half from RDX, and that half differs between signed and unsigned products */
+
+#define jit_divi_l_(result, d, rs, is) /* signed divide of rs by immediate is; result names the register copied to d (_RAX in jit_divi_l, _RDX in jit_modi_l). jit_might(a, b, op) presumably emits op only when a != b -- confirm */ \
+ (jit_might (d, _RAX, jit_pushr_l(_RAX)), \
+ jit_might (d, _RCX, jit_pushr_l(_RCX)), \
+ jit_might (d, _RDX, jit_pushr_l(_RDX)), \
+ jit_might (rs, _RAX, MOVQrr(rs, _RAX)), \
+ jit_might (rs, _RDX, MOVQrr(rs, _RDX)), \
+ MOVQir(is, _RCX), /* divisor lives in RCX */ \
+ SARQir(63, _RDX), /* RDX held a copy of rs; shifting by 63 leaves only its sign fill */ \
+ IDIVQr(_RCX), \
+ jit_might(d, result, MOVQrr(result, d)), \
+ jit_might(d, _RDX, jit_popr_l(_RDX)), /* pops mirror the pushes above in reverse order */ \
+ jit_might(d, _RCX, jit_popr_l(_RCX)), \
+ jit_might(d, _RAX, jit_popr_l(_RAX)))
+
+#define jit_divr_l_(result, d, s1, s2) /* signed divide of s1 by s2; result names the register copied to d (_RAX in jit_divr_l, _RDX in jit_modr_l). jit_might(a, b, op) presumably emits op only when a != b -- confirm */ \
+ (jit_might (d, _RAX, jit_pushr_l(_RAX)), \
+ jit_might (d, _RCX, jit_pushr_l(_RCX)), \
+ jit_might (d, _RDX, jit_pushr_l(_RDX)), \
+ ((s1 == _RCX) ? jit_pushr_l(_RCX) : 0), /* s1 is in RCX, which is about to receive s2: park it on the stack */ \
+ jit_might (s2, _RCX, MOVQrr(s2, _RCX)), \
+ ((s1 == _RCX) ? jit_popr_l(_RDX) : /* recover the parked s1 straight into RDX */ \
+ jit_might (s1, _RDX, MOVQrr(s1, _RDX))), \
+ MOVQrr(_RDX, _RAX), \
+ SARQir(63, _RDX), /* RDX held a copy of s1; shifting by 63 leaves only its sign fill */ \
+ IDIVQr(_RCX), \
+ jit_might(d, result, MOVQrr(result, d)), \
+ jit_might(d, _RDX, jit_popr_l(_RDX)), \
+ jit_might(d, _RCX, jit_popr_l(_RCX)), \
+ jit_might(d, _RAX, jit_popr_l(_RAX)))
+
+#define jit_divi_ul_(result, d, rs, is) /* unsigned divide of rs by immediate is; result names the register copied to d (_RAX in jit_divi_ul, _RDX in jit_modi_ul). jit_might(a, b, op) presumably emits op only when a != b -- confirm */ \
+ (jit_might (d, _RAX, jit_pushr_l(_RAX)), \
+ jit_might (d, _RCX, jit_pushr_l(_RCX)), \
+ jit_might (d, _RDX, jit_pushr_l(_RDX)), \
+ jit_might (rs, _RAX, MOVQrr(rs, _RAX)), \
+ MOVQir(is, _RCX), /* divisor lives in RCX */ \
+ XORQrr(_RDX, _RDX), /* unsigned: RDX is cleared rather than sign-filled */ \
+ DIVQr(_RCX), \
+ jit_might(d, result, MOVQrr(result, d)), \
+ jit_might(d, _RDX, jit_popr_l(_RDX)), \
+ jit_might(d, _RCX, jit_popr_l(_RCX)), \
+ jit_might(d, _RAX, jit_popr_l(_RAX)))
+
+#define jit_divr_ul_(result, d, s1, s2) /* unsigned divide of s1 by s2; result names the register copied to d (_RAX in jit_divr_ul, _RDX in jit_modr_ul). jit_might(a, b, op) presumably emits op only when a != b -- confirm */ \
+ (jit_might (d, _RAX, jit_pushr_l(_RAX)), \
+ jit_might (d, _RCX, jit_pushr_l(_RCX)), \
+ jit_might (d, _RDX, jit_pushr_l(_RDX)), \
+ ((s1 == _RCX) ? jit_pushr_l(_RCX) : 0), /* s1 is in RCX, which is about to receive s2: park it on the stack */ \
+ jit_might (s2, _RCX, MOVQrr(s2, _RCX)), \
+ ((s1 == _RCX) ? jit_popr_l(_RAX) : /* recover the parked s1 straight into RAX */ \
+ jit_might (s1, _RAX, MOVQrr(s1, _RAX))), \
+ XORQrr(_RDX, _RDX), /* unsigned: RDX is cleared rather than sign-filled */ \
+ DIVQr(_RCX), \
+ jit_might(d, result, MOVQrr(result, d)), \
+ jit_might(d, _RDX, jit_popr_l(_RDX)), \
+ jit_might(d, _RCX, jit_popr_l(_RCX)), \
+ jit_might(d, _RAX, jit_popr_l(_RAX)))
+
+#define jit_muli_l(d, rs, is) jit_qop_ ((d), (rs), (is), IMULQir((is), (d)), IMULQrr(JIT_REXTMP, (d)) ) /* jit_qop_ presumably picks the JIT_REXTMP form when is cannot be encoded as an immediate -- confirm */
+#define jit_mulr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), IMULQrr((s1), (d)), IMULQrr((s2), (d)) )
+
+/* As far as low bits are concerned, signed and unsigned multiplies are
+ exactly the same, so the unsigned forms below reuse IMULQ. */
+#define jit_muli_ul(d, rs, is) jit_qop_ ((d), (rs), (is), IMULQir((is), (d)), IMULQrr(JIT_REXTMP, (d)) )
+#define jit_mulr_ul(d, s1, s2) jit_qopr_((d), (s1), (s2), IMULQrr((s1), (d)), IMULQrr((s2), (d)) )
+
+#define jit_hmuli_l(d, rs, is) /* d = RDX as left by jit_muli_l_; RAX and RDX are saved and restored around the multiply unless they are d */ \
+ ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_muli_l_((is), (rs)), jit_popr_l(_RAX) ) : \
+ ((d) == _RAX ? (jit_pushr_l(_RDX), jit_muli_l_((is), (rs)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \
+ (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_muli_l_((is), (rs)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) )))
+
+#define jit_hmulr_l(d, s1, s2) /* d = RDX as left by jit_mulr_l_; RAX and RDX are saved and restored around the multiply unless they are d */ \
+ ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_mulr_l_((s1), (s2)), jit_popr_l(_RAX) ) : \
+ ((d) == _RAX ? (jit_pushr_l(_RDX), jit_mulr_l_((s1), (s2)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \
+ (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_mulr_l_((s1), (s2)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) )))
+
+#define jit_hmuli_ul(d, rs, is) /* d = RDX as left by jit_muli_ul_; RAX and RDX are saved and restored around the multiply unless they are d */ \
+ ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_muli_ul_((is), (rs)), jit_popr_l(_RAX) ) : \
+ ((d) == _RAX ? (jit_pushr_l(_RDX), jit_muli_ul_((is), (rs)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \
+ (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_muli_ul_((is), (rs)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) )))
+
+#define jit_hmulr_ul(d, s1, s2) /* d = RDX as left by jit_mulr_ul_; RAX and RDX are saved and restored around the multiply unless they are d */ \
+ ((d) == _RDX ? ( jit_pushr_l(_RAX), jit_mulr_ul_((s1), (s2)), jit_popr_l(_RAX) ) : \
+ ((d) == _RAX ? (jit_pushr_l(_RDX), jit_mulr_ul_((s1), (s2)), MOVQrr(_RDX, _RAX), jit_popr_l(_RDX) ) : \
+ (jit_pushr_l(_RDX), jit_pushr_l(_RAX), jit_mulr_ul_((s1), (s2)), MOVQrr(_RDX, (d)), jit_popr_l(_RAX), jit_popr_l(_RDX) )))
+
+#define jit_divi_l(d, rs, is) jit_divi_l_(_RAX, (d), (rs), (is)) /* div forms copy RAX to d after the divide */
+#define jit_divi_ul(d, rs, is) jit_divi_ul_(_RAX, (d), (rs), (is))
+#define jit_modi_l(d, rs, is) jit_divi_l_(_RDX, (d), (rs), (is)) /* mod forms copy RDX to d after the divide */
+#define jit_modi_ul(d, rs, is) jit_divi_ul_(_RDX, (d), (rs), (is))
+#define jit_divr_l(d, s1, s2) jit_divr_l_(_RAX, (d), (s1), (s2))
+#define jit_divr_ul(d, s1, s2) jit_divr_ul_(_RAX, (d), (s1), (s2))
+#define jit_modr_l(d, s1, s2) jit_divr_l_(_RDX, (d), (s1), (s2))
+#define jit_modr_ul(d, s1, s2) jit_divr_ul_(_RDX, (d), (s1), (s2))
+
+#endif /* __lightning_core_h */
diff --git a/src/runtime/c/pgf/lightning/i386/core.h b/src/runtime/c/pgf/lightning/i386/core.h
index dd9d58a87..e6eb7c8a8 100644
--- a/src/runtime/c/pgf/lightning/i386/core.h
+++ b/src/runtime/c/pgf/lightning/i386/core.h
@@ -7,21 +7,21 @@
/***********************************************************************
*
- * Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
- * Written by Paolo Bonzini.
+ * Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini and Matthew Flatt.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1, or (at your option)
+ * by the Free Software Foundation; either version 3, or (at your option)
* any later version.
- *
- * GNU lightning is distributed in the hope that it will be useful, but
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
* Free Software Foundation, 59 Temple Place - Suite 330, Boston,
@@ -31,22 +31,13 @@
-#ifndef __lightning_core_h
-#define __lightning_core_h
+#ifndef __lightning_core_i386_h
+#define __lightning_core_i386_h
#define JIT_FP _EBP
#define JIT_SP _ESP
#define JIT_RET _EAX
-#define JIT_R_NUM 3
-#define JIT_V_NUM 3
-#define JIT_R(i) (_EAX + (i))
-#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1)
-
-struct jit_local_state {
- int framesize;
- int argssize;
-};
/* 3-parameter operation */
#define jit_opr_(d, s1, s2, op1d, op2d) \
@@ -56,7 +47,7 @@ struct jit_local_state {
/* 3-parameter operation, with immediate */
#define jit_op_(d, s1, op2d) \
- ((s1 == d) ? op2d : (MOVLrr(s1, d), op2d)) \
+ ((s1 == d) ? op2d : (MOVLrr(s1, d), op2d))
/* 3-parameter operation, optimizable */
#define jit_opo_(d, s1, s2, op1d, op2d, op12d) \
@@ -67,28 +58,27 @@ struct jit_local_state {
#define jit_opi_(d, rs, opdi, opdri) \
((rs == d) ? opdi : opdri)
-/* An operand is forced into a register */
-#define jit_replace(rd, rs, forced, op) \
- ((rd == forced) ? JITSORRY("Register conflict for " # op) : \
- (rs == forced) ? op : (PUSHLr(forced), MOVLrr(rs, forced), op, POPLr(forced)))
-
/* For LT, LE, ... */
-#define jit_replace8(d, op) \
- (jit_check8(d) \
- ? (MOVLir(0, d), op(d)) \
- : (PUSHLr(_EAX), MOVLir(0, _EAX), op(_EAX), MOVLrr(_EAX, (d)), POPLr(_EAX)))
+#define jit_replace8(d, cmp, op) /* emit cmp, load 0 into the target, then apply op to the target's 8-bit form */ \
+ (jit_check8(d) \
+ ? ((cmp), \
+ MOVLir(0, (d)), /* cmp is emitted first; presumably a plain MOV is used because it leaves the flags from cmp intact -- confirm */ \
+ op(_rR(d) | _AL)) \
+ : (jit_pushr_i(_EAX), (cmp), /* d fails jit_check8: build the value in EAX (saved and restored) and copy it to d */ \
+ MOVLir(0, _EAX), \
+ op(_AL), MOVLrr(_EAX, (d)), jit_popr_i(_EAX)))
#define jit_bool_r(d, s1, s2, op) \
- (CMPLrr(s2, s1), jit_replace8(d, op))
+ (jit_replace8(d, CMPLrr(s2, s1), op))
#define jit_bool_i(d, rs, is, op) \
- (CMPLir(is, rs), jit_replace8(d, op))
+ (jit_replace8(d, CMPLir(is, rs), op))
/* When CMP with 0 can be replaced with TEST */
#define jit_bool_i0(d, rs, is, op, op0) \
((is) != 0 \
- ? (CMPLir(is, rs), jit_replace8(d, op)) \
- : (TESTLrr(rs, rs), jit_replace8(d, op0)))
+ ? (jit_replace8(d, CMPLir(is, rs), op)) \
+ : (jit_replace8(d, TESTLrr(rs, rs), op0)))
/* For BLT, BLE, ... */
#define jit_bra_r(s1, s2, op) (CMPLrr(s2, s1), op, _jit.x.pc)
@@ -98,22 +88,11 @@ struct jit_local_state {
#define jit_bra_i0(rs, is, op, op0) \
( (is) == 0 ? (TESTLrr(rs, rs), op0, _jit.x.pc) : (CMPLir(is, rs), op, _jit.x.pc))
-/* Used to implement ldc, stc, ... */
-#define jit_check8(rs) ( (rs) <= _EBX )
-#define jit_reg8(rs) ( ((rs) == _SI || (rs) == _DI) ? _AL : ((rs) & _BH) | _AL )
-#define jit_reg16(rs) ( ((rs) & _BH) | _AX )
-
-/* In jit_replace below, _EBX is dummy */
-#define jit_movbrm(rs, dd, db, di, ds) \
- (jit_check8(rs) \
- ? MOVBrm(jit_reg8(rs), dd, db, di, ds) \
- : jit_replace(_EBX, rs, _EAX, MOVBrm(_AL, dd, db, di, ds)))
-
/* Reduce arguments of XOR/OR/TEST */
#define jit_reduce_(op) op
#define jit_reduce(op, is, rs) \
(_u8P(is) && jit_check8(rs) ? jit_reduce_(op##Bir(is, jit_reg8(rs))) : \
- (_u16P(is) ? jit_reduce_(op##Wir(is, jit_reg16(rs))) : \
+ (_u16P(is) && JIT_CAN_16 ? jit_reduce_(op##Wir(is, jit_reg16(rs))) : \
jit_reduce_(op##Lir(is, rs)) ))
/* Helper macros for MUL/DIV/IDIV */
@@ -133,62 +112,62 @@ struct jit_local_state {
IMULLr(rs == _EAX ? _EDX : rs))
#define jit_divi_i_(result, d, rs, is) \
- (jit_might (d, _EAX, PUSHLr(_EAX)), \
- jit_might (d, _ECX, PUSHLr(_ECX)), \
- jit_might (d, _EDX, PUSHLr(_EDX)), \
+ (jit_might (d, _EAX, jit_pushr_i(_EAX)), \
+ jit_might (d, _ECX, jit_pushr_i(_ECX)), \
+ jit_might (d, _EDX, jit_pushr_i(_EDX)), \
jit_might (rs, _EAX, MOVLrr(rs, _EAX)), \
jit_might (rs, _EDX, MOVLrr(rs, _EDX)), \
MOVLir(is, _ECX), \
SARLir(31, _EDX), \
IDIVLr(_ECX), \
jit_might(d, result, MOVLrr(result, d)), \
- jit_might(d, _EDX, POPLr(_EDX)), \
- jit_might(d, _ECX, POPLr(_ECX)), \
- jit_might(d, _EAX, POPLr(_EAX)))
+ jit_might(d, _EDX, jit_popr_i(_EDX)), \
+ jit_might(d, _ECX, jit_popr_i(_ECX)), \
+ jit_might(d, _EAX, jit_popr_i(_EAX)))
#define jit_divr_i_(result, d, s1, s2) \
- (jit_might (d, _EAX, PUSHLr(_EAX)), \
- jit_might (d, _ECX, PUSHLr(_ECX)), \
- jit_might (d, _EDX, PUSHLr(_EDX)), \
- ((s1 == _ECX) ? PUSHLr(_ECX) : 0), \
+ (jit_might (d, _EAX, jit_pushr_i(_EAX)), \
+ jit_might (d, _ECX, jit_pushr_i(_ECX)), \
+ jit_might (d, _EDX, jit_pushr_i(_EDX)), \
+ ((s1 == _ECX) ? jit_pushr_i(_ECX) : 0), \
jit_might (s2, _ECX, MOVLrr(s2, _ECX)), \
- ((s1 == _ECX) ? POPLr(_EDX) : \
+ ((s1 == _ECX) ? jit_popr_i(_EDX) : \
jit_might (s1, _EDX, MOVLrr(s1, _EDX))), \
MOVLrr(_EDX, _EAX), \
SARLir(31, _EDX), \
IDIVLr(_ECX), \
jit_might(d, result, MOVLrr(result, d)), \
- jit_might(d, _EDX, POPLr(_EDX)), \
- jit_might(d, _ECX, POPLr(_ECX)), \
- jit_might(d, _EAX, POPLr(_EAX)))
+ jit_might(d, _EDX, jit_popr_i(_EDX)), \
+ jit_might(d, _ECX, jit_popr_i(_ECX)), \
+ jit_might(d, _EAX, jit_popr_i(_EAX)))
#define jit_divi_ui_(result, d, rs, is) \
- (jit_might (d, _EAX, PUSHLr(_EAX)), \
- jit_might (d, _ECX, PUSHLr(_ECX)), \
- jit_might (d, _EDX, PUSHLr(_EDX)), \
+ (jit_might (d, _EAX, jit_pushr_i(_EAX)), \
+ jit_might (d, _ECX, jit_pushr_i(_ECX)), \
+ jit_might (d, _EDX, jit_pushr_i(_EDX)), \
jit_might (rs, _EAX, MOVLrr(rs, _EAX)), \
MOVLir(is, _ECX), \
XORLrr(_EDX, _EDX), \
DIVLr(_ECX), \
jit_might(d, result, MOVLrr(result, d)), \
- jit_might(d, _EDX, POPLr(_EDX)), \
- jit_might(d, _ECX, POPLr(_ECX)), \
- jit_might(d, _EAX, POPLr(_EAX)))
+ jit_might(d, _EDX, jit_popr_i(_EDX)), \
+ jit_might(d, _ECX, jit_popr_i(_ECX)), \
+ jit_might(d, _EAX, jit_popr_i(_EAX)))
#define jit_divr_ui_(result, d, s1, s2) \
- (jit_might (d, _EAX, PUSHLr(_EAX)), \
- jit_might (d, _ECX, PUSHLr(_ECX)), \
- jit_might (d, _EDX, PUSHLr(_EDX)), \
- ((s1 == _ECX) ? PUSHLr(_ECX) : 0), \
+ (jit_might (d, _EAX, jit_pushr_i(_EAX)), \
+ jit_might (d, _ECX, jit_pushr_i(_ECX)), \
+ jit_might (d, _EDX, jit_pushr_i(_EDX)), \
+ ((s1 == _ECX) ? jit_pushr_i(_ECX) : 0), \
jit_might (s2, _ECX, MOVLrr(s2, _ECX)), \
- ((s1 == _ECX) ? POPLr(_EAX) : \
+ ((s1 == _ECX) ? jit_popr_i(_EAX) : \
jit_might (s1, _EAX, MOVLrr(s1, _EAX))), \
XORLrr(_EDX, _EDX), \
DIVLr(_ECX), \
jit_might(d, result, MOVLrr(result, d)), \
- jit_might(d, _EDX, POPLr(_EDX)), \
- jit_might(d, _ECX, POPLr(_ECX)), \
- jit_might(d, _EAX, POPLr(_EAX)))
+ jit_might(d, _EDX, jit_popr_i(_EDX)), \
+ jit_might(d, _ECX, jit_popr_i(_ECX)), \
+ jit_might(d, _EAX, jit_popr_i(_EAX)))
/* ALU */
@@ -207,6 +186,7 @@ struct jit_local_state {
#define jit_subxi_i(d, rs, is) jit_op_ ((d), (rs), SBBLir((is), (d)) )
#define jit_xorr_i(d, s1, s2) jit_opr_((d), (s1), (s2), XORLrr((s1), (d)), XORLrr((s2), (d)) )
+
/* These can sometimes use byte or word versions! */
#define jit_ori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(OR, (is), (d)) )
#define jit_xori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(XOR, (is), (d)) )
@@ -220,24 +200,24 @@ struct jit_local_state {
#define jit_mulr_ui(d, s1, s2) jit_opr_((d), (s1), (s2), IMULLrr((s1), (d)), IMULLrr((s2), (d)) )
#define jit_hmuli_i(d, rs, is) \
- ((d) == _EDX ? ( PUSHLr(_EAX), jit_muli_i_((is), (rs)), POPLr(_EAX) ) : \
- ((d) == _EAX ? (PUSHLr(_EDX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \
- (PUSHLr(_EDX), PUSHLr(_EAX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) )))
+ ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_muli_i_((is), (rs)), jit_popr_i(_EAX) ) : \
+ ((d) == _EAX ? (jit_pushr_i(_EDX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \
+ (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) )))
#define jit_hmulr_i(d, s1, s2) \
- ((d) == _EDX ? ( PUSHLr(_EAX), jit_mulr_i_((s1), (s2)), POPLr(_EAX) ) : \
- ((d) == _EAX ? (PUSHLr(_EDX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \
- (PUSHLr(_EDX), PUSHLr(_EAX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) )))
+ ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_mulr_i_((s1), (s2)), jit_popr_i(_EAX) ) : \
+ ((d) == _EAX ? (jit_pushr_i(_EDX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \
+ (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) )))
#define jit_hmuli_ui(d, rs, is) \
- ((d) == _EDX ? ( PUSHLr(_EAX), jit_muli_ui_((is), (rs)), POPLr(_EAX) ) : \
- ((d) == _EAX ? (PUSHLr(_EDX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \
- (PUSHLr(_EDX), PUSHLr(_EAX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) )))
+ ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_muli_ui_((is), (rs)), jit_popr_i(_EAX) ) : \
+ ((d) == _EAX ? (jit_pushr_i(_EDX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \
+ (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) )))
#define jit_hmulr_ui(d, s1, s2) \
- ((d) == _EDX ? ( PUSHLr(_EAX), jit_mulr_ui_((s1), (s2)), POPLr(_EAX) ) : \
- ((d) == _EAX ? (PUSHLr(_EDX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \
- (PUSHLr(_EDX), PUSHLr(_EAX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) )))
+ ((d) == _EDX ? ( jit_pushr_i(_EAX), jit_mulr_ui_((s1), (s2)), jit_popr_i(_EAX) ) : \
+ ((d) == _EAX ? (jit_pushr_i(_EDX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, _EAX), jit_popr_i(_EDX) ) : \
+ (jit_pushr_i(_EDX), jit_pushr_i(_EAX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, (d)), jit_popr_i(_EAX), jit_popr_i(_EDX) )))
#define jit_divi_i(d, rs, is) jit_divi_i_(_EAX, (d), (rs), (is))
#define jit_divi_ui(d, rs, is) jit_divi_ui_(_EAX, (d), (rs), (is))
@@ -250,49 +230,42 @@ struct jit_local_state {
/* Shifts */
+#define jit_shift(d, s1, s2, m) /* d = s1 shifted by s2 using shift macro m; when d is ECX or d == s2 the result is built in a scratch register (EDX if s2 is EAX, otherwise EAX) and then moved to d */ \
+ ((d) == _ECX || (d) == (s2) \
+ ? ((s2) == _EAX \
+ ? jit_fixd(d, _EDX, jit_shift2(_EDX, s1, s2, m)) \
+ : jit_fixd(d, _EAX, jit_shift2(_EAX, s1, s2, m))) \
+ : jit_shift2(d, s1, s2, m))
+
+/* Shift operation, assuming d is neither s2 nor ECX: s1 is copied to d if needed, then d is shifted by CL (s2 is routed through ECX by jit_cfixs) */
+#define jit_shift2(d, s1, s2, m) \
+ jit_op_(d, s1, jit_cfixs(s2, _ECX, m(_CL, d)))
+
+/* Substitute x for destination register d: save x, run op (which writes x), copy x to d, then restore x */
+#define jit_fixd(d, x, op) \
+ (jit_pushr_i(x), op, jit_movr_i(d, x), jit_popr_i(x))
+
+/* Conditionally substitute y for source register s: if s already is y just run op, otherwise save y, copy s into y, run op, and restore y */
+#define jit_cfixs(s, y, op) \
+ ((s) == (y) ? op : \
+ (jit_pushr_i(y), jit_movr_i(y, s), op, jit_popr_i(y)))
+
#define jit_lshi_i(d, rs, is) ((is) <= 3 ? LEALmr(0, 0, (rs), 1 << (is), (d)) : jit_op_ ((d), (rs), SHLLir((is), (d)) ))
#define jit_rshi_i(d, rs, is) jit_op_ ((d), (rs), SARLir((is), (d)) )
#define jit_rshi_ui(d, rs, is) jit_op_ ((d), (rs), SHRLir((is), (d)) )
-#define jit_lshr_i(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHLLrr(_CL, (d)) ))
-#define jit_rshr_i(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SARLrr(_CL, (d)) ))
-#define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) ))
+#define jit_lshr_i(d, r1, r2) jit_shift((d), (r1), (r2), SHLLrr)
+#define jit_rshr_i(d, r1, r2) jit_shift((d), (r1), (r2), SARLrr)
+#define jit_rshr_ui(d, r1, r2) jit_shift((d), (r1), (r2), SHRLrr)
/* Stack */
-#define jit_pushr_i(rs) PUSHLr(rs)
-#define jit_popr_i(rs) POPLr(rs)
-#define jit_prolog(n) (_jitl.framesize = 8, PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI))
-
-/* The += allows for stack pollution */
-
-#define jit_prepare_i(ni) (_jitl.argssize += (ni))
-#define jit_prepare_f(nf) (_jitl.argssize += (nf))
-#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd))
-#define jit_pusharg_i(rs) PUSHLr(rs)
-#define jit_finish(sub) (jit_calli((sub)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0)
-#define jit_finishr(reg) (jit_callr((reg)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0)
-#define jit_retval_i(rd) jit_movr_i ((rd), _EAX)
-
-#define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int))
-#define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int))
-#define jit_arg_s() ((_jitl.framesize += sizeof(int)) - sizeof(int))
-#define jit_arg_us() ((_jitl.framesize += sizeof(int)) - sizeof(int))
-#define jit_arg_i() ((_jitl.framesize += sizeof(int)) - sizeof(int))
-#define jit_arg_ui() ((_jitl.framesize += sizeof(int)) - sizeof(int))
-#define jit_arg_l() ((_jitl.framesize += sizeof(long)) - sizeof(long))
-#define jit_arg_ul() ((_jitl.framesize += sizeof(long)) - sizeof(long))
-#define jit_arg_p() ((_jitl.framesize += sizeof(long)) - sizeof(long))
-
-#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float))
-#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double))
+#define jit_retval_i(rd) ((void)jit_movr_i ((rd), _EAX))
/* Unary */
#define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) )
-#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) )
-#define jit_movr_i(d, rs) ((rs) == (d) ? 0 : MOVLrr((rs), (d)))
+#define jit_movr_i(d, rs) ((void)((rs) == (d) ? 0 : MOVLrr((rs), (d))))
#define jit_movi_i(d, is) ((is) ? MOVLir((is), (d)) : XORLrr ((d), (d)) )
-#define jit_movi_p(d, is) (MOVLir((is), (d)), _jit.x.pc)
-#define jit_patch_movi(pa,pv) (*_PSL((pa) - 4) = _jit_SL((pv)))
+#define jit_patch_movi(pa,pv) (*_PSL((pa) - sizeof(long)) = _jit_SL((pv)))
#define jit_ntoh_ui(d, rs) jit_op_((d), (rs), BSWAPLr(d))
#define jit_ntoh_us(d, rs) jit_op_((d), (rs), RORWir(8, d))
@@ -321,93 +294,73 @@ struct jit_local_state {
#define jit_gei_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETAEr, INCLr )
/* Jump */
-#define jit_bltr_i(label, s1, s2) jit_bra_r((s1), (s2), JLm(label, 0,0,0) )
-#define jit_bler_i(label, s1, s2) jit_bra_r((s1), (s2), JLEm(label,0,0,0) )
-#define jit_bgtr_i(label, s1, s2) jit_bra_r((s1), (s2), JGm(label, 0,0,0) )
-#define jit_bger_i(label, s1, s2) jit_bra_r((s1), (s2), JGEm(label,0,0,0) )
-#define jit_beqr_i(label, s1, s2) jit_bra_r((s1), (s2), JEm(label, 0,0,0) )
-#define jit_bner_i(label, s1, s2) jit_bra_r((s1), (s2), JNEm(label,0,0,0) )
-#define jit_bltr_ui(label, s1, s2) jit_bra_r((s1), (s2), JBm(label, 0,0,0) )
-#define jit_bler_ui(label, s1, s2) jit_bra_r((s1), (s2), JBEm(label,0,0,0) )
-#define jit_bgtr_ui(label, s1, s2) jit_bra_r((s1), (s2), JAm(label, 0,0,0) )
-#define jit_bger_ui(label, s1, s2) jit_bra_r((s1), (s2), JAEm(label,0,0,0) )
-#define jit_bmsr_i(label, s1, s2) (TESTLrr((s1), (s2)), JNZm(label,0,0,0), _jit.x.pc)
-#define jit_bmcr_i(label, s1, s2) (TESTLrr((s1), (s2)), JZm(label,0,0,0), _jit.x.pc)
-#define jit_boaddr_i(label, s1, s2) (ADDLrr((s2), (s1)), JOm(label,0,0,0), _jit.x.pc)
-#define jit_bosubr_i(label, s1, s2) (SUBLrr((s2), (s1)), JOm(label,0,0,0), _jit.x.pc)
-#define jit_boaddr_ui(label, s1, s2) (ADDLrr((s2), (s1)), JCm(label,0,0,0), _jit.x.pc)
-#define jit_bosubr_ui(label, s1, s2) (SUBLrr((s2), (s1)), JCm(label,0,0,0), _jit.x.pc)
-
-#define jit_blti_i(label, rs, is) jit_bra_i0((rs), (is), JLm(label, 0,0,0), JSm(label, 0,0,0) )
-#define jit_blei_i(label, rs, is) jit_bra_i ((rs), (is), JLEm(label,0,0,0) )
-#define jit_bgti_i(label, rs, is) jit_bra_i ((rs), (is), JGm(label, 0,0,0) )
-#define jit_bgei_i(label, rs, is) jit_bra_i0((rs), (is), JGEm(label,0,0,0), JNSm(label,0,0,0) )
-#define jit_beqi_i(label, rs, is) jit_bra_i0((rs), (is), JEm(label, 0,0,0), JEm(label, 0,0,0) )
-#define jit_bnei_i(label, rs, is) jit_bra_i0((rs), (is), JNEm(label,0,0,0), JNEm(label,0,0,0) )
-#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JBm(label, 0,0,0) )
-#define jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JBEm(label,0,0,0), JEm(label, 0,0,0) )
-#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JAm(label, 0,0,0), JNEm(label,0,0,0) )
-#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JAEm(label,0,0,0) )
-#define jit_boaddi_i(label, rs, is) (ADDLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc)
-#define jit_bosubi_i(label, rs, is) (SUBLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc)
-#define jit_boaddi_ui(label, rs, is) (ADDLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc)
-#define jit_bosubi_ui(label, rs, is) (SUBLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc)
-
-#define jit_bmsi_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JNZm(label,0,0,0), _jit.x.pc)
-#define jit_bmci_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JZm(label,0,0,0), _jit.x.pc)
-
-#define jit_jmpi(label) (JMPm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc)
-#define jit_calli(label) (CALLm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc)
-#define jit_callr(reg) (CALLsr(reg))
-#define jit_jmpr(reg) JMPsr(reg)
-#define jit_patch_at(jump_pc,v) (*_PSL((jump_pc) - 4) = _jit_SL((v) - (jump_pc)))
-#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_())
+#define jit_bltr_i(label, s1, s2) jit_bra_r((s1), (s2), JLm(label) ) /* jit_bra_r emits CMPLrr(s2, s1), then the jump; the macro's value is _jit.x.pc */
+#define jit_bler_i(label, s1, s2) jit_bra_r((s1), (s2), JLEm(label) )
+#define jit_bgtr_i(label, s1, s2) jit_bra_r((s1), (s2), JGm(label) )
+#define jit_bger_i(label, s1, s2) jit_bra_r((s1), (s2), JGEm(label) )
+#define jit_beqr_i(label, s1, s2) jit_bra_r((s1), (s2), JEm(label) )
+#define jit_bner_i(label, s1, s2) jit_bra_r((s1), (s2), JNEm(label) )
+#define jit_bltr_ui(label, s1, s2) jit_bra_r((s1), (s2), JBm(label) ) /* unsigned forms use the below/above jumps */
+#define jit_bler_ui(label, s1, s2) jit_bra_r((s1), (s2), JBEm(label) )
+#define jit_bgtr_ui(label, s1, s2) jit_bra_r((s1), (s2), JAm(label) )
+#define jit_bger_ui(label, s1, s2) jit_bra_r((s1), (s2), JAEm(label) )
+#define jit_bmsr_i(label, s1, s2) (TESTLrr((s1), (s2)), JNZm(label), _jit.x.pc) /* TEST of s1 against s2, then jump if the result is non-zero */
+#define jit_bmcr_i(label, s1, s2) (TESTLrr((s1), (s2)), JZm(label), _jit.x.pc)
+#define jit_boaddr_i(label, s1, s2) (ADDLrr((s2), (s1)), JOm(label), _jit.x.pc) /* the ADD/SUB is really emitted (s2 source, s1 destination), then the jump tests overflow */
+#define jit_bosubr_i(label, s1, s2) (SUBLrr((s2), (s1)), JOm(label), _jit.x.pc)
+#define jit_boaddr_ui(label, s1, s2) (ADDLrr((s2), (s1)), JCm(label), _jit.x.pc) /* unsigned forms jump on carry instead */
+#define jit_bosubr_ui(label, s1, s2) (SUBLrr((s2), (s1)), JCm(label), _jit.x.pc)
+
+#define jit_blti_i(label, rs, is) jit_bra_i0((rs), (is), JLm(label), JSm(label) ) /* with jit_bra_i0 the second jump is used when is == 0, where TESTLrr(rs, rs) replaces CMPLir */
+#define jit_blei_i(label, rs, is) jit_bra_i ((rs), (is), JLEm(label) )
+#define jit_bgti_i(label, rs, is) jit_bra_i ((rs), (is), JGm(label) )
+#define jit_bgei_i(label, rs, is) jit_bra_i0((rs), (is), JGEm(label), JNSm(label) )
+#define jit_beqi_i(label, rs, is) jit_bra_i0((rs), (is), JEm(label), JEm(label) )
+#define jit_bnei_i(label, rs, is) jit_bra_i0((rs), (is), JNEm(label), JNEm(label) )
+#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JBm(label) )
+#define jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JBEm(label), JEm(label) )
+#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JAm(label), JNEm(label) )
+#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JAEm(label) )
+#define jit_boaddi_i(label, rs, is) (ADDLir((is), (rs)), JOm(label), _jit.x.pc) /* the ADD/SUB of the immediate into rs is really emitted, then the jump tests overflow (signed) or carry (unsigned) */
+#define jit_bosubi_i(label, rs, is) (SUBLir((is), (rs)), JOm(label), _jit.x.pc)
+#define jit_boaddi_ui(label, rs, is) (ADDLir((is), (rs)), JCm(label), _jit.x.pc)
+#define jit_bosubi_ui(label, rs, is) (SUBLir((is), (rs)), JCm(label), _jit.x.pc)
+
+#define jit_bmsi_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JNZm(label), _jit.x.pc)
+#define jit_bmci_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JZm(label), _jit.x.pc)
+
+#define jit_jmpi(label) (JMPm( ((unsigned long) (label))), _jit.x.pc)
+#define jit_jmpr(reg) JMPsr(reg)
/* Memory */
-#define jit_ldi_c(d, is) MOVSBLmr((is), 0, 0, 0, (d))
-#define jit_ldr_c(d, rs) MOVSBLmr(0, (rs), 0, 0, (d))
-#define jit_ldxr_c(d, s1, s2) MOVSBLmr(0, (s1), (s2), 1, (d))
-#define jit_ldxi_c(d, rs, is) MOVSBLmr((is), (rs), 0, 0, (d))
-
-#define jit_ldi_uc(d, is) MOVZBLmr((is), 0, 0, 0, (d))
-#define jit_ldr_uc(d, rs) MOVZBLmr(0, (rs), 0, 0, (d))
-#define jit_ldxr_uc(d, s1, s2) MOVZBLmr(0, (s1), (s2), 1, (d))
-#define jit_ldxi_uc(d, rs, is) MOVZBLmr((is), (rs), 0, 0, (d))
-
-#define jit_sti_c(id, rs) jit_movbrm((rs), (id), 0, 0, 0)
+#define jit_ldr_uc(d, rs) MOVZBLmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_uc(d, s1, s2) MOVZBLmr(0, (s1), (s2), 1, (d))
+
#define jit_str_c(rd, rs) jit_movbrm((rs), 0, (rd), 0, 0)
#define jit_stxr_c(d1, d2, rs) jit_movbrm((rs), 0, (d1), (d2), 1)
-#define jit_stxi_c(id, rd, rs) jit_movbrm((rs), (id), (rd), 0, 0)
-
-#define jit_ldi_s(d, is) MOVSWLmr((is), 0, 0, 0, (d))
-#define jit_ldr_s(d, rs) MOVSWLmr(0, (rs), 0, 0, (d))
-#define jit_ldxr_s(d, s1, s2) MOVSWLmr(0, (s1), (s2), 1, (d))
-#define jit_ldxi_s(d, rs, is) MOVSWLmr((is), (rs), 0, 0, (d))
-
-#define jit_ldi_us(d, is) MOVZWLmr((is), 0, 0, 0, (d))
-#define jit_ldr_us(d, rs) MOVZWLmr(0, (rs), 0, 0, (d))
-#define jit_ldxr_us(d, s1, s2) MOVZWLmr(0, (s1), (s2), 1, (d))
-#define jit_ldxi_us(d, rs, is) MOVZWLmr((is), (rs), 0, 0, (d))
-
-#define jit_sti_s(id, rs) MOVWrm(jit_reg16(rs), (id), 0, 0, 0)
-#define jit_str_s(rd, rs) MOVWrm(jit_reg16(rs), 0, (rd), 0, 0)
-#define jit_stxr_s(d1, d2, rs) MOVWrm(jit_reg16(rs), 0, (d1), (d2), 1)
-#define jit_stxi_s(id, rd, rs) MOVWrm(jit_reg16(rs), (id), (rd), 0, 0)
-
-#define jit_ldi_i(d, is) MOVLmr((is), 0, 0, 0, (d))
-#define jit_ldr_i(d, rs) MOVLmr(0, (rs), 0, 0, (d))
-#define jit_ldxr_i(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d))
-#define jit_ldxi_i(d, rs, is) MOVLmr((is), (rs), 0, 0, (d))
-
-#define jit_sti_i(id, rs) MOVLrm((rs), (id), 0, 0, 0)
-#define jit_str_i(rd, rs) MOVLrm((rs), 0, (rd), 0, 0)
-#define jit_stxr_i(d1, d2, rs) MOVLrm((rs), 0, (d1), (d2), 1)
-#define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0)
-
+
+#define jit_ldr_us(d, rs) MOVZWLmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_us(d, s1, s2) MOVZWLmr(0, (s1), (s2), 1, (d))
+
+#define jit_str_s(rd, rs) MOVWrm(jit_reg16(rs), 0, (rd), 0, 0)
+#define jit_stxr_s(d1, d2, rs) MOVWrm(jit_reg16(rs), 0, (d1), (d2), 1)
+
+#define jit_str_i(rd, rs) MOVLrm((rs), 0, (rd), 0, 0)
+#define jit_stxr_i(d1, d2, rs) MOVLrm((rs), 0, (d1), (d2), 1)
+
/* Extra */
#define jit_nop() NOP_()
#define _jit_alignment(pc, n) (((pc ^ _MASK(4)) + 1) & _MASK(n))
#define jit_align(n) NOPi(_jit_alignment(_jit_UL(_jit.x.pc), (n)))
-#endif /* __lightning_core_h */
+
+#if LIGHTNING_CROSS \
+ ? LIGHTNING_TARGET == LIGHTNING_X86_64 \
+ : defined (__x86_64__)
+#include "i386/core-64.h"
+#else /* not x86-64 */
+#include "i386/core-32.h"
+#endif /* when LIGHTNING_CROSS is set the choice follows LIGHTNING_TARGET, otherwise the compiler's __x86_64__ */
+
+#endif /* __lightning_core_i386_h */
diff --git a/src/runtime/c/pgf/lightning/i386/fp-32.h b/src/runtime/c/pgf/lightning/i386/fp-32.h
new file mode 100644
index 000000000..009afc649
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/fp-32.h
@@ -0,0 +1,356 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Support macros for the i386 math coprocessor
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ ***********************************************************************/
+
+
+#ifndef __lightning_fp_h
+#define __lightning_fp_h
+
+/* We really must map the x87 stack onto a flat register file. In practice,
+ we can provide something sensible and make it work on the x86 using the
+ stack like a file of eight registers.
+
+ We use six or seven registers so as to have some freedom
+ for floor, ceil, round, (and log, tan, atn and exp).
+
+ Not hard at all, basically play with FXCH. FXCH is mostly free,
+ so the generated code is not bad. Of course we special case when one
+ of the operands turns out to be ST0.
+
+ Here are the macros that actually do the trick. */
+
+#define JIT_FPR_NUM 6
+#define JIT_FPRET 0
+#define JIT_FPR(i) (i)
+
+#define jit_fxch(rs, op) (((rs) != 0 ? FXCHr(rs) : 0), \
+ op, ((rs) != 0 ? FXCHr(rs) : 0))
+
+#define jit_fp_unary(rd, s1, op) \
+ ((rd) == (s1) ? jit_fxch ((rd), op) \
+ : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op) \
+ : (FLDr ((s1)), op, FSTPr ((rd) + 1)))
+
+#define jit_fp_binary(rd, s1, s2, op, opr) \
+ ((rd) == (s1) ? \
+ ((s2) == 0 ? opr(0, (rd)) \
+ : (s2) == (s1) ? jit_fxch((rd), op(0, 0)) \
+ : jit_fxch((rd), op((s2), 0))) \
+ : (rd) == (s2) ? \
+ ((s1) == 0 ? op(0, (rd)) \
+ : jit_fxch((rd), opr((s1), 0))) \
+ : (FLDr (s1), op((s2)+1, 0), FSTPr((rd)+1)))
+
+#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr)
+#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr)
+#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr)
+#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr)
+
+#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1))
+#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0))
+#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa))
+
+/* - moves:
+
+ move FPR0 to FPR3
+ FST ST3
+
+ move FPR3 to FPR0
+ FXCH ST3
+ FST ST3
+
+ move FPR3 to FPR1
+ FLD ST3
+ FSTP ST2 Stack is rotated, so FPRn becomes STn+1 */
+
+#define jit_movr_d(rd,s1) \
+ ((s1) == (rd) ? 0 \
+ : (s1) == 0 ? FSTr ((rd)) \
+ : (rd) == 0 ? (FXCHr ((s1)), FSTr ((s1))) \
+ : (FLDr ((s1)), FSTPr ((rd)+1)))
+
+/* - loads:
+
+ load into FPR0
+ FSTP ST0
+ FLD [FUBAR]
+
+ load into FPR3
+ FSTP ST3 Save old st0 into destination register
+ FLD [FUBAR]
+ FXCH ST3 Get back old st0
+
+ (and similarly for immediates, using the stack) */
+
+#define jit_movi_f(rd,immf) \
+ (_O (0x68), \
+ *((float *) _jit.x.pc) = (float) immf, \
+ _jit.x.uc_pc += sizeof (float), \
+ jit_ldr_f((rd), _ESP), \
+ ADDLir(4, _ESP))
+
+union jit_double_imm {
+ double d;
+ int i[2];
+};
+
+#define jit_movi_d(rd,immd) \
+ (_O (0x68), \
+ _jit.x.uc_pc[4] = 0x68, \
+ ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \
+ *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \
+ _jit.x.uc_pc += 9, \
+ jit_ldr_d((rd), _ESP), \
+ ADDLir(8, _ESP))
+
+#define jit_ldi_f(rd, is) \
+ ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \
+ : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldi_d(rd, is) \
+ ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \
+ : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldr_f(rd, rs) \
+ ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \
+ : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldr_d(rd, rs) \
+ ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \
+ : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxi_f(rd, rs, is) \
+ ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \
+ : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxi_d(rd, rs, is) \
+ ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \
+ : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxr_f(rd, s1, s2) \
+ ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \
+ : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
+
+#define jit_ldxr_d(rd, s1, s2) \
+ ((rd) == 0 ? (FSTPr (0), FLDLm(0, (s1), (s2), 1)) \
+ : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
+
+#define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \
+ ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0)) \
+ : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \
+ POPLr((rs)))
+
+#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0))
+#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1))
+#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0))
+#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1))
+#define jit_sti_f(id, rs) jit_fxch ((rs), FSTSm((id), 0, 0, 0))
+#define jit_str_f(rd, rs) jit_fxch ((rs), FSTSm(0, (rd), 0, 0))
+#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0))
+#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0))
+
+/* ABI */
+#define jit_retval_d(rd) FSTPr((rd) + 1)
+
+/* Assume round to near mode */
+#define jit_floorr_d_i(rd, rs) \
+ (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX)))
+
+#define jit_ceilr_d_i(rd, rs) \
+ (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX)))
+
+#define jit_truncr_d_i(rd, rs) \
+ (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX)))
+
+#define jit_calc_diff(ofs) \
+ FISTLm(ofs, _ESP, 0, 0), \
+ FILDLm(ofs, _ESP, 0, 0), \
+ FSUBRPr(1), \
+ FSTPSm(4+ofs, _ESP, 0, 0) \
+
+/* The real meat */
+#define jit_floor2(rd, aux) \
+ (PUSHLr(aux), \
+ SUBLir(8, _ESP), \
+ jit_calc_diff(0), \
+ POPLr(rd), /* floor in rd */ \
+ POPLr(aux), /* x-round(x) in aux */ \
+ ADDLir(0x7FFFFFFF, aux), /* carry if x-round(x) < -0 */ \
+ SBBLir(0, rd), /* subtract 1 if carry */ \
+ POPLr(aux))
+
+#define jit_ceil2(rd, aux) \
+ (PUSHLr(aux), \
+ SUBLir(8, _ESP), \
+ jit_calc_diff(0), \
+ POPLr(rd), /* floor in rd */ \
+ POPLr(aux), /* x-round(x) in aux */ \
+ TESTLrr(aux, aux), \
+ SETGr(jit_reg8(aux)), \
+ SHRLir(1, aux), \
+ ADCLir(0, rd), \
+ POPLr(aux))
+
+/* a mingling of the two above */
+#define jit_trunc2(rd, aux) \
+ (PUSHLr(aux), \
+ SUBLir(12, _ESP), \
+ FSTSm(0, _ESP, 0, 0), \
+ jit_calc_diff(4), \
+ POPLr(aux), \
+ POPLr(rd), \
+ TESTLrr(aux, aux), \
+ POPLr(aux), \
+ JSSm(_jit.x.pc + 11), \
+ ADDLir(0x7FFFFFFF, aux), /* 6 */ \
+ SBBLir(0, rd), /* 3 */ \
+ JMPSm(_jit.x.pc + 10), /* 2 */ \
+ TESTLrr(aux, aux), /* 2 */ \
+ SETGr(jit_reg8(aux)), /* 3 */ \
+ SHRLir(1, aux), /* 2 */ \
+ ADCLir(0, rd), /* 3 */ \
+ POPLr(aux))
+
+/* the easy one */
+#define jit_roundr_d_i(rd, rs) \
+ (PUSHLr(_EAX), \
+ jit_fxch ((rs), FISTLm(0, _ESP, 0, 0)), \
+ POPLr((rd)))
+
+#define jit_fp_test(d, s1, s2, n, _and, res) \
+ (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
+ ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \
+ FNSTSWr(_EAX), \
+ SHRLir(n, _EAX), \
+ ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \
+ res, \
+ ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */
+
+#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \
+ (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
+ PUSHLr(_EAX), \
+ FNSTSWr(_EAX), \
+ SHRLir(n, _EAX), \
+ ((_and) ? ANDLir ((_and), _EAX) : 0), \
+ ((cmp) ? CMPLir ((cmp), _EAX) : 0), \
+ POPLr(_EAX), \
+ res ((d)), \
+ _jit.x.pc)
+
+#define jit_nothing_needed(x)
+
+/* After FNSTSW we have 1 if <, 40 if =, 0 if >, 45 if unordered. Here
+ is how to map the values of the status word's high byte to the
+ conditions.
+
+ < = > unord valid values condition
+ gt no no yes no 0 STSW & 45 == 0
+ lt yes no no no 1 STSW & 45 == 1
+ eq no yes no no 40 STSW & 45 == 40
+ unord no no no yes 45 bit 2 == 1
+
+ ge no yes yes no 0, 40 bit 0 == 0
+ unlt yes no no yes 1, 45 bit 0 == 1
+ ltgt yes no yes no 0, 1 bit 6 == 0
+ uneq no yes no yes 40, 45 bit 6 == 1
+ le yes yes no no 1, 40 odd parity for STSW & 41
+ ungt no no yes yes 0, 45 even parity for STSW & 41
+
+ unle yes yes no yes 1, 40, 45 STSW & 45 != 0
+ unge no yes yes yes 0, 40, 45 STSW & 45 != 1
+ ne yes no yes yes 0, 1, 45 STSW & 45 != 40
+ ord yes yes yes no 0, 1, 40 bit 2 == 0
+
+ lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with
+ the operands swapped; it is more efficient this way. */
+
+#define jit_gtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETZr (_AL))
+#define jit_ger_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, SBBBir (-1, _AL))
+#define jit_unler_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETNZr (_AL))
+#define jit_unltr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, ADCBir (0, _AL))
+#define jit_ltr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETZr (_AL))
+#define jit_ler_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, SBBBir (-1, _AL))
+#define jit_unger_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETNZr (_AL))
+#define jit_ungtr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, ADCBir (0, _AL))
+#define jit_eqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETEr (_AL)))
+#define jit_ner_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETNEr (_AL)))
+#define jit_ltgtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, SBBBir (-1, _AL))
+#define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL))
+#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL))
+#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL))
+
+#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm)
+#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm)
+#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm)
+#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm)
+#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm)
+#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm)
+#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm)
+#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm)
+#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm)
+#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm)
+#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm)
+#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm)
+#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm)
+#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm)
+
+#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs)))
+#define jit_pusharg_f(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jit_str_f(JIT_SP,(rs)))
+
+
+#if 0
+#define jit_sin() _OO(0xd9fe) /* fsin */
+#define jit_cos() _OO(0xd9ff) /* fcos */
+#define jit_tan() (_OO(0xd9f2), /* fptan */ \
+ FSTPr(0)) /* fstp st */
+#define jit_atn() (_OO(0xd9e8), /* fld1 */ \
+ _OO(0xd9f3)) /* fpatan */
+#define jit_exp() (_OO(0xd9ea), /* fldl2e */ \
+ FMULPr(1), /* fmulp */ \
+ _OO(0xd9c0), /* fld st */ \
+ _OO(0xd9fc), /* frndint */ \
+ _OO(0xdce9), /* fsubr */ \
+ FXCHr(1), /* fxch st(1) */ \
+ _OO(0xd9f0), /* f2xm1 */ \
+ _OO(0xd9e8), /* fld1 */ \
+ _OO(0xdec1), /* faddp */ \
+ _OO(0xd9fd), /* fscale */ \
+ FSTPr(1)) /* fstp st(1) */
+#define jit_log() (_OO(0xd9ed), /* fldln2 */ \
+ FXCHr(1), /* fxch st(1) */ \
+ _OO(0xd9f1)) /* fyl2x */
+#endif
+
+#define jit_prepare_f(nf) (_jitl.argssize += (nf))
+#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd))
+#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float))
+#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double))
+
+#endif /* __lightning_fp_h */
diff --git a/src/runtime/c/pgf/lightning/i386/fp-64.h b/src/runtime/c/pgf/lightning/i386/fp-64.h
new file mode 100644
index 000000000..22308a7f1
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/fp-64.h
@@ -0,0 +1,325 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Support macros for SSE floating-point math
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2006 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ ***********************************************************************/
+
+
+#ifndef __lightning_fp_h
+#define __lightning_fp_h
+
+#include <float.h>
+
+#define JIT_FPR_NUM 7
+#define JIT_FPRET _XMM0
+#define JIT_FPR(i) (_XMM8 + (i))
+#define JIT_FPTMP _XMM15
+
+/* Expand op(rd, cnst, rs). When RS == RD, JIT_FPTMP is passed as both the
+ constant and the source operand; otherwise RD is the constant and RS the source. */
+#define jit_unop_tmp(rd, rs, op) \
+ ( (rs) == (rd) \
+ ? op((rd), JIT_FPTMP, JIT_FPTMP) \
+ : op((rd), (rd), (rs)))
+
+#define jit_unop_f(rd, rs, op) \
+ ((rs) == (rd) ? op((rd)) : (MOVSSrr ((rs), (rd)), op((rd))))
+
+#define jit_unop_d(rd, rs, op) \
+ ((rs) == (rd) ? op((rd)) : (MOVSDrr ((rs), (rd)), op((rd))))
+
+#define jit_3opc_f(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) ? op((s1), (rd)) \
+ : (MOVSSrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_3opc_d(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) ? op((s1), (rd)) \
+ : (MOVSDrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_3op_f(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) \
+ ? (MOVSSrr ((rd), JIT_FPTMP), MOVSSrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \
+ : (MOVSSrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_3op_d(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) \
+ ? (MOVSDrr ((rd), JIT_FPTMP), MOVSDrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \
+ : (MOVSDrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_addr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), ADDSSrr)
+#define jit_subr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), SUBSSrr)
+#define jit_mulr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), MULSSrr)
+#define jit_divr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), DIVSSrr)
+
+#define jit_addr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), ADDSDrr)
+#define jit_subr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), SUBSDrr)
+#define jit_mulr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), MULSDrr)
+#define jit_divr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), DIVSDrr)
+
+#define jit_movr_f(rd,rs) MOVSSrr((rs), (rd))
+#define jit_movr_d(rd,rs) MOVSDrr((rs), (rd))
+
+/* either pcmpeqd %xmm15, %xmm15 / psrld $1, %xmm15 / andps %xmm15, %RD (if RS = RD; xmm15 is JIT_FPTMP)
+ or pcmpeqd %RD, %RD / psrld $1, %RD / andps %RS, %RD (if RS != RD) */
+#define _jit_abs_f(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSRLDir (1, (cnst)), ANDPSrr ((rs), (rd)))
+#define _jit_neg_f(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSLLDir (31, (cnst)), XORPSrr ((rs), (rd)))
+#define jit_abs_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_f)
+#define jit_neg_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_f)
+
+#define _jit_abs_d(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSRLQir (1, (cnst)), ANDPDrr ((rs), (rd)))
+#define _jit_neg_d(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSLLQir (63, (cnst)), XORPDrr ((rs), (rd)))
+#define jit_abs_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_d)
+#define jit_neg_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_d)
+
+#define jit_sqrt_d(rd,rs) SQRTSDrr((rs), (rd)) /* double operand: scalar-double sqrt */
+#define jit_sqrt_f(rd,rs) SQRTSSrr((rs), (rd)) /* float operand: scalar-single sqrt */
+
+#define _jit_ldi_f(d, is) MOVSSmr((is), 0, 0, 0, (d))
+#define _jit_ldxi_f(d, rs, is) MOVSSmr((is), (rs), 0, 0, (d))
+#define jit_ldr_f(d, rs) MOVSSmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_f(d, s1, s2) MOVSSmr(0, (s1), (s2), 1, (d))
+
+#define _jit_sti_f(id, rs) MOVSSrm((rs), (id), 0, 0, 0)
+#define _jit_stxi_f(id, rd, rs) MOVSSrm((rs), (id), (rd), 0, 0)
+#define jit_str_f(rd, rs) MOVSSrm((rs), 0, (rd), 0, 0)
+#define jit_stxr_f(d1, d2, rs) MOVSSrm((rs), 0, (d1), (d2), 1)
+
+#define jit_ldi_f(d, is) (_u32P((long)(is)) ? _jit_ldi_f((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_f((d), JIT_REXTMP)))
+#define jit_sti_f(id, rs) (_u32P((long)(id)) ? _jit_sti_f((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_f (JIT_REXTMP, (rs))))
+#define jit_ldxi_f(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_f((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_f((d), (rs), JIT_REXTMP)))
+#define jit_stxi_f(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_f((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_f (JIT_REXTMP, (rd), (rs))))
+
+#define _jit_ldi_d(d, is) MOVSDmr((is), 0, 0, 0, (d))
+#define _jit_ldxi_d(d, rs, is) MOVSDmr((is), (rs), 0, 0, (d))
+#define jit_ldr_d(d, rs) MOVSDmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_d(d, s1, s2) MOVSDmr(0, (s1), (s2), 1, (d))
+
+#define _jit_sti_d(id, rs) MOVSDrm((rs), (id), 0, 0, 0)
+#define _jit_stxi_d(id, rd, rs) MOVSDrm((rs), (id), (rd), 0, 0)
+#define jit_str_d(rd, rs) MOVSDrm((rs), 0, (rd), 0, 0)
+#define jit_stxr_d(d1, d2, rs) MOVSDrm((rs), 0, (d1), (d2), 1)
+
+#define jit_ldi_d(d, is) (_u32P((long)(is)) ? _jit_ldi_d((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_d((d), JIT_REXTMP)))
+#define jit_sti_d(id, rs) (_u32P((long)(id)) ? _jit_sti_d((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_d (JIT_REXTMP, (rs))))
+#define jit_ldxi_d(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_d((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_d((d), (rs), JIT_REXTMP)))
+#define jit_stxi_d(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_d((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_d (JIT_REXTMP, (rd), (rs))))
+
+
+#define jit_movi_f(rd,immf) \
+ ((immf) == 0.0 ? XORSSrr ((rd), (rd)) : \
+ (PUSHQi (0x12345678L), \
+ *((float *) (_jit.x.uc_pc - 4)) = (float) immf, \
+ jit_ldr_f((rd), _ESP), \
+ ADDQir(8, _ESP)))
+
+union jit_double_imm {
+ double d;
+ long l;
+};
+
+#define jit_movi_d(rd,immd) \
+ ((immd) == 0.0 ? XORSDrr ((rd), (rd)) : \
+ (_O (0x50), \
+ MOVQir (0x123456789abcdef0L, _EAX), \
+ ((union jit_double_imm *) (_jit.x.uc_pc - 8))->d = (double) immd, \
+ _O (0x50), jit_ldr_d((rd), _ESP), \
+ _O (0x58), _O (0x58)))
+
+#define jit_extr_i_d(rd, rs) CVTSI2SDLrr((rs), (rd))
+#define jit_extr_i_f(rd, rs) CVTSI2SSLrr((rs), (rd))
+#define jit_extr_l_d(rd, rs) CVTSI2SDQrr((rs), (rd))
+#define jit_extr_l_f(rd, rs) CVTSI2SSQrr((rs), (rd))
+#define jit_extr_f_d(rd, rs) CVTSS2SDrr((rs), (rd))
+#define jit_extr_d_f(rd, rs) CVTSD2SSrr((rs), (rd))
+#define jit_roundr_d_i(rd, rs) CVTSD2SILrr((rs), (rd))
+#define jit_roundr_f_i(rd, rs) CVTSS2SILrr((rs), (rd))
+#define jit_roundr_d_l(rd, rs) CVTSD2SIQrr((rs), (rd))
+#define jit_roundr_f_l(rd, rs) CVTSS2SIQrr((rs), (rd))
+#define jit_truncr_d_i(rd, rs) CVTTSD2SILrr((rs), (rd))
+#define jit_truncr_f_i(rd, rs) CVTTSS2SILrr((rs), (rd))
+#define jit_truncr_d_l(rd, rs) CVTTSD2SIQrr((rs), (rd))
+#define jit_truncr_f_l(rd, rs) CVTTSS2SIQrr((rs), (rd))
+
+
+#define jit_ceilr_f_i(rd, rs) do { \
+ jit_roundr_f_i ((rd), (rs)); \
+ jit_extr_i_f (JIT_FPTMP, (rd)); \
+ UCOMISSrr ((rs), JIT_FPTMP); \
+ ADCLir (0, (rd)); \
+ } while (0)
+
+#define jit_ceilr_d_i(rd, rs) do { \
+ jit_roundr_d_i ((rd), (rs)); \
+ jit_extr_i_d (JIT_FPTMP, (rd)); \
+ UCOMISDrr ((rs), JIT_FPTMP); \
+ ADCLir (0, (rd)); \
+ } while (0)
+
+#define jit_ceilr_f_l(rd, rs) do { \
+ jit_roundr_f_l ((rd), (rs)); /* rd = CVTSS2SIQ of rs (64-bit result) */ \
+ jit_extr_l_f (JIT_FPTMP, (rd)); /* JIT_FPTMP = rd converted back to float */ \
+ UCOMISSrr ((rs), JIT_FPTMP); /* flags from this compare feed the ADC */ \
+ ADCQir (0, (rd)); /* 64-bit ADC: a 32-bit ADC would clear the upper half of rd */ \
+ } while (0)
+
+#define jit_ceilr_d_l(rd, rs) do { \
+ jit_roundr_d_l ((rd), (rs)); /* rd = CVTSD2SIQ of rs (64-bit result) */ \
+ jit_extr_l_d (JIT_FPTMP, (rd)); /* JIT_FPTMP = rd converted back to double */ \
+ UCOMISDrr ((rs), JIT_FPTMP); /* flags from this compare feed the ADC */ \
+ ADCQir (0, (rd)); /* 64-bit ADC: a 32-bit ADC would clear the upper half of rd */ \
+ } while (0)
+
+#define jit_floorr_f_i(rd, rs) do { \
+ jit_roundr_f_i ((rd), (rs)); \
+ jit_extr_i_f (JIT_FPTMP, (rd)); \
+ UCOMISSrr (JIT_FPTMP, (rs)); \
+ SBBLir (0, (rd)); \
+ } while (0)
+
+#define jit_floorr_d_i(rd, rs) do { \
+ jit_roundr_d_i ((rd), (rs)); \
+ jit_extr_i_d (JIT_FPTMP, (rd)); \
+ UCOMISDrr (JIT_FPTMP, (rs)); \
+ SBBLir (0, (rd)); \
+ } while (0)
+
+#define jit_floorr_f_l(rd, rs) do { \
+ jit_roundr_f_l ((rd), (rs)); /* rd = CVTSS2SIQ of rs (64-bit result) */ \
+ jit_extr_l_f (JIT_FPTMP, (rd)); /* JIT_FPTMP = rd converted back to float */ \
+ UCOMISSrr (JIT_FPTMP, (rs)); /* flags from this compare feed the SBB */ \
+ SBBQir (0, (rd)); /* 64-bit SBB: a 32-bit SBB would clear the upper half of rd */ \
+ } while (0)
+
+#define jit_floorr_d_l(rd, rs) do { \
+ jit_roundr_d_l ((rd), (rs)); /* rd = CVTSD2SIQ of rs (64-bit result) */ \
+ jit_extr_l_d (JIT_FPTMP, (rd)); /* JIT_FPTMP = rd converted back to double */ \
+ UCOMISDrr (JIT_FPTMP, (rs)); /* flags from this compare feed the SBB */ \
+ SBBQir (0, (rd)); /* 64-bit SBB: a 32-bit SBB would clear the upper half of rd */ \
+ } while (0)
+
+#define jit_bltr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAm ((d)), _jit.x.pc)
+#define jit_bler_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAEm ((d)), _jit.x.pc)
+#define jit_beqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a06), JEm ((d)), _jit.x.pc)
+#define jit_bner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d))), _jit.x.pc) /* JP to JMP, JZ past JMP */
+#define jit_bger_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAEm ((d)), _jit.x.pc)
+#define jit_bgtr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAm ((d)), _jit.x.pc)
+#define jit_bunltr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAEm ((d)), _jit.x.pc)
+#define jit_bunler_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAm ((d)), _jit.x.pc)
+#define jit_buneqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JEm ((d)), _jit.x.pc)
+#define jit_bltgtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNEm ((d)), _jit.x.pc)
+#define jit_bunger_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAm ((d)), _jit.x.pc)
+#define jit_bungtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAEm ((d)), _jit.x.pc)
+#define jit_bordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNPm ((d)), _jit.x.pc)
+#define jit_bunordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JPm ((d)), _jit.x.pc)
+
+#define jit_bltr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAm ((d)), _jit.x.pc)
+#define jit_bler_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAEm ((d)), _jit.x.pc)
+#define jit_beqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a06), JEm ((d)), _jit.x.pc)
+#define jit_bner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d))), _jit.x.pc) /* JP to JMP, JZ past JMP */
+#define jit_bger_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAEm ((d)), _jit.x.pc)
+#define jit_bgtr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAm ((d)), _jit.x.pc)
+#define jit_bunltr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAEm ((d)), _jit.x.pc) /* value is the pc after the emitted jump, as in the sibling macros */
+#define jit_bunler_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAm ((d)), _jit.x.pc)
+#define jit_buneqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JEm ((d)), _jit.x.pc)
+#define jit_bltgtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNEm ((d)), _jit.x.pc)
+#define jit_bunger_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAm ((d)), _jit.x.pc)
+#define jit_bungtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAEm ((d)), _jit.x.pc)
+#define jit_bordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNPm ((d)), _jit.x.pc)
+#define jit_bunordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JPm ((d)), _jit.x.pc)
+
+#define jit_ltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAr (jit_reg8((d))))
+#define jit_ler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAEr (jit_reg8((d))))
+#define jit_eqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
+#define jit_ner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
+#define jit_ger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAEr (jit_reg8((d))))
+#define jit_gtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAr (jit_reg8((d))))
+#define jit_unltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
+#define jit_unler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAr (jit_reg8((d))))
+#define jit_uneqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETEr (jit_reg8((d))))
+#define jit_ltgtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNEr (jit_reg8((d))))
+#define jit_unger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAr (jit_reg8((d))))
+#define jit_ungtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
+#define jit_ordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNPr (jit_reg8((d))))
+#define jit_unordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETPr (jit_reg8((d))))
+
+#define jit_ltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAr (jit_reg8((d))))
+#define jit_ler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAEr (jit_reg8((d))))
+#define jit_eqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
+#define jit_ner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
+#define jit_ger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAEr (jit_reg8((d))))
+#define jit_gtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAr (jit_reg8((d))))
+#define jit_unltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
+#define jit_unler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAr (jit_reg8((d))))
+#define jit_uneqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETEr (jit_reg8((d))))
+#define jit_ltgtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNEr (jit_reg8((d))))
+#define jit_unger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAr (jit_reg8((d))))
+#define jit_ungtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
+#define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d))))
+#define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d))))
+
+#define jit_prepare_f(num) ((_jitl.nextarg_putfp + (num) > JIT_FP_ARG_MAX \
+ ? (_jitl.argssize += _jitl.nextarg_putfp + (num) - JIT_FP_ARG_MAX, \
+ _jitl.fprssize = JIT_FP_ARG_MAX) \
+ : (_jitl.fprssize += (num))), \
+ _jitl.nextarg_putfp += (num))
+#define jit_prepare_d(num) ((_jitl.nextarg_putfp + (num) > JIT_FP_ARG_MAX \
+ ? (_jitl.argssize += _jitl.nextarg_putfp + (num) - JIT_FP_ARG_MAX, \
+ _jitl.fprssize = JIT_FP_ARG_MAX) \
+ : (_jitl.fprssize += (num))), \
+ _jitl.nextarg_putfp += (num))
+
+#define jit_arg_f() (_jitl.nextarg_getfp < JIT_FP_ARG_MAX \
+ ? _jitl.nextarg_getfp++ \
+ : ((_jitl.framesize += sizeof(double)) - sizeof(double)))
+#define jit_arg_d() (_jitl.nextarg_getfp < JIT_FP_ARG_MAX \
+ ? _jitl.nextarg_getfp++ \
+ : ((_jitl.framesize += sizeof(double)) - sizeof(double)))
+
+#define jit_getarg_f(reg, ofs) ((ofs) < JIT_FP_ARG_MAX \
+ ? jit_movr_f((reg), _XMM0 + (ofs)) \
+ : jit_ldxi_f((reg), JIT_FP, (ofs)))
+#define jit_getarg_d(reg, ofs) ((ofs) < JIT_FP_ARG_MAX \
+ ? jit_movr_d((reg), _XMM0 + (ofs)) \
+ : jit_ldxi_d((reg), JIT_FP, (ofs)))
+
+#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp >= JIT_FP_ARG_MAX \
+ ? (SUBQir(sizeof(double), JIT_SP), jit_str_f(JIT_SP,(rs))) \
+ : jit_movr_f(_XMM0 + _jitl.nextarg_putfp, (rs)))
+#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp >= JIT_FP_ARG_MAX \
+ ? (SUBQir(sizeof(double), JIT_SP), jit_str_d(JIT_SP,(rs))) \
+ : jit_movr_d(_XMM0 + _jitl.nextarg_putfp, (rs)))
+
+#endif /* __lightning_fp_h */
diff --git a/src/runtime/c/pgf/lightning/i386/fp.h b/src/runtime/c/pgf/lightning/i386/fp.h
index 0d2725563..a4942fcf6 100644
--- a/src/runtime/c/pgf/lightning/i386/fp.h
+++ b/src/runtime/c/pgf/lightning/i386/fp.h
@@ -1,20 +1,20 @@
/******************************** -*- C -*- ****************************
*
- * Run-time assembler & support macros for the i386 math coprocessor
+ * Floating-point support (i386)
*
***********************************************************************/
/***********************************************************************
*
- * Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+ * Copyright 2008 Free Software Foundation, Inc.
* Written by Paolo Bonzini.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1, or (at your option)
+ * by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
@@ -24,324 +24,22 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
- * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
*
***********************************************************************/
-#ifndef __lightning_asm_fp_h
-#define __lightning_asm_fp_h
-/* We really must map the x87 stack onto a flat register file. In practice,
- we can provide something sensible and make it work on the x86 using the
- stack like a file of eight registers.
+#ifndef __lightning_fp_i386_h
+#define __lightning_fp_i386_h
- We use six or seven registers so as to have some freedom
- for floor, ceil, round, (and log, tan, atn and exp).
-
- Not hard at all, basically play with FXCH. FXCH is mostly free,
- so the generated code is not bad. Of course we special case when one
- of the operands turns out to be ST0.
-
- Here are the macros that actually do the trick. */
-
-#define JIT_FPR_NUM 6
-#define JIT_FPR(i) (i)
-
-#define jit_fxch(rs, op) (((rs) != 0 ? FXCHr(rs) : 0), \
- op, ((rs) != 0 ? FXCHr(rs) : 0))
-
-#define jit_fp_unary(rd, s1, op) \
- ((rd) == (s1) ? jit_fxch ((rd), op) \
- : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op) \
- : (FLDr ((s1)), op, FSTPr ((rd))))
-
-#define jit_fp_binary(rd, s1, s2, op, opr) \
- ((rd) == (s1) ? \
- ((s2) == 0 ? opr(0, (rd)) \
- : (s2) == (s1) ? jit_fxch((rd), op(0, 0)) \
- : jit_fxch((rd), op((s2), 0))) \
- : (rd) == (s2) ? jit_fxch((s1), opr(0, (rd) == 0 ? (s1) : (rd))) \
- : (FLDr (s1), op(0, (s2)+1), FSTPr((rd)+1)))
-
-#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr)
-#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr)
-#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr)
-#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr)
-
-#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1))
-#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0))
-#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa))
-
-/* - moves:
-
- move FPR0 to FPR3
- FST ST3
-
- move FPR3 to FPR0
- FXCH ST3
- FST ST3
-
- move FPR3 to FPR1
- FLD ST1
- FST ST4 Stack is rotated, so FPRn becomes STn+1 */
-
-#define jit_movr_d(rd,s1) \
- ((s1) == (rd) ? 0 \
- : (s1) == 0 ? FSTr ((rd)) \
- : (rd) == 0 ? (FXCHr ((s1)), FSTr ((s1))) \
- : (FLDr ((s1)), FSTr ((rd)+1)))
-
-/* - loads:
-
- load into FPR0
- FSTP ST0
- FLD [FUBAR]
-
- load into FPR3
- FSTP ST3 Save old st0 into destination register
- FLD [FUBAR]
- FXCH ST3 Get back old st0
-
- (and similarly for immediates, using the stack) */
-
-#define jit_movi_f(rd,immf) \
- (_O (0x68), \
- *((float *) _jit.x.pc) = (float) immf, \
- _jit.x.uc_pc += sizeof (float), \
- jit_ldr_f((rd), _ESP), \
- ADDLir(4, _ESP))
-
-union jit_double_imm {
- double d;
- int i[2];
-};
-
-#define jit_movi_d(rd,immd) \
- (_O (0x68), \
- _jit.x.uc_pc[4] = 0x68, \
- ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \
- *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \
- _jit.x.uc_pc += 9, \
- jit_ldr_d((rd), _ESP), \
- ADDLir(8, _ESP))
-
-#define jit_ldi_f(rd, is) \
- ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \
- : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1)))
-
-#define jit_ldi_d(rd, is) \
- ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \
- : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1)))
-
-#define jit_ldr_f(rd, rs) \
- ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \
- : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
-
-#define jit_ldr_d(rd, rs) \
- ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \
- : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
-
-#define jit_ldxi_f(rd, rs, is) \
- ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \
- : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
-
-#define jit_ldxi_d(rd, rs, is) \
- ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \
- : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
-
-#define jit_ldxr_f(rd, s1, s2) \
- ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \
- : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
-
-#define jit_ldxr_d(rd, s1, s2) \
- ((rd) == 0 ? (FSTPr (0), FLDLm(0, (s1), (s2), 1)) \
- : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
-
-#define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \
- ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0)) \
- : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \
- POPLr((rs)))
-
-#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0))
-#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1))
-#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0))
-#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1))
-#define jit_sti_f(id, rs) jit_fxch ((rs), FSTSm((id), 0, 0, 0))
-#define jit_str_f(rd, rs) jit_fxch ((rs), FSTSm(0, (rd), 0, 0))
-#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0))
-#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0))
-
-/* Assume round to near mode */
-#define jit_floorr_d_i(rd, rs) \
- (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX)))
-
-#define jit_ceilr_d_i(rd, rs) \
- (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX)))
-
-#define jit_truncr_d_i(rd, rs) \
- (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX)))
-
-#define jit_calc_diff(ofs) \
- FISTLm(ofs, _ESP, 0, 0), \
- FILDLm(ofs, _ESP, 0, 0), \
- FSUBRPr(1), \
- FSTPSm(4+ofs, _ESP, 0, 0) \
-
-/* The real meat */
-#define jit_floor2(rd, aux) \
- (PUSHLr(aux), \
- SUBLir(8, _ESP), \
- jit_calc_diff(0), \
- POPLr(rd), /* floor in rd */ \
- POPLr(aux), /* x-round(x) in aux */ \
- ADDLir(0x7FFFFFFF, aux), /* carry if x-round(x) < -0 */ \
- SBBLir(0, rd), /* subtract 1 if carry */ \
- POPLr(aux))
-
-#define jit_ceil2(rd, aux) \
- (PUSHLr(aux), \
- SUBLir(8, _ESP), \
- jit_calc_diff(0), \
- POPLr(rd), /* floor in rd */ \
- POPLr(aux), /* x-round(x) in aux */ \
- TESTLrr(aux, aux), \
- SETGr(jit_reg8(aux)), \
- SHRLir(1, aux), \
- ADCLir(0, rd), \
- POPLr(aux))
-
-/* a mingling of the two above */
-#define jit_trunc2(rd, aux) \
- (PUSHLr(aux), \
- SUBLir(12, _ESP), \
- FSTSm(0, _ESP, 0, 0), \
- jit_calc_diff(4), \
- POPLr(aux), \
- POPLr(rd), \
- TESTLrr(aux, aux), \
- POPLr(aux), \
- JSSm(_jit.x.pc + 11, 0, 0, 0), \
- ADDLir(0x7FFFFFFF, aux), /* 6 */ \
- SBBLir(0, rd), /* 3 */ \
- JMPSm(_jit.x.pc + 10, 0, 0, 0), /* 2 */ \
- TESTLrr(aux, aux), /* 2 */ \
- SETGr(jit_reg8(aux)), /* 3 */ \
- SHRLir(1, aux), /* 2 */ \
- ADCLir(0, rd), /* 3 */ \
- POPLr(aux))
-
-/* the easy one */
-#define jit_roundr_d_i(rd, rs) \
- (PUSHLr(_EAX), \
- jit_fxch ((rs), FISTPLm(0, _ESP, 0, 0)), \
- POPLr((rd)))
-
-#define jit_fp_test(d, s1, s2, n, _and, res) \
- (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
- ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \
- FNSTSWr(_EAX), \
- SHRLir(n, _EAX), \
- ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \
- res, \
- ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */
-
-#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \
- (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
- PUSHLr(_EAX), \
- FNSTSWr(_EAX), \
- SHRLir(n, _EAX), \
- ((_and) ? ANDLir ((_and), _EAX) : 0), \
- ((cmp) ? CMPLir ((cmp), _AL) : 0), \
- POPLr(_EAX), \
- res ((d), 0, 0, 0))
-
-#define jit_nothing_needed(x)
-
-/* After FNSTSW we have 1 if <, 40 if =, 0 if >, 45 if unordered. Here
- is how to map the values of the status word's high byte to the
- conditions.
-
- < = > unord valid values condition
- gt no no yes no 0 STSW & 45 == 0
- lt yes no no no 1 STSW & 45 == 1
- eq no yes no no 40 STSW & 45 == 40
- unord no no no yes 45 bit 2 == 1
-
- ge no yes no no 0, 40 bit 0 == 0
- unlt yes no no yes 1, 45 bit 0 == 1
- ltgt yes no yes no 0, 1 bit 6 == 0
- uneq no yes no yes 40, 45 bit 6 == 1
- le yes yes no no 1, 40 odd parity for STSW & 41
- ungt no no yes yes 0, 45 even parity for STSW & 41
-
- unle yes yes no yes 1, 40, 45 STSW & 45 != 0
- unge no yes yes yes 0, 40, 45 STSW & 45 != 1
- ne yes no yes yes 0, 1, 45 STSW & 45 != 40
- ord yes yes yes no 0, 1, 40 bit 2 == 0
-
- lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with
- the operands swapped; it is more efficient this way. */
-
-#define jit_gtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETZr (_AL))
-#define jit_ger_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, SBBBir (-1, _AL))
-#define jit_unler_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETNZr (_AL))
-#define jit_unltr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, ADCBir (0, _AL))
-#define jit_ltr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETZr (_AL))
-#define jit_ler_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, SBBBir (-1, _AL))
-#define jit_unger_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETNZr (_AL))
-#define jit_ungtr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, ADCBir (0, _AL))
-#define jit_eqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETEr (_AL)))
-#define jit_ner_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETNEr (_AL)))
-#define jit_ltgtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, SBBBir (-1, _AL))
-#define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL))
-#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL))
-#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL))
-
-#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm)
-#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm)
-#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm)
-#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm)
-#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm)
-#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm)
-#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm)
-#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm)
-#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm)
-#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm)
-#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm)
-#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm)
-#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm)
-#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm)
-
-#define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs))
-#define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs))
-#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs)))
-#define jit_pusharg_f(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jit_str_f(JIT_SP,(rs)))
-#define jit_retval_d(op1) jit_movr_d(0, (op1))
-
-
-#if 0
-#define jit_sin() _OO(0xd9fe) /* fsin */
-#define jit_cos() _OO(0xd9ff) /* fcos */
-#define jit_tan() (_OO(0xd9f2), /* fptan */ \
- FSTPr(0)) /* fstp st */
-#define jit_atn() (_OO(0xd9e8), /* fld1 */ \
- _OO(0xd9f3)) /* fpatan */
-#define jit_exp() (_OO(0xd9ea), /* fldl2e */ \
- FMULPr(1), /* fmulp */ \
- _OO(0xd9c0), /* fld st */ \
- _OO(0xd9fc), /* frndint */ \
- _OO(0xdce9), /* fsubr */ \
- FXCHr(1), /* fxch st(1) */ \
- _OO(0xd9f0), /* f2xm1 */ \
- _OO(0xd9e8), /* fld1 */ \
- _OO(0xdec1), /* faddp */ \
- _OO(0xd9fd), /* fscale */ \
- FSTPr(1)) /* fstp st(1) */
-#define jit_log() (_OO(0xd9ed), /* fldln2 */ \
- FXCHr(1), /* fxch st(1) */ \
- _OO(0xd9f1)) /* fyl2x */
+#if LIGHTNING_CROSS \
+ ? LIGHTNING_TARGET == LIGHTNING_X86_64 \
+ : defined (__x86_64__) /* with LIGHTNING_CROSS nonzero the choice follows LIGHTNING_TARGET; otherwise it follows the compiler's __x86_64__ */
+#include "i386/fp-64.h" /* taken when the condition above selects x86-64 */
+#else
+#include "i386/fp-32.h" /* taken in every other case */
#endif
-#endif /* __lightning_asm_h */
+#endif /* __lightning_fp_i386_h */
diff --git a/src/runtime/c/pgf/lightning/i386/funcs.h b/src/runtime/c/pgf/lightning/i386/funcs.h
index e90cfa430..c35849956 100644
--- a/src/runtime/c/pgf/lightning/i386/funcs.h
+++ b/src/runtime/c/pgf/lightning/i386/funcs.h
@@ -7,14 +7,14 @@
/***********************************************************************
*
- * Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+ * Copyright 2000, 2001, 2002, 2006 Free Software Foundation, Inc.
* Written by Paolo Bonzini.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1, or (at your option)
+ * by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
@@ -24,8 +24,8 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
- * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
- * MA 02111-1307, USA.
+ * Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
*
***********************************************************************/
@@ -62,7 +62,7 @@ jit_flush_code(void *dest, void *end)
page_size = sysconf (_SC_PAGESIZE);
#endif
- page = (unsigned long) dest & ~(page_size - 1);
+ page = (long) dest & ~(page_size - 1);
length = ((char *) end - (char *) page + page_size - 1) & ~(page_size - 1);
/* Simple-minded attempt at optimizing the common case where a single
@@ -79,7 +79,8 @@ jit_flush_code(void *dest, void *end)
/* See if we can extend the previously mprotect'ed memory area towards
lower addresses: the highest address remains the same as before. */
- else if (page < prev_page && page + length <= prev_page + prev_length)
+ else if (page < prev_page && page + length >= prev_page
+ && page + length <= prev_page + prev_length)
prev_length += prev_page - page, prev_page = page;
/* Nothing to do, replace the area. */