diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2013-06-17 07:26:00 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2013-06-17 07:26:00 +0000 |
| commit | 407f680bae1834d5fa5bb27605097e436586663c (patch) | |
| tree | c26f3b5b2573b1f94007a0da8466c0d5990dffb9 /src/runtime/c/pgf/lightning/i386 | |
| parent | 8abd16bace315b5d306d6cec1adfe4766e65935d (diff) | |
add the source code for GNU lightning in the source directory for the C runtime
Diffstat (limited to 'src/runtime/c/pgf/lightning/i386')
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/asm.h | 1062 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/core.h | 413 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/fp.h | 347 | ||||
| -rw-r--r-- | src/runtime/c/pgf/lightning/i386/funcs.h | 91 |
4 files changed, 1913 insertions, 0 deletions
diff --git a/src/runtime/c/pgf/lightning/i386/asm.h b/src/runtime/c/pgf/lightning/i386/asm.h new file mode 100644 index 000000000..fcc364c05 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/asm.h @@ -0,0 +1,1062 @@ +/******************************** -*- C -*- **************************** + * + * Run-time assembler for the i386 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 1999, 2000, 2001, 2002 Ian Piumarta + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. 
+ * + ***********************************************************************/ + + + + +#ifndef __lightning_asm_h +#define __lightning_asm_h + +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + */ + + +typedef _uc jit_insn; + +#ifndef LIGHTNING_DEBUG +#define _b00 0 +#define _b01 1 +#define _b10 2 +#define _b11 3 + +#define _b000 0 +#define _b001 1 +#define _b010 2 +#define _b011 3 +#define _b100 4 +#define _b101 5 +#define _b110 6 +#define _b111 7 + +/*** REGISTERS ***/ /* [size,,number] */ + + +#define _AL 0x10 +#define _CL 0x11 +#define _DL 0x12 +#define _BL 0x13 +#define _AH 0x14 +#define _CH 0x15 +#define _DH 0x16 +#define _BH 0x17 + +#define _AX 0x20 +#define _CX 0x21 +#define _DX 0x22 +#define _BX 0x23 +#define _SP 0x24 +#define _BP 0x25 +#define _SI 0x26 +#define _DI 0x27 + +#define _EAX 0x40 +#define _ECX 0x41 +#define _EDX 0x42 +#define _EBX 0x43 +#define _ESP 0x44 +#define _EBP 0x45 +#define _ESI 0x46 +#define _EDI 0x47 + +#define _ST0 0 +#define _ST1 1 +#define _ST2 2 +#define _ST3 3 +#define _ST4 4 +#define _ST5 5 +#define _ST6 6 +#define _ST7 7 + +#define _rS(R) ((R)>>4) +#define _rN(R) ((R)&0x7) +#define _r0P(R) ((R)==0) + +#ifndef _ASM_SAFETY +#define _r1(R) _rN(R) +#define _r2(R) _rN(R) +#define _r4(R) _rN(R) +#else +#define _r1(R) ((_rS(R)==1) ? _rN(R) : JITFAIL( "8-bit register required")) +#define _r2(R) ((_rS(R)==2) ? _rN(R) : JITFAIL("16-bit register required")) +#define _r4(R) ((_rS(R)==4) ? 
_rN(R) : JITFAIL("32-bit register required")) +#endif + +/*** ASSEMBLER ***/ + +#define _OFF4(D) (_jit_UL(D) - _jit_UL(_jit.x.pc)) +#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) + +#define _D8(D) (_jit_B(0), ((*(_PUC(_jit.x.pc)-1))= _CKD8(D))) +#define _D32(D) (_jit_L(0), ((*(_PUL(_jit.x.pc)-1))= _OFF4(D))) + +#ifndef _ASM_SAFETY +# define _M(M) (M) +# define _r(R) (R) +# define _m(M) (M) +# define _s(S) (S) +# define _i(I) (I) +# define _b(B) (B) +# define _noESP(I,OK) (OK) +#else +# define _M(M) (((M)>3) ? JITFAIL("internal error: mod = " #M) : (M)) +# define _r(R) (((R)>7) ? JITFAIL("internal error: reg = " #R) : (R)) +# define _m(M) (((M)>7) ? JITFAIL("internal error: r/m = " #M) : (M)) +# define _s(S) (((S)>3) ? JITFAIL("internal error: memory scale = " #S) : (S)) +# define _i(I) (((I)>7) ? JITFAIL("internal error: memory index = " #I) : (I)) +# define _b(B) (((B)>7) ? JITFAIL("internal error: memory base = " #B) : (B)) +# define _noESP(I,OK) (((I)==_ESP) ? JITFAIL("illegal index register: %esp") : (OK)) +#endif + +#define _Mrm(Md,R,M) _jit_B((_M(Md)<<6)|(_r(R)<<3)|_m(M)) +#define _SIB(Sc,I, B) _jit_B((_s(Sc)<<6)|(_i(I)<<3)|_b(B)) + +#define _SCL(S) ((((S)==1) ? _b00 : \ + (((S)==2) ? _b01 : \ + (((S)==4) ? _b10 : \ + (((S)==8) ? _b11 : JITFAIL("illegal scale: " #S)))))) + +/* memory subformats - urgh! 
*/ + +#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_jit_L((long)(D))) +#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_r4(B)) ) +#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)) ) +#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_r4(B)) ,_jit_B((long)(D))) +#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_B((long)(D))) +#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_r4(B)) ,_jit_L((long)(D))) +#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_b101 ),_jit_L((long)(D))) +#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_L((long)(D))) + +#define _r_DB( R, D,B ) ((_s0P(D) && (B != _EBP) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) +#define _r_DBIS(R, D,B,I,S) ((_s0P(D) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) + +#define _r_X( R, D,B,I,S) (_r0P(I) ? (_r0P(B) ? _r_D (R,D ) : \ + (_ESP==(B) ? _r_DBIS(R,D,_ESP,_ESP,1) : \ + _r_DB (R,D, B ))) : \ + (_r0P(B) ? _r_4IS (R,D, I,S) : \ + (((I)!=_ESP) ? _r_DBIS(R,D, B, I,S) : \ + JITFAIL("illegal index register: %esp")))) + + +/* instruction formats */ + +/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */ + +#define _d16() ( _jit_B(0x66 ) ) +#define _O( OP ) ( _jit_B( OP ) ) +#define _Or( OP,R ) ( _jit_B( (OP)|_r(R)) ) +#define _OO( OP ) ( _jit_B((OP)>>8), _jit_B( (OP) ) ) +#define _OOr( OP,R ) ( _jit_B((OP)>>8), _jit_B( (OP)|_r(R)) ) +#define _Os( OP,B ) ( _s8P(B) ? _jit_B(((OP)|_b10)) : _jit_B(OP) ) +#define _sW( W ) ( _s8P(W) ? _jit_B(W):_jit_W(W) ) +#define _sL( L ) ( _s8P(L) ? 
_jit_B(L):_jit_L(L) ) +#define _O_W( OP ,W ) ( _O ( OP ) ,_jit_W(W) ) +#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) ) +#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) ) +#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) ) +#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) ) +#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) ) +#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_jit_W(W),_jit_B(B)) +#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_jit_B(B) ) +#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_jit_W(W) ) +#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_jit_L(L) ) +#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) ) +#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) ) +#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_B(B) ) +#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_W(W) ) +#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_jit_L(L) ) +#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_jit_B(B) ) +#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) ) +#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) ) +#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ) +#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS) ) +#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_B(B) ) +#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_W(W) ) +#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_L(L) ) +#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS) ,_jit_B(B) ) +#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS),_sW(W) ) +#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS),_sL(L) ) +#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) +#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) ) +#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) ) 
+#define _wO( OP ) (_d16(), _O( OP ) ) +#define _wOr( OP,R ) (_d16(), _Or( OP,R ) ) +#define _wOr_W( OP,R ,W ) (_d16(), _Or_W( OP,R ,W) ) +#define _wOs_sW( OP ,W ) (_d16(), _Os_sW( OP ,W) ) +#define _wO_Mrm( OP ,MO,R,M ) (_d16(), _O_Mrm( OP ,MO,R,M ) ) +#define _wOO_Mrm( OP ,MO,R,M ) (_d16(),_OO_Mrm( OP ,MO,R,M ) ) +#define _wO_Mrm_B( OP ,MO,R,M ,B ) (_d16(), _O_Mrm_B( OP ,MO,R,M ,B) ) +#define _wOO_Mrm_B( OP ,MO,R,M ,B ) (_d16(),_OO_Mrm_B( OP ,MO,R,M ,B) ) +#define _wO_Mrm_W( OP ,MO,R,M ,W ) (_d16(), _O_Mrm_W( OP ,MO,R,M ,W) ) +#define _wOs_Mrm_sW(OP ,MO,R,M ,W ) (_d16(), _Os_Mrm_sW(OP ,MO,R,M ,W) ) +#define _wO_X_W( OP ,MD,MB,MI,MS,W ) (_d16(), _O_X_W( OP ,MD,MB,MI,MS ,W) ) +#define _wO_r_X( OP ,R ,MD,MB,MI,MS ) (_d16(), _O_r_X( OP ,R ,MD,MB,MI,MS ) ) +#define _wOO_r_X( OP ,R ,MD,MB,MI,MS ) (_d16(),_OO_r_X( OP ,R ,MD,MB,MI,MS ) ) +#define _wO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) (_d16(), _O_r_X_B( OP ,R ,MD,MB,MI,MS ,B) ) +#define _wOO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) (_d16(),_OO_r_X_B( OP ,R ,MD,MB,MI,MS ,B) ) +#define _wO_r_X_W( OP ,R ,MD,MB,MI,MS,W ) (_d16(), _O_r_X_W( OP ,R ,MD,MB,MI,MS ,W) ) +#define _wOs_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) (_d16(), _Os_r_X_sW(OP ,R ,MD,MB,MI,MS ,W) ) + +/* +++ fully-qualified intrinsic instructions */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define ADCBrr(RS, RD) _O_Mrm (0x10 ,_b11,_r1(RS),_r1(RD) ) +#define ADCBmr(MD, MB, MI, MS, RD) _O_r_X (0x12 ,_r1(RD) ,MD,MB,MI,MS ) +#define ADCBrm(RS, MD, MB, MI, MS) _O_r_X (0x10 ,_r1(RS) ,MD,MB,MI,MS ) +#define ADCBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b010 ,_r1(RD) ,_su8(IM)) +#define ADCBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b010 ,MD,MB,MI,MS ,_su8(IM)) + +#define ADCWrr(RS, RD) _wO_Mrm (0x11 ,_b11,_r2(RS),_r2(RD) ) +#define ADCWmr(MD, MB, MI, MS, RD) _wO_r_X (0x13 ,_r2(RD) ,MD,MB,MI,MS ) +#define ADCWrm(RS, MD, MB, MI, MS) _wO_r_X (0x11 ,_r2(RS) ,MD,MB,MI,MS ) +#define ADCWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b010 ,_r2(RD) ,_su16(IM)) +#define ADCWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b010 ,MD,MB,MI,MS ,_su16(IM)) + +#define ADCLrr(RS, RD) _O_Mrm (0x11 ,_b11,_r4(RS),_r4(RD) ) +#define ADCLmr(MD, MB, MI, MS, RD) _O_r_X (0x13 ,_r4(RD) ,MD,MB,MI,MS ) +#define ADCLrm(RS, MD, MB, MI, MS) _O_r_X (0x11 ,_r4(RS) ,MD,MB,MI,MS ) +#define ADCLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b010 ,_r4(RD) ,IM ) +#define ADCLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b010 ,MD,MB,MI,MS ,IM ) + + +#define ADDBrr(RS, RD) _O_Mrm (0x00 ,_b11,_r1(RS),_r1(RD) ) +#define ADDBmr(MD, MB, MI, MS, RD) _O_r_X (0x02 ,_r1(RD) ,MD,MB,MI,MS ) +#define ADDBrm(RS, MD, MB, MI, MS) _O_r_X (0x00 ,_r1(RS) ,MD,MB,MI,MS ) +#define ADDBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b000 ,_r1(RD) ,_su8(IM)) +#define ADDBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b000 ,MD,MB,MI,MS ,_su8(IM)) + +#define ADDWrr(RS, RD) _wO_Mrm (0x01 ,_b11,_r2(RS),_r2(RD) ) +#define ADDWmr(MD, MB, MI, MS, RD) _wO_r_X (0x03 ,_r2(RD) ,MD,MB,MI,MS ) +#define ADDWrm(RS, MD, MB, MI, MS) _wO_r_X (0x01 ,_r2(RS) ,MD,MB,MI,MS ) +#define ADDWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b000 ,_r2(RD) ,_su16(IM)) +#define ADDWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b000 ,MD,MB,MI,MS ,_su16(IM)) + +#define ADDLrr(RS, RD) _O_Mrm (0x01 ,_b11,_r4(RS),_r4(RD) ) +#define ADDLmr(MD, MB, MI, MS, RD) _O_r_X (0x03 ,_r4(RD) ,MD,MB,MI,MS ) +#define ADDLrm(RS, MD, MB, MI, 
MS) _O_r_X (0x01 ,_r4(RS) ,MD,MB,MI,MS ) +#define ADDLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b000 ,_r4(RD) ,IM ) +#define ADDLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b000 ,MD,MB,MI,MS ,IM ) + + +#define ANDBrr(RS, RD) _O_Mrm (0x20 ,_b11,_r1(RS),_r1(RD) ) +#define ANDBmr(MD, MB, MI, MS, RD) _O_r_X (0x22 ,_r1(RD) ,MD,MB,MI,MS ) +#define ANDBrm(RS, MD, MB, MI, MS) _O_r_X (0x20 ,_r1(RS) ,MD,MB,MI,MS ) +#define ANDBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b100 ,_r1(RD) ,_su8(IM)) +#define ANDBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b100 ,MD,MB,MI,MS ,_su8(IM)) + +#define ANDWrr(RS, RD) _wO_Mrm (0x21 ,_b11,_r2(RS),_r2(RD) ) +#define ANDWmr(MD, MB, MI, MS, RD) _wO_r_X (0x23 ,_r2(RD) ,MD,MB,MI,MS ) +#define ANDWrm(RS, MD, MB, MI, MS) _wO_r_X (0x21 ,_r2(RS) ,MD,MB,MI,MS ) +#define ANDWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b100 ,_r2(RD) ,_su16(IM)) +#define ANDWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b100 ,MD,MB,MI,MS ,_su16(IM)) + +#define ANDLrr(RS, RD) _O_Mrm (0x21 ,_b11,_r4(RS),_r4(RD) ) +#define ANDLmr(MD, MB, MI, MS, RD) _O_r_X (0x23 ,_r4(RD) ,MD,MB,MI,MS ) +#define ANDLrm(RS, MD, MB, MI, MS) _O_r_X (0x21 ,_r4(RS) ,MD,MB,MI,MS ) +#define ANDLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b100 ,_r4(RD) ,IM ) +#define ANDLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b100 ,MD,MB,MI,MS ,IM ) + + +#define BSWAPLr(R) _OOr (0x0fc8,_r4(R) ) + + +#define BTWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b100 ,_r2(RD) ,_u8(IM)) +#define BTWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b100 ,MD,MB,MI,MS ,_u8(IM)) +#define BTWrr(RS,RD) _wOO_Mrm (0x0fa3 ,_b11,_r2(RS),_r2(RD) ) +#define BTWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fa3 ,_r2(RS) ,MD,MB,MI,MS ) + +#define BTLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b100 ,_r4(RD) ,_u8(IM)) +#define BTLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b100 ,MD,MB,MI,MS ,_u8(IM)) +#define BTLrr(RS,RD) _OO_Mrm (0x0fa3 ,_b11,_r4(RS),_r4(RD) ) +#define BTLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fa3 ,_r4(RS) ,MD,MB,MI,MS ) + + +#define BTCWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b111 ,_r2(RD) ,_u8(IM)) +#define 
BTCWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b111 ,MD,MB,MI,MS ,_u8(IM)) +#define BTCWrr(RS,RD) _wOO_Mrm (0x0fbb ,_b11,_r2(RS),_r2(RD) ) +#define BTCWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fbb ,_r2(RS) ,MD,MB,MI,MS ) + +#define BTCLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b111 ,_r4(RD) ,_u8(IM)) +#define BTCLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b111 ,MD,MB,MI,MS ,_u8(IM)) +#define BTCLrr(RS,RD) _OO_Mrm (0x0fbb ,_b11,_r4(RS),_r4(RD) ) +#define BTCLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fbb ,_r4(RS) ,MD,MB,MI,MS ) + + +#define BTRWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b110 ,_r2(RD) ,_u8(IM)) +#define BTRWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b110 ,MD,MB,MI,MS ,_u8(IM)) +#define BTRWrr(RS,RD) _wOO_Mrm (0x0fb3 ,_b11,_r2(RS),_r2(RD) ) +#define BTRWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fb3 ,_r2(RS) ,MD,MB,MI,MS ) + +#define BTRLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b110 ,_r4(RD) ,_u8(IM)) +#define BTRLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b110 ,MD,MB,MI,MS ,_u8(IM)) +#define BTRLrr(RS,RD) _OO_Mrm (0x0fb3 ,_b11,_r4(RS),_r4(RD) ) +#define BTRLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb3 ,_r4(RS) ,MD,MB,MI,MS ) + + +#define BTSWir(IM,RD) _wOO_Mrm_B (0x0fba ,_b11,_b101 ,_r2(RD) ,_u8(IM)) +#define BTSWim(IM,MD,MB,MI,MS) _wOO_r_X_B (0x0fba ,_b101 ,MD,MB,MI,MS ,_u8(IM)) +#define BTSWrr(RS,RD) _wOO_Mrm (0x0fab ,_b11,_r2(RS),_r2(RD) ) +#define BTSWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fab ,_r2(RS) ,MD,MB,MI,MS ) + +#define BTSLir(IM,RD) _OO_Mrm_B (0x0fba ,_b11,_b101 ,_r4(RD) ,_u8(IM)) +#define BTSLim(IM,MD,MB,MI,MS) _OO_r_X_B (0x0fba ,_b101 ,MD,MB,MI,MS ,_u8(IM)) +#define BTSLrr(RS,RD) _OO_Mrm (0x0fab ,_b11,_r4(RS),_r4(RD) ) +#define BTSLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fab ,_r4(RS) ,MD,MB,MI,MS ) + + +#define CALLm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? 
_O_D32 (0xe8 ,(int)(D) ) : \ + JITFAIL("illegal mode in direct jump")) + +#define CALLsr(R) _O_Mrm (0xff ,_b11,_b010,_r4(R) ) + +#define CALLsm(D,B,I,S) _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ) + +#define CBW_() _O (0x98 ) +#define CLC_() _O (0xf8 ) +#define CLTD_() _O (0x99 ) +#define CMC_() _O (0xf5 ) + + +#define CMPBrr(RS, RD) _O_Mrm (0x38 ,_b11,_r1(RS),_r1(RD) ) +#define CMPBmr(MD, MB, MI, MS, RD) _O_r_X (0x3a ,_r1(RD) ,MD,MB,MI,MS ) +#define CMPBrm(RS, MD, MB, MI, MS) _O_r_X (0x38 ,_r1(RS) ,MD,MB,MI,MS ) +#define CMPBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b111 ,_r1(RD) ,_su8(IM)) +#define CMPBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b111 ,MD,MB,MI,MS ,_su8(IM)) + +#define CMPWrr(RS, RD) _wO_Mrm (0x39 ,_b11,_r2(RS),_r2(RD) ) +#define CMPWmr(MD, MB, MI, MS, RD) _wO_r_X (0x3b ,_r2(RD) ,MD,MB,MI,MS ) +#define CMPWrm(RS, MD, MB, MI, MS) _wO_r_X (0x39 ,_r2(RS) ,MD,MB,MI,MS ) +#define CMPWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b111 ,_r2(RD) ,_su16(IM)) +#define CMPWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b111 ,MD,MB,MI,MS ,_su16(IM)) + +#define CMPLrr(RS, RD) _O_Mrm (0x39 ,_b11,_r4(RS),_r4(RD) ) +#define CMPLmr(MD, MB, MI, MS, RD) _O_r_X (0x3b ,_r4(RD) ,MD,MB,MI,MS ) +#define CMPLrm(RS, MD, MB, MI, MS) _O_r_X (0x39 ,_r4(RS) ,MD,MB,MI,MS ) +#define CMPLir(IM, RD) _O_Mrm_L (0x81 ,_b11,_b111 ,_r4(RD) ,IM ) +#define CMPLim(IM, MD, MB, MI, MS) _O_r_X_L (0x81 ,_b111 ,MD,MB,MI,MS ,IM ) + + +#define CWD_() _O (0x99 ) + + +#define CMPXCHGBrr(RS,RD) _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ) +#define CMPXCHGBrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS ) + +#define CMPXCHGWrr(RS,RD) _wOO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) ) +#define CMPXCHGWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS ) + +#define CMPXCHGLrr(RS,RD) _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) ) +#define CMPXCHGLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS ) + + +#define DECBr(RD) _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) ) +#define DECBm(MD,MB,MI,MS) _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS ) + +#define 
DECWr(RD) _wOr (0x48,_r2(RD) )
+#define DECWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b001 ,MD,MB,MI,MS )
+
+#define DECLr(RD) _Or (0x48,_r4(RD) )
+#define DECLm(MD,MB,MI,MS) _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )
+
+
+#define DIVBr(RS) _O_Mrm (0xf6 ,_b11,_b110 ,_r1(RS) )
+#define DIVBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b110 ,MD,MB,MI,MS )
+
+#define DIVWr(RS) _wO_Mrm (0xf7 ,_b11,_b110 ,_r2(RS) )
+#define DIVWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b110 ,MD,MB,MI,MS )
+
+#define DIVLr(RS) _O_Mrm (0xf7 ,_b11,_b110 ,_r4(RS) )
+#define DIVLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b110 ,MD,MB,MI,MS )
+
+
+#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B))
+#define HLT_() _O (0xf4 )
+
+
+#define IDIVBr(RS) _O_Mrm (0xf6 ,_b11,_b111 ,_r1(RS) )
+#define IDIVBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b111 ,MD,MB,MI,MS )
+
+#define IDIVWr(RS) _wO_Mrm (0xf7 ,_b11,_b111 ,_r2(RS) )
+#define IDIVWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b111 ,MD,MB,MI,MS )
+
+#define IDIVLr(RS) _O_Mrm (0xf7 ,_b11,_b111 ,_r4(RS) )
+#define IDIVLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b111 ,MD,MB,MI,MS )
+
+/* One-operand IMUL (0xf6/0xf7 with /5 in the ModR/M reg field). */
+#define IMULBr(RS) _O_Mrm (0xf6 ,_b11,_b101 ,_r1(RS) )
+#define IMULBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b101 ,MD,MB,MI,MS )
+
+#define IMULWr(RS) _wO_Mrm (0xf7 ,_b11,_b101 ,_r2(RS) )
+#define IMULWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b101 ,MD,MB,MI,MS )
+
+#define IMULLr(RS) _O_Mrm (0xf7 ,_b11,_b101 ,_r4(RS) )
+#define IMULLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b101 ,MD,MB,MI,MS )
+
+
+/* Two- and three-operand IMUL (0x0faf and 0x69): the destination RD is
+ * encoded in the ModR/M reg field and the source RS in the r/m field,
+ * exactly as the memory-source forms (*mr, *imr) below already do. */
+#define IMULWrr(RS,RD) _wOO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) )
+#define IMULWmr(MD,MB,MI,MS,RD) _wOO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS )
+#define IMULWirr(IM,RS,RD) _wOs_Mrm_sW (0x69 ,_b11,_r2(RD),_r2(RS) ,_su16(IM) )
+#define IMULWimr(IM,MD,MB,MI,MS,RD) _wOs_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) )
+
+#define IMULLir(IM,RD) _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM )
+#define IMULLrr(RS,RD) _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) )
+#define IMULLmr(MD,MB,MI,MS,RD) _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS )
+#define IMULLirr(IM,RS,RD) _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RS) ,IM )
+#define 
IMULLimr(IM,MD,MB,MI,MS,RD) _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM ) + + +#define INCBr(RD) _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) ) +#define INCBm(MD,MB,MI,MS) _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS ) + +#define INCWr(RD) _wOr (0x40,_r2(RD) ) +#define INCWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b000 ,MD,MB,MI,MS ) + +#define INCLr(RD) _Or (0x40,_r4(RD) ) +#define INCLm(MD,MB,MI,MS) _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ) + + +#define INVD_() _OO (0x0f08 ) +#define INVLPGm(MD, MB, MI, MS) _OO_r_X (0x0f01 ,_b111 ,MD,MB,MI,MS ) + + +#define JCCSim(CC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D8 (0x70|(CC) ,(int)(D) ) : \ + JITFAIL("illegal mode in conditional jump")) + +#define JOSm(D,B,I,S) JCCSim(0x0,D,B,I,S) +#define JNOSm(D,B,I,S) JCCSim(0x1,D,B,I,S) +#define JBSm(D,B,I,S) JCCSim(0x2,D,B,I,S) +#define JNAESm(D,B,I,S) JCCSim(0x2,D,B,I,S) +#define JNBSm(D,B,I,S) JCCSim(0x3,D,B,I,S) +#define JAESm(D,B,I,S) JCCSim(0x3,D,B,I,S) +#define JESm(D,B,I,S) JCCSim(0x4,D,B,I,S) +#define JZSm(D,B,I,S) JCCSim(0x4,D,B,I,S) +#define JNESm(D,B,I,S) JCCSim(0x5,D,B,I,S) +#define JNZSm(D,B,I,S) JCCSim(0x5,D,B,I,S) +#define JBESm(D,B,I,S) JCCSim(0x6,D,B,I,S) +#define JNASm(D,B,I,S) JCCSim(0x6,D,B,I,S) +#define JNBESm(D,B,I,S) JCCSim(0x7,D,B,I,S) +#define JASm(D,B,I,S) JCCSim(0x7,D,B,I,S) +#define JSSm(D,B,I,S) JCCSim(0x8,D,B,I,S) +#define JNSSm(D,B,I,S) JCCSim(0x9,D,B,I,S) +#define JPSm(D,B,I,S) JCCSim(0xa,D,B,I,S) +#define JPESm(D,B,I,S) JCCSim(0xa,D,B,I,S) +#define JNPSm(D,B,I,S) JCCSim(0xb,D,B,I,S) +#define JPOSm(D,B,I,S) JCCSim(0xb,D,B,I,S) +#define JLSm(D,B,I,S) JCCSim(0xc,D,B,I,S) +#define JNGESm(D,B,I,S) JCCSim(0xc,D,B,I,S) +#define JNLSm(D,B,I,S) JCCSim(0xd,D,B,I,S) +#define JGESm(D,B,I,S) JCCSim(0xd,D,B,I,S) +#define JLESm(D,B,I,S) JCCSim(0xe,D,B,I,S) +#define JNGSm(D,B,I,S) JCCSim(0xe,D,B,I,S) +#define JNLESm(D,B,I,S) JCCSim(0xf,D,B,I,S) +#define JGSm(D,B,I,S) JCCSim(0xf,D,B,I,S) + +#define JCCim(CC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? 
_OO_D32 (0x0f80|(CC) ,(int)(D) ) : \ + JITFAIL("illegal mode in conditional jump")) + +#define JOm(D,B,I,S) JCCim(0x0,D,B,I,S) +#define JNOm(D,B,I,S) JCCim(0x1,D,B,I,S) +#define JBm(D,B,I,S) JCCim(0x2,D,B,I,S) +#define JNAEm(D,B,I,S) JCCim(0x2,D,B,I,S) +#define JNBm(D,B,I,S) JCCim(0x3,D,B,I,S) +#define JAEm(D,B,I,S) JCCim(0x3,D,B,I,S) +#define JEm(D,B,I,S) JCCim(0x4,D,B,I,S) +#define JZm(D,B,I,S) JCCim(0x4,D,B,I,S) +#define JNEm(D,B,I,S) JCCim(0x5,D,B,I,S) +#define JNZm(D,B,I,S) JCCim(0x5,D,B,I,S) +#define JBEm(D,B,I,S) JCCim(0x6,D,B,I,S) +#define JNAm(D,B,I,S) JCCim(0x6,D,B,I,S) +#define JNBEm(D,B,I,S) JCCim(0x7,D,B,I,S) +#define JAm(D,B,I,S) JCCim(0x7,D,B,I,S) +#define JSm(D,B,I,S) JCCim(0x8,D,B,I,S) +#define JNSm(D,B,I,S) JCCim(0x9,D,B,I,S) +#define JPm(D,B,I,S) JCCim(0xa,D,B,I,S) +#define JPEm(D,B,I,S) JCCim(0xa,D,B,I,S) +#define JNPm(D,B,I,S) JCCim(0xb,D,B,I,S) +#define JPOm(D,B,I,S) JCCim(0xb,D,B,I,S) +#define JLm(D,B,I,S) JCCim(0xc,D,B,I,S) +#define JNGEm(D,B,I,S) JCCim(0xc,D,B,I,S) +#define JNLm(D,B,I,S) JCCim(0xd,D,B,I,S) +#define JGEm(D,B,I,S) JCCim(0xd,D,B,I,S) +#define JLEm(D,B,I,S) JCCim(0xe,D,B,I,S) +#define JNGm(D,B,I,S) JCCim(0xe,D,B,I,S) +#define JNLEm(D,B,I,S) JCCim(0xf,D,B,I,S) +#define JGm(D,B,I,S) JCCim(0xf,D,B,I,S) + + +#define JMPSm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D8 (0xeb ,(int)(D) ) : \ + JITFAIL("illegal mode in short jump")) + +#define JMPm(D,B,I,S) ((_r0P(B) && _r0P(I)) ? _O_D32 (0xe9 ,(int)(D) ) : \ + JITFAIL("illegal mode in direct jump")) + +#define JMPsr(R) _O_Mrm (0xff ,_b11,_b100,_r4(R) ) + +#define JMPsm(D,B,I,S) _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ) + + +#define LAHF_() _O (0x9f ) +#define LEALmr(MD, MB, MI, MS, RD) _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS ) +#define LEAVE_() _O (0xc9 ) + + +#define LMSWr(RS) _OO_Mrm (0x0f01 ,_b11,_b110,_r4(RS) ) +#define LMSWm(MD,MB,MI,MS) _OO_r_X (0x0f01 ,_b110 ,MD,MB,MI,MS ) + +#define LOOPm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? 
_O_D8 (0xe2 ,MD ) : \
+ JITFAIL("illegal mode in loop"))
+
+#define LOOPEm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe1 ,MD ) : \
+ JITFAIL("illegal mode in loope"))
+
+#define LOOPZm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe1 ,MD ) : \
+ JITFAIL("illegal mode in loopz"))
+
+#define LOOPNEm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe0 ,MD ) : \
+ JITFAIL("illegal mode in loopne"))
+
+#define LOOPNZm(MD,MB,MI,MS) ((_r0P(MB) && _r0P(MI)) ? _O_D8 (0xe0 ,MD ) : \
+ JITFAIL("illegal mode in loopnz"))
+
+
+/* Byte register-to-register move uses 0x88 (MOV r/m8,r8), the same opcode
+ * as MOVBrm; 0x80 is the immediate-arithmetic group and is not a MOV. */
+#define MOVBrr(RS, RD) _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) )
+#define MOVBmr(MD, MB, MI, MS, RD) _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS )
+#define MOVBrm(RS, MD, MB, MI, MS) _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS )
+#define MOVBir(IM, R) _Or_B (0xb0,_r1(R) ,_su8(IM))
+#define MOVBim(IM, MD, MB, MI, MS) _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM))
+
+#define MOVWrr(RS, RD) _wO_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) )
+#define MOVWmr(MD, MB, MI, MS, RD) _wO_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS )
+#define MOVWrm(RS, MD, MB, MI, MS) _wO_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS )
+#define MOVWir(IM, R) _wOr_W (0xb8,_r2(R) ,_su16(IM))
+#define MOVWim(IM, MD, MB, MI, MS) _wO_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM))
+
+#define MOVLrr(RS, RD) _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) )
+#define MOVLmr(MD, MB, MI, MS, RD) _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS )
+#define MOVLrm(RS, MD, MB, MI, MS) _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS )
+#define MOVLir(IM, R) _Or_L (0xb8,_r4(R) ,IM )
+#define MOVLim(IM, MD, MB, MI, MS) _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )
+
+/* Zero-/sign-extending moves.  The name suffix gives source then
+ * destination width (BL = byte to long, BW = byte to word, WL = word to
+ * long), so each register is validated with the _rN helper matching its
+ * own width.  This only matters when _ASM_SAFETY is defined; otherwise
+ * _r1/_r2/_r4 all reduce to _rN and the emitted bytes are unchanged. */
+#define MOVZBLrr(RS, RD) _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) )
+#define MOVZBLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS )
+#define MOVZBWrr(RS, RD) _wOO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) )
+#define MOVZBWmr(MD, MB, MI, MS, RD) _wOO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS )
+#define MOVZWLrr(RS, RD) _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) )
+#define MOVZWLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS )
+
+#define MOVSBLrr(RS, RD) _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) )
+#define MOVSBLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS )
+#define MOVSBWrr(RS, RD) _wOO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) )
+#define MOVSBWmr(MD, MB, MI, MS, RD) _wOO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS )
+#define MOVSWLrr(RS, RD) _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) )
+#define MOVSWLmr(MD, MB, MI, MS, RD) _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS )
+
+
+#define MULBr(RS) _O_Mrm (0xf6 ,_b11,_b100 ,_r1(RS) )
+#define MULBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b100 ,MD,MB,MI,MS )
+
+#define MULWr(RS) _wO_Mrm (0xf7 ,_b11,_b100 ,_r2(RS) )
+#define MULWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b100 ,MD,MB,MI,MS )
+
+#define MULLr(RS) _O_Mrm (0xf7 ,_b11,_b100 ,_r4(RS) )
+#define MULLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b100 ,MD,MB,MI,MS )
+
+
+#define NEGBr(RD) _O_Mrm (0xf6 ,_b11,_b011 ,_r1(RD) )
+#define NEGBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b011 ,MD,MB,MI,MS )
+
+#define NEGWr(RD) _wO_Mrm (0xf7 ,_b11,_b011 ,_r2(RD) )
+#define NEGWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b011 ,MD,MB,MI,MS )
+
+#define NEGLr(RD) _O_Mrm (0xf7 ,_b11,_b011 ,_r4(RD) )
+#define NEGLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b011 ,MD,MB,MI,MS )
+
+
+#define NOP_() _O (0x90 )
+
+
+#define NOTBr(RD) _O_Mrm (0xf6 ,_b11,_b010 ,_r1(RD) )
+#define NOTBm(MD,MB,MI,MS) _O_r_X (0xf6 ,_b010 ,MD,MB,MI,MS )
+
+#define NOTWr(RD) _wO_Mrm (0xf7 ,_b11,_b010 ,_r2(RD) )
+#define NOTWm(MD,MB,MI,MS) _wO_r_X (0xf7 ,_b010 ,MD,MB,MI,MS )
+
+#define NOTLr(RD) _O_Mrm (0xf7 ,_b11,_b010 ,_r4(RD) )
+#define NOTLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b010 ,MD,MB,MI,MS )
+
+
+#define ORBrr(RS, RD) _O_Mrm (0x08 ,_b11,_r1(RS),_r1(RD) )
+#define ORBmr(MD, MB, MI, MS, RD) _O_r_X (0x0a ,_r1(RD) ,MD,MB,MI,MS )
+#define ORBrm(RS, MD, MB, MI, MS) _O_r_X (0x08 ,_r1(RS) ,MD,MB,MI,MS )
+#define ORBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b001 ,_r1(RD) ,_su8(IM))
+#define ORBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b001 ,MD,MB,MI,MS ,_su8(IM))
+
+#define ORWrr(RS, RD) _wO_Mrm (0x09 ,_b11,_r2(RS),_r2(RD) )
+#define ORWmr(MD, MB, MI, MS, RD) _wO_r_X (0x0b
,_r2(RD) ,MD,MB,MI,MS )
+#define ORWrm(RS, MD, MB, MI, MS) _wO_r_X (0x09 ,_r2(RS) ,MD,MB,MI,MS )
+#define ORWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b001 ,_r2(RD) ,_su16(IM))
+#define ORWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b001 ,MD,MB,MI,MS ,_su16(IM))
+
+#define ORLrr(RS, RD) _O_Mrm (0x09 ,_b11,_r4(RS),_r4(RD) )
+#define ORLmr(MD, MB, MI, MS, RD) _O_r_X (0x0b ,_r4(RD) ,MD,MB,MI,MS )
+#define ORLrm(RS, MD, MB, MI, MS) _O_r_X (0x09 ,_r4(RS) ,MD,MB,MI,MS )
+#define ORLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b001 ,_r4(RD) ,IM )
+#define ORLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b001 ,MD,MB,MI,MS ,IM )
+
+
+#define POPWr(RD) _wOr (0x58,_r2(RD) )
+#define POPWm(MD,MB,MI,MS) _wO_r_X (0x8f ,_b000 ,MD,MB,MI,MS )
+
+#define POPLr(RD) _Or (0x58,_r4(RD) )
+#define POPLm(MD,MB,MI,MS) _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )
+
+
+#define POPA_() _wO (0x61 )
+#define POPAD_() _O (0x61 )
+
+#define POPF_() _wO (0x9d )
+#define POPFD_() _O (0x9d )
+
+
+/* PUSHWm passes exactly six arguments to _wO_r_X (opcode, /6 extension,
+ * then disp, base, index, scale), matching PUSHLm below. */
+#define PUSHWr(R) _wOr (0x50,_r2(R) )
+#define PUSHWm(MD,MB,MI,MS) _wO_r_X (0xff ,_b110 ,MD,MB,MI,MS )
+#define PUSHWi(IM) _wOs_sW (0x68 ,IM )
+
+#define PUSHLr(R) _Or (0x50,_r4(R) )
+#define PUSHLm(MD,MB,MI,MS) _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )
+#define PUSHLi(IM) _Os_sL (0x68 ,IM )
+
+
+#define PUSHA_() _wO (0x60 )
+#define PUSHAD_() _O (0x60 )
+
+/* As with POPF_/POPFD_ and PUSHA_/PUSHAD_, the 16-bit form carries the
+ * 0x66 operand-size prefix (_wO) and the "D" form does not, so that a
+ * PUSHF_/POPF_ or PUSHFD_/POPFD_ pair moves the same number of bytes. */
+#define PUSHF_() _wO (0x9c )
+#define PUSHFD_() _O (0x9c )
+
+#define RET_() _O (0xc3 )
+#define RETi(IM) _O_W (0xc2 ,_su16(IM))
+
+
+/* Rotate left.  "ir"/"im" take an immediate count (a count of 1 selects the
+ * shorter 0xd0/0xd1 encoding), "rr"/"rm" take the count in CL.  Memory
+ * operands are (disp, base, index, scale) like every other macro here. */
+#define ROLBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b000,_r1(RD) ) : \
+ _O_Mrm_B (0xc0 ,_b11,_b000,_r1(RD) ,_u8(IM) ) )
+#define ROLBim(IM,MD,MB,MI,MS) (((IM)==1) ? _O_r_X (0xd0 ,_b000 ,MD,MB,MI,MS ) : \
+ _O_r_X_B (0xc0 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) )
+#define ROLBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b000,_r1(RD) ) : \
+ JITFAIL ("source register must be CL" ) )
+#define ROLBrm(RS,MD,MB,MI,MS) (((RS)==_CL) ? _O_r_X (0xd2 ,_b000 ,MD,MB,MI,MS ) : \
+ JITFAIL ("source register must be CL" ) )
+
+#define ROLWir(IM,RD) (((IM)==1) ? 
_wO_Mrm (0xd1 ,_b11,_b000,_r2(RD) ) : \ + _wO_Mrm_B (0xc1 ,_b11,_b000,_r2(RD) ,_u8(IM) ) ) +#define ROLWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b000 ,MD,MB,MI,MS ) : \ + _wO_r_X_B (0xc1 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define ROLWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b000,_r2(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define ROLWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b000 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define ROLLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b000,_r4(RD) ) : \ + _O_Mrm_B (0xc1 ,_b11,_b000,_r4(RD) ,_u8(IM) ) ) +#define ROLLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b000 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc1 ,_b000 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define ROLLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b000,_r4(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define ROLLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b000 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + + +#define RORBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b001,_r1(RD) ) : \ + _O_Mrm_B (0xc0 ,_b11,_b001,_r1(RD) ,_u8(IM) ) ) +#define RORBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b001 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc0 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define RORBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b001,_r1(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define RORBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b001 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define RORWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b001,_r2(RD) ) : \ + _wO_Mrm_B (0xc1 ,_b11,_b001,_r2(RD) ,_u8(IM) ) ) +#define RORWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b001 ,MD,MB,MI,MS ) : \ + _wO_r_X_B (0xc1 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define RORWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b001,_r2(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define RORWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? 
_wO_r_X (0xd3 ,_b001 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define RORLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b001,_r4(RD) ) : \ + _O_Mrm_B (0xc1 ,_b11,_b001,_r4(RD) ,_u8(IM) ) ) +#define RORLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b001 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc1 ,_b001 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define RORLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b001,_r4(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define RORLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b001 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + + +#define SAHF_() _O (0x9e ) + + +#define SALBir SHLBir +#define SALBim SHLBim +#define SALBrr SHLBrr +#define SALBrm SHLBrm +#define SALWir SHLWir +#define SALWim SHLWim +#define SALWrr SHLWrr +#define SALWrm SHLWrm +#define SALLir SHLLir +#define SALLim SHLLim +#define SALLrr SHLLrr +#define SALLrm SHLLrm + + +#define SARBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b111,_r1(RD) ) : \ + _O_Mrm_B (0xc0 ,_b11,_b111,_r1(RD) ,_u8(IM) ) ) +#define SARBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b111 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc0 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SARBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b111,_r1(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SARBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b111 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define SARWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b111,_r2(RD) ) : \ + _wO_Mrm_B (0xc1 ,_b11,_b111,_r2(RD) ,_u8(IM) ) ) +#define SARWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b111 ,MD,MB,MI,MS ) : \ + _wO_r_X_B (0xc1 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SARWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b111,_r2(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SARWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b111 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define SARLir(IM,RD) (((IM)==1) ? 
_O_Mrm (0xd1 ,_b11,_b111,_r4(RD) ) : \ + _O_Mrm_B (0xc1 ,_b11,_b111,_r4(RD) ,_u8(IM) ) ) +#define SARLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b111 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc1 ,_b111 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SARLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b111,_r4(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SARLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b111 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + + +#define SBBBrr(RS, RD) _O_Mrm (0x18 ,_b11,_r1(RS),_r1(RD) ) +#define SBBBmr(MD, MB, MI, MS, RD) _O_r_X (0x1a ,_r1(RD) ,MD,MB,MI,MS ) +#define SBBBrm(RS, MD, MB, MI, MS) _O_r_X (0x18 ,_r1(RS) ,MD,MB,MI,MS ) +#define SBBBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b011 ,_r1(RD) ,_su8(IM)) +#define SBBBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b011 ,MD,MB,MI,MS ,_su8(IM)) + +#define SBBWrr(RS, RD) _wO_Mrm (0x19 ,_b11,_r2(RS),_r2(RD) ) +#define SBBWmr(MD, MB, MI, MS, RD) _wO_r_X (0x1b ,_r2(RD) ,MD,MB,MI,MS ) +#define SBBWrm(RS, MD, MB, MI, MS) _wO_r_X (0x19 ,_r2(RS) ,MD,MB,MI,MS ) +#define SBBWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b011 ,_r2(RD) ,_su16(IM)) +#define SBBWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b011 ,MD,MB,MI,MS ,_su16(IM)) + +#define SBBLrr(RS, RD) _O_Mrm (0x19 ,_b11,_r4(RS),_r4(RD) ) +#define SBBLmr(MD, MB, MI, MS, RD) _O_r_X (0x1b ,_r4(RD) ,MD,MB,MI,MS ) +#define SBBLrm(RS, MD, MB, MI, MS) _O_r_X (0x19 ,_r4(RS) ,MD,MB,MI,MS ) +#define SBBLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b011 ,_r4(RD) ,IM ) +#define SBBLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b011 ,MD,MB,MI,MS ,IM ) + + +#define SETCCir(CC,RD) _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) ) + +#define SETOr(RD) SETCCir(0x0,RD) +#define SETNOr(RD) SETCCir(0x1,RD) +#define SETBr(RD) SETCCir(0x2,RD) +#define SETNAEr(RD) SETCCir(0x2,RD) +#define SETNBr(RD) SETCCir(0x3,RD) +#define SETAEr(RD) SETCCir(0x3,RD) +#define SETEr(RD) SETCCir(0x4,RD) +#define SETZr(RD) SETCCir(0x4,RD) +#define SETNEr(RD) SETCCir(0x5,RD) +#define SETNZr(RD) SETCCir(0x5,RD) 
+#define SETBEr(RD) SETCCir(0x6,RD) +#define SETNAr(RD) SETCCir(0x6,RD) +#define SETNBEr(RD) SETCCir(0x7,RD) +#define SETAr(RD) SETCCir(0x7,RD) +#define SETSr(RD) SETCCir(0x8,RD) +#define SETNSr(RD) SETCCir(0x9,RD) +#define SETPr(RD) SETCCir(0xa,RD) +#define SETPEr(RD) SETCCir(0xa,RD) +#define SETNPr(RD) SETCCir(0xb,RD) +#define SETPOr(RD) SETCCir(0xb,RD) +#define SETLr(RD) SETCCir(0xc,RD) +#define SETNGEr(RD) SETCCir(0xc,RD) +#define SETNLr(RD) SETCCir(0xd,RD) +#define SETGEr(RD) SETCCir(0xd,RD) +#define SETLEr(RD) SETCCir(0xe,RD) +#define SETNGr(RD) SETCCir(0xe,RD) +#define SETNLEr(RD) SETCCir(0xf,RD) +#define SETGr(RD) SETCCir(0xf,RD) + +#define SETCCim(CC,MD,MB,MI,MS) _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS ) + +#define SETOm(D,B,I,S) SETCCim(0x0,D,B,I,S) +#define SETNOm(D,B,I,S) SETCCim(0x1,D,B,I,S) +#define SETBm(D,B,I,S) SETCCim(0x2,D,B,I,S) +#define SETNAEm(D,B,I,S) SETCCim(0x2,D,B,I,S) +#define SETNBm(D,B,I,S) SETCCim(0x3,D,B,I,S) +#define SETAEm(D,B,I,S) SETCCim(0x3,D,B,I,S) +#define SETEm(D,B,I,S) SETCCim(0x4,D,B,I,S) +#define SETZm(D,B,I,S) SETCCim(0x4,D,B,I,S) +#define SETNEm(D,B,I,S) SETCCim(0x5,D,B,I,S) +#define SETNZm(D,B,I,S) SETCCim(0x5,D,B,I,S) +#define SETBEm(D,B,I,S) SETCCim(0x6,D,B,I,S) +#define SETNAm(D,B,I,S) SETCCim(0x6,D,B,I,S) +#define SETNBEm(D,B,I,S) SETCCim(0x7,D,B,I,S) +#define SETAm(D,B,I,S) SETCCim(0x7,D,B,I,S) +#define SETSm(D,B,I,S) SETCCim(0x8,D,B,I,S) +#define SETNSm(D,B,I,S) SETCCim(0x9,D,B,I,S) +#define SETPm(D,B,I,S) SETCCim(0xa,D,B,I,S) +#define SETPEm(D,B,I,S) SETCCim(0xa,D,B,I,S) +#define SETNPm(D,B,I,S) SETCCim(0xb,D,B,I,S) +#define SETPOm(D,B,I,S) SETCCim(0xb,D,B,I,S) +#define SETLm(D,B,I,S) SETCCim(0xc,D,B,I,S) +#define SETNGEm(D,B,I,S) SETCCim(0xc,D,B,I,S) +#define SETNLm(D,B,I,S) SETCCim(0xd,D,B,I,S) +#define SETGEm(D,B,I,S) SETCCim(0xd,D,B,I,S) +#define SETLEm(D,B,I,S) SETCCim(0xe,D,B,I,S) +#define SETNGm(D,B,I,S) SETCCim(0xe,D,B,I,S) +#define SETNLEm(D,B,I,S) SETCCim(0xf,D,B,I,S) +#define SETGm(D,B,I,S) 
SETCCim(0xf,D,B,I,S) + + +#define SHLBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b100,_r1(RD) ) : \ + _O_Mrm_B (0xc0 ,_b11,_b100,_r1(RD) ,_u8(IM) ) ) +#define SHLBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b100 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc0 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SHLBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b100,_r1(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SHLBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd2 ,_b100 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define SHLWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b100,_r2(RD) ) : \ + _wO_Mrm_B (0xc1 ,_b11,_b100,_r2(RD) ,_u8(IM) ) ) +#define SHLWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b100 ,MD,MB,MI,MS ) : \ + _wO_r_X_B (0xc1 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SHLWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b100,_r2(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SHLWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b100 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define SHLLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b100,_r4(RD) ) : \ + _O_Mrm_B (0xc1 ,_b11,_b100,_r4(RD) ,_u8(IM) ) ) +#define SHLLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b100 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc1 ,_b100 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SHLLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b100,_r4(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SHLLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _O_r_X (0xd3 ,_b100 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + + +#define SHRBir(IM,RD) (((IM)==1) ? _O_Mrm (0xd0 ,_b11,_b101,_r1(RD) ) : \ + _O_Mrm_B (0xc0 ,_b11,_b101,_r1(RD) ,_u8(IM) ) ) +#define SHRBim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd0 ,_b101 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc0 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SHRBrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd2 ,_b11,_b101,_r1(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SHRBrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? 
_O_r_X (0xd2 ,_b101 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define SHRWir(IM,RD) (((IM)==1) ? _wO_Mrm (0xd1 ,_b11,_b101,_r2(RD) ) : \ + _wO_Mrm_B (0xc1 ,_b11,_b101,_r2(RD) ,_u8(IM) ) ) +#define SHRWim(IM,MD,MB,MS,MI) (((IM)==1) ? _wO_r_X (0xd1 ,_b101 ,MD,MB,MI,MS ) : \ + _wO_r_X_B (0xc1 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SHRWrr(RS,RD) (((RS)==_CL) ? _wO_Mrm (0xd3 ,_b11,_b101,_r2(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SHRWrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? _wO_r_X (0xd3 ,_b101 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + +#define SHRLir(IM,RD) (((IM)==1) ? _O_Mrm (0xd1 ,_b11,_b101,_r4(RD) ) : \ + _O_Mrm_B (0xc1 ,_b11,_b101,_r4(RD) ,_u8(IM) ) ) +#define SHRLim(IM,MD,MB,MS,MI) (((IM)==1) ? _O_r_X (0xd1 ,_b101 ,MD,MB,MI,MS ) : \ + _O_r_X_B (0xc1 ,_b101 ,MD,MB,MI,MS ,_u8(IM) ) ) +#define SHRLrr(RS,RD) (((RS)==_CL) ? _O_Mrm (0xd3 ,_b11,_b101,_r4(RD) ) : \ + JITFAIL ("source register must be CL" ) ) +#define SHRLrm(RS,MD,MB,MS,MI) (((RS)==_CL) ? 
_O_r_X (0xd3 ,_b101 ,MD,MB,MI,MS ) : \ + JITFAIL ("source register must be CL" ) ) + + +#define STC_() _O (0xf9 ) + + +#define SUBBrr(RS, RD) _O_Mrm (0x28 ,_b11,_r1(RS),_r1(RD) ) +#define SUBBmr(MD, MB, MI, MS, RD) _O_r_X (0x2a ,_r1(RD) ,MD,MB,MI,MS ) +#define SUBBrm(RS, MD, MB, MI, MS) _O_r_X (0x28 ,_r1(RS) ,MD,MB,MI,MS ) +#define SUBBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b101 ,_r1(RD) ,_su8(IM)) +#define SUBBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b101 ,MD,MB,MI,MS ,_su8(IM)) + +#define SUBWrr(RS, RD) _wO_Mrm (0x29 ,_b11,_r2(RS),_r2(RD) ) +#define SUBWmr(MD, MB, MI, MS, RD) _wO_r_X (0x2b ,_r2(RD) ,MD,MB,MI,MS ) +#define SUBWrm(RS, MD, MB, MI, MS) _wO_r_X (0x29 ,_r2(RS) ,MD,MB,MI,MS ) +#define SUBWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b101 ,_r2(RD) ,_su16(IM)) +#define SUBWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b101 ,MD,MB,MI,MS ,_su16(IM)) + +#define SUBLrr(RS, RD) _O_Mrm (0x29 ,_b11,_r4(RS),_r4(RD) ) +#define SUBLmr(MD, MB, MI, MS, RD) _O_r_X (0x2b ,_r4(RD) ,MD,MB,MI,MS ) +#define SUBLrm(RS, MD, MB, MI, MS) _O_r_X (0x29 ,_r4(RS) ,MD,MB,MI,MS ) +#define SUBLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b101 ,_r4(RD) ,IM ) +#define SUBLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b101 ,MD,MB,MI,MS ,IM ) + + +#define TESTBrr(RS, RD) _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) ) +#define TESTBrm(RS, MD, MB, MI, MS) _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS ) +#define TESTBir(IM, RD) _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM)) +#define TESTBim(IM, MD, MB, MI, MS) _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)) + +#define TESTWrr(RS, RD) _wO_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ) +#define TESTWrm(RS, MD, MB, MI, MS) _wO_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ) +#define TESTWir(IM, RD) _wO_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM)) +#define TESTWim(IM, MD, MB, MI, MS) _wO_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)) + +#define TESTLrr(RS, RD) _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) ) +#define TESTLrm(RS, MD, MB, MI, MS) _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS ) +#define TESTLir(IM, RD) _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM 
) +#define TESTLim(IM, MD, MB, MI, MS) _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ) + + +#define XADDBrr(RS,RD) _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) ) +#define XADDBrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS ) + +#define XADDWrr(RS,RD) _wOO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) ) +#define XADDWrm(RS,MD,MB,MI,MS) _wOO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS ) + +#define XADDLrr(RS,RD) _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) ) +#define XADDLrm(RS,MD,MB,MI,MS) _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS ) + + +#define XCHGBrr(RS,RD) _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) ) +#define XCHGBrm(RS,MD,MB,MI,MS) _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS ) + +#define XCHGWrr(RS,RD) _wO_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) ) +#define XCHGWrm(RS,MD,MB,MI,MS) _wO_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS ) + +#define XCHGLrr(RS,RD) _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) ) +#define XCHGLrm(RS,MD,MB,MI,MS) _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS ) + + +#define XORBrr(RS, RD) _O_Mrm (0x30 ,_b11,_r1(RS),_r1(RD) ) +#define XORBmr(MD, MB, MI, MS, RD) _O_r_X (0x32 ,_r1(RD) ,MD,MB,MI,MS ) +#define XORBrm(RS, MD, MB, MI, MS) _O_r_X (0x30 ,_r1(RS) ,MD,MB,MI,MS ) +#define XORBir(IM, RD) _O_Mrm_B (0x80 ,_b11,_b110 ,_r1(RD) ,_su8(IM)) +#define XORBim(IM, MD, MB, MI, MS) _O_r_X_B (0x80 ,_b110 ,MD,MB,MI,MS ,_su8(IM)) + +#define XORWrr(RS, RD) _wO_Mrm (0x31 ,_b11,_r2(RS),_r2(RD) ) +#define XORWmr(MD, MB, MI, MS, RD) _wO_r_X (0x33 ,_r2(RD) ,MD,MB,MI,MS ) +#define XORWrm(RS, MD, MB, MI, MS) _wO_r_X (0x31 ,_r2(RS) ,MD,MB,MI,MS ) +#define XORWir(IM, RD) _wOs_Mrm_sW (0x81 ,_b11,_b110 ,_r2(RD) ,_su16(IM)) +#define XORWim(IM, MD, MB, MI, MS) _wOs_r_X_sW (0x81 ,_b110 ,MD,MB,MI,MS ,_su16(IM)) + +#define XORLrr(RS, RD) _O_Mrm (0x31 ,_b11,_r4(RS),_r4(RD) ) +#define XORLmr(MD, MB, MI, MS, RD) _O_r_X (0x33 ,_r4(RD) ,MD,MB,MI,MS ) +#define XORLrm(RS, MD, MB, MI, MS) _O_r_X (0x31 ,_r4(RS) ,MD,MB,MI,MS ) +#define XORLir(IM, RD) _Os_Mrm_sL (0x81 ,_b11,_b110 ,_r4(RD) ,IM ) +#define XORLim(IM, MD, MB, MI, MS) _Os_r_X_sL (0x81 ,_b110 ,MD,MB,MI,MS ,IM ) + 
+/* x87 instructions -- yay, we found a use for octal constants :-) */ + +/* OP is a two-digit octal code: the high digit is OR-ed into the 0xd8 escape opcode, the low digit goes into the reg field. OP is parenthesized so that an expression argument keeps its meaning. */ +#define ESCmi(D,B,I,S,OP) _O_r_X(0xd8|((OP) >> 3), ((OP) & 7), D,B,I,S) +#define ESCri(RD,OP) _O_Mrm(0xd8|((OP) >> 3), _b11, ((OP) & 7), RD) + +#define ESCrri(RS,RD,OP) ((RS) == _ST0 ? ESCri(RD,((OP)|040)) \ + : (RD) == _ST0 ? ESCri(RS,(OP)) \ + : JITFAIL ("coprocessor instruction without st0")) + +#define FLDSm(D,B,I,S) ESCmi(D,B,I,S,010) /* fld m32real */ +#define FILDLm(D,B,I,S) ESCmi(D,B,I,S,030) /* fild m32int */ +#define FLDLm(D,B,I,S) ESCmi(D,B,I,S,050) /* fld m64real */ +#define FILDWm(D,B,I,S) ESCmi(D,B,I,S,070) /* fild m16int */ +#define FSTSm(D,B,I,S) ESCmi(D,B,I,S,012) /* fst m32real */ +#define FISTLm(D,B,I,S) ESCmi(D,B,I,S,032) /* fist m32int */ +#define FSTLm(D,B,I,S) ESCmi(D,B,I,S,052) /* fst m64real */ +#define FISTWm(D,B,I,S) ESCmi(D,B,I,S,072) /* fist m16int */ +#define FSTPSm(D,B,I,S) ESCmi(D,B,I,S,013) /* fstp m32real */ +#define FISTPLm(D,B,I,S) ESCmi(D,B,I,S,033) /* fistp m32int */ +#define FSTPLm(D,B,I,S) ESCmi(D,B,I,S,053) /* fstp m64real */ +#define FISTPWm(D,B,I,S) ESCmi(D,B,I,S,073) /* fistp m16int */ +#define FLDTm(D,B,I,S) ESCmi(D,B,I,S,035) /* fld m80real */ +#define FILDQm(D,B,I,S) ESCmi(D,B,I,S,075) /* fild m64int */ +#define FSTPTm(D,B,I,S) ESCmi(D,B,I,S,037) /* fstp m80real */ +#define FISTPQm(D,B,I,S) ESCmi(D,B,I,S,077) /* fistp m64int */ + +#define FADDrr(RS,RD) ESCrri(RS,RD,000) +#define FMULrr(RS,RD) ESCrri(RS,RD,001) +#define FSUBrr(RS,RD) ESCrri(RS,RD,004) +#define FSUBRrr(RS,RD) ESCrri(RS,RD,005) +#define FDIVrr(RS,RD) ESCrri(RS,RD,006) +#define FDIVRrr(RS,RD) ESCrri(RS,RD,007) + +#define FLDr(RD) ESCri(RD,010) +#define FXCHr(RD) ESCri(RD,011) +#define FFREEr(RD) ESCri(RD,050) +#define FSTr(RD) ESCri(RD,052) +#define FSTPr(RD) ESCri(RD,053) +#define FCOMr(RD) ESCri(RD,002) +#define FCOMPr(RD) ESCri(RD,003) +#define FCOMIr(RD) ESCri(RD,036) +#define FCOMIPr(RD) ESCri(RD,076) +#define FUCOMr(RD) ESCri(RD,054) +#define FUCOMPr(RD) ESCri(RD,055) +#define FUCOMIr(RD)
ESCri(RD,035) +#define FUCOMIPr(RD) ESCri(RD,075) +#define FADDPr(RD) ESCri(RD,060) +#define FMULPr(RD) ESCri(RD,061) +#define FSUBPr(RD) ESCri(RD,064) +#define FSUBRPr(RD) ESCri(RD,065) +#define FDIVPr(RD) ESCri(RD,066) +#define FDIVRPr(RD) ESCri(RD,067) + +/* Only the AX/EAX destination form is encodable; RD is parenthesized so an expression argument compares as a whole. */ +#define FNSTSWr(RD) (((RD) == _AX || (RD) == _EAX) ? _OO (0xdfe0) \ + : JITFAIL ("AX or EAX expected")) +/* N byte NOPs. At most 15 bytes can be emitted (one 8-byte filler plus a 0..7 byte filler); a larger N is rejected up front, because ((N)&7) can never be out of range on its own and the request would otherwise be silently under-padded. */ +#define NOPi(N) (( (N) > 15) ? JITFAIL(".align argument too large") : \ + ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), \ + (( ((N)&7) == 7) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00)) : \ + ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_L(0x00)) : \ + ( ((N)&7) == 5) ? (_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \ +/* leal 0(,%esi), %esi */ ( ((N)&7) == 4) ? (_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \ +/* leal (,%esi), %esi */ ( ((N)&7) == 3) ? (_jit_B(0x8d),_jit_B(0x76),_jit_B(0x00)) : \ +/* movl %esi, %esi */ ( ((N)&7) == 2) ? (_jit_B(0x89),_jit_B(0xf6)) : \ + ( ((N)&7) == 1) ? (_jit_B(0x90)) : \ + 0))) + + +/*** References: */ +/* */ +/* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */ +/* Intel Corporation 1997. */ +/* */ +/* [2] "Intel Architecture Software Developer's Manual Volume 2: Instruction Set Reference", */ +/* Intel Corporation 1997.
*/ + +#endif +#endif /* __lightning_asm_h */ + diff --git a/src/runtime/c/pgf/lightning/i386/core.h b/src/runtime/c/pgf/lightning/i386/core.h new file mode 100644 index 000000000..dd9d58a87 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/core.h @@ -0,0 +1,413 @@ +/******************************** -*- C -*- **************************** + * + * Platform-independent layer (i386 version) + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * Written by Paolo Bonzini. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + ***********************************************************************/ + + + +#ifndef __lightning_core_h +#define __lightning_core_h + +#define JIT_FP _EBP +#define JIT_SP _ESP +#define JIT_RET _EAX + +#define JIT_R_NUM 3 +#define JIT_V_NUM 3 +#define JIT_R(i) (_EAX + (i)) +#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1) + +struct jit_local_state { + int framesize; + int argssize; +}; + +/* 3-parameter operation */ +#define jit_opr_(d, s1, s2, op1d, op2d) \ + ( (s2 == d) ? op1d : \ + ( ((s1 == d) ? 
(void)0 : (void)MOVLrr(s1, d)), op2d ) \ + ) + +/* 3-parameter operation, with immediate */ +#define jit_op_(d, s1, op2d) \ + ((s1 == d) ? op2d : (MOVLrr(s1, d), op2d)) \ + +/* 3-parameter operation, optimizable */ +#define jit_opo_(d, s1, s2, op1d, op2d, op12d) \ + ((s2 == d) ? op2d : \ + ((s1 == d) ? op1d : op12d)) + +/* 3-parameter operation, optimizable, with immediate */ +#define jit_opi_(d, rs, opdi, opdri) \ + ((rs == d) ? opdi : opdri) + +/* An operand is forced into a register */ +#define jit_replace(rd, rs, forced, op) \ + ((rd == forced) ? JITSORRY("Register conflict for " # op) : \ + (rs == forced) ? op : (PUSHLr(forced), MOVLrr(rs, forced), op, POPLr(forced))) + +/* For LT, LE, ... */ +#define jit_replace8(d, op) \ + (jit_check8(d) \ + ? (MOVLir(0, d), op(d)) \ + : (PUSHLr(_EAX), MOVLir(0, _EAX), op(_EAX), MOVLrr(_EAX, (d)), POPLr(_EAX))) + +#define jit_bool_r(d, s1, s2, op) \ + (CMPLrr(s2, s1), jit_replace8(d, op)) + +#define jit_bool_i(d, rs, is, op) \ + (CMPLir(is, rs), jit_replace8(d, op)) + +/* When CMP with 0 can be replaced with TEST */ +#define jit_bool_i0(d, rs, is, op, op0) \ + ((is) != 0 \ + ? (CMPLir(is, rs), jit_replace8(d, op)) \ + : (TESTLrr(rs, rs), jit_replace8(d, op0))) + +/* For BLT, BLE, ... */ +#define jit_bra_r(s1, s2, op) (CMPLrr(s2, s1), op, _jit.x.pc) +#define jit_bra_i(rs, is, op) (CMPLir(is, rs), op, _jit.x.pc) + +/* When CMP with 0 can be replaced with TEST */ +#define jit_bra_i0(rs, is, op, op0) \ + ( (is) == 0 ? (TESTLrr(rs, rs), op0, _jit.x.pc) : (CMPLir(is, rs), op, _jit.x.pc)) + +/* Used to implement ldc, stc, ... */ +#define jit_check8(rs) ( (rs) <= _EBX ) +#define jit_reg8(rs) ( ((rs) == _SI || (rs) == _DI) ? _AL : ((rs) & _BH) | _AL ) +#define jit_reg16(rs) ( ((rs) & _BH) | _AX ) + +/* In jit_replace below, _EBX is dummy */ +#define jit_movbrm(rs, dd, db, di, ds) \ + (jit_check8(rs) \ + ? 
MOVBrm(jit_reg8(rs), dd, db, di, ds) \ + : jit_replace(_EBX, rs, _EAX, MOVBrm(_AL, dd, db, di, ds))) + +/* Reduce arguments of XOR/OR/TEST */ +#define jit_reduce_(op) op +#define jit_reduce(op, is, rs) \ + (_u8P(is) && jit_check8(rs) ? jit_reduce_(op##Bir(is, jit_reg8(rs))) : \ + (_u16P(is) ? jit_reduce_(op##Wir(is, jit_reg16(rs))) : \ + jit_reduce_(op##Lir(is, rs)) )) + +/* Helper macros for MUL/DIV/IDIV */ +#define jit_might(d, s1, op) \ + ((s1 == d) ? 0 : op) + +#define jit_mulr_ui_(s1, s2) jit_opr_(_EAX, s1, s2, MULLr(s1), MULLr(s2)) +#define jit_mulr_i_(s1, s2) jit_opr_(_EAX, s1, s2, IMULLr(s1), IMULLr(s2)) + + +/* Signed widening multiply by an immediate: the immediate is loaded into EAX (or EDX when rs is EAX) and the one-operand IMUL is applied to the other register. */ +#define jit_muli_i_(is, rs) \ + (MOVLir(is, rs == _EAX ? _EDX : _EAX), \ + IMULLr(rs == _EAX ? _EDX : rs)) + +/* Unsigned counterpart of jit_muli_i_. It must use the one-operand MUL, not IMUL: jit_hmuli_ui takes the high word from EDX, and the signed and unsigned high words differ whenever an operand has bit 31 set. */ +#define jit_muli_ui_(is, rs) \ + (MOVLir(is, rs == _EAX ? _EDX : _EAX), \ + MULLr(rs == _EAX ? _EDX : rs)) + +#define jit_divi_i_(result, d, rs, is) \ + (jit_might (d, _EAX, PUSHLr(_EAX)), \ + jit_might (d, _ECX, PUSHLr(_ECX)), \ + jit_might (d, _EDX, PUSHLr(_EDX)), \ + jit_might (rs, _EAX, MOVLrr(rs, _EAX)), \ + jit_might (rs, _EDX, MOVLrr(rs, _EDX)), \ + MOVLir(is, _ECX), \ + SARLir(31, _EDX), \ + IDIVLr(_ECX), \ + jit_might(d, result, MOVLrr(result, d)), \ + jit_might(d, _EDX, POPLr(_EDX)), \ + jit_might(d, _ECX, POPLr(_ECX)), \ + jit_might(d, _EAX, POPLr(_EAX))) + +#define jit_divr_i_(result, d, s1, s2) \ + (jit_might (d, _EAX, PUSHLr(_EAX)), \ + jit_might (d, _ECX, PUSHLr(_ECX)), \ + jit_might (d, _EDX, PUSHLr(_EDX)), \ + ((s1 == _ECX) ? PUSHLr(_ECX) : 0), \ + jit_might (s2, _ECX, MOVLrr(s2, _ECX)), \ + ((s1 == _ECX) ?
POPLr(_EDX) : \ + jit_might (s1, _EDX, MOVLrr(s1, _EDX))), \ + MOVLrr(_EDX, _EAX), \ + SARLir(31, _EDX), \ + IDIVLr(_ECX), \ + jit_might(d, result, MOVLrr(result, d)), \ + jit_might(d, _EDX, POPLr(_EDX)), \ + jit_might(d, _ECX, POPLr(_ECX)), \ + jit_might(d, _EAX, POPLr(_EAX))) + +#define jit_divi_ui_(result, d, rs, is) \ + (jit_might (d, _EAX, PUSHLr(_EAX)), \ + jit_might (d, _ECX, PUSHLr(_ECX)), \ + jit_might (d, _EDX, PUSHLr(_EDX)), \ + jit_might (rs, _EAX, MOVLrr(rs, _EAX)), \ + MOVLir(is, _ECX), \ + XORLrr(_EDX, _EDX), \ + DIVLr(_ECX), \ + jit_might(d, result, MOVLrr(result, d)), \ + jit_might(d, _EDX, POPLr(_EDX)), \ + jit_might(d, _ECX, POPLr(_ECX)), \ + jit_might(d, _EAX, POPLr(_EAX))) + +#define jit_divr_ui_(result, d, s1, s2) \ + (jit_might (d, _EAX, PUSHLr(_EAX)), \ + jit_might (d, _ECX, PUSHLr(_ECX)), \ + jit_might (d, _EDX, PUSHLr(_EDX)), \ + ((s1 == _ECX) ? PUSHLr(_ECX) : 0), \ + jit_might (s2, _ECX, MOVLrr(s2, _ECX)), \ + ((s1 == _ECX) ? POPLr(_EAX) : \ + jit_might (s1, _EAX, MOVLrr(s1, _EAX))), \ + XORLrr(_EDX, _EDX), \ + DIVLr(_ECX), \ + jit_might(d, result, MOVLrr(result, d)), \ + jit_might(d, _EDX, POPLr(_EDX)), \ + jit_might(d, _ECX, POPLr(_ECX)), \ + jit_might(d, _EAX, POPLr(_EAX))) + + +/* ALU */ +#define jit_addi_i(d, rs, is) jit_opi_((d), (rs), ADDLir((is), (d)), LEALmr((is), (rs), 0, 0, (d)) ) +#define jit_addr_i(d, s1, s2) jit_opo_((d), (s1), (s2), ADDLrr((s2), (d)), ADDLrr((s1), (d)), LEALmr(0, (s1), (s2), 1, (d)) ) +#define jit_addci_i(d, rs, is) jit_op_ ((d), (rs), ADDLir((is), (d)) ) +#define jit_addcr_i(d, s1, s2) jit_opr_((d), (s1), (s2), ADDLrr((s1), (d)), ADDLrr((s2), (d)) ) +#define jit_addxi_i(d, rs, is) jit_op_ ((d), (rs), ADCLir((is), (d)) ) +#define jit_addxr_i(d, s1, s2) jit_opr_((d), (s1), (s2), ADCLrr((s1), (d)), ADCLrr((s2), (d)) ) +#define jit_andi_i(d, rs, is) jit_op_ ((d), (rs), ANDLir((is), (d)) ) +#define jit_andr_i(d, s1, s2) jit_opr_((d), (s1), (s2), ANDLrr((s1), (d)), ANDLrr((s2), (d)) ) +#define jit_orr_i(d, s1, 
s2) jit_opr_((d), (s1), (s2), ORLrr((s1), (d)), ORLrr((s2), (d)) ) +#define jit_subr_i(d, s1, s2) jit_opr_((d), (s1), (s2), (SUBLrr((s1), (d)), NEGLr(d)), SUBLrr((s2), (d)) ) +#define jit_subcr_i(d, s1, s2) jit_subr_i((d), (s1), (s2)) +#define jit_subxr_i(d, s1, s2) jit_opr_((d), (s1), (s2), SBBLrr((s1), (d)), SBBLrr((s2), (d)) ) +#define jit_subxi_i(d, rs, is) jit_op_ ((d), (rs), SBBLir((is), (d)) ) +#define jit_xorr_i(d, s1, s2) jit_opr_((d), (s1), (s2), XORLrr((s1), (d)), XORLrr((s2), (d)) ) + +/* These can sometimes use byte or word versions! */ +#define jit_ori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(OR, (is), (d)) ) +#define jit_xori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(XOR, (is), (d)) ) + +#define jit_muli_i(d, rs, is) jit_op_ ((d), (rs), IMULLir((is), (d)) ) +#define jit_mulr_i(d, s1, s2) jit_opr_((d), (s1), (s2), IMULLrr((s1), (d)), IMULLrr((s2), (d)) ) + +/* As far as low bits are concerned, signed and unsigned multiplies are + exactly the same. */ +#define jit_muli_ui(d, rs, is) jit_op_ ((d), (rs), IMULLir((is), (d)) ) +#define jit_mulr_ui(d, s1, s2) jit_opr_((d), (s1), (s2), IMULLrr((s1), (d)), IMULLrr((s2), (d)) ) + +#define jit_hmuli_i(d, rs, is) \ + ((d) == _EDX ? ( PUSHLr(_EAX), jit_muli_i_((is), (rs)), POPLr(_EAX) ) : \ + ((d) == _EAX ? (PUSHLr(_EDX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ + (PUSHLr(_EDX), PUSHLr(_EAX), jit_muli_i_((is), (rs)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + +#define jit_hmulr_i(d, s1, s2) \ + ((d) == _EDX ? ( PUSHLr(_EAX), jit_mulr_i_((s1), (s2)), POPLr(_EAX) ) : \ + ((d) == _EAX ? (PUSHLr(_EDX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ + (PUSHLr(_EDX), PUSHLr(_EAX), jit_mulr_i_((s1), (s2)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + +#define jit_hmuli_ui(d, rs, is) \ + ((d) == _EDX ? ( PUSHLr(_EAX), jit_muli_ui_((is), (rs)), POPLr(_EAX) ) : \ + ((d) == _EAX ? 
(PUSHLr(_EDX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ + (PUSHLr(_EDX), PUSHLr(_EAX), jit_muli_ui_((is), (rs)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + +#define jit_hmulr_ui(d, s1, s2) \ + ((d) == _EDX ? ( PUSHLr(_EAX), jit_mulr_ui_((s1), (s2)), POPLr(_EAX) ) : \ + ((d) == _EAX ? (PUSHLr(_EDX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, _EAX), POPLr(_EDX) ) : \ + (PUSHLr(_EDX), PUSHLr(_EAX), jit_mulr_ui_((s1), (s2)), MOVLrr(_EDX, (d)), POPLr(_EAX), POPLr(_EDX) ))) + +#define jit_divi_i(d, rs, is) jit_divi_i_(_EAX, (d), (rs), (is)) +#define jit_divi_ui(d, rs, is) jit_divi_ui_(_EAX, (d), (rs), (is)) +#define jit_modi_i(d, rs, is) jit_divi_i_(_EDX, (d), (rs), (is)) +#define jit_modi_ui(d, rs, is) jit_divi_ui_(_EDX, (d), (rs), (is)) +#define jit_divr_i(d, s1, s2) jit_divr_i_(_EAX, (d), (s1), (s2)) +#define jit_divr_ui(d, s1, s2) jit_divr_ui_(_EAX, (d), (s1), (s2)) +#define jit_modr_i(d, s1, s2) jit_divr_i_(_EDX, (d), (s1), (s2)) +#define jit_modr_ui(d, s1, s2) jit_divr_ui_(_EDX, (d), (s1), (s2)) + + +/* Shifts */ +#define jit_lshi_i(d, rs, is) ((is) <= 3 ? 
LEALmr(0, 0, (rs), 1 << (is), (d)) : jit_op_ ((d), (rs), SHLLir((is), (d)) )) +#define jit_rshi_i(d, rs, is) jit_op_ ((d), (rs), SARLir((is), (d)) ) +#define jit_rshi_ui(d, rs, is) jit_op_ ((d), (rs), SHRLir((is), (d)) ) +#define jit_lshr_i(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHLLrr(_CL, (d)) )) +#define jit_rshr_i(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SARLrr(_CL, (d)) )) +#define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) )) + +/* Stack */ +#define jit_pushr_i(rs) PUSHLr(rs) +#define jit_popr_i(rs) POPLr(rs) +#define jit_prolog(n) (_jitl.framesize = 8, PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI)) + +/* The += allows for stack pollution */ + +#define jit_prepare_i(ni) (_jitl.argssize += (ni)) +#define jit_prepare_f(nf) (_jitl.argssize += (nf)) +#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) +#define jit_pusharg_i(rs) PUSHLr(rs) +#define jit_finish(sub) (jit_calli((sub)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0) +#define jit_finishr(reg) (jit_callr((reg)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0) +#define jit_retval_i(rd) jit_movr_i ((rd), _EAX) + +#define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_s() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_us() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_i() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_ui() ((_jitl.framesize += sizeof(int)) - sizeof(int)) +#define jit_arg_l() ((_jitl.framesize += sizeof(long)) - sizeof(long)) +#define jit_arg_ul() ((_jitl.framesize += sizeof(long)) - sizeof(long)) +#define jit_arg_p() ((_jitl.framesize += sizeof(long)) - sizeof(long)) + +#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float)) +#define jit_arg_d() ((_jitl.framesize += 
sizeof(double)) - sizeof(double)) + +/* Unary */ +#define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) ) +#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) ) + +#define jit_movr_i(d, rs) ((rs) == (d) ? 0 : MOVLrr((rs), (d))) +#define jit_movi_i(d, is) ((is) ? MOVLir((is), (d)) : XORLrr ((d), (d)) ) +#define jit_movi_p(d, is) (MOVLir((is), (d)), _jit.x.pc) +#define jit_patch_movi(pa,pv) (*_PSL((pa) - 4) = _jit_SL((pv))) + +/* Byte-order conversions. The 16-bit variant rotates the low word by 8; RORWir is a 16-bit instruction, so the register id is mapped through jit_reg16 like the other 16-bit operations in this file (jit_sti_s, jit_reduce). */ +#define jit_ntoh_ui(d, rs) jit_op_((d), (rs), BSWAPLr(d)) +#define jit_ntoh_us(d, rs) jit_op_((d), (rs), RORWir(8, jit_reg16(d))) + +/* Boolean */ +#define jit_ltr_i(d, s1, s2) jit_bool_r((d), (s1), (s2), SETLr ) +#define jit_ler_i(d, s1, s2) jit_bool_r((d), (s1), (s2), SETLEr ) +#define jit_gtr_i(d, s1, s2) jit_bool_r((d), (s1), (s2), SETGr ) +#define jit_ger_i(d, s1, s2) jit_bool_r((d), (s1), (s2), SETGEr ) +#define jit_eqr_i(d, s1, s2) jit_bool_r((d), (s1), (s2), SETEr ) +#define jit_ner_i(d, s1, s2) jit_bool_r((d), (s1), (s2), SETNEr ) +#define jit_ltr_ui(d, s1, s2) jit_bool_r((d), (s1), (s2), SETBr ) +#define jit_ler_ui(d, s1, s2) jit_bool_r((d), (s1), (s2), SETBEr ) +#define jit_gtr_ui(d, s1, s2) jit_bool_r((d), (s1), (s2), SETAr ) +#define jit_ger_ui(d, s1, s2) jit_bool_r((d), (s1), (s2), SETAEr ) + +#define jit_lti_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETLr, SETSr ) +#define jit_lei_i(d, rs, is) jit_bool_i ((d), (rs), (is), SETLEr ) +#define jit_gti_i(d, rs, is) jit_bool_i ((d), (rs), (is), SETGr ) +#define jit_gei_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETGEr, SETNSr ) +#define jit_eqi_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETEr, SETEr ) +#define jit_nei_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETNEr, SETNEr ) +#define jit_lti_ui(d, rs, is) jit_bool_i ((d), (rs), (is), SETBr ) +#define jit_lei_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETBEr, SETEr ) +#define jit_gti_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETAr, SETNEr ) +#define jit_gei_ui(d, rs,
is) jit_bool_i0((d), (rs), (is), SETAEr, INCLr ) + +/* Jump */ +#define jit_bltr_i(label, s1, s2) jit_bra_r((s1), (s2), JLm(label, 0,0,0) ) +#define jit_bler_i(label, s1, s2) jit_bra_r((s1), (s2), JLEm(label,0,0,0) ) +#define jit_bgtr_i(label, s1, s2) jit_bra_r((s1), (s2), JGm(label, 0,0,0) ) +#define jit_bger_i(label, s1, s2) jit_bra_r((s1), (s2), JGEm(label,0,0,0) ) +#define jit_beqr_i(label, s1, s2) jit_bra_r((s1), (s2), JEm(label, 0,0,0) ) +#define jit_bner_i(label, s1, s2) jit_bra_r((s1), (s2), JNEm(label,0,0,0) ) +#define jit_bltr_ui(label, s1, s2) jit_bra_r((s1), (s2), JBm(label, 0,0,0) ) +#define jit_bler_ui(label, s1, s2) jit_bra_r((s1), (s2), JBEm(label,0,0,0) ) +#define jit_bgtr_ui(label, s1, s2) jit_bra_r((s1), (s2), JAm(label, 0,0,0) ) +#define jit_bger_ui(label, s1, s2) jit_bra_r((s1), (s2), JAEm(label,0,0,0) ) +#define jit_bmsr_i(label, s1, s2) (TESTLrr((s1), (s2)), JNZm(label,0,0,0), _jit.x.pc) +#define jit_bmcr_i(label, s1, s2) (TESTLrr((s1), (s2)), JZm(label,0,0,0), _jit.x.pc) +#define jit_boaddr_i(label, s1, s2) (ADDLrr((s2), (s1)), JOm(label,0,0,0), _jit.x.pc) +#define jit_bosubr_i(label, s1, s2) (SUBLrr((s2), (s1)), JOm(label,0,0,0), _jit.x.pc) +#define jit_boaddr_ui(label, s1, s2) (ADDLrr((s2), (s1)), JCm(label,0,0,0), _jit.x.pc) +#define jit_bosubr_ui(label, s1, s2) (SUBLrr((s2), (s1)), JCm(label,0,0,0), _jit.x.pc) + +#define jit_blti_i(label, rs, is) jit_bra_i0((rs), (is), JLm(label, 0,0,0), JSm(label, 0,0,0) ) +#define jit_blei_i(label, rs, is) jit_bra_i ((rs), (is), JLEm(label,0,0,0) ) +#define jit_bgti_i(label, rs, is) jit_bra_i ((rs), (is), JGm(label, 0,0,0) ) +#define jit_bgei_i(label, rs, is) jit_bra_i0((rs), (is), JGEm(label,0,0,0), JNSm(label,0,0,0) ) +#define jit_beqi_i(label, rs, is) jit_bra_i0((rs), (is), JEm(label, 0,0,0), JEm(label, 0,0,0) ) +#define jit_bnei_i(label, rs, is) jit_bra_i0((rs), (is), JNEm(label,0,0,0), JNEm(label,0,0,0) ) +#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JBm(label, 0,0,0) ) +#define 
jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JBEm(label,0,0,0), JEm(label, 0,0,0) ) +#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JAm(label, 0,0,0), JNEm(label,0,0,0) ) +#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JAEm(label,0,0,0) ) +#define jit_boaddi_i(label, rs, is) (ADDLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc) +#define jit_bosubi_i(label, rs, is) (SUBLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc) +#define jit_boaddi_ui(label, rs, is) (ADDLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc) +#define jit_bosubi_ui(label, rs, is) (SUBLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc) + +#define jit_bmsi_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JNZm(label,0,0,0), _jit.x.pc) +#define jit_bmci_i(label, rs, is) (jit_reduce(TEST, (is), (rs)), JZm(label,0,0,0), _jit.x.pc) + +#define jit_jmpi(label) (JMPm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc) +#define jit_calli(label) (CALLm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc) +#define jit_callr(reg) (CALLsr(reg)) +#define jit_jmpr(reg) JMPsr(reg) +#define jit_patch_at(jump_pc,v) (*_PSL((jump_pc) - 4) = _jit_SL((v) - (jump_pc))) +#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_()) + +/* Memory */ +#define jit_ldi_c(d, is) MOVSBLmr((is), 0, 0, 0, (d)) +#define jit_ldr_c(d, rs) MOVSBLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_c(d, s1, s2) MOVSBLmr(0, (s1), (s2), 1, (d)) +#define jit_ldxi_c(d, rs, is) MOVSBLmr((is), (rs), 0, 0, (d)) + +#define jit_ldi_uc(d, is) MOVZBLmr((is), 0, 0, 0, (d)) +#define jit_ldr_uc(d, rs) MOVZBLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_uc(d, s1, s2) MOVZBLmr(0, (s1), (s2), 1, (d)) +#define jit_ldxi_uc(d, rs, is) MOVZBLmr((is), (rs), 0, 0, (d)) + +#define jit_sti_c(id, rs) jit_movbrm((rs), (id), 0, 0, 0) +#define jit_str_c(rd, rs) jit_movbrm((rs), 0, (rd), 0, 0) +#define jit_stxr_c(d1, d2, rs) jit_movbrm((rs), 0, (d1), (d2), 1) +#define jit_stxi_c(id, rd, rs) jit_movbrm((rs), (id), (rd), 0, 0) + +#define jit_ldi_s(d, is) MOVSWLmr((is), 
0, 0, 0, (d)) +#define jit_ldr_s(d, rs) MOVSWLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_s(d, s1, s2) MOVSWLmr(0, (s1), (s2), 1, (d)) +#define jit_ldxi_s(d, rs, is) MOVSWLmr((is), (rs), 0, 0, (d)) + +#define jit_ldi_us(d, is) MOVZWLmr((is), 0, 0, 0, (d)) +#define jit_ldr_us(d, rs) MOVZWLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_us(d, s1, s2) MOVZWLmr(0, (s1), (s2), 1, (d)) +#define jit_ldxi_us(d, rs, is) MOVZWLmr((is), (rs), 0, 0, (d)) + +#define jit_sti_s(id, rs) MOVWrm(jit_reg16(rs), (id), 0, 0, 0) +#define jit_str_s(rd, rs) MOVWrm(jit_reg16(rs), 0, (rd), 0, 0) +#define jit_stxr_s(d1, d2, rs) MOVWrm(jit_reg16(rs), 0, (d1), (d2), 1) +#define jit_stxi_s(id, rd, rs) MOVWrm(jit_reg16(rs), (id), (rd), 0, 0) + +#define jit_ldi_i(d, is) MOVLmr((is), 0, 0, 0, (d)) +#define jit_ldr_i(d, rs) MOVLmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_i(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d)) +#define jit_ldxi_i(d, rs, is) MOVLmr((is), (rs), 0, 0, (d)) + +#define jit_sti_i(id, rs) MOVLrm((rs), (id), 0, 0, 0) +#define jit_str_i(rd, rs) MOVLrm((rs), 0, (rd), 0, 0) +#define jit_stxr_i(d1, d2, rs) MOVLrm((rs), 0, (d1), (d2), 1) +#define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0) + +/* Extra */ +#define jit_nop() NOP_() + +#define _jit_alignment(pc, n) (((pc ^ _MASK(4)) + 1) & _MASK(n)) +#define jit_align(n) NOPi(_jit_alignment(_jit_UL(_jit.x.pc), (n))) + +#endif /* __lightning_core_h */ diff --git a/src/runtime/c/pgf/lightning/i386/fp.h b/src/runtime/c/pgf/lightning/i386/fp.h new file mode 100644 index 000000000..0d2725563 --- /dev/null +++ b/src/runtime/c/pgf/lightning/i386/fp.h @@ -0,0 +1,347 @@ +/******************************** -*- C -*- **************************** + * + * Run-time assembler & support macros for the i386 math coprocessor + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. 
+ * Written by Paolo Bonzini. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + ***********************************************************************/ + + +#ifndef __lightning_asm_fp_h +#define __lightning_asm_fp_h + +/* We really must map the x87 stack onto a flat register file. In practice, + we can provide something sensible and make it work on the x86 using the + stack like a file of eight registers. + + We use six or seven registers so as to have some freedom + for floor, ceil, round, (and log, tan, atn and exp). + + Not hard at all, basically play with FXCH. FXCH is mostly free, + so the generated code is not bad. Of course we special case when one + of the operands turns out to be ST0. + + Here are the macros that actually do the trick. */ + +#define JIT_FPR_NUM 6 +#define JIT_FPR(i) (i) + +#define jit_fxch(rs, op) (((rs) != 0 ? FXCHr(rs) : 0), \ + op, ((rs) != 0 ? FXCHr(rs) : 0)) + +#define jit_fp_unary(rd, s1, op) \ + ((rd) == (s1) ? jit_fxch ((rd), op) \ + : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op) \ + : (FLDr ((s1)), op, FSTPr ((rd)))) + +#define jit_fp_binary(rd, s1, s2, op, opr) \ + ((rd) == (s1) ? \ + ((s2) == 0 ? opr(0, (rd)) \ + : (s2) == (s1) ? 
jit_fxch((rd), op(0, 0)) \ + : jit_fxch((rd), op((s2), 0))) \ + : (rd) == (s2) ? jit_fxch((s1), opr(0, (rd) == 0 ? (s1) : (rd))) \ + : (FLDr (s1), op(0, (s2)+1), FSTPr((rd)+1))) + +#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr) +#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr) +#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr) +#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr) + +#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1)) +#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0)) +#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa)) + +/* - moves: + + move FPR0 to FPR3 + FST ST3 + + move FPR3 to FPR0 + FXCH ST3 + FST ST3 + + move FPR3 to FPR1 + FLD ST1 + FST ST4 Stack is rotated, so FPRn becomes STn+1 */ + +#define jit_movr_d(rd,s1) \ + ((s1) == (rd) ? 0 \ + : (s1) == 0 ? FSTr ((rd)) \ + : (rd) == 0 ? (FXCHr ((s1)), FSTr ((s1))) \ + : (FLDr ((s1)), FSTr ((rd)+1))) + +/* - loads: + + load into FPR0 + FSTP ST0 + FLD [FUBAR] + + load into FPR3 + FSTP ST3 Save old st0 into destination register + FLD [FUBAR] + FXCH ST3 Get back old st0 + + (and similarly for immediates, using the stack) */ + +#define jit_movi_f(rd,immf) \ + (_O (0x68), \ + *((float *) _jit.x.pc) = (float) immf, \ + _jit.x.uc_pc += sizeof (float), \ + jit_ldr_f((rd), _ESP), \ + ADDLir(4, _ESP)) + +union jit_double_imm { + double d; + int i[2]; +}; + +#define jit_movi_d(rd,immd) \ + (_O (0x68), \ + _jit.x.uc_pc[4] = 0x68, \ + ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \ + *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \ + _jit.x.uc_pc += 9, \ + jit_ldr_d((rd), _ESP), \ + ADDLir(8, _ESP)) + +#define jit_ldi_f(rd, is) \ + ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \ + : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldi_d(rd, is) \ + ((rd) == 0 ? 
(FSTPr (0), FLDLm((is), 0, 0, 0)) \
+   : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1)))
+/* All loads below share one shape.  For FPR0 the old ST0 is popped and
+   the new value pushed in its place; for any other register the value is
+   pushed and then stored-and-popped into ST(rd+1), which is where FPR rd
+   sits while the extra value is on the stack.  FLDS is used by the _f
+   forms and FLDL by the _d forms.  */
+#define jit_ldr_f(rd, rs) \
+  ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \
+   : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldr_d(rd, rs) \
+  ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \
+   : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxi_f(rd, rs, is) \
+  ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \
+   : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxi_d(rd, rs, is) \
+  ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \
+   : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxr_f(rd, s1, s2) \
+  ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \
+   : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
+
+#define jit_ldxr_d(rd, s1, s2) \
+  ((rd) == 0 ? (FSTPr (0), FLDLm(0, (s1), (s2), 1)) \
+   : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
+/* Integer register to FPR: push rs so that FILD can read it from the top
+   of the CPU stack, place it in FPR rd with the same two-case pattern as
+   the loads above, then pop the stack slot back into rs.  */
+#define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \
+  ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0)) \
+   : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \
+  POPLr((rs)))
+/* Stores: wrap a non-popping store (FSTS for float, FSTL for double) in
+   jit_fxch so that FPR rs is in ST0 while the store executes.  */
+#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0))
+#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1))
+#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0))
+#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1))
+#define jit_sti_f(id, rs) jit_fxch ((rs), FSTSm((id), 0, 0, 0))
+#define jit_str_f(rd, rs) jit_fxch ((rs), FSTSm(0, (rd), 0, 0))
+#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0))
+#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0))
+
+/* Assume round to near mode.  Each wrapper pushes a copy of FPR rs and
+   hands the helper an auxiliary integer register: EDX, or EAX when rd is
+   itself EDX.  */
+#define jit_floorr_d_i(rd, rs) \
+  (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX)))
+
+#define jit_ceilr_d_i(rd, rs) \
+  (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX)))
+
+#define jit_truncr_d_i(rd, rs) \
+  (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? 
_EAX : _EDX)))
+/* jit_calc_diff(ofs): with x in ST0, store round(x) as a 32-bit integer at
+   ofs(%esp) (FIST), reload it (FILD), form the difference between x and
+   round(x) with FSUBRP, and store that difference as a float at
+   4+ofs(%esp) with FSTPS.  One push and two pops, so the copy of x that
+   the caller pushed is consumed.  Expands to a bare comma list, to be
+   used inside a parenthesized sequence.  */
+#define jit_calc_diff(ofs) \
+  FISTLm(ofs, _ESP, 0, 0), \
+  FILDLm(ofs, _ESP, 0, 0), \
+  FSUBRPr(1), \
+  FSTPSm(4+ofs, _ESP, 0, 0)
+
+/* The real meat.  These helpers expect x on top of the x87 stack (the
+   jit_*r_d_i wrappers push it) and consume it; aux is a scratch integer
+   register that is saved on entry and restored on exit.  */
+#define jit_floor2(rd, aux) \
+  (PUSHLr(aux), \
+   SUBLir(8, _ESP), \
+   jit_calc_diff(0), \
+   POPLr(rd), /* round(x) in rd */ \
+   POPLr(aux), /* x-round(x) in aux */ \
+   ADDLir(0x7FFFFFFF, aux), /* carry if x-round(x) < -0 */ \
+   SBBLir(0, rd), /* subtract 1 if carry */ \
+   POPLr(aux))
+
+#define jit_ceil2(rd, aux) \
+  (PUSHLr(aux), \
+   SUBLir(8, _ESP), \
+   jit_calc_diff(0), \
+   POPLr(rd), /* round(x) in rd */ \
+   POPLr(aux), /* x-round(x) in aux */ \
+   TESTLrr(aux, aux), \
+   SETGr(jit_reg8(aux)), /* low byte of aux = 1 if the difference is > 0 */ \
+   SHRLir(1, aux), /* ...moved into the carry flag */ \
+   ADCLir(0, rd), /* add 1 if carry */ \
+   POPLr(aux))
+
+/* a mingling of the two above: x is also stored as a float at 0(%esp) and
+   its sign selects the correction -- the floor-style ADD/SBB when the sign
+   bit is clear, the ceil-style TEST/SETG/SHR/ADC when it is set.  The
+   numbers in the trailing comments appear to be the instruction sizes the
+   hand-computed jump offsets (11 and 10) rely on.  */
+#define jit_trunc2(rd, aux) \
+  (PUSHLr(aux), \
+   SUBLir(12, _ESP), \
+   FSTSm(0, _ESP, 0, 0), \
+   jit_calc_diff(4), \
+   POPLr(aux), \
+   POPLr(rd), \
+   TESTLrr(aux, aux), \
+   POPLr(aux), \
+   JSSm(_jit.x.pc + 11, 0, 0, 0), \
+   ADDLir(0x7FFFFFFF, aux), /* 6 */ \
+   SBBLir(0, rd), /* 3 */ \
+   JMPSm(_jit.x.pc + 10, 0, 0, 0), /* 2 */ \
+   TESTLrr(aux, aux), /* 2 */ \
+   SETGr(jit_reg8(aux)), /* 3 */ \
+   SHRLir(1, aux), /* 2 */ \
+   ADCLir(0, rd), /* 3 */ \
+   POPLr(aux))
+
+/* the easy one: reserve a stack slot with a push, store FPR rs there as an
+   integer and pop the slot into rd.  The store must be the non-popping
+   FIST: no copy of rs was pushed here, so FISTP would pop a live register
+   and the closing FXCH of jit_fxch would then swap the wrong slots.  */
+#define jit_roundr_d_i(rd, rs) \
+  (PUSHLr(_EAX), \
+   jit_fxch ((rs), FISTLm(0, _ESP, 0, 0)), \
+   POPLr((rd)))
+
+/* jit_fp_test(d, s1, s2, n, _and, res): compare FPR s1 with FPR s2 and
+   leave the outcome in integer register d.  The comparison is FUCOM when
+   s1 is FPR0, otherwise FLD s1 followed by the popping FUCOMP against
+   ST(s2+1).  EAX is the work register: when d is not EAX, EAX is first
+   copied into d and the final one-byte XCHG swaps result and saved value
+   back.  The status word is fetched with FNSTSW, shifted right by n, and
+   either masked with _and or, when _and is 0, EAX is cleared with a MOV
+   (leaving the flags from the shift untouched); res then produces the
+   value in AL.  */
+#define jit_fp_test(d, s1, s2, n, _and, res) \
+  (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
+   ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \
+   FNSTSWr(_EAX), \
+   SHRLir(n, _EAX), \
+   ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \
+   res, \
+   ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */
+
+/* jit_fp_btest: same comparison, but ending in a branch.  EAX is saved and
+   restored with PUSH/POP around the status-word test, an optional compare
+   against cmp follows the mask, and res is a jump emitter invoked as
+   res(d, 0, 0, 0) -- so here d is the branch target.  */
+#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \
+  (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
+   PUSHLr(_EAX), \
+   FNSTSWr(_EAX), \
+   SHRLir(n, _EAX), \
+   ((_and) ? ANDLir ((_and), _EAX) : 0), \
+   ((cmp) ? 
CMPLir ((cmp), _AL) : 0), \
+   POPLr(_EAX), \
+   res ((d), 0, 0, 0))
+
+#define jit_nothing_needed(x) /* expands to nothing */
+
+/* After FNSTSW we have 1 if <, 40 if =, 0 if >, 45 if unordered (all
+   values in hexadecimal).  Here is how to map the values of the status
+   word's high byte to the conditions.
+
+          <     =     >     unord   valid values    condition
+   gt     no    no    yes   no      0               STSW & 45 == 0
+   lt     yes   no    no    no      1               STSW & 45 == 1
+   eq     no    yes   no    no      40              STSW & 45 == 40
+   unord  no    no    no    yes     45              bit 2 == 1
+
+   ge     no    yes   yes   no      0, 40           bit 0 == 0
+   unlt   yes   no    no    yes     1, 45           bit 0 == 1
+   ltgt   yes   no    yes   no      0, 1            bit 6 == 0
+   uneq   no    yes   no    yes     40, 45          bit 6 == 1
+   le     yes   yes   no    no      1, 40           odd parity for STSW & 41
+   ungt   no    no    yes   yes     0, 45           even parity for STSW & 41
+
+   unle   yes   yes   no    yes     1, 40, 45       STSW & 45 != 0
+   unge   no    yes   yes   yes     0, 40, 45       STSW & 45 != 1
+   ne     yes   no    yes   yes     0, 1, 45        STSW & 45 != 40
+   ord    yes   yes   yes   no      0, 1, 40        bit 2 == 0
+
+   lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with
+   the operands swapped; it is more efficient this way. */
+/* Value-producing comparisons, arguments (d, s1, s2).  With mask 0x45 the
+   answer comes from a SETcc on the masked status bits.  With mask 0 the
+   shift count (9, 15 or 11) moves status bit 8, 14 or 10 -- bit 0, 6 or 2
+   of the high byte in the table above -- into the carry flag, and AL,
+   cleared by jit_fp_test, becomes carry (ADC $0) or 1 - carry (SBB $-1).  */
+#define jit_gtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETZr (_AL))
+#define jit_ger_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, SBBBir (-1, _AL))
+#define jit_unler_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETNZr (_AL))
+#define jit_unltr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, ADCBir (0, _AL))
+#define jit_ltr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETZr (_AL))
+#define jit_ler_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, SBBBir (-1, _AL))
+#define jit_unger_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETNZr (_AL))
+#define jit_ungtr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, ADCBir (0, _AL))
+#define jit_eqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETEr (_AL)))
+#define jit_ner_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETNEr (_AL)))
+#define jit_ltgtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, SBBBir (-1, _AL))
+#define jit_uneqr_d(d, s1, s2) 
jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL))
+#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL))
+#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL))
+/* Branching comparisons.  In these macros the first parameter d is the
+   branch target: jit_fp_btest hands it to the jump emitter named in the
+   last argument.  The arguments before the emitter are the shift count,
+   the mask and an optional value to compare the masked bits against
+   (0 = no compare).  The lt/le/unge/ungt forms pass (s2), (s1), i.e. they
+   reuse the gt/ge/unle/unlt tests with the operands swapped.  */
+#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm)
+#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm)
+#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm)
+#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm)
+#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm)
+#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm)
+#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm)
+#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm)
+#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm)
+#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm)
+#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm)
+#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm)
+#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm)
+#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm)
+/* Arguments and return value.  jit_getarg_* load from JIT_FP + ofs;
+   jit_pusharg_* first lower JIT_SP by the size of the value and then store
+   FPR rs at the new JIT_SP; jit_retval_d expands to jit_movr_d(0, op1).  */
+#define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs))
+#define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs))
+#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs)))
+#define jit_pusharg_f(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jit_str_f(JIT_SP,(rs)))
+#define jit_retval_d(op1) jit_movr_d(0, (op1))
+
+/* Disabled (never compiled): raw x87 opcode sequences for a few
+   transcendental functions.  */
+#if 0
+#define jit_sin() _OO(0xd9fe) /* fsin */
+#define jit_cos() _OO(0xd9ff) /* fcos */
+#define jit_tan() (_OO(0xd9f2), /* fptan */ \
+                   FSTPr(0)) /* fstp st */
+#define jit_atn() (_OO(0xd9e8), /* fld1 */ \
+                   _OO(0xd9f3)) /* fpatan */
+#define jit_exp() (_OO(0xd9ea), /* fldl2e */ \
+                   FMULPr(1), /* fmulp 
*/ \
+                   _OO(0xd9c0), /* fld st */ \
+                   _OO(0xd9fc), /* frndint */ \
+                   _OO(0xdce9), /* fsubr */ \
+                   FXCHr(1), /* fxch st(1) */ \
+                   _OO(0xd9f0), /* f2xm1 */ \
+                   _OO(0xd9e8), /* fld1 */ \
+                   _OO(0xdec1), /* faddp */ \
+                   _OO(0xd9fd), /* fscale */ \
+                   FSTPr(1)) /* fstp st(1) */
+#define jit_log() (_OO(0xd9ed), /* fldln2 */ \
+                   FXCHr(1), /* fxch st(1) */ \
+                   _OO(0xd9f1)) /* fyl2x */
+#endif /* 0 */
+
+#endif /* __lightning_asm_fp_h */
diff --git a/src/runtime/c/pgf/lightning/i386/funcs.h b/src/runtime/c/pgf/lightning/i386/funcs.h
new file mode 100644
index 000000000..1e247049f
--- /dev/null
+++ b/src/runtime/c/pgf/lightning/i386/funcs.h
@@ -0,0 +1,91 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Platform-independent layer inline functions (i386)
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
+ * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. 
+ *
+ ***********************************************************************/
+
+
+
+#ifndef __lightning_funcs_h
+#define __lightning_funcs_h
+
+#ifdef __linux__
+#include <unistd.h>
+#include <sys/mman.h>
+#endif
+
+/* Make the generated code in [dest, end) executable.
+
+   dest  first byte of the code that was just generated
+   end   one past its last byte
+
+   On Linux the enclosing pages are mprotect'ed read/write/execute; the
+   range most recently protected is remembered in static variables so that
+   repeated calls for the same buffer cost nothing.  A range is recorded
+   only after mprotect has succeeded, and only merged with the remembered
+   one when the two actually touch.  On other systems this is a no-op.
+   The cache is plain static data: one copy per translation unit and no
+   locking.  */
+static void
+jit_flush_code(void *dest, void *end)
+{
+  /* On the x86, the PROT_EXEC bits are not handled by the MMU.
+     However, the kernel can emulate this by setting the code
+     segment's limit to the end address of the highest page
+     whose PROT_EXEC bit is set.
+
+     Linux kernels that do so and that disable by default the
+     execution of the data and stack segment are becoming more
+     and more common (Fedora, for example), so we implement our
+     jit_flush_code as an mprotect. */
+#ifdef __linux__
+  static unsigned long prev_page = 0, prev_length = 0;
+  /* Addresses and lengths are kept in unsigned long: an int would
+     truncate addresses wider than int and mix signed and unsigned
+     operands in the comparisons below.  */
+  unsigned long page, length;
+#ifdef PAGESIZE
+  const unsigned long page_size = PAGESIZE;
+#else
+  static unsigned long page_size = 0;
+  if (page_size == 0)
+    {
+      long sz = sysconf (_SC_PAGESIZE);
+      if (sz <= 0)
+        return;  /* page size unknown: nothing sensible to protect */
+      page_size = (unsigned long) sz;
+    }
+#endif
+
+  /* Round the start down and the length up to whole pages.  */
+  page = (unsigned long) dest & ~(page_size - 1);
+  length = ((unsigned long) end - page + page_size - 1) & ~(page_size - 1);
+
+  /* Simple-minded attempt at optimizing the common case where a single
+     chunk of memory is used to compile multiple functions. */
+  if (page >= prev_page && page + length <= prev_page + prev_length)
+    return;
+
+  /* Do not remember a range the kernel refused to change; the next call
+     will simply try again.  */
+  if (mprotect ((void *) page, length, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
+    return;
+
+  /* See if we can extend the previously mprotect'ed memory area towards
+     higher addresses: the starting address remains the same as before. */
+  if (page >= prev_page && page <= prev_page + prev_length)
+    prev_length = page + length - prev_page;
+
+  /* See if we can extend the previously mprotect'ed memory area towards
+     lower addresses: the highest address remains the same as before.
+     The new range must reach the old one, otherwise the gap between
+     them would be recorded as protected although it never was. */
+  else if (page < prev_page && page + length >= prev_page
+           && page + length <= prev_page + prev_length)
+    prev_length += prev_page - page, prev_page = page;
+
+  /* Nothing to do, replace the area. */
+  else
+    prev_page = page, prev_length = length;
+#else
+  (void) dest;
+  (void) end;
+#endif
+}
+
+#endif /* __lightning_funcs_h */ |
