Gizmo3D

gzAssembler.h

Go to the documentation of this file.
00001 // *****************************************************************************
00002 // File         : gzAssembler.h
00003 // Module       : gzBase
00004 // Description  : Class definition of assembler opcodes.
00005 // Author       : Anders Modén      
00006 // Product      : GizmoBase 2.1.1
00007 //      
00008 // Copyright © 2003- Saab Training Systems AB, Sweden
00009 //          
00010 // NOTE:    GizmoBase is a platform abstraction utility layer for C++. It contains 
00011 //          design patterns and C++ solutions for the advanced programmer.
00012 //
00013 //
00014 // Revision History...                          
00015 //                                  
00016 // Who  Date    Description                     
00017 //                                  
00018 // AMO  000330  Created file 
00019 //
00020 // ******************************************************************************
00021 
00030 #ifndef __GZ_ASSEMBLER_H__
00031 #define __GZ_ASSEMBLER_H__
00032 
00033 #include "gzBasicTypes.h"
00034 
00035 // Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
00036 
00037 #ifdef GZ_WIN32
00038 #pragma warning(disable:4799)
00039 #endif
00040 
00041 // Defines for operands.
00042 #define _K3D_MM0 0xc0 /* MMX register codes: 0xc0 | register number */
00043 #define _K3D_MM1 0xc1
00044 #define _K3D_MM2 0xc2
00045 #define _K3D_MM3 0xc3
00046 #define _K3D_MM4 0xc4
00047 #define _K3D_MM5 0xc5
00048 #define _K3D_MM6 0xc6
00049 #define _K3D_MM7 0xc7
00050 #define _K3D_mm0 0xc0 /* lower-case spellings of the same codes */
00051 #define _K3D_mm1 0xc1
00052 #define _K3D_mm2 0xc2
00053 #define _K3D_mm3 0xc3
00054 #define _K3D_mm4 0xc4
00055 #define _K3D_mm5 0xc5
00056 #define _K3D_mm6 0xc6
00057 #define _K3D_mm7 0xc7
00058 #define _K3D_EAX 0x00 /* integer register codes: top two bits clear, so the ModRM byte built from them has mod=00 unless a macro ORs in 0x40/0x80 */
00059 #define _K3D_ECX 0x01
00060 #define _K3D_EDX 0x02
00061 #define _K3D_EBX 0x03
00062 #define _K3D_ESI 0x06
00063 #define _K3D_EDI 0x07
00064 #define _K3D_eax 0x00 /* lower-case spellings of the same codes */
00065 #define _K3D_ecx 0x01
00066 #define _K3D_edx 0x02
00067 #define _K3D_ebx 0x03
00068 #define _K3D_esi 0x06
00069 #define _K3D_edi 0x07
00070 
00071 
00072 // General 3DNow! instruction format that is supported by 
00073 // these macros. Note that only the most basic forms of memory 
00074 // operands are supported by these macros. 
00075 
00076 #define InjK3DOps(dst,src,inst)   /* emits 0F 0F, a ModRM byte, then the opcode byte */ \
00077 {                                                       \
00078    __asm _emit 0x0f                                      \
00079    __asm _emit 0x0f                                      \
00080    __asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src   /* dst code -> bits 5..3; src code ORed in unchanged */ \
00081    __asm _emit _3DNowOpcode##inst                        /* looked up by pasting inst onto _3DNowOpcode */ \
00082 }
00083 
00084 #define InjK3DMOps(dst,src,off,inst)   /* emits 0F 0F, ModRM|0x40, off, then the opcode byte */ \
00085 {                                                       \
00086    __asm _emit 0x0f                                      \
00087    __asm _emit 0x0f                                      \
00088    __asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) /* 0x40 sets ModRM bit 6 (mod=01 when src is a 0x00..0x07 register code) */ \
00089    __asm _emit off                                       /* one displacement byte, placed before the opcode byte */ \
00090    __asm _emit _3DNowOpcode##inst                        \
00091 }
00092 
00093 #define InjMMXOps(dst,src,inst)   /* emits 0F, the opcode byte, then a ModRM byte */ \
00094 {                                                       \
00095    __asm _emit 0x0f                                      \
00096    __asm _emit _3DNowOpcode##inst                        /* looked up by pasting inst onto _3DNowOpcode */ \
00097    __asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src   /* dst code -> bits 5..3; src code ORed in unchanged */ \
00098 }
00099 
00100 #define InjMMXMOps(dst,src,off,inst)   /* emits 0F, the opcode byte, ModRM|0x40, then off */ \
00101 {                                                       \
00102    __asm _emit 0x0f                                      \
00103    __asm _emit _3DNowOpcode##inst                        \
00104    __asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) /* 0x40 sets ModRM bit 6 (mod=01 when src is a 0x00..0x07 register code) */ \
00105    __asm _emit off                                       /* one displacement byte */ \
00106 }
00107 
00108 #define _3DNowOpcodePF2ID    0x1d /* opcode bytes; the Inj* macros select one by pasting the instruction name onto _3DNowOpcode */
00109 #define _3DNowOpcodePFACC    0xae
00110 #define _3DNowOpcodePFADD    0x9e
00111 #define _3DNowOpcodePFCMPEQ  0xb0
00112 #define _3DNowOpcodePFCMPGE  0x90
00113 #define _3DNowOpcodePFCMPGT  0xa0
00114 #define _3DNowOpcodePFMAX    0xa4
00115 #define _3DNowOpcodePFMIN    0x94
00116 #define _3DNowOpcodePFMUL    0xb4
00117 #define _3DNowOpcodePFRCP    0x96
00118 #define _3DNowOpcodePFRCPIT1 0xa6
00119 #define _3DNowOpcodePFRCPIT2 0xb6
00120 #define _3DNowOpcodePFRSQRT  0x97
00121 #define _3DNowOpcodePFRSQIT1 0xa7
00122 #define _3DNowOpcodePFSUB    0x9a
00123 #define _3DNowOpcodePFSUBR   0xaa
00124 #define _3DNowOpcodePI2FD    0x0d
00125 #define _3DNowOpcodePAVGUSB  0xbf
00126 #define _3DNowOpcodePMULHRW  0xb7
00127 #define _3DNowOpcodePFNACC   0x8a
00128 #define _3DNowOpcodePFPNACC  0x8e /* name must match the PFPNACC token that PFPNACC()/PFPNACCM() paste onto _3DNowOpcode */
00129 #define _3DNowOpcodePSWAPD   0xbb
00130 #define _3DNowOpcodePMINUB   0xda /* this and the following opcodes are used through InjMMXOps/InjMMXMOps (0F, opcode, ModRM) */
00131 #define _3DNowOpcodePMAXUB   0xde
00132 #define _3DNowOpcodePMINSW   0xea
00133 #define _3DNowOpcodePMAXSW   0xee
00134 #define _3DNowOpcodePMULHUW  0xe4
00135 #define _3DNowOpcodePAVGB    0xe0
00136 #define _3DNowOpcodePAVGW    0xe3
00137 #define _3DNowOpcodePSADBW   0xf6
00138 #define _3DNowOpcodePMOVMSKB 0xd7
00139 
00140 #define _3DNowOpcodePMASKMOVQ   0xf7
00141 #define _3DNowOpcodePINSRW   0xc4
00142 #define _3DNowOpcodePEXTRW   0xc5
00143 #define _3DNowOpcodePSHUFW   0x70
00144 #define _3DNowOpcodeMOVNTQ   0xe7
00145 #define _3DNowOpcodePREFETCHT 0x18 /* shared by PREFETCHNTA/T0/T1/T2, which differ only in the ModRM reg field */
00146 
00147 
00148 #define PF2ID(dst,src)      InjK3DOps(dst, src, PF2ID) /* each wrapper forwards to InjK3DOps with its own opcode name; dst/src are bare names such as mm0 or eax */
00149 #define PFACC(dst,src)      InjK3DOps(dst, src, PFACC)
00150 #define PFADD(dst,src)      InjK3DOps(dst, src, PFADD)
00151 #define PFCMPEQ(dst,src)    InjK3DOps(dst, src, PFCMPEQ)
00152 #define PFCMPGE(dst,src)    InjK3DOps(dst, src, PFCMPGE)
00153 #define PFCMPGT(dst,src)    InjK3DOps(dst, src, PFCMPGT)
00154 #define PFMAX(dst,src)      InjK3DOps(dst, src, PFMAX)
00155 #define PFMIN(dst,src)      InjK3DOps(dst, src, PFMIN)
00156 #define PFMUL(dst,src)      InjK3DOps(dst, src, PFMUL)
00157 #define PFRCP(dst,src)      InjK3DOps(dst, src, PFRCP)
00158 #define PFRCPIT1(dst,src)   InjK3DOps(dst, src, PFRCPIT1)
00159 #define PFRCPIT2(dst,src)   InjK3DOps(dst, src, PFRCPIT2)
00160 #define PFRSQRT(dst,src)    InjK3DOps(dst, src, PFRSQRT)
00161 #define PFRSQIT1(dst,src)   InjK3DOps(dst, src, PFRSQIT1)
00162 #define PFSUB(dst,src)      InjK3DOps(dst, src, PFSUB)
00163 #define PFSUBR(dst,src)     InjK3DOps(dst, src, PFSUBR)
00164 #define PI2FD(dst,src)      InjK3DOps(dst, src, PI2FD)
00165 #define PAVGUSB(dst,src)    InjK3DOps(dst, src, PAVGUSB)
00166 #define PMULHRW(dst,src)    InjK3DOps(dst, src, PMULHRW)
00167 
00168 #define FEMMS                     /* emits the two bytes 0F 0E */ \
00169 {                                               \
00170    __asm _emit 0x0f                              \
00171    __asm _emit 0x0e                              \
00172 }
00173 
00174 #define PREFETCH(src)             /* emits 0F 0D and a ModRM byte holding only the low 3 bits of the src code */ \
00175 {                                               \
00176    __asm _emit 0x0f                              \
00177    __asm _emit 0x0d                              \
00178    __asm _emit (_K3D_##src & 0x07)               /* mod and reg fields are left at zero */ \
00179 }
00180 
00181 #define PREFETCHM(src,off)        /* as PREFETCH, with ModRM bit 6 set and one displacement byte */ \
00182 {                                               \
00183    __asm _emit 0x0f                              \
00184    __asm _emit 0x0d                             \
00185    __asm _emit (0x40 | (_K3D_##src & 0x07))     \
00186    __asm _emit off                              \
00187 }
00188 
00189 /* Prefetch with a long offset */
00190 
00191 #define PREFETCHMLONG(src,off)    /* emits 0F 0D, ModRM 0x80|reg, then off as four bytes, low byte first */ \
00192 {                                               \
00193    __asm _emit 0x0f                              \
00194    __asm _emit 0x0d                             \
00195    __asm _emit (0x80 | (_K3D_##src & 0x07))     \
00196    __asm _emit ((off) & 0x000000ff)             /* off is parenthesized so an expression argument (e.g. a+b) is masked as a whole */ \
00197    __asm _emit ((off) & 0x0000ff00) >>    8           \
00198    __asm _emit ((off) & 0x00ff0000) >>    16          \
00199    __asm _emit ((off) & 0xff000000) >>    24          \
00200 }
00201 
00202 #define PREFETCHW(src)            /* as PREFETCH, but with 0x08 ORed in, i.e. ModRM reg field = 1 */ \
00203 {                                               \
00204    __asm _emit 0x0f                              \
00205    __asm _emit 0x0d                              \
00206    __asm _emit (0x08 | (_K3D_##src & 0x07))      \
00207 }
00208 
00209 #define PREFETCHWM(src,off)       /* as PREFETCHW, with ModRM bit 6 set (0x48) and one displacement byte */ \
00210 {                                               \
00211    __asm _emit 0x0f                              \
00212    __asm _emit 0x0d                              \
00213    __asm _emit 0x48 | (_K3D_##src & 0x07)        \
00214    __asm    _emit off                               \
00215 }
00216 
00217 #define PREFETCHWMLONG(src,off)   /* emits 0F 0D, ModRM 0x88|reg, then off as four bytes, low byte first */ \
00218 {                                               \
00219    __asm _emit 0x0f                              \
00220    __asm _emit 0x0d                              \
00221    __asm _emit 0x88 | (_K3D_##src & 0x07)        \
00222    __asm _emit ((off) & 0x000000ff)             /* off is parenthesized so an expression argument (e.g. a+b) is masked as a whole */ \
00223    __asm _emit ((off) & 0x0000ff00) >>    8           \
00224    __asm _emit ((off) & 0x00ff0000) >>    16          \
00225    __asm _emit ((off) & 0xff000000) >>    24          \
00226 }
00227 
00228 #define CPUID                     /* emits the two bytes 0F A2 */ \
00229 {                                               \
00230     __asm _emit 0x0f                             \
00231     __asm _emit 0xa2                             \
00232 }
00233 
00234 
00235 /* Defines for new, K7 opcodes */
00236 #define SFENCE                    /* emits the three bytes 0F AE F8 */ \
00237 {                                               \
00238     __asm _emit 0x0f                             \
00239     __asm _emit 0xae                             \
00240     __asm _emit 0xf8                             \
00241 }
00242 
00243 #define PFNACC(dst,src)         InjK3DOps(dst,src,PFNACC) /* these three use the InjK3DOps layout (opcode byte last) */
00244 #define PFPNACC(dst,src)        InjK3DOps(dst,src,PFPNACC)
00245 #define PSWAPD(dst,src)         InjK3DOps(dst,src,PSWAPD)
00246 #define PMINUB(dst,src)         InjMMXOps(dst,src,PMINUB) /* from here on: InjMMXOps layout (0F, opcode, ModRM) */
00247 #define PMAXUB(dst,src)         InjMMXOps(dst,src,PMAXUB)
00248 #define PMINSW(dst,src)         InjMMXOps(dst,src,PMINSW)
00249 #define PMAXSW(dst,src)         InjMMXOps(dst,src,PMAXSW)
00250 #define PMULHUW(dst,src)        InjMMXOps(dst,src,PMULHUW)
00251 #define PAVGB(dst,src)          InjMMXOps(dst,src,PAVGB)
00252 #define PAVGW(dst,src)          InjMMXOps(dst,src,PAVGW)
00253 #define PSADBW(dst,src)         InjMMXOps(dst,src,PSADBW)
00254 #define PMOVMSKB(dst,src)       InjMMXOps(dst,src,PMOVMSKB)
00255 #define PMASKMOVQ(dst,src)      InjMMXOps(dst,src,PMASKMOVQ)
00256 #define PINSRW(dst,src,msk)     InjMMXOps(dst,src,PINSRW) __asm _emit msk /* msk is emitted as one extra byte after, and outside, the braced group */
00257 #define PEXTRW(dst,src,msk)     InjMMXOps(dst,src,PEXTRW) __asm _emit msk
00258 #define PSHUFW(dst,src,msk)     InjMMXOps(dst,src,PSHUFW) __asm _emit msk
00259 #define MOVNTQ(dst,src)         InjMMXOps(src,dst,MOVNTQ) /* arguments swapped: src lands in the ModRM reg field */
00260 #define PREFETCHNTA(mem)        InjMMXOps(mm0,mem,PREFETCHT) /* mm0..mm3 only supply the values 0..3 for the ModRM reg field */
00261 #define PREFETCHT0(mem)         InjMMXOps(mm1,mem,PREFETCHT)
00262 #define PREFETCHT1(mem)         InjMMXOps(mm2,mem,PREFETCHT)
00263 #define PREFETCHT2(mem)         InjMMXOps(mm3,mem,PREFETCHT)
00264 
00265 
00266 /* Memory/offset versions of the opcodes */
00267 #define PAVGUSBM(dst,src,off)   InjK3DMOps(dst,src,off,PAVGUSB) /* each wrapper forwards to InjK3DMOps, which adds the one-byte offset off */
00268 #define PF2IDM(dst,src,off)     InjK3DMOps(dst,src,off,PF2ID)
00269 #define PFACCM(dst,src,off)     InjK3DMOps(dst,src,off,PFACC)
00270 #define PFADDM(dst,src,off)     InjK3DMOps(dst,src,off,PFADD)
00271 #define PFCMPEQM(dst,src,off)   InjK3DMOps(dst,src,off,PFCMPEQ)
00272 #define PFCMPGEM(dst,src,off)   InjK3DMOps(dst,src,off,PFCMPGE)
00273 #define PFCMPGTM(dst,src,off)   InjK3DMOps(dst,src,off,PFCMPGT)
00274 #define PFMAXM(dst,src,off)     InjK3DMOps(dst,src,off,PFMAX)
00275 #define PFMINM(dst,src,off)     InjK3DMOps(dst,src,off,PFMIN)
00276 #define PFMULM(dst,src,off)     InjK3DMOps(dst,src,off,PFMUL)
00277 #define PFRCPM(dst,src,off)     InjK3DMOps(dst,src,off,PFRCP)
00278 #define PFRCPIT1M(dst,src,off)  InjK3DMOps(dst,src,off,PFRCPIT1)
00279 #define PFRCPIT2M(dst,src,off)  InjK3DMOps(dst,src,off,PFRCPIT2)
00280 #define PFRSQRTM(dst,src,off)   InjK3DMOps(dst,src,off,PFRSQRT)
00281 #define PFRSQIT1M(dst,src,off)  InjK3DMOps(dst,src,off,PFRSQIT1)
00282 #define PFSUBM(dst,src,off)     InjK3DMOps(dst,src,off,PFSUB)
00283 #define PFSUBRM(dst,src,off)    InjK3DMOps(dst,src,off,PFSUBR)
00284 #define PI2FDM(dst,src,off)     InjK3DMOps(dst,src,off,PI2FD)
00285 #define PMULHRWM(dst,src,off)   InjK3DMOps(dst,src,off,PMULHRW)
00286 
00287 
00288 /* Memory/offset versions of the K7 opcodes */
00289 #define PFNACCM(dst,src,off)     InjK3DMOps(dst,src,off,PFNACC) /* these three use the InjK3DMOps layout (opcode byte last) */
00290 #define PFPNACCM(dst,src,off)    InjK3DMOps(dst,src,off,PFPNACC)
00291 #define PSWAPDM(dst,src,off)     InjK3DMOps(dst,src,off,PSWAPD)
00292 #define PMINUBM(dst,src,off)     InjMMXMOps(dst,src,off,PMINUB) /* from here on: InjMMXMOps layout (0F, opcode, ModRM|0x40, off) */
00293 #define PMAXUBM(dst,src,off)     InjMMXMOps(dst,src,off,PMAXUB)
00294 #define PMINSWM(dst,src,off)     InjMMXMOps(dst,src,off,PMINSW)
00295 #define PMAXSWM(dst,src,off)     InjMMXMOps(dst,src,off,PMAXSW)
00296 #define PMULHUWM(dst,src,off)    InjMMXMOps(dst,src,off,PMULHUW)
00297 #define PAVGBM(dst,src,off)      InjMMXMOps(dst,src,off,PAVGB)
00298 #define PAVGWM(dst,src,off)      InjMMXMOps(dst,src,off,PAVGW)
00299 #define PSADBWM(dst,src,off)     InjMMXMOps(dst,src,off,PSADBW)
00300 #define PMOVMSKBM(dst,src,off)   InjMMXMOps(dst,src,off,PMOVMSKB)
00301 #define PMASKMOVQM(dst,src,off)  InjMMXMOps(dst,src,off,PMASKMOVQ)
00302 #define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) __asm _emit msk /* msk is emitted as one extra byte after, and outside, the braced group */
00303 #define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) __asm _emit msk
00304 #define MOVNTQM(dst,src,off)     InjMMXMOps(src,dst,off,MOVNTQ) /* arguments swapped: src lands in the ModRM reg field */
00305 #define PREFETCHNTAM(mem,off)    InjMMXMOps(mm0,mem,off,PREFETCHT) /* mm0..mm3 only supply the values 0..3 for the ModRM reg field */
00306 #define PREFETCHT0M(mem,off)     InjMMXMOps(mm1,mem,off,PREFETCHT)
00307 #define PREFETCHT1M(mem,off)     InjMMXMOps(mm2,mem,off,PREFETCHT)
00308 #define PREFETCHT2M(mem,off)     InjMMXMOps(mm3,mem,off,PREFETCHT)
00309 
00310 
00311 /* Just to deal with lower case. */
00312 #define pf2id(dst,src)          PF2ID(dst,src) /* lower-case aliases: each forwards its arguments unchanged to the upper-case macro */
00313 #define pfacc(dst,src)          PFACC(dst,src)
00314 #define pfadd(dst,src)          PFADD(dst,src)
00315 #define pfcmpeq(dst,src)        PFCMPEQ(dst,src)
00316 #define pfcmpge(dst,src)        PFCMPGE(dst,src)
00317 #define pfcmpgt(dst,src)        PFCMPGT(dst,src)
00318 #define pfmax(dst,src)          PFMAX(dst,src)
00319 #define pfmin(dst,src)          PFMIN(dst,src)
00320 #define pfmul(dst,src)          PFMUL(dst,src)
00321 #define pfrcp(dst,src)          PFRCP(dst,src)
00322 #define pfrcpit1(dst,src)       PFRCPIT1(dst,src)
00323 #define pfrcpit2(dst,src)       PFRCPIT2(dst,src)
00324 #define pfrsqrt(dst,src)        PFRSQRT(dst,src)
00325 #define pfrsqit1(dst,src)       PFRSQIT1(dst,src)
00326 #define pfsub(dst,src)          PFSUB(dst,src)
00327 #define pfsubr(dst,src)         PFSUBR(dst,src)
00328 #define pi2fd(dst,src)          PI2FD(dst,src)
00329 #define femms                   FEMMS
00330 #define pavgusb(dst,src)        PAVGUSB(dst,src)
00331 #define pmulhrw(dst,src)        PMULHRW(dst,src)
00332 #define prefetch(src)           PREFETCH(src)
00333 #define prefetchw(src)          PREFETCHW(src)
00334 
00335 #define prefetchm(src,off)      PREFETCHM(src,off) /* lower-case aliases for the offset forms of PREFETCH/PREFETCHW */
00336 #define prefetchmlong(src,off)  PREFETCHMLONG(src,off)
00337 #define prefetchwm(src,off)     PREFETCHWM(src,off)
00338 #define prefetchwmlong(src,off)  PREFETCHWMLONG(src,off)
00339 
00340 #define pfnacc(dst,src)         PFNACC(dst,src) /* lower-case aliases for the K7 register-form macros */
00341 #define pfpnacc(dst,src)        PFPNACC(dst,src)
00342 #define pswapd(dst,src)         PSWAPD(dst,src)
00343 #define pminub(dst,src)         PMINUB(dst,src)
00344 #define pmaxub(dst,src)         PMAXUB(dst,src)
00345 #define pminsw(dst,src)         PMINSW(dst,src)
00346 #define pmaxsw(dst,src)         PMAXSW(dst,src)
00347 #define pmulhuw(dst,src)        PMULHUW(dst,src)
00348 #define pavgb(dst,src)          PAVGB(dst,src)
00349 #define pavgw(dst,src)          PAVGW(dst,src)
00350 #define psadbw(dst,src)         PSADBW(dst,src)
00351 #define pmovmskb(dst,src)       PMOVMSKB(dst,src)
00352 #define pmaskmovq(dst,src)      PMASKMOVQ(dst,src)
00353 #define pinsrw(dst,src,msk)     PINSRW(dst,src,msk)
00354 #define pextrw(dst,src,msk)     PEXTRW(dst,src,msk)
00355 #define pshufw(dst,src,msk)     PSHUFW(dst,src,msk)
00356 #define movntq(dst,src)         MOVNTQ(dst,src)
00357 #define prefetchnta(mem)        PREFETCHNTA(mem)
00358 #define prefetcht0(mem)         PREFETCHT0(mem)
00359 #define prefetcht1(mem)         PREFETCHT1(mem)
00360 #define prefetcht2(mem)         PREFETCHT2(mem)
00361 
00362 
00363 #define pavgusbm(dst,src,off)   PAVGUSBM(dst,src,off) /* lower-case aliases for the memory/offset 3DNow! macros */
00364 #define pf2idm(dst,src,off)     PF2IDM(dst,src,off)
00365 #define pfaccm(dst,src,off)     PFACCM(dst,src,off)
00366 #define pfaddm(dst,src,off)     PFADDM(dst,src,off)
00367 #define pfcmpeqm(dst,src,off)   PFCMPEQM(dst,src,off)
00368 #define pfcmpgem(dst,src,off)   PFCMPGEM(dst,src,off)
00369 #define pfcmpgtm(dst,src,off)   PFCMPGTM(dst,src,off)
00370 #define pfmaxm(dst,src,off)     PFMAXM(dst,src,off)
00371 #define pfminm(dst,src,off)     PFMINM(dst,src,off)
00372 #define pfmulm(dst,src,off)     PFMULM(dst,src,off)
00373 #define pfrcpm(dst,src,off)     PFRCPM(dst,src,off)
00374 #define pfrcpit1m(dst,src,off)  PFRCPIT1M(dst,src,off)
00375 #define pfrcpit2m(dst,src,off)  PFRCPIT2M(dst,src,off)
00376 #define pfrsqrtm(dst,src,off)   PFRSQRTM(dst,src,off)
00377 #define pfrsqit1m(dst,src,off)  PFRSQIT1M(dst,src,off)
00378 #define pfsubm(dst,src,off)     PFSUBM(dst,src,off)
00379 #define pfsubrm(dst,src,off)    PFSUBRM(dst,src,off)
00380 #define pi2fdm(dst,src,off)     PI2FDM(dst,src,off)
00381 #define pmulhrwm(dst,src,off)   PMULHRWM(dst,src,off)
00382 #define cpuid                   CPUID
00383 #define sfence                  SFENCE
00384 
00385 #define pfnaccm(dst,src,off)    PFNACCM(dst,src,off) /* lower-case aliases for the memory/offset K7 macros */
00386 #define pfpnaccm(dst,src,off)   PFPNACCM(dst,src,off)
00387 #define pswapdm(dst,src,off)    PSWAPDM(dst,src,off)
00388 #define pminubm(dst,src,off)    PMINUBM(dst,src,off)
00389 #define pmaxubm(dst,src,off)    PMAXUBM(dst,src,off)
00390 #define pminswm(dst,src,off)    PMINSWM(dst,src,off)
00391 #define pmaxswm(dst,src,off)    PMAXSWM(dst,src,off)
00392 #define pmulhuwm(dst,src,off)   PMULHUWM(dst,src,off)
00393 #define pavgbm(dst,src,off)     PAVGBM(dst,src,off)
00394 #define pavgwm(dst,src,off)     PAVGWM(dst,src,off)
00395 #define psadbwm(dst,src,off)    PSADBWM(dst,src,off)
00396 #define pmovmskbm(dst,src,off)  PMOVMSKBM(dst,src,off)
00397 #define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off)
00398 #define pinsrwm(dst,src,off,msk)    PINSRWM(dst,src,off,msk)
00399 #define pextrwm(dst,src,off,msk)    PEXTRWM(dst,src,off,msk) /* NOTE(review): no PEXTRWM macro is defined in the visible part of this header -- confirm before use */
00400 #define pshufwm(dst,src,off,msk)    PSHUFWM(dst,src,off,msk)
00401 #define movntqm(dst,src,off)    MOVNTQM(dst,src,off)
00402 #define prefetchntam(mem,off)   PREFETCHNTAM(mem,off) /* forwards to the two-argument memory/offset macros; the one-argument PREFETCHNTA/PREFETCHTn forms cannot accept off */
00403 #define prefetcht0m(mem,off)    PREFETCHT0M(mem,off)
00404 #define prefetcht1m(mem,off)    PREFETCHT1M(mem,off)
00405 #define prefetcht2m(mem,off)    PREFETCHT2M(mem,off)
00406 
00407 // SIMD Registers 
00408 #define _xmm0 (0xc0) /* SIMD register codes: 0xc0 | register number, passed directly as dst/src to the macros below */
00409 #define _xmm1 (0xc1)
00410 #define _xmm2 (0xc2)
00411 #define _xmm3 (0xc3)
00412 #define _xmm4 (0xc4)
00413 #define _xmm5 (0xc5)
00414 #define _xmm6 (0xc6)
00415 #define _xmm7 (0xc7)
00416 
00417 // MMX Registers 
00418 #define _mm0 (0xc0) /* MMX register codes; numerically identical to the _xmmN codes */
00419 #define _mm1 (0xc1)
00420 #define _mm2 (0xc2)
00421 #define _mm3 (0xc3)
00422 #define _mm4 (0xc4)
00423 #define _mm5 (0xc5)
00424 #define _mm6 (0xc6)
00425 #define _mm7 (0xc7)
00426 
00427 // Integer registers used as address pointers
00428 #define _eax_ptr (0) /* pointer codes are bare ModRM r/m values (top two bits clear), unlike the 0xc0-based _reg codes */
00429 #define _ebx_ptr (3)
00430 #define _ecx_ptr (1)
00431 #define _edx_ptr (2)
00432 #define _esi_ptr (6)
00433 #define _edi_ptr (7)
00434 #define _ebp_ptr (5) /* NOTE(review): in x86 ModRM, r/m=5 with mod=00 means a bare 32-bit displacement, not [ebp]; the macros without an offset emit no displacement -- confirm before use */
00435 #define _esp_ptr (4) /* NOTE(review): in x86 ModRM, r/m=4 requires a following SIB byte, which none of the visible macros emit -- confirm before use */
00436 
00437 // actual integer registers
00438 #define _eax_reg (0xc0) /* integer register codes: 0xc0 | register number, so the register itself (not memory) is the operand */
00439 #define _ebx_reg (0xc3)
00440 #define _ecx_reg (0xc1)
00441 #define _edx_reg (0xc2)
00442 #define _esi_reg (0xc6)
00443 #define _edi_reg (0xc7)
00444 #define _ebp_reg (0xc5)
00445 #define _esp_reg (0xc4)
00446 
00447 // compare values
00448 #define _eq         (0) /* predicate codes 0..7; NOTE(review): presumably the immediate for compare macros defined outside this view -- confirm */
00449 #define _lt         (1)
00450 #define _le         (2)
00451 #define _unordered  (3)
00452 
00453 #define _ne         (4) /* codes 4..7 are the negations of 0..3; each has two spellings */
00454 #define _neq        (4)
00455 #define _ge         (5)
00456 #define _nlt        (5)
00457 #define _gt         (6)
00458 #define _nle        (6)
00459 #define _ordered    (7)
00460 // Some better names for ordered and unordered
00461 #define _qnan       (3) // true if one of the inputs is a QNAN
00462 #define _num        (7) // false if one of the inputs is a QNAN
00463 
00464 //--------------------------------------------------------------------------
00465 // MOVE INSTRUCTIONS
00466 //--------------------------------------------------------------------------
00467 
00468 #define movaps_st(dst, src)   /* emits 0F 29 + ModRM; store form: reg field = src, dst code ORed in as mod/r/m */ \
00469 {                                           \
00470     __asm _emit 0x0f                            \
00471     __asm _emit 0x29                            \
00472     __asm _emit ((src & 0x3f)<<3) | (dst)   \
00473 }
00474 
00475 #define movaps(dst, src)      /* emits 0F 28 + ModRM; load form: reg field = dst, src code ORed in as mod/r/m */ \
00476 {                                           \
00477     __asm _emit 0x0f                            \
00478     __asm _emit 0x28                            \
00479     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00480 }
00481 
00482 #define movaps_o(dst, src, off)   /* emits 0F 28, ModRM with 0x40 ORed in, then off as one displacement byte */ \
00483 {                                           \
00484     __asm _emit 0x0f                            \
00485     __asm _emit 0x28                            \
00486     __asm _emit ((dst & 0x3f)<<3) | (src) | 0x40 \
00487     __asm _emit off                                       \
00488 }
00489 
00490 #define movups_st(dst, src)   /* emits 0F 11 + ModRM; store form: reg field = src, dst code ORed in as mod/r/m */ \
00491 {                                           \
00492     __asm _emit 0x0f                            \
00493     __asm _emit 0x11                            \
00494     __asm _emit ((src & 0x3f)<<3) | (dst)   \
00495 }
00496 
00497 #define movups_o(dst, src,off)   /* emits 0F 10, ModRM with 0x40 ORed in, then off as one displacement byte */ \
00498 {                                           \
00499     __asm _emit 0x0f                            \
00500     __asm _emit 0x10                            \
00501     __asm _emit ((dst & 0x3f)<<3) | (src)|  0x40    \
00502     __asm _emit off \
00503 }
00504 
00505 #define movhps_o(dst, src,off)   /* emits 0F 16, ModRM with 0x40 ORed in, then off as one displacement byte */ \
00506 {                                           \
00507     __asm _emit 0x0f                            \
00508     __asm _emit 0x16                            \
00509     __asm _emit ((dst & 0x3f)<<3) | (src) | 0x40    \
00510     __asm _emit off \
00511 }
00512 
00513 #define movlps_o(dst, src,off)   /* emits 0F 12, ModRM with 0x40 ORed in, then off as one displacement byte */ \
00514 {                                           \
00515     __asm _emit 0x0f                            \
00516     __asm _emit 0x12                            \
00517     __asm _emit ((dst & 0x3f)<<3) | (src) | 0x40    \
00518     __asm _emit off \
00519 }
00520 
00521 #define movss_o(dst, src,off)   /* emits F3 0F 10, ModRM with 0x40 ORed in, then off as one displacement byte */ \
00522 {                                           \
00523     __asm _emit 0xf3                            \
00524     __asm _emit 0x0f                            \
00525     __asm _emit 0x10                            \
00526     __asm _emit ((dst & 0x3f)<<3) | (src)|  0x40    \
00527     __asm _emit off     \
00528 }
00529 
00530 #define movups_st_o(dst,off,src)   /* emits 0F 11, ModRM (reg field = src) with 0x40 ORed in, then off; note the (dst,off,src) argument order */ \
00531 {                                           \
00532     __asm _emit 0x0f                            \
00533     __asm _emit 0x11                            \
00534     __asm _emit ((src & 0x3f)<<3) | (dst) | 0x40    \
00535     __asm _emit off \
00536 }
00537 
00538 #define movups(dst, src)      /* emits 0F 10 + ModRM; load form: reg field = dst, src code ORed in as mod/r/m */ \
00539 {                                           \
00540     __asm _emit 0x0f                            \
00541     __asm _emit 0x10                            \
00542     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00543 }
00544 
00545 #define movss_st(dst, src)    /* emits F3 0F 11 + ModRM; store form: reg field = src, dst code ORed in as mod/r/m */ \
00546 {                                           \
00547     __asm _emit 0xf3                            \
00548     __asm _emit 0x0f                            \
00549     __asm _emit 0x11                            \
00550     __asm _emit ((src & 0x3f)<<3) | (dst)   \
00551 }
00552 
00553 #define movss(dst, src)       /* emits F3 0F 10 + ModRM; load form: reg field = dst, src code ORed in as mod/r/m */ \
00554 {                                           \
00555     __asm _emit 0xf3                            \
00556     __asm _emit 0x0f                            \
00557     __asm _emit 0x10                            \
00558     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00559 }
00560 
00561 #define movhlps(dst, src)     /* emits 0F 12 + ModRM; same bytes as movlps -- the src code (register vs pointer) decides which form results */ \
00562 {                                           \
00563     __asm _emit 0x0f                            \
00564     __asm _emit 0x12                            \
00565     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00566 }
00567 
00568 #define movlhps(dst, src)     /* emits 0F 16 + ModRM; same bytes as movhps -- the src code (register vs pointer) decides which form results */ \
00569 {                                           \
00570     __asm _emit 0x0f                            \
00571     __asm _emit 0x16                            \
00572     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00573 }
00574 
00575 #define movmskps(dst, src)    /* emits 0F 50 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00576 {                                           \
00577     __asm _emit 0x0f                            \
00578     __asm _emit 0x50                            \
00579     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00580 }
00581 
00582 #define movntps(dst, src)     /* emits 0F 2B + ModRM; store form: reg field = src, dst code ORed in as mod/r/m */ \
00583 {                                           \
00584     __asm _emit 0x0f                            \
00585     __asm _emit 0x2b                            \
00586     __asm _emit ((src & 0x3f)<<3) | (dst)   \
00587 }
00588 
00589 #define shufps(dst, src, imm)   /* emits 0F C6 + ModRM, then imm as one trailing byte */ \
00590 {                                           \
00591     __asm _emit 0x0f                            \
00592     __asm _emit 0xC6                            \
00593     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00594     __asm _emit imm                          \
00595 }
00596 
00597 #define unpckhps(dst, src)    /* emits 0F 15 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00598 {                                           \
00599     __asm _emit 0x0f                            \
00600     __asm _emit 0x15                            \
00601     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00602 }
00603 
00604 #define unpcklps(dst, src)    /* emits 0F 14 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00605 {                                           \
00606     __asm _emit 0x0f                            \
00607     __asm _emit 0x14                            \
00608     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00609 }
00610 
00611 #define movhps(dst, src)      /* emits 0F 16 + ModRM; expansion is identical to movlhps, so src should be a pointer code for the memory form */ \
00612 {                                           \
00613     __asm _emit 0x0f                            \
00614     __asm _emit 0x16                            \
00615     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00616 }
00617 
00618 #define movhps_st(dst, src)   /* emits 0F 17 + ModRM; store form: reg field = src, dst code ORed in as mod/r/m */ \
00619 {                                           \
00620     __asm _emit 0x0f                            \
00621     __asm _emit 0x17                            \
00622     __asm _emit ((src & 0x3f)<<3) | (dst)   \
00623 }
00624 
00625 #define movlps(dst, src)      /* emits 0F 12 + ModRM; expansion is identical to movhlps, so src should be a pointer code for the memory form */ \
00626 {                                           \
00627     __asm _emit 0x0f                            \
00628     __asm _emit 0x12                            \
00629     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00630 }
00631 
00632 #define movlps_st(dst, src)   /* emits 0F 13 + ModRM; store form: reg field = src, dst code ORed in as mod/r/m */ \
00633 {                                           \
00634     __asm _emit 0x0f                            \
00635     __asm _emit 0x13                            \
00636     __asm _emit ((src & 0x3f)<<3) | (dst)   \
00637 }
00638 
00639 #define movlps_st_o(dst,off, src)   /* emits 0F 13, ModRM (reg field = src) with 0x40 ORed in, then off; note the (dst,off,src) argument order */ \
00640 {                                                       \
00641     __asm _emit 0x0f                                    \
00642     __asm _emit 0x13                                    \
00643     __asm _emit ((src & 0x3f)<<3) | (dst)   |   0x40    \
00644     __asm _emit off                                     \
00645 }
00646 
00647 //--------------------------------------------------------------------------
00648 // MATH INSTRUCTIONS
00649 //--------------------------------------------------------------------------
00650 
00651 #define addps(dst, src)       /* emits 0F 58 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00652 {                                           \
00653     __asm _emit 0x0f                            \
00654     __asm _emit 0x58                            \
00655     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00656 }
00657 
00658 #define addss(dst, src)       /* emits F3 0F 58 + ModRM; the addps bytes behind an F3 prefix */ \
00659 {                                           \
00660     __asm _emit 0xf3                            \
00661     __asm _emit 0x0f                            \
00662     __asm _emit 0x58                            \
00663     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00664 }
00665 
00666 #define subps(dst, src)       /* emits 0F 5C + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00667 {                                           \
00668     __asm _emit 0x0f                            \
00669     __asm _emit 0x5c                            \
00670     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00671 }
00672 
00673 #define subss(dst, src)       /* emits F3 0F 5C + ModRM; the subps bytes behind an F3 prefix */ \
00674 {                                           \
00675     __asm _emit 0xf3                            \
00676     __asm _emit 0x0f                            \
00677     __asm _emit 0x5c                            \
00678     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00679 }
00680 
00681 #define mulps(dst, src)       /* emits 0F 59 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00682 {                                           \
00683     __asm _emit 0x0f                            \
00684     __asm _emit 0x59                            \
00685     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00686 }
00687 
00688 #define mulss(dst, src)       /* emits F3 0F 59 + ModRM; the mulps bytes behind an F3 prefix */ \
00689 {                                           \
00690     __asm _emit 0xf3                            \
00691     __asm _emit 0x0f                            \
00692     __asm _emit 0x59                            \
00693     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00694 }
00695 
00696 #define divps(dst, src)       /* emits 0F 5E + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00697 {                                           \
00698     __asm _emit 0x0f                            \
00699     __asm _emit 0x5e                            \
00700     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00701 }
00702 
00703 #define divss(dst, src)       /* emits F3 0F 5E + ModRM; the divps bytes behind an F3 prefix */ \
00704 {                                           \
00705     __asm _emit 0xf3                            \
00706     __asm _emit 0x0f                            \
00707     __asm _emit 0x5e                            \
00708     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00709 }
00710 
00711 #define sqrtps(dst, src)      /* emits 0F 51 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00712 {                                           \
00713     __asm _emit 0x0f                            \
00714     __asm _emit 0x51                            \
00715     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00716 }
00717 
00718 #define sqrtss(dst, src)      /* emits F3 0F 51 + ModRM; the sqrtps bytes behind an F3 prefix */ \
00719 {                                           \
00720     __asm _emit 0xf3                            \
00721     __asm _emit 0x0f                            \
00722     __asm _emit 0x51                            \
00723     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00724 }
00725 
00726 #define rcpps(dst, src)       /* emits 0F 53 + ModRM; reg field = dst, src code ORed in as mod/r/m */ \
00727 {                                           \
00728     __asm _emit 0x0f                            \
00729     __asm _emit 0x53                            \
00730     __asm _emit ((dst & 0x3f)<<3) | (src)   \
00731 }
00732 
// rcpss(dst, src): emits the raw bytes F3 0F 53 plus one operand byte
// (the RCPSS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define rcpss(dst, src)                             \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x53                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00740 
// rsqrtps(dst, src): emits the raw bytes 0F 52 plus one operand byte
// (the RSQRTPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define rsqrtps(dst, src)                           \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x52                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00747 
// rsqrtss(dst, src): emits the raw bytes F3 0F 52 plus one operand byte
// (the RSQRTSS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define rsqrtss(dst, src)                           \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x52                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00755 
// maxps(dst, src): emits the raw bytes 0F 5F plus one operand byte
// (the MAXPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define maxps(dst, src)                             \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x5f                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00762 
// maxss(dst, src): emits the raw bytes F3 0F 5F plus one operand byte
// (the MAXSS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define maxss(dst, src)                             \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x5f                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00770 
// minps(dst, src): emits the raw bytes 0F 5D plus one operand byte
// (the MINPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define minps(dst, src)                             \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x5d                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00777 
// minss(dst, src): emits the raw bytes F3 0F 5D plus one operand byte
// (the MINSS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define minss(dst, src)                             \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x5d                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00785 
00786 //--------------------------------------------------------------------------
00787 // COMPARE INSTRUCTIONS
00788 //--------------------------------------------------------------------------
00789 
// cmpps(dst, src, cond): emits the raw bytes 0F C2, one operand byte and a
// trailing condition byte (the CMPPS encoding) through MSVC-style
// `__asm _emit`.
//   dst  - operand code; masked to its low 6 bits and shifted left by 3
//   src  - operand code; OR-ed into the operand byte as given
//   cond - comparison predicate byte, emitted last
// Each emit sits on its own continuation line; arguments are parenthesized
// so compound expressions bind as intended. Expands to a braced block.
#define cmpps(dst, src, cond)                       \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0xC2                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
    __asm _emit (cond)                              \
}
00795 
// Shorthands for cmpps with a fixed condition argument. The condition
// names (_eq, _lt, ...) are expected to be defined elsewhere in this header.

// Basic predicates.
#define cmpeqps(dst, src)       cmpps(dst, src, _eq)
#define cmpltps(dst, src)       cmpps(dst, src, _lt)
#define cmpleps(dst, src)       cmpps(dst, src, _le)

// Negated predicates; each has two spellings that expand identically.
#define cmpneqps(dst, src)      cmpps(dst, src, _neq)
#define cmpneps(dst, src)       cmpps(dst, src, _neq)
#define cmpnltps(dst, src)      cmpps(dst, src, _nlt)
#define cmpgeps(dst, src)       cmpps(dst, src, _nlt)
#define cmpnleps(dst, src)      cmpps(dst, src, _nle)
#define cmpgtps(dst, src)       cmpps(dst, src, _nle)

// Ordered / unordered tests.
#define cmpunordps(dst, src)    cmpps(dst, src, _unordered)
#define cmpordps(dst, src)      cmpps(dst, src, _ordered)
// cmpss(dst, src, cond): emits the raw bytes F3 0F C2, one operand byte and
// a trailing condition byte (the CMPSS encoding) through MSVC-style
// `__asm _emit`.
//   dst  - operand code; masked to its low 6 bits and shifted left by 3
//   src  - operand code; OR-ed into the operand byte as given
//   cond - comparison predicate byte, emitted last
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cmpss(dst, src, cond)                       \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0xC2                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
    __asm _emit (cond)                              \
}
// Shorthands for cmpss with a fixed condition argument. The condition
// names (_eq, _lt, ...) are expected to be defined elsewhere in this header.

// Basic predicates.
#define cmpeqss(dst, src)       cmpss(dst, src, _eq)
#define cmpltss(dst, src)       cmpss(dst, src, _lt)
#define cmpless(dst, src)       cmpss(dst, src, _le)

// Negated predicates; each has two spellings that expand identically.
#define cmpneqss(dst, src)      cmpss(dst, src, _neq)
#define cmpness(dst, src)       cmpss(dst, src, _neq)
#define cmpnltss(dst, src)      cmpss(dst, src, _nlt)
#define cmpgess(dst, src)       cmpss(dst, src, _nlt)
#define cmpnless(dst, src)      cmpss(dst, src, _nle)
#define cmpgtss(dst, src)       cmpss(dst, src, _nle)

// Ordered / unordered tests.
#define cmpunordss(dst, src)    cmpss(dst, src, _unordered)
#define cmpordss(dst, src)      cmpss(dst, src, _ordered)
// comiss(dst, src): emits the raw bytes 0F 2F plus one operand byte
// (the COMISS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define comiss(dst, src)                            \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x2f                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// ucomiss(dst, src): emits the raw bytes 0F 2E plus one operand byte
// (the UCOMISS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define ucomiss(dst, src)                           \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x2e                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
//--------------------------------------------------------------------------
// LOGICAL INSTRUCTIONS
//--------------------------------------------------------------------------
// andnps(dst, src): emits the raw bytes 0F 55 plus one operand byte
// (the ANDNPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define andnps(dst, src)                            \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x55                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// andps(dst, src): emits the raw bytes 0F 54 plus one operand byte
// (the ANDPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define andps(dst, src)                             \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x54                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// orps(dst, src): emits the raw bytes 0F 56 plus one operand byte
// (the ORPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define orps(dst, src)                              \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x56                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// xorps(dst, src): emits the raw bytes 0F 57 plus one operand byte
// (the XORPS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define xorps(dst, src)                             \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x57                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
//--------------------------------------------------------------------------
// CONVERSION INSTRUCTIONS
//--------------------------------------------------------------------------
// cvtpi2ps(dst, src): emits the raw bytes 0F 2A plus one operand byte
// (the CVTPI2PS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cvtpi2ps(dst, src)                          \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x2a                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// cvtps2pi(dst, src): emits the raw bytes 0F 2D plus one operand byte
// (the CVTPS2PI encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cvtps2pi(dst, src)                          \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x2d                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// cvtsi2ss(dst, src): emits the raw bytes F3 0F 2A plus one operand byte
// (the CVTSI2SS encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cvtsi2ss(dst, src)                          \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x2a                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// cvtss2si(dst, src): emits the raw bytes F3 0F 2D plus one operand byte
// (the CVTSS2SI encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cvtss2si(dst, src)                          \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x2d                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// cvttps2pi(dst, src): emits the raw bytes 0F 2C plus one operand byte
// (the CVTTPS2PI encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cvttps2pi(dst, src)                         \
{                                                   \
    __asm _emit 0x0f                                \
    __asm _emit 0x2c                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
// cvttss2si(dst, src): emits the raw bytes F3 0F 2C plus one operand byte
// (the CVTTSS2SI encoding) through MSVC-style `__asm _emit`.
//   dst - operand code; masked to its low 6 bits and shifted left by 3
//   src - operand code; OR-ed into the operand byte as given
// Arguments are parenthesized so compound expressions bind as intended.
// Expands to a braced block.
#define cvttss2si(dst, src)                         \
{                                                   \
    __asm _emit 0xf3                                \
    __asm _emit 0x0f                                \
    __asm _emit 0x2c                                \
    __asm _emit (((dst) & 0x3f) << 3) | (src)       \
}
00903 
//--------------------------------------------------------------------------
// INTEGER/MMX INSTRUCTIONS
//--------------------------------------------------------------------------
00905 
00906 #define maskmovq(dst, src)                  \
00907