00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00030 #ifndef __GZ_ASSEMBLER_H__
00031 #define __GZ_ASSEMBLER_H__
00032
00033 #include "gzBasicTypes.h"
00034
00035
00036
00037 #ifdef GZ_WIN32
00038 #pragma warning(disable:4799)
00039 #endif
00040
00041
/*
 * Operand codes for the 3DNow!/MMX injector macros in this header, which
 * token-paste an operand name onto the _K3D_ prefix.
 *
 * MM0..MM7 map to 0xc0 + register number.  The general-purpose registers map
 * to their bare register number; only EAX, ECX, EDX, EBX, ESI and EDI are
 * provided.  Each code has an upper-case and a lower-case spelling so the
 * operand can be written either way at the call site.
 */

/* MMX registers, upper-case spelling. */
#define _K3D_MM0    0xc0
#define _K3D_MM1    0xc1
#define _K3D_MM2    0xc2
#define _K3D_MM3    0xc3
#define _K3D_MM4    0xc4
#define _K3D_MM5    0xc5
#define _K3D_MM6    0xc6
#define _K3D_MM7    0xc7

/* MMX registers, lower-case spelling (same codes). */
#define _K3D_mm0    _K3D_MM0
#define _K3D_mm1    _K3D_MM1
#define _K3D_mm2    _K3D_MM2
#define _K3D_mm3    _K3D_MM3
#define _K3D_mm4    _K3D_MM4
#define _K3D_mm5    _K3D_MM5
#define _K3D_mm6    _K3D_MM6
#define _K3D_mm7    _K3D_MM7

/* General-purpose registers, upper-case spelling. */
#define _K3D_EAX    0x00
#define _K3D_ECX    0x01
#define _K3D_EDX    0x02
#define _K3D_EBX    0x03
#define _K3D_ESI    0x06
#define _K3D_EDI    0x07

/* General-purpose registers, lower-case spelling (same codes). */
#define _K3D_eax    _K3D_EAX
#define _K3D_ecx    _K3D_ECX
#define _K3D_edx    _K3D_EDX
#define _K3D_ebx    _K3D_EBX
#define _K3D_esi    _K3D_ESI
#define _K3D_edi    _K3D_EDI
00070
00071
00072
00073
00074
00075
/*
 * InjK3DOps(dst, src, inst)
 *
 * Emits four bytes through the inline assembler's _emit directive:
 *   0x0f, 0x0f, an operand byte, then _3DNowOpcode<inst>.
 * The operand byte is (_K3D_<dst> & 0x3f) << 3, OR-ed with _K3D_<src>.
 *
 *   dst, src  operand names, token-pasted onto _K3D_
 *   inst      instruction name, token-pasted onto _3DNowOpcode
 */
#define InjK3DOps(dst,src,inst) \
    { __asm _emit 0x0f __asm _emit 0x0f \
      __asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src) \
      __asm _emit _3DNowOpcode##inst }
00083
/*
 * InjK3DMOps(dst, src, off, inst)
 *
 * Offset form of InjK3DOps.  Emits five bytes:
 *   0x0f, 0x0f, an operand byte, off, then _3DNowOpcode<inst>.
 * The operand byte is ((_K3D_<dst> & 0x3f) << 3) | _K3D_<src> | 0x40.
 *
 *   dst, src  operand names, token-pasted onto _K3D_
 *   off       value emitted as the single byte after the operand byte
 *   inst      instruction name, token-pasted onto _3DNowOpcode
 */
#define InjK3DMOps(dst,src,off,inst) \
    { __asm _emit 0x0f __asm _emit 0x0f \
      __asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
      __asm _emit off \
      __asm _emit _3DNowOpcode##inst }
00092
/*
 * InjMMXOps(dst, src, inst)
 *
 * Emits three bytes through the inline assembler's _emit directive:
 *   0x0f, _3DNowOpcode<inst>, then an operand byte.
 * The operand byte is (_K3D_<dst> & 0x3f) << 3, OR-ed with _K3D_<src>.
 * Unlike InjK3DOps, the opcode byte comes before the operand byte.
 *
 *   dst, src  operand names, token-pasted onto _K3D_
 *   inst      instruction name, token-pasted onto _3DNowOpcode
 */
#define InjMMXOps(dst,src,inst) \
    { __asm _emit 0x0f \
      __asm _emit _3DNowOpcode##inst \
      __asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src) }
00099
/*
 * InjMMXMOps(dst, src, off, inst)
 *
 * Offset form of InjMMXOps.  Emits four bytes:
 *   0x0f, _3DNowOpcode<inst>, an operand byte, then off.
 * The operand byte is ((_K3D_<dst> & 0x3f) << 3) | _K3D_<src> | 0x40.
 *
 *   dst, src  operand names, token-pasted onto _K3D_
 *   off       value emitted as the single trailing byte
 *   inst      instruction name, token-pasted onto _3DNowOpcode
 */
#define InjMMXMOps(dst,src,off,inst) \
    { __asm _emit 0x0f \
      __asm _emit _3DNowOpcode##inst \
      __asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
      __asm _emit off }
00107
/*
 * Opcode bytes, looked up by pasting an instruction name onto _3DNowOpcode.
 *
 * The first group is emitted as the final byte by InjK3DOps / InjK3DMOps.
 */
#define _3DNowOpcodePF2ID       0x1d
#define _3DNowOpcodePFACC       0xae
#define _3DNowOpcodePFADD       0x9e
#define _3DNowOpcodePFCMPEQ     0xb0
#define _3DNowOpcodePFCMPGE     0x90
#define _3DNowOpcodePFCMPGT     0xa0
#define _3DNowOpcodePFMAX       0xa4
#define _3DNowOpcodePFMIN       0x94
#define _3DNowOpcodePFMUL       0xb4
#define _3DNowOpcodePFRCP       0x96
#define _3DNowOpcodePFRCPIT1    0xa6
#define _3DNowOpcodePFRCPIT2    0xb6
#define _3DNowOpcodePFRSQRT     0x97
#define _3DNowOpcodePFRSQIT1    0xa7
#define _3DNowOpcodePFSUB       0x9a
#define _3DNowOpcodePFSUBR      0xaa
#define _3DNowOpcodePI2FD       0x0d
#define _3DNowOpcodePAVGUSB     0xbf
#define _3DNowOpcodePMULHRW     0xb7
#define _3DNowOpcodePFNACC      0x8a
/*
 * The PFPNACC / PFPNACCM macros paste the name "PFPNACC", so the entry must
 * be spelled exactly this way; it used to be misspelled "FPPNACC", which left
 * those two macros expanding to an undefined identifier.
 */
#define _3DNowOpcodePFPNACC     0x8e
/* Former (misspelled) name, kept so existing references keep compiling. */
#define _3DNowOpcodeFPPNACC     _3DNowOpcodePFPNACC
#define _3DNowOpcodePSWAPD      0xbb

/* Second group: emitted as the second byte by InjMMXOps / InjMMXMOps. */
#define _3DNowOpcodePMINUB      0xda
#define _3DNowOpcodePMAXUB      0xde
#define _3DNowOpcodePMINSW      0xea
#define _3DNowOpcodePMAXSW      0xee
#define _3DNowOpcodePMULHUW     0xe4
#define _3DNowOpcodePAVGB       0xe0
#define _3DNowOpcodePAVGW       0xe3
#define _3DNowOpcodePSADBW      0xf6
#define _3DNowOpcodePMOVMSKB    0xd7

#define _3DNowOpcodePMASKMOVQ   0xf7
#define _3DNowOpcodePINSRW      0xc4
#define _3DNowOpcodePEXTRW      0xc5
#define _3DNowOpcodePSHUFW      0x70
#define _3DNowOpcodeMOVNTQ      0xe7
#define _3DNowOpcodePREFETCHT   0x18
00146
00147
/*
 * Two-operand instruction macros (upper-case spelling).
 * Each one forwards its operands to InjK3DOps together with its own name,
 * which InjK3DOps uses to look up the opcode byte.
 */
#define PF2ID(dst, src)       InjK3DOps(dst, src, PF2ID)
#define PFACC(dst, src)       InjK3DOps(dst, src, PFACC)
#define PFADD(dst, src)       InjK3DOps(dst, src, PFADD)
#define PFCMPEQ(dst, src)     InjK3DOps(dst, src, PFCMPEQ)
#define PFCMPGE(dst, src)     InjK3DOps(dst, src, PFCMPGE)
#define PFCMPGT(dst, src)     InjK3DOps(dst, src, PFCMPGT)
#define PFMAX(dst, src)       InjK3DOps(dst, src, PFMAX)
#define PFMIN(dst, src)       InjK3DOps(dst, src, PFMIN)
#define PFMUL(dst, src)       InjK3DOps(dst, src, PFMUL)
#define PFRCP(dst, src)       InjK3DOps(dst, src, PFRCP)
#define PFRCPIT1(dst, src)    InjK3DOps(dst, src, PFRCPIT1)
#define PFRCPIT2(dst, src)    InjK3DOps(dst, src, PFRCPIT2)
#define PFRSQRT(dst, src)     InjK3DOps(dst, src, PFRSQRT)
#define PFRSQIT1(dst, src)    InjK3DOps(dst, src, PFRSQIT1)
#define PFSUB(dst, src)       InjK3DOps(dst, src, PFSUB)
#define PFSUBR(dst, src)      InjK3DOps(dst, src, PFSUBR)
#define PI2FD(dst, src)       InjK3DOps(dst, src, PI2FD)
#define PAVGUSB(dst, src)     InjK3DOps(dst, src, PAVGUSB)
#define PMULHRW(dst, src)     InjK3DOps(dst, src, PMULHRW)
00167
/* FEMMS: emits the two bytes 0x0f 0x0e through the inline assembler. */
#define FEMMS    { __asm _emit 0x0f __asm _emit 0x0e }
00173
/*
 * PREFETCH / PREFETCHW families.
 *
 * Every macro emits 0x0f 0x0d followed by an operand byte built from the low
 * three bits of _K3D_<src>:
 *
 *   PREFETCH(src)             operand byte only
 *   PREFETCHM(src, off)       operand byte | 0x40, then off as one byte
 *   PREFETCHMLONG(src, off)   operand byte | 0x80, then off as four bytes,
 *                             least-significant byte first
 *
 * The PREFETCHW* macros are identical except that 0x08 is also OR-ed into
 * the operand byte.
 */
#define PREFETCH(src) \
    { __asm _emit 0x0f __asm _emit 0x0d \
      __asm _emit (_K3D_##src & 0x07) }

#define PREFETCHM(src,off) \
    { __asm _emit 0x0f __asm _emit 0x0d \
      __asm _emit (0x40 | (_K3D_##src & 0x07)) \
      __asm _emit off }

#define PREFETCHMLONG(src,off) \
    { __asm _emit 0x0f __asm _emit 0x0d \
      __asm _emit (0x80 | (_K3D_##src & 0x07)) \
      __asm _emit (off & 0x000000ff) \
      __asm _emit ((off & 0x0000ff00) >> 8) \
      __asm _emit ((off & 0x00ff0000) >> 16) \
      __asm _emit ((off & 0xff000000) >> 24) }

#define PREFETCHW(src) \
    { __asm _emit 0x0f __asm _emit 0x0d \
      __asm _emit (0x08 | (_K3D_##src & 0x07)) }

#define PREFETCHWM(src,off) \
    { __asm _emit 0x0f __asm _emit 0x0d \
      __asm _emit (0x48 | (_K3D_##src & 0x07)) \
      __asm _emit off }

#define PREFETCHWMLONG(src,off) \
    { __asm _emit 0x0f __asm _emit 0x0d \
      __asm _emit (0x88 | (_K3D_##src & 0x07)) \
      __asm _emit (off & 0x000000ff) \
      __asm _emit ((off & 0x0000ff00) >> 8) \
      __asm _emit ((off & 0x00ff0000) >> 16) \
      __asm _emit ((off & 0xff000000) >> 24) }
00227
/* CPUID: emits the two bytes 0x0f 0xa2 through the inline assembler. */
#define CPUID    { __asm _emit 0x0f __asm _emit 0xa2 }
00233
00234
00235
/* SFENCE: emits the three bytes 0x0f 0xae 0xf8 through the inline assembler. */
#define SFENCE   { __asm _emit 0x0f __asm _emit 0xae __asm _emit 0xf8 }
00242
/*
 * Extension instruction macros (upper-case spelling).
 *
 * PFNACC, PFPNACC and PSWAPD forward to InjK3DOps; everything else forwards
 * to InjMMXOps.  The injector looks the opcode up by the name passed as its
 * last argument.
 * NOTE(review): PFPNACC pastes to _3DNowOpcodePFPNACC - make sure the opcode
 * table defines exactly that name.
 */
#define PFNACC(dst, src)          InjK3DOps(dst, src, PFNACC)
#define PFPNACC(dst, src)         InjK3DOps(dst, src, PFPNACC)
#define PSWAPD(dst, src)          InjK3DOps(dst, src, PSWAPD)
#define PMINUB(dst, src)          InjMMXOps(dst, src, PMINUB)
#define PMAXUB(dst, src)          InjMMXOps(dst, src, PMAXUB)
#define PMINSW(dst, src)          InjMMXOps(dst, src, PMINSW)
#define PMAXSW(dst, src)          InjMMXOps(dst, src, PMAXSW)
#define PMULHUW(dst, src)         InjMMXOps(dst, src, PMULHUW)
#define PAVGB(dst, src)           InjMMXOps(dst, src, PAVGB)
#define PAVGW(dst, src)           InjMMXOps(dst, src, PAVGW)
#define PSADBW(dst, src)          InjMMXOps(dst, src, PSADBW)
#define PMOVMSKB(dst, src)        InjMMXOps(dst, src, PMOVMSKB)
#define PMASKMOVQ(dst, src)       InjMMXOps(dst, src, PMASKMOVQ)

/*
 * Forms with a trailing byte: msk is emitted right after the injector's
 * output.  The extra _emit sits after the injector expansion rather than
 * inside it.
 */
#define PINSRW(dst, src, msk)     InjMMXOps(dst, src, PINSRW) __asm _emit msk
#define PEXTRW(dst, src, msk)     InjMMXOps(dst, src, PEXTRW) __asm _emit msk
#define PSHUFW(dst, src, msk)     InjMMXOps(dst, src, PSHUFW) __asm _emit msk

/* MOVNTQ hands its operands to the injector in swapped order. */
#define MOVNTQ(dst, src)          InjMMXOps(src, dst, MOVNTQ)

/* The four prefetch hints share one opcode and differ only in the first injector operand. */
#define PREFETCHNTA(mem)          InjMMXOps(mm0, mem, PREFETCHT)
#define PREFETCHT0(mem)           InjMMXOps(mm1, mem, PREFETCHT)
#define PREFETCHT1(mem)           InjMMXOps(mm2, mem, PREFETCHT)
#define PREFETCHT2(mem)           InjMMXOps(mm3, mem, PREFETCHT)
00264
00265
00266
/*
 * Offset forms of the two-operand instruction macros (upper-case spelling,
 * suffix "M").  Each forwards dst, src and the byte offset to InjK3DMOps
 * together with the base instruction name used for the opcode lookup.
 */
#define PAVGUSBM(dst, src, off)     InjK3DMOps(dst, src, off, PAVGUSB)
#define PF2IDM(dst, src, off)       InjK3DMOps(dst, src, off, PF2ID)
#define PFACCM(dst, src, off)       InjK3DMOps(dst, src, off, PFACC)
#define PFADDM(dst, src, off)       InjK3DMOps(dst, src, off, PFADD)
#define PFCMPEQM(dst, src, off)     InjK3DMOps(dst, src, off, PFCMPEQ)
#define PFCMPGEM(dst, src, off)     InjK3DMOps(dst, src, off, PFCMPGE)
#define PFCMPGTM(dst, src, off)     InjK3DMOps(dst, src, off, PFCMPGT)
#define PFMAXM(dst, src, off)       InjK3DMOps(dst, src, off, PFMAX)
#define PFMINM(dst, src, off)       InjK3DMOps(dst, src, off, PFMIN)
#define PFMULM(dst, src, off)       InjK3DMOps(dst, src, off, PFMUL)
#define PFRCPM(dst, src, off)       InjK3DMOps(dst, src, off, PFRCP)
#define PFRCPIT1M(dst, src, off)    InjK3DMOps(dst, src, off, PFRCPIT1)
#define PFRCPIT2M(dst, src, off)    InjK3DMOps(dst, src, off, PFRCPIT2)
#define PFRSQRTM(dst, src, off)     InjK3DMOps(dst, src, off, PFRSQRT)
#define PFRSQIT1M(dst, src, off)    InjK3DMOps(dst, src, off, PFRSQIT1)
#define PFSUBM(dst, src, off)       InjK3DMOps(dst, src, off, PFSUB)
#define PFSUBRM(dst, src, off)      InjK3DMOps(dst, src, off, PFSUBR)
#define PI2FDM(dst, src, off)       InjK3DMOps(dst, src, off, PI2FD)
#define PMULHRWM(dst, src, off)     InjK3DMOps(dst, src, off, PMULHRW)
00286
00287
00288
/*
 * Offset forms of the extension instruction macros (upper-case spelling,
 * suffix "M").
 *
 * PFNACCM, PFPNACCM and PSWAPDM forward to InjK3DMOps; everything else
 * forwards to InjMMXMOps.
 * NOTE(review): PFPNACCM pastes to _3DNowOpcodePFPNACC - make sure the opcode
 * table defines exactly that name.
 */
#define PFNACCM(dst, src, off)          InjK3DMOps(dst, src, off, PFNACC)
#define PFPNACCM(dst, src, off)         InjK3DMOps(dst, src, off, PFPNACC)
#define PSWAPDM(dst, src, off)          InjK3DMOps(dst, src, off, PSWAPD)
#define PMINUBM(dst, src, off)          InjMMXMOps(dst, src, off, PMINUB)
#define PMAXUBM(dst, src, off)          InjMMXMOps(dst, src, off, PMAXUB)
#define PMINSWM(dst, src, off)          InjMMXMOps(dst, src, off, PMINSW)
#define PMAXSWM(dst, src, off)          InjMMXMOps(dst, src, off, PMAXSW)
#define PMULHUWM(dst, src, off)         InjMMXMOps(dst, src, off, PMULHUW)
#define PAVGBM(dst, src, off)           InjMMXMOps(dst, src, off, PAVGB)
#define PAVGWM(dst, src, off)           InjMMXMOps(dst, src, off, PAVGW)
#define PSADBWM(dst, src, off)          InjMMXMOps(dst, src, off, PSADBW)
#define PMOVMSKBM(dst, src, off)        InjMMXMOps(dst, src, off, PMOVMSKB)
#define PMASKMOVQM(dst, src, off)       InjMMXMOps(dst, src, off, PMASKMOVQ)

/* Forms with a trailing byte: msk is emitted right after the injector's output. */
#define PINSRWM(dst, src, off, msk)     InjMMXMOps(dst, src, off, PINSRW) __asm _emit msk
#define PSHUFWM(dst, src, off, msk)     InjMMXMOps(dst, src, off, PSHUFW) __asm _emit msk

/* MOVNTQM hands dst and src to the injector in swapped order. */
#define MOVNTQM(dst, src, off)          InjMMXMOps(src, dst, off, MOVNTQ)

/* The four prefetch hints share one opcode and differ only in the first injector operand. */
#define PREFETCHNTAM(mem, off)          InjMMXMOps(mm0, mem, off, PREFETCHT)
#define PREFETCHT0M(mem, off)           InjMMXMOps(mm1, mem, off, PREFETCHT)
#define PREFETCHT1M(mem, off)           InjMMXMOps(mm2, mem, off, PREFETCHT)
#define PREFETCHT2M(mem, off)           InjMMXMOps(mm3, mem, off, PREFETCHT)
00309
00310
00311
/*
 * Lower-case spellings.  Each macro forwards its arguments unchanged to the
 * upper-case macro of the same name.
 */
#define pf2id(dst, src)             PF2ID(dst, src)
#define pfacc(dst, src)             PFACC(dst, src)
#define pfadd(dst, src)             PFADD(dst, src)
#define pfcmpeq(dst, src)           PFCMPEQ(dst, src)
#define pfcmpge(dst, src)           PFCMPGE(dst, src)
#define pfcmpgt(dst, src)           PFCMPGT(dst, src)
#define pfmax(dst, src)             PFMAX(dst, src)
#define pfmin(dst, src)             PFMIN(dst, src)
#define pfmul(dst, src)             PFMUL(dst, src)
#define pfrcp(dst, src)             PFRCP(dst, src)
#define pfrcpit1(dst, src)          PFRCPIT1(dst, src)
#define pfrcpit2(dst, src)          PFRCPIT2(dst, src)
#define pfrsqrt(dst, src)           PFRSQRT(dst, src)
#define pfrsqit1(dst, src)          PFRSQIT1(dst, src)
#define pfsub(dst, src)             PFSUB(dst, src)
#define pfsubr(dst, src)            PFSUBR(dst, src)
#define pi2fd(dst, src)             PI2FD(dst, src)
#define femms                       FEMMS
#define pavgusb(dst, src)           PAVGUSB(dst, src)
#define pmulhrw(dst, src)           PMULHRW(dst, src)
#define prefetch(src)               PREFETCH(src)
#define prefetchw(src)              PREFETCHW(src)

/* Prefetch forms that take an offset. */
#define prefetchm(src, off)         PREFETCHM(src, off)
#define prefetchmlong(src, off)     PREFETCHMLONG(src, off)
#define prefetchwm(src, off)        PREFETCHWM(src, off)
#define prefetchwmlong(src, off)    PREFETCHWMLONG(src, off)
00339
/*
 * Lower-case spellings of the extension instruction macros.  Each macro
 * forwards its arguments unchanged to the upper-case macro of the same name.
 */
#define pfnacc(dst, src)          PFNACC(dst, src)
#define pfpnacc(dst, src)         PFPNACC(dst, src)
#define pswapd(dst, src)          PSWAPD(dst, src)
#define pminub(dst, src)          PMINUB(dst, src)
#define pmaxub(dst, src)          PMAXUB(dst, src)
#define pminsw(dst, src)          PMINSW(dst, src)
#define pmaxsw(dst, src)          PMAXSW(dst, src)
#define pmulhuw(dst, src)         PMULHUW(dst, src)
#define pavgb(dst, src)           PAVGB(dst, src)
#define pavgw(dst, src)           PAVGW(dst, src)
#define psadbw(dst, src)          PSADBW(dst, src)
#define pmovmskb(dst, src)        PMOVMSKB(dst, src)
#define pmaskmovq(dst, src)       PMASKMOVQ(dst, src)
#define pinsrw(dst, src, msk)     PINSRW(dst, src, msk)
#define pextrw(dst, src, msk)     PEXTRW(dst, src, msk)
#define pshufw(dst, src, msk)     PSHUFW(dst, src, msk)
#define movntq(dst, src)          MOVNTQ(dst, src)
#define prefetchnta(mem)          PREFETCHNTA(mem)
#define prefetcht0(mem)           PREFETCHT0(mem)
#define prefetcht1(mem)           PREFETCHT1(mem)
#define prefetcht2(mem)           PREFETCHT2(mem)
00361
00362
/*
 * Lower-case spellings of the offset-form ("M") macros, plus cpuid and
 * sfence.  Each macro forwards its arguments unchanged to the upper-case
 * macro of the same name.
 */
#define pavgusbm(dst, src, off)     PAVGUSBM(dst, src, off)
#define pf2idm(dst, src, off)       PF2IDM(dst, src, off)
#define pfaccm(dst, src, off)       PFACCM(dst, src, off)
#define pfaddm(dst, src, off)       PFADDM(dst, src, off)
#define pfcmpeqm(dst, src, off)     PFCMPEQM(dst, src, off)
#define pfcmpgem(dst, src, off)     PFCMPGEM(dst, src, off)
#define pfcmpgtm(dst, src, off)     PFCMPGTM(dst, src, off)
#define pfmaxm(dst, src, off)       PFMAXM(dst, src, off)
#define pfminm(dst, src, off)       PFMINM(dst, src, off)
#define pfmulm(dst, src, off)       PFMULM(dst, src, off)
#define pfrcpm(dst, src, off)       PFRCPM(dst, src, off)
#define pfrcpit1m(dst, src, off)    PFRCPIT1M(dst, src, off)
#define pfrcpit2m(dst, src, off)    PFRCPIT2M(dst, src, off)
#define pfrsqrtm(dst, src, off)     PFRSQRTM(dst, src, off)
#define pfrsqit1m(dst, src, off)    PFRSQIT1M(dst, src, off)
#define pfsubm(dst, src, off)       PFSUBM(dst, src, off)
#define pfsubrm(dst, src, off)      PFSUBRM(dst, src, off)
#define pi2fdm(dst, src, off)       PI2FDM(dst, src, off)
#define pmulhrwm(dst, src, off)     PMULHRWM(dst, src, off)
#define cpuid                       CPUID
#define sfence                      SFENCE
00384
/*
 * Lower-case spellings of the extension offset-form ("M") macros.  Each macro
 * forwards its arguments unchanged to the upper-case macro of the same name.
 */
#define pfnaccm(dst, src, off)          PFNACCM(dst, src, off)
#define pfpnaccm(dst, src, off)         PFPNACCM(dst, src, off)
#define pswapdm(dst, src, off)          PSWAPDM(dst, src, off)
#define pminubm(dst, src, off)          PMINUBM(dst, src, off)
#define pmaxubm(dst, src, off)          PMAXUBM(dst, src, off)
#define pminswm(dst, src, off)          PMINSWM(dst, src, off)
#define pmaxswm(dst, src, off)          PMAXSWM(dst, src, off)
#define pmulhuwm(dst, src, off)         PMULHUWM(dst, src, off)
#define pavgbm(dst, src, off)           PAVGBM(dst, src, off)
#define pavgwm(dst, src, off)           PAVGWM(dst, src, off)
#define psadbwm(dst, src, off)          PSADBWM(dst, src, off)
#define pmovmskbm(dst, src, off)        PMOVMSKBM(dst, src, off)
#define pmaskmovqm(dst, src, off)       PMASKMOVQM(dst, src, off)
#define pinsrwm(dst, src, off, msk)     PINSRWM(dst, src, off, msk)
/*
 * NOTE(review): no PEXTRWM macro is defined in the visible part of this
 * header (only PINSRWM and PSHUFWM have offset forms), so using pextrwm will
 * not compile unless PEXTRWM is supplied elsewhere - confirm or retire it.
 */
#define pextrwm(dst, src, off, msk)     PEXTRWM(dst, src, off, msk)
#define pshufwm(dst, src, off, msk)     PSHUFWM(dst, src, off, msk)
#define movntqm(dst, src, off)          MOVNTQM(dst, src, off)
/*
 * The offset-taking prefetch spellings must forward to the "M" macros; they
 * used to forward two arguments to the one-argument PREFETCHNTA / PREFETCHT0
 * / PREFETCHT1 / PREFETCHT2 macros, which cannot expand.
 */
#define prefetchntam(mem, off)          PREFETCHNTAM(mem, off)
#define prefetcht0m(mem, off)           PREFETCHT0M(mem, off)
#define prefetcht1m(mem, off)           PREFETCHT1M(mem, off)
#define prefetcht2m(mem, off)           PREFETCHT2M(mem, off)
00406
00407
/*
 * Operand codes for the SSE instruction macros in this header.  The macros
 * combine two of these codes into the operand byte emitted after the opcode:
 * one code is masked with 0x3f and shifted left by three, the other is OR-ed
 * in unchanged.
 */

/* XMM registers: 0xc0 + register number. */
#define _xmm0       (0xc0)
#define _xmm1       (0xc1)
#define _xmm2       (0xc2)
#define _xmm3       (0xc3)
#define _xmm4       (0xc4)
#define _xmm5       (0xc5)
#define _xmm6       (0xc6)
#define _xmm7       (0xc7)

/* MMX registers: 0xc0 + register number. */
#define _mm0        (0xc0)
#define _mm1        (0xc1)
#define _mm2        (0xc2)
#define _mm3        (0xc3)
#define _mm4        (0xc4)
#define _mm5        (0xc5)
#define _mm6        (0xc6)
#define _mm7        (0xc7)

/*
 * General-purpose register named as a pointer: the bare register number.
 * Presumably this selects the memory operand addressed by that register.
 * NOTE(review): in x86 operand-byte encoding, code 4 normally introduces an
 * extra index byte and code 5 without an offset selects an absolute address,
 * so _esp_ptr and _ebp_ptr may not behave like the others - confirm before use.
 */
#define _eax_ptr    (0)
#define _ecx_ptr    (1)
#define _edx_ptr    (2)
#define _ebx_ptr    (3)
#define _esp_ptr    (4)
#define _ebp_ptr    (5)
#define _esi_ptr    (6)
#define _edi_ptr    (7)

/* General-purpose register named as a register operand: 0xc0 + register number. */
#define _eax_reg    (0xc0)
#define _ecx_reg    (0xc1)
#define _edx_reg    (0xc2)
#define _ebx_reg    (0xc3)
#define _esp_reg    (0xc4)
#define _ebp_reg    (0xc5)
#define _esi_reg    (0xc6)
#define _edi_reg    (0xc7)
00446
00447
/*
 * Condition codes passed as the last argument of cmpps / cmpss, which emit
 * the code as the final byte of the instruction.  Several names share a
 * value.
 */
#define _eq         (0)
#define _lt         (1)
#define _le         (2)
#define _unordered  (3)

#define _ne         (4)
#define _neq        (4)     /* same code as _ne */
#define _ge         (5)
#define _nlt        (5)     /* same code as _ge */
#define _gt         (6)
#define _nle        (6)     /* same code as _gt */
#define _ordered    (7)

#define _qnan       (3)     /* true if one of the inputs is a QNAN  */
#define _num        (7)     /* false if one of the inputs is a QNAN */
00463
00464
00465
00466
00467
/*
 * SSE data-movement instructions, hand-encoded with the inline assembler's
 * _emit directive.  Operands are the numeric operand codes defined in this
 * header (_xmmN, _mmN, _<reg>_ptr, _<reg>_reg).
 *
 * Byte layout produced by each macro:
 *   [0xf3]  0x0f  <opcode>  <operand byte>  [off | imm]
 *
 *   plain form      operand byte = ((dst & 0x3f) << 3) | src
 *   "_st" form      dst and src trade places in that expression
 *   "_o" form       0x40 is OR-ed into the operand byte and `off` is emitted
 *                   as one extra byte
 *   ...ss macros    carry the leading 0xf3 byte
 *
 * Note the parameter order of the "_st_o" macros: (dst, off, src).
 */
#define movaps_st(dst, src)         { __asm _emit 0x0f __asm _emit 0x29 __asm _emit (((src & 0x3f)<<3) | (dst)) }
#define movaps(dst, src)            { __asm _emit 0x0f __asm _emit 0x28 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define movaps_o(dst, src, off)     { __asm _emit 0x0f __asm _emit 0x28 __asm _emit (((dst & 0x3f)<<3) | (src) | 0x40) __asm _emit off }

#define movups_st(dst, src)         { __asm _emit 0x0f __asm _emit 0x11 __asm _emit (((src & 0x3f)<<3) | (dst)) }
#define movups_o(dst, src,off)      { __asm _emit 0x0f __asm _emit 0x10 __asm _emit (((dst & 0x3f)<<3) | (src) | 0x40) __asm _emit off }
#define movhps_o(dst, src,off)      { __asm _emit 0x0f __asm _emit 0x16 __asm _emit (((dst & 0x3f)<<3) | (src) | 0x40) __asm _emit off }
#define movlps_o(dst, src,off)      { __asm _emit 0x0f __asm _emit 0x12 __asm _emit (((dst & 0x3f)<<3) | (src) | 0x40) __asm _emit off }
#define movss_o(dst, src,off)       { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x10 __asm _emit (((dst & 0x3f)<<3) | (src) | 0x40) __asm _emit off }
#define movups_st_o(dst,off,src)    { __asm _emit 0x0f __asm _emit 0x11 __asm _emit (((src & 0x3f)<<3) | (dst) | 0x40) __asm _emit off }
#define movups(dst, src)            { __asm _emit 0x0f __asm _emit 0x10 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define movss_st(dst, src)          { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x11 __asm _emit (((src & 0x3f)<<3) | (dst)) }
#define movss(dst, src)             { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x10 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define movhlps(dst, src)           { __asm _emit 0x0f __asm _emit 0x12 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define movlhps(dst, src)           { __asm _emit 0x0f __asm _emit 0x16 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define movmskps(dst, src)          { __asm _emit 0x0f __asm _emit 0x50 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define movntps(dst, src)           { __asm _emit 0x0f __asm _emit 0x2b __asm _emit (((src & 0x3f)<<3) | (dst)) }

/* shufps emits imm as one extra byte after the operand byte. */
#define shufps(dst, src, imm)       { __asm _emit 0x0f __asm _emit 0xc6 __asm _emit (((dst & 0x3f)<<3) | (src)) __asm _emit imm }
#define unpckhps(dst, src)          { __asm _emit 0x0f __asm _emit 0x15 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define unpcklps(dst, src)          { __asm _emit 0x0f __asm _emit 0x14 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define movhps(dst, src)            { __asm _emit 0x0f __asm _emit 0x16 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define movhps_st(dst, src)         { __asm _emit 0x0f __asm _emit 0x17 __asm _emit (((src & 0x3f)<<3) | (dst)) }
#define movlps(dst, src)            { __asm _emit 0x0f __asm _emit 0x12 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define movlps_st(dst, src)         { __asm _emit 0x0f __asm _emit 0x13 __asm _emit (((src & 0x3f)<<3) | (dst)) }
#define movlps_st_o(dst,off, src)   { __asm _emit 0x0f __asm _emit 0x13 __asm _emit (((src & 0x3f)<<3) | (dst) | 0x40) __asm _emit off }
00646
00647
00648
00649
00650
/*
 * SSE arithmetic instructions, hand-encoded with the inline assembler's
 * _emit directive.
 *
 * Each "...ps" macro emits   0x0f <opcode> <operand byte>.
 * Each "...ss" macro emits   0xf3 0x0f <opcode> <operand byte>  (same opcode
 * as its "...ps" partner).
 * The operand byte is ((dst & 0x3f) << 3) | src, built from the numeric
 * operand codes defined in this header.
 */
#define addps(dst, src)     { __asm _emit 0x0f __asm _emit 0x58 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define addss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x58 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define subps(dst, src)     { __asm _emit 0x0f __asm _emit 0x5c __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define subss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x5c __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define mulps(dst, src)     { __asm _emit 0x0f __asm _emit 0x59 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define mulss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x59 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define divps(dst, src)     { __asm _emit 0x0f __asm _emit 0x5e __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define divss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x5e __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define sqrtps(dst, src)    { __asm _emit 0x0f __asm _emit 0x51 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define sqrtss(dst, src)    { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x51 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define rcpps(dst, src)     { __asm _emit 0x0f __asm _emit 0x53 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define rcpss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x53 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define rsqrtps(dst, src)   { __asm _emit 0x0f __asm _emit 0x52 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define rsqrtss(dst, src)   { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x52 __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define maxps(dst, src)     { __asm _emit 0x0f __asm _emit 0x5f __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define maxss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x5f __asm _emit (((dst & 0x3f)<<3) | (src)) }

#define minps(dst, src)     { __asm _emit 0x0f __asm _emit 0x5d __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define minss(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x5d __asm _emit (((dst & 0x3f)<<3) | (src)) }
00785
00786
00787
00788
00789
/*
 * SSE compare instructions, hand-encoded with the inline assembler's _emit
 * directive.  The operand byte is ((dst & 0x3f) << 3) | src, built from the
 * numeric operand codes defined in this header.
 *
 * cmpps(dst, src, cond)   emits 0x0f 0xc2 <operand byte> <cond>
 * cmpss(dst, src, cond)   emits 0xf3 0x0f 0xc2 <operand byte> <cond>
 *
 * cond is one of the condition codes (_eq, _lt, _le, _unordered, _neq, _nlt,
 * _nle, _ordered); the named wrappers below just fix cond.  The "ne", "ge"
 * and "gt" wrappers reuse the _neq, _nlt and _nle codes respectively.
 *
 * The cmpps definition had its last three lines collapsed onto one, which
 * left stray backslashes inside the macro body; it is restored here to the
 * same shape as cmpss.
 */
#define cmpps(dst, src, cond) \
{ \
    __asm _emit 0x0f \
    __asm _emit 0xc2 \
    __asm _emit (((dst & 0x3f)<<3) | (src)) \
    __asm _emit cond \
}

#define cmpeqps(dst,src)        cmpps(dst,src,_eq)
#define cmpltps(dst,src)        cmpps(dst,src,_lt)
#define cmpleps(dst,src)        cmpps(dst,src,_le)
#define cmpunordps(dst,src)     cmpps(dst,src,_unordered)
#define cmpneqps(dst,src)       cmpps(dst,src,_neq)
#define cmpneps(dst,src)        cmpps(dst,src,_neq)
#define cmpnltps(dst,src)       cmpps(dst,src,_nlt)
#define cmpgeps(dst,src)        cmpps(dst,src,_nlt)
#define cmpnleps(dst,src)       cmpps(dst,src,_nle)
#define cmpgtps(dst,src)        cmpps(dst,src,_nle)
#define cmpordps(dst,src)       cmpps(dst,src,_ordered)

#define cmpss(dst, src, cond) \
{ \
    __asm _emit 0xf3 \
    __asm _emit 0x0f \
    __asm _emit 0xc2 \
    __asm _emit (((dst & 0x3f)<<3) | (src)) \
    __asm _emit cond \
}

#define cmpeqss(dst,src)        cmpss(dst,src,_eq)
#define cmpltss(dst,src)        cmpss(dst,src,_lt)
#define cmpless(dst,src)        cmpss(dst,src,_le)
#define cmpunordss(dst,src)     cmpss(dst,src,_unordered)
#define cmpneqss(dst,src)       cmpss(dst,src,_neq)
#define cmpness(dst,src)        cmpss(dst,src,_neq)
#define cmpnltss(dst,src)       cmpss(dst,src,_nlt)
#define cmpgess(dst,src)        cmpss(dst,src,_nlt)
#define cmpnless(dst,src)       cmpss(dst,src,_nle)
#define cmpgtss(dst,src)        cmpss(dst,src,_nle)
#define cmpordss(dst,src)       cmpss(dst,src,_ordered)

/* comiss / ucomiss emit 0x0f <opcode> <operand byte>, with no condition byte. */
#define comiss(dst, src) \
{ \
    __asm _emit 0x0f \
    __asm _emit 0x2f \
    __asm _emit (((dst & 0x3f)<<3) | (src)) \
}
#define ucomiss(dst, src) \
{ \
    __asm _emit 0x0f \
    __asm _emit 0x2e \
    __asm _emit (((dst & 0x3f)<<3) | (src)) \
}
00838
/*
 * SSE bitwise instructions, hand-encoded with the inline assembler's _emit
 * directive.  Each macro emits 0x0f <opcode> <operand byte>, where the
 * operand byte is ((dst & 0x3f) << 3) | src.
 */
#define andnps(dst, src)    { __asm _emit 0x0f __asm _emit 0x55 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define andps(dst, src)     { __asm _emit 0x0f __asm _emit 0x54 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define orps(dst, src)      { __asm _emit 0x0f __asm _emit 0x56 __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define xorps(dst, src)     { __asm _emit 0x0f __asm _emit 0x57 __asm _emit (((dst & 0x3f)<<3) | (src)) }
00863
/*
 * SSE conversion instructions, hand-encoded with the inline assembler's
 * _emit directive.  The operand byte is ((dst & 0x3f) << 3) | src.
 *
 *   cvtpi2ps, cvtps2pi, cvttps2pi    emit 0x0f <opcode> <operand byte>
 *   cvtsi2ss, cvtss2si, cvttss2si    emit 0xf3 0x0f <opcode> <operand byte>
 */
#define cvtpi2ps(dst, src)      { __asm _emit 0x0f __asm _emit 0x2a __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define cvtps2pi(dst, src)      { __asm _emit 0x0f __asm _emit 0x2d __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define cvtsi2ss(dst, src)      { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x2a __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define cvtss2si(dst, src)      { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x2d __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define cvttps2pi(dst, src)     { __asm _emit 0x0f __asm _emit 0x2c __asm _emit (((dst & 0x3f)<<3) | (src)) }
#define cvttss2si(dst, src)     { __asm _emit 0xf3 __asm _emit 0x0f __asm _emit 0x2c __asm _emit (((dst & 0x3f)<<3) | (src)) }
00903
00904
00905
00906 #define maskmovq(dst, src) \
00907