From Mike Frysinger and/or Gentoo:
author Ryan C. Gordon <icculus@icculus.org>
Thu, 05 Jan 2006 15:25:19 +0000
changeset 1230 88c2d6aed428
parent 1229 1430f5fe092a
child 1231 cf59e7b91ed4
From Mike Frysinger and/or Gentoo:

- libsdl-PIC-load-mmx-masks-from-stack.patch
this one may be a little controversial ... the fix here is again that you can't
reference the memory addresses like this to load into a mmx register, so the
way to do it is to push two 32bit words onto the stack, load the 64bit value
off of the stack into the mmx register, and then adjust the stack so that
it's back to normal.
src/hermes/mmxp2_32.asm
     1.1 --- a/src/hermes/mmxp2_32.asm	Thu Jan 05 08:17:35 2006 +0000
     1.2 +++ b/src/hermes/mmxp2_32.asm	Thu Jan 05 15:25:19 2006 +0000
     1.3 @@ -29,31 +29,43 @@
     1.4  
     1.5  EXTERN _mmxreturn
     1.6   
     1.7 -SECTION .data
     1.8 -	
     1.9 -ALIGN 8
    1.10 +;; Macros for conversion routines
    1.11  
    1.12 -;; Constants for conversion routines
    1.13 +%macro _push_immq_mask 1
    1.14 +	push dword %1
    1.15 +	push dword %1
    1.16 +%endmacro
    1.17  
    1.18 -mmx32_rgb888_mask dd 00ffffffh,00ffffffh
    1.19 +%macro load_immq 2
    1.20 +	_push_immq_mask %2
    1.21 +	movq %1, [esp]
    1.22 +%endmacro
    1.23  
    1.24 -mmx32_rgb565_b dd 000000f8h, 000000f8h
    1.25 -mmx32_rgb565_g dd 0000fc00h, 0000fc00h
    1.26 -mmx32_rgb565_r dd 00f80000h, 00f80000h
    1.27 +%macro pand_immq 2
    1.28 +	_push_immq_mask %2
    1.29 +	pand %1, [esp]
    1.30 +%endmacro
    1.31  
    1.32 -mmx32_rgb555_rb dd 00f800f8h,00f800f8h
    1.33 -mmx32_rgb555_g dd 0000f800h,0000f800h
    1.34 -mmx32_rgb555_mul dd 20000008h,20000008h
    1.35 -mmx32_bgr555_mul dd 00082000h,00082000h
    1.36 +%define CLEANUP_IMMQ_LOADS(num) \
    1.37 +	add esp, byte 8 * num
    1.38  
    1.39 +%define mmx32_rgb888_mask 00ffffffh
    1.40 +%define mmx32_rgb565_b 000000f8h
    1.41 +%define mmx32_rgb565_g 0000fc00h
    1.42 +%define mmx32_rgb565_r 00f80000h
    1.43  
    1.44 -			
    1.45 +%define mmx32_rgb555_rb 00f800f8h
    1.46 +%define mmx32_rgb555_g 0000f800h
    1.47 +%define mmx32_rgb555_mul 20000008h
    1.48 +%define mmx32_bgr555_mul 00082000h
    1.49 +
    1.50  SECTION .text
    1.51  
    1.52  _ConvertMMXpII32_24RGB888:
    1.53  
    1.54          ; set up mm6 as the mask, mm7 as zero
    1.55 -        movq mm6, qword [mmx32_rgb888_mask]
    1.56 +        load_immq mm6, mmx32_rgb888_mask
    1.57 +        CLEANUP_IMMQ_LOADS(1)
    1.58          pxor mm7, mm7
    1.59  
    1.60          mov edx, ecx                    ; save ecx
    1.61 @@ -115,9 +127,10 @@
    1.62  _ConvertMMXpII32_16RGB565:
    1.63  
    1.64          ; set up masks
    1.65 -        movq mm5, [mmx32_rgb565_b]
    1.66 -        movq mm6, [mmx32_rgb565_g]
    1.67 -        movq mm7, [mmx32_rgb565_r]
    1.68 +        load_immq mm5, mmx32_rgb565_b
    1.69 +        load_immq mm6, mmx32_rgb565_g
    1.70 +        load_immq mm7, mmx32_rgb565_r
    1.71 +        CLEANUP_IMMQ_LOADS(3)
    1.72  
    1.73          mov edx, ecx
    1.74          shr ecx, 2
    1.75 @@ -181,9 +194,10 @@
    1.76  	
    1.77  _ConvertMMXpII32_16BGR565:
    1.78  
    1.79 -        movq mm5, [mmx32_rgb565_r]
    1.80 -        movq mm6, [mmx32_rgb565_g]
    1.81 -        movq mm7, [mmx32_rgb565_b]
    1.82 +        load_immq mm5, mmx32_rgb565_r
    1.83 +        load_immq mm6, mmx32_rgb565_g
    1.84 +        load_immq mm7, mmx32_rgb565_b
    1.85 +        CLEANUP_IMMQ_LOADS(3)
    1.86  
    1.87          mov edx, ecx
    1.88          shr ecx, 2
    1.89 @@ -253,7 +267,7 @@
    1.90          ; except it uses a different multiplier for the pmaddwd
    1.91          ; instruction.  cool huh.
    1.92  
    1.93 -        movq mm7, qword [mmx32_bgr555_mul]
    1.94 +        load_immq mm7, mmx32_bgr555_mul
    1.95          jmp _convert_bgr555_cheat
    1.96  
    1.97  ; This is the same as the Intel version.. they obviously went to
    1.98 @@ -263,9 +277,10 @@
    1.99  ; (I think) a more accurate name..
   1.100  _ConvertMMXpII32_16RGB555:
   1.101  
   1.102 -        movq mm7,qword [mmx32_rgb555_mul]
   1.103 +	load_immq mm7, mmx32_rgb555_mul
   1.104  _convert_bgr555_cheat:
   1.105 -        movq mm6,qword [mmx32_rgb555_g]
   1.106 +	load_immq mm6, mmx32_rgb555_g
   1.107 +	CLEANUP_IMMQ_LOADS(2)
   1.108          
   1.109  	mov edx,ecx		           ; Save ecx 
   1.110  
   1.111 @@ -280,12 +295,14 @@
   1.112  	movq mm0,[esi]
   1.113  	movq mm3,mm2
   1.114  
   1.115 -	pand mm3,qword [mmx32_rgb555_rb]
   1.116 +	pand_immq mm3, mmx32_rgb555_rb
   1.117  	movq mm1,mm0
   1.118  
   1.119 -	pand mm1,qword [mmx32_rgb555_rb]
   1.120 +	pand_immq mm1, mmx32_rgb555_rb
   1.121  	pmaddwd mm3,mm7
   1.122  
   1.123 +	CLEANUP_IMMQ_LOADS(2)
   1.124 +
   1.125  	pmaddwd mm1,mm7
   1.126  	pand mm2,mm6
   1.127  
   1.128 @@ -302,13 +319,13 @@
   1.129  	movq mm0,mm4
   1.130  	psrld mm1,6
   1.131  
   1.132 -	pand mm0,qword [mmx32_rgb555_rb]
   1.133 +	pand_immq mm0, mmx32_rgb555_rb
   1.134  	packssdw mm1,mm3
   1.135  
   1.136  	movq mm3,mm5
   1.137  	pmaddwd mm0,mm7
   1.138  
   1.139 -	pand mm3,qword [mmx32_rgb555_rb]
   1.140 +	pand_immq mm3, mmx32_rgb555_rb
   1.141  	pand mm4,mm6
   1.142  
   1.143  	movq [edi],mm1			
   1.144 @@ -329,12 +346,14 @@
   1.145  	movq mm3,mm2
   1.146  	movq mm1,mm0
   1.147  
   1.148 -	pand mm3,qword [mmx32_rgb555_rb]
   1.149 +	pand_immq mm3, mmx32_rgb555_rb
   1.150  	packssdw mm5,mm4
   1.151  
   1.152 -	pand mm1,qword [mmx32_rgb555_rb]
   1.153 +	pand_immq mm1, mmx32_rgb555_rb
   1.154  	pand mm2,mm6
   1.155  
   1.156 +	CLEANUP_IMMQ_LOADS(4)
   1.157 +
   1.158  	movq [edi+8],mm5
   1.159  	pmaddwd mm3,mm7
   1.160