src/hermes/x86p_16.asm
author Sam Lantinga <slouken@libsdl.org>
Sun, 21 Sep 2003 18:32:04 +0000
changeset 720 f90d80d68071
parent 0 74212992fb08
child 1166 da33b7e6d181
permissions -rw-r--r--
N Sep 17 8791 Sam Lantinga Re: tks source released
Date: Sun, 07 Sep 2003 02:51:58 +0200
From: Stephane Marchesin
Subject: [SDL] Two little patches

Compiling SDL with a recent gcc (gcc 3.3.1; gcc 3.3 doesn't have this
behaviour) gives some nasty warnings:

SDL_blit_A.c: In function `BlitRGBtoRGBSurfaceAlpha128MMX':
SDL_blit_A.c:223: warning: integer constant is too large for "long" type
SDL_blit_A.c:225: warning: integer constant is too large for "long" type
SDL_blit_A.c:227: warning: integer constant is too large for "long" type
[...]

The first attached patch (longlongfix.patch) tells gcc to really treat
those constants as unsigned long long and not long.

The second patch (nasinclude.patch) fixes an include problem I had while
compiling NAS audio: when the <audio/audiolib.h> file lies in
/usr/X11R6/include, a -I/usr/X11R6/include option is needed or the file
isn't found.
     1 ;
     2 ; x86 format converters for HERMES
     3 ; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
     4 ; This source code is licensed under the GNU LGPL
     5 ; 
     6 ; Please refer to the file COPYING.LIB contained in the distribution for
     7 ; licensing conditions		
     8 ; 
     9 ; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
    10 ; Used with permission.
    11 ; 
    12 
    13 	
    14 BITS 32
    15 
    16 GLOBAL _ConvertX86p16_32RGB888
    17 GLOBAL _ConvertX86p16_32BGR888
    18 GLOBAL _ConvertX86p16_32RGBA888
    19 GLOBAL _ConvertX86p16_32BGRA888
    20 GLOBAL _ConvertX86p16_24RGB888
    21 GLOBAL _ConvertX86p16_24BGR888
    22 GLOBAL _ConvertX86p16_16BGR565
    23 GLOBAL _ConvertX86p16_16RGB555
    24 GLOBAL _ConvertX86p16_16BGR555
    25 GLOBAL _ConvertX86p16_8RGB332
    26 
    27 EXTERN _ConvertX86
    28 EXTERN _x86return
    29 
    30 
    31 SECTION .text
    32 
    33 
    34 
    35 _ConvertX86p16_16BGR565:
    36 
    37     ; check short
    38     cmp ecx,BYTE 16
    39     ja .L3
    40 
    41 
    42 .L1 ; short loop
    43     mov al,[esi]
    44     mov ah,[esi+1]
    45     mov ebx,eax
    46     mov edx,eax
    47     shr eax,11
    48     and eax,BYTE 11111b
    49     and ebx,11111100000b
    50     shl edx,11
    51     add eax,ebx
    52     add eax,edx
    53     mov [edi],al
    54     mov [edi+1],ah
    55     add esi,BYTE 2
    56     add edi,BYTE 2
    57     dec ecx
    58     jnz .L1
    59 .L2
    60     jmp _x86return
    61 
    62 .L3 ; head
    63     mov eax,edi
    64     and eax,BYTE 11b
    65     jz .L4
    66     mov al,[esi]
    67     mov ah,[esi+1]
    68     mov ebx,eax
    69     mov edx,eax
    70     shr eax,11
    71     and eax,BYTE 11111b
    72     and ebx,11111100000b
    73     shl edx,11
    74     add eax,ebx
    75     add eax,edx
    76     mov [edi],al
    77     mov [edi+1],ah
    78     add esi,BYTE 2
    79     add edi,BYTE 2
    80     dec ecx
    81 
    82 .L4 ; save count
    83     push ecx
    84 
    85     ; unroll twice
    86     shr ecx,1
    87     
    88     ; point arrays to end
    89     lea esi,[esi+ecx*4]
    90     lea edi,[edi+ecx*4]
    91 
    92     ; negative counter 
    93     neg ecx
    94     jmp SHORT .L6
    95                               
    96 .L5     mov [edi+ecx*4-4],eax
    97 .L6     mov eax,[esi+ecx*4]
    98 
    99         mov ebx,[esi+ecx*4]
   100         and eax,07E007E0h         
   101 
   102         mov edx,[esi+ecx*4]
   103         and ebx,0F800F800h
   104 
   105         shr ebx,11
   106         and edx,001F001Fh
   107 
   108         shl edx,11
   109         add eax,ebx
   110 
   111         add eax,edx                 
   112         inc ecx
   113 
   114         jnz .L5                 
   115          
   116     mov [edi+ecx*4-4],eax
   117 
   118     ; tail
   119     pop ecx
   120     and ecx,BYTE 1
   121     jz .L7
   122     mov al,[esi]
   123     mov ah,[esi+1]
   124     mov ebx,eax
   125     mov edx,eax
   126     shr eax,11
   127     and eax,BYTE 11111b
   128     and ebx,11111100000b
   129     shl edx,11
   130     add eax,ebx
   131     add eax,edx
   132     mov [edi],al
   133     mov [edi+1],ah
   134     add esi,BYTE 2
   135     add edi,BYTE 2
   136 
   137 .L7
   138     jmp _x86return
   139 
   140 
   141 
   142 
   143 
   144 
   145 _ConvertX86p16_16RGB555:
   146 
   147     ; check short
   148     cmp ecx,BYTE 32
   149     ja .L3
   150 
   151 
   152 .L1 ; short loop
   153     mov al,[esi]
   154     mov ah,[esi+1]
   155     mov ebx,eax
   156     shr ebx,1
   157     and ebx,     0111111111100000b
   158     and eax,BYTE 0000000000011111b
   159     add eax,ebx
   160     mov [edi],al
   161     mov [edi+1],ah
   162     add esi,BYTE 2
   163     add edi,BYTE 2
   164     dec ecx
   165     jnz .L1
   166 .L2
   167     jmp _x86return
   168 
   169 .L3 ; head
   170     mov eax,edi
   171     and eax,BYTE 11b
   172     jz .L4
   173     mov al,[esi]
   174     mov ah,[esi+1]
   175     mov ebx,eax
   176     shr ebx,1
   177     and ebx,     0111111111100000b
   178     and eax,BYTE 0000000000011111b
   179     add eax,ebx
   180     mov [edi],al
   181     mov [edi+1],ah
   182     add esi,BYTE 2
   183     add edi,BYTE 2
   184     dec ecx
   185 
   186 .L4 ; save ebp
   187     push ebp
   188 
   189     ; save count
   190     push ecx
   191 
   192     ; unroll four times
   193     shr ecx,2
   194     
   195     ; point arrays to end
   196     lea esi,[esi+ecx*8]
   197     lea edi,[edi+ecx*8]
   198 
   199     ; negative counter 
   200     xor ebp,ebp
   201     sub ebp,ecx
   202 
   203 .L5     mov eax,[esi+ebp*8]        ; agi?
   204         mov ecx,[esi+ebp*8+4]
   205        
   206         mov ebx,eax
   207         mov edx,ecx
   208 
   209         and eax,0FFC0FFC0h
   210         and ecx,0FFC0FFC0h
   211 
   212         shr eax,1
   213         and ebx,001F001Fh
   214 
   215         shr ecx,1
   216         and edx,001F001Fh
   217 
   218         add eax,ebx
   219         add ecx,edx
   220 
   221         mov [edi+ebp*8],eax
   222         mov [edi+ebp*8+4],ecx
   223 
   224         inc ebp
   225         jnz .L5                 
   226 
   227     ; tail
   228     pop ecx
   229 .L6 and ecx,BYTE 11b
   230     jz .L7
   231     mov al,[esi]
   232     mov ah,[esi+1]
   233     mov ebx,eax
   234     shr ebx,1
   235     and ebx,     0111111111100000b
   236     and eax,BYTE 0000000000011111b
   237     add eax,ebx
   238     mov [edi],al
   239     mov [edi+1],ah
   240     add esi,BYTE 2
   241     add edi,BYTE 2
   242     dec ecx
   243     jmp SHORT .L6
   244 
   245 .L7 pop ebp
   246     jmp _x86return
   247 
   248 
   249 
   250 
   251 
   252 
   253 _ConvertX86p16_16BGR555:
   254 
   255     ; check short
   256     cmp ecx,BYTE 16
   257     ja .L3
   258 
   259 	
   260 .L1 ; short loop
   261     mov al,[esi]
   262     mov ah,[esi+1]
   263     mov ebx,eax
   264     mov edx,eax
   265     shr eax,11
   266     and eax,BYTE 11111b
   267     shr ebx,1
   268     and ebx,1111100000b
   269     shl edx,10
   270     and edx,0111110000000000b
   271     add eax,ebx
   272     add eax,edx
   273     mov [edi],al
   274     mov [edi+1],ah
   275     add esi,BYTE 2
   276     add edi,BYTE 2
   277     dec ecx
   278     jnz .L1
   279 .L2
   280     jmp _x86return
   281 
   282 .L3 ; head
   283     mov eax,edi
   284     and eax,BYTE 11b
   285     jz .L4
   286     mov al,[esi]
   287     mov ah,[esi+1]
   288     mov ebx,eax
   289     mov edx,eax
   290     shr eax,11
   291     and eax,BYTE 11111b
   292     shr ebx,1
   293     and ebx,1111100000b
   294     shl edx,10
   295     and edx,0111110000000000b
   296     add eax,ebx
   297     add eax,edx
   298     mov [edi],al
   299     mov [edi+1],ah
   300     add esi,BYTE 2
   301     add edi,BYTE 2
   302     dec ecx
   303 
   304 .L4 ; save count
   305     push ecx
   306 
   307     ; unroll twice
   308     shr ecx,1
   309     
   310     ; point arrays to end
   311     lea esi,[esi+ecx*4]
   312     lea edi,[edi+ecx*4]
   313 
   314     ; negative counter 
   315     neg ecx
   316     jmp SHORT .L6
   317                               
   318 .L5     mov [edi+ecx*4-4],eax
   319 .L6     mov eax,[esi+ecx*4]
   320 
   321         shr eax,1
   322         mov ebx,[esi+ecx*4]
   323         
   324         and eax,03E003E0h         
   325         mov edx,[esi+ecx*4]
   326 
   327         and ebx,0F800F800h
   328 
   329         shr ebx,11
   330         and edx,001F001Fh
   331 
   332         shl edx,10
   333         add eax,ebx
   334 
   335         add eax,edx                 
   336         inc ecx
   337 
   338         jnz .L5                 
   339          
   340     mov [edi+ecx*4-4],eax
   341 
   342     ; tail
   343     pop ecx
   344     and ecx,BYTE 1
   345     jz .L7
   346     mov al,[esi]
   347     mov ah,[esi+1]
   348     mov ebx,eax
   349     mov edx,eax
   350     shr eax,11
   351     and eax,BYTE 11111b
   352     shr ebx,1
   353     and ebx,1111100000b
   354     shl edx,10
   355     and edx,0111110000000000b
   356     add eax,ebx
   357     add eax,edx
   358     mov [edi],al
   359     mov [edi+1],ah
   360     add esi,BYTE 2
   361     add edi,BYTE 2
   362 
   363 .L7
   364     jmp _x86return
   365 
   366 
   367 
   368 
   369 
   370 
   371 _ConvertX86p16_8RGB332:
   372 
   373     ; check short
   374     cmp ecx,BYTE 16
   375     ja .L3
   376 
   377 
   378 .L1 ; short loop
   379     mov al,[esi+0]
   380     mov ah,[esi+1]
   381     mov ebx,eax
   382     mov edx,eax
   383     and eax,BYTE 11000b         ; blue
   384     shr eax,3
   385     and ebx,11100000000b        ; green
   386     shr ebx,6
   387     and edx,1110000000000000b   ; red
   388     shr edx,8
   389     add eax,ebx
   390     add eax,edx
   391     mov [edi],al
   392     add esi,BYTE 2
   393     inc edi
   394     dec ecx
   395     jnz .L1
   396 .L2
   397     jmp _x86return
   398 
   399 .L3 mov eax,edi
   400     and eax,BYTE 11b
   401     jz .L4
   402     mov al,[esi+0]
   403     mov ah,[esi+1]
   404     mov ebx,eax
   405     mov edx,eax
   406     and eax,BYTE 11000b         ; blue
   407     shr eax,3
   408     and ebx,11100000000b        ; green
   409     shr ebx,6
   410     and edx,1110000000000000b   ; red
   411     shr edx,8
   412     add eax,ebx
   413     add eax,edx
   414     mov [edi],al
   415     add esi,BYTE 2
   416     inc edi
   417     dec ecx
   418     jmp SHORT .L3
   419 
   420 .L4 ; save ebp
   421     push ebp
   422 
   423     ; save count
   424     push ecx
   425 
   426     ; unroll 4 times
   427     shr ecx,2
   428 
   429     ; prestep
   430     mov dl,[esi+0]
   431     mov bl,[esi+1]
   432     mov dh,[esi+2]
   433         
   434 .L5     shl edx,16
   435         mov bh,[esi+3]
   436         
   437         shl ebx,16
   438         mov dl,[esi+4]
   439 
   440         mov dh,[esi+6]
   441         mov bl,[esi+5]
   442 
   443         and edx,00011000000110000001100000011000b
   444         mov bh,[esi+7]
   445 
   446         ror edx,16+3
   447         mov eax,ebx                                     ; setup eax for reds
   448 
   449         and ebx,00000111000001110000011100000111b
   450         and eax,11100000111000001110000011100000b       ; reds
   451 
   452         ror ebx,16-2
   453         add esi,BYTE 8
   454 
   455         ror eax,16
   456         add edi,BYTE 4
   457 
   458         add eax,ebx
   459         mov bl,[esi+1]                                  ; greens
   460 
   461         add eax,edx
   462         mov dl,[esi+0]                                  ; blues
   463 
   464         mov [edi-4],eax
   465         mov dh,[esi+2]
   466 
   467         dec ecx
   468         jnz .L5                 
   469     
   470     ; check tail
   471     pop ecx
   472     and ecx,BYTE 11b
   473     jz .L7
   474 
   475 .L6 ; tail
   476     mov al,[esi+0]
   477     mov ah,[esi+1]
   478     mov ebx,eax
   479     mov edx,eax
   480     and eax,BYTE 11000b         ; blue
   481     shr eax,3
   482     and ebx,11100000000b        ; green
   483     shr ebx,6
   484     and edx,1110000000000000b   ; red
   485     shr edx,8
   486     add eax,ebx
   487     add eax,edx
   488     mov [edi],al
   489     add esi,BYTE 2
   490     inc edi
   491     dec ecx
   492     jnz .L6
   493 
   494 .L7 pop ebp
   495     jmp _x86return
   496