src/hermes/x86p_16.asm
author Sam Lantinga
Mon, 06 Feb 2006 08:28:51 +0000
changeset 1330 450721ad5436
parent 1199 2d6dc7de1145
child 1697 393092a3ebf6
permissions -rw-r--r--
It's now possible to build SDL without any C runtime at all on Windows,
using Visual C++ 2005
     1 ;
     2 ; x86 format converters for HERMES
     3 ; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
     4 ; This source code is licensed under the GNU LGPL
     5 ; 
     6 ; Please refer to the file COPYING.LIB contained in the distribution for
     7 ; licensing conditions		
     8 ; 
     9 ; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
    10 ; Used with permission.
    11 ; 
    12 
    13 	
    14 BITS 32                         ; assemble everything below as 32-bit code
    15 
       ; Symbols exported from this file.
       ; NOTE(review): only the last four (16BGR565, 16RGB555, 16BGR555,
       ; 8RGB332) have a definition in the visible part of this file; the
       ; 32-bit and 24-bit names are declared GLOBAL without a visible
       ; definition - confirm whether they live elsewhere or are leftovers.
    16 GLOBAL _ConvertX86p16_32RGB888
    17 GLOBAL _ConvertX86p16_32BGR888
    18 GLOBAL _ConvertX86p16_32RGBA888
    19 GLOBAL _ConvertX86p16_32BGRA888
    20 GLOBAL _ConvertX86p16_24RGB888
    21 GLOBAL _ConvertX86p16_24BGR888
    22 GLOBAL _ConvertX86p16_16BGR565
    23 GLOBAL _ConvertX86p16_16RGB555
    24 GLOBAL _ConvertX86p16_16BGR555
    25 GLOBAL _ConvertX86p16_8RGB332
    26 
       ; Symbols defined outside this file. Every converter below leaves by
       ; jumping to _x86return; _ConvertX86 is declared but not referenced by
       ; the visible code.
    27 EXTERN _ConvertX86
    28 EXTERN _x86return
    29 
    30 SECTION .text
    31 
    32 _ConvertX86p16_16BGR565:
       ;
       ; Converts a run of 16-bit pixels by exchanging the 5-bit field in
       ; bits 0-4 with the 5-bit field in bits 11-15; the 6-bit field in
       ; bits 5-10 stays in place.
       ;
       ; In:   esi = source pixels, edi = destination pixels, ecx = pixel count
       ; Out:  esi and edi point just past the converted pixels; control
       ;       leaves through jmp _x86return (external symbol)
       ; Uses: eax, ebx, ecx, edx, flags
       ;
       ; NOTE(review): the short loop decrements ecx before testing it, so a
       ; count of 0 would wrap around - presumably the caller never passes 0;
       ; confirm.
       ;
    33 
    34     ; counts of 16 or fewer take the simple per-pixel loop
    35     cmp ecx,BYTE 16
    36     ja .L3
    37 
    38 
    39 .L1 ; short loop: one pixel per pass
    40     mov al,[esi]                ; assemble the pixel in ax; the upper half of eax is stale
    41     mov ah,[esi+1]
    42     mov ebx,eax
    43     mov edx,eax
    44     shr eax,11
    45     and eax,BYTE 11111b         ; eax = old bits 11-15, now in bits 0-4
    46     and ebx,11111100000b        ; ebx = bits 5-10, left where they are
    47     shl edx,11                  ; old bits 0-4 now in bits 11-15; everything else lands above bit 15
    48     add eax,ebx
    49     add eax,edx
    50     mov [edi],al                ; only the low 16 bits of the sum are stored
    51     mov [edi+1],ah
    52     add esi,BYTE 2
    53     add edi,BYTE 2
    54     dec ecx
    55     jnz .L1
    56 .L2                             ; not targeted by any jump in this routine
    57     jmp _x86return
    58 
    59 .L3 ; head: if edi is not a multiple of 4, convert one pixel first
       ; so that the dword stores of the main loop start on a 4-byte boundary
       ; (assumes edi is at least 2-byte aligned - TODO confirm)
    60     mov eax,edi
    61     and eax,BYTE 11b
    62     jz .L4
    63     mov al,[esi]                ; same single-pixel conversion as the short loop
    64     mov ah,[esi+1]
    65     mov ebx,eax
    66     mov edx,eax
    67     shr eax,11
    68     and eax,BYTE 11111b
    69     and ebx,11111100000b
    70     shl edx,11
    71     add eax,ebx
    72     add eax,edx
    73     mov [edi],al
    74     mov [edi+1],ah
    75     add esi,BYTE 2
    76     add edi,BYTE 2
    77     dec ecx
    78 
    79 .L4 ; save count (the tail tests its low bit after the main loop)
    80     push ecx
    81 
    82     ; unroll twice: ecx = number of pixel pairs, one dword each
    83     shr ecx,1
    84     
    85     ; point arrays to the end of the span the main loop converts
    86     lea esi,[esi+ecx*4]
    87     lea edi,[edi+ecx*4]
    88 
    89     ; negative counter: ecx runs from -pairs up to 0
    90     neg ecx
    91     jmp SHORT .L6               ; nothing has been computed yet, so skip the store
    92                               
    93 .L5     mov [edi+ecx*4-4],eax   ; store the pair computed on the previous pass
    94 .L6     mov eax,[esi+ecx*4]     ; the same source dword is loaded three times
    95 
    96         mov ebx,[esi+ecx*4]
    97         and eax,07E007E0h         ; eax = bits 5-10 of both pixels, kept in place
    98 
    99         mov edx,[esi+ecx*4]
   100         and ebx,0F800F800h      ; ebx = bits 11-15 of both pixels
   101 
   102         shr ebx,11              ; ... moved down to bits 0-4
   103         and edx,001F001Fh       ; edx = bits 0-4 of both pixels
   104 
   105         shl edx,11              ; ... moved up to bits 11-15
   106         add eax,ebx
   107 
   108         add eax,edx                 
   109         inc ecx
   110 
   111         jnz .L5                 
   112          
   113     mov [edi+ecx*4-4],eax       ; ecx is 0 here: store the final pair at edi-4
   114 
   115     ; tail: an odd saved count leaves one pixel, which esi/edi now point at
   116     pop ecx
   117     and ecx,BYTE 1
   118     jz .L7
   119     mov al,[esi]                ; same single-pixel conversion as the short loop
   120     mov ah,[esi+1]
   121     mov ebx,eax
   122     mov edx,eax
   123     shr eax,11
   124     and eax,BYTE 11111b
   125     and ebx,11111100000b
   126     shl edx,11
   127     add eax,ebx
   128     add eax,edx
   129     mov [edi],al
   130     mov [edi+1],ah
   131     add esi,BYTE 2
   132     add edi,BYTE 2
   133 
   134 .L7
   135     jmp _x86return
   136 
   137 
   138 
   139 
   140 
   141 
   142 _ConvertX86p16_16RGB555:
       ;
       ; Converts a run of 16-bit pixels by moving bits 6-15 down one position
       ; to bits 5-14 and keeping bits 0-4 in place. Input bit 5 is discarded
       ; and output bit 15 is always 0.
       ;
       ; In:   esi = source pixels, edi = destination pixels, ecx = pixel count
       ; Out:  esi and edi point just past the converted pixels; control
       ;       leaves through jmp _x86return (external symbol)
       ; Uses: eax, ebx, ecx, edx, flags; ebp is saved and restored on the
       ;       long path
       ;
       ; NOTE(review): the short loop decrements ecx before testing it, so a
       ; count of 0 would wrap around - presumably the caller never passes 0;
       ; confirm.
       ;
   143 
   144     ; counts of 32 or fewer take the simple per-pixel loop
   145     cmp ecx,BYTE 32
   146     ja .L3
   147 
   148 
   149 .L1 ; short loop: one pixel per pass
   150     mov al,[esi]                ; assemble the pixel in ax; the upper half of eax is stale
   151     mov ah,[esi+1]
   152     mov ebx,eax
   153     shr ebx,1
   154     and ebx,     0111111111100000b   ; ebx = old bits 6-15, now in bits 5-14
   155     and eax,BYTE 0000000000011111b   ; eax = bits 0-4, unchanged
   156     add eax,ebx
   157     mov [edi],al
   158     mov [edi+1],ah
   159     add esi,BYTE 2
   160     add edi,BYTE 2
   161     dec ecx
   162     jnz .L1
   163 .L2                             ; not targeted by any jump in this routine
   164     jmp _x86return
   165 
   166 .L3 ; head: if edi is not a multiple of 4, convert one pixel first
       ; so that the dword stores of the main loop start on a 4-byte boundary
       ; (assumes edi is at least 2-byte aligned - TODO confirm)
   167     mov eax,edi
   168     and eax,BYTE 11b
   169     jz .L4
   170     mov al,[esi]                ; same single-pixel conversion as the short loop
   171     mov ah,[esi+1]
   172     mov ebx,eax
   173     shr ebx,1
   174     and ebx,     0111111111100000b
   175     and eax,BYTE 0000000000011111b
   176     add eax,ebx
   177     mov [edi],al
   178     mov [edi+1],ah
   179     add esi,BYTE 2
   180     add edi,BYTE 2
   181     dec ecx
   182 
   183 .L4 ; save ebp: it becomes the loop counter because ecx carries pixel data below
   184     push ebp
   185 
   186     ; save count (the tail needs count mod 4)
   187     push ecx
   188 
   189     ; unroll four times: ecx = number of 4-pixel groups (8 bytes each)
   190     shr ecx,2
   191     
   192     ; point arrays to the end of the span the main loop converts
   193     lea esi,[esi+ecx*8]
   194     lea edi,[edi+ecx*8]
   195 
   196     ; negative counter: ebp = -groups, counts up to 0
   197     xor ebp,ebp
   198     sub ebp,ecx
   199 
   200 .L5     mov eax,[esi+ebp*8]        ; agi? (original author's query; ebp was written just above)
   201         mov ecx,[esi+ebp*8+4]   ; eax = pixels 0-1, ecx = pixels 2-3 of the group
   202        
   203         mov ebx,eax
   204         mov edx,ecx
   205 
   206         and eax,0FFC0FFC0h      ; bits 6-15 of each pixel ...
   207         and ecx,0FFC0FFC0h
   208 
   209         shr eax,1               ; ... moved down to bits 5-14
   210         and ebx,001F001Fh       ; bits 0-4 of each pixel, kept in place
   211 
   212         shr ecx,1
   213         and edx,001F001Fh
   214 
   215         add eax,ebx
   216         add ecx,edx
   217 
   218         mov [edi+ebp*8],eax
   219         mov [edi+ebp*8+4],ecx
   220 
   221         inc ebp
   222         jnz .L5                 
   223 
   224     ; tail: convert the remaining (saved count mod 4) pixels one at a time
   225     pop ecx
   226 .L6 and ecx,BYTE 11b            ; first pass reduces to count mod 4; later passes only set the flags
   227     jz .L7
   228     mov al,[esi]                ; same single-pixel conversion as the short loop
   229     mov ah,[esi+1]
   230     mov ebx,eax
   231     shr ebx,1
   232     and ebx,     0111111111100000b
   233     and eax,BYTE 0000000000011111b
   234     add eax,ebx
   235     mov [edi],al
   236     mov [edi+1],ah
   237     add esi,BYTE 2
   238     add edi,BYTE 2
   239     dec ecx
   240     jmp SHORT .L6
   241 
   242 .L7 pop ebp                     ; restore the ebp pushed at .L4
   243     jmp _x86return
   244 
   245 
   246 
   247 
   248 
   249 
   250 _ConvertX86p16_16BGR555:
       ;
       ; Converts a run of 16-bit pixels with this bit mapping:
       ;   output bits 0-4   = input bits 11-15
       ;   output bits 5-9   = input bits 6-10
       ;   output bits 10-14 = input bits 0-4
       ; Input bit 5 is discarded and output bit 15 is always 0.
       ;
       ; In:   esi = source pixels, edi = destination pixels, ecx = pixel count
       ; Out:  esi and edi point just past the converted pixels; control
       ;       leaves through jmp _x86return (external symbol)
       ; Uses: eax, ebx, ecx, edx, flags
       ;
       ; NOTE(review): the short loop decrements ecx before testing it, so a
       ; count of 0 would wrap around - presumably the caller never passes 0;
       ; confirm.
       ;
   251 
   252     ; counts of 16 or fewer take the simple per-pixel loop
   253     cmp ecx,BYTE 16
   254     ja .L3
   255 
   256 	
   257 .L1 ; short loop: one pixel per pass
   258     mov al,[esi]                ; assemble the pixel in ax; the upper half of eax is stale
   259     mov ah,[esi+1]
   260     mov ebx,eax
   261     mov edx,eax
   262     shr eax,11
   263     and eax,BYTE 11111b         ; eax = old bits 11-15, now in bits 0-4
   264     shr ebx,1
   265     and ebx,1111100000b         ; ebx = old bits 6-10, now in bits 5-9
   266     shl edx,10
   267     and edx,0111110000000000b   ; edx = old bits 0-4, now in bits 10-14
   268     add eax,ebx
   269     add eax,edx
   270     mov [edi],al
   271     mov [edi+1],ah
   272     add esi,BYTE 2
   273     add edi,BYTE 2
   274     dec ecx
   275     jnz .L1
   276 .L2                             ; not targeted by any jump in this routine
   277     jmp _x86return
   278 
   279 .L3 ; head: if edi is not a multiple of 4, convert one pixel first
       ; so that the dword stores of the main loop start on a 4-byte boundary
       ; (assumes edi is at least 2-byte aligned - TODO confirm)
   280     mov eax,edi
   281     and eax,BYTE 11b
   282     jz .L4
   283     mov al,[esi]                ; same single-pixel conversion as the short loop
   284     mov ah,[esi+1]
   285     mov ebx,eax
   286     mov edx,eax
   287     shr eax,11
   288     and eax,BYTE 11111b
   289     shr ebx,1
   290     and ebx,1111100000b
   291     shl edx,10
   292     and edx,0111110000000000b
   293     add eax,ebx
   294     add eax,edx
   295     mov [edi],al
   296     mov [edi+1],ah
   297     add esi,BYTE 2
   298     add edi,BYTE 2
   299     dec ecx
   300 
   301 .L4 ; save count (the tail tests its low bit after the main loop)
   302     push ecx
   303 
   304     ; unroll twice: ecx = number of pixel pairs, one dword each
   305     shr ecx,1
   306     
   307     ; point arrays to the end of the span the main loop converts
   308     lea esi,[esi+ecx*4]
   309     lea edi,[edi+ecx*4]
   310 
   311     ; negative counter: ecx runs from -pairs up to 0
   312     neg ecx
   313     jmp SHORT .L6               ; nothing has been computed yet, so skip the store
   314                               
   315 .L5     mov [edi+ecx*4-4],eax   ; store the pair computed on the previous pass
   316 .L6     mov eax,[esi+ecx*4]     ; the same source dword is loaded three times
   317 
   318         shr eax,1
   319         mov ebx,[esi+ecx*4]
   320         
   321         and eax,03E003E0h         ; eax = old bits 6-10 of both pixels, now in bits 5-9
   322         mov edx,[esi+ecx*4]
   323 
   324         and ebx,0F800F800h      ; ebx = bits 11-15 of both pixels
   325 
   326         shr ebx,11              ; ... moved down to bits 0-4
   327         and edx,001F001Fh       ; edx = bits 0-4 of both pixels
   328 
   329         shl edx,10              ; ... moved up to bits 10-14
   330         add eax,ebx
   331 
   332         add eax,edx                 
   333         inc ecx
   334 
   335         jnz .L5                 
   336          
   337     mov [edi+ecx*4-4],eax       ; ecx is 0 here: store the final pair at edi-4
   338 
   339     ; tail: an odd saved count leaves one pixel, which esi/edi now point at
   340     pop ecx
   341     and ecx,BYTE 1
   342     jz .L7
   343     mov al,[esi]                ; same single-pixel conversion as the short loop
   344     mov ah,[esi+1]
   345     mov ebx,eax
   346     mov edx,eax
   347     shr eax,11
   348     and eax,BYTE 11111b
   349     shr ebx,1
   350     and ebx,1111100000b
   351     shl edx,10
   352     and edx,0111110000000000b
   353     add eax,ebx
   354     add eax,edx
   355     mov [edi],al
   356     mov [edi+1],ah
   357     add esi,BYTE 2
   358     add edi,BYTE 2
   359 
   360 .L7
   361     jmp _x86return
   362 
   363 
   364 
   365 
   366 
   367 
   368 _ConvertX86p16_8RGB332:
       ;
       ; Converts a run of 16-bit pixels to 8-bit pixels with this bit mapping
       ; (colour names as given by the comments in the per-pixel code):
       ;   output bits 0-1 = input bits 3-4    (blue)
       ;   output bits 2-4 = input bits 8-10   (green)
       ;   output bits 5-7 = input bits 13-15  (red)
       ;
       ; In:   esi = source pixels (2 bytes each), edi = destination pixels
       ;       (1 byte each), ecx = pixel count
       ; Out:  esi and edi point just past the converted pixels; control
       ;       leaves through jmp _x86return (external symbol)
       ; Uses: eax, ebx, ecx, edx, flags; ebp is saved and restored on the
       ;       long path although this routine never modifies it
       ;
       ; The unrolled loop reads only the eight source bytes of the group it
       ; is converting, so no byte beyond the last converted pixel is touched.
       ;
       ; NOTE(review): the short loop decrements ecx before testing it, so a
       ; count of 0 would wrap around - presumably the caller never passes 0;
       ; confirm.
       ;
   369 
   370     ; counts of 16 or fewer take the simple per-pixel loop
   371     cmp ecx,BYTE 16
   372     ja .L3
   373 
   374 
   375 .L1 ; short loop: one pixel per pass
   376     mov al,[esi+0]              ; assemble the pixel in ax; the upper half of eax is stale
   377     mov ah,[esi+1]
   378     mov ebx,eax
   379     mov edx,eax
   380     and eax,BYTE 11000b         ; blue
   381     shr eax,3
   382     and ebx,11100000000b        ; green
   383     shr ebx,6
   384     and edx,1110000000000000b   ; red
   385     shr edx,8
   386     add eax,ebx
   387     add eax,edx
   388     mov [edi],al
   389     add esi,BYTE 2
   390     inc edi
   391     dec ecx
   392     jnz .L1
   393 .L2                             ; not targeted by any jump in this routine
   394     jmp _x86return
   395 
       ; head: convert single pixels until edi is a multiple of 4, so the
       ; dword stores of the unrolled loop are aligned
   396 .L3 mov eax,edi
   397     and eax,BYTE 11b
   398     jz .L4
   399     mov al,[esi+0]
   400     mov ah,[esi+1]
   401     mov ebx,eax
   402     mov edx,eax
   403     and eax,BYTE 11000b         ; blue
   404     shr eax,3
   405     and ebx,11100000000b        ; green
   406     shr ebx,6
   407     and edx,1110000000000000b   ; red
   408     shr edx,8
   409     add eax,ebx
   410     add eax,edx
   411     mov [edi],al
   412     add esi,BYTE 2
   413     inc edi
   414     dec ecx
   415     jmp SHORT .L3
   416 
   417 .L4 ; save ebp
   418     push ebp
   419 
   420     ; save count (the tail needs count mod 4)
   421     push ecx
   422 
   423     ; unroll 4 times: ecx = number of 4-pixel groups
   424     shr ecx,2
   425 
   426     ; gather the low bytes of the four pixels in edx and the high bytes in ebx
   427 .L5     mov dl,[esi+0]          ; pixel 0 low byte
   428         mov bl,[esi+1]          ; pixel 0 high byte
   429         mov dh,[esi+2]          ; pixel 1 low byte
   430         
   431         shl edx,16              ; pixels 0-1 move to the upper half (stale upper bits fall off)
   432         mov bh,[esi+3]          ; pixel 1 high byte
   433         
   434         shl ebx,16
   435         mov dl,[esi+4]          ; pixel 2 low byte
   436 
   437         mov dh,[esi+6]          ; pixel 3 low byte
   438         mov bl,[esi+5]          ; pixel 2 high byte
   439 
   440         and edx,00011000000110000001100000011000b       ; blues: bits 3-4 of each low byte
   441         mov bh,[esi+7]          ; pixel 3 high byte
   442 
   443         ror edx,16+3            ; halves swapped into pixel order, blue now in bits 0-1 of each byte
   444         mov eax,ebx                                     ; setup eax for reds
   445 
   446         and ebx,00000111000001110000011100000111b       ; greens: bits 0-2 of each high byte
   447         and eax,11100000111000001110000011100000b       ; reds
   448 
   449         ror ebx,16-2            ; pixel order, green now in bits 2-4 of each byte
   450         add esi,BYTE 8
   451 
   452         ror eax,16              ; pixel order, red stays in bits 5-7 of each byte
   453         add edi,BYTE 4
   454 
   455         add eax,ebx
   456 
   457 
   458         add eax,edx
   459 
   460 
   461         mov [edi-4],eax         ; four converted pixels, pixel 0 in the lowest byte
   462 
   463 
   464         dec ecx
   465         jnz .L5                 
   466     
   467     ; check tail: remaining pixels = saved count mod 4
   468     pop ecx
   469     and ecx,BYTE 11b
   470     jz .L7
   471 
   472 .L6 ; tail: one pixel per pass
   473     mov al,[esi+0]
   474     mov ah,[esi+1]
   475     mov ebx,eax
   476     mov edx,eax
   477     and eax,BYTE 11000b         ; blue
   478     shr eax,3
   479     and ebx,11100000000b        ; green
   480     shr ebx,6
   481     and edx,1110000000000000b   ; red
   482     shr edx,8
   483     add eax,ebx
   484     add eax,edx
   485     mov [edi],al
   486     add esi,BYTE 2
   487     inc edi
   488     dec ecx
   489     jnz .L6
   490 
   491 .L7 pop ebp                     ; restore the ebp pushed at .L4
   492     jmp _x86return
   493 
   494 %ifidn __OUTPUT_FORMAT__,elf
       ; Only when assembling to the elf output format: declare a
       ; .note.GNU-stack section with the noalloc/noexec/nowrite attributes.
       ; GNU toolchains read this marker as "this object does not need an
       ; executable stack".
   495 section .note.GNU-stack noalloc noexec nowrite progbits
   496 %endif