src/hermes/x86p_16.asm
author Ryan C. Gordon
Wed, 29 Nov 2006 10:30:05 +0000
branch SDL-1.2
changeset 3900 ce3a2bd11305
parent 1873 eb4d9d99849b
child 2134 180fa05e98e2
permissions -rw-r--r--
Wrapped some macro params in parentheses for alloca wrappers.
Thanks, Suzuki Masahiro.
     1 ;
     2 ; x86 format converters for HERMES
     3 ; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
     4 ; This source code is licensed under the GNU LGPL
     5 ; 
     6 ; Please refer to the file COPYING.LIB contained in the distribution for
     7 ; licensing conditions		
     8 ; 
     9 ; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
    10 ; Used with permission.
    11 ; 
    12 
    13 BITS 32
    14 ; SDL_FUNC is not defined in this file; presumably a macro from common.inc -- TODO confirm
    15 %include "common.inc"
    16 ; one SDL_FUNC line per converter label defined below (presumably exports the symbol -- TODO confirm)
    17 SDL_FUNC _ConvertX86p16_16BGR565
    18 SDL_FUNC _ConvertX86p16_16RGB555
    19 SDL_FUNC _ConvertX86p16_16BGR555
    20 SDL_FUNC _ConvertX86p16_8RGB332
    21 ; defined elsewhere: _x86return is the exit jump target of every converter here; _ConvertX86 is not referenced in this listing
    22 EXTERN _ConvertX86
    23 EXTERN _x86return
    24 ; all four converters below are placed in the code section
    25 SECTION .text
    26 
    27 _ConvertX86p16_16BGR565:
       ; Copies ecx 16-bit pixels from [esi] to [edi], exchanging the top 5-bit
       ; field (bits 15-11) with the bottom 5-bit field (bits 4-0) of each pixel;
       ; the middle 6-bit field (bits 10-5) stays where it is.
       ; Going by the name this is the 5-6-5 red/blue swap -- TODO confirm.
       ;   in:   esi = source, edi = destination, ecx = pixel count
       ;   out:  esi/edi advanced past the converted pixels; leaves through
       ;         jmp _x86return (external, not visible in this file)
       ;   uses: eax, ebx, ecx, edx, flags; one stack slot on the long path
       ; NOTE(review): ecx = 0 is not checked; dec/jnz in the short loop would wrap.
    28 
    29     ; check short
    30     cmp ecx,BYTE 16
    31     ja .L3                      ; more than 16 pixels: use the dword path
    32 
    33 
    34 .L1 ; short loop: one pixel per pass, byte loads and stores
    35     mov al,[esi]
    36     mov ah,[esi+1]              ; ax = pixel; bits 31-16 of eax are stale
    37     mov ebx,eax
    38     mov edx,eax
    39     shr eax,11
    40     and eax,BYTE 11111b         ; old bits 15-11, now at bits 4-0
    41     and ebx,11111100000b        ; bits 10-5, left in place
    42     shl edx,11                  ; old bits 4-0, now at bits 15-11
    43     add eax,ebx
    44     add eax,edx                 ; the three fields do not overlap within ax
    45     mov [edi],al
    46     mov [edi+1],ah              ; only ax is stored, so the stale upper bits vanish
    47     add esi,BYTE 2
    48     add edi,BYTE 2
    49     dec ecx
    50     jnz .L1
    51 .L2
    52     jmp _x86return
    53 
    54 .L3 ; head: if edi is not a multiple of 4, convert one pixel first
    55     mov eax,edi
    56     and eax,BYTE 11b
    57     jz .L4
    58     mov al,[esi]
    59     mov ah,[esi+1]
    60     mov ebx,eax
    61     mov edx,eax
    62     shr eax,11
    63     and eax,BYTE 11111b
    64     and ebx,11111100000b
    65     shl edx,11
    66     add eax,ebx
    67     add eax,edx
    68     mov [edi],al
    69     mov [edi+1],ah
    70     add esi,BYTE 2
    71     add edi,BYTE 2
    72     dec ecx
    73 
    74 .L4 ; save count
    75     push ecx                    ; kept for the odd-pixel test in the tail
    76 
    77     ; unroll twice
    78     shr ecx,1                   ; ecx = number of pixel pairs (dwords)
    79     
    80     ; point arrays to end
    81     lea esi,[esi+ecx*4]
    82     lea edi,[edi+ecx*4]
    83 
    84     ; negative counter 
    85     neg ecx                     ; index climbs from -pairs to 0
    86     jmp SHORT .L6               ; first pass has no finished pair to store
    87                               
    88 .L5     mov [edi+ecx*4-4],eax   ; store the pair finished on the previous pass
    89 .L6     mov eax,[esi+ecx*4]     ; the same source dword (two pixels) is loaded three times
    90 
    91         mov ebx,[esi+ecx*4]
    92         and eax,07E007E0h         ; bits 10-5 of both pixels, left in place
    93 
    94         mov edx,[esi+ecx*4]
    95         and ebx,0F800F800h
    96 
    97         shr ebx,11              ; bits 15-11 of each pixel down to bits 4-0
    98         and edx,001F001Fh
    99 
   100         shl edx,11              ; bits 4-0 of each pixel up to bits 15-11
   101         add eax,ebx
   102 
   103         add eax,edx                 
   104         inc ecx
   105 
   106         jnz .L5                 
   107          
   108     mov [edi+ecx*4-4],eax       ; ecx = 0 here: stores the last pair at [edi-4]
   109 
   110     ; tail
   111     pop ecx
   112     and ecx,BYTE 1              ; one pixel left over if the saved count was odd
   113     jz .L7
   114     mov al,[esi]
   115     mov ah,[esi+1]
   116     mov ebx,eax
   117     mov edx,eax
   118     shr eax,11
   119     and eax,BYTE 11111b
   120     and ebx,11111100000b
   121     shl edx,11
   122     add eax,ebx
   123     add eax,edx
   124     mov [edi],al
   125     mov [edi+1],ah
   126     add esi,BYTE 2
   127     add edi,BYTE 2
   128 
   129 .L7
   130     jmp _x86return
   131 
   132 
   133 
   134 
   135 
   136 
   137 _ConvertX86p16_16RGB555:
       ; Copies ecx 16-bit pixels from [esi] to [edi]; for each pixel bits 15-6
       ; move down one position (to bits 14-5), bits 4-0 stay put, source bit 5
       ; is dropped and result bit 15 is zero.
       ; Going by the name this is 5-6-5 to 5-5-5 with the channel order kept
       ; -- TODO confirm.
       ;   in:   esi = source, edi = destination, ecx = pixel count
       ;   out:  esi/edi advanced past the converted pixels; leaves through
       ;         jmp _x86return (external, not visible in this file)
       ;   uses: eax, ebx, ecx, edx, flags; ebp is saved and restored on the
       ;         long path, which uses two stack slots
       ; NOTE(review): ecx = 0 is not checked; dec/jnz in the short loop would wrap.
   138 
   139     ; check short
   140     cmp ecx,BYTE 32
   141     ja .L3                      ; more than 32 pixels: use the unrolled path
   142 
   143 
   144 .L1 ; short loop: one pixel per pass, byte loads and stores
   145     mov al,[esi]
   146     mov ah,[esi+1]              ; ax = pixel; bits 31-16 of eax are stale
   147     mov ebx,eax
   148     shr ebx,1
   149     and ebx,     0111111111100000b   ; old bits 15-6, now at bits 14-5; bit 15 cleared
   150     and eax,BYTE 0000000000011111b   ; bits 4-0, unchanged
   151     add eax,ebx
   152     mov [edi],al
   153     mov [edi+1],ah
   154     add esi,BYTE 2
   155     add edi,BYTE 2
   156     dec ecx
   157     jnz .L1
   158 .L2
   159     jmp _x86return
   160 
   161 .L3 ; head: if edi is not a multiple of 4, convert one pixel first
   162     mov eax,edi
   163     and eax,BYTE 11b
   164     jz .L4
   165     mov al,[esi]
   166     mov ah,[esi+1]
   167     mov ebx,eax
   168     shr ebx,1
   169     and ebx,     0111111111100000b
   170     and eax,BYTE 0000000000011111b
   171     add eax,ebx
   172     mov [edi],al
   173     mov [edi+1],ah
   174     add esi,BYTE 2
   175     add edi,BYTE 2
   176     dec ecx
   177 
   178 .L4 ; save ebp
   179     push ebp                    ; ebp serves as the loop index below
   180 
   181     ; save count
   182     push ecx                    ; ecx is reused as a data register inside the loop
   183 
   184     ; unroll four times
   185     shr ecx,2                   ; ecx = number of 4-pixel (8-byte) groups
   186     
   187     ; point arrays to end
   188     lea esi,[esi+ecx*8]
   189     lea edi,[edi+ecx*8]
   190 
   191     ; negative counter 
   192     xor ebp,ebp
   193     sub ebp,ecx                 ; ebp = -groups, climbs to 0
   194 
   195 .L5     mov eax,[esi+ebp*8]        ; agi?
   196         mov ecx,[esi+ebp*8+4]   ; eax and ecx each hold two pixels
   197        
   198         mov ebx,eax
   199         mov edx,ecx
   200 
   201         and eax,0FFC0FFC0h      ; bits 15-6 of each pixel; masking before the shift
   202         and ecx,0FFC0FFC0h      ; stops the upper pixel leaking into the lower one
   203 
   204         shr eax,1
   205         and ebx,001F001Fh       ; bits 4-0 of each pixel, unchanged
   206 
   207         shr ecx,1
   208         and edx,001F001Fh
   209 
   210         add eax,ebx
   211         add ecx,edx
   212 
   213         mov [edi+ebp*8],eax
   214         mov [edi+ebp*8+4],ecx
   215 
   216         inc ebp
   217         jnz .L5                 
   218 
   219     ; tail
   220     pop ecx
   221 .L6 and ecx,BYTE 11b            ; 0-3 pixels remain; re-masking on later passes changes nothing
   222     jz .L7
   223     mov al,[esi]
   224     mov ah,[esi+1]
   225     mov ebx,eax
   226     shr ebx,1
   227     and ebx,     0111111111100000b
   228     and eax,BYTE 0000000000011111b
   229     add eax,ebx
   230     mov [edi],al
   231     mov [edi+1],ah
   232     add esi,BYTE 2
   233     add edi,BYTE 2
   234     dec ecx
   235     jmp SHORT .L6
   236 
   237 .L7 pop ebp                     ; balances the push at .L4
   238     jmp _x86return
   239 
   240 
   241 
   242 
   243 
   244 
   245 _ConvertX86p16_16BGR555:
       ; Copies ecx 16-bit pixels from [esi] to [edi]; for each pixel
       ;   bits 15-11 move to bits 4-0,
       ;   bits 10-6  move to bits 9-5 (source bit 5 is dropped),
       ;   bits 4-0   move to bits 14-10,
       ; and result bit 15 is zero.
       ; Going by the name this is 5-6-5 to 5-5-5 with the outer channels
       ; swapped -- TODO confirm.
       ;   in:   esi = source, edi = destination, ecx = pixel count
       ;   out:  esi/edi advanced past the converted pixels; leaves through
       ;         jmp _x86return (external, not visible in this file)
       ;   uses: eax, ebx, ecx, edx, flags; one stack slot on the long path
       ; NOTE(review): ecx = 0 is not checked; dec/jnz in the short loop would wrap.
   246 
   247     ; check short
   248     cmp ecx,BYTE 16
   249     ja .L3                      ; more than 16 pixels: use the dword path
   250 
   251 	
   252 .L1 ; short loop: one pixel per pass, byte loads and stores
   253     mov al,[esi]
   254     mov ah,[esi+1]              ; ax = pixel; bits 31-16 of eax are stale
   255     mov ebx,eax
   256     mov edx,eax
   257     shr eax,11
   258     and eax,BYTE 11111b         ; old bits 15-11, now at bits 4-0
   259     shr ebx,1
   260     and ebx,1111100000b         ; old bits 10-6, now at bits 9-5
   261     shl edx,10
   262     and edx,0111110000000000b   ; old bits 4-0, now at bits 14-10
   263     add eax,ebx
   264     add eax,edx                 ; the three fields do not overlap
   265     mov [edi],al
   266     mov [edi+1],ah
   267     add esi,BYTE 2
   268     add edi,BYTE 2
   269     dec ecx
   270     jnz .L1
   271 .L2
   272     jmp _x86return
   273 
   274 .L3 ; head: if edi is not a multiple of 4, convert one pixel first
   275     mov eax,edi
   276     and eax,BYTE 11b
   277     jz .L4
   278     mov al,[esi]
   279     mov ah,[esi+1]
   280     mov ebx,eax
   281     mov edx,eax
   282     shr eax,11
   283     and eax,BYTE 11111b
   284     shr ebx,1
   285     and ebx,1111100000b
   286     shl edx,10
   287     and edx,0111110000000000b
   288     add eax,ebx
   289     add eax,edx
   290     mov [edi],al
   291     mov [edi+1],ah
   292     add esi,BYTE 2
   293     add edi,BYTE 2
   294     dec ecx
   295 
   296 .L4 ; save count
   297     push ecx                    ; kept for the odd-pixel test in the tail
   298 
   299     ; unroll twice
   300     shr ecx,1                   ; ecx = number of pixel pairs (dwords)
   301     
   302     ; point arrays to end
   303     lea esi,[esi+ecx*4]
   304     lea edi,[edi+ecx*4]
   305 
   306     ; negative counter 
   307     neg ecx                     ; index climbs from -pairs to 0
   308     jmp SHORT .L6               ; first pass has no finished pair to store
   309                               
   310 .L5     mov [edi+ecx*4-4],eax   ; store the pair finished on the previous pass
   311 .L6     mov eax,[esi+ecx*4]     ; the same source dword (two pixels) is loaded three times
   312 
   313         shr eax,1
   314         mov ebx,[esi+ecx*4]
   315         
   316         and eax,03E003E0h         ; old bits 10-6 of each pixel, now at bits 9-5
   317         mov edx,[esi+ecx*4]
   318 
   319         and ebx,0F800F800h
   320 
   321         shr ebx,11              ; bits 15-11 of each pixel down to bits 4-0
   322         and edx,001F001Fh
   323 
   324         shl edx,10              ; bits 4-0 of each pixel up to bits 14-10
   325         add eax,ebx
   326 
   327         add eax,edx                 
   328         inc ecx
   329 
   330         jnz .L5                 
   331          
   332     mov [edi+ecx*4-4],eax       ; ecx = 0 here: stores the last pair at [edi-4]
   333 
   334     ; tail
   335     pop ecx
   336     and ecx,BYTE 1              ; one pixel left over if the saved count was odd
   337     jz .L7
   338     mov al,[esi]
   339     mov ah,[esi+1]
   340     mov ebx,eax
   341     mov edx,eax
   342     shr eax,11
   343     and eax,BYTE 11111b
   344     shr ebx,1
   345     and ebx,1111100000b
   346     shl edx,10
   347     and edx,0111110000000000b
   348     add eax,ebx
   349     add eax,edx
   350     mov [edi],al
   351     mov [edi+1],ah
   352     add esi,BYTE 2
   353     add edi,BYTE 2
   354 
   355 .L7
   356     jmp _x86return
   357 
   358 
   359 
   360 
   361 
   362 
   363 _ConvertX86p16_8RGB332:
       ; Reduces ecx 16-bit pixels at [esi] to one byte each at [edi]:
       ;   source bits 15-13 (red)   -> result bits 7-5
       ;   source bits 10-8  (green) -> result bits 4-2
       ;   source bits 4-3   (blue)  -> result bits 1-0
       ;   in:   esi = source (2 bytes per pixel), edi = destination (1 byte per
       ;         pixel), ecx = pixel count
       ;   out:  esi/edi advanced past the converted pixels; leaves through
       ;         jmp _x86return (external, not visible in this file)
       ;   uses: eax, ebx, ecx, edx, flags; two stack slots on the long path
       ; NOTE(review): ecx = 0 is not checked; dec/jnz in the short loop would wrap.
       ; NOTE(review): ebp is pushed at .L4 and popped at .L7 but never used here.
   364 
   365     ; check short
   366     cmp ecx,BYTE 16
   367     ja .L3                      ; more than 16 pixels: use the unrolled path
   368 
   369 
   370 .L1 ; short loop: one pixel per pass
   371     mov al,[esi+0]
   372     mov ah,[esi+1]              ; ax = pixel; bits 31-16 of eax are stale
   373     mov ebx,eax
   374     mov edx,eax
   375     and eax,BYTE 11000b         ; blue
   376     shr eax,3
   377     and ebx,11100000000b        ; green
   378     shr ebx,6
   379     and edx,1110000000000000b   ; red
   380     shr edx,8
   381     add eax,ebx
   382     add eax,edx
   383     mov [edi],al
   384     add esi,BYTE 2
   385     inc edi
   386     dec ecx
   387     jnz .L1
   388 .L2
   389     jmp _x86return
   390 
   391 .L3 mov eax,edi                 ; head: one pixel at a time until edi is a multiple of 4
   392     and eax,BYTE 11b
   393     jz .L4
   394     mov al,[esi+0]
   395     mov ah,[esi+1]
   396     mov ebx,eax
   397     mov edx,eax
   398     and eax,BYTE 11000b         ; blue
   399     shr eax,3
   400     and ebx,11100000000b        ; green
   401     shr ebx,6
   402     and edx,1110000000000000b   ; red
   403     shr edx,8
   404     add eax,ebx
   405     add eax,edx
   406     mov [edi],al
   407     add esi,BYTE 2
   408     inc edi
   409     dec ecx
   410     jmp SHORT .L3               ; re-test alignment (at most three passes)
   411 
   412 .L4 ; save ebp
   413     push ebp
   414 
   415     ; save count
   416     push ecx                    ; kept for the 0-3 pixel tail
   417 
   418     ; unroll 4 times
   419     shr ecx,2                   ; ecx = number of 4-pixel groups
   420 
   421     ; prestep
   422     mov dl,[esi+0]              ; low byte of pixel 0
   423     mov bl,[esi+1]              ; high byte of pixel 0
   424     mov dh,[esi+2]              ; low byte of pixel 1
   425         
   426 .L5     shl edx,16              ; edx collects the four low bytes
   427         mov bh,[esi+3]          ; high byte of pixel 1
   428         
   429         shl ebx,16              ; ebx collects the four high bytes
   430         mov dl,[esi+4]
   431 
   432         mov dh,[esi+6]
   433         mov bl,[esi+5]
   434 
   435         and edx,00011000000110000001100000011000b   ; bits 4-3 of every low byte
   436         mov bh,[esi+7]
   437 
   438         ror edx,16+3            ; 16 puts pixel k in byte k; 3 more lowers bits 4-3 to 1-0
   439         mov eax,ebx                                     ; setup eax for reds
   440 
   441         and ebx,00000111000001110000011100000111b       ; bits 10-8 of every pixel
   442         and eax,11100000111000001110000011100000b       ; reds
   443 
   444         ror ebx,16-2            ; swap halves and raise bits 2-0 to bits 4-2
   445         add esi,BYTE 8
   446 
   447         ror eax,16              ; swap halves only; bits 7-5 are already in place
   448         add edi,BYTE 4
   449 
   450         add eax,ebx
   451         mov bl,[esi+1]                                  ; greens (prestep for the next pass)
   452 
   453         add eax,edx
   454         mov dl,[esi+0]                                  ; blues (prestep for the next pass)
   455 
   456         mov [edi-4],eax         ; four result pixels in one store
   457         mov dh,[esi+2]          ; NOTE(review): the three prestep loads also run on the
   458                                 ; final pass, so [esi+0..2] is read even with no tail left
   459         dec ecx
   460         jnz .L5                 
   461     
   462     ; check tail
   463     pop ecx
   464     and ecx,BYTE 11b            ; 0-3 pixels remain
   465     jz .L7
   466 
   467 .L6 ; tail
   468     mov al,[esi+0]
   469     mov ah,[esi+1]
   470     mov ebx,eax
   471     mov edx,eax
   472     and eax,BYTE 11000b         ; blue
   473     shr eax,3
   474     and ebx,11100000000b        ; green
   475     shr ebx,6
   476     and edx,1110000000000000b   ; red
   477     shr edx,8
   478     add eax,ebx
   479     add eax,edx
   480     mov [edi],al
   481     add esi,BYTE 2
   482     inc edi
   483     dec ecx
   484     jnz .L6
   485 
   486 .L7 pop ebp                     ; balances the push at .L4
   487     jmp _x86return
   488 ; ELF output only: emit an empty .note.GNU-stack section flagged noexec (conventionally marks the object as not needing an executable stack)
   489 %ifidn __OUTPUT_FORMAT__,elf
   490 section .note.GNU-stack noalloc noexec nowrite progbits
   491 %endif