src/hermes/x86p_32.asm
author Ryan C. Gordon
Wed, 29 Nov 2006 10:30:05 +0000
branch SDL-1.2
changeset 3900 ce3a2bd11305
parent 1873 eb4d9d99849b
child 2134 180fa05e98e2
permissions -rw-r--r--
Wrapped some macro params in parentheses for alloca wrappers.
Thanks, Suzuki Masahiro.
     1 ;
     2 ; x86 format converters for HERMES
     3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
     4 ; This source code is licensed under the GNU LGPL
     5 ; 
     6 ; Please refer to the file COPYING.LIB contained in the distribution for
     7 ; licensing conditions		
     8 ;
     9 ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
    10 ; 
    11 
    12 BITS 32
    13 
    14 %include "common.inc"
    15 ; Converter entry points, declared through SDL_FUNC (macro is not defined in this file; presumably provided by common.inc -- confirm)
    16 SDL_FUNC _ConvertX86p32_32BGR888
    17 SDL_FUNC _ConvertX86p32_32RGBA888
    18 SDL_FUNC _ConvertX86p32_32BGRA888
    19 SDL_FUNC _ConvertX86p32_24RGB888	
    20 SDL_FUNC _ConvertX86p32_24BGR888
    21 SDL_FUNC _ConvertX86p32_16RGB565
    22 SDL_FUNC _ConvertX86p32_16BGR565
    23 SDL_FUNC _ConvertX86p32_16RGB555
    24 SDL_FUNC _ConvertX86p32_16BGR555
    25 SDL_FUNC _ConvertX86p32_8RGB332
    26 ; External exit label: every converter below finishes with "jmp _x86return"
    27 EXTERN _x86return
    28 	
    29 SECTION .text
    30 
    31 ;; _Convert_*
    32 ;; Parameters:	
    33 ;;   ESI = source 
    34 ;;   EDI = dest
    35 ;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
    36 ;; Destroys:
    37 ;;   EAX, EBX, EDX
    38 ; 32-bit -> 32-bit copy that exchanges bytes 0 and 2 of every pixel:
    39 ;   bswap + ror 8 turns [b3][b2][b1][b0] into [b3][b0][b1][b2]
    40 _ConvertX86p32_32BGR888:
    41 
    42     ; check short: 32 pixels or fewer are handled by the one-pixel loop
    43     cmp ecx,BYTE 32
    44     ja .L3                      ; unsigned: more than 32 -> unrolled path
    45 
    46 .L1 ; short loop: one pixel per iteration
    47     mov edx,[esi]
    48     bswap edx                   ; [b0][b1][b2][b3]
    49     ror edx,8                   ; [b3][b0][b1][b2]
    50     mov [edi],edx
    51     add esi,BYTE 4
    52     add edi,BYTE 4
    53     dec ecx
    54     jnz .L1
    55 .L2 ; no jump in this routine targets this label
    56     jmp _x86return
    57 
    58 .L3 ; save ebp (used as the group counter below)
    59     push ebp
    60 
    61     ; unroll four times: ebp = number of complete 4-pixel groups
    62     mov ebp,ecx
    63     shr ebp,2
    64     
    65     ; save count (ecx is reused as a data register inside the loop)
    66     push ecx
    67 
    68 .L4     mov eax,[esi]           ; four pixels per pass; loads, swaps and
    69         mov ebx,[esi+4]         ; stores of the four registers are interleaved
    70 
    71         bswap eax
    72 
    73         bswap ebx
    74 
    75         ror eax,8
    76         mov ecx,[esi+8]
    77 
    78         ror ebx,8
    79         mov edx,[esi+12]
    80 
    81         bswap ecx
    82 
    83         bswap edx
    84 
    85         ror ecx,8
    86         mov [edi+0],eax
    87 
    88         ror edx,8
    89         mov [edi+4],ebx
    90 
    91         mov [edi+8],ecx
    92         mov [edi+12],edx
    93 
    94         add esi,BYTE 16
    95         add edi,BYTE 16
    96 
    97         dec ebp
    98         jnz .L4                 
    99 
   100     ; check tail: restore the count and keep the 0..3 leftover pixels
   101     pop ecx
   102     and ecx,BYTE 11b
   103     jz .L6
   104 
   105 .L5 ; tail loop: same per-pixel conversion as the short loop
   106     mov edx,[esi]
   107     bswap edx
   108     ror edx,8
   109     mov [edi],edx
   110     add esi,BYTE 4
   111     add edi,BYTE 4
   112     dec ecx
   113     jnz .L5
   114 
   115 .L6 pop ebp                     ; restore ebp pushed at .L3
   116     jmp _x86return
   117 ; 32-bit -> 32-bit copy that rotates every pixel left by one byte:
   118 ;   rol 8 turns [b3][b2][b1][b0] into [b2][b1][b0][b3]
   119 ; Uses a one-pixel loop for up to 32 pixels, otherwise a 4x unrolled loop
   120 ; followed by a one-pixel tail loop.
   121 _ConvertX86p32_32RGBA888:
   122 	
   123     ; check short: 32 pixels or fewer are handled by the one-pixel loop
   124     cmp ecx,BYTE 32
   125     ja .L3                      ; unsigned: more than 32 -> unrolled path
   126 
   127 .L1 ; short loop: one pixel per iteration
   128     mov edx,[esi]
   129     rol edx,8                   ; [b2][b1][b0][b3]
   130     mov [edi],edx
   131     add esi,BYTE 4
   132     add edi,BYTE 4
   133     dec ecx
   134     jnz .L1
   135 .L2 ; no jump in this routine targets this label
   136     jmp _x86return
   137 
   138 .L3 ; save ebp (used as the group counter below)
   139     push ebp
   140 
   141     ; unroll four times: ebp = number of complete 4-pixel groups
   142     mov ebp,ecx
   143     shr ebp,2
   144     
   145     ; save count (ecx is reused as a data register inside the loop)
   146     push ecx
   147 
   148 .L4     mov eax,[esi]           ; four pixels per pass, loads/rotates/stores interleaved
   149         mov ebx,[esi+4]
   150 
   151         rol eax,8
   152         mov ecx,[esi+8]
   153 
   154         rol ebx,8
   155         mov edx,[esi+12]
   156 
   157         rol ecx,8
   158         mov [edi+0],eax
   159 
   160         rol edx,8
   161         mov [edi+4],ebx
   162 
   163         mov [edi+8],ecx
   164         mov [edi+12],edx
   165 
   166         add esi,BYTE 16
   167         add edi,BYTE 16
   168 
   169         dec ebp
   170         jnz .L4                 
   171 
   172     ; check tail: restore the count and keep the 0..3 leftover pixels
   173     pop ecx
   174     and ecx,BYTE 11b
   175     jz .L6
   176 
   177 .L5 ; tail loop: same per-pixel conversion as the short loop
   178     mov edx,[esi]
   179     rol edx,8
   180     mov [edi],edx
   181     add esi,BYTE 4
   182     add edi,BYTE 4
   183     dec ecx
   184     jnz .L5
   185 
   186 .L6 pop ebp                     ; restore ebp pushed at .L3
   187     jmp _x86return
   188 
   189 ; 32-bit -> 32-bit copy that reverses the byte order of every pixel:
   190 ;   bswap turns [b3][b2][b1][b0] into [b0][b1][b2][b3]
   191 ; One-pixel loop for up to 32 pixels, otherwise 4x unrolled loop plus tail.
   192 _ConvertX86p32_32BGRA888:
   193 
   194     ; check short: 32 pixels or fewer are handled by the one-pixel loop
   195     cmp ecx,BYTE 32
   196     ja .L3                      ; unsigned: more than 32 -> unrolled path
   197 
   198 .L1 ; short loop: one pixel per iteration
   199     mov edx,[esi]
   200     bswap edx
   201     mov [edi],edx
   202     add esi,BYTE 4
   203     add edi,BYTE 4
   204     dec ecx
   205     jnz .L1
   206 .L2 ; no jump in this routine targets this label
   207     jmp _x86return
   208 
   209 .L3 ; save ebp (used as the group counter below)
   210     push ebp
   211 
   212     ; unroll four times: ebp = number of complete 4-pixel groups
   213     mov ebp,ecx
   214     shr ebp,2
   215     
   216     ; save count (ecx is reused as a data register inside the loop)
   217     push ecx
   218 
   219 .L4     mov eax,[esi]           ; load four pixels
   220         mov ebx,[esi+4]
   221 
   222         mov ecx,[esi+8]
   223         mov edx,[esi+12]
   224 
   225         bswap eax               ; reverse the bytes of each one
   226 
   227         bswap ebx
   228 
   229         bswap ecx
   230 
   231         bswap edx
   232 
   233         mov [edi+0],eax         ; store four pixels
   234         mov [edi+4],ebx
   235 
   236         mov [edi+8],ecx
   237         mov [edi+12],edx
   238 
   239         add esi,BYTE 16
   240         add edi,BYTE 16
   241 
   242         dec ebp
   243         jnz .L4                 
   244 
   245     ; check tail: restore the count and keep the 0..3 leftover pixels
   246     pop ecx
   247     and ecx,BYTE 11b
   248     jz .L6
   249 
   250 .L5 ; tail loop: same per-pixel conversion as the short loop
   251     mov edx,[esi]
   252     bswap edx
   253     mov [edi],edx
   254     add esi,BYTE 4
   255     add edi,BYTE 4
   256     dec ecx
   257     jnz .L5
   258 
   259 .L6 pop ebp                     ; restore ebp pushed at .L3
   260     jmp _x86return
   261 
   262 
   263 	
   264 	
   265 ;; 32 bit RGB 888 to 24 BIT RGB 888
   266 ;; Copies source bytes 0,1,2 of each pixel to 3 destination bytes, dropping byte 3.
   267 _ConvertX86p32_24RGB888:
   268 
   269 	; check short: 32 pixels or fewer are copied byte by byte
   270 	cmp ecx,BYTE 32
   271 	ja .L3
   272 
   273 .L1	; short loop: one pixel (4 bytes in, 3 bytes out) per iteration
   274 	mov al,[esi]
   275 	mov bl,[esi+1]
   276 	mov dl,[esi+2]
   277 	mov [edi],al
   278 	mov [edi+1],bl
   279 	mov [edi+2],dl
   280 	add esi,BYTE 4
   281 	add edi,BYTE 3
   282 	dec ecx
   283 	jnz .L1
   284 .L2 
   285 	jmp _x86return
   286 
   287 .L3	;	 head: copy single pixels until edi is a multiple of 4
   288 	mov edx,edi
   289 	and edx,BYTE 11b
   290 	jz .L4
   291 	mov al,[esi]
   292 	mov bl,[esi+1]
   293 	mov dl,[esi+2]
   294 	mov [edi],al
   295 	mov [edi+1],bl
   296 	mov [edi+2],dl
   297 	add esi,BYTE 4
   298 	add edi,BYTE 3
   299 	dec ecx
   300 	jmp SHORT .L3		; re-test the alignment of edi
   301 
   302 .L4 ; unroll 4 times: ebp = number of complete 4-pixel groups
   303 	push ebp
   304 	mov ebp,ecx
   305 	shr ebp,2
   306 
   307     ; save count (ecx is reused as a data register inside the loop)
   308 	push ecx
   309 
   310 .L5     mov eax,[esi]                   ; pixel 0                eax = [A0][R0][G0][B0]
   311         mov ebx,[esi+4]                 ; pixel 1                ebx = [A1][R1][G1][B1]
   312 
   313         shl eax,8                       ;                        eax = [R0][G0][B0][..]
   314         mov ecx,[esi+12]                ; pixel 3 (4th dword)    ecx = [A3][R3][G3][B3]
   315 
   316         shl ebx,8                       ;                        ebx = [R1][G1][B1][..]
   317         mov al,[esi+4]                  ; al = B1                eax = [R0][G0][B0][B1]
   318 
   319         ror eax,8                       ;                        eax = [B1][R0][G0][B0] (done)
   320         mov bh,[esi+8+1]                ; bh = G2                ebx = [R1][G1][G2][..]
   321 
   322         mov [edi],eax
   323         add edi,BYTE 3*4                ; edi advanced early; later stores subtract 3*4
   324 
   325         shl ecx,8                       ;                        ecx = [R3][G3][B3][..]
   326         mov bl,[esi+8+0]                ; bl = B2                ebx = [R1][G1][G2][B2]
   327 
   328         rol ebx,16                      ;                        ebx = [G2][B2][R1][G1] (done)
   329         mov cl,[esi+8+2]                ; cl = R2                ecx = [R3][G3][B3][R2] (done)
   330 
   331         mov [edi+4-3*4],ebx
   332         add esi,BYTE 4*4
   333         
   334         mov [edi+8-3*4],ecx
   335         dec ebp
   336 
   337         jnz .L5                         ; flags from "dec ebp" (mov/add order above matters)
   338 
   339     ; check tail: restore the count and keep the 0..3 leftover pixels
   340 	pop ecx
   341 	and ecx,BYTE 11b
   342 	jz .L7
   343 
   344 .L6 ; tail loop: same per-pixel copy as the short loop
   345 	mov al,[esi]
   346 	mov bl,[esi+1]
   347 	mov dl,[esi+2]
   348 	mov [edi],al
   349 	mov [edi+1],bl
   350 	mov [edi+2],dl
   351 	add esi,BYTE 4
   352 	add edi,BYTE 3
   353 	dec ecx
   354 	jnz .L6
   355 
   356 .L7	pop ebp			; restore ebp pushed at .L4
   357 	jmp _x86return
   358 
   359 
   360 
   361 
   362 ;; 32 bit RGB 888 to 24 bit BGR 888
   363 ;; Writes source bytes 2,1,0 of each pixel (reversed order) to 3 destination bytes.
   364 _ConvertX86p32_24BGR888:
   365 
   366 	; check short: 32 pixels or fewer are copied byte by byte
   367 	cmp ecx,BYTE 32
   368 	ja .L3
   369 
   370 	
   371 .L1	; short loop: one pixel (4 bytes in, 3 bytes out) per iteration
   372 	mov dl,[esi]
   373 	mov bl,[esi+1]
   374 	mov al,[esi+2]
   375 	mov [edi],al
   376 	mov [edi+1],bl
   377 	mov [edi+2],dl
   378 	add esi,BYTE 4
   379 	add edi,BYTE 3
   380 	dec ecx
   381 	jnz .L1
   382 .L2
   383 	jmp _x86return
   384 
   385 .L3 ; head: copy single pixels until edi is a multiple of 4
   386 	mov edx,edi
   387 	and edx,BYTE 11b
   388 	jz .L4
   389 	mov dl,[esi]
   390 	mov bl,[esi+1]
   391 	mov al,[esi+2]
   392 	mov [edi],al
   393 	mov [edi+1],bl
   394 	mov [edi+2],dl
   395 	add esi,BYTE 4
   396 	add edi,BYTE 3
   397 	dec ecx
   398 	jmp SHORT .L3		; re-test the alignment of edi
   399 
   400 .L4	; unroll 4 times: ebp = number of complete 4-pixel groups
   401 	push ebp
   402 	mov ebp,ecx
   403 	shr ebp,2
   404 
   405 	; save count (ecx is reused as a data register inside the loop)
   406 	push ecx
   407 
   408 .L5     
   409 	mov eax,[esi]                   ; pixel 0                eax = [A0][R0][G0][B0]
   410         mov ebx,[esi+4]                 ; pixel 1                ebx = [A1][R1][G1][B1]
   411         
   412         bswap eax                       ;                        eax = [B0][G0][R0][A0]
   413 
   414         bswap ebx                       ;                        ebx = [B1][G1][R1][A1]
   415 
   416         mov al,[esi+4+2]                ; al = R1                eax = [B0][G0][R0][R1]
   417         mov bh,[esi+4+4+1]              ; bh = G2                ebx = [B1][G1][G2][A1]
   418 
   419         ror eax,8                       ;                        eax = [R1][B0][G0][R0] (done)
   420         mov bl,[esi+4+4+2]              ; bl = R2                ebx = [B1][G1][G2][R2]
   421 
   422         ror ebx,16                      ;                        ebx = [G2][R2][B1][G1] (done)
   423         mov [edi],eax
   424     
   425         mov [edi+4],ebx
   426         mov ecx,[esi+12]                ; pixel 3 (4th dword)    ecx = [A3][R3][G3][B3]
   427         
   428         bswap ecx                       ;                        ecx = [B3][G3][R3][A3]
   429         
   430         mov cl,[esi+8]                  ; cl = B2                ecx = [B3][G3][R3][B2] (done)
   431         add esi,BYTE 4*4
   432 
   433         mov [edi+8],ecx
   434         add edi,BYTE 3*4
   435 
   436         dec ebp
   437         jnz .L5
   438 
   439 	; check tail: restore the count and keep the 0..3 leftover pixels
   440 	pop ecx
   441 	and ecx,BYTE 11b
   442 	jz .L7
   443 
   444 .L6	; tail loop: same per-pixel copy as the short loop
   445 	mov dl,[esi]
   446 	mov bl,[esi+1]
   447 	mov al,[esi+2]
   448 	mov [edi],al
   449 	mov [edi+1],bl
   450 	mov [edi+2],dl
   451 	add esi,BYTE 4
   452 	add edi,BYTE 3
   453 	dec ecx
   454 	jnz .L6
   455 
   456 .L7 
   457 	pop ebp			; restore ebp pushed at .L4
   458 	jmp _x86return
   459  
   460 
   461 	
   462 		
   463 ;; 32 bit RGB 888 to 16 BIT RGB 565 
   464 ;; Output word: red (5 bits) in bits 11-15, green (6) in bits 5-10, blue (5) in bits 0-4.
   465 _ConvertX86p32_16RGB565:
   466 	; check short: 16 pixels or fewer use the one-pixel loop
   467 	cmp ecx,BYTE 16
   468 	ja .L3
   469 
   470 .L1 ; short loop
   471 	mov bl,[esi+0]    ; blue
   472 	mov al,[esi+1]    ; green
   473 	mov ah,[esi+2]    ; red
   474 	shr ah,3          ; red -> 5 bits
   475         and al,11111100b  ; keep the top 6 green bits
   476 	shl eax,3         ; ax = rrrrrggg ggg00000
   477 	shr bl,3          ; blue -> 5 bits
   478 	add al,bl         ; ax = rrrrrggg gggbbbbb
   479 	mov [edi+0],al
   480 	mov [edi+1],ah
   481 	add esi,BYTE 4
   482 	add edi,BYTE 2
   483 	dec ecx
   484 	jnz .L1
   485 
   486 .L2:				; End of short loop
   487 	jmp _x86return
   488 
   489 	
   490 .L3	; head: if edi is not a multiple of 4, convert one pixel first
   491 	mov ebx,edi
   492 	and ebx,BYTE 11b
   493 	jz .L4
   494 	
   495 	mov bl,[esi+0]    ; blue
   496 	mov al,[esi+1]    ; green
   497 	mov ah,[esi+2]    ; red
   498 	shr ah,3
   499 	and al,11111100b
   500 	shl eax,3
   501 	shr bl,3
   502 	add al,bl
   503 	mov [edi+0],al
   504 	mov [edi+1],ah
   505 	add esi,BYTE 4
   506 	add edi,BYTE 2
   507 	dec ecx
   508 
   509 .L4:	 
   510     ; save count (bit 0 is tested later to find an odd trailing pixel)
   511 	push ecx
   512 
   513     ; unroll twice: ecx = number of pixel pairs
   514 	shr ecx,1
   515     
   516     ; point arrays to end of the paired region (8 source / 4 dest bytes per pair)
   517 	lea esi,[esi+ecx*8]
   518 	lea edi,[edi+ecx*4]
   519 
   520     ; negative counter: ecx runs from -pairs up to 0 and indexes both arrays
   521 	neg ecx
   522 	jmp SHORT .L6
   523 
   524 .L5:	    
   525 	mov [edi+ecx*4-4],eax           ; store the pair built in the previous pass (ecx already incremented)
   526 .L6:	
   527 	mov eax,[esi+ecx*8]             ; eax = pixel 0 = [A0][R0][G0][B0]
   528 
   529         shr ah,2                        ; green0 -> 6 bits
   530         mov ebx,[esi+ecx*8+4]           ; ebx = pixel 1
   531 
   532         shr eax,3                       ; green0 -> bits 5-10, top 5 blue0 bits -> bits 0-4
   533         mov edx,[esi+ecx*8+4]           ; edx = pixel 1 again (source of red1)
   534 
   535         shr bh,2                        ; green1 -> 6 bits
   536         mov dl,[esi+ecx*8+2]            ; dl = red0, edx = [A1][R1][G1][R0]
   537 
   538         shl ebx,13                      ; green1 -> bits 21-26, top 5 blue1 bits -> bits 16-20
   539         and eax,000007FFh               ; keep green0/blue0 only
   540         
   541         shl edx,8                       ; red1 -> bits 24-31, red0 -> bits 8-15
   542         and ebx,07FF0000h               ; keep green1/blue1 only
   543 
   544         and edx,0F800F800h              ; keep the top 5 bits of each red
   545         add eax,ebx
   546 
   547         add eax,edx                     ; eax = two packed 565 pixels (pixel 1 in the high word)
   548         inc ecx
   549 
   550         jnz .L5                         ; flags from "inc ecx"; loop until the index reaches 0
   551 
   552 	mov [edi+ecx*4-4],eax           ; ecx = 0 here: store the last pair at edi-4
   553 
   554     ; tail: one more pixel if the saved count was odd
   555 	pop ecx
   556 	test cl,1
   557 	jz .L7
   558 	
   559 	mov bl,[esi+0]    ; blue
   560 	mov al,[esi+1]    ; green
   561 	mov ah,[esi+2]    ; red
   562 	shr ah,3
   563 	and al,11111100b
   564 	shl eax,3
   565 	shr bl,3
   566 	add al,bl
   567 	mov [edi+0],al
   568 	mov [edi+1],ah
   569 	add esi,BYTE 4
   570 	add edi,BYTE 2
   571 
   572 .L7:	
   573 	jmp _x86return
   574 
   575 
   576 
   577 	
   578 ;; 32 bit RGB 888 to 16 BIT BGR 565 
   579 ;; Output word: blue (5 bits) in bits 11-15, green (6) in bits 5-10, red (5) in bits 0-4.
   580 _ConvertX86p32_16BGR565:
   581 	
   582 	; check short: 16 pixels or fewer use the one-pixel loop
   583 	cmp ecx,BYTE 16
   584 	ja .L3
   585 
   586 .L1	; short loop
   587 	mov ah,[esi+0]    ; blue
   588 	mov al,[esi+1]    ; green
   589 	mov bl,[esi+2]    ; red
   590 	shr ah,3          ; blue -> 5 bits
   591 	and al,11111100b  ; keep the top 6 green bits
   592 	shl eax,3         ; ax = bbbbbggg ggg00000
   593 	shr bl,3          ; red -> 5 bits
   594 	add al,bl         ; ax = bbbbbggg gggrrrrr
   595 	mov [edi+0],al
   596 	mov [edi+1],ah
   597 	add esi,BYTE 4
   598 	add edi,BYTE 2
   599 	dec ecx
   600 	jnz .L1
   601 .L2
   602 	jmp _x86return
   603 
   604 .L3	; head: if edi is not a multiple of 4, convert one pixel first
   605 	mov ebx,edi
   606 	and ebx,BYTE 11b
   607 	jz .L4   
   608 	mov ah,[esi+0]    ; blue
   609 	mov al,[esi+1]    ; green
   610 	mov bl,[esi+2]    ; red
   611 	shr ah,3
   612 	and al,11111100b
   613 	shl eax,3
   614 	shr bl,3
   615 	add al,bl
   616 	mov [edi+0],al
   617 	mov [edi+1],ah
   618 	add esi,BYTE 4
   619 	add edi,BYTE 2
   620 	dec ecx
   621 
   622 .L4	; save count (bit 0 is tested later to find an odd trailing pixel)
   623 	push ecx
   624 
   625 	; unroll twice: ecx = number of pixel pairs
   626 	shr ecx,1
   627     
   628 	; point arrays to end of the paired region (8 source / 4 dest bytes per pair)
   629 	lea esi,[esi+ecx*8]
   630 	lea edi,[edi+ecx*4]
   631 
   632 	; negative count: ecx runs from -pairs up to 0 and indexes both arrays
   633 	neg ecx
   634 	jmp SHORT .L6
   635 
   636 .L5     
   637 	mov [edi+ecx*4-4],eax            ; store the pair built in the previous pass
   638 .L6     
   639 	mov edx,[esi+ecx*8+4]            ; edx = pixel 1 = [A1][R1][G1][B1]
   640 
   641         mov bh,[esi+ecx*8+4]             ; bh = blue1
   642         mov ah,[esi+ecx*8]               ; ah = blue0
   643 
   644         shr bh,3                         ; blue1 -> 5 bits
   645         mov al,[esi+ecx*8+1]             ; al = green0
   646 
   647         shr ah,3                         ; blue0 -> 5 bits
   648         mov bl,[esi+ecx*8+5]             ; bl = green1
   649 
   650         shl eax,3                        ; blue0 -> bits 11-15, green0 -> bits 3-10
   651         mov dl,[esi+ecx*8+2]             ; dl = red0, edx = [A1][R1][G1][R0]
   652 
   653         shl ebx,19                       ; blue1 -> bits 27-31, green1 -> bits 19-26
   654         and eax,0000FFE0h                ; keep blue0 and the top 6 green0 bits
   655                 
   656         shr edx,3                        ; top 5 red0 bits -> bits 0-4, top 5 red1 bits -> bits 16-20
   657         and ebx,0FFE00000h               ; keep blue1 and the top 6 green1 bits
   658         
   659         and edx,001F001Fh                ; keep the two 5-bit reds
   660         add eax,ebx
   661 
   662         add eax,edx                      ; eax = two packed pixels (pixel 1 in the high word)
   663         inc ecx
   664 
   665         jnz .L5                          ; flags from "inc ecx"; loop until the index reaches 0
   666 
   667 	mov [edi+ecx*4-4],eax            ; ecx = 0 here: store the last pair at edi-4
   668 
   669 	; tail: one more pixel if the saved count was odd
   670 	pop ecx
   671 	and ecx,BYTE 1
   672 	jz .L7
   673 	mov ah,[esi+0]    ; blue
   674 	mov al,[esi+1]    ; green
   675 	mov bl,[esi+2]    ; red
   676 	shr ah,3
   677 	and al,11111100b
   678 	shl eax,3
   679 	shr bl,3
   680 	add al,bl
   681 	mov [edi+0],al
   682 	mov [edi+1],ah
   683 	add esi,BYTE 4
   684 	add edi,BYTE 2
   685 
   686 .L7 
   687 	jmp _x86return
   688 
   689 
   690 	
   691 	
   692 ;; 32 BIT RGB TO 16 BIT RGB 555
   693 ;; Output word: red (5 bits) in bits 10-14, green (5) in bits 5-9, blue (5) in bits 0-4.
   694 _ConvertX86p32_16RGB555:
   695 
   696 	; check short: 16 pixels or fewer use the one-pixel loop
   697 	cmp ecx,BYTE 16
   698 	ja .L3
   699 
   700 .L1	; short loop
   701 	mov bl,[esi+0]    ; blue
   702 	mov al,[esi+1]    ; green
   703 	mov ah,[esi+2]    ; red
   704 	shr ah,3          ; red -> 5 bits
   705 	and al,11111000b  ; keep the top 5 green bits
   706 	shl eax,2         ; ax = 0rrrrrgg ggg00000
   707 	shr bl,3          ; blue -> 5 bits
   708 	add al,bl         ; ax = 0rrrrrgg gggbbbbb
   709 	mov [edi+0],al
   710 	mov [edi+1],ah
   711 	add esi,BYTE 4
   712 	add edi,BYTE 2
   713 	dec ecx
   714 	jnz .L1
   715 .L2
   716 	jmp _x86return
   717 
   718 .L3	; head: if edi is not a multiple of 4, convert one pixel first
   719 	mov ebx,edi
   720         and ebx,BYTE 11b
   721 	jz .L4   
   722 	mov bl,[esi+0]    ; blue
   723 	mov al,[esi+1]    ; green
   724 	mov ah,[esi+2]    ; red
   725 	shr ah,3
   726 	and al,11111000b
   727 	shl eax,2
   728 	shr bl,3
   729 	add al,bl
   730 	mov [edi+0],al
   731 	mov [edi+1],ah
   732 	add esi,BYTE 4
   733 	add edi,BYTE 2
   734 	dec ecx
   735 
   736 .L4	; save count (bit 0 is tested later to find an odd trailing pixel)
   737 	push ecx
   738 
   739 	; unroll twice: ecx = number of pixel pairs
   740 	shr ecx,1
   741     
   742 	; point arrays to end of the paired region (8 source / 4 dest bytes per pair)
   743 	lea esi,[esi+ecx*8]
   744 	lea edi,[edi+ecx*4]
   745 
   746 	; negative counter: ecx runs from -pairs up to 0 and indexes both arrays
   747 	neg ecx
   748 	jmp SHORT .L6
   749 
   750 .L5     
   751 	mov [edi+ecx*4-4],eax           ; store the pair built in the previous pass
   752 .L6     
   753 	mov eax,[esi+ecx*8]             ; eax = pixel 0 = [A0][R0][G0][B0]
   754 
   755         shr ah,3                        ; green0 -> 5 bits
   756         mov ebx,[esi+ecx*8+4]           ; ebx = pixel 1
   757 
   758         shr eax,3                       ; green0 -> bits 5-9, top 5 blue0 bits -> bits 0-4
   759         mov edx,[esi+ecx*8+4]           ; edx = pixel 1 again (source of red1)
   760 
   761         shr bh,3                        ; green1 -> 5 bits
   762         mov dl,[esi+ecx*8+2]            ; dl = red0, edx = [A1][R1][G1][R0]
   763 
   764         shl ebx,13                      ; green1 -> bits 21-25, top 5 blue1 bits -> bits 16-20
   765         and eax,000007FFh               ; keep green0/blue0 (bit 10 is already 0 after the shifts)
   766         
   767         shl edx,7                       ; top 5 red1 bits -> bits 26-30, top 5 red0 bits -> bits 10-14
   768         and ebx,07FF0000h               ; keep green1/blue1 only
   769 
   770         and edx,07C007C00h              ; keep the two 5-bit reds
   771         add eax,ebx
   772 
   773         add eax,edx                     ; eax = two packed 555 pixels (pixel 1 in the high word)
   774         inc ecx
   775 
   776         jnz .L5                         ; flags from "inc ecx"; loop until the index reaches 0
   777 
   778 	mov [edi+ecx*4-4],eax           ; ecx = 0 here: store the last pair at edi-4
   779 
   780 	; tail: one more pixel if the saved count was odd
   781 	pop ecx
   782 	and ecx,BYTE 1
   783 	jz .L7
   784 	mov bl,[esi+0]    ; blue
   785 	mov al,[esi+1]    ; green
   786 	mov ah,[esi+2]    ; red
   787 	shr ah,3
   788 	and al,11111000b
   789 	shl eax,2
   790 	shr bl,3
   791 	add al,bl
   792 	mov [edi+0],al
   793 	mov [edi+1],ah
   794 	add esi,BYTE 4
   795 	add edi,BYTE 2
   796 
   797 .L7
   798 	jmp _x86return
   799 
   800 
   801 
   802 
   803 ;; 32 BIT RGB TO 16 BIT BGR 555
   804 ;; Output word: blue (5 bits) in bits 10-14, green (5) in bits 5-9, red (5) in bits 0-4.
   805 _ConvertX86p32_16BGR555:
   806 	
   807 	; check short: 16 pixels or fewer use the one-pixel loop
   808 	cmp ecx,BYTE 16
   809 	ja .L3
   810 
   811 
   812 .L1	; short loop
   813 	mov ah,[esi+0]    ; blue
   814 	mov al,[esi+1]    ; green
   815 	mov bl,[esi+2]    ; red
   816 	shr ah,3          ; blue -> 5 bits
   817 	and al,11111000b  ; keep the top 5 green bits
   818 	shl eax,2         ; ax = 0bbbbbgg ggg00000
   819 	shr bl,3          ; red -> 5 bits
   820 	add al,bl         ; ax = 0bbbbbgg gggrrrrr
   821 	mov [edi+0],al
   822 	mov [edi+1],ah
   823 	add esi,BYTE 4
   824 	add edi,BYTE 2
   825 	dec ecx
   826 	jnz .L1
   827 .L2 
   828 	jmp _x86return
   829 
   830 .L3	; head: if edi is not a multiple of 4, convert one pixel first
   831 	mov ebx,edi
   832         and ebx,BYTE 11b
   833 	jz .L4   
   834 	mov ah,[esi+0]    ; blue
   835 	mov al,[esi+1]    ; green
   836 	mov bl,[esi+2]    ; red
   837 	shr ah,3
   838 	and al,11111000b
   839 	shl eax,2
   840 	shr bl,3
   841 	add al,bl
   842 	mov [edi+0],al
   843 	mov [edi+1],ah
   844 	add esi,BYTE 4
   845 	add edi,BYTE 2
   846 	dec ecx
   847 
   848 .L4	; save count (bit 0 is tested later to find an odd trailing pixel)
   849 	push ecx
   850 
   851 	; unroll twice: ecx = number of pixel pairs
   852 	shr ecx,1
   853     
   854 	; point arrays to end of the paired region (8 source / 4 dest bytes per pair)
   855 	lea esi,[esi+ecx*8]
   856 	lea edi,[edi+ecx*4]
   857 
   858 	; negative counter: ecx runs from -pairs up to 0 and indexes both arrays
   859 	neg ecx
   860 	jmp SHORT .L6
   861 
   862 .L5     
   863 	mov [edi+ecx*4-4],eax            ; store the pair built in the previous pass
   864 .L6     
   865 	mov edx,[esi+ecx*8+4]            ; edx = pixel 1 = [A1][R1][G1][B1]
   866 
   867         mov bh,[esi+ecx*8+4]             ; bh = blue1
   868         mov ah,[esi+ecx*8]               ; ah = blue0
   869 
   870         shr bh,3                         ; blue1 -> 5 bits
   871         mov al,[esi+ecx*8+1]             ; al = green0
   872 
   873         shr ah,3                         ; blue0 -> 5 bits
   874         mov bl,[esi+ecx*8+5]             ; bl = green1
   875 
   876         shl eax,2                        ; blue0 -> bits 10-14, green0 -> bits 2-9
   877         mov dl,[esi+ecx*8+2]             ; dl = red0, edx = [A1][R1][G1][R0]
   878 
   879         shl ebx,18                       ; blue1 -> bits 26-30, green1 -> bits 18-25
   880         and eax,00007FE0h                ; keep blue0 and the top 5 green0 bits
   881                 
   882         shr edx,3                        ; top 5 red0 bits -> bits 0-4, top 5 red1 bits -> bits 16-20
   883         and ebx,07FE00000h               ; keep blue1 and the top 5 green1 bits
   884         
   885         and edx,001F001Fh                ; keep the two 5-bit reds
   886         add eax,ebx
   887 
   888         add eax,edx                      ; eax = two packed pixels (pixel 1 in the high word)
   889         inc ecx
   890 
   891         jnz .L5                          ; flags from "inc ecx"; loop until the index reaches 0
   892 
   893 	mov [edi+ecx*4-4],eax            ; ecx = 0 here: store the last pair at edi-4
   894 
   895 	; tail: one more pixel if the saved count was odd
   896 	pop ecx
   897 	and ecx,BYTE 1
   898 	jz .L7
   899 	mov ah,[esi+0]    ; blue
   900 	mov al,[esi+1]    ; green
   901 	mov bl,[esi+2]    ; red
   902 	shr ah,3
   903 	and al,11111000b
   904 	shl eax,2
   905 	shr bl,3
   906 	add al,bl
   907 	mov [edi+0],al
   908 	mov [edi+1],ah
   909 	add esi,BYTE 4
   910 	add edi,BYTE 2
   911 
   912 .L7
   913 	jmp _x86return
   914 
   915 
   916 
   917 
   918 	
   919 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
   920 ;; This routine writes FOUR pixels at once (dword) and then, if they exist,
   921 ;; up to three trailing pixels one byte at a time
   922 _ConvertX86p32_8RGB332:
   923 
   924 	
   925 .L_ALIGNED			; not referenced inside this routine; no alignment is performed here
   926 	push ecx		; save count for the trailing-pixel test at .L2
   927 
   928 	shr ecx,2		; We will draw 4 pixels at once
   929 	jnz .L1
   930 	
   931 	jmp .L2			; short jump out of range :(
   932 	
   933 .L1:
   934 	mov eax,[esi]		; first pair of pixels
   935 	mov edx,[esi+4]
   936 
   937 	shr dl,6		; blue1 -> 2 bits
   938 	mov ebx,eax		; copy of pixel 0 (for red0)
   939 
   940 	shr al,6		; blue0 -> 2 bits
   941 	and ah,0e0h		; top 3 bits of green0
   942 
   943 	shr ebx,16		; bl = red0
   944 	and dh,0e0h		; top 3 bits of green1
   945 	
   946 	shr ah,3		; green0 -> bits 2-4
   947 	and bl,0e0h		; top 3 bits of red0 stay in bits 5-7
   948 
   949 	shr dh,3		; green1 -> bits 2-4
   950 	
   951 	or al,bl
   952 	
   953 	mov ebx,edx		; copy of pixel 1 (only bits 16-23, red1, are used)
   954 	or al,ah		; al = pixel 0 as rrrgggbb
   955 	
   956 	shr ebx,16		; bl = red1
   957 	or dl,dh
   958 
   959 	and bl,0e0h
   960 	
   961 	or dl,bl		; dl = pixel 1 as rrrgggbb
   962 
   963 	mov ah,dl		; ax = [pixel 1][pixel 0]
   964 
   965 	
   966 		
   967 	mov ebx,[esi+8]		; second pair of pixels
   968 
   969 	mov edx,ebx
   970 	and bh,0e0h		; top 3 bits of green2
   971 
   972 	shr bl,6		; blue2 -> 2 bits
   973 	and edx,0e00000h	; top 3 bits of red2
   974 
   975 	shr edx,16		; dl = red2 bits in positions 5-7
   976 
   977 	shr bh,3		; green2 -> bits 2-4
   978 
   979 	ror eax,16		; move pixels 0/1 to the upper word; ax is rebuilt below
   980 	or bl,dl
   981 
   982 	mov edx,[esi+12]	; pixel 3
   983 	or bl,bh
   984 	
   985 	mov al,bl		; al = pixel 2 as rrrgggbb
   986 
   987 	mov ebx,edx
   988 	and dh,0e0h		; top 3 bits of green3
   989 
   990 	shr dl,6		; blue3 -> 2 bits
   991 	and ebx,0e00000h	; top 3 bits of red3
   992 	
   993 	shr dh,3		; green3 -> bits 2-4
   994 	mov ah,dl
   995 
   996 	shr ebx,16		; bl = red3 bits in positions 5-7
   997 	or ah,dh
   998 
   999 	or ah,bl		; ah = pixel 3 as rrrgggbb
  1000 
  1001 	rol eax,16		; eax = [pixel 3][pixel 2][pixel 1][pixel 0]
  1002 	add esi,BYTE 16
  1003 			
  1004 	mov [edi],eax	
  1005 	add edi,BYTE 4
  1006 	
  1007 	dec ecx
  1008 	jz .L2			; L1 out of range for short jump :(
  1009 	
  1010 	jmp .L1
  1011 .L2:
  1012 	
  1013 	pop ecx
  1014 	and ecx,BYTE 3		; mask out number of pixels to draw
  1015 	
  1016 	jz .L4			; Nothing to do anymore
  1017 
  1018 .L3:
  1019 	mov eax,[esi]		; single pixel conversion for trailing pixels
  1020 
  1021         mov ebx,eax
  1022 
  1023         shr al,6		; blue -> 2 bits
  1024         and ah,0e0h		; top 3 bits of green
  1025 
  1026         shr ebx,16		; bl = red
  1027 
  1028         shr ah,3		; green -> bits 2-4
  1029         and bl,0e0h		; top 3 bits of red
  1030 
  1031         or al,ah
  1032         or al,bl		; al = rrrgggbb
  1033 
  1034         mov [edi],al
  1035 
  1036         inc edi
  1037         add esi,BYTE 4
  1038 
  1039 	dec ecx
  1040 	jnz .L3
  1041 	
  1042 .L4:	
  1043 	jmp _x86return
  1044 
  1045 ; Emit the .note.GNU-stack marker for 32-bit ELF output.
  1046 ; Both format names are tested: the original guard matched only "elf", but
  1047 ; NASM 2.x reports the 32-bit ELF format as "elf32" in __OUTPUT_FORMAT__,
  1048 ; in which case the note would be silently omitted.
  1049 %ifidn __OUTPUT_FORMAT__,elf
  1050 section .note.GNU-stack noalloc noexec nowrite progbits
  1051 %endif
  1052 %ifidn __OUTPUT_FORMAT__,elf32
  1053 section .note.GNU-stack noalloc noexec nowrite progbits
  1054 %endif