src/hermes/x86p_32.asm
author Sam Lantinga <slouken@libsdl.org>
Sun, 21 Sep 2003 18:32:04 +0000
changeset 720 f90d80d68071
parent 0 74212992fb08
child 1166 da33b7e6d181
permissions -rw-r--r--
N Sep 17 8791 Sam Lantinga Re: tks source released
Date: Sun, 07 Sep 2003 02:51:58 +0200
From: Stephane Marchesin
Subject: [SDL] Two little patches

Compiling SDL with a recent gcc (gcc 3.3.1, 3.3 doesn't have this
behaviour) gives some nasty warnings :

SDL_blit_A.c: In function `BlitRGBtoRGBSurfaceAlpha128MMX':
SDL_blit_A.c:223: warning: integer constant is too large for "long" type
SDL_blit_A.c:225: warning: integer constant is too large for "long" type
SDL_blit_A.c:227: warning: integer constant is too large for "long" type
[...]

The first attached patch (longlongfix.patch) tells gcc to really treat
those constants as unsigned long long and not long.

The second patch (nasinclude.patch) fixes an include problem I had while
compiling nas audio : when the <audio/audiolib.h> file lies in
/usr/X11R6/include, a -I/usr/X11R6/include option is needed or the file
isn't found.
slouken@0
     1
;
slouken@0
     2
; x86 format converters for HERMES
slouken@0
     3
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
slouken@0
     4
; This source code is licensed under the GNU LGPL
slouken@0
     5
; 
slouken@0
     6
; Please refer to the file COPYING.LIB contained in the distribution for
slouken@0
     7
; licensing conditions		
slouken@0
     8
;
slouken@0
     9
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
slouken@0
    10
; 
slouken@0
    11
slouken@0
    12
	
slouken@0
    13
BITS 32
slouken@0
    14
slouken@0
    15
GLOBAL _ConvertX86p32_32BGR888
slouken@0
    16
GLOBAL _ConvertX86p32_32RGBA888
slouken@0
    17
GLOBAL _ConvertX86p32_32BGRA888
slouken@0
    18
GLOBAL _ConvertX86p32_24RGB888	
slouken@0
    19
GLOBAL _ConvertX86p32_24BGR888
slouken@0
    20
GLOBAL _ConvertX86p32_16RGB565
slouken@0
    21
GLOBAL _ConvertX86p32_16BGR565
slouken@0
    22
GLOBAL _ConvertX86p32_16RGB555
slouken@0
    23
GLOBAL _ConvertX86p32_16BGR555
slouken@0
    24
GLOBAL _ConvertX86p32_8RGB332
slouken@0
    25
slouken@0
    26
EXTERN _x86return
slouken@0
    27
		
slouken@0
    28
SECTION .text
slouken@0
    29
slouken@0
    30
slouken@0
    31
;; _Convert_*
slouken@0
    32
;; Parameters:
slouken@0
    33
;;   ESI = source 
slouken@0
    34
;;   EDI = dest
slouken@0
    35
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
slouken@0
    36
;; Destroys:
slouken@0
    37
;;   EAX, EBX, EDX
slouken@0
    38
slouken@0
    39
slouken@0
    40
;; _ConvertX86p32_32BGR888
;; Copies ECX dwords from [ESI] to [EDI], exchanging byte 0 and byte 2 of
;; every dword and leaving byte 1 and byte 3 where they are
;; (bswap reverses all four bytes, ror 8 then puts the old top byte back).
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; Preserved: EBP (pushed/popped around the unrolled path)
_ConvertX86p32_32BGR888:

        ; check short: 32 pixels or fewer use the simple loop
        cmp     ecx, BYTE 32
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; save ebp, it becomes the block counter below
        push    ebp

        ; unroll four times: ebp = number of 4-pixel blocks
        mov     ebp, ecx
        shr     ebp, 2

        ; save count (ecx is reused as a data register in .L4)
        push    ecx

.L4:    ; four pixels per pass; loads, swaps and stores are interleaved
        mov     eax, [esi]
        mov     ebx, [esi+4]

        bswap   eax

        bswap   ebx

        ror     eax, 8
        mov     ecx, [esi+8]

        ror     ebx, 8
        mov     edx, [esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx, 8
        mov     [edi+0], eax

        ror     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .L4

        ; check tail: count mod 4 pixels are still to do
        pop     ecx
        and     ecx, BYTE 11b
        jz      .L6

.L5:    ; tail loop, same conversion as the short loop
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .L5

.L6:
        pop     ebp
        jmp     _x86return
slouken@0
   117
	
slouken@0
   118
slouken@0
   119
	
slouken@0
   120
		
slouken@0
   121
;; _ConvertX86p32_32RGBA888
;; Copies ECX dwords from [ESI] to [EDI], rotating every dword left by
;; 8 bits: the top byte moves to the bottom and the other three bytes
;; each move up one position.
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; Preserved: EBP (pushed/popped around the unrolled path)
_ConvertX86p32_32RGBA888:

        ; check short: 32 pixels or fewer use the simple loop
        cmp     ecx, BYTE 32
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; save ebp, it becomes the block counter below
        push    ebp

        ; unroll four times: ebp = number of 4-pixel blocks
        mov     ebp, ecx
        shr     ebp, 2

        ; save count (ecx is reused as a data register in .L4)
        push    ecx

.L4:    ; four pixels per pass; loads, rotates and stores are interleaved
        mov     eax, [esi]
        mov     ebx, [esi+4]

        rol     eax, 8
        mov     ecx, [esi+8]

        rol     ebx, 8
        mov     edx, [esi+12]

        rol     ecx, 8
        mov     [edi+0], eax

        rol     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .L4

        ; check tail: count mod 4 pixels are still to do
        pop     ecx
        and     ecx, BYTE 11b
        jz      .L6

.L5:    ; tail loop, same conversion as the short loop
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .L5

.L6:
        pop     ebp
        jmp     _x86return
slouken@0
   188
slouken@0
   189
	
slouken@0
   190
slouken@0
   191
slouken@0
   192
;; _ConvertX86p32_32BGRA888
;; Copies ECX dwords from [ESI] to [EDI], reversing the byte order of
;; every dword with bswap.
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; Preserved: EBP (pushed/popped around the unrolled path)
_ConvertX86p32_32BGRA888:

        ; check short: 32 pixels or fewer use the simple loop
        cmp     ecx, BYTE 32
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; save ebp, it becomes the block counter below
        push    ebp

        ; unroll four times: ebp = number of 4-pixel blocks
        mov     ebp, ecx
        shr     ebp, 2

        ; save count (ecx is reused as a data register in .L4)
        push    ecx

.L4:    ; four pixels per pass: load all, swap all, store all
        mov     eax, [esi]
        mov     ebx, [esi+4]

        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi+0], eax
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .L4

        ; check tail: count mod 4 pixels are still to do
        pop     ecx
        and     ecx, BYTE 11b
        jz      .L6

.L5:    ; tail loop, same conversion as the short loop
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .L5

.L6:
        pop     ebp
        jmp     _x86return
slouken@0
   261
slouken@0
   262
slouken@0
   263
	
slouken@0
   264
	
slouken@0
   265
;; 32 bit RGB 888 to 24 BIT RGB 888
;;
;; Copies the three low bytes of each source dword to the destination in
;; the same order and drops the fourth byte: 4 bytes in, 3 bytes out per
;; pixel.
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; Preserved: EBP (pushed/popped around the unrolled path)
;;
;; Register pictures in the unrolled loop are written high byte first;
;; the digit is the pixel index inside the current group of four.
_ConvertX86p32_24RGB888:

        ; check short: 32 pixels or fewer use the byte-wise loop
        cmp     ecx, BYTE 32
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; head: convert single pixels until edi is a multiple of 4.
        ; edi advances by 3 per pass, so at most three passes are needed;
        ; ecx is not tested here because it is above 32 on entry.
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .L4
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .L3

.L4:    ; unroll 4 times: ebp = number of 4-pixel blocks
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2

        ; save count (ecx is reused as a data register in .L5)
        push    ecx

.L5:    ; four source dwords (16 bytes) -> three destination dwords (12 bytes)
        mov     eax, [esi]              ; pixel 0            eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; pixel 1            ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ;                    eax = [R0][G0][B0][ 0]
        mov     ecx, [esi+12]           ; pixel 3            ecx = [A3][R3][G3][B3]

        shl     ebx, 8                  ;                    ebx = [R1][G1][B1][ 0]
        mov     al, [esi+4]             ;                    eax = [R0][G0][B0][B1]

        ror     eax, 8                  ;                    eax = [B1][R0][G0][B0] (done)
        mov     bh, [esi+8+1]           ;                    ebx = [R1][G1][G2][ 0]

        mov     [edi], eax
        add     edi, BYTE 3*4           ; edi moved early; later stores compensate with -3*4

        shl     ecx, 8                  ;                    ecx = [R3][G3][B3][ 0]
        mov     bl, [esi+8+0]           ;                    ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ;                    ebx = [G2][B2][R1][G1] (done)
        mov     cl, [esi+8+2]           ;                    ecx = [R3][G3][B3][R2] (done)

        mov     [edi+4-3*4], ebx
        add     esi, BYTE 4*4

        mov     [edi+8-3*4], ecx
        dec     ebp

        jnz     .L5

        ; check tail: count mod 4 pixels are still to do
        pop     ecx
        and     ecx, BYTE 11b
        jz      .L7

.L6:    ; tail loop, same conversion as the short loop
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .L6

.L7:
        pop     ebp
        jmp     _x86return
slouken@0
   358
slouken@0
   359
slouken@0
   360
slouken@0
   361
slouken@0
   362
;; 32 bit RGB 888 to 24 bit BGR 888
;;
;; Writes the three low bytes of each source dword to the destination in
;; reversed order (source byte 2 first, byte 0 last) and drops the fourth
;; byte: 4 bytes in, 3 bytes out per pixel.
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
;; Preserved: EBP (pushed/popped around the unrolled path)
;;
;; Register pictures in the unrolled loop are written high byte first;
;; the digit is the pixel index inside the current group of four.
_ConvertX86p32_24BGR888:

        ; check short: 32 pixels or fewer use the byte-wise loop
        cmp     ecx, BYTE 32
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; head: convert single pixels until edi is a multiple of 4.
        ; edi advances by 3 per pass, so at most three passes are needed;
        ; ecx is not tested here because it is above 32 on entry.
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .L4
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .L3

.L4:    ; unroll 4 times: ebp = number of 4-pixel blocks
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2

        ; save count (ecx is reused as a data register in .L5)
        push    ecx

.L5:    ; four source dwords (16 bytes) -> three destination dwords (12 bytes)
        mov     eax, [esi]              ; pixel 0            eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; pixel 1            ebx = [A1][R1][G1][B1]

        bswap   eax                     ;                    eax = [B0][G0][R0][A0]

        bswap   ebx                     ;                    ebx = [B1][G1][R1][A1]

        mov     al, [esi+4+2]           ;                    eax = [B0][G0][R0][R1]
        mov     bh, [esi+4+4+1]         ;                    ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ;                    eax = [R1][B0][G0][R0] (done)
        mov     bl, [esi+4+4+2]         ;                    ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ;                    ebx = [G2][R2][B1][G1] (done)
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; pixel 3            ecx = [A3][R3][G3][B3]

        bswap   ecx                     ;                    ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ;                    ecx = [B3][G3][R3][B2] (done)
        add     esi, BYTE 4*4

        mov     [edi+8], ecx
        add     edi, BYTE 3*4

        dec     ebp
        jnz     .L5

        ; check tail: count mod 4 pixels are still to do
        pop     ecx
        and     ecx, BYTE 11b
        jz      .L7

.L6:    ; tail loop, same conversion as the short loop
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .L6

.L7:
        pop     ebp
        jmp     _x86return
slouken@0
   459
 
slouken@0
   460
slouken@0
   461
	
slouken@0
   462
		
slouken@0
   463
;; 32 bit RGB 888 to 16 BIT RGB 565
;;
;; Packs each source dword into one 16-bit word:
;;   bits 15..11 = top 5 bits of source byte 2 (red)
;;   bits 10..5  = top 6 bits of source byte 1 (green)
;;   bits  4..0  = top 5 bits of source byte 0 (blue)
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
_ConvertX86p32_16RGB565:
        ; check short: 16 pixels or fewer use the one-pixel loop
        cmp     ecx, BYTE 16
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = red<<11 | green<<5
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl                  ; low 5 bits of al are clear, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .L1

.L2:                                    ; End of short loop
        jmp     _x86return


.L3:    ; head: if edi is not a multiple of 4, convert one pixel first
        ; (this aligns the dword stores below provided edi was a multiple
        ; of 2 -- presumably always the case for 16-bit surfaces)
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .L4

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.L4:
        ; save count
        push    ecx

        ; unroll twice: ecx = number of pixel pairs
        shr     ecx, 1

        ; point arrays to end of the paired region
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; negative counter, runs up to zero
        neg     ecx
        jmp     SHORT .L6

.L5:    ; store the pair finished in the previous pass (ecx already advanced)
        mov     [edi+ecx*4-4], eax
.L6:    ; two pixels per pass; result: low word = first pixel, high word = second
        mov     eax, [esi+ecx*8]

        shr     ah, 2                   ; first green -> 6 bits
        mov     ebx, [esi+ecx*8+4]

        shr     eax, 3                  ; first blue -> bits 4..0, green -> bits 10..5
        mov     edx, [esi+ecx*8+4]

        shr     bh, 2                   ; second green -> 6 bits
        mov     dl, [esi+ecx*8+2]       ; dl = first red; edx byte 2 = second red

        shl     ebx, 13                 ; second blue -> bits 20..16, green -> bits 26..21
        and     eax, 000007FFh

        shl     edx, 8                  ; reds -> top of each 16-bit half
        and     ebx, 07FF0000h

        and     edx, 0F800F800h         ; keep the top 5 bits of both reds
        add     eax, ebx

        add     eax, edx
        inc     ecx

        jnz     .L5

        ; store the last pair (ecx is 0 here)
        mov     [edi+ecx*4-4], eax

        ; tail: one pixel left if the saved count is odd
        pop     ecx
        test    cl, 1
        jz      .L7

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.L7:
        jmp     _x86return
slouken@0
   574
slouken@0
   575
slouken@0
   576
slouken@0
   577
	
slouken@0
   578
;; 32 bit RGB 888 to 16 BIT BGR 565
;;
;; Packs each source dword into one 16-bit word:
;;   bits 15..11 = top 5 bits of source byte 0 (blue)
;;   bits 10..5  = top 6 bits of source byte 1 (green)
;;   bits  4..0  = top 5 bits of source byte 2 (red)
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
_ConvertX86p32_16BGR565:

        ; check short: 16 pixels or fewer use the one-pixel loop
        cmp     ecx, BYTE 16
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = blue<<11 | green<<5
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl                  ; low 5 bits of al are clear, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; head: if edi is not a multiple of 4, convert one pixel first
        ; (this aligns the dword stores below provided edi was a multiple
        ; of 2 -- presumably always the case for 16-bit surfaces)
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .L4
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.L4:    ; save count
        push    ecx

        ; unroll twice: ecx = number of pixel pairs
        shr     ecx, 1

        ; point arrays to end of the paired region
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; negative count, runs up to zero
        neg     ecx
        jmp     SHORT .L6

.L5:    ; store the pair finished in the previous pass (ecx already advanced)
        mov     [edi+ecx*4-4], eax
.L6:    ; two pixels per pass; result: low word = first pixel, high word = second
        mov     edx, [esi+ecx*8+4]      ; edx byte 2 = second red

        mov     bh, [esi+ecx*8+4]       ; second blue
        mov     ah, [esi+ecx*8]         ; first blue

        shr     bh, 3
        mov     al, [esi+ecx*8+1]       ; first green

        shr     ah, 3
        mov     bl, [esi+ecx*8+5]       ; second green

        shl     eax, 3                  ; first blue -> bits 15..11, green -> bits 10..3
        mov     dl, [esi+ecx*8+2]       ; first red

        shl     ebx, 19                 ; second blue -> bits 31..27, green -> bits 26..19
        and     eax, 0000FFE0h          ; drop the two low green bits

        shr     edx, 3                  ; reds -> bottom of each 16-bit half
        and     ebx, 0FFE00000h

        and     edx, 001F001Fh          ; keep the top 5 bits of both reds
        add     eax, ebx

        add     eax, edx
        inc     ecx

        jnz     .L5

        ; store the last pair (ecx is 0 here)
        mov     [edi+ecx*4-4], eax

        ; tail: one pixel left if the saved count is odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .L7
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.L7:
        jmp     _x86return
slouken@0
   688
slouken@0
   689
slouken@0
   690
	
slouken@0
   691
	
slouken@0
   692
;; 32 BIT RGB TO 16 BIT RGB 555
;;
;; Packs each source dword into one 16-bit word:
;;   bit  15     = 0
;;   bits 14..10 = top 5 bits of source byte 2 (red)
;;   bits  9..5  = top 5 bits of source byte 1 (green)
;;   bits  4..0  = top 5 bits of source byte 0 (blue)
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
_ConvertX86p32_16RGB555:

        ; check short: 16 pixels or fewer use the one-pixel loop
        cmp     ecx, BYTE 16
        ja      .L3

.L1:    ; short loop, one pixel per pass
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = red<<10 | green<<5
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl                  ; low 5 bits of al are clear, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; head: if edi is not a multiple of 4, convert one pixel first
        ; (this aligns the dword stores below provided edi was a multiple
        ; of 2 -- presumably always the case for 16-bit surfaces)
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .L4
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.L4:    ; save count
        push    ecx

        ; unroll twice: ecx = number of pixel pairs
        shr     ecx, 1

        ; point arrays to end of the paired region
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; negative counter, runs up to zero
        neg     ecx
        jmp     SHORT .L6

.L5:    ; store the pair finished in the previous pass (ecx already advanced)
        mov     [edi+ecx*4-4], eax
.L6:    ; two pixels per pass; result: low word = first pixel, high word = second
        mov     eax, [esi+ecx*8]

        shr     ah, 3                   ; first green -> 5 bits
        mov     ebx, [esi+ecx*8+4]

        shr     eax, 3                  ; first blue -> bits 4..0, green -> bits 9..5
        mov     edx, [esi+ecx*8+4]

        shr     bh, 3                   ; second green -> 5 bits
        mov     dl, [esi+ecx*8+2]       ; dl = first red; edx byte 2 = second red

        shl     ebx, 13                 ; second blue -> bits 20..16, green -> bits 25..21
        and     eax, 000007FFh

        shl     edx, 7                  ; reds -> bits 14..7 of each 16-bit half
        and     ebx, 07FF0000h

        and     edx, 07C007C00h         ; keep the top 5 bits of both reds
        add     eax, ebx

        add     eax, edx
        inc     ecx

        jnz     .L5

        ; store the last pair (ecx is 0 here)
        mov     [edi+ecx*4-4], eax

        ; tail: one pixel left if the saved count is odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .L7
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.L7:
        jmp     _x86return
slouken@0
   799
slouken@0
   800
slouken@0
   801
slouken@0
   802
slouken@0
   803
;; 32 BIT RGB TO 16 BIT BGR 555
;;
;; Packs each source dword into one 16-bit word:
;;   bit  15     = 0
;;   bits 14..10 = top 5 bits of source byte 0 (blue)
;;   bits  9..5  = top 5 bits of source byte 1 (green)
;;   bits  4..0  = top 5 bits of source byte 2 (red)
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
_ConvertX86p32_16BGR555:

        ; check short: 16 pixels or fewer use the one-pixel loop
        cmp     ecx, BYTE 16
        ja      .L3


.L1:    ; short loop, one pixel per pass
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = blue<<10 | green<<5
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl                  ; low 5 bits of al are clear, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .L1
.L2:
        jmp     _x86return

.L3:    ; head: if edi is not a multiple of 4, convert one pixel first
        ; (this aligns the dword stores below provided edi was a multiple
        ; of 2 -- presumably always the case for 16-bit surfaces)
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .L4
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.L4:    ; save count
        push    ecx

        ; unroll twice: ecx = number of pixel pairs
        shr     ecx, 1

        ; point arrays to end of the paired region
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        ; negative counter, runs up to zero
        neg     ecx
        jmp     SHORT .L6

.L5:    ; store the pair finished in the previous pass (ecx already advanced)
        mov     [edi+ecx*4-4], eax
.L6:    ; two pixels per pass; result: low word = first pixel, high word = second
        mov     edx, [esi+ecx*8+4]      ; edx byte 2 = second red

        mov     bh, [esi+ecx*8+4]       ; second blue
        mov     ah, [esi+ecx*8]         ; first blue

        shr     bh, 3
        mov     al, [esi+ecx*8+1]       ; first green

        shr     ah, 3
        mov     bl, [esi+ecx*8+5]       ; second green

        shl     eax, 2                  ; first blue -> bits 14..10, green -> bits 9..2
        mov     dl, [esi+ecx*8+2]       ; first red

        shl     ebx, 18                 ; second blue -> bits 30..26, green -> bits 25..18
        and     eax, 00007FE0h          ; drop the three low green bits

        shr     edx, 3                  ; reds -> bottom of each 16-bit half
        and     ebx, 07FE00000h

        and     edx, 001F001Fh          ; keep the top 5 bits of both reds
        add     eax, ebx

        add     eax, edx
        inc     ecx

        jnz     .L5

        ; store the last pair (ecx is 0 here)
        mov     [edi+ecx*4-4], eax

        ; tail: one pixel left if the saved count is odd
        pop     ecx
        and     ecx, BYTE 1
        jz      .L7
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.L7:
        jmp     _x86return
slouken@0
   914
slouken@0
   915
slouken@0
   916
slouken@0
   917
slouken@0
   918
	
slouken@0
   919
;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (one dword) and then converts
;; up to three trailing pixels one byte at a time.
;;
;; Each output byte is built from one source dword:
;;   bits 7..5 = top 3 bits of source byte 2
;;   bits 4..2 = top 3 bits of source byte 1
;;   bits 1..0 = top 2 bits of source byte 0
;;
;; In:       ESI = source, EDI = dest, ECX = pixel count
;;           (a count below 4, including 0, skips the dword loop)
;; Exit:     jumps to the external label _x86return (no ret here)
;; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
_ConvertX86p32_8RGB332:


.L_ALIGNED:                             ; not referenced inside this routine
        push    ecx                     ; keep the full count for the tail

        shr     ecx, 2                  ; We will draw 4 pixels at once
        jz      NEAR .L2                ; fewer than 4 pixels: tail only

.L1:
        mov     eax, [esi]              ; first pair of pixels
        mov     edx, [esi+4]

        shr     dl, 6                   ; pixel 1: byte 0 -> 2 bits
        mov     ebx, eax

        shr     al, 6                   ; pixel 0: byte 0 -> 2 bits
        and     ah, 0e0h                ; pixel 0: byte 1 -> top 3 bits

        shr     ebx, 16                 ; bl = pixel 0 byte 2
        and     dh, 0e0h                ; pixel 1: byte 1 -> top 3 bits

        shr     ah, 3                   ; move to bits 4..2
        and     bl, 0e0h

        shr     dh, 3

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = packed pixel 0

        shr     ebx, 16                 ; bl = pixel 1 byte 2
        or      dl, dh

        and     bl, 0e0h

        or      dl, bl                  ; dl = packed pixel 1

        mov     ah, dl                  ; ax = pixel 1 : pixel 0



        mov     ebx, [esi+8]            ; second pair of pixels

        mov     edx, ebx
        and     bh, 0e0h

        shr     bl, 6
        and     edx, 0e00000h           ; top 3 bits of pixel 2 byte 2

        shr     edx, 16

        shr     bh, 3

        ror     eax, 16                 ; park pixels 0/1 in the high word
        or      bl, dl

        mov     edx, [esi+12]
        or      bl, bh                  ; bl = packed pixel 2

        mov     al, bl

        mov     ebx, edx
        and     dh, 0e0h

        shr     dl, 6
        and     ebx, 0e00000h           ; top 3 bits of pixel 3 byte 2

        shr     dh, 3
        mov     ah, dl

        shr     ebx, 16
        or      ah, dh

        or      ah, bl                  ; ah = packed pixel 3

        rol     eax, 16                 ; eax = pixel 3 : 2 : 1 : 0
        add     esi, BYTE 16

        mov     [edi], eax
        add     edi, BYTE 4

        dec     ecx
        jnz     NEAR .L1                ; explicit NEAR: body may exceed short range

.L2:

        pop     ecx
        and     ecx, BYTE 3             ; mask out number of pixels to draw

        jz      .L4                     ; Nothing to do anymore

.L3:
        mov     eax, [esi]              ; single pixel conversion for trailing pixels

        mov     ebx, eax

        shr     al, 6
        and     ah, 0e0h

        shr     ebx, 16

        shr     ah, 3
        and     bl, 0e0h

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, BYTE 4

        dec     ecx
        jnz     .L3

.L4:
        jmp     _x86return