src/hermes/x86p_16.asm
author Ryan C. Gordon
Wed, 29 Nov 2006 10:30:05 +0000
branch SDL-1.2
changeset 3900 ce3a2bd11305
parent 1873 eb4d9d99849b
child 2134 180fa05e98e2
permissions -rw-r--r--
Wrapped some macro params in parentheses for alloca wrappers.
Thanks, Suzuki Masahiro.
slouken@0
     1
;
slouken@0
     2
; x86 format converters for HERMES
slouken@0
     3
; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
slouken@0
     4
; This source code is licensed under the GNU LGPL
slouken@0
     5
; 
slouken@0
     6
; Please refer to the file COPYING.LIB contained in the distribution for
slouken@0
     7
; licensing conditions		
slouken@0
     8
; 
slouken@0
     9
; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
slouken@0
    10
; Used with permission.
slouken@0
    11
; 
slouken@0
    12
slouken@0
    13
; Generate 32-bit code; every routine in this file uses 32-bit registers
; and addressing.
BITS 32
slouken@0
    14
slouken@1873
    15
; common.inc is not visible from this file. It supplies the SDL_FUNC macro
; used below.
%include "common.inc"
slouken@1871
    16
slouken@1871
    17
; One SDL_FUNC line per converter entry point defined in this file.
; NOTE(review): SDL_FUNC presumably exports the symbol (GLOBAL plus any
; format-specific decoration) -- confirm against common.inc.
SDL_FUNC _ConvertX86p16_16BGR565
slouken@1871
    18
SDL_FUNC _ConvertX86p16_16RGB555
slouken@1871
    19
SDL_FUNC _ConvertX86p16_16BGR555
slouken@1871
    20
SDL_FUNC _ConvertX86p16_8RGB332
slouken@0
    21
slouken@0
    22
; Symbols defined in another object file. Every converter below finishes
; with "jmp _x86return"; _ConvertX86 is declared but not referenced in this
; file.
EXTERN _ConvertX86
slouken@0
    23
EXTERN _x86return
slouken@0
    24
slouken@0
    25
; All converter code is placed in the code section.
SECTION .text
slouken@0
    26
slouken@0
    27
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR565
;
; Converts a run of 16-bit pixels by exchanging the two 5-bit fields at
; bits 0-4 and bits 11-15 and leaving the 6-bit field at bits 5-10 in
; place.
; NOTE(review): presumably RGB565 -> BGR565, judging by the routine name
; and by the colour comments in the RGB332 converter -- confirm.
;
; In:    esi = source pixels, edi = destination pixels, ecx = pixel count
; Out:   esi / edi advanced past the converted pixels;
;        control leaves through "jmp _x86return" (no ret)
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] to [edi] and advance both pointers.
; Clobbers eax, ebx, edx and flags.
%macro CONV_P16_BGR565_PIXEL 0
    movzx eax,word [esi]            ; zero-extended, so no stale upper bits
    mov ebx,eax
    mov edx,eax
    shr eax,11                      ; bits 11-15 -> bits 0-4
    and ebx,0000011111100000b       ; bits 5-10 stay where they are
    shl edx,11                      ; bits 0-4 -> bits 11-15 (rest spills above bit 15)
    add eax,ebx                     ; fields are disjoint, so add acts as or
    add eax,edx
    mov [edi],ax                    ; only the low 16 bits are stored
    add esi,BYTE 2
    add edi,BYTE 2
%endmacro

_ConvertX86p16_16BGR565:

    ; An empty run must not enter the dec/jnz loop below: ecx would wrap
    ; and the loop would run 2^32 times.
    test ecx,ecx
    jz .done

    ; Runs of 16 pixels or fewer are handled one pixel at a time.
    cmp ecx,BYTE 16
    ja .align_head

.short_loop:
    CONV_P16_BGR565_PIXEL
    dec ecx
    jnz .short_loop
.done:
    jmp _x86return

.align_head:
    ; If the destination is not 4-byte aligned, convert one pixel first.
    test edi,11b
    jz .bulk_setup
    CONV_P16_BGR565_PIXEL
    dec ecx

.bulk_setup:
    push ecx                        ; keep the count for the odd-pixel test

    shr ecx,1                       ; ecx = number of pixel pairs (>= 8 here)

    ; Point both arrays at the end of the paired region and count a
    ; negative index up to zero.
    lea esi,[esi+ecx*4]
    lea edi,[edi+ecx*4]
    neg ecx

.bulk_loop:
    ; Two pixels per dword; the masks apply to both halves at once.
    mov eax,[esi+ecx*4]
    mov ebx,eax
    mov edx,eax
    and eax,07E007E0h               ; bits 5-10 of each pixel, unchanged
    and ebx,0F800F800h
    shr ebx,11                      ; bits 11-15 -> bits 0-4
    and edx,001F001Fh
    shl edx,11                      ; bits 0-4 -> bits 11-15
    add eax,ebx
    add eax,edx
    mov [edi+ecx*4],eax
    inc ecx
    jnz .bulk_loop

    ; Tail: one pixel is left over when the saved count was odd.
    pop ecx
    and ecx,BYTE 1
    jz .exit
    CONV_P16_BGR565_PIXEL

.exit:
    jmp _x86return
slouken@0
   131
slouken@0
   132
slouken@0
   133
slouken@0
   134
slouken@0
   135
slouken@0
   136
slouken@0
   137
;-----------------------------------------------------------------------
; _ConvertX86p16_16RGB555
;
; Converts a run of 16-bit pixels by keeping bits 0-4 and moving bits
; 6-15 down by one position (to bits 5-14). Source bit 5 is dropped and
; result bit 15 is always zero.
; NOTE(review): presumably RGB565 -> RGB555, judging by the routine
; name -- confirm.
;
; In:    esi = source pixels, edi = destination pixels, ecx = pixel count
; Out:   esi / edi advanced past the converted pixels;
;        control leaves through "jmp _x86return" (no ret)
; Clobb: eax, ebx, ecx, edx, flags (ebp is used but saved and restored)
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] to [edi] and advance both pointers.
; Clobbers eax, ebx and flags.
%macro CONV_P16_RGB555_PIXEL 0
    movzx eax,word [esi]            ; zero-extended source pixel
    mov ebx,eax
    shr ebx,1
    and ebx,0111111111100000b       ; source bits 6-15, now at bits 5-14
    and eax,BYTE 0000000000011111b  ; source bits 0-4 stay in place
    add eax,ebx                     ; fields are disjoint, so add acts as or
    mov [edi],ax
    add esi,BYTE 2
    add edi,BYTE 2
%endmacro

_ConvertX86p16_16RGB555:

    ; An empty run must not enter the dec/jnz loop below: ecx would wrap
    ; and the loop would run 2^32 times.
    test ecx,ecx
    jz .done

    ; Runs of 32 pixels or fewer are handled one pixel at a time.
    cmp ecx,BYTE 32
    ja .align_head

.short_loop:
    CONV_P16_RGB555_PIXEL
    dec ecx
    jnz .short_loop
.done:
    jmp _x86return

.align_head:
    ; If the destination is not 4-byte aligned, convert one pixel first.
    test edi,11b
    jz .bulk_setup
    CONV_P16_RGB555_PIXEL
    dec ecx

.bulk_setup:
    push ebp                        ; ebp is borrowed as the loop index
    push ecx                        ; keep the count for the tail

    shr ecx,2                       ; ecx = number of 4-pixel groups (>= 8 here)

    ; Point both arrays at the end of the grouped region and count a
    ; negative index up to zero.
    lea esi,[esi+ecx*8]
    lea edi,[edi+ecx*8]
    mov ebp,ecx
    neg ebp

.bulk_loop:
    ; Four pixels per pass: two in eax, two in ecx.
    mov eax,[esi+ebp*8]
    mov ecx,[esi+ebp*8+4]

    mov ebx,eax
    mov edx,ecx

    and eax,0FFC0FFC0h              ; bits 6-15 of each pixel
    and ecx,0FFC0FFC0h
    shr eax,1                       ; ... moved down to bits 5-14
    shr ecx,1

    and ebx,001F001Fh               ; bits 0-4 of each pixel, unchanged
    and edx,001F001Fh

    add eax,ebx
    add ecx,edx

    mov [edi+ebp*8],eax
    mov [edi+ebp*8+4],ecx

    inc ebp
    jnz .bulk_loop

    ; Tail: 0-3 pixels remain (saved count modulo 4).
    pop ecx
    and ecx,BYTE 11b
    jz .restore
.tail_loop:
    CONV_P16_RGB555_PIXEL
    dec ecx
    jnz .tail_loop

.restore:
    pop ebp
    jmp _x86return
slouken@0
   239
slouken@0
   240
slouken@0
   241
slouken@0
   242
slouken@0
   243
slouken@0
   244
slouken@0
   245
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR555
;
; Converts a run of 16-bit pixels:
;   source bits 11-15 -> result bits 0-4
;   source bits  6-10 -> result bits 5-9   (source bit 5 is dropped)
;   source bits  0-4  -> result bits 10-14
; Result bit 15 is always zero.
; NOTE(review): presumably RGB565 -> BGR555, judging by the routine
; name -- confirm.
;
; In:    esi = source pixels, edi = destination pixels, ecx = pixel count
; Out:   esi / edi advanced past the converted pixels;
;        control leaves through "jmp _x86return" (no ret)
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] to [edi] and advance both pointers.
; Clobbers eax, ebx, edx and flags.
%macro CONV_P16_BGR555_PIXEL 0
    movzx eax,word [esi]            ; zero-extended, so no stale upper bits
    mov ebx,eax
    mov edx,eax
    shr eax,11                      ; bits 11-15 -> bits 0-4
    shr ebx,1
    and ebx,1111100000b             ; bits 6-10 -> bits 5-9
    shl edx,10
    and edx,0111110000000000b       ; bits 0-4 -> bits 10-14
    add eax,ebx                     ; fields are disjoint, so add acts as or
    add eax,edx
    mov [edi],ax
    add esi,BYTE 2
    add edi,BYTE 2
%endmacro

_ConvertX86p16_16BGR555:

    ; An empty run must not enter the dec/jnz loop below: ecx would wrap
    ; and the loop would run 2^32 times.
    test ecx,ecx
    jz .done

    ; Runs of 16 pixels or fewer are handled one pixel at a time.
    cmp ecx,BYTE 16
    ja .align_head

.short_loop:
    CONV_P16_BGR555_PIXEL
    dec ecx
    jnz .short_loop
.done:
    jmp _x86return

.align_head:
    ; If the destination is not 4-byte aligned, convert one pixel first.
    test edi,11b
    jz .bulk_setup
    CONV_P16_BGR555_PIXEL
    dec ecx

.bulk_setup:
    push ecx                        ; keep the count for the odd-pixel test

    shr ecx,1                       ; ecx = number of pixel pairs (>= 8 here)

    ; Point both arrays at the end of the paired region and count a
    ; negative index up to zero.
    lea esi,[esi+ecx*4]
    lea edi,[edi+ecx*4]
    neg ecx

.bulk_loop:
    ; Two pixels per dword; the masks apply to both halves at once.
    mov eax,[esi+ecx*4]
    mov ebx,eax
    mov edx,eax
    shr eax,1
    and eax,03E003E0h               ; bits 6-10 -> bits 5-9
    and ebx,0F800F800h
    shr ebx,11                      ; bits 11-15 -> bits 0-4
    and edx,001F001Fh
    shl edx,10                      ; bits 0-4 -> bits 10-14
    add eax,ebx
    add eax,edx
    mov [edi+ecx*4],eax
    inc ecx
    jnz .bulk_loop

    ; Tail: one pixel is left over when the saved count was odd.
    pop ecx
    and ecx,BYTE 1
    jz .exit
    CONV_P16_BGR555_PIXEL

.exit:
    jmp _x86return
slouken@0
   357
slouken@0
   358
slouken@0
   359
slouken@0
   360
slouken@0
   361
slouken@0
   362
slouken@0
   363
;-----------------------------------------------------------------------
; _ConvertX86p16_8RGB332
;
; Converts a run of 16-bit pixels to 8-bit pixels:
;   source bits 13-15 (red)   -> result bits 5-7
;   source bits  8-10 (green) -> result bits 2-4
;   source bits  3-4  (blue)  -> result bits 0-1
;
; In:    esi = source pixels (2 bytes each),
;        edi = destination pixels (1 byte each), ecx = pixel count
; Out:   esi / edi advanced past the converted pixels;
;        control leaves through "jmp _x86return" (no ret)
; Clobb: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------

; Convert the single pixel at [esi] to the byte at [edi] and advance both
; pointers. Clobbers eax, ebx, edx and flags.
%macro CONV_P16_RGB332_PIXEL 0
    movzx eax,word [esi]            ; zero-extended source pixel
    mov ebx,eax
    mov edx,eax
    and eax,BYTE 11000b             ; blue
    shr eax,3
    and ebx,11100000000b            ; green
    shr ebx,6
    and edx,1110000000000000b       ; red
    shr edx,8
    add eax,ebx                     ; fields are disjoint, so add acts as or
    add eax,edx
    mov [edi],al
    add esi,BYTE 2
    inc edi
%endmacro

_ConvertX86p16_8RGB332:

    ; An empty run must not enter the dec/jnz loop below: ecx would wrap
    ; and the loop would run 2^32 times.
    test ecx,ecx
    jz .done

    ; Runs of 16 pixels or fewer are handled one pixel at a time.
    cmp ecx,BYTE 16
    ja .align_head

.short_loop:
    CONV_P16_RGB332_PIXEL
    dec ecx
    jnz .short_loop
.done:
    jmp _x86return

.align_head:
    ; Convert single pixels until the destination is 4-byte aligned
    ; (at most three, so at least 14 pixels remain afterwards).
    test edi,11b
    jz .bulk_setup
    CONV_P16_RGB332_PIXEL
    dec ecx
    jmp SHORT .align_head

.bulk_setup:
    push ecx                        ; keep the count for the tail

    shr ecx,2                       ; ecx = number of 4-pixel groups (>= 3 here)

.bulk_loop:
    ; Gather the four low bytes in edx and the four high bytes in ebx.
    ; Pixels 0/1 go to the upper half and pixels 2/3 to the lower half;
    ; the rotates below put them back in memory order. All eight bytes
    ; are loaded inside the iteration that uses them, so nothing beyond
    ; the current group is ever read.
    mov dl,[esi+0]
    mov bl,[esi+1]
    mov dh,[esi+2]
    mov bh,[esi+3]
    shl edx,16
    shl ebx,16
    mov dl,[esi+4]
    mov bl,[esi+5]
    mov dh,[esi+6]
    mov bh,[esi+7]

    and edx,00011000000110000001100000011000b       ; blues
    ror edx,16+3                                    ; reorder, bits 3-4 -> 0-1

    mov eax,ebx
    and ebx,00000111000001110000011100000111b       ; greens
    and eax,11100000111000001110000011100000b       ; reds

    ror ebx,16-2                                    ; reorder, bits 0-2 -> 2-4
    ror eax,16                                      ; reorder only

    add eax,ebx
    add eax,edx
    mov [edi],eax                                   ; four result pixels

    add esi,BYTE 8
    add edi,BYTE 4

    dec ecx
    jnz .bulk_loop

    ; Tail: 0-3 pixels remain (saved count modulo 4).
    pop ecx
    and ecx,BYTE 11b
    jz .exit

.tail_loop:
    CONV_P16_RGB332_PIXEL
    dec ecx
    jnz .tail_loop

.exit:
    jmp _x86return
slouken@0
   488
icculus@1199
   489
; For ELF output, emit an empty .note.GNU-stack section so the object is
; marked as not needing an executable stack. The format name is tested
; under both spellings because newer NASM versions report "elf32" where
; older ones reported "elf"; with only the "elf" test the note can be
; silently omitted.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif