src/hermes/x86p_16.asm
author Sam Lantinga
Mon, 06 Feb 2006 08:28:51 +0000
changeset 1330 450721ad5436
parent 1199 2d6dc7de1145
child 1697 393092a3ebf6
permissions -rw-r--r--
It's now possible to build SDL without any C runtime at all on Windows,
using Visual C++ 2005
slouken@0
     1
;
slouken@0
     2
; x86 format converters for HERMES
slouken@0
     3
; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
slouken@0
     4
; This source code is licensed under the GNU LGPL
slouken@0
     5
; 
slouken@0
     6
; Please refer to the file COPYING.LIB contained in the distribution for
slouken@0
     7
; licensing conditions		
slouken@0
     8
; 
slouken@0
     9
; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
slouken@0
    10
; Used with permission.
slouken@0
    11
; 
slouken@0
    12
slouken@0
    13
	
slouken@0
    14
; ---------------------------------------------------------------------
; Assembler mode: emit 32-bit code.
; ---------------------------------------------------------------------
bits 32

; ---------------------------------------------------------------------
; Symbols made visible to the linker.
; NOTE(review): only the 16BGR565, 16RGB555, 16BGR555 and 8RGB332
; routines are defined in the visible part of this file; confirm where
; the 32- and 24-bit converters named below are implemented.
; ---------------------------------------------------------------------
global _ConvertX86p16_32RGB888
global _ConvertX86p16_32BGR888
global _ConvertX86p16_32RGBA888
global _ConvertX86p16_32BGRA888
global _ConvertX86p16_24RGB888
global _ConvertX86p16_24BGR888
global _ConvertX86p16_16BGR565
global _ConvertX86p16_16RGB555
global _ConvertX86p16_16BGR555
global _ConvertX86p16_8RGB332

; ---------------------------------------------------------------------
; Symbols defined in another object file.  Every converter in this file
; finishes with `jmp _x86return`.
; ---------------------------------------------------------------------
extern _ConvertX86
extern _x86return

; ---------------------------------------------------------------------
; Everything that follows is code.
; ---------------------------------------------------------------------
section .text
slouken@0
    31
slouken@0
    32
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR565
;
; Copies a run of 16-bit pixels from [esi] to [edi], exchanging the
; 5-bit field in bits 15..11 with the 5-bit field in bits 4..0 and
; keeping the 6-bit field in bits 10..5 in place (565 -> 565 with the
; outer channels swapped).
;
; In:       esi = source pixels
;           edi = destination pixels
;           ecx = number of pixels
; Out:      esi / edi advanced past the converted run; control leaves
;           through `jmp _x86return` (defined elsewhere).
; Clobbers: eax, ebx, ecx, edx, flags
; Stack:    one dword pushed and popped around the paired loop.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into [edi] and advance both pointers by 2.
; Only ax is stored, so stale upper bits in eax/ebx/edx are harmless.
%macro HERMES_P16_TO_BGR565_PIXEL 0
    mov     al, [esi]
    mov     ah, [esi+1]
    mov     ebx, eax
    mov     edx, eax
    shr     eax, 11                     ; bits 15..11 -> bits 4..0
    and     eax, BYTE 11111b
    and     ebx, 11111100000b           ; middle 6 bits stay where they are
    shl     edx, 11                     ; bits 4..0 -> bits 15..11
    add     eax, ebx
    add     eax, edx
    mov     [edi], al
    mov     [edi+1], ah
    add     esi, BYTE 2
    add     edi, BYTE 2
%endmacro

_ConvertX86p16_16BGR565:

    ; A zero count must not enter the dec/jnz loop below: the counter
    ; would wrap and the loop would run 2^32 times.
    test    ecx, ecx
    jz      .done_short

    ; Runs of at most 16 pixels are handled one pixel at a time.
    cmp     ecx, BYTE 16
    ja      .align_head

.short_loop:
    HERMES_P16_TO_BGR565_PIXEL
    dec     ecx
    jnz     .short_loop
.done_short:
    jmp     _x86return

.align_head:
    ; If edi is not a multiple of 4, convert a single pixel first.
    ; (This reaches 4-byte alignment only when edi is 2-byte aligned --
    ; presumably guaranteed by the caller; TODO confirm.)
    mov     eax, edi
    and     eax, BYTE 11b
    jz      .pairs
    HERMES_P16_TO_BGR565_PIXEL
    dec     ecx

.pairs:
    push    ecx                         ; keep the count for the tail test

    shr     ecx, 1                      ; ecx = number of pixel pairs (> 0 here)

    ; Point both arrays at the end of the paired region and walk a
    ; negative index up to zero.
    lea     esi, [esi+ecx*4]
    lea     edi, [edi+ecx*4]
    neg     ecx
    jmp     SHORT .pair_load

.pair_store:
    mov     [edi+ecx*4-4], eax          ; store the pair computed last time round
.pair_load:
    mov     eax, [esi+ecx*4]
    mov     ebx, [esi+ecx*4]
    and     eax, 07E007E0h              ; middle fields of both pixels
    mov     edx, [esi+ecx*4]
    and     ebx, 0F800F800h             ; top fields ...
    shr     ebx, 11                     ; ... moved to the bottom
    and     edx, 001F001Fh              ; bottom fields ...
    shl     edx, 11                     ; ... moved to the top
    add     eax, ebx
    add     eax, edx
    inc     ecx
    jnz     .pair_store

    mov     [edi+ecx*4-4], eax          ; ecx == 0: store the final pair

    ; One pixel is left over when the saved count was odd.
    pop     ecx
    and     ecx, BYTE 1
    jz      .done
    HERMES_P16_TO_BGR565_PIXEL

.done:
    jmp     _x86return
slouken@0
   136
slouken@0
   137
slouken@0
   138
slouken@0
   139
slouken@0
   140
slouken@0
   141
slouken@0
   142
;-----------------------------------------------------------------------
; _ConvertX86p16_16RGB555
;
; Copies a run of 16-bit pixels from [esi] to [edi], turning each 565
; pixel into a 555 pixel: bits 15..6 are shifted right by one (dropping
; the lowest bit of the 6-bit middle field) and bits 4..0 are kept.
;
; In:       esi = source pixels
;           edi = destination pixels
;           ecx = number of pixels
; Out:      esi / edi advanced past the converted run; control leaves
;           through `jmp _x86return` (defined elsewhere).
; Clobbers: eax, ebx, ecx, edx, flags (ebp is saved and restored)
; Stack:    two dwords pushed and popped around the unrolled loop.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into [edi] and advance both pointers by 2.
; Only ax is stored, so stale upper bits in eax/ebx are harmless.
%macro HERMES_P16_TO_RGB555_PIXEL 0
    mov     al, [esi]
    mov     ah, [esi+1]
    mov     ebx, eax
    shr     ebx, 1
    and     ebx, 0111111111100000b      ; upper two fields, now in bits 14..5
    and     eax, BYTE 0000000000011111b ; lowest field is unchanged
    add     eax, ebx
    mov     [edi], al
    mov     [edi+1], ah
    add     esi, BYTE 2
    add     edi, BYTE 2
%endmacro

_ConvertX86p16_16RGB555:

    ; A zero count must not enter the dec/jnz loop below: the counter
    ; would wrap and the loop would run 2^32 times.
    test    ecx, ecx
    jz      .done_short

    ; Runs of at most 32 pixels are handled one pixel at a time.
    cmp     ecx, BYTE 32
    ja      .align_head

.short_loop:
    HERMES_P16_TO_RGB555_PIXEL
    dec     ecx
    jnz     .short_loop
.done_short:
    jmp     _x86return

.align_head:
    ; If edi is not a multiple of 4, convert a single pixel first.
    ; (This reaches 4-byte alignment only when edi is 2-byte aligned --
    ; presumably guaranteed by the caller; TODO confirm.)
    mov     eax, edi
    and     eax, BYTE 11b
    jz      .quads
    HERMES_P16_TO_RGB555_PIXEL
    dec     ecx

.quads:
    push    ebp                         ; ebp becomes the loop index below
    push    ecx                         ; keep the count for the tail test

    shr     ecx, 2                      ; ecx = number of 4-pixel groups (> 0 here)

    ; Point both arrays at the end of the unrolled region and walk a
    ; negative index in ebp up to zero; ecx is reused as scratch.
    lea     esi, [esi+ecx*8]
    lea     edi, [edi+ecx*8]
    mov     ebp, ecx
    neg     ebp

.quad_loop:
    mov     eax, [esi+ebp*8]            ; pixels 0 and 1
    mov     ecx, [esi+ebp*8+4]          ; pixels 2 and 3

    mov     ebx, eax
    mov     edx, ecx

    and     eax, 0FFC0FFC0h             ; bits 15..6 of each pixel
    and     ecx, 0FFC0FFC0h
    shr     eax, 1                      ; -> bits 14..5
    shr     ecx, 1

    and     ebx, 001F001Fh              ; bits 4..0 of each pixel
    and     edx, 001F001Fh

    add     eax, ebx
    add     ecx, edx

    mov     [edi+ebp*8], eax
    mov     [edi+ebp*8+4], ecx

    inc     ebp
    jnz     .quad_loop

    ; Up to three pixels remain (saved count modulo 4).
    pop     ecx
    and     ecx, BYTE 11b
    jz      .done

.tail_loop:
    HERMES_P16_TO_RGB555_PIXEL
    dec     ecx
    jnz     .tail_loop

.done:
    pop     ebp
    jmp     _x86return
slouken@0
   244
slouken@0
   245
slouken@0
   246
slouken@0
   247
slouken@0
   248
slouken@0
   249
slouken@0
   250
;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR555
;
; Copies a run of 16-bit pixels from [esi] to [edi], turning each 565
; pixel into a 555 pixel with the outer fields exchanged:
;   source bits 15..11 -> result bits  4..0
;   source bits 10..6  -> result bits  9..5   (lowest middle bit dropped)
;   source bits  4..0  -> result bits 14..10
;
; In:       esi = source pixels
;           edi = destination pixels
;           ecx = number of pixels
; Out:      esi / edi advanced past the converted run; control leaves
;           through `jmp _x86return` (defined elsewhere).
; Clobbers: eax, ebx, ecx, edx, flags
; Stack:    one dword pushed and popped around the paired loop.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into [edi] and advance both pointers by 2.
; Only ax is stored, so stale upper bits in eax/ebx/edx are harmless.
%macro HERMES_P16_TO_BGR555_PIXEL 0
    mov     al, [esi]
    mov     ah, [esi+1]
    mov     ebx, eax
    mov     edx, eax
    shr     eax, 11                     ; bits 15..11 -> bits 4..0
    and     eax, BYTE 11111b
    shr     ebx, 1                      ; bits 10..6 -> bits 9..5
    and     ebx, 1111100000b
    shl     edx, 10                     ; bits 4..0 -> bits 14..10
    and     edx, 0111110000000000b
    add     eax, ebx
    add     eax, edx
    mov     [edi], al
    mov     [edi+1], ah
    add     esi, BYTE 2
    add     edi, BYTE 2
%endmacro

_ConvertX86p16_16BGR555:

    ; A zero count must not enter the dec/jnz loop below: the counter
    ; would wrap and the loop would run 2^32 times.
    test    ecx, ecx
    jz      .done_short

    ; Runs of at most 16 pixels are handled one pixel at a time.
    cmp     ecx, BYTE 16
    ja      .align_head

.short_loop:
    HERMES_P16_TO_BGR555_PIXEL
    dec     ecx
    jnz     .short_loop
.done_short:
    jmp     _x86return

.align_head:
    ; If edi is not a multiple of 4, convert a single pixel first.
    ; (This reaches 4-byte alignment only when edi is 2-byte aligned --
    ; presumably guaranteed by the caller; TODO confirm.)
    mov     eax, edi
    and     eax, BYTE 11b
    jz      .pairs
    HERMES_P16_TO_BGR555_PIXEL
    dec     ecx

.pairs:
    push    ecx                         ; keep the count for the tail test

    shr     ecx, 1                      ; ecx = number of pixel pairs (> 0 here)

    ; Point both arrays at the end of the paired region and walk a
    ; negative index up to zero.
    lea     esi, [esi+ecx*4]
    lea     edi, [edi+ecx*4]
    neg     ecx
    jmp     SHORT .pair_load

.pair_store:
    mov     [edi+ecx*4-4], eax          ; store the pair computed last time round
.pair_load:
    mov     eax, [esi+ecx*4]
    shr     eax, 1
    mov     ebx, [esi+ecx*4]
    and     eax, 03E003E0h              ; middle fields, now in bits 9..5
    mov     edx, [esi+ecx*4]
    and     ebx, 0F800F800h             ; top fields ...
    shr     ebx, 11                     ; ... moved to bits 4..0
    and     edx, 001F001Fh              ; bottom fields ...
    shl     edx, 10                     ; ... moved to bits 14..10
    add     eax, ebx
    add     eax, edx
    inc     ecx
    jnz     .pair_store

    mov     [edi+ecx*4-4], eax          ; ecx == 0: store the final pair

    ; One pixel is left over when the saved count was odd.
    pop     ecx
    and     ecx, BYTE 1
    jz      .done
    HERMES_P16_TO_BGR555_PIXEL

.done:
    jmp     _x86return
slouken@0
   362
slouken@0
   363
slouken@0
   364
slouken@0
   365
slouken@0
   366
slouken@0
   367
slouken@0
   368
;-----------------------------------------------------------------------
; _ConvertX86p16_8RGB332
;
; Reduces a run of 16-bit pixels at [esi] to 8-bit pixels at [edi].
; Each output byte is built from the top bits of the three source
; fields:
;   source bits 15..13 (red)   -> result bits 7..5
;   source bits 10..8  (green) -> result bits 4..2
;   source bits  4..3  (blue)  -> result bits 1..0
;
; In:       esi = source pixels (2 bytes each)
;           edi = destination pixels (1 byte each)
;           ecx = number of pixels
; Out:      esi / edi advanced past the converted run; control leaves
;           through `jmp _x86return` (defined elsewhere).
; Clobbers: eax, ebx, ecx, edx, flags
; Stack:    one dword pushed and popped around the unrolled loop.
;-----------------------------------------------------------------------

; Convert one pixel at [esi] into the byte at [edi]; advance esi by 2
; and edi by 1.  Only al is stored, so stale upper register bits are
; harmless.
%macro HERMES_P16_TO_RGB332_PIXEL 0
    mov     al, [esi+0]
    mov     ah, [esi+1]
    mov     ebx, eax
    mov     edx, eax
    and     eax, BYTE 11000b            ; blue
    shr     eax, 3
    and     ebx, 11100000000b           ; green
    shr     ebx, 6
    and     edx, 1110000000000000b      ; red
    shr     edx, 8
    add     eax, ebx
    add     eax, edx
    mov     [edi], al
    add     esi, BYTE 2
    inc     edi
%endmacro

_ConvertX86p16_8RGB332:

    ; A zero count must not enter the dec/jnz loop below: the counter
    ; would wrap and the loop would run 2^32 times.
    test    ecx, ecx
    jz      .done_short

    ; Runs of at most 16 pixels are handled one pixel at a time.
    cmp     ecx, BYTE 16
    ja      .align_head

.short_loop:
    HERMES_P16_TO_RGB332_PIXEL
    dec     ecx
    jnz     .short_loop
.done_short:
    jmp     _x86return

.align_head:
    ; Convert single pixels until edi is a multiple of 4 (at most three,
    ; and the count is above 16 here, so ecx stays positive).
    mov     eax, edi
    and     eax, BYTE 11b
    jz      .quads
    HERMES_P16_TO_RGB332_PIXEL
    dec     ecx
    jmp     SHORT .align_head

.quads:
    push    ecx                         ; keep the count for the tail test

    shr     ecx, 2                      ; ecx = number of 4-pixel groups (> 0 here)

.quad_loop:
    ; Gather the low bytes of four pixels in edx and the high bytes in
    ; ebx.  All eight bytes are loaded inside the iteration that uses
    ; them, so nothing beyond the current group is ever read (no
    ; preloading of the next group past the end of the source).
    mov     dl, [esi+0]
    mov     bl, [esi+1]
    mov     dh, [esi+2]
    mov     bh, [esi+3]
    shl     edx, 16                     ; pixels 0/1 to the upper half
    shl     ebx, 16
    mov     dl, [esi+4]
    mov     bl, [esi+5]
    mov     dh, [esi+6]
    mov     bh, [esi+7]

    ; The rotates by 16 put pixel 0 in the lowest byte; the extra
    ; +3 / -2 moves each field to its place inside the byte.
    and     edx, 00011000000110000001100000011000b  ; blues
    ror     edx, 16+3

    mov     eax, ebx
    and     ebx, 00000111000001110000011100000111b  ; greens
    and     eax, 11100000111000001110000011100000b  ; reds
    ror     ebx, 16-2
    ror     eax, 16

    add     eax, ebx
    add     eax, edx
    mov     [edi], eax                  ; four output pixels at once

    add     esi, BYTE 8
    add     edi, BYTE 4
    dec     ecx
    jnz     .quad_loop

    ; Up to three pixels remain (saved count modulo 4).
    pop     ecx
    and     ecx, BYTE 11b
    jz      .done

.tail_loop:
    HERMES_P16_TO_RGB332_PIXEL
    dec     ecx
    jnz     .tail_loop

.done:
    jmp     _x86return
slouken@0
   493
icculus@1199
   494
; Emit an empty .note.GNU-stack section for ELF output so that this
; object does not request an executable stack.  The format name is
; matched both as `elf` and as `elf32`, because the comparison is a
; literal token match and NASM may report 32-bit ELF under either name
; depending on its version.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif