src/hermes/x86p_16.asm
author Sam Lantinga <slouken@libsdl.org>
Sun, 21 Sep 2003 18:32:04 +0000
changeset 720 f90d80d68071
parent 0 74212992fb08
child 1166 da33b7e6d181
permissions -rw-r--r--
N Sep 17 8791 Sam Lantinga Re: tks source released
Date: Sun, 07 Sep 2003 02:51:58 +0200
From: Stephane Marchesin
Subject: [SDL] Two little patches

Compiling SDL with a recent gcc (gcc 3.3.1; 3.3 doesn't have this
behaviour) gives some nasty warnings:

SDL_blit_A.c: In function `BlitRGBtoRGBSurfaceAlpha128MMX':
SDL_blit_A.c:223: warning: integer constant is too large for "long" type
SDL_blit_A.c:225: warning: integer constant is too large for "long" type
SDL_blit_A.c:227: warning: integer constant is too large for "long" type
[...]

The first attached patch (longlongfix.patch) tells gcc to really treat
those constants as unsigned long long and not long.

The second patch (nasinclude.patch) fixes an include problem I had while
compiling NAS audio: when the <audio/audiolib.h> file lies in
/usr/X11R6/include, a -I/usr/X11R6/include option is needed or the file
isn't found.
slouken@0
     1
;
slouken@0
     2
; x86 format converters for HERMES
slouken@0
     3
; Copyright (c) 1998 Glenn Fielder (gaffer@gaffer.org)
slouken@0
     4
; This source code is licensed under the GNU LGPL
slouken@0
     5
; 
slouken@0
     6
; Please refer to the file COPYING.LIB contained in the distribution for
slouken@0
     7
; licensing conditions		
slouken@0
     8
; 
slouken@0
     9
; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
slouken@0
    10
; Used with permission.
slouken@0
    11
; 
slouken@0
    12
slouken@0
    13
	
slouken@0
    14
BITS 32
slouken@0
    15
slouken@0
    16
GLOBAL _ConvertX86p16_32RGB888
slouken@0
    17
GLOBAL _ConvertX86p16_32BGR888
slouken@0
    18
GLOBAL _ConvertX86p16_32RGBA888
slouken@0
    19
GLOBAL _ConvertX86p16_32BGRA888
slouken@0
    20
GLOBAL _ConvertX86p16_24RGB888
slouken@0
    21
GLOBAL _ConvertX86p16_24BGR888
slouken@0
    22
GLOBAL _ConvertX86p16_16BGR565
slouken@0
    23
GLOBAL _ConvertX86p16_16RGB555
slouken@0
    24
GLOBAL _ConvertX86p16_16BGR555
slouken@0
    25
GLOBAL _ConvertX86p16_8RGB332
slouken@0
    26
slouken@0
    27
EXTERN _ConvertX86
slouken@0
    28
EXTERN _x86return
slouken@0
    29
slouken@0
    30
slouken@0
    31
SECTION .text
slouken@0
    32
slouken@0
    33
slouken@0
    34
slouken@0
    35
;-----------------------------------------------------------------------
; BGR565_SWAP_PIXEL
;   Reads one 16-bit pixel from [esi], exchanges the 5-bit field in
;   bits 15..11 with the 5-bit field in bits 4..0 (the 6-bit field in
;   bits 10..5 stays where it is), writes the result to [edi] and
;   advances both pointers by one pixel (2 bytes).
;   Clobbers: eax, ebx, edx, flags.
;-----------------------------------------------------------------------
%macro BGR565_SWAP_PIXEL 0
        mov     al, [esi]               ; fetch the pixel one byte at a time;
        mov     ah, [esi+1]             ; upper half of eax is don't-care
        mov     ebx, eax
        mov     edx, eax
        shr     eax, 11                 ; bits 15..11 -> bits 4..0
        and     eax, byte 0x1F          ; discard the stale upper bits
        and     ebx, 0x07E0             ; middle 6-bit field, unmoved
        shl     edx, 11                 ; bits 4..0 -> bits 15..11
        add     eax, ebx
        add     eax, edx                ; only ax is meaningful from here on
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 2
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR565
;   Copies ecx 16-bit pixels from [esi] to [edi], swapping the two 5-bit
;   end fields of every pixel (5-6-5 layout, outer channels exchanged).
;
;   In:    esi = source pixels, edi = destination pixels,
;          ecx = pixel count (must be non-zero: the short loop is
;                dec/jnz driven)
;   Out:   esi/edi advanced past the converted pixels
;   Exit:  jmp _x86return (external)
;   Clobb: eax, ebx, ecx, edx, flags
;   NOTE(review): presumably entered by a jump from the _ConvertX86
;   dispatcher with the registers preloaded - confirm against it.
;-----------------------------------------------------------------------
_ConvertX86p16_16BGR565:

        ; Runs of 16 pixels or fewer are handled one pixel at a time.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        BGR565_SWAP_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.align_head:
        ; If edi is not a multiple of 4, convert one pixel first.
        ; (Presumably edi is at least 2-byte aligned, so this makes the
        ; dword stores below aligned - TODO confirm.)
        mov     eax, edi
        and     eax, byte 3
        jz      .pairs_setup
        BGR565_SWAP_PIXEL
        dec     ecx

.pairs_setup:
        ; ecx >= 16 here, so at least 8 pixel pairs follow.
        push    ecx                     ; keep the count for the tail test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point both arrays at the end of the pair region and walk a
        ; negative index up to zero.
        lea     esi, [esi+ecx*4]
        lea     edi, [edi+ecx*4]
        neg     ecx
        jmp     short .pairs_load

.pairs_store:
        mov     [edi+ecx*4-4], eax      ; store the pair finished last round
.pairs_load:
        mov     eax, [esi+ecx*4]        ; two pixels per dword
        mov     ebx, [esi+ecx*4]
        and     eax, 0x07E007E0         ; middle fields, unmoved
        mov     edx, [esi+ecx*4]
        and     ebx, 0xF800F800         ; top fields ...
        shr     ebx, 11                 ; ... down to bits 4..0
        and     edx, 0x001F001F         ; bottom fields ...
        shl     edx, 11                 ; ... up to bits 15..11
        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .pairs_store

        mov     [edi+ecx*4-4], eax      ; ecx == 0: store the final pair

        ; One pixel is left over when the saved count was odd.
        pop     ecx
        and     ecx, byte 1
        jz      .done
        BGR565_SWAP_PIXEL

.done:
        jmp     _x86return
slouken@0
   139
slouken@0
   140
slouken@0
   141
slouken@0
   142
slouken@0
   143
slouken@0
   144
slouken@0
   145
;-----------------------------------------------------------------------
; RGB565_TO_555_PIXEL
;   Reads one 16-bit pixel from [esi], shifts bits 15..6 down by one
;   position (dropping bit 5 and leaving bit 15 clear), keeps bits 4..0
;   in place, writes the result to [edi] and advances both pointers by
;   one pixel (2 bytes).
;   Clobbers: eax, ebx, flags.
;-----------------------------------------------------------------------
%macro RGB565_TO_555_PIXEL 0
        mov     al, [esi]               ; fetch the pixel one byte at a time;
        mov     ah, [esi+1]             ; upper half of eax is don't-care
        mov     ebx, eax
        shr     ebx, 1                  ; upper two fields move down one bit
        and     ebx, 0x7FE0             ; keep bits 14..5 of the shifted value
        and     eax, byte 0x1F          ; low 5-bit field stays put
        add     eax, ebx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 2
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p16_16RGB555
;   Copies ecx 16-bit pixels from [esi] to [edi], repacking each pixel
;   from a 5-6-5 layout to a 5-5-5 layout (the lowest bit of the 6-bit
;   field is discarded; bit 15 of the result is zero).
;
;   In:    esi = source pixels, edi = destination pixels,
;          ecx = pixel count (must be non-zero: the short loop is
;                dec/jnz driven)
;   Out:   esi/edi advanced past the converted pixels
;   Exit:  jmp _x86return (external); ebp is preserved
;   Clobb: eax, ebx, ecx, edx, flags
;   NOTE(review): presumably entered by a jump from the _ConvertX86
;   dispatcher with the registers preloaded - confirm against it.
;-----------------------------------------------------------------------
_ConvertX86p16_16RGB555:

        ; Runs of 32 pixels or fewer are handled one pixel at a time.
        cmp     ecx, byte 32
        ja      .align_head

.short_loop:
        RGB565_TO_555_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.align_head:
        ; If edi is not a multiple of 4, convert one pixel first.
        ; (Presumably edi is at least 2-byte aligned, so this makes the
        ; dword stores below aligned - TODO confirm.)
        mov     eax, edi
        and     eax, byte 3
        jz      .quads_setup
        RGB565_TO_555_PIXEL
        dec     ecx

.quads_setup:
        ; ecx >= 32 here, so at least 8 groups of four pixels follow.
        push    ebp                     ; ebp becomes the loop index
        push    ecx                     ; keep the count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups

        ; Point both arrays at the end of the group region and walk a
        ; negative index up to zero.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*8]
        xor     ebp, ebp
        sub     ebp, ecx                ; ebp = -groups

.quads_loop:
        mov     eax, [esi+ebp*8]        ; pixels 0,1 of the group
        mov     ecx, [esi+ebp*8+4]      ; pixels 2,3 (ecx is scratch here)

        mov     ebx, eax
        mov     edx, ecx

        and     eax, 0xFFC0FFC0         ; bits 15..6 of each pixel
        and     ecx, 0xFFC0FFC0

        shr     eax, 1                  ; down one bit; nothing crosses the
        and     ebx, 0x001F001F         ; pixel boundary because bit 16's
                                        ; neighbours below were masked off
        shr     ecx, 1
        and     edx, 0x001F001F         ; low 5-bit fields, unmoved

        add     eax, ebx
        add     ecx, edx

        mov     [edi+ebp*8], eax
        mov     [edi+ebp*8+4], ecx

        inc     ebp
        jnz     .quads_loop

        ; Up to three pixels remain (saved count modulo 4).
        pop     ecx
.tail_loop:
        and     ecx, byte 3
        jz      .done
        RGB565_TO_555_PIXEL
        dec     ecx
        jmp     short .tail_loop

.done:
        pop     ebp
        jmp     _x86return
slouken@0
   247
slouken@0
   248
slouken@0
   249
slouken@0
   250
slouken@0
   251
slouken@0
   252
slouken@0
   253
;-----------------------------------------------------------------------
; BGR555_FROM_565_PIXEL
;   Reads one 16-bit pixel from [esi] and builds a 15-bit result:
;     source bits 15..11 -> result bits  4..0
;     source bits 10..6  -> result bits  9..5  (source bit 5 is dropped)
;     source bits  4..0  -> result bits 14..10
;   Writes the result to [edi] and advances both pointers by one pixel
;   (2 bytes).
;   Clobbers: eax, ebx, edx, flags.
;-----------------------------------------------------------------------
%macro BGR555_FROM_565_PIXEL 0
        mov     al, [esi]               ; fetch the pixel one byte at a time;
        mov     ah, [esi+1]             ; upper half of eax is don't-care
        mov     ebx, eax
        mov     edx, eax
        shr     eax, 11                 ; top field -> bits 4..0
        and     eax, byte 0x1F          ; discard the stale upper bits
        shr     ebx, 1
        and     ebx, 0x03E0             ; top 5 bits of the middle field
        shl     edx, 10
        and     edx, 0x7C00             ; bottom field -> bits 14..10
        add     eax, ebx
        add     eax, edx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 2
        add     edi, byte 2
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p16_16BGR555
;   Copies ecx 16-bit pixels from [esi] to [edi], repacking each pixel
;   from a 5-6-5 layout to a 5-5-5 layout with the two outer channels
;   exchanged (bit 15 of the result is zero).
;
;   In:    esi = source pixels, edi = destination pixels,
;          ecx = pixel count (must be non-zero: the short loop is
;                dec/jnz driven)
;   Out:   esi/edi advanced past the converted pixels
;   Exit:  jmp _x86return (external)
;   Clobb: eax, ebx, ecx, edx, flags
;   NOTE(review): presumably entered by a jump from the _ConvertX86
;   dispatcher with the registers preloaded - confirm against it.
;-----------------------------------------------------------------------
_ConvertX86p16_16BGR555:

        ; Runs of 16 pixels or fewer are handled one pixel at a time.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        BGR555_FROM_565_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.align_head:
        ; If edi is not a multiple of 4, convert one pixel first.
        ; (Presumably edi is at least 2-byte aligned, so this makes the
        ; dword stores below aligned - TODO confirm.)
        mov     eax, edi
        and     eax, byte 3
        jz      .pairs_setup
        BGR555_FROM_565_PIXEL
        dec     ecx

.pairs_setup:
        ; ecx >= 16 here, so at least 8 pixel pairs follow.
        push    ecx                     ; keep the count for the tail test

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point both arrays at the end of the pair region and walk a
        ; negative index up to zero.
        lea     esi, [esi+ecx*4]
        lea     edi, [edi+ecx*4]
        neg     ecx
        jmp     short .pairs_load

.pairs_store:
        mov     [edi+ecx*4-4], eax      ; store the pair finished last round
.pairs_load:
        mov     eax, [esi+ecx*4]        ; two pixels per dword

        shr     eax, 1
        mov     ebx, [esi+ecx*4]

        and     eax, 0x03E003E0         ; middle fields (top 5 bits of each)
        mov     edx, [esi+ecx*4]

        and     ebx, 0xF800F800         ; top fields ...
        shr     ebx, 11                 ; ... down to bits 4..0

        and     edx, 0x001F001F         ; bottom fields ...
        shl     edx, 10                 ; ... up to bits 14..10

        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .pairs_store

        mov     [edi+ecx*4-4], eax      ; ecx == 0: store the final pair

        ; One pixel is left over when the saved count was odd.
        pop     ecx
        and     ecx, byte 1
        jz      .done
        BGR555_FROM_565_PIXEL

.done:
        jmp     _x86return
slouken@0
   365
slouken@0
   366
slouken@0
   367
slouken@0
   368
slouken@0
   369
slouken@0
   370
slouken@0
   371
;-----------------------------------------------------------------------
; RGB332_FROM_565_PIXEL
;   Reads one 16-bit pixel from [esi] and packs it into one byte:
;     source bits 15..13 -> result bits 7..5   (red)
;     source bits 10..8  -> result bits 4..2   (green)
;     source bits  4..3  -> result bits 1..0   (blue)
;   Writes the byte to [edi], advances esi by 2 and edi by 1.
;   Clobbers: eax, ebx, edx, flags.
;-----------------------------------------------------------------------
%macro RGB332_FROM_565_PIXEL 0
        mov     al, [esi]               ; fetch the pixel one byte at a time;
        mov     ah, [esi+1]             ; upper half of eax is don't-care
        mov     ebx, eax
        mov     edx, eax
        and     eax, byte 0x18          ; blue: top 2 bits of the low field
        shr     eax, 3
        and     ebx, 0x0700             ; green: top 3 bits of the middle field
        shr     ebx, 6
        and     edx, 0xE000             ; red: top 3 bits of the high field
        shr     edx, 8
        add     eax, ebx
        add     eax, edx
        mov     [edi], al
        add     esi, byte 2
        inc     edi
%endmacro

;-----------------------------------------------------------------------
; _ConvertX86p16_8RGB332
;   Converts ecx 16-bit pixels at [esi] into ecx 8-bit 3-3-2 pixels at
;   [edi].
;
;   In:    esi = source pixels, edi = destination pixels,
;          ecx = pixel count (must be non-zero: the short loop is
;                dec/jnz driven)
;   Out:   esi/edi advanced past the converted pixels
;   Exit:  jmp _x86return (external); ebp is preserved on the long path
;   Clobb: eax, ebx, ecx, edx, flags
;   NOTE(review): presumably entered by a jump from the _ConvertX86
;   dispatcher with the registers preloaded - confirm against it.
;
;   The 4-pixel loop loads all of its source bytes at the top of each
;   iteration, so no byte beyond the last converted pixel is ever read.
;-----------------------------------------------------------------------
_ConvertX86p16_8RGB332:

        ; Runs of 16 pixels or fewer are handled one pixel at a time.
        cmp     ecx, byte 16
        ja      .align_head

.short_loop:
        RGB332_FROM_565_PIXEL
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.align_head:
        ; Convert single pixels until edi is a multiple of 4 (at most
        ; three iterations, since edi advances by one byte per pixel).
        mov     eax, edi
        and     eax, byte 3
        jz      .quads_setup
        RGB332_FROM_565_PIXEL
        dec     ecx
        jmp     short .align_head

.quads_setup:
        ; ecx >= 14 here (more than 16 on entry, at most 3 consumed
        ; above), so the group count below is never zero.
        push    ebp                     ; paired with the pop at .done
        push    ecx                     ; keep the count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups

.quads_loop:
        ; edx gathers the four low bytes (blue), ebx the four high bytes
        ; (green + red). Pixels 0,1 are loaded into the low word first
        ; and shifted up; the ror by 16 further down restores pixel
        ; order 0..3 from the least significant byte upwards.
        mov     dl, [esi+0]
        mov     bl, [esi+1]
        mov     dh, [esi+2]

        shl     edx, 16
        mov     bh, [esi+3]

        shl     ebx, 16
        mov     dl, [esi+4]

        mov     dh, [esi+6]
        mov     bl, [esi+5]

        and     edx, 0x18181818         ; blue: bits 4..3 of each low byte
        mov     bh, [esi+7]

        ror     edx, 16+3               ; swap halves, blue -> bits 1..0
        mov     eax, ebx                ; eax will carry the reds

        and     ebx, 0x07070707         ; green: bits 2..0 of each high byte
        and     eax, 0xE0E0E0E0         ; red:   bits 7..5 of each high byte

        ror     ebx, 16-2               ; swap halves, green -> bits 4..2
        add     esi, byte 8

        ror     eax, 16                 ; swap halves, red already in place
        add     edi, byte 4

        add     eax, ebx
        add     eax, edx

        mov     [edi-4], eax            ; four finished pixels

        dec     ecx
        jnz     .quads_loop

        ; Up to three pixels remain (saved count modulo 4).
        pop     ecx
        and     ecx, byte 3
        jz      .done

.tail_loop:
        RGB332_FROM_565_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
slouken@0
   496