src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 09 May 2006 15:09:47 +0000
changeset 1795 398ac0f88e4d
parent 1544 ab1e4c41ab71
child 1662 782fd950bd46
child 1895 c121d94672cb
child 4159 a1b03ba2fcd0
permissions -rw-r--r--
Fixed bug #220

The AltiVec blitters don't compile, since they require __VEC__ to be enabled in
order for the compiler to understand "vector" and friends (i.e. do AltiVec).
But you don't want to turn AltiVec on globally, since then the code would only
run on a G4 (there are already runtime tests, before using the AltiVec
variants).

The solution here is to enable AltiVec locally, for the actual AltiVec code.
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@1312
     3
    Copyright (C) 1997-2006 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#include "SDL_video.h"
slouken@1358
    25
#include "SDL_endian.h"
slouken@1358
    26
#include "SDL_cpuinfo.h"
slouken@0
    27
#include "SDL_blit.h"
slouken@0
    28
slouken@0
    29
/* Functions to blit from N-bit surfaces to other surfaces */
slouken@0
    30
slouken@1361
    31
#if SDL_ALTIVEC_BLITTERS
slouken@1795
    32
#if __MWERKS__
slouken@1795
    33
#pragma altivec_model on
slouken@1795
    34
#endif
icculus@1175
    35
#ifdef HAVE_ALTIVEC_H
icculus@1162
    36
#include <altivec.h>
icculus@1175
    37
#endif
slouken@1338
    38
#define assert(X)
slouken@1402
    39
#ifdef __MACOSX__
#include <sys/sysctl.h>
/*
 * Report the L3 cache size by querying the "hw.l3cachesize" sysctl.
 * Returns 0 when the sysctl call fails.
 */
static size_t GetL3CacheSize( void )
{
    u_int64_t l3_bytes = 0;
    size_t value_len = sizeof( l3_bytes );

    if( sysctlbyname( "hw.l3cachesize", &l3_bytes, &value_len, NULL, 0 ) != 0 ) {
        return 0;
    }

    return l3_bytes;
}
#else
/*
 * No way to query the cache here, so report a fixed 2 MiB
 * (the original author's guess for a G4).
 */
static size_t GetL3CacheSize( void )
{
    return 2 * 1024 * 1024;
}
#endif /* __MACOSX__ */
icculus@1047
    60
slouken@1402
    61
/*
 * Vector literal syntax differs between compilers: Apple's GCC before 4.0
 * uses the parenthesised AltiVec form, everything else uses braces.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif

/* Non-zero when pointer x is not on a 16-byte boundary (low four address bits). */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/*
 * Permute vector applying the same byte order (a,b,c,d) to each of the four
 * 32-bit pixels of a vector.  Built through VECUINT8_LITERAL so that it uses
 * whichever literal syntax the compiler accepts.
 */
#define VSWIZZLE32(a,b,c,d) VECUINT8_LITERAL( \
                                 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                 0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                 0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                 0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )

/* Pack r,g,b,a into one pixel using the shifts and masks of dstfmt. */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( (((r)<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
      (((g)<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
      (((b)<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
      (((a)<<(dstfmt)->Ashift)&(dstfmt)->Amask) )

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 *  Don't use this on a G5...however, the speed boost is very significant
 *   on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

/*
 * Permute vector used to extract 16 possibly-unaligned bytes at src from the
 * pair (previous load, vec_ld(15, src)).  For an aligned src both loads are
 * the same block, so the selector {16..31} is built instead of vec_lvsl's
 * {0..15}; that way the second operand is always the one selected and the
 * blit loops can carry it over as the next iteration's first operand.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, (src)) \
    : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
icculus@1047
   102
icculus@1047
   103
/* Calculate the permute vector used for 32->32 swizzling */
icculus@1047
   104
static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
icculus@1047
   105
                                  const SDL_PixelFormat *dstfmt)
icculus@1047
   106
{
icculus@1047
   107
    /*
icculus@1047
   108
    * We have to assume that the bits that aren't used by other
icculus@1047
   109
     *  colors is alpha, and it's one complete byte, since some formats
icculus@1047
   110
     *  leave alpha with a zero mask, but we should still swizzle the bits.
icculus@1047
   111
     */
icculus@1047
   112
    /* ARGB */
icculus@1047
   113
    const static struct SDL_PixelFormat default_pixel_format = {
icculus@1047
   114
        NULL, 0, 0,
icculus@1047
   115
        0, 0, 0, 0,
icculus@1047
   116
        16, 8, 0, 24,
icculus@1047
   117
        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
icculus@1047
   118
        0, 0};
icculus@1047
   119
    if (!srcfmt) {
icculus@1047
   120
        srcfmt = &default_pixel_format;
icculus@1047
   121
    }
icculus@1047
   122
    if (!dstfmt) {
icculus@1047
   123
        dstfmt = &default_pixel_format;
icculus@1047
   124
    }
slouken@1487
   125
    const vector unsigned char plus = VECUINT8_LITERAL(
icculus@1162
   126
                                      0x00, 0x00, 0x00, 0x00,
icculus@1047
   127
                                      0x04, 0x04, 0x04, 0x04,
icculus@1047
   128
                                      0x08, 0x08, 0x08, 0x08,
icculus@1047
   129
                                      0x0C, 0x0C, 0x0C, 0x0C );
icculus@1047
   130
    vector unsigned char vswiz;
icculus@1047
   131
    vector unsigned int srcvec;
icculus@1047
   132
#define RESHIFT(X) (3 - ((X) >> 3))
icculus@1047
   133
    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
icculus@1047
   134
    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
icculus@1047
   135
    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
icculus@1047
   136
    Uint32 amask;
icculus@1047
   137
    /* Use zero for alpha if either surface doesn't have alpha */
icculus@1047
   138
    if (dstfmt->Amask) {
icculus@1047
   139
        amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
icculus@1047
   140
    } else {    
icculus@1047
   141
        amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
icculus@1047
   142
    }           
icculus@1047
   143
#undef RESHIFT  
icculus@1162
   144
    ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
icculus@1047
   145
    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
icculus@1047
   146
    return(vswiz);
icculus@1047
   147
}
icculus@1047
   148
icculus@1047
   149
static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
icculus@1047
   150
/*
 * AltiVec blit from a 32-bit RGB source to a 16-bit RGB565 destination.
 *
 * Each row is processed in three phases:
 *   1. scalar pixels until dst reaches a 16-byte boundary,
 *   2. 8 pixels per iteration with AltiVec (32 source bytes -> 16 dest bytes),
 *   3. scalar pixels for the (width % 8) leftover.
 *
 * info: reads d_width, d_height, s_pixels, d_pixels, s_skip, d_skip and the
 *       source format (info->src).
 *
 * The vector masks are chosen so the vector path produces exactly the same
 * bits as the scalar path: red 0xF800, green six bits at 10..5, blue 0x001F.
 */
static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    vector unsigned char valpha = vec_splat_u8(0);
    /* reorders the source pixels into ARGB byte order */
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    /* gathers the green byte of each of the 8 pixels into the low byte of a halfword */
    vector unsigned char vgmerge = VECUINT8_LITERAL(
        0x00, 0x02, 0x00, 0x06,
        0x00, 0x0a, 0x00, 0x0e,
        0x00, 0x12, 0x00, 0x16,
        0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
     * Blue occupies only the low five bits of the packed 1555 pixel; bit 5
     * is the lowest green bit of that pixel and must not leak into the
     * 565 green field, so the mask is 0x001f rather than 0x003f.
     */
    vector unsigned short v1f = VECUINT16_LITERAL(
        0x001f, 0x001f, 0x001f, 0x001f,
        0x001f, 0x001f, 0x001f, 0x001f);
    vector unsigned short vfc = VECUINT16_LITERAL(
        0x00fc, 0x00fc, 0x00fc, 0x00fc,
        0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* 0xF8 in every byte, shifted up by 8 below to give 0xF800 per halfword */
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* scalar conversion of one pixel at a time while `condition` holds */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                ((sG << 3) & 0x000007E0) | \
                                ((sB >> 3) & 0x0000001F)); \
            dst += 2; \
            src += 4; \
            widthvar--; \
        }

        /* do scalar until we can align... */
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* pixels left over for the scalar tail */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            /* first four source pixels, realigned and swizzled to ARGB */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            /* next four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            /* 1555 */
            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
            /* green: top six bits of the 8-bit channel, moved to bits 10..5 */
            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            /* red: bits 14..10 of the 1555 pixel moved to bits 15..11 */
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            /* blue: low five bits of the 1555 pixel */
            vbpixel = vec_and(vpixel, v1f);
            vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
            /* 565 */
            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* scalar tail: the pixels that did not fill a whole vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }


}
icculus@1047
   250
icculus@1047
   251
/*
 * AltiVec blit from a 16-bit RGB565 source to a 32-bit destination.
 *
 * Each row is processed in three phases:
 *   1. scalar pixels until dst reaches a 16-byte boundary,
 *   2. 8 pixels per iteration with AltiVec (16 source bytes -> 32 dest bytes),
 *   3. scalar pixels for the (width % 8) leftover.
 *
 * info: reads d_width, d_height, s_pixels, d_pixels, s_skip, d_skip and
 *       both pixel formats.  The alpha written is srcfmt->alpha when the
 *       destination has an alpha mask, otherwise 0.
 *
 * The vector path masks red with 0xF800 and green with 0xFC00 so that it
 * yields the same channel values as the scalar path (0xf8 / 0xfc / 0xf8).
 */
static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    SDL_PixelFormat *dstfmt = info->dst;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned short vfc00;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        Operands: (vR, valpha).  Builds A R 0 0 for pixels 0-3:
        0x10 is the alpha byte,
        even indices 0x00..0x06 are the red (high) bytes of vR,
        0x01 is a low byte of vR, which is zero after masking.
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
        0x10, 0x00, 0x01, 0x01,
        0x10, 0x02, 0x01, 0x01,
        0x10, 0x04, 0x01, 0x01,
        0x10, 0x06, 0x01, 0x01
    );
    /* same for pixels 4-7: the red index of every word is advanced by 8 */
    vector unsigned char vredalpha2 = (vector unsigned char) (
        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    );
    /*
        Operands: (A R 0 0 pixels, vB).  Keeps bytes 0-2 of each pixel and
        takes the blue (low) byte of vB, odd indices 0x11..0x17.
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x02, 0x11,
        0x04, 0x05, 0x06, 0x13,
        0x08, 0x09, 0x0a, 0x15,
        0x0c, 0x0d, 0x0e, 0x17
    );
    /* same for pixels 4-7 */
    vector unsigned char vblue2 = (vector unsigned char)(
        vec_add((vector unsigned int)vblue1, v8)
    );
    /*
        Operands: (A R 0 B pixels, vG).  Keeps bytes 0, 1 and 3 of each pixel
        and takes the green (high) byte of vG, even indices 0x10..0x16.
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x10, 0x03,
        0x04, 0x05, 0x12, 0x07,
        0x08, 0x09, 0x14, 0x0b,
        0x0c, 0x0d, 0x16, 0x0f
    );
    /* same for pixels 4-7 */
    vector unsigned char vgreen2 = (vector unsigned char)(
        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    );


    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /* 0xF8 in every byte, shifted up by 8 to give 0xF800 per halfword */
    vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    /* likewise 0xFC00 per halfword: the six green bits once moved to the top */
    vfc00 = (vector unsigned short)vec_splat_u8(-4);
    vfc00 = vec_sl(vfc00, vec_splat_u16(8));

    if (dstfmt->Amask && srcfmt->alpha) {
        /* write the alpha into byte 0, then replicate it to all 16 bytes */
        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    /* reorders ARGB into the destination's byte order */
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* scalar conversion of one pixel at a time while `condition` holds */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 8) & 0xf8; \
            sG = (Pixel >> 3) & 0xfc; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        /* do scalar until we can align... */
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* pixels left over for the scalar tail */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red in the high byte, low byte cleared */
            vR = vec_and((vector unsigned short)vsrc, vf800);
            /* blue ends up in the low byte as BBBBB000 */
            vB = vec_sl((vector unsigned short)vsrc, v3);
            /* green moved to the top of the high byte; drop the blue bits below it */
            vG = vec_and(vec_sl(vB, v2), vfc00);

            /* pixels 0-3 */
            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4-7 */
            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);


        /* scalar tail: the pixels that did not fill a whole vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }

}
icculus@1047
   394
icculus@1139
   395
icculus@1139
   396
/*
 * AltiVec blit from a 16-bit RGB555 source to a 32-bit destination.
 *
 * Each row is processed in three phases:
 *   1. scalar pixels until dst reaches a 16-byte boundary,
 *   2. 8 pixels per iteration with AltiVec (16 source bytes -> 32 dest bytes),
 *   3. scalar pixels for the (width % 8) leftover.
 *
 * info: reads d_width, d_height, s_pixels, d_pixels, s_skip, d_skip and
 *       both pixel formats.  The alpha written is srcfmt->alpha when the
 *       destination has an alpha mask, otherwise 0.
 *
 * The vector path masks red and green with 0xF800 so that it yields the
 * same channel values as the scalar path (0xf8 for every channel).
 */
static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    SDL_PixelFormat *dstfmt = info->dst;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        Operands: (vR, valpha).  Builds A R 0 0 for pixels 0-3:
        0x10 is the alpha byte,
        even indices 0x00..0x06 are the red (high) bytes of vR,
        0x01 is a low byte of vR, which is zero after masking.
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
        0x10, 0x00, 0x01, 0x01,
        0x10, 0x02, 0x01, 0x01,
        0x10, 0x04, 0x01, 0x01,
        0x10, 0x06, 0x01, 0x01
    );
    /* same for pixels 4-7: the red index of every word is advanced by 8 */
    vector unsigned char vredalpha2 = (vector unsigned char)(
        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    );
    /*
        Operands: (A R 0 0 pixels, vB).  Keeps bytes 0-2 of each pixel and
        takes the blue (low) byte of vB, odd indices 0x11..0x17.
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x02, 0x11,
        0x04, 0x05, 0x06, 0x13,
        0x08, 0x09, 0x0a, 0x15,
        0x0c, 0x0d, 0x0e, 0x17
    );
    /* same for pixels 4-7 */
    vector unsigned char vblue2 = (vector unsigned char)(
        vec_add((vector unsigned int)vblue1, v8)
    );
    /*
        Operands: (A R 0 B pixels, vG).  Keeps bytes 0, 1 and 3 of each pixel
        and takes the green (high) byte of vG, even indices 0x10..0x16.
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x10, 0x03,
        0x04, 0x05, 0x12, 0x07,
        0x08, 0x09, 0x14, 0x0b,
        0x0c, 0x0d, 0x16, 0x0f
    );
    /* same for pixels 4-7 */
    vector unsigned char vgreen2 = (vector unsigned char)(
        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    );


    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /* 0xF8 in every byte, shifted up by 8 to give 0xF800 per halfword */
    vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    if (dstfmt->Amask && srcfmt->alpha) {
        /* write the alpha into byte 0, then replicate it to all 16 bytes */
        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    /* reorders ARGB into the destination's byte order */
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* scalar conversion of one pixel at a time while `condition` holds */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 7) & 0xf8; \
            sG = (Pixel >> 2) & 0xf8; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        /* do scalar until we can align... */
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* pixels left over for the scalar tail */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red moved to the top of the high byte, low byte cleared */
            vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
            /* blue ends up in the low byte as BBBBB000 */
            vB = vec_sl((vector unsigned short)vsrc, v3);
            /* green moved to the top of the high byte; drop the blue bits below it */
            vG = vec_and(vec_sl(vB, v3), vf800);

            /* pixels 0-3 */
            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4-7 */
            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);


        /* scalar tail: the pixels that did not fill a whole vector */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }

}
icculus@1139
   539
icculus@1047
   540
static void BlitNtoNKey(SDL_BlitInfo *info);
icculus@1047
   541
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
icculus@1047
   542
static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
icculus@1047
   543
{
icculus@1047
   544
    int height = info->d_height;
icculus@1047
   545
    Uint32 *srcp = (Uint32 *) info->s_pixels;
icculus@1047
   546
    int srcskip = info->s_skip;
icculus@1047
   547
    Uint32 *dstp = (Uint32 *) info->d_pixels;
icculus@1047
   548
    int dstskip = info->d_skip;
icculus@1047
   549
    SDL_PixelFormat *srcfmt = info->src;
icculus@1047
   550
    int srcbpp = srcfmt->BytesPerPixel;
icculus@1047
   551
    SDL_PixelFormat *dstfmt = info->dst;
icculus@1047
   552
    int dstbpp = dstfmt->BytesPerPixel;
icculus@1047
   553
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
icculus@1047
   554
	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
icculus@1047
   555
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
icculus@1047
   556
	Uint32 ckey = info->src->colorkey;
icculus@1047
   557
    vector unsigned int valpha;
icculus@1047
   558
    vector unsigned char vpermute;
icculus@1047
   559
    vector unsigned char vzero;
icculus@1047
   560
    vector unsigned int vckey;
icculus@1047
   561
    vector unsigned int vrgbmask;
icculus@1047
   562
    vpermute = calc_swizzle32(srcfmt, dstfmt);
icculus@1047
   563
    if (info->d_width < 16) {
icculus@1047
   564
        if(copy_alpha) {
slouken@1487
   565
            BlitNtoNKeyCopyAlpha(info);
icculus@1047
   566
        } else {
slouken@1487
   567
            BlitNtoNKey(info);
icculus@1047
   568
        }
slouken@1487
   569
        return;
icculus@1047
   570
    }
icculus@1047
   571
    vzero = vec_splat_u8(0);
icculus@1047
   572
    if (alpha) {
icculus@1047
   573
        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
icculus@1047
   574
        valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
icculus@1047
   575
    } else {
icculus@1047
   576
        valpha = (vector unsigned int)vzero;
icculus@1047
   577
    }
icculus@1047
   578
    ckey &= rgbmask;
icculus@1162
   579
    ((unsigned int *)(char*)&vckey)[0] = ckey;
icculus@1047
   580
    vckey = vec_splat(vckey, 0);
icculus@1162
   581
    ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
icculus@1047
   582
    vrgbmask = vec_splat(vrgbmask, 0);
icculus@1047
   583
icculus@1047
   584
    while (height--) {
icculus@1047
   585
#define ONE_PIXEL_BLEND(condition, widthvar) \
icculus@1047
   586
        if (copy_alpha) { \
icculus@1047
   587
            while (condition) { \
icculus@1162
   588
                Uint32 Pixel; \
icculus@1047
   589
                unsigned sR, sG, sB, sA; \
icculus@1162
   590
                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
icculus@1047
   591
                          sR, sG, sB, sA); \
icculus@1162
   592
                if ( (Pixel & rgbmask) != ckey ) { \
icculus@1047
   593
                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
icculus@1047
   594
                            sR, sG, sB, sA); \
icculus@1047
   595
                } \
icculus@1162
   596
                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
icculus@1162
   597
                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
icculus@1047
   598
                widthvar--; \
icculus@1047
   599
            } \
icculus@1047
   600
        } else { \
icculus@1047
   601
            while (condition) { \
icculus@1162
   602
                Uint32 Pixel; \
icculus@1047
   603
                unsigned sR, sG, sB; \
icculus@1162
   604
                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
icculus@1162
   605
                if ( Pixel != ckey ) { \
icculus@1162
   606
                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
icculus@1047
   607
                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
icculus@1047
   608
                              sR, sG, sB, alpha); \
icculus@1047
   609
                } \
icculus@1162
   610
                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
icculus@1162
   611
                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
icculus@1047
   612
                widthvar--; \
icculus@1047
   613
            } \
icculus@1047
   614
        }
icculus@1047
   615
        int width = info->d_width;
icculus@1047
   616
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
icculus@1047
   617
        assert(width > 0);
icculus@1047
   618
        if (width > 0) {
icculus@1047
   619
            int extrawidth = (width % 4);
icculus@1047
   620
            vector unsigned char valigner = VEC_ALIGNER(srcp);
icculus@1047
   621
            vector unsigned int vs = vec_ld(0, srcp);
icculus@1047
   622
            width -= extrawidth;
icculus@1047
   623
            assert(width >= 4);
icculus@1047
   624
            while (width) {
icculus@1047
   625
                vector unsigned char vsel;
icculus@1047
   626
                vector unsigned int vd;
icculus@1047
   627
                vector unsigned int voverflow = vec_ld(15, srcp);
icculus@1047
   628
                /* load the source vec */
icculus@1047
   629
                vs = vec_perm(vs, voverflow, valigner);
icculus@1047
   630
                /* vsel is set for items that match the key */
icculus@1047
   631
                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
icculus@1047
   632
                vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
icculus@1047
   633
                /* permute the src vec to the dest format */
icculus@1047
   634
                vs = vec_perm(vs, valpha, vpermute);
icculus@1047
   635
                /* load the destination vec */
icculus@1047
   636
                vd = vec_ld(0, dstp);
icculus@1047
   637
                /* select the source and dest into vs */
icculus@1047
   638
                vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
icculus@1047
   639
                
icculus@1047
   640
                vec_st(vd, 0, dstp);
icculus@1047
   641
                srcp += 4;
icculus@1047
   642
                width -= 4;
icculus@1047
   643
                dstp += 4;
icculus@1047
   644
                vs = voverflow;
icculus@1047
   645
            }
icculus@1047
   646
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
icculus@1047
   647
#undef ONE_PIXEL_BLEND
icculus@1047
   648
            srcp += srcskip >> 2;
icculus@1047
   649
            dstp += dstskip >> 2;
icculus@1047
   650
        }
icculus@1047
   651
    }
icculus@1047
   652
}
icculus@1047
   653
icculus@1047
   654
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
icculus@1047
   655
/* Use this on a G5 */
icculus@1047
   656
static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
icculus@1047
   657
{
icculus@1047
   658
    int height = info->d_height;
icculus@1047
   659
    Uint32 *src = (Uint32 *) info->s_pixels;
icculus@1047
   660
    int srcskip = info->s_skip;
icculus@1047
   661
    Uint32 *dst = (Uint32 *) info->d_pixels;
icculus@1047
   662
    int dstskip = info->d_skip;
icculus@1047
   663
    SDL_PixelFormat *srcfmt = info->src;
icculus@1047
   664
    SDL_PixelFormat *dstfmt = info->dst;
icculus@1047
   665
    vector unsigned int vzero = vec_splat_u32(0);
icculus@1047
   666
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
icculus@1047
   667
    if (dstfmt->Amask && !srcfmt->Amask) {
icculus@1047
   668
        if (srcfmt->alpha) {
icculus@1047
   669
            vector unsigned char valpha;
icculus@1047
   670
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
icculus@1047
   671
            vzero = (vector unsigned int)vec_splat(valpha, 0);
icculus@1047
   672
        }
icculus@1047
   673
    }
icculus@1047
   674
slouken@1339
   675
    assert(srcfmt->BytesPerPixel == 4);
slouken@1339
   676
    assert(dstfmt->BytesPerPixel == 4);
icculus@1047
   677
icculus@1047
   678
    while (height--) {
icculus@1047
   679
        vector unsigned char valigner;
icculus@1047
   680
        vector unsigned int vbits;
icculus@1047
   681
        vector unsigned int voverflow;
icculus@1047
   682
        Uint32 bits;
icculus@1047
   683
        Uint8 r, g, b, a;
icculus@1047
   684
icculus@1047
   685
        int width = info->d_width;
icculus@1047
   686
        int extrawidth;
icculus@1047
   687
icculus@1047
   688
        /* do scalar until we can align... */
icculus@1047
   689
        while ((UNALIGNED_PTR(dst)) && (width)) {
icculus@1047
   690
            bits = *(src++);
icculus@1047
   691
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
icculus@1047
   692
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   693
            width--;
icculus@1047
   694
        }
icculus@1047
   695
icculus@1047
   696
        /* After all that work, here's the vector part! */
icculus@1047
   697
        extrawidth = (width % 4);
icculus@1047
   698
        width -= extrawidth;
icculus@1047
   699
        valigner = VEC_ALIGNER(src);
icculus@1047
   700
        vbits = vec_ld(0, src);
icculus@1047
   701
icculus@1047
   702
       while (width) {
icculus@1047
   703
            voverflow = vec_ld(15, src);
icculus@1047
   704
            src += 4;
icculus@1047
   705
            width -= 4;
icculus@1047
   706
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
icculus@1047
   707
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
icculus@1047
   708
            vec_st(vbits, 0, dst);  /* store it back out. */
icculus@1047
   709
            dst += 4;
icculus@1047
   710
            vbits = voverflow;
icculus@1047
   711
        }
icculus@1047
   712
icculus@1047
   713
        assert(width == 0);
icculus@1047
   714
icculus@1047
   715
        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
icculus@1047
   716
        while (extrawidth) {
icculus@1047
   717
            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
icculus@1047
   718
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
icculus@1047
   719
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   720
            extrawidth--;
icculus@1047
   721
        }
icculus@1047
   722
icculus@1047
   723
        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
icculus@1047
   724
        dst += dstskip >> 2;
icculus@1047
   725
    }
icculus@1047
   726
icculus@1047
   727
}
icculus@1047
   728
icculus@1047
   729
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
icculus@1047
   730
/* Use this on a G4 */
icculus@1047
   731
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
icculus@1047
   732
{
icculus@1047
   733
    const int scalar_dst_lead = sizeof (Uint32) * 4;
icculus@1047
   734
    const int vector_dst_lead = sizeof (Uint32) * 16;
icculus@1047
   735
icculus@1047
   736
    int height = info->d_height;
icculus@1047
   737
    Uint32 *src = (Uint32 *) info->s_pixels;
icculus@1047
   738
    int srcskip = info->s_skip;
icculus@1047
   739
    Uint32 *dst = (Uint32 *) info->d_pixels;
icculus@1047
   740
    int dstskip = info->d_skip;
icculus@1047
   741
    SDL_PixelFormat *srcfmt = info->src;
icculus@1047
   742
    SDL_PixelFormat *dstfmt = info->dst;
icculus@1047
   743
    vector unsigned int vzero = vec_splat_u32(0);
icculus@1047
   744
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
icculus@1047
   745
    if (dstfmt->Amask && !srcfmt->Amask) {
icculus@1047
   746
        if (srcfmt->alpha) {
icculus@1047
   747
            vector unsigned char valpha;
icculus@1047
   748
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
icculus@1047
   749
            vzero = (vector unsigned int)vec_splat(valpha, 0);
icculus@1047
   750
        }
icculus@1047
   751
    }
icculus@1047
   752
slouken@1339
   753
    assert(srcfmt->BytesPerPixel == 4);
slouken@1339
   754
    assert(dstfmt->BytesPerPixel == 4);
icculus@1047
   755
icculus@1047
   756
    while (height--) {
icculus@1047
   757
        vector unsigned char valigner;
icculus@1047
   758
        vector unsigned int vbits;
icculus@1047
   759
        vector unsigned int voverflow;
icculus@1047
   760
        Uint32 bits;
icculus@1047
   761
        Uint8 r, g, b, a;
icculus@1047
   762
icculus@1047
   763
        int width = info->d_width;
icculus@1047
   764
        int extrawidth;
icculus@1047
   765
icculus@1047
   766
        /* do scalar until we can align... */
icculus@1047
   767
        while ((UNALIGNED_PTR(dst)) && (width)) {
icculus@1047
   768
            vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
icculus@1047
   769
            vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
icculus@1047
   770
            bits = *(src++);
icculus@1047
   771
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
icculus@1047
   772
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   773
            width--;
icculus@1047
   774
        }
icculus@1047
   775
icculus@1047
   776
        /* After all that work, here's the vector part! */
icculus@1047
   777
        extrawidth = (width % 4);
icculus@1047
   778
        width -= extrawidth;
icculus@1047
   779
        valigner = VEC_ALIGNER(src);
icculus@1047
   780
        vbits = vec_ld(0, src);
icculus@1047
   781
icculus@1047
   782
        while (width) {
icculus@1047
   783
            vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
icculus@1047
   784
            vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
icculus@1047
   785
            voverflow = vec_ld(15, src);
icculus@1047
   786
            src += 4;
icculus@1047
   787
            width -= 4;
icculus@1047
   788
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
icculus@1047
   789
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
icculus@1047
   790
            vec_st(vbits, 0, dst);  /* store it back out. */
icculus@1047
   791
            dst += 4;
icculus@1047
   792
            vbits = voverflow;
icculus@1047
   793
        }
icculus@1047
   794
        
icculus@1047
   795
        assert(width == 0);
icculus@1047
   796
icculus@1047
   797
        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
icculus@1047
   798
        while (extrawidth) {
icculus@1047
   799
            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
icculus@1047
   800
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
icculus@1047
   801
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   802
            extrawidth--;
icculus@1047
   803
        }
icculus@1047
   804
icculus@1047
   805
        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
icculus@1047
   806
        dst += dstskip >> 2;
icculus@1047
   807
    }
icculus@1047
   808
icculus@1047
   809
    vec_dss(DST_CHAN_SRC);
icculus@1047
   810
    vec_dss(DST_CHAN_DEST);
icculus@1047
   811
}
icculus@1047
   812
icculus@1047
   813
/*
 * Return the bitmask of blitter features, computing it on first use and
 * caching it in a function-local static afterwards.
 *
 *   1  has-MMX
 *   2  has-AltiVec
 *   4  dont-use-prefetch
 *
 * When the environment variable SDL_ALTIVEC_BLIT_FEATURES is set, its
 * value (parsed with "%u") replaces the detected mask; this is an
 * override for testing.
 */
static Uint32 GetBlitFeatures( void )
{
    /* 0xffffffff marks "not computed yet". */
    static Uint32 features = 0xffffffff;
    char *env_value;
    Uint32 detected;

    if (features != 0xffffffff) {
        return features;
    }

    /* Provide an override for testing .. */
    env_value = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    if (env_value) {
        /* Start from 0 so an unparsable value leaves no feature set. */
        features = 0;
        SDL_sscanf(env_value, "%u", &features);
        return features;
    }

    detected = 0;
    /* Feature 1 is has-MMX */
    if (SDL_HasMMX()) {
        detected |= 1;
    }
    /* Feature 2 is has-AltiVec */
    if (SDL_HasAltiVec()) {
        detected |= 2;
    }
    /* Feature 4 is dont-use-prefetch */
    /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
    if (GetL3CacheSize() == 0) {
        detected |= 4;
    }

    features = detected;
    return features;
}
slouken@1795
   836
/* On Metrowerks compilers (__MWERKS__), turn the AltiVec programming model
   back off now that the AltiVec-specific functions above are finished. */
#if __MWERKS__
slouken@1795
   837
#pragma altivec_model off
slouken@1795
   838
#endif
icculus@1047
   839
/* NOTE(review): the #if paired with this #else opens earlier in the file,
   outside this excerpt -- presumably the AltiVec blitter guard; confirm. */
#else
icculus@1047
   840
/* Feature 1 is has-MMX */
icculus@1047
   841
/* Macro stand-in for the GetBlitFeatures() function defined in the other
   branch: the only bit it can report is 1, set when SDL_HasMMX() is true. */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
icculus@1047
   842
#endif
icculus@1047
   843
icculus@1053
   844
/* This is now endian dependent */
/* HI and LO are array indices (0 or 1).  The pixel-packing macros further
   down use them as src[HI] / src[LO] to choose which of two adjacent
   source elements supplies the high half and which the low half of the
   value being assembled. */
slouken@1443
   845
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
icculus@1053
   846
#define HI	1
icculus@1053
   847
#define LO	0
slouken@1443
   848
#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
icculus@1053
   849
#define HI	0
icculus@1053
   850
#define LO	1
icculus@1053
   851
#endif
icculus@1053
   852
slouken@1361
   853
/* When SDL_HERMES_BLITTERS is enabled, the Hermes headers are included in
   place of the C blitters that follow the #else below. */
#if SDL_HERMES_BLITTERS
slouken@0
   854
slouken@0
   855
/* Heheheh, we coerce Hermes into using SDL blit information */
/* The macros below are defined before the Hermes headers are included:
   HermesConverterInterface is mapped onto SDL_BlitInfo and
   HermesClearInterface onto void; STACKCALL expands to nothing.
   NOTE(review): how the headers consume X86_ASSEMBLER and STACKCALL is
   not visible here -- confirm against ../hermes. */
slouken@0
   856
#define X86_ASSEMBLER
slouken@0
   857
#define HermesConverterInterface	SDL_BlitInfo
slouken@0
   858
#define HermesClearInterface		void
slouken@0
   859
#define STACKCALL
slouken@0
   860
slouken@1361
   861
#include "../hermes/HeadMMX.h"
slouken@1361
   862
#include "../hermes/HeadX86.h"
slouken@0
   863
slouken@0
   864
/* Without Hermes: use the portable C blitters defined below. */
#else
slouken@0
   865
slouken@0
   866
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
slouken@0
   867
#define RGB888_RGB332(dst, src) { \
slouken@1428
   868
	dst = (Uint8)((((src)&0x00E00000)>>16)| \
slouken@1428
   869
	              (((src)&0x0000E000)>>11)| \
slouken@1428
   870
	              (((src)&0x000000C0)>>6)); \
slouken@0
   871
}
slouken@0
   872
static void Blit_RGB888_index8(SDL_BlitInfo *info)
slouken@0
   873
{
slouken@0
   874
#ifndef USE_DUFFS_LOOP
slouken@0
   875
	int c;
slouken@0
   876
#endif
slouken@0
   877
	int width, height;
slouken@0
   878
	Uint32 *src;
slouken@91
   879
	const Uint8 *map;
slouken@91
   880
	Uint8 *dst;
slouken@0
   881
	int srcskip, dstskip;
slouken@0
   882
slouken@0
   883
	/* Set up some basic variables */
slouken@0
   884
	width = info->d_width;
slouken@0
   885
	height = info->d_height;
slouken@0
   886
	src = (Uint32 *)info->s_pixels;
slouken@0
   887
	srcskip = info->s_skip/4;
slouken@0
   888
	dst = info->d_pixels;
slouken@0
   889
	dstskip = info->d_skip;
slouken@0
   890
	map = info->table;
slouken@0
   891
slouken@0
   892
	if ( map == NULL ) {
slouken@0
   893
		while ( height-- ) {
slouken@0
   894
#ifdef USE_DUFFS_LOOP
slouken@0
   895
			DUFFS_LOOP(
slouken@0
   896
				RGB888_RGB332(*dst++, *src);
slouken@0
   897
			, width);
slouken@0
   898
#else
slouken@0
   899
			for ( c=width/4; c; --c ) {
slouken@0
   900
				/* Pack RGB into 8bit pixel */
slouken@0
   901
				++src;
slouken@0
   902
				RGB888_RGB332(*dst++, *src);
slouken@0
   903
				++src;
slouken@0
   904
				RGB888_RGB332(*dst++, *src);
slouken@0
   905
				++src;
slouken@0
   906
				RGB888_RGB332(*dst++, *src);
slouken@0
   907
				++src;
slouken@0
   908
			}
slouken@91
   909
			switch ( width & 3 ) {
slouken@0
   910
				case 3:
slouken@0
   911
					RGB888_RGB332(*dst++, *src);
slouken@0
   912
					++src;
slouken@0
   913
				case 2:
slouken@0
   914
					RGB888_RGB332(*dst++, *src);
slouken@0
   915
					++src;
slouken@0
   916
				case 1:
slouken@0
   917
					RGB888_RGB332(*dst++, *src);
slouken@0
   918
					++src;
slouken@0
   919
			}
slouken@0
   920
#endif /* USE_DUFFS_LOOP */
slouken@0
   921
			src += srcskip;
slouken@0
   922
			dst += dstskip;
slouken@0
   923
		}
slouken@0
   924
	} else {
icculus@1162
   925
		int Pixel;
slouken@0
   926
slouken@0
   927
		while ( height-- ) {
slouken@0
   928
#ifdef USE_DUFFS_LOOP
slouken@0
   929
			DUFFS_LOOP(
icculus@1162
   930
				RGB888_RGB332(Pixel, *src);
icculus@1162
   931
				*dst++ = map[Pixel];
slouken@0
   932
				++src;
slouken@0
   933
			, width);
slouken@0
   934
#else
slouken@0
   935
			for ( c=width/4; c; --c ) {
slouken@0
   936
				/* Pack RGB into 8bit pixel */
icculus@1162
   937
				RGB888_RGB332(Pixel, *src);
icculus@1162
   938
				*dst++ = map[Pixel];
slouken@0
   939
				++src;
icculus@1162
   940
				RGB888_RGB332(Pixel, *src);
icculus@1162
   941
				*dst++ = map[Pixel];
slouken@0
   942
				++src;
icculus@1162
   943
				RGB888_RGB332(Pixel, *src);
icculus@1162
   944
				*dst++ = map[Pixel];
slouken@0
   945
				++src;
icculus@1162
   946
				RGB888_RGB332(Pixel, *src);
icculus@1162
   947
				*dst++ = map[Pixel];
slouken@0
   948
				++src;
slouken@0
   949
			}
slouken@91
   950
			switch ( width & 3 ) {
slouken@0
   951
				case 3:
icculus@1162
   952
					RGB888_RGB332(Pixel, *src);
icculus@1162
   953
					*dst++ = map[Pixel];
slouken@0
   954
					++src;
slouken@0
   955
				case 2:
icculus@1162
   956
					RGB888_RGB332(Pixel, *src);
icculus@1162
   957
					*dst++ = map[Pixel];
slouken@0
   958
					++src;
slouken@0
   959
				case 1:
icculus@1162
   960
					RGB888_RGB332(Pixel, *src);
icculus@1162
   961
					*dst++ = map[Pixel];
slouken@0
   962
					++src;
slouken@0
   963
			}
slouken@0
   964
#endif /* USE_DUFFS_LOOP */
slouken@0
   965
			src += srcskip;
slouken@0
   966
			dst += dstskip;
slouken@0
   967
		}
slouken@0
   968
	}
slouken@0
   969
}
slouken@0
   970
/* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
slouken@0
   971
#define RGB888_RGB555(dst, src) { \
slouken@1428
   972
	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
slouken@1428
   973
	                            (((*src)&0x0000F800)>>6)| \
slouken@1428
   974
	                            (((*src)&0x000000F8)>>3)); \
slouken@0
   975
}
slouken@0
   976
#define RGB888_RGB555_TWO(dst, src) { \
slouken@0
   977
	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
slouken@0
   978
	                     (((src[HI])&0x0000F800)>>6)| \
slouken@0
   979
	                     (((src[HI])&0x000000F8)>>3))<<16)| \
slouken@0
   980
	                     (((src[LO])&0x00F80000)>>9)| \
slouken@0
   981
	                     (((src[LO])&0x0000F800)>>6)| \
slouken@0
   982
	                     (((src[LO])&0x000000F8)>>3); \
slouken@0
   983
}
slouken@0
   984
static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
slouken@0
   985
{
slouken@0
   986
#ifndef USE_DUFFS_LOOP
slouken@0
   987
	int c;
slouken@0
   988
#endif
slouken@0
   989
	int width, height;
slouken@0
   990
	Uint32 *src;
slouken@0
   991
	Uint16 *dst;
slouken@0
   992
	int srcskip, dstskip;
slouken@0
   993
slouken@0
   994
	/* Set up some basic variables */
slouken@0
   995
	width = info->d_width;
slouken@0
   996
	height = info->d_height;
slouken@0
   997
	src = (Uint32 *)info->s_pixels;
slouken@0
   998
	srcskip = info->s_skip/4;
slouken@0
   999
	dst = (Uint16 *)info->d_pixels;
slouken@0
  1000
	dstskip = info->d_skip/2;
slouken@0
  1001
slouken@0
  1002
#ifdef USE_DUFFS_LOOP
slouken@0
  1003
	while ( height-- ) {
slouken@0
  1004
		DUFFS_LOOP(
slouken@0
  1005
			RGB888_RGB555(dst, src);
slouken@0
  1006
			++src;
slouken@0
  1007
			++dst;
slouken@0
  1008
		, width);
slouken@0
  1009
		src += srcskip;
slouken@0
  1010
		dst += dstskip;
slouken@0
  1011
	}
slouken@0
  1012
#else
slouken@0
  1013
	/* Memory align at 4-byte boundary, if necessary */
slouken@0
  1014
	if ( (long)dst & 0x03 ) {
slouken@0
  1015
		/* Don't do anything if width is 0 */
slouken@0
  1016
		if ( width == 0 ) {
slouken@0
  1017
			return;
slouken@0
  1018
		}
slouken@0
  1019
		--width;
slouken@0
  1020
slouken@0
  1021
		while ( height-- ) {
slouken@0
  1022
			/* Perform copy alignment */
slouken@0
  1023
			RGB888_RGB555(dst, src);
slouken@0
  1024
			++src;
slouken@0
  1025
			++dst;
slouken@0
  1026
slouken@0
  1027
			/* Copy in 4 pixel chunks */
slouken@0
  1028
			for ( c=width/4; c; --c ) {
slouken@0
  1029
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1030
				src += 2;
slouken@0
  1031
				dst += 2;
slouken@0
  1032
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1033
				src += 2;
slouken@0
  1034
				dst += 2;
slouken@0
  1035
			}
slouken@0
  1036
			/* Get any leftovers */
slouken@91
  1037
			switch (width & 3) {
slouken@0
  1038
				case 3:
slouken@0
  1039
					RGB888_RGB555(dst, src);
slouken@0
  1040
					++src;
slouken@0
  1041
					++dst;
slouken@0
  1042
				case 2:
slouken@0
  1043
					RGB888_RGB555_TWO(dst, src);
slouken@0
  1044
					src += 2;
slouken@0
  1045
					dst += 2;
slouken@0
  1046
					break;
slouken@0
  1047
				case 1:
slouken@0
  1048
					RGB888_RGB555(dst, src);
slouken@0
  1049
					++src;
slouken@0
  1050
					++dst;
slouken@0
  1051
					break;
slouken@0
  1052
			}
slouken@0
  1053
			src += srcskip;
slouken@0
  1054
			dst += dstskip;
slouken@0
  1055
		}
slouken@0
  1056
	} else { 
slouken@0
  1057
		while ( height-- ) {
slouken@0
  1058
			/* Copy in 4 pixel chunks */
slouken@0
  1059
			for ( c=width/4; c; --c ) {
slouken@0
  1060
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1061
				src += 2;
slouken@0
  1062
				dst += 2;
slouken@0
  1063
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1064
				src += 2;
slouken@0
  1065
				dst += 2;
slouken@0
  1066
			}
slouken@0
  1067
			/* Get any leftovers */
slouken@91
  1068
			switch (width & 3) {
slouken@0
  1069
				case 3:
slouken@0
  1070
					RGB888_RGB555(dst, src);
slouken@0
  1071
					++src;
slouken@0
  1072
					++dst;
slouken@0
  1073
				case 2:
slouken@0
  1074
					RGB888_RGB555_TWO(dst, src);
slouken@0
  1075
					src += 2;
slouken@0
  1076
					dst += 2;
slouken@0
  1077
					break;
slouken@0
  1078
				case 1:
slouken@0
  1079
					RGB888_RGB555(dst, src);
slouken@0
  1080
					++src;
slouken@0
  1081
					++dst;
slouken@0
  1082
					break;
slouken@0
  1083
			}
slouken@0
  1084
			src += srcskip;
slouken@0
  1085
			dst += dstskip;
slouken@0
  1086
		}
slouken@0
  1087
	}
slouken@0
  1088
#endif /* USE_DUFFS_LOOP */
slouken@0
  1089
}
slouken@0
  1090
/* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
slouken@0
  1091
#define RGB888_RGB565(dst, src) { \
slouken@1428
  1092
	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
slouken@1428
  1093
	                            (((*src)&0x0000FC00)>>5)| \
slouken@1428
  1094
	                            (((*src)&0x000000F8)>>3)); \
slouken@0
  1095
}
slouken@0
  1096
#define RGB888_RGB565_TWO(dst, src) { \
slouken@0
  1097
	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
slouken@0
  1098
	                     (((src[HI])&0x0000FC00)>>5)| \
slouken@0
  1099
	                     (((src[HI])&0x000000F8)>>3))<<16)| \
slouken@0
  1100
	                     (((src[LO])&0x00F80000)>>8)| \
slouken@0
  1101
	                     (((src[LO])&0x0000FC00)>>5)| \
slouken@0
  1102
	                     (((src[LO])&0x000000F8)>>3); \
slouken@0
  1103
}
slouken@0
  1104
static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
slouken@0
  1105
{
slouken@0
  1106
#ifndef USE_DUFFS_LOOP
slouken@0
  1107
	int c;
slouken@0
  1108
#endif
slouken@0
  1109
	int width, height;
slouken@0
  1110
	Uint32 *src;
slouken@0
  1111
	Uint16 *dst;
slouken@0
  1112
	int srcskip, dstskip;
slouken@0
  1113
slouken@0
  1114
	/* Set up some basic variables */
slouken@0
  1115
	width = info->d_width;
slouken@0
  1116
	height = info->d_height;
slouken@0
  1117
	src = (Uint32 *)info->s_pixels;
slouken@0
  1118
	srcskip = info->s_skip/4;
slouken@0
  1119
	dst = (Uint16 *)info->d_pixels;
slouken@0
  1120
	dstskip = info->d_skip/2;
slouken@0
  1121
slouken@0
  1122
#ifdef USE_DUFFS_LOOP
slouken@0
  1123
	while ( height-- ) {
slouken@0
  1124
		DUFFS_LOOP(
slouken@0
  1125
			RGB888_RGB565(dst, src);
slouken@0
  1126
			++src;
slouken@0
  1127
			++dst;
slouken@0
  1128
		, width);
slouken@0
  1129
		src += srcskip;
slouken@0
  1130
		dst += dstskip;
slouken@0
  1131
	}
slouken@0
  1132
#else
slouken@0
  1133
	/* Memory align at 4-byte boundary, if necessary */
slouken@0
  1134
	if ( (long)dst & 0x03 ) {
slouken@0
  1135
		/* Don't do anything if width is 0 */
slouken@0
  1136
		if ( width == 0 ) {
slouken@0
  1137
			return;
slouken@0
  1138
		}
slouken@0
  1139
		--width;
slouken@0
  1140
slouken@0
  1141
		while ( height-- ) {
slouken@0
  1142
			/* Perform copy alignment */
slouken@0
  1143
			RGB888_RGB565(dst, src);
slouken@0
  1144
			++src;
slouken@0
  1145
			++dst;
slouken@0
  1146
slouken@0
  1147
			/* Copy in 4 pixel chunks */
slouken@0
  1148
			for ( c=width/4; c; --c ) {
slouken@0
  1149
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1150
				src += 2;
slouken@0
  1151
				dst += 2;
slouken@0
  1152
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1153
				src += 2;
slouken@0
  1154
				dst += 2;
slouken@0
  1155
			}
slouken@0
  1156
			/* Get any leftovers */
slouken@91
  1157
			switch (width & 3) {
slouken@0
  1158
				case 3:
slouken@0
  1159
					RGB888_RGB565(dst, src);
slouken@0
  1160
					++src;
slouken@0
  1161
					++dst;
slouken@0
  1162
				case 2:
slouken@0
  1163
					RGB888_RGB565_TWO(dst, src);
slouken@0
  1164
					src += 2;
slouken@0
  1165
					dst += 2;
slouken@0
  1166
					break;
slouken@0
  1167
				case 1:
slouken@0
  1168
					RGB888_RGB565(dst, src);
slouken@0
  1169
					++src;
slouken@0
  1170
					++dst;
slouken@0
  1171
					break;
slouken@0
  1172
			}
slouken@0
  1173
			src += srcskip;
slouken@0
  1174
			dst += dstskip;
slouken@0
  1175
		}
slouken@0
  1176
	} else { 
slouken@0
  1177
		while ( height-- ) {
slouken@0
  1178
			/* Copy in 4 pixel chunks */
slouken@0
  1179
			for ( c=width/4; c; --c ) {
slouken@0
  1180
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1181
				src += 2;
slouken@0
  1182
				dst += 2;
slouken@0
  1183
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1184
				src += 2;
slouken@0
  1185
				dst += 2;
slouken@0
  1186
			}
slouken@0
  1187
			/* Get any leftovers */
slouken@91
  1188
			switch (width & 3) {
slouken@0
  1189
				case 3:
slouken@0
  1190
					RGB888_RGB565(dst, src);
slouken@0
  1191
					++src;
slouken@0
  1192
					++dst;
slouken@0
  1193
				case 2:
slouken@0
  1194
					RGB888_RGB565_TWO(dst, src);
slouken@0
  1195
					src += 2;
slouken@0
  1196
					dst += 2;
slouken@0
  1197
					break;
slouken@0
  1198
				case 1:
slouken@0
  1199
					RGB888_RGB565(dst, src);
slouken@0
  1200
					++src;
slouken@0
  1201
					++dst;
slouken@0
  1202
					break;
slouken@0
  1203
			}
slouken@0
  1204
			src += srcskip;
slouken@0
  1205
			dst += dstskip;
slouken@0
  1206
		}
slouken@0
  1207
	}
slouken@0
  1208
#endif /* USE_DUFFS_LOOP */
slouken@0
  1209
}
slouken@0
  1210
slouken@1361
  1211
#endif /* SDL_HERMES_BLITTERS */
slouken@0
  1212
slouken@0
  1213
slouken@0
  1214
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
icculus@1047
  1215
#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
slouken@91
  1216
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
slouken@0
  1217
{
slouken@0
  1218
#ifndef USE_DUFFS_LOOP
slouken@0
  1219
	int c;
slouken@0
  1220
#endif
slouken@0
  1221
	int width, height;
slouken@0
  1222
	Uint8 *src;
slouken@0
  1223
	Uint32 *dst;
slouken@0
  1224
	int srcskip, dstskip;
slouken@0
  1225
slouken@0
  1226
	/* Set up some basic variables */
slouken@0
  1227
	width = info->d_width;
slouken@0
  1228
	height = info->d_height;
slouken@0
  1229
	src = (Uint8 *)info->s_pixels;
slouken@0
  1230
	srcskip = info->s_skip;
slouken@0
  1231
	dst = (Uint32 *)info->d_pixels;
slouken@0
  1232
	dstskip = info->d_skip/4;
slouken@0
  1233
slouken@0
  1234
#ifdef USE_DUFFS_LOOP
slouken@0
  1235
	while ( height-- ) {
slouken@0
  1236
		DUFFS_LOOP(
slouken@0
  1237
		{
slouken@0
  1238
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1239
			src += 2;
slouken@0
  1240
		},
slouken@0
  1241
		width);
slouken@0
  1242
		src += srcskip;
slouken@0
  1243
		dst += dstskip;
slouken@0
  1244
	}
slouken@0
  1245
#else
slouken@0
  1246
	while ( height-- ) {
slouken@0
  1247
		/* Copy in 4 pixel chunks */
slouken@0
  1248
		for ( c=width/4; c; --c ) {
slouken@0
  1249
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1250
			src += 2;
slouken@0
  1251
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1252
			src += 2;
slouken@0
  1253
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1254
			src += 2;
slouken@0
  1255
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1256
			src += 2;
slouken@0
  1257
		}
slouken@0
  1258
		/* Get any leftovers */
slouken@91
  1259
		switch (width & 3) {
slouken@0
  1260
			case 3:
slouken@0
  1261
				*dst++ = RGB565_32(dst, src, map);
slouken@0
  1262
				src += 2;
slouken@0
  1263
			case 2:
slouken@0
  1264
				*dst++ = RGB565_32(dst, src, map);
slouken@0
  1265
				src += 2;
slouken@0
  1266
			case 1:
slouken@0
  1267
				*dst++ = RGB565_32(dst, src, map);
slouken@0
  1268
				src += 2;
slouken@0
  1269
				break;
slouken@0
  1270
		}
slouken@0
  1271
		src += srcskip;
slouken@0
  1272
		dst += dstskip;
slouken@0
  1273
	}
slouken@0
  1274
#endif /* USE_DUFFS_LOOP */
slouken@0
  1275
}
slouken@0
  1276
slouken@0
  1277
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
slouken@91
  1278
/*
 * 512-entry lookup table (256 pairs of words) that Blit_RGB565_ARGB8888
 * passes to Blit_RGB565_32.  Judging by the values, each pair holds two
 * partial ARGB8888 pixels: the first word carries the blue byte (bits 0-7)
 * and the low green bits, the second word carries the constant 0xff alpha
 * byte, the red byte (bits 16-23) and the high green bits.
 * NOTE(review): how a source pixel selects and combines the two words is
 * decided by the RGB565_32 macro, defined outside this section -- confirm.
 */
static const Uint32 RGB565_ARGB8888_LUT[512] = {
	0x00000000, 0xff000000, 0x00000008, 0xff002000,
	0x00000010, 0xff004000, 0x00000018, 0xff006100,
	0x00000020, 0xff008100, 0x00000029, 0xff00a100,
	0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
	0x00000041, 0xff080000, 0x0000004a, 0xff082000,
	0x00000052, 0xff084000, 0x0000005a, 0xff086100,
	0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
	0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
	0x00000083, 0xff100000, 0x0000008b, 0xff102000,
	0x00000094, 0xff104000, 0x0000009c, 0xff106100,
	0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
	0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
	0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
	0x000000d5, 0xff184000, 0x000000de, 0xff186100,
	0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
	0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
	0x00000400, 0xff200000, 0x00000408, 0xff202000,
	0x00000410, 0xff204000, 0x00000418, 0xff206100,
	0x00000420, 0xff208100, 0x00000429, 0xff20a100,
	0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
	0x00000441, 0xff290000, 0x0000044a, 0xff292000,
	0x00000452, 0xff294000, 0x0000045a, 0xff296100,
	0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
	0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
	0x00000483, 0xff310000, 0x0000048b, 0xff312000,
	0x00000494, 0xff314000, 0x0000049c, 0xff316100,
	0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
	0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
	0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
	0x000004d5, 0xff394000, 0x000004de, 0xff396100,
	0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
	0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
	0x00000800, 0xff410000, 0x00000808, 0xff412000,
	0x00000810, 0xff414000, 0x00000818, 0xff416100,
	0x00000820, 0xff418100, 0x00000829, 0xff41a100,
	0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
	0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
	0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
	0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
	0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
	0x00000883, 0xff520000, 0x0000088b, 0xff522000,
	0x00000894, 0xff524000, 0x0000089c, 0xff526100,
	0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
	0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
	0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
	0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
	0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
	0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
	0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
	0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
	0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
	0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
	0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
	0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
	0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
	0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
	0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
	0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
	0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
	0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
	0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
	0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
	0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
	0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
	0x00001000, 0xff830000, 0x00001008, 0xff832000,
	0x00001010, 0xff834000, 0x00001018, 0xff836100,
	0x00001020, 0xff838100, 0x00001029, 0xff83a100,
	0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
	0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
	0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
	0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
	0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
	0x00001083, 0xff940000, 0x0000108b, 0xff942000,
	0x00001094, 0xff944000, 0x0000109c, 0xff946100,
	0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
	0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
	0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
	0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
	0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
	0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
	0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
	0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
	0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
	0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
	0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
	0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
	0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
	0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
	0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
	0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
	0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
	0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
	0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
	0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
	0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
	0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
	0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
	0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
	0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
	0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
	0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
	0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
	0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
	0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
	0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
	0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
	0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
	0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
	0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
	0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
	0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
	0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
	0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
	0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
	0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
	0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
	0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
	0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
	0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
	0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
	0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
	0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
	0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
	0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
	0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
	0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
	0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
	0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
};
slouken@0
  1408
/*
 * Blit RGB 5-6-5 pixels to an ARGB 8-8-8-8 destination by handing the blit
 * description and the matching lookup table to Blit_RGB565_32.
 */
static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
slouken@0
  1412
slouken@0
  1413
/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
slouken@91
  1414
/*
 * 512-entry lookup table (256 pairs of words) that Blit_RGB565_ABGR8888
 * passes to Blit_RGB565_32.  Judging by the values, each pair holds two
 * partial ABGR8888 pixels: the first word carries the constant 0xff alpha
 * byte, the blue byte (bits 16-23) and the low green bits, the second word
 * carries the red byte (bits 0-7) and the high green bits.
 * NOTE(review): how a source pixel selects and combines the two words is
 * decided by the RGB565_32 macro, defined outside this section -- confirm.
 */
static const Uint32 RGB565_ABGR8888_LUT[512] = {
	0xff000000, 0x00000000, 0xff080000, 0x00002000,
	0xff100000, 0x00004000, 0xff180000, 0x00006100,
	0xff200000, 0x00008100, 0xff290000, 0x0000a100,
	0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
	0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
	0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
	0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
	0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
	0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
	0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
	0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
	0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
	0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
	0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
	0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
	0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
	0xff000400, 0x00000020, 0xff080400, 0x00002020,
	0xff100400, 0x00004020, 0xff180400, 0x00006120,
	0xff200400, 0x00008120, 0xff290400, 0x0000a120,
	0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
	0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
	0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
	0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
	0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
	0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
	0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
	0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
	0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
	0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
	0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
	0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
	0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
	0xff000800, 0x00000041, 0xff080800, 0x00002041,
	0xff100800, 0x00004041, 0xff180800, 0x00006141,
	0xff200800, 0x00008141, 0xff290800, 0x0000a141,
	0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
	0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
	0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
	0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
	0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
	0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
	0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
	0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
	0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
	0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
	0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
	0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
	0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
	0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
	0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
	0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
	0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
	0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
	0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
	0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
	0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
	0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
	0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
	0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
	0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
	0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
	0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
	0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
	0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
	0xff001000, 0x00000083, 0xff081000, 0x00002083,
	0xff101000, 0x00004083, 0xff181000, 0x00006183,
	0xff201000, 0x00008183, 0xff291000, 0x0000a183,
	0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
	0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
	0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
	0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
	0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
	0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
	0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
	0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
	0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
	0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
	0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
	0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
	0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
	0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
	0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
	0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
	0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
	0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
	0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
	0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
	0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
	0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
	0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
	0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
	0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
	0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
	0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
	0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
	0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
	0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
	0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
	0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
	0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
	0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
	0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
	0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
	0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
	0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
	0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
	0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
	0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
	0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
	0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
	0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
	0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
	0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
	0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
	0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
	0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
	0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
	0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
	0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
	0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
	0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
	0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
	0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
	0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
	0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
	0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
	0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
	0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
};
slouken@0
  1544
/*
 * Blit RGB 5-6-5 pixels to an ABGR 8-8-8-8 destination by handing the blit
 * description and the matching lookup table to Blit_RGB565_32.
 */
static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
slouken@0
  1548
slouken@0
  1549
/* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
slouken@91
  1550
/*
 * 512-entry lookup table (256 pairs of words) that Blit_RGB565_RGBA8888
 * passes to Blit_RGB565_32.  Judging by the values, each pair holds two
 * partial RGBA8888 pixels: the first word carries the constant 0xff alpha
 * byte (bits 0-7), the blue byte (bits 8-15) and the low green bits, the
 * second word carries the red byte (bits 24-31) and the high green bits.
 * NOTE(review): how a source pixel selects and combines the two words is
 * decided by the RGB565_32 macro, defined outside this section -- confirm.
 */
static const Uint32 RGB565_RGBA8888_LUT[512] = {
	0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
	0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
	0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
	0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
	0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
	0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
	0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
	0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
	0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
	0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
	0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
	0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
	0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
	0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
	0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
	0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
	0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
	0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
	0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
	0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
	0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
	0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
	0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
	0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
	0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
	0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
	0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
	0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
	0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
	0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
	0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
	0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
	0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
	0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
	0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
	0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
	0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
	0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
	0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
	0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
	0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
	0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
	0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
	0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
	0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
	0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
	0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
	0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
	0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
	0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
	0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
	0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
	0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
	0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
	0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
	0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
	0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
	0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
	0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
	0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
	0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
	0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
	0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
	0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
	0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
	0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
	0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
	0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
	0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
	0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
	0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
	0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
	0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
	0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
	0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
	0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
	0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
	0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
	0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
	0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
	0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
	0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
	0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
	0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
	0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
	0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
	0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
	0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
	0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
	0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
	0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
	0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
	0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
	0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
	0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
	0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
	0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
	0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
	0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
	0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
	0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
	0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
	0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
	0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
	0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
	0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
	0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
	0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
	0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
	0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
	0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
	0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
	0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
	0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
	0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
	0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
	0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
	0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
	0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
	0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
	0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
	0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
	0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
	0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
	0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
	0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
	0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
	0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000
};
slouken@0
  1680
/*
 * Blit RGB 5-6-5 pixels to an RGBA 8-8-8-8 destination by handing the blit
 * description and the matching lookup table to Blit_RGB565_32.
 */
static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
slouken@0
  1684
slouken@0
  1685
/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
slouken@91
  1686
/*
 * 512-entry lookup table (256 pairs of words) that Blit_RGB565_BGRA8888
 * passes to Blit_RGB565_32.  Judging by the values, each pair holds two
 * partial BGRA8888 pixels: the first word carries the blue byte (bits 24-31)
 * and the low green bits, the second word carries the constant 0xff alpha
 * byte (bits 0-7), the red byte (bits 8-15) and the high green bits.
 * NOTE(review): how a source pixel selects and combines the two words is
 * decided by the RGB565_32 macro, defined outside this section -- confirm.
 */
static const Uint32 RGB565_BGRA8888_LUT[512] = {
	0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
	0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
	0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
	0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
	0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
	0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
	0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
	0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
	0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
	0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
	0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
	0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
	0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
	0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
	0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
	0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
	0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
	0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
	0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
	0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
	0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
	0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
	0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
	0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
	0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
	0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
	0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
	0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
	0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
	0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
	0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
	0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
	0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
	0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
	0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
	0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
	0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
	0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
	0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
	0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
	0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
	0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
	0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
	0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
	0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
	0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
	0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
	0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
	0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
	0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
	0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
	0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
	0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
	0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
	0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
	0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
	0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
	0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
	0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
	0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
	0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
	0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
	0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
	0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
	0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
	0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
	0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
	0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
	0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
	0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
	0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
	0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
	0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
	0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
	0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
	0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
	0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
	0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
	0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
	0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
	0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
	0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
	0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
	0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
	0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
	0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
	0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
	0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
	0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
	0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
	0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
	0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
	0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
	0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
	0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
	0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
	0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
	0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
	0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
	0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
	0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
	0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
	0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
	0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
	0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
	0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
	0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
	0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
	0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
	0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
	0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
	0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
	0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
	0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
	0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
	0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
	0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
	0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
	0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
	0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
	0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
	0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
	0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
	0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
	0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
	0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
	0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
	0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
};
slouken@0
  1816
/* RGB 5-6-5 --> BGRA 8-8-8-8: hand the blit to Blit_RGB565_32 together
   with the RGB565_BGRA8888_LUT conversion table */
static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
{
	Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
slouken@0
  1820
slouken@0
  1821
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
/*
 * RGB888_RGB332(dst, src) stores into `dst` an 8-bit value built from
 * `src`: bits 21-23 land in bits 5-7, bits 13-15 land in bits 2-4 and
 * bits 6-7 land in bits 0-1.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct as the body of an unbraced if/else.
 * Callers must terminate the invocation with a semicolon.
 */
#ifndef RGB888_RGB332
#define RGB888_RGB332(dst, src) do { \
	dst = (((src)&0x00E00000)>>16)| \
	      (((src)&0x0000E000)>>11)| \
	      (((src)&0x000000C0)>>6); \
} while (0)
#endif
slouken@0
  1829
/*
 * Blit 32-bit source pixels to an 8-bit destination through a lookup
 * table: every source pixel is reduced to a 3-3-2 value with
 * RGB888_RGB332 and that value is used as an index into info->table.
 */
static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
{
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint32 *srcp = (Uint32 *)info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	/* s_skip is divided by 4 because srcp steps in 32-bit units */
	const int srcpad = info->s_skip/4;
	const int dstpad = info->d_skip;
	const Uint8 *lut = info->table;
	int idx;
#ifndef USE_DUFFS_LOOP
	int left;
#endif

#ifdef USE_DUFFS_LOOP
	while ( rows-- ) {
		DUFFS_LOOP(
			RGB888_RGB332(idx, *srcp);
			*dstp++ = lut[idx];
			++srcp;
		, cols);
		srcp += srcpad;
		dstp += dstpad;
	}
#else
	while ( rows-- ) {
		/* Manually unrolled: four pixels per iteration */
		for ( left = cols/4; left != 0; --left ) {
			RGB888_RGB332(idx, srcp[0]);
			dstp[0] = lut[idx];
			RGB888_RGB332(idx, srcp[1]);
			dstp[1] = lut[idx];
			RGB888_RGB332(idx, srcp[2]);
			dstp[2] = lut[idx];
			RGB888_RGB332(idx, srcp[3]);
			dstp[3] = lut[idx];
			srcp += 4;
			dstp += 4;
		}
		/* Remaining 0..3 pixels of the row */
		for ( left = cols & 3; left != 0; --left ) {
			RGB888_RGB332(idx, *srcp);
			*dstp++ = lut[idx];
			++srcp;
		}
		srcp += srcpad;
		dstp += dstpad;
	}
#endif /* USE_DUFFS_LOOP */
}
slouken@0
  1896
/*
 * Generic blit from an N-byte-per-pixel source to an 8-bit destination.
 * Each source pixel is split into sR/sG/sB with DISEMBLE_RGB and packed
 * into a 3-3-2 byte. When info->table is non-NULL that byte is used as
 * an index into the table, otherwise it is written out directly.
 */
static void BlitNto1(SDL_BlitInfo *info)
{
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *src = info->s_pixels;
	Uint8 *dst = info->d_pixels;
	const int srcskip = info->s_skip;
	const int dstskip = info->d_skip;
	const Uint8 *map = info->table;
	SDL_PixelFormat *srcfmt = info->src;
	const int srcbpp = srcfmt->BytesPerPixel;
	Uint32 Pixel;
	int sR, sG, sB;
#ifndef USE_DUFFS_LOOP
	int c;
#endif

	if ( map != NULL ) {
		/* Packed 3-3-2 value is translated through the table */
		while ( rows-- ) {
#ifdef USE_DUFFS_LOOP
			DUFFS_LOOP(
				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dst++ = map[((sR>>5)<<5) |
				             ((sG>>5)<<2) |
				             (sB>>6)];
				src += srcbpp;
			, cols);
#else
			for ( c = cols; c != 0; --c ) {
				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dst++ = map[((sR>>5)<<5) |
				             ((sG>>5)<<2) |
				             (sB>>6)];
				src += srcbpp;
			}
#endif /* USE_DUFFS_LOOP */
			src += srcskip;
			dst += dstskip;
		}
	} else {
		/* No table: the packed 3-3-2 value is the destination pixel */
		while ( rows-- ) {
#ifdef USE_DUFFS_LOOP
			DUFFS_LOOP(
				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dst++ = ((sR>>5)<<5) |
				         ((sG>>5)<<2) |
				         (sB>>6);
				src += srcbpp;
			, cols);
#else
			for ( c = cols; c != 0; --c ) {
				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dst++ = ((sR>>5)<<5) |
				         ((sG>>5)<<2) |
				         (sB>>6);
				src += srcbpp;
			}
#endif
			src += srcskip;
			dst += dstskip;
		}
	}
}
icculus@1232
  1988
icculus@1232
  1989
/* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
icculus@1232
  1990
static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
icculus@1232
  1991
{
icculus@1232
  1992
	int width = info->d_width;
icculus@1232
  1993
	int height = info->d_height;
icculus@1232
  1994
	Uint32 *src = (Uint32 *)info->s_pixels;
icculus@1232
  1995
	int srcskip = info->s_skip;
icculus@1232
  1996
	Uint32 *dst = (Uint32 *)info->d_pixels;
icculus@1232
  1997
	int dstskip = info->d_skip;
icculus@1232
  1998
	SDL_PixelFormat *srcfmt = info->src;
icculus@1232
  1999
	SDL_PixelFormat *dstfmt = info->dst;
icculus@1232
  2000
icculus@1232
  2001
	if (dstfmt->Amask) {
icculus@1232
  2002
		/* RGB->RGBA, SET_ALPHA */
icculus@1232
  2003
		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
icculus@1232
  2004
icculus@1232
  2005
		while ( height-- ) {
icculus@1232
  2006
			DUFFS_LOOP(
icculus@1232
  2007
			{
icculus@1232
  2008
				*dst = *src | mask;
icculus@1232
  2009
				++dst;
icculus@1232
  2010
				++src;
icculus@1232
  2011
			},
icculus@1232
  2012
			width);
icculus@1232
  2013
			src = (Uint32*)((Uint8*)src + srcskip);
icculus@1232
  2014
			dst = (Uint32*)((Uint8*)dst + dstskip);
icculus@1232
  2015
		}
icculus@1232
  2016
	} else {
icculus@1232
  2017
		/* RGBA->RGB, NO_ALPHA */
icculus@1232
  2018
		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
icculus@1232
  2019
icculus@1232
  2020
		while ( height-- ) {
icculus@1232
  2021
			DUFFS_LOOP(
icculus@1232
  2022
			{
icculus@1232
  2023
				*dst = *src & mask;
icculus@1232
  2024
				++dst;
icculus@1232
  2025
				++src;
icculus@1232
  2026
			},
icculus@1232
  2027
			width);
icculus@1232
  2028
			src = (Uint32*)((Uint8*)src + srcskip);
icculus@1232
  2029
			dst = (Uint32*)((Uint8*)dst + dstskip);
icculus@1232
  2030
		}
icculus@1232
  2031
	}
icculus@1232
  2032
}
icculus@1232
  2033
slouken@0
  2034
/*
 * Generic pixel-by-pixel conversion blit: each source pixel is split
 * with DISEMBLE_RGB and re-encoded with ASSEMBLE_RGBA. The alpha passed
 * to ASSEMBLE_RGBA is the source format's alpha when the destination has
 * an alpha mask, 0 otherwise.
 */
static void BlitNtoN(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int dstbpp = dstfmt->BytesPerPixel;
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *src = info->s_pixels;
	Uint8 *dst = info->d_pixels;
	const int srcskip = info->s_skip;
	const int dstskip = info->d_skip;
	unsigned alpha = 0;
	Uint32 Pixel;
	unsigned sR;
	unsigned sG;
	unsigned sB;

	if ( dstfmt->Amask ) {
		alpha = srcfmt->alpha;
	}

	for ( ; rows; --rows ) {
		DUFFS_LOOP(
		{
			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
			src += srcbpp;
			dst += dstbpp;
		},
		cols);
		src += srcskip;
		dst += dstskip;
	}
}
slouken@0
  2065
slouken@0
  2066
/*
 * Generic conversion blit that carries the per-pixel alpha across:
 * DISEMBLE_RGBA extracts sR/sG/sB/sA from each source pixel and
 * ASSEMBLE_RGBA writes all four into the destination pixel.
 */
static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int dstbpp = dstfmt->BytesPerPixel;
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *src = info->s_pixels;
	Uint8 *dst = info->d_pixels;
	const int srcskip = info->s_skip;
	const int dstskip = info->d_skip;
	Uint32 Pixel;
	unsigned sR, sG, sB, sA;
	int left;

	/* FIXME: should map alpha to [0..255] correctly! */
	for ( ; rows; --rows ) {
		left = cols;
		while ( left ) {
			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
				      sR, sG, sB, sA);
			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
				      sR, sG, sB, sA);
			src += srcbpp;
			dst += dstbpp;
			--left;
		}
		src += srcskip;
		dst += dstskip;
	}
}
slouken@0
  2096
slouken@0
  2097
/*
 * Colorkey blit from an N-byte-per-pixel source to an 8-bit destination.
 * Source pixels whose non-alpha bits equal the colorkey are skipped (the
 * destination byte is left untouched). All others are packed to 3-3-2
 * and, when info->table is non-NULL, translated through that table.
 */
static void BlitNto1Key(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	const int srcbpp = srcfmt->BytesPerPixel;
	/* Alpha bits are ignored when comparing against the colorkey */
	const Uint32 rgbmask = ~srcfmt->Amask;
	const Uint32 ckey = srcfmt->colorkey & rgbmask;
	const Uint8 *palmap = info->table;
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *src = info->s_pixels;
	Uint8 *dst = info->d_pixels;
	const int srcskip = info->s_skip;
	const int dstskip = info->d_skip;
	Uint32 Pixel;
	unsigned sR, sG, sB;

	if ( palmap != NULL ) {
		for ( ; rows; --rows ) {
			DUFFS_LOOP(
			{
				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				if ( (Pixel & rgbmask) != ckey ) {
					/* 3-3-2 value indexes the table */
					*dst = (Uint8)palmap[((sR>>5)<<5) |
					                     ((sG>>5)<<2) |
					                     (sB>>6)];
				}
				++dst;
				src += srcbpp;
			},
			cols);
			src += srcskip;
			dst += dstskip;
		}
	} else {
		for ( ; rows; --rows ) {
			DUFFS_LOOP(
			{
				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				if ( (Pixel & rgbmask) != ckey ) {
					/* 3-3-2 value is the pixel itself */
					*dst = (Uint8)(((sR>>5)<<5) |
					               ((sG>>5)<<2) |
					               (sB>>6));
				}
				++dst;
				src += srcbpp;
			},
			cols);
			src += srcskip;
			dst += dstskip;
		}
	}
}
slouken@0
  2157
slouken@0
  2158
/*
 * Colorkey blit between two 16-bit surfaces: a source pixel is copied
 * verbatim unless its non-alpha bits equal the source colorkey, in which
 * case the destination pixel is left as it was.
 */
static void Blit2to2Key(SDL_BlitInfo *info)
{
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint16 *srcp = (Uint16 *)info->s_pixels;
	Uint16 *dstp = (Uint16 *)info->d_pixels;
	/* Skips are halved because the pointers step in 16-bit units */
	const int srcpad = info->s_skip/2;
	const int dstpad = info->d_skip/2;
	const Uint32 rgbmask = ~info->src->Amask;
	const Uint32 ckey = info->src->colorkey & rgbmask;

	for ( ; rows; --rows ) {
		DUFFS_LOOP(
		{
			if ( (*srcp & rgbmask) != ckey ) {
				*dstp = *srcp;
			}
			++srcp;
			++dstp;
		},
		cols);
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  2188
slouken@0
  2189
/*
 * Generic colorkey conversion blit. Each source pixel is fetched with
 * RETRIEVE_RGB_PIXEL; if its non-alpha bits differ from the colorkey it
 * is decoded with RGB_FROM_PIXEL and written with ASSEMBLE_RGBA, using
 * the source format's alpha when the destination has an alpha mask and
 * 0 otherwise. Keyed pixels leave the destination untouched.
 */
static void BlitNtoNKey(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int dstbpp = dstfmt->BytesPerPixel;
	/* Alpha bits are ignored when comparing against the colorkey */
	const Uint32 rgbmask = ~srcfmt->Amask;
	const Uint32 ckey = info->src->colorkey & rgbmask;
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *src = info->s_pixels;
	Uint8 *dst = info->d_pixels;
	const int srcskip = info->s_skip;
	const int dstskip = info->d_skip;
	unsigned alpha = 0;
	Uint32 Pixel;
	unsigned sR;
	unsigned sG;
	unsigned sB;

	if ( dstfmt->Amask ) {
		alpha = srcfmt->alpha;
	}

	for ( ; rows; --rows ) {
		DUFFS_LOOP(
		{
			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
			if ( (Pixel & rgbmask) != ckey ) {
				RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
					      sR, sG, sB, alpha);
			}
			src += srcbpp;
			dst += dstbpp;
		},
		cols);
		src += srcskip;
		dst += dstskip;
	}
}
slouken@0
  2229
slouken@0
  2230
/*
 * Generic colorkey conversion blit that also carries per-pixel alpha:
 * every source pixel is decoded with DISEMBLE_RGBA and, unless its
 * non-alpha bits equal the colorkey, re-encoded with ASSEMBLE_RGBA.
 */
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	Uint8 srcbpp = srcfmt->BytesPerPixel;
	Uint8 dstbpp = dstfmt->BytesPerPixel;
	/* Alpha bits are ignored when comparing against the colorkey */
	const Uint32 rgbmask = ~srcfmt->Amask;
	const Uint32 ckey = info->src->colorkey & rgbmask;
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *src = info->s_pixels;
	Uint8 *dst = info->d_pixels;
	const int srcskip = info->s_skip;
	const int dstskip = info->d_skip;
	Uint32 Pixel;
	unsigned sR, sG, sB, sA;

	/* FIXME: should map alpha to [0..255] correctly! */
	for ( ; rows; --rows ) {
		DUFFS_LOOP(
		{
			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
				      sR, sG, sB, sA);
			if ( (Pixel & rgbmask) != ckey ) {
				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
					      sR, sG, sB, sA);
			}
			src += srcbpp;
			dst += dstbpp;
		},
		cols);
		src += srcskip;
		dst += dstskip;
	}
}
slouken@0
  2271
slouken@0
  2272
/* Normal N to N optimized blitters */
slouken@0
  2273
struct blit_table {
slouken@0
  2274
	Uint32 srcR, srcG, srcB;
slouken@0
  2275
	int dstbpp;
slouken@0
  2276
	Uint32 dstR, dstG, dstB;
icculus@1047
  2277
	Uint32 blit_features;
slouken@0
  2278
	void *aux_data;
slouken@0
  2279
	SDL_loblit blitfunc;
icculus@1047
  2280
	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
slouken@0
  2281
};
slouken@91
  2282
/* Blitters for a 1-byte-per-pixel RGB source */
static const struct blit_table normal_blit_1[] = {
	/* Default for 8-bit RGB source, an invalid combination */
	{ 0, 0, 0,
	  0, 0, 0, 0,
	  0, NULL, NULL, 0 },
};
slouken@91
  2286
static const struct blit_table normal_blit_2[] = {
slouken@1361
  2287
#if SDL_HERMES_BLITTERS
slouken@0
  2288
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
slouken@0
  2289
      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
slouken@0
  2290
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@0
  2291
      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
slouken@0
  2292
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
slouken@0
  2293
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
slouken@1361
  2294
#elif SDL_ALTIVEC_BLITTERS
icculus@1047
  2295
    /* has-altivec */
icculus@1047
  2296
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
icculus@1047
  2297
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1139
  2298
    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
icculus@1139
  2299
      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1047
  2300
#endif
slouken@0
  2301
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
slouken@0
  2302
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
slouken@0
  2303
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
slouken@0
  2304
      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
slouken@0
  2305
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
slouken@0
  2306
      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
slouken@0
  2307
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
slouken@0
  2308
      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
slouken@0
  2309
slouken@0
  2310
    /* Default for 16-bit RGB source, used if no other blitter matches */
slouken@0
  2311
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
slouken@0
  2312
};
slouken@91
  2313
static const struct blit_table normal_blit_3[] = {
slouken@0
  2314
	/* Default for 24-bit RGB source, never optimized */
slouken@0
  2315
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
slouken@0
  2316
};
slouken@91
  2317
static const struct blit_table normal_blit_4[] = {
slouken@1361
  2318
#if SDL_HERMES_BLITTERS
slouken@0
  2319
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
slouken@739
  2320
      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
slouken@0
  2321
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
slouken@0
  2322
      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
slouken@0
  2323
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
slouken@739
  2324
      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
slouken@0
  2325
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
slouken@0
  2326
      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
slouken@0
  2327
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@739
  2328
      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
slouken@0
  2329
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@0
  2330
      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
slouken@0
  2331
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
slouken@739
  2332
      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
slouken@0
  2333
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
slouken@0
  2334
      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
slouken@0
  2335
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
slouken@0
  2336
      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
slouken@0
  2337
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
slouken@0
  2338
      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
slouken@0
  2339
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
slouken@0
  2340
      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
slouken@0
  2341
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
slouken@0
  2342
      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
slouken@0
  2343
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
slouken@0
  2344
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
slouken@0
  2345
#else
slouken@1361
  2346
#if SDL_ALTIVEC_BLITTERS
icculus@1047
  2347
    /* has-altivec | dont-use-prefetch */
icculus@1047
  2348
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
icculus@1047
  2349
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1047
  2350
    /* has-altivec */
icculus@1047
  2351
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
icculus@1047
  2352
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1047
  2353
    /* has-altivec */
icculus@1047
  2354
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
icculus@1047
  2355
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
icculus@1047
  2356
#endif
slouken@0
  2357
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
slouken@0
  2358
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
slouken@0
  2359
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@0
  2360
      0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
slouken@0
  2361
#endif
slouken@0
  2362
	/* Default for 32-bit RGB source, used if no other blitter matches */
slouken@0
  2363
	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
slouken@0
  2364
};
slouken@91
  2365
static const struct blit_table *normal_blit[] = {
slouken@0
  2366
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
slouken@0
  2367
};
slouken@0
  2368
icculus@1047
  2369
/* Mask matches table, or table entry is zero */
icculus@1047
  2370
#define MASKOK(x, y) \
	(((y) == 0x00000000) || ((x) == (y)))
icculus@1047
  2371
slouken@0
  2372
/*
 * Pick the blit function for `surface` and its currently mapped
 * destination.
 *
 * blit_index: bit 1 set delegates to SDL_CalculateAlphaBlit; the value 1
 * selects one of the colorkey blitters; anything else goes through the
 * 8-bit special cases or the normal_blit tables.
 *
 * Returns the chosen blitter, or NULL when the destination has fewer
 * than 8 bits per pixel or the matching table row has no blitter. May
 * store auxiliary data in surface->map->sw_data->aux_data.
 */
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
{
	/* Set up data for choosing the blit */
	struct private_swaccel *sdata = surface->map->sw_data;
	SDL_PixelFormat *srcfmt = surface->format;
	SDL_PixelFormat *dstfmt = surface->map->dst->format;
	const struct blit_table *table;
	const struct blit_table *entry;
	int which;
	int a_need;
	SDL_loblit blitfun;

	/* alpha or alpha+colorkey */
	if ( blit_index & 2 ) {
		return SDL_CalculateAlphaBlit(surface, blit_index);
	}

	/* We don't support destinations less than 8-bits */
	if ( dstfmt->BitsPerPixel < 8 ) {
		return(NULL);
	}

	if ( blit_index == 1 ) {
		/* colorkey blit: Here we don't have too many options, mostly
		   because RLE is the preferred fast way to deal with this.
		   If a particular case turns out to be useful we'll add it. */
		if ( srcfmt->BytesPerPixel == 2 && surface->map->identity ) {
			return Blit2to2Key;
		}
		if ( dstfmt->BytesPerPixel == 1 ) {
			return BlitNto1Key;
		}
#if SDL_ALTIVEC_BLITTERS
		if ( (srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec() ) {
			return Blit32to32KeyAltivec;
		}
#endif
		if ( srcfmt->Amask && dstfmt->Amask ) {
			return BlitNtoNKeyCopyAlpha;
		}
		return BlitNtoNKey;
	}

	blitfun = NULL;
	if ( dstfmt->BitsPerPixel == 8 ) {
		/* We assume 8-bit destinations are palettized */
		if ( (srcfmt->BytesPerPixel != 4) ||
		     (srcfmt->Rmask != 0x00FF0000) ||
		     (srcfmt->Gmask != 0x0000FF00) ||
		     (srcfmt->Bmask != 0x000000FF) ) {
			blitfun = BlitNto1;
		} else if ( surface->map->table ) {
			blitfun = Blit_RGB888_index8_map;
		} else {
#if SDL_HERMES_BLITTERS
			sdata->aux_data = ConvertX86p32_8RGB332;
			blitfun = ConvertX86;
#else
			blitfun = Blit_RGB888_index8;
#endif
		}
	} else {
		/* Now the meat, choose the blitter we want */
		if ( !dstfmt->Amask ) {
			a_need = NO_ALPHA;
		} else if ( srcfmt->Amask ) {
			a_need = COPY_ALPHA;
		} else {
			a_need = SET_ALPHA;
		}

		/* Walk the table for this source depth; the terminating row
		   (dstbpp == 0) doubles as the default choice */
		table = normal_blit[srcfmt->BytesPerPixel-1];
		for ( which = 0; table[which].dstbpp != 0; ++which ) {
			entry = &table[which];
			if ( !MASKOK(srcfmt->Rmask, entry->srcR) ||
			     !MASKOK(srcfmt->Gmask, entry->srcG) ||
			     !MASKOK(srcfmt->Bmask, entry->srcB) ) {
				continue;
			}
			if ( !MASKOK(dstfmt->Rmask, entry->dstR) ||
			     !MASKOK(dstfmt->Gmask, entry->dstG) ||
			     !MASKOK(dstfmt->Bmask, entry->dstB) ) {
				continue;
			}
			if ( dstfmt->BytesPerPixel != entry->dstbpp ) {
				continue;
			}
			if ( (a_need & entry->alpha) != a_need ) {
				continue;
			}
			if ( (entry->blit_features & GetBlitFeatures()) != entry->blit_features ) {
				continue;
			}
			break;
		}
		sdata->aux_data = table[which].aux_data;
		blitfun = table[which].blitfunc;

		if ( blitfun == BlitNtoN ) {  /* default C fallback catch-all. Slow! */
			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
			     srcfmt->Rmask == dstfmt->Rmask &&
			     srcfmt->Gmask == dstfmt->Gmask &&
			     srcfmt->Bmask == dstfmt->Bmask ) {
				blitfun = Blit4to4MaskAlpha;
			} else if ( a_need == COPY_ALPHA ) {
				blitfun = BlitNtoNCopyAlpha;
			}
		}
	}

#ifdef DEBUG_ASM
#if SDL_HERMES_BLITTERS
	if ( blitfun == ConvertMMX ) {
		fprintf(stderr, "Using mmx blit\n");
	} else if ( blitfun == ConvertX86 ) {
		fprintf(stderr, "Using asm blit\n");
	} else
#endif
	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) ) {
		fprintf(stderr, "Using C blit\n");
	} else {
		fprintf(stderr, "Using optimized C blit\n");
	}
#endif /* DEBUG_ASM */

	return(blitfun);
}