src/video/SDL_blit_N.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 20 Feb 2012 20:55:23 -0500
branchSDL-1.2
changeset 6294 7a2e0f7b30cb
parent 6137 4720145f848b
child 6411 ec7ccf5ee3dd
permissions -rw-r--r--
Fixed bug 1424 - Handling of alpha channel in Altivec accelerated blit functions

evilbite 2012-02-19 09:38:21 PST

There is only one Altivec accelerated blit function
(ConvertAltivec32to32_prefetch() or ConvertAltivec32to32_noprefetch(),
depending on the CPU used) that is supposed to handle all alpha combinations.
This works as follows for every pixel line:
1. Blit single pixels until an aligned address is reached
2. Accelerated blit as far as possible
3. Blit single remaining pixels
Part 2. is set up correctly to handle different combinations of the alpha
channels of the participating surfaces. Parts 1. and 3. only do a simple copy
of all the pixel's components from source to destination. But when the source
surface has no alpha channel (Amask is 0, e.g. the video surface) the surface's
alpha value must be used instead. Otherwise crap (uninitialized data) is being
copied to the destination's alpha channel.

The attached patch is a quick'n'dirty solution to the problem. A more
sophisticated solution might require separate functions for different
combinations of the alpha channels of the participating surfaces.
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@6137
     3
    Copyright (C) 1997-2012 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
#include "SDL_video.h"
slouken@1358
    25
#include "SDL_endian.h"
slouken@1358
    26
#include "SDL_cpuinfo.h"
slouken@0
    27
#include "SDL_blit.h"
slouken@0
    28
slouken@0
    29
/* Functions to blit from N-bit surfaces to other surfaces */
slouken@0
    30
slouken@1361
    31
#if SDL_ALTIVEC_BLITTERS
slouken@1795
    32
#if __MWERKS__
slouken@1795
    33
#pragma altivec_model on
slouken@1795
    34
#endif
icculus@1175
    35
#ifdef HAVE_ALTIVEC_H
icculus@1162
    36
#include <altivec.h>
icculus@1175
    37
#endif
slouken@1338
    38
#define assert(X)
slouken@1402
    39
#ifdef __MACOSX__
#include <sys/sysctl.h>
/*
 * Return the value of the "hw.l3cachesize" sysctl as a size_t.
 * Returns 0 when sysctlbyname() reports an error.
 */
static size_t GetL3CacheSize( void )
{
    const char key[] = "hw.l3cachesize";
    u_int64_t result = 0;
    size_t typeSize = sizeof( result );

    if( sysctlbyname( key, &result, &typeSize, NULL, 0 ) != 0 ) {
        return 0;
    }

    /* result is held in a 64-bit variable; narrow it explicitly. */
    return (size_t) result;
}
#else
/*
 * Fallback when the sysctl query is not available: report a fixed
 * 2097152 bytes (2 MB).
 */
static size_t GetL3CacheSize( void )
{
    /* XXX: Just guess G4 */
    return 2097152;
}
#endif /* __MACOSX__ */
icculus@1047
    60
slouken@1402
    61
/*
 * Vector literal helpers.  GCC older than version 4 on Mac OS X gets the
 * parenthesized literal form; every other compiler gets the brace form.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif

/*
 * Low four bits of the address x: non-zero when x is not on a 16-byte
 * boundary.  The argument is parenthesized so that pointer expressions
 * such as (p + 1) are evaluated before the cast.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/*
 * 16-byte vector literal that repeats the byte offsets (a,b,c,d) for each
 * of the four 32-bit lanes (offsets 0x00, 0x04, 0x08 and 0x0C are added).
 * Built through VECUINT8_LITERAL so the literal syntax matches the compiler.
 */
#define VSWIZZLE32(a,b,c,d) VECUINT8_LITERAL( \
                                 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                 0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                 0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                 0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )

/*
 * Assemble a 32-bit pixel from r, g, b, a using the shifts and masks of
 * the pixel format pointed to by dstfmt.
 */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( (((r)<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
      (((g)<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
      (((b)<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
      (((a)<<(dstfmt)->Ashift)&(dstfmt)->Amask) )

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 *  Don't use this on a G5...however, the speed boost is very significant
 *   on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

/*
 * Permute vector used to realign data loaded from src.  For an unaligned
 * src this is vec_lvsl(0, src); for an aligned src it is vec_lvsl(8, src)
 * with 8 added to every byte.  Note that src is evaluated more than once.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, src) \
    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
icculus@1047
   102
icculus@1047
   103
/* Calculate the permute vector used for 32->32 swizzling */
icculus@1047
   104
static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
icculus@1047
   105
                                  const SDL_PixelFormat *dstfmt)
icculus@1047
   106
{
icculus@1047
   107
    /*
icculus@1047
   108
    * We have to assume that the bits that aren't used by other
icculus@1047
   109
     *  colors is alpha, and it's one complete byte, since some formats
icculus@1047
   110
     *  leave alpha with a zero mask, but we should still swizzle the bits.
icculus@1047
   111
     */
icculus@1047
   112
    /* ARGB */
icculus@1047
   113
    const static struct SDL_PixelFormat default_pixel_format = {
icculus@1047
   114
        NULL, 0, 0,
icculus@1047
   115
        0, 0, 0, 0,
icculus@1047
   116
        16, 8, 0, 24,
icculus@1047
   117
        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
icculus@1047
   118
        0, 0};
icculus@1047
   119
    if (!srcfmt) {
icculus@1047
   120
        srcfmt = &default_pixel_format;
icculus@1047
   121
    }
icculus@1047
   122
    if (!dstfmt) {
icculus@1047
   123
        dstfmt = &default_pixel_format;
icculus@1047
   124
    }
slouken@1487
   125
    const vector unsigned char plus = VECUINT8_LITERAL(
icculus@1162
   126
                                      0x00, 0x00, 0x00, 0x00,
icculus@1047
   127
                                      0x04, 0x04, 0x04, 0x04,
icculus@1047
   128
                                      0x08, 0x08, 0x08, 0x08,
icculus@1047
   129
                                      0x0C, 0x0C, 0x0C, 0x0C );
icculus@1047
   130
    vector unsigned char vswiz;
icculus@1047
   131
    vector unsigned int srcvec;
icculus@1047
   132
#define RESHIFT(X) (3 - ((X) >> 3))
icculus@1047
   133
    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
icculus@1047
   134
    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
icculus@1047
   135
    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
icculus@1047
   136
    Uint32 amask;
icculus@1047
   137
    /* Use zero for alpha if either surface doesn't have alpha */
icculus@1047
   138
    if (dstfmt->Amask) {
icculus@1047
   139
        amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
icculus@1047
   140
    } else {    
icculus@1047
   141
        amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
icculus@1047
   142
    }           
icculus@1047
   143
#undef RESHIFT  
icculus@1162
   144
    ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
icculus@1047
   145
    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
icculus@1047
   146
    return(vswiz);
icculus@1047
   147
}
icculus@1047
   148
icculus@1047
   149
static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
icculus@1047
   150
/*
 * Blit a surface with 4-byte pixels onto an RGB565 surface using AltiVec.
 *
 * Every row is processed in three stages:
 *   1. single pixels until dst reaches a 16-byte boundary,
 *   2. eight pixels per iteration with vector code,
 *   3. single pixels for the remaining (width % 8) pixels.
 *
 * info supplies the pixel pointers, per-row skips, dimensions and the
 * source pixel format.
 */
static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    vector unsigned char valpha = vec_splat_u8(0);
    /* Swizzle from the source layout to the default ARGB layout. */
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    /*
     * Builds eight 16-bit lanes from two vectors of four pixels: the low
     * byte of each lane is byte 2 of a pixel (green after the swizzle);
     * the high byte is masked away below.
     */
    vector unsigned char vgmerge = VECUINT8_LITERAL(
        0x00, 0x02, 0x00, 0x06,
        0x00, 0x0a, 0x00, 0x0e,
        0x00, 0x12, 0x00, 0x16,
        0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
     * Blue occupies the low five bits of the packed 1555 pixel.  Bit 5
     * there is a green bit, so the mask must be 0x001f (0x003f would OR a
     * stray green bit into the low green bit of the 565 result).
     */
    vector unsigned short v1f = VECUINT16_LITERAL(
        0x001f, 0x001f, 0x001f, 0x001f,
        0x001f, 0x001f, 0x001f, 0x001f);
    vector unsigned short vfc = VECUINT16_LITERAL(
        0x00fc, 0x00fc, 0x00fc, 0x00fc,
        0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* 0xF8 in every byte, moved to the high byte of each lane: 0xF800. */
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                ((sG << 3) & 0x000007E0) | \
                                ((sB >> 3) & 0x0000001F)); \
            dst += 2; \
            src += 4; \
            widthvar--; \
        }

        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* trailing pixels handled one at a time */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            /* first four source pixels, realigned and swizzled */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            /* next four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            /* 1555 */
            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
            /* six green bits straight from the green bytes, moved to bits 10..5 */
            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            /* red: shift the 1555 pixel up by one and keep bits 15..11 */
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            /* blue: low five bits of the 1555 pixel */
            vbpixel = vec_and(vpixel, v1f);
            vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
            /* 565 */
            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* finish the row with scalar code for the leftover pixels */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }


}
icculus@1047
   250
icculus@1047
   251
/*
 * Blit an RGB565 surface onto a surface with 4-byte pixels using AltiVec.
 *
 * Every row is processed in three stages:
 *   1. single pixels until dst reaches a 16-byte boundary,
 *   2. eight pixels per iteration with vector code,
 *   3. single pixels for the remaining (width % 8) pixels.
 *
 * The destination alpha is srcfmt->alpha when the destination has an
 * alpha mask and that value is non-zero; otherwise it is 0.
 */
static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    SDL_PixelFormat *dstfmt = info->dst;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned short vfc00;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        0x10 - 0x1f is the alpha
        0x00 - 0x0e evens are the red
        0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
        0x10, 0x00, 0x01, 0x01,
        0x10, 0x02, 0x01, 0x01,
        0x10, 0x04, 0x01, 0x01,
        0x10, 0x06, 0x01, 0x01
    );
    /* same pattern for pixels 4..7: adds 8 to the red index of each word */
    vector unsigned char vredalpha2 = (vector unsigned char) (
        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    );
    /*
        0x00 - 0x0f is ARxx ARxx ARxx ARxx
        0x11 - 0x1f odds are blue
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x02, 0x11,
        0x04, 0x05, 0x06, 0x13,
        0x08, 0x09, 0x0a, 0x15,
        0x0c, 0x0d, 0x0e, 0x17
    );
    /* same pattern for pixels 4..7: adds 8 to the blue index of each word */
    vector unsigned char vblue2 = (vector unsigned char)(
        vec_add((vector unsigned int)vblue1, v8)
    );
    /*
        0x00 - 0x0f is ARxB ARxB ARxB ARxB
        0x10 - 0x1e evens are green
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x10, 0x03,
        0x04, 0x05, 0x12, 0x07,
        0x08, 0x09, 0x14, 0x0b,
        0x0c, 0x0d, 0x16, 0x0f
    );
    /* same pattern for pixels 4..7: adds 8 to the green index of each word */
    vector unsigned char vgreen2 = (vector unsigned char)(
        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    );


    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /*
     * Channel masks for the high byte of each 16-bit lane.  They are
     * built from byte splats of 0xF8 (-8) and 0xFC (-4) shifted left by
     * eight, giving 0xF800 (five red bits) and 0xFC00 (six green bits),
     * so the vector path produces the same values as the scalar path.
     */
    vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    vfc00 = (vector unsigned short)vec_splat_u8(-4);
    vfc00 = vec_sl(vfc00, vec_splat_u16(8));

    if (dstfmt->Amask && srcfmt->alpha) {
        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    /* Swizzle from the default ARGB layout to the destination layout. */
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 8) & 0xf8; \
            sG = (Pixel >> 3) & 0xfc; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* trailing pixels handled one at a time */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            /* eight realigned 565 source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red in the high byte, blue in the low byte, green in the high byte */
            vR = vec_and((vector unsigned short)vsrc, vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            vG = vec_and(vec_sl(vB, v2), vfc00);

            /* pixels 0..3: assemble ARGB, then swizzle to the destination layout */
            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4..7 */
            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);


        /* finish the row with scalar code for the leftover pixels */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }

}
icculus@1047
   394
icculus@1139
   395
icculus@1139
   396
/*
 * Blit an RGB555 surface onto a surface with 4-byte pixels using AltiVec.
 *
 * Every row is processed in three stages:
 *   1. single pixels until dst reaches a 16-byte boundary,
 *   2. eight pixels per iteration with vector code,
 *   3. single pixels for the remaining (width % 8) pixels.
 *
 * The destination alpha is srcfmt->alpha when the destination has an
 * alpha mask and that value is non-zero; otherwise it is 0.
 */
static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
    int height = info->d_height;
    Uint8 *src = (Uint8 *) info->s_pixels;
    int srcskip = info->s_skip;
    Uint8 *dst = (Uint8 *) info->d_pixels;
    int dstskip = info->d_skip;
    SDL_PixelFormat *srcfmt = info->src;
    SDL_PixelFormat *dstfmt = info->dst;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
        0x10 - 0x1f is the alpha
        0x00 - 0x0e evens are the red
        0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
        0x10, 0x00, 0x01, 0x01,
        0x10, 0x02, 0x01, 0x01,
        0x10, 0x04, 0x01, 0x01,
        0x10, 0x06, 0x01, 0x01
    );
    /* same pattern for pixels 4..7: adds 8 to the red index of each word */
    vector unsigned char vredalpha2 = (vector unsigned char)(
        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    );
    /*
        0x00 - 0x0f is ARxx ARxx ARxx ARxx
        0x11 - 0x1f odds are blue
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x02, 0x11,
        0x04, 0x05, 0x06, 0x13,
        0x08, 0x09, 0x0a, 0x15,
        0x0c, 0x0d, 0x0e, 0x17
    );
    /* same pattern for pixels 4..7: adds 8 to the blue index of each word */
    vector unsigned char vblue2 = (vector unsigned char)(
        vec_add((vector unsigned int)vblue1, v8)
    );
    /*
        0x00 - 0x0f is ARxB ARxB ARxB ARxB
        0x10 - 0x1e evens are green
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(
        0x00, 0x01, 0x10, 0x03,
        0x04, 0x05, 0x12, 0x07,
        0x08, 0x09, 0x14, 0x0b,
        0x0c, 0x0d, 0x16, 0x0f
    );
    /* same pattern for pixels 4..7: adds 8 to the green index of each word */
    vector unsigned char vgreen2 = (vector unsigned char)(
        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    );


    assert(srcfmt->BytesPerPixel == 2);
    assert(dstfmt->BytesPerPixel == 4);

    /*
     * Mask for the five channel bits in the high byte of each 16-bit lane:
     * a byte splat of 0xF8 (-8) shifted left by eight gives 0xF800.  It is
     * applied to both red and green so the vector path produces the same
     * values as the scalar path.
     */
    vf800 = (vector unsigned short)vec_splat_u8(-8);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    if (dstfmt->Amask && srcfmt->alpha) {
        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }

    /* Swizzle from the default ARGB layout to the destination layout. */
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->d_width;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            unsigned sR, sG, sB; \
            unsigned short Pixel = *((unsigned short *)src); \
            sR = (Pixel >> 7) & 0xf8; \
            sG = (Pixel >> 2) & 0xf8; \
            sB = (Pixel << 3) & 0xf8; \
            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
            src += 2; \
            dst += 4; \
            widthvar--; \
        }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);  /* trailing pixels handled one at a time */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;

            /* eight realigned 555 source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);

            /* red in the high byte, blue in the low byte, green in the high byte */
            vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            vG = vec_and(vec_sl(vB, v3), vf800);

            /* pixels 0..3: assemble ARGB, then swizzle to the destination layout */
            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);

            /* pixels 4..7 */
            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);

            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }

        assert(width == 0);


        /* finish the row with scalar code for the leftover pixels */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;  /* move to next row, accounting for pitch. */
        dst += dstskip;
    }

}
icculus@1139
   539
icculus@1047
   540
static void BlitNtoNKey(SDL_BlitInfo *info);
icculus@1047
   541
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
icculus@1047
   542
static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
icculus@1047
   543
{
icculus@1047
   544
    int height = info->d_height;
icculus@1047
   545
    Uint32 *srcp = (Uint32 *) info->s_pixels;
icculus@1047
   546
    int srcskip = info->s_skip;
icculus@1047
   547
    Uint32 *dstp = (Uint32 *) info->d_pixels;
icculus@1047
   548
    int dstskip = info->d_skip;
icculus@1047
   549
    SDL_PixelFormat *srcfmt = info->src;
icculus@1047
   550
    int srcbpp = srcfmt->BytesPerPixel;
icculus@1047
   551
    SDL_PixelFormat *dstfmt = info->dst;
icculus@1047
   552
    int dstbpp = dstfmt->BytesPerPixel;
icculus@1047
   553
    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
icculus@1047
   554
	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
icculus@1047
   555
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
icculus@1047
   556
	Uint32 ckey = info->src->colorkey;
icculus@1047
   557
    vector unsigned int valpha;
icculus@1047
   558
    vector unsigned char vpermute;
icculus@1047
   559
    vector unsigned char vzero;
icculus@1047
   560
    vector unsigned int vckey;
icculus@1047
   561
    vector unsigned int vrgbmask;
icculus@1047
   562
    vpermute = calc_swizzle32(srcfmt, dstfmt);
icculus@1047
   563
    if (info->d_width < 16) {
icculus@1047
   564
        if(copy_alpha) {
slouken@1487
   565
            BlitNtoNKeyCopyAlpha(info);
icculus@1047
   566
        } else {
slouken@1487
   567
            BlitNtoNKey(info);
icculus@1047
   568
        }
slouken@1487
   569
        return;
icculus@1047
   570
    }
icculus@1047
   571
    vzero = vec_splat_u8(0);
icculus@1047
   572
    if (alpha) {
icculus@1047
   573
        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
icculus@1047
   574
        valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
icculus@1047
   575
    } else {
icculus@1047
   576
        valpha = (vector unsigned int)vzero;
icculus@1047
   577
    }
icculus@1047
   578
    ckey &= rgbmask;
icculus@1162
   579
    ((unsigned int *)(char*)&vckey)[0] = ckey;
icculus@1047
   580
    vckey = vec_splat(vckey, 0);
icculus@1162
   581
    ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
icculus@1047
   582
    vrgbmask = vec_splat(vrgbmask, 0);
icculus@1047
   583
icculus@1047
   584
    while (height--) {
icculus@1047
   585
#define ONE_PIXEL_BLEND(condition, widthvar) \
icculus@1047
   586
        if (copy_alpha) { \
icculus@1047
   587
            while (condition) { \
icculus@1162
   588
                Uint32 Pixel; \
icculus@1047
   589
                unsigned sR, sG, sB, sA; \
icculus@1162
   590
                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
icculus@1047
   591
                          sR, sG, sB, sA); \
icculus@1162
   592
                if ( (Pixel & rgbmask) != ckey ) { \
icculus@1047
   593
                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
icculus@1047
   594
                            sR, sG, sB, sA); \
icculus@1047
   595
                } \
icculus@1162
   596
                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
icculus@1162
   597
                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
icculus@1047
   598
                widthvar--; \
icculus@1047
   599
            } \
icculus@1047
   600
        } else { \
icculus@1047
   601
            while (condition) { \
icculus@1162
   602
                Uint32 Pixel; \
icculus@1047
   603
                unsigned sR, sG, sB; \
icculus@1162
   604
                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
icculus@1162
   605
                if ( Pixel != ckey ) { \
icculus@1162
   606
                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
icculus@1047
   607
                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
icculus@1047
   608
                              sR, sG, sB, alpha); \
icculus@1047
   609
                } \
icculus@1162
   610
                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
icculus@1162
   611
                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
icculus@1047
   612
                widthvar--; \
icculus@1047
   613
            } \
icculus@1047
   614
        }
icculus@1047
   615
        int width = info->d_width;
icculus@1047
   616
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
icculus@1047
   617
        assert(width > 0);
icculus@1047
   618
        if (width > 0) {
icculus@1047
   619
            int extrawidth = (width % 4);
icculus@1047
   620
            vector unsigned char valigner = VEC_ALIGNER(srcp);
icculus@1047
   621
            vector unsigned int vs = vec_ld(0, srcp);
icculus@1047
   622
            width -= extrawidth;
icculus@1047
   623
            assert(width >= 4);
icculus@1047
   624
            while (width) {
icculus@1047
   625
                vector unsigned char vsel;
icculus@1047
   626
                vector unsigned int vd;
icculus@1047
   627
                vector unsigned int voverflow = vec_ld(15, srcp);
icculus@1047
   628
                /* load the source vec */
icculus@1047
   629
                vs = vec_perm(vs, voverflow, valigner);
icculus@1047
   630
                /* vsel is set for items that match the key */
icculus@1047
   631
                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
icculus@1047
   632
                vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
icculus@1047
   633
                /* permute the src vec to the dest format */
icculus@1047
   634
                vs = vec_perm(vs, valpha, vpermute);
icculus@1047
   635
                /* load the destination vec */
icculus@1047
   636
                vd = vec_ld(0, dstp);
icculus@1047
   637
                /* select the source and dest into vs */
icculus@1047
   638
                vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
icculus@1047
   639
                
icculus@1047
   640
                vec_st(vd, 0, dstp);
icculus@1047
   641
                srcp += 4;
icculus@1047
   642
                width -= 4;
icculus@1047
   643
                dstp += 4;
icculus@1047
   644
                vs = voverflow;
icculus@1047
   645
            }
icculus@1047
   646
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
icculus@1047
   647
#undef ONE_PIXEL_BLEND
icculus@1047
   648
            srcp += srcskip >> 2;
icculus@1047
   649
            dstp += dstskip >> 2;
icculus@1047
   650
        }
icculus@1047
   651
    }
icculus@1047
   652
}
icculus@1047
   653
icculus@1047
   654
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
icculus@1047
   655
/* Use this on a G5 */
icculus@1047
   656
static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
icculus@1047
   657
{
icculus@1047
   658
    int height = info->d_height;
icculus@1047
   659
    Uint32 *src = (Uint32 *) info->s_pixels;
icculus@1047
   660
    int srcskip = info->s_skip;
icculus@1047
   661
    Uint32 *dst = (Uint32 *) info->d_pixels;
icculus@1047
   662
    int dstskip = info->d_skip;
icculus@1047
   663
    SDL_PixelFormat *srcfmt = info->src;
icculus@1047
   664
    SDL_PixelFormat *dstfmt = info->dst;
icculus@1047
   665
    vector unsigned int vzero = vec_splat_u32(0);
icculus@1047
   666
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
icculus@1047
   667
    if (dstfmt->Amask && !srcfmt->Amask) {
icculus@1047
   668
        if (srcfmt->alpha) {
icculus@1047
   669
            vector unsigned char valpha;
icculus@1047
   670
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
icculus@1047
   671
            vzero = (vector unsigned int)vec_splat(valpha, 0);
icculus@1047
   672
        }
icculus@1047
   673
    }
icculus@1047
   674
slouken@1339
   675
    assert(srcfmt->BytesPerPixel == 4);
slouken@1339
   676
    assert(dstfmt->BytesPerPixel == 4);
icculus@1047
   677
icculus@1047
   678
    while (height--) {
icculus@1047
   679
        vector unsigned char valigner;
icculus@1047
   680
        vector unsigned int vbits;
icculus@1047
   681
        vector unsigned int voverflow;
icculus@1047
   682
        Uint32 bits;
icculus@1047
   683
        Uint8 r, g, b, a;
icculus@1047
   684
icculus@1047
   685
        int width = info->d_width;
icculus@1047
   686
        int extrawidth;
icculus@1047
   687
icculus@1047
   688
        /* do scalar until we can align... */
icculus@1047
   689
        while ((UNALIGNED_PTR(dst)) && (width)) {
icculus@1047
   690
            bits = *(src++);
icculus@1047
   691
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
slouken@6294
   692
            if(!srcfmt->Amask)
slouken@6294
   693
              a = srcfmt->alpha;
icculus@1047
   694
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   695
            width--;
icculus@1047
   696
        }
icculus@1047
   697
icculus@1047
   698
        /* After all that work, here's the vector part! */
icculus@1047
   699
        extrawidth = (width % 4);
icculus@1047
   700
        width -= extrawidth;
icculus@1047
   701
        valigner = VEC_ALIGNER(src);
icculus@1047
   702
        vbits = vec_ld(0, src);
icculus@1047
   703
icculus@1047
   704
       while (width) {
icculus@1047
   705
            voverflow = vec_ld(15, src);
icculus@1047
   706
            src += 4;
icculus@1047
   707
            width -= 4;
icculus@1047
   708
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
icculus@1047
   709
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
icculus@1047
   710
            vec_st(vbits, 0, dst);  /* store it back out. */
icculus@1047
   711
            dst += 4;
icculus@1047
   712
            vbits = voverflow;
icculus@1047
   713
        }
icculus@1047
   714
icculus@1047
   715
        assert(width == 0);
icculus@1047
   716
icculus@1047
   717
        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
icculus@1047
   718
        while (extrawidth) {
icculus@1047
   719
            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
icculus@1047
   720
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
slouken@6294
   721
            if(!srcfmt->Amask)
slouken@6294
   722
              a = srcfmt->alpha;
icculus@1047
   723
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   724
            extrawidth--;
icculus@1047
   725
        }
icculus@1047
   726
icculus@1047
   727
        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
icculus@1047
   728
        dst += dstskip >> 2;
icculus@1047
   729
    }
icculus@1047
   730
icculus@1047
   731
}
icculus@1047
   732
icculus@1047
   733
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
icculus@1047
   734
/* Use this on a G4 */
icculus@1047
   735
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
icculus@1047
   736
{
icculus@1047
   737
    const int scalar_dst_lead = sizeof (Uint32) * 4;
icculus@1047
   738
    const int vector_dst_lead = sizeof (Uint32) * 16;
icculus@1047
   739
icculus@1047
   740
    int height = info->d_height;
icculus@1047
   741
    Uint32 *src = (Uint32 *) info->s_pixels;
icculus@1047
   742
    int srcskip = info->s_skip;
icculus@1047
   743
    Uint32 *dst = (Uint32 *) info->d_pixels;
icculus@1047
   744
    int dstskip = info->d_skip;
icculus@1047
   745
    SDL_PixelFormat *srcfmt = info->src;
icculus@1047
   746
    SDL_PixelFormat *dstfmt = info->dst;
icculus@1047
   747
    vector unsigned int vzero = vec_splat_u32(0);
icculus@1047
   748
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
icculus@1047
   749
    if (dstfmt->Amask && !srcfmt->Amask) {
icculus@1047
   750
        if (srcfmt->alpha) {
icculus@1047
   751
            vector unsigned char valpha;
icculus@1047
   752
            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
icculus@1047
   753
            vzero = (vector unsigned int)vec_splat(valpha, 0);
icculus@1047
   754
        }
icculus@1047
   755
    }
icculus@1047
   756
slouken@1339
   757
    assert(srcfmt->BytesPerPixel == 4);
slouken@1339
   758
    assert(dstfmt->BytesPerPixel == 4);
icculus@1047
   759
icculus@1047
   760
    while (height--) {
icculus@1047
   761
        vector unsigned char valigner;
icculus@1047
   762
        vector unsigned int vbits;
icculus@1047
   763
        vector unsigned int voverflow;
icculus@1047
   764
        Uint32 bits;
icculus@1047
   765
        Uint8 r, g, b, a;
icculus@1047
   766
icculus@1047
   767
        int width = info->d_width;
icculus@1047
   768
        int extrawidth;
icculus@1047
   769
icculus@1047
   770
        /* do scalar until we can align... */
icculus@1047
   771
        while ((UNALIGNED_PTR(dst)) && (width)) {
icculus@1047
   772
            vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
icculus@1047
   773
            vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
icculus@1047
   774
            bits = *(src++);
icculus@1047
   775
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
slouken@6294
   776
            if(!srcfmt->Amask)
slouken@6294
   777
              a = srcfmt->alpha;
icculus@1047
   778
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   779
            width--;
icculus@1047
   780
        }
icculus@1047
   781
icculus@1047
   782
        /* After all that work, here's the vector part! */
icculus@1047
   783
        extrawidth = (width % 4);
icculus@1047
   784
        width -= extrawidth;
icculus@1047
   785
        valigner = VEC_ALIGNER(src);
icculus@1047
   786
        vbits = vec_ld(0, src);
icculus@1047
   787
icculus@1047
   788
        while (width) {
icculus@1047
   789
            vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
icculus@1047
   790
            vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
icculus@1047
   791
            voverflow = vec_ld(15, src);
icculus@1047
   792
            src += 4;
icculus@1047
   793
            width -= 4;
icculus@1047
   794
            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
icculus@1047
   795
            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
icculus@1047
   796
            vec_st(vbits, 0, dst);  /* store it back out. */
icculus@1047
   797
            dst += 4;
icculus@1047
   798
            vbits = voverflow;
icculus@1047
   799
        }
icculus@1047
   800
        
icculus@1047
   801
        assert(width == 0);
icculus@1047
   802
icculus@1047
   803
        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
icculus@1047
   804
        while (extrawidth) {
icculus@1047
   805
            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
icculus@1047
   806
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
slouken@6294
   807
            if(!srcfmt->Amask)
slouken@6294
   808
              a = srcfmt->alpha;
icculus@1047
   809
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
icculus@1047
   810
            extrawidth--;
icculus@1047
   811
        }
icculus@1047
   812
icculus@1047
   813
        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
icculus@1047
   814
        dst += dstskip >> 2;
icculus@1047
   815
    }
icculus@1047
   816
icculus@1047
   817
    vec_dss(DST_CHAN_SRC);
icculus@1047
   818
    vec_dss(DST_CHAN_DEST);
icculus@1047
   819
}
icculus@1047
   820
icculus@1047
   821
/*
 * Return the bit set of blit features, computing it on the first call and
 * caching it in a function-local static afterwards.
 *
 *   bit value 1: has-MMX
 *   bit value 2: has-AltiVec
 *   bit value 4: dont-use-prefetch
 *
 * The environment variable SDL_ALTIVEC_BLIT_FEATURES, when set, replaces the
 * detected value (parsed as an unsigned decimal; 0 if parsing fails).
 */
static Uint32 GetBlitFeatures( void )
{
    /* 0xffffffff marks "not computed yet". */
    static Uint32 features = 0xffffffff;
    char *env;

    if (features != 0xffffffff) {
        return features;
    }

    /* Provide an override for testing .. */
    env = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    if (env) {
        features = 0;
        SDL_sscanf(env, "%u", &features);
    } else {
        Uint32 detected = 0;

        /* Feature 1 is has-MMX */
        if (SDL_HasMMX()) {
            detected |= 1;
        }
        /* Feature 2 is has-AltiVec */
        if (SDL_HasAltiVec()) {
            detected |= 2;
        }
        /* Feature 4 is dont-use-prefetch */
        /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
        if (GetL3CacheSize() == 0) {
            detected |= 4;
        }
        features = detected;
    }
    return features;
}
slouken@1795
   844
#if __MWERKS__
slouken@1795
   845
#pragma altivec_model off
slouken@1795
   846
#endif
icculus@1047
   847
#else
icculus@1047
   848
/* Feature 1 is has-MMX */
icculus@1047
   849
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
icculus@1047
   850
#endif
icculus@1047
   851
icculus@1053
   852
/*
 * Endian-dependent element indices: HI selects the element that supplies the
 * high-order half and LO the one that supplies the low-order half when two
 * adjacent elements are combined into one wider value (see the *_TWO and
 * RGB565_32 macros).
 */
#if SDL_BYTEORDER != SDL_LIL_ENDIAN
/* big endian */
#define HI	0
#define LO	1
#else /* SDL_BYTEORDER == SDL_LIL_ENDIAN */
#define HI	1
#define LO	0
#endif
icculus@1053
   860
slouken@1361
   861
#if SDL_HERMES_BLITTERS
slouken@0
   862
slouken@0
   863
/* Heheheh, we coerce Hermes into using SDL blit information */
slouken@0
   864
#define X86_ASSEMBLER
slouken@0
   865
#define HermesConverterInterface	SDL_BlitInfo
slouken@0
   866
#define HermesClearInterface		void
slouken@0
   867
#define STACKCALL
slouken@0
   868
slouken@1361
   869
#include "../hermes/HeadMMX.h"
slouken@1361
   870
#include "../hermes/HeadX86.h"
slouken@0
   871
slouken@0
   872
#else
slouken@0
   873
slouken@0
   874
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
slouken@0
   875
#define RGB888_RGB332(dst, src) { \
slouken@1428
   876
	dst = (Uint8)((((src)&0x00E00000)>>16)| \
slouken@1428
   877
	              (((src)&0x0000E000)>>11)| \
slouken@1428
   878
	              (((src)&0x000000C0)>>6)); \
slouken@0
   879
}
slouken@0
   880
static void Blit_RGB888_index8(SDL_BlitInfo *info)
slouken@0
   881
{
slouken@0
   882
#ifndef USE_DUFFS_LOOP
slouken@0
   883
	int c;
slouken@0
   884
#endif
slouken@0
   885
	int width, height;
slouken@0
   886
	Uint32 *src;
slouken@91
   887
	const Uint8 *map;
slouken@91
   888
	Uint8 *dst;
slouken@0
   889
	int srcskip, dstskip;
slouken@0
   890
slouken@0
   891
	/* Set up some basic variables */
slouken@0
   892
	width = info->d_width;
slouken@0
   893
	height = info->d_height;
slouken@0
   894
	src = (Uint32 *)info->s_pixels;
slouken@0
   895
	srcskip = info->s_skip/4;
slouken@0
   896
	dst = info->d_pixels;
slouken@0
   897
	dstskip = info->d_skip;
slouken@0
   898
	map = info->table;
slouken@0
   899
slouken@0
   900
	if ( map == NULL ) {
slouken@0
   901
		while ( height-- ) {
slouken@0
   902
#ifdef USE_DUFFS_LOOP
slouken@0
   903
			DUFFS_LOOP(
slouken@0
   904
				RGB888_RGB332(*dst++, *src);
slouken@0
   905
			, width);
slouken@0
   906
#else
slouken@0
   907
			for ( c=width/4; c; --c ) {
slouken@0
   908
				/* Pack RGB into 8bit pixel */
slouken@0
   909
				++src;
slouken@0
   910
				RGB888_RGB332(*dst++, *src);
slouken@0
   911
				++src;
slouken@0
   912
				RGB888_RGB332(*dst++, *src);
slouken@0
   913
				++src;
slouken@0
   914
				RGB888_RGB332(*dst++, *src);
slouken@0
   915
				++src;
slouken@0
   916
			}
slouken@91
   917
			switch ( width & 3 ) {
slouken@0
   918
				case 3:
slouken@0
   919
					RGB888_RGB332(*dst++, *src);
slouken@0
   920
					++src;
slouken@0
   921
				case 2:
slouken@0
   922
					RGB888_RGB332(*dst++, *src);
slouken@0
   923
					++src;
slouken@0
   924
				case 1:
slouken@0
   925
					RGB888_RGB332(*dst++, *src);
slouken@0
   926
					++src;
slouken@0
   927
			}
slouken@0
   928
#endif /* USE_DUFFS_LOOP */
slouken@0
   929
			src += srcskip;
slouken@0
   930
			dst += dstskip;
slouken@0
   931
		}
slouken@0
   932
	} else {
icculus@1162
   933
		int Pixel;
slouken@0
   934
slouken@0
   935
		while ( height-- ) {
slouken@0
   936
#ifdef USE_DUFFS_LOOP
slouken@0
   937
			DUFFS_LOOP(
icculus@1162
   938
				RGB888_RGB332(Pixel, *src);
icculus@1162
   939
				*dst++ = map[Pixel];
slouken@0
   940
				++src;
slouken@0
   941
			, width);
slouken@0
   942
#else
slouken@0
   943
			for ( c=width/4; c; --c ) {
slouken@0
   944
				/* Pack RGB into 8bit pixel */
icculus@1162
   945
				RGB888_RGB332(Pixel, *src);
icculus@1162
   946
				*dst++ = map[Pixel];
slouken@0
   947
				++src;
icculus@1162
   948
				RGB888_RGB332(Pixel, *src);
icculus@1162
   949
				*dst++ = map[Pixel];
slouken@0
   950
				++src;
icculus@1162
   951
				RGB888_RGB332(Pixel, *src);
icculus@1162
   952
				*dst++ = map[Pixel];
slouken@0
   953
				++src;
icculus@1162
   954
				RGB888_RGB332(Pixel, *src);
icculus@1162
   955
				*dst++ = map[Pixel];
slouken@0
   956
				++src;
slouken@0
   957
			}
slouken@91
   958
			switch ( width & 3 ) {
slouken@0
   959
				case 3:
icculus@1162
   960
					RGB888_RGB332(Pixel, *src);
icculus@1162
   961
					*dst++ = map[Pixel];
slouken@0
   962
					++src;
slouken@0
   963
				case 2:
icculus@1162
   964
					RGB888_RGB332(Pixel, *src);
icculus@1162
   965
					*dst++ = map[Pixel];
slouken@0
   966
					++src;
slouken@0
   967
				case 1:
icculus@1162
   968
					RGB888_RGB332(Pixel, *src);
icculus@1162
   969
					*dst++ = map[Pixel];
slouken@0
   970
					++src;
slouken@0
   971
			}
slouken@0
   972
#endif /* USE_DUFFS_LOOP */
slouken@0
   973
			src += srcskip;
slouken@0
   974
			dst += dstskip;
slouken@0
   975
		}
slouken@0
   976
	}
slouken@0
   977
}
slouken@0
   978
/* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
slouken@0
   979
#define RGB888_RGB555(dst, src) { \
slouken@1428
   980
	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
slouken@1428
   981
	                            (((*src)&0x0000F800)>>6)| \
slouken@1428
   982
	                            (((*src)&0x000000F8)>>3)); \
slouken@0
   983
}
slouken@0
   984
#define RGB888_RGB555_TWO(dst, src) { \
slouken@0
   985
	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
slouken@0
   986
	                     (((src[HI])&0x0000F800)>>6)| \
slouken@0
   987
	                     (((src[HI])&0x000000F8)>>3))<<16)| \
slouken@0
   988
	                     (((src[LO])&0x00F80000)>>9)| \
slouken@0
   989
	                     (((src[LO])&0x0000F800)>>6)| \
slouken@0
   990
	                     (((src[LO])&0x000000F8)>>3); \
slouken@0
   991
}
slouken@0
   992
static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
slouken@0
   993
{
slouken@0
   994
#ifndef USE_DUFFS_LOOP
slouken@0
   995
	int c;
slouken@0
   996
#endif
slouken@0
   997
	int width, height;
slouken@0
   998
	Uint32 *src;
slouken@0
   999
	Uint16 *dst;
slouken@0
  1000
	int srcskip, dstskip;
slouken@0
  1001
slouken@0
  1002
	/* Set up some basic variables */
slouken@0
  1003
	width = info->d_width;
slouken@0
  1004
	height = info->d_height;
slouken@0
  1005
	src = (Uint32 *)info->s_pixels;
slouken@0
  1006
	srcskip = info->s_skip/4;
slouken@0
  1007
	dst = (Uint16 *)info->d_pixels;
slouken@0
  1008
	dstskip = info->d_skip/2;
slouken@0
  1009
slouken@0
  1010
#ifdef USE_DUFFS_LOOP
slouken@0
  1011
	while ( height-- ) {
slouken@0
  1012
		DUFFS_LOOP(
slouken@0
  1013
			RGB888_RGB555(dst, src);
slouken@0
  1014
			++src;
slouken@0
  1015
			++dst;
slouken@0
  1016
		, width);
slouken@0
  1017
		src += srcskip;
slouken@0
  1018
		dst += dstskip;
slouken@0
  1019
	}
slouken@0
  1020
#else
slouken@0
  1021
	/* Memory align at 4-byte boundary, if necessary */
slouken@0
  1022
	if ( (long)dst & 0x03 ) {
slouken@0
  1023
		/* Don't do anything if width is 0 */
slouken@0
  1024
		if ( width == 0 ) {
slouken@0
  1025
			return;
slouken@0
  1026
		}
slouken@0
  1027
		--width;
slouken@0
  1028
slouken@0
  1029
		while ( height-- ) {
slouken@0
  1030
			/* Perform copy alignment */
slouken@0
  1031
			RGB888_RGB555(dst, src);
slouken@0
  1032
			++src;
slouken@0
  1033
			++dst;
slouken@0
  1034
slouken@0
  1035
			/* Copy in 4 pixel chunks */
slouken@0
  1036
			for ( c=width/4; c; --c ) {
slouken@0
  1037
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1038
				src += 2;
slouken@0
  1039
				dst += 2;
slouken@0
  1040
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1041
				src += 2;
slouken@0
  1042
				dst += 2;
slouken@0
  1043
			}
slouken@0
  1044
			/* Get any leftovers */
slouken@91
  1045
			switch (width & 3) {
slouken@0
  1046
				case 3:
slouken@0
  1047
					RGB888_RGB555(dst, src);
slouken@0
  1048
					++src;
slouken@0
  1049
					++dst;
slouken@0
  1050
				case 2:
slouken@0
  1051
					RGB888_RGB555_TWO(dst, src);
slouken@0
  1052
					src += 2;
slouken@0
  1053
					dst += 2;
slouken@0
  1054
					break;
slouken@0
  1055
				case 1:
slouken@0
  1056
					RGB888_RGB555(dst, src);
slouken@0
  1057
					++src;
slouken@0
  1058
					++dst;
slouken@0
  1059
					break;
slouken@0
  1060
			}
slouken@0
  1061
			src += srcskip;
slouken@0
  1062
			dst += dstskip;
slouken@0
  1063
		}
slouken@0
  1064
	} else { 
slouken@0
  1065
		while ( height-- ) {
slouken@0
  1066
			/* Copy in 4 pixel chunks */
slouken@0
  1067
			for ( c=width/4; c; --c ) {
slouken@0
  1068
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1069
				src += 2;
slouken@0
  1070
				dst += 2;
slouken@0
  1071
				RGB888_RGB555_TWO(dst, src);
slouken@0
  1072
				src += 2;
slouken@0
  1073
				dst += 2;
slouken@0
  1074
			}
slouken@0
  1075
			/* Get any leftovers */
slouken@91
  1076
			switch (width & 3) {
slouken@0
  1077
				case 3:
slouken@0
  1078
					RGB888_RGB555(dst, src);
slouken@0
  1079
					++src;
slouken@0
  1080
					++dst;
slouken@0
  1081
				case 2:
slouken@0
  1082
					RGB888_RGB555_TWO(dst, src);
slouken@0
  1083
					src += 2;
slouken@0
  1084
					dst += 2;
slouken@0
  1085
					break;
slouken@0
  1086
				case 1:
slouken@0
  1087
					RGB888_RGB555(dst, src);
slouken@0
  1088
					++src;
slouken@0
  1089
					++dst;
slouken@0
  1090
					break;
slouken@0
  1091
			}
slouken@0
  1092
			src += srcskip;
slouken@0
  1093
			dst += dstskip;
slouken@0
  1094
		}
slouken@0
  1095
	}
slouken@0
  1096
#endif /* USE_DUFFS_LOOP */
slouken@0
  1097
}
slouken@0
  1098
/* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
slouken@0
  1099
#define RGB888_RGB565(dst, src) { \
slouken@1428
  1100
	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
slouken@1428
  1101
	                            (((*src)&0x0000FC00)>>5)| \
slouken@1428
  1102
	                            (((*src)&0x000000F8)>>3)); \
slouken@0
  1103
}
slouken@0
  1104
#define RGB888_RGB565_TWO(dst, src) { \
slouken@0
  1105
	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
slouken@0
  1106
	                     (((src[HI])&0x0000FC00)>>5)| \
slouken@0
  1107
	                     (((src[HI])&0x000000F8)>>3))<<16)| \
slouken@0
  1108
	                     (((src[LO])&0x00F80000)>>8)| \
slouken@0
  1109
	                     (((src[LO])&0x0000FC00)>>5)| \
slouken@0
  1110
	                     (((src[LO])&0x000000F8)>>3); \
slouken@0
  1111
}
slouken@0
  1112
static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
slouken@0
  1113
{
slouken@0
  1114
#ifndef USE_DUFFS_LOOP
slouken@0
  1115
	int c;
slouken@0
  1116
#endif
slouken@0
  1117
	int width, height;
slouken@0
  1118
	Uint32 *src;
slouken@0
  1119
	Uint16 *dst;
slouken@0
  1120
	int srcskip, dstskip;
slouken@0
  1121
slouken@0
  1122
	/* Set up some basic variables */
slouken@0
  1123
	width = info->d_width;
slouken@0
  1124
	height = info->d_height;
slouken@0
  1125
	src = (Uint32 *)info->s_pixels;
slouken@0
  1126
	srcskip = info->s_skip/4;
slouken@0
  1127
	dst = (Uint16 *)info->d_pixels;
slouken@0
  1128
	dstskip = info->d_skip/2;
slouken@0
  1129
slouken@0
  1130
#ifdef USE_DUFFS_LOOP
slouken@0
  1131
	while ( height-- ) {
slouken@0
  1132
		DUFFS_LOOP(
slouken@0
  1133
			RGB888_RGB565(dst, src);
slouken@0
  1134
			++src;
slouken@0
  1135
			++dst;
slouken@0
  1136
		, width);
slouken@0
  1137
		src += srcskip;
slouken@0
  1138
		dst += dstskip;
slouken@0
  1139
	}
slouken@0
  1140
#else
slouken@0
  1141
	/* Memory align at 4-byte boundary, if necessary */
slouken@0
  1142
	if ( (long)dst & 0x03 ) {
slouken@0
  1143
		/* Don't do anything if width is 0 */
slouken@0
  1144
		if ( width == 0 ) {
slouken@0
  1145
			return;
slouken@0
  1146
		}
slouken@0
  1147
		--width;
slouken@0
  1148
slouken@0
  1149
		while ( height-- ) {
slouken@0
  1150
			/* Perform copy alignment */
slouken@0
  1151
			RGB888_RGB565(dst, src);
slouken@0
  1152
			++src;
slouken@0
  1153
			++dst;
slouken@0
  1154
slouken@0
  1155
			/* Copy in 4 pixel chunks */
slouken@0
  1156
			for ( c=width/4; c; --c ) {
slouken@0
  1157
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1158
				src += 2;
slouken@0
  1159
				dst += 2;
slouken@0
  1160
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1161
				src += 2;
slouken@0
  1162
				dst += 2;
slouken@0
  1163
			}
slouken@0
  1164
			/* Get any leftovers */
slouken@91
  1165
			switch (width & 3) {
slouken@0
  1166
				case 3:
slouken@0
  1167
					RGB888_RGB565(dst, src);
slouken@0
  1168
					++src;
slouken@0
  1169
					++dst;
slouken@0
  1170
				case 2:
slouken@0
  1171
					RGB888_RGB565_TWO(dst, src);
slouken@0
  1172
					src += 2;
slouken@0
  1173
					dst += 2;
slouken@0
  1174
					break;
slouken@0
  1175
				case 1:
slouken@0
  1176
					RGB888_RGB565(dst, src);
slouken@0
  1177
					++src;
slouken@0
  1178
					++dst;
slouken@0
  1179
					break;
slouken@0
  1180
			}
slouken@0
  1181
			src += srcskip;
slouken@0
  1182
			dst += dstskip;
slouken@0
  1183
		}
slouken@0
  1184
	} else { 
slouken@0
  1185
		while ( height-- ) {
slouken@0
  1186
			/* Copy in 4 pixel chunks */
slouken@0
  1187
			for ( c=width/4; c; --c ) {
slouken@0
  1188
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1189
				src += 2;
slouken@0
  1190
				dst += 2;
slouken@0
  1191
				RGB888_RGB565_TWO(dst, src);
slouken@0
  1192
				src += 2;
slouken@0
  1193
				dst += 2;
slouken@0
  1194
			}
slouken@0
  1195
			/* Get any leftovers */
slouken@91
  1196
			switch (width & 3) {
slouken@0
  1197
				case 3:
slouken@0
  1198
					RGB888_RGB565(dst, src);
slouken@0
  1199
					++src;
slouken@0
  1200
					++dst;
slouken@0
  1201
				case 2:
slouken@0
  1202
					RGB888_RGB565_TWO(dst, src);
slouken@0
  1203
					src += 2;
slouken@0
  1204
					dst += 2;
slouken@0
  1205
					break;
slouken@0
  1206
				case 1:
slouken@0
  1207
					RGB888_RGB565(dst, src);
slouken@0
  1208
					++src;
slouken@0
  1209
					++dst;
slouken@0
  1210
					break;
slouken@0
  1211
			}
slouken@0
  1212
			src += srcskip;
slouken@0
  1213
			dst += dstskip;
slouken@0
  1214
		}
slouken@0
  1215
	}
slouken@0
  1216
#endif /* USE_DUFFS_LOOP */
slouken@0
  1217
}
slouken@0
  1218
slouken@1361
  1219
#endif /* SDL_HERMES_BLITTERS */
slouken@0
  1220
slouken@0
  1221
slouken@0
  1222
/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
icculus@1047
  1223
#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
slouken@91
  1224
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
slouken@0
  1225
{
slouken@0
  1226
#ifndef USE_DUFFS_LOOP
slouken@0
  1227
	int c;
slouken@0
  1228
#endif
slouken@0
  1229
	int width, height;
slouken@0
  1230
	Uint8 *src;
slouken@0
  1231
	Uint32 *dst;
slouken@0
  1232
	int srcskip, dstskip;
slouken@0
  1233
slouken@0
  1234
	/* Set up some basic variables */
slouken@0
  1235
	width = info->d_width;
slouken@0
  1236
	height = info->d_height;
slouken@0
  1237
	src = (Uint8 *)info->s_pixels;
slouken@0
  1238
	srcskip = info->s_skip;
slouken@0
  1239
	dst = (Uint32 *)info->d_pixels;
slouken@0
  1240
	dstskip = info->d_skip/4;
slouken@0
  1241
slouken@0
  1242
#ifdef USE_DUFFS_LOOP
slouken@0
  1243
	while ( height-- ) {
slouken@0
  1244
		DUFFS_LOOP(
slouken@0
  1245
		{
slouken@0
  1246
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1247
			src += 2;
slouken@0
  1248
		},
slouken@0
  1249
		width);
slouken@0
  1250
		src += srcskip;
slouken@0
  1251
		dst += dstskip;
slouken@0
  1252
	}
slouken@0
  1253
#else
slouken@0
  1254
	while ( height-- ) {
slouken@0
  1255
		/* Copy in 4 pixel chunks */
slouken@0
  1256
		for ( c=width/4; c; --c ) {
slouken@0
  1257
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1258
			src += 2;
slouken@0
  1259
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1260
			src += 2;
slouken@0
  1261
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1262
			src += 2;
slouken@0
  1263
			*dst++ = RGB565_32(dst, src, map);
slouken@0
  1264
			src += 2;
slouken@0
  1265
		}
slouken@0
  1266
		/* Get any leftovers */
slouken@91
  1267
		switch (width & 3) {
slouken@0
  1268
			case 3:
slouken@0
  1269
				*dst++ = RGB565_32(dst, src, map);
slouken@0
  1270
				src += 2;
slouken@0
  1271
			case 2:
slouken@0
  1272
				*dst++ = RGB565_32(dst, src, map);
slouken@0
  1273
				src += 2;
slouken@0
  1274
			case 1:
slouken@0
  1275
				*dst++ = RGB565_32(dst, src, map);
slouken@0
  1276
				src += 2;
slouken@0
  1277
				break;
slouken@0
  1278
		}
slouken@0
  1279
		src += srcskip;
slouken@0
  1280
		dst += dstskip;
slouken@0
  1281
	}
slouken@0
  1282
#endif /* USE_DUFFS_LOOP */
slouken@0
  1283
}
slouken@0
  1284
slouken@0
  1285
/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
slouken@91
  1286
static const Uint32 RGB565_ARGB8888_LUT[512] = {
slouken@0
  1287
		0x00000000, 0xff000000, 0x00000008, 0xff002000,
slouken@0
  1288
		0x00000010, 0xff004000, 0x00000018, 0xff006100,
slouken@0
  1289
		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
slouken@0
  1290
		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
slouken@0
  1291
		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
slouken@0
  1292
		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
slouken@0
  1293
		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
slouken@0
  1294
		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
slouken@0
  1295
		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
slouken@0
  1296
		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
slouken@0
  1297
		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
slouken@0
  1298
		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
slouken@0
  1299
		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
slouken@0
  1300
		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
slouken@0
  1301
		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
slouken@0
  1302
		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
slouken@0
  1303
		0x00000400, 0xff200000, 0x00000408, 0xff202000,
slouken@0
  1304
		0x00000410, 0xff204000, 0x00000418, 0xff206100,
slouken@0
  1305
		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
slouken@0
  1306
		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
slouken@0
  1307
		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
slouken@0
  1308
		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
slouken@0
  1309
		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
slouken@0
  1310
		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
slouken@0
  1311
		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
slouken@0
  1312
		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
slouken@0
  1313
		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
slouken@0
  1314
		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
slouken@0
  1315
		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
slouken@0
  1316
		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
slouken@0
  1317
		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
slouken@0
  1318
		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
slouken@0
  1319
		0x00000800, 0xff410000, 0x00000808, 0xff412000,
slouken@0
  1320
		0x00000810, 0xff414000, 0x00000818, 0xff416100,
slouken@0
  1321
		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
slouken@0
  1322
		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
slouken@0
  1323
		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
slouken@0
  1324
		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
slouken@0
  1325
		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
slouken@0
  1326
		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
slouken@0
  1327
		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
slouken@0
  1328
		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
slouken@0
  1329
		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
slouken@0
  1330
		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
slouken@0
  1331
		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
slouken@0
  1332
		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
slouken@0
  1333
		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
slouken@0
  1334
		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
slouken@0
  1335
		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
slouken@0
  1336
		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
slouken@0
  1337
		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
slouken@0
  1338
		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
slouken@0
  1339
		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
slouken@0
  1340
		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
slouken@0
  1341
		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
slouken@0
  1342
		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
slouken@0
  1343
		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
slouken@0
  1344
		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
slouken@0
  1345
		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
slouken@0
  1346
		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
slouken@0
  1347
		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
slouken@0
  1348
		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
slouken@0
  1349
		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
slouken@0
  1350
		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
slouken@0
  1351
		0x00001000, 0xff830000, 0x00001008, 0xff832000,
slouken@0
  1352
		0x00001010, 0xff834000, 0x00001018, 0xff836100,
slouken@0
  1353
		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
slouken@0
  1354
		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
slouken@0
  1355
		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
slouken@0
  1356
		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
slouken@0
  1357
		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
slouken@0
  1358
		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
slouken@0
  1359
		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
slouken@0
  1360
		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
slouken@0
  1361
		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
slouken@0
  1362
		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
slouken@0
  1363
		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
slouken@0
  1364
		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
slouken@0
  1365
		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
slouken@0
  1366
		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
slouken@0
  1367
		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
slouken@0
  1368
		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
slouken@0
  1369
		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
slouken@0
  1370
		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
slouken@0
  1371
		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
slouken@0
  1372
		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
slouken@0
  1373
		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
slouken@0
  1374
		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
slouken@0
  1375
		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
slouken@0
  1376
		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
slouken@0
  1377
		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
slouken@0
  1378
		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
slouken@0
  1379
		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
slouken@0
  1380
		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
slouken@0
  1381
		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
slouken@0
  1382
		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
slouken@0
  1383
		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
slouken@0
  1384
		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
slouken@0
  1385
		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
slouken@0
  1386
		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
slouken@0
  1387
		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
slouken@0
  1388
		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
slouken@0
  1389
		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
slouken@0
  1390
		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
slouken@0
  1391
		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
slouken@0
  1392
		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
slouken@0
  1393
		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
slouken@0
  1394
		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
slouken@0
  1395
		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
slouken@0
  1396
		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
slouken@0
  1397
		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
slouken@0
  1398
		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
slouken@0
  1399
		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
slouken@0
  1400
		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
slouken@0
  1401
		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
slouken@0
  1402
		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
slouken@0
  1403
		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
slouken@0
  1404
		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
slouken@0
  1405
		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
slouken@0
  1406
		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
slouken@0
  1407
		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
slouken@0
  1408
		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
slouken@0
  1409
		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
slouken@0
  1410
		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
slouken@0
  1411
		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
slouken@0
  1412
		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
slouken@0
  1413
		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
slouken@0
  1414
		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
slouken@0
  1415
};
slouken@0
  1416
/*
 * RGB 5-6-5 --> ARGB 8-8-8-8 entry point.
 *
 * info: blit description, handed through untouched.
 *
 * Does no pixel work itself; it only binds the ARGB lookup table to the
 * shared Blit_RGB565_32() routine.
 */
static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
{
	const Uint32 *lut = RGB565_ARGB8888_LUT;

	Blit_RGB565_32(info, lut);
}
slouken@0
  1420
slouken@0
  1421
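/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8.
 * The 512-entry table below is laid out as 256 pairs of words: even slots
 * carry an opaque 0xff alpha, the blue channel and the low green
 * contribution, odd slots carry red and the high green contribution.
 * NOTE(review): presumably each source byte selects one slot of a pair and
 * RGB565_32() combines the two -- confirm against the macro definition. */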
slouken@91
  1422
static const Uint32 RGB565_ABGR8888_LUT[512] = {
slouken@0
  1423
		0xff000000, 0x00000000, 0xff080000, 0x00002000,
slouken@0
  1424
		0xff100000, 0x00004000, 0xff180000, 0x00006100,
slouken@0
  1425
		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
slouken@0
  1426
		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
slouken@0
  1427
		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
slouken@0
  1428
		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
slouken@0
  1429
		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
slouken@0
  1430
		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
slouken@0
  1431
		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
slouken@0
  1432
		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
slouken@0
  1433
		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
slouken@0
  1434
		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
slouken@0
  1435
		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
slouken@0
  1436
		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
slouken@0
  1437
		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
slouken@0
  1438
		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
slouken@0
  1439
		0xff000400, 0x00000020, 0xff080400, 0x00002020,
slouken@0
  1440
		0xff100400, 0x00004020, 0xff180400, 0x00006120,
slouken@0
  1441
		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
slouken@0
  1442
		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
slouken@0
  1443
		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
slouken@0
  1444
		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
slouken@0
  1445
		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
slouken@0
  1446
		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
slouken@0
  1447
		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
slouken@0
  1448
		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
slouken@0
  1449
		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
slouken@0
  1450
		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
slouken@0
  1451
		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
slouken@0
  1452
		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
slouken@0
  1453
		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
slouken@0
  1454
		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
slouken@0
  1455
		0xff000800, 0x00000041, 0xff080800, 0x00002041,
slouken@0
  1456
		0xff100800, 0x00004041, 0xff180800, 0x00006141,
slouken@0
  1457
		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
slouken@0
  1458
		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
slouken@0
  1459
		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
slouken@0
  1460
		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
slouken@0
  1461
		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
slouken@0
  1462
		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
slouken@0
  1463
		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
slouken@0
  1464
		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
slouken@0
  1465
		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
slouken@0
  1466
		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
slouken@0
  1467
		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
slouken@0
  1468
		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
slouken@0
  1469
		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
slouken@0
  1470
		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
slouken@0
  1471
		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
slouken@0
  1472
		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
slouken@0
  1473
		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
slouken@0
  1474
		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
slouken@0
  1475
		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
slouken@0
  1476
		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
slouken@0
  1477
		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
slouken@0
  1478
		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
slouken@0
  1479
		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
slouken@0
  1480
		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
slouken@0
  1481
		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
slouken@0
  1482
		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
slouken@0
  1483
		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
slouken@0
  1484
		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
slouken@0
  1485
		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
slouken@0
  1486
		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
slouken@0
  1487
		0xff001000, 0x00000083, 0xff081000, 0x00002083,
slouken@0
  1488
		0xff101000, 0x00004083, 0xff181000, 0x00006183,
slouken@0
  1489
		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
slouken@0
  1490
		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
slouken@0
  1491
		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
slouken@0
  1492
		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
slouken@0
  1493
		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
slouken@0
  1494
		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
slouken@0
  1495
		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
slouken@0
  1496
		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
slouken@0
  1497
		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
slouken@0
  1498
		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
slouken@0
  1499
		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
slouken@0
  1500
		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
slouken@0
  1501
		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
slouken@0
  1502
		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
slouken@0
  1503
		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
slouken@0
  1504
		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
slouken@0
  1505
		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
slouken@0
  1506
		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
slouken@0
  1507
		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
slouken@0
  1508
		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
slouken@0
  1509
		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
slouken@0
  1510
		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
slouken@0
  1511
		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
slouken@0
  1512
		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
slouken@0
  1513
		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
slouken@0
  1514
		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
slouken@0
  1515
		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
slouken@0
  1516
		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
slouken@0
  1517
		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
slouken@0
  1518
		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
slouken@0
  1519
		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
slouken@0
  1520
		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
slouken@0
  1521
		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
slouken@0
  1522
		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
slouken@0
  1523
		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
slouken@0
  1524
		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
slouken@0
  1525
		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
slouken@0
  1526
		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
slouken@0
  1527
		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
slouken@0
  1528
		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
slouken@0
  1529
		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
slouken@0
  1530
		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
slouken@0
  1531
		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
slouken@0
  1532
		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
slouken@0
  1533
		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
slouken@0
  1534
		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
slouken@0
  1535
		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
slouken@0
  1536
		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
slouken@0
  1537
		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
slouken@0
  1538
		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
slouken@0
  1539
		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
slouken@0
  1540
		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
slouken@0
  1541
		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
slouken@0
  1542
		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
slouken@0
  1543
		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
slouken@0
  1544
		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
slouken@0
  1545
		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
slouken@0
  1546
		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
slouken@0
  1547
		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
slouken@0
  1548
		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
slouken@0
  1549
		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
slouken@0
  1550
		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
slouken@0
  1551
};
slouken@0
  1552
/*
 * RGB 5-6-5 --> ABGR 8-8-8-8 entry point.
 *
 * info: blit description, handed through untouched.
 *
 * Does no pixel work itself; it only binds the ABGR lookup table to the
 * shared Blit_RGB565_32() routine.
 */
static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
{
	const Uint32 *lut = RGB565_ABGR8888_LUT;

	Blit_RGB565_32(info, lut);
}
slouken@0
  1556
slouken@0
  1557
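/* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8.
 * The 512-entry table below is laid out as 256 pairs of words: even slots
 * carry an opaque 0xff alpha (low byte), the blue channel and the low green
 * contribution, odd slots carry red and the high green contribution.
 * NOTE(review): presumably each source byte selects one slot of a pair and
 * RGB565_32() combines the two -- confirm against the macro definition. */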
slouken@91
  1558
static const Uint32 RGB565_RGBA8888_LUT[512] = {
slouken@0
  1559
		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
slouken@0
  1560
		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
slouken@0
  1561
		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
slouken@0
  1562
		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
slouken@0
  1563
		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
slouken@0
  1564
		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
slouken@0
  1565
		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
slouken@0
  1566
		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
slouken@0
  1567
		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
slouken@0
  1568
		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
slouken@0
  1569
		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
slouken@0
  1570
		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
slouken@0
  1571
		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
slouken@0
  1572
		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
slouken@0
  1573
		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
slouken@0
  1574
		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
slouken@0
  1575
		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
slouken@0
  1576
		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
slouken@0
  1577
		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
slouken@0
  1578
		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
slouken@0
  1579
		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
slouken@0
  1580
		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
slouken@0
  1581
		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
slouken@0
  1582
		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
slouken@0
  1583
		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
slouken@0
  1584
		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
slouken@0
  1585
		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
slouken@0
  1586
		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
slouken@0
  1587
		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
slouken@0
  1588
		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
slouken@0
  1589
		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
slouken@0
  1590
		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
slouken@0
  1591
		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
slouken@0
  1592
		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
slouken@0
  1593
		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
slouken@0
  1594
		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
slouken@0
  1595
		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
slouken@0
  1596
		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
slouken@0
  1597
		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
slouken@0
  1598
		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
slouken@0
  1599
		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
slouken@0
  1600
		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
slouken@0
  1601
		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
slouken@0
  1602
		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
slouken@0
  1603
		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
slouken@0
  1604
		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
slouken@0
  1605
		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
slouken@0
  1606
		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
slouken@0
  1607
		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
slouken@0
  1608
		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
slouken@0
  1609
		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
slouken@0
  1610
		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
slouken@0
  1611
		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
slouken@0
  1612
		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
slouken@0
  1613
		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
slouken@0
  1614
		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
slouken@0
  1615
		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
slouken@0
  1616
		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
slouken@0
  1617
		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
slouken@0
  1618
		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
slouken@0
  1619
		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
slouken@0
  1620
		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
slouken@0
  1621
		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
slouken@0
  1622
		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
slouken@0
  1623
		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
slouken@0
  1624
		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
slouken@0
  1625
		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
slouken@0
  1626
		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
slouken@0
  1627
		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
slouken@0
  1628
		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
slouken@0
  1629
		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
slouken@0
  1630
		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
slouken@0
  1631
		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
slouken@0
  1632
		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
slouken@0
  1633
		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
slouken@0
  1634
		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
slouken@0
  1635
		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
slouken@0
  1636
		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
slouken@0
  1637
		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
slouken@0
  1638
		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
slouken@0
  1639
		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
slouken@0
  1640
		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
slouken@0
  1641
		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
slouken@0
  1642
		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
slouken@0
  1643
		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
slouken@0
  1644
		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
slouken@0
  1645
		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
slouken@0
  1646
		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
slouken@0
  1647
		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
slouken@0
  1648
		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
slouken@0
  1649
		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
slouken@0
  1650
		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
slouken@0
  1651
		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
slouken@0
  1652
		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
slouken@0
  1653
		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
slouken@0
  1654
		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
slouken@0
  1655
		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
slouken@0
  1656
		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
slouken@0
  1657
		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
slouken@0
  1658
		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
slouken@0
  1659
		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
slouken@0
  1660
		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
slouken@0
  1661
		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
slouken@0
  1662
		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
slouken@0
  1663
		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
slouken@0
  1664
		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
slouken@0
  1665
		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
slouken@0
  1666
		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
slouken@0
  1667
		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
slouken@0
  1668
		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
slouken@0
  1669
		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
slouken@0
  1670
		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
slouken@0
  1671
		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
slouken@0
  1672
		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
slouken@0
  1673
		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
slouken@0
  1674
		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
slouken@0
  1675
		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
slouken@0
  1676
		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
slouken@0
  1677
		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
slouken@0
  1678
		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
slouken@0
  1679
		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
slouken@0
  1680
		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
slouken@0
  1681
		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
slouken@0
  1682
		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
slouken@0
  1683
		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
slouken@0
  1684
		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
slouken@0
  1685
		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
slouken@0
  1686
		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
slouken@0
  1687
};
slouken@0
  1688
/*
 * RGB 5-6-5 --> RGBA 8-8-8-8 entry point.
 *
 * info: blit description, handed through untouched.
 *
 * Does no pixel work itself; it only binds the RGBA lookup table to the
 * shared Blit_RGB565_32() routine.
 */
static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
{
	const Uint32 *lut = RGB565_RGBA8888_LUT;

	Blit_RGB565_32(info, lut);
}
slouken@0
  1692
slouken@0
  1693
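/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8.
 * The 512-entry table below is laid out as 256 pairs of words: even slots
 * carry the blue channel plus the low green contribution (alpha byte 0x00),
 * odd slots carry red, the high green contribution and an opaque 0xff alpha
 * in the low byte.
 * NOTE(review): presumably each source byte selects one slot of a pair and
 * RGB565_32() combines the two -- confirm against the macro definition. */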
slouken@91
  1694
static const Uint32 RGB565_BGRA8888_LUT[512] = {
slouken@0
  1695
		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
slouken@0
  1696
		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
slouken@0
  1697
		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
slouken@0
  1698
		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
slouken@0
  1699
		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
slouken@0
  1700
		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
slouken@0
  1701
		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
slouken@0
  1702
		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
slouken@0
  1703
		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
slouken@0
  1704
		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
slouken@0
  1705
		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
slouken@0
  1706
		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
slouken@0
  1707
		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
slouken@0
  1708
		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
slouken@0
  1709
		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
slouken@0
  1710
		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
slouken@0
  1711
		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
slouken@0
  1712
		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
slouken@0
  1713
		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
slouken@0
  1714
		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
slouken@0
  1715
		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
slouken@0
  1716
		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
slouken@0
  1717
		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
slouken@0
  1718
		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
slouken@0
  1719
		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
slouken@0
  1720
		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
slouken@0
  1721
		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
slouken@0
  1722
		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
slouken@0
  1723
		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
slouken@0
  1724
		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
slouken@0
  1725
		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
slouken@0
  1726
		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
slouken@0
  1727
		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
slouken@0
  1728
		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
slouken@0
  1729
		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
slouken@0
  1730
		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
slouken@0
  1731
		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
slouken@0
  1732
		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
slouken@0
  1733
		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
slouken@0
  1734
		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
slouken@0
  1735
		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
slouken@0
  1736
		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
slouken@0
  1737
		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
slouken@0
  1738
		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
slouken@0
  1739
		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
slouken@0
  1740
		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
slouken@0
  1741
		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
slouken@0
  1742
		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
slouken@0
  1743
		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
slouken@0
  1744
		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
slouken@0
  1745
		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
slouken@0
  1746
		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
slouken@0
  1747
		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
slouken@0
  1748
		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
slouken@0
  1749
		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
slouken@0
  1750
		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
slouken@0
  1751
		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
slouken@0
  1752
		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
slouken@0
  1753
		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
slouken@0
  1754
		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
slouken@0
  1755
		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
slouken@0
  1756
		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
slouken@0
  1757
		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
slouken@0
  1758
		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
slouken@0
  1759
		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
slouken@0
  1760
		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
slouken@0
  1761
		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
slouken@0
  1762
		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
slouken@0
  1763
		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
slouken@0
  1764
		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
slouken@0
  1765
		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
slouken@0
  1766
		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
slouken@0
  1767
		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
slouken@0
  1768
		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
slouken@0
  1769
		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
slouken@0
  1770
		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
slouken@0
  1771
		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
slouken@0
  1772
		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
slouken@0
  1773
		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
slouken@0
  1774
		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
slouken@0
  1775
		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
slouken@0
  1776
		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
slouken@0
  1777
		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
slouken@0
  1778
		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
slouken@0
  1779
		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
slouken@0
  1780
		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
slouken@0
  1781
		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
slouken@0
  1782
		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
slouken@0
  1783
		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
slouken@0
  1784
		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
slouken@0
  1785
		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
slouken@0
  1786
		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
slouken@0
  1787
		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
slouken@0
  1788
		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
slouken@0
  1789
		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
slouken@0
  1790
		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
slouken@0
  1791
		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
slouken@0
  1792
		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
slouken@0
  1793
		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
slouken@0
  1794
		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
slouken@0
  1795
		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
slouken@0
  1796
		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
slouken@0
  1797
		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
slouken@0
  1798
		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
slouken@0
  1799
		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
slouken@0
  1800
		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
slouken@0
  1801
		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
slouken@0
  1802
		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
slouken@0
  1803
		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
slouken@0
  1804
		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
slouken@0
  1805
		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
slouken@0
  1806
		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
slouken@0
  1807
		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
slouken@0
  1808
		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
slouken@0
  1809
		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
slouken@0
  1810
		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
slouken@0
  1811
		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
slouken@0
  1812
		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
slouken@0
  1813
		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
slouken@0
  1814
		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
slouken@0
  1815
		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
slouken@0
  1816
		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
slouken@0
  1817
		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
slouken@0
  1818
		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
slouken@0
  1819
		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
slouken@0
  1820
		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
slouken@0
  1821
		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
slouken@0
  1822
		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
slouken@0
  1823
};
slouken@0
  1824
/*
 * RGB 5-6-5 --> BGRA 8-8-8-8 blit: forwards to Blit_RGB565_32() using
 * RGB565_BGRA8888_LUT as the conversion table.
 */
static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
{
	Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
slouken@0
  1828
slouken@0
  1829
/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
#ifndef RGB888_RGB332
/*
 * Packs a 0x00RRGGBB value into one 3-3-2 byte: the top three red bits end
 * up in bits 7-5, the top three green bits in bits 4-2 and the top two blue
 * bits in bits 1-0.  'src' is evaluated three times, 'dst' once.
 */
#define RGB888_RGB332(dst, src) { \
	dst = ((((src)>>16)&0xE0) | \
	       (((src)>>11)&0x1C) | \
	       (((src)>>6)&0x03)); \
}
#endif
slouken@0
  1837
/*
 * Blit 32-bit source pixels to an 8-bit destination through a lookup table.
 * Every source pixel is reduced to a 3-3-2 value with RGB888_RGB332() and
 * that value indexes info->table to obtain the byte that is stored.
 */
static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
{
	const int width = info->d_width;
	int rows = info->d_height;
	Uint32 *srcp = (Uint32 *)info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	const Uint8 *lut = info->table;
	/* s_skip is divided by 4 because srcp steps in 4-byte pixels */
	const int srcpad = info->s_skip/4;
	const int dstpad = info->d_skip;
	int rgb332;
#ifndef USE_DUFFS_LOOP
	int quads;
#endif

	for ( ; rows != 0; --rows ) {
#ifdef USE_DUFFS_LOOP
		DUFFS_LOOP(
			RGB888_RGB332(rgb332, *srcp);
			*dstp = lut[rgb332];
			++dstp;
			++srcp;
		, width);
#else
		/* Hand-unrolled: four pixels per iteration ... */
		for ( quads = width/4; quads; --quads ) {
			RGB888_RGB332(rgb332, srcp[0]);
			dstp[0] = lut[rgb332];
			RGB888_RGB332(rgb332, srcp[1]);
			dstp[1] = lut[rgb332];
			RGB888_RGB332(rgb332, srcp[2]);
			dstp[2] = lut[rgb332];
			RGB888_RGB332(rgb332, srcp[3]);
			dstp[3] = lut[rgb332];
			srcp += 4;
			dstp += 4;
		}
		/* ... then the 0-3 pixels left over in this row */
		switch ( width & 3 ) {
			case 3:
				RGB888_RGB332(rgb332, *srcp);
				*dstp = lut[rgb332];
				++dstp;
				++srcp;
				/* fall through */
			case 2:
				RGB888_RGB332(rgb332, *srcp);
				*dstp = lut[rgb332];
				++dstp;
				++srcp;
				/* fall through */
			case 1:
				RGB888_RGB332(rgb332, *srcp);
				*dstp = lut[rgb332];
				++dstp;
				++srcp;
				break;
			default:
				break;
		}
#endif /* USE_DUFFS_LOOP */
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  1904
/*
 * Generic blit from an N-byte-per-pixel source to an 8-bit destination.
 * Each source pixel is split into R, G and B with DISEMBLE_RGB() and packed
 * into a 3-3-2 byte.  With no table (info->table == NULL) that byte is
 * stored as-is; otherwise it is used as an index into the table.
 */
static void BlitNto1(SDL_BlitInfo *info)
{
	const int cols = info->d_width;
	int rows = info->d_height;
	Uint8 *srcp = info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	const int srcpad = info->s_skip;
	const int dstpad = info->d_skip;
	const Uint8 *lut = info->table;
	SDL_PixelFormat *srcfmt = info->src;
	const int srcbpp = srcfmt->BytesPerPixel;
	Uint32 Pixel;
	int sR, sG, sB;
#ifndef USE_DUFFS_LOOP
	int c;
#endif

	if ( lut == NULL ) {
		/* No table: store the packed 3-3-2 value directly */
		for ( ; rows != 0; --rows ) {
#ifdef USE_DUFFS_LOOP
			DUFFS_LOOP(
				DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dstp = ((sR>>5)<<5) | ((sG>>5)<<2) | (sB>>6);
				++dstp;
				srcp += srcbpp;
			, cols);
#else
			for ( c=cols; c; --c ) {
				DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dstp = ((sR>>5)<<5) | ((sG>>5)<<2) | (sB>>6);
				++dstp;
				srcp += srcbpp;
			}
#endif
			srcp += srcpad;
			dstp += dstpad;
		}
	} else {
		/* Table present: the packed 3-3-2 value selects the output byte */
		for ( ; rows != 0; --rows ) {
#ifdef USE_DUFFS_LOOP
			DUFFS_LOOP(
				DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dstp = lut[((sR>>5)<<5) | ((sG>>5)<<2) | (sB>>6)];
				++dstp;
				srcp += srcbpp;
			, cols);
#else
			for ( c=cols; c; --c ) {
				DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				*dstp = lut[((sR>>5)<<5) | ((sG>>5)<<2) | (sB>>6)];
				++dstp;
				srcp += srcbpp;
			}
#endif /* USE_DUFFS_LOOP */
			srcp += srcpad;
			dstp += dstpad;
		}
	}
}
icculus@1232
  1996
icculus@1232
  1997
/* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
icculus@1232
  1998
static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
icculus@1232
  1999
{
icculus@1232
  2000
	int width = info->d_width;
icculus@1232
  2001
	int height = info->d_height;
icculus@1232
  2002
	Uint32 *src = (Uint32 *)info->s_pixels;
icculus@1232
  2003
	int srcskip = info->s_skip;
icculus@1232
  2004
	Uint32 *dst = (Uint32 *)info->d_pixels;
icculus@1232
  2005
	int dstskip = info->d_skip;
icculus@1232
  2006
	SDL_PixelFormat *srcfmt = info->src;
icculus@1232
  2007
	SDL_PixelFormat *dstfmt = info->dst;
icculus@1232
  2008
icculus@1232
  2009
	if (dstfmt->Amask) {
icculus@1232
  2010
		/* RGB->RGBA, SET_ALPHA */
icculus@1232
  2011
		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
icculus@1232
  2012
icculus@1232
  2013
		while ( height-- ) {
icculus@1232
  2014
			DUFFS_LOOP(
icculus@1232
  2015
			{
icculus@1232
  2016
				*dst = *src | mask;
icculus@1232
  2017
				++dst;
icculus@1232
  2018
				++src;
icculus@1232
  2019
			},
icculus@1232
  2020
			width);
icculus@1232
  2021
			src = (Uint32*)((Uint8*)src + srcskip);
icculus@1232
  2022
			dst = (Uint32*)((Uint8*)dst + dstskip);
icculus@1232
  2023
		}
icculus@1232
  2024
	} else {
icculus@1232
  2025
		/* RGBA->RGB, NO_ALPHA */
icculus@1232
  2026
		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
icculus@1232
  2027
icculus@1232
  2028
		while ( height-- ) {
icculus@1232
  2029
			DUFFS_LOOP(
icculus@1232
  2030
			{
icculus@1232
  2031
				*dst = *src & mask;
icculus@1232
  2032
				++dst;
icculus@1232
  2033
				++src;
icculus@1232
  2034
			},
icculus@1232
  2035
			width);
icculus@1232
  2036
			src = (Uint32*)((Uint8*)src + srcskip);
icculus@1232
  2037
			dst = (Uint32*)((Uint8*)dst + dstskip);
icculus@1232
  2038
		}
icculus@1232
  2039
	}
icculus@1232
  2040
}
icculus@1232
  2041
slouken@0
  2042
/*
 * Generic pixel-by-pixel format conversion blit.  Each source pixel is split
 * into R, G and B with DISEMBLE_RGB() and written in the destination format
 * with ASSEMBLE_RGBA().  The alpha value handed to ASSEMBLE_RGBA() is the
 * source surface's alpha when the destination has an alpha mask, 0 otherwise.
 */
static void BlitNtoN(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int dstbpp = dstfmt->BytesPerPixel;
	const int cols = info->d_width;
	const int srcpad = info->s_skip;
	const int dstpad = info->d_skip;
	Uint8 *srcp = info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	unsigned alpha = 0;
	Uint32 Pixel;
	unsigned sR;
	unsigned sG;
	unsigned sB;
	int rows;

	if ( dstfmt->Amask ) {
		alpha = srcfmt->alpha;
	}

	for ( rows = info->d_height; rows != 0; --rows ) {
		DUFFS_LOOP(
		{
			DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel, sR, sG, sB);
			ASSEMBLE_RGBA(dstp, dstbpp, dstfmt, sR, sG, sB, alpha);
			srcp += srcbpp;
			dstp += dstbpp;
		},
		cols);
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  2073
slouken@0
  2074
/*
 * Generic format conversion blit that carries per-pixel alpha across: each
 * source pixel is split into R, G, B and A with DISEMBLE_RGBA() and all four
 * components are written with ASSEMBLE_RGBA().
 */
static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int dstbpp = dstfmt->BytesPerPixel;
	const int width = info->d_width;
	const int srcpad = info->s_skip;
	const int dstpad = info->d_skip;
	Uint8 *srcp = info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	Uint32 Pixel;
	unsigned sR, sG, sB, sA;
	int rows;
	int left;

	/* FIXME: should map alpha to [0..255] correctly! */
	for ( rows = info->d_height; rows != 0; --rows ) {
		left = width;
		while ( left ) {
			DISEMBLE_RGBA(srcp, srcbpp, srcfmt, Pixel,
				      sR, sG, sB, sA);
			ASSEMBLE_RGBA(dstp, dstbpp, dstfmt,
				      sR, sG, sB, sA);
			srcp += srcbpp;
			dstp += dstbpp;
			--left;
		}
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  2104
slouken@0
  2105
/*
 * Colorkey blit from an N-byte-per-pixel source to an 8-bit destination.
 * A source pixel whose non-alpha bits equal the colorkey leaves the
 * destination byte untouched.  Any other pixel is packed into a 3-3-2 byte
 * which is stored directly when info->table is NULL, or used as an index
 * into the table otherwise.
 */
static void BlitNto1Key(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int cols = info->d_width;
	const int srcpad = info->s_skip;
	const int dstpad = info->d_skip;
	const Uint8 *palmap = info->table;
	/* alpha bits are ignored when comparing against the colorkey */
	const Uint32 rgbmask = ~srcfmt->Amask;
	const Uint32 ckey = srcfmt->colorkey & rgbmask;
	Uint8 *srcp = info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	Uint32 Pixel;
	unsigned sR, sG, sB;
	int rows;

	if ( palmap == NULL ) {
		for ( rows = info->d_height; rows != 0; --rows ) {
			DUFFS_LOOP(
			{
				DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				if ( (Pixel & rgbmask) != ckey ) {
					/* Pack RGB into 8bit pixel */
					*dstp = (Uint8)(((sR>>5)<<5) |
					                ((sG>>5)<<2) |
					                (sB>>6));
				}
				++dstp;
				srcp += srcbpp;
			},
			cols);
			srcp += srcpad;
			dstp += dstpad;
		}
	} else {
		for ( rows = info->d_height; rows != 0; --rows ) {
			DUFFS_LOOP(
			{
				DISEMBLE_RGB(srcp, srcbpp, srcfmt, Pixel,
								sR, sG, sB);
				if ( (Pixel & rgbmask) != ckey ) {
					/* Pack RGB into 8bit pixel, then map it */
					*dstp = (Uint8)palmap[((sR>>5)<<5) |
					                      ((sG>>5)<<2) |
					                      (sB>>6)];
				}
				++dstp;
				srcp += srcbpp;
			},
			cols);
			srcp += srcpad;
			dstp += dstpad;
		}
	}
}
slouken@0
  2165
slouken@0
  2166
/*
 * Colorkey blit between 16-bit surfaces with no format conversion: a source
 * pixel is copied verbatim unless its non-alpha bits equal the colorkey, in
 * which case the destination pixel is left as it was.
 */
static void Blit2to2Key(SDL_BlitInfo *info)
{
	const int cols = info->d_width;
	/* the skips are halved because both pointers step in 16-bit pixels */
	const int srcpad = info->s_skip / 2;
	const int dstpad = info->d_skip / 2;
	const Uint32 rgbmask = ~info->src->Amask;
	const Uint32 ckey = info->src->colorkey & rgbmask;
	Uint16 *srcp = (Uint16 *)info->s_pixels;
	Uint16 *dstp = (Uint16 *)info->d_pixels;
	int rows;

	for ( rows = info->d_height; rows != 0; --rows ) {
		DUFFS_LOOP(
		{
			const Uint16 pix = *srcp;
			if ( (pix & rgbmask) != ckey ) {
				*dstp = pix;
			}
			++srcp;
			++dstp;
		},
		cols);
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  2196
slouken@0
  2197
/*
 * Colorkey blit with format conversion.  Each source pixel is fetched with
 * RETRIEVE_RGB_PIXEL(); when its non-alpha bits differ from the colorkey it
 * is split into R, G and B and written with ASSEMBLE_RGBA(), otherwise the
 * destination pixel is left untouched.  The alpha handed to ASSEMBLE_RGBA()
 * is the source surface's alpha when the destination has an alpha mask,
 * 0 otherwise.
 */
static void BlitNtoNKey(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const int srcbpp = srcfmt->BytesPerPixel;
	const int dstbpp = dstfmt->BytesPerPixel;
	const int cols = info->d_width;
	const int srcpad = info->s_skip;
	const int dstpad = info->d_skip;
	/* alpha bits are ignored when comparing against the colorkey */
	const Uint32 rgbmask = ~srcfmt->Amask;
	const Uint32 ckey = info->src->colorkey & rgbmask;
	Uint8 *srcp = info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	unsigned alpha = 0;
	Uint32 Pixel;
	unsigned sR;
	unsigned sG;
	unsigned sB;
	int rows;

	if ( dstfmt->Amask ) {
		alpha = srcfmt->alpha;
	}

	for ( rows = info->d_height; rows != 0; --rows ) {
		DUFFS_LOOP(
		{
			RETRIEVE_RGB_PIXEL(srcp, srcbpp, Pixel);
			if ( (Pixel & rgbmask) != ckey ) {
				RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
				ASSEMBLE_RGBA(dstp, dstbpp, dstfmt,
					      sR, sG, sB, alpha);
			}
			srcp += srcbpp;
			dstp += dstbpp;
		},
		cols);
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  2237
slouken@0
  2238
/*
 * Colorkey blit with format conversion that also carries per-pixel alpha.
 * Each source pixel is split into R, G, B and A with DISEMBLE_RGBA(); when
 * its non-alpha bits differ from the colorkey all four components are
 * written with ASSEMBLE_RGBA(), otherwise the destination pixel is left
 * untouched.
 */
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
{
	SDL_PixelFormat *srcfmt = info->src;
	SDL_PixelFormat *dstfmt = info->dst;
	const Uint8 srcbpp = srcfmt->BytesPerPixel;
	const Uint8 dstbpp = dstfmt->BytesPerPixel;
	const int cols = info->d_width;
	const int srcpad = info->s_skip;
	const int dstpad = info->d_skip;
	/* alpha bits are ignored when comparing against the colorkey */
	const Uint32 rgbmask = ~srcfmt->Amask;
	const Uint32 ckey = info->src->colorkey & rgbmask;
	Uint8 *srcp = info->s_pixels;
	Uint8 *dstp = info->d_pixels;
	Uint32 Pixel;
	unsigned sR, sG, sB, sA;
	int rows;

	/* FIXME: should map alpha to [0..255] correctly! */
	for ( rows = info->d_height; rows != 0; --rows ) {
		DUFFS_LOOP(
		{
			DISEMBLE_RGBA(srcp, srcbpp, srcfmt, Pixel,
				      sR, sG, sB, sA);
			if ( (Pixel & rgbmask) != ckey ) {
				ASSEMBLE_RGBA(dstp, dstbpp, dstfmt,
					      sR, sG, sB, sA);
			}
			srcp += srcbpp;
			dstp += dstbpp;
		},
		cols);
		srcp += srcpad;
		dstp += dstpad;
	}
}
slouken@0
  2279
slouken@0
  2280
/* Normal N to N optimized blitters */
slouken@0
  2281
struct blit_table {
slouken@0
  2282
	Uint32 srcR, srcG, srcB;
slouken@0
  2283
	int dstbpp;
slouken@0
  2284
	Uint32 dstR, dstG, dstB;
icculus@1047
  2285
	Uint32 blit_features;
slouken@0
  2286
	void *aux_data;
slouken@0
  2287
	SDL_loblit blitfunc;
icculus@1047
  2288
	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
slouken@0
  2289
};
slouken@91
  2290
static const struct blit_table normal_blit_1[] = {
slouken@0
  2291
	/* Default for 8-bit RGB source, an invalid combination */
slouken@0
  2292
	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
slouken@0
  2293
};
slouken@91
  2294
static const struct blit_table normal_blit_2[] = {
slouken@1361
  2295
#if SDL_HERMES_BLITTERS
slouken@0
  2296
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
slouken@0
  2297
      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
slouken@0
  2298
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@0
  2299
      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
slouken@0
  2300
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
slouken@0
  2301
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
slouken@1361
  2302
#elif SDL_ALTIVEC_BLITTERS
icculus@1047
  2303
    /* has-altivec */
icculus@1047
  2304
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
icculus@1047
  2305
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1139
  2306
    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
icculus@1139
  2307
      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1047
  2308
#endif
slouken@0
  2309
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
slouken@0
  2310
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
slouken@0
  2311
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
slouken@0
  2312
      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
slouken@0
  2313
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
slouken@0
  2314
      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
slouken@0
  2315
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
slouken@0
  2316
      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
slouken@0
  2317
slouken@0
  2318
    /* Default for 16-bit RGB source, used if no other blitter matches */
slouken@0
  2319
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
slouken@0
  2320
};
slouken@91
  2321
static const struct blit_table normal_blit_3[] = {
slouken@0
  2322
	/* Default for 24-bit RGB source, never optimized */
slouken@0
  2323
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
slouken@0
  2324
};
slouken@91
  2325
static const struct blit_table normal_blit_4[] = {
slouken@1361
  2326
#if SDL_HERMES_BLITTERS
slouken@0
  2327
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
slouken@739
  2328
      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
slouken@0
  2329
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
slouken@0
  2330
      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
slouken@0
  2331
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
slouken@739
  2332
      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
slouken@0
  2333
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
slouken@0
  2334
      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
slouken@0
  2335
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@739
  2336
      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
slouken@0
  2337
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@0
  2338
      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
slouken@0
  2339
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
slouken@739
  2340
      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
slouken@0
  2341
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
slouken@0
  2342
      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
slouken@0
  2343
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
slouken@4304
  2344
      1, ConvertMMXpII32_24RGB888, ConvertMMX, NO_ALPHA },
slouken@4304
  2345
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
slouken@0
  2346
      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
slouken@0
  2347
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
slouken@0
  2348
      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
slouken@0
  2349
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
slouken@0
  2350
      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
slouken@0
  2351
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
slouken@0
  2352
      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
slouken@0
  2353
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
slouken@0
  2354
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
slouken@0
  2355
#else
slouken@1361
  2356
#if SDL_ALTIVEC_BLITTERS
icculus@1047
  2357
    /* has-altivec | dont-use-prefetch */
icculus@1047
  2358
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
icculus@1047
  2359
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1047
  2360
    /* has-altivec */
icculus@1047
  2361
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
icculus@1047
  2362
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
icculus@1047
  2363
    /* has-altivec */
icculus@1047
  2364
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
icculus@1047
  2365
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
icculus@1047
  2366
#endif
slouken@0
  2367
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
slouken@0
  2368
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
slouken@0
  2369
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
slouken@0
  2370
      0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
slouken@0
  2371
#endif
slouken@0
  2372
	/* Default for 32-bit RGB source, used if no other blitter matches */
slouken@0
  2373
	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
slouken@0
  2374
};
slouken@91
  2375
static const struct blit_table *normal_blit[] = {
slouken@0
  2376
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
slouken@0
  2377
};
slouken@0
  2378
icculus@1047
  2379
/*
 * Mask matches table, or table entry is zero.
 * x is the surface's mask, y the table entry's mask; a zero y acts as a
 * wildcard that accepts any x.
 */
#define MASKOK(x, y) \
	(((x) == (y)) || ((y) == 0))
icculus@1047
  2381
slouken@0
  2382
/*
 * Choose the low-level blit function for a surface whose source format is
 * handled by the N-byte blitters.
 *
 * surface     the source surface; its map supplies the destination format,
 *             the identity flag, the lookup table and the sw_data record
 *             whose aux_data may be filled in here.
 * blit_index  blit mode bits: bit 1 set means alpha (with or without
 *             colorkey) and is delegated to SDL_CalculateAlphaBlit();
 *             the value 1 means a colorkey blit; otherwise a plain copy
 *             or conversion blit is selected.
 *
 * Returns the selected blitter, or NULL when the destination has fewer than
 * 8 bits per pixel or the matching table entry carries no blit function.
 */
SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
{
	struct private_swaccel *sdata;
	SDL_PixelFormat *srcfmt;
	SDL_PixelFormat *dstfmt;
	const struct blit_table *table;
	int which;
	SDL_loblit blitfun;

	/* Set up data for choosing the blit */
	sdata = surface->map->sw_data;
	srcfmt = surface->format;
	dstfmt = surface->map->dst->format;

	if ( blit_index & 2 ) {
		/* alpha or alpha+colorkey */
		return SDL_CalculateAlphaBlit(surface, blit_index);
	}

	/* We don't support destinations less than 8-bits */
	if ( dstfmt->BitsPerPixel < 8 ) {
		return(NULL);
	}

	if(blit_index == 1) {
	    /* colorkey blit: Here we don't have too many options, mostly
	       because RLE is the preferred fast way to deal with this.
	       If a particular case turns out to be useful we'll add it. */

	    if(srcfmt->BytesPerPixel == 2
	       && surface->map->identity)
		return Blit2to2Key;
	    else if(dstfmt->BytesPerPixel == 1)
		return BlitNto1Key;
	    else {
#if SDL_ALTIVEC_BLITTERS
		if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
		    return Blit32to32KeyAltivec;
		} else
#endif

		if(srcfmt->Amask && dstfmt->Amask)
		    return BlitNtoNKeyCopyAlpha;
		else
		    return BlitNtoNKey;
	    }
	}

	blitfun = NULL;
	if ( dstfmt->BitsPerPixel == 8 ) {
		/* We assume 8-bit destinations are palettized */
		if ( (srcfmt->BytesPerPixel == 4) &&
		     (srcfmt->Rmask == 0x00FF0000) &&
		     (srcfmt->Gmask == 0x0000FF00) &&
		     (srcfmt->Bmask == 0x000000FF) ) {
			if ( surface->map->table ) {
				blitfun = Blit_RGB888_index8_map;
			} else {
#if SDL_HERMES_BLITTERS
				sdata->aux_data = ConvertX86p32_8RGB332;
				blitfun = ConvertX86;
#else
				blitfun = Blit_RGB888_index8;
#endif
			}
		} else {
			blitfun = BlitNto1;
		}
	} else {
		/* Now the meat, choose the blitter we want */
		int a_need = NO_ALPHA;
		if(dstfmt->Amask)
		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
		table = normal_blit[srcfmt->BytesPerPixel-1];
		/* The search stops at the first matching entry, or at the
		   table's default entry (the one with dstbpp == 0). */
		for ( which=0; table[which].dstbpp; ++which ) {
			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
			    dstfmt->BytesPerPixel == table[which].dstbpp &&
			    (a_need & table[which].alpha) == a_need &&
			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
				break;
		}
		sdata->aux_data = table[which].aux_data;
		blitfun = table[which].blitfunc;

		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
			if ( a_need == COPY_ALPHA ) {
				/* Both formats carry per-pixel alpha, so it has to
				   be copied.  This must be tested before the
				   matching-RGB fast path: Blit4to4MaskAlpha only
				   sets or strips alpha and would OR the per-surface
				   alpha into every pixel instead. */
				blitfun = BlitNtoNCopyAlpha;
			} else if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
			     srcfmt->Rmask == dstfmt->Rmask &&
			     srcfmt->Gmask == dstfmt->Gmask &&
			     srcfmt->Bmask == dstfmt->Bmask ) {
				/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
				blitfun = Blit4to4MaskAlpha;
			}
		}
	}

#ifdef DEBUG_ASM
#if SDL_HERMES_BLITTERS
	if ( blitfun == ConvertMMX )
		fprintf(stderr, "Using mmx blit\n");
	else
	if ( blitfun == ConvertX86 )
		fprintf(stderr, "Using asm blit\n");
	else
#endif
	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
		fprintf(stderr, "Using C blit\n");
	else
		fprintf(stderr, "Using optimized C blit\n");
#endif /* DEBUG_ASM */

	return(blitfun);
}