src/video/SDL_stretch.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 02 Feb 2014 00:53:27 -0800
changeset 8149 681eb46b8ac4
parent 8093 b43765095a6f
child 9619 b94b6d0bff0f
permissions -rw-r--r--
Fixed bug 2374 - Update copyright for 2014...

Is it that time already??
slouken@0
     1
/*
slouken@5535
     2
  Simple DirectMedia Layer
slouken@8149
     3
  Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
slouken@0
     4
slouken@5535
     5
  This software is provided 'as-is', without any express or implied
slouken@5535
     6
  warranty.  In no event will the authors be held liable for any damages
slouken@5535
     7
  arising from the use of this software.
slouken@0
     8
slouken@5535
     9
  Permission is granted to anyone to use this software for any purpose,
slouken@5535
    10
  including commercial applications, and to alter it and redistribute it
slouken@5535
    11
  freely, subject to the following restrictions:
slouken@0
    12
slouken@5535
    13
  1. The origin of this software must not be misrepresented; you must not
slouken@5535
    14
     claim that you wrote the original software. If you use this software
slouken@5535
    15
     in a product, an acknowledgment in the product documentation would be
slouken@5535
    16
     appreciated but is not required.
slouken@5535
    17
  2. Altered source versions must be plainly marked as such, and must not be
slouken@5535
    18
     misrepresented as being the original software.
slouken@5535
    19
  3. This notice may not be removed or altered from any source distribution.
slouken@0
    20
*/
icculus@8093
    21
#include "../SDL_internal.h"
slouken@0
    22
slouken@0
    23
/* This is a stretch blit implementation based on ideas given to me by
slouken@0
    24
   Tomasz Cejner - thanks! :)
slouken@0
    25
slouken@0
    26
   April 27, 2000 - Sam Lantinga
slouken@0
    27
*/
slouken@0
    28
slouken@0
    29
#include "SDL_video.h"
slouken@0
    30
#include "SDL_blit.h"
slouken@0
    31
slouken@0
    32
/* This isn't ready for general consumption yet - it should be folded
slouken@0
    33
   into the general blitting mechanism.
slouken@0
    34
*/
slouken@0
    35
icculus@6430
    36
#if ((defined(_MFC_VER) && defined(_M_IX86)) || \
slouken@1442
    37
     defined(__WATCOMC__) || \
slouken@1402
    38
     (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
slouken@3406
    39
/* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct
slouken@3406
    40
 * value after the first scanline.  FIXME? */
gabomdq@7678
    41
/* #define USE_ASM_STRETCH */
slouken@0
    42
#endif
slouken@0
    43
slouken@0
    44
#ifdef USE_ASM_STRETCH
slouken@0
    45
slouken@3405
    46
#ifdef HAVE_MPROTECT
slouken@3405
    47
#include <sys/types.h>
slouken@3405
    48
#include <sys/mman.h>
slouken@3405
    49
#endif
slouken@3405
    50
#ifdef __GNUC__
slouken@3405
    51
#define PAGE_ALIGNED __attribute__((__aligned__(4096)))
slouken@3405
    52
#else
slouken@3405
    53
#define PAGE_ALIGNED
slouken@3405
    54
#endif
slouken@3405
    55
slouken@1361
    56
#if defined(_M_IX86) || defined(i386)
slouken@7208
    57
#define PREFIX16    0x66
slouken@7208
    58
#define STORE_BYTE  0xAA
slouken@7208
    59
#define STORE_WORD  0xAB
slouken@7208
    60
#define LOAD_BYTE   0xAC
slouken@7208
    61
#define LOAD_WORD   0xAD
slouken@7208
    62
#define RETURN      0xC3
slouken@0
    63
#else
slouken@0
    64
#error Need assembly opcodes for this architecture
slouken@0
    65
#endif
slouken@0
    66
slouken@3405
    67
static unsigned char copy_row[4096] PAGE_ALIGNED;
slouken@0
    68
slouken@1895
    69
static int
slouken@1895
    70
generate_rowbytes(int src_w, int dst_w, int bpp)
slouken@0
    71
{
slouken@1895
    72
    static struct
slouken@1895
    73
    {
slouken@1895
    74
        int bpp;
slouken@1895
    75
        int src_w;
slouken@1895
    76
        int dst_w;
slouken@3405
    77
        int status;
slouken@1895
    78
    } last;
slouken@0
    79
slouken@1895
    80
    int i;
slouken@1895
    81
    int pos, inc;
slouken@4542
    82
    unsigned char *eip, *fence;
slouken@1895
    83
    unsigned char load, store;
slouken@0
    84
slouken@1895
    85
    /* See if we need to regenerate the copy buffer */
slouken@1895
    86
    if ((src_w == last.src_w) && (dst_w == last.dst_w) && (bpp == last.bpp)) {
slouken@3405
    87
        return (last.status);
slouken@1895
    88
    }
slouken@1895
    89
    last.bpp = bpp;
slouken@1895
    90
    last.src_w = src_w;
slouken@1895
    91
    last.dst_w = dst_w;
slouken@3405
    92
    last.status = -1;
slouken@0
    93
slouken@1895
    94
    switch (bpp) {
slouken@1895
    95
    case 1:
slouken@1895
    96
        load = LOAD_BYTE;
slouken@1895
    97
        store = STORE_BYTE;
slouken@1895
    98
        break;
slouken@1895
    99
    case 2:
slouken@1895
   100
    case 4:
slouken@1895
   101
        load = LOAD_WORD;
slouken@1895
   102
        store = STORE_WORD;
slouken@1895
   103
        break;
slouken@1895
   104
    default:
icculus@7037
   105
        return SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
slouken@1895
   106
    }
slouken@3405
   107
#ifdef HAVE_MPROTECT
slouken@3405
   108
    /* Make the code writeable */
slouken@3405
   109
    if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_WRITE) < 0) {
icculus@7037
   110
        return SDL_SetError("Couldn't make copy buffer writeable");
slouken@3405
   111
    }
slouken@3405
   112
#endif
slouken@1895
   113
    pos = 0x10000;
slouken@1895
   114
    inc = (src_w << 16) / dst_w;
slouken@1895
   115
    eip = copy_row;
slouken@4542
   116
    fence = copy_row + sizeof(copy_row)-2;
slouken@1895
   117
    for (i = 0; i < dst_w; ++i) {
slouken@1895
   118
        while (pos >= 0x10000L) {
slouken@4542
   119
            if (eip == fence) {
slouken@4542
   120
                return -1;
slouken@4542
   121
            }
slouken@1895
   122
            if (bpp == 2) {
slouken@1895
   123
                *eip++ = PREFIX16;
slouken@1895
   124
            }
slouken@1895
   125
            *eip++ = load;
slouken@1895
   126
            pos -= 0x10000L;
slouken@1895
   127
        }
slouken@4542
   128
        if (eip == fence) {
slouken@4542
   129
            return -1;
slouken@4542
   130
        }
slouken@1895
   131
        if (bpp == 2) {
slouken@1895
   132
            *eip++ = PREFIX16;
slouken@1895
   133
        }
slouken@1895
   134
        *eip++ = store;
slouken@1895
   135
        pos += inc;
slouken@1895
   136
    }
slouken@1895
   137
    *eip++ = RETURN;
slouken@0
   138
slouken@3405
   139
#ifdef HAVE_MPROTECT
slouken@3405
   140
    /* Make the code executable but not writeable */
slouken@3405
   141
    if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_EXEC) < 0) {
icculus@7037
   142
        return SDL_SetError("Couldn't make copy buffer executable");
slouken@3405
   143
    }
slouken@3405
   144
#endif
slouken@3405
   145
    last.status = 0;
slouken@1895
   146
    return (0);
slouken@0
   147
}
slouken@0
   148
slouken@3405
   149
#endif /* USE_ASM_STRETCH */
slouken@0
   150
slouken@7208
   151
#define DEFINE_COPY_ROW(name, type)         \
slouken@7208
   152
static void name(type *src, int src_w, type *dst, int dst_w)    \
slouken@7209
   153
{                                           \
slouken@7209
   154
    int i;                                  \
slouken@7209
   155
    int pos, inc;                           \
slouken@7209
   156
    type pixel = 0;                         \
slouken@7209
   157
                                            \
slouken@7209
   158
    pos = 0x10000;                          \
slouken@7208
   159
    inc = (src_w << 16) / dst_w;            \
slouken@7209
   160
    for ( i=dst_w; i>0; --i ) {             \
slouken@7209
   161
        while ( pos >= 0x10000L ) {         \
slouken@7209
   162
            pixel = *src++;                 \
slouken@7209
   163
            pos -= 0x10000L;                \
slouken@7209
   164
        }                                   \
slouken@7209
   165
        *dst++ = pixel;                     \
slouken@7209
   166
        pos += inc;                         \
slouken@7209
   167
    }                                       \
slouken@0
   168
}
slouken@1985
   169
/* *INDENT-OFF* */
slouken@0
   170
DEFINE_COPY_ROW(copy_row1, Uint8)
slouken@1985
   171
DEFINE_COPY_ROW(copy_row2, Uint16)
slouken@1985
   172
DEFINE_COPY_ROW(copy_row4, Uint32)
slouken@1985
   173
/* *INDENT-ON* */
slouken@3405
   174
slouken@1895
   175
/* The ASM code doesn't handle 24-bpp stretch blits */
slouken@4472
   176
static void
slouken@1985
   177
copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
slouken@1895
   178
{
slouken@1895
   179
    int i;
slouken@1895
   180
    int pos, inc;
slouken@3405
   181
    Uint8 pixel[3] = { 0, 0, 0 };
slouken@0
   182
slouken@1895
   183
    pos = 0x10000;
slouken@1895
   184
    inc = (src_w << 16) / dst_w;
slouken@1895
   185
    for (i = dst_w; i > 0; --i) {
slouken@1895
   186
        while (pos >= 0x10000L) {
slouken@1895
   187
            pixel[0] = *src++;
slouken@1895
   188
            pixel[1] = *src++;
slouken@1895
   189
            pixel[2] = *src++;
slouken@1895
   190
            pos -= 0x10000L;
slouken@1895
   191
        }
slouken@1895
   192
        *dst++ = pixel[0];
slouken@1895
   193
        *dst++ = pixel[1];
slouken@1895
   194
        *dst++ = pixel[2];
slouken@1895
   195
        pos += inc;
slouken@1895
   196
    }
slouken@0
   197
}
slouken@0
   198
slouken@0
   199
/* Perform a stretch blit between two surfaces of the same format.
slouken@0
   200
   NOTE:  This function is not safe to call from multiple threads!
slouken@0
   201
*/
slouken@1895
   202
int
slouken@2828
   203
SDL_SoftStretch(SDL_Surface * src, const SDL_Rect * srcrect,
slouken@2828
   204
                SDL_Surface * dst, const SDL_Rect * dstrect)
slouken@0
   205
{
slouken@1895
   206
    int src_locked;
slouken@1895
   207
    int dst_locked;
slouken@1895
   208
    int pos, inc;
slouken@1895
   209
    int dst_maxrow;
slouken@1895
   210
    int src_row, dst_row;
slouken@1895
   211
    Uint8 *srcp = NULL;
slouken@1895
   212
    Uint8 *dstp;
slouken@1895
   213
    SDL_Rect full_src;
slouken@1895
   214
    SDL_Rect full_dst;
slouken@3405
   215
#ifdef USE_ASM_STRETCH
slouken@3405
   216
    SDL_bool use_asm = SDL_TRUE;
slouken@3405
   217
#ifdef __GNUC__
slouken@1895
   218
    int u1, u2;
slouken@0
   219
#endif
slouken@3405
   220
#endif /* USE_ASM_STRETCH */
slouken@1895
   221
    const int bpp = dst->format->BytesPerPixel;
slouken@0
   222
slouken@7208
   223
    if (src->format->format != dst->format->format) {
icculus@7037
   224
        return SDL_SetError("Only works with same format surfaces");
slouken@1895
   225
    }
slouken@0
   226
slouken@1895
   227
    /* Verify the blit rectangles */
slouken@1895
   228
    if (srcrect) {
slouken@1895
   229
        if ((srcrect->x < 0) || (srcrect->y < 0) ||
slouken@1895
   230
            ((srcrect->x + srcrect->w) > src->w) ||
slouken@1895
   231
            ((srcrect->y + srcrect->h) > src->h)) {
icculus@7037
   232
            return SDL_SetError("Invalid source blit rectangle");
slouken@1895
   233
        }
slouken@1895
   234
    } else {
slouken@1895
   235
        full_src.x = 0;
slouken@1895
   236
        full_src.y = 0;
slouken@1895
   237
        full_src.w = src->w;
slouken@1895
   238
        full_src.h = src->h;
slouken@1895
   239
        srcrect = &full_src;
slouken@1895
   240
    }
slouken@1895
   241
    if (dstrect) {
slouken@1895
   242
        if ((dstrect->x < 0) || (dstrect->y < 0) ||
slouken@1895
   243
            ((dstrect->x + dstrect->w) > dst->w) ||
slouken@1895
   244
            ((dstrect->y + dstrect->h) > dst->h)) {
icculus@7037
   245
            return SDL_SetError("Invalid destination blit rectangle");
slouken@1895
   246
        }
slouken@1895
   247
    } else {
slouken@1895
   248
        full_dst.x = 0;
slouken@1895
   249
        full_dst.y = 0;
slouken@1895
   250
        full_dst.w = dst->w;
slouken@1895
   251
        full_dst.h = dst->h;
slouken@1895
   252
        dstrect = &full_dst;
slouken@1895
   253
    }
slouken@0
   254
slouken@1895
   255
    /* Lock the destination if it's in hardware */
slouken@1895
   256
    dst_locked = 0;
slouken@1895
   257
    if (SDL_MUSTLOCK(dst)) {
slouken@1895
   258
        if (SDL_LockSurface(dst) < 0) {
icculus@7037
   259
            return SDL_SetError("Unable to lock destination surface");
slouken@1895
   260
        }
slouken@1895
   261
        dst_locked = 1;
slouken@1895
   262
    }
slouken@1895
   263
    /* Lock the source if it's in hardware */
slouken@1895
   264
    src_locked = 0;
slouken@1895
   265
    if (SDL_MUSTLOCK(src)) {
slouken@1895
   266
        if (SDL_LockSurface(src) < 0) {
slouken@1895
   267
            if (dst_locked) {
slouken@1895
   268
                SDL_UnlockSurface(dst);
slouken@1895
   269
            }
icculus@7037
   270
            return SDL_SetError("Unable to lock source surface");
slouken@1895
   271
        }
slouken@1895
   272
        src_locked = 1;
slouken@1895
   273
    }
slouken@894
   274
slouken@1895
   275
    /* Set up the data... */
slouken@1895
   276
    pos = 0x10000;
slouken@1895
   277
    inc = (srcrect->h << 16) / dstrect->h;
slouken@1895
   278
    src_row = srcrect->y;
slouken@1895
   279
    dst_row = dstrect->y;
slouken@0
   280
slouken@0
   281
#ifdef USE_ASM_STRETCH
slouken@1895
   282
    /* Write the opcodes for this stretch */
slouken@3405
   283
    if ((bpp == 3) || (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0)) {
slouken@3405
   284
        use_asm = SDL_FALSE;
slouken@1895
   285
    }
slouken@0
   286
#endif
slouken@0
   287
slouken@1895
   288
    /* Perform the stretch blit */
slouken@1895
   289
    for (dst_maxrow = dst_row + dstrect->h; dst_row < dst_maxrow; ++dst_row) {
slouken@1895
   290
        dstp = (Uint8 *) dst->pixels + (dst_row * dst->pitch)
slouken@1895
   291
            + (dstrect->x * bpp);
slouken@1895
   292
        while (pos >= 0x10000L) {
slouken@1895
   293
            srcp = (Uint8 *) src->pixels + (src_row * src->pitch)
slouken@1895
   294
                + (srcrect->x * bpp);
slouken@1895
   295
            ++src_row;
slouken@1895
   296
            pos -= 0x10000L;
slouken@1895
   297
        }
slouken@0
   298
#ifdef USE_ASM_STRETCH
slouken@3405
   299
        if (use_asm) {
slouken@0
   300
#ifdef __GNUC__
slouken@3405
   301
            __asm__ __volatile__("call *%4":"=&D"(u1), "=&S"(u2)
slouken@3405
   302
                                 :"0"(dstp), "1"(srcp), "r"(copy_row)
slouken@3405
   303
                                 :"memory");
slouken@1442
   304
#elif defined(_MSC_VER) || defined(__WATCOMC__)
slouken@1985
   305
            /* *INDENT-OFF* */
slouken@1895
   306
            {
slouken@1895
   307
                void *code = copy_row;
slouken@1895
   308
                __asm {
slouken@1985
   309
                    push edi
slouken@1985
   310
                    push esi
slouken@1985
   311
                    mov edi, dstp
slouken@1985
   312
                    mov esi, srcp
slouken@1985
   313
                    call dword ptr code
slouken@1985
   314
                    pop esi
slouken@1985
   315
                    pop edi
slouken@1985
   316
                }
slouken@1895
   317
            }
slouken@1985
   318
            /* *INDENT-ON* */
slouken@0
   319
#else
slouken@0
   320
#error Need inline assembly for this compiler
slouken@0
   321
#endif
slouken@3405
   322
        } else
slouken@0
   323
#endif
slouken@3405
   324
            switch (bpp) {
slouken@3405
   325
            case 1:
slouken@3405
   326
                copy_row1(srcp, srcrect->w, dstp, dstrect->w);
slouken@3405
   327
                break;
slouken@3405
   328
            case 2:
slouken@3405
   329
                copy_row2((Uint16 *) srcp, srcrect->w,
slouken@3405
   330
                          (Uint16 *) dstp, dstrect->w);
slouken@3405
   331
                break;
slouken@3405
   332
            case 3:
slouken@3405
   333
                copy_row3(srcp, srcrect->w, dstp, dstrect->w);
slouken@3405
   334
                break;
slouken@3405
   335
            case 4:
slouken@3405
   336
                copy_row4((Uint32 *) srcp, srcrect->w,
slouken@3405
   337
                          (Uint32 *) dstp, dstrect->w);
slouken@3405
   338
                break;
slouken@3405
   339
            }
slouken@1895
   340
        pos += inc;
slouken@1895
   341
    }
slouken@894
   342
slouken@1895
   343
    /* We need to unlock the surfaces if they're locked */
slouken@1895
   344
    if (dst_locked) {
slouken@1895
   345
        SDL_UnlockSurface(dst);
slouken@1895
   346
    }
slouken@1895
   347
    if (src_locked) {
slouken@1895
   348
        SDL_UnlockSurface(src);
slouken@1895
   349
    }
slouken@1895
   350
    return (0);
slouken@0
   351
}
slouken@0
   352
slouken@1895
   353
/* vi: set ts=4 sw=4 expandtab: */