src/video/SDL_stretch.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 31 Dec 2011 09:28:07 -0500
changeset 6138 4c64952a58fb
parent 5535 96594ac5fd1a
child 6389 43a190ad60a7
permissions -rwxr-xr-x
Happy New Year!
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This is a stretch blit implementation based on ideas given to me by
    24    Tomasz Cejner - thanks! :)
    25 
    26    April 27, 2000 - Sam Lantinga
    27 */
    28 
    29 #include "SDL_video.h"
    30 #include "SDL_blit.h"
    31 
    32 /* This isn't ready for general consumption yet - it should be folded
    33    into the general blitting mechanism.
    34 */
    35 
    36 #if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
    37      defined(__WATCOMC__) || \
    38      (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
    39 /* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct
    40  * value after the first scanline.  FIXME? */
    41 /*#define USE_ASM_STRETCH*/
    42 #endif
    43 
    44 #ifdef USE_ASM_STRETCH
    45 
    46 #ifdef HAVE_MPROTECT
    47 #include <sys/types.h>
    48 #include <sys/mman.h>
    49 #endif
    50 #ifdef __GNUC__
    51 #define PAGE_ALIGNED __attribute__((__aligned__(4096)))
    52 #else
    53 #define PAGE_ALIGNED
    54 #endif
    55 
    56 #if defined(_M_IX86) || defined(i386)
    57 #define PREFIX16	0x66
    58 #define STORE_BYTE	0xAA
    59 #define STORE_WORD	0xAB
    60 #define LOAD_BYTE	0xAC
    61 #define LOAD_WORD	0xAD
    62 #define RETURN		0xC3
    63 #else
    64 #error Need assembly opcodes for this architecture
    65 #endif
    66 
    67 static unsigned char copy_row[4096] PAGE_ALIGNED;
    68 
    69 static int
    70 generate_rowbytes(int src_w, int dst_w, int bpp)
    71 {
    72     static struct
    73     {
    74         int bpp;
    75         int src_w;
    76         int dst_w;
    77         int status;
    78     } last;
    79 
    80     int i;
    81     int pos, inc;
    82     unsigned char *eip, *fence;
    83     unsigned char load, store;
    84 
    85     /* See if we need to regenerate the copy buffer */
    86     if ((src_w == last.src_w) && (dst_w == last.dst_w) && (bpp == last.bpp)) {
    87         return (last.status);
    88     }
    89     last.bpp = bpp;
    90     last.src_w = src_w;
    91     last.dst_w = dst_w;
    92     last.status = -1;
    93 
    94     switch (bpp) {
    95     case 1:
    96         load = LOAD_BYTE;
    97         store = STORE_BYTE;
    98         break;
    99     case 2:
   100     case 4:
   101         load = LOAD_WORD;
   102         store = STORE_WORD;
   103         break;
   104     default:
   105         SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
   106         return (-1);
   107     }
   108 #ifdef HAVE_MPROTECT
   109     /* Make the code writeable */
   110     if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_WRITE) < 0) {
   111         SDL_SetError("Couldn't make copy buffer writeable");
   112         return (-1);
   113     }
   114 #endif
   115     pos = 0x10000;
   116     inc = (src_w << 16) / dst_w;
   117     eip = copy_row;
   118     fence = copy_row + sizeof(copy_row)-2;
   119     for (i = 0; i < dst_w; ++i) {
   120         while (pos >= 0x10000L) {
   121             if (eip == fence) {
   122                 return -1;
   123             }
   124             if (bpp == 2) {
   125                 *eip++ = PREFIX16;
   126             }
   127             *eip++ = load;
   128             pos -= 0x10000L;
   129         }
   130         if (eip == fence) {
   131             return -1;
   132         }
   133         if (bpp == 2) {
   134             *eip++ = PREFIX16;
   135         }
   136         *eip++ = store;
   137         pos += inc;
   138     }
   139     *eip++ = RETURN;
   140 
   141 #ifdef HAVE_MPROTECT
   142     /* Make the code executable but not writeable */
   143     if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_EXEC) < 0) {
   144         SDL_SetError("Couldn't make copy buffer executable");
   145         return (-1);
   146     }
   147 #endif
   148     last.status = 0;
   149     return (0);
   150 }
   151 
   152 #endif /* USE_ASM_STRETCH */
   153 
   154 #define DEFINE_COPY_ROW(name, type)			\
   155 static void name(type *src, int src_w, type *dst, int dst_w)	\
   156 {							\
   157 	int i;						\
   158 	int pos, inc;					\
   159 	type pixel = 0;					\
   160 							\
   161 	pos = 0x10000;					\
   162 	inc = (src_w << 16) / dst_w;			\
   163 	for ( i=dst_w; i>0; --i ) {			\
   164 		while ( pos >= 0x10000L ) {		\
   165 			pixel = *src++;			\
   166 			pos -= 0x10000L;		\
   167 		}					\
   168 		*dst++ = pixel;				\
   169 		pos += inc;				\
   170 	}						\
   171 }
   172 /* *INDENT-OFF* */
   173 DEFINE_COPY_ROW(copy_row1, Uint8)
   174 DEFINE_COPY_ROW(copy_row2, Uint16)
   175 DEFINE_COPY_ROW(copy_row4, Uint32)
   176 /* *INDENT-ON* */
   177 
   178 /* The ASM code doesn't handle 24-bpp stretch blits */
   179 static void
   180 copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
   181 {
   182     int i;
   183     int pos, inc;
   184     Uint8 pixel[3] = { 0, 0, 0 };
   185 
   186     pos = 0x10000;
   187     inc = (src_w << 16) / dst_w;
   188     for (i = dst_w; i > 0; --i) {
   189         while (pos >= 0x10000L) {
   190             pixel[0] = *src++;
   191             pixel[1] = *src++;
   192             pixel[2] = *src++;
   193             pos -= 0x10000L;
   194         }
   195         *dst++ = pixel[0];
   196         *dst++ = pixel[1];
   197         *dst++ = pixel[2];
   198         pos += inc;
   199     }
   200 }
   201 
   202 /* Perform a stretch blit between two surfaces of the same format.
   203    NOTE:  This function is not safe to call from multiple threads!
   204 */
   205 int
   206 SDL_SoftStretch(SDL_Surface * src, const SDL_Rect * srcrect,
   207                 SDL_Surface * dst, const SDL_Rect * dstrect)
   208 {
   209     int src_locked;
   210     int dst_locked;
   211     int pos, inc;
   212     int dst_width;
   213     int dst_maxrow;
   214     int src_row, dst_row;
   215     Uint8 *srcp = NULL;
   216     Uint8 *dstp;
   217     SDL_Rect full_src;
   218     SDL_Rect full_dst;
   219 #ifdef USE_ASM_STRETCH
   220     SDL_bool use_asm = SDL_TRUE;
   221 #ifdef __GNUC__
   222     int u1, u2;
   223 #endif
   224 #endif /* USE_ASM_STRETCH */
   225     const int bpp = dst->format->BytesPerPixel;
   226 
   227     if (src->format->BitsPerPixel != dst->format->BitsPerPixel) {
   228         SDL_SetError("Only works with same format surfaces");
   229         return (-1);
   230     }
   231 
   232     /* Verify the blit rectangles */
   233     if (srcrect) {
   234         if ((srcrect->x < 0) || (srcrect->y < 0) ||
   235             ((srcrect->x + srcrect->w) > src->w) ||
   236             ((srcrect->y + srcrect->h) > src->h)) {
   237             SDL_SetError("Invalid source blit rectangle");
   238             return (-1);
   239         }
   240     } else {
   241         full_src.x = 0;
   242         full_src.y = 0;
   243         full_src.w = src->w;
   244         full_src.h = src->h;
   245         srcrect = &full_src;
   246     }
   247     if (dstrect) {
   248         if ((dstrect->x < 0) || (dstrect->y < 0) ||
   249             ((dstrect->x + dstrect->w) > dst->w) ||
   250             ((dstrect->y + dstrect->h) > dst->h)) {
   251             SDL_SetError("Invalid destination blit rectangle");
   252             return (-1);
   253         }
   254     } else {
   255         full_dst.x = 0;
   256         full_dst.y = 0;
   257         full_dst.w = dst->w;
   258         full_dst.h = dst->h;
   259         dstrect = &full_dst;
   260     }
   261 
   262     /* Lock the destination if it's in hardware */
   263     dst_locked = 0;
   264     if (SDL_MUSTLOCK(dst)) {
   265         if (SDL_LockSurface(dst) < 0) {
   266             SDL_SetError("Unable to lock destination surface");
   267             return (-1);
   268         }
   269         dst_locked = 1;
   270     }
   271     /* Lock the source if it's in hardware */
   272     src_locked = 0;
   273     if (SDL_MUSTLOCK(src)) {
   274         if (SDL_LockSurface(src) < 0) {
   275             if (dst_locked) {
   276                 SDL_UnlockSurface(dst);
   277             }
   278             SDL_SetError("Unable to lock source surface");
   279             return (-1);
   280         }
   281         src_locked = 1;
   282     }
   283 
   284     /* Set up the data... */
   285     pos = 0x10000;
   286     inc = (srcrect->h << 16) / dstrect->h;
   287     src_row = srcrect->y;
   288     dst_row = dstrect->y;
   289     dst_width = dstrect->w * bpp;
   290 
   291 #ifdef USE_ASM_STRETCH
   292     /* Write the opcodes for this stretch */
   293     if ((bpp == 3) || (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0)) {
   294         use_asm = SDL_FALSE;
   295     }
   296 #endif
   297 
   298     /* Perform the stretch blit */
   299     for (dst_maxrow = dst_row + dstrect->h; dst_row < dst_maxrow; ++dst_row) {
   300         dstp = (Uint8 *) dst->pixels + (dst_row * dst->pitch)
   301             + (dstrect->x * bpp);
   302         while (pos >= 0x10000L) {
   303             srcp = (Uint8 *) src->pixels + (src_row * src->pitch)
   304                 + (srcrect->x * bpp);
   305             ++src_row;
   306             pos -= 0x10000L;
   307         }
   308 #ifdef USE_ASM_STRETCH
   309         if (use_asm) {
   310 #ifdef __GNUC__
   311             __asm__ __volatile__("call *%4":"=&D"(u1), "=&S"(u2)
   312                                  :"0"(dstp), "1"(srcp), "r"(copy_row)
   313                                  :"memory");
   314 #elif defined(_MSC_VER) || defined(__WATCOMC__)
   315             /* *INDENT-OFF* */
   316             {
   317                 void *code = copy_row;
   318                 __asm {
   319                     push edi
   320                     push esi
   321                     mov edi, dstp
   322                     mov esi, srcp
   323                     call dword ptr code
   324                     pop esi
   325                     pop edi
   326                 }
   327             }
   328             /* *INDENT-ON* */
   329 #else
   330 #error Need inline assembly for this compiler
   331 #endif
   332         } else
   333 #endif
   334             switch (bpp) {
   335             case 1:
   336                 copy_row1(srcp, srcrect->w, dstp, dstrect->w);
   337                 break;
   338             case 2:
   339                 copy_row2((Uint16 *) srcp, srcrect->w,
   340                           (Uint16 *) dstp, dstrect->w);
   341                 break;
   342             case 3:
   343                 copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   344                 break;
   345             case 4:
   346                 copy_row4((Uint32 *) srcp, srcrect->w,
   347                           (Uint32 *) dstp, dstrect->w);
   348                 break;
   349             }
   350         pos += inc;
   351     }
   352 
   353     /* We need to unlock the surfaces if they're locked */
   354     if (dst_locked) {
   355         SDL_UnlockSurface(dst);
   356     }
   357     if (src_locked) {
   358         SDL_UnlockSurface(src);
   359     }
   360     return (0);
   361 }
   362 
   363 /* vi: set ts=4 sw=4 expandtab: */