src/video/SDL_stretch.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 11 Feb 2011 22:37:15 -0800
changeset 5262 b530ef003506
parent 4542 af1d018ebbe6
child 5535 96594ac5fd1a
permissions -rw-r--r--
Happy 2011! :)
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2011 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This is a stretch blit implementation based on ideas given to me by
    25    Tomasz Cejner - thanks! :)
    26 
    27    April 27, 2000 - Sam Lantinga
    28 */
    29 
    30 #include "SDL_video.h"
    31 #include "SDL_blit.h"
    32 
    33 /* This isn't ready for general consumption yet - it should be folded
    34    into the general blitting mechanism.
    35 */
    36 
    37 #if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
    38      defined(__WATCOMC__) || \
    39      (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
    40 /* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct
    41  * value after the first scanline.  FIXME? */
    42 /*#define USE_ASM_STRETCH*/
    43 #endif
    44 
    45 #ifdef USE_ASM_STRETCH
    46 
    47 #ifdef HAVE_MPROTECT
    48 #include <sys/types.h>
    49 #include <sys/mman.h>
    50 #endif
    51 #ifdef __GNUC__
    52 #define PAGE_ALIGNED __attribute__((__aligned__(4096)))
    53 #else
    54 #define PAGE_ALIGNED
    55 #endif
    56 
    57 #if defined(_M_IX86) || defined(i386)
    58 #define PREFIX16	0x66
    59 #define STORE_BYTE	0xAA
    60 #define STORE_WORD	0xAB
    61 #define LOAD_BYTE	0xAC
    62 #define LOAD_WORD	0xAD
    63 #define RETURN		0xC3
    64 #else
    65 #error Need assembly opcodes for this architecture
    66 #endif
    67 
    68 static unsigned char copy_row[4096] PAGE_ALIGNED;
    69 
    70 static int
    71 generate_rowbytes(int src_w, int dst_w, int bpp)
    72 {
    73     static struct
    74     {
    75         int bpp;
    76         int src_w;
    77         int dst_w;
    78         int status;
    79     } last;
    80 
    81     int i;
    82     int pos, inc;
    83     unsigned char *eip, *fence;
    84     unsigned char load, store;
    85 
    86     /* See if we need to regenerate the copy buffer */
    87     if ((src_w == last.src_w) && (dst_w == last.dst_w) && (bpp == last.bpp)) {
    88         return (last.status);
    89     }
    90     last.bpp = bpp;
    91     last.src_w = src_w;
    92     last.dst_w = dst_w;
    93     last.status = -1;
    94 
    95     switch (bpp) {
    96     case 1:
    97         load = LOAD_BYTE;
    98         store = STORE_BYTE;
    99         break;
   100     case 2:
   101     case 4:
   102         load = LOAD_WORD;
   103         store = STORE_WORD;
   104         break;
   105     default:
   106         SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
   107         return (-1);
   108     }
   109 #ifdef HAVE_MPROTECT
   110     /* Make the code writeable */
   111     if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_WRITE) < 0) {
   112         SDL_SetError("Couldn't make copy buffer writeable");
   113         return (-1);
   114     }
   115 #endif
   116     pos = 0x10000;
   117     inc = (src_w << 16) / dst_w;
   118     eip = copy_row;
   119     fence = copy_row + sizeof(copy_row)-2;
   120     for (i = 0; i < dst_w; ++i) {
   121         while (pos >= 0x10000L) {
   122             if (eip == fence) {
   123                 return -1;
   124             }
   125             if (bpp == 2) {
   126                 *eip++ = PREFIX16;
   127             }
   128             *eip++ = load;
   129             pos -= 0x10000L;
   130         }
   131         if (eip == fence) {
   132             return -1;
   133         }
   134         if (bpp == 2) {
   135             *eip++ = PREFIX16;
   136         }
   137         *eip++ = store;
   138         pos += inc;
   139     }
   140     *eip++ = RETURN;
   141 
   142 #ifdef HAVE_MPROTECT
   143     /* Make the code executable but not writeable */
   144     if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_EXEC) < 0) {
   145         SDL_SetError("Couldn't make copy buffer executable");
   146         return (-1);
   147     }
   148 #endif
   149     last.status = 0;
   150     return (0);
   151 }
   152 
   153 #endif /* USE_ASM_STRETCH */
   154 
   155 #define DEFINE_COPY_ROW(name, type)			\
   156 static void name(type *src, int src_w, type *dst, int dst_w)	\
   157 {							\
   158 	int i;						\
   159 	int pos, inc;					\
   160 	type pixel = 0;					\
   161 							\
   162 	pos = 0x10000;					\
   163 	inc = (src_w << 16) / dst_w;			\
   164 	for ( i=dst_w; i>0; --i ) {			\
   165 		while ( pos >= 0x10000L ) {		\
   166 			pixel = *src++;			\
   167 			pos -= 0x10000L;		\
   168 		}					\
   169 		*dst++ = pixel;				\
   170 		pos += inc;				\
   171 	}						\
   172 }
   173 /* *INDENT-OFF* */
   174 DEFINE_COPY_ROW(copy_row1, Uint8)
   175 DEFINE_COPY_ROW(copy_row2, Uint16)
   176 DEFINE_COPY_ROW(copy_row4, Uint32)
   177 /* *INDENT-ON* */
   178 
   179 /* The ASM code doesn't handle 24-bpp stretch blits */
   180 static void
   181 copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
   182 {
   183     int i;
   184     int pos, inc;
   185     Uint8 pixel[3] = { 0, 0, 0 };
   186 
   187     pos = 0x10000;
   188     inc = (src_w << 16) / dst_w;
   189     for (i = dst_w; i > 0; --i) {
   190         while (pos >= 0x10000L) {
   191             pixel[0] = *src++;
   192             pixel[1] = *src++;
   193             pixel[2] = *src++;
   194             pos -= 0x10000L;
   195         }
   196         *dst++ = pixel[0];
   197         *dst++ = pixel[1];
   198         *dst++ = pixel[2];
   199         pos += inc;
   200     }
   201 }
   202 
   203 /* Perform a stretch blit between two surfaces of the same format.
   204    NOTE:  This function is not safe to call from multiple threads!
   205 */
   206 int
   207 SDL_SoftStretch(SDL_Surface * src, const SDL_Rect * srcrect,
   208                 SDL_Surface * dst, const SDL_Rect * dstrect)
   209 {
   210     int src_locked;
   211     int dst_locked;
   212     int pos, inc;
   213     int dst_width;
   214     int dst_maxrow;
   215     int src_row, dst_row;
   216     Uint8 *srcp = NULL;
   217     Uint8 *dstp;
   218     SDL_Rect full_src;
   219     SDL_Rect full_dst;
   220 #ifdef USE_ASM_STRETCH
   221     SDL_bool use_asm = SDL_TRUE;
   222 #ifdef __GNUC__
   223     int u1, u2;
   224 #endif
   225 #endif /* USE_ASM_STRETCH */
   226     const int bpp = dst->format->BytesPerPixel;
   227 
   228     if (src->format->BitsPerPixel != dst->format->BitsPerPixel) {
   229         SDL_SetError("Only works with same format surfaces");
   230         return (-1);
   231     }
   232 
   233     /* Verify the blit rectangles */
   234     if (srcrect) {
   235         if ((srcrect->x < 0) || (srcrect->y < 0) ||
   236             ((srcrect->x + srcrect->w) > src->w) ||
   237             ((srcrect->y + srcrect->h) > src->h)) {
   238             SDL_SetError("Invalid source blit rectangle");
   239             return (-1);
   240         }
   241     } else {
   242         full_src.x = 0;
   243         full_src.y = 0;
   244         full_src.w = src->w;
   245         full_src.h = src->h;
   246         srcrect = &full_src;
   247     }
   248     if (dstrect) {
   249         if ((dstrect->x < 0) || (dstrect->y < 0) ||
   250             ((dstrect->x + dstrect->w) > dst->w) ||
   251             ((dstrect->y + dstrect->h) > dst->h)) {
   252             SDL_SetError("Invalid destination blit rectangle");
   253             return (-1);
   254         }
   255     } else {
   256         full_dst.x = 0;
   257         full_dst.y = 0;
   258         full_dst.w = dst->w;
   259         full_dst.h = dst->h;
   260         dstrect = &full_dst;
   261     }
   262 
   263     /* Lock the destination if it's in hardware */
   264     dst_locked = 0;
   265     if (SDL_MUSTLOCK(dst)) {
   266         if (SDL_LockSurface(dst) < 0) {
   267             SDL_SetError("Unable to lock destination surface");
   268             return (-1);
   269         }
   270         dst_locked = 1;
   271     }
   272     /* Lock the source if it's in hardware */
   273     src_locked = 0;
   274     if (SDL_MUSTLOCK(src)) {
   275         if (SDL_LockSurface(src) < 0) {
   276             if (dst_locked) {
   277                 SDL_UnlockSurface(dst);
   278             }
   279             SDL_SetError("Unable to lock source surface");
   280             return (-1);
   281         }
   282         src_locked = 1;
   283     }
   284 
   285     /* Set up the data... */
   286     pos = 0x10000;
   287     inc = (srcrect->h << 16) / dstrect->h;
   288     src_row = srcrect->y;
   289     dst_row = dstrect->y;
   290     dst_width = dstrect->w * bpp;
   291 
   292 #ifdef USE_ASM_STRETCH
   293     /* Write the opcodes for this stretch */
   294     if ((bpp == 3) || (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0)) {
   295         use_asm = SDL_FALSE;
   296     }
   297 #endif
   298 
   299     /* Perform the stretch blit */
   300     for (dst_maxrow = dst_row + dstrect->h; dst_row < dst_maxrow; ++dst_row) {
   301         dstp = (Uint8 *) dst->pixels + (dst_row * dst->pitch)
   302             + (dstrect->x * bpp);
   303         while (pos >= 0x10000L) {
   304             srcp = (Uint8 *) src->pixels + (src_row * src->pitch)
   305                 + (srcrect->x * bpp);
   306             ++src_row;
   307             pos -= 0x10000L;
   308         }
   309 #ifdef USE_ASM_STRETCH
   310         if (use_asm) {
   311 #ifdef __GNUC__
   312             __asm__ __volatile__("call *%4":"=&D"(u1), "=&S"(u2)
   313                                  :"0"(dstp), "1"(srcp), "r"(copy_row)
   314                                  :"memory");
   315 #elif defined(_MSC_VER) || defined(__WATCOMC__)
   316             /* *INDENT-OFF* */
   317             {
   318                 void *code = copy_row;
   319                 __asm {
   320                     push edi
   321                     push esi
   322                     mov edi, dstp
   323                     mov esi, srcp
   324                     call dword ptr code
   325                     pop esi
   326                     pop edi
   327                 }
   328             }
   329             /* *INDENT-ON* */
   330 #else
   331 #error Need inline assembly for this compiler
   332 #endif
   333         } else
   334 #endif
   335             switch (bpp) {
   336             case 1:
   337                 copy_row1(srcp, srcrect->w, dstp, dstrect->w);
   338                 break;
   339             case 2:
   340                 copy_row2((Uint16 *) srcp, srcrect->w,
   341                           (Uint16 *) dstp, dstrect->w);
   342                 break;
   343             case 3:
   344                 copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   345                 break;
   346             case 4:
   347                 copy_row4((Uint32 *) srcp, srcrect->w,
   348                           (Uint32 *) dstp, dstrect->w);
   349                 break;
   350             }
   351         pos += inc;
   352     }
   353 
   354     /* We need to unlock the surfaces if they're locked */
   355     if (dst_locked) {
   356         SDL_UnlockSurface(dst);
   357     }
   358     if (src_locked) {
   359         SDL_UnlockSurface(src);
   360     }
   361     return (0);
   362 }
   363 
   364 /* vi: set ts=4 sw=4 expandtab: */