src/video/SDL_stretch.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 18 Oct 2009 17:49:40 +0000
changeset 3405 d5f2dd33f4eb
parent 2859 99210400e8b9
child 3406 8ae607392409
permissions -rw-r--r--
Merged improvements to SDL_SoftStretch() from SDL 1.2
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2009 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This is a stretch blit implementation based on ideas given to me by
    25    Tomasz Cejner - thanks! :)
    26 
    27    April 27, 2000 - Sam Lantinga
    28 */
    29 
    30 #include "SDL_video.h"
    31 #include "SDL_blit.h"
    32 
    33 /* This isn't ready for general consumption yet - it should be folded
    34    into the general blitting mechanism.
    35 */
    36 
    37 #if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
    38      defined(__WATCOMC__) || \
    39      (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
    40 #define USE_ASM_STRETCH
    41 #endif
    42 
    43 #ifdef USE_ASM_STRETCH
    44 
    45 #ifdef HAVE_MPROTECT
    46 #include <sys/types.h>
    47 #include <sys/mman.h>
    48 #endif
    49 #ifdef __GNUC__
    50 #define PAGE_ALIGNED __attribute__((__aligned__(4096)))
    51 #else
    52 #define PAGE_ALIGNED
    53 #endif
    54 
    55 #if defined(_M_IX86) || defined(i386)
    56 #define PREFIX16	0x66
    57 #define STORE_BYTE	0xAA
    58 #define STORE_WORD	0xAB
    59 #define LOAD_BYTE	0xAC
    60 #define LOAD_WORD	0xAD
    61 #define RETURN		0xC3
    62 #else
    63 #error Need assembly opcodes for this architecture
    64 #endif
    65 
    66 static unsigned char copy_row[4096] PAGE_ALIGNED;
    67 
    68 static int
    69 generate_rowbytes(int src_w, int dst_w, int bpp)
    70 {
    71     static struct
    72     {
    73         int bpp;
    74         int src_w;
    75         int dst_w;
    76         int status;
    77     } last;
    78 
    79     int i;
    80     int pos, inc;
    81     unsigned char *eip;
    82     unsigned char load, store;
    83 
    84     /* See if we need to regenerate the copy buffer */
    85     if ((src_w == last.src_w) && (dst_w == last.dst_w) && (bpp == last.bpp)) {
    86         return (last.status);
    87     }
    88     last.bpp = bpp;
    89     last.src_w = src_w;
    90     last.dst_w = dst_w;
    91     last.status = -1;
    92 
    93     switch (bpp) {
    94     case 1:
    95         load = LOAD_BYTE;
    96         store = STORE_BYTE;
    97         break;
    98     case 2:
    99     case 4:
   100         load = LOAD_WORD;
   101         store = STORE_WORD;
   102         break;
   103     default:
   104         SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
   105         return (-1);
   106     }
   107 #ifdef HAVE_MPROTECT
   108     /* Make the code writeable */
   109     if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_WRITE) < 0) {
   110         SDL_SetError("Couldn't make copy buffer writeable");
   111         return (-1);
   112     }
   113 #endif
   114     pos = 0x10000;
   115     inc = (src_w << 16) / dst_w;
   116     eip = copy_row;
   117     for (i = 0; i < dst_w; ++i) {
   118         while (pos >= 0x10000L) {
   119             if (bpp == 2) {
   120                 *eip++ = PREFIX16;
   121             }
   122             *eip++ = load;
   123             pos -= 0x10000L;
   124         }
   125         if (bpp == 2) {
   126             *eip++ = PREFIX16;
   127         }
   128         *eip++ = store;
   129         pos += inc;
   130     }
   131     *eip++ = RETURN;
   132 
   133     /* Verify that we didn't overflow (too late!!!) */
   134     if (eip > (copy_row + sizeof(copy_row))) {
   135         SDL_SetError("Copy buffer overflow");
   136         return (-1);
   137     }
   138 #ifdef HAVE_MPROTECT
   139     /* Make the code executable but not writeable */
   140     if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_EXEC) < 0) {
   141         SDL_SetError("Couldn't make copy buffer executable");
   142         return (-1);
   143     }
   144 #endif
   145     last.status = 0;
   146     return (0);
   147 }
   148 
   149 #endif /* USE_ASM_STRETCH */
   150 
   151 #define DEFINE_COPY_ROW(name, type)			\
   152 void name(type *src, int src_w, type *dst, int dst_w)	\
   153 {							\
   154 	int i;						\
   155 	int pos, inc;					\
   156 	type pixel = 0;					\
   157 							\
   158 	pos = 0x10000;					\
   159 	inc = (src_w << 16) / dst_w;			\
   160 	for ( i=dst_w; i>0; --i ) {			\
   161 		while ( pos >= 0x10000L ) {		\
   162 			pixel = *src++;			\
   163 			pos -= 0x10000L;		\
   164 		}					\
   165 		*dst++ = pixel;				\
   166 		pos += inc;				\
   167 	}						\
   168 }
   169 /* *INDENT-OFF* */
   170 DEFINE_COPY_ROW(copy_row1, Uint8)
   171 DEFINE_COPY_ROW(copy_row2, Uint16)
   172 DEFINE_COPY_ROW(copy_row4, Uint32)
   173 /* *INDENT-ON* */
   174 
   175 /* The ASM code doesn't handle 24-bpp stretch blits */
   176 void
   177 copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
   178 {
   179     int i;
   180     int pos, inc;
   181     Uint8 pixel[3] = { 0, 0, 0 };
   182 
   183     pos = 0x10000;
   184     inc = (src_w << 16) / dst_w;
   185     for (i = dst_w; i > 0; --i) {
   186         while (pos >= 0x10000L) {
   187             pixel[0] = *src++;
   188             pixel[1] = *src++;
   189             pixel[2] = *src++;
   190             pos -= 0x10000L;
   191         }
   192         *dst++ = pixel[0];
   193         *dst++ = pixel[1];
   194         *dst++ = pixel[2];
   195         pos += inc;
   196     }
   197 }
   198 
   199 /* Perform a stretch blit between two surfaces of the same format.
   200    NOTE:  This function is not safe to call from multiple threads!
   201 */
   202 int
   203 SDL_SoftStretch(SDL_Surface * src, const SDL_Rect * srcrect,
   204                 SDL_Surface * dst, const SDL_Rect * dstrect)
   205 {
   206     int src_locked;
   207     int dst_locked;
   208     int pos, inc;
   209     int dst_width;
   210     int dst_maxrow;
   211     int src_row, dst_row;
   212     Uint8 *srcp = NULL;
   213     Uint8 *dstp;
   214     SDL_Rect full_src;
   215     SDL_Rect full_dst;
   216 #ifdef USE_ASM_STRETCH
   217     SDL_bool use_asm = SDL_TRUE;
   218 #ifdef __GNUC__
   219     int u1, u2;
   220 #endif
   221 #endif /* USE_ASM_STRETCH */
   222     const int bpp = dst->format->BytesPerPixel;
   223 
   224     if (src->format->BitsPerPixel != dst->format->BitsPerPixel) {
   225         SDL_SetError("Only works with same format surfaces");
   226         return (-1);
   227     }
   228 
   229     /* Verify the blit rectangles */
   230     if (srcrect) {
   231         if ((srcrect->x < 0) || (srcrect->y < 0) ||
   232             ((srcrect->x + srcrect->w) > src->w) ||
   233             ((srcrect->y + srcrect->h) > src->h)) {
   234             SDL_SetError("Invalid source blit rectangle");
   235             return (-1);
   236         }
   237     } else {
   238         full_src.x = 0;
   239         full_src.y = 0;
   240         full_src.w = src->w;
   241         full_src.h = src->h;
   242         srcrect = &full_src;
   243     }
   244     if (dstrect) {
   245         if ((dstrect->x < 0) || (dstrect->y < 0) ||
   246             ((dstrect->x + dstrect->w) > dst->w) ||
   247             ((dstrect->y + dstrect->h) > dst->h)) {
   248             SDL_SetError("Invalid destination blit rectangle");
   249             return (-1);
   250         }
   251     } else {
   252         full_dst.x = 0;
   253         full_dst.y = 0;
   254         full_dst.w = dst->w;
   255         full_dst.h = dst->h;
   256         dstrect = &full_dst;
   257     }
   258 
   259     /* Lock the destination if it's in hardware */
   260     dst_locked = 0;
   261     if (SDL_MUSTLOCK(dst)) {
   262         if (SDL_LockSurface(dst) < 0) {
   263             SDL_SetError("Unable to lock destination surface");
   264             return (-1);
   265         }
   266         dst_locked = 1;
   267     }
   268     /* Lock the source if it's in hardware */
   269     src_locked = 0;
   270     if (SDL_MUSTLOCK(src)) {
   271         if (SDL_LockSurface(src) < 0) {
   272             if (dst_locked) {
   273                 SDL_UnlockSurface(dst);
   274             }
   275             SDL_SetError("Unable to lock source surface");
   276             return (-1);
   277         }
   278         src_locked = 1;
   279     }
   280 
   281     /* Set up the data... */
   282     pos = 0x10000;
   283     inc = (srcrect->h << 16) / dstrect->h;
   284     src_row = srcrect->y;
   285     dst_row = dstrect->y;
   286     dst_width = dstrect->w * bpp;
   287 
   288 #ifdef USE_ASM_STRETCH
   289     /* Write the opcodes for this stretch */
   290     if ((bpp == 3) || (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0)) {
   291         use_asm = SDL_FALSE;
   292     }
   293 #endif
   294 
   295     /* Perform the stretch blit */
   296     for (dst_maxrow = dst_row + dstrect->h; dst_row < dst_maxrow; ++dst_row) {
   297         dstp = (Uint8 *) dst->pixels + (dst_row * dst->pitch)
   298             + (dstrect->x * bpp);
   299         while (pos >= 0x10000L) {
   300             srcp = (Uint8 *) src->pixels + (src_row * src->pitch)
   301                 + (srcrect->x * bpp);
   302             ++src_row;
   303             pos -= 0x10000L;
   304         }
   305 #ifdef USE_ASM_STRETCH
   306         if (use_asm) {
   307 #ifdef __GNUC__
   308             __asm__ __volatile__("call *%4":"=&D"(u1), "=&S"(u2)
   309                                  :"0"(dstp), "1"(srcp), "r"(copy_row)
   310                                  :"memory");
   311 #elif defined(_MSC_VER) || defined(__WATCOMC__)
   312             /* *INDENT-OFF* */
   313             {
   314                 void *code = copy_row;
   315                 __asm {
   316                     push edi
   317                     push esi
   318                     mov edi, dstp
   319                     mov esi, srcp
   320                     call dword ptr code
   321                     pop esi
   322                     pop edi
   323                 }
   324             }
   325             /* *INDENT-ON* */
   326 #else
   327 #error Need inline assembly for this compiler
   328 #endif
   329         } else
   330 #endif
   331             switch (bpp) {
   332             case 1:
   333                 copy_row1(srcp, srcrect->w, dstp, dstrect->w);
   334                 break;
   335             case 2:
   336                 copy_row2((Uint16 *) srcp, srcrect->w,
   337                           (Uint16 *) dstp, dstrect->w);
   338                 break;
   339             case 3:
   340                 copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   341                 break;
   342             case 4:
   343                 copy_row4((Uint32 *) srcp, srcrect->w,
   344                           (Uint32 *) dstp, dstrect->w);
   345                 break;
   346             }
   347         pos += inc;
   348     }
   349 
   350     /* We need to unlock the surfaces if they're locked */
   351     if (dst_locked) {
   352         SDL_UnlockSurface(dst);
   353     }
   354     if (src_locked) {
   355         SDL_UnlockSurface(src);
   356     }
   357     return (0);
   358 }
   359 
   360 /* vi: set ts=4 sw=4 expandtab: */