src/video/SDL_stretch.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 18 Oct 2009 23:18:28 +0000
branch SDL-1.2
changeset 4356 ab2dfac9d5c1
parent 4355 9b464226e541
child 4541 abb56f7699ea
permissions -rw-r--r--
There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct value after the first scanline. Ugh.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2009 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
/* This is a stretch blit implementation based on ideas given to me by
   Tomasz Cejner - thanks! :)

   April 27, 2000 - Sam Lantinga
*/
    29 
    30 #include "SDL_video.h"
    31 #include "SDL_blit.h"
    32 
    33 /* This isn't ready for general consumption yet - it should be folded
    34    into the general blitting mechanism.
    35 */
    36 
    37 #if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
    38      defined(__WATCOMC__) || \
    39      (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
    40 /* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct
    41  * value after the first scanline.  FIXME? */
    42 /*#define USE_ASM_STRETCH*/
    43 #endif
    44 
    45 #ifdef USE_ASM_STRETCH
    46 
    47 #ifdef HAVE_MPROTECT
    48 #include <sys/types.h>
    49 #include <sys/mman.h>
    50 #endif
    51 #ifdef __GNUC__
    52 #define PAGE_ALIGNED __attribute__((__aligned__(4096)))
    53 #else
    54 #define PAGE_ALIGNED
    55 #endif
    56 
    57 #if defined(_M_IX86) || defined(i386)
    58 #define PREFIX16	0x66
    59 #define STORE_BYTE	0xAA
    60 #define STORE_WORD	0xAB
    61 #define LOAD_BYTE	0xAC
    62 #define LOAD_WORD	0xAD
    63 #define RETURN		0xC3
    64 #else
    65 #error Need assembly opcodes for this architecture
    66 #endif
    67 
    68 static unsigned char copy_row[4096] PAGE_ALIGNED;
    69 
    70 static int generate_rowbytes(int src_w, int dst_w, int bpp)
    71 {
    72 	static struct {
    73 		int bpp;
    74 		int src_w;
    75 		int dst_w;
    76 		int status;
    77 	} last;
    78 
    79 	int i;
    80 	int pos, inc;
    81 	unsigned char *eip;
    82 	unsigned char load, store;
    83 
    84 	/* See if we need to regenerate the copy buffer */
    85 	if ( (src_w == last.src_w) &&
    86 	     (dst_w == last.dst_w) && (bpp == last.bpp) ) {
    87 		return(last.status);
    88 	}
    89 	last.bpp = bpp;
    90 	last.src_w = src_w;
    91 	last.dst_w = dst_w;
    92 	last.status = -1;
    93 
    94 	switch (bpp) {
    95 	    case 1:
    96 		load = LOAD_BYTE;
    97 		store = STORE_BYTE;
    98 		break;
    99 	    case 2:
   100 	    case 4:
   101 		load = LOAD_WORD;
   102 		store = STORE_WORD;
   103 		break;
   104 	    default:
   105 		SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
   106 		return(-1);
   107 	}
   108 #ifdef HAVE_MPROTECT
   109 	/* Make the code writeable */
   110 	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_WRITE) < 0 ) {
   111 		SDL_SetError("Couldn't make copy buffer writeable");
   112 		return(-1);
   113 	}
   114 #endif
   115 	pos = 0x10000;
   116 	inc = (src_w << 16) / dst_w;
   117 	eip = copy_row;
   118 	for ( i=0; i<dst_w; ++i ) {
   119 		while ( pos >= 0x10000L ) {
   120 			if ( bpp == 2 ) {
   121 				*eip++ = PREFIX16;
   122 			}
   123 			*eip++ = load;
   124 			pos -= 0x10000L;
   125 		}
   126 		if ( bpp == 2 ) {
   127 			*eip++ = PREFIX16;
   128 		}
   129 		*eip++ = store;
   130 		pos += inc;
   131 	}
   132 	*eip++ = RETURN;
   133 
   134 	/* Verify that we didn't overflow (too late!!!) */
   135 	if ( eip > (copy_row+sizeof(copy_row)) ) {
   136 		SDL_SetError("Copy buffer overflow");
   137 		return(-1);
   138 	}
   139 #ifdef HAVE_MPROTECT
   140 	/* Make the code executable but not writeable */
   141 	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_EXEC) < 0 ) {
   142 		SDL_SetError("Couldn't make copy buffer executable");
   143 		return(-1);
   144 	}
   145 #endif
   146 	last.status = 0;
   147 	return(0);
   148 }
   149 
   150 #endif /* USE_ASM_STRETCH */
   151 
   152 #define DEFINE_COPY_ROW(name, type)			\
   153 void name(type *src, int src_w, type *dst, int dst_w)	\
   154 {							\
   155 	int i;						\
   156 	int pos, inc;					\
   157 	type pixel = 0;					\
   158 							\
   159 	pos = 0x10000;					\
   160 	inc = (src_w << 16) / dst_w;			\
   161 	for ( i=dst_w; i>0; --i ) {			\
   162 		while ( pos >= 0x10000L ) {		\
   163 			pixel = *src++;			\
   164 			pos -= 0x10000L;		\
   165 		}					\
   166 		*dst++ = pixel;				\
   167 		pos += inc;				\
   168 	}						\
   169 }
   170 DEFINE_COPY_ROW(copy_row1, Uint8)
   171 DEFINE_COPY_ROW(copy_row2, Uint16)
   172 DEFINE_COPY_ROW(copy_row4, Uint32)
   173 
   174 /* The ASM code doesn't handle 24-bpp stretch blits */
   175 void copy_row3(Uint8 *src, int src_w, Uint8 *dst, int dst_w)
   176 {
   177 	int i;
   178 	int pos, inc;
   179 	Uint8 pixel[3] = { 0, 0, 0 };
   180 
   181 	pos = 0x10000;
   182 	inc = (src_w << 16) / dst_w;
   183 	for ( i=dst_w; i>0; --i ) {
   184 		while ( pos >= 0x10000L ) {
   185 			pixel[0] = *src++;
   186 			pixel[1] = *src++;
   187 			pixel[2] = *src++;
   188 			pos -= 0x10000L;
   189 		}
   190 		*dst++ = pixel[0];
   191 		*dst++ = pixel[1];
   192 		*dst++ = pixel[2];
   193 		pos += inc;
   194 	}
   195 }
   196 
   197 /* Perform a stretch blit between two surfaces of the same format.
   198    NOTE:  This function is not safe to call from multiple threads!
   199 */
   200 int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect,
   201                     SDL_Surface *dst, SDL_Rect *dstrect)
   202 {
   203 	int src_locked;
   204 	int dst_locked;
   205 	int pos, inc;
   206 	int dst_width;
   207 	int dst_maxrow;
   208 	int src_row, dst_row;
   209 	Uint8 *srcp = NULL;
   210 	Uint8 *dstp;
   211 	SDL_Rect full_src;
   212 	SDL_Rect full_dst;
   213 #ifdef USE_ASM_STRETCH
   214 	SDL_bool use_asm = SDL_TRUE;
   215 #ifdef __GNUC__
   216 	int u1, u2;
   217 #endif
   218 #endif /* USE_ASM_STRETCH */
   219 	const int bpp = dst->format->BytesPerPixel;
   220 
   221 	if ( src->format->BitsPerPixel != dst->format->BitsPerPixel ) {
   222 		SDL_SetError("Only works with same format surfaces");
   223 		return(-1);
   224 	}
   225 
   226 	/* Verify the blit rectangles */
   227 	if ( srcrect ) {
   228 		if ( (srcrect->x < 0) || (srcrect->y < 0) ||
   229 		     ((srcrect->x+srcrect->w) > src->w) ||
   230 		     ((srcrect->y+srcrect->h) > src->h) ) {
   231 			SDL_SetError("Invalid source blit rectangle");
   232 			return(-1);
   233 		}
   234 	} else {
   235 		full_src.x = 0;
   236 		full_src.y = 0;
   237 		full_src.w = src->w;
   238 		full_src.h = src->h;
   239 		srcrect = &full_src;
   240 	}
   241 	if ( dstrect ) {
   242 		if ( (dstrect->x < 0) || (dstrect->y < 0) ||
   243 		     ((dstrect->x+dstrect->w) > dst->w) ||
   244 		     ((dstrect->y+dstrect->h) > dst->h) ) {
   245 			SDL_SetError("Invalid destination blit rectangle");
   246 			return(-1);
   247 		}
   248 	} else {
   249 		full_dst.x = 0;
   250 		full_dst.y = 0;
   251 		full_dst.w = dst->w;
   252 		full_dst.h = dst->h;
   253 		dstrect = &full_dst;
   254 	}
   255 
   256 	/* Lock the destination if it's in hardware */
   257 	dst_locked = 0;
   258 	if ( SDL_MUSTLOCK(dst) ) {
   259 		if ( SDL_LockSurface(dst) < 0 ) {
   260 			SDL_SetError("Unable to lock destination surface");
   261 			return(-1);
   262 		}
   263 		dst_locked = 1;
   264 	}
   265 	/* Lock the source if it's in hardware */
   266 	src_locked = 0;
   267 	if ( SDL_MUSTLOCK(src) ) {
   268 		if ( SDL_LockSurface(src) < 0 ) {
   269 			if ( dst_locked ) {
   270 				SDL_UnlockSurface(dst);
   271 			}
   272 			SDL_SetError("Unable to lock source surface");
   273 			return(-1);
   274 		}
   275 		src_locked = 1;
   276 	}
   277 
   278 	/* Set up the data... */
   279 	pos = 0x10000;
   280 	inc = (srcrect->h << 16) / dstrect->h;
   281 	src_row = srcrect->y;
   282 	dst_row = dstrect->y;
   283 	dst_width = dstrect->w*bpp;
   284 
   285 #ifdef USE_ASM_STRETCH
   286 	/* Write the opcodes for this stretch */
   287 	if ( (bpp == 3) ||
   288 	     (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0) ) {
   289 		use_asm = SDL_FALSE;
   290 	}
   291 #endif
   292 
   293 	/* Perform the stretch blit */
   294 	for ( dst_maxrow = dst_row+dstrect->h; dst_row<dst_maxrow; ++dst_row ) {
   295 		dstp = (Uint8 *)dst->pixels + (dst_row*dst->pitch)
   296 		                            + (dstrect->x*bpp);
   297 		while ( pos >= 0x10000L ) {
   298 			srcp = (Uint8 *)src->pixels + (src_row*src->pitch)
   299 			                            + (srcrect->x*bpp);
   300 			++src_row;
   301 			pos -= 0x10000L;
   302 		}
   303 #ifdef USE_ASM_STRETCH
   304 		if (use_asm) {
   305 #ifdef __GNUC__
   306 			__asm__ __volatile__ (
   307 			"call *%4"
   308 			: "=&D" (u1), "=&S" (u2)
   309 			: "0" (dstp), "1" (srcp), "r" (copy_row)
   310 			: "memory" );
   311 #elif defined(_MSC_VER) || defined(__WATCOMC__)
   312 		{ void *code = copy_row;
   313 			__asm {
   314 				push edi
   315 				push esi
   316 	
   317 				mov edi, dstp
   318 				mov esi, srcp
   319 				call dword ptr code
   320 
   321 				pop esi
   322 				pop edi
   323 			}
   324 		}
   325 #else
   326 #error Need inline assembly for this compiler
   327 #endif
   328 		} else
   329 #endif
   330 		switch (bpp) {
   331 		    case 1:
   332 			copy_row1(srcp, srcrect->w, dstp, dstrect->w);
   333 			break;
   334 		    case 2:
   335 			copy_row2((Uint16 *)srcp, srcrect->w,
   336 			          (Uint16 *)dstp, dstrect->w);
   337 			break;
   338 		    case 3:
   339 			copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   340 			break;
   341 		    case 4:
   342 			copy_row4((Uint32 *)srcp, srcrect->w,
   343 			          (Uint32 *)dstp, dstrect->w);
   344 			break;
   345 		}
   346 		pos += inc;
   347 	}
   348 
   349 	/* We need to unlock the surfaces if they're locked */
   350 	if ( dst_locked ) {
   351 		SDL_UnlockSurface(dst);
   352 	}
   353 	if ( src_locked ) {
   354 		SDL_UnlockSurface(src);
   355 	}
   356 	return(0);
   357 }
   358