src/video/SDL_stretch.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 18 Oct 2009 23:18:28 +0000
branchSDL-1.2
changeset 4356 ab2dfac9d5c1
parent 4355 9b464226e541
child 4541 abb56f7699ea
permissions -rw-r--r--
There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct value after the first scanline. Ugh.
slouken@0
     1
/*
slouken@0
     2
    SDL - Simple DirectMedia Layer
slouken@4159
     3
    Copyright (C) 1997-2009 Sam Lantinga
slouken@0
     4
slouken@0
     5
    This library is free software; you can redistribute it and/or
slouken@1312
     6
    modify it under the terms of the GNU Lesser General Public
slouken@0
     7
    License as published by the Free Software Foundation; either
slouken@1312
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@0
     9
slouken@0
    10
    This library is distributed in the hope that it will be useful,
slouken@0
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@0
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1312
    13
    Lesser General Public License for more details.
slouken@0
    14
slouken@1312
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1312
    16
    License along with this library; if not, write to the Free Software
slouken@1312
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@0
    18
slouken@0
    19
    Sam Lantinga
slouken@252
    20
    slouken@libsdl.org
slouken@0
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@0
    23
slouken@0
    24
/* This is a stretch blit implementation based on ideas given to me by
slouken@0
    25
   Tomasz Cejner - thanks! :)
slouken@0
    26
slouken@0
    27
   April 27, 2000 - Sam Lantinga
slouken@0
    28
*/
slouken@0
    29
slouken@0
    30
#include "SDL_video.h"
slouken@0
    31
#include "SDL_blit.h"
slouken@0
    32
slouken@0
    33
/* This isn't ready for general consumption yet - it should be folded
slouken@0
    34
   into the general blitting mechanism.
slouken@0
    35
*/
slouken@0
    36
slouken@1361
    37
#if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
slouken@1442
    38
     defined(__WATCOMC__) || \
slouken@1402
    39
     (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
slouken@4356
    40
/* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct
slouken@4356
    41
 * value after the first scanline.  FIXME? */
slouken@4356
    42
/*#define USE_ASM_STRETCH*/
slouken@0
    43
#endif
slouken@0
    44
slouken@0
    45
#ifdef USE_ASM_STRETCH
slouken@0
    46
slouken@4109
    47
#ifdef HAVE_MPROTECT
slouken@4108
    48
#include <sys/types.h>
slouken@4108
    49
#include <sys/mman.h>
slouken@4108
    50
#endif
slouken@4109
    51
#ifdef __GNUC__
slouken@4109
    52
#define PAGE_ALIGNED __attribute__((__aligned__(4096)))
slouken@4109
    53
#else
slouken@4109
    54
#define PAGE_ALIGNED
slouken@4109
    55
#endif
slouken@4108
    56
slouken@1361
    57
#if defined(_M_IX86) || defined(i386)
slouken@0
    58
#define PREFIX16	0x66
slouken@0
    59
#define STORE_BYTE	0xAA
slouken@0
    60
#define STORE_WORD	0xAB
slouken@0
    61
#define LOAD_BYTE	0xAC
slouken@0
    62
#define LOAD_WORD	0xAD
slouken@0
    63
#define RETURN		0xC3
slouken@0
    64
#else
slouken@0
    65
#error Need assembly opcodes for this architecture
slouken@0
    66
#endif
slouken@0
    67
slouken@4109
    68
static unsigned char copy_row[4096] PAGE_ALIGNED;
slouken@0
    69
slouken@0
    70
static int generate_rowbytes(int src_w, int dst_w, int bpp)
slouken@0
    71
{
slouken@0
    72
	static struct {
slouken@0
    73
		int bpp;
slouken@0
    74
		int src_w;
slouken@0
    75
		int dst_w;
slouken@4109
    76
		int status;
slouken@0
    77
	} last;
slouken@0
    78
slouken@0
    79
	int i;
slouken@0
    80
	int pos, inc;
slouken@0
    81
	unsigned char *eip;
slouken@0
    82
	unsigned char load, store;
slouken@0
    83
slouken@0
    84
	/* See if we need to regenerate the copy buffer */
slouken@0
    85
	if ( (src_w == last.src_w) &&
icculus@1164
    86
	     (dst_w == last.dst_w) && (bpp == last.bpp) ) {
slouken@4109
    87
		return(last.status);
slouken@0
    88
	}
slouken@0
    89
	last.bpp = bpp;
slouken@0
    90
	last.src_w = src_w;
slouken@0
    91
	last.dst_w = dst_w;
slouken@4109
    92
	last.status = -1;
slouken@0
    93
slouken@0
    94
	switch (bpp) {
slouken@0
    95
	    case 1:
slouken@0
    96
		load = LOAD_BYTE;
slouken@0
    97
		store = STORE_BYTE;
slouken@0
    98
		break;
slouken@0
    99
	    case 2:
slouken@0
   100
	    case 4:
slouken@0
   101
		load = LOAD_WORD;
slouken@0
   102
		store = STORE_WORD;
slouken@0
   103
		break;
slouken@0
   104
	    default:
slouken@0
   105
		SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
slouken@0
   106
		return(-1);
slouken@0
   107
	}
slouken@4355
   108
#ifdef HAVE_MPROTECT
slouken@4355
   109
	/* Make the code writeable */
slouken@4355
   110
	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_WRITE) < 0 ) {
slouken@4355
   111
		SDL_SetError("Couldn't make copy buffer writeable");
slouken@4355
   112
		return(-1);
slouken@4355
   113
	}
slouken@4355
   114
#endif
slouken@0
   115
	pos = 0x10000;
slouken@0
   116
	inc = (src_w << 16) / dst_w;
slouken@0
   117
	eip = copy_row;
slouken@0
   118
	for ( i=0; i<dst_w; ++i ) {
slouken@0
   119
		while ( pos >= 0x10000L ) {
slouken@0
   120
			if ( bpp == 2 ) {
slouken@0
   121
				*eip++ = PREFIX16;
slouken@0
   122
			}
slouken@0
   123
			*eip++ = load;
slouken@0
   124
			pos -= 0x10000L;
slouken@0
   125
		}
slouken@0
   126
		if ( bpp == 2 ) {
slouken@0
   127
			*eip++ = PREFIX16;
slouken@0
   128
		}
slouken@0
   129
		*eip++ = store;
slouken@0
   130
		pos += inc;
slouken@0
   131
	}
slouken@0
   132
	*eip++ = RETURN;
slouken@0
   133
slouken@4109
   134
	/* Verify that we didn't overflow (too late!!!) */
slouken@0
   135
	if ( eip > (copy_row+sizeof(copy_row)) ) {
slouken@0
   136
		SDL_SetError("Copy buffer overflow");
slouken@0
   137
		return(-1);
slouken@0
   138
	}
slouken@4109
   139
#ifdef HAVE_MPROTECT
slouken@4355
   140
	/* Make the code executable but not writeable */
slouken@4355
   141
	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_EXEC) < 0 ) {
slouken@4109
   142
		SDL_SetError("Couldn't make copy buffer executable");
slouken@4109
   143
		return(-1);
slouken@4109
   144
	}
slouken@4109
   145
#endif
slouken@4109
   146
	last.status = 0;
slouken@0
   147
	return(0);
slouken@0
   148
}
slouken@0
   149
slouken@4109
   150
#endif /* USE_ASM_STRETCH */
slouken@0
   151
slouken@0
   152
#define DEFINE_COPY_ROW(name, type)			\
slouken@0
   153
void name(type *src, int src_w, type *dst, int dst_w)	\
slouken@0
   154
{							\
slouken@0
   155
	int i;						\
slouken@0
   156
	int pos, inc;					\
slouken@0
   157
	type pixel = 0;					\
slouken@0
   158
							\
slouken@0
   159
	pos = 0x10000;					\
slouken@0
   160
	inc = (src_w << 16) / dst_w;			\
slouken@0
   161
	for ( i=dst_w; i>0; --i ) {			\
slouken@0
   162
		while ( pos >= 0x10000L ) {		\
slouken@0
   163
			pixel = *src++;			\
slouken@0
   164
			pos -= 0x10000L;		\
slouken@0
   165
		}					\
slouken@0
   166
		*dst++ = pixel;				\
slouken@0
   167
		pos += inc;				\
slouken@0
   168
	}						\
slouken@0
   169
}
slouken@0
   170
DEFINE_COPY_ROW(copy_row1, Uint8)
slouken@0
   171
DEFINE_COPY_ROW(copy_row2, Uint16)
slouken@0
   172
DEFINE_COPY_ROW(copy_row4, Uint32)
slouken@0
   173
slouken@0
   174
/* The ASM code doesn't handle 24-bpp stretch blits */
slouken@0
   175
void copy_row3(Uint8 *src, int src_w, Uint8 *dst, int dst_w)
slouken@0
   176
{
slouken@0
   177
	int i;
slouken@0
   178
	int pos, inc;
slouken@1849
   179
	Uint8 pixel[3] = { 0, 0, 0 };
slouken@0
   180
slouken@0
   181
	pos = 0x10000;
slouken@0
   182
	inc = (src_w << 16) / dst_w;
slouken@0
   183
	for ( i=dst_w; i>0; --i ) {
slouken@0
   184
		while ( pos >= 0x10000L ) {
slouken@0
   185
			pixel[0] = *src++;
slouken@0
   186
			pixel[1] = *src++;
slouken@0
   187
			pixel[2] = *src++;
slouken@0
   188
			pos -= 0x10000L;
slouken@0
   189
		}
slouken@0
   190
		*dst++ = pixel[0];
slouken@0
   191
		*dst++ = pixel[1];
slouken@0
   192
		*dst++ = pixel[2];
slouken@0
   193
		pos += inc;
slouken@0
   194
	}
slouken@0
   195
}
slouken@0
   196
slouken@0
   197
/* Perform a stretch blit between two surfaces of the same format.
slouken@0
   198
   NOTE:  This function is not safe to call from multiple threads!
slouken@0
   199
*/
slouken@0
   200
int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect,
slouken@0
   201
                    SDL_Surface *dst, SDL_Rect *dstrect)
slouken@0
   202
{
slouken@894
   203
	int src_locked;
slouken@894
   204
	int dst_locked;
slouken@0
   205
	int pos, inc;
slouken@0
   206
	int dst_width;
slouken@0
   207
	int dst_maxrow;
slouken@0
   208
	int src_row, dst_row;
slouken@0
   209
	Uint8 *srcp = NULL;
slouken@0
   210
	Uint8 *dstp;
slouken@0
   211
	SDL_Rect full_src;
slouken@0
   212
	SDL_Rect full_dst;
slouken@4109
   213
#ifdef USE_ASM_STRETCH
slouken@4109
   214
	SDL_bool use_asm = SDL_TRUE;
slouken@4109
   215
#ifdef __GNUC__
slouken@0
   216
	int u1, u2;
slouken@0
   217
#endif
slouken@4109
   218
#endif /* USE_ASM_STRETCH */
slouken@0
   219
	const int bpp = dst->format->BytesPerPixel;
slouken@0
   220
slouken@0
   221
	if ( src->format->BitsPerPixel != dst->format->BitsPerPixel ) {
slouken@0
   222
		SDL_SetError("Only works with same format surfaces");
slouken@0
   223
		return(-1);
slouken@0
   224
	}
slouken@0
   225
slouken@0
   226
	/* Verify the blit rectangles */
slouken@0
   227
	if ( srcrect ) {
slouken@0
   228
		if ( (srcrect->x < 0) || (srcrect->y < 0) ||
slouken@0
   229
		     ((srcrect->x+srcrect->w) > src->w) ||
slouken@0
   230
		     ((srcrect->y+srcrect->h) > src->h) ) {
slouken@0
   231
			SDL_SetError("Invalid source blit rectangle");
slouken@0
   232
			return(-1);
slouken@0
   233
		}
slouken@0
   234
	} else {
slouken@0
   235
		full_src.x = 0;
slouken@0
   236
		full_src.y = 0;
slouken@0
   237
		full_src.w = src->w;
slouken@0
   238
		full_src.h = src->h;
slouken@0
   239
		srcrect = &full_src;
slouken@0
   240
	}
slouken@0
   241
	if ( dstrect ) {
slouken@0
   242
		if ( (dstrect->x < 0) || (dstrect->y < 0) ||
slouken@0
   243
		     ((dstrect->x+dstrect->w) > dst->w) ||
slouken@0
   244
		     ((dstrect->y+dstrect->h) > dst->h) ) {
slouken@0
   245
			SDL_SetError("Invalid destination blit rectangle");
slouken@0
   246
			return(-1);
slouken@0
   247
		}
slouken@0
   248
	} else {
slouken@0
   249
		full_dst.x = 0;
slouken@0
   250
		full_dst.y = 0;
slouken@0
   251
		full_dst.w = dst->w;
slouken@0
   252
		full_dst.h = dst->h;
slouken@0
   253
		dstrect = &full_dst;
slouken@0
   254
	}
slouken@0
   255
slouken@894
   256
	/* Lock the destination if it's in hardware */
slouken@894
   257
	dst_locked = 0;
slouken@894
   258
	if ( SDL_MUSTLOCK(dst) ) {
slouken@894
   259
		if ( SDL_LockSurface(dst) < 0 ) {
slouken@894
   260
			SDL_SetError("Unable to lock destination surface");
slouken@894
   261
			return(-1);
slouken@894
   262
		}
slouken@894
   263
		dst_locked = 1;
slouken@894
   264
	}
slouken@894
   265
	/* Lock the source if it's in hardware */
slouken@894
   266
	src_locked = 0;
slouken@894
   267
	if ( SDL_MUSTLOCK(src) ) {
slouken@894
   268
		if ( SDL_LockSurface(src) < 0 ) {
slouken@894
   269
			if ( dst_locked ) {
slouken@894
   270
				SDL_UnlockSurface(dst);
slouken@894
   271
			}
slouken@894
   272
			SDL_SetError("Unable to lock source surface");
slouken@894
   273
			return(-1);
slouken@894
   274
		}
slouken@894
   275
		src_locked = 1;
slouken@894
   276
	}
slouken@894
   277
slouken@0
   278
	/* Set up the data... */
slouken@0
   279
	pos = 0x10000;
slouken@0
   280
	inc = (srcrect->h << 16) / dstrect->h;
slouken@0
   281
	src_row = srcrect->y;
slouken@0
   282
	dst_row = dstrect->y;
slouken@0
   283
	dst_width = dstrect->w*bpp;
slouken@0
   284
slouken@0
   285
#ifdef USE_ASM_STRETCH
slouken@0
   286
	/* Write the opcodes for this stretch */
slouken@4109
   287
	if ( (bpp == 3) ||
slouken@0
   288
	     (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0) ) {
slouken@4109
   289
		use_asm = SDL_FALSE;
slouken@0
   290
	}
slouken@0
   291
#endif
slouken@0
   292
slouken@0
   293
	/* Perform the stretch blit */
slouken@0
   294
	for ( dst_maxrow = dst_row+dstrect->h; dst_row<dst_maxrow; ++dst_row ) {
slouken@0
   295
		dstp = (Uint8 *)dst->pixels + (dst_row*dst->pitch)
slouken@0
   296
		                            + (dstrect->x*bpp);
slouken@0
   297
		while ( pos >= 0x10000L ) {
slouken@0
   298
			srcp = (Uint8 *)src->pixels + (src_row*src->pitch)
slouken@0
   299
			                            + (srcrect->x*bpp);
slouken@0
   300
			++src_row;
slouken@0
   301
			pos -= 0x10000L;
slouken@0
   302
		}
slouken@0
   303
#ifdef USE_ASM_STRETCH
slouken@4109
   304
		if (use_asm) {
slouken@0
   305
#ifdef __GNUC__
slouken@627
   306
			__asm__ __volatile__ (
icculus@1228
   307
			"call *%4"
slouken@0
   308
			: "=&D" (u1), "=&S" (u2)
icculus@1234
   309
			: "0" (dstp), "1" (srcp), "r" (copy_row)
slouken@0
   310
			: "memory" );
slouken@1442
   311
#elif defined(_MSC_VER) || defined(__WATCOMC__)
icculus@1234
   312
		{ void *code = copy_row;
slouken@0
   313
			__asm {
slouken@0
   314
				push edi
slouken@0
   315
				push esi
slouken@0
   316
	
slouken@0
   317
				mov edi, dstp
slouken@0
   318
				mov esi, srcp
slouken@0
   319
				call dword ptr code
slouken@0
   320
slouken@0
   321
				pop esi
slouken@0
   322
				pop edi
slouken@0
   323
			}
slouken@0
   324
		}
slouken@0
   325
#else
slouken@0
   326
#error Need inline assembly for this compiler
slouken@0
   327
#endif
slouken@4109
   328
		} else
slouken@4109
   329
#endif
slouken@0
   330
		switch (bpp) {
slouken@0
   331
		    case 1:
slouken@0
   332
			copy_row1(srcp, srcrect->w, dstp, dstrect->w);
slouken@0
   333
			break;
slouken@0
   334
		    case 2:
slouken@0
   335
			copy_row2((Uint16 *)srcp, srcrect->w,
slouken@0
   336
			          (Uint16 *)dstp, dstrect->w);
slouken@0
   337
			break;
slouken@0
   338
		    case 3:
slouken@0
   339
			copy_row3(srcp, srcrect->w, dstp, dstrect->w);
slouken@0
   340
			break;
slouken@0
   341
		    case 4:
slouken@0
   342
			copy_row4((Uint32 *)srcp, srcrect->w,
slouken@0
   343
			          (Uint32 *)dstp, dstrect->w);
slouken@0
   344
			break;
slouken@0
   345
		}
slouken@0
   346
		pos += inc;
slouken@0
   347
	}
slouken@894
   348
slouken@894
   349
	/* We need to unlock the surfaces if they're locked */
slouken@894
   350
	if ( dst_locked ) {
slouken@894
   351
		SDL_UnlockSurface(dst);
slouken@894
   352
	}
slouken@894
   353
	if ( src_locked ) {
slouken@894
   354
		SDL_UnlockSurface(src);
slouken@894
   355
	}
slouken@0
   356
	return(0);
slouken@0
   357
}
slouken@0
   358