Merged improvements to SDL_SoftStretch() from SDL 1.2
author Sam Lantinga <slouken@libsdl.org>
Sun, 18 Oct 2009 17:49:40 +0000
changeset 3405 d5f2dd33f4eb
parent 3404 c9dcc73f6a36
child 3406 8ae607392409
Merged improvements to SDL_SoftStretch() from SDL 1.2
configure.in
include/SDL_config.h.in
src/video/SDL_stretch.c
     1.1 --- a/configure.in	Sun Oct 18 16:14:35 2009 +0000
     1.2 +++ b/configure.in	Sun Oct 18 17:49:40 2009 +0000
     1.3 @@ -204,6 +204,15 @@
     1.4      if test x$ac_cv_func_strtod = xyes; then
     1.5          AC_DEFINE(HAVE_STRTOD)
     1.6      fi
     1.7 +    AC_CHECK_FUNC(mprotect,
     1.8 +        AC_TRY_COMPILE([
     1.9 +          #include <sys/types.h>
    1.10 +          #include <sys/mman.h>
    1.11 +        ],[
    1.12 +        ],[
    1.13 +        AC_DEFINE(HAVE_MPROTECT)
    1.14 +        ]),
    1.15 +    )
    1.16      AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp _stricmp strcasecmp _strnicmp strncasecmp sscanf snprintf vsnprintf sigaction setjmp nanosleep)
    1.17  
    1.18      AC_CHECK_LIB(m, pow, [LIBS="$LIBS -lm"; EXTRA_LDFLAGS="$EXTRA_LDFLAGS -lm"])
     2.1 --- a/include/SDL_config.h.in	Sun Oct 18 16:14:35 2009 +0000
     2.2 +++ b/include/SDL_config.h.in	Sun Oct 18 17:49:40 2009 +0000
     2.3 @@ -151,6 +151,7 @@
     2.4  #undef HAVE_NANOSLEEP
     2.5  #undef HAVE_CLOCK_GETTIME
     2.6  #undef HAVE_GETPAGESIZE
     2.7 +#undef HAVE_MPROTECT
     2.8  
     2.9  #else
    2.10  /* We may need some replacement for stdarg.h here */
     3.1 --- a/src/video/SDL_stretch.c	Sun Oct 18 16:14:35 2009 +0000
     3.2 +++ b/src/video/SDL_stretch.c	Sun Oct 18 17:49:40 2009 +0000
     3.3 @@ -42,6 +42,16 @@
     3.4  
     3.5  #ifdef USE_ASM_STRETCH
     3.6  
     3.7 +#ifdef HAVE_MPROTECT
     3.8 +#include <sys/types.h>
     3.9 +#include <sys/mman.h>
    3.10 +#endif
    3.11 +#ifdef __GNUC__
    3.12 +#define PAGE_ALIGNED __attribute__((__aligned__(4096)))
    3.13 +#else
    3.14 +#define PAGE_ALIGNED
    3.15 +#endif
    3.16 +
    3.17  #if defined(_M_IX86) || defined(i386)
    3.18  #define PREFIX16	0x66
    3.19  #define STORE_BYTE	0xAA
    3.20 @@ -53,7 +63,7 @@
    3.21  #error Need assembly opcodes for this architecture
    3.22  #endif
    3.23  
    3.24 -static unsigned char copy_row[4096];
    3.25 +static unsigned char copy_row[4096] PAGE_ALIGNED;
    3.26  
    3.27  static int
    3.28  generate_rowbytes(int src_w, int dst_w, int bpp)
    3.29 @@ -63,6 +73,7 @@
    3.30          int bpp;
    3.31          int src_w;
    3.32          int dst_w;
    3.33 +        int status;
    3.34      } last;
    3.35  
    3.36      int i;
    3.37 @@ -72,11 +83,12 @@
    3.38  
    3.39      /* See if we need to regenerate the copy buffer */
    3.40      if ((src_w == last.src_w) && (dst_w == last.dst_w) && (bpp == last.bpp)) {
    3.41 -        return (0);
    3.42 +        return (last.status);
    3.43      }
    3.44      last.bpp = bpp;
    3.45      last.src_w = src_w;
    3.46      last.dst_w = dst_w;
    3.47 +    last.status = -1;
    3.48  
    3.49      switch (bpp) {
    3.50      case 1:
    3.51 @@ -92,6 +104,13 @@
    3.52          SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
    3.53          return (-1);
    3.54      }
    3.55 +#ifdef HAVE_MPROTECT
    3.56 +    /* Make the code writeable */
    3.57 +    if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_WRITE) < 0) {
    3.58 +        SDL_SetError("Couldn't make copy buffer writeable");
    3.59 +        return (-1);
    3.60 +    }
    3.61 +#endif
    3.62      pos = 0x10000;
    3.63      inc = (src_w << 16) / dst_w;
    3.64      eip = copy_row;
    3.65 @@ -111,47 +130,55 @@
    3.66      }
    3.67      *eip++ = RETURN;
    3.68  
    3.69 -    /* Verify that we didn't overflow (too late) */
    3.70 +    /* Verify that we didn't overflow (too late!!!) */
    3.71      if (eip > (copy_row + sizeof(copy_row))) {
    3.72          SDL_SetError("Copy buffer overflow");
    3.73          return (-1);
    3.74      }
    3.75 +#ifdef HAVE_MPROTECT
    3.76 +    /* Make the code executable but not writeable */
    3.77 +    if (mprotect(copy_row, sizeof(copy_row), PROT_READ | PROT_EXEC) < 0) {
    3.78 +        SDL_SetError("Couldn't make copy buffer executable");
    3.79 +        return (-1);
    3.80 +    }
    3.81 +#endif
    3.82 +    last.status = 0;
    3.83      return (0);
    3.84  }
    3.85  
    3.86 -#else
    3.87 +#endif /* USE_ASM_STRETCH */
    3.88  
    3.89 -#define DEFINE_COPY_ROW(name, type)                     \
    3.90 -void name(type *src, int src_w, type *dst, int dst_w)   \
    3.91 -{                                                       \
    3.92 -    int i;                                              \
    3.93 -    int pos, inc;                                       \
    3.94 -    type pixel = 0;                                     \
    3.95 -                                                        \
    3.96 -    pos = 0x10000;                                      \
    3.97 -    inc = (src_w << 16) / dst_w;                        \
    3.98 -    for ( i=dst_w; i>0; --i ) {                         \
    3.99 -        while ( pos >= 0x10000L ) {                     \
   3.100 -            pixel = *src++;                             \
   3.101 -            pos -= 0x10000L;                            \
   3.102 -        }                                               \
   3.103 -        *dst++ = pixel;                                 \
   3.104 -        pos += inc;                                     \
   3.105 -    }                                                   \
   3.106 +#define DEFINE_COPY_ROW(name, type)			\
   3.107 +void name(type *src, int src_w, type *dst, int dst_w)	\
   3.108 +{							\
   3.109 +	int i;						\
   3.110 +	int pos, inc;					\
   3.111 +	type pixel = 0;					\
   3.112 +							\
   3.113 +	pos = 0x10000;					\
   3.114 +	inc = (src_w << 16) / dst_w;			\
   3.115 +	for ( i=dst_w; i>0; --i ) {			\
   3.116 +		while ( pos >= 0x10000L ) {		\
   3.117 +			pixel = *src++;			\
   3.118 +			pos -= 0x10000L;		\
   3.119 +		}					\
   3.120 +		*dst++ = pixel;				\
   3.121 +		pos += inc;				\
   3.122 +	}						\
   3.123  }
   3.124  /* *INDENT-OFF* */
   3.125  DEFINE_COPY_ROW(copy_row1, Uint8)
   3.126  DEFINE_COPY_ROW(copy_row2, Uint16)
   3.127  DEFINE_COPY_ROW(copy_row4, Uint32)
   3.128  /* *INDENT-ON* */
   3.129 -#endif /* USE_ASM_STRETCH */
   3.130 +
   3.131  /* The ASM code doesn't handle 24-bpp stretch blits */
   3.132  void
   3.133  copy_row3(Uint8 * src, int src_w, Uint8 * dst, int dst_w)
   3.134  {
   3.135      int i;
   3.136      int pos, inc;
   3.137 -    Uint8 pixel[3];
   3.138 +    Uint8 pixel[3] = { 0, 0, 0 };
   3.139  
   3.140      pos = 0x10000;
   3.141      inc = (src_w << 16) / dst_w;
   3.142 @@ -186,9 +213,12 @@
   3.143      Uint8 *dstp;
   3.144      SDL_Rect full_src;
   3.145      SDL_Rect full_dst;
   3.146 -#if defined(USE_ASM_STRETCH) && defined(__GNUC__)
   3.147 +#ifdef USE_ASM_STRETCH
   3.148 +    SDL_bool use_asm = SDL_TRUE;
   3.149 +#ifdef __GNUC__
   3.150      int u1, u2;
   3.151  #endif
   3.152 +#endif /* USE_ASM_STRETCH */
   3.153      const int bpp = dst->format->BytesPerPixel;
   3.154  
   3.155      if (src->format->BitsPerPixel != dst->format->BitsPerPixel) {
   3.156 @@ -257,8 +287,8 @@
   3.157  
   3.158  #ifdef USE_ASM_STRETCH
   3.159      /* Write the opcodes for this stretch */
   3.160 -    if ((bpp != 3) && (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0)) {
   3.161 -        return (-1);
   3.162 +    if ((bpp == 3) || (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0)) {
   3.163 +        use_asm = SDL_FALSE;
   3.164      }
   3.165  #endif
   3.166  
   3.167 @@ -273,13 +303,11 @@
   3.168              pos -= 0x10000L;
   3.169          }
   3.170  #ifdef USE_ASM_STRETCH
   3.171 -        switch (bpp) {
   3.172 -        case 3:
   3.173 -            copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   3.174 -            break;
   3.175 -        default:
   3.176 +        if (use_asm) {
   3.177  #ifdef __GNUC__
   3.178 -          __asm__ __volatile__("call *%4": "=&D"(u1), "=&S"(u2): "0"(dstp), "1"(srcp), "r"(copy_row):"memory");
   3.179 +            __asm__ __volatile__("call *%4":"=&D"(u1), "=&S"(u2)
   3.180 +                                 :"0"(dstp), "1"(srcp), "r"(copy_row)
   3.181 +                                 :"memory");
   3.182  #elif defined(_MSC_VER) || defined(__WATCOMC__)
   3.183              /* *INDENT-OFF* */
   3.184              {
   3.185 @@ -298,26 +326,24 @@
   3.186  #else
   3.187  #error Need inline assembly for this compiler
   3.188  #endif
   3.189 -            break;
   3.190 -        }
   3.191 -#else
   3.192 -        switch (bpp) {
   3.193 -        case 1:
   3.194 -            copy_row1(srcp, srcrect->w, dstp, dstrect->w);
   3.195 -            break;
   3.196 -        case 2:
   3.197 -            copy_row2((Uint16 *) srcp, srcrect->w,
   3.198 -                      (Uint16 *) dstp, dstrect->w);
   3.199 -            break;
   3.200 -        case 3:
   3.201 -            copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   3.202 -            break;
   3.203 -        case 4:
   3.204 -            copy_row4((Uint32 *) srcp, srcrect->w,
   3.205 -                      (Uint32 *) dstp, dstrect->w);
   3.206 -            break;
   3.207 -        }
   3.208 +        } else
   3.209  #endif
   3.210 +            switch (bpp) {
   3.211 +            case 1:
   3.212 +                copy_row1(srcp, srcrect->w, dstp, dstrect->w);
   3.213 +                break;
   3.214 +            case 2:
   3.215 +                copy_row2((Uint16 *) srcp, srcrect->w,
   3.216 +                          (Uint16 *) dstp, dstrect->w);
   3.217 +                break;
   3.218 +            case 3:
   3.219 +                copy_row3(srcp, srcrect->w, dstp, dstrect->w);
   3.220 +                break;
   3.221 +            case 4:
   3.222 +                copy_row4((Uint32 *) srcp, srcrect->w,
   3.223 +                          (Uint32 *) dstp, dstrect->w);
   3.224 +                break;
   3.225 +            }
   3.226          pos += inc;
   3.227      }
   3.228