src/video/SDL_blit.c
changeset 689 5bb080d35049
parent 526 4314a501d7be
child 697 8468fc0504f3
     1.1 --- a/src/video/SDL_blit.c	Tue Aug 12 15:17:20 2003 +0000
     1.2 +++ b/src/video/SDL_blit.c	Fri Aug 22 05:51:19 2003 +0000
     1.3 @@ -37,6 +37,19 @@
     1.4  #include "SDL_pixels_c.h"
     1.5  #include "SDL_memops.h"
     1.6  
     1.7 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
     1.8 +#include "mmx.h"
     1.9 +/* CPU feature detection: bit masks tested against the value of CPU_Flags() */
    1.10 +#define MMX_CPU		0x800000	/* bit 23; presumably the CPUID MMX feature bit - confirm against Hermes_X86_CPU */
    1.11 +#define SSE_CPU		0x2000000	/* bit 25; presumably the CPUID SSE feature bit - confirm against Hermes_X86_CPU */
    1.12 +#define CPU_Flags()	Hermes_X86_CPU()	/* Hermes_X86_CPU is presumably declared by HeadX86.h below */
    1.13 +#define X86_ASSEMBLER	/* defined before HeadX86.h is included; presumably selects its x86 declarations */
    1.14 +#define HermesConverterInterface	void	/* stand-in type so HeadX86.h can be used without the Hermes interface headers (assumption) */
    1.15 +#define HermesClearInterface		void	/* same stand-in, for the clear interface (assumption) */
    1.16 +#define STACKCALL	/* expands to nothing: no calling-convention attribute is applied */
    1.17 +#include "HeadX86.h"
    1.18 +#endif /* i386 && __GNUC__ && USE_ASMBLIT */
    1.19 +
    1.20  /* The general purpose software blit routine */
    1.21  static int SDL_SoftBlit(SDL_Surface *src, SDL_Rect *srcrect,
    1.22  			SDL_Surface *dst, SDL_Rect *dstrect)
    1.23 @@ -106,11 +119,54 @@
    1.24  	return(okay ? 0 : -1);
    1.25  }
    1.26  
    1.27 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
    1.28 +void SDL_memcpyMMX(char* to,char* from,int len)
    1.29 +{
    1.30 +	int i;
    1.31 +
    1.32 +	for(i=0; i<len/8; i++) {
    1.33 +		__asm__ __volatile__ (
    1.34 +		"	movq (%0), %%mm0\n"
    1.35 +		"	movq %%mm0, (%1)\n"
    1.36 +		: : "r" (from), "r" (to) : "memory");
    1.37 +		from+=8;
    1.38 +		to+=8;
    1.39 +	}
    1.40 +	if (len&7)
    1.41 +		SDL_memcpy(to, from, len&7);
    1.42 +}
    1.43 +
    1.44 +void SDL_memcpySSE(char* to,char* from,int len)
    1.45 +{
    1.46 +	int i;
    1.47 +
    1.48 +	__asm__ __volatile__ (
    1.49 +	"	prefetchnta (%0)\n"
    1.50 +	"	prefetchnta 64(%0)\n"
    1.51 +	"	prefetchnta 128(%0)\n"
    1.52 +	"	prefetchnta 192(%0)\n"
    1.53 +	: : "r" (from) );
    1.54 +
    1.55 +	for(i=0; i<len/8; i++) {
    1.56 +		__asm__ __volatile__ (
    1.57 +		"	prefetchnta 256(%0)\n"
    1.58 +		"	movq (%0), %%mm0\n"
    1.59 +		"	movntq %%mm0, (%1)\n"
    1.60 +		: : "r" (from), "r" (to) : "memory");
    1.61 +		from+=8;
    1.62 +		to+=8;
    1.63 +	}
    1.64 +	if (len&7)
    1.65 +		SDL_memcpy(to, from, len&7);
    1.66 +}
    1.67 +#endif
    1.68 +
    1.69  static void SDL_BlitCopy(SDL_BlitInfo *info)
    1.70  {
    1.71  	Uint8 *src, *dst;
    1.72  	int w, h;
    1.73  	int srcskip, dstskip;
    1.74 +	Uint32 f;
    1.75  
    1.76  	w = info->d_width*info->dst->BytesPerPixel;
    1.77  	h = info->d_height;
    1.78 @@ -118,6 +174,33 @@
    1.79  	dst = info->d_pixels;
    1.80  	srcskip = w+info->s_skip;
    1.81  	dstskip = w+info->d_skip;
    1.82 +#if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
    1.83 +	f=CPU_Flags();
    1.84 +	if((f&(MMX_CPU|SSE_CPU))==(MMX_CPU|SSE_CPU))
    1.85 +	{
    1.86 +		while ( h-- ) {
    1.87 +			SDL_memcpySSE(dst, src, w);
    1.88 +			src += srcskip;
    1.89 +			dst += dstskip;
    1.90 +		}
    1.91 +		__asm__ __volatile__ (
    1.92 +		"	emms\n"
    1.93 +		::);
    1.94 +	}
    1.95 +	else
    1.96 +	if((f&(MMX_CPU))!=0)
    1.97 +	{
    1.98 +		while ( h-- ) {
    1.99 +			SDL_memcpyMMX(dst, src, w);
   1.100 +			src += srcskip;
   1.101 +			dst += dstskip;
   1.102 +		}
   1.103 +		__asm__ __volatile__ (
   1.104 +		"	emms\n"
   1.105 +		::);
   1.106 +	}
   1.107 +	else
   1.108 +#endif
   1.109  	while ( h-- ) {
   1.110  		SDL_memcpy(dst, src, w);
   1.111  		src += srcskip;