Removed hermes since it's LGPL and not compatible with a commercial license.
author Sam Lantinga <slouken@libsdl.org>
Wed, 15 Aug 2007 08:21:10 +0000
changeset 2247 93994f65c74c
parent 2246 75daa0792bd1
child 2248 5cd2a2293cf0
Removed hermes since it's LGPL and not compatible with a commercial license.

Prepping for using MMX and SSE intrinsics instead of inline assembly.
.. except for memcpy equivalents which only get faster if they can
exploit the parallelism of loading into multiple SIMD registers. :)
build-scripts/makedep.sh
configure.in
include/SDL_config.h.in
src/hermes/COPYING.LIB
src/hermes/HeadMMX.h
src/hermes/HeadX86.h
src/hermes/README
src/hermes/common.inc
src/hermes/mmx_main.asm
src/hermes/mmxp2_32.asm
src/hermes/x86_main.asm
src/hermes/x86p_16.asm
src/hermes/x86p_32.asm
src/video/SDL_blit.c
src/video/SDL_blit.h
src/video/SDL_blit_N.c
src/video/SDL_blit_copy.c
src/video/SDL_blit_copy.h
     1.1 --- a/build-scripts/makedep.sh	Wed Aug 15 04:04:17 2007 +0000
     1.2 +++ b/build-scripts/makedep.sh	Wed Aug 15 08:21:10 2007 +0000
     1.3 @@ -67,12 +67,6 @@
     1.4  
     1.5  __EOF__
     1.6          ;;
     1.7 -        asm) cat >>${output}.new <<__EOF__
     1.8 -
     1.9 -	\$(LIBTOOL) --tag=CC --mode=compile \$(auxdir)/strip_fPIC.sh \$(NASM) $src -o \$@
    1.10 -
    1.11 -__EOF__
    1.12 -        ;;
    1.13          S) cat >>${output}.new <<__EOF__
    1.14  
    1.15  	\$(LIBTOOL)  --mode=compile \$(CC) \$(CFLAGS) \$(EXTRA_CFLAGS) -c $src  -o \$@
     2.1 --- a/configure.in	Wed Aug 15 04:04:17 2007 +0000
     2.2 +++ b/configure.in	Wed Aug 15 08:21:10 2007 +0000
     2.3 @@ -276,6 +276,127 @@
     2.4                , enable_assembly=yes)
     2.5  if test x$enable_assembly = xyes; then
     2.6      AC_DEFINE(SDL_ASSEMBLY_ROUTINES)
     2.7 +
     2.8 +    dnl Check for various instruction support
     2.9 +    AC_ARG_ENABLE(mmx,
    2.10 +AC_HELP_STRING([--enable-mmx], [use MMX assembly routines [[default=yes]]]),
    2.11 +                  , enable_mmx=yes)
    2.12 +    if test x$enable_mmx = xyes; then
    2.13 +        save_CFLAGS="$CFLAGS"
    2.14 +        have_gcc_mmx=no
    2.15 +        AC_MSG_CHECKING(for GCC -mmmx option)
    2.16 +        mmx_CFLAGS="-mmmx"
    2.17 +        CFLAGS="$save_CFLAGS $mmx_CFLAGS"
    2.18 +
    2.19 +        AC_TRY_COMPILE([
    2.20 +        #include <mmintrin.h>
    2.21 +        ],[
    2.22 +        ],[
    2.23 +        have_gcc_mmx=yes
    2.24 +        ])
    2.25 +        AC_MSG_RESULT($have_gcc_mmx)
    2.26 +
    2.27 +        if test x$have_gcc_mmx = xyes; then
    2.28 +            EXTRA_CFLAGS="$EXTRA_CFLAGS $mmx_CFLAGS"
    2.29 +        fi
    2.30 +    fi
    2.31 +
    2.32 +    AC_ARG_ENABLE(sse,
    2.33 +AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
    2.34 +                  , enable_sse=yes)
    2.35 +    if test x$enable_sse = xyes; then
    2.36 +        save_CFLAGS="$CFLAGS"
    2.37 +        have_gcc_sse=no
    2.38 +        AC_MSG_CHECKING(for GCC -msse option)
    2.39 +        sse_CFLAGS="-msse"
    2.40 +        CFLAGS="$save_CFLAGS $sse_CFLAGS"
    2.41 +
    2.42 +        AC_TRY_COMPILE([
    2.43 +        #include <xmmintrin.h>
    2.44 +        ],[
    2.45 +        ],[
    2.46 +        have_gcc_sse=yes
    2.47 +        ])
    2.48 +        AC_MSG_RESULT($have_gcc_sse)
    2.49 +
    2.50 +        if test x$have_gcc_sse = xyes; then
    2.51 +            EXTRA_CFLAGS="$EXTRA_CFLAGS $sse_CFLAGS"
    2.52 +        fi
    2.53 +    fi
    2.54 +
    2.55 +    AC_ARG_ENABLE(altivec,
    2.56 +AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
    2.57 +                  , enable_altivec=yes)
    2.58 +    if test x$enable_altivec = xyes; then
    2.59 +        have_altivec_h_hdr=no
    2.60 +        AC_CHECK_HEADER(altivec.h, have_altivec_h_hdr=yes)
    2.61 +
    2.62 +        save_CFLAGS="$CFLAGS"
    2.63 +        have_gcc_altivec=no
    2.64 +        AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
    2.65 +        altivec_CFLAGS="-maltivec"
    2.66 +        CFLAGS="$save_CFLAGS $altivec_CFLAGS"
    2.67 +
    2.68 +        if test x$have_altivec_h_hdr = xyes; then
    2.69 +          AC_TRY_COMPILE([
    2.70 +          #include <altivec.h>
    2.71 +          vector unsigned int vzero() {
    2.72 +              return vec_splat_u32(0);
    2.73 +          }
    2.74 +          ],[
    2.75 +          ],[
    2.76 +          have_gcc_altivec=yes
    2.77 +          ])
    2.78 +          AC_MSG_RESULT($have_gcc_altivec)
    2.79 +        else
    2.80 +          AC_TRY_COMPILE([
    2.81 +          vector unsigned int vzero() {
    2.82 +              return vec_splat_u32(0);
    2.83 +          }
    2.84 +          ],[
    2.85 +          ],[
    2.86 +          have_gcc_altivec=yes
    2.87 +          ])
    2.88 +          AC_MSG_RESULT($have_gcc_altivec)
    2.89 +        fi
    2.90 +
    2.91 +        if test x$have_gcc_altivec = xno; then
    2.92 +            AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
    2.93 +            altivec_CFLAGS="-faltivec"
    2.94 +            CFLAGS="$save_CFLAGS $altivec_CFLAGS"
    2.95 +            if test x$have_altivec_h_hdr = xyes; then
    2.96 +              AC_TRY_COMPILE([
    2.97 +              #include <altivec.h>
    2.98 +              vector unsigned int vzero() {
    2.99 +                  return vec_splat_u32(0);
   2.100 +              }
   2.101 +              ],[
   2.102 +              ],[
   2.103 +              have_gcc_altivec=yes
   2.104 +              ])
   2.105 +              AC_MSG_RESULT($have_gcc_altivec)
   2.106 +            else
   2.107 +              AC_TRY_COMPILE([
   2.108 +              vector unsigned int vzero() {
   2.109 +                  return vec_splat_u32(0);
   2.110 +              }
   2.111 +              ],[
   2.112 +              ],[
   2.113 +              have_gcc_altivec=yes
   2.114 +              ])
   2.115 +              AC_MSG_RESULT($have_gcc_altivec)
   2.116 +            fi
   2.117 +        fi
   2.118 +        CFLAGS="$save_CFLAGS"
   2.119 +
   2.120 +        if test x$have_gcc_altivec = xyes; then
   2.121 +            AC_DEFINE(SDL_ALTIVEC_BLITTERS)
   2.122 +            if test x$have_altivec_h_hdr = xyes; then
   2.123 +              AC_DEFINE(HAVE_ALTIVEC_H)
   2.124 +            fi
   2.125 +            EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
   2.126 +        fi
   2.127 +    fi
   2.128  fi
   2.129  
   2.130  dnl See if the OSS audio interface is supported
   2.131 @@ -629,167 +750,6 @@
   2.132      fi
   2.133  }
   2.134  
   2.135 -dnl See if we can use x86 assembly blitters
   2.136 -# NASM is available from: http://nasm.sourceforge.net
   2.137 -CheckNASM()
   2.138 -{
   2.139 -    dnl Make sure we are running on an x86 platform
   2.140 -    case $host in
   2.141 -        i?86*)
   2.142 -            ;;
   2.143 -        *)
   2.144 -        # Nope, bail early.
   2.145 -            return
   2.146 -            ;;
   2.147 -    esac
   2.148 -    dnl Check for NASM (for assembly blit routines)
   2.149 -    AC_ARG_ENABLE(nasm,
   2.150 -AC_HELP_STRING([--enable-nasm], [use nasm assembly blitters on x86 [[default=yes]]]),
   2.151 -                  , enable_nasm=yes)
   2.152 -    if test x$enable_video = xyes -a x$enable_assembly = xyes -a x$enable_nasm = xyes; then
   2.153 -        CompileNASM()
   2.154 -        {
   2.155 -            # Usage: CompileNASM <filename>
   2.156 -            AC_MSG_CHECKING(to see if $NASM supports $1)
   2.157 -            if $NASM $NASMFLAGS $1 -o $1.o >&AS_MESSAGE_LOG_FD 2>&1; then
   2.158 -                CompileNASM_ret="yes"
   2.159 -            else
   2.160 -                CompileNASM_ret="no"
   2.161 -            fi
   2.162 -            rm -f $1 $1.o
   2.163 -            AC_MSG_RESULT($CompileNASM_ret)
   2.164 -            test "$CompileNASM_ret" = "yes"
   2.165 -        }
   2.166 -
   2.167 -        if test x"$NASMFLAGS" = x; then
   2.168 -            case $ARCH in
   2.169 -              win32)
   2.170 -                  NASMFLAGS="-f win32"
   2.171 -                  ;;
   2.172 -              openbsd)
   2.173 -                  NASMFLAGS="-f aoutb"
   2.174 -                  ;;
   2.175 -              macosx)
   2.176 -                  NASMFLAGS="-f macho"
   2.177 -                  ;;
   2.178 -              *)
   2.179 -                  NASMFLAGS="-f elf"
   2.180 -                  ;;
   2.181 -            esac
   2.182 -        fi
   2.183 -
   2.184 -        AC_PATH_PROG(NASM, yasm)
   2.185 -        echo "%ifidn __OUTPUT_FORMAT__,elf" > unquoted-sections
   2.186 -        echo "section .note.GNU-stack noalloc noexec nowrite progbits" >> unquoted-sections
   2.187 -        echo "%endif" >> unquoted-sections
   2.188 -        CompileNASM unquoted-sections || NASM=""
   2.189 -
   2.190 -        if test "x$NASM" = x -o "x$NASM" = x'"$NASM"'; then
   2.191 -            $as_unset ac_cv_path_NASM
   2.192 -            AC_PATH_PROG(NASM, nasm)
   2.193 -        fi
   2.194 -        if test "x$NASM" != x -a "x$NASM" != x'"$NASM"'; then
   2.195 -            AC_DEFINE(SDL_HERMES_BLITTERS)
   2.196 -            SOURCES="$SOURCES $srcdir/src/hermes/*.asm"
   2.197 -            NASMFLAGS="$NASMFLAGS -I $srcdir/src/hermes/"
   2.198 -
   2.199 -            dnl See if hidden visibility is supported
   2.200 -            echo "GLOBAL _bar:function hidden" > symbol-visibility
   2.201 -            echo "_bar:" >> symbol-visibility
   2.202 -            CompileNASM symbol-visibility && NASMFLAGS="$NASMFLAGS -DHIDDEN_VISIBILITY"
   2.203 -
   2.204 -            AC_SUBST(NASM)
   2.205 -            AC_SUBST(NASMFLAGS)
   2.206 -
   2.207 -            case "$host" in
   2.208 -                # this line is needed for QNX, because it's not defined the __ELF__
   2.209 -                *-*-qnx*)
   2.210 -                     EXTRA_CFLAGS="$EXTRA_CFLAGS -D__ELF__";;
   2.211 -                *-*-solaris*)
   2.212 -                     EXTRA_CFLAGS="$EXTRA_CFLAGS -D__ELF__";;
   2.213 -            esac
   2.214 -        fi
   2.215 -    fi
   2.216 -}
   2.217 -
   2.218 -dnl Check for altivec instruction support using gas syntax
   2.219 -CheckAltivec()
   2.220 -{
   2.221 -    AC_ARG_ENABLE(altivec,
   2.222 -AC_HELP_STRING([--enable-altivec], [use altivec assembly blitters on PPC [[default=yes]]]),
   2.223 -                  , enable_altivec=yes)
   2.224 -    if test x$enable_video = xyes -a x$enable_assembly = xyes -a x$enable_altivec = xyes; then
   2.225 -        have_altivec_h_hdr=no
   2.226 -        AC_CHECK_HEADER(altivec.h, have_altivec_h_hdr=yes)
   2.227 -
   2.228 -        save_CFLAGS="$CFLAGS"
   2.229 -        have_gcc_altivec=no
   2.230 -        AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
   2.231 -        altivec_CFLAGS="-maltivec"
   2.232 -        CFLAGS="$save_CFLAGS $altivec_CFLAGS"
   2.233 -
   2.234 -        if test x$have_altivec_h_hdr = xyes; then
   2.235 -          AC_TRY_COMPILE([
   2.236 -          #include <altivec.h>
   2.237 -          vector unsigned int vzero() {
   2.238 -              return vec_splat_u32(0);
   2.239 -          }
   2.240 -          ],[
   2.241 -          ],[
   2.242 -          have_gcc_altivec=yes
   2.243 -          ])
   2.244 -          AC_MSG_RESULT($have_gcc_altivec)
   2.245 -        else
   2.246 -          AC_TRY_COMPILE([
   2.247 -          vector unsigned int vzero() {
   2.248 -              return vec_splat_u32(0);
   2.249 -          }
   2.250 -          ],[
   2.251 -          ],[
   2.252 -          have_gcc_altivec=yes
   2.253 -          ])
   2.254 -          AC_MSG_RESULT($have_gcc_altivec)
   2.255 -        fi
   2.256 -
   2.257 -        if test x$have_gcc_altivec = xno; then
   2.258 -            AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
   2.259 -            altivec_CFLAGS="-faltivec"
   2.260 -            CFLAGS="$save_CFLAGS $altivec_CFLAGS"
   2.261 -            if test x$have_altivec_h_hdr = xyes; then
   2.262 -              AC_TRY_COMPILE([
   2.263 -              #include <altivec.h>
   2.264 -              vector unsigned int vzero() {
   2.265 -                  return vec_splat_u32(0);
   2.266 -              }
   2.267 -              ],[
   2.268 -              ],[
   2.269 -              have_gcc_altivec=yes
   2.270 -              ])
   2.271 -              AC_MSG_RESULT($have_gcc_altivec)
   2.272 -            else
   2.273 -              AC_TRY_COMPILE([
   2.274 -              vector unsigned int vzero() {
   2.275 -                  return vec_splat_u32(0);
   2.276 -              }
   2.277 -              ],[
   2.278 -              ],[
   2.279 -              have_gcc_altivec=yes
   2.280 -              ])
   2.281 -              AC_MSG_RESULT($have_gcc_altivec)
   2.282 -            fi
   2.283 -        fi
   2.284 -        CFLAGS="$save_CFLAGS"
   2.285 -
   2.286 -        if test x$have_gcc_altivec = xyes; then
   2.287 -            AC_DEFINE(SDL_ALTIVEC_BLITTERS)
   2.288 -            if test x$have_altivec_h_hdr = xyes; then
   2.289 -              AC_DEFINE(HAVE_ALTIVEC_H)
   2.290 -            fi
   2.291 -            EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
   2.292 -        fi
   2.293 -    fi
   2.294 -}
   2.295 -
   2.296  dnl See if GCC's -fvisibility=hidden is supported (gcc4 and later, usually).
   2.297  dnl  Details of this flag are here: http://gcc.gnu.org/wiki/Visibility
   2.298  CheckVisibilityHidden()
   2.299 @@ -2043,8 +2003,6 @@
   2.300          CheckDiskAudio
   2.301          CheckDummyAudio
   2.302          CheckDLOPEN
   2.303 -        CheckNASM
   2.304 -        CheckAltivec
   2.305          CheckOSS
   2.306          CheckDMEDIA
   2.307          CheckMME
   2.308 @@ -2153,7 +2111,6 @@
   2.309          CheckDummyVideo
   2.310          CheckDiskAudio
   2.311          CheckDummyAudio
   2.312 -        # CheckNASM
   2.313          CheckDLOPEN
   2.314          CheckNAS
   2.315          CheckPHOTON
   2.316 @@ -2197,7 +2154,6 @@
   2.317          CheckWIN32
   2.318          CheckWIN32GL
   2.319          CheckDIRECTX
   2.320 -        CheckNASM
   2.321          # Set up files for the video library
   2.322          if test x$enable_video = xyes; then
   2.323              AC_DEFINE(SDL_VIDEO_DRIVER_WIN32)
   2.324 @@ -2278,7 +2234,6 @@
   2.325          CheckDummyVideo
   2.326          CheckDiskAudio
   2.327          CheckDummyAudio
   2.328 -        CheckNASM
   2.329          CheckBWINDOW
   2.330          CheckBeGL
   2.331          # Set up files for the audio library
   2.332 @@ -2344,7 +2299,6 @@
   2.333          CheckDiskAudio
   2.334          CheckDummyAudio
   2.335          CheckDLOPEN
   2.336 -        CheckNASM
   2.337  
   2.338          # Set up files for the shared object loading library
   2.339          # (this needs to be done before the dynamic X11 check)
   2.340 @@ -2359,7 +2313,6 @@
   2.341          CheckMacGL
   2.342          CheckOpenGLX11
   2.343          CheckPTHREAD
   2.344 -        CheckAltivec
   2.345  
   2.346          # Good optimization on Mac OS X, yes...
   2.347          EXTRA_CFLAGS="$EXTRA_CFLAGS -falign-loops=16"
     3.1 --- a/include/SDL_config.h.in	Wed Aug 15 04:04:17 2007 +0000
     3.2 +++ b/include/SDL_config.h.in	Wed Aug 15 08:21:10 2007 +0000
     3.3 @@ -292,7 +292,6 @@
     3.4  
     3.5  /* Enable assembly routines */
     3.6  #undef SDL_ASSEMBLY_ROUTINES
     3.7 -#undef SDL_HERMES_BLITTERS
     3.8  #undef SDL_ALTIVEC_BLITTERS
     3.9  
    3.10  #endif /* _SDL_config_h */
     4.1 --- a/src/hermes/COPYING.LIB	Wed Aug 15 04:04:17 2007 +0000
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,438 +0,0 @@
     4.4 -		  GNU LIBRARY GENERAL PUBLIC LICENSE
     4.5 -		       Version 2, June 1991
     4.6 -
     4.7 - Copyright (C) 1991 Free Software Foundation, Inc.
     4.8 -    		    59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     4.9 - Everyone is permitted to copy and distribute verbatim copies
    4.10 - of this license document, but changing it is not allowed.
    4.11 -
    4.12 -[This is the first released version of the library GPL.  It is
    4.13 - numbered 2 because it goes with version 2 of the ordinary GPL.]
    4.14 -
    4.15 -			    Preamble
    4.16 -
    4.17 -  The licenses for most software are designed to take away your
    4.18 -freedom to share and change it.  By contrast, the GNU General Public
    4.19 -Licenses are intended to guarantee your freedom to share and change
    4.20 -free software--to make sure the software is free for all its users.
    4.21 -
    4.22 -  This license, the Library General Public License, applies to some
    4.23 -specially designated Free Software Foundation software, and to any
    4.24 -other libraries whose authors decide to use it.  You can use it for
    4.25 -your libraries, too.
    4.26 -
    4.27 -  When we speak of free software, we are referring to freedom, not
    4.28 -price.  Our General Public Licenses are designed to make sure that you
    4.29 -have the freedom to distribute copies of free software (and charge for
    4.30 -this service if you wish), that you receive source code or can get it
    4.31 -if you want it, that you can change the software or use pieces of it
    4.32 -in new free programs; and that you know you can do these things.
    4.33 -
    4.34 -  To protect your rights, we need to make restrictions that forbid
    4.35 -anyone to deny you these rights or to ask you to surrender the rights.
    4.36 -These restrictions translate to certain responsibilities for you if
    4.37 -you distribute copies of the library, or if you modify it.
    4.38 -
    4.39 -  For example, if you distribute copies of the library, whether gratis
    4.40 -or for a fee, you must give the recipients all the rights that we gave
    4.41 -you.  You must make sure that they, too, receive or can get the source
    4.42 -code.  If you link a program with the library, you must provide
    4.43 -complete object files to the recipients so that they can relink them
    4.44 -with the library, after making changes to the library and recompiling
    4.45 -it.  And you must show them these terms so they know their rights.
    4.46 -
    4.47 -  Our method of protecting your rights has two steps: (1) copyright
    4.48 -the library, and (2) offer you this license which gives you legal
    4.49 -permission to copy, distribute and/or modify the library.
    4.50 -
    4.51 -  Also, for each distributor's protection, we want to make certain
    4.52 -that everyone understands that there is no warranty for this free
    4.53 -library.  If the library is modified by someone else and passed on, we
    4.54 -want its recipients to know that what they have is not the original
    4.55 -version, so that any problems introduced by others will not reflect on
    4.56 -the original authors' reputations.
    4.57 -
    4.58 -  Finally, any free program is threatened constantly by software
    4.59 -patents.  We wish to avoid the danger that companies distributing free
    4.60 -software will individually obtain patent licenses, thus in effect
    4.61 -transforming the program into proprietary software.  To prevent this,
    4.62 -we have made it clear that any patent must be licensed for everyone's
    4.63 -free use or not licensed at all.
    4.64 -
    4.65 -  Most GNU software, including some libraries, is covered by the ordinary
    4.66 -GNU General Public License, which was designed for utility programs.  This
    4.67 -license, the GNU Library General Public License, applies to certain
    4.68 -designated libraries.  This license is quite different from the ordinary
    4.69 -one; be sure to read it in full, and don't assume that anything in it is
    4.70 -the same as in the ordinary license.
    4.71 -
    4.72 -  The reason we have a separate public license for some libraries is that
    4.73 -they blur the distinction we usually make between modifying or adding to a
    4.74 -program and simply using it.  Linking a program with a library, without
    4.75 -changing the library, is in some sense simply using the library, and is
    4.76 -analogous to running a utility program or application program.  However, in
    4.77 -a textual and legal sense, the linked executable is a combined work, a
    4.78 -derivative of the original library, and the ordinary General Public License
    4.79 -treats it as such.
    4.80 -
    4.81 -  Because of this blurred distinction, using the ordinary General
    4.82 -Public License for libraries did not effectively promote software
    4.83 -sharing, because most developers did not use the libraries.  We
    4.84 -concluded that weaker conditions might promote sharing better.
    4.85 -
    4.86 -  However, unrestricted linking of non-free programs would deprive the
    4.87 -users of those programs of all benefit from the free status of the
    4.88 -libraries themselves.  This Library General Public License is intended to
    4.89 -permit developers of non-free programs to use free libraries, while
    4.90 -preserving your freedom as a user of such programs to change the free
    4.91 -libraries that are incorporated in them.  (We have not seen how to achieve
    4.92 -this as regards changes in header files, but we have achieved it as regards
    4.93 -changes in the actual functions of the Library.)  The hope is that this
    4.94 -will lead to faster development of free libraries.
    4.95 -
    4.96 -  The precise terms and conditions for copying, distribution and
    4.97 -modification follow.  Pay close attention to the difference between a
    4.98 -"work based on the library" and a "work that uses the library".  The
    4.99 -former contains code derived from the library, while the latter only
   4.100 -works together with the library.
   4.101 -
   4.102 -  Note that it is possible for a library to be covered by the ordinary
   4.103 -General Public License rather than by this special one.
   4.104 -
   4.105 -		  GNU LIBRARY GENERAL PUBLIC LICENSE
   4.106 -   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
   4.107 -
   4.108 -  0. This License Agreement applies to any software library which
   4.109 -contains a notice placed by the copyright holder or other authorized
   4.110 -party saying it may be distributed under the terms of this Library
   4.111 -General Public License (also called "this License").  Each licensee is
   4.112 -addressed as "you".
   4.113 -
   4.114 -  A "library" means a collection of software functions and/or data
   4.115 -prepared so as to be conveniently linked with application programs
   4.116 -(which use some of those functions and data) to form executables.
   4.117 -
   4.118 -  The "Library", below, refers to any such software library or work
   4.119 -which has been distributed under these terms.  A "work based on the
   4.120 -Library" means either the Library or any derivative work under
   4.121 -copyright law: that is to say, a work containing the Library or a
   4.122 -portion of it, either verbatim or with modifications and/or translated
   4.123 -straightforwardly into another language.  (Hereinafter, translation is
   4.124 -included without limitation in the term "modification".)
   4.125 -
   4.126 -  "Source code" for a work means the preferred form of the work for
   4.127 -making modifications to it.  For a library, complete source code means
   4.128 -all the source code for all modules it contains, plus any associated
   4.129 -interface definition files, plus the scripts used to control compilation
   4.130 -and installation of the library.
   4.131 -
   4.132 -  Activities other than copying, distribution and modification are not
   4.133 -covered by this License; they are outside its scope.  The act of
   4.134 -running a program using the Library is not restricted, and output from
   4.135 -such a program is covered only if its contents constitute a work based
   4.136 -on the Library (independent of the use of the Library in a tool for
   4.137 -writing it).  Whether that is true depends on what the Library does
   4.138 -and what the program that uses the Library does.
   4.139 -  
   4.140 -  1. You may copy and distribute verbatim copies of the Library's
   4.141 -complete source code as you receive it, in any medium, provided that
   4.142 -you conspicuously and appropriately publish on each copy an
   4.143 -appropriate copyright notice and disclaimer of warranty; keep intact
   4.144 -all the notices that refer to this License and to the absence of any
   4.145 -warranty; and distribute a copy of this License along with the
   4.146 -Library.
   4.147 -
   4.148 -  You may charge a fee for the physical act of transferring a copy,
   4.149 -and you may at your option offer warranty protection in exchange for a
   4.150 -fee.
   4.151 -
   4.152 -  2. You may modify your copy or copies of the Library or any portion
   4.153 -of it, thus forming a work based on the Library, and copy and
   4.154 -distribute such modifications or work under the terms of Section 1
   4.155 -above, provided that you also meet all of these conditions:
   4.156 -
   4.157 -    a) The modified work must itself be a software library.
   4.158 -
   4.159 -    b) You must cause the files modified to carry prominent notices
   4.160 -    stating that you changed the files and the date of any change.
   4.161 -
   4.162 -    c) You must cause the whole of the work to be licensed at no
   4.163 -    charge to all third parties under the terms of this License.
   4.164 -
   4.165 -    d) If a facility in the modified Library refers to a function or a
   4.166 -    table of data to be supplied by an application program that uses
   4.167 -    the facility, other than as an argument passed when the facility
   4.168 -    is invoked, then you must make a good faith effort to ensure that,
   4.169 -    in the event an application does not supply such function or
   4.170 -    table, the facility still operates, and performs whatever part of
   4.171 -    its purpose remains meaningful.
   4.172 -
   4.173 -    (For example, a function in a library to compute square roots has
   4.174 -    a purpose that is entirely well-defined independent of the
   4.175 -    application.  Therefore, Subsection 2d requires that any
   4.176 -    application-supplied function or table used by this function must
   4.177 -    be optional: if the application does not supply it, the square
   4.178 -    root function must still compute square roots.)
   4.179 -
   4.180 -These requirements apply to the modified work as a whole.  If
   4.181 -identifiable sections of that work are not derived from the Library,
   4.182 -and can be reasonably considered independent and separate works in
   4.183 -themselves, then this License, and its terms, do not apply to those
   4.184 -sections when you distribute them as separate works.  But when you
   4.185 -distribute the same sections as part of a whole which is a work based
   4.186 -on the Library, the distribution of the whole must be on the terms of
   4.187 -this License, whose permissions for other licensees extend to the
   4.188 -entire whole, and thus to each and every part regardless of who wrote
   4.189 -it.
   4.190 -
   4.191 -Thus, it is not the intent of this section to claim rights or contest
   4.192 -your rights to work written entirely by you; rather, the intent is to
   4.193 -exercise the right to control the distribution of derivative or
   4.194 -collective works based on the Library.
   4.195 -
   4.196 -In addition, mere aggregation of another work not based on the Library
   4.197 -with the Library (or with a work based on the Library) on a volume of
   4.198 -a storage or distribution medium does not bring the other work under
   4.199 -the scope of this License.
   4.200 -
   4.201 -  3. You may opt to apply the terms of the ordinary GNU General Public
   4.202 -License instead of this License to a given copy of the Library.  To do
   4.203 -this, you must alter all the notices that refer to this License, so
   4.204 -that they refer to the ordinary GNU General Public License, version 2,
   4.205 -instead of to this License.  (If a newer version than version 2 of the
   4.206 -ordinary GNU General Public License has appeared, then you can specify
   4.207 -that version instead if you wish.)  Do not make any other change in
   4.208 -these notices.
   4.209 -
   4.210 -  Once this change is made in a given copy, it is irreversible for
   4.211 -that copy, so the ordinary GNU General Public License applies to all
   4.212 -subsequent copies and derivative works made from that copy.
   4.213 -
   4.214 -  This option is useful when you wish to copy part of the code of
   4.215 -the Library into a program that is not a library.
   4.216 -
   4.217 -  4. You may copy and distribute the Library (or a portion or
   4.218 -derivative of it, under Section 2) in object code or executable form
   4.219 -under the terms of Sections 1 and 2 above provided that you accompany
   4.220 -it with the complete corresponding machine-readable source code, which
   4.221 -must be distributed under the terms of Sections 1 and 2 above on a
   4.222 -medium customarily used for software interchange.
   4.223 -
   4.224 -  If distribution of object code is made by offering access to copy
   4.225 -from a designated place, then offering equivalent access to copy the
   4.226 -source code from the same place satisfies the requirement to
   4.227 -distribute the source code, even though third parties are not
   4.228 -compelled to copy the source along with the object code.
   4.229 -
   4.230 -  5. A program that contains no derivative of any portion of the
   4.231 -Library, but is designed to work with the Library by being compiled or
   4.232 -linked with it, is called a "work that uses the Library".  Such a
   4.233 -work, in isolation, is not a derivative work of the Library, and
   4.234 -therefore falls outside the scope of this License.
   4.235 -
   4.236 -  However, linking a "work that uses the Library" with the Library
   4.237 -creates an executable that is a derivative of the Library (because it
   4.238 -contains portions of the Library), rather than a "work that uses the
   4.239 -library".  The executable is therefore covered by this License.
   4.240 -Section 6 states terms for distribution of such executables.
   4.241 -
   4.242 -  When a "work that uses the Library" uses material from a header file
   4.243 -that is part of the Library, the object code for the work may be a
   4.244 -derivative work of the Library even though the source code is not.
   4.245 -Whether this is true is especially significant if the work can be
   4.246 -linked without the Library, or if the work is itself a library.  The
   4.247 -threshold for this to be true is not precisely defined by law.
   4.248 -
   4.249 -  If such an object file uses only numerical parameters, data
   4.250 -structure layouts and accessors, and small macros and small inline
   4.251 -functions (ten lines or less in length), then the use of the object
   4.252 -file is unrestricted, regardless of whether it is legally a derivative
   4.253 -work.  (Executables containing this object code plus portions of the
   4.254 -Library will still fall under Section 6.)
   4.255 -
   4.256 -  Otherwise, if the work is a derivative of the Library, you may
   4.257 -distribute the object code for the work under the terms of Section 6.
   4.258 -Any executables containing that work also fall under Section 6,
   4.259 -whether or not they are linked directly with the Library itself.
   4.260 -
   4.261 -  6. As an exception to the Sections above, you may also compile or
   4.262 -link a "work that uses the Library" with the Library to produce a
   4.263 -work containing portions of the Library, and distribute that work
   4.264 -under terms of your choice, provided that the terms permit
   4.265 -modification of the work for the customer's own use and reverse
   4.266 -engineering for debugging such modifications.
   4.267 -
   4.268 -  You must give prominent notice with each copy of the work that the
   4.269 -Library is used in it and that the Library and its use are covered by
   4.270 -this License.  You must supply a copy of this License.  If the work
   4.271 -during execution displays copyright notices, you must include the
   4.272 -copyright notice for the Library among them, as well as a reference
   4.273 -directing the user to the copy of this License.  Also, you must do one
   4.274 -of these things:
   4.275 -
   4.276 -    a) Accompany the work with the complete corresponding
   4.277 -    machine-readable source code for the Library including whatever
   4.278 -    changes were used in the work (which must be distributed under
   4.279 -    Sections 1 and 2 above); and, if the work is an executable linked
   4.280 -    with the Library, with the complete machine-readable "work that
   4.281 -    uses the Library", as object code and/or source code, so that the
   4.282 -    user can modify the Library and then relink to produce a modified
   4.283 -    executable containing the modified Library.  (It is understood
   4.284 -    that the user who changes the contents of definitions files in the
   4.285 -    Library will not necessarily be able to recompile the application
   4.286 -    to use the modified definitions.)
   4.287 -
   4.288 -    b) Accompany the work with a written offer, valid for at
   4.289 -    least three years, to give the same user the materials
   4.290 -    specified in Subsection 6a, above, for a charge no more
   4.291 -    than the cost of performing this distribution.
   4.292 -
   4.293 -    c) If distribution of the work is made by offering access to copy
   4.294 -    from a designated place, offer equivalent access to copy the above
   4.295 -    specified materials from the same place.
   4.296 -
   4.297 -    d) Verify that the user has already received a copy of these
   4.298 -    materials or that you have already sent this user a copy.
   4.299 -
   4.300 -  For an executable, the required form of the "work that uses the
   4.301 -Library" must include any data and utility programs needed for
   4.302 -reproducing the executable from it.  However, as a special exception,
   4.303 -the source code distributed need not include anything that is normally
   4.304 -distributed (in either source or binary form) with the major
   4.305 -components (compiler, kernel, and so on) of the operating system on
   4.306 -which the executable runs, unless that component itself accompanies
   4.307 -the executable.
   4.308 -
   4.309 -  It may happen that this requirement contradicts the license
   4.310 -restrictions of other proprietary libraries that do not normally
   4.311 -accompany the operating system.  Such a contradiction means you cannot
   4.312 -use both them and the Library together in an executable that you
   4.313 -distribute.
   4.314 -
   4.315 -  7. You may place library facilities that are a work based on the
   4.316 -Library side-by-side in a single library together with other library
   4.317 -facilities not covered by this License, and distribute such a combined
   4.318 -library, provided that the separate distribution of the work based on
   4.319 -the Library and of the other library facilities is otherwise
   4.320 -permitted, and provided that you do these two things:
   4.321 -
   4.322 -    a) Accompany the combined library with a copy of the same work
   4.323 -    based on the Library, uncombined with any other library
   4.324 -    facilities.  This must be distributed under the terms of the
   4.325 -    Sections above.
   4.326 -
   4.327 -    b) Give prominent notice with the combined library of the fact
   4.328 -    that part of it is a work based on the Library, and explaining
   4.329 -    where to find the accompanying uncombined form of the same work.
   4.330 -
   4.331 -  8. You may not copy, modify, sublicense, link with, or distribute
   4.332 -the Library except as expressly provided under this License.  Any
   4.333 -attempt otherwise to copy, modify, sublicense, link with, or
   4.334 -distribute the Library is void, and will automatically terminate your
   4.335 -rights under this License.  However, parties who have received copies,
   4.336 -or rights, from you under this License will not have their licenses
   4.337 -terminated so long as such parties remain in full compliance.
   4.338 -
   4.339 -  9. You are not required to accept this License, since you have not
   4.340 -signed it.  However, nothing else grants you permission to modify or
   4.341 -distribute the Library or its derivative works.  These actions are
   4.342 -prohibited by law if you do not accept this License.  Therefore, by
   4.343 -modifying or distributing the Library (or any work based on the
   4.344 -Library), you indicate your acceptance of this License to do so, and
   4.345 -all its terms and conditions for copying, distributing or modifying
   4.346 -the Library or works based on it.
   4.347 -
   4.348 -  10. Each time you redistribute the Library (or any work based on the
   4.349 -Library), the recipient automatically receives a license from the
   4.350 -original licensor to copy, distribute, link with or modify the Library
   4.351 -subject to these terms and conditions.  You may not impose any further
   4.352 -restrictions on the recipients' exercise of the rights granted herein.
   4.353 -You are not responsible for enforcing compliance by third parties to
   4.354 -this License.
   4.355 -
   4.356 -  11. If, as a consequence of a court judgment or allegation of patent
   4.357 -infringement or for any other reason (not limited to patent issues),
   4.358 -conditions are imposed on you (whether by court order, agreement or
   4.359 -otherwise) that contradict the conditions of this License, they do not
   4.360 -excuse you from the conditions of this License.  If you cannot
   4.361 -distribute so as to satisfy simultaneously your obligations under this
   4.362 -License and any other pertinent obligations, then as a consequence you
   4.363 -may not distribute the Library at all.  For example, if a patent
   4.364 -license would not permit royalty-free redistribution of the Library by
   4.365 -all those who receive copies directly or indirectly through you, then
   4.366 -the only way you could satisfy both it and this License would be to
   4.367 -refrain entirely from distribution of the Library.
   4.368 -
   4.369 -If any portion of this section is held invalid or unenforceable under any
   4.370 -particular circumstance, the balance of the section is intended to apply,
   4.371 -and the section as a whole is intended to apply in other circumstances.
   4.372 -
   4.373 -It is not the purpose of this section to induce you to infringe any
   4.374 -patents or other property right claims or to contest validity of any
   4.375 -such claims; this section has the sole purpose of protecting the
   4.376 -integrity of the free software distribution system which is
   4.377 -implemented by public license practices.  Many people have made
   4.378 -generous contributions to the wide range of software distributed
   4.379 -through that system in reliance on consistent application of that
   4.380 -system; it is up to the author/donor to decide if he or she is willing
   4.381 -to distribute software through any other system and a licensee cannot
   4.382 -impose that choice.
   4.383 -
   4.384 -This section is intended to make thoroughly clear what is believed to
   4.385 -be a consequence of the rest of this License.
   4.386 -
   4.387 -  12. If the distribution and/or use of the Library is restricted in
   4.388 -certain countries either by patents or by copyrighted interfaces, the
   4.389 -original copyright holder who places the Library under this License may add
   4.390 -an explicit geographical distribution limitation excluding those countries,
   4.391 -so that distribution is permitted only in or among countries not thus
   4.392 -excluded.  In such case, this License incorporates the limitation as if
   4.393 -written in the body of this License.
   4.394 -
   4.395 -  13. The Free Software Foundation may publish revised and/or new
   4.396 -versions of the Library General Public License from time to time.
   4.397 -Such new versions will be similar in spirit to the present version,
   4.398 -but may differ in detail to address new problems or concerns.
   4.399 -
   4.400 -Each version is given a distinguishing version number.  If the Library
   4.401 -specifies a version number of this License which applies to it and
   4.402 -"any later version", you have the option of following the terms and
   4.403 -conditions either of that version or of any later version published by
   4.404 -the Free Software Foundation.  If the Library does not specify a
   4.405 -license version number, you may choose any version ever published by
   4.406 -the Free Software Foundation.
   4.407 -
   4.408 -  14. If you wish to incorporate parts of the Library into other free
   4.409 -programs whose distribution conditions are incompatible with these,
   4.410 -write to the author to ask for permission.  For software which is
   4.411 -copyrighted by the Free Software Foundation, write to the Free
   4.412 -Software Foundation; we sometimes make exceptions for this.  Our
   4.413 -decision will be guided by the two goals of preserving the free status
   4.414 -of all derivatives of our free software and of promoting the sharing
   4.415 -and reuse of software generally.
   4.416 -
   4.417 -			    NO WARRANTY
   4.418 -
   4.419 -  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
   4.420 -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
   4.421 -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
   4.422 -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
   4.423 -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
   4.424 -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   4.425 -PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
   4.426 -LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
   4.427 -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
   4.428 -
   4.429 -  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
   4.430 -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
   4.431 -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
   4.432 -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
   4.433 -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
   4.434 -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
   4.435 -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
   4.436 -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
   4.437 -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
   4.438 -DAMAGES.
   4.439 -
   4.440 -		     END OF TERMS AND CONDITIONS
   4.441 -
     5.1 --- a/src/hermes/HeadMMX.h	Wed Aug 15 04:04:17 2007 +0000
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,103 +0,0 @@
     5.4 -/*
     5.5 -   Header definitions for the MMX routines for the HERMES library
     5.6 -   Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
     5.7 -   This source code is licensed under the GNU LGPL
     5.8 -  
     5.9 -   Please refer to the file COPYING.LIB contained in the distribution for
    5.10 -   licensing conditions
    5.11 -*/
    5.12 -#include "SDL_config.h"
    5.13 -
    5.14 -#ifndef __HERMES_HEAD_MMX__
    5.15 -#define __HERMES_HEAD_MMX__
    5.16 -
    5.17 -
    5.18 -/* If you cannot stand ifdefs, then please do not look into this file, it's
    5.19 -   going to end your life :) */
    5.20 -
    5.21 -#ifdef X86_ASSEMBLER
    5.22 -
    5.23 -
    5.24 -#ifdef __cplusplus
    5.25 -extern "C"
    5.26 -{
    5.27 -#endif
    5.28 -
    5.29 -    void STACKCALL ConvertMMX(HermesConverterInterface *);
    5.30 -
    5.31 -    void STACKCALL ClearMMX_32(HermesClearInterface *);
    5.32 -    void STACKCALL ClearMMX_24(HermesClearInterface *);
    5.33 -    void STACKCALL ClearMMX_16(HermesClearInterface *);
    5.34 -    void STACKCALL ClearMMX_8(HermesClearInterface *);
    5.35 -
    5.36 -    void ConvertMMXpII32_24RGB888();
    5.37 -    void ConvertMMXpII32_16RGB565();
    5.38 -    void ConvertMMXpII32_16BGR565();
    5.39 -    void ConvertMMXpII32_16RGB555();
    5.40 -    void ConvertMMXpII32_16BGR565();
    5.41 -    void ConvertMMXpII32_16BGR555();
    5.42 -
    5.43 -    void ConvertMMXp32_16RGB555();
    5.44 -
    5.45 -#ifdef __cplusplus
    5.46 -}
    5.47 -#endif
    5.48 -
    5.49 -
    5.50 -
    5.51 -/* Fix the underscore business with ELF compilers */
    5.52 -
    5.53 -#if defined(__ELF__) && defined(__GNUC__)
    5.54 -#ifdef __cplusplus
    5.55 -extern "C"
    5.56 -{
    5.57 -#endif
    5.58 -
    5.59 -    extern void _ConvertMMX(HermesConverterInterface *);
    5.60 -    extern void _ConvertMMXpII32_24RGB888();
    5.61 -    extern void _ConvertMMXpII32_16RGB565();
    5.62 -    extern void _ConvertMMXpII32_16BGR565();
    5.63 -    extern void _ConvertMMXpII32_16RGB555();
    5.64 -    extern void _ConvertMMXpII32_16BGR555();
    5.65 -
    5.66 -#define ConvertMMX _ConvertMMX
    5.67 -#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888
    5.68 -#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565
    5.69 -#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565
    5.70 -#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555
    5.71 -#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555
    5.72 -
    5.73 -#ifdef __cplusplus
    5.74 -}
    5.75 -#endif
    5.76 -
    5.77 -#endif                          /* ELF and GNUC */
    5.78 -
    5.79 -
    5.80 -
    5.81 -
    5.82 -/* Make it work with Watcom */
    5.83 -#ifdef __WATCOMC__
    5.84 -#pragma warning 601 9
    5.85 -
    5.86 -#pragma aux ConvertMMX "_*" modify [EAX EBX ECX EDX ESI EDI]
    5.87 -
    5.88 -#pragma aux ClearMMX_32 "_*" modify [EAX EBX ECX EDX ESI EDI]
    5.89 -#pragma aux ClearMMX_24 "_*" modify [EAX EBX ECX EDX ESI EDI]
    5.90 -#pragma aux ClearMMX_16 "_*" modify [EAX EBX ECX EDX ESI EDI]
    5.91 -#pragma aux ClearMMX_8 "_*" modify [EAX EBX ECX EDX ESI EDI]
    5.92 -
    5.93 -#pragma aux ConvertMMXpII32_24RGB888 "_*"
    5.94 -#pragma aux ConvertMMXpII32_16RGB565 "_*"
    5.95 -#pragma aux ConvertMMXpII32_16BGR565 "_*"
    5.96 -#pragma aux ConvertMMXpII32_16RGB555 "_*"
    5.97 -#pragma aux ConvertMMXpII32_16BGR555 "_*"
    5.98 -#pragma aux ConvertMMXp32_16RGB555 "_*"
    5.99 -
   5.100 -#endif                          /* WATCOM */
   5.101 -
   5.102 -#endif                          /* X86_ASSEMBLER */
   5.103 -
   5.104 -
   5.105 -#endif
   5.106 -/* vi: set ts=4 sw=4 expandtab: */
     6.1 --- a/src/hermes/HeadX86.h	Wed Aug 15 04:04:17 2007 +0000
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,190 +0,0 @@
     6.4 -/*
     6.5 -   Header definitions for the x86 routines for the HERMES library
     6.6 -   Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
     6.7 -   This source code is licensed under the GNU LGPL
     6.8 -  
     6.9 -   Please refer to the file COPYING.LIB contained in the distribution for
    6.10 -   licensing conditions
    6.11 -*/
    6.12 -
    6.13 -#ifndef __HERMES_HEAD_X86__
    6.14 -#define __HERMES_HEAD_X86__
    6.15 -
    6.16 -
    6.17 -#ifdef X86_ASSEMBLER
    6.18 -
    6.19 -/* If you can't stand IFDEFS, then close your eyes now, please :) */
    6.20 -
    6.21 -/* Ok, we start with normal function definitions */
    6.22 -#ifdef __cplusplus
    6.23 -extern "C"
    6.24 -{
    6.25 -#endif
    6.26 -
    6.27 -
    6.28 -    void STACKCALL ConvertX86(HermesConverterInterface *);
    6.29 -    void STACKCALL ClearX86_32(HermesClearInterface *);
    6.30 -    void STACKCALL ClearX86_24(HermesClearInterface *);
    6.31 -    void STACKCALL ClearX86_16(HermesClearInterface *);
    6.32 -    void STACKCALL ClearX86_8(HermesClearInterface *);
    6.33 -
    6.34 -    int STACKCALL Hermes_X86_CPU();
    6.35 -
    6.36 -    void ConvertX86p32_32BGR888();
    6.37 -    void ConvertX86p32_32RGBA888();
    6.38 -    void ConvertX86p32_32BGRA888();
    6.39 -    void ConvertX86p32_24RGB888();
    6.40 -    void ConvertX86p32_24BGR888();
    6.41 -    void ConvertX86p32_16RGB565();
    6.42 -    void ConvertX86p32_16BGR565();
    6.43 -    void ConvertX86p32_16RGB555();
    6.44 -    void ConvertX86p32_16BGR555();
    6.45 -    void ConvertX86p32_8RGB332();
    6.46 -
    6.47 -    void ConvertX86p16_32RGB888();
    6.48 -    void ConvertX86p16_32BGR888();
    6.49 -    void ConvertX86p16_32RGBA888();
    6.50 -    void ConvertX86p16_32BGRA888();
    6.51 -    void ConvertX86p16_24RGB888();
    6.52 -    void ConvertX86p16_24BGR888();
    6.53 -    void ConvertX86p16_16BGR565();
    6.54 -    void ConvertX86p16_16RGB555();
    6.55 -    void ConvertX86p16_16BGR555();
    6.56 -    void ConvertX86p16_8RGB332();
    6.57 -
    6.58 -    void CopyX86p_4byte();
    6.59 -    void CopyX86p_3byte();
    6.60 -    void CopyX86p_2byte();
    6.61 -    void CopyX86p_1byte();
    6.62 -
    6.63 -    void ConvertX86pI8_32();
    6.64 -    void ConvertX86pI8_24();
    6.65 -    void ConvertX86pI8_16();
    6.66 -
    6.67 -    extern int ConvertX86p16_32RGB888_LUT_X86[512];
    6.68 -    extern int ConvertX86p16_32BGR888_LUT_X86[512];
    6.69 -    extern int ConvertX86p16_32RGBA888_LUT_X86[512];
    6.70 -    extern int ConvertX86p16_32BGRA888_LUT_X86[512];
    6.71 -
    6.72 -#ifdef __cplusplus
    6.73 -}
    6.74 -#endif
    6.75 -
    6.76 -
    6.77 -
    6.78 -
    6.79 -/* Now fix up the ELF underscore problem */
    6.80 -
    6.81 -#if defined(__ELF__) && defined(__GNUC__)
    6.82 -#ifdef __cplusplus
    6.83 -extern "C"
    6.84 -{
    6.85 -#endif
    6.86 -
    6.87 -    extern int _Hermes_X86_CPU();
    6.88 -
    6.89 -    extern void _ConvertX86(HermesConverterInterface *);
    6.90 -
    6.91 -    extern void _ConvertX86p32_32BGR888();
    6.92 -    extern void _ConvertX86p32_32RGBA888();
    6.93 -    extern void _ConvertX86p32_32BGRA888();
    6.94 -    extern void _ConvertX86p32_24RGB888();
    6.95 -    extern void _ConvertX86p32_24BGR888();
    6.96 -    extern void _ConvertX86p32_16RGB565();
    6.97 -    extern void _ConvertX86p32_16BGR565();
    6.98 -    extern void _ConvertX86p32_16RGB555();
    6.99 -    extern void _ConvertX86p32_16BGR555();
   6.100 -    extern void _ConvertX86p32_8RGB332();
   6.101 -
   6.102 -    extern void _ConvertX86p16_16BGR565();
   6.103 -    extern void _ConvertX86p16_16RGB555();
   6.104 -    extern void _ConvertX86p16_16BGR555();
   6.105 -    extern void _ConvertX86p16_8RGB332();
   6.106 -
   6.107 -
   6.108 -#define Hermes_X86_CPU _Hermes_X86_CPU
   6.109 -
   6.110 -#define ConvertX86 _ConvertX86
   6.111 -
   6.112 -#define ConvertX86p32_32BGR888 _ConvertX86p32_32BGR888
   6.113 -#define ConvertX86p32_32RGBA888 _ConvertX86p32_32RGBA888
   6.114 -#define ConvertX86p32_32BGRA888 _ConvertX86p32_32BGRA888
   6.115 -#define ConvertX86p32_24RGB888 _ConvertX86p32_24RGB888
   6.116 -#define ConvertX86p32_24BGR888 _ConvertX86p32_24BGR888
   6.117 -#define ConvertX86p32_16RGB565 _ConvertX86p32_16RGB565
   6.118 -#define ConvertX86p32_16BGR565 _ConvertX86p32_16BGR565
   6.119 -#define ConvertX86p32_16RGB555 _ConvertX86p32_16RGB555
   6.120 -#define ConvertX86p32_16BGR555 _ConvertX86p32_16BGR555
   6.121 -#define ConvertX86p32_8RGB332 _ConvertX86p32_8RGB332
   6.122 -
   6.123 -#define ConvertX86p16_16BGR565 _ConvertX86p16_16BGR565
   6.124 -#define ConvertX86p16_16RGB555 _ConvertX86p16_16RGB555
   6.125 -#define ConvertX86p16_16BGR555 _ConvertX86p16_16BGR555
   6.126 -#define ConvertX86p16_8RGB332 _ConvertX86p16_8RGB332
   6.127 -
   6.128 -
   6.129 -#ifdef __cplusplus
   6.130 -}
   6.131 -#endif
   6.132 -
   6.133 -#endif                          /* ELF & GNU */
   6.134 -
   6.135 -
   6.136 -
   6.137 -/* Make it run with WATCOM C */
   6.138 -#ifdef __WATCOMC__
   6.139 -#pragma warning 601 9
   6.140 -
   6.141 -#pragma aux Hermes_X86_CPU "_*"
   6.142 -
   6.143 -#pragma aux ConvertX86 "_*" modify [EAX EBX ECX EDX ESI EDI]
   6.144 -#pragma aux ClearX86_32 "_*" modify [EAX EBX ECX EDX ESI EDI]
   6.145 -#pragma aux ClearX86_24 "_*" modify [EAX EBX ECX EDX ESI EDI]
   6.146 -#pragma aux ClearX86_16 "_*" modify [EAX EBX ECX EDX ESI EDI]
   6.147 -#pragma aux ClearX86_8 "_*" modify [EAX EBX ECX EDX ESI EDI]
   6.148 -
   6.149 -#pragma aux ConvertX86p32_32BGR888 "_*"
   6.150 -#pragma aux ConvertX86p32_32RGBA888 "_*"
   6.151 -#pragma aux ConvertX86p32_32BGRA888 "_*"
   6.152 -#pragma aux ConvertX86p32_24RGB888 "_*"
   6.153 -#pragma aux ConvertX86p32_24BGR888 "_*"
   6.154 -#pragma aux ConvertX86p32_16RGB565 "_*"
   6.155 -#pragma aux ConvertX86p32_16BGR565 "_*"
   6.156 -#pragma aux ConvertX86p32_16RGB555 "_*"
   6.157 -#pragma aux ConvertX86p32_16BGR555 "_*"
   6.158 -#pragma aux ConvertX86p32_8RGB332 "_*"
   6.159 -
   6.160 -#pragma aux ConvertX86p16_32RGB888 "_*"
   6.161 -#pragma aux ConvertX86p16_32BGR888 "_*"
   6.162 -#pragma aux ConvertX86p16_32RGBA888 "_*"
   6.163 -#pragma aux ConvertX86p16_32BGRA888 "_*"
   6.164 -#pragma aux ConvertX86p16_24RGB888 "_*"
   6.165 -#pragma aux ConvertX86p16_24BGR888 "_*"
   6.166 -#pragma aux ConvertX86p16_16BGR565 "_*"
   6.167 -#pragma aux ConvertX86p16_16RGB555 "_*"
   6.168 -#pragma aux ConvertX86p16_16BGR555 "_*"
   6.169 -#pragma aux ConvertX86p16_8RGB332 "_*"
   6.170 -
   6.171 -#pragma aux CopyX86p_4byte "_*"
   6.172 -#pragma aux CopyX86p_3byte "_*"
   6.173 -#pragma aux CopyX86p_2byte "_*"
   6.174 -#pragma aux CopyX86p_1byte "_*"
   6.175 -
   6.176 -#pragma aux ConvertX86pI8_32 "_*"
   6.177 -#pragma aux ConvertX86pI8_24 "_*"
   6.178 -#pragma aux ConvertX86pI8_16 "_*"
   6.179 -
   6.180 -#pragma aux ConvertX86p16_32RGB888_LUT_X86 "_*"
   6.181 -#pragma aux ConvertX86p16_32BGR888_LUT_X86 "_*"
   6.182 -#pragma aux ConvertX86p16_32RGBA888_LUT_X86 "_*"
   6.183 -#pragma aux ConvertX86p16_32BGRA888_LUT_X86 "_*"
   6.184 -
   6.185 -#endif                          /* __WATCOMC__ */
   6.186 -
   6.187 -
   6.188 -#endif                          /* X86_ASSEMBLER */
   6.189 -
   6.190 -
   6.191 -#endif
   6.192 -
   6.193 -/* vi: set ts=4 sw=4 expandtab: */
     7.1 --- a/src/hermes/README	Wed Aug 15 04:04:17 2007 +0000
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,13 +0,0 @@
     7.4 -HERMES 1.2.4 (c)1998 Christian Nentwich (brn) (c.nentwich@cs.ucl.ac.uk)
     7.5 -and quite a few assembler routines (c) Glenn Fielder (gaffer@gaffer.org)
     7.6 -
     7.7 -This library and all the files enclosed in this package are free software
     7.8 -under the terms of the GNU Library General Public License (LGPL). Please
     7.9 -refer to the included file COPYING.LIB for the exact terms.
    7.10 -----------------------------------------------------------------------------
    7.11 -
    7.12 -This is a stripped down version of HERMES, including only the x86 assembler
    7.13 -converters, for use with Simple DirectMedia Layer.
    7.14 -
    7.15 -The full HERMES library is available at:  http://hermes.terminal.at/
    7.16 -
     8.1 --- a/src/hermes/common.inc	Wed Aug 15 04:04:17 2007 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,9 +0,0 @@
     8.4 -; Some common macros for hermes nasm code
     8.5 -
     8.6 -%macro SDL_FUNC 1
     8.7 -%ifdef HIDDEN_VISIBILITY
     8.8 -GLOBAL %1:function hidden
     8.9 -%else
    8.10 -GLOBAL %1
    8.11 -%endif
    8.12 -%endmacro
     9.1 --- a/src/hermes/mmx_main.asm	Wed Aug 15 04:04:17 2007 +0000
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,74 +0,0 @@
     9.4 -;
     9.5 -; mmx format converter main loops for HERMES
     9.6 -; Some routines Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
     9.7 -; This source code is licensed under the GNU LGPL
     9.8 -; 
     9.9 -; Please refer to the file COPYING.LIB contained in the distribution for
    9.10 -; licensing conditions		
    9.11 -;
    9.12 -
    9.13 -BITS 32
    9.14 -
    9.15 -%include "common.inc"
    9.16 -
    9.17 -SDL_FUNC _ConvertMMX
    9.18 -
    9.19 -SECTION .text
    9.20 -		
    9.21 -;; _ConvertMMX:	 
    9.22 -;; [ESP+8] ConverterInfo*
    9.23 -;; --------------------------------------------------------------------------
    9.24 -;; ConverterInfo (ebp+..)
    9.25 -;;   0:	void *s_pixels
    9.26 -;;   4:	int s_width
    9.27 -;;   8:	int s_height
    9.28 -;;  12:	int s_add
    9.29 -;;  16:	void *d_pixels
    9.30 -;;  20:	int d_width
    9.31 -;;  24:	int d_height
    9.32 -;;  28:	int d_add
    9.33 -;;  32:	void (*converter_function)() 
    9.34 -;;  36: int32 *lookup
    9.35 -	
    9.36 -_ConvertMMX:
    9.37 -	push ebp
    9.38 -	mov ebp,esp
    9.39 -
    9.40 -; Save the registers used by the blitters, necessary for optimized code
    9.41 -	pusha
    9.42 -
    9.43 -	mov eax,[ebp+8]			; eax = ConverterInfo* argument
    9.44 -
    9.45 -        cmp dword [eax+4],BYTE 0	; s_width == 0: nothing to convert
    9.46 -	je endconvert			; unwinds through the common epilogue below
    9.47 -	
    9.48 -	mov ebp,eax			; ebp = ConverterInfo* for the row loop
    9.49 -	
    9.50 -	mov esi,[ebp+0]			; s_pixels
    9.51 -	mov edi,[ebp+16]		; d_pixels
    9.52 -	
    9.53 -y_loop:	
    9.54 -	mov ecx,[ebp+4]			; s_width, reloaded for every row
    9.55 -
    9.56 -	call [ebp+32]			; converter_function
    9.57 -
    9.58 -	add esi,[ebp+12]		; s_add
    9.59 -	add edi,[ebp+28]		; d_add
    9.60 -	
    9.61 -	dec dword  [ebp+8]		; s_height is counted down in place
    9.62 -	jnz y_loop
    9.63 -
    9.64 -endconvert:
    9.65 -; Restore the registers used by the blitters, necessary for optimized code.
    9.66 -; The zero-width early exit lands here too: pusha and push ebp are already on the stack.
    9.67 -	popa				; also restores ebp to the frame pointer saved by pusha
    9.68 -	pop ebp
    9.69 -
    9.70 -; Empty the MMX state before returning to the caller
    9.71 -	emms
    9.72 -	
    9.73 -	ret		
    9.74 -
    9.75 -%ifidn __OUTPUT_FORMAT__,elf
    9.76 -section .note.GNU-stack noalloc noexec nowrite progbits
    9.77 -%endif
    10.1 --- a/src/hermes/mmxp2_32.asm	Wed Aug 15 04:04:17 2007 +0000
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,405 +0,0 @@
    10.4 -;
    10.5 -; pII-optimised MMX format converters for HERMES
    10.6 -; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
    10.7 -;   and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
    10.8 -; This source code is licensed under the GNU LGPL
    10.9 -; 
   10.10 -; Please refer to the file COPYING.LIB contained in the distribution for
   10.11 -; licensing conditions		
   10.12 -;
   10.13 -; COPYRIGHT NOTICE
   10.14 -; 
   10.15 -; This file partly contains code that is (c) Intel Corporation, specifically
   10.16 -; the mode detection routine, and the converter to 15 bit (8 pixel
   10.17 -; conversion routine from the mmx programming tutorial pages).
   10.18 -;
   10.19 -;
   10.20 -; These routines aren't exactly pII optimised - it's just that as they
   10.21 -; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
   10.22 -; optimise them for p5 MMXs..
   10.23 -
   10.24 -BITS 32
   10.25 -
   10.26 -%include "common.inc"
   10.27 -	
   10.28 -SDL_FUNC _ConvertMMXpII32_24RGB888
   10.29 -SDL_FUNC _ConvertMMXpII32_16RGB565
   10.30 -SDL_FUNC _ConvertMMXpII32_16BGR565
   10.31 -SDL_FUNC _ConvertMMXpII32_16RGB555
   10.32 -SDL_FUNC _ConvertMMXpII32_16BGR555
   10.33 -
   10.34 -;; Macros for conversion routines
   10.35 -
   10.36 -%macro _push_immq_mask 1
   10.37 -	push dword %1
   10.38 -	push dword %1
   10.39 -%endmacro
   10.40 -
   10.41 -%macro load_immq 2
   10.42 -	_push_immq_mask %2
   10.43 -	movq %1, [esp]
   10.44 -%endmacro
   10.45 -
   10.46 -%macro pand_immq 2
   10.47 -	_push_immq_mask %2
   10.48 -	pand %1, [esp]
   10.49 -%endmacro
   10.50 -
   10.51 -%define CLEANUP_IMMQ_LOADS(num) \
   10.52 -	add esp, byte 8 * num
   10.53 -
   10.54 -%define mmx32_rgb888_mask 00ffffffh
   10.55 -%define mmx32_rgb565_b 000000f8h
   10.56 -%define mmx32_rgb565_g 0000fc00h
   10.57 -%define mmx32_rgb565_r 00f80000h
   10.58 -
   10.59 -%define mmx32_rgb555_rb 00f800f8h
   10.60 -%define mmx32_rgb555_g 0000f800h
   10.61 -%define mmx32_rgb555_mul 20000008h
   10.62 -%define mmx32_bgr555_mul 00082000h
   10.63 -
   10.64 -SECTION .text
   10.65 -
   10.66 -_ConvertMMXpII32_24RGB888:
   10.67 -        ; 32-bit -> packed 24-bit copy: esi = source, edi = destination, ecx = pixel count
   10.68 -        ; set up mm6 as the mask, mm7 as zero
   10.69 -        load_immq mm6, mmx32_rgb888_mask
   10.70 -        CLEANUP_IMMQ_LOADS(1)
   10.71 -        pxor mm7, mm7
   10.72 -
   10.73 -        mov edx, ecx                    ; save ecx
   10.74 -        and ecx, 0fffffffch             ; clear lower two bits
   10.75 -        jnz .L1
   10.76 -        jmp .L2                         ; fewer than 4 pixels: tail loop only
   10.77 -
   10.78 -.L1:
   10.79 -        ; four source pixels (16 bytes) in, 12 destination bytes out per iteration
   10.80 -        movq mm0, [esi]                 ; A R G B a r g b
   10.81 -        pand mm0, mm6                   ; 0 R G B 0 r g b
   10.82 -        movq mm1, [esi+8]               ; A R G B a r g b
   10.83 -        pand mm1, mm6                   ; 0 R G B 0 r g b
   10.84 -
   10.85 -        movq mm2, mm0                   ; 0 R G B 0 r g b
   10.86 -        punpckhdq mm2, mm7              ; 0 0 0 0 0 R G B
   10.87 -        punpckldq mm0, mm7              ; 0 0 0 0 0 r g b
   10.88 -        psllq mm2, 24                   ; 0 0 R G B 0 0 0
   10.89 -        por mm0, mm2                    ; 0 0 R G B r g b
   10.90 -
   10.91 -        movq mm3, mm1                   ; 0 R G B 0 r g b
   10.92 -        psllq mm3, 48                   ; g b 0 0 0 0 0 0
   10.93 -        por mm0, mm3                    ; g b R G B r g b
   10.94 -
   10.95 -        movq mm4, mm1                   ; 0 R G B 0 r g b
   10.96 -        punpckhdq mm4, mm7              ; 0 0 0 0 0 R G B
   10.97 -        punpckldq mm1, mm7              ; 0 0 0 0 0 r g b
   10.98 -        psrlq mm1, 16                   ; 0 0 0 0 0 0 0 r
   10.99 -        psllq mm4, 8                    ; 0 0 0 0 R G B 0
  10.100 -        por mm1, mm4                    ; 0 0 0 0 R G B r
  10.101 -
  10.102 -        movq [edi], mm0
  10.103 -        add esi, BYTE 16
  10.104 -        movd [edi+8], mm1
  10.105 -        add edi, BYTE 12
  10.106 -        sub ecx, BYTE 4
  10.107 -        jnz .L1
  10.108 -
  10.109 -.L2:
  10.110 -        mov ecx, edx                    ; restore the full pixel count
  10.111 -        and ecx, BYTE 3                 ; 0..3 leftover pixels
  10.112 -        jz .L4
  10.113 -.L3:
  10.114 -        mov al, [esi]                   ; copy three bytes of the source pixel,
  10.115 -        mov bl, [esi+1]                 ; the fourth source byte is dropped
  10.116 -        mov dl, [esi+2]
  10.117 -        mov [edi], al
  10.118 -        mov [edi+1], bl
  10.119 -        mov [edi+2], dl
  10.120 -        add esi, BYTE 4
  10.121 -        add edi, BYTE 3
  10.122 -        dec ecx
  10.123 -        jnz .L3
  10.124 -.L4:
  10.125 -        retn                            ; near return to the caller
  10.126 -
  10.127 -
  10.128 -
  10.129 -_ConvertMMXpII32_16RGB565:      ; 32-bit -> 16-bit 5:6:5; esi = source, edi = destination, ecx = pixel count
  10.130 -
  10.131 -        ; set up masks
  10.132 -        load_immq mm5, mmx32_rgb565_b
  10.133 -        load_immq mm6, mmx32_rgb565_g
  10.134 -        load_immq mm7, mmx32_rgb565_r
  10.135 -        CLEANUP_IMMQ_LOADS(3)
  10.136 -
  10.137 -        mov edx, ecx            ; keep the full count for the tail loop
  10.138 -        shr ecx, 2              ; number of 4-pixel groups
  10.139 -        jnz .L1
  10.140 -        jmp .L2         ; not necessary at the moment, but doesn't hurt (much)
  10.141 -
  10.142 -.L1:
  10.143 -        movq mm0, [esi]         ; argb
  10.144 -        movq mm1, mm0           ; argb
  10.145 -        pand mm0, mm6           ; 00g0
  10.146 -        movq mm3, mm1           ; argb
  10.147 -        pand mm1, mm5           ; 000b
  10.148 -        pand mm3, mm7           ; 0r00
  10.149 -        pslld mm1, 2            ; 0 0 000000bb bbb00000
  10.150 -        por mm0, mm1            ; 0 0 ggggggbb bbb00000
  10.151 -        psrld mm0, 5            ; 0 0 00000ggg gggbbbbb
  10.152 -
  10.153 -        movq mm4, [esi+8]       ; argb
  10.154 -        movq mm2, mm4           ; argb
  10.155 -        pand mm4, mm6           ; 00g0
  10.156 -        movq mm1, mm2           ; argb
  10.157 -        pand mm2, mm5           ; 000b
  10.158 -        pand mm1, mm7           ; 0r00
  10.159 -        pslld mm2, 2            ; 0 0 000000bb bbb00000
  10.160 -        por mm4, mm2            ; 0 0 ggggggbb bbb00000
  10.161 -        psrld mm4, 5            ; 0 0 00000ggg gggbbbbb
  10.162 -
  10.163 -        packuswb mm3, mm1       ; R 0 r 0
  10.164 -        packssdw mm0, mm4       ; as above.. ish
  10.165 -        por mm0, mm3            ; done.
  10.166 -        movq [edi], mm0         ; store four 16-bit pixels
  10.167 -
  10.168 -        add esi, 16             ; four 32-bit source pixels consumed
  10.169 -        add edi, 8              ; four 16-bit destination pixels written
  10.170 -        dec ecx
  10.171 -        jnz .L1
  10.172 -
  10.173 -.L2:
  10.174 -        mov ecx, edx            ; restore the full pixel count
  10.175 -        and ecx, BYTE 3         ; 0..3 leftover pixels
  10.176 -        jz .L4
  10.177 -.L3:
  10.178 -        mov al, [esi]           ; source byte 0 -> al
  10.179 -        mov bh, [esi+1]         ; source byte 1 -> bits 8..15 of ebx
  10.180 -        mov ah, [esi+2]         ; source byte 2 -> ah
  10.181 -        shr al, 3               ; keep the top 5 bits of byte 0 in bits 0..4
  10.182 -        and eax, 0F81Fh            ; BYTE?
  10.183 -        shr ebx, 5              ; top 6 bits of byte 1 move to bits 5..10
  10.184 -        and ebx, 07E0h             ; BYTE?
  10.185 -        add eax, ebx            ; fields do not overlap, so add merges them
  10.186 -        mov [edi], al           ; write the 16-bit pixel low byte first
  10.187 -        mov [edi+1], ah
  10.188 -        add esi, BYTE 4
  10.189 -        add edi, BYTE 2
  10.190 -        dec ecx
  10.191 -        jnz .L3
  10.192 -
  10.193 -.L4:
  10.194 -	retn
  10.195 -
  10.196 -	
  10.197 -_ConvertMMXpII32_16BGR565:
  10.198 -        ; Pack ECX 32-bit pixels read from [ESI] into 16-bit pixels written to [EDI].
  10.199 -        load_immq mm5, mmx32_rgb565_r
  10.200 -        load_immq mm6, mmx32_rgb565_g
  10.201 -        load_immq mm7, mmx32_rgb565_b
  10.202 -        CLEANUP_IMMQ_LOADS(3)
  10.203 -        ; EDX keeps the full count for the tail; ECX becomes the number of 4-pixel groups
  10.204 -        mov edx, ecx
  10.205 -        shr ecx, 2
  10.206 -        jnz .L1
  10.207 -        jmp .L2
  10.208 -        ; main loop: 16 source bytes in, 8 destination bytes out per pass
  10.209 -.L1:
  10.210 -        movq mm0, [esi]                 ; a r g b
  10.211 -        movq mm1, mm0                   ; a r g b
  10.212 -        pand mm0, mm6                   ; 0 0 g 0
  10.213 -        movq mm3, mm1                   ; a r g b
  10.214 -        pand mm1, mm5                   ; 0 r 0 0
  10.215 -        pand mm3, mm7                   ; 0 0 0 b
  10.216 -
  10.217 -        psllq mm3, 16                   ; 0 b 0 0
  10.218 -        psrld mm1, 14                   ; 0 0 000000rr rrr00000
  10.219 -        por mm0, mm1                    ; 0 0 ggggggrr rrr00000
  10.220 -        psrld mm0, 5                    ; 0 0 00000ggg gggrrrrr
  10.221 -        ; same steps for the next 8 source bytes, using mm4/mm2/mm1
  10.222 -        movq mm4, [esi+8]               ; a r g b
  10.223 -        movq mm2, mm4                   ; a r g b
  10.224 -        pand mm4, mm6                   ; 0 0 g 0
  10.225 -        movq mm1, mm2                   ; a r g b
  10.226 -        pand mm2, mm5                   ; 0 r 0 0
  10.227 -        pand mm1, mm7                   ; 0 0 0 b
  10.228 -
  10.229 -        psllq mm1, 16                   ; 0 b 0 0
  10.230 -        psrld mm2, 14                   ; 0 0 000000rr rrr00000
  10.231 -        por mm4, mm2                    ; 0 0 ggggggrr rrr00000
  10.232 -        psrld mm4, 5                    ; 0 0 00000ggg gggrrrrr
  10.233 -
  10.234 -        packuswb mm3, mm1               ; BBBBB000 00000000 bbbbb000 00000000
  10.235 -        packssdw mm0, mm4               ; 00000GGG GGGRRRRR 00000GGG GGGRRRRR
  10.236 -        por mm0, mm3                    ; BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
  10.237 -        movq [edi], mm0
  10.238 -
  10.239 -        add esi, BYTE 16
  10.240 -        add edi, BYTE 8
  10.241 -        dec ecx
  10.242 -        jnz .L1
  10.243 -        ; tail: the remaining (count & 3) pixels, counted down directly in EDX
  10.244 -.L2:
  10.245 -        and edx, BYTE 3
  10.246 -        jz .L4
  10.247 -.L3:
  10.248 -        mov al, [esi+2]
  10.249 -        mov bh, [esi+1]
  10.250 -        mov ah, [esi]
  10.251 -        shr al, 3                          ; top 5 bits of source byte 2 -> bits 0-4
  10.252 -        and eax, 0F81Fh                    ; BYTE ?
  10.253 -        shr ebx, 5                         ; top 6 bits of source byte 1 -> bits 5-10 (masked next)
  10.254 -        and ebx, 07E0h                     ; BYTE ?
  10.255 -        add eax, ebx
  10.256 -        mov [edi], al
  10.257 -        mov [edi+1], ah
  10.258 -        add esi, BYTE 4
  10.259 -        add edi, BYTE 2
  10.260 -        dec edx
  10.261 -        jnz .L3
  10.262 -
  10.263 -.L4:
  10.264 -        retn
  10.265 -
  10.266 -_ConvertMMXpII32_16BGR555:
  10.267 -        ; ESI = 32-bit source pixels, EDI = 16-bit destination, ECX = pixel count
  10.268 -        ; the 16BGR555 converter is identical to the RGB555 one,
  10.269 -        ; except it uses a different multiplier for the pmaddwd
  10.270 -        ; instruction.  cool huh.
  10.271 -
  10.272 -        load_immq mm7, mmx32_bgr555_mul
  10.273 -        jmp _convert_bgr555_cheat
  10.274 -
  10.275 -; This is the same as the Intel version.. they obviously went to
  10.276 -; much more trouble to expand/coil the loop than I did, so theirs
  10.277 -; would almost certainly be faster, even if only a little.
  10.278 -; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
  10.279 -; (I think) a more accurate name..
  10.280 -_ConvertMMXpII32_16RGB555:
  10.281 -	; mm7 = pmaddwd multiplier; the BGR555 entry point above joins at the next label
  10.282 -	load_immq mm7, mmx32_rgb555_mul
  10.283 -_convert_bgr555_cheat:
  10.284 -	load_immq mm6, mmx32_rgb555_g
  10.285 -	CLEANUP_IMMQ_LOADS(2)
  10.286 -	; EDX keeps the full count for the tail; ECX is rounded down to a multiple of 8
  10.287 -	mov edx,ecx		           ; Save ecx 
  10.288 -
  10.289 -        and ecx,DWORD 0fffffff8h            ; clear lower three bits
  10.290 -	jnz .L_OK
  10.291 -        jmp near .L2 
  10.292 -
  10.293 -.L_OK:
  10.294 -	; prologue: fetch the first 16 source bytes and start their pmaddwd step
  10.295 -	movq mm2,[esi+8]
  10.296 -
  10.297 -	movq mm0,[esi]
  10.298 -	movq mm3,mm2
  10.299 -
  10.300 -	pand_immq mm3, mmx32_rgb555_rb
  10.301 -	movq mm1,mm0
  10.302 -
  10.303 -	pand_immq mm1, mmx32_rgb555_rb
  10.304 -	pmaddwd mm3,mm7
  10.305 -
  10.306 -	CLEANUP_IMMQ_LOADS(2)
  10.307 -
  10.308 -	pmaddwd mm1,mm7
  10.309 -	pand mm2,mm6
  10.310 -
  10.311 -.L1:
  10.312 -	movq mm4,[esi+24]
  10.313 -	pand mm0,mm6
  10.314 -
  10.315 -	movq mm5,[esi+16]
  10.316 -	por mm3,mm2
  10.317 -
  10.318 -	psrld mm3,6
  10.319 -	por mm1,mm0
  10.320 -
  10.321 -	movq mm0,mm4
  10.322 -	psrld mm1,6
  10.323 -
  10.324 -	pand_immq mm0, mmx32_rgb555_rb
  10.325 -	packssdw mm1,mm3
  10.326 -
  10.327 -	movq mm3,mm5
  10.328 -	pmaddwd mm0,mm7
  10.329 -
  10.330 -	pand_immq mm3, mmx32_rgb555_rb
  10.331 -	pand mm4,mm6
  10.332 -
  10.333 -	movq [edi],mm1			; first 8 destination bytes of this pass
  10.334 -	pmaddwd mm3,mm7
  10.335 -
  10.336 -        add esi,BYTE 32
  10.337 -	por mm4,mm0
  10.338 -
  10.339 -	pand mm5,mm6
  10.340 -	psrld mm4,6
  10.341 -	; NOTE(review): the next pass's input is fetched before ECX is tested, so the last pass reads 16 bytes past the pixels this loop converts
  10.342 -	movq mm2,[esi+8]
  10.343 -	por mm5,mm3
  10.344 -
  10.345 -	movq mm0,[esi]
  10.346 -	psrld mm5,6
  10.347 -
  10.348 -	movq mm3,mm2
  10.349 -	movq mm1,mm0
  10.350 -
  10.351 -	pand_immq mm3, mmx32_rgb555_rb
  10.352 -	packssdw mm5,mm4
  10.353 -
  10.354 -	pand_immq mm1, mmx32_rgb555_rb
  10.355 -	pand mm2,mm6
  10.356 -
  10.357 -	CLEANUP_IMMQ_LOADS(4)
  10.358 -
  10.359 -	movq [edi+8],mm5
  10.360 -	pmaddwd mm3,mm7
  10.361 -
  10.362 -	pmaddwd mm1,mm7
  10.363 -        add edi,BYTE 16
  10.364 -	
  10.365 -        sub ecx,BYTE 8
  10.366 -	jz .L2
  10.367 -        jmp .L1
  10.368 -
  10.369 -; tail: the remaining (count & 7) pixels, one per pass, without MMX
  10.370 -.L2:	
  10.371 -	mov ecx,edx
  10.372 -	
  10.373 -        and ecx,BYTE 7
  10.374 -	jz .L4
  10.375 -	; NOTE(review): the shifts and masks below are fixed and do not use mm7, so tail pixels get the same layout from both entry points - confirm this is intended for _ConvertMMXpII32_16BGR555
  10.376 -.L3:	
  10.377 -	mov ebx,[esi]
  10.378 -        add esi,BYTE 4
  10.379 -	
  10.380 -        mov eax,ebx
  10.381 -        mov edx,ebx
  10.382 -
  10.383 -        shr eax,3                      ; source bits 3-7 -> bits 0-4
  10.384 -        shr edx,6                      ; source bits 11-15 -> bits 5-9
  10.385 -
  10.386 -        and eax,BYTE 0000000000011111b
  10.387 -        and edx,     0000001111100000b
  10.388 -
  10.389 -        shr ebx,9                      ; source bits 19-23 -> bits 10-14
  10.390 -
  10.391 -        or eax,edx
  10.392 -
  10.393 -        and ebx,     0111110000000000b
  10.394 -
  10.395 -        or eax,ebx
  10.396 -
  10.397 -        mov [edi],ax
  10.398 -        add edi,BYTE 2
  10.399 -
  10.400 -	dec ecx
  10.401 -	jnz .L3	
  10.402 -
  10.403 -.L4:		
  10.404 -	retn
  10.405 -
  10.406 -%ifidn __OUTPUT_FORMAT__,elf
  10.407 -section .note.GNU-stack noalloc noexec nowrite progbits
  10.408 -%endif
    11.1 --- a/src/hermes/x86_main.asm	Wed Aug 15 04:04:17 2007 +0000
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,75 +0,0 @@
    11.4 -;
    11.5 -; x86 format converters for HERMES
    11.6 -; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
    11.7 -; This source code is licensed under the GNU LGPL
    11.8 -; 
    11.9 -; Please refer to the file COPYING.LIB contained in the distribution for
   11.10 -; licensing conditions		
   11.11 -;
   11.12 -; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
   11.13 -; 
   11.14 -
   11.15 -BITS 32
   11.16 -
   11.17 -%include "common.inc"
   11.18 -
   11.19 -SDL_FUNC _ConvertX86
   11.20 -
   11.21 -SECTION .text
   11.22 -		
   11.23 -;; _ConvertX86:	 
   11.24 -;; [ESP+4] on entry ([EBP+8] once the frame is set up): ConverterInfo*
   11.25 -;; --------------------------------------------------------------------------
   11.26 -;; ConverterInfo (ebp+..)
   11.27 -;;   0:	void *s_pixels
   11.28 -;;   4:	int s_width
   11.29 -;;   8:	int s_height
   11.30 -;;  12:	int s_add
   11.31 -;;  16:	void *d_pixels
   11.32 -;;  20:	int d_width
   11.33 -;;  24:	int d_height
   11.34 -;;  28:	int d_add
   11.35 -;;  32:	void (*converter_function)() 
   11.36 -;;  36: int32 *lookup
   11.37 -	
   11.38 -_ConvertX86:
   11.39 -	push ebp
   11.40 -	mov ebp,esp
   11.41 -
   11.42 -; Save the registers used by the blitters, necessary for optimized code
   11.43 -	pusha
   11.44 -
   11.45 -	mov eax,[ebp+8]			; eax = ConverterInfo*
   11.46 -; Nothing to convert when either dimension is zero
   11.47 -        cmp dword [eax+4],BYTE 0	; s_width
   11.48 -	je endconvert
   11.49 -        cmp dword [eax+8],BYTE 0	; s_height: y_loop decrements before it tests, so 0 would wrap
   11.50 -	je endconvert
   11.51 -	mov ebp,eax			; ebp = ConverterInfo* from here on
   11.52 -	mov esi,[ebp+0]			; s_pixels
   11.53 -	mov edi,[ebp+16]		; d_pixels
   11.54 -	
   11.55 -y_loop:	
   11.56 -	mov ecx,[ebp+4]			; s_width = pixel count handed to the row converter
   11.57 -
   11.58 -	call [ebp+32]			; converter_function for one row
   11.59 -
   11.60 -	add esi,[ebp+12]		; s_add
   11.61 -	add edi,[ebp+28]		; d_add
   11.62 -	
   11.63 -	dec dword  [ebp+8]		; rows are counted down in place in s_height
   11.64 -	jnz y_loop
   11.65 -
   11.66 -; The early exits join here so pusha/push ebp are undone on every path
   11.67 -endconvert:	
   11.68 -; Restore the registers used by the blitters, necessary for optimized code
   11.69 -	popa
   11.70 -	
   11.71 -	pop ebp
   11.72 -	ret		
   11.73 -
   11.74 -
   11.75 -
   11.76 -%ifidn __OUTPUT_FORMAT__,elf
   11.77 -section .note.GNU-stack noalloc noexec nowrite progbits
   11.78 -%endif
    12.1 --- a/src/hermes/x86p_16.asm	Wed Aug 15 04:04:17 2007 +0000
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,490 +0,0 @@
    12.4 -;
    12.5 -; x86 format converters for HERMES
    12.6 -; Copyright (c) 1998 Glenn Fiedler (gaffer@gaffer.org)
    12.7 -; This source code is licensed under the GNU LGPL
    12.8 -; 
    12.9 -; Please refer to the file COPYING.LIB contained in the distribution for
   12.10 -; licensing conditions		
   12.11 -; 
   12.12 -; Routines adjusted for Hermes by Christian Nentwich (brn@eleet.mcb.at)
   12.13 -; Used with permission.
   12.14 -; 
   12.15 -
   12.16 -BITS 32
   12.17 -
   12.18 -%include "common.inc"
   12.19 -
   12.20 -SDL_FUNC _ConvertX86p16_16BGR565
   12.21 -SDL_FUNC _ConvertX86p16_16RGB555
   12.22 -SDL_FUNC _ConvertX86p16_16BGR555
   12.23 -SDL_FUNC _ConvertX86p16_8RGB332
   12.24 -
   12.25 -EXTERN _ConvertX86
   12.26 -
   12.27 -SECTION .text
   12.28 -
   12.29 -_ConvertX86p16_16BGR565:
   12.30 -    ; Copy ECX 16-bit pixels from [ESI] to [EDI], exchanging bits 0-4 with bits 11-15.
   12.31 -    ; check short
   12.32 -    cmp ecx,BYTE 16
   12.33 -    ja .L3
   12.34 -
   12.35 -    ; 16 pixels or fewer: plain one-pixel-per-pass loop
   12.36 -.L1 ; short loop
   12.37 -    mov al,[esi]
   12.38 -    mov ah,[esi+1]
   12.39 -    mov ebx,eax
   12.40 -    mov edx,eax
   12.41 -    shr eax,11                  ; bits 11-15 -> bits 0-4
   12.42 -    and eax,BYTE 11111b
   12.43 -    and ebx,11111100000b        ; bits 5-10 stay where they are
   12.44 -    shl edx,11                  ; bits 0-4 -> bits 11-15 (only AL/AH are stored)
   12.45 -    add eax,ebx
   12.46 -    add eax,edx
   12.47 -    mov [edi],al
   12.48 -    mov [edi+1],ah
   12.49 -    add esi,BYTE 2
   12.50 -    add edi,BYTE 2
   12.51 -    dec ecx
   12.52 -    jnz .L1
   12.53 -.L2
   12.54 -    retn
   12.55 -
   12.56 -.L3 ; head: convert one pixel first when EDI is not a multiple of 4
   12.57 -    mov eax,edi
   12.58 -    and eax,BYTE 11b
   12.59 -    jz .L4
   12.60 -    mov al,[esi]
   12.61 -    mov ah,[esi+1]
   12.62 -    mov ebx,eax
   12.63 -    mov edx,eax
   12.64 -    shr eax,11
   12.65 -    and eax,BYTE 11111b
   12.66 -    and ebx,11111100000b
   12.67 -    shl edx,11
   12.68 -    add eax,ebx
   12.69 -    add eax,edx
   12.70 -    mov [edi],al
   12.71 -    mov [edi+1],ah
   12.72 -    add esi,BYTE 2
   12.73 -    add edi,BYTE 2
   12.74 -    dec ecx
   12.75 -
   12.76 -.L4 ; save count
   12.77 -    push ecx
   12.78 -
   12.79 -    ; unroll twice
   12.80 -    shr ecx,1
   12.81 -    
   12.82 -    ; point arrays to end
   12.83 -    lea esi,[esi+ecx*4]
   12.84 -    lea edi,[edi+ecx*4]
   12.85 -
   12.86 -    ; negative counter 
   12.87 -    neg ecx
   12.88 -    jmp SHORT .L6
   12.89 -    ; the loop is skewed: each pass first stores the dword computed by the previous pass
   12.90 -.L5     mov [edi+ecx*4-4],eax
   12.91 -.L6     mov eax,[esi+ecx*4]
   12.92 -
   12.93 -        mov ebx,[esi+ecx*4]
   12.94 -        and eax,07E007E0h         
   12.95 -
   12.96 -        mov edx,[esi+ecx*4]
   12.97 -        and ebx,0F800F800h
   12.98 -
   12.99 -        shr ebx,11
  12.100 -        and edx,001F001Fh
  12.101 -
  12.102 -        shl edx,11
  12.103 -        add eax,ebx
  12.104 -
  12.105 -        add eax,edx                 
  12.106 -        inc ecx
  12.107 -
  12.108 -        jnz .L5                 
  12.109 -    ; store the dword produced by the final pass
  12.110 -    mov [edi+ecx*4-4],eax
  12.111 -
  12.112 -    ; tail: one leftover pixel when the saved count is odd
  12.113 -    pop ecx
  12.114 -    and ecx,BYTE 1
  12.115 -    jz .L7
  12.116 -    mov al,[esi]
  12.117 -    mov ah,[esi+1]
  12.118 -    mov ebx,eax
  12.119 -    mov edx,eax
  12.120 -    shr eax,11
  12.121 -    and eax,BYTE 11111b
  12.122 -    and ebx,11111100000b
  12.123 -    shl edx,11
  12.124 -    add eax,ebx
  12.125 -    add eax,edx
  12.126 -    mov [edi],al
  12.127 -    mov [edi+1],ah
  12.128 -    add esi,BYTE 2
  12.129 -    add edi,BYTE 2
  12.130 -
  12.131 -.L7
  12.132 -    retn
  12.133 -
  12.134 -
  12.135 -
  12.136 -
  12.137 -
  12.138 -
  12.139 -_ConvertX86p16_16RGB555:
  12.140 -    ; Copy ECX 16-bit pixels from [ESI] to [EDI]: bits 6-15 move down to 5-14, bit 5 is dropped, bits 0-4 stay.
  12.141 -    ; check short
  12.142 -    cmp ecx,BYTE 32
  12.143 -    ja .L3
  12.144 -
  12.145 -    ; 32 pixels or fewer: plain one-pixel-per-pass loop
  12.146 -.L1 ; short loop
  12.147 -    mov al,[esi]
  12.148 -    mov ah,[esi+1]
  12.149 -    mov ebx,eax
  12.150 -    shr ebx,1                           ; upper fields move down one bit
  12.151 -    and ebx,     0111111111100000b
  12.152 -    and eax,BYTE 0000000000011111b      ; low 5 bits are kept as they are
  12.153 -    add eax,ebx
  12.154 -    mov [edi],al
  12.155 -    mov [edi+1],ah
  12.156 -    add esi,BYTE 2
  12.157 -    add edi,BYTE 2
  12.158 -    dec ecx
  12.159 -    jnz .L1
  12.160 -.L2
  12.161 -    retn
  12.162 -
  12.163 -.L3 ; head: convert one pixel first when EDI is not a multiple of 4
  12.164 -    mov eax,edi
  12.165 -    and eax,BYTE 11b
  12.166 -    jz .L4
  12.167 -    mov al,[esi]
  12.168 -    mov ah,[esi+1]
  12.169 -    mov ebx,eax
  12.170 -    shr ebx,1
  12.171 -    and ebx,     0111111111100000b
  12.172 -    and eax,BYTE 0000000000011111b
  12.173 -    add eax,ebx
  12.174 -    mov [edi],al
  12.175 -    mov [edi+1],ah
  12.176 -    add esi,BYTE 2
  12.177 -    add edi,BYTE 2
  12.178 -    dec ecx
  12.179 -
  12.180 -.L4 ; save ebp
  12.181 -    push ebp
  12.182 -
  12.183 -    ; save count
  12.184 -    push ecx
  12.185 -
  12.186 -    ; unroll four times
  12.187 -    shr ecx,2
  12.188 -    
  12.189 -    ; point arrays to end
  12.190 -    lea esi,[esi+ecx*8]
  12.191 -    lea edi,[edi+ecx*8]
  12.192 -
  12.193 -    ; negative counter 
  12.194 -    xor ebp,ebp
  12.195 -    sub ebp,ecx
  12.196 -    ; main loop: two dwords (four pixels) per pass, indexed by the negative counter in EBP
  12.197 -.L5     mov eax,[esi+ebp*8]        ; agi?
  12.198 -        mov ecx,[esi+ebp*8+4]
  12.199 -       
  12.200 -        mov ebx,eax
  12.201 -        mov edx,ecx
  12.202 -
  12.203 -        and eax,0FFC0FFC0h
  12.204 -        and ecx,0FFC0FFC0h
  12.205 -
  12.206 -        shr eax,1
  12.207 -        and ebx,001F001Fh
  12.208 -
  12.209 -        shr ecx,1
  12.210 -        and edx,001F001Fh
  12.211 -
  12.212 -        add eax,ebx
  12.213 -        add ecx,edx
  12.214 -
  12.215 -        mov [edi+ebp*8],eax
  12.216 -        mov [edi+ebp*8+4],ecx
  12.217 -
  12.218 -        inc ebp
  12.219 -        jnz .L5                 
  12.220 -
  12.221 -    ; tail: up to three leftover pixels; ECX is re-masked with 3 on every pass
  12.222 -    pop ecx
  12.223 -.L6 and ecx,BYTE 11b
  12.224 -    jz .L7
  12.225 -    mov al,[esi]
  12.226 -    mov ah,[esi+1]
  12.227 -    mov ebx,eax
  12.228 -    shr ebx,1
  12.229 -    and ebx,     0111111111100000b
  12.230 -    and eax,BYTE 0000000000011111b
  12.231 -    add eax,ebx
  12.232 -    mov [edi],al
  12.233 -    mov [edi+1],ah
  12.234 -    add esi,BYTE 2
  12.235 -    add edi,BYTE 2
  12.236 -    dec ecx
  12.237 -    jmp SHORT .L6
  12.238 -
  12.239 -.L7 pop ebp
  12.240 -    retn
  12.241 -
  12.242 -
  12.243 -
  12.244 -
  12.245 -
  12.246 -
  12.247 -_ConvertX86p16_16BGR555:
  12.248 -    ; Copy ECX 16-bit pixels from [ESI] to [EDI]: bits 11-15 -> 0-4, bits 6-10 -> 5-9, bits 0-4 -> 10-14.
  12.249 -    ; check short
  12.250 -    cmp ecx,BYTE 16
  12.251 -    ja .L3
  12.252 -
  12.253 -    ; 16 pixels or fewer: plain one-pixel-per-pass loop
  12.254 -.L1 ; short loop
  12.255 -    mov al,[esi]
  12.256 -    mov ah,[esi+1]
  12.257 -    mov ebx,eax
  12.258 -    mov edx,eax
  12.259 -    shr eax,11                  ; bits 11-15 -> bits 0-4
  12.260 -    and eax,BYTE 11111b
  12.261 -    shr ebx,1                   ; bits 6-10 -> bits 5-9 (bit 5 is dropped by the mask)
  12.262 -    and ebx,1111100000b
  12.263 -    shl edx,10                  ; bits 0-4 -> bits 10-14
  12.264 -    and edx,0111110000000000b
  12.265 -    add eax,ebx
  12.266 -    add eax,edx
  12.267 -    mov [edi],al
  12.268 -    mov [edi+1],ah
  12.269 -    add esi,BYTE 2
  12.270 -    add edi,BYTE 2
  12.271 -    dec ecx
  12.272 -    jnz .L1
  12.273 -.L2
  12.274 -    retn
  12.275 -
  12.276 -.L3 ; head: convert one pixel first when EDI is not a multiple of 4
  12.277 -    mov eax,edi
  12.278 -    and eax,BYTE 11b
  12.279 -    jz .L4
  12.280 -    mov al,[esi]
  12.281 -    mov ah,[esi+1]
  12.282 -    mov ebx,eax
  12.283 -    mov edx,eax
  12.284 -    shr eax,11
  12.285 -    and eax,BYTE 11111b
  12.286 -    shr ebx,1
  12.287 -    and ebx,1111100000b
  12.288 -    shl edx,10
  12.289 -    and edx,0111110000000000b
  12.290 -    add eax,ebx
  12.291 -    add eax,edx
  12.292 -    mov [edi],al
  12.293 -    mov [edi+1],ah
  12.294 -    add esi,BYTE 2
  12.295 -    add edi,BYTE 2
  12.296 -    dec ecx
  12.297 -
  12.298 -.L4 ; save count
  12.299 -    push ecx
  12.300 -
  12.301 -    ; unroll twice
  12.302 -    shr ecx,1
  12.303 -    
  12.304 -    ; point arrays to end
  12.305 -    lea esi,[esi+ecx*4]
  12.306 -    lea edi,[edi+ecx*4]
  12.307 -
  12.308 -    ; negative counter 
  12.309 -    neg ecx
  12.310 -    jmp SHORT .L6
  12.311 -    ; the loop is skewed: each pass first stores the dword computed by the previous pass
  12.312 -.L5     mov [edi+ecx*4-4],eax
  12.313 -.L6     mov eax,[esi+ecx*4]
  12.314 -
  12.315 -        shr eax,1
  12.316 -        mov ebx,[esi+ecx*4]
  12.317 -        
  12.318 -        and eax,03E003E0h         
  12.319 -        mov edx,[esi+ecx*4]
  12.320 -
  12.321 -        and ebx,0F800F800h
  12.322 -
  12.323 -        shr ebx,11
  12.324 -        and edx,001F001Fh
  12.325 -
  12.326 -        shl edx,10
  12.327 -        add eax,ebx
  12.328 -
  12.329 -        add eax,edx                 
  12.330 -        inc ecx
  12.331 -
  12.332 -        jnz .L5                 
  12.333 -    ; store the dword produced by the final pass
  12.334 -    mov [edi+ecx*4-4],eax
  12.335 -
  12.336 -    ; tail: one leftover pixel when the saved count is odd
  12.337 -    pop ecx
  12.338 -    and ecx,BYTE 1
  12.339 -    jz .L7
  12.340 -    mov al,[esi]
  12.341 -    mov ah,[esi+1]
  12.342 -    mov ebx,eax
  12.343 -    mov edx,eax
  12.344 -    shr eax,11
  12.345 -    and eax,BYTE 11111b
  12.346 -    shr ebx,1
  12.347 -    and ebx,1111100000b
  12.348 -    shl edx,10
  12.349 -    and edx,0111110000000000b
  12.350 -    add eax,ebx
  12.351 -    add eax,edx
  12.352 -    mov [edi],al
  12.353 -    mov [edi+1],ah
  12.354 -    add esi,BYTE 2
  12.355 -    add edi,BYTE 2
  12.356 -
  12.357 -.L7
  12.358 -    retn
  12.359 -
  12.360 -
  12.361 -
  12.362 -
  12.363 -
  12.364 -
  12.365 -_ConvertX86p16_8RGB332:
  12.366 -    ; Reduce ECX 16-bit pixels at [ESI] to one byte each at [EDI]: bits 3-4 -> 0-1, bits 8-10 -> 2-4, bits 13-15 -> 5-7.
  12.367 -    ; check short
  12.368 -    cmp ecx,BYTE 16
  12.369 -    ja .L3
  12.370 -
  12.371 -    ; 16 pixels or fewer: plain one-pixel-per-pass loop
  12.372 -.L1 ; short loop
  12.373 -    mov al,[esi+0]
  12.374 -    mov ah,[esi+1]
  12.375 -    mov ebx,eax
  12.376 -    mov edx,eax
  12.377 -    and eax,BYTE 11000b         ; blue
  12.378 -    shr eax,3
  12.379 -    and ebx,11100000000b        ; green
  12.380 -    shr ebx,6
  12.381 -    and edx,1110000000000000b   ; red
  12.382 -    shr edx,8
  12.383 -    add eax,ebx
  12.384 -    add eax,edx
  12.385 -    mov [edi],al
  12.386 -    add esi,BYTE 2
  12.387 -    inc edi
  12.388 -    dec ecx
  12.389 -    jnz .L1
  12.390 -.L2
  12.391 -    retn
  12.392 -    ; head: convert single pixels until EDI is a multiple of 4 (loops back to .L3)
  12.393 -.L3 mov eax,edi
  12.394 -    and eax,BYTE 11b
  12.395 -    jz .L4
  12.396 -    mov al,[esi+0]
  12.397 -    mov ah,[esi+1]
  12.398 -    mov ebx,eax
  12.399 -    mov edx,eax
  12.400 -    and eax,BYTE 11000b         ; blue
  12.401 -    shr eax,3
  12.402 -    and ebx,11100000000b        ; green
  12.403 -    shr ebx,6
  12.404 -    and edx,1110000000000000b   ; red
  12.405 -    shr edx,8
  12.406 -    add eax,ebx
  12.407 -    add eax,edx
  12.408 -    mov [edi],al
  12.409 -    add esi,BYTE 2
  12.410 -    inc edi
  12.411 -    dec ecx
  12.412 -    jmp SHORT .L3
  12.413 -
  12.414 -.L4 ; save ebp
  12.415 -    push ebp
  12.416 -
  12.417 -    ; save count
  12.418 -    push ecx
  12.419 -
  12.420 -    ; unroll 4 times
  12.421 -    shr ecx,2
  12.422 -
  12.423 -    ; prestep
  12.424 -    mov dl,[esi+0]
  12.425 -    mov bl,[esi+1]
  12.426 -    mov dh,[esi+2]
  12.427 -    ; main loop: EDX gathers the low bytes and EBX the high bytes of four source pixels
  12.428 -.L5     shl edx,16
  12.429 -        mov bh,[esi+3]
  12.430 -        
  12.431 -        shl ebx,16
  12.432 -        mov dl,[esi+4]
  12.433 -
  12.434 -        mov dh,[esi+6]
  12.435 -        mov bl,[esi+5]
  12.436 -
  12.437 -        and edx,00011000000110000001100000011000b
  12.438 -        mov bh,[esi+7]
  12.439 -
  12.440 -        ror edx,16+3
  12.441 -        mov eax,ebx                                     ; setup eax for reds
  12.442 -
  12.443 -        and ebx,00000111000001110000011100000111b
  12.444 -        and eax,11100000111000001110000011100000b       ; reds
  12.445 -
  12.446 -        ror ebx,16-2
  12.447 -        add esi,BYTE 8
  12.448 -
  12.449 -        ror eax,16
  12.450 -        add edi,BYTE 4
  12.451 -        ; the loads from [esi+0..2] below are the prestep for the next pass and also run on the last pass
  12.452 -        add eax,ebx
  12.453 -        mov bl,[esi+1]                                  ; greens
  12.454 -
  12.455 -        add eax,edx
  12.456 -        mov dl,[esi+0]                                  ; blues
  12.457 -
  12.458 -        mov [edi-4],eax
  12.459 -        mov dh,[esi+2]
  12.460 -
  12.461 -        dec ecx
  12.462 -        jnz .L5                 
  12.463 -    
  12.464 -    ; check tail
  12.465 -    pop ecx
  12.466 -    and ecx,BYTE 11b
  12.467 -    jz .L7
  12.468 -
  12.469 -.L6 ; tail
  12.470 -    mov al,[esi+0]
  12.471 -    mov ah,[esi+1]
  12.472 -    mov ebx,eax
  12.473 -    mov edx,eax
  12.474 -    and eax,BYTE 11000b         ; blue
  12.475 -    shr eax,3
  12.476 -    and ebx,11100000000b        ; green
  12.477 -    shr ebx,6
  12.478 -    and edx,1110000000000000b   ; red
  12.479 -    shr edx,8
  12.480 -    add eax,ebx
  12.481 -    add eax,edx
  12.482 -    mov [edi],al
  12.483 -    add esi,BYTE 2
  12.484 -    inc edi
  12.485 -    dec ecx
  12.486 -    jnz .L6
  12.487 -
  12.488 -.L7 pop ebp
  12.489 -    retn
  12.490 -
  12.491 -%ifidn __OUTPUT_FORMAT__,elf
  12.492 -section .note.GNU-stack noalloc noexec nowrite progbits
  12.493 -%endif
    13.1 --- a/src/hermes/x86p_32.asm	Wed Aug 15 04:04:17 2007 +0000
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,1045 +0,0 @@
    13.4 -;
    13.5 -; x86 format converters for HERMES
    13.6 -; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
    13.7 -; This source code is licensed under the GNU LGPL
    13.8 -; 
    13.9 -; Please refer to the file COPYING.LIB contained in the distribution for
   13.10 -; licensing conditions		
   13.11 -;
   13.12 -; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
   13.13 -; 
   13.14 -
   13.15 -BITS 32
   13.16 -
   13.17 -%include "common.inc"
   13.18 -
   13.19 -SDL_FUNC _ConvertX86p32_32BGR888
   13.20 -SDL_FUNC _ConvertX86p32_32RGBA888
   13.21 -SDL_FUNC _ConvertX86p32_32BGRA888
   13.22 -SDL_FUNC _ConvertX86p32_24RGB888	
   13.23 -SDL_FUNC _ConvertX86p32_24BGR888
   13.24 -SDL_FUNC _ConvertX86p32_16RGB565
   13.25 -SDL_FUNC _ConvertX86p32_16BGR565
   13.26 -SDL_FUNC _ConvertX86p32_16RGB555
   13.27 -SDL_FUNC _ConvertX86p32_16BGR555
   13.28 -SDL_FUNC _ConvertX86p32_8RGB332
   13.29 -
   13.30 -SECTION .text
   13.31 -
   13.32 -;; _Convert_*
   13.33 -;; Parameters:
   13.34 -;;   ESI = source 
   13.35 -;;   EDI = dest
   13.36 -;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
   13.37 -;; Destroys:
   13.38 -;;   EAX, EBX, EDX
   13.39 -
   13.40 -
   13.41 -_ConvertX86p32_32BGR888:
   13.42 -    ; Copy ECX 32-bit pixels from [ESI] to [EDI] with bytes 0 and 2 of each pixel exchanged.
   13.43 -    ; check short
   13.44 -    cmp ecx,BYTE 32
   13.45 -    ja .L3
   13.46 -
   13.47 -.L1 ; short loop
   13.48 -    mov edx,[esi]
   13.49 -    bswap edx                   ; reverse all four bytes
   13.50 -    ror edx,8                   ; rotate the original top byte back to the top
   13.51 -    mov [edi],edx
   13.52 -    add esi,BYTE 4
   13.53 -    add edi,BYTE 4
   13.54 -    dec ecx
   13.55 -    jnz .L1
   13.56 -.L2
   13.57 -    retn
   13.58 -
   13.59 -.L3 ; save ebp
   13.60 -    push ebp
   13.61 -
   13.62 -    ; unroll four times
   13.63 -    mov ebp,ecx
   13.64 -    shr ebp,2
   13.65 -    
   13.66 -    ; save count
   13.67 -    push ecx
   13.68 -    ; main loop: four pixels per pass, EBP counts the passes
   13.69 -.L4     mov eax,[esi]
   13.70 -        mov ebx,[esi+4]
   13.71 -
   13.72 -        bswap eax
   13.73 -
   13.74 -        bswap ebx
   13.75 -
   13.76 -        ror eax,8
   13.77 -        mov ecx,[esi+8]
   13.78 -
   13.79 -        ror ebx,8
   13.80 -        mov edx,[esi+12]
   13.81 -
   13.82 -        bswap ecx
   13.83 -
   13.84 -        bswap edx
   13.85 -
   13.86 -        ror ecx,8
   13.87 -        mov [edi+0],eax
   13.88 -
   13.89 -        ror edx,8
   13.90 -        mov [edi+4],ebx
   13.91 -
   13.92 -        mov [edi+8],ecx
   13.93 -        mov [edi+12],edx
   13.94 -
   13.95 -        add esi,BYTE 16
   13.96 -        add edi,BYTE 16
   13.97 -
   13.98 -        dec ebp
   13.99 -        jnz .L4                 
  13.100 -
  13.101 -    ; check tail
  13.102 -    pop ecx
  13.103 -    and ecx,BYTE 11b
  13.104 -    jz .L6
  13.105 -
  13.106 -.L5 ; tail loop
  13.107 -    mov edx,[esi]
  13.108 -    bswap edx
  13.109 -    ror edx,8
  13.110 -    mov [edi],edx
  13.111 -    add esi,BYTE 4
  13.112 -    add edi,BYTE 4
  13.113 -    dec ecx
  13.114 -    jnz .L5
  13.115 -
  13.116 -.L6 pop ebp
  13.117 -    retn
  13.118 -	
  13.119 -
  13.120 -	
  13.121 -		
  13.122 -_ConvertX86p32_32RGBA888:
  13.123 -    ; Copy ECX 32-bit pixels from [ESI] to [EDI], rotating each dword left by 8 bits.
  13.124 -    ; check short
  13.125 -    cmp ecx,BYTE 32
  13.126 -    ja .L3
  13.127 -
  13.128 -.L1 ; short loop
  13.129 -    mov edx,[esi]
  13.130 -    rol edx,8                   ; top byte wraps around to the bottom
  13.131 -    mov [edi],edx
  13.132 -    add esi,BYTE 4
  13.133 -    add edi,BYTE 4
  13.134 -    dec ecx
  13.135 -    jnz .L1
  13.136 -.L2
  13.137 -    retn
  13.138 -
  13.139 -.L3 ; save ebp
  13.140 -    push ebp
  13.141 -
  13.142 -    ; unroll four times
  13.143 -    mov ebp,ecx
  13.144 -    shr ebp,2
  13.145 -    
  13.146 -    ; save count
  13.147 -    push ecx
  13.148 -    ; main loop: four pixels per pass, EBP counts the passes
  13.149 -.L4     mov eax,[esi]
  13.150 -        mov ebx,[esi+4]
  13.151 -
  13.152 -        rol eax,8
  13.153 -        mov ecx,[esi+8]
  13.154 -
  13.155 -        rol ebx,8
  13.156 -        mov edx,[esi+12]
  13.157 -
  13.158 -        rol ecx,8
  13.159 -        mov [edi+0],eax
  13.160 -
  13.161 -        rol edx,8
  13.162 -        mov [edi+4],ebx
  13.163 -
  13.164 -        mov [edi+8],ecx
  13.165 -        mov [edi+12],edx
  13.166 -
  13.167 -        add esi,BYTE 16
  13.168 -        add edi,BYTE 16
  13.169 -
  13.170 -        dec ebp
  13.171 -        jnz .L4                 
  13.172 -
  13.173 -    ; check tail
  13.174 -    pop ecx
  13.175 -    and ecx,BYTE 11b
  13.176 -    jz .L6
  13.177 -
  13.178 -.L5 ; tail loop
  13.179 -    mov edx,[esi]
  13.180 -    rol edx,8
  13.181 -    mov [edi],edx
  13.182 -    add esi,BYTE 4
  13.183 -    add edi,BYTE 4
  13.184 -    dec ecx
  13.185 -    jnz .L5
  13.186 -
  13.187 -.L6 pop ebp
  13.188 -    retn
  13.189 -
  13.190 -	
  13.191 -
  13.192 -
  13.193 -_ConvertX86p32_32BGRA888:
  13.194 -    ; Copy ECX 32-bit pixels from [ESI] to [EDI] with the byte order of each dword reversed.
  13.195 -    ; check short
  13.196 -    cmp ecx,BYTE 32
  13.197 -    ja .L3
  13.198 -
  13.199 -.L1 ; short loop
  13.200 -    mov edx,[esi]
  13.201 -    bswap edx                   ; reverse all four bytes
  13.202 -    mov [edi],edx
  13.203 -    add esi,BYTE 4
  13.204 -    add edi,BYTE 4
  13.205 -    dec ecx
  13.206 -    jnz .L1
  13.207 -.L2
  13.208 -    retn
  13.209 -
  13.210 -.L3 ; save ebp
  13.211 -    push ebp
  13.212 -
  13.213 -    ; unroll four times
  13.214 -    mov ebp,ecx
  13.215 -    shr ebp,2
  13.216 -    
  13.217 -    ; save count
  13.218 -    push ecx
  13.219 -    ; main loop: four pixels per pass, EBP counts the passes
  13.220 -.L4     mov eax,[esi]
  13.221 -        mov ebx,[esi+4]
  13.222 -
  13.223 -        mov ecx,[esi+8]
  13.224 -        mov edx,[esi+12]
  13.225 -
  13.226 -        bswap eax
  13.227 -
  13.228 -        bswap ebx
  13.229 -
  13.230 -        bswap ecx
  13.231 -
  13.232 -        bswap edx
  13.233 -
  13.234 -        mov [edi+0],eax
  13.235 -        mov [edi+4],ebx
  13.236 -
  13.237 -        mov [edi+8],ecx
  13.238 -        mov [edi+12],edx
  13.239 -
  13.240 -        add esi,BYTE 16
  13.241 -        add edi,BYTE 16
  13.242 -
  13.243 -        dec ebp
  13.244 -        jnz .L4                 
  13.245 -
  13.246 -    ; check tail
  13.247 -    pop ecx
  13.248 -    and ecx,BYTE 11b
  13.249 -    jz .L6
  13.250 -
  13.251 -.L5 ; tail loop
  13.252 -    mov edx,[esi]
  13.253 -    bswap edx
  13.254 -    mov [edi],edx
  13.255 -    add esi,BYTE 4
  13.256 -    add edi,BYTE 4
  13.257 -    dec ecx
  13.258 -    jnz .L5
  13.259 -
  13.260 -.L6 pop ebp
  13.261 -    retn
  13.262 -
  13.263 -
  13.264 -	
  13.265 -	
  13.266 -;; 32 bit RGB 888 to 24 BIT RGB 888
  13.267 -
  13.268 -_ConvertX86p32_24RGB888:
  13.269 -	; Copy the low three bytes of each of ECX 32-bit pixels at [ESI] to packed 3-byte pixels at [EDI].
  13.270 -	; check short
  13.271 -	cmp ecx,BYTE 32
  13.272 -	ja .L3
  13.273 -
  13.274 -.L1	; short loop
  13.275 -	mov al,[esi]
  13.276 -	mov bl,[esi+1]
  13.277 -	mov dl,[esi+2]
  13.278 -	mov [edi],al
  13.279 -	mov [edi+1],bl
  13.280 -	mov [edi+2],dl
  13.281 -	add esi,BYTE 4
  13.282 -	add edi,BYTE 3
  13.283 -	dec ecx
  13.284 -	jnz .L1
  13.285 -.L2 
  13.286 -	retn
  13.287 -	; the head below loops back to .L3, converting single pixels until EDI is a multiple of 4
  13.288 -.L3	;	 head
  13.289 -	mov edx,edi
  13.290 -	and edx,BYTE 11b
  13.291 -	jz .L4
  13.292 -	mov al,[esi]
  13.293 -	mov bl,[esi+1]
  13.294 -	mov dl,[esi+2]
  13.295 -	mov [edi],al
  13.296 -	mov [edi+1],bl
  13.297 -	mov [edi+2],dl
  13.298 -	add esi,BYTE 4
  13.299 -	add edi,BYTE 3
  13.300 -	dec ecx
  13.301 -	jmp SHORT .L3
  13.302 -
  13.303 -.L4 ; unroll 4 times
  13.304 -	push ebp
  13.305 -	mov ebp,ecx
  13.306 -	shr ebp,2
  13.307 -
  13.308 -    ; save count
  13.309 -	push ecx
  13.310 -	; main loop: four source pixels (16 bytes) become three destination dwords (12 bytes) per pass
  13.311 -.L5     mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
  13.312 -        mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
  13.313 -
  13.314 -        shl eax,8                       ;                        eax = [R][G][B][.]
  13.315 -        mov ecx,[esi+12]                ; fourth dword (offset 12)  ecx = [a][r][g][b]
  13.316 -
  13.317 -        shl ebx,8                       ;                        ebx = [r][g][b][.]
  13.318 -        mov al,[esi+4]                  ;                        eax = [R][G][B][b]
  13.319 -
  13.320 -        ror eax,8                       ;                        eax = [b][R][G][B] (done)
  13.321 -        mov bh,[esi+8+1]                ;                        ebx = [r][g][G][.]
  13.322 -
  13.323 -        mov [edi],eax
  13.324 -        add edi,BYTE 3*4
  13.325 -
  13.326 -        shl ecx,8                       ;                        ecx = [r][g][b][.]
  13.327 -        mov bl,[esi+8+0]                ;                        ebx = [r][g][G][B]
  13.328 -
  13.329 -        rol ebx,16                      ;                        ebx = [G][B][r][g] (done)
  13.330 -        mov cl,[esi+8+2]                ;                        ecx = [r][g][b][R] (done)
  13.331 -
  13.332 -        mov [edi+4-3*4],ebx
  13.333 -        add esi,BYTE 4*4
  13.334 -        
  13.335 -        mov [edi+8-3*4],ecx
  13.336 -        dec ebp
  13.337 -
  13.338 -        jnz .L5
  13.339 -
  13.340 -    ; check tail
  13.341 -	pop ecx
  13.342 -	and ecx,BYTE 11b
  13.343 -	jz .L7
  13.344 -
  13.345 -.L6 ; tail loop
  13.346 -	mov al,[esi]
  13.347 -	mov bl,[esi+1]
  13.348 -	mov dl,[esi+2]
  13.349 -	mov [edi],al
  13.350 -	mov [edi+1],bl
  13.351 -	mov [edi+2],dl
  13.352 -	add esi,BYTE 4
  13.353 -	add edi,BYTE 3
  13.354 -	dec ecx
  13.355 -	jnz .L6
  13.356 -
  13.357 -.L7	pop ebp
  13.358 -	retn
  13.359 -
  13.360 -
  13.361 -
  13.362 -
  13.363 -;; 32 bit RGB 888 to 24 bit BGR 888
  13.364 -
  13.365 -_ConvertX86p32_24BGR888:
  13.366 -
  13.367 -	; check short
  13.368 -	cmp ecx,BYTE 32
  13.369 -	ja .L3
  13.370 -
  13.371 -	
  13.372 -.L1	; short loop
  13.373 -	mov dl,[esi]
  13.374 -	mov bl,[esi+1]
  13.375 -	mov al,[esi+2]
  13.376 -	mov [edi],al
  13.377 -	mov [edi+1],bl
  13.378 -	mov [edi+2],dl
  13.379 -	add esi,BYTE 4
  13.380 -	add edi,BYTE 3
  13.381 -	dec ecx
  13.382 -	jnz .L1
  13.383 -.L2
  13.384 -	retn
  13.385 -
  13.386 -.L3 ; head
  13.387 -	mov edx,edi
  13.388 -	and edx,BYTE 11b
  13.389 -	jz .L4
  13.390 -	mov dl,[esi]
  13.391 -	mov bl,[esi+1]
  13.392 -	mov al,[esi+2]
  13.393 -	mov [edi],al
  13.394 -	mov [edi+1],bl
  13.395 -	mov [edi+2],dl
  13.396 -	add esi,BYTE 4
  13.397 -	add edi,BYTE 3
  13.398 -	dec ecx
  13.399 -	jmp SHORT .L3
  13.400 -
  13.401 -.L4	; unroll 4 times
  13.402 -	push ebp
  13.403 -	mov ebp,ecx
  13.404 -	shr ebp,2
  13.405 -
  13.406 -	; save count
  13.407 -	push ecx
  13.408 -
  13.409 -.L5     
  13.410 -	mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
  13.411 -        mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
  13.412 -        
  13.413 -        bswap eax                       ;                        eax = [B][G][R][A]
  13.414 -
  13.415 -        bswap ebx                       ;                        ebx = [b][g][r][a]
  13.416 -
  13.417 -        mov al,[esi+4+2]                ;                        eax = [B][G][R][r] 
  13.418 -        mov bh,[esi+4+4+1]              ;                        ebx = [b][g][G][a]
  13.419 -
  13.420 -        ror eax,8                       ;                        eax = [r][B][G][R] (done)
  13.421 -        mov bl,[esi+4+4+2]              ;                        ebx = [b][g][G][R]
  13.422 -
  13.423 -        ror ebx,16                      ;                        ebx = [G][R][b][g] (done)
  13.424 -        mov [edi],eax
  13.425 -    
  13.426 -        mov [edi+4],ebx
  13.427 -        mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]
  13.428 -        
  13.429 -        bswap ecx                       ;                        ecx = [b][g][r][a]
  13.430 -        
  13.431 -        mov cl,[esi+8]                  ;                        ecx = [b][g][r][B] (done)
  13.432 -        add esi,BYTE 4*4
  13.433 -
  13.434 -        mov [edi+8],ecx
  13.435 -        add edi,BYTE 3*4
  13.436 -
  13.437 -        dec ebp
  13.438 -        jnz .L5
  13.439 -
  13.440 -	; check tail
  13.441 -	pop ecx
  13.442 -	and ecx,BYTE 11b
  13.443 -	jz .L7
  13.444 -
  13.445 -.L6	; tail loop
  13.446 -	mov dl,[esi]
  13.447 -	mov bl,[esi+1]
  13.448 -	mov al,[esi+2]
  13.449 -	mov [edi],al
  13.450 -	mov [edi+1],bl
  13.451 -	mov [edi+2],dl
  13.452 -	add esi,BYTE 4
  13.453 -	add edi,BYTE 3
  13.454 -	dec ecx
  13.455 -	jnz .L6
  13.456 -
  13.457 -.L7 
  13.458 -	pop ebp
  13.459 -	retn
  13.460 - 
  13.461 -
  13.462 -	
  13.463 -		
  13.464 -;; 32 bit RGB 888 to 16 BIT RGB 565 
  13.465 -
  13.466 -_ConvertX86p32_16RGB565:
  13.467 -	; check short
  13.468 -	cmp ecx,BYTE 16
  13.469 -	ja .L3
  13.470 -
  13.471 -.L1 ; short loop
  13.472 -	mov bl,[esi+0]    ; blue
  13.473 -	mov al,[esi+1]    ; green
  13.474 -	mov ah,[esi+2]    ; red
  13.475 -	shr ah,3
  13.476 -        and al,11111100b
  13.477 -	shl eax,3
  13.478 -	shr bl,3
  13.479 -	add al,bl
  13.480 -	mov [edi+0],al
  13.481 -	mov [edi+1],ah
  13.482 -	add esi,BYTE 4
  13.483 -	add edi,BYTE 2
  13.484 -	dec ecx
  13.485 -	jnz .L1
  13.486 -
  13.487 -.L2:				; End of short loop
  13.488 -	retn
  13.489 -
  13.490 -	
  13.491 -.L3	; head
  13.492 -	mov ebx,edi
  13.493 -	and ebx,BYTE 11b
  13.494 -	jz .L4
  13.495 -	
  13.496 -	mov bl,[esi+0]    ; blue
  13.497 -	mov al,[esi+1]    ; green
  13.498 -	mov ah,[esi+2]    ; red
  13.499 -	shr ah,3
  13.500 -	and al,11111100b
  13.501 -	shl eax,3
  13.502 -	shr bl,3
  13.503 -	add al,bl
  13.504 -	mov [edi+0],al
  13.505 -	mov [edi+1],ah
  13.506 -	add esi,BYTE 4
  13.507 -	add edi,BYTE 2
  13.508 -	dec ecx
  13.509 -
  13.510 -.L4:	 
  13.511 -    ; save count
  13.512 -	push ecx
  13.513 -
  13.514 -    ; unroll twice
  13.515 -	shr ecx,1
  13.516 -    
  13.517 -    ; point arrays to end
  13.518 -	lea esi,[esi+ecx*8]
  13.519 -	lea edi,[edi+ecx*4]
  13.520 -
  13.521 -    ; negative counter 
  13.522 -	neg ecx
  13.523 -	jmp SHORT .L6
  13.524 -
  13.525 -.L5:	    
  13.526 -	mov [edi+ecx*4-4],eax
  13.527 -.L6:	
  13.528 -	mov eax,[esi+ecx*8]
  13.529 -
  13.530 -        shr ah,2
  13.531 -        mov ebx,[esi+ecx*8+4]
  13.532 -
  13.533 -        shr eax,3
  13.534 -        mov edx,[esi+ecx*8+4]
  13.535 -
  13.536 -        shr bh,2
  13.537 -        mov dl,[esi+ecx*8+2]
  13.538 -
  13.539 -        shl ebx,13
  13.540 -        and eax,000007FFh
  13.541 -        
  13.542 -        shl edx,8
  13.543 -        and ebx,07FF0000h
  13.544 -
  13.545 -        and edx,0F800F800h
  13.546 -        add eax,ebx
  13.547 -
  13.548 -        add eax,edx
  13.549 -        inc ecx
  13.550 -
  13.551 -        jnz .L5                 
  13.552 -
  13.553 -	mov [edi+ecx*4-4],eax
  13.554 -
  13.555 -    ; tail
  13.556 -	pop ecx
  13.557 -	test cl,1
  13.558 -	jz .L7
  13.559 -	
  13.560 -	mov bl,[esi+0]    ; blue
  13.561 -	mov al,[esi+1]    ; green
  13.562 -	mov ah,[esi+2]    ; red
  13.563 -	shr ah,3
  13.564 -	and al,11111100b
  13.565 -	shl eax,3
  13.566 -	shr bl,3
  13.567 -	add al,bl
  13.568 -	mov [edi+0],al
  13.569 -	mov [edi+1],ah
  13.570 -	add esi,BYTE 4
  13.571 -	add edi,BYTE 2
  13.572 -
  13.573 -.L7:	
  13.574 -	retn
  13.575 -
  13.576 -
  13.577 -
  13.578 -	
  13.579 -;; 32 bit RGB 888 to 16 BIT BGR 565 
  13.580 -
  13.581 -_ConvertX86p32_16BGR565:
  13.582 -	
  13.583 -	; check short
  13.584 -	cmp ecx,BYTE 16
  13.585 -	ja .L3
  13.586 -
  13.587 -.L1	; short loop
  13.588 -	mov ah,[esi+0]    ; blue
  13.589 -	mov al,[esi+1]    ; green
  13.590 -	mov bl,[esi+2]    ; red
  13.591 -	shr ah,3
  13.592 -	and al,11111100b
  13.593 -	shl eax,3
  13.594 -	shr bl,3
  13.595 -	add al,bl
  13.596 -	mov [edi+0],al
  13.597 -	mov [edi+1],ah
  13.598 -	add esi,BYTE 4
  13.599 -	add edi,BYTE 2
  13.600 -	dec ecx
  13.601 -	jnz .L1
  13.602 -.L2
  13.603 -	retn
  13.604 -
  13.605 -.L3	; head
  13.606 -	mov ebx,edi
  13.607 -	and ebx,BYTE 11b
  13.608 -	jz .L4   
  13.609 -	mov ah,[esi+0]    ; blue
  13.610 -	mov al,[esi+1]    ; green
  13.611 -	mov bl,[esi+2]    ; red
  13.612 -	shr ah,3
  13.613 -	and al,11111100b
  13.614 -	shl eax,3
  13.615 -	shr bl,3
  13.616 -	add al,bl
  13.617 -	mov [edi+0],al
  13.618 -	mov [edi+1],ah
  13.619 -	add esi,BYTE 4
  13.620 -	add edi,BYTE 2
  13.621 -	dec ecx
  13.622 -
  13.623 -.L4	; save count
  13.624 -	push ecx
  13.625 -
  13.626 -	; unroll twice
  13.627 -	shr ecx,1
  13.628 -    
  13.629 -	; point arrays to end
  13.630 -	lea esi,[esi+ecx*8]
  13.631 -	lea edi,[edi+ecx*4]
  13.632 -
  13.633 -	; negative count
  13.634 -	neg ecx
  13.635 -	jmp SHORT .L6
  13.636 -
  13.637 -.L5     
  13.638 -	mov [edi+ecx*4-4],eax            
  13.639 -.L6     
  13.640 -	mov edx,[esi+ecx*8+4]
  13.641 -
  13.642 -        mov bh,[esi+ecx*8+4]                       
  13.643 -        mov ah,[esi+ecx*8]                       
  13.644 -
  13.645 -        shr bh,3
  13.646 -        mov al,[esi+ecx*8+1]             
  13.647 -
  13.648 -        shr ah,3
  13.649 -        mov bl,[esi+ecx*8+5]           
  13.650 -
  13.651 -        shl eax,3
  13.652 -        mov dl,[esi+ecx*8+2]
  13.653 -
  13.654 -        shl ebx,19
  13.655 -        and eax,0000FFE0h              
  13.656 -                
  13.657 -        shr edx,3
  13.658 -        and ebx,0FFE00000h             
  13.659 -        
  13.660 -        and edx,001F001Fh               
  13.661 -        add eax,ebx
  13.662 -
  13.663 -        add eax,edx
  13.664 -        inc ecx
  13.665 -
  13.666 -        jnz .L5                 
  13.667 -
  13.668 -	mov [edi+ecx*4-4],eax            
  13.669 -
  13.670 -	; tail
  13.671 -	pop ecx
  13.672 -	and ecx,BYTE 1
  13.673 -	jz .L7
  13.674 -	mov ah,[esi+0]    ; blue
  13.675 -	mov al,[esi+1]    ; green
  13.676 -	mov bl,[esi+2]    ; red
  13.677 -	shr ah,3
  13.678 -	and al,11111100b
  13.679 -	shl eax,3
  13.680 -	shr bl,3
  13.681 -	add al,bl
  13.682 -	mov [edi+0],al
  13.683 -	mov [edi+1],ah
  13.684 -	add esi,BYTE 4
  13.685 -	add edi,BYTE 2
  13.686 -
  13.687 -.L7 
  13.688 -	retn
  13.689 -
  13.690 -
  13.691 -	
  13.692 -	
  13.693 -;; 32 BIT RGB TO 16 BIT RGB 555
  13.694 -
  13.695 -_ConvertX86p32_16RGB555:
  13.696 -
  13.697 -	; check short
  13.698 -	cmp ecx,BYTE 16
  13.699 -	ja .L3
  13.700 -
  13.701 -.L1	; short loop
  13.702 -	mov bl,[esi+0]    ; blue
  13.703 -	mov al,[esi+1]    ; green
  13.704 -	mov ah,[esi+2]    ; red
  13.705 -	shr ah,3
  13.706 -	and al,11111000b
  13.707 -	shl eax,2
  13.708 -	shr bl,3
  13.709 -	add al,bl
  13.710 -	mov [edi+0],al
  13.711 -	mov [edi+1],ah
  13.712 -	add esi,BYTE 4
  13.713 -	add edi,BYTE 2
  13.714 -	dec ecx
  13.715 -	jnz .L1
  13.716 -.L2
  13.717 -	retn
  13.718 -
  13.719 -.L3	; head
  13.720 -	mov ebx,edi
  13.721 -        and ebx,BYTE 11b
  13.722 -	jz .L4   
  13.723 -	mov bl,[esi+0]    ; blue
  13.724 -	mov al,[esi+1]    ; green
  13.725 -	mov ah,[esi+2]    ; red
  13.726 -	shr ah,3
  13.727 -	and al,11111000b
  13.728 -	shl eax,2
  13.729 -	shr bl,3
  13.730 -	add al,bl
  13.731 -	mov [edi+0],al
  13.732 -	mov [edi+1],ah
  13.733 -	add esi,BYTE 4
  13.734 -	add edi,BYTE 2
  13.735 -	dec ecx
  13.736 -
  13.737 -.L4	; save count
  13.738 -	push ecx
  13.739 -
  13.740 -	; unroll twice
  13.741 -	shr ecx,1
  13.742 -    
  13.743 -	; point arrays to end
  13.744 -	lea esi,[esi+ecx*8]
  13.745 -	lea edi,[edi+ecx*4]
  13.746 -
  13.747 -	; negative counter 
  13.748 -	neg ecx
  13.749 -	jmp SHORT .L6
  13.750 -
  13.751 -.L5     
  13.752 -	mov [edi+ecx*4-4],eax
  13.753 -.L6     
  13.754 -	mov eax,[esi+ecx*8]
  13.755 -
  13.756 -        shr ah,3
  13.757 -        mov ebx,[esi+ecx*8+4]
  13.758 -
  13.759 -        shr eax,3
  13.760 -        mov edx,[esi+ecx*8+4]
  13.761 -
  13.762 -        shr bh,3
  13.763 -        mov dl,[esi+ecx*8+2]
  13.764 -
  13.765 -        shl ebx,13
  13.766 -        and eax,000007FFh
  13.767 -        
  13.768 -        shl edx,7
  13.769 -        and ebx,07FF0000h
  13.770 -
  13.771 -        and edx,07C007C00h
  13.772 -        add eax,ebx
  13.773 -
  13.774 -        add eax,edx
  13.775 -        inc ecx
  13.776 -
  13.777 -        jnz .L5                 
  13.778 -
  13.779 -	mov [edi+ecx*4-4],eax
  13.780 -
  13.781 -	; tail
  13.782 -	pop ecx
  13.783 -	and ecx,BYTE 1
  13.784 -	jz .L7
  13.785 -	mov bl,[esi+0]    ; blue
  13.786 -	mov al,[esi+1]    ; green
  13.787 -	mov ah,[esi+2]    ; red
  13.788 -	shr ah,3
  13.789 -	and al,11111000b
  13.790 -	shl eax,2
  13.791 -	shr bl,3
  13.792 -	add al,bl
  13.793 -	mov [edi+0],al
  13.794 -	mov [edi+1],ah
  13.795 -	add esi,BYTE 4
  13.796 -	add edi,BYTE 2
  13.797 -
  13.798 -.L7
  13.799 -	retn
  13.800 -
  13.801 -
  13.802 -
  13.803 -
  13.804 -;; 32 BIT RGB TO 16 BIT BGR 555
  13.805 -	
  13.806 -_ConvertX86p32_16BGR555:
  13.807 -	
  13.808 -	; check short
  13.809 -	cmp ecx,BYTE 16
  13.810 -	ja .L3
  13.811 -
  13.812 -
  13.813 -.L1	; short loop
  13.814 -	mov ah,[esi+0]    ; blue
  13.815 -	mov al,[esi+1]    ; green
  13.816 -	mov bl,[esi+2]    ; red
  13.817 -	shr ah,3
  13.818 -	and al,11111000b
  13.819 -	shl eax,2
  13.820 -	shr bl,3
  13.821 -	add al,bl
  13.822 -	mov [edi+0],al
  13.823 -	mov [edi+1],ah
  13.824 -	add esi,BYTE 4
  13.825 -	add edi,BYTE 2
  13.826 -	dec ecx
  13.827 -	jnz .L1
  13.828 -.L2 
  13.829 -	retn
  13.830 -
  13.831 -.L3	; head
  13.832 -	mov ebx,edi
  13.833 -        and ebx,BYTE 11b
  13.834 -	jz .L4   
  13.835 -	mov ah,[esi+0]    ; blue
  13.836 -	mov al,[esi+1]    ; green
  13.837 -	mov bl,[esi+2]    ; red
  13.838 -	shr ah,3
  13.839 -	and al,11111000b
  13.840 -	shl eax,2
  13.841 -	shr bl,3
  13.842 -	add al,bl
  13.843 -	mov [edi+0],al
  13.844 -	mov [edi+1],ah
  13.845 -	add esi,BYTE 4
  13.846 -	add edi,BYTE 2
  13.847 -	dec ecx
  13.848 -
  13.849 -.L4	; save count
  13.850 -	push ecx
  13.851 -
  13.852 -	; unroll twice
  13.853 -	shr ecx,1
  13.854 -    
  13.855 -	; point arrays to end
  13.856 -	lea esi,[esi+ecx*8]
  13.857 -	lea edi,[edi+ecx*4]
  13.858 -
  13.859 -	; negative counter 
  13.860 -	neg ecx
  13.861 -	jmp SHORT .L6
  13.862 -
  13.863 -.L5     
  13.864 -	mov [edi+ecx*4-4],eax            
  13.865 -.L6     
  13.866 -	mov edx,[esi+ecx*8+4]
  13.867 -
  13.868 -        mov bh,[esi+ecx*8+4]                       
  13.869 -        mov ah,[esi+ecx*8]                       
  13.870 -
  13.871 -        shr bh,3
  13.872 -        mov al,[esi+ecx*8+1]             
  13.873 -
  13.874 -        shr ah,3
  13.875 -        mov bl,[esi+ecx*8+5]           
  13.876 -
  13.877 -        shl eax,2
  13.878 -        mov dl,[esi+ecx*8+2]
  13.879 -
  13.880 -        shl ebx,18
  13.881 -        and eax,00007FE0h              
  13.882 -                
  13.883 -        shr edx,3
  13.884 -        and ebx,07FE00000h             
  13.885 -        
  13.886 -        and edx,001F001Fh               
  13.887 -        add eax,ebx
  13.888 -
  13.889 -        add eax,edx
  13.890 -        inc ecx
  13.891 -
  13.892 -        jnz .L5                 
  13.893 -
  13.894 -	mov [edi+ecx*4-4],eax            
  13.895 -
  13.896 -	; tail
  13.897 -	pop ecx
  13.898 -	and ecx,BYTE 1
  13.899 -	jz .L7
  13.900 -	mov ah,[esi+0]    ; blue
  13.901 -	mov al,[esi+1]    ; green
  13.902 -	mov bl,[esi+2]    ; red
  13.903 -	shr ah,3
  13.904 -	and al,11111000b
  13.905 -	shl eax,2
  13.906 -	shr bl,3
  13.907 -	add al,bl
  13.908 -	mov [edi+0],al
  13.909 -	mov [edi+1],ah
  13.910 -	add esi,BYTE 4
  13.911 -	add edi,BYTE 2
  13.912 -
  13.913 -.L7
  13.914 -	retn
  13.915 -
  13.916 -
  13.917 -
  13.918 -
  13.919 -	
  13.920 -;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb)
  13.921 -;; This routine writes FOUR pixels at once (dword) and then, if they exist
  13.922 -;; the trailing three pixels
  13.923 -_ConvertX86p32_8RGB332:
  13.924 -
  13.925 -	
  13.926 -.L_ALIGNED
  13.927 -	push ecx
  13.928 -
  13.929 -	shr ecx,2		; We will draw 4 pixels at once
  13.930 -	jnz .L1
  13.931 -	
  13.932 -	jmp .L2			; short jump out of range :(
  13.933 -	
  13.934 -.L1:
  13.935 -	mov eax,[esi]		; first pair of pixels
  13.936 -	mov edx,[esi+4]
  13.937 -
  13.938 -	shr dl,6
  13.939 -	mov ebx,eax
  13.940 -
  13.941 -	shr al,6
  13.942 -	and ah,0e0h
  13.943 -
  13.944 -	shr ebx,16
  13.945 -	and dh,0e0h
  13.946 -	
  13.947 -	shr ah,3
  13.948 -	and bl,0e0h
  13.949 -
  13.950 -	shr dh,3
  13.951 -	
  13.952 -	or al,bl
  13.953 -	
  13.954 -	mov ebx,edx	
  13.955 -	or al,ah
  13.956 -	
  13.957 -	shr ebx,16
  13.958 -	or dl,dh
  13.959 -
  13.960 -	and bl,0e0h
  13.961 -	
  13.962 -	or dl,bl
  13.963 -
  13.964 -	mov ah,dl
  13.965 -
  13.966 -	
  13.967 -		
  13.968 -	mov ebx,[esi+8]		; second pair of pixels
  13.969 -
  13.970 -	mov edx,ebx
  13.971 -	and bh,0e0h
  13.972 -
  13.973 -	shr bl,6
  13.974 -	and edx,0e00000h
  13.975 -
  13.976 -	shr edx,16
  13.977 -
  13.978 -	shr bh,3
  13.979 -
  13.980 -	ror eax,16
  13.981 -	or bl,dl
  13.982 -
  13.983 -	mov edx,[esi+12]
  13.984 -	or bl,bh
  13.985 -	
  13.986 -	mov al,bl
  13.987 -
  13.988 -	mov ebx,edx
  13.989 -	and dh,0e0h
  13.990 -
  13.991 -	shr dl,6
  13.992 -	and ebx,0e00000h
  13.993 -	
  13.994 -	shr dh,3
  13.995 -	mov ah,dl
  13.996 -
  13.997 -	shr ebx,16
  13.998 -	or ah,dh
  13.999 -
 13.1000 -	or ah,bl
 13.1001 -
 13.1002 -	rol eax,16
 13.1003 -	add esi,BYTE 16
 13.1004 -			
 13.1005 -	mov [edi],eax	
 13.1006 -	add edi,BYTE 4
 13.1007 -	
 13.1008 -	dec ecx
 13.1009 -	jz .L2			; L1 out of range for short jump :(
 13.1010 -	
 13.1011 -	jmp .L1
 13.1012 -.L2:
 13.1013 -	
 13.1014 -	pop ecx
 13.1015 -	and ecx,BYTE 3		; mask out number of pixels to draw
 13.1016 -	
 13.1017 -	jz .L4			; Nothing to do anymore
 13.1018 -
 13.1019 -.L3:
 13.1020 -	mov eax,[esi]		; single pixel conversion for trailing pixels
 13.1021 -
 13.1022 -        mov ebx,eax
 13.1023 -
 13.1024 -        shr al,6
 13.1025 -        and ah,0e0h
 13.1026 -
 13.1027 -        shr ebx,16
 13.1028 -
 13.1029 -        shr ah,3
 13.1030 -        and bl,0e0h
 13.1031 -
 13.1032 -        or al,ah
 13.1033 -        or al,bl
 13.1034 -
 13.1035 -        mov [edi],al
 13.1036 -
 13.1037 -        inc edi
 13.1038 -        add esi,BYTE 4
 13.1039 -
 13.1040 -	dec ecx
 13.1041 -	jnz .L3
 13.1042 -	
 13.1043 -.L4:	
 13.1044 -	retn
 13.1045 -
 13.1046 -%ifidn __OUTPUT_FORMAT__,elf
 13.1047 -section .note.GNU-stack noalloc noexec nowrite progbits
 13.1048 -%endif
    14.1 --- a/src/video/SDL_blit.c	Wed Aug 15 04:04:17 2007 +0000
    14.2 +++ b/src/video/SDL_blit.c	Wed Aug 15 08:21:10 2007 +0000
    14.3 @@ -24,6 +24,7 @@
    14.4  #include "SDL_video.h"
    14.5  #include "SDL_sysvideo.h"
    14.6  #include "SDL_blit.h"
    14.7 +#include "SDL_blit_copy.h"
    14.8  #include "SDL_RLEaccel_c.h"
    14.9  #include "SDL_pixels_c.h"
   14.10  
   14.11 @@ -106,111 +107,64 @@
   14.12      return (okay ? 0 : -1);
   14.13  }
   14.14  
   14.15 -#ifdef MMX_ASMBLIT
   14.16 -static __inline__ void
   14.17 -SDL_memcpyMMX(Uint8 * to, const Uint8 * from, int len)
   14.18 +#ifdef __MACOSX__
   14.19 +#include <sys/sysctl.h>
   14.20 +/* SDL_TRUE when sysctl reports a non-zero "hw.l3cachesize", else SDL_FALSE. */
   14.21 +static SDL_bool SDL_UseAltivecPrefetch(void)
   14.22 +{
   14.23 +    const char key[] = "hw.l3cachesize";
   14.24 +    u_int64_t result = 0;
   14.25 +    size_t typeSize = sizeof(result);
   14.26 +
   14.27 +    if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
   14.28 +        return SDL_TRUE;
   14.29 +    } else {
   14.30 +        return SDL_FALSE;
   14.31 +    }
   14.32 +}
   14.33 +#else
   14.34 +static SDL_bool SDL_UseAltivecPrefetch(void)
   14.35 +{
   14.36 +    /* Just guess G4 */
   14.37 +    return SDL_TRUE;
   14.38 +}
   14.39 +#endif /* __MACOSX__ */
   14.40 +/* Return the highest-indexed entry whose feature bits are available; entries[0] is the fallback. */
   14.41 +static SDL_loblit SDL_ChooseBlitFunc(SDL_BlitEntry *entries, int count)
   14.42  {
   14.43      int i;
   14.44 +    static Uint32 features = 0xffffffff;    /* 0xffffffff: mask not computed yet */
   14.45  
   14.46 -    for (i = 0; i < len / 8; i++) {
   14.47 -        __asm__ __volatile__("	movq (%0), %%mm0\n"
   14.48 -                             "	movq %%mm0, (%1)\n"::"r"(from),
   14.49 -                             "r"(to):"memory");
   14.50 -        from += 8;
   14.51 -        to += 8;
   14.52 -    }
   14.53 -    if (len & 7)
   14.54 -        SDL_memcpy(to, from, len & 7);
   14.55 -}
   14.56 +    if (features == 0xffffffff) {
   14.57 +        features = SDL_BLIT_ANY;
   14.58  
   14.59 -static __inline__ void
   14.60 -SDL_memcpySSE(Uint8 * to, const Uint8 * from, int len)
   14.61 -{
   14.62 -    int i;
   14.63 -
   14.64 -    __asm__ __volatile__("	prefetchnta (%0)\n"
   14.65 -                         "	prefetchnta 64(%0)\n"
   14.66 -                         "	prefetchnta 128(%0)\n"
   14.67 -                         "	prefetchnta 192(%0)\n"::"r"(from));
   14.68 -
   14.69 -    for (i = 0; i < len / 8; i++) {
   14.70 -        __asm__ __volatile__("	prefetchnta 256(%0)\n"
   14.71 -                             "	movq (%0), %%mm0\n"
   14.72 -                             "	movntq %%mm0, (%1)\n"::"r"(from),
   14.73 -                             "r"(to):"memory");
   14.74 -        from += 8;
   14.75 -        to += 8;
   14.76 -    }
   14.77 -    if (len & 7)
   14.78 -        SDL_memcpy(to, from, len & 7);
   14.79 -}
   14.80 -#endif
   14.81 -
   14.82 -static void
   14.83 -SDL_BlitCopy(SDL_BlitInfo * info)
   14.84 -{
   14.85 -    Uint8 *src, *dst;
   14.86 -    int w, h;
   14.87 -    int srcskip, dstskip;
   14.88 -
   14.89 -    w = info->d_width * info->dst->BytesPerPixel;
   14.90 -    h = info->d_height;
   14.91 -    src = info->s_pixels;
   14.92 -    dst = info->d_pixels;
   14.93 -    srcskip = w + info->s_skip;
   14.94 -    dstskip = w + info->d_skip;
   14.95 -#ifdef MMX_ASMBLIT
   14.96 -    if (SDL_HasSSE()) {
   14.97 -        while (h--) {
   14.98 -            SDL_memcpySSE(dst, src, w);
   14.99 -            src += srcskip;
  14.100 -            dst += dstskip;
  14.101 -        }
  14.102 -        __asm__ __volatile__("	emms\n"::);
  14.103 -    } else if (SDL_HasMMX()) {
  14.104 -        while (h--) {
  14.105 -            SDL_memcpyMMX(dst, src, w);
  14.106 -            src += srcskip;
  14.107 -            dst += dstskip;
  14.108 -        }
  14.109 -        __asm__ __volatile__("	emms\n"::);
  14.110 -    } else
  14.111 -#endif
  14.112 -        while (h--) {
  14.113 -            SDL_memcpy(dst, src, w);
  14.114 -            src += srcskip;
  14.115 -            dst += dstskip;
  14.116 -        }
  14.117 -}
  14.118 -
  14.119 -static void
  14.120 -SDL_BlitCopyOverlap(SDL_BlitInfo * info)
  14.121 -{
  14.122 -    Uint8 *src, *dst;
  14.123 -    int w, h;
  14.124 -    int srcskip, dstskip;
  14.125 -
  14.126 -    w = info->d_width * info->dst->BytesPerPixel;
  14.127 -    h = info->d_height;
  14.128 -    src = info->s_pixels;
  14.129 -    dst = info->d_pixels;
  14.130 -    srcskip = w + info->s_skip;
  14.131 -    dstskip = w + info->d_skip;
  14.132 -    if (dst < src) {
  14.133 -        while (h--) {
  14.134 -            SDL_memcpy(dst, src, w);
  14.135 -            src += srcskip;
  14.136 -            dst += dstskip;
  14.137 -        }
  14.138 -    } else {
  14.139 -        src += ((h - 1) * srcskip);
  14.140 -        dst += ((h - 1) * dstskip);
  14.141 -        while (h--) {
  14.142 -            SDL_revcpy(dst, src, w);
  14.143 -            src -= srcskip;
  14.144 -            dst -= dstskip;
  14.145 +        /* Provide an override for testing .. */
  14.146 +        const char *override = SDL_getenv("SDL_BLIT_FEATURES");
  14.147 +        if (override) {
  14.148 +            SDL_sscanf(override, "%u", &features);
  14.149 +        } else {
  14.150 +            if (SDL_HasMMX()) {
  14.151 +                features |= SDL_BLIT_MMX;
  14.152 +            }
  14.153 +            if (SDL_HasSSE()) {
  14.154 +                features |= SDL_BLIT_SSE;
  14.155 +            }
  14.156 +            if (SDL_HasAltivec()) {
  14.157 +                if (SDL_UseAltivecPrefetch()) {
  14.158 +                    features |= SDL_BLIT_ALTIVEC_PREFETCH;
  14.159 +                } else {
  14.160 +                    features |= SDL_BLIT_ALTIVEC_NOPREFETCH;
  14.161 +                }
  14.162 +            }
  14.163          }
  14.164      }
  14.165 +
  14.166 +    for (i = count - 1; i > 0; --i) {    /* assumes count == number of entries; entries[count] is past the end */
  14.167 +        if (features & entries[i].features) {
  14.168 +            return entries[i].blit;
  14.169 +        }
  14.170 +    }
  14.171 +    return entries[0].blit;    /* nothing matched (or only one entry) */
  14.172  }
  14.173  
  14.174  /* Figure out which of many blit routines to set up on a surface */
  14.175 @@ -237,11 +191,11 @@
  14.176  
  14.177      /* Check for special "identity" case -- copy blit */
  14.178      if (surface->map->identity && blit_index == 0) {
  14.179 -        surface->map->sw_data->blit = SDL_BlitCopy;
  14.180 -
  14.181          /* Handle overlapping blits on the same surface */
  14.182          if (surface == surface->map->dst) {
  14.183              surface->map->sw_data->blit = SDL_BlitCopyOverlap;
  14.184 +        } else {
  14.185 +            surface->map->sw_data->blit = SDL_BlitCopy;
  14.186          }
  14.187      } else {
  14.188          if (surface->format->BitsPerPixel < 8) {
    15.1 --- a/src/video/SDL_blit.h	Wed Aug 15 04:04:17 2007 +0000
    15.2 +++ b/src/video/SDL_blit.h	Wed Aug 15 08:21:10 2007 +0000
    15.3 @@ -67,6 +67,17 @@
    15.4      unsigned int format_version;
    15.5  } SDL_BlitMap;
    15.6  
    15.7 +#define SDL_BLIT_ANY                0x00000000  /* empty mask; starting value before CPU bits are OR'ed in */
    15.8 +#define SDL_BLIT_MMX                0x00000001  /* set when SDL_HasMMX() reports MMX */
    15.9 +#define SDL_BLIT_SSE                0x00000002  /* set when SDL_HasSSE() reports SSE */
   15.10 +#define SDL_BLIT_ALTIVEC_PREFETCH   0x00000004  /* AltiVec present and SDL_UseAltivecPrefetch() is true */
   15.11 +#define SDL_BLIT_ALTIVEC_NOPREFETCH 0x00000008  /* AltiVec present and SDL_UseAltivecPrefetch() is false */
   15.12 +
   15.13 +typedef struct SDL_BlitEntry    /* one candidate blitter in a feature-selected table */
   15.14 +{
   15.15 +    Uint32 features;            /* SDL_BLIT_* bits; entry matches if any bit is in the detected mask */
   15.16 +    SDL_loblit blit;            /* blit routine returned when this entry is chosen */
   15.17 +} SDL_BlitEntry;
   15.18  
   15.19  /* Functions found in SDL_blit.c */
   15.20  extern int SDL_CalculateBlit(SDL_Surface * surface);
    16.1 --- a/src/video/SDL_blit_N.c	Wed Aug 15 04:04:17 2007 +0000
    16.2 +++ b/src/video/SDL_blit_N.c	Wed Aug 15 08:21:10 2007 +0000
    16.3 @@ -879,19 +879,6 @@
    16.4  #define LO	1
    16.5  #endif
    16.6  
    16.7 -#if SDL_HERMES_BLITTERS
    16.8 -
    16.9 -/* Heheheh, we coerce Hermes into using SDL blit information */
   16.10 -#define X86_ASSEMBLER
   16.11 -#define HermesConverterInterface	SDL_BlitInfo
   16.12 -#define HermesClearInterface		void
   16.13 -#define STACKCALL
   16.14 -
   16.15 -#include "../hermes/HeadMMX.h"
   16.16 -#include "../hermes/HeadX86.h"
   16.17 -
   16.18 -#else
   16.19 -
   16.20  /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   16.21  #define RGB888_RGB332(dst, src) { \
   16.22  	dst = (Uint8)((((src)&0x00E00000)>>16)| \
   16.23 @@ -1250,8 +1237,6 @@
   16.24  #endif /* USE_DUFFS_LOOP */
   16.25  }
   16.26  
   16.27 -#endif /* SDL_HERMES_BLITTERS */
   16.28 -
   16.29  
   16.30  /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
   16.31  #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
   16.32 @@ -2357,17 +2342,7 @@
   16.33      {0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL},
   16.34  };
   16.35  static const struct blit_table normal_blit_2[] = {
   16.36 -#if SDL_HERMES_BLITTERS
   16.37 -    {0x0000F800, 0x000007E0, 0x0000001F, 2, 0x0000001F, 0x000007E0,
   16.38 -     0x0000F800,
   16.39 -     0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA},
   16.40 -    {0x0000F800, 0x000007E0, 0x0000001F, 2, 0x00007C00, 0x000003E0,
   16.41 -     0x0000001F,
   16.42 -     0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA},
   16.43 -    {0x0000F800, 0x000007E0, 0x0000001F, 2, 0x0000001F, 0x000003E0,
   16.44 -     0x00007C00,
   16.45 -     0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA},
   16.46 -#elif SDL_ALTIVEC_BLITTERS
   16.47 +#if SDL_ALTIVEC_BLITTERS
   16.48      /* has-altivec */
   16.49      {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
   16.50       0x00000000,
   16.51 @@ -2397,47 +2372,6 @@
   16.52      {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
   16.53  };
   16.54  static const struct blit_table normal_blit_4[] = {
   16.55 -#if SDL_HERMES_BLITTERS
   16.56 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
   16.57 -     0x0000001F,
   16.58 -     1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA},
   16.59 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
   16.60 -     0x0000001F,
   16.61 -     0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA},
   16.62 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000007E0,
   16.63 -     0x0000F800,
   16.64 -     1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA},
   16.65 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000007E0,
   16.66 -     0x0000F800,
   16.67 -     0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA},
   16.68 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
   16.69 -     0x0000001F,
   16.70 -     1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA},
   16.71 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
   16.72 -     0x0000001F,
   16.73 -     0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA},
   16.74 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000003E0,
   16.75 -     0x00007C00,
   16.76 -     1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA},
   16.77 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000001F, 0x000003E0,
   16.78 -     0x00007C00,
   16.79 -     0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA},
   16.80 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00,
   16.81 -     0x000000FF,
   16.82 -     0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA},
   16.83 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00,
   16.84 -     0x00FF0000,
   16.85 -     0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA},
   16.86 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00,
   16.87 -     0x00FF0000,
   16.88 -     0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA},
   16.89 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0xFF000000, 0x00FF0000,
   16.90 -     0x0000FF00,
   16.91 -     0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA},
   16.92 -    {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x0000FF00, 0x00FF0000,
   16.93 -     0xFF000000,
   16.94 -     0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA},
   16.95 -#else
   16.96  #if SDL_ALTIVEC_BLITTERS
   16.97      /* has-altivec | dont-use-prefetch */
   16.98      {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
   16.99 @@ -2460,7 +2394,6 @@
  16.100      {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0,
  16.101       0x0000001F,
  16.102       0, NULL, Blit_RGB888_RGB555, NO_ALPHA},
  16.103 -#endif
  16.104      /* Default for 32-bit RGB source, used if no other blitter matches */
  16.105      {0, 0, 0, 0, 0, 0, 0, 0, NULL, BlitNtoN, 0}
  16.106  };
  16.107 @@ -2529,12 +2462,7 @@
  16.108              if (surface->map->table) {
  16.109                  blitfun = Blit_RGB888_index8_map;
  16.110              } else {
  16.111 -#if SDL_HERMES_BLITTERS
  16.112 -                sdata->aux_data = ConvertX86p32_8RGB332;
  16.113 -                blitfun = ConvertX86;
  16.114 -#else
  16.115                  blitfun = Blit_RGB888_index8;
  16.116 -#endif
  16.117              }
  16.118          } else {
  16.119              blitfun = BlitNto1;
  16.120 @@ -2575,13 +2503,6 @@
  16.121      }
  16.122  
  16.123  #ifdef DEBUG_ASM
  16.124 -#if SDL_HERMES_BLITTERS
  16.125 -    if (blitfun == ConvertMMX)
  16.126 -        fprintf(stderr, "Using mmx blit\n");
  16.127 -    else if (blitfun == ConvertX86)
  16.128 -        fprintf(stderr, "Using asm blit\n");
  16.129 -    else
  16.130 -#endif
  16.131      if ((blitfun == BlitNtoN) || (blitfun == BlitNto1))
  16.132          fprintf(stderr, "Using C blit\n");
  16.133      else
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/src/video/SDL_blit_copy.c	Wed Aug 15 08:21:10 2007 +0000
    17.3 @@ -0,0 +1,200 @@
    17.4 +/*
    17.5 +    SDL - Simple DirectMedia Layer
    17.6 +    Copyright (C) 1997-2006 Sam Lantinga
    17.7 +
    17.8 +    This library is free software; you can redistribute it and/or
    17.9 +    modify it under the terms of the GNU Lesser General Public
   17.10 +    License as published by the Free Software Foundation; either
   17.11 +    version 2.1 of the License, or (at your option) any later version.
   17.12 +
   17.13 +    This library is distributed in the hope that it will be useful,
   17.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
   17.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   17.16 +    Lesser General Public License for more details.
   17.17 +
   17.18 +    You should have received a copy of the GNU Lesser General Public
   17.19 +    License along with this library; if not, write to the Free Software
   17.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   17.21 +
   17.22 +    Sam Lantinga
   17.23 +    slouken@libsdl.org
   17.24 +*/
   17.25 +#include "SDL_config.h"
   17.26 +
   17.27 +#include "SDL_video.h"
   17.28 +#include "SDL_blit.h"
   17.29 +
   17.30 +/* The MMX/SSE intrinsics don't give access to specific registers for
   17.31 +   the most memory parallelism, so we'll use GCC inline assembly here...
   17.32 +*/
   17.33 +#ifndef __GNUC__
   17.34 +#undef __MMX__
   17.35 +#undef __SSE__
   17.36 +#endif
   17.37 +
   17.38 +#ifdef __MMX__
   17.39 +/* Copy len bytes from src to dst, 64 bytes per iteration through mm0-mm7,
   17.40 +   and hand the remaining (len & 63) bytes to SDL_memcpy.
   17.41 +
   17.42 +   Only baseline MMX instructions are used here: prefetchnta and movntq
   17.43 +   arrived with SSE, so they can fault on a CPU that merely passes
   17.44 +   SDL_HasMMX().  No emms is executed; the caller has to do that.
   17.45 +*/
   17.46 +static __inline__ void
   17.47 +SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
   17.48 +{
   17.49 +    int i;
   17.50 +
   17.51 +    for (i = len / 64; i--;) {
   17.52 +        __asm__ __volatile__ (
   17.53 +        "movq (%0), %%mm0\n"
   17.54 +        "movq 8(%0), %%mm1\n"
   17.55 +        "movq 16(%0), %%mm2\n"
   17.56 +        "movq 24(%0), %%mm3\n"
   17.57 +        "movq 32(%0), %%mm4\n"
   17.58 +        "movq 40(%0), %%mm5\n"
   17.59 +        "movq 48(%0), %%mm6\n"
   17.60 +        "movq 56(%0), %%mm7\n"
   17.61 +        "movq %%mm0, (%1)\n"
   17.62 +        "movq %%mm1, 8(%1)\n"
   17.63 +        "movq %%mm2, 16(%1)\n"
   17.64 +        "movq %%mm3, 24(%1)\n"
   17.65 +        "movq %%mm4, 32(%1)\n"
   17.66 +        "movq %%mm5, 40(%1)\n"
   17.67 +        "movq %%mm6, 48(%1)\n"
   17.68 +        "movq %%mm7, 56(%1)\n"
   17.69 +        :: "r" (src), "r" (dst)
   17.70 +        : "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
   17.71 +        src += 64;
   17.72 +        dst += 64;
   17.73 +    }
   17.74 +    if (len & 63)
   17.75 +        SDL_memcpy(dst, src, len & 63);
   17.76 +}
   17.77 +#endif /* __MMX__ */
   17.78 +
   17.79 +#ifdef __SSE__
   17.80 +/* Copy len bytes from src to dst, 64 bytes per iteration through
   17.81 +   xmm0-xmm3, and hand the remaining (len & 63) bytes to SDL_memcpy.
   17.82 +
   17.83 +   movaps and movntps fault on addresses that are not 16-byte aligned, so
   17.84 +   src and dst must both be 16-byte aligned whenever len >= 64.  The
   17.85 +   stores are non-temporal and no sfence is issued here.
   17.86 +*/
   17.87 +static __inline__ void
   17.88 +SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
   17.89 +{
   17.90 +    int i;
   17.91 +
   17.92 +    for (i = len / 64; i--;) {
   17.93 +        __asm__ __volatile__ (
   17.94 +        "prefetchnta (%0)\n"
   17.95 +        "movaps (%0), %%xmm0\n"
   17.96 +        "movaps 16(%0), %%xmm1\n"
   17.97 +        "movaps 32(%0), %%xmm2\n"
   17.98 +        "movaps 48(%0), %%xmm3\n"
   17.99 +        "movntps %%xmm0, (%1)\n"
  17.100 +        "movntps %%xmm1, 16(%1)\n"
  17.101 +        "movntps %%xmm2, 32(%1)\n"
  17.102 +        "movntps %%xmm3, 48(%1)\n"
  17.103 +        :: "r" (src), "r" (dst)
  17.104 +        : "memory", "xmm0", "xmm1", "xmm2", "xmm3");
  17.105 +        src += 64;
  17.106 +        dst += 64;
  17.107 +    }
  17.108 +    if (len & 63)
  17.109 +        SDL_memcpy(dst, src, len & 63);
  17.110 +}
  17.111 +#endif /* __SSE__ */
  17.112 +
  17.113 +/* Copy info->d_height rows of info->d_width pixels from info->s_pixels to
  17.114 +   info->d_pixels, top row first.  The SSE or MMX row copier is used when
  17.115 +   the CPU reports support and the alignment checks below pass; otherwise
  17.116 +   every row goes through SDL_memcpy.
  17.117 +*/
  17.118 +void
  17.119 +SDL_BlitCopy(SDL_BlitInfo * info)
  17.120 +{
  17.121 +    Uint8 *src, *dst;
  17.122 +    int w, h;
  17.123 +    int srcskip, dstskip;
  17.124 +
  17.125 +    w = info->d_width * info->dst->BytesPerPixel;
  17.126 +    h = info->d_height;
  17.127 +    src = info->s_pixels;
  17.128 +    dst = info->d_pixels;
  17.129 +    srcskip = w + info->s_skip;
  17.130 +    dstskip = w + info->d_skip;
  17.131 +
  17.132 +#ifdef __SSE__
  17.133 +    /* movaps/movntps need 16-byte alignment on every row, not just the
  17.134 +       first, so both row strides must be multiples of 16 as well. */
  17.135 +    if (SDL_HasSSE() &&
  17.136 +        !((uintptr_t)src & 15) && !(srcskip & 15) &&
  17.137 +        !((uintptr_t)dst & 15) && !(dstskip & 15)) {
  17.138 +        while (h--) {
  17.139 +            SDL_memcpySSE(dst, src, w);
  17.140 +            src += srcskip;
  17.141 +            dst += dstskip;
  17.142 +        }
  17.143 +        /* Fence the weakly-ordered non-temporal stores before returning */
  17.144 +        __asm__ __volatile__("	sfence\n" : : : "memory");
  17.145 +        return;
  17.146 +    }
  17.147 +#endif
  17.148 +
  17.149 +#ifdef __MMX__
  17.150 +    if (SDL_HasMMX() && !((uintptr_t)src & 7) && !((uintptr_t)dst & 7)) {
  17.151 +        while (h--) {
  17.152 +            SDL_memcpyMMX(dst, src, w);
  17.153 +            src += srcskip;
  17.154 +            dst += dstskip;
  17.155 +        }
  17.156 +        /* Leave MMX state so later x87 floating point code works */
  17.157 +        __asm__ __volatile__("	emms\n"::);
  17.158 +        return;
  17.159 +    }
  17.160 +#endif
  17.161 +
  17.162 +    while (h--) {
  17.163 +        SDL_memcpy(dst, src, w);
  17.164 +        src += srcskip;
  17.165 +        dst += dstskip;
  17.166 +    }
  17.167 +}
  17.168 +
  17.169 +/* Copy like SDL_BlitCopy, but allow the source and destination rows to
  17.170 +   overlap in memory.  When dst starts before src, or at or beyond the
  17.171 +   end of the source rows, the top-to-bottom SDL_BlitCopy is used.
  17.172 +   Otherwise the rows are visited bottom to top and each one is copied
  17.173 +   with SDL_revcpy.
  17.174 +*/
  17.175 +void
  17.176 +SDL_BlitCopyOverlap(SDL_BlitInfo * info)
  17.177 +{
  17.178 +    Uint8 *src, *dst;
  17.179 +    int w, h;
  17.180 +    int srcskip, dstskip;
  17.181 +
  17.182 +    w = info->d_width * info->dst->BytesPerPixel;
  17.183 +    h = info->d_height;
  17.184 +    src = info->s_pixels;
  17.185 +    dst = info->d_pixels;
  17.186 +    srcskip = w + info->s_skip;
  17.187 +    dstskip = w + info->d_skip;
  17.188 +    if ((dst < src) || (dst >= (src + h * srcskip))) {
  17.189 +        SDL_BlitCopy(info);
  17.190 +    } else {
  17.191 +        /* Step each pointer by its own pitch; the source pitch is only
  17.192 +           right for dst when both pitches happen to be equal. */
  17.193 +        src += ((h - 1) * srcskip);
  17.194 +        dst += ((h - 1) * dstskip);
  17.195 +        while (h--) {
  17.196 +            SDL_revcpy(dst, src, w);
  17.197 +            src -= srcskip;
  17.198 +            dst -= dstskip;
  17.199 +        }
  17.200 +    }
  17.201 +}
  17.202 +
  17.203 +/* vi: set ts=4 sw=4 expandtab: */
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/src/video/SDL_blit_copy.h	Wed Aug 15 08:21:10 2007 +0000
    18.3 @@ -0,0 +1,26 @@
    18.4 +/*
    18.5 +    SDL - Simple DirectMedia Layer
    18.6 +    Copyright (C) 1997-2006 Sam Lantinga
    18.7 +
    18.8 +    This library is free software; you can redistribute it and/or
    18.9 +    modify it under the terms of the GNU Lesser General Public
   18.10 +    License as published by the Free Software Foundation; either
   18.11 +    version 2.1 of the License, or (at your option) any later version.
   18.12 +
   18.13 +    This library is distributed in the hope that it will be useful,
   18.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
   18.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   18.16 +    Lesser General Public License for more details.
   18.17 +
   18.18 +    You should have received a copy of the GNU Lesser General Public
   18.19 +    License along with this library; if not, write to the Free Software
   18.20 +    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   18.21 +
   18.22 +    Sam Lantinga
   18.23 +    slouken@libsdl.org
   18.24 +*/
   18.25 +
   18.26 +void SDL_BlitCopy(SDL_BlitInfo * info);         /* copies the rows top to bottom (SSE/MMX path when usable) */
   18.27 +void SDL_BlitCopyOverlap(SDL_BlitInfo * info);  /* switches to a bottom-up SDL_revcpy pass when dst lies inside the source rows */
   18.28 +
   18.29 +/* vi: set ts=4 sw=4 expandtab: */