src/audio/SDL_mixer_MMX_VC.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 29 May 2006 04:04:35 +0000
branch SDL-1.3
changeset 1668 4da1ee79c9af
parent 1662 782fd950bd46
child 1720 a1ebb17f9c52
permissions -rw-r--r--
more tweaking indent options
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 #include "SDL_mixer_MMX_VC.h"
    25 
    26 #if ((defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)) && defined(SDL_ASSEMBLY_ROUTINES)
    27 // MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
    28 // Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
    29 // Converted to Intel ASM notation by Cth
    30 // This code is licensed under the LGPL (see COPYING for details)
    31 // 
    32 // Assumes buffer size in bytes is a multiple of 16
    33 // Assumes SDL_MIX_MAXVOLUME = 128
    34 
    35 
    36 ////////////////////////////////////////////////
    37 // Mixing for 16 bit signed buffers
    38 ////////////////////////////////////////////////
    39 
    40 void
    41 SDL_MixAudio_MMX_S16_VC(char *dst, char *src, unsigned int nSize, int volume)
    42 {
    43     __asm {
    44 
    45         push edi push esi push ebx mov edi, dst // edi = dst
    46           mov esi, src          // esi = src
    47           mov eax, volume       // eax = volume
    48           mov ebx, nSize        // ebx = size
    49           shr ebx, 4            // process 16 bytes per iteration = 8 samples
    50           jz endS16 pxor mm0, mm0 movd mm0, eax //%%eax,%%mm0
    51           movq mm1, mm0         //%%mm0,%%mm1
    52           psllq mm0, 16         //$16,%%mm0
    53           por mm0, mm1          //%%mm1,%%mm0
    54           psllq mm0, 16         //$16,%%mm0
    55           por mm0, mm1          //%%mm1,%%mm0
    56           psllq mm0, 16         //$16,%%mm0
    57           por mm0, mm1          //%%mm1,%%mm0                   // mm0 = vol|vol|vol|vol
    58 #ifndef __WATCOMC__
    59           align 16
    60 #endif
    61           mixloopS16:movq mm1,[esi]     //(%%esi),%%mm1\n" // mm1 = a|b|c|d
    62         movq mm2, mm1           //%%mm1,%%mm2\n" // mm2 = a|b|c|d
    63           movq mm4,[esi + 8]    //8(%%esi),%%mm4\n" // mm4 = e|f|g|h
    64             // pre charger le buffer dst dans mm7
    65         movq mm7,[edi]          //(%%edi),%%mm7\n" // mm7 = dst[0]"
    66             // multiplier par le volume
    67         pmullw mm1, mm0         //%%mm0,%%mm1\n" // mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v)
    68           pmulhw mm2, mm0       //%%mm0,%%mm2\n" // mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v)
    69           movq mm5, mm4         //%%mm4,%%mm5\n" // mm5 = e|f|g|h
    70           pmullw mm4, mm0       //%%mm0,%%mm4\n" // mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v)
    71           pmulhw mm5, mm0       //%%mm0,%%mm5\n" // mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v)
    72           movq mm3, mm1         //%%mm1,%%mm3\n" // mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v)
    73           punpckhwd mm1, mm2    //%%mm2,%%mm1\n" // mm1 = a*v|b*v
    74           movq mm6, mm4         //%%mm4,%%mm6\n" // mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v)
    75           punpcklwd mm3, mm2    //%%mm2,%%mm3\n" // mm3 = c*v|d*v
    76           punpckhwd mm4, mm5    //%%mm5,%%mm4\n" // mm4 = e*f|f*v
    77           punpcklwd mm6, mm5    //%%mm5,%%mm6\n" // mm6 = g*v|h*v
    78             // pre charger le buffer dst dans mm5
    79           movq mm5,[edi + 8]    //8(%%edi),%%mm5\n" // mm5 = dst[1]
    80             // diviser par 128
    81         psrad mm1, 7            //$7,%%mm1\n" // mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME
    82           add esi, 16           //$16,%%esi\n"
    83           psrad mm3, 7          //$7,%%mm3\n" // mm3 = c*v/128|d*v/128
    84           psrad mm4, 7          //$7,%%mm4\n" // mm4 = e*v/128|f*v/128
    85             // mm1 = le sample avec le volume modifie
    86           packssdw mm3, mm1     //%%mm1,%%mm3\n" // mm3 = s(a*v|b*v|c*v|d*v)
    87           psrad mm6, 7          //$7,%%mm6\n" // mm6= g*v/128|h*v/128
    88           paddsw mm3, mm7       //%%mm7,%%mm3\n" // mm3 = adjust_volume(src)+dst
    89             // mm4 = le sample avec le volume modifie
    90           packssdw mm6, mm4     //%%mm4,%%mm6\n" // mm6 = s(e*v|f*v|g*v|h*v)
    91           movq[edi], mm3        //%%mm3,(%%edi)\n"
    92           paddsw mm6, mm5       //%%mm5,%%mm6\n" // mm6 = adjust_volume(src)+dst
    93           movq[edi + 8], mm6    //%%mm6,8(%%edi)\n"
    94           add edi, 16           //$16,%%edi\n"
    95           dec ebx               //%%ebx\n"
    96       jnz mixloopS16 ends16:emms pop ebx pop esi pop edi}
    97 
    98 }
    99 
   100 ////////////////////////////////////////////////
   101 // Mixing for 8 bit signed buffers
   102 ////////////////////////////////////////////////
   103 
   104 void
   105 SDL_MixAudio_MMX_S8_VC(char *dst, char *src, unsigned int nSize, int volume)
   106 {
   107     _asm {
   108 
   109         push edi push esi push ebx mov edi, dst //movl  %0,%%edi        // edi = dst
   110           mov esi, src          //%1,%%esi      // esi = src
   111           mov eax, volume       //%3,%%eax      // eax = volume
   112           movd mm0, eax         //%%eax,%%mm0
   113           movq mm1, mm0         //%%mm0,%%mm1
   114           psllq mm0, 16         //$16,%%mm0
   115           por mm0, mm1          //%%mm1,%%mm0
   116           psllq mm0, 16         //$16,%%mm0
   117           por mm0, mm1          //%%mm1,%%mm0
   118           psllq mm0, 16         //$16,%%mm0
   119           por mm0, mm1          //%%mm1,%%mm0
   120           mov ebx, nSize        //%2,%%ebx      // ebx = size
   121           shr ebx, 3            //$3,%%ebx      // process 8 bytes per iteration = 8 samples
   122           cmp ebx, 0            //$0,%%ebx
   123           je endS8
   124 #ifndef __WATCOMC__
   125           align 16
   126 #endif
   127           mixloopS8:pxor mm2, mm2       //%%mm2,%%mm2           // mm2 = 0
   128           movq mm1,[esi]        //(%%esi),%%mm1 // mm1 = a|b|c|d|e|f|g|h
   129         movq mm3, mm1           //%%mm1,%%mm3   // mm3 = a|b|c|d|e|f|g|h
   130             // on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0
   131           pcmpgtb mm2, mm1      //%%mm1,%%mm2   // mm2 = 11111111|00000000|00000000....
   132           punpckhbw mm1, mm2    //%%mm2,%%mm1   // mm1 = 0|a|0|b|0|c|0|d
   133           punpcklbw mm3, mm2    //%%mm2,%%mm3   // mm3 = 0|e|0|f|0|g|0|h
   134           movq mm2,[edi]        //(%%edi),%%mm2 // mm2 = destination
   135         pmullw mm1, mm0         //%%mm0,%%mm1   // mm1 = v*a|v*b|v*c|v*d
   136           add esi, 8            //$8,%%esi
   137           pmullw mm3, mm0       //%%mm0,%%mm3   // mm3 = v*e|v*f|v*g|v*h
   138           psraw mm1, 7          //$7,%%mm1              // mm1 = v*a/128|v*b/128|v*c/128|v*d/128 
   139           psraw mm3, 7          //$7,%%mm3              // mm3 = v*e/128|v*f/128|v*g/128|v*h/128
   140           packsswb mm3, mm1     //%%mm1,%%mm3   // mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128
   141           paddsb mm3, mm2       //%%mm2,%%mm3   // add to destination buffer
   142           movq[edi], mm3        //%%mm3,(%%edi) // store back to ram
   143           add edi, 8            //$8,%%edi
   144           dec ebx               //%%ebx
   145       jnz mixloopS8 endS8:emms pop ebx pop esi pop edi}
   146 }
   147 
   148 #endif                          /* SDL_ASSEMBLY_ROUTINES */
   149 /* vi: set ts=4 sw=4 expandtab: */