src/audio/SDL_mixer_MMX.c
author Ozkan Sezer <sezeroz@gmail.com>
Sat, 24 Mar 2018 22:41:17 +0300
branchSDL-1.2
changeset 11972 0b5ac2b90ab1
parent 6137 4720145f848b
permissions -rw-r--r--
backport fix for bug #3739: handle %lu, %li and %ld in SDL_SetError.
slouken@1402
     1
/*
slouken@1402
     2
    SDL - Simple DirectMedia Layer
slouken@6137
     3
    Copyright (C) 1997-2012 Sam Lantinga
slouken@1402
     4
slouken@1402
     5
    This library is free software; you can redistribute it and/or
slouken@1402
     6
    modify it under the terms of the GNU Lesser General Public
slouken@1402
     7
    License as published by the Free Software Foundation; either
slouken@1402
     8
    version 2.1 of the License, or (at your option) any later version.
slouken@1402
     9
slouken@1402
    10
    This library is distributed in the hope that it will be useful,
slouken@1402
    11
    but WITHOUT ANY WARRANTY; without even the implied warranty of
slouken@1402
    12
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
slouken@1402
    13
    Lesser General Public License for more details.
slouken@1402
    14
slouken@1402
    15
    You should have received a copy of the GNU Lesser General Public
slouken@1402
    16
    License along with this library; if not, write to the Free Software
slouken@1402
    17
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
slouken@1402
    18
slouken@1402
    19
    Sam Lantinga
slouken@1402
    20
    slouken@libsdl.org
slouken@1402
    21
*/
slouken@1402
    22
#include "SDL_config.h"
slouken@1402
    23
slouken@1019
    24
/*
slouken@1019
    25
    MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
slouken@1019
    26
    Copyright 2002 Stephane Marchesin (stephane.marchesin@wanadoo.fr)
slouken@1019
    27
    This code is licensed under the LGPL (see COPYING for details)
slouken@1019
    28
 
slouken@1019
    29
    Assumes buffer size in bytes is a multiple of 16
slouken@1019
    30
    Assumes SDL_MIX_MAXVOLUME = 128
slouken@1019
    31
*/
slouken@539
    32
slouken@539
    33
slouken@1019
    34
/***********************************************
slouken@1019
    35
*   Mixing for 16 bit signed buffers
slouken@1019
    36
***********************************************/
slouken@539
    37
icculus@4322
    38
#if defined(SDL_BUGGY_MMX_MIXERS) /* buggy, so we're disabling them. --ryan. */
slouken@1402
    39
#if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
slouken@539
    40
/*
 * Mix signed little-endian 16-bit samples from src into dst using MMX.
 *
 * For every sample: dst = saturate16(dst + ((src * volume) >> 7)).
 * The shift is arithmetic, so negative products round toward minus infinity.
 *
 * dst    - destination buffer, read and written in place
 * src    - source buffer, only read
 * size   - buffer length in bytes; whole 16-byte groups (8 samples) are
 *          processed and any remaining tail bytes are left untouched
 * volume - scale factor applied to src; it is replicated into four 16-bit
 *          lanes, so it must fit in 16 bits (the shift by 7 matches the
 *          SDL_MIX_MAXVOLUME = 128 assumption stated at the top of the file)
 *
 * The pointers and the loop counter are declared as read-write operands
 * because the assembly advances them, and the loop uses numeric local
 * labels so the code can be emitted more than once in one object file.
 */
void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
{
    unsigned int blocks = size >> 4;	/* 16 bytes = 8 samples per iteration */

    if (blocks == 0) {
	return;	/* nothing to mix; no MMX state has been touched yet */
    }

    __asm__ __volatile__ (
"	movd %3,%%mm0\n"		/* mm0 = volume in the low 32 bits, rest zero */
"	movq %%mm0,%%mm1\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"		/* mm0 = vol|vol|vol|vol */

"	.p2align 3\n"
"1:\n"
"	movq (%1),%%mm1\n"		/* mm1 = first four source samples */
"	movq %%mm1,%%mm2\n"		/* mm2 = copy of the first four samples */
"	movq 8(%1),%%mm4\n"		/* mm4 = next four source samples */

	/* preload the first half of the destination into mm7 */
"	movq (%0),%%mm7\n"		/* mm7 = dst[0..3] */

	/* multiply by the volume */
"	pmullw %%mm0,%%mm1\n"		/* mm1 = low words of sample*vol */
"	pmulhw %%mm0,%%mm2\n"		/* mm2 = high words of sample*vol */
"	movq %%mm4,%%mm5\n"		/* mm5 = copy of the next four samples */
"	pmullw %%mm0,%%mm4\n"		/* mm4 = low words of sample*vol */
"	pmulhw %%mm0,%%mm5\n"		/* mm5 = high words of sample*vol */
"	movq %%mm1,%%mm3\n"		/* mm3 = low words (first four) */
"	punpckhwd %%mm2,%%mm1\n"	/* mm1 = 32-bit products, samples 2..3 */
"	movq %%mm4,%%mm6\n"		/* mm6 = low words (next four) */
"	punpcklwd %%mm2,%%mm3\n"	/* mm3 = 32-bit products, samples 0..1 */
"	punpckhwd %%mm5,%%mm4\n"	/* mm4 = 32-bit products, samples 6..7 */
"	punpcklwd %%mm5,%%mm6\n"	/* mm6 = 32-bit products, samples 4..5 */

	/* preload the second half of the destination into mm5 */
"	movq 8(%0),%%mm5\n"		/* mm5 = dst[4..7] */

	/* divide by 128 (arithmetic shift right by 7) */
"	psrad $7,%%mm1\n"
"	add $16,%1\n"			/* advance src by one group */
"	psrad $7,%%mm3\n"
"	psrad $7,%%mm4\n"

	/* mm3 = volume-adjusted samples 0..3, packed back to 16 bits */
"	packssdw %%mm1,%%mm3\n"
"	psrad $7,%%mm6\n"
"	paddsw %%mm7,%%mm3\n"		/* mm3 = adjusted src + dst, saturated */

	/* mm6 = volume-adjusted samples 4..7, packed back to 16 bits */
"	packssdw %%mm4,%%mm6\n"
"	movq %%mm3,(%0)\n"		/* store samples 0..3 */
"	paddsw %%mm5,%%mm6\n"		/* mm6 = adjusted src + dst, saturated */
"	movq %%mm6,8(%0)\n"		/* store samples 4..7 */

"	add $16,%0\n"			/* advance dst by one group */
"	decl %2\n"
"	jnz 1b\n"

"	emms\n"				/* leave MMX state so x87 code can run */
	 : "+r"(dst), "+r"(src), "+r"(blocks)
	 : "rm"(volume)
	 : "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7","memory","cc"
	 );
}
slouken@539
   135
slouken@539
   136
slouken@539
   137
slouken@1612
   138
/*////////////////////////////////////////////// */
slouken@1612
   139
/* Mixing for 8 bit signed buffers */
slouken@1612
   140
/*////////////////////////////////////////////// */
slouken@539
   141
slouken@539
   142
/*
 * Mix signed 8-bit samples from src into dst using MMX.
 *
 * For every sample: dst = saturate8(dst + ((src * volume) >> 7)).
 * The shift is arithmetic, so negative products round toward minus infinity.
 *
 * dst    - destination buffer, read and written in place
 * src    - source buffer, only read
 * size   - buffer length in bytes; whole 8-byte groups (8 samples) are
 *          processed and any remaining tail bytes are left untouched
 * volume - scale factor applied to src; it is replicated into four 16-bit
 *          lanes and multiplied with 16-bit results only, so sample*volume
 *          must fit in 16 bits (true for 0..128, matching the
 *          SDL_MIX_MAXVOLUME = 128 assumption stated at the top of the file)
 *
 * The pointers and the loop counter are declared as read-write operands
 * because the assembly advances them, and the loop uses numeric local
 * labels so the code can be emitted more than once in one object file.
 */
void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
{
    unsigned int blocks = size >> 3;	/* 8 bytes = 8 samples per iteration */

    if (blocks == 0) {
	return;	/* nothing to mix; no MMX state has been touched yet */
    }

    __asm__ __volatile__ (
"	movd %3,%%mm0\n"		/* mm0 = volume in the low 32 bits, rest zero */
"	movq %%mm0,%%mm1\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"
"	psllq $16,%%mm0\n"
"	por %%mm1,%%mm0\n"		/* mm0 = vol|vol|vol|vol */

"	.p2align 3\n"
"1:\n"
"	pxor %%mm2,%%mm2\n"		/* mm2 = 0 */
"	movq (%1),%%mm1\n"		/* mm1 = eight source samples */
"	movq %%mm1,%%mm3\n"		/* mm3 = copy of the eight samples */

	/* sign extension: comparing 0 > sample yields 0xFF for negative bytes
	   and 0x00 otherwise, which is exactly the high byte needed */
"	pcmpgtb %%mm1,%%mm2\n"		/* mm2 = per-byte sign mask */
"	punpckhbw %%mm2,%%mm1\n"	/* mm1 = samples 4..7 widened to 16 bits */
"	punpcklbw %%mm2,%%mm3\n"	/* mm3 = samples 0..3 widened to 16 bits */
"	movq (%0),%%mm2\n"		/* mm2 = destination samples */

"	pmullw %%mm0,%%mm1\n"		/* mm1 = vol * samples 4..7 */
"	add $8,%1\n"			/* advance src by one group */
"	pmullw %%mm0,%%mm3\n"		/* mm3 = vol * samples 0..3 */
"	psraw $7,%%mm1\n"		/* divide by 128 */
"	psraw $7,%%mm3\n"		/* divide by 128 */

"	packsswb %%mm1,%%mm3\n"		/* mm3 = eight adjusted samples as bytes */
"	paddsb %%mm2,%%mm3\n"		/* add the destination, saturated */
"	movq %%mm3,(%0)\n"		/* store the mixed samples */

"	add $8,%0\n"			/* advance dst by one group */
"	decl %2\n"
"	jnz 1b\n"

"	emms\n"				/* leave MMX state so x87 code can run */
	 : "+r"(dst), "+r"(src), "+r"(blocks)
	 : "rm"(volume)
	 : "mm0","mm1","mm2","mm3","memory","cc"
	 );
}
slouken@539
   206
#endif
icculus@4322
   207
#endif