src/audio/SDL_audiotypecvt.c
changeset 10837 c2f241c2f6ad
parent 10836 8f0aa225f261
child 10838 57ac8b68d7c8
equal deleted inserted replaced
10836:8f0aa225f261 10837:c2f241c2f6ad
   499             const __m128i ints = _mm_load_si128(mmsrc);
   499             const __m128i ints = _mm_load_si128(mmsrc);
   500             /* bitshift the whole register over, so _mm_cvtepi32_pd can read the top ints in the bottom of the vector. */
   500             /* bitshift the whole register over, so _mm_cvtepi32_pd can read the top ints in the bottom of the vector. */
   501             const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483647);
   501             const __m128d doubles1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_srli_si128(ints, 8)), divby2147483647);
   502             const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483647);
   502             const __m128d doubles2 = _mm_mul_pd(_mm_cvtepi32_pd(ints), divby2147483647);
   503             /* convert to float32, bitshift/or to get these into a vector to store. */
   503             /* convert to float32, bitshift/or to get these into a vector to store. */
   504             _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_bslli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
   504             _mm_store_ps(dst, _mm_castsi128_ps(_mm_or_si128(_mm_slli_si128(_mm_castps_si128(_mm_cvtpd_ps(doubles1)), 8), _mm_castps_si128(_mm_cvtpd_ps(doubles2)))));
   505             i -= 4; mmsrc++; dst += 4;
   505             i -= 4; mmsrc++; dst += 4;
   506         }
   506         }
   507         src = (const Sint32 *) mmsrc;
   507         src = (const Sint32 *) mmsrc;
   508     }
   508     }
   509 
   509 
   723         while (i >= 4) {   /* 4 * float32 */
   723         while (i >= 4) {   /* 4 * float32 */
   724             const __m128 floats = _mm_load_ps(src);
   724             const __m128 floats = _mm_load_ps(src);
   725             /* bitshift the whole register over, so _mm_cvtps_pd can read the top floats in the bottom of the vector. */
   725             /* bitshift the whole register over, so _mm_cvtps_pd can read the top floats in the bottom of the vector. */
   726             const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
   726             const __m128d doubles1 = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(floats), 8))), mulby2147483647);
   727             const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
   727             const __m128d doubles2 = _mm_mul_pd(_mm_cvtps_pd(floats), mulby2147483647);
   728             _mm_store_si128(mmdst, _mm_or_si128(_mm_bslli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
   728             _mm_store_si128(mmdst, _mm_or_si128(_mm_slli_si128(_mm_cvtpd_epi32(doubles1), 8), _mm_cvtpd_epi32(doubles2)));
   729             i -= 4; src += 4; mmdst++;
   729             i -= 4; src += 4; mmdst++;
   730         }
   730         }
   731         dst = (Sint32 *) mmdst;
   731         dst = (Sint32 *) mmdst;
   732     }
   732     }
   733 
   733