21 #include "../SDL_internal.h"
28 #define HAVE_NEON_INTRINSICS 1
32 #define HAVE_SSE2_INTRINSICS 1
35 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
36 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
37 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS
38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
39 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
41 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0
46 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS
47 #define NEED_SCALAR_CONVERTER_FALLBACKS 1
63 #define DIVBY128 0.0078125f
64 #define DIVBY32768 0.000030517578125f
65 #define DIVBY8388607 0.00000011920930376163766f
68 #if NEED_SCALAR_CONVERTER_FALLBACKS
149 float *
dst = (
float *) cvt->
buf;
166 const float *
src = (
const float *) cvt->
buf;
173 const float sample = *
src;
174 if (sample >= 1.0f) {
176 }
else if (sample <= -1.0f) {
192 const float *
src = (
const float *) cvt->
buf;
199 const float sample = *
src;
200 if (sample >= 1.0f) {
202 }
else if (sample <= -1.0f) {
205 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
218 const float *
src = (
const float *) cvt->
buf;
225 const float sample = *
src;
226 if (sample >= 1.0f) {
228 }
else if (sample <= -1.0f) {
244 const float *
src = (
const float *) cvt->
buf;
251 const float sample = *
src;
252 if (sample >= 1.0f) {
254 }
else if (sample <= -1.0f) {
257 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
270 const float *
src = (
const float *) cvt->
buf;
277 const float sample = *
src;
278 if (sample >= 1.0f) {
280 }
else if (sample <= -1.0f) {
283 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
294 #if HAVE_SSE2_INTRINSICS
313 if ((((
size_t)
src) & 15) == 0) {
315 const __m128i *mmsrc = (
const __m128i *)
src;
316 const __m128i
zero = _mm_setzero_si128();
317 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
319 const __m128i bytes = _mm_load_si128(mmsrc);
321 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
323 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
325 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1,
zero), 16), 16)), divby128);
326 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2,
zero), 16), 16)), divby128);
327 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1,
zero), 16), 16)), divby128);
328 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2,
zero), 16), 16)), divby128);
330 _mm_store_ps(
dst, _mm_unpacklo_ps(floats1, floats2));
331 _mm_store_ps(
dst+4, _mm_unpackhi_ps(floats1, floats2));
332 _mm_store_ps(
dst+8, _mm_unpacklo_ps(floats3, floats4));
333 _mm_store_ps(
dst+12, _mm_unpackhi_ps(floats3, floats4));
334 i -= 16; mmsrc--;
dst -= 16;
372 if ((((
size_t)
src) & 15) == 0) {
374 const __m128i *mmsrc = (
const __m128i *)
src;
375 const __m128i
zero = _mm_setzero_si128();
376 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
377 const __m128 minus1 = _mm_set1_ps(-1.0f);
379 const __m128i bytes = _mm_load_si128(mmsrc);
381 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
383 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
386 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1,
zero)), divby128), minus1);
387 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2,
zero)), divby128), minus1);
388 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1,
zero)), divby128), minus1);
389 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2,
zero)), divby128), minus1);
391 _mm_store_ps(
dst, _mm_unpacklo_ps(floats1, floats2));
392 _mm_store_ps(
dst+4, _mm_unpackhi_ps(floats1, floats2));
393 _mm_store_ps(
dst+8, _mm_unpacklo_ps(floats3, floats4));
394 _mm_store_ps(
dst+12, _mm_unpackhi_ps(floats3, floats4));
395 i -= 16; mmsrc--;
dst -= 16;
433 if ((((
size_t)
src) & 15) == 0) {
435 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
437 const __m128i ints = _mm_load_si128((__m128i
const *)
src);
439 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
441 const __m128i
b = _mm_srai_epi32(ints, 16);
443 _mm_store_ps(
dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(
a,
b)), divby32768));
444 _mm_store_ps(
dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(
a,
b)), divby32768));
481 if ((((
size_t)
src) & 15) == 0) {
483 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
484 const __m128 minus1 = _mm_set1_ps(-1.0f);
486 const __m128i ints = _mm_load_si128((__m128i
const *)
src);
488 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
490 const __m128i
b = _mm_srli_epi32(ints, 16);
492 _mm_store_ps(
dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(
a,
b)), divby32768), minus1));
493 _mm_store_ps(
dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(
a,
b)), divby32768), minus1));
516 float *
dst = (
float *) cvt->
buf;
529 if ((((
size_t)
src) & 15) == 0) {
532 const __m128i *mmsrc = (
const __m128i *)
src;
535 _mm_store_ps(
dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
536 i -= 4; mmsrc++;
dst += 4;
555 const float *
src = (
const float *) cvt->
buf;
563 const float sample = *
src;
564 if (sample >= 1.0f) {
566 }
else if (sample <= -1.0f) {
576 if ((((
size_t)
src) & 15) == 0) {
578 const __m128
one = _mm_set1_ps(1.0f);
579 const __m128 negone = _mm_set1_ps(-1.0f);
580 const __m128 mulby127 = _mm_set1_ps(127.0f);
581 __m128i *mmdst = (__m128i *)
dst;
583 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby127));
584 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby127));
585 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+8)),
one), mulby127));
586 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+12)),
one), mulby127));
587 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
588 i -= 16;
src += 16; mmdst++;
595 const float sample = *
src;
596 if (sample >= 1.0f) {
598 }
else if (sample <= -1.0f) {
615 const float *
src = (
const float *) cvt->
buf;
623 const float sample = *
src;
624 if (sample >= 1.0f) {
626 }
else if (sample <= -1.0f) {
629 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
636 if ((((
size_t)
src) & 15) == 0) {
638 const __m128
one = _mm_set1_ps(1.0f);
639 const __m128 negone = _mm_set1_ps(-1.0f);
640 const __m128 mulby127 = _mm_set1_ps(127.0f);
641 __m128i *mmdst = (__m128i *)
dst;
643 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one),
one), mulby127));
644 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one),
one), mulby127));
645 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+8)),
one),
one), mulby127));
646 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+12)),
one),
one), mulby127));
647 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
648 i -= 16;
src += 16; mmdst++;
655 const float sample = *
src;
656 if (sample >= 1.0f) {
658 }
else if (sample <= -1.0f) {
661 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
675 const float *
src = (
const float *) cvt->
buf;
683 const float sample = *
src;
684 if (sample >= 1.0f) {
686 }
else if (sample <= -1.0f) {
696 if ((((
size_t)
src) & 15) == 0) {
698 const __m128
one = _mm_set1_ps(1.0f);
699 const __m128 negone = _mm_set1_ps(-1.0f);
700 const __m128 mulby32767 = _mm_set1_ps(32767.0f);
701 __m128i *mmdst = (__m128i *)
dst;
703 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby32767));
704 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby32767));
705 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
706 i -= 8;
src += 8; mmdst++;
713 const float sample = *
src;
714 if (sample >= 1.0f) {
716 }
else if (sample <= -1.0f) {
733 const float *
src = (
const float *) cvt->
buf;
741 const float sample = *
src;
742 if (sample >= 1.0f) {
744 }
else if (sample <= -1.0f) {
747 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
754 if ((((
size_t)
src) & 15) == 0) {
763 const __m128 mulby32767 = _mm_set1_ps(32767.0f);
764 const __m128i topbit = _mm_set1_epi16(-32768);
765 const __m128
one = _mm_set1_ps(1.0f);
766 const __m128 negone = _mm_set1_ps(-1.0f);
767 __m128i *mmdst = (__m128i *)
dst;
769 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby32767));
770 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src+4)),
one), mulby32767));
771 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
772 i -= 8;
src += 8; mmdst++;
779 const float sample = *
src;
780 if (sample >= 1.0f) {
782 }
else if (sample <= -1.0f) {
785 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
799 const float *
src = (
const float *) cvt->
buf;
807 const float sample = *
src;
808 if (sample >= 1.0f) {
810 }
else if (sample <= -1.0f) {
813 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
822 const __m128
one = _mm_set1_ps(1.0f);
823 const __m128 negone = _mm_set1_ps(-1.0f);
824 const __m128 mulby8388607 = _mm_set1_ps(8388607.0f);
825 __m128i *mmdst = (__m128i *)
dst;
827 _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(
src)),
one), mulby8388607)), 8));
828 i -= 4;
src += 4; mmdst++;
835 const float sample = *
src;
836 if (sample >= 1.0f) {
838 }
else if (sample <= -1.0f) {
841 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
853 #if HAVE_NEON_INTRINSICS
872 if ((((
size_t)
src) & 15) == 0) {
875 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
877 const int8x16_t bytes = vld1q_s8(mmsrc);
878 const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes));
879 const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes));
881 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
882 vst1q_f32(
dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
883 vst1q_f32(
dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
884 vst1q_f32(
dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
885 i -= 16; mmsrc -= 16;
dst -= 16;
923 if ((((
size_t)
src) & 15) == 0) {
926 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
927 const float32x4_t negone = vdupq_n_f32(-1.0f);
929 const uint8x16_t bytes = vld1q_u8(mmsrc);
930 const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes));
931 const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes));
933 vst1q_f32(
dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));
934 vst1q_f32(
dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));
935 vst1q_f32(
dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));
936 vst1q_f32(
dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));
937 i -= 16; mmsrc -= 16;
dst -= 16;
975 if ((((
size_t)
src) & 15) == 0) {
977 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
979 const int16x8_t ints = vld1q_s16((
int16_t const *)
src);
981 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
982 vst1q_f32(
dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
1019 if ((((
size_t)
src) & 15) == 0) {
1021 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
1022 const float32x4_t negone = vdupq_n_f32(-1.0f);
1024 const uint16x8_t uints = vld1q_u16((
uint16_t const *)
src);
1026 vst1q_f32(
dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1027 vst1q_f32(
dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1050 float *
dst = (
float *) cvt->
buf;
1063 if ((((
size_t)
src) & 15) == 0) {
1065 const float32x4_t divby8388607 = vdupq_n_f32(
DIVBY8388607);
1069 vst1q_f32(
dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1070 i -= 4; mmsrc += 4;
dst += 4;
1089 const float *
src = (
const float *) cvt->
buf;
1097 const float sample = *
src;
1098 if (sample >= 1.0f) {
1100 }
else if (sample <= -1.0f) {
1110 if ((((
size_t)
src) & 15) == 0) {
1112 const float32x4_t
one = vdupq_n_f32(1.0f);
1113 const float32x4_t negone = vdupq_n_f32(-1.0f);
1114 const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1117 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby127));
1118 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one), mulby127));
1119 const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+8)),
one), mulby127));
1120 const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+12)),
one), mulby127));
1121 const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1122 const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4)));
1123 vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi));
1124 i -= 16;
src += 16; mmdst += 16;
1131 const float sample = *
src;
1132 if (sample >= 1.0f) {
1134 }
else if (sample <= -1.0f) {
1151 const float *
src = (
const float *) cvt->
buf;
1159 const float sample = *
src;
1160 if (sample >= 1.0f) {
1162 }
else if (sample <= -1.0f) {
1165 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
1172 if ((((
size_t)
src) & 15) == 0) {
1174 const float32x4_t
one = vdupq_n_f32(1.0f);
1175 const float32x4_t negone = vdupq_n_f32(-1.0f);
1176 const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1179 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one),
one), mulby127));
1180 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one),
one), mulby127));
1181 const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+8)),
one),
one), mulby127));
1182 const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+12)),
one),
one), mulby127));
1183 const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1184 const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4)));
1185 vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi));
1186 i -= 16;
src += 16; mmdst += 16;
1194 const float sample = *
src;
1195 if (sample >= 1.0f) {
1197 }
else if (sample <= -1.0f) {
1200 *
dst = (
Uint8)((sample + 1.0f) * 127.0f);
1214 const float *
src = (
const float *) cvt->
buf;
1222 const float sample = *
src;
1223 if (sample >= 1.0f) {
1225 }
else if (sample <= -1.0f) {
1235 if ((((
size_t)
src) & 15) == 0) {
1237 const float32x4_t
one = vdupq_n_f32(1.0f);
1238 const float32x4_t negone = vdupq_n_f32(-1.0f);
1239 const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1242 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby32767));
1243 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one), mulby32767));
1244 vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1245 i -= 8;
src += 8; mmdst += 8;
1252 const float sample = *
src;
1253 if (sample >= 1.0f) {
1255 }
else if (sample <= -1.0f) {
1272 const float *
src = (
const float *) cvt->
buf;
1280 const float sample = *
src;
1281 if (sample >= 1.0f) {
1283 }
else if (sample <= -1.0f) {
1286 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
1293 if ((((
size_t)
src) & 15) == 0) {
1295 const float32x4_t
one = vdupq_n_f32(1.0f);
1296 const float32x4_t negone = vdupq_n_f32(-1.0f);
1297 const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1300 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one),
one), mulby32767));
1301 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src+4)),
one),
one), mulby32767));
1302 vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1303 i -= 8;
src += 8; mmdst += 8;
1310 const float sample = *
src;
1311 if (sample >= 1.0f) {
1313 }
else if (sample <= -1.0f) {
1316 *
dst = (
Uint16)((sample + 1.0f) * 32767.0f);
1330 const float *
src = (
const float *) cvt->
buf;
1338 const float sample = *
src;
1339 if (sample >= 1.0f) {
1341 }
else if (sample <= -1.0f) {
1342 *
dst = (-2147483647) - 1;
1344 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
1353 const float32x4_t
one = vdupq_n_f32(1.0f);
1354 const float32x4_t negone = vdupq_n_f32(-1.0f);
1355 const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0f);
1358 vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(
src)),
one), mulby8388607)), 8));
1359 i -= 4;
src += 4; mmdst += 4;
1366 const float sample = *
src;
1367 if (sample >= 1.0f) {
1369 }
else if (sample <= -1.0f) {
1370 *
dst = (-2147483647) - 1;
1372 *
dst = ((
Sint32)(sample * 8388607.0f)) << 8;
1389 if (converters_chosen) {
1393 #define SET_CONVERTER_FUNCS(fntype) \
1394 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
1395 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
1396 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
1397 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
1398 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
1399 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
1400 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
1401 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
1402 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
1403 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
1404 converters_chosen = SDL_TRUE
1406 #if HAVE_SSE2_INTRINSICS
1413 #if HAVE_NEON_INTRINSICS
1420 #if NEED_SCALAR_CONVERTER_FALLBACKS
1424 #undef SET_CONVERTER_FUNCS
#define SDL_assert(condition)
Uint16 SDL_AudioFormat
Audio format flags.
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define LOG_DEBUG_CONVERT(from, to)
SDL_AudioFilter SDL_Convert_F32_to_U16
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
SDL_AudioFilter SDL_Convert_F32_to_S32
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_S16
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U8
SDL_AudioFilter SDL_Convert_F32_to_S8
SDL_AudioFilter SDL_Convert_S8_to_F32
#define SET_CONVERTER_FUNCS(fntype)
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_S32_to_F32
SDL_AudioFilter SDL_Convert_S16_to_F32
SDL_AudioFilter SDL_Convert_U8_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
GLboolean GLboolean GLboolean b
GLboolean GLboolean GLboolean GLboolean a
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
A structure to hold a set of audio conversion filters and buffers.
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]