SDL  2.0
SDL_audiotypecvt.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "../SDL_internal.h"
22 
23 #include "SDL_audio.h"
24 #include "SDL_audio_c.h"
25 #include "SDL_cpuinfo.h"
26 
/* Record which SIMD intrinsic families the compiler is targeting.  The
   HAVE_*_INTRINSICS macros are left undefined (and so evaluate to 0 inside
   #if expressions) when the corresponding target macro is absent. */
27 #ifdef __ARM_NEON
28 #define HAVE_NEON_INTRINSICS 1
29 #endif
30 
31 #ifdef __SSE2__
32 #define HAVE_SSE2_INTRINSICS 1
33 #endif
34 
/* Decide whether the plain-C converters must be compiled.  The first matching
   branch wins; each branch sets the macro to 0 for a platform where, per its
   trailing comment, a SIMD path is always available. */
35 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
36 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
37 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS
38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* Mac OS X/Intel guarantees SSE2. */
39 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* ARMv8+ promise NEON. */
41 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* All Apple ARMv7 chips promise NEON support. */
43 #endif
44 
45 /* Set to zero if platform is guaranteed to use a SIMD codepath here. */
/* No branch above matched: default to building the scalar converters. */
46 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS
47 #define NEED_SCALAR_CONVERTER_FALLBACKS 1
48 #endif
49 
50 /* Function pointers set to a CPU-specific implementation. */
61 
62 
/* Reciprocal scale factors used when normalising integer samples to float:
   multiplying by these replaces a division by 128, 32768 and 8388607. */
63 #define DIVBY128 0.0078125f
64 #define DIVBY32768 0.000030517578125f
65 #define DIVBY8388607 0.00000011920930376163766f
66 
67 
68 #if NEED_SCALAR_CONVERTER_FALLBACKS
69 static void SDLCALL
71 {
72  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
73  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
74  int i;
75 
76  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32");
77 
78  for (i = cvt->len_cvt; i; --i, --src, --dst) {
79  *dst = ((float) *src) * DIVBY128;
80  }
81 
82  cvt->len_cvt *= 4;
83  if (cvt->filters[++cvt->filter_index]) {
84  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
85  }
86 }
87 
88 static void SDLCALL
90 {
91  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
92  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
93  int i;
94 
95  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32");
96 
97  for (i = cvt->len_cvt; i; --i, --src, --dst) {
98  *dst = (((float) *src) * DIVBY128) - 1.0f;
99  }
100 
101  cvt->len_cvt *= 4;
102  if (cvt->filters[++cvt->filter_index]) {
103  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
104  }
105 }
106 
107 static void SDLCALL
109 {
110  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
111  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
112  int i;
113 
114  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32");
115 
116  for (i = cvt->len_cvt / sizeof (Sint16); i; --i, --src, --dst) {
117  *dst = ((float) *src) * DIVBY32768;
118  }
119 
120  cvt->len_cvt *= 2;
121  if (cvt->filters[++cvt->filter_index]) {
122  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
123  }
124 }
125 
126 static void SDLCALL
128 {
129  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
130  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
131  int i;
132 
133  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32");
134 
135  for (i = cvt->len_cvt / sizeof (Uint16); i; --i, --src, --dst) {
136  *dst = (((float) *src) * DIVBY32768) - 1.0f;
137  }
138 
139  cvt->len_cvt *= 2;
140  if (cvt->filters[++cvt->filter_index]) {
141  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
142  }
143 }
144 
145 static void SDLCALL
147 {
148  const Sint32 *src = (const Sint32 *) cvt->buf;
149  float *dst = (float *) cvt->buf;
150  int i;
151 
152  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32");
153 
154  for (i = cvt->len_cvt / sizeof (Sint32); i; --i, ++src, ++dst) {
155  *dst = ((float) (*src>>8)) * DIVBY8388607;
156  }
157 
158  if (cvt->filters[++cvt->filter_index]) {
159  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
160  }
161 }
162 
163 static void SDLCALL
165 {
166  const float *src = (const float *) cvt->buf;
167  Sint8 *dst = (Sint8 *) cvt->buf;
168  int i;
169 
170  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8");
171 
172  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
173  const float sample = *src;
174  if (sample >= 1.0f) {
175  *dst = 127;
176  } else if (sample <= -1.0f) {
177  *dst = -128;
178  } else {
179  *dst = (Sint8)(sample * 127.0f);
180  }
181  }
182 
183  cvt->len_cvt /= 4;
184  if (cvt->filters[++cvt->filter_index]) {
185  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
186  }
187 }
188 
189 static void SDLCALL
191 {
192  const float *src = (const float *) cvt->buf;
193  Uint8 *dst = (Uint8 *) cvt->buf;
194  int i;
195 
196  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8");
197 
198  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
199  const float sample = *src;
200  if (sample >= 1.0f) {
201  *dst = 255;
202  } else if (sample <= -1.0f) {
203  *dst = 0;
204  } else {
205  *dst = (Uint8)((sample + 1.0f) * 127.0f);
206  }
207  }
208 
209  cvt->len_cvt /= 4;
210  if (cvt->filters[++cvt->filter_index]) {
211  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
212  }
213 }
214 
215 static void SDLCALL
217 {
218  const float *src = (const float *) cvt->buf;
219  Sint16 *dst = (Sint16 *) cvt->buf;
220  int i;
221 
222  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16");
223 
224  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
225  const float sample = *src;
226  if (sample >= 1.0f) {
227  *dst = 32767;
228  } else if (sample <= -1.0f) {
229  *dst = -32768;
230  } else {
231  *dst = (Sint16)(sample * 32767.0f);
232  }
233  }
234 
235  cvt->len_cvt /= 2;
236  if (cvt->filters[++cvt->filter_index]) {
237  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
238  }
239 }
240 
241 static void SDLCALL
243 {
244  const float *src = (const float *) cvt->buf;
245  Uint16 *dst = (Uint16 *) cvt->buf;
246  int i;
247 
248  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16");
249 
250  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
251  const float sample = *src;
252  if (sample >= 1.0f) {
253  *dst = 65535;
254  } else if (sample <= -1.0f) {
255  *dst = 0;
256  } else {
257  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
258  }
259  }
260 
261  cvt->len_cvt /= 2;
262  if (cvt->filters[++cvt->filter_index]) {
263  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
264  }
265 }
266 
267 static void SDLCALL
269 {
270  const float *src = (const float *) cvt->buf;
271  Sint32 *dst = (Sint32 *) cvt->buf;
272  int i;
273 
274  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32");
275 
276  for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {
277  const float sample = *src;
278  if (sample >= 1.0f) {
279  *dst = 2147483647;
280  } else if (sample <= -1.0f) {
281  *dst = (Sint32) -2147483648LL;
282  } else {
283  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
284  }
285  }
286 
287  if (cvt->filters[++cvt->filter_index]) {
288  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
289  }
290 }
291 #endif
292 
293 
294 #if HAVE_SSE2_INTRINSICS
/*
 * SSE2 version: widen signed 8-bit samples in cvt->buf to float32, in place.
 *
 * The buffer is walked from its last sample towards its first, because each
 * input byte expands to four output bytes.  Three phases:
 *   1. scalar conversions until the 16-float output block ending at dst
 *      starts on a 16-byte boundary;
 *   2. if the matching 16-byte input block is also aligned, 16 samples per
 *      iteration using aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been multiplied by 4 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_F32SYS.
 */
295 static void SDLCALL
296 SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
297 {
298  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
299  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
300  int i;
301 
302  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
303 
304  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
305  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
306  *dst = ((float) *src) * DIVBY128;
307  }
308 
309  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
310  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
311 
312  /* Make sure src is aligned too. */
313  if ((((size_t) src) & 15) == 0) {
314  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
315  const __m128i *mmsrc = (const __m128i *) src;
316  const __m128i zero = _mm_setzero_si128();
317  const __m128 divby128 = _mm_set1_ps(DIVBY128);
318  while (i >= 16) { /* 16 * 8-bit */
319  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 sint8 into an XMM register. */
320  /* treat as int16, shift left to clear every other sint16, then back right with sign-extend. Now sint16. */
      /* shorts1 holds the even-indexed bytes, one per 16-bit lane. */
321  const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
322  /* right-shift-sign-extend gets us sint16 with the other set of values. */
      /* shorts2 holds the odd-indexed bytes, one per 16-bit lane. */
323  const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
324  /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
325  const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
326  const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
327  const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
328  const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
329  /* Interleave back into correct order, store. */
330  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
331  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
332  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
333  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
      /* Step one 16-sample block towards the start of the buffer. */
334  i -= 16; mmsrc--; dst -= 16;
335  }
336 
337  src = (const Sint8 *) mmsrc;
338  }
339 
340  src += 15; dst += 15; /* adjust for any scalar finishing. */
341 
342  /* Finish off any leftovers with scalar operations. */
343  while (i) {
344  *dst = ((float) *src) * DIVBY128;
345  i--; src--; dst--;
346  }
347 
348  cvt->len_cvt *= 4;
349  if (cvt->filters[++cvt->filter_index]) {
350  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
351  }
352 }
353 
/*
 * SSE2 version: widen unsigned 8-bit samples in cvt->buf to float32, in
 * place.  Each byte is scaled by 1/128 and shifted down by 1.0.
 *
 * The buffer is walked from its last sample towards its first, because each
 * input byte expands to four output bytes.  Three phases:
 *   1. scalar conversions until the 16-float output block ending at dst
 *      starts on a 16-byte boundary;
 *   2. if the matching 16-byte input block is also aligned, 16 samples per
 *      iteration using aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been multiplied by 4 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_F32SYS.
 */
354 static void SDLCALL
355 SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
356 {
357  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
358  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
359  int i;
360 
361  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
362 
363  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
364  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
365  *dst = (((float) *src) * DIVBY128) - 1.0f;
366  }
367 
368  src -= 15; dst -= 15; /* adjust to read SSE blocks from the start. */
369  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
370 
371  /* Make sure src is aligned too. */
372  if ((((size_t) src) & 15) == 0) {
373  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
374  const __m128i *mmsrc = (const __m128i *) src;
375  const __m128i zero = _mm_setzero_si128();
376  const __m128 divby128 = _mm_set1_ps(DIVBY128);
377  const __m128 minus1 = _mm_set1_ps(-1.0f);
378  while (i >= 16) { /* 16 * 8-bit */
379  const __m128i bytes = _mm_load_si128(mmsrc); /* get 16 uint8 into an XMM register. */
380  /* treat as int16, shift left to clear every other sint16, then back right with zero-extend. Now uint16. */
      /* shorts1 holds the even-indexed bytes, one per 16-bit lane. */
381  const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
382  /* right-shift-zero-extend gets us uint16 with the other set of values. */
      /* shorts2 holds the odd-indexed bytes, one per 16-bit lane. */
383  const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
384  /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
385  /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
386  const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
387  const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
388  const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
389  const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
390  /* Interleave back into correct order, store. */
391  _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
392  _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
393  _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
394  _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
      /* Step one 16-sample block towards the start of the buffer. */
395  i -= 16; mmsrc--; dst -= 16;
396  }
397 
398  src = (const Uint8 *) mmsrc;
399  }
400 
401  src += 15; dst += 15; /* adjust for any scalar finishing. */
402 
403  /* Finish off any leftovers with scalar operations. */
404  while (i) {
405  *dst = (((float) *src) * DIVBY128) - 1.0f;
406  i--; src--; dst--;
407  }
408 
409  cvt->len_cvt *= 4;
410  if (cvt->filters[++cvt->filter_index]) {
411  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
412  }
413 }
414 
/*
 * SSE2 version: widen signed 16-bit samples in cvt->buf to float32, in place.
 *
 * The buffer is walked from its last sample towards its first, because each
 * 2-byte input sample expands to four output bytes.  Three phases:
 *   1. scalar conversions until the 8-float output block ending at dst
 *      starts on a 16-byte boundary;
 *   2. if the matching 8-sample input block is also 16-byte aligned, 8
 *      samples per iteration using aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been multiplied by 2 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_F32SYS.
 */
415 static void SDLCALL
416 SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
417 {
418  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
419  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
420  int i;
421 
422  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
423 
424  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
425  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
426  *dst = ((float) *src) * DIVBY32768;
427  }
428 
429  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
430  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
431 
432  /* Make sure src is aligned too. */
433  if ((((size_t) src) & 15) == 0) {
434  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
435  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
436  while (i >= 8) { /* 8 * 16-bit */
437  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 sint16 into an XMM register. */
438  /* treat as int32, shift left to clear every other sint16, then back right with sign-extend. Now sint32. */
      /* a holds the even-indexed samples, one per 32-bit lane. */
439  const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
440  /* right-shift-sign-extend gets us sint32 with the other set of values. */
      /* b holds the odd-indexed samples, one per 32-bit lane. */
441  const __m128i b = _mm_srai_epi32(ints, 16);
442  /* Interleave these back into the right order, convert to float, multiply, store. */
443  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
444  _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
      /* Step one 8-sample block towards the start of the buffer. */
445  i -= 8; src -= 8; dst -= 8;
446  }
447  }
448 
449  src += 7; dst += 7; /* adjust for any scalar finishing. */
450 
451  /* Finish off any leftovers with scalar operations. */
452  while (i) {
453  *dst = ((float) *src) * DIVBY32768;
454  i--; src--; dst--;
455  }
456 
457  cvt->len_cvt *= 2;
458  if (cvt->filters[++cvt->filter_index]) {
459  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
460  }
461 }
462 
/*
 * SSE2 version: widen unsigned 16-bit samples in cvt->buf to float32, in
 * place.  Each sample is scaled by 1/32768 and shifted down by 1.0.
 *
 * The buffer is walked from its last sample towards its first, because each
 * 2-byte input sample expands to four output bytes.  Three phases:
 *   1. scalar conversions until the 8-float output block ending at dst
 *      starts on a 16-byte boundary;
 *   2. if the matching 8-sample input block is also 16-byte aligned, 8
 *      samples per iteration using aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been multiplied by 2 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_F32SYS.
 */
463 static void SDLCALL
464 SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
465 {
466  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
467  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
468  int i;
469 
470  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
471 
472  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
     /* The divisor below names Sint16 although the samples are Uint16; it is used only for its size. */
473  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
474  *dst = (((float) *src) * DIVBY32768) - 1.0f;
475  }
476 
477  src -= 7; dst -= 7; /* adjust to read SSE blocks from the start. */
478  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
479 
480  /* Make sure src is aligned too. */
481  if ((((size_t) src) & 15) == 0) {
482  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
483  const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
484  const __m128 minus1 = _mm_set1_ps(-1.0f);
485  while (i >= 8) { /* 8 * 16-bit */
486  const __m128i ints = _mm_load_si128((__m128i const *) src); /* get 8 uint16 into an XMM register. */
487  /* treat as int32, shift left to clear the upper uint16 of each lane, then back right with zero-extend. Now non-negative int32 holding the even-indexed samples. */
488  const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
489  /* right-shift-zero-extend gets us int32 with the other (odd-indexed) set of values. */
490  const __m128i b = _mm_srli_epi32(ints, 16);
491  /* Interleave these back into the right order, convert to float, multiply, subtract one, store. */
492  _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
493  _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
      /* Step one 8-sample block towards the start of the buffer. */
494  i -= 8; src -= 8; dst -= 8;
495  }
496  }
497 
498  src += 7; dst += 7; /* adjust for any scalar finishing. */
499 
500  /* Finish off any leftovers with scalar operations. */
501  while (i) {
502  *dst = (((float) *src) * DIVBY32768) - 1.0f;
503  i--; src--; dst--;
504  }
505 
506  cvt->len_cvt *= 2;
507  if (cvt->filters[++cvt->filter_index]) {
508  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
509  }
510 }
511 
/*
 * SSE2 version: convert signed 32-bit samples in cvt->buf to float32, in
 * place.  The low 8 bits of every sample are shifted out before the integer
 * is converted, and the remaining value is scaled by 1/8388607.
 *
 * Input and output samples have the same size, so the buffer is processed
 * front to back and cvt->len_cvt is left unchanged.  Three phases:
 *   1. scalar conversions until dst is 16-byte aligned;
 *   2. if src is aligned as well, 4 samples per iteration using aligned
 *      loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * If another filter is queued in cvt->filters it is called with AUDIO_F32SYS.
 */
512 static void SDLCALL
513 SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
514 {
515  const Sint32 *src = (const Sint32 *) cvt->buf;
516  float *dst = (float *) cvt->buf;
517  int i;
518 
519  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
520 
521  /* Get dst aligned to 16 bytes */
522  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
523  *dst = ((float) (*src>>8)) * DIVBY8388607;
524  }
525 
526  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
527 
528  /* Make sure src is aligned too. */
     /* src and dst start at the same address and advance by the same 4-byte step, so they share alignment here. */
529  if ((((size_t) src) & 15) == 0) {
530  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
531  const __m128 divby8388607 = _mm_set1_ps(DIVBY8388607);
532  const __m128i *mmsrc = (const __m128i *) src;
533  while (i >= 4) { /* 4 * sint32 */
534  /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
535  _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
536  i -= 4; mmsrc++; dst += 4;
537  }
538  src = (const Sint32 *) mmsrc;
539  }
540 
541  /* Finish off any leftovers with scalar operations. */
542  while (i) {
543  *dst = ((float) (*src>>8)) * DIVBY8388607;
544  i--; src++; dst++;
545  }
546 
547  if (cvt->filters[++cvt->filter_index]) {
548  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
549  }
550 }
551 
/*
 * SSE2 version: narrow float32 samples in cvt->buf to signed 8-bit, in place.
 *
 * The output is a quarter of the input size, so the buffer is processed front
 * to back.  Three phases:
 *   1. scalar conversions until dst is 16-byte aligned;
 *   2. if src is 16-byte aligned at that point, 16 samples per iteration
 *      using aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * NOTE(review): the two paths do not agree at the negative clamp limit.  The
 * scalar code writes -128 for samples <= -1.0, while the vector code clamps
 * to -1.0 and multiplies by 127, giving -127.  The scalar code also converts
 * with a C cast where the vector code uses _mm_cvtps_epi32.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been divided by 4 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_S8.
 */
552 static void SDLCALL
553 SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
554 {
555  const float *src = (const float *) cvt->buf;
556  Sint8 *dst = (Sint8 *) cvt->buf;
557  int i;
558 
559  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
560 
561  /* Get dst aligned to 16 bytes */
562  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
563  const float sample = *src;
564  if (sample >= 1.0f) {
565  *dst = 127;
566  } else if (sample <= -1.0f) {
567  *dst = -128;
568  } else {
569  *dst = (Sint8)(sample * 127.0f);
570  }
571  }
572 
573  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
574 
575  /* Make sure src is aligned too. */
576  if ((((size_t) src) & 15) == 0) {
577  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
578  const __m128 one = _mm_set1_ps(1.0f);
579  const __m128 negone = _mm_set1_ps(-1.0f);
580  const __m128 mulby127 = _mm_set1_ps(127.0f);
581  __m128i *mmdst = (__m128i *) dst;
582  while (i >= 16) { /* 16 * float32 */
583  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
584  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
585  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
586  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
      /* All 64 input bytes are loaded above before the 16 output bytes are stored over the same buffer. */
587  _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
588  i -= 16; src += 16; mmdst++;
589  }
590  dst = (Sint8 *) mmdst;
591  }
592 
593  /* Finish off any leftovers with scalar operations. */
594  while (i) {
595  const float sample = *src;
596  if (sample >= 1.0f) {
597  *dst = 127;
598  } else if (sample <= -1.0f) {
599  *dst = -128;
600  } else {
601  *dst = (Sint8)(sample * 127.0f);
602  }
603  i--; src++; dst++;
604  }
605 
606  cvt->len_cvt /= 4;
607  if (cvt->filters[++cvt->filter_index]) {
608  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
609  }
610 }
611 
/*
 * SSE2 version: narrow float32 samples in cvt->buf to unsigned 8-bit, in
 * place.
 *
 * The output is a quarter of the input size, so the buffer is processed front
 * to back.  Three phases:
 *   1. scalar conversions until dst is 16-byte aligned;
 *   2. if src is 16-byte aligned at that point, 16 samples per iteration
 *      using aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * NOTE(review): the two paths do not agree at the positive clamp limit.  The
 * scalar code writes 255 for samples >= 1.0, while the vector code clamps to
 * 1.0 and computes (1.0 + 1.0) * 127 = 254.  The scalar code also converts
 * with a C cast where the vector code uses _mm_cvtps_epi32.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been divided by 4 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_U8.
 */
612 static void SDLCALL
613 SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
614 {
615  const float *src = (const float *) cvt->buf;
616  Uint8 *dst = cvt->buf;
617  int i;
618 
619  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
620 
621  /* Get dst aligned to 16 bytes */
622  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
623  const float sample = *src;
624  if (sample >= 1.0f) {
625  *dst = 255;
626  } else if (sample <= -1.0f) {
627  *dst = 0;
628  } else {
629  *dst = (Uint8)((sample + 1.0f) * 127.0f);
630  }
631  }
632 
633  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
634 
635  /* Make sure src is aligned too. */
636  if ((((size_t) src) & 15) == 0) {
637  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
638  const __m128 one = _mm_set1_ps(1.0f);
639  const __m128 negone = _mm_set1_ps(-1.0f);
640  const __m128 mulby127 = _mm_set1_ps(127.0f);
641  __m128i *mmdst = (__m128i *) dst;
642  while (i >= 16) { /* 16 * float32 */
643  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
644  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
645  const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
646  const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
      /* Values are in 0..254 here, so the signed 32->16 pack followed by the unsigned 16->8 pack does not saturate. */
647  _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4))); /* pack down, store out. */
648  i -= 16; src += 16; mmdst++;
649  }
650  dst = (Uint8 *) mmdst;
651  }
652 
653  /* Finish off any leftovers with scalar operations. */
654  while (i) {
655  const float sample = *src;
656  if (sample >= 1.0f) {
657  *dst = 255;
658  } else if (sample <= -1.0f) {
659  *dst = 0;
660  } else {
661  *dst = (Uint8)((sample + 1.0f) * 127.0f);
662  }
663  i--; src++; dst++;
664  }
665 
666  cvt->len_cvt /= 4;
667  if (cvt->filters[++cvt->filter_index]) {
668  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
669  }
670 }
671 
/*
 * SSE2 version: narrow float32 samples in cvt->buf to signed 16-bit, in
 * place.
 *
 * The output is half the input size, so the buffer is processed front to
 * back.  Three phases:
 *   1. scalar conversions until dst is 16-byte aligned;
 *   2. if src is 16-byte aligned at that point, 8 samples per iteration using
 *      aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * NOTE(review): the two paths do not agree at the negative clamp limit.  The
 * scalar code writes -32768 for samples <= -1.0, while the vector code clamps
 * to -1.0 and multiplies by 32767, giving -32767.  The scalar code also
 * converts with a C cast where the vector code uses _mm_cvtps_epi32.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been divided by 2 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_S16SYS.
 */
672 static void SDLCALL
673 SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
674 {
675  const float *src = (const float *) cvt->buf;
676  Sint16 *dst = (Sint16 *) cvt->buf;
677  int i;
678 
679  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
680 
681  /* Get dst aligned to 16 bytes */
682  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
683  const float sample = *src;
684  if (sample >= 1.0f) {
685  *dst = 32767;
686  } else if (sample <= -1.0f) {
687  *dst = -32768;
688  } else {
689  *dst = (Sint16)(sample * 32767.0f);
690  }
691  }
692 
693  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
694 
695  /* Make sure src is aligned too. */
696  if ((((size_t) src) & 15) == 0) {
697  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
698  const __m128 one = _mm_set1_ps(1.0f);
699  const __m128 negone = _mm_set1_ps(-1.0f);
700  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
701  __m128i *mmdst = (__m128i *) dst;
702  while (i >= 8) { /* 8 * float32 */
703  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
704  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
      /* Both 16-byte loads above complete before the 16-byte store over the same buffer. */
705  _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2)); /* pack to sint16, store out. */
706  i -= 8; src += 8; mmdst++;
707  }
708  dst = (Sint16 *) mmdst;
709  }
710 
711  /* Finish off any leftovers with scalar operations. */
712  while (i) {
713  const float sample = *src;
714  if (sample >= 1.0f) {
715  *dst = 32767;
716  } else if (sample <= -1.0f) {
717  *dst = -32768;
718  } else {
719  *dst = (Sint16)(sample * 32767.0f);
720  }
721  i--; src++; dst++;
722  }
723 
724  cvt->len_cvt /= 2;
725  if (cvt->filters[++cvt->filter_index]) {
726  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
727  }
728 }
729 
/*
 * SSE2 version: narrow float32 samples in cvt->buf to unsigned 16-bit, in
 * place.
 *
 * The output is half the input size, so the buffer is processed front to
 * back.  Three phases:
 *   1. scalar conversions until dst is 16-byte aligned;
 *   2. if src is 16-byte aligned at that point, 8 samples per iteration using
 *      aligned loads and stores;
 *   3. scalar conversions for whatever is left.
 *
 * NOTE(review): the two paths use different mappings.  The scalar code
 * computes (sample + 1.0) * 32767, so -1.0 -> 0 and 0.0 -> 32767.  The vector
 * code computes sample * 32767 as a signed value and flips the top bit, so
 * -1.0 -> 1 and 0.0 -> 32768.  Both give 65535 for samples >= 1.0.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been divided by 2 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_U16SYS.
 */
730 static void SDLCALL
731 SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
732 {
733  const float *src = (const float *) cvt->buf;
734  Uint16 *dst = (Uint16 *) cvt->buf;
735  int i;
736 
737  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
738 
739  /* Get dst aligned to 16 bytes */
740  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
741  const float sample = *src;
742  if (sample >= 1.0f) {
743  *dst = 65535;
744  } else if (sample <= -1.0f) {
745  *dst = 0;
746  } else {
747  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
748  }
749  }
750 
751  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
752 
753  /* Make sure src is aligned too. */
754  if ((((size_t) src) & 15) == 0) {
755  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
756  /* This calculates differently than the scalar path because SSE2 can't
757  pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
758  saturation, so that would corrupt our data. _mm_packus_epi32 exists,
759  but not before SSE 4.1. So we convert from float to sint16, packing
760  that down with legit signed saturation, and then xor the top bit
761  against 1. This results in the correct unsigned 16-bit value, even
762  though it looks like dark magic. */
763  const __m128 mulby32767 = _mm_set1_ps(32767.0f);
764  const __m128i topbit = _mm_set1_epi16(-32768);
765  const __m128 one = _mm_set1_ps(1.0f);
766  const __m128 negone = _mm_set1_ps(-1.0f);
767  __m128i *mmdst = (__m128i *) dst;
768  while (i >= 8) { /* 8 * float32 */
769  const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
770  const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
      /* Flipping bit 15 adds 32768 modulo 65536, moving the signed range onto the unsigned one. */
771  _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit)); /* pack to sint16, xor top bit, store out. */
772  i -= 8; src += 8; mmdst++;
773  }
774  dst = (Uint16 *) mmdst;
775  }
776 
777  /* Finish off any leftovers with scalar operations. */
778  while (i) {
779  const float sample = *src;
780  if (sample >= 1.0f) {
781  *dst = 65535;
782  } else if (sample <= -1.0f) {
783  *dst = 0;
784  } else {
785  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
786  }
787  i--; src++; dst++;
788  }
789 
790  cvt->len_cvt /= 2;
791  if (cvt->filters[++cvt->filter_index]) {
792  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
793  }
794 }
795 
796 static void SDLCALL
797 SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
798 {
799  const float *src = (const float *) cvt->buf;
800  Sint32 *dst = (Sint32 *) cvt->buf;
801  int i;
802 
803  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
804 
805  /* Get dst aligned to 16 bytes */
806  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
807  const float sample = *src;
808  if (sample >= 1.0f) {
809  *dst = 2147483647;
810  } else if (sample <= -1.0f) {
811  *dst = (Sint32) -2147483648LL;
812  } else {
813  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
814  }
815  }
816 
817  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
818  SDL_assert(!i || ((((size_t) src) & 15) == 0));
819 
820  {
821  /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
822  const __m128 one = _mm_set1_ps(1.0f);
823  const __m128 negone = _mm_set1_ps(-1.0f);
824  const __m128 mulby8388607 = _mm_set1_ps(8388607.0f);
825  __m128i *mmdst = (__m128i *) dst;
826  while (i >= 4) { /* 4 * float32 */
827  _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby8388607)), 8)); /* load 4 floats, clamp, convert to sint32 */
828  i -= 4; src += 4; mmdst++;
829  }
830  dst = (Sint32 *) mmdst;
831  }
832 
833  /* Finish off any leftovers with scalar operations. */
834  while (i) {
835  const float sample = *src;
836  if (sample >= 1.0f) {
837  *dst = 2147483647;
838  } else if (sample <= -1.0f) {
839  *dst = (Sint32) -2147483648LL;
840  } else {
841  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
842  }
843  i--; src++; dst++;
844  }
845 
846  if (cvt->filters[++cvt->filter_index]) {
847  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
848  }
849 }
850 #endif
851 
852 
853 #if HAVE_NEON_INTRINSICS
/*
 * NEON version: widen signed 8-bit samples in cvt->buf to float32, in place.
 *
 * The buffer is walked from its last sample towards its first, because each
 * input byte expands to four output bytes.  Three phases:
 *   1. scalar conversions until the 16-float output block ending at dst
 *      starts on a 16-byte boundary;
 *   2. if the matching 16-byte input block is also aligned, 16 samples per
 *      iteration with NEON loads, widening moves and stores;
 *   3. scalar conversions for whatever is left.
 *
 * cvt:    conversion state; cvt->buf holds cvt->len_cvt input bytes.
 * format: unused here.
 *
 * On return cvt->len_cvt has been multiplied by 4 and, if another filter is
 * queued in cvt->filters, it has been called with AUDIO_F32SYS.
 */
854 static void SDLCALL
855 SDL_Convert_S8_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
856 {
857  const Sint8 *src = ((const Sint8 *) (cvt->buf + cvt->len_cvt)) - 1;
858  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
859  int i;
860 
861  LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using NEON)");
862 
863  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
864  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
865  *dst = ((float) *src) * DIVBY128;
866  }
867 
868  src -= 15; dst -= 15; /* adjust to read NEON blocks from the start. */
869  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
870 
871  /* Make sure src is aligned too. */
872  if ((((size_t) src) & 15) == 0) {
873  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
874  const int8_t *mmsrc = (const int8_t *) src;
875  const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
876  while (i >= 16) { /* 16 * 8-bit */
877  const int8x16_t bytes = vld1q_s8(mmsrc); /* get 16 sint8 into a NEON register. */
878  const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes)); /* convert top 8 bytes to 8 int16 */
879  const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes)); /* convert bottom 8 bytes to 8 int16 */
880  /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
881  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
882  vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
883  vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
884  vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
      /* mmsrc is a byte pointer here, so stepping back one block means subtracting 16. */
885  i -= 16; mmsrc -= 16; dst -= 16;
886  }
887 
888  src = (const Sint8 *) mmsrc;
889  }
890 
891  src += 15; dst += 15; /* adjust for any scalar finishing. */
892 
893  /* Finish off any leftovers with scalar operations. */
894  while (i) {
895  *dst = ((float) *src) * DIVBY128;
896  i--; src--; dst--;
897  }
898 
899  cvt->len_cvt *= 4;
900  if (cvt->filters[++cvt->filter_index]) {
901  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
902  }
903 }
904 
905 static void SDLCALL
906 SDL_Convert_U8_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
907 {
908  const Uint8 *src = ((const Uint8 *) (cvt->buf + cvt->len_cvt)) - 1;
909  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;
910  int i;
911 
912  LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using NEON)");
913 
914  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
915  for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
916  *dst = (((float) *src) * DIVBY128) - 1.0f;
917  }
918 
919  src -= 15; dst -= 15; /* adjust to read NEON blocks from the start. */
920  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
921 
922  /* Make sure src is aligned too. */
923  if ((((size_t) src) & 15) == 0) {
924  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
925  const uint8_t *mmsrc = (const uint8_t *) src;
926  const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
927  const float32x4_t negone = vdupq_n_f32(-1.0f);
928  while (i >= 16) { /* 16 * 8-bit */
929  const uint8x16_t bytes = vld1q_u8(mmsrc); /* get 16 uint8 into a NEON register. */
930  const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes)); /* convert top 8 bytes to 8 uint16 */
931  const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes)); /* convert bottom 8 bytes to 8 uint16 */
932  /* split uint16 to two uint32, then convert to float, then multiply to normalize, subtract to adjust for sign, store. */
933  vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));
934  vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));
935  vst1q_f32(dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));
936  vst1q_f32(dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));
937  i -= 16; mmsrc -= 16; dst -= 16;
938  }
939 
940  src = (const Uint8 *) mmsrc;
941  }
942 
943  src += 15; dst += 15; /* adjust for any scalar finishing. */
944 
945  /* Finish off any leftovers with scalar operations. */
946  while (i) {
947  *dst = (((float) *src) * DIVBY128) - 1.0f;
948  i--; src--; dst--;
949  }
950 
951  cvt->len_cvt *= 4;
952  if (cvt->filters[++cvt->filter_index]) {
953  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
954  }
955 }
956 
957 static void SDLCALL
958 SDL_Convert_S16_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
959 {
960  const Sint16 *src = ((const Sint16 *) (cvt->buf + cvt->len_cvt)) - 1;
961  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
962  int i;
963 
964  LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using NEON)");
965 
966  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
967  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
968  *dst = ((float) *src) * DIVBY32768;
969  }
970 
971  src -= 7; dst -= 7; /* adjust to read NEON blocks from the start. */
972  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
973 
974  /* Make sure src is aligned too. */
975  if ((((size_t) src) & 15) == 0) {
976  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
977  const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
978  while (i >= 8) { /* 8 * 16-bit */
979  const int16x8_t ints = vld1q_s16((int16_t const *) src); /* get 8 sint16 into a NEON register. */
980  /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
981  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
982  vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
983  i -= 8; src -= 8; dst -= 8;
984  }
985  }
986 
987  src += 7; dst += 7; /* adjust for any scalar finishing. */
988 
989  /* Finish off any leftovers with scalar operations. */
990  while (i) {
991  *dst = ((float) *src) * DIVBY32768;
992  i--; src--; dst--;
993  }
994 
995  cvt->len_cvt *= 2;
996  if (cvt->filters[++cvt->filter_index]) {
997  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
998  }
999 }
1000 
1001 static void SDLCALL
1002 SDL_Convert_U16_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1003 {
1004  const Uint16 *src = ((const Uint16 *) (cvt->buf + cvt->len_cvt)) - 1;
1005  float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;
1006  int i;
1007 
1008  LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using NEON)");
1009 
1010  /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
1011  for (i = cvt->len_cvt / sizeof (Sint16); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
1012  *dst = (((float) *src) * DIVBY32768) - 1.0f;
1013  }
1014 
1015  src -= 7; dst -= 7; /* adjust to read NEON blocks from the start. */
1016  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1017 
1018  /* Make sure src is aligned too. */
1019  if ((((size_t) src) & 15) == 0) {
1020  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1021  const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
1022  const float32x4_t negone = vdupq_n_f32(-1.0f);
1023  while (i >= 8) { /* 8 * 16-bit */
1024  const uint16x8_t uints = vld1q_u16((uint16_t const *) src); /* get 8 uint16 into a NEON register. */
1025  /* split uint16 to two int32, then convert to float, then multiply to normalize, subtract for sign, store. */
1026  vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1027  vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1028  i -= 8; src -= 8; dst -= 8;
1029  }
1030  }
1031 
1032  src += 7; dst += 7; /* adjust for any scalar finishing. */
1033 
1034  /* Finish off any leftovers with scalar operations. */
1035  while (i) {
1036  *dst = (((float) *src) * DIVBY32768) - 1.0f;
1037  i--; src--; dst--;
1038  }
1039 
1040  cvt->len_cvt *= 2;
1041  if (cvt->filters[++cvt->filter_index]) {
1042  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1043  }
1044 }
1045 
1046 static void SDLCALL
1047 SDL_Convert_S32_to_F32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1048 {
1049  const Sint32 *src = (const Sint32 *) cvt->buf;
1050  float *dst = (float *) cvt->buf;
1051  int i;
1052 
1053  LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using NEON)");
1054 
1055  /* Get dst aligned to 16 bytes */
1056  for (i = cvt->len_cvt / sizeof (Sint32); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1057  *dst = ((float) (*src>>8)) * DIVBY8388607;
1058  }
1059 
1060  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1061 
1062  /* Make sure src is aligned too. */
1063  if ((((size_t) src) & 15) == 0) {
1064  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1065  const float32x4_t divby8388607 = vdupq_n_f32(DIVBY8388607);
1066  const int32_t *mmsrc = (const int32_t *) src;
1067  while (i >= 4) { /* 4 * sint32 */
1068  /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
1069  vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1070  i -= 4; mmsrc += 4; dst += 4;
1071  }
1072  src = (const Sint32 *) mmsrc;
1073  }
1074 
1075  /* Finish off any leftovers with scalar operations. */
1076  while (i) {
1077  *dst = ((float) (*src>>8)) * DIVBY8388607;
1078  i--; src++; dst++;
1079  }
1080 
1081  if (cvt->filters[++cvt->filter_index]) {
1082  cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
1083  }
1084 }
1085 
1086 static void SDLCALL
1087 SDL_Convert_F32_to_S8_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1088 {
1089  const float *src = (const float *) cvt->buf;
1090  Sint8 *dst = (Sint8 *) cvt->buf;
1091  int i;
1092 
1093  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using NEON)");
1094 
1095  /* Get dst aligned to 16 bytes */
1096  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1097  const float sample = *src;
1098  if (sample >= 1.0f) {
1099  *dst = 127;
1100  } else if (sample <= -1.0f) {
1101  *dst = -128;
1102  } else {
1103  *dst = (Sint8)(sample * 127.0f);
1104  }
1105  }
1106 
1107  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1108 
1109  /* Make sure src is aligned too. */
1110  if ((((size_t) src) & 15) == 0) {
1111  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1112  const float32x4_t one = vdupq_n_f32(1.0f);
1113  const float32x4_t negone = vdupq_n_f32(-1.0f);
1114  const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1115  int8_t *mmdst = (int8_t *) dst;
1116  while (i >= 16) { /* 16 * float32 */
1117  const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1118  const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1119  const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1120  const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), mulby127)); /* load 4 floats, clamp, convert to sint32 */
1121  const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, narrow to sint8 */
1122  const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4))); /* narrow to sint16, combine, narrow to sint8 */
1123  vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi)); /* combine to int8x16_t, store out */
1124  i -= 16; src += 16; mmdst += 16;
1125  }
1126  dst = (Sint8 *) mmdst;
1127  }
1128 
1129  /* Finish off any leftovers with scalar operations. */
1130  while (i) {
1131  const float sample = *src;
1132  if (sample >= 1.0f) {
1133  *dst = 127;
1134  } else if (sample <= -1.0f) {
1135  *dst = -128;
1136  } else {
1137  *dst = (Sint8)(sample * 127.0f);
1138  }
1139  i--; src++; dst++;
1140  }
1141 
1142  cvt->len_cvt /= 4;
1143  if (cvt->filters[++cvt->filter_index]) {
1144  cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
1145  }
1146 }
1147 
1148 static void SDLCALL
1149 SDL_Convert_F32_to_U8_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1150 {
1151  const float *src = (const float *) cvt->buf;
1152  Uint8 *dst = (Uint8 *) cvt->buf;
1153  int i;
1154 
1155  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using NEON)");
1156 
1157  /* Get dst aligned to 16 bytes */
1158  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1159  const float sample = *src;
1160  if (sample >= 1.0f) {
1161  *dst = 255;
1162  } else if (sample <= -1.0f) {
1163  *dst = 0;
1164  } else {
1165  *dst = (Uint8)((sample + 1.0f) * 127.0f);
1166  }
1167  }
1168 
1169  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1170 
1171  /* Make sure src is aligned too. */
1172  if ((((size_t) src) & 15) == 0) {
1173  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1174  const float32x4_t one = vdupq_n_f32(1.0f);
1175  const float32x4_t negone = vdupq_n_f32(-1.0f);
1176  const float32x4_t mulby127 = vdupq_n_f32(127.0f);
1177  uint8_t *mmdst = (uint8_t *) dst;
1178  while (i >= 16) { /* 16 * float32 */
1179  const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1180  const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1181  const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1182  const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), one), mulby127)); /* load 4 floats, clamp, convert to uint32 */
1183  const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, narrow to uint8 */
1184  const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4))); /* narrow to uint16, combine, narrow to uint8 */
1185  vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi)); /* combine to uint8x16_t, store out */
1186  i -= 16; src += 16; mmdst += 16;
1187  }
1188 
1189  dst = (Uint8 *) mmdst;
1190  }
1191 
1192  /* Finish off any leftovers with scalar operations. */
1193  while (i) {
1194  const float sample = *src;
1195  if (sample >= 1.0f) {
1196  *dst = 255;
1197  } else if (sample <= -1.0f) {
1198  *dst = 0;
1199  } else {
1200  *dst = (Uint8)((sample + 1.0f) * 127.0f);
1201  }
1202  i--; src++; dst++;
1203  }
1204 
1205  cvt->len_cvt /= 4;
1206  if (cvt->filters[++cvt->filter_index]) {
1207  cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
1208  }
1209 }
1210 
1211 static void SDLCALL
1212 SDL_Convert_F32_to_S16_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1213 {
1214  const float *src = (const float *) cvt->buf;
1215  Sint16 *dst = (Sint16 *) cvt->buf;
1216  int i;
1217 
1218  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using NEON)");
1219 
1220  /* Get dst aligned to 16 bytes */
1221  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1222  const float sample = *src;
1223  if (sample >= 1.0f) {
1224  *dst = 32767;
1225  } else if (sample <= -1.0f) {
1226  *dst = -32768;
1227  } else {
1228  *dst = (Sint16)(sample * 32767.0f);
1229  }
1230  }
1231 
1232  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1233 
1234  /* Make sure src is aligned too. */
1235  if ((((size_t) src) & 15) == 0) {
1236  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1237  const float32x4_t one = vdupq_n_f32(1.0f);
1238  const float32x4_t negone = vdupq_n_f32(-1.0f);
1239  const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1240  int16_t *mmdst = (int16_t *) dst;
1241  while (i >= 8) { /* 8 * float32 */
1242  const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
1243  const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby32767)); /* load 4 floats, clamp, convert to sint32 */
1244  vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, store out. */
1245  i -= 8; src += 8; mmdst += 8;
1246  }
1247  dst = (Sint16 *) mmdst;
1248  }
1249 
1250  /* Finish off any leftovers with scalar operations. */
1251  while (i) {
1252  const float sample = *src;
1253  if (sample >= 1.0f) {
1254  *dst = 32767;
1255  } else if (sample <= -1.0f) {
1256  *dst = -32768;
1257  } else {
1258  *dst = (Sint16)(sample * 32767.0f);
1259  }
1260  i--; src++; dst++;
1261  }
1262 
1263  cvt->len_cvt /= 2;
1264  if (cvt->filters[++cvt->filter_index]) {
1265  cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
1266  }
1267 }
1268 
1269 static void SDLCALL
1270 SDL_Convert_F32_to_U16_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1271 {
1272  const float *src = (const float *) cvt->buf;
1273  Uint16 *dst = (Uint16 *) cvt->buf;
1274  int i;
1275 
1276  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using NEON)");
1277 
1278  /* Get dst aligned to 16 bytes */
1279  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1280  const float sample = *src;
1281  if (sample >= 1.0f) {
1282  *dst = 65535;
1283  } else if (sample <= -1.0f) {
1284  *dst = 0;
1285  } else {
1286  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
1287  }
1288  }
1289 
1290  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1291 
1292  /* Make sure src is aligned too. */
1293  if ((((size_t) src) & 15) == 0) {
1294  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1295  const float32x4_t one = vdupq_n_f32(1.0f);
1296  const float32x4_t negone = vdupq_n_f32(-1.0f);
1297  const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
1298  uint16_t *mmdst = (uint16_t *) dst;
1299  while (i >= 8) { /* 8 * float32 */
1300  const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby32767)); /* load 4 floats, clamp, convert to uint32 */
1301  const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby32767)); /* load 4 floats, clamp, convert to uint32 */
1302  vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, store out. */
1303  i -= 8; src += 8; mmdst += 8;
1304  }
1305  dst = (Uint16 *) mmdst;
1306  }
1307 
1308  /* Finish off any leftovers with scalar operations. */
1309  while (i) {
1310  const float sample = *src;
1311  if (sample >= 1.0f) {
1312  *dst = 65535;
1313  } else if (sample <= -1.0f) {
1314  *dst = 0;
1315  } else {
1316  *dst = (Uint16)((sample + 1.0f) * 32767.0f);
1317  }
1318  i--; src++; dst++;
1319  }
1320 
1321  cvt->len_cvt /= 2;
1322  if (cvt->filters[++cvt->filter_index]) {
1323  cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
1324  }
1325 }
1326 
1327 static void SDLCALL
1328 SDL_Convert_F32_to_S32_NEON(SDL_AudioCVT *cvt, SDL_AudioFormat format)
1329 {
1330  const float *src = (const float *) cvt->buf;
1331  Sint32 *dst = (Sint32 *) cvt->buf;
1332  int i;
1333 
1334  LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using NEON)");
1335 
1336  /* Get dst aligned to 16 bytes */
1337  for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
1338  const float sample = *src;
1339  if (sample >= 1.0f) {
1340  *dst = 2147483647;
1341  } else if (sample <= -1.0f) {
1342  *dst = (-2147483647) - 1;
1343  } else {
1344  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
1345  }
1346  }
1347 
1348  SDL_assert(!i || ((((size_t) dst) & 15) == 0));
1349  SDL_assert(!i || ((((size_t) src) & 15) == 0));
1350 
1351  {
1352  /* Aligned! Do NEON blocks as long as we have 16 bytes available. */
1353  const float32x4_t one = vdupq_n_f32(1.0f);
1354  const float32x4_t negone = vdupq_n_f32(-1.0f);
1355  const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0f);
1356  int32_t *mmdst = (int32_t *) dst;
1357  while (i >= 4) { /* 4 * float32 */
1358  vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby8388607)), 8));
1359  i -= 4; src += 4; mmdst += 4;
1360  }
1361  dst = (Sint32 *) mmdst;
1362  }
1363 
1364  /* Finish off any leftovers with scalar operations. */
1365  while (i) {
1366  const float sample = *src;
1367  if (sample >= 1.0f) {
1368  *dst = 2147483647;
1369  } else if (sample <= -1.0f) {
1370  *dst = (-2147483647) - 1;
1371  } else {
1372  *dst = ((Sint32)(sample * 8388607.0f)) << 8;
1373  }
1374  i--; src++; dst++;
1375  }
1376 
1377  if (cvt->filters[++cvt->filter_index]) {
1378  cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
1379  }
1380 }
1381 #endif
1382 
1383 
1384 
1386 {
1387  static SDL_bool converters_chosen = SDL_FALSE;
1388 
1389  if (converters_chosen) {
1390  return;
1391  }
1392 
1393 #define SET_CONVERTER_FUNCS(fntype) \
1394  SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
1395  SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
1396  SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
1397  SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
1398  SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
1399  SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
1400  SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
1401  SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
1402  SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
1403  SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
1404  converters_chosen = SDL_TRUE
1405 
1406 #if HAVE_SSE2_INTRINSICS
1407  if (SDL_HasSSE2()) {
1408  SET_CONVERTER_FUNCS(SSE2);
1409  return;
1410  }
1411 #endif
1412 
1413 #if HAVE_NEON_INTRINSICS
1414  if (SDL_HasNEON()) {
1415  SET_CONVERTER_FUNCS(NEON);
1416  return;
1417  }
1418 #endif
1419 
1420 #if NEED_SCALAR_CONVERTER_FALLBACKS
1421  SET_CONVERTER_FUNCS(Scalar);
1422 #endif
1423 
1424 #undef SET_CONVERTER_FUNCS
1425 
1426  SDL_assert(converters_chosen == SDL_TRUE);
1427 }
1428 
1429 /* vi: set ts=4 sw=4 expandtab: */
#define SDL_assert(condition)
Definition: SDL_assert.h:171
#define AUDIO_S16SYS
Definition: SDL_audio.h:123
Uint16 SDL_AudioFormat
Audio format flags.
Definition: SDL_audio.h:64
#define AUDIO_U8
Definition: SDL_audio.h:89
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
Definition: SDL_audio.h:193
#define AUDIO_S8
Definition: SDL_audio.h:90
#define AUDIO_S32SYS
Definition: SDL_audio.h:124
#define AUDIO_F32SYS
Definition: SDL_audio.h:125
#define AUDIO_U16SYS
Definition: SDL_audio.h:122
#define LOG_DEBUG_CONVERT(from, to)
Definition: SDL_audio_c.h:34
SDL_AudioFilter SDL_Convert_F32_to_U16
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
SDL_AudioFilter SDL_Convert_F32_to_S32
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_S16
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
#define DIVBY8388607
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U8
#define DIVBY128
SDL_AudioFilter SDL_Convert_F32_to_S8
SDL_AudioFilter SDL_Convert_S8_to_F32
#define SET_CONVERTER_FUNCS(fntype)
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_S32_to_F32
SDL_AudioFilter SDL_Convert_S16_to_F32
SDL_AudioFilter SDL_Convert_U8_to_F32
#define DIVBY32768
signed short int16_t
unsigned short uint16_t
unsigned int size_t
signed int int32_t
unsigned char uint8_t
signed char int8_t
#define SDL_HasNEON
#define SDL_HasSSE2
#define SDLCALL
Definition: SDL_internal.h:49
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: SDL_opengl.h:1572
GLboolean GLboolean GLboolean b
GLenum src
GLboolean GLboolean GLboolean GLboolean a
GLenum GLenum dst
uint16_t Uint16
Definition: SDL_stdinc.h:197
int16_t Sint16
Definition: SDL_stdinc.h:191
SDL_bool
Definition: SDL_stdinc.h:168
@ SDL_TRUE
Definition: SDL_stdinc.h:170
@ SDL_FALSE
Definition: SDL_stdinc.h:169
int8_t Sint8
Definition: SDL_stdinc.h:179
uint8_t Uint8
Definition: SDL_stdinc.h:185
int32_t Sint32
Definition: SDL_stdinc.h:203
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define NULL
Definition: begin_code.h:163
static const double zero
Definition: e_atan2.c:44
static const double one
Definition: e_exp.c:83
A structure to hold a set of audio conversion filters and buffers.
Definition: SDL_audio.h:227
Uint8 * buf
Definition: SDL_audio.h:232
int filter_index
Definition: SDL_audio.h:238
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
Definition: SDL_audio.h:237