3 #include "../../SDL_internal.h"
/*
 * Scale factor of the fixed-point representation: 2^PRECISION.
 * PRECISION is not defined in this block; it must be defined before either
 * macro below is expanded.
 */
#define PRECISION_FACTOR (1 << (PRECISION))

/*
 * Converts a floating-point coefficient into an int16_t fixed-point value:
 * value * PRECISION_FACTOR, plus 0.5, then truncated by the int16_t cast.
 *
 * The "+0.5 then truncate" sequence rounds to nearest only for non-negative
 * inputs, which is why the coefficient tables in this file write negative
 * entries as -V(x) rather than V(-x).
 *
 * The argument and the whole expansion are parenthesized so that an
 * expression argument such as V(a + b) is scaled as a whole, and so that the
 * result composes safely with surrounding operators.
 */
#define V(value) ((int16_t)(((value) * PRECISION_FACTOR) + 0.5))
46 { 0,
V(1.0),
V(1.402), -
V(0.3441), -
V(0.7141),
V(1.772)},
48 { 16,
V(1.1644),
V(1.596), -
V(0.3918), -
V(0.813),
V(2.0172)},
50 { 16,
V(1.1644),
V(1.7927), -
V(0.2132), -
V(0.5329),
V(2.1124)}
55 { 0, {{
V(0.299),
V(0.587),
V(0.114)}, {-
V(0.1687), -
V(0.3313),
V(0.5)}, {
V(0.5), -
V(0.4187), -
V(0.0813)}}},
57 { 16, {{
V(0.2568),
V(0.5041),
V(0.0979)}, {-
V(0.1482), -
V(0.291),
V(0.4392)}, {
V(0.4392), -
V(0.3678), -
V(0.0714)}}},
59 { 16, {{
V(0.1826),
V(0.6142),
V(0.062)}, {-
V(0.1006), -
V(0.3386),
V(0.4392)}, {
V(0.4392), -
V(0.3989), -
V(0.0403)}}}
/*
 * Input-layout selectors. One of these is assigned to YUV_FORMAT in each
 * STD_FUNCTION_NAME / YUV_FORMAT / RGB_FORMAT group further down this file.
 * NOTE(review): the code that consumes YUV_FORMAT is not visible here;
 * presumably a template header tests it with #if -- confirm.
 */
63 #define YUV_FORMAT_420 1
64 #define YUV_FORMAT_422 2
65 #define YUV_FORMAT_NV12 3
/*
 * Output-layout selectors, assigned to RGB_FORMAT in the same groups.
 * NOTE(review): the exact byte order implied by each name (e.g. RGBA vs ABGR)
 * is defined by the consuming code, which is not visible here -- confirm
 * before relying on it.
 */
68 #define RGB_FORMAT_RGB565 1
69 #define RGB_FORMAT_RGB24 2
70 #define RGB_FORMAT_RGBA 3
71 #define RGB_FORMAT_BGRA 4
72 #define RGB_FORMAT_ARGB 5
73 #define RGB_FORMAT_ABGR 6
80 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
81 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
82 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
83 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
84 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
85 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
86 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
87 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
88 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
89 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
90 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
91 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
92 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
98 #define STD_FUNCTION_NAME yuv420_rgb565_std
99 #define YUV_FORMAT YUV_FORMAT_420
100 #define RGB_FORMAT RGB_FORMAT_RGB565
103 #define STD_FUNCTION_NAME yuv420_rgb24_std
104 #define YUV_FORMAT YUV_FORMAT_420
105 #define RGB_FORMAT RGB_FORMAT_RGB24
108 #define STD_FUNCTION_NAME yuv420_rgba_std
109 #define YUV_FORMAT YUV_FORMAT_420
110 #define RGB_FORMAT RGB_FORMAT_RGBA
113 #define STD_FUNCTION_NAME yuv420_bgra_std
114 #define YUV_FORMAT YUV_FORMAT_420
115 #define RGB_FORMAT RGB_FORMAT_BGRA
118 #define STD_FUNCTION_NAME yuv420_argb_std
119 #define YUV_FORMAT YUV_FORMAT_420
120 #define RGB_FORMAT RGB_FORMAT_ARGB
123 #define STD_FUNCTION_NAME yuv420_abgr_std
124 #define YUV_FORMAT YUV_FORMAT_420
125 #define RGB_FORMAT RGB_FORMAT_ABGR
128 #define STD_FUNCTION_NAME yuv422_rgb565_std
129 #define YUV_FORMAT YUV_FORMAT_422
130 #define RGB_FORMAT RGB_FORMAT_RGB565
133 #define STD_FUNCTION_NAME yuv422_rgb24_std
134 #define YUV_FORMAT YUV_FORMAT_422
135 #define RGB_FORMAT RGB_FORMAT_RGB24
138 #define STD_FUNCTION_NAME yuv422_rgba_std
139 #define YUV_FORMAT YUV_FORMAT_422
140 #define RGB_FORMAT RGB_FORMAT_RGBA
143 #define STD_FUNCTION_NAME yuv422_bgra_std
144 #define YUV_FORMAT YUV_FORMAT_422
145 #define RGB_FORMAT RGB_FORMAT_BGRA
148 #define STD_FUNCTION_NAME yuv422_argb_std
149 #define YUV_FORMAT YUV_FORMAT_422
150 #define RGB_FORMAT RGB_FORMAT_ARGB
153 #define STD_FUNCTION_NAME yuv422_abgr_std
154 #define YUV_FORMAT YUV_FORMAT_422
155 #define RGB_FORMAT RGB_FORMAT_ABGR
158 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
159 #define YUV_FORMAT YUV_FORMAT_NV12
160 #define RGB_FORMAT RGB_FORMAT_RGB565
163 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
164 #define YUV_FORMAT YUV_FORMAT_NV12
165 #define RGB_FORMAT RGB_FORMAT_RGB24
168 #define STD_FUNCTION_NAME yuvnv12_rgba_std
169 #define YUV_FORMAT YUV_FORMAT_NV12
170 #define RGB_FORMAT RGB_FORMAT_RGBA
173 #define STD_FUNCTION_NAME yuvnv12_bgra_std
174 #define YUV_FORMAT YUV_FORMAT_NV12
175 #define RGB_FORMAT RGB_FORMAT_BGRA
178 #define STD_FUNCTION_NAME yuvnv12_argb_std
179 #define YUV_FORMAT YUV_FORMAT_NV12
180 #define RGB_FORMAT RGB_FORMAT_ARGB
183 #define STD_FUNCTION_NAME yuvnv12_abgr_std
184 #define YUV_FORMAT YUV_FORMAT_NV12
185 #define RGB_FORMAT RGB_FORMAT_ABGR
200 *rgb_ptr2=
RGB+(
y+1)*RGB_stride;
203 *y_ptr2=
Y+(
y+1)*Y_stride,
204 *u_ptr=U+(
y/2)*UV_stride,
205 *v_ptr=
V+(
y/2)*UV_stride;
212 y_tmp =
param->matrix[0][0]*rgb_ptr1[0] +
param->matrix[0][1]*rgb_ptr1[1] +
param->matrix[0][2]*rgb_ptr1[2];
213 u_tmp =
param->matrix[1][0]*rgb_ptr1[0] +
param->matrix[1][1]*rgb_ptr1[1] +
param->matrix[1][2]*rgb_ptr1[2];
214 v_tmp =
param->matrix[2][0]*rgb_ptr1[0] +
param->matrix[2][1]*rgb_ptr1[1] +
param->matrix[2][2]*rgb_ptr1[2];
217 y_tmp =
param->matrix[0][0]*rgb_ptr1[3] +
param->matrix[0][1]*rgb_ptr1[4] +
param->matrix[0][2]*rgb_ptr1[5];
218 u_tmp +=
param->matrix[1][0]*rgb_ptr1[3] +
param->matrix[1][1]*rgb_ptr1[4] +
param->matrix[1][2]*rgb_ptr1[5];
219 v_tmp +=
param->matrix[2][0]*rgb_ptr1[3] +
param->matrix[2][1]*rgb_ptr1[4] +
param->matrix[2][2]*rgb_ptr1[5];
222 y_tmp =
param->matrix[0][0]*rgb_ptr2[0] +
param->matrix[0][1]*rgb_ptr2[1] +
param->matrix[0][2]*rgb_ptr2[2];
223 u_tmp +=
param->matrix[1][0]*rgb_ptr2[0] +
param->matrix[1][1]*rgb_ptr2[1] +
param->matrix[1][2]*rgb_ptr2[2];
224 v_tmp +=
param->matrix[2][0]*rgb_ptr2[0] +
param->matrix[2][1]*rgb_ptr2[1] +
param->matrix[2][2]*rgb_ptr2[2];
227 y_tmp =
param->matrix[0][0]*rgb_ptr2[3] +
param->matrix[0][1]*rgb_ptr2[4] +
param->matrix[0][2]*rgb_ptr2[5];
228 u_tmp +=
param->matrix[1][0]*rgb_ptr2[3] +
param->matrix[1][1]*rgb_ptr2[4] +
param->matrix[1][2]*rgb_ptr2[5];
229 v_tmp +=
param->matrix[2][0]*rgb_ptr2[3] +
param->matrix[2][1]*rgb_ptr2[4] +
param->matrix[2][2]*rgb_ptr2[5];
247 #define SSE_FUNCTION_NAME yuv420_rgb565_sse
248 #define STD_FUNCTION_NAME yuv420_rgb565_std
249 #define YUV_FORMAT YUV_FORMAT_420
250 #define RGB_FORMAT RGB_FORMAT_RGB565
254 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu
255 #define STD_FUNCTION_NAME yuv420_rgb565_std
256 #define YUV_FORMAT YUV_FORMAT_420
257 #define RGB_FORMAT RGB_FORMAT_RGB565
260 #define SSE_FUNCTION_NAME yuv420_rgb24_sse
261 #define STD_FUNCTION_NAME yuv420_rgb24_std
262 #define YUV_FORMAT YUV_FORMAT_420
263 #define RGB_FORMAT RGB_FORMAT_RGB24
267 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu
268 #define STD_FUNCTION_NAME yuv420_rgb24_std
269 #define YUV_FORMAT YUV_FORMAT_420
270 #define RGB_FORMAT RGB_FORMAT_RGB24
273 #define SSE_FUNCTION_NAME yuv420_rgba_sse
274 #define STD_FUNCTION_NAME yuv420_rgba_std
275 #define YUV_FORMAT YUV_FORMAT_420
276 #define RGB_FORMAT RGB_FORMAT_RGBA
280 #define SSE_FUNCTION_NAME yuv420_rgba_sseu
281 #define STD_FUNCTION_NAME yuv420_rgba_std
282 #define YUV_FORMAT YUV_FORMAT_420
283 #define RGB_FORMAT RGB_FORMAT_RGBA
286 #define SSE_FUNCTION_NAME yuv420_bgra_sse
287 #define STD_FUNCTION_NAME yuv420_bgra_std
288 #define YUV_FORMAT YUV_FORMAT_420
289 #define RGB_FORMAT RGB_FORMAT_BGRA
293 #define SSE_FUNCTION_NAME yuv420_bgra_sseu
294 #define STD_FUNCTION_NAME yuv420_bgra_std
295 #define YUV_FORMAT YUV_FORMAT_420
296 #define RGB_FORMAT RGB_FORMAT_BGRA
299 #define SSE_FUNCTION_NAME yuv420_argb_sse
300 #define STD_FUNCTION_NAME yuv420_argb_std
301 #define YUV_FORMAT YUV_FORMAT_420
302 #define RGB_FORMAT RGB_FORMAT_ARGB
306 #define SSE_FUNCTION_NAME yuv420_argb_sseu
307 #define STD_FUNCTION_NAME yuv420_argb_std
308 #define YUV_FORMAT YUV_FORMAT_420
309 #define RGB_FORMAT RGB_FORMAT_ARGB
312 #define SSE_FUNCTION_NAME yuv420_abgr_sse
313 #define STD_FUNCTION_NAME yuv420_abgr_std
314 #define YUV_FORMAT YUV_FORMAT_420
315 #define RGB_FORMAT RGB_FORMAT_ABGR
319 #define SSE_FUNCTION_NAME yuv420_abgr_sseu
320 #define STD_FUNCTION_NAME yuv420_abgr_std
321 #define YUV_FORMAT YUV_FORMAT_420
322 #define RGB_FORMAT RGB_FORMAT_ABGR
325 #define SSE_FUNCTION_NAME yuv422_rgb565_sse
326 #define STD_FUNCTION_NAME yuv422_rgb565_std
327 #define YUV_FORMAT YUV_FORMAT_422
328 #define RGB_FORMAT RGB_FORMAT_RGB565
332 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu
333 #define STD_FUNCTION_NAME yuv422_rgb565_std
334 #define YUV_FORMAT YUV_FORMAT_422
335 #define RGB_FORMAT RGB_FORMAT_RGB565
338 #define SSE_FUNCTION_NAME yuv422_rgb24_sse
339 #define STD_FUNCTION_NAME yuv422_rgb24_std
340 #define YUV_FORMAT YUV_FORMAT_422
341 #define RGB_FORMAT RGB_FORMAT_RGB24
345 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu
346 #define STD_FUNCTION_NAME yuv422_rgb24_std
347 #define YUV_FORMAT YUV_FORMAT_422
348 #define RGB_FORMAT RGB_FORMAT_RGB24
351 #define SSE_FUNCTION_NAME yuv422_rgba_sse
352 #define STD_FUNCTION_NAME yuv422_rgba_std
353 #define YUV_FORMAT YUV_FORMAT_422
354 #define RGB_FORMAT RGB_FORMAT_RGBA
358 #define SSE_FUNCTION_NAME yuv422_rgba_sseu
359 #define STD_FUNCTION_NAME yuv422_rgba_std
360 #define YUV_FORMAT YUV_FORMAT_422
361 #define RGB_FORMAT RGB_FORMAT_RGBA
364 #define SSE_FUNCTION_NAME yuv422_bgra_sse
365 #define STD_FUNCTION_NAME yuv422_bgra_std
366 #define YUV_FORMAT YUV_FORMAT_422
367 #define RGB_FORMAT RGB_FORMAT_BGRA
371 #define SSE_FUNCTION_NAME yuv422_bgra_sseu
372 #define STD_FUNCTION_NAME yuv422_bgra_std
373 #define YUV_FORMAT YUV_FORMAT_422
374 #define RGB_FORMAT RGB_FORMAT_BGRA
377 #define SSE_FUNCTION_NAME yuv422_argb_sse
378 #define STD_FUNCTION_NAME yuv422_argb_std
379 #define YUV_FORMAT YUV_FORMAT_422
380 #define RGB_FORMAT RGB_FORMAT_ARGB
384 #define SSE_FUNCTION_NAME yuv422_argb_sseu
385 #define STD_FUNCTION_NAME yuv422_argb_std
386 #define YUV_FORMAT YUV_FORMAT_422
387 #define RGB_FORMAT RGB_FORMAT_ARGB
390 #define SSE_FUNCTION_NAME yuv422_abgr_sse
391 #define STD_FUNCTION_NAME yuv422_abgr_std
392 #define YUV_FORMAT YUV_FORMAT_422
393 #define RGB_FORMAT RGB_FORMAT_ABGR
397 #define SSE_FUNCTION_NAME yuv422_abgr_sseu
398 #define STD_FUNCTION_NAME yuv422_abgr_std
399 #define YUV_FORMAT YUV_FORMAT_422
400 #define RGB_FORMAT RGB_FORMAT_ABGR
403 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
404 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
405 #define YUV_FORMAT YUV_FORMAT_NV12
406 #define RGB_FORMAT RGB_FORMAT_RGB565
410 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
411 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
412 #define YUV_FORMAT YUV_FORMAT_NV12
413 #define RGB_FORMAT RGB_FORMAT_RGB565
416 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
417 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
418 #define YUV_FORMAT YUV_FORMAT_NV12
419 #define RGB_FORMAT RGB_FORMAT_RGB24
423 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
424 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
425 #define YUV_FORMAT YUV_FORMAT_NV12
426 #define RGB_FORMAT RGB_FORMAT_RGB24
429 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse
430 #define STD_FUNCTION_NAME yuvnv12_rgba_std
431 #define YUV_FORMAT YUV_FORMAT_NV12
432 #define RGB_FORMAT RGB_FORMAT_RGBA
436 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
437 #define STD_FUNCTION_NAME yuvnv12_rgba_std
438 #define YUV_FORMAT YUV_FORMAT_NV12
439 #define RGB_FORMAT RGB_FORMAT_RGBA
442 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse
443 #define STD_FUNCTION_NAME yuvnv12_bgra_std
444 #define YUV_FORMAT YUV_FORMAT_NV12
445 #define RGB_FORMAT RGB_FORMAT_BGRA
449 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
450 #define STD_FUNCTION_NAME yuvnv12_bgra_std
451 #define YUV_FORMAT YUV_FORMAT_NV12
452 #define RGB_FORMAT RGB_FORMAT_BGRA
455 #define SSE_FUNCTION_NAME yuvnv12_argb_sse
456 #define STD_FUNCTION_NAME yuvnv12_argb_std
457 #define YUV_FORMAT YUV_FORMAT_NV12
458 #define RGB_FORMAT RGB_FORMAT_ARGB
462 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu
463 #define STD_FUNCTION_NAME yuvnv12_argb_std
464 #define YUV_FORMAT YUV_FORMAT_NV12
465 #define RGB_FORMAT RGB_FORMAT_ARGB
468 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse
469 #define STD_FUNCTION_NAME yuvnv12_abgr_std
470 #define YUV_FORMAT YUV_FORMAT_NV12
471 #define RGB_FORMAT RGB_FORMAT_ABGR
475 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
476 #define STD_FUNCTION_NAME yuvnv12_abgr_std
477 #define YUV_FORMAT YUV_FORMAT_NV12
478 #define RGB_FORMAT RGB_FORMAT_ABGR
/*
 * First half of one shuffle round used by UNPACK_RGB24_32.
 *
 * Byte-interleaves each of RGB1..RGB3 with its counterpart RGB4..RGB6:
 *   (RGB1, RGB4) -> R1 (low halves)  and R2 (high halves)
 *   (RGB2, RGB5) -> G1 / G2
 *   (RGB3, RGB6) -> B1 / B2
 *
 * Expands to six bare assignment statements, each already terminated by ';',
 * so all twelve arguments must be assignable __m128i lvalues and the macro is
 * invoked without a trailing semicolon.
 * NOTE(review): the R/G/B output names presumably describe the contents only
 * after the full UNPACK_RGB24_32 sequence has run, not after a single step --
 * confirm.
 */
482 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
483 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
484 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
485 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
486 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
487 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
488 B2 = _mm_unpackhi_epi8(RGB3, RGB6);
/*
 * Second half of one shuffle round used by UNPACK_RGB24_32.
 *
 * Byte-interleaves the six intermediate registers back into RGB1..RGB6:
 *   (R1, G2) -> RGB1 (low halves) and RGB2 (high halves)
 *   (R2, B1) -> RGB3 / RGB4
 *   (G1, B2) -> RGB5 / RGB6
 *
 * Expands to six bare assignment statements, each already terminated by ';',
 * so RGB1..RGB6 must be assignable __m128i lvalues and the macro is invoked
 * without a trailing semicolon.
 *
 * The final statement deliberately carries no line continuation: a trailing
 * backslash there would splice whatever line follows the macro into its
 * replacement list.
 */
#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    RGB1 = _mm_unpacklo_epi8(R1, G2); \
    RGB2 = _mm_unpackhi_epi8(R1, G2); \
    RGB3 = _mm_unpacklo_epi8(R2, B1); \
    RGB4 = _mm_unpackhi_epi8(R2, B1); \
    RGB5 = _mm_unpacklo_epi8(G1, B2); \
    RGB6 = _mm_unpackhi_epi8(G1, B2);
/*
 * Runs the full shuffle sequence over six __m128i registers: STEP1, STEP2,
 * STEP1, STEP2, STEP1 -- three STEP1 passes with two STEP2 passes between
 * them. After the last STEP1 the results are in R1, R2, G1, G2, B1, B2;
 * RGB1..RGB6 are used as scratch and are overwritten.
 * NOTE(review): presumably this de-interleaves packed 24-bit RGB pixels into
 * separate channel registers -- confirm against the callers.
 *
 * Expands to bare statements (the step macros supply their own ';'), so it is
 * invoked without a trailing semicolon.
 *
 * The final step deliberately carries no line continuation: a trailing
 * backslash there would splice whatever line follows the macro into its
 * replacement list.
 */
#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
    UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2)
/*
 * Converts one register each of R, G and B samples (16-bit lanes) into Y, U
 * and V registers using fixed-point arithmetic.
 *
 * For each output row i (0 = Y, 1 = U, 2 = V):
 *   out = (R*matrix[i][0] + G*matrix[i][1] + B*matrix[i][2] + offset) >> PRECISION
 * where each coefficient is broadcast to all lanes with _mm_set1_epi16, the
 * offset is (param->y_shift << PRECISION) for Y and (128 << PRECISION) for
 * U and V, and the shift is arithmetic (_mm_srai_epi16).
 *
 * Reads `param` from the expansion site; it is not a macro argument.
 * Y, U and V must be assignable __m128i lvalues. The expansion is a sequence
 * of bare statements ending in ';', so the macro is invoked without a
 * trailing semicolon.
 *
 * NOTE(review): _mm_mullo_epi16 / _mm_add_epi16 keep only the low 16 bits of
 * each lane, so this presumably relies on inputs being 0..255 and on the
 * coefficient magnitudes being small enough that no sum overflows int16 --
 * confirm against the coefficient tables and PRECISION.
 */
505 #define RGB2YUV_16(R, G, B, Y, U, V) \
506 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \
507 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
508 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
509 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \
510 Y = _mm_srai_epi16(Y, PRECISION); \
511 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \
512 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
513 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
514 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \
515 U = _mm_srai_epi16(U, PRECISION); \
516 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \
517 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
518 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
519 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \
520 V = _mm_srai_epi16(V, PRECISION);
523 __m128i r1, r2, b1, b2, g1, g2; \
524 __m128i r_16, g_16, b_16; \
525 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
526 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
527 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
528 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
529 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
530 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
531 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
533 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
535 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
536 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
537 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
538 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
539 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
540 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
541 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
542 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
543 y = _mm_packus_epi16(y1_16, y2_16); \
544 u1 = _mm_packus_epi16(u1_16, u2_16); \
545 v1 = _mm_packus_epi16(v1_16, v2_16); \
547 SAVE_SI128((__m128i*)(y_ptr1), y); \
549 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
550 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
551 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
552 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
553 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
554 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
555 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
556 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
557 y = _mm_packus_epi16(y1_16, y2_16); \
558 u2 = _mm_packus_epi16(u1_16, u2_16); \
559 v2 = _mm_packus_epi16(v1_16, v2_16); \
561 SAVE_SI128((__m128i*)(y_ptr2), y); \
563 u1_tmp = _mm_avg_epu8(u1, u2); \
564 v1_tmp = _mm_avg_epu8(v1, v2); \
566 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
567 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
568 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
569 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
570 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
571 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
573 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
575 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
576 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
577 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
578 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
579 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
580 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
581 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
582 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
583 y = _mm_packus_epi16(y1_16, y2_16); \
584 u1 = _mm_packus_epi16(u1_16, u2_16); \
585 v1 = _mm_packus_epi16(v1_16, v2_16); \
587 SAVE_SI128((__m128i*)(y_ptr1+16), y); \
589 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
590 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
591 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
592 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
593 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
594 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
595 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
596 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
597 y = _mm_packus_epi16(y1_16, y2_16); \
598 u2 = _mm_packus_epi16(u1_16, u2_16); \
599 v2 = _mm_packus_epi16(v1_16, v2_16); \
601 SAVE_SI128((__m128i*)(y_ptr2+16), y); \
603 u2_tmp = _mm_avg_epu8(u1, u2); \
604 v2_tmp = _mm_avg_epu8(v1, v2); \
606 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
607 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
608 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
609 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
610 u1 = _mm_avg_epu8(u1, u2); \
611 v1 = _mm_avg_epu8(v1, v2); \
612 SAVE_SI128((__m128i*)(u_ptr), u1); \
613 SAVE_SI128((__m128i*)(v_ptr), v1);
/*
 * Aligned flavour of the load/store hooks used by the LOAD_SI128(...) and
 * SAVE_SI128(...) calls in the RGB->YUV macro body in this file.
 * _mm_load_si128 and _mm_stream_si128 both require 16-byte-aligned
 * addresses; _mm_stream_si128 is a non-temporal store.
 * NOTE(review): the routine built with these hooks presumably requires its
 * buffers and strides to be 16-byte aligned -- confirm at the call sites.
 */
620 #define LOAD_SI128 _mm_load_si128
621 #define SAVE_SI128 _mm_stream_si128
625 for(ypos=0; ypos<(
height-1); ypos+=2)
628 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
631 *y_ptr2=
Y+(ypos+1)*Y_stride,
632 *u_ptr=U+(ypos/2)*UV_stride,
633 *v_ptr=
V+(ypos/2)*UV_stride;
635 for(xpos=0; xpos<(
width-31); xpos+=32)
/*
 * Unaligned flavour of the load/store hooks used by the LOAD_SI128(...) and
 * SAVE_SI128(...) calls in the RGB->YUV macro body in this file.
 * _mm_loadu_si128 and _mm_storeu_si128 place no alignment requirement on the
 * address.
 * NOTE(review): LOAD_SI128 / SAVE_SI128 are also defined earlier with the
 * aligned intrinsics; an intervening #undef is presumably present but is not
 * visible here -- confirm.
 */
656 #define LOAD_SI128 _mm_loadu_si128
657 #define SAVE_SI128 _mm_storeu_si128
661 for(ypos=0; ypos<(
height-1); ypos+=2)
664 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
667 *y_ptr2=
Y+(ypos+1)*Y_stride,
668 *u_ptr=U+(ypos/2)*UV_stride,
669 *v_ptr=
V+(ypos/2)*UV_stride;
671 for(xpos=0; xpos<(
width-31); xpos+=32)
GLint GLint GLint GLint GLint GLint y
GLint GLint GLsizei width
GLint GLint GLint GLint GLint x
GLint GLint GLsizei GLsizei height
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp if else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld,[WK1]90:.else PF bic, WK0, base, #31 PF pld,[WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld,[WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else 
conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X,[sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) 
&FLAG_PROCESS_CORRUPTS_WK0 ldr X,[sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld,[DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask 
endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if SINGLE_SCANLINE ifc b endif else if vars_spilled word LINE_SAVED_REGS endif subs Y
static const YUV2RGBParam YUV2RGB[3]
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
static uint8_t clampU8(int32_t v)
static const RGB2YUVParam RGB2YUV[3]
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)