src/video/SDL_yuv.c
author Ryan C. Gordon <icculus@icculus.org>
Fri, 18 Oct 2019 00:07:32 -0400
changeset 13142 2e673c68ab97
parent 13042 a4f7b983fbe8
permissions -rw-r--r--
egl: adjust how we load symbols in SDL_EGL_GetProcAddress.

Use eglGetProcAddress for everything on EGL >= 1.5. Try SDL_LoadFunction first
for EGL <= 1.4 in case it's a core symbol, and as a fallback if
eglGetProcAddress fails. Finally, for EGL <= 1.4, fall back to
eglGetProcAddress to catch extensions not exported from the shared library.

(Maybe) Fixes Bugzilla #4794.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 #include "SDL_endian.h"
    24 #include "SDL_video.h"
    25 #include "SDL_pixels_c.h"
    26 #include "SDL_yuv_c.h"
    27 
    28 #include "yuv2rgb/yuv_rgb.h"
    29 
/* Frame heights at or below this value select BT.601 when the conversion
   mode is SDL_YUV_CONVERSION_AUTOMATIC; taller frames select BT.709. */
#define SDL_YUV_SD_THRESHOLD    576


/* File-wide conversion mode; written by SDL_SetYUVConversionMode() and read
   back by SDL_GetYUVConversionMode(). Starts out as BT.601. */
static SDL_YUV_CONVERSION_MODE SDL_YUV_ConversionMode = SDL_YUV_CONVERSION_BT601;
    34 
    35 
    36 void SDL_SetYUVConversionMode(SDL_YUV_CONVERSION_MODE mode)
    37 {
    38     SDL_YUV_ConversionMode = mode;
    39 }
    40 
    41 SDL_YUV_CONVERSION_MODE SDL_GetYUVConversionMode()
    42 {
    43     return SDL_YUV_ConversionMode;
    44 }
    45 
    46 SDL_YUV_CONVERSION_MODE SDL_GetYUVConversionModeForResolution(int width, int height)
    47 {
    48     SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionMode();
    49     if (mode == SDL_YUV_CONVERSION_AUTOMATIC) {
    50         if (height <= SDL_YUV_SD_THRESHOLD) {
    51             mode = SDL_YUV_CONVERSION_BT601;
    52         } else {
    53             mode = SDL_YUV_CONVERSION_BT709;
    54         }
    55     }
    56     return mode;
    57 }
    58 
    59 static int GetYUVConversionType(int width, int height, YCbCrType *yuv_type)
    60 {
    61     switch (SDL_GetYUVConversionModeForResolution(width, height)) {
    62     case SDL_YUV_CONVERSION_JPEG:
    63         *yuv_type = YCBCR_JPEG;
    64         break;
    65     case SDL_YUV_CONVERSION_BT601:
    66         *yuv_type = YCBCR_601;
    67         break;
    68     case SDL_YUV_CONVERSION_BT709:
    69         *yuv_type = YCBCR_709;
    70         break;
    71     default:
    72         return SDL_SetError("Unexpected YUV conversion mode");
    73     }
    74     return 0;
    75 }
    76 
    77 static SDL_bool IsPlanar2x2Format(Uint32 format)
    78 {
    79     return (format == SDL_PIXELFORMAT_YV12 ||
    80             format == SDL_PIXELFORMAT_IYUV ||
    81             format == SDL_PIXELFORMAT_NV12 ||
    82             format == SDL_PIXELFORMAT_NV21);
    83 }
    84 
    85 static SDL_bool IsPacked4Format(Uint32 format)
    86 {
    87     return (format == SDL_PIXELFORMAT_YUY2 ||
    88             format == SDL_PIXELFORMAT_UYVY ||
    89             format == SDL_PIXELFORMAT_YVYU);
    90 }
    91 
    92 static int GetYUVPlanes(int width, int height, Uint32 format, const void *yuv, int yuv_pitch,
    93                         const Uint8 **y, const Uint8 **u, const Uint8 **v, Uint32 *y_stride, Uint32 *uv_stride)
    94 {
    95     const Uint8 *planes[3] = { NULL, NULL, NULL };
    96     int pitches[3] = { 0, 0, 0 };
    97 
    98     switch (format) {
    99     case SDL_PIXELFORMAT_YV12:
   100     case SDL_PIXELFORMAT_IYUV:
   101         pitches[0] = yuv_pitch;
   102         pitches[1] = (pitches[0] + 1) / 2;
   103         pitches[2] = (pitches[0] + 1) / 2;
   104         planes[0] = (const Uint8 *)yuv;
   105         planes[1] = planes[0] + pitches[0] * height;
   106         planes[2] = planes[1] + pitches[1] * ((height + 1) / 2);
   107         break;
   108     case SDL_PIXELFORMAT_YUY2:
   109     case SDL_PIXELFORMAT_UYVY:
   110     case SDL_PIXELFORMAT_YVYU:
   111         pitches[0] = yuv_pitch;
   112         planes[0] = (const Uint8 *)yuv;
   113         break;
   114     case SDL_PIXELFORMAT_NV12:
   115     case SDL_PIXELFORMAT_NV21:
   116         pitches[0] = yuv_pitch;
   117         pitches[1] = 2 * ((pitches[0] + 1) / 2);
   118         planes[0] = (const Uint8 *)yuv;
   119         planes[1] = planes[0] + pitches[0] * height;
   120         break;
   121     default:
   122         return SDL_SetError("GetYUVPlanes(): Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
   123     }
   124 
   125     switch (format) {
   126     case SDL_PIXELFORMAT_YV12:
   127         *y = planes[0];
   128         *y_stride = pitches[0];
   129         *v = planes[1];
   130         *u = planes[2];
   131         *uv_stride = pitches[1];
   132         break;
   133     case SDL_PIXELFORMAT_IYUV:
   134         *y = planes[0];
   135         *y_stride = pitches[0];
   136         *v = planes[2];
   137         *u = planes[1];
   138         *uv_stride = pitches[1];
   139         break;
   140     case SDL_PIXELFORMAT_YUY2:
   141         *y = planes[0];
   142         *y_stride = pitches[0];
   143         *v = *y + 3;
   144         *u = *y + 1;
   145         *uv_stride = pitches[0];
   146         break;
   147     case SDL_PIXELFORMAT_UYVY:
   148         *y = planes[0] + 1;
   149         *y_stride = pitches[0];
   150         *v = *y + 1;
   151         *u = *y - 1;
   152         *uv_stride = pitches[0];
   153         break;
   154     case SDL_PIXELFORMAT_YVYU:
   155         *y = planes[0];
   156         *y_stride = pitches[0];
   157         *v = *y + 1;
   158         *u = *y + 3;
   159         *uv_stride = pitches[0];
   160         break;
   161     case SDL_PIXELFORMAT_NV12:
   162         *y = planes[0];
   163         *y_stride = pitches[0];
   164         *u = planes[1];
   165         *v = *u + 1;
   166         *uv_stride = pitches[1];
   167         break;
   168     case SDL_PIXELFORMAT_NV21:
   169         *y = planes[0];
   170         *y_stride = pitches[0];
   171         *v = planes[1];
   172         *u = *v + 1;
   173         *uv_stride = pitches[1];
   174         break;
   175     default:
   176         /* Should have caught this above */
   177         return SDL_SetError("GetYUVPlanes[2]: Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
   178     }
   179     return 0;
   180 }
   181 
   182 static SDL_bool yuv_rgb_sse(
   183     Uint32 src_format, Uint32 dst_format,
   184     Uint32 width, Uint32 height, 
   185     const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride, 
   186     Uint8 *rgb, Uint32 rgb_stride, 
   187     YCbCrType yuv_type)
   188 {
   189 #ifdef __SSE2__
   190     if (!SDL_HasSSE2()) {
   191         return SDL_FALSE;
   192     }
   193 
   194     if (src_format == SDL_PIXELFORMAT_YV12 ||
   195         src_format == SDL_PIXELFORMAT_IYUV) {
   196 
   197         switch (dst_format) {
   198         case SDL_PIXELFORMAT_RGB565:
   199             yuv420_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   200             return SDL_TRUE;
   201         case SDL_PIXELFORMAT_RGB24:
   202             yuv420_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   203             return SDL_TRUE;
   204         case SDL_PIXELFORMAT_RGBX8888:
   205         case SDL_PIXELFORMAT_RGBA8888:
   206             yuv420_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   207             return SDL_TRUE;
   208         case SDL_PIXELFORMAT_BGRX8888:
   209         case SDL_PIXELFORMAT_BGRA8888:
   210             yuv420_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   211             return SDL_TRUE;
   212         case SDL_PIXELFORMAT_RGB888:
   213         case SDL_PIXELFORMAT_ARGB8888:
   214             yuv420_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   215             return SDL_TRUE;
   216         case SDL_PIXELFORMAT_BGR888:
   217         case SDL_PIXELFORMAT_ABGR8888:
   218             yuv420_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   219             return SDL_TRUE;
   220         default:
   221             break;
   222         }
   223     }
   224 
   225     if (src_format == SDL_PIXELFORMAT_YUY2 ||
   226         src_format == SDL_PIXELFORMAT_UYVY ||
   227         src_format == SDL_PIXELFORMAT_YVYU) {
   228 
   229         switch (dst_format) {
   230         case SDL_PIXELFORMAT_RGB565:
   231             yuv422_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   232             return SDL_TRUE;
   233         case SDL_PIXELFORMAT_RGB24:
   234             yuv422_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   235             return SDL_TRUE;
   236         case SDL_PIXELFORMAT_RGBX8888:
   237         case SDL_PIXELFORMAT_RGBA8888:
   238             yuv422_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   239             return SDL_TRUE;
   240         case SDL_PIXELFORMAT_BGRX8888:
   241         case SDL_PIXELFORMAT_BGRA8888:
   242             yuv422_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   243             return SDL_TRUE;
   244         case SDL_PIXELFORMAT_RGB888:
   245         case SDL_PIXELFORMAT_ARGB8888:
   246             yuv422_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   247             return SDL_TRUE;
   248         case SDL_PIXELFORMAT_BGR888:
   249         case SDL_PIXELFORMAT_ABGR8888:
   250             yuv422_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   251             return SDL_TRUE;
   252         default:
   253             break;
   254         }
   255     }
   256 
   257     if (src_format == SDL_PIXELFORMAT_NV12 ||
   258         src_format == SDL_PIXELFORMAT_NV21) {
   259 
   260         switch (dst_format) {
   261         case SDL_PIXELFORMAT_RGB565:
   262             yuvnv12_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   263             return SDL_TRUE;
   264         case SDL_PIXELFORMAT_RGB24:
   265             yuvnv12_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   266             return SDL_TRUE;
   267         case SDL_PIXELFORMAT_RGBX8888:
   268         case SDL_PIXELFORMAT_RGBA8888:
   269             yuvnv12_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   270             return SDL_TRUE;
   271         case SDL_PIXELFORMAT_BGRX8888:
   272         case SDL_PIXELFORMAT_BGRA8888:
   273             yuvnv12_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   274             return SDL_TRUE;
   275         case SDL_PIXELFORMAT_RGB888:
   276         case SDL_PIXELFORMAT_ARGB8888:
   277             yuvnv12_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   278             return SDL_TRUE;
   279         case SDL_PIXELFORMAT_BGR888:
   280         case SDL_PIXELFORMAT_ABGR8888:
   281             yuvnv12_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   282             return SDL_TRUE;
   283         default:
   284             break;
   285         }
   286     }
   287 #endif
   288     return SDL_FALSE;
   289 }
   290 
   291 static SDL_bool yuv_rgb_std(
   292     Uint32 src_format, Uint32 dst_format,
   293     Uint32 width, Uint32 height, 
   294     const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride, 
   295     Uint8 *rgb, Uint32 rgb_stride, 
   296     YCbCrType yuv_type)
   297 {
   298     if (src_format == SDL_PIXELFORMAT_YV12 ||
   299         src_format == SDL_PIXELFORMAT_IYUV) {
   300 
   301         switch (dst_format) {
   302         case SDL_PIXELFORMAT_RGB565:
   303             yuv420_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   304             return SDL_TRUE;
   305         case SDL_PIXELFORMAT_RGB24:
   306             yuv420_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   307             return SDL_TRUE;
   308         case SDL_PIXELFORMAT_RGBX8888:
   309         case SDL_PIXELFORMAT_RGBA8888:
   310             yuv420_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   311             return SDL_TRUE;
   312         case SDL_PIXELFORMAT_BGRX8888:
   313         case SDL_PIXELFORMAT_BGRA8888:
   314             yuv420_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   315             return SDL_TRUE;
   316         case SDL_PIXELFORMAT_RGB888:
   317         case SDL_PIXELFORMAT_ARGB8888:
   318             yuv420_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   319             return SDL_TRUE;
   320         case SDL_PIXELFORMAT_BGR888:
   321         case SDL_PIXELFORMAT_ABGR8888:
   322             yuv420_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   323             return SDL_TRUE;
   324         default:
   325             break;
   326         }
   327     }
   328 
   329     if (src_format == SDL_PIXELFORMAT_YUY2 ||
   330         src_format == SDL_PIXELFORMAT_UYVY ||
   331         src_format == SDL_PIXELFORMAT_YVYU) {
   332 
   333         switch (dst_format) {
   334         case SDL_PIXELFORMAT_RGB565:
   335             yuv422_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   336             return SDL_TRUE;
   337         case SDL_PIXELFORMAT_RGB24:
   338             yuv422_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   339             return SDL_TRUE;
   340         case SDL_PIXELFORMAT_RGBX8888:
   341         case SDL_PIXELFORMAT_RGBA8888:
   342             yuv422_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   343             return SDL_TRUE;
   344         case SDL_PIXELFORMAT_BGRX8888:
   345         case SDL_PIXELFORMAT_BGRA8888:
   346             yuv422_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   347             return SDL_TRUE;
   348         case SDL_PIXELFORMAT_RGB888:
   349         case SDL_PIXELFORMAT_ARGB8888:
   350             yuv422_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   351             return SDL_TRUE;
   352         case SDL_PIXELFORMAT_BGR888:
   353         case SDL_PIXELFORMAT_ABGR8888:
   354             yuv422_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   355             return SDL_TRUE;
   356         default:
   357             break;
   358         }
   359     }
   360 
   361     if (src_format == SDL_PIXELFORMAT_NV12 ||
   362         src_format == SDL_PIXELFORMAT_NV21) {
   363 
   364         switch (dst_format) {
   365         case SDL_PIXELFORMAT_RGB565:
   366             yuvnv12_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   367             return SDL_TRUE;
   368         case SDL_PIXELFORMAT_RGB24:
   369             yuvnv12_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   370             return SDL_TRUE;
   371         case SDL_PIXELFORMAT_RGBX8888:
   372         case SDL_PIXELFORMAT_RGBA8888:
   373             yuvnv12_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   374             return SDL_TRUE;
   375         case SDL_PIXELFORMAT_BGRX8888:
   376         case SDL_PIXELFORMAT_BGRA8888:
   377             yuvnv12_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   378             return SDL_TRUE;
   379         case SDL_PIXELFORMAT_RGB888:
   380         case SDL_PIXELFORMAT_ARGB8888:
   381             yuvnv12_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   382             return SDL_TRUE;
   383         case SDL_PIXELFORMAT_BGR888:
   384         case SDL_PIXELFORMAT_ABGR8888:
   385             yuvnv12_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   386             return SDL_TRUE;
   387         default:
   388             break;
   389         }
   390     }
   391     return SDL_FALSE;
   392 }
   393 
   394 int
   395 SDL_ConvertPixels_YUV_to_RGB(int width, int height,
   396          Uint32 src_format, const void *src, int src_pitch,
   397          Uint32 dst_format, void *dst, int dst_pitch)
   398 {
   399     const Uint8 *y = NULL;
   400     const Uint8 *u = NULL;
   401     const Uint8 *v = NULL;
   402     Uint32 y_stride = 0;
   403     Uint32 uv_stride = 0;
   404     YCbCrType yuv_type = YCBCR_601;
   405 
   406     if (GetYUVPlanes(width, height, src_format, src, src_pitch, &y, &u, &v, &y_stride, &uv_stride) < 0) {
   407         return -1;
   408     }
   409 
   410     if (GetYUVConversionType(width, height, &yuv_type) < 0) {
   411         return -1;
   412     }
   413 
   414     if (yuv_rgb_sse(src_format, dst_format, width, height, y, u, v, y_stride, uv_stride, (Uint8*)dst, dst_pitch, yuv_type)) {
   415         return 0;
   416     }
   417 
   418     if (yuv_rgb_std(src_format, dst_format, width, height, y, u, v, y_stride, uv_stride, (Uint8*)dst, dst_pitch, yuv_type)) {
   419         return 0;
   420     }
   421 
   422     /* No fast path for the RGB format, instead convert using an intermediate buffer */
   423     if (dst_format != SDL_PIXELFORMAT_ARGB8888) {
   424         int ret;
   425         void *tmp;
   426         int tmp_pitch = (width * sizeof(Uint32));
   427 
   428         tmp = SDL_malloc(tmp_pitch * height);
   429         if (tmp == NULL) {
   430             return SDL_OutOfMemory();
   431         }
   432 
   433         /* convert src/src_format to tmp/ARGB8888 */
   434         ret = SDL_ConvertPixels_YUV_to_RGB(width, height, src_format, src, src_pitch, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch);
   435         if (ret < 0) {
   436             SDL_free(tmp);
   437             return ret;
   438         }
   439 
   440         /* convert tmp/ARGB8888 to dst/RGB */
   441         ret = SDL_ConvertPixels(width, height, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch, dst_format, dst, dst_pitch);
   442         SDL_free(tmp);
   443         return ret;
   444     }
   445 
   446     return SDL_SetError("Unsupported YUV conversion");
   447 }
   448 
   449 struct RGB2YUVFactors
   450 {
   451     int y_offset;
   452     float y[3]; /* Rfactor, Gfactor, Bfactor */
   453     float u[3]; /* Rfactor, Gfactor, Bfactor */
   454     float v[3]; /* Rfactor, Gfactor, Bfactor */
   455 };
   456 
   457 static int
   458 SDL_ConvertPixels_ARGB8888_to_YUV(int width, int height, const void *src, int src_pitch, Uint32 dst_format, void *dst, int dst_pitch)
   459 {
   460     const int src_pitch_x_2    = src_pitch * 2;
   461     const int height_half      = height / 2;
   462     const int height_remainder = (height & 0x1);
   463     const int width_half       = width / 2;
   464     const int width_remainder  = (width & 0x1);
   465     int i, j;
   466  
   467     static struct RGB2YUVFactors RGB2YUVFactorTables[SDL_YUV_CONVERSION_BT709 + 1] =
   468     {
   469         /* ITU-T T.871 (JPEG) */
   470         {
   471             0,
   472             {  0.2990f,  0.5870f,  0.1140f },
   473             { -0.1687f, -0.3313f,  0.5000f },
   474             {  0.5000f, -0.4187f, -0.0813f },
   475         },
   476         /* ITU-R BT.601-7 */
   477         {
   478             16,
   479             {  0.2568f,  0.5041f,  0.0979f },
   480             { -0.1482f, -0.2910f,  0.4392f },
   481             {  0.4392f, -0.3678f, -0.0714f },
   482         },
   483         /* ITU-R BT.709-6 */
   484         {
   485             16,
   486             { 0.1826f,  0.6142f,  0.0620f },
   487             {-0.1006f, -0.3386f,  0.4392f },
   488             { 0.4392f, -0.3989f, -0.0403f },
   489         },
   490     };
   491     const struct RGB2YUVFactors *cvt = &RGB2YUVFactorTables[SDL_GetYUVConversionModeForResolution(width, height)];
   492 
   493 #define MAKE_Y(r, g, b) (Uint8)((int)(cvt->y[0] * (r) + cvt->y[1] * (g) + cvt->y[2] * (b) + 0.5f) + cvt->y_offset)
   494 #define MAKE_U(r, g, b) (Uint8)((int)(cvt->u[0] * (r) + cvt->u[1] * (g) + cvt->u[2] * (b) + 0.5f) + 128)
   495 #define MAKE_V(r, g, b) (Uint8)((int)(cvt->v[0] * (r) + cvt->v[1] * (g) + cvt->v[2] * (b) + 0.5f) + 128)
   496 
   497 #define READ_2x2_PIXELS                                                                                         \
   498         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
   499         const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
   500         const Uint32 p3 = ((const Uint32 *)next_row)[2 * i];                                                    \
   501         const Uint32 p4 = ((const Uint32 *)next_row)[2 * i + 1];                                                \
   502         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000) + (p3 & 0x00ff0000) + (p4 & 0x00ff0000)) >> 18; \
   503         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00) + (p3 & 0x0000ff00) + (p4 & 0x0000ff00)) >> 10; \
   504         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff) + (p3 & 0x000000ff) + (p4 & 0x000000ff)) >> 2;  \
   505 
   506 #define READ_2x1_PIXELS                                                                                         \
   507         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
   508         const Uint32 p2 = ((const Uint32 *)next_row)[2 * i];                                                    \
   509         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17;                                         \
   510         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9;                                          \
   511         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1;                                          \
   512 
   513 #define READ_1x2_PIXELS                                                                                         \
   514         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
   515         const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
   516         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17;                                         \
   517         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9;                                          \
   518         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1;                                          \
   519 
   520 #define READ_1x1_PIXEL                                                                                          \
   521         const Uint32 p = ((const Uint32 *)curr_row)[2 * i];                                                     \
   522         const Uint32 r = (p & 0x00ff0000) >> 16;                                                                \
   523         const Uint32 g = (p & 0x0000ff00) >> 8;                                                                 \
   524         const Uint32 b = (p & 0x000000ff);                                                                      \
   525 
   526 #define READ_TWO_RGB_PIXELS                                                                                     \
   527         const Uint32 p = ((const Uint32 *)curr_row)[2 * i];                                                     \
   528         const Uint32 r = (p & 0x00ff0000) >> 16;                                                                \
   529         const Uint32 g = (p & 0x0000ff00) >> 8;                                                                 \
   530         const Uint32 b = (p & 0x000000ff);                                                                      \
   531         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
   532         const Uint32 r1 = (p1 & 0x00ff0000) >> 16;                                                              \
   533         const Uint32 g1 = (p1 & 0x0000ff00) >> 8;                                                               \
   534         const Uint32 b1 = (p1 & 0x000000ff);                                                                    \
   535         const Uint32 R = (r + r1)/2;                                                                            \
   536         const Uint32 G = (g + g1)/2;                                                                            \
   537         const Uint32 B = (b + b1)/2;                                                                            \
   538 
   539 #define READ_ONE_RGB_PIXEL  READ_1x1_PIXEL
   540 
   541     switch (dst_format) 
   542     {
   543     case SDL_PIXELFORMAT_YV12:
   544     case SDL_PIXELFORMAT_IYUV:
   545     case SDL_PIXELFORMAT_NV12:
   546     case SDL_PIXELFORMAT_NV21:
   547         {
   548             const Uint8 *curr_row, *next_row;
   549             
   550             Uint8 *plane_y;
   551             Uint8 *plane_u;
   552             Uint8 *plane_v;
   553             Uint8 *plane_interleaved_uv;
   554             Uint32 y_stride, uv_stride, y_skip, uv_skip;
   555 
   556             GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
   557                          (const Uint8 **)&plane_y, (const Uint8 **)&plane_u, (const Uint8 **)&plane_v,
   558                          &y_stride, &uv_stride);
   559             plane_interleaved_uv = (plane_y + height * y_stride);
   560             y_skip = (y_stride - width);
   561 
   562             curr_row = (const Uint8*)src;
   563 
   564             /* Write Y plane */
   565             for (j = 0; j < height; j++) {
   566                 for (i = 0; i < width; i++) {
   567                     const Uint32 p1 = ((const Uint32 *)curr_row)[i];
   568                     const Uint32 r = (p1 & 0x00ff0000) >> 16;
   569                     const Uint32 g = (p1 & 0x0000ff00) >> 8;
   570                     const Uint32 b = (p1 & 0x000000ff);
   571                     *plane_y++ = MAKE_Y(r, g, b);
   572                 }
   573                 plane_y += y_skip;
   574                 curr_row += src_pitch;
   575             }
   576 
   577             curr_row = (const Uint8*)src;
   578             next_row = (const Uint8*)src;
   579             next_row += src_pitch;
   580 
   581             if (dst_format == SDL_PIXELFORMAT_YV12 || dst_format == SDL_PIXELFORMAT_IYUV)
   582             {
   583                 /* Write UV planes, not interleaved */
   584                 uv_skip = (uv_stride - (width + 1)/2);
   585                 for (j = 0; j < height_half; j++) {
   586                     for (i = 0; i < width_half; i++) {
   587                         READ_2x2_PIXELS;
   588                         *plane_u++ = MAKE_U(r, g, b);
   589                         *plane_v++ = MAKE_V(r, g, b);
   590                     }
   591                     if (width_remainder) {
   592                         READ_2x1_PIXELS;
   593                         *plane_u++ = MAKE_U(r, g, b);
   594                         *plane_v++ = MAKE_V(r, g, b);
   595                     }
   596                     plane_u += uv_skip;
   597                     plane_v += uv_skip;
   598                     curr_row += src_pitch_x_2;
   599                     next_row += src_pitch_x_2;
   600                 }
   601                 if (height_remainder) {
   602                     for (i = 0; i < width_half; i++) {
   603                         READ_1x2_PIXELS;
   604                         *plane_u++ = MAKE_U(r, g, b);
   605                         *plane_v++ = MAKE_V(r, g, b);
   606                     }
   607                     if (width_remainder) {
   608                         READ_1x1_PIXEL;
   609                         *plane_u++ = MAKE_U(r, g, b);
   610                         *plane_v++ = MAKE_V(r, g, b);
   611                     }
   612                     plane_u += uv_skip;
   613                     plane_v += uv_skip;
   614                 }
   615             }
   616             else if (dst_format == SDL_PIXELFORMAT_NV12)
   617             {
   618                 uv_skip = (uv_stride - ((width + 1)/2)*2);
   619                 for (j = 0; j < height_half; j++) {
   620                     for (i = 0; i < width_half; i++) {
   621                         READ_2x2_PIXELS;
   622                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   623                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   624                     }
   625                     if (width_remainder) {
   626                         READ_2x1_PIXELS;
   627                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   628                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   629                     }
   630                     plane_interleaved_uv += uv_skip;
   631                     curr_row += src_pitch_x_2;
   632                     next_row += src_pitch_x_2;
   633                 }
   634                 if (height_remainder) {
   635                     for (i = 0; i < width_half; i++) {
   636                         READ_1x2_PIXELS;
   637                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   638                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   639                     }
   640                     if (width_remainder) {
   641                         READ_1x1_PIXEL;
   642                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   643                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   644                     }
   645                 }
   646             } 
   647             else /* dst_format == SDL_PIXELFORMAT_NV21 */
   648             {
   649                 uv_skip = (uv_stride - ((width + 1)/2)*2);
   650                 for (j = 0; j < height_half; j++) {
   651                     for (i = 0; i < width_half; i++) {
   652                         READ_2x2_PIXELS;
   653                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   654                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   655                     }
   656                     if (width_remainder) {
   657                         READ_2x1_PIXELS;
   658                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   659                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   660                     }
   661                     plane_interleaved_uv += uv_skip;
   662                     curr_row += src_pitch_x_2;
   663                     next_row += src_pitch_x_2;
   664                 }
   665                 if (height_remainder) {
   666                     for (i = 0; i < width_half; i++) {
   667                         READ_1x2_PIXELS;
   668                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   669                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   670                     }
   671                     if (width_remainder) {
   672                         READ_1x1_PIXEL;
   673                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   674                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   675                     }
   676                 }
   677             }
   678         }
   679         break;
   680 
   681     case SDL_PIXELFORMAT_YUY2:
   682     case SDL_PIXELFORMAT_UYVY:
   683     case SDL_PIXELFORMAT_YVYU:
   684         {
   685             const Uint8 *curr_row = (const Uint8*) src;
   686             Uint8 *plane           = (Uint8*) dst;
   687             const int row_size = (4 * ((width + 1) / 2));
   688             int plane_skip;
   689 
   690             if (dst_pitch < row_size) {
   691                 return SDL_SetError("Destination pitch is too small, expected at least %d\n", row_size);
   692             }
   693             plane_skip = (dst_pitch - row_size);
   694 
   695             /* Write YUV plane, packed */
   696             if (dst_format == SDL_PIXELFORMAT_YUY2) 
   697             {
   698                 for (j = 0; j < height; j++) {
   699                     for (i = 0; i < width_half; i++) {
   700                         READ_TWO_RGB_PIXELS;
   701                         /* Y U Y1 V */
   702                         *plane++ = MAKE_Y(r, g, b);
   703                         *plane++ = MAKE_U(R, G, B);
   704                         *plane++ = MAKE_Y(r1, g1, b1);
   705                         *plane++ = MAKE_V(R, G, B);
   706                     }
   707                     if (width_remainder) {
   708                         READ_ONE_RGB_PIXEL;
   709                         /* Y U Y V */
   710                         *plane++ = MAKE_Y(r, g, b);
   711                         *plane++ = MAKE_U(r, g, b);
   712                         *plane++ = MAKE_Y(r, g, b);
   713                         *plane++ = MAKE_V(r, g, b);
   714                     }
   715                     plane += plane_skip;
   716                     curr_row += src_pitch;
   717                 }
   718             } 
   719             else if (dst_format == SDL_PIXELFORMAT_UYVY)
   720             {
   721                 for (j = 0; j < height; j++) {
   722                     for (i = 0; i < width_half; i++) {
   723                         READ_TWO_RGB_PIXELS;
   724                         /* U Y V Y1 */
   725                         *plane++ = MAKE_U(R, G, B);
   726                         *plane++ = MAKE_Y(r, g, b);
   727                         *plane++ = MAKE_V(R, G, B);
   728                         *plane++ = MAKE_Y(r1, g1, b1);
   729                     }
   730                     if (width_remainder) {
   731                         READ_ONE_RGB_PIXEL;
   732                         /* U Y V Y */
   733                         *plane++ = MAKE_U(r, g, b);
   734                         *plane++ = MAKE_Y(r, g, b);
   735                         *plane++ = MAKE_V(r, g, b);
   736                         *plane++ = MAKE_Y(r, g, b);
   737                     }
   738                     plane += plane_skip;
   739                     curr_row += src_pitch;
   740                 }
   741             }
   742             else if (dst_format == SDL_PIXELFORMAT_YVYU)
   743             {
   744                 for (j = 0; j < height; j++) {
   745                     for (i = 0; i < width_half; i++) {
   746                         READ_TWO_RGB_PIXELS;
   747                         /* Y V Y1 U */
   748                         *plane++ = MAKE_Y(r, g, b);
   749                         *plane++ = MAKE_V(R, G, B);
   750                         *plane++ = MAKE_Y(r1, g1, b1);
   751                         *plane++ = MAKE_U(R, G, B);
   752                     }
   753                     if (width_remainder) {
   754                         READ_ONE_RGB_PIXEL;
   755                         /* Y V Y U */
   756                         *plane++ = MAKE_Y(r, g, b);
   757                         *plane++ = MAKE_V(r, g, b);
   758                         *plane++ = MAKE_Y(r, g, b);
   759                         *plane++ = MAKE_U(r, g, b);
   760                     }
   761                     plane += plane_skip;
   762                     curr_row += src_pitch;
   763                 }
   764             }
   765         }
   766         break;
   767 
   768     default:
   769         return SDL_SetError("Unsupported YUV destination format: %s", SDL_GetPixelFormatName(dst_format));
   770     }
   771 #undef MAKE_Y
   772 #undef MAKE_U
   773 #undef MAKE_V
   774 #undef READ_2x2_PIXELS
   775 #undef READ_2x1_PIXELS
   776 #undef READ_1x2_PIXELS
   777 #undef READ_1x1_PIXEL
   778 #undef READ_TWO_RGB_PIXELS
   779 #undef READ_ONE_RGB_PIXEL
   780     return 0;
   781 }
   782 
   783 int
   784 SDL_ConvertPixels_RGB_to_YUV(int width, int height,
   785          Uint32 src_format, const void *src, int src_pitch,
   786          Uint32 dst_format, void *dst, int dst_pitch)
   787 {
   788 #if 0 /* Doesn't handle odd widths */
   789     /* RGB24 to FOURCC */
   790     if (src_format == SDL_PIXELFORMAT_RGB24) {
   791         Uint8 *y;
   792         Uint8 *u;
   793         Uint8 *v;
   794         Uint32 y_stride;
   795         Uint32 uv_stride;
   796         YCbCrType yuv_type;
   797 
   798         if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch, (const Uint8 **)&y, (const Uint8 **)&u, (const Uint8 **)&v, &y_stride, &uv_stride) < 0) {
   799             return -1;
   800         }
   801 
   802         if (GetYUVConversionType(width, height, &yuv_type) < 0) {
   803             return -1;
   804         }
   805 
   806         rgb24_yuv420_std(width, height, src, src_pitch, y, u, v, y_stride, uv_stride, yuv_type);
   807         return 0;
   808     }
   809 #endif
   810 
   811     /* ARGB8888 to FOURCC */
   812     if (src_format == SDL_PIXELFORMAT_ARGB8888) {
   813         return SDL_ConvertPixels_ARGB8888_to_YUV(width, height, src, src_pitch, dst_format, dst, dst_pitch);
   814     }
   815 
   816     /* not ARGB8888 to FOURCC : need an intermediate conversion */
   817     {
   818         int ret;
   819         void *tmp;
   820         int tmp_pitch = (width * sizeof(Uint32));
   821 
   822         tmp = SDL_malloc(tmp_pitch * height);
   823         if (tmp == NULL) {
   824             return SDL_OutOfMemory();
   825         }
   826 
   827         /* convert src/src_format to tmp/ARGB8888 */
   828         ret = SDL_ConvertPixels(width, height, src_format, src, src_pitch, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch);
   829         if (ret == -1) {
   830             SDL_free(tmp);
   831             return ret;
   832         }
   833 
   834         /* convert tmp/ARGB8888 to dst/FOURCC */
   835         ret = SDL_ConvertPixels_ARGB8888_to_YUV(width, height, tmp, tmp_pitch, dst_format, dst, dst_pitch);
   836         SDL_free(tmp);
   837         return ret;
   838     }
   839 }
   840 
   841 static int
   842 SDL_ConvertPixels_YUV_to_YUV_Copy(int width, int height, Uint32 format,
   843         const void *src, int src_pitch, void *dst, int dst_pitch)
   844 {
   845     int i;
   846 
   847     if (IsPlanar2x2Format(format)) {
   848         /* Y plane */
   849         for (i = height; i--;) {
   850             SDL_memcpy(dst, src, width);
   851             src = (const Uint8*)src + src_pitch;
   852             dst = (Uint8*)dst + dst_pitch;
   853         }
   854 
   855         if (format == SDL_PIXELFORMAT_YV12 || format == SDL_PIXELFORMAT_IYUV) {
   856             /* U and V planes are a quarter the size of the Y plane, rounded up */
   857             width = (width + 1) / 2;
   858             height = (height + 1) / 2;
   859             src_pitch = (src_pitch + 1) / 2;
   860             dst_pitch = (dst_pitch + 1) / 2;
   861             for (i = height * 2; i--;) {
   862                 SDL_memcpy(dst, src, width);
   863                 src = (const Uint8*)src + src_pitch;
   864                 dst = (Uint8*)dst + dst_pitch;
   865             }
   866         } else if (format == SDL_PIXELFORMAT_NV12 || format == SDL_PIXELFORMAT_NV21) {
   867             /* U/V plane is half the height of the Y plane, rounded up */
   868             height = (height + 1) / 2;
   869             width = ((width + 1) / 2)*2;
   870             src_pitch = ((src_pitch + 1) / 2)*2;
   871             dst_pitch = ((dst_pitch + 1) / 2)*2;
   872             for (i = height; i--;) {
   873                 SDL_memcpy(dst, src, width);
   874                 src = (const Uint8*)src + src_pitch;
   875                 dst = (Uint8*)dst + dst_pitch;
   876             }
   877         }
   878         return 0;
   879     }
   880 
   881     if (IsPacked4Format(format)) {
   882         /* Packed planes */
   883         width = 4 * ((width + 1) / 2);
   884         for (i = height; i--;) {
   885             SDL_memcpy(dst, src, width);
   886             src = (const Uint8*)src + src_pitch;
   887             dst = (Uint8*)dst + dst_pitch;
   888         }
   889         return 0;
   890     }
   891 
   892     return SDL_SetError("SDL_ConvertPixels_YUV_to_YUV_Copy: Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
   893 }
   894 
   895 static int
   896 SDL_ConvertPixels_SwapUVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   897 {
   898     int y;
   899     const int UVwidth = (width + 1)/2;
   900     const int UVheight = (height + 1)/2;
   901 
   902     /* Skip the Y plane */
   903     src = (const Uint8 *)src + height * src_pitch;
   904     dst = (Uint8 *)dst + height * dst_pitch;
   905 
   906     if (src == dst) {
   907         int UVpitch = (dst_pitch + 1)/2;
   908         Uint8 *tmp;
   909         Uint8 *row1 = dst;
   910         Uint8 *row2 = (Uint8 *)dst + UVheight * UVpitch;
   911 
   912         /* Allocate a temporary row for the swap */
   913         tmp = (Uint8 *)SDL_malloc(UVwidth);
   914         if (!tmp) {
   915             return SDL_OutOfMemory();
   916         }
   917         for (y = 0; y < UVheight; ++y) {
   918             SDL_memcpy(tmp, row1, UVwidth);
   919             SDL_memcpy(row1, row2, UVwidth);
   920             SDL_memcpy(row2, tmp, UVwidth);
   921             row1 += UVpitch;
   922             row2 += UVpitch;
   923         }
   924         SDL_free(tmp);
   925     } else {
   926         const Uint8 *srcUV;
   927         Uint8 *dstUV;
   928         int srcUVPitch = ((src_pitch + 1)/2);
   929         int dstUVPitch = ((dst_pitch + 1)/2);
   930 
   931         /* Copy the first plane */
   932         srcUV = (const Uint8 *)src;
   933         dstUV = (Uint8 *)dst + UVheight * dstUVPitch;
   934         for (y = 0; y < UVheight; ++y) {
   935             SDL_memcpy(dstUV, srcUV, UVwidth);
   936             srcUV += srcUVPitch;
   937             dstUV += dstUVPitch;
   938         }
   939 
   940         /* Copy the second plane */
   941         dstUV = (Uint8 *)dst;
   942         for (y = 0; y < UVheight; ++y) {
   943             SDL_memcpy(dstUV, srcUV, UVwidth);
   944             srcUV += srcUVPitch;
   945             dstUV += dstUVPitch;
   946         }
   947     }
   948     return 0;
   949 }
   950 
   951 static int
   952 SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
   953 {
   954     int x, y;
   955     const int UVwidth = (width + 1)/2;
   956     const int UVheight = (height + 1)/2;
   957     const int srcUVPitch = ((src_pitch + 1)/2);
   958     const int srcUVPitchLeft = srcUVPitch - UVwidth;
   959     const int dstUVPitch = ((dst_pitch + 1)/2)*2;
   960     const int dstUVPitchLeft = dstUVPitch - UVwidth*2;
   961     const Uint8 *src1, *src2;
   962     Uint8 *dstUV;
   963     Uint8 *tmp = NULL;
   964 #ifdef __SSE2__
   965     const SDL_bool use_SSE2 = SDL_HasSSE2();
   966 #endif
   967 
   968     /* Skip the Y plane */
   969     src = (const Uint8 *)src + height * src_pitch;
   970     dst = (Uint8 *)dst + height * dst_pitch;
   971 
   972     if (src == dst) {
   973         /* Need to make a copy of the buffer so we don't clobber it while converting */
   974         tmp = (Uint8 *)SDL_malloc(2*UVheight*srcUVPitch);
   975         if (!tmp) {
   976             return SDL_OutOfMemory();
   977         }
   978         SDL_memcpy(tmp, src, 2*UVheight*srcUVPitch);
   979         src = tmp;
   980     }
   981 
   982     if (reverseUV) {
   983         src2 = (const Uint8 *)src;
   984         src1 = src2 + UVheight * srcUVPitch;
   985     } else {
   986         src1 = (const Uint8 *)src;
   987         src2 = src1 + UVheight * srcUVPitch;
   988     }
   989     dstUV = (Uint8 *)dst;
   990 
   991     y = UVheight;
   992     while (y--) {
   993         x = UVwidth;
   994 #ifdef __SSE2__
   995         if (use_SSE2) {
   996             while (x >= 16) {
   997                 __m128i u = _mm_loadu_si128((__m128i *)src1);
   998                 __m128i v = _mm_loadu_si128((__m128i *)src2);
   999                 __m128i uv1 = _mm_unpacklo_epi8(u, v);
  1000                 __m128i uv2 = _mm_unpackhi_epi8(u, v);
  1001                 _mm_storeu_si128((__m128i*)dstUV, uv1);
  1002                 _mm_storeu_si128((__m128i*)(dstUV + 16), uv2);
  1003                 src1 += 16;
  1004                 src2 += 16;
  1005                 dstUV += 32;
  1006                 x -= 16;
  1007             }
  1008         }
  1009 #endif
  1010         while (x--) {
  1011             *dstUV++ = *src1++;
  1012             *dstUV++ = *src2++;
  1013         }
  1014         src1 += srcUVPitchLeft;
  1015         src2 += srcUVPitchLeft;
  1016         dstUV += dstUVPitchLeft;
  1017     }
  1018 
  1019     if (tmp) {
  1020         SDL_free(tmp);
  1021     }
  1022     return 0;
  1023 }
  1024 
  1025 static int
  1026 SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
  1027 {
  1028     int x, y;
  1029     const int UVwidth = (width + 1)/2;
  1030     const int UVheight = (height + 1)/2;
  1031     const int srcUVPitch = ((src_pitch + 1)/2)*2;
  1032     const int srcUVPitchLeft = srcUVPitch - UVwidth*2;
  1033     const int dstUVPitch = ((dst_pitch + 1)/2);
  1034     const int dstUVPitchLeft = dstUVPitch - UVwidth;
  1035     const Uint8 *srcUV;
  1036     Uint8 *dst1, *dst2;
  1037     Uint8 *tmp = NULL;
  1038 #ifdef __SSE2__
  1039     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1040 #endif
  1041 
  1042     /* Skip the Y plane */
  1043     src = (const Uint8 *)src + height * src_pitch;
  1044     dst = (Uint8 *)dst + height * dst_pitch;
  1045 
  1046     if (src == dst) {
  1047         /* Need to make a copy of the buffer so we don't clobber it while converting */
  1048         tmp = (Uint8 *)SDL_malloc(UVheight*srcUVPitch);
  1049         if (!tmp) {
  1050             return SDL_OutOfMemory();
  1051         }
  1052         SDL_memcpy(tmp, src, UVheight*srcUVPitch);
  1053         src = tmp;
  1054     }
  1055 
  1056     if (reverseUV) {
  1057         dst2 = (Uint8 *)dst;
  1058         dst1 = dst2 + UVheight * dstUVPitch;
  1059     } else {
  1060         dst1 = (Uint8 *)dst;
  1061         dst2 = dst1 + UVheight * dstUVPitch;
  1062     }
  1063     srcUV = (const Uint8 *)src;
  1064 
  1065     y = UVheight;
  1066     while (y--) {
  1067         x = UVwidth;
  1068 #ifdef __SSE2__
  1069         if (use_SSE2) {
  1070             __m128i mask = _mm_set1_epi16(0x00FF);
  1071             while (x >= 16) {
  1072                 __m128i uv1 = _mm_loadu_si128((__m128i*)srcUV);
  1073                 __m128i uv2 = _mm_loadu_si128((__m128i*)(srcUV+16));
  1074                 __m128i u1 = _mm_and_si128(uv1, mask);
  1075                 __m128i u2 = _mm_and_si128(uv2, mask);
  1076                 __m128i u = _mm_packus_epi16(u1, u2);
  1077                 __m128i v1 = _mm_srli_epi16(uv1, 8);
  1078                 __m128i v2 = _mm_srli_epi16(uv2, 8);
  1079                 __m128i v = _mm_packus_epi16(v1, v2);
  1080                 _mm_storeu_si128((__m128i*)dst1, u);
  1081                 _mm_storeu_si128((__m128i*)dst2, v);
  1082                 srcUV += 32;
  1083                 dst1 += 16;
  1084                 dst2 += 16;
  1085                 x -= 16;
  1086             }
  1087         }
  1088 #endif
  1089         while (x--) {
  1090             *dst1++ = *srcUV++;
  1091             *dst2++ = *srcUV++;
  1092         }
  1093         srcUV += srcUVPitchLeft;
  1094         dst1 += dstUVPitchLeft;
  1095         dst2 += dstUVPitchLeft;
  1096     }
  1097 
  1098     if (tmp) {
  1099         SDL_free(tmp);
  1100     }
  1101     return 0;
  1102 }
  1103 
  1104 static int
  1105 SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1106 {
  1107     int x, y;
  1108     const int UVwidth = (width + 1)/2;
  1109     const int UVheight = (height + 1)/2;
  1110     const int srcUVPitch = ((src_pitch + 1)/2)*2;
  1111     const int srcUVPitchLeft = (srcUVPitch - UVwidth*2)/sizeof(Uint16);
  1112     const int dstUVPitch = ((dst_pitch + 1)/2)*2;
  1113     const int dstUVPitchLeft = (dstUVPitch - UVwidth*2)/sizeof(Uint16);
  1114     const Uint16 *srcUV;
  1115     Uint16 *dstUV;
  1116 #ifdef __SSE2__
  1117     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1118 #endif
  1119 
  1120     /* Skip the Y plane */
  1121     src = (const Uint8 *)src + height * src_pitch;
  1122     dst = (Uint8 *)dst + height * dst_pitch;
  1123 
  1124     srcUV = (const Uint16 *)src;
  1125     dstUV = (Uint16 *)dst;
  1126     y = UVheight;
  1127     while (y--) {
  1128         x = UVwidth;
  1129 #ifdef __SSE2__
  1130         if (use_SSE2) {
  1131             while (x >= 8) {
  1132                 __m128i uv = _mm_loadu_si128((__m128i*)srcUV);
  1133                 __m128i v = _mm_slli_epi16(uv, 8);
  1134                 __m128i u = _mm_srli_epi16(uv, 8);
  1135                 __m128i vu = _mm_or_si128(v, u);
  1136                 _mm_storeu_si128((__m128i*)dstUV, vu);
  1137                 srcUV += 8;
  1138                 dstUV += 8;
  1139                 x -= 8;
  1140             }
  1141         }
  1142 #endif
  1143         while (x--) {
  1144             *dstUV++ = SDL_Swap16(*srcUV++);
  1145         }
  1146         srcUV += srcUVPitchLeft;
  1147         dstUV += dstUVPitchLeft;
  1148     }
  1149     return 0;
  1150 }
  1151 
  1152 static int
  1153 SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
  1154          Uint32 src_format, const void *src, int src_pitch,
  1155          Uint32 dst_format, void *dst, int dst_pitch)
  1156 {
  1157     if (src != dst) {
  1158         /* Copy Y plane */
  1159         int i;
  1160         const Uint8 *srcY = (const Uint8 *)src;
  1161         Uint8 *dstY = (Uint8 *)dst;
  1162         for (i = height; i--; ) {
  1163             SDL_memcpy(dstY, srcY, width);
  1164             srcY += src_pitch;
  1165             dstY += dst_pitch;
  1166         }
  1167     }
  1168 
  1169     switch (src_format) {
  1170     case SDL_PIXELFORMAT_YV12:
  1171         switch (dst_format) {
  1172         case SDL_PIXELFORMAT_IYUV:
  1173             return SDL_ConvertPixels_SwapUVPlanes(width, height, src, src_pitch, dst, dst_pitch);
  1174         case SDL_PIXELFORMAT_NV12:
  1175             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1176         case SDL_PIXELFORMAT_NV21:
  1177             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1178         default:
  1179             break;
  1180         }
  1181         break;
  1182     case SDL_PIXELFORMAT_IYUV:
  1183         switch (dst_format) {
  1184         case SDL_PIXELFORMAT_YV12:
  1185             return SDL_ConvertPixels_SwapUVPlanes(width, height, src, src_pitch, dst, dst_pitch);
  1186         case SDL_PIXELFORMAT_NV12:
  1187             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1188         case SDL_PIXELFORMAT_NV21:
  1189             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1190         default:
  1191             break;
  1192         }
  1193         break;
  1194     case SDL_PIXELFORMAT_NV12:
  1195         switch (dst_format) {
  1196         case SDL_PIXELFORMAT_YV12:
  1197             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1198         case SDL_PIXELFORMAT_IYUV:
  1199             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1200         case SDL_PIXELFORMAT_NV21:
  1201             return SDL_ConvertPixels_SwapNV(width, height, src, src_pitch, dst, dst_pitch);
  1202         default:
  1203             break;
  1204         }
  1205         break;
  1206     case SDL_PIXELFORMAT_NV21:
  1207         switch (dst_format) {
  1208         case SDL_PIXELFORMAT_YV12:
  1209             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1210         case SDL_PIXELFORMAT_IYUV:
  1211             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1212         case SDL_PIXELFORMAT_NV12:
  1213             return SDL_ConvertPixels_SwapNV(width, height, src, src_pitch, dst, dst_pitch);
  1214         default:
  1215             break;
  1216         }
  1217         break;
  1218     default:
  1219         break;
  1220     }
  1221     return SDL_SetError("SDL_ConvertPixels_Planar2x2_to_Planar2x2: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
  1222 }
  1223 
  1224 #ifdef __SSE2__
  1225 #define PACKED4_TO_PACKED4_ROW_SSE2(shuffle)                                                        \
  1226     while (x >= 4) {                                                                                \
  1227         __m128i yuv = _mm_loadu_si128((__m128i*)srcYUV);                                            \
  1228         __m128i lo = _mm_unpacklo_epi8(yuv, _mm_setzero_si128());                                   \
  1229         __m128i hi = _mm_unpackhi_epi8(yuv, _mm_setzero_si128());                                   \
  1230         lo = _mm_shufflelo_epi16(lo, shuffle);                                                      \
  1231         lo = _mm_shufflehi_epi16(lo, shuffle);                                                      \
  1232         hi = _mm_shufflelo_epi16(hi, shuffle);                                                      \
  1233         hi = _mm_shufflehi_epi16(hi, shuffle);                                                      \
  1234         yuv = _mm_packus_epi16(lo, hi);                                                             \
  1235         _mm_storeu_si128((__m128i*)dstYUV, yuv);                                                    \
  1236         srcYUV += 16;                                                                               \
  1237         dstYUV += 16;                                                                               \
  1238         x -= 4;                                                                                     \
  1239     }                                                                                               \
  1240 
  1241 #endif
  1242 
  1243 static int
  1244 SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1245 {
  1246     int x, y;
  1247     const int YUVwidth = (width + 1)/2;
  1248     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1249     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1250     const Uint8 *srcYUV = (const Uint8 *)src;
  1251     Uint8 *dstYUV = (Uint8 *)dst;
  1252 #ifdef __SSE2__
  1253     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1254 #endif
  1255 
  1256     y = height;
  1257     while (y--) {
  1258         x = YUVwidth;
  1259 #ifdef __SSE2__
  1260         if (use_SSE2) {
  1261             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
  1262         }
  1263 #endif
  1264         while (x--) {
  1265             Uint8 Y1, U, Y2, V;
  1266 
  1267             Y1 = srcYUV[0];
  1268             U = srcYUV[1];
  1269             Y2 = srcYUV[2];
  1270             V = srcYUV[3];
  1271             srcYUV += 4;
  1272 
  1273             dstYUV[0] = U;
  1274             dstYUV[1] = Y1;
  1275             dstYUV[2] = V;
  1276             dstYUV[3] = Y2;
  1277             dstYUV += 4;
  1278         }
  1279         srcYUV += srcYUVPitchLeft;
  1280         dstYUV += dstYUVPitchLeft;
  1281     }
  1282     return 0;
  1283 }
  1284 
  1285 static int
  1286 SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1287 {
  1288     int x, y;
  1289     const int YUVwidth = (width + 1)/2;
  1290     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1291     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1292     const Uint8 *srcYUV = (const Uint8 *)src;
  1293     Uint8 *dstYUV = (Uint8 *)dst;
  1294 #ifdef __SSE2__
  1295     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1296 #endif
  1297 
  1298     y = height;
  1299     while (y--) {
  1300         x = YUVwidth;
  1301 #ifdef __SSE2__
  1302         if (use_SSE2) {
  1303             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
  1304         }
  1305 #endif
  1306         while (x--) {
  1307             Uint8 Y1, U, Y2, V;
  1308 
  1309             Y1 = srcYUV[0];
  1310             U = srcYUV[1];
  1311             Y2 = srcYUV[2];
  1312             V = srcYUV[3];
  1313             srcYUV += 4;
  1314 
  1315             dstYUV[0] = Y1;
  1316             dstYUV[1] = V;
  1317             dstYUV[2] = Y2;
  1318             dstYUV[3] = U;
  1319             dstYUV += 4;
  1320         }
  1321         srcYUV += srcYUVPitchLeft;
  1322         dstYUV += dstYUVPitchLeft;
  1323     }
  1324     return 0;
  1325 }
  1326 
  1327 static int
  1328 SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1329 {
  1330     int x, y;
  1331     const int YUVwidth = (width + 1)/2;
  1332     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1333     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1334     const Uint8 *srcYUV = (const Uint8 *)src;
  1335     Uint8 *dstYUV = (Uint8 *)dst;
  1336 #ifdef __SSE2__
  1337     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1338 #endif
  1339 
  1340     y = height;
  1341     while (y--) {
  1342         x = YUVwidth;
  1343 #ifdef __SSE2__
  1344         if (use_SSE2) {
  1345             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
  1346         }
  1347 #endif
  1348         while (x--) {
  1349             Uint8 Y1, U, Y2, V;
  1350 
  1351             U = srcYUV[0];
  1352             Y1 = srcYUV[1];
  1353             V = srcYUV[2];
  1354             Y2 = srcYUV[3];
  1355             srcYUV += 4;
  1356 
  1357             dstYUV[0] = Y1;
  1358             dstYUV[1] = U;
  1359             dstYUV[2] = Y2;
  1360             dstYUV[3] = V;
  1361             dstYUV += 4;
  1362         }
  1363         srcYUV += srcYUVPitchLeft;
  1364         dstYUV += dstYUVPitchLeft;
  1365     }
  1366     return 0;
  1367 }
  1368 
  1369 static int
  1370 SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1371 {
  1372     int x, y;
  1373     const int YUVwidth = (width + 1)/2;
  1374     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1375     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1376     const Uint8 *srcYUV = (const Uint8 *)src;
  1377     Uint8 *dstYUV = (Uint8 *)dst;
  1378 #ifdef __SSE2__
  1379     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1380 #endif
  1381 
  1382     y = height;
  1383     while (y--) {
  1384         x = YUVwidth;
  1385 #ifdef __SSE2__
  1386         if (use_SSE2) {
  1387             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
  1388         }
  1389 #endif
  1390         while (x--) {
  1391             Uint8 Y1, U, Y2, V;
  1392 
  1393             U = srcYUV[0];
  1394             Y1 = srcYUV[1];
  1395             V = srcYUV[2];
  1396             Y2 = srcYUV[3];
  1397             srcYUV += 4;
  1398 
  1399             dstYUV[0] = Y1;
  1400             dstYUV[1] = V;
  1401             dstYUV[2] = Y2;
  1402             dstYUV[3] = U;
  1403             dstYUV += 4;
  1404         }
  1405         srcYUV += srcYUVPitchLeft;
  1406         dstYUV += dstYUVPitchLeft;
  1407     }
  1408     return 0;
  1409 }
  1410 
  1411 static int
  1412 SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1413 {
  1414     int x, y;
  1415     const int YUVwidth = (width + 1)/2;
  1416     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1417     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1418     const Uint8 *srcYUV = (const Uint8 *)src;
  1419     Uint8 *dstYUV = (Uint8 *)dst;
  1420 #ifdef __SSE2__
  1421     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1422 #endif
  1423 
  1424     y = height;
  1425     while (y--) {
  1426         x = YUVwidth;
  1427 #ifdef __SSE2__
  1428         if (use_SSE2) {
  1429             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
  1430         }
  1431 #endif
  1432         while (x--) {
  1433             Uint8 Y1, U, Y2, V;
  1434 
  1435             Y1 = srcYUV[0];
  1436             V = srcYUV[1];
  1437             Y2 = srcYUV[2];
  1438             U = srcYUV[3];
  1439             srcYUV += 4;
  1440 
  1441             dstYUV[0] = Y1;
  1442             dstYUV[1] = U;
  1443             dstYUV[2] = Y2;
  1444             dstYUV[3] = V;
  1445             dstYUV += 4;
  1446         }
  1447         srcYUV += srcYUVPitchLeft;
  1448         dstYUV += dstYUVPitchLeft;
  1449     }
  1450     return 0;
  1451 }
  1452 
  1453 static int
  1454 SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1455 {
  1456     int x, y;
  1457     const int YUVwidth = (width + 1)/2;
  1458     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1459     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1460     const Uint8 *srcYUV = (const Uint8 *)src;
  1461     Uint8 *dstYUV = (Uint8 *)dst;
  1462 #ifdef __SSE2__
  1463     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1464 #endif
  1465 
  1466     y = height;
  1467     while (y--) {
  1468         x = YUVwidth;
  1469 #ifdef __SSE2__
  1470         if (use_SSE2) {
  1471             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
  1472         }
  1473 #endif
  1474         while (x--) {
  1475             Uint8 Y1, U, Y2, V;
  1476 
  1477             Y1 = srcYUV[0];
  1478             V = srcYUV[1];
  1479             Y2 = srcYUV[2];
  1480             U = srcYUV[3];
  1481             srcYUV += 4;
  1482 
  1483             dstYUV[0] = U;
  1484             dstYUV[1] = Y1;
  1485             dstYUV[2] = V;
  1486             dstYUV[3] = Y2;
  1487             dstYUV += 4;
  1488         }
  1489         srcYUV += srcYUVPitchLeft;
  1490         dstYUV += dstYUVPitchLeft;
  1491     }
  1492     return 0;
  1493 }
  1494 
  1495 static int
  1496 SDL_ConvertPixels_Packed4_to_Packed4(int width, int height,
  1497          Uint32 src_format, const void *src, int src_pitch,
  1498          Uint32 dst_format, void *dst, int dst_pitch)
  1499 {
  1500     switch (src_format) {
  1501     case SDL_PIXELFORMAT_YUY2:
  1502         switch (dst_format) {
  1503         case SDL_PIXELFORMAT_UYVY:
  1504             return SDL_ConvertPixels_YUY2_to_UYVY(width, height, src, src_pitch, dst, dst_pitch);
  1505         case SDL_PIXELFORMAT_YVYU:
  1506             return SDL_ConvertPixels_YUY2_to_YVYU(width, height, src, src_pitch, dst, dst_pitch);
  1507         default:
  1508             break;
  1509         }
  1510         break;
  1511     case SDL_PIXELFORMAT_UYVY:
  1512         switch (dst_format) {
  1513         case SDL_PIXELFORMAT_YUY2:
  1514             return SDL_ConvertPixels_UYVY_to_YUY2(width, height, src, src_pitch, dst, dst_pitch);
  1515         case SDL_PIXELFORMAT_YVYU:
  1516             return SDL_ConvertPixels_UYVY_to_YVYU(width, height, src, src_pitch, dst, dst_pitch);
  1517         default:
  1518             break;
  1519         }
  1520         break;
  1521     case SDL_PIXELFORMAT_YVYU:
  1522         switch (dst_format) {
  1523         case SDL_PIXELFORMAT_YUY2:
  1524             return SDL_ConvertPixels_YVYU_to_YUY2(width, height, src, src_pitch, dst, dst_pitch);
  1525         case SDL_PIXELFORMAT_UYVY:
  1526             return SDL_ConvertPixels_YVYU_to_UYVY(width, height, src, src_pitch, dst, dst_pitch);
  1527         default:
  1528             break;
  1529         }
  1530         break;
  1531     default:
  1532         break;
  1533     }
  1534     return SDL_SetError("SDL_ConvertPixels_Packed4_to_Packed4: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
  1535 }
  1536 
  1537 static int
  1538 SDL_ConvertPixels_Planar2x2_to_Packed4(int width, int height,
  1539          Uint32 src_format, const void *src, int src_pitch,
  1540          Uint32 dst_format, void *dst, int dst_pitch)
  1541 {
  1542     int x, y;
  1543     const Uint8 *srcY1, *srcY2, *srcU, *srcV;
  1544     Uint32 srcY_pitch, srcUV_pitch;
  1545     Uint32 srcY_pitch_left, srcUV_pitch_left, srcUV_pixel_stride;
  1546     Uint8 *dstY1, *dstY2, *dstU1, *dstU2, *dstV1, *dstV2;
  1547     Uint32 dstY_pitch, dstUV_pitch;
  1548     Uint32 dst_pitch_left;
  1549 
  1550     if (src == dst) {
  1551         return SDL_SetError("Can't change YUV plane types in-place");
  1552     }
  1553 
  1554     if (GetYUVPlanes(width, height, src_format, src, src_pitch,
  1555                      &srcY1, &srcU, &srcV, &srcY_pitch, &srcUV_pitch) < 0) {
  1556         return -1;
  1557     }
  1558     srcY2 = srcY1 + srcY_pitch;
  1559     srcY_pitch_left = (srcY_pitch - width);
  1560 
  1561     if (src_format == SDL_PIXELFORMAT_NV12 || src_format == SDL_PIXELFORMAT_NV21) {
  1562         srcUV_pixel_stride = 2;
  1563         srcUV_pitch_left = (srcUV_pitch - 2*((width + 1)/2));
  1564     } else {
  1565         srcUV_pixel_stride = 1;
  1566         srcUV_pitch_left = (srcUV_pitch - ((width + 1)/2));
  1567     }
  1568 
  1569     if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
  1570                      (const Uint8 **)&dstY1, (const Uint8 **)&dstU1, (const Uint8 **)&dstV1,
  1571                      &dstY_pitch, &dstUV_pitch) < 0) {
  1572         return -1;
  1573     }
  1574     dstY2 = dstY1 + dstY_pitch;
  1575     dstU2 = dstU1 + dstUV_pitch;
  1576     dstV2 = dstV1 + dstUV_pitch;
  1577     dst_pitch_left = (dstY_pitch - 4*((width + 1)/2));
  1578 
  1579     /* Copy 2x2 blocks of pixels at a time */
  1580     for (y = 0; y < (height - 1); y += 2) {
  1581         for (x = 0; x < (width - 1); x += 2) {
  1582             /* Row 1 */
  1583             *dstY1 = *srcY1++;
  1584             dstY1 += 2;
  1585             *dstY1 = *srcY1++;
  1586             dstY1 += 2;
  1587             *dstU1 = *srcU;
  1588             *dstV1 = *srcV;
  1589 
  1590             /* Row 2 */
  1591             *dstY2 = *srcY2++;
  1592             dstY2 += 2;
  1593             *dstY2 = *srcY2++;
  1594             dstY2 += 2;
  1595             *dstU2 = *srcU;
  1596             *dstV2 = *srcV;
  1597 
  1598             srcU += srcUV_pixel_stride;
  1599             srcV += srcUV_pixel_stride;
  1600             dstU1 += 4;
  1601             dstU2 += 4;
  1602             dstV1 += 4;
  1603             dstV2 += 4;
  1604         }
  1605 
  1606         /* Last column */
  1607         if (x == (width - 1)) {
  1608             /* Row 1 */
  1609             *dstY1 = *srcY1;
  1610             dstY1 += 2;
  1611             *dstY1 = *srcY1++;
  1612             dstY1 += 2;
  1613             *dstU1 = *srcU;
  1614             *dstV1 = *srcV;
  1615 
  1616             /* Row 2 */
  1617             *dstY2 = *srcY2;
  1618             dstY2 += 2;
  1619             *dstY2 = *srcY2++;
  1620             dstY2 += 2;
  1621             *dstU2 = *srcU;
  1622             *dstV2 = *srcV;
  1623 
  1624             srcU += srcUV_pixel_stride;
  1625             srcV += srcUV_pixel_stride;
  1626             dstU1 += 4;
  1627             dstU2 += 4;
  1628             dstV1 += 4;
  1629             dstV2 += 4;
  1630         }
  1631 
  1632         srcY1 += srcY_pitch_left + srcY_pitch;
  1633         srcY2 += srcY_pitch_left + srcY_pitch;
  1634         srcU += srcUV_pitch_left;
  1635         srcV += srcUV_pitch_left;
  1636         dstY1 += dst_pitch_left + dstY_pitch;
  1637         dstY2 += dst_pitch_left + dstY_pitch;
  1638         dstU1 += dst_pitch_left + dstUV_pitch;
  1639         dstU2 += dst_pitch_left + dstUV_pitch;
  1640         dstV1 += dst_pitch_left + dstUV_pitch;
  1641         dstV2 += dst_pitch_left + dstUV_pitch;
  1642     }
  1643 
  1644     /* Last row */
  1645     if (y == (height - 1)) {
  1646         for (x = 0; x < (width - 1); x += 2) {
  1647             /* Row 1 */
  1648             *dstY1 = *srcY1++;
  1649             dstY1 += 2;
  1650             *dstY1 = *srcY1++;
  1651             dstY1 += 2;
  1652             *dstU1 = *srcU;
  1653             *dstV1 = *srcV;
  1654 
  1655             srcU += srcUV_pixel_stride;
  1656             srcV += srcUV_pixel_stride;
  1657             dstU1 += 4;
  1658             dstV1 += 4;
  1659         }
  1660 
  1661         /* Last column */
  1662         if (x == (width - 1)) {
  1663             /* Row 1 */
  1664             *dstY1 = *srcY1;
  1665             dstY1 += 2;
  1666             *dstY1 = *srcY1++;
  1667             dstY1 += 2;
  1668             *dstU1 = *srcU;
  1669             *dstV1 = *srcV;
  1670 
  1671             srcU += srcUV_pixel_stride;
  1672             srcV += srcUV_pixel_stride;
  1673             dstU1 += 4;
  1674             dstV1 += 4;
  1675         }
  1676     }
  1677     return 0;
  1678 }
  1679 
  1680 static int
  1681 SDL_ConvertPixels_Packed4_to_Planar2x2(int width, int height,
  1682          Uint32 src_format, const void *src, int src_pitch,
  1683          Uint32 dst_format, void *dst, int dst_pitch)
  1684 {
  1685     int x, y;
  1686     const Uint8 *srcY1, *srcY2, *srcU1, *srcU2, *srcV1, *srcV2;
  1687     Uint32 srcY_pitch, srcUV_pitch;
  1688     Uint32 src_pitch_left;
  1689     Uint8 *dstY1, *dstY2, *dstU, *dstV;
  1690     Uint32 dstY_pitch, dstUV_pitch;
  1691     Uint32 dstY_pitch_left, dstUV_pitch_left, dstUV_pixel_stride;
  1692 
  1693     if (src == dst) {
  1694         return SDL_SetError("Can't change YUV plane types in-place");
  1695     }
  1696 
  1697     if (GetYUVPlanes(width, height, src_format, src, src_pitch,
  1698                      &srcY1, &srcU1, &srcV1, &srcY_pitch, &srcUV_pitch) < 0) {
  1699         return -1;
  1700     }
  1701     srcY2 = srcY1 + srcY_pitch;
  1702     srcU2 = srcU1 + srcUV_pitch;
  1703     srcV2 = srcV1 + srcUV_pitch;
  1704     src_pitch_left = (srcY_pitch - 4*((width + 1)/2));
  1705 
  1706     if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
  1707                      (const Uint8 **)&dstY1, (const Uint8 **)&dstU, (const Uint8 **)&dstV,
  1708                      &dstY_pitch, &dstUV_pitch) < 0) {
  1709         return -1;
  1710     }
  1711     dstY2 = dstY1 + dstY_pitch;
  1712     dstY_pitch_left = (dstY_pitch - width);
  1713 
  1714     if (dst_format == SDL_PIXELFORMAT_NV12 || dst_format == SDL_PIXELFORMAT_NV21) {
  1715         dstUV_pixel_stride = 2;
  1716         dstUV_pitch_left = (dstUV_pitch - 2*((width + 1)/2));
  1717     } else {
  1718         dstUV_pixel_stride = 1;
  1719         dstUV_pitch_left = (dstUV_pitch - ((width + 1)/2));
  1720     }
  1721 
  1722     /* Copy 2x2 blocks of pixels at a time */
  1723     for (y = 0; y < (height - 1); y += 2) {
  1724         for (x = 0; x < (width - 1); x += 2) {
  1725             /* Row 1 */
  1726             *dstY1++ = *srcY1;
  1727             srcY1 += 2;
  1728             *dstY1++ = *srcY1;
  1729             srcY1 += 2;
  1730 
  1731             /* Row 2 */
  1732             *dstY2++ = *srcY2;
  1733             srcY2 += 2;
  1734             *dstY2++ = *srcY2;
  1735             srcY2 += 2;
  1736 
  1737             *dstU = (Uint8)(((Uint32)*srcU1 + *srcU2)/2);
  1738             *dstV = (Uint8)(((Uint32)*srcV1 + *srcV2)/2);
  1739 
  1740             srcU1 += 4;
  1741             srcU2 += 4;
  1742             srcV1 += 4;
  1743             srcV2 += 4;
  1744             dstU += dstUV_pixel_stride;
  1745             dstV += dstUV_pixel_stride;
  1746         }
  1747 
  1748         /* Last column */
  1749         if (x == (width - 1)) {
  1750             /* Row 1 */
  1751             *dstY1 = *srcY1;
  1752             srcY1 += 2;
  1753             *dstY1++ = *srcY1;
  1754             srcY1 += 2;
  1755 
  1756             /* Row 2 */
  1757             *dstY2 = *srcY2;
  1758             srcY2 += 2;
  1759             *dstY2++ = *srcY2;
  1760             srcY2 += 2;
  1761 
  1762             *dstU = (Uint8)(((Uint32)*srcU1 + *srcU2)/2);
  1763             *dstV = (Uint8)(((Uint32)*srcV1 + *srcV2)/2);
  1764 
  1765             srcU1 += 4;
  1766             srcU2 += 4;
  1767             srcV1 += 4;
  1768             srcV2 += 4;
  1769             dstU += dstUV_pixel_stride;
  1770             dstV += dstUV_pixel_stride;
  1771         }
  1772 
  1773         srcY1 += src_pitch_left + srcY_pitch;
  1774         srcY2 += src_pitch_left + srcY_pitch;
  1775         srcU1 += src_pitch_left + srcUV_pitch;
  1776         srcU2 += src_pitch_left + srcUV_pitch;
  1777         srcV1 += src_pitch_left + srcUV_pitch;
  1778         srcV2 += src_pitch_left + srcUV_pitch;
  1779         dstY1 += dstY_pitch_left + dstY_pitch;
  1780         dstY2 += dstY_pitch_left + dstY_pitch;
  1781         dstU += dstUV_pitch_left;
  1782         dstV += dstUV_pitch_left;
  1783     }
  1784 
  1785     /* Last row */
  1786     if (y == (height - 1)) {
  1787         for (x = 0; x < (width - 1); x += 2) {
  1788             *dstY1++ = *srcY1;
  1789             srcY1 += 2;
  1790             *dstY1++ = *srcY1;
  1791             srcY1 += 2;
  1792 
  1793             *dstU = *srcU1;
  1794             *dstV = *srcV1;
  1795 
  1796             srcU1 += 4;
  1797             srcV1 += 4;
  1798             dstU += dstUV_pixel_stride;
  1799             dstV += dstUV_pixel_stride;
  1800         }
  1801 
  1802         /* Last column */
  1803         if (x == (width - 1)) {
  1804             *dstY1 = *srcY1;
  1805             *dstU = *srcU1;
  1806             *dstV = *srcV1;
  1807         }
  1808     }
  1809     return 0;
  1810 }
  1811 
  1812 int
  1813 SDL_ConvertPixels_YUV_to_YUV(int width, int height,
  1814          Uint32 src_format, const void *src, int src_pitch,
  1815          Uint32 dst_format, void *dst, int dst_pitch)
  1816 {
  1817     if (src_format == dst_format) {
  1818         if (src == dst) {
  1819             /* Nothing to do */
  1820             return 0;
  1821         }
  1822         return SDL_ConvertPixels_YUV_to_YUV_Copy(width, height, src_format, src, src_pitch, dst, dst_pitch);
  1823     }
  1824 
  1825     if (IsPlanar2x2Format(src_format) && IsPlanar2x2Format(dst_format)) {
  1826         return SDL_ConvertPixels_Planar2x2_to_Planar2x2(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1827     } else if (IsPacked4Format(src_format) && IsPacked4Format(dst_format)) {
  1828         return SDL_ConvertPixels_Packed4_to_Packed4(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1829     } else if (IsPlanar2x2Format(src_format) && IsPacked4Format(dst_format)) {
  1830         return SDL_ConvertPixels_Planar2x2_to_Packed4(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1831     } else if (IsPacked4Format(src_format) && IsPlanar2x2Format(dst_format)) {
  1832         return SDL_ConvertPixels_Packed4_to_Planar2x2(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1833     } else {
  1834         return SDL_SetError("SDL_ConvertPixels_YUV_to_YUV: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
  1835     }
  1836 }
  1837 
  1838 /* vi: set ts=4 sw=4 expandtab: */