src/video/SDL_yuv.c
author Sam Lantinga <slouken@libsdl.org>
Thu, 07 Dec 2017 16:08:47 -0800
changeset 11731 30f337dc8c74
parent 11712 b94292eebb5c
child 11811 5d94cb6b24d3
permissions -rw-r--r--
Added iOS and OSX versions of the Metal shaders
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 #include "SDL_endian.h"
    24 #include "SDL_video.h"
    25 #include "SDL_pixels_c.h"
    26 
    27 #include "yuv2rgb/yuv_rgb.h"
    28 
    29 #define SDL_YUV_SD_THRESHOLD    576
    30 
    31 
    32 static SDL_YUV_CONVERSION_MODE SDL_YUV_ConversionMode = SDL_YUV_CONVERSION_BT601;
    33 
    34 
    35 void SDL_SetYUVConversionMode(SDL_YUV_CONVERSION_MODE mode)
    36 {
    37     SDL_YUV_ConversionMode = mode;
    38 }
    39 
    40 SDL_YUV_CONVERSION_MODE SDL_GetYUVConversionMode()
    41 {
    42     return SDL_YUV_ConversionMode;
    43 }
    44 
    45 SDL_YUV_CONVERSION_MODE SDL_GetYUVConversionModeForResolution(int width, int height)
    46 {
    47     SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionMode();
    48     if (mode == SDL_YUV_CONVERSION_AUTOMATIC) {
    49         if (height <= SDL_YUV_SD_THRESHOLD) {
    50             mode = SDL_YUV_CONVERSION_BT601;
    51         } else {
    52             mode = SDL_YUV_CONVERSION_BT709;
    53         }
    54     }
    55     return mode;
    56 }
    57 
    58 static int GetYUVConversionType(int width, int height, YCbCrType *yuv_type)
    59 {
    60     switch (SDL_GetYUVConversionModeForResolution(width, height)) {
    61     case SDL_YUV_CONVERSION_JPEG:
    62         *yuv_type = YCBCR_JPEG;
    63         break;
    64     case SDL_YUV_CONVERSION_BT601:
    65         *yuv_type = YCBCR_601;
    66         break;
    67     case SDL_YUV_CONVERSION_BT709:
    68         *yuv_type = YCBCR_709;
    69         break;
    70     default:
    71         return SDL_SetError("Unexpected YUV conversion mode");
    72     }
    73     return 0;
    74 }
    75 
    76 static SDL_bool IsPlanar2x2Format(Uint32 format)
    77 {
    78     return (format == SDL_PIXELFORMAT_YV12 ||
    79             format == SDL_PIXELFORMAT_IYUV ||
    80             format == SDL_PIXELFORMAT_NV12 ||
    81             format == SDL_PIXELFORMAT_NV21);
    82 }
    83 
    84 static SDL_bool IsPacked4Format(Uint32 format)
    85 {
    86     return (format == SDL_PIXELFORMAT_YUY2 ||
    87             format == SDL_PIXELFORMAT_UYVY ||
    88             format == SDL_PIXELFORMAT_YVYU);
    89 }
    90 
    91 static int GetYUVPlanes(int width, int height, Uint32 format, const void *yuv, int yuv_pitch,
    92 	                    const Uint8 **y, const Uint8 **u, const Uint8 **v, Uint32 *y_stride, Uint32 *uv_stride)
    93 {
    94 	const Uint8 *planes[3] = { NULL, NULL, NULL };
    95 	int pitches[3] = { 0, 0, 0 };
    96 
    97     switch (format) {
    98     case SDL_PIXELFORMAT_YV12:
    99     case SDL_PIXELFORMAT_IYUV:
   100         pitches[0] = yuv_pitch;
   101         pitches[1] = (pitches[0] + 1) / 2;
   102         pitches[2] = (pitches[0] + 1) / 2;
   103         planes[0] = (const Uint8 *)yuv;
   104         planes[1] = planes[0] + pitches[0] * height;
   105         planes[2] = planes[1] + pitches[1] * ((height + 1) / 2);
   106         break;
   107     case SDL_PIXELFORMAT_YUY2:
   108     case SDL_PIXELFORMAT_UYVY:
   109     case SDL_PIXELFORMAT_YVYU:
   110         pitches[0] = yuv_pitch;
   111         planes[0] = (const Uint8 *)yuv;
   112         break;
   113     case SDL_PIXELFORMAT_NV12:
   114     case SDL_PIXELFORMAT_NV21:
   115         pitches[0] = yuv_pitch;
   116         pitches[1] = 2 * ((pitches[0] + 1) / 2);
   117         planes[0] = (const Uint8 *)yuv;
   118         planes[1] = planes[0] + pitches[0] * height;
   119         break;
   120     default:
   121         return SDL_SetError("GetYUVPlanes(): Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
   122     }
   123 
   124     switch (format) {
   125     case SDL_PIXELFORMAT_YV12:
   126         *y = planes[0];
   127         *y_stride = pitches[0];
   128         *v = planes[1];
   129         *u = planes[2];
   130         *uv_stride = pitches[1];
   131         break;
   132     case SDL_PIXELFORMAT_IYUV:
   133         *y = planes[0];
   134         *y_stride = pitches[0];
   135         *v = planes[2];
   136         *u = planes[1];
   137         *uv_stride = pitches[1];
   138         break;
   139     case SDL_PIXELFORMAT_YUY2:
   140         *y = planes[0];
   141         *y_stride = pitches[0];
   142         *v = *y + 3;
   143         *u = *y + 1;
   144         *uv_stride = pitches[0];
   145         break;
   146     case SDL_PIXELFORMAT_UYVY:
   147         *y = planes[0] + 1;
   148         *y_stride = pitches[0];
   149         *v = *y + 1;
   150         *u = *y - 1;
   151         *uv_stride = pitches[0];
   152         break;
   153     case SDL_PIXELFORMAT_YVYU:
   154         *y = planes[0];
   155         *y_stride = pitches[0];
   156         *v = *y + 1;
   157         *u = *y + 3;
   158         *uv_stride = pitches[0];
   159         break;
   160     case SDL_PIXELFORMAT_NV12:
   161         *y = planes[0];
   162         *y_stride = pitches[0];
   163         *u = planes[1];
   164         *v = *u + 1;
   165         *uv_stride = pitches[1];
   166         break;
   167     case SDL_PIXELFORMAT_NV21:
   168         *y = planes[0];
   169         *y_stride = pitches[0];
   170         *v = planes[1];
   171         *u = *v + 1;
   172         *uv_stride = pitches[1];
   173         break;
   174     default:
   175         /* Should have caught this above */
   176         return SDL_SetError("GetYUVPlanes[2]: Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
   177     }
   178     return 0;
   179 }
   180 
   181 static SDL_bool yuv_rgb_sse(
   182     Uint32 src_format, Uint32 dst_format,
   183 	Uint32 width, Uint32 height, 
   184 	const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride, 
   185 	Uint8 *rgb, Uint32 rgb_stride, 
   186 	YCbCrType yuv_type)
   187 {
   188 #ifdef __SSE2__
   189     if (!SDL_HasSSE2()) {
   190         return SDL_FALSE;
   191     }
   192 
   193     if (src_format == SDL_PIXELFORMAT_YV12 ||
   194         src_format == SDL_PIXELFORMAT_IYUV) {
   195 
   196         switch (dst_format) {
   197         case SDL_PIXELFORMAT_RGB565:
   198             yuv420_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   199             return SDL_TRUE;
   200         case SDL_PIXELFORMAT_RGB24:
   201             yuv420_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   202             return SDL_TRUE;
   203         case SDL_PIXELFORMAT_RGBX8888:
   204         case SDL_PIXELFORMAT_RGBA8888:
   205             yuv420_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   206             return SDL_TRUE;
   207         case SDL_PIXELFORMAT_BGRX8888:
   208         case SDL_PIXELFORMAT_BGRA8888:
   209             yuv420_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   210             return SDL_TRUE;
   211         case SDL_PIXELFORMAT_RGB888:
   212         case SDL_PIXELFORMAT_ARGB8888:
   213             yuv420_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   214             return SDL_TRUE;
   215         case SDL_PIXELFORMAT_BGR888:
   216         case SDL_PIXELFORMAT_ABGR8888:
   217             yuv420_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   218             return SDL_TRUE;
   219         default:
   220             break;
   221         }
   222     }
   223 
   224     if (src_format == SDL_PIXELFORMAT_YUY2 ||
   225         src_format == SDL_PIXELFORMAT_UYVY ||
   226         src_format == SDL_PIXELFORMAT_YVYU) {
   227 
   228         switch (dst_format) {
   229         case SDL_PIXELFORMAT_RGB565:
   230             yuv422_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   231             return SDL_TRUE;
   232         case SDL_PIXELFORMAT_RGB24:
   233             yuv422_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   234             return SDL_TRUE;
   235         case SDL_PIXELFORMAT_RGBX8888:
   236         case SDL_PIXELFORMAT_RGBA8888:
   237             yuv422_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   238             return SDL_TRUE;
   239         case SDL_PIXELFORMAT_BGRX8888:
   240         case SDL_PIXELFORMAT_BGRA8888:
   241             yuv422_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   242             return SDL_TRUE;
   243         case SDL_PIXELFORMAT_RGB888:
   244         case SDL_PIXELFORMAT_ARGB8888:
   245             yuv422_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   246             return SDL_TRUE;
   247         case SDL_PIXELFORMAT_BGR888:
   248         case SDL_PIXELFORMAT_ABGR8888:
   249             yuv422_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   250             return SDL_TRUE;
   251         default:
   252             break;
   253         }
   254     }
   255 
   256     if (src_format == SDL_PIXELFORMAT_NV12 ||
   257         src_format == SDL_PIXELFORMAT_NV21) {
   258 
   259         switch (dst_format) {
   260         case SDL_PIXELFORMAT_RGB565:
   261             yuvnv12_rgb565_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   262             return SDL_TRUE;
   263         case SDL_PIXELFORMAT_RGB24:
   264             yuvnv12_rgb24_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   265             return SDL_TRUE;
   266         case SDL_PIXELFORMAT_RGBX8888:
   267         case SDL_PIXELFORMAT_RGBA8888:
   268             yuvnv12_rgba_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   269             return SDL_TRUE;
   270         case SDL_PIXELFORMAT_BGRX8888:
   271         case SDL_PIXELFORMAT_BGRA8888:
   272             yuvnv12_bgra_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   273             return SDL_TRUE;
   274         case SDL_PIXELFORMAT_RGB888:
   275         case SDL_PIXELFORMAT_ARGB8888:
   276             yuvnv12_argb_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   277             return SDL_TRUE;
   278         case SDL_PIXELFORMAT_BGR888:
   279         case SDL_PIXELFORMAT_ABGR8888:
   280             yuvnv12_abgr_sseu(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   281             return SDL_TRUE;
   282         default:
   283             break;
   284         }
   285     }
   286 #endif
   287     return SDL_FALSE;
   288 }
   289 
   290 static SDL_bool yuv_rgb_std(
   291     Uint32 src_format, Uint32 dst_format,
   292 	Uint32 width, Uint32 height, 
   293 	const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride, 
   294 	Uint8 *rgb, Uint32 rgb_stride, 
   295 	YCbCrType yuv_type)
   296 {
   297     if (src_format == SDL_PIXELFORMAT_YV12 ||
   298         src_format == SDL_PIXELFORMAT_IYUV) {
   299 
   300         switch (dst_format) {
   301         case SDL_PIXELFORMAT_RGB565:
   302             yuv420_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   303             return SDL_TRUE;
   304         case SDL_PIXELFORMAT_RGB24:
   305             yuv420_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   306             return SDL_TRUE;
   307         case SDL_PIXELFORMAT_RGBX8888:
   308         case SDL_PIXELFORMAT_RGBA8888:
   309             yuv420_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   310             return SDL_TRUE;
   311         case SDL_PIXELFORMAT_BGRX8888:
   312         case SDL_PIXELFORMAT_BGRA8888:
   313             yuv420_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   314             return SDL_TRUE;
   315         case SDL_PIXELFORMAT_RGB888:
   316         case SDL_PIXELFORMAT_ARGB8888:
   317             yuv420_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   318             return SDL_TRUE;
   319         case SDL_PIXELFORMAT_BGR888:
   320         case SDL_PIXELFORMAT_ABGR8888:
   321             yuv420_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   322             return SDL_TRUE;
   323         default:
   324             break;
   325         }
   326     }
   327 
   328     if (src_format == SDL_PIXELFORMAT_YUY2 ||
   329         src_format == SDL_PIXELFORMAT_UYVY ||
   330         src_format == SDL_PIXELFORMAT_YVYU) {
   331 
   332         switch (dst_format) {
   333         case SDL_PIXELFORMAT_RGB565:
   334             yuv422_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   335             return SDL_TRUE;
   336         case SDL_PIXELFORMAT_RGB24:
   337             yuv422_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   338             return SDL_TRUE;
   339         case SDL_PIXELFORMAT_RGBX8888:
   340         case SDL_PIXELFORMAT_RGBA8888:
   341             yuv422_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   342             return SDL_TRUE;
   343         case SDL_PIXELFORMAT_BGRX8888:
   344         case SDL_PIXELFORMAT_BGRA8888:
   345             yuv422_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   346             return SDL_TRUE;
   347         case SDL_PIXELFORMAT_RGB888:
   348         case SDL_PIXELFORMAT_ARGB8888:
   349             yuv422_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   350             return SDL_TRUE;
   351         case SDL_PIXELFORMAT_BGR888:
   352         case SDL_PIXELFORMAT_ABGR8888:
   353             yuv422_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   354             return SDL_TRUE;
   355         default:
   356             break;
   357         }
   358     }
   359 
   360     if (src_format == SDL_PIXELFORMAT_NV12 ||
   361         src_format == SDL_PIXELFORMAT_NV21) {
   362 
   363         switch (dst_format) {
   364         case SDL_PIXELFORMAT_RGB565:
   365             yuvnv12_rgb565_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   366             return SDL_TRUE;
   367         case SDL_PIXELFORMAT_RGB24:
   368             yuvnv12_rgb24_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   369             return SDL_TRUE;
   370         case SDL_PIXELFORMAT_RGBX8888:
   371         case SDL_PIXELFORMAT_RGBA8888:
   372             yuvnv12_rgba_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   373             return SDL_TRUE;
   374         case SDL_PIXELFORMAT_BGRX8888:
   375         case SDL_PIXELFORMAT_BGRA8888:
   376             yuvnv12_bgra_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   377             return SDL_TRUE;
   378         case SDL_PIXELFORMAT_RGB888:
   379         case SDL_PIXELFORMAT_ARGB8888:
   380             yuvnv12_argb_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   381             return SDL_TRUE;
   382         case SDL_PIXELFORMAT_BGR888:
   383         case SDL_PIXELFORMAT_ABGR8888:
   384             yuvnv12_abgr_std(width, height, y, u, v, y_stride, uv_stride, rgb, rgb_stride, yuv_type);
   385             return SDL_TRUE;
   386         default:
   387             break;
   388         }
   389     }
   390     return SDL_FALSE;
   391 }
   392 
   393 int
   394 SDL_ConvertPixels_YUV_to_RGB(int width, int height,
   395          Uint32 src_format, const void *src, int src_pitch,
   396          Uint32 dst_format, void *dst, int dst_pitch)
   397 {
   398 	const Uint8 *y = NULL;
   399     const Uint8 *u = NULL;
   400     const Uint8 *v = NULL;
   401     Uint32 y_stride = 0;
   402     Uint32 uv_stride = 0;
   403     YCbCrType yuv_type = YCBCR_601;
   404 
   405     if (GetYUVPlanes(width, height, src_format, src, src_pitch, &y, &u, &v, &y_stride, &uv_stride) < 0) {
   406         return -1;
   407     }
   408 
   409     if (GetYUVConversionType(width, height, &yuv_type) < 0) {
   410         return -1;
   411     }
   412 
   413     if (yuv_rgb_sse(src_format, dst_format, width, height, y, u, v, y_stride, uv_stride, (Uint8*)dst, dst_pitch, yuv_type)) {
   414         return 0;
   415     }
   416 
   417     if (yuv_rgb_std(src_format, dst_format, width, height, y, u, v, y_stride, uv_stride, (Uint8*)dst, dst_pitch, yuv_type)) {
   418         return 0;
   419     }
   420 
   421     /* No fast path for the RGB format, instead convert using an intermediate buffer */
   422     if (dst_format != SDL_PIXELFORMAT_ARGB8888) {
   423         int ret;
   424         void *tmp;
   425         int tmp_pitch = (width * sizeof(Uint32));
   426 
   427         tmp = SDL_malloc(tmp_pitch * height);
   428         if (tmp == NULL) {
   429             return SDL_OutOfMemory();
   430         }
   431 
   432         /* convert src/src_format to tmp/ARGB8888 */
   433         ret = SDL_ConvertPixels_YUV_to_RGB(width, height, src_format, src, src_pitch, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch);
   434         if (ret < 0) {
   435             SDL_free(tmp);
   436             return ret;
   437         }
   438 
   439         /* convert tmp/ARGB8888 to dst/RGB */
   440         ret = SDL_ConvertPixels(width, height, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch, dst_format, dst, dst_pitch);
   441         SDL_free(tmp);
   442         return ret;
   443     }
   444 
   445     return SDL_SetError("Unsupported YUV conversion");
   446 }
   447 
   448 struct RGB2YUVFactors
   449 {
   450     int y_offset;
   451     float y[3]; /* Rfactor, Gfactor, Bfactor */
   452     float u[3]; /* Rfactor, Gfactor, Bfactor */
   453     float v[3]; /* Rfactor, Gfactor, Bfactor */
   454 };
   455 
   456 static int
   457 SDL_ConvertPixels_ARGB8888_to_YUV(int width, int height, const void *src, int src_pitch, Uint32 dst_format, void *dst, int dst_pitch)
   458 {
   459     const int src_pitch_x_2    = src_pitch * 2;
   460     const int height_half      = height / 2;
   461     const int height_remainder = (height & 0x1);
   462     const int width_half       = width / 2;
   463     const int width_remainder  = (width & 0x1);
   464     int i, j;
   465  
   466     static struct RGB2YUVFactors RGB2YUVFactorTables[SDL_YUV_CONVERSION_BT709 + 1] =
   467     {
   468         /* ITU-T T.871 (JPEG) */
   469         {
   470             0,
   471             {  0.2990f,  0.5870f,  0.1140f },
   472             { -0.1687f, -0.3313f,  0.5000f },
   473             {  0.5000f, -0.4187f, -0.0813f },
   474         },
   475         /* ITU-R BT.601-7 */
   476         {
   477             16,
   478             {  0.2568f,  0.5041f,  0.0979f },
   479             { -0.1482f, -0.2910f,  0.4392f },
   480             {  0.4392f, -0.3678f, -0.0714f },
   481         },
   482         /* ITU-R BT.709-6 */
   483         {
   484             16,
   485             { 0.1826f,  0.6142f,  0.0620f },
   486             {-0.1006f, -0.3386f,  0.4392f },
   487             { 0.4392f, -0.3989f, -0.0403f },
   488         },
   489     };
   490     const struct RGB2YUVFactors *cvt = &RGB2YUVFactorTables[SDL_GetYUVConversionModeForResolution(width, height)];
   491 
   492 #define MAKE_Y(r, g, b) (Uint8)((int)(cvt->y[0] * (r) + cvt->y[1] * (g) + cvt->y[2] * (b) + 0.5f) + cvt->y_offset)
   493 #define MAKE_U(r, g, b) (Uint8)((int)(cvt->u[0] * (r) + cvt->u[1] * (g) + cvt->u[2] * (b) + 0.5f) + 128)
   494 #define MAKE_V(r, g, b) (Uint8)((int)(cvt->v[0] * (r) + cvt->v[1] * (g) + cvt->v[2] * (b) + 0.5f) + 128)
   495 
   496 #define READ_2x2_PIXELS                                                                                         \
   497         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
   498         const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
   499         const Uint32 p3 = ((const Uint32 *)next_row)[2 * i];                                                    \
   500         const Uint32 p4 = ((const Uint32 *)next_row)[2 * i + 1];                                                \
   501         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000) + (p3 & 0x00ff0000) + (p4 & 0x00ff0000)) >> 18; \
   502         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00) + (p3 & 0x0000ff00) + (p4 & 0x0000ff00)) >> 10; \
   503         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff) + (p3 & 0x000000ff) + (p4 & 0x000000ff)) >> 2;  \
   504 
   505 #define READ_2x1_PIXELS                                                                                         \
   506         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
   507         const Uint32 p2 = ((const Uint32 *)next_row)[2 * i];                                                    \
   508         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17;                                         \
   509         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9;                                          \
   510         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1;                                          \
   511 
   512 #define READ_1x2_PIXELS                                                                                         \
   513         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i];                                                    \
   514         const Uint32 p2 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
   515         const Uint32 r = ((p1 & 0x00ff0000) + (p2 & 0x00ff0000)) >> 17;                                         \
   516         const Uint32 g = ((p1 & 0x0000ff00) + (p2 & 0x0000ff00)) >> 9;                                          \
   517         const Uint32 b = ((p1 & 0x000000ff) + (p2 & 0x000000ff)) >> 1;                                          \
   518 
   519 #define READ_1x1_PIXEL                                                                                          \
   520         const Uint32 p = ((const Uint32 *)curr_row)[2 * i];                                                     \
   521         const Uint32 r = (p & 0x00ff0000) >> 16;                                                                \
   522         const Uint32 g = (p & 0x0000ff00) >> 8;                                                                 \
   523         const Uint32 b = (p & 0x000000ff);                                                                      \
   524 
   525 #define READ_TWO_RGB_PIXELS                                                                                     \
   526         const Uint32 p = ((const Uint32 *)curr_row)[2 * i];                                                     \
   527         const Uint32 r = (p & 0x00ff0000) >> 16;                                                                \
   528         const Uint32 g = (p & 0x0000ff00) >> 8;                                                                 \
   529         const Uint32 b = (p & 0x000000ff);                                                                      \
   530         const Uint32 p1 = ((const Uint32 *)curr_row)[2 * i + 1];                                                \
   531         const Uint32 r1 = (p1 & 0x00ff0000) >> 16;                                                              \
   532         const Uint32 g1 = (p1 & 0x0000ff00) >> 8;                                                               \
   533         const Uint32 b1 = (p1 & 0x000000ff);                                                                    \
   534         const Uint32 R = (r + r1)/2;                                                                            \
   535         const Uint32 G = (g + g1)/2;                                                                            \
   536         const Uint32 B = (b + b1)/2;                                                                            \
   537 
   538 #define READ_ONE_RGB_PIXEL  READ_1x1_PIXEL
   539 
   540     switch (dst_format) 
   541     {
   542     case SDL_PIXELFORMAT_YV12:
   543     case SDL_PIXELFORMAT_IYUV:
   544     case SDL_PIXELFORMAT_NV12:
   545     case SDL_PIXELFORMAT_NV21:
   546         {
   547             const Uint8 *curr_row, *next_row;
   548             
   549             Uint8 *plane_y;
   550             Uint8 *plane_u;
   551             Uint8 *plane_v;
   552             Uint8 *plane_interleaved_uv;
   553             Uint32 y_stride, uv_stride, y_skip, uv_skip;
   554 
   555             GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
   556 	                     (const Uint8 **)&plane_y, (const Uint8 **)&plane_u, (const Uint8 **)&plane_v,
   557                          &y_stride, &uv_stride);
   558             plane_interleaved_uv = (plane_y + height * y_stride);
   559             y_skip = (y_stride - width);
   560 
   561             curr_row = (const Uint8*)src;
   562 
   563             /* Write Y plane */
   564             for (j = 0; j < height; j++) {
   565                 for (i = 0; i < width; i++) {
   566                     const Uint32 p1 = ((const Uint32 *)curr_row)[i];
   567                     const Uint32 r = (p1 & 0x00ff0000) >> 16;
   568                     const Uint32 g = (p1 & 0x0000ff00) >> 8;
   569                     const Uint32 b = (p1 & 0x000000ff);
   570                     *plane_y++ = MAKE_Y(r, g, b);
   571                 }
   572                 plane_y += y_skip;
   573                 curr_row += src_pitch;
   574             }
   575 
   576             curr_row = (const Uint8*)src;
   577             next_row = (const Uint8*)src;
   578             next_row += src_pitch;
   579 
   580             if (dst_format == SDL_PIXELFORMAT_YV12 || dst_format == SDL_PIXELFORMAT_IYUV)
   581             {
   582                 /* Write UV planes, not interleaved */
   583                 uv_skip = (uv_stride - (width + 1)/2);
   584                 for (j = 0; j < height_half; j++) {
   585                     for (i = 0; i < width_half; i++) {
   586                         READ_2x2_PIXELS;
   587                         *plane_u++ = MAKE_U(r, g, b);
   588                         *plane_v++ = MAKE_V(r, g, b);
   589                     }
   590                     if (width_remainder) {
   591                         READ_2x1_PIXELS;
   592                         *plane_u++ = MAKE_U(r, g, b);
   593                         *plane_v++ = MAKE_V(r, g, b);
   594                     }
   595                     plane_u += uv_skip;
   596                     plane_v += uv_skip;
   597                     curr_row += src_pitch_x_2;
   598                     next_row += src_pitch_x_2;
   599                 }
   600                 if (height_remainder) {
   601                     for (i = 0; i < width_half; i++) {
   602                         READ_1x2_PIXELS;
   603                         *plane_u++ = MAKE_U(r, g, b);
   604                         *plane_v++ = MAKE_V(r, g, b);
   605                     }
   606                     if (width_remainder) {
   607                         READ_1x1_PIXEL;
   608                         *plane_u++ = MAKE_U(r, g, b);
   609                         *plane_v++ = MAKE_V(r, g, b);
   610                     }
   611                     plane_u += uv_skip;
   612                     plane_v += uv_skip;
   613                 }
   614             }
   615             else if (dst_format == SDL_PIXELFORMAT_NV12)
   616             {
   617                 uv_skip = (uv_stride - ((width + 1)/2)*2);
   618                 for (j = 0; j < height_half; j++) {
   619                     for (i = 0; i < width_half; i++) {
   620                         READ_2x2_PIXELS;
   621                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   622                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   623                     }
   624                     if (width_remainder) {
   625                         READ_2x1_PIXELS;
   626                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   627                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   628                     }
   629                     plane_interleaved_uv += uv_skip;
   630                     curr_row += src_pitch_x_2;
   631                     next_row += src_pitch_x_2;
   632                 }
   633                 if (height_remainder) {
   634                     for (i = 0; i < width_half; i++) {
   635                         READ_1x2_PIXELS;
   636                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   637                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   638                     }
   639                     if (width_remainder) {
   640                         READ_1x1_PIXEL;
   641                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   642                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   643                     }
   644                 }
   645             } 
   646             else /* dst_format == SDL_PIXELFORMAT_NV21 */
   647             {
   648                 uv_skip = (uv_stride - ((width + 1)/2)*2);
   649                 for (j = 0; j < height_half; j++) {
   650                     for (i = 0; i < width_half; i++) {
   651                         READ_2x2_PIXELS;
   652                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   653                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   654                     }
   655                     if (width_remainder) {
   656                         READ_2x1_PIXELS;
   657                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   658                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   659                     }
   660                     plane_interleaved_uv += uv_skip;
   661                     curr_row += src_pitch_x_2;
   662                     next_row += src_pitch_x_2;
   663                 }
   664                 if (height_remainder) {
   665                     for (i = 0; i < width_half; i++) {
   666                         READ_1x2_PIXELS;
   667                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   668                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   669                     }
   670                     if (width_remainder) {
   671                         READ_1x1_PIXEL;
   672                         *plane_interleaved_uv++ = MAKE_V(r, g, b);
   673                         *plane_interleaved_uv++ = MAKE_U(r, g, b);
   674                     }
   675                 }
   676             }
   677         }
   678         break;
   679 
   680     case SDL_PIXELFORMAT_YUY2:
   681     case SDL_PIXELFORMAT_UYVY:
   682     case SDL_PIXELFORMAT_YVYU:
   683         {
   684             const Uint8 *curr_row = (const Uint8*) src;
   685             Uint8 *plane           = (Uint8*) dst;
   686             const int row_size = (4 * ((width + 1) / 2));
   687             int plane_skip;
   688 
   689             if (dst_pitch < row_size) {
   690                 return SDL_SetError("Destination pitch is too small, expected at least %d\n", row_size);
   691             }
   692             plane_skip = (dst_pitch - row_size);
   693 
   694             /* Write YUV plane, packed */
   695             if (dst_format == SDL_PIXELFORMAT_YUY2) 
   696             {
   697                 for (j = 0; j < height; j++) {
   698                     for (i = 0; i < width_half; i++) {
   699                         READ_TWO_RGB_PIXELS;
   700                         /* Y U Y1 V */
   701                         *plane++ = MAKE_Y(r, g, b);
   702                         *plane++ = MAKE_U(R, G, B);
   703                         *plane++ = MAKE_Y(r1, g1, b1);
   704                         *plane++ = MAKE_V(R, G, B);
   705                     }
   706                     if (width_remainder) {
   707                         READ_ONE_RGB_PIXEL;
   708                         /* Y U Y V */
   709                         *plane++ = MAKE_Y(r, g, b);
   710                         *plane++ = MAKE_U(r, g, b);
   711                         *plane++ = MAKE_Y(r, g, b);
   712                         *plane++ = MAKE_V(r, g, b);
   713                     }
   714                     plane += plane_skip;
   715                     curr_row += src_pitch;
   716                 }
   717             } 
   718             else if (dst_format == SDL_PIXELFORMAT_UYVY)
   719             {
   720                 for (j = 0; j < height; j++) {
   721                     for (i = 0; i < width_half; i++) {
   722                         READ_TWO_RGB_PIXELS;
   723                         /* U Y V Y1 */
   724                         *plane++ = MAKE_U(R, G, B);
   725                         *plane++ = MAKE_Y(r, g, b);
   726                         *plane++ = MAKE_V(R, G, B);
   727                         *plane++ = MAKE_Y(r1, g1, b1);
   728                     }
   729                     if (width_remainder) {
   730                         READ_ONE_RGB_PIXEL;
   731                         /* U Y V Y */
   732                         *plane++ = MAKE_U(r, g, b);
   733                         *plane++ = MAKE_Y(r, g, b);
   734                         *plane++ = MAKE_V(r, g, b);
   735                         *plane++ = MAKE_Y(r, g, b);
   736                     }
   737                     plane += plane_skip;
   738                     curr_row += src_pitch;
   739                 }
   740             }
   741             else if (dst_format == SDL_PIXELFORMAT_YVYU)
   742             {
   743                 for (j = 0; j < height; j++) {
   744                     for (i = 0; i < width_half; i++) {
   745                         READ_TWO_RGB_PIXELS;
   746                         /* Y V Y1 U */
   747                         *plane++ = MAKE_Y(r, g, b);
   748                         *plane++ = MAKE_V(R, G, B);
   749                         *plane++ = MAKE_Y(r1, g1, b1);
   750                         *plane++ = MAKE_U(R, G, B);
   751                     }
   752                     if (width_remainder) {
   753                         READ_ONE_RGB_PIXEL;
   754                         /* Y V Y U */
   755                         *plane++ = MAKE_Y(r, g, b);
   756                         *plane++ = MAKE_V(r, g, b);
   757                         *plane++ = MAKE_Y(r, g, b);
   758                         *plane++ = MAKE_U(r, g, b);
   759                     }
   760                     plane += plane_skip;
   761                     curr_row += src_pitch;
   762                 }
   763             }
   764         }
   765         break;
   766 
   767     default:
   768         return SDL_SetError("Unsupported YUV destination format: %s", SDL_GetPixelFormatName(dst_format));
   769     }
   770 #undef MAKE_Y
   771 #undef MAKE_U
   772 #undef MAKE_V
   773 #undef READ_2x2_PIXELS
   774 #undef READ_2x1_PIXELS
   775 #undef READ_1x2_PIXELS
   776 #undef READ_1x1_PIXEL
   777 #undef READ_TWO_RGB_PIXELS
   778 #undef READ_ONE_RGB_PIXEL
   779     return 0;
   780 }
   781 
   782 int
   783 SDL_ConvertPixels_RGB_to_YUV(int width, int height,
   784          Uint32 src_format, const void *src, int src_pitch,
   785          Uint32 dst_format, void *dst, int dst_pitch)
   786 {
   787 #if 0 /* Doesn't handle odd widths */
   788     /* RGB24 to FOURCC */
   789     if (src_format == SDL_PIXELFORMAT_RGB24) {
   790         Uint8 *y;
   791         Uint8 *u;
   792         Uint8 *v;
   793         Uint32 y_stride;
   794         Uint32 uv_stride;
   795         YCbCrType yuv_type;
   796 
   797         if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch, (const Uint8 **)&y, (const Uint8 **)&u, (const Uint8 **)&v, &y_stride, &uv_stride) < 0) {
   798             return -1;
   799         }
   800 
   801         if (GetYUVConversionType(width, height, &yuv_type) < 0) {
   802             return -1;
   803         }
   804 
   805         rgb24_yuv420_std(width, height, src, src_pitch, y, u, v, y_stride, uv_stride, yuv_type);
   806         return 0;
   807     }
   808 #endif
   809 
   810     /* ARGB8888 to FOURCC */
   811     if (src_format == SDL_PIXELFORMAT_ARGB8888) {
   812         return SDL_ConvertPixels_ARGB8888_to_YUV(width, height, src, src_pitch, dst_format, dst, dst_pitch);
   813     }
   814 
   815     /* not ARGB8888 to FOURCC : need an intermediate conversion */
   816     {
   817         int ret;
   818         void *tmp;
   819         int tmp_pitch = (width * sizeof(Uint32));
   820 
   821         tmp = SDL_malloc(tmp_pitch * height);
   822         if (tmp == NULL) {
   823             return SDL_OutOfMemory();
   824         }
   825 
   826         /* convert src/src_format to tmp/ARGB8888 */
   827         ret = SDL_ConvertPixels(width, height, src_format, src, src_pitch, SDL_PIXELFORMAT_ARGB8888, tmp, tmp_pitch);
   828         if (ret == -1) {
   829             SDL_free(tmp);
   830             return ret;
   831         }
   832 
   833         /* convert tmp/ARGB8888 to dst/FOURCC */
   834         ret = SDL_ConvertPixels_ARGB8888_to_YUV(width, height, tmp, tmp_pitch, dst_format, dst, dst_pitch);
   835         SDL_free(tmp);
   836         return ret;
   837     }
   838 }
   839 
   840 static int
   841 SDL_ConvertPixels_YUV_to_YUV_Copy(int width, int height, Uint32 format,
   842         const void *src, int src_pitch, void *dst, int dst_pitch)
   843 {
   844     int i;
   845 
   846     if (IsPlanar2x2Format(format)) {
   847         /* Y plane */
   848         for (i = height; i--;) {
   849             SDL_memcpy(dst, src, width);
   850             src = (const Uint8*)src + src_pitch;
   851             dst = (Uint8*)dst + dst_pitch;
   852         }
   853 
   854         if (format == SDL_PIXELFORMAT_YV12 || format == SDL_PIXELFORMAT_IYUV) {
   855             /* U and V planes are a quarter the size of the Y plane, rounded up */
   856             width = (width + 1) / 2;
   857             height = (height + 1) / 2;
   858             src_pitch = (src_pitch + 1) / 2;
   859             dst_pitch = (dst_pitch + 1) / 2;
   860             for (i = height * 2; i--;) {
   861                 SDL_memcpy(dst, src, width);
   862                 src = (const Uint8*)src + src_pitch;
   863                 dst = (Uint8*)dst + dst_pitch;
   864             }
   865         } else if (format == SDL_PIXELFORMAT_NV12 || format == SDL_PIXELFORMAT_NV21) {
   866             /* U/V plane is half the height of the Y plane, rounded up */
   867             height = (height + 1) / 2;
   868             width = ((width + 1) / 2)*2;
   869             src_pitch = ((src_pitch + 1) / 2)*2;
   870             dst_pitch = ((dst_pitch + 1) / 2)*2;
   871             for (i = height; i--;) {
   872                 SDL_memcpy(dst, src, width);
   873                 src = (const Uint8*)src + src_pitch;
   874                 dst = (Uint8*)dst + dst_pitch;
   875             }
   876         }
   877         return 0;
   878     }
   879 
   880     if (IsPacked4Format(format)) {
   881         /* Packed planes */
   882         width = 4 * ((width + 1) / 2);
   883         for (i = height; i--;) {
   884             SDL_memcpy(dst, src, width);
   885             src = (const Uint8*)src + src_pitch;
   886             dst = (Uint8*)dst + dst_pitch;
   887         }
   888         return 0;
   889     }
   890 
   891     return SDL_SetError("SDL_ConvertPixels_YUV_to_YUV_Copy: Unsupported YUV format: %s", SDL_GetPixelFormatName(format));
   892 }
   893 
   894 static int
   895 SDL_ConvertPixels_SwapUVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
   896 {
   897     int y;
   898     const int UVwidth = (width + 1)/2;
   899     const int UVheight = (height + 1)/2;
   900 
   901     /* Skip the Y plane */
   902     src = (const Uint8 *)src + height * src_pitch;
   903     dst = (Uint8 *)dst + height * dst_pitch;
   904 
   905     if (src == dst) {
   906         int UVpitch = (dst_pitch + 1)/2;
   907         Uint8 *tmp;
   908         Uint8 *row1 = dst;
   909         Uint8 *row2 = (Uint8 *)dst + UVheight * UVpitch;
   910 
   911         /* Allocate a temporary row for the swap */
   912         tmp = (Uint8 *)SDL_malloc(UVwidth);
   913         if (!tmp) {
   914             return SDL_OutOfMemory();
   915         }
   916         for (y = 0; y < UVheight; ++y) {
   917             SDL_memcpy(tmp, row1, UVwidth);
   918             SDL_memcpy(row1, row2, UVwidth);
   919             SDL_memcpy(row2, tmp, UVwidth);
   920             row1 += UVpitch;
   921             row2 += UVpitch;
   922         }
   923         SDL_free(tmp);
   924     } else {
   925         const Uint8 *srcUV;
   926         Uint8 *dstUV;
   927         int srcUVPitch = ((src_pitch + 1)/2);
   928         int dstUVPitch = ((dst_pitch + 1)/2);
   929 
   930         /* Copy the first plane */
   931         srcUV = (const Uint8 *)src;
   932         dstUV = (Uint8 *)dst + UVheight * dstUVPitch;
   933         for (y = 0; y < UVheight; ++y) {
   934             SDL_memcpy(dstUV, srcUV, UVwidth);
   935             srcUV += srcUVPitch;
   936             dstUV += dstUVPitch;
   937         }
   938 
   939         /* Copy the second plane */
   940         dstUV = (Uint8 *)dst;
   941         for (y = 0; y < UVheight; ++y) {
   942             SDL_memcpy(dstUV, srcUV, UVwidth);
   943             srcUV += srcUVPitch;
   944             dstUV += dstUVPitch;
   945         }
   946     }
   947     return 0;
   948 }
   949 
   950 static int
   951 SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
   952 {
   953     int x, y;
   954     const int UVwidth = (width + 1)/2;
   955     const int UVheight = (height + 1)/2;
   956     const int srcUVPitch = ((src_pitch + 1)/2);
   957     const int srcUVPitchLeft = srcUVPitch - UVwidth;
   958     const int dstUVPitch = ((dst_pitch + 1)/2)*2;
   959     const int dstUVPitchLeft = dstUVPitch - UVwidth*2;
   960     const Uint8 *src1, *src2;
   961     Uint8 *dstUV;
   962     Uint8 *tmp = NULL;
   963 #ifdef __SSE2__
   964     const SDL_bool use_SSE2 = SDL_HasSSE2();
   965 #endif
   966 
   967     /* Skip the Y plane */
   968     src = (const Uint8 *)src + height * src_pitch;
   969     dst = (Uint8 *)dst + height * dst_pitch;
   970 
   971     if (src == dst) {
   972         /* Need to make a copy of the buffer so we don't clobber it while converting */
   973         tmp = (Uint8 *)SDL_malloc(2*UVheight*srcUVPitch);
   974         if (!tmp) {
   975             return SDL_OutOfMemory();
   976         }
   977         SDL_memcpy(tmp, src, 2*UVheight*srcUVPitch);
   978         src = tmp;
   979     }
   980 
   981     if (reverseUV) {
   982         src2 = (const Uint8 *)src;
   983         src1 = src2 + UVheight * srcUVPitch;
   984     } else {
   985         src1 = (const Uint8 *)src;
   986         src2 = src1 + UVheight * srcUVPitch;
   987     }
   988     dstUV = (Uint8 *)dst;
   989 
   990     y = UVheight;
   991     while (y--) {
   992         x = UVwidth;
   993 #ifdef __SSE2__
   994         if (use_SSE2) {
   995             while (x >= 16) {
   996                 __m128i u = _mm_loadu_si128((__m128i *)src1);
   997                 __m128i v = _mm_loadu_si128((__m128i *)src2);
   998                 __m128i uv1 = _mm_unpacklo_epi8(u, v);
   999                 __m128i uv2 = _mm_unpackhi_epi8(u, v);
  1000                 _mm_storeu_si128((__m128i*)dstUV, uv1);
  1001                 _mm_storeu_si128((__m128i*)(dstUV + 16), uv2);
  1002                 src1 += 16;
  1003                 src2 += 16;
  1004                 dstUV += 32;
  1005                 x -= 16;
  1006             }
  1007         }
  1008 #endif
  1009         while (x--) {
  1010             *dstUV++ = *src1++;
  1011             *dstUV++ = *src2++;
  1012         }
  1013         src1 += srcUVPitchLeft;
  1014         src2 += srcUVPitchLeft;
  1015         dstUV += dstUVPitchLeft;
  1016     }
  1017 
  1018     if (tmp) {
  1019         SDL_free(tmp);
  1020     }
  1021     return 0;
  1022 }
  1023 
  1024 static int
  1025 SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
  1026 {
  1027     int x, y;
  1028     const int UVwidth = (width + 1)/2;
  1029     const int UVheight = (height + 1)/2;
  1030     const int srcUVPitch = ((src_pitch + 1)/2)*2;
  1031     const int srcUVPitchLeft = srcUVPitch - UVwidth*2;
  1032     const int dstUVPitch = ((dst_pitch + 1)/2);
  1033     const int dstUVPitchLeft = dstUVPitch - UVwidth;
  1034     const Uint8 *srcUV;
  1035     Uint8 *dst1, *dst2;
  1036     Uint8 *tmp = NULL;
  1037 #ifdef __SSE2__
  1038     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1039 #endif
  1040 
  1041     /* Skip the Y plane */
  1042     src = (const Uint8 *)src + height * src_pitch;
  1043     dst = (Uint8 *)dst + height * dst_pitch;
  1044 
  1045     if (src == dst) {
  1046         /* Need to make a copy of the buffer so we don't clobber it while converting */
  1047         tmp = (Uint8 *)SDL_malloc(UVheight*srcUVPitch);
  1048         if (!tmp) {
  1049             return SDL_OutOfMemory();
  1050         }
  1051         SDL_memcpy(tmp, src, UVheight*srcUVPitch);
  1052         src = tmp;
  1053     }
  1054 
  1055     if (reverseUV) {
  1056         dst2 = (Uint8 *)dst;
  1057         dst1 = dst2 + UVheight * dstUVPitch;
  1058     } else {
  1059         dst1 = (Uint8 *)dst;
  1060         dst2 = dst1 + UVheight * dstUVPitch;
  1061     }
  1062     srcUV = (const Uint8 *)src;
  1063 
  1064     y = UVheight;
  1065     while (y--) {
  1066         x = UVwidth;
  1067 #ifdef __SSE2__
  1068         if (use_SSE2) {
  1069             __m128i mask = _mm_set1_epi16(0x00FF);
  1070             while (x >= 16) {
  1071                 __m128i uv1 = _mm_loadu_si128((__m128i*)srcUV);
  1072                 __m128i uv2 = _mm_loadu_si128((__m128i*)(srcUV+16));
  1073                 __m128i u1 = _mm_and_si128(uv1, mask);
  1074                 __m128i u2 = _mm_and_si128(uv2, mask);
  1075                 __m128i u = _mm_packus_epi16(u1, u2);
  1076                 __m128i v1 = _mm_srli_epi16(uv1, 8);
  1077                 __m128i v2 = _mm_srli_epi16(uv2, 8);
  1078                 __m128i v = _mm_packus_epi16(v1, v2);
  1079                 _mm_storeu_si128((__m128i*)dst1, u);
  1080                 _mm_storeu_si128((__m128i*)dst2, v);
  1081                 srcUV += 32;
  1082                 dst1 += 16;
  1083                 dst2 += 16;
  1084                 x -= 16;
  1085             }
  1086         }
  1087 #endif
  1088         while (x--) {
  1089             *dst1++ = *srcUV++;
  1090             *dst2++ = *srcUV++;
  1091         }
  1092         srcUV += srcUVPitchLeft;
  1093         dst1 += dstUVPitchLeft;
  1094         dst2 += dstUVPitchLeft;
  1095     }
  1096 
  1097     if (tmp) {
  1098         SDL_free(tmp);
  1099     }
  1100     return 0;
  1101 }
  1102 
  1103 static int
  1104 SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1105 {
  1106     int x, y;
  1107     const int UVwidth = (width + 1)/2;
  1108     const int UVheight = (height + 1)/2;
  1109     const int srcUVPitch = ((src_pitch + 1)/2)*2;
  1110     const int srcUVPitchLeft = (srcUVPitch - UVwidth*2)/sizeof(Uint16);
  1111     const int dstUVPitch = ((dst_pitch + 1)/2)*2;
  1112     const int dstUVPitchLeft = (dstUVPitch - UVwidth*2)/sizeof(Uint16);
  1113     const Uint16 *srcUV;
  1114     Uint16 *dstUV;
  1115 #ifdef __SSE2__
  1116     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1117 #endif
  1118 
  1119     /* Skip the Y plane */
  1120     src = (const Uint8 *)src + height * src_pitch;
  1121     dst = (Uint8 *)dst + height * dst_pitch;
  1122 
  1123     srcUV = (const Uint16 *)src;
  1124     dstUV = (Uint16 *)dst;
  1125     y = UVheight;
  1126     while (y--) {
  1127         x = UVwidth;
  1128 #ifdef __SSE2__
  1129         if (use_SSE2) {
  1130             while (x >= 8) {
  1131                 __m128i uv = _mm_loadu_si128((__m128i*)srcUV);
  1132                 __m128i v = _mm_slli_epi16(uv, 8);
  1133                 __m128i u = _mm_srli_epi16(uv, 8);
  1134                 __m128i vu = _mm_or_si128(v, u);
  1135                 _mm_storeu_si128((__m128i*)dstUV, vu);
  1136                 srcUV += 8;
  1137                 dstUV += 8;
  1138                 x -= 8;
  1139             }
  1140         }
  1141 #endif
  1142         while (x--) {
  1143             *dstUV++ = SDL_Swap16(*srcUV++);
  1144         }
  1145         srcUV += srcUVPitchLeft;
  1146         dstUV += dstUVPitchLeft;
  1147     }
  1148     return 0;
  1149 }
  1150 
  1151 static int
  1152 SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
  1153          Uint32 src_format, const void *src, int src_pitch,
  1154          Uint32 dst_format, void *dst, int dst_pitch)
  1155 {
  1156     if (src != dst) {
  1157         /* Copy Y plane */
  1158         int i;
  1159         const Uint8 *srcY = (const Uint8 *)src;
  1160         Uint8 *dstY = (Uint8 *)dst;
  1161         for (i = height; i--; ) {
  1162             SDL_memcpy(dstY, srcY, width);
  1163             srcY += src_pitch;
  1164             dstY += dst_pitch;
  1165         }
  1166     }
  1167 
  1168     switch (src_format) {
  1169     case SDL_PIXELFORMAT_YV12:
  1170         switch (dst_format) {
  1171         case SDL_PIXELFORMAT_IYUV:
  1172             return SDL_ConvertPixels_SwapUVPlanes(width, height, src, src_pitch, dst, dst_pitch);
  1173         case SDL_PIXELFORMAT_NV12:
  1174             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1175         case SDL_PIXELFORMAT_NV21:
  1176             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1177         default:
  1178             break;
  1179         }
  1180         break;
  1181     case SDL_PIXELFORMAT_IYUV:
  1182         switch (dst_format) {
  1183         case SDL_PIXELFORMAT_YV12:
  1184             return SDL_ConvertPixels_SwapUVPlanes(width, height, src, src_pitch, dst, dst_pitch);
  1185         case SDL_PIXELFORMAT_NV12:
  1186             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1187         case SDL_PIXELFORMAT_NV21:
  1188             return SDL_ConvertPixels_PackUVPlanes_to_NV(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1189         default:
  1190             break;
  1191         }
  1192         break;
  1193     case SDL_PIXELFORMAT_NV12:
  1194         switch (dst_format) {
  1195         case SDL_PIXELFORMAT_YV12:
  1196             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1197         case SDL_PIXELFORMAT_IYUV:
  1198             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1199         case SDL_PIXELFORMAT_NV21:
  1200             return SDL_ConvertPixels_SwapNV(width, height, src, src_pitch, dst, dst_pitch);
  1201         default:
  1202             break;
  1203         }
  1204         break;
  1205     case SDL_PIXELFORMAT_NV21:
  1206         switch (dst_format) {
  1207         case SDL_PIXELFORMAT_YV12:
  1208             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_FALSE);
  1209         case SDL_PIXELFORMAT_IYUV:
  1210             return SDL_ConvertPixels_SplitNV_to_UVPlanes(width, height, src, src_pitch, dst, dst_pitch, SDL_TRUE);
  1211         case SDL_PIXELFORMAT_NV12:
  1212             return SDL_ConvertPixels_SwapNV(width, height, src, src_pitch, dst, dst_pitch);
  1213         default:
  1214             break;
  1215         }
  1216         break;
  1217     default:
  1218         break;
  1219     }
  1220     return SDL_SetError("SDL_ConvertPixels_Planar2x2_to_Planar2x2: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
  1221 }
  1222 
  1223 #define PACKED4_TO_PACKED4_ROW_SSE2(shuffle)                                                        \
  1224     while (x >= 4) {                                                                                \
  1225         __m128i yuv = _mm_loadu_si128((__m128i*)srcYUV);                                            \
  1226         __m128i lo = _mm_unpacklo_epi8(yuv, _mm_setzero_si128());                                   \
  1227         __m128i hi = _mm_unpackhi_epi8(yuv, _mm_setzero_si128());                                   \
  1228         lo = _mm_shufflelo_epi16(lo, shuffle);                                                      \
  1229         lo = _mm_shufflehi_epi16(lo, shuffle);                                                      \
  1230         hi = _mm_shufflelo_epi16(hi, shuffle);                                                      \
  1231         hi = _mm_shufflehi_epi16(hi, shuffle);                                                      \
  1232         yuv = _mm_packus_epi16(lo, hi);                                                             \
  1233         _mm_storeu_si128((__m128i*)dstYUV, yuv);                                                    \
  1234         srcYUV += 16;                                                                               \
  1235         dstYUV += 16;                                                                               \
  1236         x -= 4;                                                                                     \
  1237     }                                                                                               \
  1238 
  1239 static int
  1240 SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1241 {
  1242     int x, y;
  1243     const int YUVwidth = (width + 1)/2;
  1244     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1245     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1246     const Uint8 *srcYUV = (const Uint8 *)src;
  1247     Uint8 *dstYUV = (Uint8 *)dst;
  1248 #ifdef __SSE2__
  1249     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1250 #endif
  1251 
  1252     y = height;
  1253     while (y--) {
  1254         x = YUVwidth;
  1255 #ifdef __SSE2__
  1256         if (use_SSE2) {
  1257             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
  1258         }
  1259 #endif
  1260         while (x--) {
  1261             Uint8 Y1, U, Y2, V;
  1262 
  1263             Y1 = srcYUV[0];
  1264             U = srcYUV[1];
  1265             Y2 = srcYUV[2];
  1266             V = srcYUV[3];
  1267             srcYUV += 4;
  1268 
  1269             dstYUV[0] = U;
  1270             dstYUV[1] = Y1;
  1271             dstYUV[2] = V;
  1272             dstYUV[3] = Y2;
  1273             dstYUV += 4;
  1274         }
  1275         srcYUV += srcYUVPitchLeft;
  1276         dstYUV += dstYUVPitchLeft;
  1277     }
  1278     return 0;
  1279 }
  1280 
  1281 static int
  1282 SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1283 {
  1284     int x, y;
  1285     const int YUVwidth = (width + 1)/2;
  1286     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1287     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1288     const Uint8 *srcYUV = (const Uint8 *)src;
  1289     Uint8 *dstYUV = (Uint8 *)dst;
  1290 #ifdef __SSE2__
  1291     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1292 #endif
  1293 
  1294     y = height;
  1295     while (y--) {
  1296         x = YUVwidth;
  1297 #ifdef __SSE2__
  1298         if (use_SSE2) {
  1299             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
  1300         }
  1301 #endif
  1302         while (x--) {
  1303             Uint8 Y1, U, Y2, V;
  1304 
  1305             Y1 = srcYUV[0];
  1306             U = srcYUV[1];
  1307             Y2 = srcYUV[2];
  1308             V = srcYUV[3];
  1309             srcYUV += 4;
  1310 
  1311             dstYUV[0] = Y1;
  1312             dstYUV[1] = V;
  1313             dstYUV[2] = Y2;
  1314             dstYUV[3] = U;
  1315             dstYUV += 4;
  1316         }
  1317         srcYUV += srcYUVPitchLeft;
  1318         dstYUV += dstYUVPitchLeft;
  1319     }
  1320     return 0;
  1321 }
  1322 
  1323 static int
  1324 SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1325 {
  1326     int x, y;
  1327     const int YUVwidth = (width + 1)/2;
  1328     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1329     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1330     const Uint8 *srcYUV = (const Uint8 *)src;
  1331     Uint8 *dstYUV = (Uint8 *)dst;
  1332 #ifdef __SSE2__
  1333     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1334 #endif
  1335 
  1336     y = height;
  1337     while (y--) {
  1338         x = YUVwidth;
  1339 #ifdef __SSE2__
  1340         if (use_SSE2) {
  1341             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
  1342         }
  1343 #endif
  1344         while (x--) {
  1345             Uint8 Y1, U, Y2, V;
  1346 
  1347             U = srcYUV[0];
  1348             Y1 = srcYUV[1];
  1349             V = srcYUV[2];
  1350             Y2 = srcYUV[3];
  1351             srcYUV += 4;
  1352 
  1353             dstYUV[0] = Y1;
  1354             dstYUV[1] = U;
  1355             dstYUV[2] = Y2;
  1356             dstYUV[3] = V;
  1357             dstYUV += 4;
  1358         }
  1359         srcYUV += srcYUVPitchLeft;
  1360         dstYUV += dstYUVPitchLeft;
  1361     }
  1362     return 0;
  1363 }
  1364 
  1365 static int
  1366 SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1367 {
  1368     int x, y;
  1369     const int YUVwidth = (width + 1)/2;
  1370     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1371     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1372     const Uint8 *srcYUV = (const Uint8 *)src;
  1373     Uint8 *dstYUV = (Uint8 *)dst;
  1374 #ifdef __SSE2__
  1375     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1376 #endif
  1377 
  1378     y = height;
  1379     while (y--) {
  1380         x = YUVwidth;
  1381 #ifdef __SSE2__
  1382         if (use_SSE2) {
  1383             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
  1384         }
  1385 #endif
  1386         while (x--) {
  1387             Uint8 Y1, U, Y2, V;
  1388 
  1389             U = srcYUV[0];
  1390             Y1 = srcYUV[1];
  1391             V = srcYUV[2];
  1392             Y2 = srcYUV[3];
  1393             srcYUV += 4;
  1394 
  1395             dstYUV[0] = Y1;
  1396             dstYUV[1] = V;
  1397             dstYUV[2] = Y2;
  1398             dstYUV[3] = U;
  1399             dstYUV += 4;
  1400         }
  1401         srcYUV += srcYUVPitchLeft;
  1402         dstYUV += dstYUVPitchLeft;
  1403     }
  1404     return 0;
  1405 }
  1406 
  1407 static int
  1408 SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1409 {
  1410     int x, y;
  1411     const int YUVwidth = (width + 1)/2;
  1412     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1413     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1414     const Uint8 *srcYUV = (const Uint8 *)src;
  1415     Uint8 *dstYUV = (Uint8 *)dst;
  1416 #ifdef __SSE2__
  1417     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1418 #endif
  1419 
  1420     y = height;
  1421     while (y--) {
  1422         x = YUVwidth;
  1423 #ifdef __SSE2__
  1424         if (use_SSE2) {
  1425             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
  1426         }
  1427 #endif
  1428         while (x--) {
  1429             Uint8 Y1, U, Y2, V;
  1430 
  1431             Y1 = srcYUV[0];
  1432             V = srcYUV[1];
  1433             Y2 = srcYUV[2];
  1434             U = srcYUV[3];
  1435             srcYUV += 4;
  1436 
  1437             dstYUV[0] = Y1;
  1438             dstYUV[1] = U;
  1439             dstYUV[2] = Y2;
  1440             dstYUV[3] = V;
  1441             dstYUV += 4;
  1442         }
  1443         srcYUV += srcYUVPitchLeft;
  1444         dstYUV += dstYUVPitchLeft;
  1445     }
  1446     return 0;
  1447 }
  1448 
  1449 static int
  1450 SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
  1451 {
  1452     int x, y;
  1453     const int YUVwidth = (width + 1)/2;
  1454     const int srcYUVPitchLeft = (src_pitch - YUVwidth*4);
  1455     const int dstYUVPitchLeft = (dst_pitch - YUVwidth*4);
  1456     const Uint8 *srcYUV = (const Uint8 *)src;
  1457     Uint8 *dstYUV = (Uint8 *)dst;
  1458 #ifdef __SSE2__
  1459     const SDL_bool use_SSE2 = SDL_HasSSE2();
  1460 #endif
  1461 
  1462     y = height;
  1463     while (y--) {
  1464         x = YUVwidth;
  1465 #ifdef __SSE2__
  1466         if (use_SSE2) {
  1467             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
  1468         }
  1469 #endif
  1470         while (x--) {
  1471             Uint8 Y1, U, Y2, V;
  1472 
  1473             Y1 = srcYUV[0];
  1474             V = srcYUV[1];
  1475             Y2 = srcYUV[2];
  1476             U = srcYUV[3];
  1477             srcYUV += 4;
  1478 
  1479             dstYUV[0] = U;
  1480             dstYUV[1] = Y1;
  1481             dstYUV[2] = V;
  1482             dstYUV[3] = Y2;
  1483             dstYUV += 4;
  1484         }
  1485         srcYUV += srcYUVPitchLeft;
  1486         dstYUV += dstYUVPitchLeft;
  1487     }
  1488     return 0;
  1489 }
  1490 
  1491 static int
  1492 SDL_ConvertPixels_Packed4_to_Packed4(int width, int height,
  1493          Uint32 src_format, const void *src, int src_pitch,
  1494          Uint32 dst_format, void *dst, int dst_pitch)
  1495 {
  1496     switch (src_format) {
  1497     case SDL_PIXELFORMAT_YUY2:
  1498         switch (dst_format) {
  1499         case SDL_PIXELFORMAT_UYVY:
  1500             return SDL_ConvertPixels_YUY2_to_UYVY(width, height, src, src_pitch, dst, dst_pitch);
  1501         case SDL_PIXELFORMAT_YVYU:
  1502             return SDL_ConvertPixels_YUY2_to_YVYU(width, height, src, src_pitch, dst, dst_pitch);
  1503         default:
  1504             break;
  1505         }
  1506         break;
  1507     case SDL_PIXELFORMAT_UYVY:
  1508         switch (dst_format) {
  1509         case SDL_PIXELFORMAT_YUY2:
  1510             return SDL_ConvertPixels_UYVY_to_YUY2(width, height, src, src_pitch, dst, dst_pitch);
  1511         case SDL_PIXELFORMAT_YVYU:
  1512             return SDL_ConvertPixels_UYVY_to_YVYU(width, height, src, src_pitch, dst, dst_pitch);
  1513         default:
  1514             break;
  1515         }
  1516         break;
  1517     case SDL_PIXELFORMAT_YVYU:
  1518         switch (dst_format) {
  1519         case SDL_PIXELFORMAT_YUY2:
  1520             return SDL_ConvertPixels_YVYU_to_YUY2(width, height, src, src_pitch, dst, dst_pitch);
  1521         case SDL_PIXELFORMAT_UYVY:
  1522             return SDL_ConvertPixels_YVYU_to_UYVY(width, height, src, src_pitch, dst, dst_pitch);
  1523         default:
  1524             break;
  1525         }
  1526         break;
  1527     default:
  1528         break;
  1529     }
  1530     return SDL_SetError("SDL_ConvertPixels_Packed4_to_Packed4: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
  1531 }
  1532 
  1533 static int
  1534 SDL_ConvertPixels_Planar2x2_to_Packed4(int width, int height,
  1535          Uint32 src_format, const void *src, int src_pitch,
  1536          Uint32 dst_format, void *dst, int dst_pitch)
  1537 {
  1538     int x, y;
  1539     const Uint8 *srcY1, *srcY2, *srcU, *srcV;
  1540     Uint32 srcY_pitch, srcUV_pitch;
  1541     Uint32 srcY_pitch_left, srcUV_pitch_left, srcUV_pixel_stride;
  1542     Uint8 *dstY1, *dstY2, *dstU1, *dstU2, *dstV1, *dstV2;
  1543     Uint32 dstY_pitch, dstUV_pitch;
  1544     Uint32 dst_pitch_left;
  1545 
  1546     if (src == dst) {
  1547         return SDL_SetError("Can't change YUV plane types in-place");
  1548     }
  1549 
  1550     if (GetYUVPlanes(width, height, src_format, src, src_pitch,
  1551                      &srcY1, &srcU, &srcV, &srcY_pitch, &srcUV_pitch) < 0) {
  1552         return -1;
  1553     }
  1554     srcY2 = srcY1 + srcY_pitch;
  1555     srcY_pitch_left = (srcY_pitch - width);
  1556 
  1557     if (src_format == SDL_PIXELFORMAT_NV12 || src_format == SDL_PIXELFORMAT_NV21) {
  1558         srcUV_pixel_stride = 2;
  1559         srcUV_pitch_left = (srcUV_pitch - 2*((width + 1)/2));
  1560     } else {
  1561         srcUV_pixel_stride = 1;
  1562         srcUV_pitch_left = (srcUV_pitch - ((width + 1)/2));
  1563     }
  1564 
  1565     if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
  1566                      (const Uint8 **)&dstY1, (const Uint8 **)&dstU1, (const Uint8 **)&dstV1,
  1567                      &dstY_pitch, &dstUV_pitch) < 0) {
  1568         return -1;
  1569     }
  1570     dstY2 = dstY1 + dstY_pitch;
  1571     dstU2 = dstU1 + dstUV_pitch;
  1572     dstV2 = dstV1 + dstUV_pitch;
  1573     dst_pitch_left = (dstY_pitch - 4*((width + 1)/2));
  1574 
  1575     /* Copy 2x2 blocks of pixels at a time */
  1576     for (y = 0; y < (height - 1); y += 2) {
  1577         for (x = 0; x < (width - 1); x += 2) {
  1578             /* Row 1 */
  1579             *dstY1 = *srcY1++;
  1580             dstY1 += 2;
  1581             *dstY1 = *srcY1++;
  1582             dstY1 += 2;
  1583             *dstU1 = *srcU;
  1584             *dstV1 = *srcV;
  1585 
  1586             /* Row 2 */
  1587             *dstY2 = *srcY2++;
  1588             dstY2 += 2;
  1589             *dstY2 = *srcY2++;
  1590             dstY2 += 2;
  1591             *dstU2 = *srcU;
  1592             *dstV2 = *srcV;
  1593 
  1594             srcU += srcUV_pixel_stride;
  1595             srcV += srcUV_pixel_stride;
  1596             dstU1 += 4;
  1597             dstU2 += 4;
  1598             dstV1 += 4;
  1599             dstV2 += 4;
  1600         }
  1601 
  1602         /* Last column */
  1603         if (x == (width - 1)) {
  1604             /* Row 1 */
  1605             *dstY1 = *srcY1;
  1606             dstY1 += 2;
  1607             *dstY1 = *srcY1++;
  1608             dstY1 += 2;
  1609             *dstU1 = *srcU;
  1610             *dstV1 = *srcV;
  1611 
  1612             /* Row 2 */
  1613             *dstY2 = *srcY2;
  1614             dstY2 += 2;
  1615             *dstY2 = *srcY2++;
  1616             dstY2 += 2;
  1617             *dstU2 = *srcU;
  1618             *dstV2 = *srcV;
  1619 
  1620             srcU += srcUV_pixel_stride;
  1621             srcV += srcUV_pixel_stride;
  1622             dstU1 += 4;
  1623             dstU2 += 4;
  1624             dstV1 += 4;
  1625             dstV2 += 4;
  1626         }
  1627 
  1628         srcY1 += srcY_pitch_left + srcY_pitch;
  1629         srcY2 += srcY_pitch_left + srcY_pitch;
  1630         srcU += srcUV_pitch_left;
  1631         srcV += srcUV_pitch_left;
  1632         dstY1 += dst_pitch_left + dstY_pitch;
  1633         dstY2 += dst_pitch_left + dstY_pitch;
  1634         dstU1 += dst_pitch_left + dstUV_pitch;
  1635         dstU2 += dst_pitch_left + dstUV_pitch;
  1636         dstV1 += dst_pitch_left + dstUV_pitch;
  1637         dstV2 += dst_pitch_left + dstUV_pitch;
  1638     }
  1639 
  1640     /* Last row */
  1641     if (y == (height - 1)) {
  1642         for (x = 0; x < (width - 1); x += 2) {
  1643             /* Row 1 */
  1644             *dstY1 = *srcY1++;
  1645             dstY1 += 2;
  1646             *dstY1 = *srcY1++;
  1647             dstY1 += 2;
  1648             *dstU1 = *srcU;
  1649             *dstV1 = *srcV;
  1650 
  1651             srcU += srcUV_pixel_stride;
  1652             srcV += srcUV_pixel_stride;
  1653             dstU1 += 4;
  1654             dstV1 += 4;
  1655         }
  1656 
  1657         /* Last column */
  1658         if (x == (width - 1)) {
  1659             /* Row 1 */
  1660             *dstY1 = *srcY1;
  1661             dstY1 += 2;
  1662             *dstY1 = *srcY1++;
  1663             dstY1 += 2;
  1664             *dstU1 = *srcU;
  1665             *dstV1 = *srcV;
  1666 
  1667             srcU += srcUV_pixel_stride;
  1668             srcV += srcUV_pixel_stride;
  1669             dstU1 += 4;
  1670             dstV1 += 4;
  1671         }
  1672     }
  1673     return 0;
  1674 }
  1675 
  1676 static int
  1677 SDL_ConvertPixels_Packed4_to_Planar2x2(int width, int height,
  1678          Uint32 src_format, const void *src, int src_pitch,
  1679          Uint32 dst_format, void *dst, int dst_pitch)
  1680 {
  1681     int x, y;
  1682     const Uint8 *srcY1, *srcY2, *srcU1, *srcU2, *srcV1, *srcV2;
  1683     Uint32 srcY_pitch, srcUV_pitch;
  1684     Uint32 src_pitch_left;
  1685     Uint8 *dstY1, *dstY2, *dstU, *dstV;
  1686     Uint32 dstY_pitch, dstUV_pitch;
  1687     Uint32 dstY_pitch_left, dstUV_pitch_left, dstUV_pixel_stride;
  1688 
  1689     if (src == dst) {
  1690         return SDL_SetError("Can't change YUV plane types in-place");
  1691     }
  1692 
  1693     if (GetYUVPlanes(width, height, src_format, src, src_pitch,
  1694                      &srcY1, &srcU1, &srcV1, &srcY_pitch, &srcUV_pitch) < 0) {
  1695         return -1;
  1696     }
  1697     srcY2 = srcY1 + srcY_pitch;
  1698     srcU2 = srcU1 + srcUV_pitch;
  1699     srcV2 = srcV1 + srcUV_pitch;
  1700     src_pitch_left = (srcY_pitch - 4*((width + 1)/2));
  1701 
  1702     if (GetYUVPlanes(width, height, dst_format, dst, dst_pitch,
  1703                      (const Uint8 **)&dstY1, (const Uint8 **)&dstU, (const Uint8 **)&dstV,
  1704                      &dstY_pitch, &dstUV_pitch) < 0) {
  1705         return -1;
  1706     }
  1707     dstY2 = dstY1 + dstY_pitch;
  1708     dstY_pitch_left = (dstY_pitch - width);
  1709 
  1710     if (dst_format == SDL_PIXELFORMAT_NV12 || dst_format == SDL_PIXELFORMAT_NV21) {
  1711         dstUV_pixel_stride = 2;
  1712         dstUV_pitch_left = (dstUV_pitch - 2*((width + 1)/2));
  1713     } else {
  1714         dstUV_pixel_stride = 1;
  1715         dstUV_pitch_left = (dstUV_pitch - ((width + 1)/2));
  1716     }
  1717 
  1718     /* Copy 2x2 blocks of pixels at a time */
  1719     for (y = 0; y < (height - 1); y += 2) {
  1720         for (x = 0; x < (width - 1); x += 2) {
  1721             /* Row 1 */
  1722             *dstY1++ = *srcY1;
  1723             srcY1 += 2;
  1724             *dstY1++ = *srcY1;
  1725             srcY1 += 2;
  1726 
  1727             /* Row 2 */
  1728             *dstY2++ = *srcY2;
  1729             srcY2 += 2;
  1730             *dstY2++ = *srcY2;
  1731             srcY2 += 2;
  1732 
  1733             *dstU = (Uint8)(((Uint32)*srcU1 + *srcU2)/2);
  1734             *dstV = (Uint8)(((Uint32)*srcV1 + *srcV2)/2);
  1735 
  1736             srcU1 += 4;
  1737             srcU2 += 4;
  1738             srcV1 += 4;
  1739             srcV2 += 4;
  1740             dstU += dstUV_pixel_stride;
  1741             dstV += dstUV_pixel_stride;
  1742         }
  1743 
  1744         /* Last column */
  1745         if (x == (width - 1)) {
  1746             /* Row 1 */
  1747             *dstY1 = *srcY1;
  1748             srcY1 += 2;
  1749             *dstY1++ = *srcY1;
  1750             srcY1 += 2;
  1751 
  1752             /* Row 2 */
  1753             *dstY2 = *srcY2;
  1754             srcY2 += 2;
  1755             *dstY2++ = *srcY2;
  1756             srcY2 += 2;
  1757 
  1758             *dstU = (Uint8)(((Uint32)*srcU1 + *srcU2)/2);
  1759             *dstV = (Uint8)(((Uint32)*srcV1 + *srcV2)/2);
  1760 
  1761             srcU1 += 4;
  1762             srcU2 += 4;
  1763             srcV1 += 4;
  1764             srcV2 += 4;
  1765             dstU += dstUV_pixel_stride;
  1766             dstV += dstUV_pixel_stride;
  1767         }
  1768 
  1769         srcY1 += src_pitch_left + srcY_pitch;
  1770         srcY2 += src_pitch_left + srcY_pitch;
  1771         srcU1 += src_pitch_left + srcUV_pitch;
  1772         srcU2 += src_pitch_left + srcUV_pitch;
  1773         srcV1 += src_pitch_left + srcUV_pitch;
  1774         srcV2 += src_pitch_left + srcUV_pitch;
  1775         dstY1 += dstY_pitch_left + dstY_pitch;
  1776         dstY2 += dstY_pitch_left + dstY_pitch;
  1777         dstU += dstUV_pitch_left;
  1778         dstV += dstUV_pitch_left;
  1779     }
  1780 
  1781     /* Last row */
  1782     if (y == (height - 1)) {
  1783         for (x = 0; x < (width - 1); x += 2) {
  1784             *dstY1++ = *srcY1;
  1785             srcY1 += 2;
  1786             *dstY1++ = *srcY1;
  1787             srcY1 += 2;
  1788 
  1789             *dstU = *srcU1;
  1790             *dstV = *srcV1;
  1791 
  1792             srcU1 += 4;
  1793             srcV1 += 4;
  1794             dstU += dstUV_pixel_stride;
  1795             dstV += dstUV_pixel_stride;
  1796         }
  1797 
  1798         /* Last column */
  1799         if (x == (width - 1)) {
  1800             *dstY1 = *srcY1;
  1801             *dstU = *srcU1;
  1802             *dstV = *srcV1;
  1803         }
  1804     }
  1805     return 0;
  1806 }
  1807 
  1808 int
  1809 SDL_ConvertPixels_YUV_to_YUV(int width, int height,
  1810          Uint32 src_format, const void *src, int src_pitch,
  1811          Uint32 dst_format, void *dst, int dst_pitch)
  1812 {
  1813     if (src_format == dst_format) {
  1814         if (src == dst) {
  1815             /* Nothing to do */
  1816             return 0;
  1817         }
  1818         return SDL_ConvertPixels_YUV_to_YUV_Copy(width, height, src_format, src, src_pitch, dst, dst_pitch);
  1819     }
  1820 
  1821     if (IsPlanar2x2Format(src_format) && IsPlanar2x2Format(dst_format)) {
  1822         return SDL_ConvertPixels_Planar2x2_to_Planar2x2(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1823     } else if (IsPacked4Format(src_format) && IsPacked4Format(dst_format)) {
  1824         return SDL_ConvertPixels_Packed4_to_Packed4(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1825     } else if (IsPlanar2x2Format(src_format) && IsPacked4Format(dst_format)) {
  1826         return SDL_ConvertPixels_Planar2x2_to_Packed4(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1827     } else if (IsPacked4Format(src_format) && IsPlanar2x2Format(dst_format)) {
  1828         return SDL_ConvertPixels_Packed4_to_Planar2x2(width, height, src_format, src, src_pitch, dst_format, dst, dst_pitch);
  1829     } else {
  1830         return SDL_SetError("SDL_ConvertPixels_YUV_to_YUV: Unsupported YUV conversion: %s -> %s", SDL_GetPixelFormatName(src_format), SDL_GetPixelFormatName(dst_format));
  1831     }
  1832 }
  1833 
  1834 /* vi: set ts=4 sw=4 expandtab: */