src/render/SDL_yuv_sw.c
author Ryan C. Gordon <icculus@icculus.org>
Sat, 20 Jul 2013 21:19:20 -0400
changeset 7487 a4e43eb67e79
parent 7332 b53acf6ab70b
child 7488 a74f1f664047
permissions -rw-r--r--
Don't allocate memory if we're just going to fail when checking parameters.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  *
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  *
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  *
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  *
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  *
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  *
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  *
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  *
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  *
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_video.h"
    86 #include "SDL_cpuinfo.h"
    87 #include "SDL_yuv_sw_c.h"
    88 
    89 
    90 /* The colorspace conversion functions */
    91 
/*
 * Converters that are defined outside this file.  They are only declared
 * when building with GCC newer than 2.x for i386, with optimization on and
 * SDL_ASSEMBLY_ROUTINES enabled.  Both take the same argument list as the
 * C converters defined below.
 * NOTE(review): the names suggest MMX assembly versions of the 1:1 YV12
 * converters for 16-bit (565) and 32-bit output -- confirm against the
 * file that defines them.
 */
#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod);
extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod);
#endif
   102 
   103 static void
   104 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   105                        unsigned char *lum, unsigned char *cr,
   106                        unsigned char *cb, unsigned char *out,
   107                        int rows, int cols, int mod)
   108 {
   109     unsigned short *row1;
   110     unsigned short *row2;
   111     unsigned char *lum2;
   112     int x, y;
   113     int cr_r;
   114     int crb_g;
   115     int cb_b;
   116     int cols_2 = cols / 2;
   117 
   118     row1 = (unsigned short *) out;
   119     row2 = row1 + cols + mod;
   120     lum2 = lum + cols;
   121 
   122     mod += cols + mod;
   123 
   124     y = rows / 2;
   125     while (y--) {
   126         x = cols_2;
   127         while (x--) {
   128             register int L;
   129 
   130             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   131             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   132                 + colortab[*cb + 2 * 256];
   133             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   134             ++cr;
   135             ++cb;
   136 
   137             L = *lum++;
   138             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   139                                         rgb_2_pix[L + crb_g] |
   140                                         rgb_2_pix[L + cb_b]);
   141 
   142             L = *lum++;
   143             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   144                                         rgb_2_pix[L + crb_g] |
   145                                         rgb_2_pix[L + cb_b]);
   146 
   147 
   148             /* Now, do second row.  */
   149 
   150             L = *lum2++;
   151             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   152                                         rgb_2_pix[L + crb_g] |
   153                                         rgb_2_pix[L + cb_b]);
   154 
   155             L = *lum2++;
   156             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   157                                         rgb_2_pix[L + crb_g] |
   158                                         rgb_2_pix[L + cb_b]);
   159         }
   160 
   161         /*
   162          * These values are at the start of the next line, (due
   163          * to the ++'s above),but they need to be at the start
   164          * of the line after that.
   165          */
   166         lum += cols;
   167         lum2 += cols;
   168         row1 += mod;
   169         row2 += mod;
   170     }
   171 }
   172 
   173 static void
   174 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   175                        unsigned char *lum, unsigned char *cr,
   176                        unsigned char *cb, unsigned char *out,
   177                        int rows, int cols, int mod)
   178 {
   179     unsigned int value;
   180     unsigned char *row1;
   181     unsigned char *row2;
   182     unsigned char *lum2;
   183     int x, y;
   184     int cr_r;
   185     int crb_g;
   186     int cb_b;
   187     int cols_2 = cols / 2;
   188 
   189     row1 = out;
   190     row2 = row1 + cols * 3 + mod * 3;
   191     lum2 = lum + cols;
   192 
   193     mod += cols + mod;
   194     mod *= 3;
   195 
   196     y = rows / 2;
   197     while (y--) {
   198         x = cols_2;
   199         while (x--) {
   200             register int L;
   201 
   202             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   203             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   204                 + colortab[*cb + 2 * 256];
   205             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   206             ++cr;
   207             ++cb;
   208 
   209             L = *lum++;
   210             value = (rgb_2_pix[L + cr_r] |
   211                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   212             *row1++ = (value) & 0xFF;
   213             *row1++ = (value >> 8) & 0xFF;
   214             *row1++ = (value >> 16) & 0xFF;
   215 
   216             L = *lum++;
   217             value = (rgb_2_pix[L + cr_r] |
   218                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   219             *row1++ = (value) & 0xFF;
   220             *row1++ = (value >> 8) & 0xFF;
   221             *row1++ = (value >> 16) & 0xFF;
   222 
   223 
   224             /* Now, do second row.  */
   225 
   226             L = *lum2++;
   227             value = (rgb_2_pix[L + cr_r] |
   228                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   229             *row2++ = (value) & 0xFF;
   230             *row2++ = (value >> 8) & 0xFF;
   231             *row2++ = (value >> 16) & 0xFF;
   232 
   233             L = *lum2++;
   234             value = (rgb_2_pix[L + cr_r] |
   235                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   236             *row2++ = (value) & 0xFF;
   237             *row2++ = (value >> 8) & 0xFF;
   238             *row2++ = (value >> 16) & 0xFF;
   239         }
   240 
   241         /*
   242          * These values are at the start of the next line, (due
   243          * to the ++'s above),but they need to be at the start
   244          * of the line after that.
   245          */
   246         lum += cols;
   247         lum2 += cols;
   248         row1 += mod;
   249         row2 += mod;
   250     }
   251 }
   252 
   253 static void
   254 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   255                        unsigned char *lum, unsigned char *cr,
   256                        unsigned char *cb, unsigned char *out,
   257                        int rows, int cols, int mod)
   258 {
   259     unsigned int *row1;
   260     unsigned int *row2;
   261     unsigned char *lum2;
   262     int x, y;
   263     int cr_r;
   264     int crb_g;
   265     int cb_b;
   266     int cols_2 = cols / 2;
   267 
   268     row1 = (unsigned int *) out;
   269     row2 = row1 + cols + mod;
   270     lum2 = lum + cols;
   271 
   272     mod += cols + mod;
   273 
   274     y = rows / 2;
   275     while (y--) {
   276         x = cols_2;
   277         while (x--) {
   278             register int L;
   279 
   280             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   281             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   282                 + colortab[*cb + 2 * 256];
   283             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   284             ++cr;
   285             ++cb;
   286 
   287             L = *lum++;
   288             *row1++ = (rgb_2_pix[L + cr_r] |
   289                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   290 
   291             L = *lum++;
   292             *row1++ = (rgb_2_pix[L + cr_r] |
   293                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   294 
   295 
   296             /* Now, do second row.  */
   297 
   298             L = *lum2++;
   299             *row2++ = (rgb_2_pix[L + cr_r] |
   300                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   301 
   302             L = *lum2++;
   303             *row2++ = (rgb_2_pix[L + cr_r] |
   304                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   305         }
   306 
   307         /*
   308          * These values are at the start of the next line, (due
   309          * to the ++'s above),but they need to be at the start
   310          * of the line after that.
   311          */
   312         lum += cols;
   313         lum2 += cols;
   314         row1 += mod;
   315         row2 += mod;
   316     }
   317 }
   318 
   319 /*
   320  * In this function I make use of a nasty trick. The tables have the lower
   321  * 16 bits replicated in the upper 16. This means I can write ints and get
   322  * the horizontal doubling for free (almost).
   323  */
   324 static void
   325 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   326                        unsigned char *lum, unsigned char *cr,
   327                        unsigned char *cb, unsigned char *out,
   328                        int rows, int cols, int mod)
   329 {
   330     unsigned int *row1 = (unsigned int *) out;
   331     const int next_row = cols + (mod / 2);
   332     unsigned int *row2 = row1 + 2 * next_row;
   333     unsigned char *lum2;
   334     int x, y;
   335     int cr_r;
   336     int crb_g;
   337     int cb_b;
   338     int cols_2 = cols / 2;
   339 
   340     lum2 = lum + cols;
   341 
   342     mod = (next_row * 3) + (mod / 2);
   343 
   344     y = rows / 2;
   345     while (y--) {
   346         x = cols_2;
   347         while (x--) {
   348             register int L;
   349 
   350             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   351             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   352                 + colortab[*cb + 2 * 256];
   353             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   354             ++cr;
   355             ++cb;
   356 
   357             L = *lum++;
   358             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   359                                         rgb_2_pix[L + crb_g] |
   360                                         rgb_2_pix[L + cb_b]);
   361             row1++;
   362 
   363             L = *lum++;
   364             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   365                                         rgb_2_pix[L + crb_g] |
   366                                         rgb_2_pix[L + cb_b]);
   367             row1++;
   368 
   369 
   370             /* Now, do second row. */
   371 
   372             L = *lum2++;
   373             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   374                                         rgb_2_pix[L + crb_g] |
   375                                         rgb_2_pix[L + cb_b]);
   376             row2++;
   377 
   378             L = *lum2++;
   379             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   380                                         rgb_2_pix[L + crb_g] |
   381                                         rgb_2_pix[L + cb_b]);
   382             row2++;
   383         }
   384 
   385         /*
   386          * These values are at the start of the next line, (due
   387          * to the ++'s above),but they need to be at the start
   388          * of the line after that.
   389          */
   390         lum += cols;
   391         lum2 += cols;
   392         row1 += mod;
   393         row2 += mod;
   394     }
   395 }
   396 
   397 static void
   398 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   399                        unsigned char *lum, unsigned char *cr,
   400                        unsigned char *cb, unsigned char *out,
   401                        int rows, int cols, int mod)
   402 {
   403     unsigned int value;
   404     unsigned char *row1 = out;
   405     const int next_row = (cols * 2 + mod) * 3;
   406     unsigned char *row2 = row1 + 2 * next_row;
   407     unsigned char *lum2;
   408     int x, y;
   409     int cr_r;
   410     int crb_g;
   411     int cb_b;
   412     int cols_2 = cols / 2;
   413 
   414     lum2 = lum + cols;
   415 
   416     mod = next_row * 3 + mod * 3;
   417 
   418     y = rows / 2;
   419     while (y--) {
   420         x = cols_2;
   421         while (x--) {
   422             register int L;
   423 
   424             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   425             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   426                 + colortab[*cb + 2 * 256];
   427             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   428             ++cr;
   429             ++cb;
   430 
   431             L = *lum++;
   432             value = (rgb_2_pix[L + cr_r] |
   433                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   434             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   435                 row1[next_row + 3 + 0] = (value) & 0xFF;
   436             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   437                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   438             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   439                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   440             row1 += 2 * 3;
   441 
   442             L = *lum++;
   443             value = (rgb_2_pix[L + cr_r] |
   444                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   445             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   446                 row1[next_row + 3 + 0] = (value) & 0xFF;
   447             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   448                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   449             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   450                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   451             row1 += 2 * 3;
   452 
   453 
   454             /* Now, do second row. */
   455 
   456             L = *lum2++;
   457             value = (rgb_2_pix[L + cr_r] |
   458                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   459             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   460                 row2[next_row + 3 + 0] = (value) & 0xFF;
   461             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   462                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   463             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   464                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   465             row2 += 2 * 3;
   466 
   467             L = *lum2++;
   468             value = (rgb_2_pix[L + cr_r] |
   469                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   470             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   471                 row2[next_row + 3 + 0] = (value) & 0xFF;
   472             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   473                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   474             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   475                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   476             row2 += 2 * 3;
   477         }
   478 
   479         /*
   480          * These values are at the start of the next line, (due
   481          * to the ++'s above),but they need to be at the start
   482          * of the line after that.
   483          */
   484         lum += cols;
   485         lum2 += cols;
   486         row1 += mod;
   487         row2 += mod;
   488     }
   489 }
   490 
   491 static void
   492 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   493                        unsigned char *lum, unsigned char *cr,
   494                        unsigned char *cb, unsigned char *out,
   495                        int rows, int cols, int mod)
   496 {
   497     unsigned int *row1 = (unsigned int *) out;
   498     const int next_row = cols * 2 + mod;
   499     unsigned int *row2 = row1 + 2 * next_row;
   500     unsigned char *lum2;
   501     int x, y;
   502     int cr_r;
   503     int crb_g;
   504     int cb_b;
   505     int cols_2 = cols / 2;
   506 
   507     lum2 = lum + cols;
   508 
   509     mod = (next_row * 3) + mod;
   510 
   511     y = rows / 2;
   512     while (y--) {
   513         x = cols_2;
   514         while (x--) {
   515             register int L;
   516 
   517             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   518             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   519                 + colortab[*cb + 2 * 256];
   520             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   521             ++cr;
   522             ++cb;
   523 
   524             L = *lum++;
   525             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   526                 (rgb_2_pix[L + cr_r] |
   527                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   528             row1 += 2;
   529 
   530             L = *lum++;
   531             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   532                 (rgb_2_pix[L + cr_r] |
   533                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   534             row1 += 2;
   535 
   536 
   537             /* Now, do second row. */
   538 
   539             L = *lum2++;
   540             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   541                 (rgb_2_pix[L + cr_r] |
   542                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   543             row2 += 2;
   544 
   545             L = *lum2++;
   546             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   547                 (rgb_2_pix[L + cr_r] |
   548                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   549             row2 += 2;
   550         }
   551 
   552         /*
   553          * These values are at the start of the next line, (due
   554          * to the ++'s above),but they need to be at the start
   555          * of the line after that.
   556          */
   557         lum += cols;
   558         lum2 += cols;
   559         row1 += mod;
   560         row2 += mod;
   561     }
   562 }
   563 
   564 static void
   565 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   566                        unsigned char *lum, unsigned char *cr,
   567                        unsigned char *cb, unsigned char *out,
   568                        int rows, int cols, int mod)
   569 {
   570     unsigned short *row;
   571     int x, y;
   572     int cr_r;
   573     int crb_g;
   574     int cb_b;
   575     int cols_2 = cols / 2;
   576 
   577     row = (unsigned short *) out;
   578 
   579     y = rows;
   580     while (y--) {
   581         x = cols_2;
   582         while (x--) {
   583             register int L;
   584 
   585             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   586             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   587                 + colortab[*cb + 2 * 256];
   588             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   589             cr += 4;
   590             cb += 4;
   591 
   592             L = *lum;
   593             lum += 2;
   594             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   595                                        rgb_2_pix[L + crb_g] |
   596                                        rgb_2_pix[L + cb_b]);
   597 
   598             L = *lum;
   599             lum += 2;
   600             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   601                                        rgb_2_pix[L + crb_g] |
   602                                        rgb_2_pix[L + cb_b]);
   603 
   604         }
   605 
   606         row += mod;
   607     }
   608 }
   609 
   610 static void
   611 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   612                        unsigned char *lum, unsigned char *cr,
   613                        unsigned char *cb, unsigned char *out,
   614                        int rows, int cols, int mod)
   615 {
   616     unsigned int value;
   617     unsigned char *row;
   618     int x, y;
   619     int cr_r;
   620     int crb_g;
   621     int cb_b;
   622     int cols_2 = cols / 2;
   623 
   624     row = (unsigned char *) out;
   625     mod *= 3;
   626     y = rows;
   627     while (y--) {
   628         x = cols_2;
   629         while (x--) {
   630             register int L;
   631 
   632             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   633             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   634                 + colortab[*cb + 2 * 256];
   635             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   636             cr += 4;
   637             cb += 4;
   638 
   639             L = *lum;
   640             lum += 2;
   641             value = (rgb_2_pix[L + cr_r] |
   642                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   643             *row++ = (value) & 0xFF;
   644             *row++ = (value >> 8) & 0xFF;
   645             *row++ = (value >> 16) & 0xFF;
   646 
   647             L = *lum;
   648             lum += 2;
   649             value = (rgb_2_pix[L + cr_r] |
   650                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   651             *row++ = (value) & 0xFF;
   652             *row++ = (value >> 8) & 0xFF;
   653             *row++ = (value >> 16) & 0xFF;
   654 
   655         }
   656         row += mod;
   657     }
   658 }
   659 
   660 static void
   661 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   662                        unsigned char *lum, unsigned char *cr,
   663                        unsigned char *cb, unsigned char *out,
   664                        int rows, int cols, int mod)
   665 {
   666     unsigned int *row;
   667     int x, y;
   668     int cr_r;
   669     int crb_g;
   670     int cb_b;
   671     int cols_2 = cols / 2;
   672 
   673     row = (unsigned int *) out;
   674     y = rows;
   675     while (y--) {
   676         x = cols_2;
   677         while (x--) {
   678             register int L;
   679 
   680             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   681             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   682                 + colortab[*cb + 2 * 256];
   683             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   684             cr += 4;
   685             cb += 4;
   686 
   687             L = *lum;
   688             lum += 2;
   689             *row++ = (rgb_2_pix[L + cr_r] |
   690                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   691 
   692             L = *lum;
   693             lum += 2;
   694             *row++ = (rgb_2_pix[L + cr_r] |
   695                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   696 
   697 
   698         }
   699         row += mod;
   700     }
   701 }
   702 
   703 /*
   704  * In this function I make use of a nasty trick. The tables have the lower
   705  * 16 bits replicated in the upper 16. This means I can write ints and get
   706  * the horizontal doubling for free (almost).
   707  */
   708 static void
   709 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   710                        unsigned char *lum, unsigned char *cr,
   711                        unsigned char *cb, unsigned char *out,
   712                        int rows, int cols, int mod)
   713 {
   714     unsigned int *row = (unsigned int *) out;
   715     const int next_row = cols + (mod / 2);
   716     int x, y;
   717     int cr_r;
   718     int crb_g;
   719     int cb_b;
   720     int cols_2 = cols / 2;
   721 
   722     y = rows;
   723     while (y--) {
   724         x = cols_2;
   725         while (x--) {
   726             register int L;
   727 
   728             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   729             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   730                 + colortab[*cb + 2 * 256];
   731             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   732             cr += 4;
   733             cb += 4;
   734 
   735             L = *lum;
   736             lum += 2;
   737             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   738                                       rgb_2_pix[L + crb_g] |
   739                                       rgb_2_pix[L + cb_b]);
   740             row++;
   741 
   742             L = *lum;
   743             lum += 2;
   744             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   745                                       rgb_2_pix[L + crb_g] |
   746                                       rgb_2_pix[L + cb_b]);
   747             row++;
   748 
   749         }
   750         row += next_row;
   751     }
   752 }
   753 
   754 static void
   755 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   756                        unsigned char *lum, unsigned char *cr,
   757                        unsigned char *cb, unsigned char *out,
   758                        int rows, int cols, int mod)
   759 {
   760     unsigned int value;
   761     unsigned char *row = out;
   762     const int next_row = (cols * 2 + mod) * 3;
   763     int x, y;
   764     int cr_r;
   765     int crb_g;
   766     int cb_b;
   767     int cols_2 = cols / 2;
   768     y = rows;
   769     while (y--) {
   770         x = cols_2;
   771         while (x--) {
   772             register int L;
   773 
   774             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   775             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   776                 + colortab[*cb + 2 * 256];
   777             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   778             cr += 4;
   779             cb += 4;
   780 
   781             L = *lum;
   782             lum += 2;
   783             value = (rgb_2_pix[L + cr_r] |
   784                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   785             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   786                 row[next_row + 3 + 0] = (value) & 0xFF;
   787             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   788                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   789             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   790                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   791             row += 2 * 3;
   792 
   793             L = *lum;
   794             lum += 2;
   795             value = (rgb_2_pix[L + cr_r] |
   796                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   797             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   798                 row[next_row + 3 + 0] = (value) & 0xFF;
   799             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   800                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   801             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   802                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   803             row += 2 * 3;
   804 
   805         }
   806         row += next_row;
   807     }
   808 }
   809 
   810 static void
   811 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   812                        unsigned char *lum, unsigned char *cr,
   813                        unsigned char *cb, unsigned char *out,
   814                        int rows, int cols, int mod)
   815 {
   816     unsigned int *row = (unsigned int *) out;
   817     const int next_row = cols * 2 + mod;
   818     int x, y;
   819     int cr_r;
   820     int crb_g;
   821     int cb_b;
   822     int cols_2 = cols / 2;
   823     mod += mod;
   824     y = rows;
   825     while (y--) {
   826         x = cols_2;
   827         while (x--) {
   828             register int L;
   829 
   830             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   831             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   832                 + colortab[*cb + 2 * 256];
   833             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   834             cr += 4;
   835             cb += 4;
   836 
   837             L = *lum;
   838             lum += 2;
   839             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   840                 (rgb_2_pix[L + cr_r] |
   841                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   842             row += 2;
   843 
   844             L = *lum;
   845             lum += 2;
   846             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   847                 (rgb_2_pix[L + cr_r] |
   848                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   849             row += 2;
   850 
   851 
   852         }
   853 
   854         row += next_row;
   855     }
   856 }
   857 
   858 /*
   859  * How many 1 bits are there in the Uint32.
   860  * Low performance, do not call often.
   861  */
   862 static int
   863 number_of_bits_set(Uint32 a)
   864 {
   865     if (!a)
   866         return 0;
   867     if (a & 1)
   868         return 1 + number_of_bits_set(a >> 1);
   869     return (number_of_bits_set(a >> 1));
   870 }
   871 
   872 /*
   873  * How many 0 bits are there at least significant end of Uint32.
   874  * Low performance, do not call often.
   875  */
   876 static int
   877 free_bits_at_bottom(Uint32 a)
   878 {
   879     /* assume char is 8 bits */
   880     if (!a)
   881         return sizeof(Uint32) * 8;
   882     if (((Sint32) a) & 1l)
   883         return 0;
   884     return 1 + free_bits_at_bottom(a >> 1);
   885 }
   886 
   887 static int
   888 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   889 {
   890     Uint32 *r_2_pix_alloc;
   891     Uint32 *g_2_pix_alloc;
   892     Uint32 *b_2_pix_alloc;
   893     int i;
   894     int bpp;
   895     Uint32 Rmask, Gmask, Bmask, Amask;
   896 
   897     if (!SDL_PixelFormatEnumToMasks
   898         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   899         return SDL_SetError("Unsupported YUV destination format");
   900     }
   901 
   902     swdata->target_format = target_format;
   903     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   904     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   905     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   906 
   907     /*
   908      * Set up entries 0-255 in rgb-to-pixel value tables.
   909      */
   910     for (i = 0; i < 256; ++i) {
   911         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   912         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
   913         r_2_pix_alloc[i + 256] |= Amask;
   914         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   915         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
   916         g_2_pix_alloc[i + 256] |= Amask;
   917         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   918         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
   919         b_2_pix_alloc[i + 256] |= Amask;
   920     }
   921 
   922     /*
   923      * If we have 16-bit output depth, then we double the value
   924      * in the top word. This means that we can write out both
   925      * pixels in the pixel doubling mode with one op. It is
   926      * harmless in the normal case as storing a 32-bit value
   927      * through a short pointer will lose the top bits anyway.
   928      */
   929     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   930         for (i = 0; i < 256; ++i) {
   931             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   932             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   933             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   934         }
   935     }
   936 
   937     /*
   938      * Spread out the values we have to the rest of the array so that
   939      * we do not need to check for overflow.
   940      */
   941     for (i = 0; i < 256; ++i) {
   942         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   943         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   944         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   945         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   946         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   947         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   948     }
   949 
   950     /* You have chosen wisely... */
   951     switch (swdata->format) {
   952     case SDL_PIXELFORMAT_YV12:
   953     case SDL_PIXELFORMAT_IYUV:
   954         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   955 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   956             /* inline assembly functions */
   957             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   958                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   959                 && (swdata->w & 15) == 0) {
   960 /*printf("Using MMX 16-bit 565 dither\n");*/
   961                 swdata->Display1X = Color565DitherYV12MMX1X;
   962             } else {
   963 /*printf("Using C 16-bit dither\n");*/
   964                 swdata->Display1X = Color16DitherYV12Mod1X;
   965             }
   966 #else
   967             swdata->Display1X = Color16DitherYV12Mod1X;
   968 #endif
   969             swdata->Display2X = Color16DitherYV12Mod2X;
   970         }
   971         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   972             swdata->Display1X = Color24DitherYV12Mod1X;
   973             swdata->Display2X = Color24DitherYV12Mod2X;
   974         }
   975         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   976 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   977             /* inline assembly functions */
   978             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   979                 (Gmask == 0x0000FF00) &&
   980                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   981 /*printf("Using MMX 32-bit dither\n");*/
   982                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   983             } else {
   984 /*printf("Using C 32-bit dither\n");*/
   985                 swdata->Display1X = Color32DitherYV12Mod1X;
   986             }
   987 #else
   988             swdata->Display1X = Color32DitherYV12Mod1X;
   989 #endif
   990             swdata->Display2X = Color32DitherYV12Mod2X;
   991         }
   992         break;
   993     case SDL_PIXELFORMAT_YUY2:
   994     case SDL_PIXELFORMAT_UYVY:
   995     case SDL_PIXELFORMAT_YVYU:
   996         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   997             swdata->Display1X = Color16DitherYUY2Mod1X;
   998             swdata->Display2X = Color16DitherYUY2Mod2X;
   999         }
  1000         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1001             swdata->Display1X = Color24DitherYUY2Mod1X;
  1002             swdata->Display2X = Color24DitherYUY2Mod2X;
  1003         }
  1004         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1005             swdata->Display1X = Color32DitherYUY2Mod1X;
  1006             swdata->Display2X = Color32DitherYUY2Mod2X;
  1007         }
  1008         break;
  1009     default:
  1010         /* We should never get here (caught above) */
  1011         break;
  1012     }
  1013 
  1014     if (swdata->display) {
  1015         SDL_FreeSurface(swdata->display);
  1016         swdata->display = NULL;
  1017     }
  1018     return 0;
  1019 }
  1020 
  1021 SDL_SW_YUVTexture *
  1022 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1023 {
  1024     SDL_SW_YUVTexture *swdata;
  1025     int *Cr_r_tab;
  1026     int *Cr_g_tab;
  1027     int *Cb_g_tab;
  1028     int *Cb_b_tab;
  1029     int i;
  1030     int CR, CB;
  1031 
  1032     switch (format) {
  1033     case SDL_PIXELFORMAT_YV12:
  1034     case SDL_PIXELFORMAT_IYUV:
  1035     case SDL_PIXELFORMAT_YUY2:
  1036     case SDL_PIXELFORMAT_UYVY:
  1037     case SDL_PIXELFORMAT_YVYU:
  1038         break;
  1039     default:
  1040         SDL_SetError("Unsupported YUV format");
  1041         return NULL;
  1042     }
  1043 
  1044     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1045     if (!swdata) {
  1046         SDL_OutOfMemory();
  1047         return NULL;
  1048     }
  1049 
  1050     swdata->format = format;
  1051     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1052     swdata->w = w;
  1053     swdata->h = h;
  1054     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1055     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1056     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1057     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1058         SDL_SW_DestroyYUVTexture(swdata);
  1059         SDL_OutOfMemory();
  1060         return NULL;
  1061     }
  1062 
  1063     /* Generate the tables for the display surface */
  1064     Cr_r_tab = &swdata->colortab[0 * 256];
  1065     Cr_g_tab = &swdata->colortab[1 * 256];
  1066     Cb_g_tab = &swdata->colortab[2 * 256];
  1067     Cb_b_tab = &swdata->colortab[3 * 256];
  1068     for (i = 0; i < 256; i++) {
  1069         /* Gamma correction (luminescence table) and chroma correction
  1070            would be done here.  See the Berkeley mpeg_play sources.
  1071          */
  1072         CB = CR = (i - 128);
  1073         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1074         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1075         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1076         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1077     }
  1078 
  1079     /* Find the pitch and offset values for the overlay */
  1080     switch (format) {
  1081     case SDL_PIXELFORMAT_YV12:
  1082     case SDL_PIXELFORMAT_IYUV:
  1083         swdata->pitches[0] = w;
  1084         swdata->pitches[1] = swdata->pitches[0] / 2;
  1085         swdata->pitches[2] = swdata->pitches[0] / 2;
  1086         swdata->planes[0] = swdata->pixels;
  1087         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1088         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1089         break;
  1090     case SDL_PIXELFORMAT_YUY2:
  1091     case SDL_PIXELFORMAT_UYVY:
  1092     case SDL_PIXELFORMAT_YVYU:
  1093         swdata->pitches[0] = w * 2;
  1094         swdata->planes[0] = swdata->pixels;
  1095         break;
  1096     default:
  1097         SDL_assert(0 && "We should never get here (caught above)");
  1098         break;
  1099     }
  1100 
  1101     /* We're all done.. */
  1102     return (swdata);
  1103 }
  1104 
  1105 int
  1106 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1107                              int *pitch)
  1108 {
  1109     *pixels = swdata->planes[0];
  1110     *pitch = swdata->pitches[0];
  1111     return 0;
  1112 }
  1113 
  1114 int
  1115 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1116                         const void *pixels, int pitch)
  1117 {
  1118     switch (swdata->format) {
  1119     case SDL_PIXELFORMAT_YV12:
  1120     case SDL_PIXELFORMAT_IYUV:
  1121         if (rect->x == 0 && rect->y == 0 &&
  1122             rect->w == swdata->w && rect->h == swdata->h) {
  1123                 SDL_memcpy(swdata->pixels, pixels,
  1124                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1125         } else {
  1126             Uint8 *src, *dst;
  1127             int row;
  1128             size_t length;
  1129 
  1130             /* Copy the Y plane */
  1131             src = (Uint8 *) pixels;
  1132             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1133             length = rect->w;
  1134             for (row = 0; row < rect->h; ++row) {
  1135                 SDL_memcpy(dst, src, length);
  1136                 src += pitch;
  1137                 dst += swdata->w;
  1138             }
  1139 
  1140             /* Copy the next plane */
  1141             src = (Uint8 *) pixels + rect->h * pitch;
  1142             dst = swdata->pixels + swdata->h * swdata->w;
  1143             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1144             length = rect->w / 2;
  1145             for (row = 0; row < rect->h/2; ++row) {
  1146                 SDL_memcpy(dst, src, length);
  1147                 src += pitch/2;
  1148                 dst += swdata->w/2;
  1149             }
  1150 
  1151             /* Copy the next plane */
  1152             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1153             dst = swdata->pixels + swdata->h * swdata->w +
  1154                   (swdata->h * swdata->w) / 4;
  1155             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1156             length = rect->w / 2;
  1157             for (row = 0; row < rect->h/2; ++row) {
  1158                 SDL_memcpy(dst, src, length);
  1159                 src += pitch/2;
  1160                 dst += swdata->w/2;
  1161             }
  1162         }
  1163         break;
  1164     case SDL_PIXELFORMAT_YUY2:
  1165     case SDL_PIXELFORMAT_UYVY:
  1166     case SDL_PIXELFORMAT_YVYU:
  1167         {
  1168             Uint8 *src, *dst;
  1169             int row;
  1170             size_t length;
  1171 
  1172             src = (Uint8 *) pixels;
  1173             dst =
  1174                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1175                 rect->x * 2;
  1176             length = rect->w * 2;
  1177             for (row = 0; row < rect->h; ++row) {
  1178                 SDL_memcpy(dst, src, length);
  1179                 src += pitch;
  1180                 dst += swdata->pitches[0];
  1181             }
  1182         }
  1183         break;
  1184     }
  1185     return 0;
  1186 }
  1187 
  1188 int
  1189 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1190                       void **pixels, int *pitch)
  1191 {
  1192     switch (swdata->format) {
  1193     case SDL_PIXELFORMAT_YV12:
  1194     case SDL_PIXELFORMAT_IYUV:
  1195         if (rect
  1196             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1197                 || rect->h != swdata->h)) {
  1198             return SDL_SetError
  1199                 ("YV12 and IYUV textures only support full surface locks");
  1200         }
  1201         break;
  1202     }
  1203 
  1204     if (rect) {
  1205         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1206     } else {
  1207         *pixels = swdata->planes[0];
  1208     }
  1209     *pitch = swdata->pitches[0];
  1210     return 0;
  1211 }
  1212 
  1213 void
  1214 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1215 {
  1216 }
  1217 
  1218 int
  1219 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1220                     Uint32 target_format, int w, int h, void *pixels,
  1221                     int pitch)
  1222 {
  1223     int stretch;
  1224     int scale_2x;
  1225     Uint8 *lum, *Cr, *Cb;
  1226     int mod;
  1227 
  1228     /* Make sure we're set up to display in the desired format */
  1229     if (target_format != swdata->target_format) {
  1230         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1231             return -1;
  1232         }
  1233     }
  1234 
  1235     stretch = 0;
  1236     scale_2x = 0;
  1237     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1238         || srcrect->h < swdata->h) {
  1239         /* The source rectangle has been clipped.
  1240            Using a scratch surface is easier than adding clipped
  1241            source support to all the blitters, plus that would
  1242            slow them down in the general unclipped case.
  1243          */
  1244         stretch = 1;
  1245     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1246         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1247             scale_2x = 1;
  1248         } else {
  1249             stretch = 1;
  1250         }
  1251     }
  1252     if (stretch) {
  1253         int bpp;
  1254         Uint32 Rmask, Gmask, Bmask, Amask;
  1255 
  1256         if (swdata->display) {
  1257             swdata->display->w = w;
  1258             swdata->display->h = h;
  1259             swdata->display->pixels = pixels;
  1260             swdata->display->pitch = pitch;
  1261         } else {
  1262             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1263             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1264                                        &Bmask, &Amask);
  1265             swdata->display =
  1266                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1267                                          Gmask, Bmask, Amask);
  1268             if (!swdata->display) {
  1269                 return (-1);
  1270             }
  1271         }
  1272         if (!swdata->stretch) {
  1273             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1274             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1275                                        &Bmask, &Amask);
  1276             swdata->stretch =
  1277                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1278                                      Gmask, Bmask, Amask);
  1279             if (!swdata->stretch) {
  1280                 return (-1);
  1281             }
  1282         }
  1283         pixels = swdata->stretch->pixels;
  1284         pitch = swdata->stretch->pitch;
  1285     }
  1286     switch (swdata->format) {
  1287     case SDL_PIXELFORMAT_YV12:
  1288         lum = swdata->planes[0];
  1289         Cr = swdata->planes[1];
  1290         Cb = swdata->planes[2];
  1291         break;
  1292     case SDL_PIXELFORMAT_IYUV:
  1293         lum = swdata->planes[0];
  1294         Cr = swdata->planes[2];
  1295         Cb = swdata->planes[1];
  1296         break;
  1297     case SDL_PIXELFORMAT_YUY2:
  1298         lum = swdata->planes[0];
  1299         Cr = lum + 3;
  1300         Cb = lum + 1;
  1301         break;
  1302     case SDL_PIXELFORMAT_UYVY:
  1303         lum = swdata->planes[0] + 1;
  1304         Cr = lum + 1;
  1305         Cb = lum - 1;
  1306         break;
  1307     case SDL_PIXELFORMAT_YVYU:
  1308         lum = swdata->planes[0];
  1309         Cr = lum + 1;
  1310         Cb = lum + 3;
  1311         break;
  1312     default:
  1313         return SDL_SetError("Unsupported YUV format in copy");
  1314     }
  1315     mod = (pitch / SDL_BYTESPERPIXEL(target_format));
  1316 
  1317     if (scale_2x) {
  1318         mod -= (swdata->w * 2);
  1319         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1320                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1321     } else {
  1322         mod -= swdata->w;
  1323         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1324                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1325     }
  1326     if (stretch) {
  1327         SDL_Rect rect = *srcrect;
  1328         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1329     }
  1330     return 0;
  1331 }
  1332 
  1333 void
  1334 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1335 {
  1336     if (swdata) {
  1337         if (swdata->pixels) {
  1338             SDL_free(swdata->pixels);
  1339         }
  1340         if (swdata->colortab) {
  1341             SDL_free(swdata->colortab);
  1342         }
  1343         if (swdata->rgb_2_pix) {
  1344             SDL_free(swdata->rgb_2_pix);
  1345         }
  1346         if (swdata->stretch) {
  1347             SDL_FreeSurface(swdata->stretch);
  1348         }
  1349         if (swdata->display) {
  1350             SDL_FreeSurface(swdata->display);
  1351         }
  1352         SDL_free(swdata);
  1353     }
  1354 }
  1355 
  1356 /* vi: set ts=4 sw=4 expandtab: */