src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 28 Sep 2012 02:43:13 -0700
changeset 6490 d2d8576aa3a0
parent 6138 4c64952a58fb
child 6885 700f1b25f77f
permissions -rw-r--r--
Fixed memory leak in an error case
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  * 
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  * 
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  * 
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  * 
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  * 
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  * 
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  * 
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  * 
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  * 
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_video.h"
    86 #include "SDL_cpuinfo.h"
    87 #include "SDL_yuv_sw_c.h"
    88 
    89 
    90 /* The colorspace conversion functions */
    91 
    92 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    93 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    94                                     unsigned char *lum, unsigned char *cr,
    95                                     unsigned char *cb, unsigned char *out,
    96                                     int rows, int cols, int mod);
    97 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    98                                     unsigned char *lum, unsigned char *cr,
    99                                     unsigned char *cb, unsigned char *out,
   100                                     int rows, int cols, int mod);
   101 #endif
   102 
   103 static void
   104 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   105                        unsigned char *lum, unsigned char *cr,
   106                        unsigned char *cb, unsigned char *out,
   107                        int rows, int cols, int mod)
   108 {
   109     unsigned short *row1;
   110     unsigned short *row2;
   111     unsigned char *lum2;
   112     int x, y;
   113     int cr_r;
   114     int crb_g;
   115     int cb_b;
   116     int cols_2 = cols / 2;
   117 
   118     row1 = (unsigned short *) out;
   119     row2 = row1 + cols + mod;
   120     lum2 = lum + cols;
   121 
   122     mod += cols + mod;
   123 
   124     y = rows / 2;
   125     while (y--) {
   126         x = cols_2;
   127         while (x--) {
   128             register int L;
   129 
   130             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   131             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   132                 + colortab[*cb + 2 * 256];
   133             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   134             ++cr;
   135             ++cb;
   136 
   137             L = *lum++;
   138             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   139                                         rgb_2_pix[L + crb_g] |
   140                                         rgb_2_pix[L + cb_b]);
   141 
   142             L = *lum++;
   143             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   144                                         rgb_2_pix[L + crb_g] |
   145                                         rgb_2_pix[L + cb_b]);
   146 
   147 
   148             /* Now, do second row.  */
   149 
   150             L = *lum2++;
   151             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   152                                         rgb_2_pix[L + crb_g] |
   153                                         rgb_2_pix[L + cb_b]);
   154 
   155             L = *lum2++;
   156             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   157                                         rgb_2_pix[L + crb_g] |
   158                                         rgb_2_pix[L + cb_b]);
   159         }
   160 
   161         /*
   162          * These values are at the start of the next line, (due
   163          * to the ++'s above),but they need to be at the start
   164          * of the line after that.
   165          */
   166         lum += cols;
   167         lum2 += cols;
   168         row1 += mod;
   169         row2 += mod;
   170     }
   171 }
   172 
   173 static void
   174 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   175                        unsigned char *lum, unsigned char *cr,
   176                        unsigned char *cb, unsigned char *out,
   177                        int rows, int cols, int mod)
   178 {
   179     unsigned int value;
   180     unsigned char *row1;
   181     unsigned char *row2;
   182     unsigned char *lum2;
   183     int x, y;
   184     int cr_r;
   185     int crb_g;
   186     int cb_b;
   187     int cols_2 = cols / 2;
   188 
   189     row1 = out;
   190     row2 = row1 + cols * 3 + mod * 3;
   191     lum2 = lum + cols;
   192 
   193     mod += cols + mod;
   194     mod *= 3;
   195 
   196     y = rows / 2;
   197     while (y--) {
   198         x = cols_2;
   199         while (x--) {
   200             register int L;
   201 
   202             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   203             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   204                 + colortab[*cb + 2 * 256];
   205             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   206             ++cr;
   207             ++cb;
   208 
   209             L = *lum++;
   210             value = (rgb_2_pix[L + cr_r] |
   211                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   212             *row1++ = (value) & 0xFF;
   213             *row1++ = (value >> 8) & 0xFF;
   214             *row1++ = (value >> 16) & 0xFF;
   215 
   216             L = *lum++;
   217             value = (rgb_2_pix[L + cr_r] |
   218                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   219             *row1++ = (value) & 0xFF;
   220             *row1++ = (value >> 8) & 0xFF;
   221             *row1++ = (value >> 16) & 0xFF;
   222 
   223 
   224             /* Now, do second row.  */
   225 
   226             L = *lum2++;
   227             value = (rgb_2_pix[L + cr_r] |
   228                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   229             *row2++ = (value) & 0xFF;
   230             *row2++ = (value >> 8) & 0xFF;
   231             *row2++ = (value >> 16) & 0xFF;
   232 
   233             L = *lum2++;
   234             value = (rgb_2_pix[L + cr_r] |
   235                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   236             *row2++ = (value) & 0xFF;
   237             *row2++ = (value >> 8) & 0xFF;
   238             *row2++ = (value >> 16) & 0xFF;
   239         }
   240 
   241         /*
   242          * These values are at the start of the next line, (due
   243          * to the ++'s above),but they need to be at the start
   244          * of the line after that.
   245          */
   246         lum += cols;
   247         lum2 += cols;
   248         row1 += mod;
   249         row2 += mod;
   250     }
   251 }
   252 
   253 static void
   254 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   255                        unsigned char *lum, unsigned char *cr,
   256                        unsigned char *cb, unsigned char *out,
   257                        int rows, int cols, int mod)
   258 {
   259     unsigned int *row1;
   260     unsigned int *row2;
   261     unsigned char *lum2;
   262     int x, y;
   263     int cr_r;
   264     int crb_g;
   265     int cb_b;
   266     int cols_2 = cols / 2;
   267 
   268     row1 = (unsigned int *) out;
   269     row2 = row1 + cols + mod;
   270     lum2 = lum + cols;
   271 
   272     mod += cols + mod;
   273 
   274     y = rows / 2;
   275     while (y--) {
   276         x = cols_2;
   277         while (x--) {
   278             register int L;
   279 
   280             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   281             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   282                 + colortab[*cb + 2 * 256];
   283             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   284             ++cr;
   285             ++cb;
   286 
   287             L = *lum++;
   288             *row1++ = (rgb_2_pix[L + cr_r] |
   289                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   290 
   291             L = *lum++;
   292             *row1++ = (rgb_2_pix[L + cr_r] |
   293                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   294 
   295 
   296             /* Now, do second row.  */
   297 
   298             L = *lum2++;
   299             *row2++ = (rgb_2_pix[L + cr_r] |
   300                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   301 
   302             L = *lum2++;
   303             *row2++ = (rgb_2_pix[L + cr_r] |
   304                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   305         }
   306 
   307         /*
   308          * These values are at the start of the next line, (due
   309          * to the ++'s above),but they need to be at the start
   310          * of the line after that.
   311          */
   312         lum += cols;
   313         lum2 += cols;
   314         row1 += mod;
   315         row2 += mod;
   316     }
   317 }
   318 
   319 /*
   320  * In this function I make use of a nasty trick. The tables have the lower
   321  * 16 bits replicated in the upper 16. This means I can write ints and get
   322  * the horisontal doubling for free (almost).
   323  */
   324 static void
   325 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   326                        unsigned char *lum, unsigned char *cr,
   327                        unsigned char *cb, unsigned char *out,
   328                        int rows, int cols, int mod)
   329 {
   330     unsigned int *row1 = (unsigned int *) out;
   331     const int next_row = cols + (mod / 2);
   332     unsigned int *row2 = row1 + 2 * next_row;
   333     unsigned char *lum2;
   334     int x, y;
   335     int cr_r;
   336     int crb_g;
   337     int cb_b;
   338     int cols_2 = cols / 2;
   339 
   340     lum2 = lum + cols;
   341 
   342     mod = (next_row * 3) + (mod / 2);
   343 
   344     y = rows / 2;
   345     while (y--) {
   346         x = cols_2;
   347         while (x--) {
   348             register int L;
   349 
   350             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   351             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   352                 + colortab[*cb + 2 * 256];
   353             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   354             ++cr;
   355             ++cb;
   356 
   357             L = *lum++;
   358             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   359                                         rgb_2_pix[L + crb_g] |
   360                                         rgb_2_pix[L + cb_b]);
   361             row1++;
   362 
   363             L = *lum++;
   364             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   365                                         rgb_2_pix[L + crb_g] |
   366                                         rgb_2_pix[L + cb_b]);
   367             row1++;
   368 
   369 
   370             /* Now, do second row. */
   371 
   372             L = *lum2++;
   373             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   374                                         rgb_2_pix[L + crb_g] |
   375                                         rgb_2_pix[L + cb_b]);
   376             row2++;
   377 
   378             L = *lum2++;
   379             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   380                                         rgb_2_pix[L + crb_g] |
   381                                         rgb_2_pix[L + cb_b]);
   382             row2++;
   383         }
   384 
   385         /*
   386          * These values are at the start of the next line, (due
   387          * to the ++'s above),but they need to be at the start
   388          * of the line after that.
   389          */
   390         lum += cols;
   391         lum2 += cols;
   392         row1 += mod;
   393         row2 += mod;
   394     }
   395 }
   396 
   397 static void
   398 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   399                        unsigned char *lum, unsigned char *cr,
   400                        unsigned char *cb, unsigned char *out,
   401                        int rows, int cols, int mod)
   402 {
   403     unsigned int value;
   404     unsigned char *row1 = out;
   405     const int next_row = (cols * 2 + mod) * 3;
   406     unsigned char *row2 = row1 + 2 * next_row;
   407     unsigned char *lum2;
   408     int x, y;
   409     int cr_r;
   410     int crb_g;
   411     int cb_b;
   412     int cols_2 = cols / 2;
   413 
   414     lum2 = lum + cols;
   415 
   416     mod = next_row * 3 + mod * 3;
   417 
   418     y = rows / 2;
   419     while (y--) {
   420         x = cols_2;
   421         while (x--) {
   422             register int L;
   423 
   424             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   425             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   426                 + colortab[*cb + 2 * 256];
   427             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   428             ++cr;
   429             ++cb;
   430 
   431             L = *lum++;
   432             value = (rgb_2_pix[L + cr_r] |
   433                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   434             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   435                 row1[next_row + 3 + 0] = (value) & 0xFF;
   436             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   437                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   438             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   439                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   440             row1 += 2 * 3;
   441 
   442             L = *lum++;
   443             value = (rgb_2_pix[L + cr_r] |
   444                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   445             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   446                 row1[next_row + 3 + 0] = (value) & 0xFF;
   447             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   448                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   449             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   450                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   451             row1 += 2 * 3;
   452 
   453 
   454             /* Now, do second row. */
   455 
   456             L = *lum2++;
   457             value = (rgb_2_pix[L + cr_r] |
   458                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   459             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   460                 row2[next_row + 3 + 0] = (value) & 0xFF;
   461             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   462                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   463             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   464                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   465             row2 += 2 * 3;
   466 
   467             L = *lum2++;
   468             value = (rgb_2_pix[L + cr_r] |
   469                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   470             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   471                 row2[next_row + 3 + 0] = (value) & 0xFF;
   472             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   473                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   474             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   475                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   476             row2 += 2 * 3;
   477         }
   478 
   479         /*
   480          * These values are at the start of the next line, (due
   481          * to the ++'s above),but they need to be at the start
   482          * of the line after that.
   483          */
   484         lum += cols;
   485         lum2 += cols;
   486         row1 += mod;
   487         row2 += mod;
   488     }
   489 }
   490 
   491 static void
   492 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   493                        unsigned char *lum, unsigned char *cr,
   494                        unsigned char *cb, unsigned char *out,
   495                        int rows, int cols, int mod)
   496 {
   497     unsigned int *row1 = (unsigned int *) out;
   498     const int next_row = cols * 2 + mod;
   499     unsigned int *row2 = row1 + 2 * next_row;
   500     unsigned char *lum2;
   501     int x, y;
   502     int cr_r;
   503     int crb_g;
   504     int cb_b;
   505     int cols_2 = cols / 2;
   506 
   507     lum2 = lum + cols;
   508 
   509     mod = (next_row * 3) + mod;
   510 
   511     y = rows / 2;
   512     while (y--) {
   513         x = cols_2;
   514         while (x--) {
   515             register int L;
   516 
   517             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   518             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   519                 + colortab[*cb + 2 * 256];
   520             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   521             ++cr;
   522             ++cb;
   523 
   524             L = *lum++;
   525             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   526                 (rgb_2_pix[L + cr_r] |
   527                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   528             row1 += 2;
   529 
   530             L = *lum++;
   531             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   532                 (rgb_2_pix[L + cr_r] |
   533                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   534             row1 += 2;
   535 
   536 
   537             /* Now, do second row. */
   538 
   539             L = *lum2++;
   540             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   541                 (rgb_2_pix[L + cr_r] |
   542                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   543             row2 += 2;
   544 
   545             L = *lum2++;
   546             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   547                 (rgb_2_pix[L + cr_r] |
   548                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   549             row2 += 2;
   550         }
   551 
   552         /*
   553          * These values are at the start of the next line, (due
   554          * to the ++'s above),but they need to be at the start
   555          * of the line after that.
   556          */
   557         lum += cols;
   558         lum2 += cols;
   559         row1 += mod;
   560         row2 += mod;
   561     }
   562 }
   563 
   564 static void
   565 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   566                        unsigned char *lum, unsigned char *cr,
   567                        unsigned char *cb, unsigned char *out,
   568                        int rows, int cols, int mod)
   569 {
   570     unsigned short *row;
   571     int x, y;
   572     int cr_r;
   573     int crb_g;
   574     int cb_b;
   575     int cols_2 = cols / 2;
   576 
   577     row = (unsigned short *) out;
   578 
   579     y = rows;
   580     while (y--) {
   581         x = cols_2;
   582         while (x--) {
   583             register int L;
   584 
   585             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   586             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   587                 + colortab[*cb + 2 * 256];
   588             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   589             cr += 4;
   590             cb += 4;
   591 
   592             L = *lum;
   593             lum += 2;
   594             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   595                                        rgb_2_pix[L + crb_g] |
   596                                        rgb_2_pix[L + cb_b]);
   597 
   598             L = *lum;
   599             lum += 2;
   600             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   601                                        rgb_2_pix[L + crb_g] |
   602                                        rgb_2_pix[L + cb_b]);
   603 
   604         }
   605 
   606         row += mod;
   607     }
   608 }
   609 
   610 static void
   611 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   612                        unsigned char *lum, unsigned char *cr,
   613                        unsigned char *cb, unsigned char *out,
   614                        int rows, int cols, int mod)
   615 {
   616     unsigned int value;
   617     unsigned char *row;
   618     int x, y;
   619     int cr_r;
   620     int crb_g;
   621     int cb_b;
   622     int cols_2 = cols / 2;
   623 
   624     row = (unsigned char *) out;
   625     mod *= 3;
   626     y = rows;
   627     while (y--) {
   628         x = cols_2;
   629         while (x--) {
   630             register int L;
   631 
   632             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   633             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   634                 + colortab[*cb + 2 * 256];
   635             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   636             cr += 4;
   637             cb += 4;
   638 
   639             L = *lum;
   640             lum += 2;
   641             value = (rgb_2_pix[L + cr_r] |
   642                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   643             *row++ = (value) & 0xFF;
   644             *row++ = (value >> 8) & 0xFF;
   645             *row++ = (value >> 16) & 0xFF;
   646 
   647             L = *lum;
   648             lum += 2;
   649             value = (rgb_2_pix[L + cr_r] |
   650                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   651             *row++ = (value) & 0xFF;
   652             *row++ = (value >> 8) & 0xFF;
   653             *row++ = (value >> 16) & 0xFF;
   654 
   655         }
   656         row += mod;
   657     }
   658 }
   659 
   660 static void
   661 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   662                        unsigned char *lum, unsigned char *cr,
   663                        unsigned char *cb, unsigned char *out,
   664                        int rows, int cols, int mod)
   665 {
   666     unsigned int *row;
   667     int x, y;
   668     int cr_r;
   669     int crb_g;
   670     int cb_b;
   671     int cols_2 = cols / 2;
   672 
   673     row = (unsigned int *) out;
   674     y = rows;
   675     while (y--) {
   676         x = cols_2;
   677         while (x--) {
   678             register int L;
   679 
   680             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   681             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   682                 + colortab[*cb + 2 * 256];
   683             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   684             cr += 4;
   685             cb += 4;
   686 
   687             L = *lum;
   688             lum += 2;
   689             *row++ = (rgb_2_pix[L + cr_r] |
   690                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   691 
   692             L = *lum;
   693             lum += 2;
   694             *row++ = (rgb_2_pix[L + cr_r] |
   695                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   696 
   697 
   698         }
   699         row += mod;
   700     }
   701 }
   702 
   703 /*
   704  * In this function I make use of a nasty trick. The tables have the lower
   705  * 16 bits replicated in the upper 16. This means I can write ints and get
   706  * the horisontal doubling for free (almost).
   707  */
   708 static void
   709 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   710                        unsigned char *lum, unsigned char *cr,
   711                        unsigned char *cb, unsigned char *out,
   712                        int rows, int cols, int mod)
   713 {
   714     unsigned int *row = (unsigned int *) out;
   715     const int next_row = cols + (mod / 2);
   716     int x, y;
   717     int cr_r;
   718     int crb_g;
   719     int cb_b;
   720     int cols_2 = cols / 2;
   721 
   722     y = rows;
   723     while (y--) {
   724         x = cols_2;
   725         while (x--) {
   726             register int L;
   727 
   728             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   729             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   730                 + colortab[*cb + 2 * 256];
   731             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   732             cr += 4;
   733             cb += 4;
   734 
   735             L = *lum;
   736             lum += 2;
   737             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   738                                       rgb_2_pix[L + crb_g] |
   739                                       rgb_2_pix[L + cb_b]);
   740             row++;
   741 
   742             L = *lum;
   743             lum += 2;
   744             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   745                                       rgb_2_pix[L + crb_g] |
   746                                       rgb_2_pix[L + cb_b]);
   747             row++;
   748 
   749         }
   750         row += next_row;
   751     }
   752 }
   753 
   754 static void
   755 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   756                        unsigned char *lum, unsigned char *cr,
   757                        unsigned char *cb, unsigned char *out,
   758                        int rows, int cols, int mod)
   759 {
   760     unsigned int value;
   761     unsigned char *row = out;
   762     const int next_row = (cols * 2 + mod) * 3;
   763     int x, y;
   764     int cr_r;
   765     int crb_g;
   766     int cb_b;
   767     int cols_2 = cols / 2;
   768     y = rows;
   769     while (y--) {
   770         x = cols_2;
   771         while (x--) {
   772             register int L;
   773 
   774             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   775             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   776                 + colortab[*cb + 2 * 256];
   777             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   778             cr += 4;
   779             cb += 4;
   780 
   781             L = *lum;
   782             lum += 2;
   783             value = (rgb_2_pix[L + cr_r] |
   784                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   785             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   786                 row[next_row + 3 + 0] = (value) & 0xFF;
   787             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   788                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   789             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   790                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   791             row += 2 * 3;
   792 
   793             L = *lum;
   794             lum += 2;
   795             value = (rgb_2_pix[L + cr_r] |
   796                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   797             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   798                 row[next_row + 3 + 0] = (value) & 0xFF;
   799             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   800                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   801             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   802                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   803             row += 2 * 3;
   804 
   805         }
   806         row += next_row;
   807     }
   808 }
   809 
   810 static void
   811 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   812                        unsigned char *lum, unsigned char *cr,
   813                        unsigned char *cb, unsigned char *out,
   814                        int rows, int cols, int mod)
   815 {
   816     unsigned int *row = (unsigned int *) out;
   817     const int next_row = cols * 2 + mod;
   818     int x, y;
   819     int cr_r;
   820     int crb_g;
   821     int cb_b;
   822     int cols_2 = cols / 2;
   823     mod += mod;
   824     y = rows;
   825     while (y--) {
   826         x = cols_2;
   827         while (x--) {
   828             register int L;
   829 
   830             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   831             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   832                 + colortab[*cb + 2 * 256];
   833             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   834             cr += 4;
   835             cb += 4;
   836 
   837             L = *lum;
   838             lum += 2;
   839             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   840                 (rgb_2_pix[L + cr_r] |
   841                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   842             row += 2;
   843 
   844             L = *lum;
   845             lum += 2;
   846             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   847                 (rgb_2_pix[L + cr_r] |
   848                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   849             row += 2;
   850 
   851 
   852         }
   853 
   854         row += next_row;
   855     }
   856 }
   857 
   858 /*
   859  * How many 1 bits are there in the Uint32.
   860  * Low performance, do not call often.
   861  */
   862 static int
   863 number_of_bits_set(Uint32 a)
   864 {
   865     if (!a)
   866         return 0;
   867     if (a & 1)
   868         return 1 + number_of_bits_set(a >> 1);
   869     return (number_of_bits_set(a >> 1));
   870 }
   871 
   872 /*
   873  * How many 0 bits are there at least significant end of Uint32.
   874  * Low performance, do not call often.
   875  */
   876 static int
   877 free_bits_at_bottom(Uint32 a)
   878 {
   879     /* assume char is 8 bits */
   880     if (!a)
   881         return sizeof(Uint32) * 8;
   882     if (((Sint32) a) & 1l)
   883         return 0;
   884     return 1 + free_bits_at_bottom(a >> 1);
   885 }
   886 
   887 static int
   888 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   889 {
   890     Uint32 *r_2_pix_alloc;
   891     Uint32 *g_2_pix_alloc;
   892     Uint32 *b_2_pix_alloc;
   893     int i;
   894     int bpp;
   895     Uint32 Rmask, Gmask, Bmask, Amask;
   896 
   897     if (!SDL_PixelFormatEnumToMasks
   898         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   899         SDL_SetError("Unsupported YUV destination format");
   900         return -1;
   901     }
   902 
   903     swdata->target_format = target_format;
   904     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   905     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   906     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   907 
   908     /* 
   909      * Set up entries 0-255 in rgb-to-pixel value tables.
   910      */
   911     for (i = 0; i < 256; ++i) {
   912         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   913         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
   914         r_2_pix_alloc[i + 256] |= Amask;
   915         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   916         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
   917         g_2_pix_alloc[i + 256] |= Amask;
   918         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   919         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
   920         b_2_pix_alloc[i + 256] |= Amask;
   921     }
   922 
   923     /*
   924      * If we have 16-bit output depth, then we double the value
   925      * in the top word. This means that we can write out both
   926      * pixels in the pixel doubling mode with one op. It is 
   927      * harmless in the normal case as storing a 32-bit value
   928      * through a short pointer will lose the top bits anyway.
   929      */
   930     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   931         for (i = 0; i < 256; ++i) {
   932             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   933             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   934             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   935         }
   936     }
   937 
   938     /*
   939      * Spread out the values we have to the rest of the array so that
   940      * we do not need to check for overflow.
   941      */
   942     for (i = 0; i < 256; ++i) {
   943         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   944         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   945         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   946         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   947         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   948         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   949     }
   950 
   951     /* You have chosen wisely... */
   952     switch (swdata->format) {
   953     case SDL_PIXELFORMAT_YV12:
   954     case SDL_PIXELFORMAT_IYUV:
   955         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   956 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   957             /* inline assembly functions */
   958             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   959                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   960                 && (swdata->w & 15) == 0) {
   961 /*printf("Using MMX 16-bit 565 dither\n");*/
   962                 swdata->Display1X = Color565DitherYV12MMX1X;
   963             } else {
   964 /*printf("Using C 16-bit dither\n");*/
   965                 swdata->Display1X = Color16DitherYV12Mod1X;
   966             }
   967 #else
   968             swdata->Display1X = Color16DitherYV12Mod1X;
   969 #endif
   970             swdata->Display2X = Color16DitherYV12Mod2X;
   971         }
   972         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   973             swdata->Display1X = Color24DitherYV12Mod1X;
   974             swdata->Display2X = Color24DitherYV12Mod2X;
   975         }
   976         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   977 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   978             /* inline assembly functions */
   979             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   980                 (Gmask == 0x0000FF00) &&
   981                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   982 /*printf("Using MMX 32-bit dither\n");*/
   983                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   984             } else {
   985 /*printf("Using C 32-bit dither\n");*/
   986                 swdata->Display1X = Color32DitherYV12Mod1X;
   987             }
   988 #else
   989             swdata->Display1X = Color32DitherYV12Mod1X;
   990 #endif
   991             swdata->Display2X = Color32DitherYV12Mod2X;
   992         }
   993         break;
   994     case SDL_PIXELFORMAT_YUY2:
   995     case SDL_PIXELFORMAT_UYVY:
   996     case SDL_PIXELFORMAT_YVYU:
   997         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   998             swdata->Display1X = Color16DitherYUY2Mod1X;
   999             swdata->Display2X = Color16DitherYUY2Mod2X;
  1000         }
  1001         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1002             swdata->Display1X = Color24DitherYUY2Mod1X;
  1003             swdata->Display2X = Color24DitherYUY2Mod2X;
  1004         }
  1005         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1006             swdata->Display1X = Color32DitherYUY2Mod1X;
  1007             swdata->Display2X = Color32DitherYUY2Mod2X;
  1008         }
  1009         break;
  1010     default:
  1011         /* We should never get here (caught above) */
  1012         break;
  1013     }
  1014 
  1015     if (swdata->display) {
  1016         SDL_FreeSurface(swdata->display);
  1017         swdata->display = NULL;
  1018     }
  1019     return 0;
  1020 }
  1021 
  1022 SDL_SW_YUVTexture *
  1023 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1024 {
  1025     SDL_SW_YUVTexture *swdata;
  1026     int *Cr_r_tab;
  1027     int *Cr_g_tab;
  1028     int *Cb_g_tab;
  1029     int *Cb_b_tab;
  1030     int i;
  1031     int CR, CB;
  1032 
  1033     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1034     if (!swdata) {
  1035         SDL_OutOfMemory();
  1036         return NULL;
  1037     }
  1038 
  1039     switch (format) {
  1040     case SDL_PIXELFORMAT_YV12:
  1041     case SDL_PIXELFORMAT_IYUV:
  1042     case SDL_PIXELFORMAT_YUY2:
  1043     case SDL_PIXELFORMAT_UYVY:
  1044     case SDL_PIXELFORMAT_YVYU:
  1045         break;
  1046     default:
  1047         SDL_SW_DestroyYUVTexture(swdata);
  1048         SDL_SetError("Unsupported YUV format");
  1049         return NULL;
  1050     }
  1051 
  1052     swdata->format = format;
  1053     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1054     swdata->w = w;
  1055     swdata->h = h;
  1056     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1057     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1058     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1059     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1060         SDL_OutOfMemory();
  1061         SDL_SW_DestroyYUVTexture(swdata);
  1062         return NULL;
  1063     }
  1064 
  1065     /* Generate the tables for the display surface */
  1066     Cr_r_tab = &swdata->colortab[0 * 256];
  1067     Cr_g_tab = &swdata->colortab[1 * 256];
  1068     Cb_g_tab = &swdata->colortab[2 * 256];
  1069     Cb_b_tab = &swdata->colortab[3 * 256];
  1070     for (i = 0; i < 256; i++) {
  1071         /* Gamma correction (luminescence table) and chroma correction
  1072            would be done here.  See the Berkeley mpeg_play sources.
  1073          */
  1074         CB = CR = (i - 128);
  1075         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1076         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1077         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1078         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1079     }
  1080 
  1081     /* Find the pitch and offset values for the overlay */
  1082     switch (format) {
  1083     case SDL_PIXELFORMAT_YV12:
  1084     case SDL_PIXELFORMAT_IYUV:
  1085         swdata->pitches[0] = w;
  1086         swdata->pitches[1] = swdata->pitches[0] / 2;
  1087         swdata->pitches[2] = swdata->pitches[0] / 2;
  1088         swdata->planes[0] = swdata->pixels;
  1089         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1090         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1091         break;
  1092     case SDL_PIXELFORMAT_YUY2:
  1093     case SDL_PIXELFORMAT_UYVY:
  1094     case SDL_PIXELFORMAT_YVYU:
  1095         swdata->pitches[0] = w * 2;
  1096         swdata->planes[0] = swdata->pixels;
  1097         break;
  1098     default:
  1099         /* We should never get here (caught above) */
  1100         break;
  1101     }
  1102 
  1103     /* We're all done.. */
  1104     return (swdata);
  1105 }
  1106 
  1107 int
  1108 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1109                              int *pitch)
  1110 {
  1111     *pixels = swdata->planes[0];
  1112     *pitch = swdata->pitches[0];
  1113     return 0;
  1114 }
  1115 
  1116 int
  1117 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1118                         const void *pixels, int pitch)
  1119 {
  1120     switch (swdata->format) {
  1121     case SDL_PIXELFORMAT_YV12:
  1122     case SDL_PIXELFORMAT_IYUV:
  1123         if (rect->x == 0 && rect->y == 0 &&
  1124             rect->w == swdata->w && rect->h == swdata->h) {
  1125                 SDL_memcpy(swdata->pixels, pixels,
  1126                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1127         } else {
  1128             Uint8 *src, *dst;
  1129             int row;
  1130             size_t length;
  1131 
  1132             /* Copy the Y plane */
  1133             src = (Uint8 *) pixels;
  1134             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1135             length = rect->w;
  1136             for (row = 0; row < rect->h; ++row) {
  1137                 SDL_memcpy(dst, src, length);
  1138                 src += pitch;
  1139                 dst += swdata->w;
  1140             }
  1141 
  1142             /* Copy the next plane */
  1143             src = (Uint8 *) pixels + rect->h * pitch;
  1144             dst = swdata->pixels + swdata->h * swdata->w;
  1145             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1146             length = rect->w / 2;
  1147             for (row = 0; row < rect->h/2; ++row) {
  1148                 SDL_memcpy(dst, src, length);
  1149                 src += pitch/2;
  1150                 dst += swdata->w/2;
  1151             }
  1152 
  1153             /* Copy the next plane */
  1154             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1155             dst = swdata->pixels + swdata->h * swdata->w +
  1156                   (swdata->h * swdata->w) / 4;
  1157             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1158             length = rect->w / 2;
  1159             for (row = 0; row < rect->h/2; ++row) {
  1160                 SDL_memcpy(dst, src, length);
  1161                 src += pitch/2;
  1162                 dst += swdata->w/2;
  1163             }
  1164         }
  1165         break;
  1166     case SDL_PIXELFORMAT_YUY2:
  1167     case SDL_PIXELFORMAT_UYVY:
  1168     case SDL_PIXELFORMAT_YVYU:
  1169         {
  1170             Uint8 *src, *dst;
  1171             int row;
  1172             size_t length;
  1173 
  1174             src = (Uint8 *) pixels;
  1175             dst =
  1176                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1177                 rect->x * 2;
  1178             length = rect->w * 2;
  1179             for (row = 0; row < rect->h; ++row) {
  1180                 SDL_memcpy(dst, src, length);
  1181                 src += pitch;
  1182                 dst += swdata->pitches[0];
  1183             }
  1184         }
  1185         break;
  1186     }
  1187     return 0;
  1188 }
  1189 
  1190 int
  1191 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1192                       void **pixels, int *pitch)
  1193 {
  1194     switch (swdata->format) {
  1195     case SDL_PIXELFORMAT_YV12:
  1196     case SDL_PIXELFORMAT_IYUV:
  1197         if (rect
  1198             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1199                 || rect->h != swdata->h)) {
  1200             SDL_SetError
  1201                 ("YV12 and IYUV textures only support full surface locks");
  1202             return -1;
  1203         }
  1204         break;
  1205     }
  1206 
  1207     *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1208     *pitch = swdata->pitches[0];
  1209     return 0;
  1210 }
  1211 
  1212 void
  1213 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1214 {
  1215 }
  1216 
  1217 int
  1218 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1219                     Uint32 target_format, int w, int h, void *pixels,
  1220                     int pitch)
  1221 {
  1222     int stretch;
  1223     int scale_2x;
  1224     Uint8 *lum, *Cr, *Cb;
  1225     int mod;
  1226 
  1227     /* Make sure we're set up to display in the desired format */
  1228     if (target_format != swdata->target_format) {
  1229         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1230             return -1;
  1231         }
  1232     }
  1233 
  1234     stretch = 0;
  1235     scale_2x = 0;
  1236     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1237         || srcrect->h < swdata->h) {
  1238         /* The source rectangle has been clipped.
  1239            Using a scratch surface is easier than adding clipped
  1240            source support to all the blitters, plus that would
  1241            slow them down in the general unclipped case.
  1242          */
  1243         stretch = 1;
  1244     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1245         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1246             scale_2x = 1;
  1247         } else {
  1248             stretch = 1;
  1249         }
  1250     }
  1251     if (stretch) {
  1252         int bpp;
  1253         Uint32 Rmask, Gmask, Bmask, Amask;
  1254 
  1255         if (swdata->display) {
  1256             swdata->display->w = w;
  1257             swdata->display->h = h;
  1258             swdata->display->pixels = pixels;
  1259             swdata->display->pitch = pitch;
  1260         } else {
  1261             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1262             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1263                                        &Bmask, &Amask);
  1264             swdata->display =
  1265                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1266                                          Gmask, Bmask, Amask);
  1267             if (!swdata->display) {
  1268                 return (-1);
  1269             }
  1270         }
  1271         if (!swdata->stretch) {
  1272             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1273             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1274                                        &Bmask, &Amask);
  1275             swdata->stretch =
  1276                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1277                                      Gmask, Bmask, Amask);
  1278             if (!swdata->stretch) {
  1279                 return (-1);
  1280             }
  1281         }
  1282         pixels = swdata->stretch->pixels;
  1283         pitch = swdata->stretch->pitch;
  1284     }
  1285     switch (swdata->format) {
  1286     case SDL_PIXELFORMAT_YV12:
  1287         lum = swdata->planes[0];
  1288         Cr = swdata->planes[1];
  1289         Cb = swdata->planes[2];
  1290         break;
  1291     case SDL_PIXELFORMAT_IYUV:
  1292         lum = swdata->planes[0];
  1293         Cr = swdata->planes[2];
  1294         Cb = swdata->planes[1];
  1295         break;
  1296     case SDL_PIXELFORMAT_YUY2:
  1297         lum = swdata->planes[0];
  1298         Cr = lum + 3;
  1299         Cb = lum + 1;
  1300         break;
  1301     case SDL_PIXELFORMAT_UYVY:
  1302         lum = swdata->planes[0] + 1;
  1303         Cr = lum + 1;
  1304         Cb = lum - 1;
  1305         break;
  1306     case SDL_PIXELFORMAT_YVYU:
  1307         lum = swdata->planes[0];
  1308         Cr = lum + 1;
  1309         Cb = lum + 3;
  1310         break;
  1311     default:
  1312         SDL_SetError("Unsupported YUV format in copy");
  1313         return (-1);
  1314     }
  1315     mod = (pitch / SDL_BYTESPERPIXEL(target_format));
  1316 
  1317     if (scale_2x) {
  1318         mod -= (swdata->w * 2);
  1319         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1320                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1321     } else {
  1322         mod -= swdata->w;
  1323         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1324                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1325     }
  1326     if (stretch) {
  1327         SDL_Rect rect = *srcrect;
  1328         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1329     }
  1330     return 0;
  1331 }
  1332 
  1333 void
  1334 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1335 {
  1336     if (swdata) {
  1337         if (swdata->pixels) {
  1338             SDL_free(swdata->pixels);
  1339         }
  1340         if (swdata->colortab) {
  1341             SDL_free(swdata->colortab);
  1342         }
  1343         if (swdata->rgb_2_pix) {
  1344             SDL_free(swdata->rgb_2_pix);
  1345         }
  1346         if (swdata->stretch) {
  1347             SDL_FreeSurface(swdata->stretch);
  1348         }
  1349         if (swdata->display) {
  1350             SDL_FreeSurface(swdata->display);
  1351         }
  1352         SDL_free(swdata);
  1353     }
  1354 }
  1355 
  1356 /* vi: set ts=4 sw=4 expandtab: */