src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 15 Jun 2013 02:46:32 -0700
changeset 7332 b53acf6ab70b
parent 7191 75360622e65f
child 7487 a4e43eb67e79
permissions -rw-r--r--
Fixed some Visual Studio analyze warnings
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  *
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  *
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  *
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  *
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  *
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  *
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  *
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  *
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  *
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_video.h"
    86 #include "SDL_cpuinfo.h"
    87 #include "SDL_yuv_sw_c.h"
    88 
    89 
    90 /* The colorspace conversion functions */
    91 
    92 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    93 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    94                                     unsigned char *lum, unsigned char *cr,
    95                                     unsigned char *cb, unsigned char *out,
    96                                     int rows, int cols, int mod);
    97 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    98                                     unsigned char *lum, unsigned char *cr,
    99                                     unsigned char *cb, unsigned char *out,
   100                                     int rows, int cols, int mod);
   101 #endif
   102 
   103 static void
   104 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   105                        unsigned char *lum, unsigned char *cr,
   106                        unsigned char *cb, unsigned char *out,
   107                        int rows, int cols, int mod)
   108 {
   109     unsigned short *row1;
   110     unsigned short *row2;
   111     unsigned char *lum2;
   112     int x, y;
   113     int cr_r;
   114     int crb_g;
   115     int cb_b;
   116     int cols_2 = cols / 2;
   117 
   118     row1 = (unsigned short *) out;
   119     row2 = row1 + cols + mod;
   120     lum2 = lum + cols;
   121 
   122     mod += cols + mod;
   123 
   124     y = rows / 2;
   125     while (y--) {
   126         x = cols_2;
   127         while (x--) {
   128             register int L;
   129 
   130             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   131             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   132                 + colortab[*cb + 2 * 256];
   133             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   134             ++cr;
   135             ++cb;
   136 
   137             L = *lum++;
   138             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   139                                         rgb_2_pix[L + crb_g] |
   140                                         rgb_2_pix[L + cb_b]);
   141 
   142             L = *lum++;
   143             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   144                                         rgb_2_pix[L + crb_g] |
   145                                         rgb_2_pix[L + cb_b]);
   146 
   147 
   148             /* Now, do second row.  */
   149 
   150             L = *lum2++;
   151             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   152                                         rgb_2_pix[L + crb_g] |
   153                                         rgb_2_pix[L + cb_b]);
   154 
   155             L = *lum2++;
   156             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   157                                         rgb_2_pix[L + crb_g] |
   158                                         rgb_2_pix[L + cb_b]);
   159         }
   160 
   161         /*
   162          * These values are at the start of the next line, (due
   163          * to the ++'s above),but they need to be at the start
   164          * of the line after that.
   165          */
   166         lum += cols;
   167         lum2 += cols;
   168         row1 += mod;
   169         row2 += mod;
   170     }
   171 }
   172 
   173 static void
   174 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   175                        unsigned char *lum, unsigned char *cr,
   176                        unsigned char *cb, unsigned char *out,
   177                        int rows, int cols, int mod)
   178 {
   179     unsigned int value;
   180     unsigned char *row1;
   181     unsigned char *row2;
   182     unsigned char *lum2;
   183     int x, y;
   184     int cr_r;
   185     int crb_g;
   186     int cb_b;
   187     int cols_2 = cols / 2;
   188 
   189     row1 = out;
   190     row2 = row1 + cols * 3 + mod * 3;
   191     lum2 = lum + cols;
   192 
   193     mod += cols + mod;
   194     mod *= 3;
   195 
   196     y = rows / 2;
   197     while (y--) {
   198         x = cols_2;
   199         while (x--) {
   200             register int L;
   201 
   202             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   203             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   204                 + colortab[*cb + 2 * 256];
   205             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   206             ++cr;
   207             ++cb;
   208 
   209             L = *lum++;
   210             value = (rgb_2_pix[L + cr_r] |
   211                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   212             *row1++ = (value) & 0xFF;
   213             *row1++ = (value >> 8) & 0xFF;
   214             *row1++ = (value >> 16) & 0xFF;
   215 
   216             L = *lum++;
   217             value = (rgb_2_pix[L + cr_r] |
   218                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   219             *row1++ = (value) & 0xFF;
   220             *row1++ = (value >> 8) & 0xFF;
   221             *row1++ = (value >> 16) & 0xFF;
   222 
   223 
   224             /* Now, do second row.  */
   225 
   226             L = *lum2++;
   227             value = (rgb_2_pix[L + cr_r] |
   228                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   229             *row2++ = (value) & 0xFF;
   230             *row2++ = (value >> 8) & 0xFF;
   231             *row2++ = (value >> 16) & 0xFF;
   232 
   233             L = *lum2++;
   234             value = (rgb_2_pix[L + cr_r] |
   235                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   236             *row2++ = (value) & 0xFF;
   237             *row2++ = (value >> 8) & 0xFF;
   238             *row2++ = (value >> 16) & 0xFF;
   239         }
   240 
   241         /*
   242          * These values are at the start of the next line, (due
   243          * to the ++'s above),but they need to be at the start
   244          * of the line after that.
   245          */
   246         lum += cols;
   247         lum2 += cols;
   248         row1 += mod;
   249         row2 += mod;
   250     }
   251 }
   252 
   253 static void
   254 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   255                        unsigned char *lum, unsigned char *cr,
   256                        unsigned char *cb, unsigned char *out,
   257                        int rows, int cols, int mod)
   258 {
   259     unsigned int *row1;
   260     unsigned int *row2;
   261     unsigned char *lum2;
   262     int x, y;
   263     int cr_r;
   264     int crb_g;
   265     int cb_b;
   266     int cols_2 = cols / 2;
   267 
   268     row1 = (unsigned int *) out;
   269     row2 = row1 + cols + mod;
   270     lum2 = lum + cols;
   271 
   272     mod += cols + mod;
   273 
   274     y = rows / 2;
   275     while (y--) {
   276         x = cols_2;
   277         while (x--) {
   278             register int L;
   279 
   280             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   281             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   282                 + colortab[*cb + 2 * 256];
   283             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   284             ++cr;
   285             ++cb;
   286 
   287             L = *lum++;
   288             *row1++ = (rgb_2_pix[L + cr_r] |
   289                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   290 
   291             L = *lum++;
   292             *row1++ = (rgb_2_pix[L + cr_r] |
   293                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   294 
   295 
   296             /* Now, do second row.  */
   297 
   298             L = *lum2++;
   299             *row2++ = (rgb_2_pix[L + cr_r] |
   300                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   301 
   302             L = *lum2++;
   303             *row2++ = (rgb_2_pix[L + cr_r] |
   304                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   305         }
   306 
   307         /*
   308          * These values are at the start of the next line, (due
   309          * to the ++'s above),but they need to be at the start
   310          * of the line after that.
   311          */
   312         lum += cols;
   313         lum2 += cols;
   314         row1 += mod;
   315         row2 += mod;
   316     }
   317 }
   318 
   319 /*
   320  * In this function I make use of a nasty trick. The tables have the lower
   321  * 16 bits replicated in the upper 16. This means I can write ints and get
   322  * the horisontal doubling for free (almost).
   323  */
   324 static void
   325 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   326                        unsigned char *lum, unsigned char *cr,
   327                        unsigned char *cb, unsigned char *out,
   328                        int rows, int cols, int mod)
   329 {
   330     unsigned int *row1 = (unsigned int *) out;
   331     const int next_row = cols + (mod / 2);
   332     unsigned int *row2 = row1 + 2 * next_row;
   333     unsigned char *lum2;
   334     int x, y;
   335     int cr_r;
   336     int crb_g;
   337     int cb_b;
   338     int cols_2 = cols / 2;
   339 
   340     lum2 = lum + cols;
   341 
   342     mod = (next_row * 3) + (mod / 2);
   343 
   344     y = rows / 2;
   345     while (y--) {
   346         x = cols_2;
   347         while (x--) {
   348             register int L;
   349 
   350             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   351             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   352                 + colortab[*cb + 2 * 256];
   353             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   354             ++cr;
   355             ++cb;
   356 
   357             L = *lum++;
   358             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   359                                         rgb_2_pix[L + crb_g] |
   360                                         rgb_2_pix[L + cb_b]);
   361             row1++;
   362 
   363             L = *lum++;
   364             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   365                                         rgb_2_pix[L + crb_g] |
   366                                         rgb_2_pix[L + cb_b]);
   367             row1++;
   368 
   369 
   370             /* Now, do second row. */
   371 
   372             L = *lum2++;
   373             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   374                                         rgb_2_pix[L + crb_g] |
   375                                         rgb_2_pix[L + cb_b]);
   376             row2++;
   377 
   378             L = *lum2++;
   379             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   380                                         rgb_2_pix[L + crb_g] |
   381                                         rgb_2_pix[L + cb_b]);
   382             row2++;
   383         }
   384 
   385         /*
   386          * These values are at the start of the next line, (due
   387          * to the ++'s above),but they need to be at the start
   388          * of the line after that.
   389          */
   390         lum += cols;
   391         lum2 += cols;
   392         row1 += mod;
   393         row2 += mod;
   394     }
   395 }
   396 
   397 static void
   398 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   399                        unsigned char *lum, unsigned char *cr,
   400                        unsigned char *cb, unsigned char *out,
   401                        int rows, int cols, int mod)
   402 {
   403     unsigned int value;
   404     unsigned char *row1 = out;
   405     const int next_row = (cols * 2 + mod) * 3;
   406     unsigned char *row2 = row1 + 2 * next_row;
   407     unsigned char *lum2;
   408     int x, y;
   409     int cr_r;
   410     int crb_g;
   411     int cb_b;
   412     int cols_2 = cols / 2;
   413 
   414     lum2 = lum + cols;
   415 
   416     mod = next_row * 3 + mod * 3;
   417 
   418     y = rows / 2;
   419     while (y--) {
   420         x = cols_2;
   421         while (x--) {
   422             register int L;
   423 
   424             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   425             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   426                 + colortab[*cb + 2 * 256];
   427             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   428             ++cr;
   429             ++cb;
   430 
   431             L = *lum++;
   432             value = (rgb_2_pix[L + cr_r] |
   433                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   434             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   435                 row1[next_row + 3 + 0] = (value) & 0xFF;
   436             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   437                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   438             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   439                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   440             row1 += 2 * 3;
   441 
   442             L = *lum++;
   443             value = (rgb_2_pix[L + cr_r] |
   444                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   445             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   446                 row1[next_row + 3 + 0] = (value) & 0xFF;
   447             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   448                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   449             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   450                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   451             row1 += 2 * 3;
   452 
   453 
   454             /* Now, do second row. */
   455 
   456             L = *lum2++;
   457             value = (rgb_2_pix[L + cr_r] |
   458                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   459             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   460                 row2[next_row + 3 + 0] = (value) & 0xFF;
   461             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   462                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   463             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   464                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   465             row2 += 2 * 3;
   466 
   467             L = *lum2++;
   468             value = (rgb_2_pix[L + cr_r] |
   469                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   470             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   471                 row2[next_row + 3 + 0] = (value) & 0xFF;
   472             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   473                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   474             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   475                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   476             row2 += 2 * 3;
   477         }
   478 
   479         /*
   480          * These values are at the start of the next line, (due
   481          * to the ++'s above),but they need to be at the start
   482          * of the line after that.
   483          */
   484         lum += cols;
   485         lum2 += cols;
   486         row1 += mod;
   487         row2 += mod;
   488     }
   489 }
   490 
   491 static void
   492 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   493                        unsigned char *lum, unsigned char *cr,
   494                        unsigned char *cb, unsigned char *out,
   495                        int rows, int cols, int mod)
   496 {
   497     unsigned int *row1 = (unsigned int *) out;
   498     const int next_row = cols * 2 + mod;
   499     unsigned int *row2 = row1 + 2 * next_row;
   500     unsigned char *lum2;
   501     int x, y;
   502     int cr_r;
   503     int crb_g;
   504     int cb_b;
   505     int cols_2 = cols / 2;
   506 
   507     lum2 = lum + cols;
   508 
   509     mod = (next_row * 3) + mod;
   510 
   511     y = rows / 2;
   512     while (y--) {
   513         x = cols_2;
   514         while (x--) {
   515             register int L;
   516 
   517             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   518             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   519                 + colortab[*cb + 2 * 256];
   520             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   521             ++cr;
   522             ++cb;
   523 
   524             L = *lum++;
   525             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   526                 (rgb_2_pix[L + cr_r] |
   527                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   528             row1 += 2;
   529 
   530             L = *lum++;
   531             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   532                 (rgb_2_pix[L + cr_r] |
   533                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   534             row1 += 2;
   535 
   536 
   537             /* Now, do second row. */
   538 
   539             L = *lum2++;
   540             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   541                 (rgb_2_pix[L + cr_r] |
   542                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   543             row2 += 2;
   544 
   545             L = *lum2++;
   546             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   547                 (rgb_2_pix[L + cr_r] |
   548                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   549             row2 += 2;
   550         }
   551 
   552         /*
   553          * These values are at the start of the next line, (due
   554          * to the ++'s above),but they need to be at the start
   555          * of the line after that.
   556          */
   557         lum += cols;
   558         lum2 += cols;
   559         row1 += mod;
   560         row2 += mod;
   561     }
   562 }
   563 
   564 static void
   565 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   566                        unsigned char *lum, unsigned char *cr,
   567                        unsigned char *cb, unsigned char *out,
   568                        int rows, int cols, int mod)
   569 {
   570     unsigned short *row;
   571     int x, y;
   572     int cr_r;
   573     int crb_g;
   574     int cb_b;
   575     int cols_2 = cols / 2;
   576 
   577     row = (unsigned short *) out;
   578 
   579     y = rows;
   580     while (y--) {
   581         x = cols_2;
   582         while (x--) {
   583             register int L;
   584 
   585             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   586             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   587                 + colortab[*cb + 2 * 256];
   588             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   589             cr += 4;
   590             cb += 4;
   591 
   592             L = *lum;
   593             lum += 2;
   594             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   595                                        rgb_2_pix[L + crb_g] |
   596                                        rgb_2_pix[L + cb_b]);
   597 
   598             L = *lum;
   599             lum += 2;
   600             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   601                                        rgb_2_pix[L + crb_g] |
   602                                        rgb_2_pix[L + cb_b]);
   603 
   604         }
   605 
   606         row += mod;
   607     }
   608 }
   609 
   610 static void
   611 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   612                        unsigned char *lum, unsigned char *cr,
   613                        unsigned char *cb, unsigned char *out,
   614                        int rows, int cols, int mod)
   615 {
   616     unsigned int value;
   617     unsigned char *row;
   618     int x, y;
   619     int cr_r;
   620     int crb_g;
   621     int cb_b;
   622     int cols_2 = cols / 2;
   623 
   624     row = (unsigned char *) out;
   625     mod *= 3;
   626     y = rows;
   627     while (y--) {
   628         x = cols_2;
   629         while (x--) {
   630             register int L;
   631 
   632             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   633             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   634                 + colortab[*cb + 2 * 256];
   635             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   636             cr += 4;
   637             cb += 4;
   638 
   639             L = *lum;
   640             lum += 2;
   641             value = (rgb_2_pix[L + cr_r] |
   642                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   643             *row++ = (value) & 0xFF;
   644             *row++ = (value >> 8) & 0xFF;
   645             *row++ = (value >> 16) & 0xFF;
   646 
   647             L = *lum;
   648             lum += 2;
   649             value = (rgb_2_pix[L + cr_r] |
   650                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   651             *row++ = (value) & 0xFF;
   652             *row++ = (value >> 8) & 0xFF;
   653             *row++ = (value >> 16) & 0xFF;
   654 
   655         }
   656         row += mod;
   657     }
   658 }
   659 
   660 static void
   661 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   662                        unsigned char *lum, unsigned char *cr,
   663                        unsigned char *cb, unsigned char *out,
   664                        int rows, int cols, int mod)
   665 {
   666     unsigned int *row;
   667     int x, y;
   668     int cr_r;
   669     int crb_g;
   670     int cb_b;
   671     int cols_2 = cols / 2;
   672 
   673     row = (unsigned int *) out;
   674     y = rows;
   675     while (y--) {
   676         x = cols_2;
   677         while (x--) {
   678             register int L;
   679 
   680             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   681             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   682                 + colortab[*cb + 2 * 256];
   683             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   684             cr += 4;
   685             cb += 4;
   686 
   687             L = *lum;
   688             lum += 2;
   689             *row++ = (rgb_2_pix[L + cr_r] |
   690                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   691 
   692             L = *lum;
   693             lum += 2;
   694             *row++ = (rgb_2_pix[L + cr_r] |
   695                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   696 
   697 
   698         }
   699         row += mod;
   700     }
   701 }
   702 
   703 /*
   704  * In this function I make use of a nasty trick. The tables have the lower
   705  * 16 bits replicated in the upper 16. This means I can write ints and get
   706  * the horisontal doubling for free (almost).
   707  */
   708 static void
   709 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   710                        unsigned char *lum, unsigned char *cr,
   711                        unsigned char *cb, unsigned char *out,
   712                        int rows, int cols, int mod)
   713 {
   714     unsigned int *row = (unsigned int *) out;
   715     const int next_row = cols + (mod / 2);
   716     int x, y;
   717     int cr_r;
   718     int crb_g;
   719     int cb_b;
   720     int cols_2 = cols / 2;
   721 
   722     y = rows;
   723     while (y--) {
   724         x = cols_2;
   725         while (x--) {
   726             register int L;
   727 
   728             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   729             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   730                 + colortab[*cb + 2 * 256];
   731             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   732             cr += 4;
   733             cb += 4;
   734 
   735             L = *lum;
   736             lum += 2;
   737             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   738                                       rgb_2_pix[L + crb_g] |
   739                                       rgb_2_pix[L + cb_b]);
   740             row++;
   741 
   742             L = *lum;
   743             lum += 2;
   744             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   745                                       rgb_2_pix[L + crb_g] |
   746                                       rgb_2_pix[L + cb_b]);
   747             row++;
   748 
   749         }
   750         row += next_row;
   751     }
   752 }
   753 
   754 static void
   755 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   756                        unsigned char *lum, unsigned char *cr,
   757                        unsigned char *cb, unsigned char *out,
   758                        int rows, int cols, int mod)
   759 {
   760     unsigned int value;
   761     unsigned char *row = out;
   762     const int next_row = (cols * 2 + mod) * 3;
   763     int x, y;
   764     int cr_r;
   765     int crb_g;
   766     int cb_b;
   767     int cols_2 = cols / 2;
   768     y = rows;
   769     while (y--) {
   770         x = cols_2;
   771         while (x--) {
   772             register int L;
   773 
   774             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   775             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   776                 + colortab[*cb + 2 * 256];
   777             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   778             cr += 4;
   779             cb += 4;
   780 
   781             L = *lum;
   782             lum += 2;
   783             value = (rgb_2_pix[L + cr_r] |
   784                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   785             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   786                 row[next_row + 3 + 0] = (value) & 0xFF;
   787             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   788                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   789             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   790                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   791             row += 2 * 3;
   792 
   793             L = *lum;
   794             lum += 2;
   795             value = (rgb_2_pix[L + cr_r] |
   796                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   797             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   798                 row[next_row + 3 + 0] = (value) & 0xFF;
   799             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   800                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   801             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   802                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   803             row += 2 * 3;
   804 
   805         }
   806         row += next_row;
   807     }
   808 }
   809 
   810 static void
   811 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   812                        unsigned char *lum, unsigned char *cr,
   813                        unsigned char *cb, unsigned char *out,
   814                        int rows, int cols, int mod)
   815 {
   816     unsigned int *row = (unsigned int *) out;
   817     const int next_row = cols * 2 + mod;
   818     int x, y;
   819     int cr_r;
   820     int crb_g;
   821     int cb_b;
   822     int cols_2 = cols / 2;
   823     mod += mod;
   824     y = rows;
   825     while (y--) {
   826         x = cols_2;
   827         while (x--) {
   828             register int L;
   829 
   830             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   831             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   832                 + colortab[*cb + 2 * 256];
   833             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   834             cr += 4;
   835             cb += 4;
   836 
   837             L = *lum;
   838             lum += 2;
   839             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   840                 (rgb_2_pix[L + cr_r] |
   841                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   842             row += 2;
   843 
   844             L = *lum;
   845             lum += 2;
   846             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   847                 (rgb_2_pix[L + cr_r] |
   848                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   849             row += 2;
   850 
   851 
   852         }
   853 
   854         row += next_row;
   855     }
   856 }
   857 
   858 /*
   859  * How many 1 bits are there in the Uint32.
   860  * Low performance, do not call often.
   861  */
   862 static int
   863 number_of_bits_set(Uint32 a)
   864 {
   865     if (!a)
   866         return 0;
   867     if (a & 1)
   868         return 1 + number_of_bits_set(a >> 1);
   869     return (number_of_bits_set(a >> 1));
   870 }
   871 
   872 /*
   873  * How many 0 bits are there at least significant end of Uint32.
   874  * Low performance, do not call often.
   875  */
   876 static int
   877 free_bits_at_bottom(Uint32 a)
   878 {
   879     /* assume char is 8 bits */
   880     if (!a)
   881         return sizeof(Uint32) * 8;
   882     if (((Sint32) a) & 1l)
   883         return 0;
   884     return 1 + free_bits_at_bottom(a >> 1);
   885 }
   886 
   887 static int
   888 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   889 {
   890     Uint32 *r_2_pix_alloc;
   891     Uint32 *g_2_pix_alloc;
   892     Uint32 *b_2_pix_alloc;
   893     int i;
   894     int bpp;
   895     Uint32 Rmask, Gmask, Bmask, Amask;
   896 
   897     if (!SDL_PixelFormatEnumToMasks
   898         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   899         return SDL_SetError("Unsupported YUV destination format");
   900     }
   901 
   902     swdata->target_format = target_format;
   903     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   904     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   905     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   906 
   907     /*
   908      * Set up entries 0-255 in rgb-to-pixel value tables.
   909      */
   910     for (i = 0; i < 256; ++i) {
   911         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   912         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
   913         r_2_pix_alloc[i + 256] |= Amask;
   914         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   915         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
   916         g_2_pix_alloc[i + 256] |= Amask;
   917         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   918         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
   919         b_2_pix_alloc[i + 256] |= Amask;
   920     }
   921 
   922     /*
   923      * If we have 16-bit output depth, then we double the value
   924      * in the top word. This means that we can write out both
   925      * pixels in the pixel doubling mode with one op. It is
   926      * harmless in the normal case as storing a 32-bit value
   927      * through a short pointer will lose the top bits anyway.
   928      */
   929     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   930         for (i = 0; i < 256; ++i) {
   931             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   932             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   933             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   934         }
   935     }
   936 
   937     /*
   938      * Spread out the values we have to the rest of the array so that
   939      * we do not need to check for overflow.
   940      */
   941     for (i = 0; i < 256; ++i) {
   942         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   943         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   944         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   945         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   946         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   947         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   948     }
   949 
   950     /* You have chosen wisely... */
   951     switch (swdata->format) {
   952     case SDL_PIXELFORMAT_YV12:
   953     case SDL_PIXELFORMAT_IYUV:
   954         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   955 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   956             /* inline assembly functions */
   957             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   958                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   959                 && (swdata->w & 15) == 0) {
   960 /*printf("Using MMX 16-bit 565 dither\n");*/
   961                 swdata->Display1X = Color565DitherYV12MMX1X;
   962             } else {
   963 /*printf("Using C 16-bit dither\n");*/
   964                 swdata->Display1X = Color16DitherYV12Mod1X;
   965             }
   966 #else
   967             swdata->Display1X = Color16DitherYV12Mod1X;
   968 #endif
   969             swdata->Display2X = Color16DitherYV12Mod2X;
   970         }
   971         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   972             swdata->Display1X = Color24DitherYV12Mod1X;
   973             swdata->Display2X = Color24DitherYV12Mod2X;
   974         }
   975         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   976 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   977             /* inline assembly functions */
   978             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   979                 (Gmask == 0x0000FF00) &&
   980                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   981 /*printf("Using MMX 32-bit dither\n");*/
   982                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   983             } else {
   984 /*printf("Using C 32-bit dither\n");*/
   985                 swdata->Display1X = Color32DitherYV12Mod1X;
   986             }
   987 #else
   988             swdata->Display1X = Color32DitherYV12Mod1X;
   989 #endif
   990             swdata->Display2X = Color32DitherYV12Mod2X;
   991         }
   992         break;
   993     case SDL_PIXELFORMAT_YUY2:
   994     case SDL_PIXELFORMAT_UYVY:
   995     case SDL_PIXELFORMAT_YVYU:
   996         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   997             swdata->Display1X = Color16DitherYUY2Mod1X;
   998             swdata->Display2X = Color16DitherYUY2Mod2X;
   999         }
  1000         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1001             swdata->Display1X = Color24DitherYUY2Mod1X;
  1002             swdata->Display2X = Color24DitherYUY2Mod2X;
  1003         }
  1004         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1005             swdata->Display1X = Color32DitherYUY2Mod1X;
  1006             swdata->Display2X = Color32DitherYUY2Mod2X;
  1007         }
  1008         break;
  1009     default:
  1010         /* We should never get here (caught above) */
  1011         break;
  1012     }
  1013 
  1014     if (swdata->display) {
  1015         SDL_FreeSurface(swdata->display);
  1016         swdata->display = NULL;
  1017     }
  1018     return 0;
  1019 }
  1020 
  1021 SDL_SW_YUVTexture *
  1022 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1023 {
  1024     SDL_SW_YUVTexture *swdata;
  1025     int *Cr_r_tab;
  1026     int *Cr_g_tab;
  1027     int *Cb_g_tab;
  1028     int *Cb_b_tab;
  1029     int i;
  1030     int CR, CB;
  1031 
  1032     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1033     if (!swdata) {
  1034         SDL_OutOfMemory();
  1035         return NULL;
  1036     }
  1037 
  1038     switch (format) {
  1039     case SDL_PIXELFORMAT_YV12:
  1040     case SDL_PIXELFORMAT_IYUV:
  1041     case SDL_PIXELFORMAT_YUY2:
  1042     case SDL_PIXELFORMAT_UYVY:
  1043     case SDL_PIXELFORMAT_YVYU:
  1044         break;
  1045     default:
  1046         SDL_SW_DestroyYUVTexture(swdata);
  1047         SDL_SetError("Unsupported YUV format");
  1048         return NULL;
  1049     }
  1050 
  1051     swdata->format = format;
  1052     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1053     swdata->w = w;
  1054     swdata->h = h;
  1055     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1056     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1057     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1058     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1059         SDL_SW_DestroyYUVTexture(swdata);
  1060         SDL_OutOfMemory();
  1061         return NULL;
  1062     }
  1063 
  1064     /* Generate the tables for the display surface */
  1065     Cr_r_tab = &swdata->colortab[0 * 256];
  1066     Cr_g_tab = &swdata->colortab[1 * 256];
  1067     Cb_g_tab = &swdata->colortab[2 * 256];
  1068     Cb_b_tab = &swdata->colortab[3 * 256];
  1069     for (i = 0; i < 256; i++) {
  1070         /* Gamma correction (luminescence table) and chroma correction
  1071            would be done here.  See the Berkeley mpeg_play sources.
  1072          */
  1073         CB = CR = (i - 128);
  1074         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1075         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1076         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1077         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1078     }
  1079 
  1080     /* Find the pitch and offset values for the overlay */
  1081     switch (format) {
  1082     case SDL_PIXELFORMAT_YV12:
  1083     case SDL_PIXELFORMAT_IYUV:
  1084         swdata->pitches[0] = w;
  1085         swdata->pitches[1] = swdata->pitches[0] / 2;
  1086         swdata->pitches[2] = swdata->pitches[0] / 2;
  1087         swdata->planes[0] = swdata->pixels;
  1088         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1089         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1090         break;
  1091     case SDL_PIXELFORMAT_YUY2:
  1092     case SDL_PIXELFORMAT_UYVY:
  1093     case SDL_PIXELFORMAT_YVYU:
  1094         swdata->pitches[0] = w * 2;
  1095         swdata->planes[0] = swdata->pixels;
  1096         break;
  1097     default:
  1098         /* We should never get here (caught above) */
  1099         break;
  1100     }
  1101 
  1102     /* We're all done.. */
  1103     return (swdata);
  1104 }
  1105 
  1106 int
  1107 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1108                              int *pitch)
  1109 {
  1110     *pixels = swdata->planes[0];
  1111     *pitch = swdata->pitches[0];
  1112     return 0;
  1113 }
  1114 
  1115 int
  1116 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1117                         const void *pixels, int pitch)
  1118 {
  1119     switch (swdata->format) {
  1120     case SDL_PIXELFORMAT_YV12:
  1121     case SDL_PIXELFORMAT_IYUV:
  1122         if (rect->x == 0 && rect->y == 0 &&
  1123             rect->w == swdata->w && rect->h == swdata->h) {
  1124                 SDL_memcpy(swdata->pixels, pixels,
  1125                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1126         } else {
  1127             Uint8 *src, *dst;
  1128             int row;
  1129             size_t length;
  1130 
  1131             /* Copy the Y plane */
  1132             src = (Uint8 *) pixels;
  1133             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1134             length = rect->w;
  1135             for (row = 0; row < rect->h; ++row) {
  1136                 SDL_memcpy(dst, src, length);
  1137                 src += pitch;
  1138                 dst += swdata->w;
  1139             }
  1140 
  1141             /* Copy the next plane */
  1142             src = (Uint8 *) pixels + rect->h * pitch;
  1143             dst = swdata->pixels + swdata->h * swdata->w;
  1144             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1145             length = rect->w / 2;
  1146             for (row = 0; row < rect->h/2; ++row) {
  1147                 SDL_memcpy(dst, src, length);
  1148                 src += pitch/2;
  1149                 dst += swdata->w/2;
  1150             }
  1151 
  1152             /* Copy the next plane */
  1153             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1154             dst = swdata->pixels + swdata->h * swdata->w +
  1155                   (swdata->h * swdata->w) / 4;
  1156             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1157             length = rect->w / 2;
  1158             for (row = 0; row < rect->h/2; ++row) {
  1159                 SDL_memcpy(dst, src, length);
  1160                 src += pitch/2;
  1161                 dst += swdata->w/2;
  1162             }
  1163         }
  1164         break;
  1165     case SDL_PIXELFORMAT_YUY2:
  1166     case SDL_PIXELFORMAT_UYVY:
  1167     case SDL_PIXELFORMAT_YVYU:
  1168         {
  1169             Uint8 *src, *dst;
  1170             int row;
  1171             size_t length;
  1172 
  1173             src = (Uint8 *) pixels;
  1174             dst =
  1175                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1176                 rect->x * 2;
  1177             length = rect->w * 2;
  1178             for (row = 0; row < rect->h; ++row) {
  1179                 SDL_memcpy(dst, src, length);
  1180                 src += pitch;
  1181                 dst += swdata->pitches[0];
  1182             }
  1183         }
  1184         break;
  1185     }
  1186     return 0;
  1187 }
  1188 
  1189 int
  1190 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1191                       void **pixels, int *pitch)
  1192 {
  1193     switch (swdata->format) {
  1194     case SDL_PIXELFORMAT_YV12:
  1195     case SDL_PIXELFORMAT_IYUV:
  1196         if (rect
  1197             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1198                 || rect->h != swdata->h)) {
  1199             return SDL_SetError
  1200                 ("YV12 and IYUV textures only support full surface locks");
  1201         }
  1202         break;
  1203     }
  1204 
  1205     if (rect) {
  1206         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1207     } else {
  1208         *pixels = swdata->planes[0];
  1209     }
  1210     *pitch = swdata->pitches[0];
  1211     return 0;
  1212 }
  1213 
  1214 void
  1215 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1216 {
  1217 }
  1218 
  1219 int
  1220 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1221                     Uint32 target_format, int w, int h, void *pixels,
  1222                     int pitch)
  1223 {
  1224     int stretch;
  1225     int scale_2x;
  1226     Uint8 *lum, *Cr, *Cb;
  1227     int mod;
  1228 
  1229     /* Make sure we're set up to display in the desired format */
  1230     if (target_format != swdata->target_format) {
  1231         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1232             return -1;
  1233         }
  1234     }
  1235 
  1236     stretch = 0;
  1237     scale_2x = 0;
  1238     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1239         || srcrect->h < swdata->h) {
  1240         /* The source rectangle has been clipped.
  1241            Using a scratch surface is easier than adding clipped
  1242            source support to all the blitters, plus that would
  1243            slow them down in the general unclipped case.
  1244          */
  1245         stretch = 1;
  1246     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1247         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1248             scale_2x = 1;
  1249         } else {
  1250             stretch = 1;
  1251         }
  1252     }
  1253     if (stretch) {
  1254         int bpp;
  1255         Uint32 Rmask, Gmask, Bmask, Amask;
  1256 
  1257         if (swdata->display) {
  1258             swdata->display->w = w;
  1259             swdata->display->h = h;
  1260             swdata->display->pixels = pixels;
  1261             swdata->display->pitch = pitch;
  1262         } else {
  1263             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1264             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1265                                        &Bmask, &Amask);
  1266             swdata->display =
  1267                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1268                                          Gmask, Bmask, Amask);
  1269             if (!swdata->display) {
  1270                 return (-1);
  1271             }
  1272         }
  1273         if (!swdata->stretch) {
  1274             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1275             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1276                                        &Bmask, &Amask);
  1277             swdata->stretch =
  1278                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1279                                      Gmask, Bmask, Amask);
  1280             if (!swdata->stretch) {
  1281                 return (-1);
  1282             }
  1283         }
  1284         pixels = swdata->stretch->pixels;
  1285         pitch = swdata->stretch->pitch;
  1286     }
  1287     switch (swdata->format) {
  1288     case SDL_PIXELFORMAT_YV12:
  1289         lum = swdata->planes[0];
  1290         Cr = swdata->planes[1];
  1291         Cb = swdata->planes[2];
  1292         break;
  1293     case SDL_PIXELFORMAT_IYUV:
  1294         lum = swdata->planes[0];
  1295         Cr = swdata->planes[2];
  1296         Cb = swdata->planes[1];
  1297         break;
  1298     case SDL_PIXELFORMAT_YUY2:
  1299         lum = swdata->planes[0];
  1300         Cr = lum + 3;
  1301         Cb = lum + 1;
  1302         break;
  1303     case SDL_PIXELFORMAT_UYVY:
  1304         lum = swdata->planes[0] + 1;
  1305         Cr = lum + 1;
  1306         Cb = lum - 1;
  1307         break;
  1308     case SDL_PIXELFORMAT_YVYU:
  1309         lum = swdata->planes[0];
  1310         Cr = lum + 1;
  1311         Cb = lum + 3;
  1312         break;
  1313     default:
  1314         return SDL_SetError("Unsupported YUV format in copy");
  1315     }
  1316     mod = (pitch / SDL_BYTESPERPIXEL(target_format));
  1317 
  1318     if (scale_2x) {
  1319         mod -= (swdata->w * 2);
  1320         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1321                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1322     } else {
  1323         mod -= swdata->w;
  1324         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1325                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1326     }
  1327     if (stretch) {
  1328         SDL_Rect rect = *srcrect;
  1329         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1330     }
  1331     return 0;
  1332 }
  1333 
  1334 void
  1335 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1336 {
  1337     if (swdata) {
  1338         if (swdata->pixels) {
  1339             SDL_free(swdata->pixels);
  1340         }
  1341         if (swdata->colortab) {
  1342             SDL_free(swdata->colortab);
  1343         }
  1344         if (swdata->rgb_2_pix) {
  1345             SDL_free(swdata->rgb_2_pix);
  1346         }
  1347         if (swdata->stretch) {
  1348             SDL_FreeSurface(swdata->stretch);
  1349         }
  1350         if (swdata->display) {
  1351             SDL_FreeSurface(swdata->display);
  1352         }
  1353         SDL_free(swdata);
  1354     }
  1355 }
  1356 
  1357 /* vi: set ts=4 sw=4 expandtab: */