src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 30 Dec 2011 18:19:35 -0500
changeset 6136 731cd0659cb5
parent 5535 96594ac5fd1a
child 6138 4c64952a58fb
permissions -rw-r--r--
Added the ability to update a subrect of a YV12/IYUV texture.
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2011 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  * 
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  * 
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  * 
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  * 
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  * 
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  * 
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  * 
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  * 
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  * 
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_video.h"
    86 #include "SDL_cpuinfo.h"
    87 #include "SDL_yuv_sw_c.h"
    88 
    89 
    90 /* The colorspace conversion functions */
    91 
    92 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    93 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    94                                     unsigned char *lum, unsigned char *cr,
    95                                     unsigned char *cb, unsigned char *out,
    96                                     int rows, int cols, int mod);
    97 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    98                                     unsigned char *lum, unsigned char *cr,
    99                                     unsigned char *cb, unsigned char *out,
   100                                     int rows, int cols, int mod);
   101 #endif
   102 
   103 static void
   104 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   105                        unsigned char *lum, unsigned char *cr,
   106                        unsigned char *cb, unsigned char *out,
   107                        int rows, int cols, int mod)
   108 {
   109     unsigned short *row1;
   110     unsigned short *row2;
   111     unsigned char *lum2;
   112     int x, y;
   113     int cr_r;
   114     int crb_g;
   115     int cb_b;
   116     int cols_2 = cols / 2;
   117 
   118     row1 = (unsigned short *) out;
   119     row2 = row1 + cols + mod;
   120     lum2 = lum + cols;
   121 
   122     mod += cols + mod;
   123 
   124     y = rows / 2;
   125     while (y--) {
   126         x = cols_2;
   127         while (x--) {
   128             register int L;
   129 
   130             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   131             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   132                 + colortab[*cb + 2 * 256];
   133             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   134             ++cr;
   135             ++cb;
   136 
   137             L = *lum++;
   138             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   139                                         rgb_2_pix[L + crb_g] |
   140                                         rgb_2_pix[L + cb_b]);
   141 
   142             L = *lum++;
   143             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   144                                         rgb_2_pix[L + crb_g] |
   145                                         rgb_2_pix[L + cb_b]);
   146 
   147 
   148             /* Now, do second row.  */
   149 
   150             L = *lum2++;
   151             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   152                                         rgb_2_pix[L + crb_g] |
   153                                         rgb_2_pix[L + cb_b]);
   154 
   155             L = *lum2++;
   156             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   157                                         rgb_2_pix[L + crb_g] |
   158                                         rgb_2_pix[L + cb_b]);
   159         }
   160 
   161         /*
   162          * These values are at the start of the next line, (due
   163          * to the ++'s above),but they need to be at the start
   164          * of the line after that.
   165          */
   166         lum += cols;
   167         lum2 += cols;
   168         row1 += mod;
   169         row2 += mod;
   170     }
   171 }
   172 
   173 static void
   174 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   175                        unsigned char *lum, unsigned char *cr,
   176                        unsigned char *cb, unsigned char *out,
   177                        int rows, int cols, int mod)
   178 {
   179     unsigned int value;
   180     unsigned char *row1;
   181     unsigned char *row2;
   182     unsigned char *lum2;
   183     int x, y;
   184     int cr_r;
   185     int crb_g;
   186     int cb_b;
   187     int cols_2 = cols / 2;
   188 
   189     row1 = out;
   190     row2 = row1 + cols * 3 + mod * 3;
   191     lum2 = lum + cols;
   192 
   193     mod += cols + mod;
   194     mod *= 3;
   195 
   196     y = rows / 2;
   197     while (y--) {
   198         x = cols_2;
   199         while (x--) {
   200             register int L;
   201 
   202             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   203             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   204                 + colortab[*cb + 2 * 256];
   205             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   206             ++cr;
   207             ++cb;
   208 
   209             L = *lum++;
   210             value = (rgb_2_pix[L + cr_r] |
   211                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   212             *row1++ = (value) & 0xFF;
   213             *row1++ = (value >> 8) & 0xFF;
   214             *row1++ = (value >> 16) & 0xFF;
   215 
   216             L = *lum++;
   217             value = (rgb_2_pix[L + cr_r] |
   218                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   219             *row1++ = (value) & 0xFF;
   220             *row1++ = (value >> 8) & 0xFF;
   221             *row1++ = (value >> 16) & 0xFF;
   222 
   223 
   224             /* Now, do second row.  */
   225 
   226             L = *lum2++;
   227             value = (rgb_2_pix[L + cr_r] |
   228                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   229             *row2++ = (value) & 0xFF;
   230             *row2++ = (value >> 8) & 0xFF;
   231             *row2++ = (value >> 16) & 0xFF;
   232 
   233             L = *lum2++;
   234             value = (rgb_2_pix[L + cr_r] |
   235                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   236             *row2++ = (value) & 0xFF;
   237             *row2++ = (value >> 8) & 0xFF;
   238             *row2++ = (value >> 16) & 0xFF;
   239         }
   240 
   241         /*
   242          * These values are at the start of the next line, (due
   243          * to the ++'s above),but they need to be at the start
   244          * of the line after that.
   245          */
   246         lum += cols;
   247         lum2 += cols;
   248         row1 += mod;
   249         row2 += mod;
   250     }
   251 }
   252 
   253 static void
   254 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   255                        unsigned char *lum, unsigned char *cr,
   256                        unsigned char *cb, unsigned char *out,
   257                        int rows, int cols, int mod)
   258 {
   259     unsigned int *row1;
   260     unsigned int *row2;
   261     unsigned char *lum2;
   262     int x, y;
   263     int cr_r;
   264     int crb_g;
   265     int cb_b;
   266     int cols_2 = cols / 2;
   267 
   268     row1 = (unsigned int *) out;
   269     row2 = row1 + cols + mod;
   270     lum2 = lum + cols;
   271 
   272     mod += cols + mod;
   273 
   274     y = rows / 2;
   275     while (y--) {
   276         x = cols_2;
   277         while (x--) {
   278             register int L;
   279 
   280             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   281             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   282                 + colortab[*cb + 2 * 256];
   283             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   284             ++cr;
   285             ++cb;
   286 
   287             L = *lum++;
   288             *row1++ = (rgb_2_pix[L + cr_r] |
   289                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   290 
   291             L = *lum++;
   292             *row1++ = (rgb_2_pix[L + cr_r] |
   293                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   294 
   295 
   296             /* Now, do second row.  */
   297 
   298             L = *lum2++;
   299             *row2++ = (rgb_2_pix[L + cr_r] |
   300                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   301 
   302             L = *lum2++;
   303             *row2++ = (rgb_2_pix[L + cr_r] |
   304                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   305         }
   306 
   307         /*
   308          * These values are at the start of the next line, (due
   309          * to the ++'s above),but they need to be at the start
   310          * of the line after that.
   311          */
   312         lum += cols;
   313         lum2 += cols;
   314         row1 += mod;
   315         row2 += mod;
   316     }
   317 }
   318 
   319 /*
   320  * In this function I make use of a nasty trick. The tables have the lower
   321  * 16 bits replicated in the upper 16. This means I can write ints and get
   322  * the horisontal doubling for free (almost).
   323  */
   324 static void
   325 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   326                        unsigned char *lum, unsigned char *cr,
   327                        unsigned char *cb, unsigned char *out,
   328                        int rows, int cols, int mod)
   329 {
   330     unsigned int *row1 = (unsigned int *) out;
   331     const int next_row = cols + (mod / 2);
   332     unsigned int *row2 = row1 + 2 * next_row;
   333     unsigned char *lum2;
   334     int x, y;
   335     int cr_r;
   336     int crb_g;
   337     int cb_b;
   338     int cols_2 = cols / 2;
   339 
   340     lum2 = lum + cols;
   341 
   342     mod = (next_row * 3) + (mod / 2);
   343 
   344     y = rows / 2;
   345     while (y--) {
   346         x = cols_2;
   347         while (x--) {
   348             register int L;
   349 
   350             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   351             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   352                 + colortab[*cb + 2 * 256];
   353             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   354             ++cr;
   355             ++cb;
   356 
   357             L = *lum++;
   358             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   359                                         rgb_2_pix[L + crb_g] |
   360                                         rgb_2_pix[L + cb_b]);
   361             row1++;
   362 
   363             L = *lum++;
   364             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   365                                         rgb_2_pix[L + crb_g] |
   366                                         rgb_2_pix[L + cb_b]);
   367             row1++;
   368 
   369 
   370             /* Now, do second row. */
   371 
   372             L = *lum2++;
   373             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   374                                         rgb_2_pix[L + crb_g] |
   375                                         rgb_2_pix[L + cb_b]);
   376             row2++;
   377 
   378             L = *lum2++;
   379             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   380                                         rgb_2_pix[L + crb_g] |
   381                                         rgb_2_pix[L + cb_b]);
   382             row2++;
   383         }
   384 
   385         /*
   386          * These values are at the start of the next line, (due
   387          * to the ++'s above),but they need to be at the start
   388          * of the line after that.
   389          */
   390         lum += cols;
   391         lum2 += cols;
   392         row1 += mod;
   393         row2 += mod;
   394     }
   395 }
   396 
   397 static void
   398 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   399                        unsigned char *lum, unsigned char *cr,
   400                        unsigned char *cb, unsigned char *out,
   401                        int rows, int cols, int mod)
   402 {
   403     unsigned int value;
   404     unsigned char *row1 = out;
   405     const int next_row = (cols * 2 + mod) * 3;
   406     unsigned char *row2 = row1 + 2 * next_row;
   407     unsigned char *lum2;
   408     int x, y;
   409     int cr_r;
   410     int crb_g;
   411     int cb_b;
   412     int cols_2 = cols / 2;
   413 
   414     lum2 = lum + cols;
   415 
   416     mod = next_row * 3 + mod * 3;
   417 
   418     y = rows / 2;
   419     while (y--) {
   420         x = cols_2;
   421         while (x--) {
   422             register int L;
   423 
   424             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   425             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   426                 + colortab[*cb + 2 * 256];
   427             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   428             ++cr;
   429             ++cb;
   430 
   431             L = *lum++;
   432             value = (rgb_2_pix[L + cr_r] |
   433                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   434             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   435                 row1[next_row + 3 + 0] = (value) & 0xFF;
   436             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   437                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   438             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   439                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   440             row1 += 2 * 3;
   441 
   442             L = *lum++;
   443             value = (rgb_2_pix[L + cr_r] |
   444                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   445             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   446                 row1[next_row + 3 + 0] = (value) & 0xFF;
   447             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   448                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   449             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   450                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   451             row1 += 2 * 3;
   452 
   453 
   454             /* Now, do second row. */
   455 
   456             L = *lum2++;
   457             value = (rgb_2_pix[L + cr_r] |
   458                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   459             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   460                 row2[next_row + 3 + 0] = (value) & 0xFF;
   461             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   462                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   463             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   464                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   465             row2 += 2 * 3;
   466 
   467             L = *lum2++;
   468             value = (rgb_2_pix[L + cr_r] |
   469                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   470             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   471                 row2[next_row + 3 + 0] = (value) & 0xFF;
   472             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   473                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   474             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   475                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   476             row2 += 2 * 3;
   477         }
   478 
   479         /*
   480          * These values are at the start of the next line, (due
   481          * to the ++'s above),but they need to be at the start
   482          * of the line after that.
   483          */
   484         lum += cols;
   485         lum2 += cols;
   486         row1 += mod;
   487         row2 += mod;
   488     }
   489 }
   490 
   491 static void
   492 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   493                        unsigned char *lum, unsigned char *cr,
   494                        unsigned char *cb, unsigned char *out,
   495                        int rows, int cols, int mod)
   496 {
   497     unsigned int *row1 = (unsigned int *) out;
   498     const int next_row = cols * 2 + mod;
   499     unsigned int *row2 = row1 + 2 * next_row;
   500     unsigned char *lum2;
   501     int x, y;
   502     int cr_r;
   503     int crb_g;
   504     int cb_b;
   505     int cols_2 = cols / 2;
   506 
   507     lum2 = lum + cols;
   508 
   509     mod = (next_row * 3) + mod;
   510 
   511     y = rows / 2;
   512     while (y--) {
   513         x = cols_2;
   514         while (x--) {
   515             register int L;
   516 
   517             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   518             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   519                 + colortab[*cb + 2 * 256];
   520             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   521             ++cr;
   522             ++cb;
   523 
   524             L = *lum++;
   525             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   526                 (rgb_2_pix[L + cr_r] |
   527                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   528             row1 += 2;
   529 
   530             L = *lum++;
   531             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   532                 (rgb_2_pix[L + cr_r] |
   533                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   534             row1 += 2;
   535 
   536 
   537             /* Now, do second row. */
   538 
   539             L = *lum2++;
   540             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   541                 (rgb_2_pix[L + cr_r] |
   542                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   543             row2 += 2;
   544 
   545             L = *lum2++;
   546             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   547                 (rgb_2_pix[L + cr_r] |
   548                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   549             row2 += 2;
   550         }
   551 
   552         /*
   553          * These values are at the start of the next line, (due
   554          * to the ++'s above),but they need to be at the start
   555          * of the line after that.
   556          */
   557         lum += cols;
   558         lum2 += cols;
   559         row1 += mod;
   560         row2 += mod;
   561     }
   562 }
   563 
   564 static void
   565 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   566                        unsigned char *lum, unsigned char *cr,
   567                        unsigned char *cb, unsigned char *out,
   568                        int rows, int cols, int mod)
   569 {
   570     unsigned short *row;
   571     int x, y;
   572     int cr_r;
   573     int crb_g;
   574     int cb_b;
   575     int cols_2 = cols / 2;
   576 
   577     row = (unsigned short *) out;
   578 
   579     y = rows;
   580     while (y--) {
   581         x = cols_2;
   582         while (x--) {
   583             register int L;
   584 
   585             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   586             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   587                 + colortab[*cb + 2 * 256];
   588             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   589             cr += 4;
   590             cb += 4;
   591 
   592             L = *lum;
   593             lum += 2;
   594             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   595                                        rgb_2_pix[L + crb_g] |
   596                                        rgb_2_pix[L + cb_b]);
   597 
   598             L = *lum;
   599             lum += 2;
   600             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   601                                        rgb_2_pix[L + crb_g] |
   602                                        rgb_2_pix[L + cb_b]);
   603 
   604         }
   605 
   606         row += mod;
   607     }
   608 }
   609 
   610 static void
   611 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   612                        unsigned char *lum, unsigned char *cr,
   613                        unsigned char *cb, unsigned char *out,
   614                        int rows, int cols, int mod)
   615 {
   616     unsigned int value;
   617     unsigned char *row;
   618     int x, y;
   619     int cr_r;
   620     int crb_g;
   621     int cb_b;
   622     int cols_2 = cols / 2;
   623 
   624     row = (unsigned char *) out;
   625     mod *= 3;
   626     y = rows;
   627     while (y--) {
   628         x = cols_2;
   629         while (x--) {
   630             register int L;
   631 
   632             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   633             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   634                 + colortab[*cb + 2 * 256];
   635             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   636             cr += 4;
   637             cb += 4;
   638 
   639             L = *lum;
   640             lum += 2;
   641             value = (rgb_2_pix[L + cr_r] |
   642                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   643             *row++ = (value) & 0xFF;
   644             *row++ = (value >> 8) & 0xFF;
   645             *row++ = (value >> 16) & 0xFF;
   646 
   647             L = *lum;
   648             lum += 2;
   649             value = (rgb_2_pix[L + cr_r] |
   650                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   651             *row++ = (value) & 0xFF;
   652             *row++ = (value >> 8) & 0xFF;
   653             *row++ = (value >> 16) & 0xFF;
   654 
   655         }
   656         row += mod;
   657     }
   658 }
   659 
   660 static void
   661 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   662                        unsigned char *lum, unsigned char *cr,
   663                        unsigned char *cb, unsigned char *out,
   664                        int rows, int cols, int mod)
   665 {
   666     unsigned int *row;
   667     int x, y;
   668     int cr_r;
   669     int crb_g;
   670     int cb_b;
   671     int cols_2 = cols / 2;
   672 
   673     row = (unsigned int *) out;
   674     y = rows;
   675     while (y--) {
   676         x = cols_2;
   677         while (x--) {
   678             register int L;
   679 
   680             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   681             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   682                 + colortab[*cb + 2 * 256];
   683             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   684             cr += 4;
   685             cb += 4;
   686 
   687             L = *lum;
   688             lum += 2;
   689             *row++ = (rgb_2_pix[L + cr_r] |
   690                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   691 
   692             L = *lum;
   693             lum += 2;
   694             *row++ = (rgb_2_pix[L + cr_r] |
   695                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   696 
   697 
   698         }
   699         row += mod;
   700     }
   701 }
   702 
   703 /*
   704  * In this function I make use of a nasty trick. The tables have the lower
   705  * 16 bits replicated in the upper 16. This means I can write ints and get
   706  * the horisontal doubling for free (almost).
   707  */
   708 static void
   709 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   710                        unsigned char *lum, unsigned char *cr,
   711                        unsigned char *cb, unsigned char *out,
   712                        int rows, int cols, int mod)
   713 {
   714     unsigned int *row = (unsigned int *) out;
   715     const int next_row = cols + (mod / 2);
   716     int x, y;
   717     int cr_r;
   718     int crb_g;
   719     int cb_b;
   720     int cols_2 = cols / 2;
   721 
   722     y = rows;
   723     while (y--) {
   724         x = cols_2;
   725         while (x--) {
   726             register int L;
   727 
   728             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   729             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   730                 + colortab[*cb + 2 * 256];
   731             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   732             cr += 4;
   733             cb += 4;
   734 
   735             L = *lum;
   736             lum += 2;
   737             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   738                                       rgb_2_pix[L + crb_g] |
   739                                       rgb_2_pix[L + cb_b]);
   740             row++;
   741 
   742             L = *lum;
   743             lum += 2;
   744             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   745                                       rgb_2_pix[L + crb_g] |
   746                                       rgb_2_pix[L + cb_b]);
   747             row++;
   748 
   749         }
   750         row += next_row;
   751     }
   752 }
   753 
   754 static void
   755 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   756                        unsigned char *lum, unsigned char *cr,
   757                        unsigned char *cb, unsigned char *out,
   758                        int rows, int cols, int mod)
   759 {
   760     unsigned int value;
   761     unsigned char *row = out;
   762     const int next_row = (cols * 2 + mod) * 3;
   763     int x, y;
   764     int cr_r;
   765     int crb_g;
   766     int cb_b;
   767     int cols_2 = cols / 2;
   768     y = rows;
   769     while (y--) {
   770         x = cols_2;
   771         while (x--) {
   772             register int L;
   773 
   774             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   775             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   776                 + colortab[*cb + 2 * 256];
   777             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   778             cr += 4;
   779             cb += 4;
   780 
   781             L = *lum;
   782             lum += 2;
   783             value = (rgb_2_pix[L + cr_r] |
   784                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   785             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   786                 row[next_row + 3 + 0] = (value) & 0xFF;
   787             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   788                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   789             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   790                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   791             row += 2 * 3;
   792 
   793             L = *lum;
   794             lum += 2;
   795             value = (rgb_2_pix[L + cr_r] |
   796                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   797             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   798                 row[next_row + 3 + 0] = (value) & 0xFF;
   799             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   800                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   801             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   802                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   803             row += 2 * 3;
   804 
   805         }
   806         row += next_row;
   807     }
   808 }
   809 
   810 static void
   811 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   812                        unsigned char *lum, unsigned char *cr,
   813                        unsigned char *cb, unsigned char *out,
   814                        int rows, int cols, int mod)
   815 {
   816     unsigned int *row = (unsigned int *) out;
   817     const int next_row = cols * 2 + mod;
   818     int x, y;
   819     int cr_r;
   820     int crb_g;
   821     int cb_b;
   822     int cols_2 = cols / 2;
   823     mod += mod;
   824     y = rows;
   825     while (y--) {
   826         x = cols_2;
   827         while (x--) {
   828             register int L;
   829 
   830             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   831             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   832                 + colortab[*cb + 2 * 256];
   833             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   834             cr += 4;
   835             cb += 4;
   836 
   837             L = *lum;
   838             lum += 2;
   839             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   840                 (rgb_2_pix[L + cr_r] |
   841                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   842             row += 2;
   843 
   844             L = *lum;
   845             lum += 2;
   846             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   847                 (rgb_2_pix[L + cr_r] |
   848                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   849             row += 2;
   850 
   851 
   852         }
   853 
   854         row += next_row;
   855     }
   856 }
   857 
   858 /*
   859  * How many 1 bits are there in the Uint32.
   860  * Low performance, do not call often.
   861  */
   862 static int
   863 number_of_bits_set(Uint32 a)
   864 {
   865     if (!a)
   866         return 0;
   867     if (a & 1)
   868         return 1 + number_of_bits_set(a >> 1);
   869     return (number_of_bits_set(a >> 1));
   870 }
   871 
   872 /*
   873  * How many 0 bits are there at least significant end of Uint32.
   874  * Low performance, do not call often.
   875  */
   876 static int
   877 free_bits_at_bottom(Uint32 a)
   878 {
   879     /* assume char is 8 bits */
   880     if (!a)
   881         return sizeof(Uint32) * 8;
   882     if (((Sint32) a) & 1l)
   883         return 0;
   884     return 1 + free_bits_at_bottom(a >> 1);
   885 }
   886 
   887 static int
   888 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   889 {
   890     Uint32 *r_2_pix_alloc;
   891     Uint32 *g_2_pix_alloc;
   892     Uint32 *b_2_pix_alloc;
   893     int i;
   894     int bpp;
   895     Uint32 Rmask, Gmask, Bmask, Amask;
   896 
   897     if (!SDL_PixelFormatEnumToMasks
   898         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   899         SDL_SetError("Unsupported YUV destination format");
   900         return -1;
   901     }
   902 
   903     swdata->target_format = target_format;
   904     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   905     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   906     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   907 
   908     /* 
   909      * Set up entries 0-255 in rgb-to-pixel value tables.
   910      */
   911     for (i = 0; i < 256; ++i) {
   912         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   913         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
   914         r_2_pix_alloc[i + 256] |= Amask;
   915         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   916         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
   917         g_2_pix_alloc[i + 256] |= Amask;
   918         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   919         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
   920         b_2_pix_alloc[i + 256] |= Amask;
   921     }
   922 
   923     /*
   924      * If we have 16-bit output depth, then we double the value
   925      * in the top word. This means that we can write out both
   926      * pixels in the pixel doubling mode with one op. It is 
   927      * harmless in the normal case as storing a 32-bit value
   928      * through a short pointer will lose the top bits anyway.
   929      */
   930     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   931         for (i = 0; i < 256; ++i) {
   932             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   933             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   934             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   935         }
   936     }
   937 
   938     /*
   939      * Spread out the values we have to the rest of the array so that
   940      * we do not need to check for overflow.
   941      */
   942     for (i = 0; i < 256; ++i) {
   943         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   944         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   945         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   946         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   947         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   948         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   949     }
   950 
   951     /* You have chosen wisely... */
   952     switch (swdata->format) {
   953     case SDL_PIXELFORMAT_YV12:
   954     case SDL_PIXELFORMAT_IYUV:
   955         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   956 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   957             /* inline assembly functions */
   958             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   959                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   960                 && (swdata->w & 15) == 0) {
   961 /*printf("Using MMX 16-bit 565 dither\n");*/
   962                 swdata->Display1X = Color565DitherYV12MMX1X;
   963             } else {
   964 /*printf("Using C 16-bit dither\n");*/
   965                 swdata->Display1X = Color16DitherYV12Mod1X;
   966             }
   967 #else
   968             swdata->Display1X = Color16DitherYV12Mod1X;
   969 #endif
   970             swdata->Display2X = Color16DitherYV12Mod2X;
   971         }
   972         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   973             swdata->Display1X = Color24DitherYV12Mod1X;
   974             swdata->Display2X = Color24DitherYV12Mod2X;
   975         }
   976         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   977 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   978             /* inline assembly functions */
   979             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   980                 (Gmask == 0x0000FF00) &&
   981                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   982 /*printf("Using MMX 32-bit dither\n");*/
   983                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   984             } else {
   985 /*printf("Using C 32-bit dither\n");*/
   986                 swdata->Display1X = Color32DitherYV12Mod1X;
   987             }
   988 #else
   989             swdata->Display1X = Color32DitherYV12Mod1X;
   990 #endif
   991             swdata->Display2X = Color32DitherYV12Mod2X;
   992         }
   993         break;
   994     case SDL_PIXELFORMAT_YUY2:
   995     case SDL_PIXELFORMAT_UYVY:
   996     case SDL_PIXELFORMAT_YVYU:
   997         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   998             swdata->Display1X = Color16DitherYUY2Mod1X;
   999             swdata->Display2X = Color16DitherYUY2Mod2X;
  1000         }
  1001         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1002             swdata->Display1X = Color24DitherYUY2Mod1X;
  1003             swdata->Display2X = Color24DitherYUY2Mod2X;
  1004         }
  1005         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1006             swdata->Display1X = Color32DitherYUY2Mod1X;
  1007             swdata->Display2X = Color32DitherYUY2Mod2X;
  1008         }
  1009         break;
  1010     default:
  1011         /* We should never get here (caught above) */
  1012         break;
  1013     }
  1014 
  1015     if (swdata->display) {
  1016         SDL_FreeSurface(swdata->display);
  1017         swdata->display = NULL;
  1018     }
  1019     return 0;
  1020 }
  1021 
  1022 SDL_SW_YUVTexture *
  1023 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1024 {
  1025     SDL_SW_YUVTexture *swdata;
  1026     int *Cr_r_tab;
  1027     int *Cr_g_tab;
  1028     int *Cb_g_tab;
  1029     int *Cb_b_tab;
  1030     int i;
  1031     int CR, CB;
  1032 
  1033     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1034     if (!swdata) {
  1035         SDL_OutOfMemory();
  1036         return NULL;
  1037     }
  1038 
  1039     switch (format) {
  1040     case SDL_PIXELFORMAT_YV12:
  1041     case SDL_PIXELFORMAT_IYUV:
  1042     case SDL_PIXELFORMAT_YUY2:
  1043     case SDL_PIXELFORMAT_UYVY:
  1044     case SDL_PIXELFORMAT_YVYU:
  1045         break;
  1046     default:
  1047         SDL_SetError("Unsupported YUV format");
  1048         return NULL;
  1049     }
  1050 
  1051     swdata->format = format;
  1052     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1053     swdata->w = w;
  1054     swdata->h = h;
  1055     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1056     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1057     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1058     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1059         SDL_OutOfMemory();
  1060         SDL_SW_DestroyYUVTexture(swdata);
  1061         return NULL;
  1062     }
  1063 
  1064     /* Generate the tables for the display surface */
  1065     Cr_r_tab = &swdata->colortab[0 * 256];
  1066     Cr_g_tab = &swdata->colortab[1 * 256];
  1067     Cb_g_tab = &swdata->colortab[2 * 256];
  1068     Cb_b_tab = &swdata->colortab[3 * 256];
  1069     for (i = 0; i < 256; i++) {
  1070         /* Gamma correction (luminescence table) and chroma correction
  1071            would be done here.  See the Berkeley mpeg_play sources.
  1072          */
  1073         CB = CR = (i - 128);
  1074         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1075         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1076         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1077         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1078     }
  1079 
  1080     /* Find the pitch and offset values for the overlay */
  1081     switch (format) {
  1082     case SDL_PIXELFORMAT_YV12:
  1083     case SDL_PIXELFORMAT_IYUV:
  1084         swdata->pitches[0] = w;
  1085         swdata->pitches[1] = swdata->pitches[0] / 2;
  1086         swdata->pitches[2] = swdata->pitches[0] / 2;
  1087         swdata->planes[0] = swdata->pixels;
  1088         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1089         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1090         break;
  1091     case SDL_PIXELFORMAT_YUY2:
  1092     case SDL_PIXELFORMAT_UYVY:
  1093     case SDL_PIXELFORMAT_YVYU:
  1094         swdata->pitches[0] = w * 2;
  1095         swdata->planes[0] = swdata->pixels;
  1096         break;
  1097     default:
  1098         /* We should never get here (caught above) */
  1099         break;
  1100     }
  1101 
  1102     /* We're all done.. */
  1103     return (swdata);
  1104 }
  1105 
  1106 int
  1107 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1108                              int *pitch)
  1109 {
  1110     *pixels = swdata->planes[0];
  1111     *pitch = swdata->pitches[0];
  1112     return 0;
  1113 }
  1114 
  1115 int
  1116 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1117                         const void *pixels, int pitch)
  1118 {
  1119     switch (swdata->format) {
  1120     case SDL_PIXELFORMAT_YV12:
  1121     case SDL_PIXELFORMAT_IYUV:
  1122         if (rect->x == 0 && rect->y == 0 &&
  1123             rect->w == swdata->w && rect->h == swdata->h) {
  1124                 SDL_memcpy(swdata->pixels, pixels,
  1125                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1126         } else {
  1127             Uint8 *src, *dst;
  1128             int row;
  1129             size_t length;
  1130 
  1131             /* Copy the Y plane */
  1132             src = (Uint8 *) pixels;
  1133             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1134             length = rect->w;
  1135             for (row = 0; row < rect->h; ++row) {
  1136                 SDL_memcpy(dst, src, length);
  1137                 src += pitch;
  1138                 dst += swdata->w;
  1139             }
  1140 
  1141             /* Copy the next plane */
  1142             src = (Uint8 *) pixels + rect->h * pitch;
  1143             dst = swdata->pixels + swdata->h * swdata->w;
  1144             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1145             length = rect->w / 2;
  1146             for (row = 0; row < rect->h/2; ++row) {
  1147                 SDL_memcpy(dst, src, length);
  1148                 src += pitch/2;
  1149                 dst += swdata->w/2;
  1150             }
  1151 
  1152             /* Copy the next plane */
  1153             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1154             dst = swdata->pixels + swdata->h * swdata->w +
  1155                   (swdata->h * swdata->w) / 4;
  1156             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1157             length = rect->w / 2;
  1158             for (row = 0; row < rect->h/2; ++row) {
  1159                 SDL_memcpy(dst, src, length);
  1160                 src += pitch/2;
  1161                 dst += swdata->w/2;
  1162             }
  1163         }
  1164         break;
  1165     case SDL_PIXELFORMAT_YUY2:
  1166     case SDL_PIXELFORMAT_UYVY:
  1167     case SDL_PIXELFORMAT_YVYU:
  1168         {
  1169             Uint8 *src, *dst;
  1170             int row;
  1171             size_t length;
  1172 
  1173             src = (Uint8 *) pixels;
  1174             dst =
  1175                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1176                 rect->x * 2;
  1177             length = rect->w * 2;
  1178             for (row = 0; row < rect->h; ++row) {
  1179                 SDL_memcpy(dst, src, length);
  1180                 src += pitch;
  1181                 dst += swdata->pitches[0];
  1182             }
  1183         }
  1184         break;
  1185     }
  1186     return 0;
  1187 }
  1188 
  1189 int
  1190 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1191                       void **pixels, int *pitch)
  1192 {
  1193     switch (swdata->format) {
  1194     case SDL_PIXELFORMAT_YV12:
  1195     case SDL_PIXELFORMAT_IYUV:
  1196         if (rect
  1197             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1198                 || rect->h != swdata->h)) {
  1199             SDL_SetError
  1200                 ("YV12 and IYUV textures only support full surface locks");
  1201             return -1;
  1202         }
  1203         break;
  1204     }
  1205 
  1206     *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1207     *pitch = swdata->pitches[0];
  1208     return 0;
  1209 }
  1210 
  1211 void
  1212 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1213 {
  1214 }
  1215 
  1216 int
  1217 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1218                     Uint32 target_format, int w, int h, void *pixels,
  1219                     int pitch)
  1220 {
  1221     int stretch;
  1222     int scale_2x;
  1223     Uint8 *lum, *Cr, *Cb;
  1224     int mod;
  1225 
  1226     /* Make sure we're set up to display in the desired format */
  1227     if (target_format != swdata->target_format) {
  1228         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1229             return -1;
  1230         }
  1231     }
  1232 
  1233     stretch = 0;
  1234     scale_2x = 0;
  1235     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1236         || srcrect->h < swdata->h) {
  1237         /* The source rectangle has been clipped.
  1238            Using a scratch surface is easier than adding clipped
  1239            source support to all the blitters, plus that would
  1240            slow them down in the general unclipped case.
  1241          */
  1242         stretch = 1;
  1243     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1244         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1245             scale_2x = 1;
  1246         } else {
  1247             stretch = 1;
  1248         }
  1249     }
  1250     if (stretch) {
  1251         int bpp;
  1252         Uint32 Rmask, Gmask, Bmask, Amask;
  1253 
  1254         if (swdata->display) {
  1255             swdata->display->w = w;
  1256             swdata->display->h = h;
  1257             swdata->display->pixels = pixels;
  1258             swdata->display->pitch = pitch;
  1259         } else {
  1260             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1261             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1262                                        &Bmask, &Amask);
  1263             swdata->display =
  1264                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1265                                          Gmask, Bmask, Amask);
  1266             if (!swdata->display) {
  1267                 return (-1);
  1268             }
  1269         }
  1270         if (!swdata->stretch) {
  1271             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1272             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1273                                        &Bmask, &Amask);
  1274             swdata->stretch =
  1275                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1276                                      Gmask, Bmask, Amask);
  1277             if (!swdata->stretch) {
  1278                 return (-1);
  1279             }
  1280         }
  1281         pixels = swdata->stretch->pixels;
  1282         pitch = swdata->stretch->pitch;
  1283     }
  1284     switch (swdata->format) {
  1285     case SDL_PIXELFORMAT_YV12:
  1286         lum = swdata->planes[0];
  1287         Cr = swdata->planes[1];
  1288         Cb = swdata->planes[2];
  1289         break;
  1290     case SDL_PIXELFORMAT_IYUV:
  1291         lum = swdata->planes[0];
  1292         Cr = swdata->planes[2];
  1293         Cb = swdata->planes[1];
  1294         break;
  1295     case SDL_PIXELFORMAT_YUY2:
  1296         lum = swdata->planes[0];
  1297         Cr = lum + 3;
  1298         Cb = lum + 1;
  1299         break;
  1300     case SDL_PIXELFORMAT_UYVY:
  1301         lum = swdata->planes[0] + 1;
  1302         Cr = lum + 1;
  1303         Cb = lum - 1;
  1304         break;
  1305     case SDL_PIXELFORMAT_YVYU:
  1306         lum = swdata->planes[0];
  1307         Cr = lum + 1;
  1308         Cb = lum + 3;
  1309         break;
  1310     default:
  1311         SDL_SetError("Unsupported YUV format in copy");
  1312         return (-1);
  1313     }
  1314     mod = (pitch / SDL_BYTESPERPIXEL(target_format));
  1315 
  1316     if (scale_2x) {
  1317         mod -= (swdata->w * 2);
  1318         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1319                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1320     } else {
  1321         mod -= swdata->w;
  1322         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1323                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1324     }
  1325     if (stretch) {
  1326         SDL_Rect rect = *srcrect;
  1327         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1328     }
  1329     return 0;
  1330 }
  1331 
  1332 void
  1333 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1334 {
  1335     if (swdata) {
  1336         if (swdata->pixels) {
  1337             SDL_free(swdata->pixels);
  1338         }
  1339         if (swdata->colortab) {
  1340             SDL_free(swdata->colortab);
  1341         }
  1342         if (swdata->rgb_2_pix) {
  1343             SDL_free(swdata->rgb_2_pix);
  1344         }
  1345         if (swdata->stretch) {
  1346             SDL_FreeSurface(swdata->stretch);
  1347         }
  1348         if (swdata->display) {
  1349             SDL_FreeSurface(swdata->display);
  1350         }
  1351         SDL_free(swdata);
  1352     }
  1353 }
  1354 
  1355 /* vi: set ts=4 sw=4 expandtab: */