src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Thu, 29 Aug 2013 08:29:21 -0700
changeset 7719 31b5f9ff36ca
parent 7678 286c42d7c5ed
child 7720 f9a649383362
permissions -rw-r--r--
Christoph Mallon: Remove pointless if (x) before SDL_free(x)
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "SDL_config.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  *
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  *
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  *
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  *
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  *
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  *
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  *
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  *
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  *
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_assert.h"
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_yuv_sw_c.h"
    89 
    90 
    91 /* The colorspace conversion functions */
    92 
    93 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    94 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    95                                     unsigned char *lum, unsigned char *cr,
    96                                     unsigned char *cb, unsigned char *out,
    97                                     int rows, int cols, int mod);
    98 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    99                                     unsigned char *lum, unsigned char *cr,
   100                                     unsigned char *cb, unsigned char *out,
   101                                     int rows, int cols, int mod);
   102 #endif
   103 
   104 static void
   105 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   106                        unsigned char *lum, unsigned char *cr,
   107                        unsigned char *cb, unsigned char *out,
   108                        int rows, int cols, int mod)
   109 {
   110     unsigned short *row1;
   111     unsigned short *row2;
   112     unsigned char *lum2;
   113     int x, y;
   114     int cr_r;
   115     int crb_g;
   116     int cb_b;
   117     int cols_2 = cols / 2;
   118 
   119     row1 = (unsigned short *) out;
   120     row2 = row1 + cols + mod;
   121     lum2 = lum + cols;
   122 
   123     mod += cols + mod;
   124 
   125     y = rows / 2;
   126     while (y--) {
   127         x = cols_2;
   128         while (x--) {
   129             register int L;
   130 
   131             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   132             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   133                 + colortab[*cb + 2 * 256];
   134             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   135             ++cr;
   136             ++cb;
   137 
   138             L = *lum++;
   139             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   140                                         rgb_2_pix[L + crb_g] |
   141                                         rgb_2_pix[L + cb_b]);
   142 
   143             L = *lum++;
   144             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   145                                         rgb_2_pix[L + crb_g] |
   146                                         rgb_2_pix[L + cb_b]);
   147 
   148 
   149             /* Now, do second row.  */
   150 
   151             L = *lum2++;
   152             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   153                                         rgb_2_pix[L + crb_g] |
   154                                         rgb_2_pix[L + cb_b]);
   155 
   156             L = *lum2++;
   157             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   158                                         rgb_2_pix[L + crb_g] |
   159                                         rgb_2_pix[L + cb_b]);
   160         }
   161 
   162         /*
   163          * These values are at the start of the next line, (due
   164          * to the ++'s above),but they need to be at the start
   165          * of the line after that.
   166          */
   167         lum += cols;
   168         lum2 += cols;
   169         row1 += mod;
   170         row2 += mod;
   171     }
   172 }
   173 
   174 static void
   175 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   176                        unsigned char *lum, unsigned char *cr,
   177                        unsigned char *cb, unsigned char *out,
   178                        int rows, int cols, int mod)
   179 {
   180     unsigned int value;
   181     unsigned char *row1;
   182     unsigned char *row2;
   183     unsigned char *lum2;
   184     int x, y;
   185     int cr_r;
   186     int crb_g;
   187     int cb_b;
   188     int cols_2 = cols / 2;
   189 
   190     row1 = out;
   191     row2 = row1 + cols * 3 + mod * 3;
   192     lum2 = lum + cols;
   193 
   194     mod += cols + mod;
   195     mod *= 3;
   196 
   197     y = rows / 2;
   198     while (y--) {
   199         x = cols_2;
   200         while (x--) {
   201             register int L;
   202 
   203             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   204             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   205                 + colortab[*cb + 2 * 256];
   206             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   207             ++cr;
   208             ++cb;
   209 
   210             L = *lum++;
   211             value = (rgb_2_pix[L + cr_r] |
   212                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   213             *row1++ = (value) & 0xFF;
   214             *row1++ = (value >> 8) & 0xFF;
   215             *row1++ = (value >> 16) & 0xFF;
   216 
   217             L = *lum++;
   218             value = (rgb_2_pix[L + cr_r] |
   219                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   220             *row1++ = (value) & 0xFF;
   221             *row1++ = (value >> 8) & 0xFF;
   222             *row1++ = (value >> 16) & 0xFF;
   223 
   224 
   225             /* Now, do second row.  */
   226 
   227             L = *lum2++;
   228             value = (rgb_2_pix[L + cr_r] |
   229                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   230             *row2++ = (value) & 0xFF;
   231             *row2++ = (value >> 8) & 0xFF;
   232             *row2++ = (value >> 16) & 0xFF;
   233 
   234             L = *lum2++;
   235             value = (rgb_2_pix[L + cr_r] |
   236                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   237             *row2++ = (value) & 0xFF;
   238             *row2++ = (value >> 8) & 0xFF;
   239             *row2++ = (value >> 16) & 0xFF;
   240         }
   241 
   242         /*
   243          * These values are at the start of the next line, (due
   244          * to the ++'s above),but they need to be at the start
   245          * of the line after that.
   246          */
   247         lum += cols;
   248         lum2 += cols;
   249         row1 += mod;
   250         row2 += mod;
   251     }
   252 }
   253 
   254 static void
   255 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   256                        unsigned char *lum, unsigned char *cr,
   257                        unsigned char *cb, unsigned char *out,
   258                        int rows, int cols, int mod)
   259 {
   260     unsigned int *row1;
   261     unsigned int *row2;
   262     unsigned char *lum2;
   263     int x, y;
   264     int cr_r;
   265     int crb_g;
   266     int cb_b;
   267     int cols_2 = cols / 2;
   268 
   269     row1 = (unsigned int *) out;
   270     row2 = row1 + cols + mod;
   271     lum2 = lum + cols;
   272 
   273     mod += cols + mod;
   274 
   275     y = rows / 2;
   276     while (y--) {
   277         x = cols_2;
   278         while (x--) {
   279             register int L;
   280 
   281             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   282             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   283                 + colortab[*cb + 2 * 256];
   284             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   285             ++cr;
   286             ++cb;
   287 
   288             L = *lum++;
   289             *row1++ = (rgb_2_pix[L + cr_r] |
   290                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   291 
   292             L = *lum++;
   293             *row1++ = (rgb_2_pix[L + cr_r] |
   294                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   295 
   296 
   297             /* Now, do second row.  */
   298 
   299             L = *lum2++;
   300             *row2++ = (rgb_2_pix[L + cr_r] |
   301                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   302 
   303             L = *lum2++;
   304             *row2++ = (rgb_2_pix[L + cr_r] |
   305                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   306         }
   307 
   308         /*
   309          * These values are at the start of the next line, (due
   310          * to the ++'s above),but they need to be at the start
   311          * of the line after that.
   312          */
   313         lum += cols;
   314         lum2 += cols;
   315         row1 += mod;
   316         row2 += mod;
   317     }
   318 }
   319 
   320 /*
   321  * In this function I make use of a nasty trick. The tables have the lower
   322  * 16 bits replicated in the upper 16. This means I can write ints and get
   323  * the horisontal doubling for free (almost).
   324  */
   325 static void
   326 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   327                        unsigned char *lum, unsigned char *cr,
   328                        unsigned char *cb, unsigned char *out,
   329                        int rows, int cols, int mod)
   330 {
   331     unsigned int *row1 = (unsigned int *) out;
   332     const int next_row = cols + (mod / 2);
   333     unsigned int *row2 = row1 + 2 * next_row;
   334     unsigned char *lum2;
   335     int x, y;
   336     int cr_r;
   337     int crb_g;
   338     int cb_b;
   339     int cols_2 = cols / 2;
   340 
   341     lum2 = lum + cols;
   342 
   343     mod = (next_row * 3) + (mod / 2);
   344 
   345     y = rows / 2;
   346     while (y--) {
   347         x = cols_2;
   348         while (x--) {
   349             register int L;
   350 
   351             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   352             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   353                 + colortab[*cb + 2 * 256];
   354             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   355             ++cr;
   356             ++cb;
   357 
   358             L = *lum++;
   359             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   360                                         rgb_2_pix[L + crb_g] |
   361                                         rgb_2_pix[L + cb_b]);
   362             row1++;
   363 
   364             L = *lum++;
   365             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   366                                         rgb_2_pix[L + crb_g] |
   367                                         rgb_2_pix[L + cb_b]);
   368             row1++;
   369 
   370 
   371             /* Now, do second row. */
   372 
   373             L = *lum2++;
   374             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   375                                         rgb_2_pix[L + crb_g] |
   376                                         rgb_2_pix[L + cb_b]);
   377             row2++;
   378 
   379             L = *lum2++;
   380             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   381                                         rgb_2_pix[L + crb_g] |
   382                                         rgb_2_pix[L + cb_b]);
   383             row2++;
   384         }
   385 
   386         /*
   387          * These values are at the start of the next line, (due
   388          * to the ++'s above),but they need to be at the start
   389          * of the line after that.
   390          */
   391         lum += cols;
   392         lum2 += cols;
   393         row1 += mod;
   394         row2 += mod;
   395     }
   396 }
   397 
   398 static void
   399 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   400                        unsigned char *lum, unsigned char *cr,
   401                        unsigned char *cb, unsigned char *out,
   402                        int rows, int cols, int mod)
   403 {
   404     unsigned int value;
   405     unsigned char *row1 = out;
   406     const int next_row = (cols * 2 + mod) * 3;
   407     unsigned char *row2 = row1 + 2 * next_row;
   408     unsigned char *lum2;
   409     int x, y;
   410     int cr_r;
   411     int crb_g;
   412     int cb_b;
   413     int cols_2 = cols / 2;
   414 
   415     lum2 = lum + cols;
   416 
   417     mod = next_row * 3 + mod * 3;
   418 
   419     y = rows / 2;
   420     while (y--) {
   421         x = cols_2;
   422         while (x--) {
   423             register int L;
   424 
   425             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   426             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   427                 + colortab[*cb + 2 * 256];
   428             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   429             ++cr;
   430             ++cb;
   431 
   432             L = *lum++;
   433             value = (rgb_2_pix[L + cr_r] |
   434                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   435             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   436                 row1[next_row + 3 + 0] = (value) & 0xFF;
   437             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   438                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   439             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   440                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   441             row1 += 2 * 3;
   442 
   443             L = *lum++;
   444             value = (rgb_2_pix[L + cr_r] |
   445                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   446             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   447                 row1[next_row + 3 + 0] = (value) & 0xFF;
   448             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   449                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   450             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   451                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   452             row1 += 2 * 3;
   453 
   454 
   455             /* Now, do second row. */
   456 
   457             L = *lum2++;
   458             value = (rgb_2_pix[L + cr_r] |
   459                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   460             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   461                 row2[next_row + 3 + 0] = (value) & 0xFF;
   462             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   463                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   464             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   465                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   466             row2 += 2 * 3;
   467 
   468             L = *lum2++;
   469             value = (rgb_2_pix[L + cr_r] |
   470                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   471             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   472                 row2[next_row + 3 + 0] = (value) & 0xFF;
   473             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   474                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   475             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   476                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   477             row2 += 2 * 3;
   478         }
   479 
   480         /*
   481          * These values are at the start of the next line, (due
   482          * to the ++'s above),but they need to be at the start
   483          * of the line after that.
   484          */
   485         lum += cols;
   486         lum2 += cols;
   487         row1 += mod;
   488         row2 += mod;
   489     }
   490 }
   491 
   492 static void
   493 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   494                        unsigned char *lum, unsigned char *cr,
   495                        unsigned char *cb, unsigned char *out,
   496                        int rows, int cols, int mod)
   497 {
   498     unsigned int *row1 = (unsigned int *) out;
   499     const int next_row = cols * 2 + mod;
   500     unsigned int *row2 = row1 + 2 * next_row;
   501     unsigned char *lum2;
   502     int x, y;
   503     int cr_r;
   504     int crb_g;
   505     int cb_b;
   506     int cols_2 = cols / 2;
   507 
   508     lum2 = lum + cols;
   509 
   510     mod = (next_row * 3) + mod;
   511 
   512     y = rows / 2;
   513     while (y--) {
   514         x = cols_2;
   515         while (x--) {
   516             register int L;
   517 
   518             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   519             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   520                 + colortab[*cb + 2 * 256];
   521             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   522             ++cr;
   523             ++cb;
   524 
   525             L = *lum++;
   526             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   527                 (rgb_2_pix[L + cr_r] |
   528                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   529             row1 += 2;
   530 
   531             L = *lum++;
   532             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   533                 (rgb_2_pix[L + cr_r] |
   534                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   535             row1 += 2;
   536 
   537 
   538             /* Now, do second row. */
   539 
   540             L = *lum2++;
   541             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   542                 (rgb_2_pix[L + cr_r] |
   543                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   544             row2 += 2;
   545 
   546             L = *lum2++;
   547             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   548                 (rgb_2_pix[L + cr_r] |
   549                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   550             row2 += 2;
   551         }
   552 
   553         /*
   554          * These values are at the start of the next line, (due
   555          * to the ++'s above),but they need to be at the start
   556          * of the line after that.
   557          */
   558         lum += cols;
   559         lum2 += cols;
   560         row1 += mod;
   561         row2 += mod;
   562     }
   563 }
   564 
   565 static void
   566 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   567                        unsigned char *lum, unsigned char *cr,
   568                        unsigned char *cb, unsigned char *out,
   569                        int rows, int cols, int mod)
   570 {
   571     unsigned short *row;
   572     int x, y;
   573     int cr_r;
   574     int crb_g;
   575     int cb_b;
   576     int cols_2 = cols / 2;
   577 
   578     row = (unsigned short *) out;
   579 
   580     y = rows;
   581     while (y--) {
   582         x = cols_2;
   583         while (x--) {
   584             register int L;
   585 
   586             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   587             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   588                 + colortab[*cb + 2 * 256];
   589             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   590             cr += 4;
   591             cb += 4;
   592 
   593             L = *lum;
   594             lum += 2;
   595             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   596                                        rgb_2_pix[L + crb_g] |
   597                                        rgb_2_pix[L + cb_b]);
   598 
   599             L = *lum;
   600             lum += 2;
   601             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   602                                        rgb_2_pix[L + crb_g] |
   603                                        rgb_2_pix[L + cb_b]);
   604 
   605         }
   606 
   607         row += mod;
   608     }
   609 }
   610 
   611 static void
   612 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   613                        unsigned char *lum, unsigned char *cr,
   614                        unsigned char *cb, unsigned char *out,
   615                        int rows, int cols, int mod)
   616 {
   617     unsigned int value;
   618     unsigned char *row;
   619     int x, y;
   620     int cr_r;
   621     int crb_g;
   622     int cb_b;
   623     int cols_2 = cols / 2;
   624 
   625     row = (unsigned char *) out;
   626     mod *= 3;
   627     y = rows;
   628     while (y--) {
   629         x = cols_2;
   630         while (x--) {
   631             register int L;
   632 
   633             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   634             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   635                 + colortab[*cb + 2 * 256];
   636             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   637             cr += 4;
   638             cb += 4;
   639 
   640             L = *lum;
   641             lum += 2;
   642             value = (rgb_2_pix[L + cr_r] |
   643                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   644             *row++ = (value) & 0xFF;
   645             *row++ = (value >> 8) & 0xFF;
   646             *row++ = (value >> 16) & 0xFF;
   647 
   648             L = *lum;
   649             lum += 2;
   650             value = (rgb_2_pix[L + cr_r] |
   651                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   652             *row++ = (value) & 0xFF;
   653             *row++ = (value >> 8) & 0xFF;
   654             *row++ = (value >> 16) & 0xFF;
   655 
   656         }
   657         row += mod;
   658     }
   659 }
   660 
   661 static void
   662 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   663                        unsigned char *lum, unsigned char *cr,
   664                        unsigned char *cb, unsigned char *out,
   665                        int rows, int cols, int mod)
   666 {
   667     unsigned int *row;
   668     int x, y;
   669     int cr_r;
   670     int crb_g;
   671     int cb_b;
   672     int cols_2 = cols / 2;
   673 
   674     row = (unsigned int *) out;
   675     y = rows;
   676     while (y--) {
   677         x = cols_2;
   678         while (x--) {
   679             register int L;
   680 
   681             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   682             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   683                 + colortab[*cb + 2 * 256];
   684             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   685             cr += 4;
   686             cb += 4;
   687 
   688             L = *lum;
   689             lum += 2;
   690             *row++ = (rgb_2_pix[L + cr_r] |
   691                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   692 
   693             L = *lum;
   694             lum += 2;
   695             *row++ = (rgb_2_pix[L + cr_r] |
   696                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   697 
   698 
   699         }
   700         row += mod;
   701     }
   702 }
   703 
   704 /*
   705  * In this function I make use of a nasty trick. The tables have the lower
   706  * 16 bits replicated in the upper 16. This means I can write ints and get
   707  * the horisontal doubling for free (almost).
   708  */
   709 static void
   710 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   711                        unsigned char *lum, unsigned char *cr,
   712                        unsigned char *cb, unsigned char *out,
   713                        int rows, int cols, int mod)
   714 {
   715     unsigned int *row = (unsigned int *) out;
   716     const int next_row = cols + (mod / 2);
   717     int x, y;
   718     int cr_r;
   719     int crb_g;
   720     int cb_b;
   721     int cols_2 = cols / 2;
   722 
   723     y = rows;
   724     while (y--) {
   725         x = cols_2;
   726         while (x--) {
   727             register int L;
   728 
   729             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   730             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   731                 + colortab[*cb + 2 * 256];
   732             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   733             cr += 4;
   734             cb += 4;
   735 
   736             L = *lum;
   737             lum += 2;
   738             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   739                                       rgb_2_pix[L + crb_g] |
   740                                       rgb_2_pix[L + cb_b]);
   741             row++;
   742 
   743             L = *lum;
   744             lum += 2;
   745             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   746                                       rgb_2_pix[L + crb_g] |
   747                                       rgb_2_pix[L + cb_b]);
   748             row++;
   749 
   750         }
   751         row += next_row;
   752     }
   753 }
   754 
   755 static void
   756 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   757                        unsigned char *lum, unsigned char *cr,
   758                        unsigned char *cb, unsigned char *out,
   759                        int rows, int cols, int mod)
   760 {
   761     unsigned int value;
   762     unsigned char *row = out;
   763     const int next_row = (cols * 2 + mod) * 3;
   764     int x, y;
   765     int cr_r;
   766     int crb_g;
   767     int cb_b;
   768     int cols_2 = cols / 2;
   769     y = rows;
   770     while (y--) {
   771         x = cols_2;
   772         while (x--) {
   773             register int L;
   774 
   775             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   776             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   777                 + colortab[*cb + 2 * 256];
   778             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   779             cr += 4;
   780             cb += 4;
   781 
   782             L = *lum;
   783             lum += 2;
   784             value = (rgb_2_pix[L + cr_r] |
   785                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   786             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   787                 row[next_row + 3 + 0] = (value) & 0xFF;
   788             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   789                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   790             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   791                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   792             row += 2 * 3;
   793 
   794             L = *lum;
   795             lum += 2;
   796             value = (rgb_2_pix[L + cr_r] |
   797                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   798             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   799                 row[next_row + 3 + 0] = (value) & 0xFF;
   800             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   801                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   802             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   803                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   804             row += 2 * 3;
   805 
   806         }
   807         row += next_row;
   808     }
   809 }
   810 
   811 static void
   812 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   813                        unsigned char *lum, unsigned char *cr,
   814                        unsigned char *cb, unsigned char *out,
   815                        int rows, int cols, int mod)
   816 {
   817     unsigned int *row = (unsigned int *) out;
   818     const int next_row = cols * 2 + mod;
   819     int x, y;
   820     int cr_r;
   821     int crb_g;
   822     int cb_b;
   823     int cols_2 = cols / 2;
   824     mod += mod;
   825     y = rows;
   826     while (y--) {
   827         x = cols_2;
   828         while (x--) {
   829             register int L;
   830 
   831             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   832             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   833                 + colortab[*cb + 2 * 256];
   834             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   835             cr += 4;
   836             cb += 4;
   837 
   838             L = *lum;
   839             lum += 2;
   840             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   841                 (rgb_2_pix[L + cr_r] |
   842                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   843             row += 2;
   844 
   845             L = *lum;
   846             lum += 2;
   847             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   848                 (rgb_2_pix[L + cr_r] |
   849                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   850             row += 2;
   851 
   852 
   853         }
   854 
   855         row += next_row;
   856     }
   857 }
   858 
   859 /*
   860  * How many 1 bits are there in the Uint32.
   861  * Low performance, do not call often.
   862  */
   863 static int
   864 number_of_bits_set(Uint32 a)
   865 {
   866     if (!a)
   867         return 0;
   868     if (a & 1)
   869         return 1 + number_of_bits_set(a >> 1);
   870     return (number_of_bits_set(a >> 1));
   871 }
   872 
   873 /*
   874  * How many 0 bits are there at least significant end of Uint32.
   875  * Low performance, do not call often.
   876  */
   877 static int
   878 free_bits_at_bottom(Uint32 a)
   879 {
   880     /* assume char is 8 bits */
   881     if (!a)
   882         return sizeof(Uint32) * 8;
   883     if (((Sint32) a) & 1l)
   884         return 0;
   885     return 1 + free_bits_at_bottom(a >> 1);
   886 }
   887 
   888 static int
   889 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   890 {
   891     Uint32 *r_2_pix_alloc;
   892     Uint32 *g_2_pix_alloc;
   893     Uint32 *b_2_pix_alloc;
   894     int i;
   895     int bpp;
   896     Uint32 Rmask, Gmask, Bmask, Amask;
   897 
   898     if (!SDL_PixelFormatEnumToMasks
   899         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   900         return SDL_SetError("Unsupported YUV destination format");
   901     }
   902 
   903     swdata->target_format = target_format;
   904     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   905     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   906     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   907 
   908     /*
   909      * Set up entries 0-255 in rgb-to-pixel value tables.
   910      */
   911     for (i = 0; i < 256; ++i) {
   912         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   913         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
   914         r_2_pix_alloc[i + 256] |= Amask;
   915         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   916         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
   917         g_2_pix_alloc[i + 256] |= Amask;
   918         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   919         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
   920         b_2_pix_alloc[i + 256] |= Amask;
   921     }
   922 
   923     /*
   924      * If we have 16-bit output depth, then we double the value
   925      * in the top word. This means that we can write out both
   926      * pixels in the pixel doubling mode with one op. It is
   927      * harmless in the normal case as storing a 32-bit value
   928      * through a short pointer will lose the top bits anyway.
   929      */
   930     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   931         for (i = 0; i < 256; ++i) {
   932             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   933             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   934             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   935         }
   936     }
   937 
   938     /*
   939      * Spread out the values we have to the rest of the array so that
   940      * we do not need to check for overflow.
   941      */
   942     for (i = 0; i < 256; ++i) {
   943         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   944         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   945         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   946         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   947         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   948         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   949     }
   950 
   951     /* You have chosen wisely... */
   952     switch (swdata->format) {
   953     case SDL_PIXELFORMAT_YV12:
   954     case SDL_PIXELFORMAT_IYUV:
   955         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   956 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   957             /* inline assembly functions */
   958             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   959                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   960                 && (swdata->w & 15) == 0) {
   961 /* printf("Using MMX 16-bit 565 dither\n"); */
   962                 swdata->Display1X = Color565DitherYV12MMX1X;
   963             } else {
   964 /* printf("Using C 16-bit dither\n"); */
   965                 swdata->Display1X = Color16DitherYV12Mod1X;
   966             }
   967 #else
   968             swdata->Display1X = Color16DitherYV12Mod1X;
   969 #endif
   970             swdata->Display2X = Color16DitherYV12Mod2X;
   971         }
   972         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   973             swdata->Display1X = Color24DitherYV12Mod1X;
   974             swdata->Display2X = Color24DitherYV12Mod2X;
   975         }
   976         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   977 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   978             /* inline assembly functions */
   979             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   980                 (Gmask == 0x0000FF00) &&
   981                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   982 /* printf("Using MMX 32-bit dither\n"); */
   983                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   984             } else {
   985 /* printf("Using C 32-bit dither\n"); */
   986                 swdata->Display1X = Color32DitherYV12Mod1X;
   987             }
   988 #else
   989             swdata->Display1X = Color32DitherYV12Mod1X;
   990 #endif
   991             swdata->Display2X = Color32DitherYV12Mod2X;
   992         }
   993         break;
   994     case SDL_PIXELFORMAT_YUY2:
   995     case SDL_PIXELFORMAT_UYVY:
   996     case SDL_PIXELFORMAT_YVYU:
   997         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   998             swdata->Display1X = Color16DitherYUY2Mod1X;
   999             swdata->Display2X = Color16DitherYUY2Mod2X;
  1000         }
  1001         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1002             swdata->Display1X = Color24DitherYUY2Mod1X;
  1003             swdata->Display2X = Color24DitherYUY2Mod2X;
  1004         }
  1005         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1006             swdata->Display1X = Color32DitherYUY2Mod1X;
  1007             swdata->Display2X = Color32DitherYUY2Mod2X;
  1008         }
  1009         break;
  1010     default:
  1011         /* We should never get here (caught above) */
  1012         break;
  1013     }
  1014 
  1015     if (swdata->display) {
  1016         SDL_FreeSurface(swdata->display);
  1017         swdata->display = NULL;
  1018     }
  1019     return 0;
  1020 }
  1021 
  1022 SDL_SW_YUVTexture *
  1023 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1024 {
  1025     SDL_SW_YUVTexture *swdata;
  1026     int *Cr_r_tab;
  1027     int *Cr_g_tab;
  1028     int *Cb_g_tab;
  1029     int *Cb_b_tab;
  1030     int i;
  1031     int CR, CB;
  1032 
  1033     switch (format) {
  1034     case SDL_PIXELFORMAT_YV12:
  1035     case SDL_PIXELFORMAT_IYUV:
  1036     case SDL_PIXELFORMAT_YUY2:
  1037     case SDL_PIXELFORMAT_UYVY:
  1038     case SDL_PIXELFORMAT_YVYU:
  1039         break;
  1040     default:
  1041         SDL_SetError("Unsupported YUV format");
  1042         return NULL;
  1043     }
  1044 
  1045     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1046     if (!swdata) {
  1047         SDL_OutOfMemory();
  1048         return NULL;
  1049     }
  1050 
  1051     swdata->format = format;
  1052     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1053     swdata->w = w;
  1054     swdata->h = h;
  1055     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1056     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1057     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1058     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1059         SDL_SW_DestroyYUVTexture(swdata);
  1060         SDL_OutOfMemory();
  1061         return NULL;
  1062     }
  1063 
  1064     /* Generate the tables for the display surface */
  1065     Cr_r_tab = &swdata->colortab[0 * 256];
  1066     Cr_g_tab = &swdata->colortab[1 * 256];
  1067     Cb_g_tab = &swdata->colortab[2 * 256];
  1068     Cb_b_tab = &swdata->colortab[3 * 256];
  1069     for (i = 0; i < 256; i++) {
  1070         /* Gamma correction (luminescence table) and chroma correction
  1071            would be done here.  See the Berkeley mpeg_play sources.
  1072          */
  1073         CB = CR = (i - 128);
  1074         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1075         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1076         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1077         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1078     }
  1079 
  1080     /* Find the pitch and offset values for the overlay */
  1081     switch (format) {
  1082     case SDL_PIXELFORMAT_YV12:
  1083     case SDL_PIXELFORMAT_IYUV:
  1084         swdata->pitches[0] = w;
  1085         swdata->pitches[1] = swdata->pitches[0] / 2;
  1086         swdata->pitches[2] = swdata->pitches[0] / 2;
  1087         swdata->planes[0] = swdata->pixels;
  1088         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1089         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1090         break;
  1091     case SDL_PIXELFORMAT_YUY2:
  1092     case SDL_PIXELFORMAT_UYVY:
  1093     case SDL_PIXELFORMAT_YVYU:
  1094         swdata->pitches[0] = w * 2;
  1095         swdata->planes[0] = swdata->pixels;
  1096         break;
  1097     default:
  1098         SDL_assert(0 && "We should never get here (caught above)");
  1099         break;
  1100     }
  1101 
  1102     /* We're all done.. */
  1103     return (swdata);
  1104 }
  1105 
  1106 int
  1107 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1108                              int *pitch)
  1109 {
  1110     *pixels = swdata->planes[0];
  1111     *pitch = swdata->pitches[0];
  1112     return 0;
  1113 }
  1114 
  1115 int
  1116 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1117                         const void *pixels, int pitch)
  1118 {
  1119     switch (swdata->format) {
  1120     case SDL_PIXELFORMAT_YV12:
  1121     case SDL_PIXELFORMAT_IYUV:
  1122         if (rect->x == 0 && rect->y == 0 &&
  1123             rect->w == swdata->w && rect->h == swdata->h) {
  1124                 SDL_memcpy(swdata->pixels, pixels,
  1125                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1126         } else {
  1127             Uint8 *src, *dst;
  1128             int row;
  1129             size_t length;
  1130 
  1131             /* Copy the Y plane */
  1132             src = (Uint8 *) pixels;
  1133             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1134             length = rect->w;
  1135             for (row = 0; row < rect->h; ++row) {
  1136                 SDL_memcpy(dst, src, length);
  1137                 src += pitch;
  1138                 dst += swdata->w;
  1139             }
  1140 
  1141             /* Copy the next plane */
  1142             src = (Uint8 *) pixels + rect->h * pitch;
  1143             dst = swdata->pixels + swdata->h * swdata->w;
  1144             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1145             length = rect->w / 2;
  1146             for (row = 0; row < rect->h/2; ++row) {
  1147                 SDL_memcpy(dst, src, length);
  1148                 src += pitch/2;
  1149                 dst += swdata->w/2;
  1150             }
  1151 
  1152             /* Copy the next plane */
  1153             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1154             dst = swdata->pixels + swdata->h * swdata->w +
  1155                   (swdata->h * swdata->w) / 4;
  1156             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1157             length = rect->w / 2;
  1158             for (row = 0; row < rect->h/2; ++row) {
  1159                 SDL_memcpy(dst, src, length);
  1160                 src += pitch/2;
  1161                 dst += swdata->w/2;
  1162             }
  1163         }
  1164         break;
  1165     case SDL_PIXELFORMAT_YUY2:
  1166     case SDL_PIXELFORMAT_UYVY:
  1167     case SDL_PIXELFORMAT_YVYU:
  1168         {
  1169             Uint8 *src, *dst;
  1170             int row;
  1171             size_t length;
  1172 
  1173             src = (Uint8 *) pixels;
  1174             dst =
  1175                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1176                 rect->x * 2;
  1177             length = rect->w * 2;
  1178             for (row = 0; row < rect->h; ++row) {
  1179                 SDL_memcpy(dst, src, length);
  1180                 src += pitch;
  1181                 dst += swdata->pitches[0];
  1182             }
  1183         }
  1184         break;
  1185     }
  1186     return 0;
  1187 }
  1188 
  1189 int
  1190 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1191                       void **pixels, int *pitch)
  1192 {
  1193     switch (swdata->format) {
  1194     case SDL_PIXELFORMAT_YV12:
  1195     case SDL_PIXELFORMAT_IYUV:
  1196         if (rect
  1197             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1198                 || rect->h != swdata->h)) {
  1199             return SDL_SetError
  1200                 ("YV12 and IYUV textures only support full surface locks");
  1201         }
  1202         break;
  1203     }
  1204 
  1205     if (rect) {
  1206         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1207     } else {
  1208         *pixels = swdata->planes[0];
  1209     }
  1210     *pitch = swdata->pitches[0];
  1211     return 0;
  1212 }
  1213 
  1214 void
  1215 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1216 {
  1217 }
  1218 
  1219 int
  1220 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1221                     Uint32 target_format, int w, int h, void *pixels,
  1222                     int pitch)
  1223 {
  1224     int stretch;
  1225     int scale_2x;
  1226     Uint8 *lum, *Cr, *Cb;
  1227     int mod;
  1228 
  1229     /* Make sure we're set up to display in the desired format */
  1230     if (target_format != swdata->target_format) {
  1231         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1232             return -1;
  1233         }
  1234     }
  1235 
  1236     stretch = 0;
  1237     scale_2x = 0;
  1238     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1239         || srcrect->h < swdata->h) {
  1240         /* The source rectangle has been clipped.
  1241            Using a scratch surface is easier than adding clipped
  1242            source support to all the blitters, plus that would
  1243            slow them down in the general unclipped case.
  1244          */
  1245         stretch = 1;
  1246     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1247         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1248             scale_2x = 1;
  1249         } else {
  1250             stretch = 1;
  1251         }
  1252     }
  1253     if (stretch) {
  1254         int bpp;
  1255         Uint32 Rmask, Gmask, Bmask, Amask;
  1256 
  1257         if (swdata->display) {
  1258             swdata->display->w = w;
  1259             swdata->display->h = h;
  1260             swdata->display->pixels = pixels;
  1261             swdata->display->pitch = pitch;
  1262         } else {
  1263             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1264             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1265                                        &Bmask, &Amask);
  1266             swdata->display =
  1267                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1268                                          Gmask, Bmask, Amask);
  1269             if (!swdata->display) {
  1270                 return (-1);
  1271             }
  1272         }
  1273         if (!swdata->stretch) {
  1274             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1275             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1276                                        &Bmask, &Amask);
  1277             swdata->stretch =
  1278                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1279                                      Gmask, Bmask, Amask);
  1280             if (!swdata->stretch) {
  1281                 return (-1);
  1282             }
  1283         }
  1284         pixels = swdata->stretch->pixels;
  1285         pitch = swdata->stretch->pitch;
  1286     }
  1287     switch (swdata->format) {
  1288     case SDL_PIXELFORMAT_YV12:
  1289         lum = swdata->planes[0];
  1290         Cr = swdata->planes[1];
  1291         Cb = swdata->planes[2];
  1292         break;
  1293     case SDL_PIXELFORMAT_IYUV:
  1294         lum = swdata->planes[0];
  1295         Cr = swdata->planes[2];
  1296         Cb = swdata->planes[1];
  1297         break;
  1298     case SDL_PIXELFORMAT_YUY2:
  1299         lum = swdata->planes[0];
  1300         Cr = lum + 3;
  1301         Cb = lum + 1;
  1302         break;
  1303     case SDL_PIXELFORMAT_UYVY:
  1304         lum = swdata->planes[0] + 1;
  1305         Cr = lum + 1;
  1306         Cb = lum - 1;
  1307         break;
  1308     case SDL_PIXELFORMAT_YVYU:
  1309         lum = swdata->planes[0];
  1310         Cr = lum + 1;
  1311         Cb = lum + 3;
  1312         break;
  1313     default:
  1314         return SDL_SetError("Unsupported YUV format in copy");
  1315     }
  1316     mod = (pitch / SDL_BYTESPERPIXEL(target_format));
  1317 
  1318     if (scale_2x) {
  1319         mod -= (swdata->w * 2);
  1320         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1321                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1322     } else {
  1323         mod -= swdata->w;
  1324         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1325                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1326     }
  1327     if (stretch) {
  1328         SDL_Rect rect = *srcrect;
  1329         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1330     }
  1331     return 0;
  1332 }
  1333 
  1334 void
  1335 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1336 {
  1337     if (swdata) {
  1338         SDL_free(swdata->pixels);
  1339         SDL_free(swdata->colortab);
  1340         SDL_free(swdata->rgb_2_pix);
  1341         if (swdata->stretch) {
  1342             SDL_FreeSurface(swdata->stretch);
  1343         }
  1344         if (swdata->display) {
  1345             SDL_FreeSurface(swdata->display);
  1346         }
  1347         SDL_free(swdata);
  1348     }
  1349 }
  1350 
  1351 /* vi: set ts=4 sw=4 expandtab: */