src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 01 Jan 2017 18:33:28 -0800
changeset 10737 3406a0f8b041
parent 10650 b6ec7005ca15
child 11156 5ba02f3c5a8b
permissions -rw-r--r--
Updated copyright for 2017
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  *
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  *
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  *
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  *
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  *
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  *
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  *
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  *
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  *
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_assert.h"
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_yuv_sw_c.h"
    89 
    90 
    91 /* The colorspace conversion functions */
    92 
     93 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
/*
 * Converters defined outside this file.  They are only declared when
 * building with GCC 3 or newer for i386, with optimization enabled and
 * SDL_ASSEMBLY_ROUTINES set.  They take the same argument list as the C
 * converters defined in this file.
 * NOTE(review): the "MMX" in the names suggests hand-written MMX assembly
 * implementations -- confirm against the build files.
 */
     94 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
     95                                     unsigned char *lum, unsigned char *cr,
     96                                     unsigned char *cb, unsigned char *out,
     97                                     int rows, int cols, int mod);
     98 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
     99                                     unsigned char *lum, unsigned char *cr,
    100                                     unsigned char *cb, unsigned char *out,
    101                                     int rows, int cols, int mod);
    102 #endif
   103 
   104 static void
   105 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   106                        unsigned char *lum, unsigned char *cr,
   107                        unsigned char *cb, unsigned char *out,
   108                        int rows, int cols, int mod)
   109 {
   110     unsigned short *row1;
   111     unsigned short *row2;
   112     unsigned char *lum2;
   113     int x, y;
   114     int cr_r;
   115     int crb_g;
   116     int cb_b;
   117     int cols_2 = cols / 2;
   118 
   119     row1 = (unsigned short *) out;
   120     row2 = row1 + cols + mod;
   121     lum2 = lum + cols;
   122 
   123     mod += cols + mod;
   124 
   125     y = rows / 2;
   126     while (y--) {
   127         x = cols_2;
   128         while (x--) {
   129             register int L;
   130 
   131             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   132             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   133                 + colortab[*cb + 2 * 256];
   134             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   135             ++cr;
   136             ++cb;
   137 
   138             L = *lum++;
   139             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   140                                         rgb_2_pix[L + crb_g] |
   141                                         rgb_2_pix[L + cb_b]);
   142 
   143             L = *lum++;
   144             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   145                                         rgb_2_pix[L + crb_g] |
   146                                         rgb_2_pix[L + cb_b]);
   147 
   148 
   149             /* Now, do second row.  */
   150 
   151             L = *lum2++;
   152             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   153                                         rgb_2_pix[L + crb_g] |
   154                                         rgb_2_pix[L + cb_b]);
   155 
   156             L = *lum2++;
   157             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   158                                         rgb_2_pix[L + crb_g] |
   159                                         rgb_2_pix[L + cb_b]);
   160         }
   161 
   162         /*
   163          * These values are at the start of the next line, (due
   164          * to the ++'s above),but they need to be at the start
   165          * of the line after that.
   166          */
   167         lum += cols;
   168         lum2 += cols;
   169         row1 += mod;
   170         row2 += mod;
   171     }
   172 }
   173 
   174 static void
   175 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   176                        unsigned char *lum, unsigned char *cr,
   177                        unsigned char *cb, unsigned char *out,
   178                        int rows, int cols, int mod)
   179 {
   180     unsigned int value;
   181     unsigned char *row1;
   182     unsigned char *row2;
   183     unsigned char *lum2;
   184     int x, y;
   185     int cr_r;
   186     int crb_g;
   187     int cb_b;
   188     int cols_2 = cols / 2;
   189 
   190     row1 = out;
   191     row2 = row1 + cols * 3 + mod * 3;
   192     lum2 = lum + cols;
   193 
   194     mod += cols + mod;
   195     mod *= 3;
   196 
   197     y = rows / 2;
   198     while (y--) {
   199         x = cols_2;
   200         while (x--) {
   201             register int L;
   202 
   203             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   204             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   205                 + colortab[*cb + 2 * 256];
   206             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   207             ++cr;
   208             ++cb;
   209 
   210             L = *lum++;
   211             value = (rgb_2_pix[L + cr_r] |
   212                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   213             *row1++ = (value) & 0xFF;
   214             *row1++ = (value >> 8) & 0xFF;
   215             *row1++ = (value >> 16) & 0xFF;
   216 
   217             L = *lum++;
   218             value = (rgb_2_pix[L + cr_r] |
   219                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   220             *row1++ = (value) & 0xFF;
   221             *row1++ = (value >> 8) & 0xFF;
   222             *row1++ = (value >> 16) & 0xFF;
   223 
   224 
   225             /* Now, do second row.  */
   226 
   227             L = *lum2++;
   228             value = (rgb_2_pix[L + cr_r] |
   229                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   230             *row2++ = (value) & 0xFF;
   231             *row2++ = (value >> 8) & 0xFF;
   232             *row2++ = (value >> 16) & 0xFF;
   233 
   234             L = *lum2++;
   235             value = (rgb_2_pix[L + cr_r] |
   236                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   237             *row2++ = (value) & 0xFF;
   238             *row2++ = (value >> 8) & 0xFF;
   239             *row2++ = (value >> 16) & 0xFF;
   240         }
   241 
   242         /*
   243          * These values are at the start of the next line, (due
   244          * to the ++'s above),but they need to be at the start
   245          * of the line after that.
   246          */
   247         lum += cols;
   248         lum2 += cols;
   249         row1 += mod;
   250         row2 += mod;
   251     }
   252 }
   253 
   254 static void
   255 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   256                        unsigned char *lum, unsigned char *cr,
   257                        unsigned char *cb, unsigned char *out,
   258                        int rows, int cols, int mod)
   259 {
   260     unsigned int *row1;
   261     unsigned int *row2;
   262     unsigned char *lum2;
   263     int x, y;
   264     int cr_r;
   265     int crb_g;
   266     int cb_b;
   267     int cols_2 = cols / 2;
   268 
   269     row1 = (unsigned int *) out;
   270     row2 = row1 + cols + mod;
   271     lum2 = lum + cols;
   272 
   273     mod += cols + mod;
   274 
   275     y = rows / 2;
   276     while (y--) {
   277         x = cols_2;
   278         while (x--) {
   279             register int L;
   280 
   281             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   282             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   283                 + colortab[*cb + 2 * 256];
   284             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   285             ++cr;
   286             ++cb;
   287 
   288             L = *lum++;
   289             *row1++ = (rgb_2_pix[L + cr_r] |
   290                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   291 
   292             L = *lum++;
   293             *row1++ = (rgb_2_pix[L + cr_r] |
   294                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   295 
   296 
   297             /* Now, do second row.  */
   298 
   299             L = *lum2++;
   300             *row2++ = (rgb_2_pix[L + cr_r] |
   301                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   302 
   303             L = *lum2++;
   304             *row2++ = (rgb_2_pix[L + cr_r] |
   305                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   306         }
   307 
   308         /*
   309          * These values are at the start of the next line, (due
   310          * to the ++'s above),but they need to be at the start
   311          * of the line after that.
   312          */
   313         lum += cols;
   314         lum2 += cols;
   315         row1 += mod;
   316         row2 += mod;
   317     }
   318 }
   319 
   320 /*
   321  * In this function I make use of a nasty trick. The tables have the lower
   322  * 16 bits replicated in the upper 16. This means I can write ints and get
   323  * the horisontal doubling for free (almost).
   324  */
   325 static void
   326 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   327                        unsigned char *lum, unsigned char *cr,
   328                        unsigned char *cb, unsigned char *out,
   329                        int rows, int cols, int mod)
   330 {
   331     unsigned int *row1 = (unsigned int *) out;
   332     const int next_row = cols + (mod / 2);
   333     unsigned int *row2 = row1 + 2 * next_row;
   334     unsigned char *lum2;
   335     int x, y;
   336     int cr_r;
   337     int crb_g;
   338     int cb_b;
   339     int cols_2 = cols / 2;
   340 
   341     lum2 = lum + cols;
   342 
   343     mod = (next_row * 3) + (mod / 2);
   344 
   345     y = rows / 2;
   346     while (y--) {
   347         x = cols_2;
   348         while (x--) {
   349             register int L;
   350 
   351             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   352             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   353                 + colortab[*cb + 2 * 256];
   354             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   355             ++cr;
   356             ++cb;
   357 
   358             L = *lum++;
   359             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   360                                         rgb_2_pix[L + crb_g] |
   361                                         rgb_2_pix[L + cb_b]);
   362             row1++;
   363 
   364             L = *lum++;
   365             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   366                                         rgb_2_pix[L + crb_g] |
   367                                         rgb_2_pix[L + cb_b]);
   368             row1++;
   369 
   370 
   371             /* Now, do second row. */
   372 
   373             L = *lum2++;
   374             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   375                                         rgb_2_pix[L + crb_g] |
   376                                         rgb_2_pix[L + cb_b]);
   377             row2++;
   378 
   379             L = *lum2++;
   380             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   381                                         rgb_2_pix[L + crb_g] |
   382                                         rgb_2_pix[L + cb_b]);
   383             row2++;
   384         }
   385 
   386         /*
   387          * These values are at the start of the next line, (due
   388          * to the ++'s above),but they need to be at the start
   389          * of the line after that.
   390          */
   391         lum += cols;
   392         lum2 += cols;
   393         row1 += mod;
   394         row2 += mod;
   395     }
   396 }
   397 
   398 static void
   399 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   400                        unsigned char *lum, unsigned char *cr,
   401                        unsigned char *cb, unsigned char *out,
   402                        int rows, int cols, int mod)
   403 {
   404     unsigned int value;
   405     unsigned char *row1 = out;
   406     const int next_row = (cols * 2 + mod) * 3;
   407     unsigned char *row2 = row1 + 2 * next_row;
   408     unsigned char *lum2;
   409     int x, y;
   410     int cr_r;
   411     int crb_g;
   412     int cb_b;
   413     int cols_2 = cols / 2;
   414 
   415     lum2 = lum + cols;
   416 
   417     mod = next_row * 3 + mod * 3;
   418 
   419     y = rows / 2;
   420     while (y--) {
   421         x = cols_2;
   422         while (x--) {
   423             register int L;
   424 
   425             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   426             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   427                 + colortab[*cb + 2 * 256];
   428             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   429             ++cr;
   430             ++cb;
   431 
   432             L = *lum++;
   433             value = (rgb_2_pix[L + cr_r] |
   434                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   435             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   436                 row1[next_row + 3 + 0] = (value) & 0xFF;
   437             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   438                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   439             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   440                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   441             row1 += 2 * 3;
   442 
   443             L = *lum++;
   444             value = (rgb_2_pix[L + cr_r] |
   445                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   446             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   447                 row1[next_row + 3 + 0] = (value) & 0xFF;
   448             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   449                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   450             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   451                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   452             row1 += 2 * 3;
   453 
   454 
   455             /* Now, do second row. */
   456 
   457             L = *lum2++;
   458             value = (rgb_2_pix[L + cr_r] |
   459                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   460             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   461                 row2[next_row + 3 + 0] = (value) & 0xFF;
   462             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   463                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   464             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   465                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   466             row2 += 2 * 3;
   467 
   468             L = *lum2++;
   469             value = (rgb_2_pix[L + cr_r] |
   470                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   471             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   472                 row2[next_row + 3 + 0] = (value) & 0xFF;
   473             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   474                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   475             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   476                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   477             row2 += 2 * 3;
   478         }
   479 
   480         /*
   481          * These values are at the start of the next line, (due
   482          * to the ++'s above),but they need to be at the start
   483          * of the line after that.
   484          */
   485         lum += cols;
   486         lum2 += cols;
   487         row1 += mod;
   488         row2 += mod;
   489     }
   490 }
   491 
   492 static void
   493 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   494                        unsigned char *lum, unsigned char *cr,
   495                        unsigned char *cb, unsigned char *out,
   496                        int rows, int cols, int mod)
   497 {
   498     unsigned int *row1 = (unsigned int *) out;
   499     const int next_row = cols * 2 + mod;
   500     unsigned int *row2 = row1 + 2 * next_row;
   501     unsigned char *lum2;
   502     int x, y;
   503     int cr_r;
   504     int crb_g;
   505     int cb_b;
   506     int cols_2 = cols / 2;
   507 
   508     lum2 = lum + cols;
   509 
   510     mod = (next_row * 3) + mod;
   511 
   512     y = rows / 2;
   513     while (y--) {
   514         x = cols_2;
   515         while (x--) {
   516             register int L;
   517 
   518             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   519             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   520                 + colortab[*cb + 2 * 256];
   521             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   522             ++cr;
   523             ++cb;
   524 
   525             L = *lum++;
   526             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   527                 (rgb_2_pix[L + cr_r] |
   528                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   529             row1 += 2;
   530 
   531             L = *lum++;
   532             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   533                 (rgb_2_pix[L + cr_r] |
   534                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   535             row1 += 2;
   536 
   537 
   538             /* Now, do second row. */
   539 
   540             L = *lum2++;
   541             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   542                 (rgb_2_pix[L + cr_r] |
   543                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   544             row2 += 2;
   545 
   546             L = *lum2++;
   547             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   548                 (rgb_2_pix[L + cr_r] |
   549                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   550             row2 += 2;
   551         }
   552 
   553         /*
   554          * These values are at the start of the next line, (due
   555          * to the ++'s above),but they need to be at the start
   556          * of the line after that.
   557          */
   558         lum += cols;
   559         lum2 += cols;
   560         row1 += mod;
   561         row2 += mod;
   562     }
   563 }
   564 
   565 static void
   566 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   567                        unsigned char *lum, unsigned char *cr,
   568                        unsigned char *cb, unsigned char *out,
   569                        int rows, int cols, int mod)
   570 {
   571     unsigned short *row;
   572     int x, y;
   573     int cr_r;
   574     int crb_g;
   575     int cb_b;
   576     int cols_2 = cols / 2;
   577 
   578     row = (unsigned short *) out;
   579 
   580     y = rows;
   581     while (y--) {
   582         x = cols_2;
   583         while (x--) {
   584             register int L;
   585 
   586             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   587             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   588                 + colortab[*cb + 2 * 256];
   589             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   590             cr += 4;
   591             cb += 4;
   592 
   593             L = *lum;
   594             lum += 2;
   595             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   596                                        rgb_2_pix[L + crb_g] |
   597                                        rgb_2_pix[L + cb_b]);
   598 
   599             L = *lum;
   600             lum += 2;
   601             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   602                                        rgb_2_pix[L + crb_g] |
   603                                        rgb_2_pix[L + cb_b]);
   604 
   605         }
   606 
   607         row += mod;
   608     }
   609 }
   610 
   611 static void
   612 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   613                        unsigned char *lum, unsigned char *cr,
   614                        unsigned char *cb, unsigned char *out,
   615                        int rows, int cols, int mod)
   616 {
   617     unsigned int value;
   618     unsigned char *row;
   619     int x, y;
   620     int cr_r;
   621     int crb_g;
   622     int cb_b;
   623     int cols_2 = cols / 2;
   624 
   625     row = (unsigned char *) out;
   626     mod *= 3;
   627     y = rows;
   628     while (y--) {
   629         x = cols_2;
   630         while (x--) {
   631             register int L;
   632 
   633             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   634             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   635                 + colortab[*cb + 2 * 256];
   636             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   637             cr += 4;
   638             cb += 4;
   639 
   640             L = *lum;
   641             lum += 2;
   642             value = (rgb_2_pix[L + cr_r] |
   643                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   644             *row++ = (value) & 0xFF;
   645             *row++ = (value >> 8) & 0xFF;
   646             *row++ = (value >> 16) & 0xFF;
   647 
   648             L = *lum;
   649             lum += 2;
   650             value = (rgb_2_pix[L + cr_r] |
   651                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   652             *row++ = (value) & 0xFF;
   653             *row++ = (value >> 8) & 0xFF;
   654             *row++ = (value >> 16) & 0xFF;
   655 
   656         }
   657         row += mod;
   658     }
   659 }
   660 
   661 static void
   662 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   663                        unsigned char *lum, unsigned char *cr,
   664                        unsigned char *cb, unsigned char *out,
   665                        int rows, int cols, int mod)
   666 {
   667     unsigned int *row;
   668     int x, y;
   669     int cr_r;
   670     int crb_g;
   671     int cb_b;
   672     int cols_2 = cols / 2;
   673 
   674     row = (unsigned int *) out;
   675     y = rows;
   676     while (y--) {
   677         x = cols_2;
   678         while (x--) {
   679             register int L;
   680 
   681             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   682             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   683                 + colortab[*cb + 2 * 256];
   684             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   685             cr += 4;
   686             cb += 4;
   687 
   688             L = *lum;
   689             lum += 2;
   690             *row++ = (rgb_2_pix[L + cr_r] |
   691                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   692 
   693             L = *lum;
   694             lum += 2;
   695             *row++ = (rgb_2_pix[L + cr_r] |
   696                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   697 
   698 
   699         }
   700         row += mod;
   701     }
   702 }
   703 
   704 /*
   705  * In this function I make use of a nasty trick. The tables have the lower
   706  * 16 bits replicated in the upper 16. This means I can write ints and get
   707  * the horisontal doubling for free (almost).
   708  */
   709 static void
   710 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   711                        unsigned char *lum, unsigned char *cr,
   712                        unsigned char *cb, unsigned char *out,
   713                        int rows, int cols, int mod)
   714 {
   715     unsigned int *row = (unsigned int *) out;
   716     const int next_row = cols + (mod / 2);
   717     int x, y;
   718     int cr_r;
   719     int crb_g;
   720     int cb_b;
   721     int cols_2 = cols / 2;
   722 
   723     y = rows;
   724     while (y--) {
   725         x = cols_2;
   726         while (x--) {
   727             register int L;
   728 
   729             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   730             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   731                 + colortab[*cb + 2 * 256];
   732             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   733             cr += 4;
   734             cb += 4;
   735 
   736             L = *lum;
   737             lum += 2;
   738             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   739                                       rgb_2_pix[L + crb_g] |
   740                                       rgb_2_pix[L + cb_b]);
   741             row++;
   742 
   743             L = *lum;
   744             lum += 2;
   745             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   746                                       rgb_2_pix[L + crb_g] |
   747                                       rgb_2_pix[L + cb_b]);
   748             row++;
   749 
   750         }
   751         row += next_row;
   752     }
   753 }
   754 
   755 static void
   756 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   757                        unsigned char *lum, unsigned char *cr,
   758                        unsigned char *cb, unsigned char *out,
   759                        int rows, int cols, int mod)
   760 {
   761     unsigned int value;
   762     unsigned char *row = out;
   763     const int next_row = (cols * 2 + mod) * 3;
   764     int x, y;
   765     int cr_r;
   766     int crb_g;
   767     int cb_b;
   768     int cols_2 = cols / 2;
   769     y = rows;
   770     while (y--) {
   771         x = cols_2;
   772         while (x--) {
   773             register int L;
   774 
   775             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   776             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   777                 + colortab[*cb + 2 * 256];
   778             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   779             cr += 4;
   780             cb += 4;
   781 
   782             L = *lum;
   783             lum += 2;
   784             value = (rgb_2_pix[L + cr_r] |
   785                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   786             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   787                 row[next_row + 3 + 0] = (value) & 0xFF;
   788             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   789                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   790             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   791                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   792             row += 2 * 3;
   793 
   794             L = *lum;
   795             lum += 2;
   796             value = (rgb_2_pix[L + cr_r] |
   797                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   798             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   799                 row[next_row + 3 + 0] = (value) & 0xFF;
   800             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   801                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   802             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   803                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   804             row += 2 * 3;
   805 
   806         }
   807         row += next_row;
   808     }
   809 }
   810 
   811 static void
   812 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   813                        unsigned char *lum, unsigned char *cr,
   814                        unsigned char *cb, unsigned char *out,
   815                        int rows, int cols, int mod)
   816 {
   817     unsigned int *row = (unsigned int *) out;
   818     const int next_row = cols * 2 + mod;
   819     int x, y;
   820     int cr_r;
   821     int crb_g;
   822     int cb_b;
   823     int cols_2 = cols / 2;
   824     mod += mod;
   825     y = rows;
   826     while (y--) {
   827         x = cols_2;
   828         while (x--) {
   829             register int L;
   830 
   831             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   832             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   833                 + colortab[*cb + 2 * 256];
   834             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   835             cr += 4;
   836             cb += 4;
   837 
   838             L = *lum;
   839             lum += 2;
   840             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   841                 (rgb_2_pix[L + cr_r] |
   842                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   843             row += 2;
   844 
   845             L = *lum;
   846             lum += 2;
   847             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   848                 (rgb_2_pix[L + cr_r] |
   849                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   850             row += 2;
   851 
   852 
   853         }
   854 
   855         row += next_row;
   856     }
   857 }
   858 
   859 /*
   860  * How many 1 bits are there in the Uint32.
   861  * Low performance, do not call often.
   862  */
   863 static int
   864 number_of_bits_set(Uint32 a)
   865 {
   866     if (!a)
   867         return 0;
   868     if (a & 1)
   869         return 1 + number_of_bits_set(a >> 1);
   870     return (number_of_bits_set(a >> 1));
   871 }
   872 
   873 /*
   874  * How many 0 bits are there at least significant end of Uint32.
   875  * Low performance, do not call often.
   876  */
   877 static int
   878 free_bits_at_bottom_nonzero(Uint32 a)
   879 {
   880     SDL_assert(a != 0);
   881     return (((Sint32) a) & 1l) ? 0 : 1 + free_bits_at_bottom_nonzero(a >> 1);
   882 }
   883 
   884 static SDL_INLINE int
   885 free_bits_at_bottom(Uint32 a)
   886 {
   887     return a ? free_bits_at_bottom_nonzero(a) : 32;
   888 }
   889 
   890 static int
   891 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   892 {
   893     Uint32 *r_2_pix_alloc;
   894     Uint32 *g_2_pix_alloc;
   895     Uint32 *b_2_pix_alloc;
   896     int i;
   897     int bpp;
   898     Uint32 Rmask, Gmask, Bmask, Amask;
   899     int freebits;
   900 
   901     if (!SDL_PixelFormatEnumToMasks
   902         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   903         return SDL_SetError("Unsupported YUV destination format");
   904     }
   905 
   906     swdata->target_format = target_format;
   907     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   908     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   909     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   910 
   911     /*
   912      * Set up entries 0-255 in rgb-to-pixel value tables.
   913      */
   914     for (i = 0; i < 256; ++i) {
   915         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   916         freebits = free_bits_at_bottom(Rmask);
   917         if (freebits < 32) {
   918             r_2_pix_alloc[i + 256] <<= freebits;
   919         }
   920         r_2_pix_alloc[i + 256] |= Amask;
   921 
   922         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   923         freebits = free_bits_at_bottom(Gmask);
   924         if (freebits < 32) {
   925             g_2_pix_alloc[i + 256] <<= freebits;
   926         }
   927         g_2_pix_alloc[i + 256] |= Amask;
   928 
   929         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   930         freebits = free_bits_at_bottom(Bmask);
   931         if (freebits < 32) {
   932             b_2_pix_alloc[i + 256] <<= freebits;
   933         }
   934         b_2_pix_alloc[i + 256] |= Amask;
   935     }
   936 
   937     /*
   938      * If we have 16-bit output depth, then we double the value
   939      * in the top word. This means that we can write out both
   940      * pixels in the pixel doubling mode with one op. It is
   941      * harmless in the normal case as storing a 32-bit value
   942      * through a short pointer will lose the top bits anyway.
   943      */
   944     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   945         for (i = 0; i < 256; ++i) {
   946             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   947             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   948             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   949         }
   950     }
   951 
   952     /*
   953      * Spread out the values we have to the rest of the array so that
   954      * we do not need to check for overflow.
   955      */
   956     for (i = 0; i < 256; ++i) {
   957         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   958         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   959         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   960         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   961         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   962         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   963     }
   964 
   965     /* You have chosen wisely... */
   966     switch (swdata->format) {
   967     case SDL_PIXELFORMAT_YV12:
   968     case SDL_PIXELFORMAT_IYUV:
   969         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   970 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   971             /* inline assembly functions */
   972             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   973                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   974                 && (swdata->w & 15) == 0) {
   975 /* printf("Using MMX 16-bit 565 dither\n"); */
   976                 swdata->Display1X = Color565DitherYV12MMX1X;
   977             } else {
   978 /* printf("Using C 16-bit dither\n"); */
   979                 swdata->Display1X = Color16DitherYV12Mod1X;
   980             }
   981 #else
   982             swdata->Display1X = Color16DitherYV12Mod1X;
   983 #endif
   984             swdata->Display2X = Color16DitherYV12Mod2X;
   985         }
   986         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   987             swdata->Display1X = Color24DitherYV12Mod1X;
   988             swdata->Display2X = Color24DitherYV12Mod2X;
   989         }
   990         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   991 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   992             /* inline assembly functions */
   993             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   994                 (Gmask == 0x0000FF00) &&
   995                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   996 /* printf("Using MMX 32-bit dither\n"); */
   997                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   998             } else {
   999 /* printf("Using C 32-bit dither\n"); */
  1000                 swdata->Display1X = Color32DitherYV12Mod1X;
  1001             }
  1002 #else
  1003             swdata->Display1X = Color32DitherYV12Mod1X;
  1004 #endif
  1005             swdata->Display2X = Color32DitherYV12Mod2X;
  1006         }
  1007         break;
  1008     case SDL_PIXELFORMAT_YUY2:
  1009     case SDL_PIXELFORMAT_UYVY:
  1010     case SDL_PIXELFORMAT_YVYU:
  1011         if (SDL_BYTESPERPIXEL(target_format) == 2) {
  1012             swdata->Display1X = Color16DitherYUY2Mod1X;
  1013             swdata->Display2X = Color16DitherYUY2Mod2X;
  1014         }
  1015         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1016             swdata->Display1X = Color24DitherYUY2Mod1X;
  1017             swdata->Display2X = Color24DitherYUY2Mod2X;
  1018         }
  1019         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1020             swdata->Display1X = Color32DitherYUY2Mod1X;
  1021             swdata->Display2X = Color32DitherYUY2Mod2X;
  1022         }
  1023         break;
  1024     default:
  1025         /* We should never get here (caught above) */
  1026         break;
  1027     }
  1028 
  1029     SDL_FreeSurface(swdata->display);
  1030     swdata->display = NULL;
  1031     return 0;
  1032 }
  1033 
  1034 SDL_SW_YUVTexture *
  1035 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1036 {
  1037     SDL_SW_YUVTexture *swdata;
  1038     int *Cr_r_tab;
  1039     int *Cr_g_tab;
  1040     int *Cb_g_tab;
  1041     int *Cb_b_tab;
  1042     int i;
  1043     int CR, CB;
  1044 
  1045     switch (format) {
  1046     case SDL_PIXELFORMAT_YV12:
  1047     case SDL_PIXELFORMAT_IYUV:
  1048     case SDL_PIXELFORMAT_YUY2:
  1049     case SDL_PIXELFORMAT_UYVY:
  1050     case SDL_PIXELFORMAT_YVYU:
  1051         break;
  1052     default:
  1053         SDL_SetError("Unsupported YUV format");
  1054         return NULL;
  1055     }
  1056 
  1057     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1058     if (!swdata) {
  1059         SDL_OutOfMemory();
  1060         return NULL;
  1061     }
  1062 
  1063     swdata->format = format;
  1064     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1065     swdata->w = w;
  1066     swdata->h = h;
  1067     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1068     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1069     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1070     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1071         SDL_SW_DestroyYUVTexture(swdata);
  1072         SDL_OutOfMemory();
  1073         return NULL;
  1074     }
  1075 
  1076     /* Generate the tables for the display surface */
  1077     Cr_r_tab = &swdata->colortab[0 * 256];
  1078     Cr_g_tab = &swdata->colortab[1 * 256];
  1079     Cb_g_tab = &swdata->colortab[2 * 256];
  1080     Cb_b_tab = &swdata->colortab[3 * 256];
  1081     for (i = 0; i < 256; i++) {
  1082         /* Gamma correction (luminescence table) and chroma correction
  1083            would be done here.  See the Berkeley mpeg_play sources.
  1084          */
  1085         CB = CR = (i - 128);
  1086         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1087         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1088         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1089         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1090     }
  1091 
  1092     /* Find the pitch and offset values for the overlay */
  1093     switch (format) {
  1094     case SDL_PIXELFORMAT_YV12:
  1095     case SDL_PIXELFORMAT_IYUV:
  1096         swdata->pitches[0] = w;
  1097         swdata->pitches[1] = swdata->pitches[0] / 2;
  1098         swdata->pitches[2] = swdata->pitches[0] / 2;
  1099         swdata->planes[0] = swdata->pixels;
  1100         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1101         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1102         break;
  1103     case SDL_PIXELFORMAT_YUY2:
  1104     case SDL_PIXELFORMAT_UYVY:
  1105     case SDL_PIXELFORMAT_YVYU:
  1106         swdata->pitches[0] = w * 2;
  1107         swdata->planes[0] = swdata->pixels;
  1108         break;
  1109     default:
  1110         SDL_assert(0 && "We should never get here (caught above)");
  1111         break;
  1112     }
  1113 
  1114     /* We're all done.. */
  1115     return (swdata);
  1116 }
  1117 
  1118 int
  1119 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1120                              int *pitch)
  1121 {
  1122     *pixels = swdata->planes[0];
  1123     *pitch = swdata->pitches[0];
  1124     return 0;
  1125 }
  1126 
  1127 int
  1128 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1129                         const void *pixels, int pitch)
  1130 {
  1131     switch (swdata->format) {
  1132     case SDL_PIXELFORMAT_YV12:
  1133     case SDL_PIXELFORMAT_IYUV:
  1134         if (rect->x == 0 && rect->y == 0 &&
  1135             rect->w == swdata->w && rect->h == swdata->h) {
  1136                 SDL_memcpy(swdata->pixels, pixels,
  1137                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1138         } else {
  1139             Uint8 *src, *dst;
  1140             int row;
  1141             size_t length;
  1142 
  1143             /* Copy the Y plane */
  1144             src = (Uint8 *) pixels;
  1145             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1146             length = rect->w;
  1147             for (row = 0; row < rect->h; ++row) {
  1148                 SDL_memcpy(dst, src, length);
  1149                 src += pitch;
  1150                 dst += swdata->w;
  1151             }
  1152 
  1153             /* Copy the next plane */
  1154             src = (Uint8 *) pixels + rect->h * pitch;
  1155             dst = swdata->pixels + swdata->h * swdata->w;
  1156             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1157             length = rect->w / 2;
  1158             for (row = 0; row < rect->h/2; ++row) {
  1159                 SDL_memcpy(dst, src, length);
  1160                 src += pitch/2;
  1161                 dst += swdata->w/2;
  1162             }
  1163 
  1164             /* Copy the next plane */
  1165             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1166             dst = swdata->pixels + swdata->h * swdata->w +
  1167                   (swdata->h * swdata->w) / 4;
  1168             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1169             length = rect->w / 2;
  1170             for (row = 0; row < rect->h/2; ++row) {
  1171                 SDL_memcpy(dst, src, length);
  1172                 src += pitch/2;
  1173                 dst += swdata->w/2;
  1174             }
  1175         }
  1176         break;
  1177     case SDL_PIXELFORMAT_YUY2:
  1178     case SDL_PIXELFORMAT_UYVY:
  1179     case SDL_PIXELFORMAT_YVYU:
  1180         {
  1181             Uint8 *src, *dst;
  1182             int row;
  1183             size_t length;
  1184 
  1185             src = (Uint8 *) pixels;
  1186             dst =
  1187                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1188                 rect->x * 2;
  1189             length = rect->w * 2;
  1190             for (row = 0; row < rect->h; ++row) {
  1191                 SDL_memcpy(dst, src, length);
  1192                 src += pitch;
  1193                 dst += swdata->pitches[0];
  1194             }
  1195         }
  1196         break;
  1197     }
  1198     return 0;
  1199 }
  1200 
  1201 int
  1202 SDL_SW_UpdateYUVTexturePlanar(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1203                               const Uint8 *Yplane, int Ypitch,
  1204                               const Uint8 *Uplane, int Upitch,
  1205                               const Uint8 *Vplane, int Vpitch)
  1206 {
  1207     const Uint8 *src;
  1208     Uint8 *dst;
  1209     int row;
  1210     size_t length;
  1211 
  1212     /* Copy the Y plane */
  1213     src = Yplane;
  1214     dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1215     length = rect->w;
  1216     for (row = 0; row < rect->h; ++row) {
  1217         SDL_memcpy(dst, src, length);
  1218         src += Ypitch;
  1219         dst += swdata->w;
  1220     }
  1221 
  1222     /* Copy the U plane */
  1223     src = Uplane;
  1224     if (swdata->format == SDL_PIXELFORMAT_IYUV) {
  1225         dst = swdata->pixels + swdata->h * swdata->w;
  1226     } else {
  1227         dst = swdata->pixels + swdata->h * swdata->w +
  1228               (swdata->h * swdata->w) / 4;
  1229     }
  1230     dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1231     length = rect->w / 2;
  1232     for (row = 0; row < rect->h/2; ++row) {
  1233         SDL_memcpy(dst, src, length);
  1234         src += Upitch;
  1235         dst += swdata->w/2;
  1236     }
  1237 
  1238     /* Copy the V plane */
  1239     src = Vplane;
  1240     if (swdata->format == SDL_PIXELFORMAT_YV12) {
  1241         dst = swdata->pixels + swdata->h * swdata->w;
  1242     } else {
  1243         dst = swdata->pixels + swdata->h * swdata->w +
  1244               (swdata->h * swdata->w) / 4;
  1245     }
  1246     dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1247     length = rect->w / 2;
  1248     for (row = 0; row < rect->h/2; ++row) {
  1249         SDL_memcpy(dst, src, length);
  1250         src += Vpitch;
  1251         dst += swdata->w/2;
  1252     }
  1253     return 0;
  1254 }
  1255 
  1256 int
  1257 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1258                       void **pixels, int *pitch)
  1259 {
  1260     switch (swdata->format) {
  1261     case SDL_PIXELFORMAT_YV12:
  1262     case SDL_PIXELFORMAT_IYUV:
  1263         if (rect
  1264             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1265                 || rect->h != swdata->h)) {
  1266             return SDL_SetError
  1267                 ("YV12 and IYUV textures only support full surface locks");
  1268         }
  1269         break;
  1270     }
  1271 
  1272     if (rect) {
  1273         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1274     } else {
  1275         *pixels = swdata->planes[0];
  1276     }
  1277     *pitch = swdata->pitches[0];
  1278     return 0;
  1279 }
  1280 
  1281 void
  1282 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1283 {
  1284 }
  1285 
  1286 int
  1287 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1288                     Uint32 target_format, int w, int h, void *pixels,
  1289                     int pitch)
  1290 {
  1291     const int targetbpp = SDL_BYTESPERPIXEL(target_format);
  1292     int stretch;
  1293     int scale_2x;
  1294     Uint8 *lum, *Cr, *Cb;
  1295     int mod;
  1296 
  1297     if (targetbpp == 0) {
  1298         return SDL_SetError("Invalid target pixel format");
  1299     }
  1300 
  1301     /* Make sure we're set up to display in the desired format */
  1302     if (target_format != swdata->target_format) {
  1303         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1304             return -1;
  1305         }
  1306     }
  1307 
  1308     stretch = 0;
  1309     scale_2x = 0;
  1310     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1311         || srcrect->h < swdata->h) {
  1312         /* The source rectangle has been clipped.
  1313            Using a scratch surface is easier than adding clipped
  1314            source support to all the blitters, plus that would
  1315            slow them down in the general unclipped case.
  1316          */
  1317         stretch = 1;
  1318     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1319         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1320             scale_2x = 1;
  1321         } else {
  1322             stretch = 1;
  1323         }
  1324     }
  1325     if (stretch) {
  1326         int bpp;
  1327         Uint32 Rmask, Gmask, Bmask, Amask;
  1328 
  1329         if (swdata->display) {
  1330             swdata->display->w = w;
  1331             swdata->display->h = h;
  1332             swdata->display->pixels = pixels;
  1333             swdata->display->pitch = pitch;
  1334         } else {
  1335             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1336             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1337                                        &Bmask, &Amask);
  1338             swdata->display =
  1339                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1340                                          Gmask, Bmask, Amask);
  1341             if (!swdata->display) {
  1342                 return (-1);
  1343             }
  1344         }
  1345         if (!swdata->stretch) {
  1346             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1347             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1348                                        &Bmask, &Amask);
  1349             swdata->stretch =
  1350                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1351                                      Gmask, Bmask, Amask);
  1352             if (!swdata->stretch) {
  1353                 return (-1);
  1354             }
  1355         }
  1356         pixels = swdata->stretch->pixels;
  1357         pitch = swdata->stretch->pitch;
  1358     }
  1359     switch (swdata->format) {
  1360     case SDL_PIXELFORMAT_YV12:
  1361         lum = swdata->planes[0];
  1362         Cr = swdata->planes[1];
  1363         Cb = swdata->planes[2];
  1364         break;
  1365     case SDL_PIXELFORMAT_IYUV:
  1366         lum = swdata->planes[0];
  1367         Cr = swdata->planes[2];
  1368         Cb = swdata->planes[1];
  1369         break;
  1370     case SDL_PIXELFORMAT_YUY2:
  1371         lum = swdata->planes[0];
  1372         Cr = lum + 3;
  1373         Cb = lum + 1;
  1374         break;
  1375     case SDL_PIXELFORMAT_UYVY:
  1376         lum = swdata->planes[0] + 1;
  1377         Cr = lum + 1;
  1378         Cb = lum - 1;
  1379         break;
  1380     case SDL_PIXELFORMAT_YVYU:
  1381         lum = swdata->planes[0];
  1382         Cr = lum + 1;
  1383         Cb = lum + 3;
  1384         break;
  1385     default:
  1386         return SDL_SetError("Unsupported YUV format in copy");
  1387     }
  1388     mod = (pitch / targetbpp);
  1389 
  1390     if (scale_2x) {
  1391         mod -= (swdata->w * 2);
  1392         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1393                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1394     } else {
  1395         mod -= swdata->w;
  1396         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1397                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1398     }
  1399     if (stretch) {
  1400         SDL_Rect rect = *srcrect;
  1401         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1402     }
  1403     return 0;
  1404 }
  1405 
  1406 void
  1407 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1408 {
  1409     if (swdata) {
  1410         SDL_free(swdata->pixels);
  1411         SDL_free(swdata->colortab);
  1412         SDL_free(swdata->rgb_2_pix);
  1413         SDL_FreeSurface(swdata->stretch);
  1414         SDL_FreeSurface(swdata->display);
  1415         SDL_free(swdata);
  1416     }
  1417 }
  1418 
  1419 /* vi: set ts=4 sw=4 expandtab: */