src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 28 Aug 2017 01:42:18 -0700
changeset 11386 d5c2d689bf6d
parent 11195 0d399e95168b
child 11574 696d0036f442
permissions -rw-r--r--
Fixed build when Wayland is dynamically loaded
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  *
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  *
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  *
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  *
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  *
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  *
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  *
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  *
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  *
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_assert.h"
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_yuv_sw_c.h"
    89 #include "SDL_yuv_mmx_c.h"
    90 
    91 
    92 /* The colorspace conversion functions */
    93 
    94 #ifdef USE_MMX_ASSEMBLY
    95 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    96                                     unsigned char *lum, unsigned char *cr,
    97                                     unsigned char *cb, unsigned char *out,
    98                                     int rows, int cols, int mod);
    99 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
   100                                     unsigned char *lum, unsigned char *cr,
   101                                     unsigned char *cb, unsigned char *out,
   102                                     int rows, int cols, int mod);
   103 #endif
   104 
   105 static void
   106 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   107                        unsigned char *lum, unsigned char *cr,
   108                        unsigned char *cb, unsigned char *out,
   109                        int rows, int cols, int mod)
   110 {
   111     unsigned short *row1;
   112     unsigned short *row2;
   113     unsigned char *lum2;
   114     int x, y;
   115     int cr_r;
   116     int crb_g;
   117     int cb_b;
   118     int cols_2 = cols / 2;
   119 
   120     row1 = (unsigned short *) out;
   121     row2 = row1 + cols + mod;
   122     lum2 = lum + cols;
   123 
   124     mod += cols + mod;
   125 
   126     y = rows / 2;
   127     while (y--) {
   128         x = cols_2;
   129         while (x--) {
   130             register int L;
   131 
   132             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   133             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   134                 + colortab[*cb + 2 * 256];
   135             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   136             ++cr;
   137             ++cb;
   138 
   139             L = *lum++;
   140             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   141                                         rgb_2_pix[L + crb_g] |
   142                                         rgb_2_pix[L + cb_b]);
   143 
   144             L = *lum++;
   145             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   146                                         rgb_2_pix[L + crb_g] |
   147                                         rgb_2_pix[L + cb_b]);
   148 
   149 
   150             /* Now, do second row.  */
   151 
   152             L = *lum2++;
   153             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   154                                         rgb_2_pix[L + crb_g] |
   155                                         rgb_2_pix[L + cb_b]);
   156 
   157             L = *lum2++;
   158             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   159                                         rgb_2_pix[L + crb_g] |
   160                                         rgb_2_pix[L + cb_b]);
   161         }
   162 
   163         /*
   164          * These values are at the start of the next line, (due
   165          * to the ++'s above),but they need to be at the start
   166          * of the line after that.
   167          */
   168         lum += cols;
   169         lum2 += cols;
   170         row1 += mod;
   171         row2 += mod;
   172     }
   173 }
   174 
   175 static void
   176 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   177                        unsigned char *lum, unsigned char *cr,
   178                        unsigned char *cb, unsigned char *out,
   179                        int rows, int cols, int mod)
   180 {
   181     unsigned int value;
   182     unsigned char *row1;
   183     unsigned char *row2;
   184     unsigned char *lum2;
   185     int x, y;
   186     int cr_r;
   187     int crb_g;
   188     int cb_b;
   189     int cols_2 = cols / 2;
   190 
   191     row1 = out;
   192     row2 = row1 + cols * 3 + mod * 3;
   193     lum2 = lum + cols;
   194 
   195     mod += cols + mod;
   196     mod *= 3;
   197 
   198     y = rows / 2;
   199     while (y--) {
   200         x = cols_2;
   201         while (x--) {
   202             register int L;
   203 
   204             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   205             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   206                 + colortab[*cb + 2 * 256];
   207             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   208             ++cr;
   209             ++cb;
   210 
   211             L = *lum++;
   212             value = (rgb_2_pix[L + cr_r] |
   213                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   214             *row1++ = (value) & 0xFF;
   215             *row1++ = (value >> 8) & 0xFF;
   216             *row1++ = (value >> 16) & 0xFF;
   217 
   218             L = *lum++;
   219             value = (rgb_2_pix[L + cr_r] |
   220                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   221             *row1++ = (value) & 0xFF;
   222             *row1++ = (value >> 8) & 0xFF;
   223             *row1++ = (value >> 16) & 0xFF;
   224 
   225 
   226             /* Now, do second row.  */
   227 
   228             L = *lum2++;
   229             value = (rgb_2_pix[L + cr_r] |
   230                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   231             *row2++ = (value) & 0xFF;
   232             *row2++ = (value >> 8) & 0xFF;
   233             *row2++ = (value >> 16) & 0xFF;
   234 
   235             L = *lum2++;
   236             value = (rgb_2_pix[L + cr_r] |
   237                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   238             *row2++ = (value) & 0xFF;
   239             *row2++ = (value >> 8) & 0xFF;
   240             *row2++ = (value >> 16) & 0xFF;
   241         }
   242 
   243         /*
   244          * These values are at the start of the next line, (due
   245          * to the ++'s above),but they need to be at the start
   246          * of the line after that.
   247          */
   248         lum += cols;
   249         lum2 += cols;
   250         row1 += mod;
   251         row2 += mod;
   252     }
   253 }
   254 
   255 static void
   256 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   257                        unsigned char *lum, unsigned char *cr,
   258                        unsigned char *cb, unsigned char *out,
   259                        int rows, int cols, int mod)
   260 {
   261     unsigned int *row1;
   262     unsigned int *row2;
   263     unsigned char *lum2;
   264     int x, y;
   265     int cr_r;
   266     int crb_g;
   267     int cb_b;
   268     int cols_2 = cols / 2;
   269 
   270     row1 = (unsigned int *) out;
   271     row2 = row1 + cols + mod;
   272     lum2 = lum + cols;
   273 
   274     mod += cols + mod;
   275 
   276     y = rows / 2;
   277     while (y--) {
   278         x = cols_2;
   279         while (x--) {
   280             register int L;
   281 
   282             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   283             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   284                 + colortab[*cb + 2 * 256];
   285             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   286             ++cr;
   287             ++cb;
   288 
   289             L = *lum++;
   290             *row1++ = (rgb_2_pix[L + cr_r] |
   291                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   292 
   293             L = *lum++;
   294             *row1++ = (rgb_2_pix[L + cr_r] |
   295                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   296 
   297 
   298             /* Now, do second row.  */
   299 
   300             L = *lum2++;
   301             *row2++ = (rgb_2_pix[L + cr_r] |
   302                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   303 
   304             L = *lum2++;
   305             *row2++ = (rgb_2_pix[L + cr_r] |
   306                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   307         }
   308 
   309         /*
   310          * These values are at the start of the next line, (due
   311          * to the ++'s above),but they need to be at the start
   312          * of the line after that.
   313          */
   314         lum += cols;
   315         lum2 += cols;
   316         row1 += mod;
   317         row2 += mod;
   318     }
   319 }
   320 
   321 /*
   322  * In this function I make use of a nasty trick. The tables have the lower
   323  * 16 bits replicated in the upper 16. This means I can write ints and get
   324  * the horisontal doubling for free (almost).
   325  */
   326 static void
   327 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   328                        unsigned char *lum, unsigned char *cr,
   329                        unsigned char *cb, unsigned char *out,
   330                        int rows, int cols, int mod)
   331 {
   332     unsigned int *row1 = (unsigned int *) out;
   333     const int next_row = cols + (mod / 2);
   334     unsigned int *row2 = row1 + 2 * next_row;
   335     unsigned char *lum2;
   336     int x, y;
   337     int cr_r;
   338     int crb_g;
   339     int cb_b;
   340     int cols_2 = cols / 2;
   341 
   342     lum2 = lum + cols;
   343 
   344     mod = (next_row * 3) + (mod / 2);
   345 
   346     y = rows / 2;
   347     while (y--) {
   348         x = cols_2;
   349         while (x--) {
   350             register int L;
   351 
   352             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   353             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   354                 + colortab[*cb + 2 * 256];
   355             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   356             ++cr;
   357             ++cb;
   358 
   359             L = *lum++;
   360             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   361                                         rgb_2_pix[L + crb_g] |
   362                                         rgb_2_pix[L + cb_b]);
   363             row1++;
   364 
   365             L = *lum++;
   366             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   367                                         rgb_2_pix[L + crb_g] |
   368                                         rgb_2_pix[L + cb_b]);
   369             row1++;
   370 
   371 
   372             /* Now, do second row. */
   373 
   374             L = *lum2++;
   375             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   376                                         rgb_2_pix[L + crb_g] |
   377                                         rgb_2_pix[L + cb_b]);
   378             row2++;
   379 
   380             L = *lum2++;
   381             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   382                                         rgb_2_pix[L + crb_g] |
   383                                         rgb_2_pix[L + cb_b]);
   384             row2++;
   385         }
   386 
   387         /*
   388          * These values are at the start of the next line, (due
   389          * to the ++'s above),but they need to be at the start
   390          * of the line after that.
   391          */
   392         lum += cols;
   393         lum2 += cols;
   394         row1 += mod;
   395         row2 += mod;
   396     }
   397 }
   398 
   399 static void
   400 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   401                        unsigned char *lum, unsigned char *cr,
   402                        unsigned char *cb, unsigned char *out,
   403                        int rows, int cols, int mod)
   404 {
   405     unsigned int value;
   406     unsigned char *row1 = out;
   407     const int next_row = (cols * 2 + mod) * 3;
   408     unsigned char *row2 = row1 + 2 * next_row;
   409     unsigned char *lum2;
   410     int x, y;
   411     int cr_r;
   412     int crb_g;
   413     int cb_b;
   414     int cols_2 = cols / 2;
   415 
   416     lum2 = lum + cols;
   417 
   418     mod = next_row * 3 + mod * 3;
   419 
   420     y = rows / 2;
   421     while (y--) {
   422         x = cols_2;
   423         while (x--) {
   424             register int L;
   425 
   426             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   427             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   428                 + colortab[*cb + 2 * 256];
   429             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   430             ++cr;
   431             ++cb;
   432 
   433             L = *lum++;
   434             value = (rgb_2_pix[L + cr_r] |
   435                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   436             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   437                 row1[next_row + 3 + 0] = (value) & 0xFF;
   438             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   439                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   440             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   441                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   442             row1 += 2 * 3;
   443 
   444             L = *lum++;
   445             value = (rgb_2_pix[L + cr_r] |
   446                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   447             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   448                 row1[next_row + 3 + 0] = (value) & 0xFF;
   449             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   450                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   451             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   452                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   453             row1 += 2 * 3;
   454 
   455 
   456             /* Now, do second row. */
   457 
   458             L = *lum2++;
   459             value = (rgb_2_pix[L + cr_r] |
   460                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   461             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   462                 row2[next_row + 3 + 0] = (value) & 0xFF;
   463             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   464                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   465             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   466                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   467             row2 += 2 * 3;
   468 
   469             L = *lum2++;
   470             value = (rgb_2_pix[L + cr_r] |
   471                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   472             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   473                 row2[next_row + 3 + 0] = (value) & 0xFF;
   474             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   475                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   476             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   477                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   478             row2 += 2 * 3;
   479         }
   480 
   481         /*
   482          * These values are at the start of the next line, (due
   483          * to the ++'s above),but they need to be at the start
   484          * of the line after that.
   485          */
   486         lum += cols;
   487         lum2 += cols;
   488         row1 += mod;
   489         row2 += mod;
   490     }
   491 }
   492 
   493 static void
   494 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   495                        unsigned char *lum, unsigned char *cr,
   496                        unsigned char *cb, unsigned char *out,
   497                        int rows, int cols, int mod)
   498 {
   499     unsigned int *row1 = (unsigned int *) out;
   500     const int next_row = cols * 2 + mod;
   501     unsigned int *row2 = row1 + 2 * next_row;
   502     unsigned char *lum2;
   503     int x, y;
   504     int cr_r;
   505     int crb_g;
   506     int cb_b;
   507     int cols_2 = cols / 2;
   508 
   509     lum2 = lum + cols;
   510 
   511     mod = (next_row * 3) + mod;
   512 
   513     y = rows / 2;
   514     while (y--) {
   515         x = cols_2;
   516         while (x--) {
   517             register int L;
   518 
   519             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   520             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   521                 + colortab[*cb + 2 * 256];
   522             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   523             ++cr;
   524             ++cb;
   525 
   526             L = *lum++;
   527             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   528                 (rgb_2_pix[L + cr_r] |
   529                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   530             row1 += 2;
   531 
   532             L = *lum++;
   533             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   534                 (rgb_2_pix[L + cr_r] |
   535                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   536             row1 += 2;
   537 
   538 
   539             /* Now, do second row. */
   540 
   541             L = *lum2++;
   542             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   543                 (rgb_2_pix[L + cr_r] |
   544                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   545             row2 += 2;
   546 
   547             L = *lum2++;
   548             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   549                 (rgb_2_pix[L + cr_r] |
   550                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   551             row2 += 2;
   552         }
   553 
   554         /*
   555          * These values are at the start of the next line, (due
   556          * to the ++'s above),but they need to be at the start
   557          * of the line after that.
   558          */
   559         lum += cols;
   560         lum2 += cols;
   561         row1 += mod;
   562         row2 += mod;
   563     }
   564 }
   565 
   566 static void
   567 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   568                        unsigned char *lum, unsigned char *cr,
   569                        unsigned char *cb, unsigned char *out,
   570                        int rows, int cols, int mod)
   571 {
   572     unsigned short *row;
   573     int x, y;
   574     int cr_r;
   575     int crb_g;
   576     int cb_b;
   577     int cols_2 = cols / 2;
   578 
   579     row = (unsigned short *) out;
   580 
   581     y = rows;
   582     while (y--) {
   583         x = cols_2;
   584         while (x--) {
   585             register int L;
   586 
   587             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   588             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   589                 + colortab[*cb + 2 * 256];
   590             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   591             cr += 4;
   592             cb += 4;
   593 
   594             L = *lum;
   595             lum += 2;
   596             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   597                                        rgb_2_pix[L + crb_g] |
   598                                        rgb_2_pix[L + cb_b]);
   599 
   600             L = *lum;
   601             lum += 2;
   602             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   603                                        rgb_2_pix[L + crb_g] |
   604                                        rgb_2_pix[L + cb_b]);
   605 
   606         }
   607 
   608         row += mod;
   609     }
   610 }
   611 
   612 static void
   613 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   614                        unsigned char *lum, unsigned char *cr,
   615                        unsigned char *cb, unsigned char *out,
   616                        int rows, int cols, int mod)
   617 {
   618     unsigned int value;
   619     unsigned char *row;
   620     int x, y;
   621     int cr_r;
   622     int crb_g;
   623     int cb_b;
   624     int cols_2 = cols / 2;
   625 
   626     row = (unsigned char *) out;
   627     mod *= 3;
   628     y = rows;
   629     while (y--) {
   630         x = cols_2;
   631         while (x--) {
   632             register int L;
   633 
   634             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   635             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   636                 + colortab[*cb + 2 * 256];
   637             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   638             cr += 4;
   639             cb += 4;
   640 
   641             L = *lum;
   642             lum += 2;
   643             value = (rgb_2_pix[L + cr_r] |
   644                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   645             *row++ = (value) & 0xFF;
   646             *row++ = (value >> 8) & 0xFF;
   647             *row++ = (value >> 16) & 0xFF;
   648 
   649             L = *lum;
   650             lum += 2;
   651             value = (rgb_2_pix[L + cr_r] |
   652                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   653             *row++ = (value) & 0xFF;
   654             *row++ = (value >> 8) & 0xFF;
   655             *row++ = (value >> 16) & 0xFF;
   656 
   657         }
   658         row += mod;
   659     }
   660 }
   661 
   662 static void
   663 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   664                        unsigned char *lum, unsigned char *cr,
   665                        unsigned char *cb, unsigned char *out,
   666                        int rows, int cols, int mod)
   667 {
   668     unsigned int *row;
   669     int x, y;
   670     int cr_r;
   671     int crb_g;
   672     int cb_b;
   673     int cols_2 = cols / 2;
   674 
   675     row = (unsigned int *) out;
   676     y = rows;
   677     while (y--) {
   678         x = cols_2;
   679         while (x--) {
   680             register int L;
   681 
   682             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   683             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   684                 + colortab[*cb + 2 * 256];
   685             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   686             cr += 4;
   687             cb += 4;
   688 
   689             L = *lum;
   690             lum += 2;
   691             *row++ = (rgb_2_pix[L + cr_r] |
   692                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   693 
   694             L = *lum;
   695             lum += 2;
   696             *row++ = (rgb_2_pix[L + cr_r] |
   697                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   698 
   699 
   700         }
   701         row += mod;
   702     }
   703 }
   704 
   705 /*
   706  * In this function I make use of a nasty trick. The tables have the lower
   707  * 16 bits replicated in the upper 16. This means I can write ints and get
   708  * the horisontal doubling for free (almost).
   709  */
   710 static void
   711 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   712                        unsigned char *lum, unsigned char *cr,
   713                        unsigned char *cb, unsigned char *out,
   714                        int rows, int cols, int mod)
   715 {
   716     unsigned int *row = (unsigned int *) out;
   717     const int next_row = cols + (mod / 2);
   718     int x, y;
   719     int cr_r;
   720     int crb_g;
   721     int cb_b;
   722     int cols_2 = cols / 2;
   723 
   724     y = rows;
   725     while (y--) {
   726         x = cols_2;
   727         while (x--) {
   728             register int L;
   729 
   730             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   731             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   732                 + colortab[*cb + 2 * 256];
   733             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   734             cr += 4;
   735             cb += 4;
   736 
   737             L = *lum;
   738             lum += 2;
   739             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   740                                       rgb_2_pix[L + crb_g] |
   741                                       rgb_2_pix[L + cb_b]);
   742             row++;
   743 
   744             L = *lum;
   745             lum += 2;
   746             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   747                                       rgb_2_pix[L + crb_g] |
   748                                       rgb_2_pix[L + cb_b]);
   749             row++;
   750 
   751         }
   752         row += next_row;
   753     }
   754 }
   755 
   756 static void
   757 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   758                        unsigned char *lum, unsigned char *cr,
   759                        unsigned char *cb, unsigned char *out,
   760                        int rows, int cols, int mod)
   761 {
   762     unsigned int value;
   763     unsigned char *row = out;
   764     const int next_row = (cols * 2 + mod) * 3;
   765     int x, y;
   766     int cr_r;
   767     int crb_g;
   768     int cb_b;
   769     int cols_2 = cols / 2;
   770     y = rows;
   771     while (y--) {
   772         x = cols_2;
   773         while (x--) {
   774             register int L;
   775 
   776             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   777             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   778                 + colortab[*cb + 2 * 256];
   779             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   780             cr += 4;
   781             cb += 4;
   782 
   783             L = *lum;
   784             lum += 2;
   785             value = (rgb_2_pix[L + cr_r] |
   786                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   787             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   788                 row[next_row + 3 + 0] = (value) & 0xFF;
   789             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   790                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   791             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   792                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   793             row += 2 * 3;
   794 
   795             L = *lum;
   796             lum += 2;
   797             value = (rgb_2_pix[L + cr_r] |
   798                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   799             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   800                 row[next_row + 3 + 0] = (value) & 0xFF;
   801             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   802                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   803             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   804                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   805             row += 2 * 3;
   806 
   807         }
   808         row += next_row;
   809     }
   810 }
   811 
   812 static void
   813 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   814                        unsigned char *lum, unsigned char *cr,
   815                        unsigned char *cb, unsigned char *out,
   816                        int rows, int cols, int mod)
   817 {
   818     unsigned int *row = (unsigned int *) out;
   819     const int next_row = cols * 2 + mod;
   820     int x, y;
   821     int cr_r;
   822     int crb_g;
   823     int cb_b;
   824     int cols_2 = cols / 2;
   825     mod += mod;
   826     y = rows;
   827     while (y--) {
   828         x = cols_2;
   829         while (x--) {
   830             register int L;
   831 
   832             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   833             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   834                 + colortab[*cb + 2 * 256];
   835             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   836             cr += 4;
   837             cb += 4;
   838 
   839             L = *lum;
   840             lum += 2;
   841             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   842                 (rgb_2_pix[L + cr_r] |
   843                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   844             row += 2;
   845 
   846             L = *lum;
   847             lum += 2;
   848             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   849                 (rgb_2_pix[L + cr_r] |
   850                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   851             row += 2;
   852 
   853 
   854         }
   855 
   856         row += next_row;
   857     }
   858 }
   859 
   860 /*
   861  * How many 1 bits are there in the Uint32.
   862  * Low performance, do not call often.
   863  */
   864 static int
   865 number_of_bits_set(Uint32 a)
   866 {
   867     if (!a)
   868         return 0;
   869     if (a & 1)
   870         return 1 + number_of_bits_set(a >> 1);
   871     return (number_of_bits_set(a >> 1));
   872 }
   873 
   874 /*
   875  * How many 0 bits are there at least significant end of Uint32.
   876  * Low performance, do not call often.
   877  */
   878 static int
   879 free_bits_at_bottom_nonzero(Uint32 a)
   880 {
   881     SDL_assert(a != 0);
   882     return (((Sint32) a) & 1l) ? 0 : 1 + free_bits_at_bottom_nonzero(a >> 1);
   883 }
   884 
   885 static SDL_INLINE int
   886 free_bits_at_bottom(Uint32 a)
   887 {
   888     return a ? free_bits_at_bottom_nonzero(a) : 32;
   889 }
   890 
   891 static int
   892 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   893 {
   894     Uint32 *r_2_pix_alloc;
   895     Uint32 *g_2_pix_alloc;
   896     Uint32 *b_2_pix_alloc;
   897     int i;
   898     int bpp;
   899     Uint32 Rmask, Gmask, Bmask, Amask;
   900     int freebits;
   901 
   902     if (!SDL_PixelFormatEnumToMasks
   903         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   904         return SDL_SetError("Unsupported YUV destination format");
   905     }
   906 
   907     swdata->target_format = target_format;
   908     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   909     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   910     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   911 
   912     /*
   913      * Set up entries 0-255 in rgb-to-pixel value tables.
   914      */
   915     for (i = 0; i < 256; ++i) {
   916         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   917         freebits = free_bits_at_bottom(Rmask);
   918         if (freebits < 32) {
   919             r_2_pix_alloc[i + 256] <<= freebits;
   920         }
   921         r_2_pix_alloc[i + 256] |= Amask;
   922 
   923         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   924         freebits = free_bits_at_bottom(Gmask);
   925         if (freebits < 32) {
   926             g_2_pix_alloc[i + 256] <<= freebits;
   927         }
   928         g_2_pix_alloc[i + 256] |= Amask;
   929 
   930         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   931         freebits = free_bits_at_bottom(Bmask);
   932         if (freebits < 32) {
   933             b_2_pix_alloc[i + 256] <<= freebits;
   934         }
   935         b_2_pix_alloc[i + 256] |= Amask;
   936     }
   937 
   938     /*
   939      * If we have 16-bit output depth, then we double the value
   940      * in the top word. This means that we can write out both
   941      * pixels in the pixel doubling mode with one op. It is
   942      * harmless in the normal case as storing a 32-bit value
   943      * through a short pointer will lose the top bits anyway.
   944      */
   945     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   946         for (i = 0; i < 256; ++i) {
   947             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   948             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   949             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   950         }
   951     }
   952 
   953     /*
   954      * Spread out the values we have to the rest of the array so that
   955      * we do not need to check for overflow.
   956      */
   957     for (i = 0; i < 256; ++i) {
   958         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   959         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   960         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   961         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   962         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   963         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   964     }
   965 
   966     /* You have chosen wisely... */
   967     switch (swdata->format) {
   968     case SDL_PIXELFORMAT_YV12:
   969     case SDL_PIXELFORMAT_IYUV:
   970         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   971 #ifdef USE_MMX_ASSEMBLY
   972             /* inline assembly functions */
   973             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   974                 (Gmask == 0x07E0) && (Bmask == 0x001F)
   975                 && (swdata->w & 15) == 0) {
   976 /* printf("Using MMX 16-bit 565 dither\n"); */
   977                 swdata->Display1X = Color565DitherYV12MMX1X;
   978             } else {
   979 /* printf("Using C 16-bit dither\n"); */
   980                 swdata->Display1X = Color16DitherYV12Mod1X;
   981             }
   982 #else
   983             swdata->Display1X = Color16DitherYV12Mod1X;
   984 #endif
   985             swdata->Display2X = Color16DitherYV12Mod2X;
   986         }
   987         if (SDL_BYTESPERPIXEL(target_format) == 3) {
   988             swdata->Display1X = Color24DitherYV12Mod1X;
   989             swdata->Display2X = Color24DitherYV12Mod2X;
   990         }
   991         if (SDL_BYTESPERPIXEL(target_format) == 4) {
   992 #ifdef USE_MMX_ASSEMBLY
   993             /* inline assembly functions */
   994             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   995                 (Gmask == 0x0000FF00) &&
   996                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
   997 /* printf("Using MMX 32-bit dither\n"); */
   998                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
   999             } else {
  1000 /* printf("Using C 32-bit dither\n"); */
  1001                 swdata->Display1X = Color32DitherYV12Mod1X;
  1002             }
  1003 #else
  1004             swdata->Display1X = Color32DitherYV12Mod1X;
  1005 #endif
  1006             swdata->Display2X = Color32DitherYV12Mod2X;
  1007         }
  1008         break;
  1009     case SDL_PIXELFORMAT_YUY2:
  1010     case SDL_PIXELFORMAT_UYVY:
  1011     case SDL_PIXELFORMAT_YVYU:
  1012         if (SDL_BYTESPERPIXEL(target_format) == 2) {
  1013             swdata->Display1X = Color16DitherYUY2Mod1X;
  1014             swdata->Display2X = Color16DitherYUY2Mod2X;
  1015         }
  1016         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1017             swdata->Display1X = Color24DitherYUY2Mod1X;
  1018             swdata->Display2X = Color24DitherYUY2Mod2X;
  1019         }
  1020         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1021             swdata->Display1X = Color32DitherYUY2Mod1X;
  1022             swdata->Display2X = Color32DitherYUY2Mod2X;
  1023         }
  1024         break;
  1025     default:
  1026         /* We should never get here (caught above) */
  1027         break;
  1028     }
  1029 
  1030     SDL_FreeSurface(swdata->display);
  1031     swdata->display = NULL;
  1032     return 0;
  1033 }
  1034 
  1035 SDL_SW_YUVTexture *
  1036 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1037 {
  1038     SDL_SW_YUVTexture *swdata;
  1039     int *Cr_r_tab;
  1040     int *Cr_g_tab;
  1041     int *Cb_g_tab;
  1042     int *Cb_b_tab;
  1043     int i;
  1044     int CR, CB;
  1045 
  1046     switch (format) {
  1047     case SDL_PIXELFORMAT_YV12:
  1048     case SDL_PIXELFORMAT_IYUV:
  1049     case SDL_PIXELFORMAT_YUY2:
  1050     case SDL_PIXELFORMAT_UYVY:
  1051     case SDL_PIXELFORMAT_YVYU:
  1052         break;
  1053     default:
  1054         SDL_SetError("Unsupported YUV format");
  1055         return NULL;
  1056     }
  1057 
  1058     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1059     if (!swdata) {
  1060         SDL_OutOfMemory();
  1061         return NULL;
  1062     }
  1063 
  1064     swdata->format = format;
  1065     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1066     swdata->w = w;
  1067     swdata->h = h;
  1068     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
  1069     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1070     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1071     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1072         SDL_SW_DestroyYUVTexture(swdata);
  1073         SDL_OutOfMemory();
  1074         return NULL;
  1075     }
  1076 
  1077     /* Generate the tables for the display surface */
  1078     Cr_r_tab = &swdata->colortab[0 * 256];
  1079     Cr_g_tab = &swdata->colortab[1 * 256];
  1080     Cb_g_tab = &swdata->colortab[2 * 256];
  1081     Cb_b_tab = &swdata->colortab[3 * 256];
  1082     for (i = 0; i < 256; i++) {
  1083         /* Gamma correction (luminescence table) and chroma correction
  1084            would be done here.  See the Berkeley mpeg_play sources.
  1085          */
  1086         CB = CR = (i - 128);
  1087         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1088         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1089         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1090         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1091     }
  1092 
  1093     /* Find the pitch and offset values for the overlay */
  1094     switch (format) {
  1095     case SDL_PIXELFORMAT_YV12:
  1096     case SDL_PIXELFORMAT_IYUV:
  1097         swdata->pitches[0] = w;
  1098         swdata->pitches[1] = swdata->pitches[0] / 2;
  1099         swdata->pitches[2] = swdata->pitches[0] / 2;
  1100         swdata->planes[0] = swdata->pixels;
  1101         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1102         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
  1103         break;
  1104     case SDL_PIXELFORMAT_YUY2:
  1105     case SDL_PIXELFORMAT_UYVY:
  1106     case SDL_PIXELFORMAT_YVYU:
  1107         swdata->pitches[0] = w * 2;
  1108         swdata->planes[0] = swdata->pixels;
  1109         break;
  1110     default:
  1111         SDL_assert(0 && "We should never get here (caught above)");
  1112         break;
  1113     }
  1114 
  1115     /* We're all done.. */
  1116     return (swdata);
  1117 }
  1118 
  1119 int
  1120 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1121                              int *pitch)
  1122 {
  1123     *pixels = swdata->planes[0];
  1124     *pitch = swdata->pitches[0];
  1125     return 0;
  1126 }
  1127 
  1128 int
  1129 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1130                         const void *pixels, int pitch)
  1131 {
  1132     switch (swdata->format) {
  1133     case SDL_PIXELFORMAT_YV12:
  1134     case SDL_PIXELFORMAT_IYUV:
  1135         if (rect->x == 0 && rect->y == 0 &&
  1136             rect->w == swdata->w && rect->h == swdata->h) {
  1137                 SDL_memcpy(swdata->pixels, pixels,
  1138                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
  1139         } else {
  1140             Uint8 *src, *dst;
  1141             int row;
  1142             size_t length;
  1143 
  1144             /* Copy the Y plane */
  1145             src = (Uint8 *) pixels;
  1146             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1147             length = rect->w;
  1148             for (row = 0; row < rect->h; ++row) {
  1149                 SDL_memcpy(dst, src, length);
  1150                 src += pitch;
  1151                 dst += swdata->w;
  1152             }
  1153 
  1154             /* Copy the next plane */
  1155             src = (Uint8 *) pixels + rect->h * pitch;
  1156             dst = swdata->pixels + swdata->h * swdata->w;
  1157             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1158             length = rect->w / 2;
  1159             for (row = 0; row < rect->h/2; ++row) {
  1160                 SDL_memcpy(dst, src, length);
  1161                 src += pitch/2;
  1162                 dst += swdata->w/2;
  1163             }
  1164 
  1165             /* Copy the next plane */
  1166             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
  1167             dst = swdata->pixels + swdata->h * swdata->w +
  1168                   (swdata->h * swdata->w) / 4;
  1169             dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1170             length = rect->w / 2;
  1171             for (row = 0; row < rect->h/2; ++row) {
  1172                 SDL_memcpy(dst, src, length);
  1173                 src += pitch/2;
  1174                 dst += swdata->w/2;
  1175             }
  1176         }
  1177         break;
  1178     case SDL_PIXELFORMAT_YUY2:
  1179     case SDL_PIXELFORMAT_UYVY:
  1180     case SDL_PIXELFORMAT_YVYU:
  1181         {
  1182             Uint8 *src, *dst;
  1183             int row;
  1184             size_t length;
  1185 
  1186             src = (Uint8 *) pixels;
  1187             dst =
  1188                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1189                 rect->x * 2;
  1190             length = rect->w * 2;
  1191             for (row = 0; row < rect->h; ++row) {
  1192                 SDL_memcpy(dst, src, length);
  1193                 src += pitch;
  1194                 dst += swdata->pitches[0];
  1195             }
  1196         }
  1197         break;
  1198     }
  1199     return 0;
  1200 }
  1201 
  1202 int
  1203 SDL_SW_UpdateYUVTexturePlanar(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1204                               const Uint8 *Yplane, int Ypitch,
  1205                               const Uint8 *Uplane, int Upitch,
  1206                               const Uint8 *Vplane, int Vpitch)
  1207 {
  1208     const Uint8 *src;
  1209     Uint8 *dst;
  1210     int row;
  1211     size_t length;
  1212 
  1213     /* Copy the Y plane */
  1214     src = Yplane;
  1215     dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1216     length = rect->w;
  1217     for (row = 0; row < rect->h; ++row) {
  1218         SDL_memcpy(dst, src, length);
  1219         src += Ypitch;
  1220         dst += swdata->w;
  1221     }
  1222 
  1223     /* Copy the U plane */
  1224     src = Uplane;
  1225     if (swdata->format == SDL_PIXELFORMAT_IYUV) {
  1226         dst = swdata->pixels + swdata->h * swdata->w;
  1227     } else {
  1228         dst = swdata->pixels + swdata->h * swdata->w +
  1229               (swdata->h * swdata->w) / 4;
  1230     }
  1231     dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1232     length = rect->w / 2;
  1233     for (row = 0; row < rect->h/2; ++row) {
  1234         SDL_memcpy(dst, src, length);
  1235         src += Upitch;
  1236         dst += swdata->w/2;
  1237     }
  1238 
  1239     /* Copy the V plane */
  1240     src = Vplane;
  1241     if (swdata->format == SDL_PIXELFORMAT_YV12) {
  1242         dst = swdata->pixels + swdata->h * swdata->w;
  1243     } else {
  1244         dst = swdata->pixels + swdata->h * swdata->w +
  1245               (swdata->h * swdata->w) / 4;
  1246     }
  1247     dst += rect->y/2 * swdata->w/2 + rect->x/2;
  1248     length = rect->w / 2;
  1249     for (row = 0; row < rect->h/2; ++row) {
  1250         SDL_memcpy(dst, src, length);
  1251         src += Vpitch;
  1252         dst += swdata->w/2;
  1253     }
  1254     return 0;
  1255 }
  1256 
  1257 int
  1258 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1259                       void **pixels, int *pitch)
  1260 {
  1261     switch (swdata->format) {
  1262     case SDL_PIXELFORMAT_YV12:
  1263     case SDL_PIXELFORMAT_IYUV:
  1264         if (rect
  1265             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1266                 || rect->h != swdata->h)) {
  1267             return SDL_SetError
  1268                 ("YV12 and IYUV textures only support full surface locks");
  1269         }
  1270         break;
  1271     }
  1272 
  1273     if (rect) {
  1274         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1275     } else {
  1276         *pixels = swdata->planes[0];
  1277     }
  1278     *pitch = swdata->pitches[0];
  1279     return 0;
  1280 }
  1281 
  1282 void
  1283 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1284 {
  1285 }
  1286 
  1287 int
  1288 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1289                     Uint32 target_format, int w, int h, void *pixels,
  1290                     int pitch)
  1291 {
  1292     const int targetbpp = SDL_BYTESPERPIXEL(target_format);
  1293     int stretch;
  1294     int scale_2x;
  1295     Uint8 *lum, *Cr, *Cb;
  1296     int mod;
  1297 
  1298     if (targetbpp == 0) {
  1299         return SDL_SetError("Invalid target pixel format");
  1300     }
  1301 
  1302     /* Make sure we're set up to display in the desired format */
  1303     if (target_format != swdata->target_format) {
  1304         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1305             return -1;
  1306         }
  1307     }
  1308 
  1309     stretch = 0;
  1310     scale_2x = 0;
  1311     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1312         || srcrect->h < swdata->h) {
  1313         /* The source rectangle has been clipped.
  1314            Using a scratch surface is easier than adding clipped
  1315            source support to all the blitters, plus that would
  1316            slow them down in the general unclipped case.
  1317          */
  1318         stretch = 1;
  1319     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1320         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1321             scale_2x = 1;
  1322         } else {
  1323             stretch = 1;
  1324         }
  1325     }
  1326     if (stretch) {
  1327         int bpp;
  1328         Uint32 Rmask, Gmask, Bmask, Amask;
  1329 
  1330         if (swdata->display) {
  1331             swdata->display->w = w;
  1332             swdata->display->h = h;
  1333             swdata->display->pixels = pixels;
  1334             swdata->display->pitch = pitch;
  1335         } else {
  1336             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1337             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1338                                        &Bmask, &Amask);
  1339             swdata->display =
  1340                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1341                                          Gmask, Bmask, Amask);
  1342             if (!swdata->display) {
  1343                 return (-1);
  1344             }
  1345         }
  1346         if (!swdata->stretch) {
  1347             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1348             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1349                                        &Bmask, &Amask);
  1350             swdata->stretch =
  1351                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1352                                      Gmask, Bmask, Amask);
  1353             if (!swdata->stretch) {
  1354                 return (-1);
  1355             }
  1356         }
  1357         pixels = swdata->stretch->pixels;
  1358         pitch = swdata->stretch->pitch;
  1359     }
  1360     switch (swdata->format) {
  1361     case SDL_PIXELFORMAT_YV12:
  1362         lum = swdata->planes[0];
  1363         Cr = swdata->planes[1];
  1364         Cb = swdata->planes[2];
  1365         break;
  1366     case SDL_PIXELFORMAT_IYUV:
  1367         lum = swdata->planes[0];
  1368         Cr = swdata->planes[2];
  1369         Cb = swdata->planes[1];
  1370         break;
  1371     case SDL_PIXELFORMAT_YUY2:
  1372         lum = swdata->planes[0];
  1373         Cr = lum + 3;
  1374         Cb = lum + 1;
  1375         break;
  1376     case SDL_PIXELFORMAT_UYVY:
  1377         lum = swdata->planes[0] + 1;
  1378         Cr = lum + 1;
  1379         Cb = lum - 1;
  1380         break;
  1381     case SDL_PIXELFORMAT_YVYU:
  1382         lum = swdata->planes[0];
  1383         Cr = lum + 1;
  1384         Cb = lum + 3;
  1385         break;
  1386     default:
  1387         return SDL_SetError("Unsupported YUV format in copy");
  1388     }
  1389     mod = (pitch / targetbpp);
  1390 
  1391     if (scale_2x) {
  1392         mod -= (swdata->w * 2);
  1393         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1394                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1395     } else {
  1396         mod -= swdata->w;
  1397         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1398                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1399     }
  1400     if (stretch) {
  1401         SDL_Rect rect = *srcrect;
  1402         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1403     }
  1404     return 0;
  1405 }
  1406 
  1407 void
  1408 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1409 {
  1410     if (swdata) {
  1411         SDL_free(swdata->pixels);
  1412         SDL_free(swdata->colortab);
  1413         SDL_free(swdata->rgb_2_pix);
  1414         SDL_FreeSurface(swdata->stretch);
  1415         SDL_FreeSurface(swdata->display);
  1416         SDL_free(swdata);
  1417     }
  1418 }
  1419 
  1420 /* vi: set ts=4 sw=4 expandtab: */