src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 12 Nov 2017 10:59:05 -0800
changeset 11701 d131f3193794
parent 11574 696d0036f442
child 11702 cf166abbde4a
permissions -rw-r--r--
Fixed Android build error on older SDK
     1 /*
     2   Simple DirectMedia Layer
     3   Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
     4 
     5   This software is provided 'as-is', without any express or implied
     6   warranty.  In no event will the authors be held liable for any damages
     7   arising from the use of this software.
     8 
     9   Permission is granted to anyone to use this software for any purpose,
    10   including commercial applications, and to alter it and redistribute it
    11   freely, subject to the following restrictions:
    12 
    13   1. The origin of this software must not be misrepresented; you must not
    14      claim that you wrote the original software. If you use this software
    15      in a product, an acknowledgment in the product documentation would be
    16      appreciated but is not required.
    17   2. Altered source versions must be plainly marked as such, and must not be
    18      misrepresented as being the original software.
    19   3. This notice may not be removed or altered from any source distribution.
    20 */
    21 #include "../SDL_internal.h"
    22 
    23 /* This is the software implementation of the YUV texture support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  *
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  *
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  *
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  *
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  *
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  *
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  *
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  *
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  *
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_assert.h"
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_yuv_sw_c.h"
    89 #include "SDL_yuv_mmx_c.h"
    90 
    91 
    92 /* The colorspace conversion functions */
    93 
    94 #ifdef USE_MMX_ASSEMBLY
    95 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
    96                                     unsigned char *lum, unsigned char *cr,
    97                                     unsigned char *cb, unsigned char *out,
    98                                     int rows, int cols, int mod);
    99 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
   100                                     unsigned char *lum, unsigned char *cr,
   101                                     unsigned char *cb, unsigned char *out,
   102                                     int rows, int cols, int mod);
   103 #endif
   104 
   105 static void
   106 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   107                        unsigned char *lum, unsigned char *cr,
   108                        unsigned char *cb, unsigned char *out,
   109                        int rows, int cols, int mod)
   110 {
   111     unsigned short *row1;
   112     unsigned short *row2;
   113     unsigned char *lum2;
   114     int x, y;
   115     int cr_r;
   116     int crb_g;
   117     int cb_b;
   118     int cols_2 = cols / 2;
   119 
   120     row1 = (unsigned short *) out;
   121     row2 = row1 + cols + mod;
   122     lum2 = lum + cols;
   123 
   124     mod += cols + mod;
   125 
   126     y = rows / 2;
   127     while (y--) {
   128         x = cols_2;
   129         while (x--) {
   130             register int L;
   131 
   132             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   133             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   134                 + colortab[*cb + 2 * 256];
   135             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   136             ++cr;
   137             ++cb;
   138 
   139             L = *lum++;
   140             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   141                                         rgb_2_pix[L + crb_g] |
   142                                         rgb_2_pix[L + cb_b]);
   143 
   144             L = *lum++;
   145             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   146                                         rgb_2_pix[L + crb_g] |
   147                                         rgb_2_pix[L + cb_b]);
   148 
   149 
   150             /* Now, do second row.  */
   151 
   152             L = *lum2++;
   153             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   154                                         rgb_2_pix[L + crb_g] |
   155                                         rgb_2_pix[L + cb_b]);
   156 
   157             L = *lum2++;
   158             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   159                                         rgb_2_pix[L + crb_g] |
   160                                         rgb_2_pix[L + cb_b]);
   161         }
   162 
   163         /*
   164          * These values are at the start of the next line, (due
   165          * to the ++'s above),but they need to be at the start
   166          * of the line after that.
   167          */
   168         lum += cols;
   169         lum2 += cols;
   170         row1 += mod;
   171         row2 += mod;
   172     }
   173 }
   174 
   175 static void
   176 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   177                        unsigned char *lum, unsigned char *cr,
   178                        unsigned char *cb, unsigned char *out,
   179                        int rows, int cols, int mod)
   180 {
   181     unsigned int value;
   182     unsigned char *row1;
   183     unsigned char *row2;
   184     unsigned char *lum2;
   185     int x, y;
   186     int cr_r;
   187     int crb_g;
   188     int cb_b;
   189     int cols_2 = cols / 2;
   190 
   191     row1 = out;
   192     row2 = row1 + cols * 3 + mod * 3;
   193     lum2 = lum + cols;
   194 
   195     mod += cols + mod;
   196     mod *= 3;
   197 
   198     y = rows / 2;
   199     while (y--) {
   200         x = cols_2;
   201         while (x--) {
   202             register int L;
   203 
   204             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   205             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   206                 + colortab[*cb + 2 * 256];
   207             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   208             ++cr;
   209             ++cb;
   210 
   211             L = *lum++;
   212             value = (rgb_2_pix[L + cr_r] |
   213                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   214             *row1++ = (value) & 0xFF;
   215             *row1++ = (value >> 8) & 0xFF;
   216             *row1++ = (value >> 16) & 0xFF;
   217 
   218             L = *lum++;
   219             value = (rgb_2_pix[L + cr_r] |
   220                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   221             *row1++ = (value) & 0xFF;
   222             *row1++ = (value >> 8) & 0xFF;
   223             *row1++ = (value >> 16) & 0xFF;
   224 
   225 
   226             /* Now, do second row.  */
   227 
   228             L = *lum2++;
   229             value = (rgb_2_pix[L + cr_r] |
   230                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   231             *row2++ = (value) & 0xFF;
   232             *row2++ = (value >> 8) & 0xFF;
   233             *row2++ = (value >> 16) & 0xFF;
   234 
   235             L = *lum2++;
   236             value = (rgb_2_pix[L + cr_r] |
   237                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   238             *row2++ = (value) & 0xFF;
   239             *row2++ = (value >> 8) & 0xFF;
   240             *row2++ = (value >> 16) & 0xFF;
   241         }
   242 
   243         /*
   244          * These values are at the start of the next line, (due
   245          * to the ++'s above),but they need to be at the start
   246          * of the line after that.
   247          */
   248         lum += cols;
   249         lum2 += cols;
   250         row1 += mod;
   251         row2 += mod;
   252     }
   253 }
   254 
   255 static void
   256 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
   257                        unsigned char *lum, unsigned char *cr,
   258                        unsigned char *cb, unsigned char *out,
   259                        int rows, int cols, int mod)
   260 {
   261     unsigned int *row1;
   262     unsigned int *row2;
   263     unsigned char *lum2;
   264     int x, y;
   265     int cr_r;
   266     int crb_g;
   267     int cb_b;
   268     int cols_2 = (cols + 1) / 2;
   269     /* not even dimensions */
   270     int skip_last_col = 0;
   271     int skip_last_row = 0;
   272 
   273     if ( (cols & 0x1) ) {
   274         skip_last_col = 1;
   275     }
   276 
   277     if ( (rows & 0x1) ) {
   278         skip_last_row = 1;
   279     }
   280 
   281     row1 = (unsigned int *) out;
   282     row2 = row1 + cols + mod;
   283     lum2 = lum + cols;
   284 
   285     mod += cols + mod;
   286 
   287     y = (rows + 1) / 2;
   288     while (y--) {
   289         x = cols_2;
   290         while (x--) {
   291             register int L;
   292 
   293             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   294             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   295                 + colortab[*cb + 2 * 256];
   296             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   297             ++cr;
   298             ++cb;
   299 
   300             L = *lum++;
   301             *row1++ = (rgb_2_pix[L + cr_r] |
   302                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   303 
   304             if (!(x == 0 && skip_last_col)) {
   305             L = *lum++;
   306             *row1++ = (rgb_2_pix[L + cr_r] |
   307                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   308             } /* skip col */
   309 
   310 
   311             if (!(y == 0 && skip_last_row)) {
   312 
   313             /* Now, do second row.  */
   314 
   315             L = *lum2++;
   316             *row2++ = (rgb_2_pix[L + cr_r] |
   317                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   318 
   319             if (!(x == 1 && skip_last_col)) {
   320             L = *lum2++;
   321             *row2++ = (rgb_2_pix[L + cr_r] |
   322                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   323             } /* skip col */
   324             } /* skip row */
   325         }
   326 
   327         /*
   328          * These values are at the start of the next line, (due
   329          * to the ++'s above),but they need to be at the start
   330          * of the line after that.
   331          */
   332         lum += cols;
   333         lum2 += cols;
   334         row1 += mod;
   335         row2 += mod;
   336     }
   337 }
   338 
   339 /*
   340  * In this function I make use of a nasty trick. The tables have the lower
   341  * 16 bits replicated in the upper 16. This means I can write ints and get
   342  * the horisontal doubling for free (almost).
   343  */
   344 static void
   345 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   346                        unsigned char *lum, unsigned char *cr,
   347                        unsigned char *cb, unsigned char *out,
   348                        int rows, int cols, int mod)
   349 {
   350     unsigned int *row1 = (unsigned int *) out;
   351     const int next_row = cols + (mod / 2);
   352     unsigned int *row2 = row1 + 2 * next_row;
   353     unsigned char *lum2;
   354     int x, y;
   355     int cr_r;
   356     int crb_g;
   357     int cb_b;
   358     int cols_2 = cols / 2;
   359 
   360     lum2 = lum + cols;
   361 
   362     mod = (next_row * 3) + (mod / 2);
   363 
   364     y = rows / 2;
   365     while (y--) {
   366         x = cols_2;
   367         while (x--) {
   368             register int L;
   369 
   370             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   371             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   372                 + colortab[*cb + 2 * 256];
   373             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   374             ++cr;
   375             ++cb;
   376 
   377             L = *lum++;
   378             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   379                                         rgb_2_pix[L + crb_g] |
   380                                         rgb_2_pix[L + cb_b]);
   381             row1++;
   382 
   383             L = *lum++;
   384             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
   385                                         rgb_2_pix[L + crb_g] |
   386                                         rgb_2_pix[L + cb_b]);
   387             row1++;
   388 
   389 
   390             /* Now, do second row. */
   391 
   392             L = *lum2++;
   393             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   394                                         rgb_2_pix[L + crb_g] |
   395                                         rgb_2_pix[L + cb_b]);
   396             row2++;
   397 
   398             L = *lum2++;
   399             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
   400                                         rgb_2_pix[L + crb_g] |
   401                                         rgb_2_pix[L + cb_b]);
   402             row2++;
   403         }
   404 
   405         /*
   406          * These values are at the start of the next line, (due
   407          * to the ++'s above),but they need to be at the start
   408          * of the line after that.
   409          */
   410         lum += cols;
   411         lum2 += cols;
   412         row1 += mod;
   413         row2 += mod;
   414     }
   415 }
   416 
   417 static void
   418 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   419                        unsigned char *lum, unsigned char *cr,
   420                        unsigned char *cb, unsigned char *out,
   421                        int rows, int cols, int mod)
   422 {
   423     unsigned int value;
   424     unsigned char *row1 = out;
   425     const int next_row = (cols * 2 + mod) * 3;
   426     unsigned char *row2 = row1 + 2 * next_row;
   427     unsigned char *lum2;
   428     int x, y;
   429     int cr_r;
   430     int crb_g;
   431     int cb_b;
   432     int cols_2 = cols / 2;
   433 
   434     lum2 = lum + cols;
   435 
   436     mod = next_row * 3 + mod * 3;
   437 
   438     y = rows / 2;
   439     while (y--) {
   440         x = cols_2;
   441         while (x--) {
   442             register int L;
   443 
   444             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   445             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   446                 + colortab[*cb + 2 * 256];
   447             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   448             ++cr;
   449             ++cb;
   450 
   451             L = *lum++;
   452             value = (rgb_2_pix[L + cr_r] |
   453                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   454             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   455                 row1[next_row + 3 + 0] = (value) & 0xFF;
   456             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   457                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   458             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   459                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   460             row1 += 2 * 3;
   461 
   462             L = *lum++;
   463             value = (rgb_2_pix[L + cr_r] |
   464                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   465             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
   466                 row1[next_row + 3 + 0] = (value) & 0xFF;
   467             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
   468                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
   469             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
   470                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
   471             row1 += 2 * 3;
   472 
   473 
   474             /* Now, do second row. */
   475 
   476             L = *lum2++;
   477             value = (rgb_2_pix[L + cr_r] |
   478                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   479             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   480                 row2[next_row + 3 + 0] = (value) & 0xFF;
   481             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   482                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   483             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   484                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   485             row2 += 2 * 3;
   486 
   487             L = *lum2++;
   488             value = (rgb_2_pix[L + cr_r] |
   489                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   490             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
   491                 row2[next_row + 3 + 0] = (value) & 0xFF;
   492             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
   493                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
   494             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
   495                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
   496             row2 += 2 * 3;
   497         }
   498 
   499         /*
   500          * These values are at the start of the next line, (due
   501          * to the ++'s above),but they need to be at the start
   502          * of the line after that.
   503          */
   504         lum += cols;
   505         lum2 += cols;
   506         row1 += mod;
   507         row2 += mod;
   508     }
   509 }
   510 
   511 static void
   512 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
   513                        unsigned char *lum, unsigned char *cr,
   514                        unsigned char *cb, unsigned char *out,
   515                        int rows, int cols, int mod)
   516 {
   517     unsigned int *row1 = (unsigned int *) out;
   518     const int next_row = cols * 2 + mod;
   519     unsigned int *row2 = row1 + 2 * next_row;
   520     unsigned char *lum2;
   521     int x, y;
   522     int cr_r;
   523     int crb_g;
   524     int cb_b;
   525     int cols_2 = cols / 2;
   526 
   527     lum2 = lum + cols;
   528 
   529     mod = (next_row * 3) + mod;
   530 
   531     y = rows / 2;
   532     while (y--) {
   533         x = cols_2;
   534         while (x--) {
   535             register int L;
   536 
   537             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   538             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   539                 + colortab[*cb + 2 * 256];
   540             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   541             ++cr;
   542             ++cb;
   543 
   544             L = *lum++;
   545             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   546                 (rgb_2_pix[L + cr_r] |
   547                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   548             row1 += 2;
   549 
   550             L = *lum++;
   551             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
   552                 (rgb_2_pix[L + cr_r] |
   553                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   554             row1 += 2;
   555 
   556 
   557             /* Now, do second row. */
   558 
   559             L = *lum2++;
   560             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   561                 (rgb_2_pix[L + cr_r] |
   562                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   563             row2 += 2;
   564 
   565             L = *lum2++;
   566             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
   567                 (rgb_2_pix[L + cr_r] |
   568                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   569             row2 += 2;
   570         }
   571 
   572         /*
   573          * These values are at the start of the next line, (due
   574          * to the ++'s above),but they need to be at the start
   575          * of the line after that.
   576          */
   577         lum += cols;
   578         lum2 += cols;
   579         row1 += mod;
   580         row2 += mod;
   581     }
   582 }
   583 
   584 static void
   585 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   586                        unsigned char *lum, unsigned char *cr,
   587                        unsigned char *cb, unsigned char *out,
   588                        int rows, int cols, int mod)
   589 {
   590     unsigned short *row;
   591     int x, y;
   592     int cr_r;
   593     int crb_g;
   594     int cb_b;
   595     int cols_2 = cols / 2;
   596 
   597     row = (unsigned short *) out;
   598 
   599     y = rows;
   600     while (y--) {
   601         x = cols_2;
   602         while (x--) {
   603             register int L;
   604 
   605             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   606             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   607                 + colortab[*cb + 2 * 256];
   608             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   609             cr += 4;
   610             cb += 4;
   611 
   612             L = *lum;
   613             lum += 2;
   614             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   615                                        rgb_2_pix[L + crb_g] |
   616                                        rgb_2_pix[L + cb_b]);
   617 
   618             L = *lum;
   619             lum += 2;
   620             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
   621                                        rgb_2_pix[L + crb_g] |
   622                                        rgb_2_pix[L + cb_b]);
   623 
   624         }
   625 
   626         row += mod;
   627     }
   628 }
   629 
   630 static void
   631 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   632                        unsigned char *lum, unsigned char *cr,
   633                        unsigned char *cb, unsigned char *out,
   634                        int rows, int cols, int mod)
   635 {
   636     unsigned int value;
   637     unsigned char *row;
   638     int x, y;
   639     int cr_r;
   640     int crb_g;
   641     int cb_b;
   642     int cols_2 = cols / 2;
   643 
   644     row = (unsigned char *) out;
   645     mod *= 3;
   646     y = rows;
   647     while (y--) {
   648         x = cols_2;
   649         while (x--) {
   650             register int L;
   651 
   652             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   653             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   654                 + colortab[*cb + 2 * 256];
   655             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   656             cr += 4;
   657             cb += 4;
   658 
   659             L = *lum;
   660             lum += 2;
   661             value = (rgb_2_pix[L + cr_r] |
   662                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   663             *row++ = (value) & 0xFF;
   664             *row++ = (value >> 8) & 0xFF;
   665             *row++ = (value >> 16) & 0xFF;
   666 
   667             L = *lum;
   668             lum += 2;
   669             value = (rgb_2_pix[L + cr_r] |
   670                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   671             *row++ = (value) & 0xFF;
   672             *row++ = (value >> 8) & 0xFF;
   673             *row++ = (value >> 16) & 0xFF;
   674 
   675         }
   676         row += mod;
   677     }
   678 }
   679 
   680 static void
   681 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
   682                        unsigned char *lum, unsigned char *cr,
   683                        unsigned char *cb, unsigned char *out,
   684                        int rows, int cols, int mod)
   685 {
   686     unsigned int *row;
   687     int x, y;
   688     int cr_r;
   689     int crb_g;
   690     int cb_b;
   691     int cols_2 = (cols + 1) / 2;
   692     /* not even dimensions */
   693     int skip_last_col = 0;
   694     if ( (cols & 0x1) ) {
   695         skip_last_col = 1;
   696     }
   697 
   698     row = (unsigned int *) out;
   699     y = rows;
   700     while (y--) {
   701         x = cols_2;
   702         while (x--) {
   703             register int L;
   704 
   705             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   706             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   707                 + colortab[*cb + 2 * 256];
   708             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   709             cr += 4;
   710             cb += 4;
   711 
   712             L = *lum;
   713             lum += 2;
   714             *row++ = (rgb_2_pix[L + cr_r] |
   715                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   716 
   717             L = *lum;
   718             lum += 2;
   719 
   720             if (!(x == 0 && skip_last_col)) {
   721             *row++ = (rgb_2_pix[L + cr_r] |
   722                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   723             } /* skip col */
   724 
   725         }
   726         row += mod;
   727     }
   728 }
   729 
   730 /*
   731  * In this function I make use of a nasty trick. The tables have the lower
   732  * 16 bits replicated in the upper 16. This means I can write ints and get
   733  * the horisontal doubling for free (almost).
   734  */
   735 static void
   736 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   737                        unsigned char *lum, unsigned char *cr,
   738                        unsigned char *cb, unsigned char *out,
   739                        int rows, int cols, int mod)
   740 {
   741     unsigned int *row = (unsigned int *) out;
   742     const int next_row = cols + (mod / 2);
   743     int x, y;
   744     int cr_r;
   745     int crb_g;
   746     int cb_b;
   747     int cols_2 = cols / 2;
   748 
   749     y = rows;
   750     while (y--) {
   751         x = cols_2;
   752         while (x--) {
   753             register int L;
   754 
   755             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   756             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   757                 + colortab[*cb + 2 * 256];
   758             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   759             cr += 4;
   760             cb += 4;
   761 
   762             L = *lum;
   763             lum += 2;
   764             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   765                                       rgb_2_pix[L + crb_g] |
   766                                       rgb_2_pix[L + cb_b]);
   767             row++;
   768 
   769             L = *lum;
   770             lum += 2;
   771             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
   772                                       rgb_2_pix[L + crb_g] |
   773                                       rgb_2_pix[L + cb_b]);
   774             row++;
   775 
   776         }
   777         row += next_row;
   778     }
   779 }
   780 
   781 static void
   782 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   783                        unsigned char *lum, unsigned char *cr,
   784                        unsigned char *cb, unsigned char *out,
   785                        int rows, int cols, int mod)
   786 {
   787     unsigned int value;
   788     unsigned char *row = out;
   789     const int next_row = (cols * 2 + mod) * 3;
   790     int x, y;
   791     int cr_r;
   792     int crb_g;
   793     int cb_b;
   794     int cols_2 = cols / 2;
   795     y = rows;
   796     while (y--) {
   797         x = cols_2;
   798         while (x--) {
   799             register int L;
   800 
   801             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   802             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   803                 + colortab[*cb + 2 * 256];
   804             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   805             cr += 4;
   806             cb += 4;
   807 
   808             L = *lum;
   809             lum += 2;
   810             value = (rgb_2_pix[L + cr_r] |
   811                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   812             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   813                 row[next_row + 3 + 0] = (value) & 0xFF;
   814             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   815                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   816             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   817                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   818             row += 2 * 3;
   819 
   820             L = *lum;
   821             lum += 2;
   822             value = (rgb_2_pix[L + cr_r] |
   823                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   824             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
   825                 row[next_row + 3 + 0] = (value) & 0xFF;
   826             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
   827                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
   828             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
   829                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
   830             row += 2 * 3;
   831 
   832         }
   833         row += next_row;
   834     }
   835 }
   836 
   837 static void
   838 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   839                        unsigned char *lum, unsigned char *cr,
   840                        unsigned char *cb, unsigned char *out,
   841                        int rows, int cols, int mod)
   842 {
   843     unsigned int *row = (unsigned int *) out;
   844     const int next_row = cols * 2 + mod;
   845     int x, y;
   846     int cr_r;
   847     int crb_g;
   848     int cb_b;
   849     int cols_2 = cols / 2;
   850     mod += mod;
   851     y = rows;
   852     while (y--) {
   853         x = cols_2;
   854         while (x--) {
   855             register int L;
   856 
   857             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   858             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   859                 + colortab[*cb + 2 * 256];
   860             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   861             cr += 4;
   862             cb += 4;
   863 
   864             L = *lum;
   865             lum += 2;
   866             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   867                 (rgb_2_pix[L + cr_r] |
   868                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   869             row += 2;
   870 
   871             L = *lum;
   872             lum += 2;
   873             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   874                 (rgb_2_pix[L + cr_r] |
   875                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   876             row += 2;
   877 
   878 
   879         }
   880 
   881         row += next_row;
   882     }
   883 }
   884 
   885 /*
   886  * How many 1 bits are there in the Uint32.
   887  * Low performance, do not call often.
   888  */
   889 static int
   890 number_of_bits_set(Uint32 a)
   891 {
   892     if (!a)
   893         return 0;
   894     if (a & 1)
   895         return 1 + number_of_bits_set(a >> 1);
   896     return (number_of_bits_set(a >> 1));
   897 }
   898 
   899 /*
   900  * How many 0 bits are there at least significant end of Uint32.
   901  * Low performance, do not call often.
   902  */
   903 static int
   904 free_bits_at_bottom_nonzero(Uint32 a)
   905 {
   906     SDL_assert(a != 0);
   907     return (((Sint32) a) & 1l) ? 0 : 1 + free_bits_at_bottom_nonzero(a >> 1);
   908 }
   909 
   910 static SDL_INLINE int
   911 free_bits_at_bottom(Uint32 a)
   912 {
   913     return a ? free_bits_at_bottom_nonzero(a) : 32;
   914 }
   915 
   916 static int
   917 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
   918 {
   919     Uint32 *r_2_pix_alloc;
   920     Uint32 *g_2_pix_alloc;
   921     Uint32 *b_2_pix_alloc;
   922     int i;
   923     int bpp;
   924     Uint32 Rmask, Gmask, Bmask, Amask;
   925     int freebits;
   926 
   927     if (!SDL_PixelFormatEnumToMasks
   928         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
   929         return SDL_SetError("Unsupported YUV destination format");
   930     }
   931 
   932     swdata->target_format = target_format;
   933     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   934     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   935     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   936 
   937     /*
   938      * Set up entries 0-255 in rgb-to-pixel value tables.
   939      */
   940     for (i = 0; i < 256; ++i) {
   941         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
   942         freebits = free_bits_at_bottom(Rmask);
   943         if (freebits < 32) {
   944             r_2_pix_alloc[i + 256] <<= freebits;
   945         }
   946         r_2_pix_alloc[i + 256] |= Amask;
   947 
   948         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
   949         freebits = free_bits_at_bottom(Gmask);
   950         if (freebits < 32) {
   951             g_2_pix_alloc[i + 256] <<= freebits;
   952         }
   953         g_2_pix_alloc[i + 256] |= Amask;
   954 
   955         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
   956         freebits = free_bits_at_bottom(Bmask);
   957         if (freebits < 32) {
   958             b_2_pix_alloc[i + 256] <<= freebits;
   959         }
   960         b_2_pix_alloc[i + 256] |= Amask;
   961     }
   962 
   963     /*
   964      * If we have 16-bit output depth, then we double the value
   965      * in the top word. This means that we can write out both
   966      * pixels in the pixel doubling mode with one op. It is
   967      * harmless in the normal case as storing a 32-bit value
   968      * through a short pointer will lose the top bits anyway.
   969      */
   970     if (SDL_BYTESPERPIXEL(target_format) == 2) {
   971         for (i = 0; i < 256; ++i) {
   972             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
   973             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
   974             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
   975         }
   976     }
   977 
   978     /*
   979      * Spread out the values we have to the rest of the array so that
   980      * we do not need to check for overflow.
   981      */
   982     for (i = 0; i < 256; ++i) {
   983         r_2_pix_alloc[i] = r_2_pix_alloc[256];
   984         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
   985         g_2_pix_alloc[i] = g_2_pix_alloc[256];
   986         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
   987         b_2_pix_alloc[i] = b_2_pix_alloc[256];
   988         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
   989     }
   990 
   991     /* You have chosen wisely... */
   992     switch (swdata->format) {
   993     case SDL_PIXELFORMAT_YV12:
   994     case SDL_PIXELFORMAT_IYUV:
   995         if (SDL_BYTESPERPIXEL(target_format) == 2) {
   996 #ifdef USE_MMX_ASSEMBLY
   997             /* inline assembly functions */
   998             if (SDL_HasMMX() && (Rmask == 0xF800) &&
   999                 (Gmask == 0x07E0) && (Bmask == 0x001F)
  1000                 && (swdata->w & 15) == 0) {
  1001 /* printf("Using MMX 16-bit 565 dither\n"); */
  1002                 swdata->Display1X = Color565DitherYV12MMX1X;
  1003             } else {
  1004 /* printf("Using C 16-bit dither\n"); */
  1005                 swdata->Display1X = Color16DitherYV12Mod1X;
  1006             }
  1007 #else
  1008             swdata->Display1X = Color16DitherYV12Mod1X;
  1009 #endif
  1010             swdata->Display2X = Color16DitherYV12Mod2X;
  1011         }
  1012         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1013             swdata->Display1X = Color24DitherYV12Mod1X;
  1014             swdata->Display2X = Color24DitherYV12Mod2X;
  1015         }
  1016         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1017 #ifdef USE_MMX_ASSEMBLY
  1018             /* inline assembly functions */
  1019             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  1020                 (Gmask == 0x0000FF00) &&
  1021                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
  1022 /* printf("Using MMX 32-bit dither\n"); */
  1023                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1024             } else {
  1025 /* printf("Using C 32-bit dither\n"); */
  1026                 swdata->Display1X = Color32DitherYV12Mod1X;
  1027             }
  1028 #else
  1029             swdata->Display1X = Color32DitherYV12Mod1X;
  1030 #endif
  1031             swdata->Display2X = Color32DitherYV12Mod2X;
  1032         }
  1033         break;
  1034     case SDL_PIXELFORMAT_YUY2:
  1035     case SDL_PIXELFORMAT_UYVY:
  1036     case SDL_PIXELFORMAT_YVYU:
  1037         if (SDL_BYTESPERPIXEL(target_format) == 2) {
  1038             swdata->Display1X = Color16DitherYUY2Mod1X;
  1039             swdata->Display2X = Color16DitherYUY2Mod2X;
  1040         }
  1041         if (SDL_BYTESPERPIXEL(target_format) == 3) {
  1042             swdata->Display1X = Color24DitherYUY2Mod1X;
  1043             swdata->Display2X = Color24DitherYUY2Mod2X;
  1044         }
  1045         if (SDL_BYTESPERPIXEL(target_format) == 4) {
  1046             swdata->Display1X = Color32DitherYUY2Mod1X;
  1047             swdata->Display2X = Color32DitherYUY2Mod2X;
  1048         }
  1049         break;
  1050     case SDL_PIXELFORMAT_NV21:
  1051     case SDL_PIXELFORMAT_NV12:
  1052         /* no Display{1,2}X function */
  1053         swdata->Display1X = NULL;
  1054         swdata->Display2X = NULL;
  1055         break;
  1056 
  1057     default:
  1058         /* We should never get here (caught above) */
  1059         break;
  1060     }
  1061 
  1062     SDL_FreeSurface(swdata->display);
  1063     swdata->display = NULL;
  1064     return 0;
  1065 }
  1066 
  1067 SDL_SW_YUVTexture *
  1068 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
  1069 {
  1070     SDL_SW_YUVTexture *swdata;
  1071     int *Cr_r_tab;
  1072     int *Cr_g_tab;
  1073     int *Cb_g_tab;
  1074     int *Cb_b_tab;
  1075     int i;
  1076     int CR, CB;
  1077 
  1078     switch (format) {
  1079     case SDL_PIXELFORMAT_YV12:
  1080     case SDL_PIXELFORMAT_IYUV:
  1081     case SDL_PIXELFORMAT_YUY2:
  1082     case SDL_PIXELFORMAT_UYVY:
  1083     case SDL_PIXELFORMAT_YVYU:
  1084     case SDL_PIXELFORMAT_NV12:
  1085     case SDL_PIXELFORMAT_NV21:
  1086         break;
  1087     default:
  1088         SDL_SetError("Unsupported YUV format");
  1089         return NULL;
  1090     }
  1091 
  1092     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
  1093     if (!swdata) {
  1094         SDL_OutOfMemory();
  1095         return NULL;
  1096     }
  1097 
  1098     swdata->format = format;
  1099     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
  1100     swdata->w = w;
  1101     swdata->h = h;
  1102     {
  1103         const int sz_plane         = w * h;
  1104         const int sz_plane_chroma  = ((w + 1) / 2) * ((h + 1) / 2);
  1105         const int sz_plane_packed  = ((w + 1) / 2) * h;
  1106         int dst_size = 0;     
  1107         switch(format) 
  1108         {
  1109             case SDL_PIXELFORMAT_YV12: /**< Planar mode: Y + V + U  (3 planes) */
  1110             case SDL_PIXELFORMAT_IYUV: /**< Planar mode: Y + U + V  (3 planes) */
  1111                 dst_size = sz_plane + sz_plane_chroma + sz_plane_chroma;
  1112                 break;
  1113 
  1114             case SDL_PIXELFORMAT_YUY2: /**< Packed mode: Y0+U0+Y1+V0 (1 plane) */
  1115             case SDL_PIXELFORMAT_UYVY: /**< Packed mode: U0+Y0+V0+Y1 (1 plane) */
  1116             case SDL_PIXELFORMAT_YVYU: /**< Packed mode: Y0+V0+Y1+U0 (1 plane) */
  1117                 dst_size = 4 * sz_plane_packed;
  1118                 break;
  1119 
  1120             case SDL_PIXELFORMAT_NV12: /**< Planar mode: Y + U/V interleaved  (2 planes) */
  1121             case SDL_PIXELFORMAT_NV21: /**< Planar mode: Y + V/U interleaved  (2 planes) */
  1122                 dst_size = sz_plane + sz_plane_chroma + sz_plane_chroma;
  1123                 break;
  1124 
  1125             default:
  1126                 SDL_assert(0 && "We should never get here (caught above)");
  1127                 break;
  1128         }
  1129         swdata->pixels = (Uint8 *) SDL_malloc(dst_size);
  1130     }
  1131     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
  1132     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
  1133     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
  1134         SDL_SW_DestroyYUVTexture(swdata);
  1135         SDL_OutOfMemory();
  1136         return NULL;
  1137     }
  1138 
  1139     /* Generate the tables for the display surface */
  1140     Cr_r_tab = &swdata->colortab[0 * 256];
  1141     Cr_g_tab = &swdata->colortab[1 * 256];
  1142     Cb_g_tab = &swdata->colortab[2 * 256];
  1143     Cb_b_tab = &swdata->colortab[3 * 256];
  1144     for (i = 0; i < 256; i++) {
  1145         /* Gamma correction (luminescence table) and chroma correction
  1146            would be done here.  See the Berkeley mpeg_play sources.
  1147          */
  1148         CB = CR = (i - 128);
  1149         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1150         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1151         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1152         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1153     }
  1154 
  1155     /* Find the pitch and offset values for the overlay */
  1156     switch (format) {
  1157     case SDL_PIXELFORMAT_YV12:
  1158     case SDL_PIXELFORMAT_IYUV:
  1159         swdata->pitches[0] = w;
  1160         swdata->pitches[1] = (swdata->pitches[0] + 1) / 2;
  1161         swdata->pitches[2] = (swdata->pitches[0] + 1) / 2;
  1162         swdata->planes[0] = swdata->pixels;
  1163         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1164         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * ((h + 1) / 2);
  1165         break;
  1166     case SDL_PIXELFORMAT_YUY2:
  1167     case SDL_PIXELFORMAT_UYVY:
  1168     case SDL_PIXELFORMAT_YVYU:
  1169         swdata->pitches[0] = ((w + 1) / 2) * 4;
  1170         swdata->planes[0] = swdata->pixels;
  1171         break;
  1172 
  1173     case SDL_PIXELFORMAT_NV12:
  1174     case SDL_PIXELFORMAT_NV21:
  1175         swdata->pitches[0] = w;
  1176         swdata->pitches[1] = 2 * ((swdata->pitches[0] + 1) / 2);
  1177         swdata->planes[0] = swdata->pixels;
  1178         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
  1179         break;
  1180 
  1181     default:
  1182         SDL_assert(0 && "We should never get here (caught above)");
  1183         break;
  1184     }
  1185 
  1186     /* We're all done.. */
  1187     return (swdata);
  1188 }
  1189 
  1190 int
  1191 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
  1192                              int *pitch)
  1193 {
  1194     *pixels = swdata->planes[0];
  1195     *pitch = swdata->pitches[0];
  1196     return 0;
  1197 }
  1198 
  1199 int
  1200 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1201                         const void *pixels, int pitch)
  1202 {
  1203     switch (swdata->format) {
  1204     case SDL_PIXELFORMAT_YV12:
  1205     case SDL_PIXELFORMAT_IYUV:
  1206         if (rect->x == 0 && rect->y == 0 &&
  1207             rect->w == swdata->w && rect->h == swdata->h) {
  1208                 SDL_memcpy(swdata->pixels, pixels,
  1209                            (swdata->h * swdata->w) + 2* ((swdata->h + 1) /2) * ((swdata->w + 1) / 2));
  1210         } else {
  1211             Uint8 *src, *dst;
  1212             int row;
  1213             size_t length;
  1214 
  1215             /* Copy the Y plane */
  1216             src = (Uint8 *) pixels;
  1217             dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1218             length = rect->w;
  1219             for (row = 0; row < rect->h; ++row) {
  1220                 SDL_memcpy(dst, src, length);
  1221                 src += pitch;
  1222                 dst += swdata->w;
  1223             }
  1224             
  1225             /* Copy the next plane */
  1226             src = (Uint8 *) pixels + rect->h * pitch;
  1227             dst = swdata->pixels + swdata->h * swdata->w;
  1228             dst += rect->y/2 * ((swdata->w + 1) / 2) + rect->x/2;
  1229             length = (rect->w + 1) / 2;
  1230             for (row = 0; row < (rect->h + 1)/2; ++row) {
  1231                 SDL_memcpy(dst, src, length);
  1232                 src += (pitch + 1)/2;
  1233                 dst += (swdata->w + 1)/2;
  1234             }
  1235 
  1236             /* Copy the next plane */
  1237             src = (Uint8 *) pixels + rect->h * pitch + ((rect->h + 1) / 2) * ((pitch + 1) / 2);
  1238             dst = swdata->pixels + swdata->h * swdata->w +
  1239                   ((swdata->h + 1)/2) * ((swdata->w+1) / 2);
  1240             dst += rect->y/2 * ((swdata->w + 1)/2) + rect->x/2;
  1241             length = (rect->w + 1) / 2;
  1242             for (row = 0; row < (rect->h + 1)/2; ++row) {
  1243                 SDL_memcpy(dst, src, length);
  1244                 src += (pitch + 1)/2;
  1245                 dst += (swdata->w + 1)/2;
  1246             }
  1247         }
  1248         break;
  1249     case SDL_PIXELFORMAT_YUY2:
  1250     case SDL_PIXELFORMAT_UYVY:
  1251     case SDL_PIXELFORMAT_YVYU:
  1252         {
  1253             Uint8 *src, *dst;
  1254             int row;
  1255             size_t length;
  1256 
  1257             src = (Uint8 *) pixels;
  1258             dst =
  1259                 swdata->planes[0] + rect->y * swdata->pitches[0] +
  1260                 rect->x * 2;
  1261             length = 4 * ((rect->w + 1) / 2);
  1262             for (row = 0; row < rect->h; ++row) {
  1263                 SDL_memcpy(dst, src, length);
  1264                 src += pitch;
  1265                 dst += swdata->pitches[0];
  1266             }
  1267         }
  1268         break;
  1269     case SDL_PIXELFORMAT_NV12:
  1270     case SDL_PIXELFORMAT_NV21:
  1271         {
  1272             if (rect->x == 0 && rect->y == 0 && rect->w == swdata->w && rect->h == swdata->h) {
  1273                 SDL_memcpy(swdata->pixels, pixels,
  1274                         (swdata->h * swdata->w) + 2* ((swdata->h + 1) /2) * ((swdata->w + 1) / 2));
  1275             } else {
  1276 
  1277                 Uint8 *src, *dst;
  1278                 int row;
  1279                 size_t length;
  1280 
  1281                 /* Copy the Y plane */
  1282                 src = (Uint8 *) pixels;
  1283                 dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1284                 length = rect->w;
  1285                 for (row = 0; row < rect->h; ++row) {
  1286                     SDL_memcpy(dst, src, length);
  1287                     src += pitch;
  1288                     dst += swdata->w;
  1289                 }
  1290                 
  1291                 /* Copy the next plane */
  1292                 src = (Uint8 *) pixels + rect->h * pitch;
  1293                 dst = swdata->pixels + swdata->h * swdata->w;
  1294                 dst += 2 * ((rect->y + 1)/2) * ((swdata->w + 1) / 2) + 2 * (rect->x/2);
  1295                 length = 2 * ((rect->w + 1) / 2);
  1296                 for (row = 0; row < (rect->h + 1)/2; ++row) {
  1297                     SDL_memcpy(dst, src, length);
  1298                     src += 2 * ((pitch + 1)/2);
  1299                     dst += 2 * ((swdata->w + 1)/2);
  1300                 }
  1301             }
  1302         }
  1303         break;
  1304 
  1305     }
  1306     return 0;
  1307 }
  1308 
  1309 int
  1310 SDL_SW_UpdateYUVTexturePlanar(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1311                               const Uint8 *Yplane, int Ypitch,
  1312                               const Uint8 *Uplane, int Upitch,
  1313                               const Uint8 *Vplane, int Vpitch)
  1314 {
  1315     const Uint8 *src;
  1316     Uint8 *dst;
  1317     int row;
  1318     size_t length;
  1319 
  1320     /* Copy the Y plane */
  1321     src = Yplane;
  1322     dst = swdata->pixels + rect->y * swdata->w + rect->x;
  1323     length = rect->w;
  1324     for (row = 0; row < rect->h; ++row) {
  1325         SDL_memcpy(dst, src, length);
  1326         src += Ypitch;
  1327         dst += swdata->w;
  1328     }
  1329 
  1330     /* Copy the U plane */
  1331     src = Uplane;
  1332     if (swdata->format == SDL_PIXELFORMAT_IYUV) {
  1333         dst = swdata->pixels + swdata->h * swdata->w;
  1334     } else {
  1335         dst = swdata->pixels + swdata->h * swdata->w +
  1336               ((swdata->h + 1) / 2) * ((swdata->w + 1) / 2);
  1337     }
  1338     dst += rect->y/2 * ((swdata->w + 1)/2) + rect->x/2;
  1339     length = (rect->w + 1) / 2;
  1340     for (row = 0; row < (rect->h + 1)/2; ++row) {
  1341         SDL_memcpy(dst, src, length);
  1342         src += Upitch;
  1343         dst += (swdata->w + 1)/2;
  1344     }
  1345 
  1346     /* Copy the V plane */
  1347     src = Vplane;
  1348     if (swdata->format == SDL_PIXELFORMAT_YV12) {
  1349         dst = swdata->pixels + swdata->h * swdata->w;
  1350     } else {
  1351         dst = swdata->pixels + swdata->h * swdata->w +
  1352               ((swdata->h + 1) / 2) * ((swdata->w + 1) / 2);
  1353     }
  1354     dst += rect->y/2 * ((swdata->w + 1)/2) + rect->x/2;
  1355     length = (rect->w + 1) / 2;
  1356     for (row = 0; row < (rect->h + 1)/2; ++row) {
  1357         SDL_memcpy(dst, src, length);
  1358         src += Vpitch;
  1359         dst += (swdata->w + 1)/2;
  1360     }
  1361     return 0;
  1362 }
  1363 
  1364 int
  1365 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
  1366                       void **pixels, int *pitch)
  1367 {
  1368     switch (swdata->format) {
  1369     case SDL_PIXELFORMAT_YV12:
  1370     case SDL_PIXELFORMAT_IYUV:
  1371     case SDL_PIXELFORMAT_NV12:
  1372     case SDL_PIXELFORMAT_NV21:
  1373         if (rect
  1374             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
  1375                 || rect->h != swdata->h)) {
  1376             return SDL_SetError
  1377                 ("YV12, IYUV, NV12, NV21 textures only support full surface locks");
  1378         }
  1379         break;
  1380     }
  1381 
  1382     if (rect) {
  1383         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
  1384     } else {
  1385         *pixels = swdata->planes[0];
  1386     }
  1387     *pitch = swdata->pitches[0];
  1388     return 0;
  1389 }
  1390 
  1391 void
  1392 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
  1393 {
  1394 }
  1395 
  1396 int
  1397 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
  1398                     Uint32 target_format, int w, int h, void *pixels,
  1399                     int pitch)
  1400 {
  1401     const int targetbpp = SDL_BYTESPERPIXEL(target_format);
  1402     int stretch;
  1403     int scale_2x;
  1404     Uint8 *lum, *Cr, *Cb;
  1405     int mod;
  1406 
  1407     if (targetbpp == 0) {
  1408         return SDL_SetError("Invalid target pixel format");
  1409     }
  1410 
  1411     /* Make sure we're set up to display in the desired format */
  1412     if (target_format != swdata->target_format) {
  1413         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
  1414             return -1;
  1415         }
  1416     }
  1417 
  1418     stretch = 0;
  1419     scale_2x = 0;
  1420     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
  1421         || srcrect->h < swdata->h) {
  1422         /* The source rectangle has been clipped.
  1423            Using a scratch surface is easier than adding clipped
  1424            source support to all the blitters, plus that would
  1425            slow them down in the general unclipped case.
  1426          */
  1427         stretch = 1;
  1428     } else if ((srcrect->w != w) || (srcrect->h != h)) {
  1429         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
  1430             scale_2x = 1;
  1431         } else {
  1432             stretch = 1;
  1433         }
  1434     }
  1435     if (stretch) {
  1436         int bpp;
  1437         Uint32 Rmask, Gmask, Bmask, Amask;
  1438 
  1439         if (swdata->display) {
  1440             swdata->display->w = w;
  1441             swdata->display->h = h;
  1442             swdata->display->pixels = pixels;
  1443             swdata->display->pitch = pitch;
  1444         } else {
  1445             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1446             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1447                                        &Bmask, &Amask);
  1448             swdata->display =
  1449                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
  1450                                          Gmask, Bmask, Amask);
  1451             if (!swdata->display) {
  1452                 return (-1);
  1453             }
  1454         }
  1455         if (!swdata->stretch) {
  1456             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
  1457             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
  1458                                        &Bmask, &Amask);
  1459             swdata->stretch =
  1460                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
  1461                                      Gmask, Bmask, Amask);
  1462             if (!swdata->stretch) {
  1463                 return (-1);
  1464             }
  1465         }
  1466         pixels = swdata->stretch->pixels;
  1467         pitch = swdata->stretch->pitch;
  1468     }
  1469     switch (swdata->format) {
  1470     case SDL_PIXELFORMAT_YV12:
  1471         lum = swdata->planes[0];
  1472         Cr = swdata->planes[1];
  1473         Cb = swdata->planes[2];
  1474         break;
  1475     case SDL_PIXELFORMAT_IYUV:
  1476         lum = swdata->planes[0];
  1477         Cr = swdata->planes[2];
  1478         Cb = swdata->planes[1];
  1479         break;
  1480     case SDL_PIXELFORMAT_YUY2:
  1481         lum = swdata->planes[0];
  1482         Cr = lum + 3;
  1483         Cb = lum + 1;
  1484         break;
  1485     case SDL_PIXELFORMAT_UYVY:
  1486         lum = swdata->planes[0] + 1;
  1487         Cr = lum + 1;
  1488         Cb = lum - 1;
  1489         break;
  1490     case SDL_PIXELFORMAT_YVYU:
  1491         lum = swdata->planes[0];
  1492         Cr = lum + 1;
  1493         Cb = lum + 3;
  1494         break;
  1495     case SDL_PIXELFORMAT_NV12:
  1496     case SDL_PIXELFORMAT_NV21:
  1497         return SDL_ConvertPixels(swdata->w, swdata->h, 
  1498                 swdata->format, swdata->planes[0], swdata->pitches[0], 
  1499                 target_format, pixels, pitch);
  1500         break;
  1501     default:
  1502         return SDL_SetError("Unsupported YUV format in copy");
  1503     }
  1504     mod = (pitch / targetbpp);
  1505 
  1506     if (scale_2x) {
  1507         mod -= (swdata->w * 2);
  1508         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1509                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1510     } else {
  1511         mod -= swdata->w;
  1512         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1513                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
  1514     }
  1515     if (stretch) {
  1516         SDL_Rect rect = *srcrect;
  1517         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
  1518     }
  1519     return 0;
  1520 }
  1521 
  1522 void
  1523 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
  1524 {
  1525     if (swdata) {
  1526         SDL_free(swdata->pixels);
  1527         SDL_free(swdata->colortab);
  1528         SDL_free(swdata->rgb_2_pix);
  1529         SDL_FreeSurface(swdata->stretch);
  1530         SDL_FreeSurface(swdata->display);
  1531         SDL_free(swdata);
  1532     }
  1533 }
  1534 
  1535 /* vi: set ts=4 sw=4 expandtab: */