src/video/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 09 Jun 2006 07:06:12 +0000
branch SDL-1.3
changeset 1672 8e754b82cecc
parent 1668 4da1ee79c9af
child 1679 153477a6cc31
permissions -rw-r--r--
Updated SDL_Surface code for software-only access, fixed some build errors
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This is the software implementation of the YUV video overlay support */
    25 
    26 /* This code was derived from code carrying the following copyright notices:
    27 
    28  * Copyright (c) 1995 The Regents of the University of California.
    29  * All rights reserved.
    30  * 
    31  * Permission to use, copy, modify, and distribute this software and its
    32  * documentation for any purpose, without fee, and without written agreement is
    33  * hereby granted, provided that the above copyright notice and the following
    34  * two paragraphs appear in all copies of this software.
    35  * 
    36  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    37  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    38  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    39  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    40  * 
    41  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    42  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    43  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    44  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    45  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    46 
    47  * Copyright (c) 1995 Erik Corry
    48  * All rights reserved.
    49  * 
    50  * Permission to use, copy, modify, and distribute this software and its
    51  * documentation for any purpose, without fee, and without written agreement is
    52  * hereby granted, provided that the above copyright notice and the following
    53  * two paragraphs appear in all copies of this software.
    54  * 
    55  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    56  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    57  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    58  * OF THE POSSIBILITY OF SUCH DAMAGE.
    59  * 
    60  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    61  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    62  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    63  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    64  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    65 
    66  * Portions of this software Copyright (c) 1995 Brown University.
    67  * All rights reserved.
    68  * 
    69  * Permission to use, copy, modify, and distribute this software and its
    70  * documentation for any purpose, without fee, and without written agreement
    71  * is hereby granted, provided that the above copyright notice and the
    72  * following two paragraphs appear in all copies of this software.
    73  * 
    74  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    75  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    76  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    77  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    78  * 
    79  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    80  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    81  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    82  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    83  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    84  */
    85 
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_stretch_c.h"
    89 #include "SDL_yuvfuncs.h"
    90 #include "SDL_yuv_sw_c.h"
    91 
    92 /* The functions used to manipulate software video overlays */
    93 static struct private_yuvhwfuncs sw_yuvfuncs = {
    94     SDL_LockYUV_SW,
    95     SDL_UnlockYUV_SW,
    96     SDL_DisplayYUV_SW,
    97     SDL_FreeYUV_SW
    98 };
    99 
   100 /* RGB conversion lookup tables */
   101 struct private_yuvhwdata
   102 {
   103     SDL_Surface *stretch;
   104     SDL_Surface *display;
   105     Uint8 *pixels;
   106     int *colortab;
   107     Uint32 *rgb_2_pix;
   108     void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
   109                        unsigned char *lum, unsigned char *cr,
   110                        unsigned char *cb, unsigned char *out,
   111                        int rows, int cols, int mod);
   112     void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
   113                        unsigned char *lum, unsigned char *cr,
   114                        unsigned char *cb, unsigned char *out,
   115                        int rows, int cols, int mod);
   116 
   117     /* These are just so we don't have to allocate them separately */
   118     Uint16 pitches[3];
   119     Uint8 *planes[3];
   120 };
   121 
   122 
   123 /* The colorspace conversion functions */
   124 
   /*
    * Prototypes for externally provided MMX converters.  The block is
    * compiled out with "#if 0"; the condition it used to have is kept in the
    * trailing comment.
    */
   #if 0                           /*defined(__GNUC__) && defined(__i386__) && SDL_ASSEMBLY_ROUTINES */
   extern void Color565DitherYV12MMX1X(int *colortab,
                                       Uint32 *rgb_2_pix,
                                       unsigned char *lum,
                                       unsigned char *cr,
                                       unsigned char *cb,
                                       unsigned char *out,
                                       int rows, int cols, int mod);
   extern void ColorRGBDitherYV12MMX1X(int *colortab,
                                       Uint32 *rgb_2_pix,
                                       unsigned char *lum,
                                       unsigned char *cr,
                                       unsigned char *cb,
                                       unsigned char *out,
                                       int rows, int cols, int mod);
   #endif
   135 
   /*
    * Convert planar YUV (separate lum / cr / cb arrays) to 16-bit pixels,
    * one output pixel per luminance sample.
    *
    * Two luminance rows are handled per pass; every cr/cb sample is applied
    * to a 2x2 group of luminance samples.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   lum        luminance samples, cols per row, rows back to back
    *   cr, cb     chroma samples, one consumed per 2x2 group
    *   out        destination, written as unsigned short pixels
    *   rows/cols  source size; rows / 2 row pairs and cols / 2 column
    *              pairs are processed
    *   mod        destination pixels skipped after each output row
    */
   static void
   Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned short *upper = (unsigned short *) out;
       unsigned short *lower = upper + cols + mod;
       unsigned char *lum_lower = lum + cols;
       const int col_pairs = cols / 2;
       int rows_left, pairs_left;

       /* From the end of one output row to the start of the row two below */
       mod = cols + 2 * mod;

       for (rows_left = rows / 2; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               ++cr;
               ++cb;

               /* Upper row of the 2x2 group */
               luma = lum[0];
               upper[0] = (unsigned short) (rgb_2_pix[luma + r_off] |
                                            rgb_2_pix[luma + g_off] |
                                            rgb_2_pix[luma + b_off]);
               luma = lum[1];
               upper[1] = (unsigned short) (rgb_2_pix[luma + r_off] |
                                            rgb_2_pix[luma + g_off] |
                                            rgb_2_pix[luma + b_off]);
               lum += 2;
               upper += 2;

               /* Lower row of the 2x2 group */
               luma = lum_lower[0];
               lower[0] = (unsigned short) (rgb_2_pix[luma + r_off] |
                                            rgb_2_pix[luma + g_off] |
                                            rgb_2_pix[luma + b_off]);
               luma = lum_lower[1];
               lower[1] = (unsigned short) (rgb_2_pix[luma + r_off] |
                                            rgb_2_pix[luma + g_off] |
                                            rgb_2_pix[luma + b_off]);
               lum_lower += 2;
               lower += 2;
           }

           /*
            * Every pointer now sits at the start of the row that its
            * partner has just finished, so step over that row.
            */
           lum += cols;
           lum_lower += cols;
           upper += mod;
           lower += mod;
       }
   }
   205 
   /*
    * Convert planar YUV (separate lum / cr / cb arrays) to 24-bit pixels,
    * one output pixel per luminance sample.
    *
    * Two luminance rows are handled per pass; every cr/cb sample is applied
    * to a 2x2 group of luminance samples.  Each pixel is stored as three
    * bytes, lowest byte of the looked-up value first.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination bytes
    *   rows/cols  source size; rows / 2 row pairs and cols / 2 column
    *              pairs are processed
    *   mod        destination pixels (3 bytes each) skipped after each
    *              output row
    */
   static void
   Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned char *upper = out;
       unsigned char *lower = upper + cols * 3 + mod * 3;
       unsigned char *lum_lower = lum + cols;
       const int col_pairs = cols / 2;
       unsigned int pixel;
       int rows_left, pairs_left, i;

       /* Byte distance from the end of one row to the start of the row two below */
       mod = (cols + 2 * mod) * 3;

       for (rows_left = rows / 2; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               ++cr;
               ++cb;

               /* Upper row of the 2x2 group */
               for (i = 0; i < 2; ++i) {
                   luma = *lum++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   *upper++ = (pixel) & 0xFF;
                   *upper++ = (pixel >> 8) & 0xFF;
                   *upper++ = (pixel >> 16) & 0xFF;
               }

               /* Lower row of the 2x2 group */
               for (i = 0; i < 2; ++i) {
                   luma = *lum_lower++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   *lower++ = (pixel) & 0xFF;
                   *lower++ = (pixel >> 8) & 0xFF;
                   *lower++ = (pixel >> 16) & 0xFF;
               }
           }

           /*
            * Every pointer now sits at the start of the row that its
            * partner has just finished, so step over that row.
            */
           lum += cols;
           lum_lower += cols;
           upper += mod;
           lower += mod;
       }
   }
   285 
   /*
    * Convert planar YUV (separate lum / cr / cb arrays) to 32-bit pixels,
    * one output pixel per luminance sample.
    *
    * Two luminance rows are handled per pass; every cr/cb sample is applied
    * to a 2x2 group of luminance samples.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination, written as unsigned int pixels
    *   rows/cols  source size; rows / 2 row pairs and cols / 2 column
    *              pairs are processed
    *   mod        destination pixels skipped after each output row
    */
   static void
   Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned int *upper = (unsigned int *) out;
       unsigned int *lower = upper + cols + mod;
       unsigned char *lum_lower = lum + cols;
       const int col_pairs = cols / 2;
       int rows_left, pairs_left;

       /* From the end of one output row to the start of the row two below */
       mod = cols + 2 * mod;

       for (rows_left = rows / 2; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               ++cr;
               ++cb;

               /* Upper row of the 2x2 group */
               luma = lum[0];
               upper[0] = (rgb_2_pix[luma + r_off] |
                           rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
               luma = lum[1];
               upper[1] = (rgb_2_pix[luma + r_off] |
                           rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
               lum += 2;
               upper += 2;

               /* Lower row of the 2x2 group */
               luma = lum_lower[0];
               lower[0] = (rgb_2_pix[luma + r_off] |
                           rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
               luma = lum_lower[1];
               lower[1] = (rgb_2_pix[luma + r_off] |
                           rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
               lum_lower += 2;
               lower += 2;
           }

           /*
            * Every pointer now sits at the start of the row that its
            * partner has just finished, so step over that row.
            */
           lum += cols;
           lum_lower += cols;
           upper += mod;
           lower += mod;
       }
   }
   351 
   /*
    * Convert planar YUV (separate lum / cr / cb arrays) to 16-bit pixels at
    * twice the source size in both directions.
    *
    * Horizontal doubling relies on a trick noted by the original author: the
    * tables have the lower 16 bits replicated in the upper 16, so storing
    * one unsigned int writes two identical 16-bit pixels.  Vertical doubling
    * is done by storing the same value one output row further down.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the stored value
    *   out        destination, written as unsigned int (pixel pairs)
    *   rows/cols  source size; rows / 2 row pairs and cols / 2 column
    *              pairs are processed
    *   mod        16-bit destination pixels skipped after each output row
    *              (halved here because stores are pixel pairs)
    */
   static void
   Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned int *upper = (unsigned int *) out;
       const int stride = cols + (mod / 2);
       unsigned int *lower = upper + 2 * stride;
       unsigned char *lum_lower = lum + cols;
       const int col_pairs = cols / 2;
       unsigned int pixel;
       int rows_left, pairs_left, i;

       /* From the end of one output row to the start of the row four below */
       mod = (stride * 3) + (mod / 2);

       for (rows_left = rows / 2; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               ++cr;
               ++cb;

               /* Upper source row: output rows 0 and 1 of this band */
               for (i = 0; i < 2; ++i) {
                   luma = *lum++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   upper[stride] = pixel;
                   upper[0] = pixel;
                   ++upper;
               }

               /* Lower source row: output rows 2 and 3 of this band */
               for (i = 0; i < 2; ++i) {
                   luma = *lum_lower++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   lower[stride] = pixel;
                   lower[0] = pixel;
                   ++lower;
               }
           }

           /*
            * The source pointers step over the row their partner just
            * consumed; the output pointers step over the three rows that
            * have already been filled.
            */
           lum += cols;
           lum_lower += cols;
           upper += mod;
           lower += mod;
       }
   }
   429 
   /*
    * Convert planar YUV (separate lum / cr / cb arrays) to 24-bit pixels at
    * twice the source size in both directions.
    *
    * Every looked-up pixel is stored four times: at two neighbouring pixel
    * positions in one output row and at the same two positions one output
    * row further down.  Pixels are three bytes, lowest byte first.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination bytes
    *   rows/cols  source size; rows / 2 row pairs and cols / 2 column
    *              pairs are processed
    *   mod        destination pixels (3 bytes each) skipped after each
    *              output row
    */
   static void
   Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned char *upper = out;
       const int stride = (cols * 2 + mod) * 3;
       unsigned char *lower = upper + 2 * stride;
       unsigned char *lum_lower = lum + cols;
       const int col_pairs = cols / 2;
       unsigned int pixel;
       int rows_left, pairs_left, i, b;

       /* Byte distance from the end of one row to the start of the row four below */
       mod = stride * 3 + mod * 3;

       for (rows_left = rows / 2; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               ++cr;
               ++cb;

               /* Upper source row: output rows 0 and 1 of this band */
               for (i = 0; i < 2; ++i) {
                   luma = *lum++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   for (b = 0; b < 3; ++b) {
                       const unsigned char byte =
                           (unsigned char) ((pixel >> (8 * b)) & 0xFF);

                       upper[stride + 3 + b] = byte;
                       upper[stride + b] = byte;
                       upper[3 + b] = byte;
                       upper[b] = byte;
                   }
                   upper += 2 * 3;
               }

               /* Lower source row: output rows 2 and 3 of this band */
               for (i = 0; i < 2; ++i) {
                   luma = *lum_lower++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   for (b = 0; b < 3; ++b) {
                       const unsigned char byte =
                           (unsigned char) ((pixel >> (8 * b)) & 0xFF);

                       lower[stride + 3 + b] = byte;
                       lower[stride + b] = byte;
                       lower[3 + b] = byte;
                       lower[b] = byte;
                   }
                   lower += 2 * 3;
               }
           }

           /*
            * The source pointers step over the row their partner just
            * consumed; the output pointers step over the three rows that
            * have already been filled.
            */
           lum += cols;
           lum_lower += cols;
           upper += mod;
           lower += mod;
       }
   }
   523 
   /*
    * Convert planar YUV (separate lum / cr / cb arrays) to 32-bit pixels at
    * twice the source size in both directions.
    *
    * Every looked-up pixel is stored four times: at two neighbouring
    * positions in one output row and at the same two positions one output
    * row further down.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination, written as unsigned int pixels
    *   rows/cols  source size; rows / 2 row pairs and cols / 2 column
    *              pairs are processed
    *   mod        destination pixels skipped after each output row
    */
   static void
   Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned int *upper = (unsigned int *) out;
       const int stride = cols * 2 + mod;
       unsigned int *lower = upper + 2 * stride;
       unsigned char *lum_lower = lum + cols;
       const int col_pairs = cols / 2;
       unsigned int pixel;
       int rows_left, pairs_left, i;

       /* From the end of one output row to the start of the row four below */
       mod = (stride * 3) + mod;

       for (rows_left = rows / 2; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               ++cr;
               ++cb;

               /* Upper source row: output rows 0 and 1 of this band */
               for (i = 0; i < 2; ++i) {
                   luma = *lum++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   upper[stride + 1] = pixel;
                   upper[stride] = pixel;
                   upper[1] = pixel;
                   upper[0] = pixel;
                   upper += 2;
               }

               /* Lower source row: output rows 2 and 3 of this band */
               for (i = 0; i < 2; ++i) {
                   luma = *lum_lower++;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   lower[stride + 1] = pixel;
                   lower[stride] = pixel;
                   lower[1] = pixel;
                   lower[0] = pixel;
                   lower += 2;
               }
           }

           /*
            * The source pointers step over the row their partner just
            * consumed; the output pointers step over the three rows that
            * have already been filled.
            */
           lum += cols;
           lum_lower += cols;
           upper += mod;
           lower += mod;
       }
   }
   596 
   /*
    * Convert interleaved YUV data to 16-bit pixels, one output pixel per
    * luminance sample.
    *
    * The three source pointers are strided rather than planar: lum moves two
    * bytes per pixel, cr and cb move four bytes per pixel pair.  One cr/cb
    * sample is applied to two horizontally adjacent pixels.  The source is
    * read straight through with no per-row adjustment.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination, written as unsigned short pixels
    *   rows/cols  source size; cols / 2 pixel pairs are processed per row
    *   mod        destination pixels skipped after each output row
    */
   static void
   Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned short *dst = (unsigned short *) out;
       const int col_pairs = cols / 2;
       int rows_left, pairs_left;

       for (rows_left = rows; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               cr += 4;
               cb += 4;

               luma = lum[0];
               dst[0] = (unsigned short) (rgb_2_pix[luma + r_off] |
                                          rgb_2_pix[luma + g_off] |
                                          rgb_2_pix[luma + b_off]);
               luma = lum[2];
               dst[1] = (unsigned short) (rgb_2_pix[luma + r_off] |
                                          rgb_2_pix[luma + g_off] |
                                          rgb_2_pix[luma + b_off]);
               lum += 4;
               dst += 2;
           }

           dst += mod;
       }
   }
   642 
   /*
    * Convert interleaved YUV data to 24-bit pixels, one output pixel per
    * luminance sample.
    *
    * The three source pointers are strided rather than planar: lum moves two
    * bytes per pixel, cr and cb move four bytes per pixel pair.  One cr/cb
    * sample is applied to two horizontally adjacent pixels.  Each pixel is
    * stored as three bytes, lowest byte of the looked-up value first.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination bytes
    *   rows/cols  source size; cols / 2 pixel pairs are processed per row
    *   mod        destination pixels (3 bytes each) skipped after each
    *              output row
    */
   static void
   Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned char *dst = (unsigned char *) out;
       const int col_pairs = cols / 2;
       unsigned int pixel;
       int rows_left, pairs_left, i;

       /* Row padding expressed in bytes */
       mod *= 3;

       for (rows_left = rows; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               cr += 4;
               cb += 4;

               for (i = 0; i < 2; ++i) {
                   luma = *lum;
                   lum += 2;
                   pixel = (rgb_2_pix[luma + r_off] |
                            rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
                   *dst++ = (pixel) & 0xFF;
                   *dst++ = (pixel >> 8) & 0xFF;
                   *dst++ = (pixel >> 16) & 0xFF;
               }
           }

           dst += mod;
       }
   }
   692 
   /*
    * Convert interleaved YUV data to 32-bit pixels, one output pixel per
    * luminance sample.
    *
    * The three source pointers are strided rather than planar: lum moves two
    * bytes per pixel, cr and cb move four bytes per pixel pair.  One cr/cb
    * sample is applied to two horizontally adjacent pixels.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination, written as unsigned int pixels
    *   rows/cols  source size; cols / 2 pixel pairs are processed per row
    *   mod        destination pixels skipped after each output row
    */
   static void
   Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned int *dst = (unsigned int *) out;
       const int col_pairs = cols / 2;
       int rows_left, pairs_left;

       for (rows_left = rows; rows_left != 0; --rows_left) {
           for (pairs_left = col_pairs; pairs_left != 0; --pairs_left) {
               const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int luma;

               cr += 4;
               cb += 4;

               luma = lum[0];
               dst[0] = (rgb_2_pix[luma + r_off] |
                         rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
               luma = lum[2];
               dst[1] = (rgb_2_pix[luma + r_off] |
                         rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
               lum += 4;
               dst += 2;
           }

           dst += mod;
       }
   }
   735 
   /*
    * Convert interleaved YUV data to 16-bit pixels at twice the source size
    * in both directions.
    *
    * In this function I make use of a nasty trick. The tables have the lower
    * 16 bits replicated in the upper 16. This means I can write ints and get
    * the horizontal doubling for free (almost).  Vertical doubling is done by
    * storing the same value one output row further down.
    *
    * The source pointers are strided: lum moves two bytes per pixel, cr and
    * cb move four bytes per pixel pair.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the stored value
    *   out        destination, written as unsigned int (pixel pairs)
    *   rows/cols  source size; cols / 2 pixel pairs are processed per row
    *   mod        16-bit destination pixels skipped after each output row
    *              (halved here because stores are pixel pairs)
    */
   static void
   Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned int *row = (unsigned int *) out;
       const int next_row = cols + (mod / 2);
       /*
        * After a source row the pointer sits "cols" stores into the first of
        * the two output rows just filled.  Reaching the start of the next
        * pair takes the padding of this row plus one complete row.  Adding
        * only next_row dropped the padding and misplaced every later row
        * whenever mod was non-zero.
        */
       const int row_skip = next_row + (mod / 2);
       int x, y;
       int cols_2 = cols / 2;

       y = rows;
       while (y--) {
           x = cols_2;
           while (x--) {
               const int cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int L;

               cr += 4;
               cb += 4;

               L = *lum;
               lum += 2;
               row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
                                         rgb_2_pix[L + crb_g] |
                                         rgb_2_pix[L + cb_b]);
               row++;

               L = *lum;
               lum += 2;
               row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
                                         rgb_2_pix[L + crb_g] |
                                         rgb_2_pix[L + cb_b]);
               row++;

           }
           row += row_skip;
       }
   }
   786 
   /*
    * Convert interleaved YUV data to 24-bit pixels at twice the source size
    * in both directions.
    *
    * Every looked-up pixel is stored four times: at two neighbouring pixel
    * positions in one output row and at the same two positions one output
    * row further down.  Pixels are three bytes, lowest byte first.
    *
    * The source pointers are strided: lum moves two bytes per pixel, cr and
    * cb move four bytes per pixel pair.
    *
    *   colortab   chroma table: cr indexes sections 0 and 1, cb indexes
    *              sections 2 and 3 (sections are 256 entries apart)
    *   rgb_2_pix  pixel table: three sections 768 entries apart whose
    *              entries are OR-ed together to form the output pixel
    *   out        destination bytes
    *   rows/cols  source size; cols / 2 pixel pairs are processed per row
    *   mod        destination pixels (3 bytes each) skipped after each
    *              output row
    */
   static void
   Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod)
   {
       unsigned int value;
       unsigned char *row = out;
       const int next_row = (cols * 2 + mod) * 3;
       /*
        * After a source row the pointer sits cols * 6 bytes into the first
        * of the two output rows just filled.  Reaching the start of the next
        * pair takes the padding of this row (mod * 3 bytes) plus one
        * complete row.  Adding only next_row dropped the padding and
        * misplaced every later row whenever mod was non-zero.
        */
       const int row_skip = next_row + mod * 3;
       int x, y;
       int cols_2 = cols / 2;

       y = rows;
       while (y--) {
           x = cols_2;
           while (x--) {
               const int cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
               const int crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                   + colortab[*cb + 2 * 256];
               const int cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
               int L;

               cr += 4;
               cb += 4;

               L = *lum;
               lum += 2;
               value = (rgb_2_pix[L + cr_r] |
                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
               row[0 + 0] = row[3 + 0] = row[next_row + 0] =
                   row[next_row + 3 + 0] = (value) & 0xFF;
               row[0 + 1] = row[3 + 1] = row[next_row + 1] =
                   row[next_row + 3 + 1] = (value >> 8) & 0xFF;
               row[0 + 2] = row[3 + 2] = row[next_row + 2] =
                   row[next_row + 3 + 2] = (value >> 16) & 0xFF;
               row += 2 * 3;

               L = *lum;
               lum += 2;
               value = (rgb_2_pix[L + cr_r] |
                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
               row[0 + 0] = row[3 + 0] = row[next_row + 0] =
                   row[next_row + 3 + 0] = (value) & 0xFF;
               row[0 + 1] = row[3 + 1] = row[next_row + 1] =
                   row[next_row + 3 + 1] = (value >> 8) & 0xFF;
               row[0 + 2] = row[3 + 2] = row[next_row + 2] =
                   row[next_row + 3 + 2] = (value >> 16) & 0xFF;
               row += 2 * 3;

           }
           row += row_skip;
       }
   }
   842 
   843 static void
   844 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
   845                        unsigned char *lum, unsigned char *cr,
   846                        unsigned char *cb, unsigned char *out,
   847                        int rows, int cols, int mod)
   848 {
   849     unsigned int *row = (unsigned int *) out;
   850     const int next_row = cols * 2 + mod;
   851     int x, y;
   852     int cr_r;
   853     int crb_g;
   854     int cb_b;
   855     int cols_2 = cols / 2;
   856     mod += mod;
   857     y = rows;
   858     while (y--) {
   859         x = cols_2;
   860         while (x--) {
   861             register int L;
   862 
   863             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
   864             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
   865                 + colortab[*cb + 2 * 256];
   866             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
   867             cr += 4;
   868             cb += 4;
   869 
   870             L = *lum;
   871             lum += 2;
   872             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   873                 (rgb_2_pix[L + cr_r] |
   874                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   875             row += 2;
   876 
   877             L = *lum;
   878             lum += 2;
   879             row[0] = row[1] = row[next_row] = row[next_row + 1] =
   880                 (rgb_2_pix[L + cr_r] |
   881                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
   882             row += 2;
   883 
   884 
   885         }
   886 
   887         row += next_row;
   888     }
   889 }
   890 
   891 /*
   892  * How many 1 bits are there in the Uint32.
   893  * Low performance, do not call often.
   894  */
   895 static int
   896 number_of_bits_set(Uint32 a)
   897 {
   898     if (!a)
   899         return 0;
   900     if (a & 1)
   901         return 1 + number_of_bits_set(a >> 1);
   902     return (number_of_bits_set(a >> 1));
   903 }
   904 
   905 /*
   906  * How many 0 bits are there at least significant end of Uint32.
   907  * Low performance, do not call often.
   908  */
   909 static int
   910 free_bits_at_bottom(Uint32 a)
   911 {
   912     /* assume char is 8 bits */
   913     if (!a)
   914         return sizeof(Uint32) * 8;
   915     if (((Sint32) a) & 1l)
   916         return 0;
   917     return 1 + free_bits_at_bottom(a >> 1);
   918 }
   919 
   920 
   921 SDL_Overlay *
   922 SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format,
   923                  SDL_Surface * display)
   924 {
   925     SDL_Overlay *overlay;
   926     struct private_yuvhwdata *swdata;
   927     int *Cr_r_tab;
   928     int *Cr_g_tab;
   929     int *Cb_g_tab;
   930     int *Cb_b_tab;
   931     Uint32 *r_2_pix_alloc;
   932     Uint32 *g_2_pix_alloc;
   933     Uint32 *b_2_pix_alloc;
   934     int i;
   935     int CR, CB;
   936     Uint32 Rmask, Gmask, Bmask;
   937 
   938     /* Only RGB packed pixel conversion supported */
   939     if ((display->format->BytesPerPixel != 2) &&
   940         (display->format->BytesPerPixel != 3) &&
   941         (display->format->BytesPerPixel != 4)) {
   942         SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   943         return (NULL);
   944     }
   945 
   946     /* Verify that we support the format */
   947     switch (format) {
   948     case SDL_YV12_OVERLAY:
   949     case SDL_IYUV_OVERLAY:
   950     case SDL_YUY2_OVERLAY:
   951     case SDL_UYVY_OVERLAY:
   952     case SDL_YVYU_OVERLAY:
   953         break;
   954     default:
   955         SDL_SetError("Unsupported YUV format");
   956         return (NULL);
   957     }
   958 
   959     /* Create the overlay structure */
   960     overlay = (SDL_Overlay *) SDL_malloc(sizeof *overlay);
   961     if (overlay == NULL) {
   962         SDL_OutOfMemory();
   963         return (NULL);
   964     }
   965     SDL_memset(overlay, 0, (sizeof *overlay));
   966 
   967     /* Fill in the basic members */
   968     overlay->format = format;
   969     overlay->w = width;
   970     overlay->h = height;
   971 
   972     /* Set up the YUV surface function structure */
   973     overlay->hwfuncs = &sw_yuvfuncs;
   974 
   975     /* Create the pixel data and lookup tables */
   976     swdata = (struct private_yuvhwdata *) SDL_malloc(sizeof *swdata);
   977     overlay->hwdata = swdata;
   978     if (swdata == NULL) {
   979         SDL_OutOfMemory();
   980         SDL_FreeYUVOverlay(overlay);
   981         return (NULL);
   982     }
   983     swdata->stretch = NULL;
   984     swdata->display = display;
   985     swdata->pixels = (Uint8 *) SDL_malloc(width * height * 2);
   986     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
   987     Cr_r_tab = &swdata->colortab[0 * 256];
   988     Cr_g_tab = &swdata->colortab[1 * 256];
   989     Cb_g_tab = &swdata->colortab[2 * 256];
   990     Cb_b_tab = &swdata->colortab[3 * 256];
   991     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
   992     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
   993     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
   994     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
   995     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
   996         SDL_OutOfMemory();
   997         SDL_FreeYUVOverlay(overlay);
   998         return (NULL);
   999     }
  1000 
  1001     /* Generate the tables for the display surface */
  1002     for (i = 0; i < 256; i++) {
  1003         /* Gamma correction (luminescence table) and chroma correction
  1004            would be done here.  See the Berkeley mpeg_play sources.
  1005          */
  1006         CB = CR = (i - 128);
  1007         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
  1008         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
  1009         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
  1010         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
  1011     }
  1012 
  1013     /* 
  1014      * Set up entries 0-255 in rgb-to-pixel value tables.
  1015      */
  1016     Rmask = display->format->Rmask;
  1017     Gmask = display->format->Gmask;
  1018     Bmask = display->format->Bmask;
  1019     for (i = 0; i < 256; ++i) {
  1020         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
  1021         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
  1022         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
  1023         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
  1024         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
  1025         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
  1026     }
  1027 
  1028     /*
  1029      * If we have 16-bit output depth, then we double the value
  1030      * in the top word. This means that we can write out both
  1031      * pixels in the pixel doubling mode with one op. It is 
  1032      * harmless in the normal case as storing a 32-bit value
  1033      * through a short pointer will lose the top bits anyway.
  1034      */
  1035     if (display->format->BytesPerPixel == 2) {
  1036         for (i = 0; i < 256; ++i) {
  1037             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
  1038             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
  1039             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
  1040         }
  1041     }
  1042 
  1043     /*
  1044      * Spread out the values we have to the rest of the array so that
  1045      * we do not need to check for overflow.
  1046      */
  1047     for (i = 0; i < 256; ++i) {
  1048         r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1049         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
  1050         g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1051         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
  1052         b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1053         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
  1054     }
  1055 
  1056     /* You have chosen wisely... */
  1057     switch (format) {
  1058     case SDL_YV12_OVERLAY:
  1059     case SDL_IYUV_OVERLAY:
  1060         if (display->format->BytesPerPixel == 2) {
  1061 #if 0                           /*defined(__GNUC__) && defined(__i386__) && SDL_ASSEMBLY_ROUTINES */
  1062             /* inline assembly functions */
  1063             if (SDL_HasMMX() && (Rmask == 0xF800) &&
  1064                 (Gmask == 0x07E0) && (Bmask == 0x001F) && (width & 15) == 0) {
  1065 /*printf("Using MMX 16-bit 565 dither\n");*/
  1066                 swdata->Display1X = Color565DitherYV12MMX1X;
  1067             } else {
  1068 /*printf("Using C 16-bit dither\n");*/
  1069                 swdata->Display1X = Color16DitherYV12Mod1X;
  1070             }
  1071 #else
  1072             swdata->Display1X = Color16DitherYV12Mod1X;
  1073 #endif
  1074             swdata->Display2X = Color16DitherYV12Mod2X;
  1075         }
  1076         if (display->format->BytesPerPixel == 3) {
  1077             swdata->Display1X = Color24DitherYV12Mod1X;
  1078             swdata->Display2X = Color24DitherYV12Mod2X;
  1079         }
  1080         if (display->format->BytesPerPixel == 4) {
  1081 #if 0                           /*defined(__GNUC__) && defined(__i386__) && SDL_ASSEMBLY_ROUTINES */
  1082             /* inline assembly functions */
  1083             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  1084                 (Gmask == 0x0000FF00) &&
  1085                 (Bmask == 0x000000FF) && (width & 15) == 0) {
  1086 /*printf("Using MMX 32-bit dither\n");*/
  1087                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1088             } else {
  1089 /*printf("Using C 32-bit dither\n");*/
  1090                 swdata->Display1X = Color32DitherYV12Mod1X;
  1091             }
  1092 #else
  1093             swdata->Display1X = Color32DitherYV12Mod1X;
  1094 #endif
  1095             swdata->Display2X = Color32DitherYV12Mod2X;
  1096         }
  1097         break;
  1098     case SDL_YUY2_OVERLAY:
  1099     case SDL_UYVY_OVERLAY:
  1100     case SDL_YVYU_OVERLAY:
  1101         if (display->format->BytesPerPixel == 2) {
  1102             swdata->Display1X = Color16DitherYUY2Mod1X;
  1103             swdata->Display2X = Color16DitherYUY2Mod2X;
  1104         }
  1105         if (display->format->BytesPerPixel == 3) {
  1106             swdata->Display1X = Color24DitherYUY2Mod1X;
  1107             swdata->Display2X = Color24DitherYUY2Mod2X;
  1108         }
  1109         if (display->format->BytesPerPixel == 4) {
  1110             swdata->Display1X = Color32DitherYUY2Mod1X;
  1111             swdata->Display2X = Color32DitherYUY2Mod2X;
  1112         }
  1113         break;
  1114     default:
  1115         /* We should never get here (caught above) */
  1116         break;
  1117     }
  1118 
  1119     /* Find the pitch and offset values for the overlay */
  1120     overlay->pitches = swdata->pitches;
  1121     overlay->pixels = swdata->planes;
  1122     switch (format) {
  1123     case SDL_YV12_OVERLAY:
  1124     case SDL_IYUV_OVERLAY:
  1125         overlay->pitches[0] = overlay->w;
  1126         overlay->pitches[1] = overlay->pitches[0] / 2;
  1127         overlay->pitches[2] = overlay->pitches[0] / 2;
  1128         overlay->pixels[0] = swdata->pixels;
  1129         overlay->pixels[1] = overlay->pixels[0] +
  1130             overlay->pitches[0] * overlay->h;
  1131         overlay->pixels[2] = overlay->pixels[1] +
  1132             overlay->pitches[1] * overlay->h / 2;
  1133         overlay->planes = 3;
  1134         break;
  1135     case SDL_YUY2_OVERLAY:
  1136     case SDL_UYVY_OVERLAY:
  1137     case SDL_YVYU_OVERLAY:
  1138         overlay->pitches[0] = overlay->w * 2;
  1139         overlay->pixels[0] = swdata->pixels;
  1140         overlay->planes = 1;
  1141         break;
  1142     default:
  1143         /* We should never get here (caught above) */
  1144         break;
  1145     }
  1146 
  1147     /* We're all done.. */
  1148     return (overlay);
  1149 }
  1150 
  1151 int
  1152 SDL_LockYUV_SW(_THIS, SDL_Overlay * overlay)
  1153 {
  1154     return (0);
  1155 }
  1156 
  1157 void
  1158 SDL_UnlockYUV_SW(_THIS, SDL_Overlay * overlay)
  1159 {
  1160     return;
  1161 }
  1162 
  1163 int
  1164 SDL_DisplayYUV_SW(_THIS, SDL_Overlay * overlay, SDL_Rect * src,
  1165                   SDL_Rect * dst)
  1166 {
  1167     struct private_yuvhwdata *swdata;
  1168     int stretch;
  1169     int scale_2x;
  1170     SDL_Surface *display;
  1171     Uint8 *lum, *Cr, *Cb;
  1172     Uint8 *dstp;
  1173     int mod;
  1174 
  1175     swdata = overlay->hwdata;
  1176     stretch = 0;
  1177     scale_2x = 0;
  1178     if (src->x || src->y || src->w < overlay->w || src->h < overlay->h) {
  1179         /* The source rectangle has been clipped.
  1180            Using a scratch surface is easier than adding clipped
  1181            source support to all the blitters, plus that would
  1182            slow them down in the general unclipped case.
  1183          */
  1184         stretch = 1;
  1185     } else if ((src->w != dst->w) || (src->h != dst->h)) {
  1186         if ((dst->w == 2 * src->w) && (dst->h == 2 * src->h)) {
  1187             scale_2x = 1;
  1188         } else {
  1189             stretch = 1;
  1190         }
  1191     }
  1192     if (stretch) {
  1193         if (!swdata->stretch) {
  1194             display = swdata->display;
  1195             swdata->stretch = SDL_CreateRGBSurface(0,
  1196                                                    overlay->w,
  1197                                                    overlay->h,
  1198                                                    display->format->
  1199                                                    BitsPerPixel,
  1200                                                    display->format->
  1201                                                    Rmask,
  1202                                                    display->format->
  1203                                                    Gmask,
  1204                                                    display->format->Bmask, 0);
  1205             if (!swdata->stretch) {
  1206                 return (-1);
  1207             }
  1208         }
  1209         display = swdata->stretch;
  1210     } else {
  1211         display = swdata->display;
  1212     }
  1213     switch (overlay->format) {
  1214     case SDL_YV12_OVERLAY:
  1215         lum = overlay->pixels[0];
  1216         Cr = overlay->pixels[1];
  1217         Cb = overlay->pixels[2];
  1218         break;
  1219     case SDL_IYUV_OVERLAY:
  1220         lum = overlay->pixels[0];
  1221         Cr = overlay->pixels[2];
  1222         Cb = overlay->pixels[1];
  1223         break;
  1224     case SDL_YUY2_OVERLAY:
  1225         lum = overlay->pixels[0];
  1226         Cr = lum + 3;
  1227         Cb = lum + 1;
  1228         break;
  1229     case SDL_UYVY_OVERLAY:
  1230         lum = overlay->pixels[0] + 1;
  1231         Cr = lum + 1;
  1232         Cb = lum - 1;
  1233         break;
  1234     case SDL_YVYU_OVERLAY:
  1235         lum = overlay->pixels[0];
  1236         Cr = lum + 1;
  1237         Cb = lum + 3;
  1238         break;
  1239     default:
  1240         SDL_SetError("Unsupported YUV format in blit");
  1241         return (-1);
  1242     }
  1243     if (SDL_MUSTLOCK(display)) {
  1244         if (SDL_LockSurface(display) < 0) {
  1245             return (-1);
  1246         }
  1247     }
  1248     if (stretch) {
  1249         dstp = (Uint8 *) swdata->stretch->pixels;
  1250     } else {
  1251         dstp = (Uint8 *) display->pixels
  1252             + dst->x * display->format->BytesPerPixel
  1253             + dst->y * display->pitch;
  1254     }
  1255     mod = (display->pitch / display->format->BytesPerPixel);
  1256 
  1257     if (scale_2x) {
  1258         mod -= (overlay->w * 2);
  1259         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1260                           lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
  1261     } else {
  1262         mod -= overlay->w;
  1263         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1264                           lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
  1265     }
  1266     if (SDL_MUSTLOCK(display)) {
  1267         SDL_UnlockSurface(display);
  1268     }
  1269     if (stretch) {
  1270         display = swdata->display;
  1271         SDL_SoftStretch(swdata->stretch, src, display, dst);
  1272     }
  1273     SDL_UpdateRects(display, 1, dst);
  1274 
  1275     return (0);
  1276 }
  1277 
  1278 void
  1279 SDL_FreeYUV_SW(_THIS, SDL_Overlay * overlay)
  1280 {
  1281     struct private_yuvhwdata *swdata;
  1282 
  1283     swdata = overlay->hwdata;
  1284     if (swdata) {
  1285         if (swdata->stretch) {
  1286             SDL_FreeSurface(swdata->stretch);
  1287         }
  1288         if (swdata->pixels) {
  1289             SDL_free(swdata->pixels);
  1290         }
  1291         if (swdata->colortab) {
  1292             SDL_free(swdata->colortab);
  1293         }
  1294         if (swdata->rgb_2_pix) {
  1295             SDL_free(swdata->rgb_2_pix);
  1296         }
  1297         SDL_free(swdata);
  1298     }
  1299 }
  1300 
  1301 /* vi: set ts=4 sw=4 expandtab: */