src/video/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 07 Feb 2006 06:59:48 +0000
changeset 1336 3692456e7b0f
parent 1330 450721ad5436
child 1338 604d73db6802
permissions -rw-r--r--
Use SDL_ prefixed versions of C library functions.
FIXME:
Change #include <stdlib.h> to #include "SDL_stdlib.h"
Change #include <string.h> to #include "SDL_string.h"
Make sure nothing else broke because of this...
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 /* This is the software implementation of the YUV video overlay support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  * 
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  * 
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  * 
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  * 
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  * 
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  * 
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  * 
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  * 
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  * 
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_error.h"
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_stdlib.h"
    89 #include "SDL_string.h"
    90 #include "SDL_stretch_c.h"
    91 #include "SDL_yuvfuncs.h"
    92 #include "SDL_yuv_sw_c.h"
    93 
    94 /* The functions used to manipulate software video overlays. The initializers are positional, so their order must match the member order of struct private_yuvhwfuncs (declared outside this file). */
    95 static struct private_yuvhwfuncs sw_yuvfuncs = {
    96 	SDL_LockYUV_SW,		/* 1st member: lock routine */
    97 	SDL_UnlockYUV_SW,	/* 2nd member: unlock routine */
    98 	SDL_DisplayYUV_SW,	/* 3rd member: display routine */
    99 	SDL_FreeYUV_SW		/* 4th member: free routine */
   100 };
   101 
   102 /* Private data of the software YUV implementation: the RGB conversion lookup tables plus the conversion routines that consume them */
   103 struct private_yuvhwdata {
   104 	SDL_Surface *stretch;	/* NOTE(review): presumably an intermediate surface used when scaling - confirm at the use site */
   105 	SDL_Surface *display;	/* NOTE(review): presumably the surface being drawn to - confirm at the use site */
   106 	Uint8 *pixels;	/* NOTE(review): presumably the overlay's raw YUV data - confirm at the allocation site */
   107 	int *colortab;	/* passed as the first argument of Display1X/Display2X; the converters in this file index it with a chroma byte plus 0..3 times 256 */
   108 	Uint32 *rgb_2_pix;	/* passed as the second argument; the converters OR three of its entries together to form one pixel */
   109 	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
   110                           unsigned char *lum, unsigned char *cr,
   111                           unsigned char *cb, unsigned char *out,
   112                           int rows, int cols, int mod );	/* same signature as the Color*Dither*Mod1X converters in this file */
   113 	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
   114 	                  unsigned char *lum, unsigned char *cr,
   115                           unsigned char *cb, unsigned char *out,
   116                           int rows, int cols, int mod );	/* same signature as the Color*Dither*Mod2X converters in this file */
   117 
   118 	/* These are embedded here only so that they do not have to be allocated separately */
   119 	Uint16 pitches[3];
   120 	Uint8 *planes[3];
   121 };
   122 
   123 
   124 /* The colorspace conversion functions. The two extern routines are defined outside this file and share the parameter list of the static converters below. */
   125 
   126 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   127                                      unsigned char *lum, unsigned char *cr,
   128                                      unsigned char *cb, unsigned char *out,
   129                                      int rows, int cols, int mod );	/* NOTE(review): name suggests an MMX path for 16-bit 565 output - confirm */
   130 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   131                                      unsigned char *lum, unsigned char *cr,
   132                                      unsigned char *cb, unsigned char *out,
   133                                      int rows, int cols, int mod );	/* NOTE(review): name suggests an MMX path for 32-bit RGB output - confirm */
   134 
   135 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   136                                     unsigned char *lum, unsigned char *cr,
   137                                     unsigned char *cb, unsigned char *out,
   138                                     int rows, int cols, int mod )
   139 {
   140     unsigned short* row1;
   141     unsigned short* row2;
   142     unsigned char* lum2;
   143     int x, y;
   144     int cr_r;
   145     int crb_g;
   146     int cb_b;
   147     int cols_2 = cols / 2;
   148 
   149     row1 = (unsigned short*) out;
   150     row2 = row1 + cols + mod;
   151     lum2 = lum + cols;
   152 
   153     mod += cols + mod;
   154 
   155     y = rows / 2;
   156     while( y-- )
   157     {
   158         x = cols_2;
   159         while( x-- )
   160         {
   161             register int L;
   162 
   163             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   164             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   165                                + colortab[ *cb + 2*256 ];
   166             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   167             ++cr; ++cb;
   168 
   169             L = *lum++;
   170             *row1++ = (rgb_2_pix[ L + cr_r ] |
   171                        rgb_2_pix[ L + crb_g ] |
   172                        rgb_2_pix[ L + cb_b ]);
   173 
   174             L = *lum++;
   175             *row1++ = (rgb_2_pix[ L + cr_r ] |
   176                        rgb_2_pix[ L + crb_g ] |
   177                        rgb_2_pix[ L + cb_b ]);
   178 
   179 
   180             /* Now, do second row.  */
   181 
   182             L = *lum2++;
   183             *row2++ = (rgb_2_pix[ L + cr_r ] |
   184                        rgb_2_pix[ L + crb_g ] |
   185                        rgb_2_pix[ L + cb_b ]);
   186 
   187             L = *lum2++;
   188             *row2++ = (rgb_2_pix[ L + cr_r ] |
   189                        rgb_2_pix[ L + crb_g ] |
   190                        rgb_2_pix[ L + cb_b ]);
   191         }
   192 
   193         /*
   194          * These values are at the start of the next line, (due
   195          * to the ++'s above),but they need to be at the start
   196          * of the line after that.
   197          */
   198         lum  += cols;
   199         lum2 += cols;
   200         row1 += mod;
   201         row2 += mod;
   202     }
   203 }
   204 
   205 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   206                                     unsigned char *lum, unsigned char *cr,
   207                                     unsigned char *cb, unsigned char *out,
   208                                     int rows, int cols, int mod )
   209 {
   210     unsigned int value;
   211     unsigned char* row1;
   212     unsigned char* row2;
   213     unsigned char* lum2;
   214     int x, y;
   215     int cr_r;
   216     int crb_g;
   217     int cb_b;
   218     int cols_2 = cols / 2;
   219 
   220     row1 = out;
   221     row2 = row1 + cols*3 + mod*3;
   222     lum2 = lum + cols;
   223 
   224     mod += cols + mod;
   225     mod *= 3;
   226 
   227     y = rows / 2;
   228     while( y-- )
   229     {
   230         x = cols_2;
   231         while( x-- )
   232         {
   233             register int L;
   234 
   235             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   236             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   237                                + colortab[ *cb + 2*256 ];
   238             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   239             ++cr; ++cb;
   240 
   241             L = *lum++;
   242             value = (rgb_2_pix[ L + cr_r ] |
   243                      rgb_2_pix[ L + crb_g ] |
   244                      rgb_2_pix[ L + cb_b ]);
   245             *row1++ = (value      ) & 0xFF;
   246             *row1++ = (value >>  8) & 0xFF;
   247             *row1++ = (value >> 16) & 0xFF;
   248 
   249             L = *lum++;
   250             value = (rgb_2_pix[ L + cr_r ] |
   251                      rgb_2_pix[ L + crb_g ] |
   252                      rgb_2_pix[ L + cb_b ]);
   253             *row1++ = (value      ) & 0xFF;
   254             *row1++ = (value >>  8) & 0xFF;
   255             *row1++ = (value >> 16) & 0xFF;
   256 
   257 
   258             /* Now, do second row.  */
   259 
   260             L = *lum2++;
   261             value = (rgb_2_pix[ L + cr_r ] |
   262                      rgb_2_pix[ L + crb_g ] |
   263                      rgb_2_pix[ L + cb_b ]);
   264             *row2++ = (value      ) & 0xFF;
   265             *row2++ = (value >>  8) & 0xFF;
   266             *row2++ = (value >> 16) & 0xFF;
   267 
   268             L = *lum2++;
   269             value = (rgb_2_pix[ L + cr_r ] |
   270                      rgb_2_pix[ L + crb_g ] |
   271                      rgb_2_pix[ L + cb_b ]);
   272             *row2++ = (value      ) & 0xFF;
   273             *row2++ = (value >>  8) & 0xFF;
   274             *row2++ = (value >> 16) & 0xFF;
   275         }
   276 
   277         /*
   278          * These values are at the start of the next line, (due
   279          * to the ++'s above),but they need to be at the start
   280          * of the line after that.
   281          */
   282         lum  += cols;
   283         lum2 += cols;
   284         row1 += mod;
   285         row2 += mod;
   286     }
   287 }
   288 
   289 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   290                                     unsigned char *lum, unsigned char *cr,
   291                                     unsigned char *cb, unsigned char *out,
   292                                     int rows, int cols, int mod )
   293 {
   294     unsigned int* row1;
   295     unsigned int* row2;
   296     unsigned char* lum2;
   297     int x, y;
   298     int cr_r;
   299     int crb_g;
   300     int cb_b;
   301     int cols_2 = cols / 2;
   302 
   303     row1 = (unsigned int*) out;
   304     row2 = row1 + cols + mod;
   305     lum2 = lum + cols;
   306 
   307     mod += cols + mod;
   308 
   309     y = rows / 2;
   310     while( y-- )
   311     {
   312         x = cols_2;
   313         while( x-- )
   314         {
   315             register int L;
   316 
   317             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   318             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   319                                + colortab[ *cb + 2*256 ];
   320             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   321             ++cr; ++cb;
   322 
   323             L = *lum++;
   324             *row1++ = (rgb_2_pix[ L + cr_r ] |
   325                        rgb_2_pix[ L + crb_g ] |
   326                        rgb_2_pix[ L + cb_b ]);
   327 
   328             L = *lum++;
   329             *row1++ = (rgb_2_pix[ L + cr_r ] |
   330                        rgb_2_pix[ L + crb_g ] |
   331                        rgb_2_pix[ L + cb_b ]);
   332 
   333 
   334             /* Now, do second row.  */
   335 
   336             L = *lum2++;
   337             *row2++ = (rgb_2_pix[ L + cr_r ] |
   338                        rgb_2_pix[ L + crb_g ] |
   339                        rgb_2_pix[ L + cb_b ]);
   340 
   341             L = *lum2++;
   342             *row2++ = (rgb_2_pix[ L + cr_r ] |
   343                        rgb_2_pix[ L + crb_g ] |
   344                        rgb_2_pix[ L + cb_b ]);
   345         }
   346 
   347         /*
   348          * These values are at the start of the next line, (due
   349          * to the ++'s above),but they need to be at the start
   350          * of the line after that.
   351          */
   352         lum  += cols;
   353         lum2 += cols;
   354         row1 += mod;
   355         row2 += mod;
   356     }
   357 }
   358 
   359 /*
   360  * In this function I make use of a nasty trick. The tables have the lower
   361  * 16 bits replicated in the upper 16. This means I can write ints and get
   362  * the horisontal doubling for free (almost).
   363  */
   364 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   365                                     unsigned char *lum, unsigned char *cr,
   366                                     unsigned char *cb, unsigned char *out,
   367                                     int rows, int cols, int mod )
   368 {
   369     unsigned int* row1 = (unsigned int*) out;
   370     const int next_row = cols+(mod/2);
   371     unsigned int* row2 = row1 + 2*next_row;
   372     unsigned char* lum2;
   373     int x, y;
   374     int cr_r;
   375     int crb_g;
   376     int cb_b;
   377     int cols_2 = cols / 2;
   378 
   379     lum2 = lum + cols;
   380 
   381     mod = (next_row * 3) + (mod/2);
   382 
   383     y = rows / 2;
   384     while( y-- )
   385     {
   386         x = cols_2;
   387         while( x-- )
   388         {
   389             register int L;
   390 
   391             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   392             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   393                                + colortab[ *cb + 2*256 ];
   394             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   395             ++cr; ++cb;
   396 
   397             L = *lum++;
   398             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   399                                         rgb_2_pix[ L + crb_g ] |
   400                                         rgb_2_pix[ L + cb_b ]);
   401             row1++;
   402 
   403             L = *lum++;
   404             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   405                                         rgb_2_pix[ L + crb_g ] |
   406                                         rgb_2_pix[ L + cb_b ]);
   407             row1++;
   408 
   409 
   410             /* Now, do second row. */
   411 
   412             L = *lum2++;
   413             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   414                                         rgb_2_pix[ L + crb_g ] |
   415                                         rgb_2_pix[ L + cb_b ]);
   416             row2++;
   417 
   418             L = *lum2++;
   419             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   420                                         rgb_2_pix[ L + crb_g ] |
   421                                         rgb_2_pix[ L + cb_b ]);
   422             row2++;
   423         }
   424 
   425         /*
   426          * These values are at the start of the next line, (due
   427          * to the ++'s above),but they need to be at the start
   428          * of the line after that.
   429          */
   430         lum  += cols;
   431         lum2 += cols;
   432         row1 += mod;
   433         row2 += mod;
   434     }
   435 }
   436 
   437 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   438                                     unsigned char *lum, unsigned char *cr,
   439                                     unsigned char *cb, unsigned char *out,
   440                                     int rows, int cols, int mod )
   441 {
   442     unsigned int value;
   443     unsigned char* row1 = out;
   444     const int next_row = (cols*2 + mod) * 3;
   445     unsigned char* row2 = row1 + 2*next_row;
   446     unsigned char* lum2;
   447     int x, y;
   448     int cr_r;
   449     int crb_g;
   450     int cb_b;
   451     int cols_2 = cols / 2;
   452 
   453     lum2 = lum + cols;
   454 
   455     mod = next_row*3 + mod*3;
   456 
   457     y = rows / 2;
   458     while( y-- )
   459     {
   460         x = cols_2;
   461         while( x-- )
   462         {
   463             register int L;
   464 
   465             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   466             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   467                                + colortab[ *cb + 2*256 ];
   468             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   469             ++cr; ++cb;
   470 
   471             L = *lum++;
   472             value = (rgb_2_pix[ L + cr_r ] |
   473                      rgb_2_pix[ L + crb_g ] |
   474                      rgb_2_pix[ L + cb_b ]);
   475             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   476                      (value      ) & 0xFF;
   477             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   478                      (value >>  8) & 0xFF;
   479             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   480                      (value >> 16) & 0xFF;
   481             row1 += 2*3;
   482 
   483             L = *lum++;
   484             value = (rgb_2_pix[ L + cr_r ] |
   485                      rgb_2_pix[ L + crb_g ] |
   486                      rgb_2_pix[ L + cb_b ]);
   487             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   488                      (value      ) & 0xFF;
   489             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   490                      (value >>  8) & 0xFF;
   491             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   492                      (value >> 16) & 0xFF;
   493             row1 += 2*3;
   494 
   495 
   496             /* Now, do second row. */
   497 
   498             L = *lum2++;
   499             value = (rgb_2_pix[ L + cr_r ] |
   500                      rgb_2_pix[ L + crb_g ] |
   501                      rgb_2_pix[ L + cb_b ]);
   502             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   503                      (value      ) & 0xFF;
   504             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   505                      (value >>  8) & 0xFF;
   506             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   507                      (value >> 16) & 0xFF;
   508             row2 += 2*3;
   509 
   510             L = *lum2++;
   511             value = (rgb_2_pix[ L + cr_r ] |
   512                      rgb_2_pix[ L + crb_g ] |
   513                      rgb_2_pix[ L + cb_b ]);
   514             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   515                      (value      ) & 0xFF;
   516             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   517                      (value >>  8) & 0xFF;
   518             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   519                      (value >> 16) & 0xFF;
   520             row2 += 2*3;
   521         }
   522 
   523         /*
   524          * These values are at the start of the next line, (due
   525          * to the ++'s above),but they need to be at the start
   526          * of the line after that.
   527          */
   528         lum  += cols;
   529         lum2 += cols;
   530         row1 += mod;
   531         row2 += mod;
   532     }
   533 }
   534 
   535 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   536                                     unsigned char *lum, unsigned char *cr,
   537                                     unsigned char *cb, unsigned char *out,
   538                                     int rows, int cols, int mod )
   539 {
   540     unsigned int* row1 = (unsigned int*) out;
   541     const int next_row = cols*2+mod;
   542     unsigned int* row2 = row1 + 2*next_row;
   543     unsigned char* lum2;
   544     int x, y;
   545     int cr_r;
   546     int crb_g;
   547     int cb_b;
   548     int cols_2 = cols / 2;
   549 
   550     lum2 = lum + cols;
   551 
   552     mod = (next_row * 3) + mod;
   553 
   554     y = rows / 2;
   555     while( y-- )
   556     {
   557         x = cols_2;
   558         while( x-- )
   559         {
   560             register int L;
   561 
   562             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   563             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   564                                + colortab[ *cb + 2*256 ];
   565             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   566             ++cr; ++cb;
   567 
   568             L = *lum++;
   569             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   570                                        (rgb_2_pix[ L + cr_r ] |
   571                                         rgb_2_pix[ L + crb_g ] |
   572                                         rgb_2_pix[ L + cb_b ]);
   573             row1 += 2;
   574 
   575             L = *lum++;
   576             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   577                                        (rgb_2_pix[ L + cr_r ] |
   578                                         rgb_2_pix[ L + crb_g ] |
   579                                         rgb_2_pix[ L + cb_b ]);
   580             row1 += 2;
   581 
   582 
   583             /* Now, do second row. */
   584 
   585             L = *lum2++;
   586             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   587                                        (rgb_2_pix[ L + cr_r ] |
   588                                         rgb_2_pix[ L + crb_g ] |
   589                                         rgb_2_pix[ L + cb_b ]);
   590             row2 += 2;
   591 
   592             L = *lum2++;
   593             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   594                                        (rgb_2_pix[ L + cr_r ] |
   595                                         rgb_2_pix[ L + crb_g ] |
   596                                         rgb_2_pix[ L + cb_b ]);
   597             row2 += 2;
   598         }
   599 
   600         /*
   601          * These values are at the start of the next line, (due
   602          * to the ++'s above),but they need to be at the start
   603          * of the line after that.
   604          */
   605         lum  += cols;
   606         lum2 += cols;
   607         row1 += mod;
   608         row2 += mod;
   609     }
   610 }
   611 
   612 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   613                                     unsigned char *lum, unsigned char *cr,
   614                                     unsigned char *cb, unsigned char *out,
   615                                     int rows, int cols, int mod )
   616 {
   617     unsigned short* row;
   618     int x, y;
   619     int cr_r;
   620     int crb_g;
   621     int cb_b;
   622     int cols_2 = cols / 2;
   623 
   624     row = (unsigned short*) out;
   625 
   626     y = rows;
   627     while( y-- )
   628     {
   629         x = cols_2;
   630         while( x-- )
   631         {
   632             register int L;
   633 
   634             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   635             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   636                                + colortab[ *cb + 2*256 ];
   637             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   638             cr += 4; cb += 4;
   639 
   640             L = *lum; lum += 2;
   641             *row++ = (rgb_2_pix[ L + cr_r ] |
   642                        rgb_2_pix[ L + crb_g ] |
   643                        rgb_2_pix[ L + cb_b ]);
   644 
   645             L = *lum; lum += 2;
   646             *row++ = (rgb_2_pix[ L + cr_r ] |
   647                        rgb_2_pix[ L + crb_g ] |
   648                        rgb_2_pix[ L + cb_b ]);
   649 
   650         }
   651 
   652         row += mod;
   653     }
   654 }
   655 
   656 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   657                                     unsigned char *lum, unsigned char *cr,
   658                                     unsigned char *cb, unsigned char *out,
   659                                     int rows, int cols, int mod )
   660 {
   661     unsigned int value;
   662     unsigned char* row;
   663     int x, y;
   664     int cr_r;
   665     int crb_g;
   666     int cb_b;
   667     int cols_2 = cols / 2;
   668 
   669     row = (unsigned char*) out;
   670     mod *= 3;
   671     y = rows;
   672     while( y-- )
   673     {
   674         x = cols_2;
   675         while( x-- )
   676         {
   677             register int L;
   678 
   679             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   680             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   681                                + colortab[ *cb + 2*256 ];
   682             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   683             cr += 4; cb += 4;
   684 
   685             L = *lum; lum += 2;
   686             value = (rgb_2_pix[ L + cr_r ] |
   687                      rgb_2_pix[ L + crb_g ] |
   688                      rgb_2_pix[ L + cb_b ]);
   689             *row++ = (value      ) & 0xFF;
   690             *row++ = (value >>  8) & 0xFF;
   691             *row++ = (value >> 16) & 0xFF;
   692 
   693             L = *lum; lum += 2;
   694             value = (rgb_2_pix[ L + cr_r ] |
   695                      rgb_2_pix[ L + crb_g ] |
   696                      rgb_2_pix[ L + cb_b ]);
   697             *row++ = (value      ) & 0xFF;
   698             *row++ = (value >>  8) & 0xFF;
   699             *row++ = (value >> 16) & 0xFF;
   700 
   701         }
   702         row += mod;
   703     }
   704 }
   705 
   706 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   707                                     unsigned char *lum, unsigned char *cr,
   708                                     unsigned char *cb, unsigned char *out,
   709                                     int rows, int cols, int mod )
   710 {
   711     unsigned int* row;
   712     int x, y;
   713     int cr_r;
   714     int crb_g;
   715     int cb_b;
   716     int cols_2 = cols / 2;
   717 
   718     row = (unsigned int*) out;
   719     y = rows;
   720     while( y-- )
   721     {
   722         x = cols_2;
   723         while( x-- )
   724         {
   725             register int L;
   726 
   727             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   728             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   729                                + colortab[ *cb + 2*256 ];
   730             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   731             cr += 4; cb += 4;
   732 
   733             L = *lum; lum += 2;
   734             *row++ = (rgb_2_pix[ L + cr_r ] |
   735                        rgb_2_pix[ L + crb_g ] |
   736                        rgb_2_pix[ L + cb_b ]);
   737 
   738             L = *lum; lum += 2;
   739             *row++ = (rgb_2_pix[ L + cr_r ] |
   740                        rgb_2_pix[ L + crb_g ] |
   741                        rgb_2_pix[ L + cb_b ]);
   742 
   743 
   744         }
   745         row += mod;
   746     }
   747 }
   748 
   749 /*
   750  * In this function I make use of a nasty trick. The tables have the lower
   751  * 16 bits replicated in the upper 16. This means I can write ints and get
   752  * the horisontal doubling for free (almost).
   753  */
   754 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   755                                     unsigned char *lum, unsigned char *cr,
   756                                     unsigned char *cb, unsigned char *out,
   757                                     int rows, int cols, int mod )
   758 {
   759     unsigned int* row = (unsigned int*) out;
   760     const int next_row = cols+(mod/2);
   761     int x, y;
   762     int cr_r;
   763     int crb_g;
   764     int cb_b;
   765     int cols_2 = cols / 2;
   766 
   767     y = rows;
   768     while( y-- )
   769     {
   770         x = cols_2;
   771         while( x-- )
   772         {
   773             register int L;
   774 
   775             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   776             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   777                                + colortab[ *cb + 2*256 ];
   778             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   779             cr += 4; cb += 4;
   780 
   781             L = *lum; lum += 2;
   782             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   783                                         rgb_2_pix[ L + crb_g ] |
   784                                         rgb_2_pix[ L + cb_b ]);
   785             row++;
   786 
   787             L = *lum; lum += 2;
   788             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   789                                         rgb_2_pix[ L + crb_g ] |
   790                                         rgb_2_pix[ L + cb_b ]);
   791             row++;
   792 
   793         }
   794         row += next_row;
   795     }
   796 }
   797 
   798 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   799                                     unsigned char *lum, unsigned char *cr,
   800                                     unsigned char *cb, unsigned char *out,
   801                                     int rows, int cols, int mod )
   802 {
   803     unsigned int value;
   804     unsigned char* row = out;
   805     const int next_row = (cols*2 + mod) * 3;
   806     int x, y;
   807     int cr_r;
   808     int crb_g;
   809     int cb_b;
   810     int cols_2 = cols / 2;
   811     y = rows;
   812     while( y-- )
   813     {
   814         x = cols_2;
   815         while( x-- )
   816         {
   817             register int L;
   818 
   819             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   820             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   821                                + colortab[ *cb + 2*256 ];
   822             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   823             cr += 4; cb += 4;
   824 
   825             L = *lum; lum += 2;
   826             value = (rgb_2_pix[ L + cr_r ] |
   827                      rgb_2_pix[ L + crb_g ] |
   828                      rgb_2_pix[ L + cb_b ]);
   829             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   830                      (value      ) & 0xFF;
   831             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   832                      (value >>  8) & 0xFF;
   833             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   834                      (value >> 16) & 0xFF;
   835             row += 2*3;
   836 
   837             L = *lum; lum += 2;
   838             value = (rgb_2_pix[ L + cr_r ] |
   839                      rgb_2_pix[ L + crb_g ] |
   840                      rgb_2_pix[ L + cb_b ]);
   841             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   842                      (value      ) & 0xFF;
   843             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   844                      (value >>  8) & 0xFF;
   845             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   846                      (value >> 16) & 0xFF;
   847             row += 2*3;
   848 
   849         }
   850         row += next_row;
   851     }
   852 }
   853 
   854 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   855                                     unsigned char *lum, unsigned char *cr,
   856                                     unsigned char *cb, unsigned char *out,
   857                                     int rows, int cols, int mod )
   858 {
   859     unsigned int* row = (unsigned int*) out;
   860     const int next_row = cols*2+mod;
   861     int x, y;
   862     int cr_r;
   863     int crb_g;
   864     int cb_b;
   865     int cols_2 = cols / 2;
   866     mod+=mod;
   867     y = rows;
   868     while( y-- )
   869     {
   870         x = cols_2;
   871         while( x-- )
   872         {
   873             register int L;
   874 
   875             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   876             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   877                                + colortab[ *cb + 2*256 ];
   878             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   879             cr += 4; cb += 4;
   880 
   881             L = *lum; lum += 2;
   882             row[0] = row[1] = row[next_row] = row[next_row+1] =
   883                                        (rgb_2_pix[ L + cr_r ] |
   884                                         rgb_2_pix[ L + crb_g ] |
   885                                         rgb_2_pix[ L + cb_b ]);
   886             row += 2;
   887 
   888             L = *lum; lum += 2;
   889             row[0] = row[1] = row[next_row] = row[next_row+1] =
   890                                        (rgb_2_pix[ L + cr_r ] |
   891                                         rgb_2_pix[ L + crb_g ] |
   892                                         rgb_2_pix[ L + cb_b ]);
   893             row += 2;
   894 
   895 
   896         }
   897 
   898         row += next_row;
   899     }
   900 }
   901 
   902 /*
   903  * How many 1 bits are there in the Uint32.
   904  * Low performance, do not call often.
   905  */
   906 static int number_of_bits_set( Uint32 a )
   907 {
   908     if(!a) return 0;
   909     if(a & 1) return 1 + number_of_bits_set(a >> 1);
   910     return(number_of_bits_set(a >> 1));
   911 }
   912 
   913 /*
   914  * How many 0 bits are there at least significant end of Uint32.
   915  * Low performance, do not call often.
   916  */
   917 static int free_bits_at_bottom( Uint32 a )
   918 {
   919       /* assume char is 8 bits */
   920     if(!a) return sizeof(Uint32) * 8;
   921     if(((Sint32)a) & 1l) return 0;
   922     return 1 + free_bits_at_bottom ( a >> 1);
   923 }
   924 
   925 
   926 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
   927 {
   928 	SDL_Overlay *overlay;
   929 	struct private_yuvhwdata *swdata;
   930 	int *Cr_r_tab;
   931 	int *Cr_g_tab;
   932 	int *Cb_g_tab;
   933 	int *Cb_b_tab;
   934 	Uint32 *r_2_pix_alloc;
   935 	Uint32 *g_2_pix_alloc;
   936 	Uint32 *b_2_pix_alloc;
   937 	int i;
   938 	int CR, CB;
   939 	Uint32 Rmask, Gmask, Bmask;
   940 
   941 	/* Only RGB packed pixel conversion supported */
   942 	if ( (display->format->BytesPerPixel != 2) &&
   943 	     (display->format->BytesPerPixel != 3) &&
   944 	     (display->format->BytesPerPixel != 4) ) {
   945 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   946 		return(NULL);
   947 	}
   948 
   949 	/* Verify that we support the format */
   950 	switch (format) {
   951 	    case SDL_YV12_OVERLAY:
   952 	    case SDL_IYUV_OVERLAY:
   953 	    case SDL_YUY2_OVERLAY:
   954 	    case SDL_UYVY_OVERLAY:
   955 	    case SDL_YVYU_OVERLAY:
   956 		break;
   957 	    default:
   958 		SDL_SetError("Unsupported YUV format");
   959 		return(NULL);
   960 	}
   961 
   962 	/* Create the overlay structure */
   963 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
   964 	if ( overlay == NULL ) {
   965 		SDL_OutOfMemory();
   966 		return(NULL);
   967 	}
   968 	SDL_memset(overlay, 0, (sizeof *overlay));
   969 
   970 	/* Fill in the basic members */
   971 	overlay->format = format;
   972 	overlay->w = width;
   973 	overlay->h = height;
   974 
   975 	/* Set up the YUV surface function structure */
   976 	overlay->hwfuncs = &sw_yuvfuncs;
   977 
   978 	/* Create the pixel data and lookup tables */
   979 	swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
   980 	overlay->hwdata = swdata;
   981 	if ( swdata == NULL ) {
   982 		SDL_OutOfMemory();
   983 		SDL_FreeYUVOverlay(overlay);
   984 		return(NULL);
   985 	}
   986 	swdata->stretch = NULL;
   987 	swdata->display = display;
   988 	swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
   989 	swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
   990 	Cr_r_tab = &swdata->colortab[0*256];
   991 	Cr_g_tab = &swdata->colortab[1*256];
   992 	Cb_g_tab = &swdata->colortab[2*256];
   993 	Cb_b_tab = &swdata->colortab[3*256];
   994 	swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
   995 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
   996 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
   997 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
   998 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
   999 		SDL_OutOfMemory();
  1000 		SDL_FreeYUVOverlay(overlay);
  1001 		return(NULL);
  1002 	}
  1003 
  1004 	/* Generate the tables for the display surface */
  1005 	for (i=0; i<256; i++) {
  1006 		/* Gamma correction (luminescence table) and chroma correction
  1007 		   would be done here.  See the Berkeley mpeg_play sources.
  1008 		*/
  1009 		CB = CR = (i-128);
  1010 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
  1011 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
  1012 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
  1013 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
  1014 	}
  1015 
  1016 	/* 
  1017 	 * Set up entries 0-255 in rgb-to-pixel value tables.
  1018 	 */
  1019 	Rmask = display->format->Rmask;
  1020 	Gmask = display->format->Gmask;
  1021 	Bmask = display->format->Bmask;
  1022 	for ( i=0; i<256; ++i ) {
  1023 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
  1024 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
  1025 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
  1026 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
  1027 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
  1028 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
  1029 	}
  1030 
  1031 	/*
  1032 	 * If we have 16-bit output depth, then we double the value
  1033 	 * in the top word. This means that we can write out both
  1034 	 * pixels in the pixel doubling mode with one op. It is 
  1035 	 * harmless in the normal case as storing a 32-bit value
  1036 	 * through a short pointer will lose the top bits anyway.
  1037 	 */
  1038 	if( display->format->BytesPerPixel == 2 ) {
  1039 		for ( i=0; i<256; ++i ) {
  1040 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
  1041 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
  1042 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
  1043 		}
  1044 	}
  1045 
  1046 	/*
  1047 	 * Spread out the values we have to the rest of the array so that
  1048 	 * we do not need to check for overflow.
  1049 	 */
  1050 	for ( i=0; i<256; ++i ) {
  1051 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1052 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
  1053 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1054 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
  1055 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1056 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
  1057 	}
  1058 
  1059 	/* You have chosen wisely... */
  1060 	switch (format) {
  1061 	    case SDL_YV12_OVERLAY:
  1062 	    case SDL_IYUV_OVERLAY:
  1063 		if ( display->format->BytesPerPixel == 2 ) {
  1064 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1065 			/* inline assembly functions */
  1066 			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
  1067 			                     (Gmask == 0x07E0) &&
  1068 				             (Bmask == 0x001F) &&
  1069 			                     (width & 15) == 0) {
  1070 /*printf("Using MMX 16-bit 565 dither\n");*/
  1071 				swdata->Display1X = Color565DitherYV12MMX1X;
  1072 			} else {
  1073 /*printf("Using C 16-bit dither\n");*/
  1074 				swdata->Display1X = Color16DitherYV12Mod1X;
  1075 			}
  1076 #else
  1077 			swdata->Display1X = Color16DitherYV12Mod1X;
  1078 #endif
  1079 			swdata->Display2X = Color16DitherYV12Mod2X;
  1080 		}
  1081 		if ( display->format->BytesPerPixel == 3 ) {
  1082 			swdata->Display1X = Color24DitherYV12Mod1X;
  1083 			swdata->Display2X = Color24DitherYV12Mod2X;
  1084 		}
  1085 		if ( display->format->BytesPerPixel == 4 ) {
  1086 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1087 			/* inline assembly functions */
  1088 			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  1089 			                     (Gmask == 0x0000FF00) &&
  1090 				             (Bmask == 0x000000FF) && 
  1091 			                     (width & 15) == 0) {
  1092 /*printf("Using MMX 32-bit dither\n");*/
  1093 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1094 			} else {
  1095 /*printf("Using C 32-bit dither\n");*/
  1096 				swdata->Display1X = Color32DitherYV12Mod1X;
  1097 			}
  1098 #else
  1099 			swdata->Display1X = Color32DitherYV12Mod1X;
  1100 #endif
  1101 			swdata->Display2X = Color32DitherYV12Mod2X;
  1102 		}
  1103 		break;
  1104 	    case SDL_YUY2_OVERLAY:
  1105 	    case SDL_UYVY_OVERLAY:
  1106 	    case SDL_YVYU_OVERLAY:
  1107 		if ( display->format->BytesPerPixel == 2 ) {
  1108 			swdata->Display1X = Color16DitherYUY2Mod1X;
  1109 			swdata->Display2X = Color16DitherYUY2Mod2X;
  1110 		}
  1111 		if ( display->format->BytesPerPixel == 3 ) {
  1112 			swdata->Display1X = Color24DitherYUY2Mod1X;
  1113 			swdata->Display2X = Color24DitherYUY2Mod2X;
  1114 		}
  1115 		if ( display->format->BytesPerPixel == 4 ) {
  1116 			swdata->Display1X = Color32DitherYUY2Mod1X;
  1117 			swdata->Display2X = Color32DitherYUY2Mod2X;
  1118 		}
  1119 		break;
  1120 	    default:
  1121 		/* We should never get here (caught above) */
  1122 		break;
  1123 	}
  1124 
  1125 	/* Find the pitch and offset values for the overlay */
  1126 	overlay->pitches = swdata->pitches;
  1127 	overlay->pixels = swdata->planes;
  1128 	switch (format) {
  1129 	    case SDL_YV12_OVERLAY:
  1130 	    case SDL_IYUV_OVERLAY:
  1131 		overlay->pitches[0] = overlay->w;
  1132 		overlay->pitches[1] = overlay->pitches[0] / 2;
  1133 		overlay->pitches[2] = overlay->pitches[0] / 2;
  1134 	        overlay->pixels[0] = swdata->pixels;
  1135 	        overlay->pixels[1] = overlay->pixels[0] +
  1136 		                     overlay->pitches[0] * overlay->h;
  1137 	        overlay->pixels[2] = overlay->pixels[1] +
  1138 		                     overlay->pitches[1] * overlay->h / 2;
  1139 		overlay->planes = 3;
  1140 		break;
  1141 	    case SDL_YUY2_OVERLAY:
  1142 	    case SDL_UYVY_OVERLAY:
  1143 	    case SDL_YVYU_OVERLAY:
  1144 		overlay->pitches[0] = overlay->w*2;
  1145 	        overlay->pixels[0] = swdata->pixels;
  1146 		overlay->planes = 1;
  1147 		break;
  1148 	    default:
  1149 		/* We should never get here (caught above) */
  1150 		break;
  1151 	}
  1152 
  1153 	/* We're all done.. */
  1154 	return(overlay);
  1155 }
  1156 
  1157 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
  1158 {
  1159 	return(0);
  1160 }
  1161 
  1162 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
  1163 {
  1164 	return;
  1165 }
  1166 
  1167 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
  1168 {
  1169 	struct private_yuvhwdata *swdata;
  1170 	SDL_Surface *stretch;
  1171 	SDL_Surface *display;
  1172 	int scale_2x;
  1173 	Uint8 *lum, *Cr, *Cb;
  1174 	Uint8 *dst;
  1175 	int mod;
  1176 
  1177 	swdata = overlay->hwdata;
  1178 	scale_2x = 0;
  1179 	stretch = 0;
  1180 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
  1181 		if ( (dstrect->w == 2*overlay->w) &&
  1182 		     (dstrect->h == 2*overlay->h) ) {
  1183 			scale_2x = 1;
  1184 		} else {
  1185 			if ( ! swdata->stretch ) {
  1186 				display = swdata->display;
  1187 				swdata->stretch = SDL_CreateRGBSurface(
  1188 					SDL_SWSURFACE,
  1189 					overlay->w, overlay->h,
  1190 					display->format->BitsPerPixel,
  1191 					display->format->Rmask,
  1192 					display->format->Gmask,
  1193 					display->format->Bmask, 0);
  1194 				if ( ! swdata->stretch ) {
  1195 					return(-1);
  1196 				}
  1197 			}
  1198 			stretch = swdata->stretch;
  1199 		}
  1200 	}
  1201 
  1202 	if ( stretch ) {
  1203 		display = stretch;
  1204 	} else {
  1205 		display = swdata->display;
  1206 	}
  1207 	switch (overlay->format) {
  1208 	    case SDL_YV12_OVERLAY:
  1209 		lum = overlay->pixels[0];
  1210 		Cr =  overlay->pixels[1];
  1211 		Cb =  overlay->pixels[2];
  1212 		break;
  1213 	    case SDL_IYUV_OVERLAY:
  1214 		lum = overlay->pixels[0];
  1215 		Cr =  overlay->pixels[2];
  1216 		Cb =  overlay->pixels[1];
  1217 		break;
  1218 	    case SDL_YUY2_OVERLAY:
  1219 		lum = overlay->pixels[0];
  1220 		Cr = lum + 3;
  1221 		Cb = lum + 1;
  1222 		break;
  1223 	    case SDL_UYVY_OVERLAY:
  1224 		lum = overlay->pixels[0]+1;
  1225 		Cr = lum + 1;
  1226 		Cb = lum - 1;
  1227 		break;
  1228 	    case SDL_YVYU_OVERLAY:
  1229 		lum = overlay->pixels[0];
  1230 		Cr = lum + 1;
  1231 		Cb = lum + 3;
  1232 		break;
  1233 	    default:
  1234 		SDL_SetError("Unsupported YUV format in blit");
  1235 		return(-1);
  1236 	}
  1237 	if ( SDL_MUSTLOCK(display) ) {
  1238         	if ( SDL_LockSurface(display) < 0 ) {
  1239 			return(-1);
  1240 		}
  1241 	}
  1242 	if ( stretch ) {
  1243 		dst = (Uint8 *)stretch->pixels;
  1244 	} else {
  1245 		dst = (Uint8 *)display->pixels
  1246 			+ dstrect->x * display->format->BytesPerPixel
  1247 			+ dstrect->y * display->pitch;
  1248 	}
  1249 	mod = (display->pitch / display->format->BytesPerPixel);
  1250 
  1251 	if ( scale_2x ) {
  1252 		mod -= (overlay->w * 2);
  1253 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1254 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1255 	} else {
  1256 		mod -= overlay->w;
  1257 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1258 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1259 	}
  1260 	if ( SDL_MUSTLOCK(display) ) {
  1261 		SDL_UnlockSurface(display);
  1262 	}
  1263 	if ( stretch ) {
  1264 		display = swdata->display;
  1265 		SDL_SoftStretch(stretch, NULL, display, dstrect);
  1266 	}
  1267 	SDL_UpdateRects(display, 1, dstrect);
  1268 
  1269 	return(0);
  1270 }
  1271 
  1272 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
  1273 {
  1274 	struct private_yuvhwdata *swdata;
  1275 
  1276 	swdata = overlay->hwdata;
  1277 	if ( swdata ) {
  1278 		if ( swdata->stretch ) {
  1279 			SDL_FreeSurface(swdata->stretch);
  1280 		}
  1281 		if ( swdata->pixels ) {
  1282 			SDL_free(swdata->pixels);
  1283 		}
  1284 		if ( swdata->colortab ) {
  1285 			SDL_free(swdata->colortab);
  1286 		}
  1287 		if ( swdata->rgb_2_pix ) {
  1288 			SDL_free(swdata->rgb_2_pix);
  1289 		}
  1290 		SDL_free(swdata);
  1291 	}
  1292 }