src/video/SDL_yuv_sw.c
author Ryan C. Gordon <icculus@icculus.org>
Fri, 06 Jan 2006 13:20:10 +0000
changeset 1234 73676c1f56ee
parent 769 b8d311d90021
child 1312 c9b51268668f
permissions -rw-r--r--
For sanity's sake, removed the '&' when passing copy_row array to asm.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2004 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Library General Public
     7     License as published by the Free Software Foundation; either
     8     version 2 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Library General Public License for more details.
    14 
    15     You should have received a copy of the GNU Library General Public
    16     License along with this library; if not, write to the Free
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #ifdef SAVE_RCSID
    24 static char rcsid =
    25  "@(#) $Id$";
    26 #endif
    27 
    28 /* This is the software implementation of the YUV video overlay support */
    29 
    30 /* This code was derived from code carrying the following copyright notices:
    31 
    32  * Copyright (c) 1995 The Regents of the University of California.
    33  * All rights reserved.
    34  * 
    35  * Permission to use, copy, modify, and distribute this software and its
    36  * documentation for any purpose, without fee, and without written agreement is
    37  * hereby granted, provided that the above copyright notice and the following
    38  * two paragraphs appear in all copies of this software.
    39  * 
    40  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    41  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    42  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    43  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    44  * 
    45  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    46  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    47  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    48  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    49  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    50 
    51  * Copyright (c) 1995 Erik Corry
    52  * All rights reserved.
    53  * 
    54  * Permission to use, copy, modify, and distribute this software and its
    55  * documentation for any purpose, without fee, and without written agreement is
    56  * hereby granted, provided that the above copyright notice and the following
    57  * two paragraphs appear in all copies of this software.
    58  * 
    59  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    60  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    61  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    62  * OF THE POSSIBILITY OF SUCH DAMAGE.
    63  * 
    64  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    65  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    66  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    67  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    68  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    69 
    70  * Portions of this software Copyright (c) 1995 Brown University.
    71  * All rights reserved.
    72  * 
    73  * Permission to use, copy, modify, and distribute this software and its
    74  * documentation for any purpose, without fee, and without written agreement
    75  * is hereby granted, provided that the above copyright notice and the
    76  * following two paragraphs appear in all copies of this software.
    77  * 
    78  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    79  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    80  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    81  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    82  * 
    83  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    84  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    85  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    86  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    87  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    88  */
    89 
    90 #include <stdlib.h>
    91 #include <string.h>
    92 
    93 #include "SDL_error.h"
    94 #include "SDL_video.h"
    95 #include "SDL_cpuinfo.h"
    96 #include "SDL_stretch_c.h"
    97 #include "SDL_yuvfuncs.h"
    98 #include "SDL_yuv_sw_c.h"
    99 
   100 /* The functions used to manipulate software video overlays */
   101 static struct private_yuvhwfuncs sw_yuvfuncs = {
   102 	SDL_LockYUV_SW,
   103 	SDL_UnlockYUV_SW,
   104 	SDL_DisplayYUV_SW,
   105 	SDL_FreeYUV_SW
   106 };
   107 
   108 /* RGB conversion lookup tables */
   109 struct private_yuvhwdata {
   110 	SDL_Surface *stretch;
   111 	SDL_Surface *display;
   112 	Uint8 *pixels;
   113 	int *colortab;
   114 	Uint32 *rgb_2_pix;
   115 	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
   116                           unsigned char *lum, unsigned char *cr,
   117                           unsigned char *cb, unsigned char *out,
   118                           int rows, int cols, int mod );
   119 	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
   120 	                  unsigned char *lum, unsigned char *cr,
   121                           unsigned char *cb, unsigned char *out,
   122                           int rows, int cols, int mod );
   123 
   124 	/* These are just so we don't have to allocate them separately */
   125 	Uint16 pitches[3];
   126 	Uint8 *planes[3];
   127 };
   128 
   129 
   130 /* The colorspace conversion functions */
   131 
   132 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   133                                      unsigned char *lum, unsigned char *cr,
   134                                      unsigned char *cb, unsigned char *out,
   135                                      int rows, int cols, int mod );
   136 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   137                                      unsigned char *lum, unsigned char *cr,
   138                                      unsigned char *cb, unsigned char *out,
   139                                      int rows, int cols, int mod );
   140 
   141 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   142                                     unsigned char *lum, unsigned char *cr,
   143                                     unsigned char *cb, unsigned char *out,
   144                                     int rows, int cols, int mod )
   145 {
   146     unsigned short* row1;
   147     unsigned short* row2;
   148     unsigned char* lum2;
   149     int x, y;
   150     int cr_r;
   151     int crb_g;
   152     int cb_b;
   153     int cols_2 = cols / 2;
   154 
   155     row1 = (unsigned short*) out;
   156     row2 = row1 + cols + mod;
   157     lum2 = lum + cols;
   158 
   159     mod += cols + mod;
   160 
   161     y = rows / 2;
   162     while( y-- )
   163     {
   164         x = cols_2;
   165         while( x-- )
   166         {
   167             register int L;
   168 
   169             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   170             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   171                                + colortab[ *cb + 2*256 ];
   172             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   173             ++cr; ++cb;
   174 
   175             L = *lum++;
   176             *row1++ = (rgb_2_pix[ L + cr_r ] |
   177                        rgb_2_pix[ L + crb_g ] |
   178                        rgb_2_pix[ L + cb_b ]);
   179 
   180             L = *lum++;
   181             *row1++ = (rgb_2_pix[ L + cr_r ] |
   182                        rgb_2_pix[ L + crb_g ] |
   183                        rgb_2_pix[ L + cb_b ]);
   184 
   185 
   186             /* Now, do second row.  */
   187 
   188             L = *lum2++;
   189             *row2++ = (rgb_2_pix[ L + cr_r ] |
   190                        rgb_2_pix[ L + crb_g ] |
   191                        rgb_2_pix[ L + cb_b ]);
   192 
   193             L = *lum2++;
   194             *row2++ = (rgb_2_pix[ L + cr_r ] |
   195                        rgb_2_pix[ L + crb_g ] |
   196                        rgb_2_pix[ L + cb_b ]);
   197         }
   198 
   199         /*
   200          * These values are at the start of the next line, (due
   201          * to the ++'s above),but they need to be at the start
   202          * of the line after that.
   203          */
   204         lum  += cols;
   205         lum2 += cols;
   206         row1 += mod;
   207         row2 += mod;
   208     }
   209 }
   210 
   211 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   212                                     unsigned char *lum, unsigned char *cr,
   213                                     unsigned char *cb, unsigned char *out,
   214                                     int rows, int cols, int mod )
   215 {
   216     unsigned int value;
   217     unsigned char* row1;
   218     unsigned char* row2;
   219     unsigned char* lum2;
   220     int x, y;
   221     int cr_r;
   222     int crb_g;
   223     int cb_b;
   224     int cols_2 = cols / 2;
   225 
   226     row1 = out;
   227     row2 = row1 + cols*3 + mod*3;
   228     lum2 = lum + cols;
   229 
   230     mod += cols + mod;
   231     mod *= 3;
   232 
   233     y = rows / 2;
   234     while( y-- )
   235     {
   236         x = cols_2;
   237         while( x-- )
   238         {
   239             register int L;
   240 
   241             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   242             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   243                                + colortab[ *cb + 2*256 ];
   244             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   245             ++cr; ++cb;
   246 
   247             L = *lum++;
   248             value = (rgb_2_pix[ L + cr_r ] |
   249                      rgb_2_pix[ L + crb_g ] |
   250                      rgb_2_pix[ L + cb_b ]);
   251             *row1++ = (value      ) & 0xFF;
   252             *row1++ = (value >>  8) & 0xFF;
   253             *row1++ = (value >> 16) & 0xFF;
   254 
   255             L = *lum++;
   256             value = (rgb_2_pix[ L + cr_r ] |
   257                      rgb_2_pix[ L + crb_g ] |
   258                      rgb_2_pix[ L + cb_b ]);
   259             *row1++ = (value      ) & 0xFF;
   260             *row1++ = (value >>  8) & 0xFF;
   261             *row1++ = (value >> 16) & 0xFF;
   262 
   263 
   264             /* Now, do second row.  */
   265 
   266             L = *lum2++;
   267             value = (rgb_2_pix[ L + cr_r ] |
   268                      rgb_2_pix[ L + crb_g ] |
   269                      rgb_2_pix[ L + cb_b ]);
   270             *row2++ = (value      ) & 0xFF;
   271             *row2++ = (value >>  8) & 0xFF;
   272             *row2++ = (value >> 16) & 0xFF;
   273 
   274             L = *lum2++;
   275             value = (rgb_2_pix[ L + cr_r ] |
   276                      rgb_2_pix[ L + crb_g ] |
   277                      rgb_2_pix[ L + cb_b ]);
   278             *row2++ = (value      ) & 0xFF;
   279             *row2++ = (value >>  8) & 0xFF;
   280             *row2++ = (value >> 16) & 0xFF;
   281         }
   282 
   283         /*
   284          * These values are at the start of the next line, (due
   285          * to the ++'s above),but they need to be at the start
   286          * of the line after that.
   287          */
   288         lum  += cols;
   289         lum2 += cols;
   290         row1 += mod;
   291         row2 += mod;
   292     }
   293 }
   294 
   295 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   296                                     unsigned char *lum, unsigned char *cr,
   297                                     unsigned char *cb, unsigned char *out,
   298                                     int rows, int cols, int mod )
   299 {
   300     unsigned int* row1;
   301     unsigned int* row2;
   302     unsigned char* lum2;
   303     int x, y;
   304     int cr_r;
   305     int crb_g;
   306     int cb_b;
   307     int cols_2 = cols / 2;
   308 
   309     row1 = (unsigned int*) out;
   310     row2 = row1 + cols + mod;
   311     lum2 = lum + cols;
   312 
   313     mod += cols + mod;
   314 
   315     y = rows / 2;
   316     while( y-- )
   317     {
   318         x = cols_2;
   319         while( x-- )
   320         {
   321             register int L;
   322 
   323             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   324             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   325                                + colortab[ *cb + 2*256 ];
   326             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   327             ++cr; ++cb;
   328 
   329             L = *lum++;
   330             *row1++ = (rgb_2_pix[ L + cr_r ] |
   331                        rgb_2_pix[ L + crb_g ] |
   332                        rgb_2_pix[ L + cb_b ]);
   333 
   334             L = *lum++;
   335             *row1++ = (rgb_2_pix[ L + cr_r ] |
   336                        rgb_2_pix[ L + crb_g ] |
   337                        rgb_2_pix[ L + cb_b ]);
   338 
   339 
   340             /* Now, do second row.  */
   341 
   342             L = *lum2++;
   343             *row2++ = (rgb_2_pix[ L + cr_r ] |
   344                        rgb_2_pix[ L + crb_g ] |
   345                        rgb_2_pix[ L + cb_b ]);
   346 
   347             L = *lum2++;
   348             *row2++ = (rgb_2_pix[ L + cr_r ] |
   349                        rgb_2_pix[ L + crb_g ] |
   350                        rgb_2_pix[ L + cb_b ]);
   351         }
   352 
   353         /*
   354          * These values are at the start of the next line, (due
   355          * to the ++'s above),but they need to be at the start
   356          * of the line after that.
   357          */
   358         lum  += cols;
   359         lum2 += cols;
   360         row1 += mod;
   361         row2 += mod;
   362     }
   363 }
   364 
   365 /*
   366  * In this function I make use of a nasty trick. The tables have the lower
   367  * 16 bits replicated in the upper 16. This means I can write ints and get
   368  * the horisontal doubling for free (almost).
   369  */
   370 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   371                                     unsigned char *lum, unsigned char *cr,
   372                                     unsigned char *cb, unsigned char *out,
   373                                     int rows, int cols, int mod )
   374 {
   375     unsigned int* row1 = (unsigned int*) out;
   376     const int next_row = cols+(mod/2);
   377     unsigned int* row2 = row1 + 2*next_row;
   378     unsigned char* lum2;
   379     int x, y;
   380     int cr_r;
   381     int crb_g;
   382     int cb_b;
   383     int cols_2 = cols / 2;
   384 
   385     lum2 = lum + cols;
   386 
   387     mod = (next_row * 3) + (mod/2);
   388 
   389     y = rows / 2;
   390     while( y-- )
   391     {
   392         x = cols_2;
   393         while( x-- )
   394         {
   395             register int L;
   396 
   397             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   398             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   399                                + colortab[ *cb + 2*256 ];
   400             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   401             ++cr; ++cb;
   402 
   403             L = *lum++;
   404             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   405                                         rgb_2_pix[ L + crb_g ] |
   406                                         rgb_2_pix[ L + cb_b ]);
   407             row1++;
   408 
   409             L = *lum++;
   410             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   411                                         rgb_2_pix[ L + crb_g ] |
   412                                         rgb_2_pix[ L + cb_b ]);
   413             row1++;
   414 
   415 
   416             /* Now, do second row. */
   417 
   418             L = *lum2++;
   419             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   420                                         rgb_2_pix[ L + crb_g ] |
   421                                         rgb_2_pix[ L + cb_b ]);
   422             row2++;
   423 
   424             L = *lum2++;
   425             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   426                                         rgb_2_pix[ L + crb_g ] |
   427                                         rgb_2_pix[ L + cb_b ]);
   428             row2++;
   429         }
   430 
   431         /*
   432          * These values are at the start of the next line, (due
   433          * to the ++'s above),but they need to be at the start
   434          * of the line after that.
   435          */
   436         lum  += cols;
   437         lum2 += cols;
   438         row1 += mod;
   439         row2 += mod;
   440     }
   441 }
   442 
   443 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   444                                     unsigned char *lum, unsigned char *cr,
   445                                     unsigned char *cb, unsigned char *out,
   446                                     int rows, int cols, int mod )
   447 {
   448     unsigned int value;
   449     unsigned char* row1 = out;
   450     const int next_row = (cols*2 + mod) * 3;
   451     unsigned char* row2 = row1 + 2*next_row;
   452     unsigned char* lum2;
   453     int x, y;
   454     int cr_r;
   455     int crb_g;
   456     int cb_b;
   457     int cols_2 = cols / 2;
   458 
   459     lum2 = lum + cols;
   460 
   461     mod = next_row*3 + mod*3;
   462 
   463     y = rows / 2;
   464     while( y-- )
   465     {
   466         x = cols_2;
   467         while( x-- )
   468         {
   469             register int L;
   470 
   471             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   472             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   473                                + colortab[ *cb + 2*256 ];
   474             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   475             ++cr; ++cb;
   476 
   477             L = *lum++;
   478             value = (rgb_2_pix[ L + cr_r ] |
   479                      rgb_2_pix[ L + crb_g ] |
   480                      rgb_2_pix[ L + cb_b ]);
   481             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   482                      (value      ) & 0xFF;
   483             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   484                      (value >>  8) & 0xFF;
   485             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   486                      (value >> 16) & 0xFF;
   487             row1 += 2*3;
   488 
   489             L = *lum++;
   490             value = (rgb_2_pix[ L + cr_r ] |
   491                      rgb_2_pix[ L + crb_g ] |
   492                      rgb_2_pix[ L + cb_b ]);
   493             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   494                      (value      ) & 0xFF;
   495             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   496                      (value >>  8) & 0xFF;
   497             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   498                      (value >> 16) & 0xFF;
   499             row1 += 2*3;
   500 
   501 
   502             /* Now, do second row. */
   503 
   504             L = *lum2++;
   505             value = (rgb_2_pix[ L + cr_r ] |
   506                      rgb_2_pix[ L + crb_g ] |
   507                      rgb_2_pix[ L + cb_b ]);
   508             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   509                      (value      ) & 0xFF;
   510             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   511                      (value >>  8) & 0xFF;
   512             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   513                      (value >> 16) & 0xFF;
   514             row2 += 2*3;
   515 
   516             L = *lum2++;
   517             value = (rgb_2_pix[ L + cr_r ] |
   518                      rgb_2_pix[ L + crb_g ] |
   519                      rgb_2_pix[ L + cb_b ]);
   520             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   521                      (value      ) & 0xFF;
   522             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   523                      (value >>  8) & 0xFF;
   524             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   525                      (value >> 16) & 0xFF;
   526             row2 += 2*3;
   527         }
   528 
   529         /*
   530          * These values are at the start of the next line, (due
   531          * to the ++'s above),but they need to be at the start
   532          * of the line after that.
   533          */
   534         lum  += cols;
   535         lum2 += cols;
   536         row1 += mod;
   537         row2 += mod;
   538     }
   539 }
   540 
   541 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   542                                     unsigned char *lum, unsigned char *cr,
   543                                     unsigned char *cb, unsigned char *out,
   544                                     int rows, int cols, int mod )
   545 {
   546     unsigned int* row1 = (unsigned int*) out;
   547     const int next_row = cols*2+mod;
   548     unsigned int* row2 = row1 + 2*next_row;
   549     unsigned char* lum2;
   550     int x, y;
   551     int cr_r;
   552     int crb_g;
   553     int cb_b;
   554     int cols_2 = cols / 2;
   555 
   556     lum2 = lum + cols;
   557 
   558     mod = (next_row * 3) + mod;
   559 
   560     y = rows / 2;
   561     while( y-- )
   562     {
   563         x = cols_2;
   564         while( x-- )
   565         {
   566             register int L;
   567 
   568             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   569             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   570                                + colortab[ *cb + 2*256 ];
   571             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   572             ++cr; ++cb;
   573 
   574             L = *lum++;
   575             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   576                                        (rgb_2_pix[ L + cr_r ] |
   577                                         rgb_2_pix[ L + crb_g ] |
   578                                         rgb_2_pix[ L + cb_b ]);
   579             row1 += 2;
   580 
   581             L = *lum++;
   582             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   583                                        (rgb_2_pix[ L + cr_r ] |
   584                                         rgb_2_pix[ L + crb_g ] |
   585                                         rgb_2_pix[ L + cb_b ]);
   586             row1 += 2;
   587 
   588 
   589             /* Now, do second row. */
   590 
   591             L = *lum2++;
   592             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   593                                        (rgb_2_pix[ L + cr_r ] |
   594                                         rgb_2_pix[ L + crb_g ] |
   595                                         rgb_2_pix[ L + cb_b ]);
   596             row2 += 2;
   597 
   598             L = *lum2++;
   599             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   600                                        (rgb_2_pix[ L + cr_r ] |
   601                                         rgb_2_pix[ L + crb_g ] |
   602                                         rgb_2_pix[ L + cb_b ]);
   603             row2 += 2;
   604         }
   605 
   606         /*
   607          * These values are at the start of the next line, (due
   608          * to the ++'s above),but they need to be at the start
   609          * of the line after that.
   610          */
   611         lum  += cols;
   612         lum2 += cols;
   613         row1 += mod;
   614         row2 += mod;
   615     }
   616 }
   617 
   618 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   619                                     unsigned char *lum, unsigned char *cr,
   620                                     unsigned char *cb, unsigned char *out,
   621                                     int rows, int cols, int mod )
   622 {
   623     unsigned short* row;
   624     int x, y;
   625     int cr_r;
   626     int crb_g;
   627     int cb_b;
   628     int cols_2 = cols / 2;
   629 
   630     row = (unsigned short*) out;
   631 
   632     y = rows;
   633     while( y-- )
   634     {
   635         x = cols_2;
   636         while( x-- )
   637         {
   638             register int L;
   639 
   640             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   641             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   642                                + colortab[ *cb + 2*256 ];
   643             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   644             cr += 4; cb += 4;
   645 
   646             L = *lum; lum += 2;
   647             *row++ = (rgb_2_pix[ L + cr_r ] |
   648                        rgb_2_pix[ L + crb_g ] |
   649                        rgb_2_pix[ L + cb_b ]);
   650 
   651             L = *lum; lum += 2;
   652             *row++ = (rgb_2_pix[ L + cr_r ] |
   653                        rgb_2_pix[ L + crb_g ] |
   654                        rgb_2_pix[ L + cb_b ]);
   655 
   656         }
   657 
   658         row += mod;
   659     }
   660 }
   661 
   662 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   663                                     unsigned char *lum, unsigned char *cr,
   664                                     unsigned char *cb, unsigned char *out,
   665                                     int rows, int cols, int mod )
   666 {
   667     unsigned int value;
   668     unsigned char* row;
   669     int x, y;
   670     int cr_r;
   671     int crb_g;
   672     int cb_b;
   673     int cols_2 = cols / 2;
   674 
   675     row = (unsigned char*) out;
   676     mod *= 3;
   677     y = rows;
   678     while( y-- )
   679     {
   680         x = cols_2;
   681         while( x-- )
   682         {
   683             register int L;
   684 
   685             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   686             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   687                                + colortab[ *cb + 2*256 ];
   688             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   689             cr += 4; cb += 4;
   690 
   691             L = *lum; lum += 2;
   692             value = (rgb_2_pix[ L + cr_r ] |
   693                      rgb_2_pix[ L + crb_g ] |
   694                      rgb_2_pix[ L + cb_b ]);
   695             *row++ = (value      ) & 0xFF;
   696             *row++ = (value >>  8) & 0xFF;
   697             *row++ = (value >> 16) & 0xFF;
   698 
   699             L = *lum; lum += 2;
   700             value = (rgb_2_pix[ L + cr_r ] |
   701                      rgb_2_pix[ L + crb_g ] |
   702                      rgb_2_pix[ L + cb_b ]);
   703             *row++ = (value      ) & 0xFF;
   704             *row++ = (value >>  8) & 0xFF;
   705             *row++ = (value >> 16) & 0xFF;
   706 
   707         }
   708         row += mod;
   709     }
   710 }
   711 
   712 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   713                                     unsigned char *lum, unsigned char *cr,
   714                                     unsigned char *cb, unsigned char *out,
   715                                     int rows, int cols, int mod )
   716 {
   717     unsigned int* row;
   718     int x, y;
   719     int cr_r;
   720     int crb_g;
   721     int cb_b;
   722     int cols_2 = cols / 2;
   723 
   724     row = (unsigned int*) out;
   725     y = rows;
   726     while( y-- )
   727     {
   728         x = cols_2;
   729         while( x-- )
   730         {
   731             register int L;
   732 
   733             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   734             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   735                                + colortab[ *cb + 2*256 ];
   736             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   737             cr += 4; cb += 4;
   738 
   739             L = *lum; lum += 2;
   740             *row++ = (rgb_2_pix[ L + cr_r ] |
   741                        rgb_2_pix[ L + crb_g ] |
   742                        rgb_2_pix[ L + cb_b ]);
   743 
   744             L = *lum; lum += 2;
   745             *row++ = (rgb_2_pix[ L + cr_r ] |
   746                        rgb_2_pix[ L + crb_g ] |
   747                        rgb_2_pix[ L + cb_b ]);
   748 
   749 
   750         }
   751         row += mod;
   752     }
   753 }
   754 
   755 /*
   756  * In this function I make use of a nasty trick. The tables have the lower
   757  * 16 bits replicated in the upper 16. This means I can write ints and get
   758  * the horisontal doubling for free (almost).
   759  */
   760 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   761                                     unsigned char *lum, unsigned char *cr,
   762                                     unsigned char *cb, unsigned char *out,
   763                                     int rows, int cols, int mod )
   764 {
   765     unsigned int* row = (unsigned int*) out;
   766     const int next_row = cols+(mod/2);
   767     int x, y;
   768     int cr_r;
   769     int crb_g;
   770     int cb_b;
   771     int cols_2 = cols / 2;
   772 
   773     y = rows;
   774     while( y-- )
   775     {
   776         x = cols_2;
   777         while( x-- )
   778         {
   779             register int L;
   780 
   781             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   782             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   783                                + colortab[ *cb + 2*256 ];
   784             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   785             cr += 4; cb += 4;
   786 
   787             L = *lum; lum += 2;
   788             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   789                                         rgb_2_pix[ L + crb_g ] |
   790                                         rgb_2_pix[ L + cb_b ]);
   791             row++;
   792 
   793             L = *lum; lum += 2;
   794             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   795                                         rgb_2_pix[ L + crb_g ] |
   796                                         rgb_2_pix[ L + cb_b ]);
   797             row++;
   798 
   799         }
   800         row += next_row;
   801     }
   802 }
   803 
   804 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   805                                     unsigned char *lum, unsigned char *cr,
   806                                     unsigned char *cb, unsigned char *out,
   807                                     int rows, int cols, int mod )
   808 {
   809     unsigned int value;
   810     unsigned char* row = out;
   811     const int next_row = (cols*2 + mod) * 3;
   812     int x, y;
   813     int cr_r;
   814     int crb_g;
   815     int cb_b;
   816     int cols_2 = cols / 2;
   817     y = rows;
   818     while( y-- )
   819     {
   820         x = cols_2;
   821         while( x-- )
   822         {
   823             register int L;
   824 
   825             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   826             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   827                                + colortab[ *cb + 2*256 ];
   828             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   829             cr += 4; cb += 4;
   830 
   831             L = *lum; lum += 2;
   832             value = (rgb_2_pix[ L + cr_r ] |
   833                      rgb_2_pix[ L + crb_g ] |
   834                      rgb_2_pix[ L + cb_b ]);
   835             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   836                      (value      ) & 0xFF;
   837             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   838                      (value >>  8) & 0xFF;
   839             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   840                      (value >> 16) & 0xFF;
   841             row += 2*3;
   842 
   843             L = *lum; lum += 2;
   844             value = (rgb_2_pix[ L + cr_r ] |
   845                      rgb_2_pix[ L + crb_g ] |
   846                      rgb_2_pix[ L + cb_b ]);
   847             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   848                      (value      ) & 0xFF;
   849             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   850                      (value >>  8) & 0xFF;
   851             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   852                      (value >> 16) & 0xFF;
   853             row += 2*3;
   854 
   855         }
   856         row += next_row;
   857     }
   858 }
   859 
   860 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   861                                     unsigned char *lum, unsigned char *cr,
   862                                     unsigned char *cb, unsigned char *out,
   863                                     int rows, int cols, int mod )
   864 {
   865     unsigned int* row = (unsigned int*) out;
   866     const int next_row = cols*2+mod;
   867     int x, y;
   868     int cr_r;
   869     int crb_g;
   870     int cb_b;
   871     int cols_2 = cols / 2;
   872     mod+=mod;
   873     y = rows;
   874     while( y-- )
   875     {
   876         x = cols_2;
   877         while( x-- )
   878         {
   879             register int L;
   880 
   881             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   882             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   883                                + colortab[ *cb + 2*256 ];
   884             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   885             cr += 4; cb += 4;
   886 
   887             L = *lum; lum += 2;
   888             row[0] = row[1] = row[next_row] = row[next_row+1] =
   889                                        (rgb_2_pix[ L + cr_r ] |
   890                                         rgb_2_pix[ L + crb_g ] |
   891                                         rgb_2_pix[ L + cb_b ]);
   892             row += 2;
   893 
   894             L = *lum; lum += 2;
   895             row[0] = row[1] = row[next_row] = row[next_row+1] =
   896                                        (rgb_2_pix[ L + cr_r ] |
   897                                         rgb_2_pix[ L + crb_g ] |
   898                                         rgb_2_pix[ L + cb_b ]);
   899             row += 2;
   900 
   901 
   902         }
   903 
   904         row += next_row;
   905     }
   906 }
   907 
   908 /*
   909  * How many 1 bits are there in the Uint32.
   910  * Low performance, do not call often.
   911  */
   912 static int number_of_bits_set( Uint32 a )
   913 {
   914     if(!a) return 0;
   915     if(a & 1) return 1 + number_of_bits_set(a >> 1);
   916     return(number_of_bits_set(a >> 1));
   917 }
   918 
   919 /*
   920  * How many 0 bits are there at least significant end of Uint32.
   921  * Low performance, do not call often.
   922  */
   923 static int free_bits_at_bottom( Uint32 a )
   924 {
   925       /* assume char is 8 bits */
   926     if(!a) return sizeof(Uint32) * 8;
   927     if(((Sint32)a) & 1l) return 0;
   928     return 1 + free_bits_at_bottom ( a >> 1);
   929 }
   930 
   931 
   932 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
   933 {
   934 	SDL_Overlay *overlay;
   935 	struct private_yuvhwdata *swdata;
   936 	int *Cr_r_tab;
   937 	int *Cr_g_tab;
   938 	int *Cb_g_tab;
   939 	int *Cb_b_tab;
   940 	Uint32 *r_2_pix_alloc;
   941 	Uint32 *g_2_pix_alloc;
   942 	Uint32 *b_2_pix_alloc;
   943 	int i;
   944 	int CR, CB;
   945 	Uint32 Rmask, Gmask, Bmask;
   946 
   947 	/* Only RGB packed pixel conversion supported */
   948 	if ( (display->format->BytesPerPixel != 2) &&
   949 	     (display->format->BytesPerPixel != 3) &&
   950 	     (display->format->BytesPerPixel != 4) ) {
   951 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   952 		return(NULL);
   953 	}
   954 
   955 	/* Verify that we support the format */
   956 	switch (format) {
   957 	    case SDL_YV12_OVERLAY:
   958 	    case SDL_IYUV_OVERLAY:
   959 	    case SDL_YUY2_OVERLAY:
   960 	    case SDL_UYVY_OVERLAY:
   961 	    case SDL_YVYU_OVERLAY:
   962 		break;
   963 	    default:
   964 		SDL_SetError("Unsupported YUV format");
   965 		return(NULL);
   966 	}
   967 
   968 	/* Create the overlay structure */
   969 	overlay = (SDL_Overlay *)malloc(sizeof *overlay);
   970 	if ( overlay == NULL ) {
   971 		SDL_OutOfMemory();
   972 		return(NULL);
   973 	}
   974 	memset(overlay, 0, (sizeof *overlay));
   975 
   976 	/* Fill in the basic members */
   977 	overlay->format = format;
   978 	overlay->w = width;
   979 	overlay->h = height;
   980 
   981 	/* Set up the YUV surface function structure */
   982 	overlay->hwfuncs = &sw_yuvfuncs;
   983 
   984 	/* Create the pixel data and lookup tables */
   985 	swdata = (struct private_yuvhwdata *)malloc(sizeof *swdata);
   986 	overlay->hwdata = swdata;
   987 	if ( swdata == NULL ) {
   988 		SDL_OutOfMemory();
   989 		SDL_FreeYUVOverlay(overlay);
   990 		return(NULL);
   991 	}
   992 	swdata->stretch = NULL;
   993 	swdata->display = display;
   994 	swdata->pixels = (Uint8 *) malloc(width*height*2);
   995 	swdata->colortab = (int *)malloc(4*256*sizeof(int));
   996 	Cr_r_tab = &swdata->colortab[0*256];
   997 	Cr_g_tab = &swdata->colortab[1*256];
   998 	Cb_g_tab = &swdata->colortab[2*256];
   999 	Cb_b_tab = &swdata->colortab[3*256];
  1000 	swdata->rgb_2_pix = (Uint32 *)malloc(3*768*sizeof(Uint32));
  1001 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
  1002 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
  1003 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
  1004 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
  1005 		SDL_OutOfMemory();
  1006 		SDL_FreeYUVOverlay(overlay);
  1007 		return(NULL);
  1008 	}
  1009 
  1010 	/* Generate the tables for the display surface */
  1011 	for (i=0; i<256; i++) {
  1012 		/* Gamma correction (luminescence table) and chroma correction
  1013 		   would be done here.  See the Berkeley mpeg_play sources.
  1014 		*/
  1015 		CB = CR = (i-128);
  1016 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
  1017 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
  1018 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
  1019 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
  1020 	}
  1021 
  1022 	/* 
  1023 	 * Set up entries 0-255 in rgb-to-pixel value tables.
  1024 	 */
  1025 	Rmask = display->format->Rmask;
  1026 	Gmask = display->format->Gmask;
  1027 	Bmask = display->format->Bmask;
  1028 	for ( i=0; i<256; ++i ) {
  1029 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
  1030 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
  1031 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
  1032 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
  1033 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
  1034 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
  1035 	}
  1036 
  1037 	/*
  1038 	 * If we have 16-bit output depth, then we double the value
  1039 	 * in the top word. This means that we can write out both
  1040 	 * pixels in the pixel doubling mode with one op. It is 
  1041 	 * harmless in the normal case as storing a 32-bit value
  1042 	 * through a short pointer will lose the top bits anyway.
  1043 	 */
  1044 	if( display->format->BytesPerPixel == 2 ) {
  1045 		for ( i=0; i<256; ++i ) {
  1046 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
  1047 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
  1048 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
  1049 		}
  1050 	}
  1051 
  1052 	/*
  1053 	 * Spread out the values we have to the rest of the array so that
  1054 	 * we do not need to check for overflow.
  1055 	 */
  1056 	for ( i=0; i<256; ++i ) {
  1057 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1058 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
  1059 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1060 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
  1061 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1062 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
  1063 	}
  1064 
  1065 	/* You have chosen wisely... */
  1066 	switch (format) {
  1067 	    case SDL_YV12_OVERLAY:
  1068 	    case SDL_IYUV_OVERLAY:
  1069 		if ( display->format->BytesPerPixel == 2 ) {
  1070 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1071 			/* inline assembly functions */
  1072 			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
  1073 			                     (Gmask == 0x07E0) &&
  1074 				             (Bmask == 0x001F) &&
  1075 			                     (width & 15) == 0) {
  1076 /*printf("Using MMX 16-bit 565 dither\n");*/
  1077 				swdata->Display1X = Color565DitherYV12MMX1X;
  1078 			} else {
  1079 /*printf("Using C 16-bit dither\n");*/
  1080 				swdata->Display1X = Color16DitherYV12Mod1X;
  1081 			}
  1082 #else
  1083 			swdata->Display1X = Color16DitherYV12Mod1X;
  1084 #endif
  1085 			swdata->Display2X = Color16DitherYV12Mod2X;
  1086 		}
  1087 		if ( display->format->BytesPerPixel == 3 ) {
  1088 			swdata->Display1X = Color24DitherYV12Mod1X;
  1089 			swdata->Display2X = Color24DitherYV12Mod2X;
  1090 		}
  1091 		if ( display->format->BytesPerPixel == 4 ) {
  1092 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1093 			/* inline assembly functions */
  1094 			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  1095 			                     (Gmask == 0x0000FF00) &&
  1096 				             (Bmask == 0x000000FF) && 
  1097 			                     (width & 15) == 0) {
  1098 /*printf("Using MMX 32-bit dither\n");*/
  1099 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1100 			} else {
  1101 /*printf("Using C 32-bit dither\n");*/
  1102 				swdata->Display1X = Color32DitherYV12Mod1X;
  1103 			}
  1104 #else
  1105 			swdata->Display1X = Color32DitherYV12Mod1X;
  1106 #endif
  1107 			swdata->Display2X = Color32DitherYV12Mod2X;
  1108 		}
  1109 		break;
  1110 	    case SDL_YUY2_OVERLAY:
  1111 	    case SDL_UYVY_OVERLAY:
  1112 	    case SDL_YVYU_OVERLAY:
  1113 		if ( display->format->BytesPerPixel == 2 ) {
  1114 			swdata->Display1X = Color16DitherYUY2Mod1X;
  1115 			swdata->Display2X = Color16DitherYUY2Mod2X;
  1116 		}
  1117 		if ( display->format->BytesPerPixel == 3 ) {
  1118 			swdata->Display1X = Color24DitherYUY2Mod1X;
  1119 			swdata->Display2X = Color24DitherYUY2Mod2X;
  1120 		}
  1121 		if ( display->format->BytesPerPixel == 4 ) {
  1122 			swdata->Display1X = Color32DitherYUY2Mod1X;
  1123 			swdata->Display2X = Color32DitherYUY2Mod2X;
  1124 		}
  1125 		break;
  1126 	    default:
  1127 		/* We should never get here (caught above) */
  1128 		break;
  1129 	}
  1130 
  1131 	/* Find the pitch and offset values for the overlay */
  1132 	overlay->pitches = swdata->pitches;
  1133 	overlay->pixels = swdata->planes;
  1134 	switch (format) {
  1135 	    case SDL_YV12_OVERLAY:
  1136 	    case SDL_IYUV_OVERLAY:
  1137 		overlay->pitches[0] = overlay->w;
  1138 		overlay->pitches[1] = overlay->pitches[0] / 2;
  1139 		overlay->pitches[2] = overlay->pitches[0] / 2;
  1140 	        overlay->pixels[0] = swdata->pixels;
  1141 	        overlay->pixels[1] = overlay->pixels[0] +
  1142 		                     overlay->pitches[0] * overlay->h;
  1143 	        overlay->pixels[2] = overlay->pixels[1] +
  1144 		                     overlay->pitches[1] * overlay->h / 2;
  1145 		overlay->planes = 3;
  1146 		break;
  1147 	    case SDL_YUY2_OVERLAY:
  1148 	    case SDL_UYVY_OVERLAY:
  1149 	    case SDL_YVYU_OVERLAY:
  1150 		overlay->pitches[0] = overlay->w*2;
  1151 	        overlay->pixels[0] = swdata->pixels;
  1152 		overlay->planes = 1;
  1153 		break;
  1154 	    default:
  1155 		/* We should never get here (caught above) */
  1156 		break;
  1157 	}
  1158 
  1159 	/* We're all done.. */
  1160 	return(overlay);
  1161 }
  1162 
  1163 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
  1164 {
  1165 	return(0);
  1166 }
  1167 
  1168 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
  1169 {
  1170 	return;
  1171 }
  1172 
  1173 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
  1174 {
  1175 	struct private_yuvhwdata *swdata;
  1176 	SDL_Surface *stretch;
  1177 	SDL_Surface *display;
  1178 	int scale_2x;
  1179 	Uint8 *lum, *Cr, *Cb;
  1180 	Uint8 *dst;
  1181 	int mod;
  1182 
  1183 	swdata = overlay->hwdata;
  1184 	scale_2x = 0;
  1185 	stretch = 0;
  1186 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
  1187 		if ( (dstrect->w == 2*overlay->w) &&
  1188 		     (dstrect->h == 2*overlay->h) ) {
  1189 			scale_2x = 1;
  1190 		} else {
  1191 			if ( ! swdata->stretch ) {
  1192 				display = swdata->display;
  1193 				swdata->stretch = SDL_CreateRGBSurface(
  1194 					SDL_SWSURFACE,
  1195 					overlay->w, overlay->h,
  1196 					display->format->BitsPerPixel,
  1197 					display->format->Rmask,
  1198 					display->format->Gmask,
  1199 					display->format->Bmask, 0);
  1200 				if ( ! swdata->stretch ) {
  1201 					return(-1);
  1202 				}
  1203 			}
  1204 			stretch = swdata->stretch;
  1205 		}
  1206 	}
  1207 
  1208 	if ( stretch ) {
  1209 		display = stretch;
  1210 	} else {
  1211 		display = swdata->display;
  1212 	}
  1213 	switch (overlay->format) {
  1214 	    case SDL_YV12_OVERLAY:
  1215 		lum = overlay->pixels[0];
  1216 		Cr =  overlay->pixels[1];
  1217 		Cb =  overlay->pixels[2];
  1218 		break;
  1219 	    case SDL_IYUV_OVERLAY:
  1220 		lum = overlay->pixels[0];
  1221 		Cr =  overlay->pixels[2];
  1222 		Cb =  overlay->pixels[1];
  1223 		break;
  1224 	    case SDL_YUY2_OVERLAY:
  1225 		lum = overlay->pixels[0];
  1226 		Cr = lum + 3;
  1227 		Cb = lum + 1;
  1228 		break;
  1229 	    case SDL_UYVY_OVERLAY:
  1230 		lum = overlay->pixels[0]+1;
  1231 		Cr = lum + 1;
  1232 		Cb = lum - 1;
  1233 		break;
  1234 	    case SDL_YVYU_OVERLAY:
  1235 		lum = overlay->pixels[0];
  1236 		Cr = lum + 1;
  1237 		Cb = lum + 3;
  1238 		break;
  1239 	    default:
  1240 		SDL_SetError("Unsupported YUV format in blit");
  1241 		return(-1);
  1242 	}
  1243 	if ( SDL_MUSTLOCK(display) ) {
  1244         	if ( SDL_LockSurface(display) < 0 ) {
  1245 			return(-1);
  1246 		}
  1247 	}
  1248 	if ( stretch ) {
  1249 		dst = (Uint8 *)stretch->pixels;
  1250 	} else {
  1251 		dst = (Uint8 *)display->pixels
  1252 			+ dstrect->x * display->format->BytesPerPixel
  1253 			+ dstrect->y * display->pitch;
  1254 	}
  1255 	mod = (display->pitch / display->format->BytesPerPixel);
  1256 
  1257 	if ( scale_2x ) {
  1258 		mod -= (overlay->w * 2);
  1259 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1260 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1261 	} else {
  1262 		mod -= overlay->w;
  1263 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1264 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1265 	}
  1266 	if ( SDL_MUSTLOCK(display) ) {
  1267 		SDL_UnlockSurface(display);
  1268 	}
  1269 	if ( stretch ) {
  1270 		display = swdata->display;
  1271 		SDL_SoftStretch(stretch, NULL, display, dstrect);
  1272 	}
  1273 	SDL_UpdateRects(display, 1, dstrect);
  1274 
  1275 	return(0);
  1276 }
  1277 
  1278 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
  1279 {
  1280 	struct private_yuvhwdata *swdata;
  1281 
  1282 	swdata = overlay->hwdata;
  1283 	if ( swdata ) {
  1284 		if ( swdata->stretch ) {
  1285 			SDL_FreeSurface(swdata->stretch);
  1286 		}
  1287 		if ( swdata->pixels ) {
  1288 			free(swdata->pixels);
  1289 		}
  1290 		if ( swdata->colortab ) {
  1291 			free(swdata->colortab);
  1292 		}
  1293 		if ( swdata->rgb_2_pix ) {
  1294 			free(swdata->rgb_2_pix);
  1295 		}
  1296 		free(swdata);
  1297 	}
  1298 }