src/video/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 10 Feb 2006 06:48:43 +0000
changeset 1358 c71e05b4dc2e
parent 1338 604d73db6802
child 1361 19418e4422cb
permissions -rw-r--r--
More header massaging... works great on Windows. ;-)
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 /* This is the software implementation of the YUV video overlay support */
    24 
    25 /* This code was derived from code carrying the following copyright notices:
    26 
    27  * Copyright (c) 1995 The Regents of the University of California.
    28  * All rights reserved.
    29  * 
    30  * Permission to use, copy, modify, and distribute this software and its
    31  * documentation for any purpose, without fee, and without written agreement is
    32  * hereby granted, provided that the above copyright notice and the following
    33  * two paragraphs appear in all copies of this software.
    34  * 
    35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    39  * 
    40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    45 
    46  * Copyright (c) 1995 Erik Corry
    47  * All rights reserved.
    48  * 
    49  * Permission to use, copy, modify, and distribute this software and its
    50  * documentation for any purpose, without fee, and without written agreement is
    51  * hereby granted, provided that the above copyright notice and the following
    52  * two paragraphs appear in all copies of this software.
    53  * 
    54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    57  * OF THE POSSIBILITY OF SUCH DAMAGE.
    58  * 
    59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    64 
    65  * Portions of this software Copyright (c) 1995 Brown University.
    66  * All rights reserved.
    67  * 
    68  * Permission to use, copy, modify, and distribute this software and its
    69  * documentation for any purpose, without fee, and without written agreement
    70  * is hereby granted, provided that the above copyright notice and the
    71  * following two paragraphs appear in all copies of this software.
    72  * 
    73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    77  * 
    78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    83  */
    84 
    85 #include "SDL_video.h"
    86 #include "SDL_cpuinfo.h"
    87 #include "SDL_stretch_c.h"
    88 #include "SDL_yuvfuncs.h"
    89 #include "SDL_yuv_sw_c.h"
    90 
/* The functions used to manipulate software video overlays.
 * The initializers below are positional, so their order has to follow the
 * member order of struct private_yuvhwfuncs, which is declared outside
 * this part of the file.
 */
static struct private_yuvhwfuncs sw_yuvfuncs = {
	SDL_LockYUV_SW,
	SDL_UnlockYUV_SW,
	SDL_DisplayYUV_SW,
	SDL_FreeYUV_SW
};
    98 
/* RGB conversion lookup tables and the rest of the per-overlay state of the
 * software YUV implementation.
 */
struct private_yuvhwdata {
	/* NOTE(review): how these two surfaces and the pixel buffer are used is
	 * not visible in this part of the file -- confirm against the code that
	 * creates and displays the overlay.
	 */
	SDL_Surface *stretch;
	SDL_Surface *display;
	Uint8 *pixels;
	/* Lookup tables; presumably the ones handed to Display1X/Display2X as
	 * their first two arguments (same names and types) -- confirm against
	 * the caller.
	 */
	int *colortab;
	Uint32 *rgb_2_pix;
	/* Conversion callbacks.  Both use the signature shared by every
	 * Color*Dither* routine in this file.
	 */
	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );
	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
	                  unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );

	/* These are just so we don't have to allocate them separately */
	Uint16 pitches[3];
	Uint8 *planes[3];
};
   119 
   120 
   121 /* The colorspace conversion functions */
   122 
/*
 * Conversion routines defined outside this file.  Their prototypes match
 * the Display1X/Display2X callback type of struct private_yuvhwdata.
 * NOTE(review): the names suggest MMX-accelerated YV12 converters for 565
 * and generic RGB destinations -- confirm against their definitions.
 */
extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
   131 
   132 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   133                                     unsigned char *lum, unsigned char *cr,
   134                                     unsigned char *cb, unsigned char *out,
   135                                     int rows, int cols, int mod )
   136 {
   137     unsigned short* row1;
   138     unsigned short* row2;
   139     unsigned char* lum2;
   140     int x, y;
   141     int cr_r;
   142     int crb_g;
   143     int cb_b;
   144     int cols_2 = cols / 2;
   145 
   146     row1 = (unsigned short*) out;
   147     row2 = row1 + cols + mod;
   148     lum2 = lum + cols;
   149 
   150     mod += cols + mod;
   151 
   152     y = rows / 2;
   153     while( y-- )
   154     {
   155         x = cols_2;
   156         while( x-- )
   157         {
   158             register int L;
   159 
   160             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   161             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   162                                + colortab[ *cb + 2*256 ];
   163             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   164             ++cr; ++cb;
   165 
   166             L = *lum++;
   167             *row1++ = (rgb_2_pix[ L + cr_r ] |
   168                        rgb_2_pix[ L + crb_g ] |
   169                        rgb_2_pix[ L + cb_b ]);
   170 
   171             L = *lum++;
   172             *row1++ = (rgb_2_pix[ L + cr_r ] |
   173                        rgb_2_pix[ L + crb_g ] |
   174                        rgb_2_pix[ L + cb_b ]);
   175 
   176 
   177             /* Now, do second row.  */
   178 
   179             L = *lum2++;
   180             *row2++ = (rgb_2_pix[ L + cr_r ] |
   181                        rgb_2_pix[ L + crb_g ] |
   182                        rgb_2_pix[ L + cb_b ]);
   183 
   184             L = *lum2++;
   185             *row2++ = (rgb_2_pix[ L + cr_r ] |
   186                        rgb_2_pix[ L + crb_g ] |
   187                        rgb_2_pix[ L + cb_b ]);
   188         }
   189 
   190         /*
   191          * These values are at the start of the next line, (due
   192          * to the ++'s above),but they need to be at the start
   193          * of the line after that.
   194          */
   195         lum  += cols;
   196         lum2 += cols;
   197         row1 += mod;
   198         row2 += mod;
   199     }
   200 }
   201 
   202 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   203                                     unsigned char *lum, unsigned char *cr,
   204                                     unsigned char *cb, unsigned char *out,
   205                                     int rows, int cols, int mod )
   206 {
   207     unsigned int value;
   208     unsigned char* row1;
   209     unsigned char* row2;
   210     unsigned char* lum2;
   211     int x, y;
   212     int cr_r;
   213     int crb_g;
   214     int cb_b;
   215     int cols_2 = cols / 2;
   216 
   217     row1 = out;
   218     row2 = row1 + cols*3 + mod*3;
   219     lum2 = lum + cols;
   220 
   221     mod += cols + mod;
   222     mod *= 3;
   223 
   224     y = rows / 2;
   225     while( y-- )
   226     {
   227         x = cols_2;
   228         while( x-- )
   229         {
   230             register int L;
   231 
   232             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   233             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   234                                + colortab[ *cb + 2*256 ];
   235             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   236             ++cr; ++cb;
   237 
   238             L = *lum++;
   239             value = (rgb_2_pix[ L + cr_r ] |
   240                      rgb_2_pix[ L + crb_g ] |
   241                      rgb_2_pix[ L + cb_b ]);
   242             *row1++ = (value      ) & 0xFF;
   243             *row1++ = (value >>  8) & 0xFF;
   244             *row1++ = (value >> 16) & 0xFF;
   245 
   246             L = *lum++;
   247             value = (rgb_2_pix[ L + cr_r ] |
   248                      rgb_2_pix[ L + crb_g ] |
   249                      rgb_2_pix[ L + cb_b ]);
   250             *row1++ = (value      ) & 0xFF;
   251             *row1++ = (value >>  8) & 0xFF;
   252             *row1++ = (value >> 16) & 0xFF;
   253 
   254 
   255             /* Now, do second row.  */
   256 
   257             L = *lum2++;
   258             value = (rgb_2_pix[ L + cr_r ] |
   259                      rgb_2_pix[ L + crb_g ] |
   260                      rgb_2_pix[ L + cb_b ]);
   261             *row2++ = (value      ) & 0xFF;
   262             *row2++ = (value >>  8) & 0xFF;
   263             *row2++ = (value >> 16) & 0xFF;
   264 
   265             L = *lum2++;
   266             value = (rgb_2_pix[ L + cr_r ] |
   267                      rgb_2_pix[ L + crb_g ] |
   268                      rgb_2_pix[ L + cb_b ]);
   269             *row2++ = (value      ) & 0xFF;
   270             *row2++ = (value >>  8) & 0xFF;
   271             *row2++ = (value >> 16) & 0xFF;
   272         }
   273 
   274         /*
   275          * These values are at the start of the next line, (due
   276          * to the ++'s above),but they need to be at the start
   277          * of the line after that.
   278          */
   279         lum  += cols;
   280         lum2 += cols;
   281         row1 += mod;
   282         row2 += mod;
   283     }
   284 }
   285 
   286 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   287                                     unsigned char *lum, unsigned char *cr,
   288                                     unsigned char *cb, unsigned char *out,
   289                                     int rows, int cols, int mod )
   290 {
   291     unsigned int* row1;
   292     unsigned int* row2;
   293     unsigned char* lum2;
   294     int x, y;
   295     int cr_r;
   296     int crb_g;
   297     int cb_b;
   298     int cols_2 = cols / 2;
   299 
   300     row1 = (unsigned int*) out;
   301     row2 = row1 + cols + mod;
   302     lum2 = lum + cols;
   303 
   304     mod += cols + mod;
   305 
   306     y = rows / 2;
   307     while( y-- )
   308     {
   309         x = cols_2;
   310         while( x-- )
   311         {
   312             register int L;
   313 
   314             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   315             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   316                                + colortab[ *cb + 2*256 ];
   317             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   318             ++cr; ++cb;
   319 
   320             L = *lum++;
   321             *row1++ = (rgb_2_pix[ L + cr_r ] |
   322                        rgb_2_pix[ L + crb_g ] |
   323                        rgb_2_pix[ L + cb_b ]);
   324 
   325             L = *lum++;
   326             *row1++ = (rgb_2_pix[ L + cr_r ] |
   327                        rgb_2_pix[ L + crb_g ] |
   328                        rgb_2_pix[ L + cb_b ]);
   329 
   330 
   331             /* Now, do second row.  */
   332 
   333             L = *lum2++;
   334             *row2++ = (rgb_2_pix[ L + cr_r ] |
   335                        rgb_2_pix[ L + crb_g ] |
   336                        rgb_2_pix[ L + cb_b ]);
   337 
   338             L = *lum2++;
   339             *row2++ = (rgb_2_pix[ L + cr_r ] |
   340                        rgb_2_pix[ L + crb_g ] |
   341                        rgb_2_pix[ L + cb_b ]);
   342         }
   343 
   344         /*
   345          * These values are at the start of the next line, (due
   346          * to the ++'s above),but they need to be at the start
   347          * of the line after that.
   348          */
   349         lum  += cols;
   350         lum2 += cols;
   351         row1 += mod;
   352         row2 += mod;
   353     }
   354 }
   355 
   356 /*
   357  * In this function I make use of a nasty trick. The tables have the lower
   358  * 16 bits replicated in the upper 16. This means I can write ints and get
   359  * the horisontal doubling for free (almost).
   360  */
   361 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   362                                     unsigned char *lum, unsigned char *cr,
   363                                     unsigned char *cb, unsigned char *out,
   364                                     int rows, int cols, int mod )
   365 {
   366     unsigned int* row1 = (unsigned int*) out;
   367     const int next_row = cols+(mod/2);
   368     unsigned int* row2 = row1 + 2*next_row;
   369     unsigned char* lum2;
   370     int x, y;
   371     int cr_r;
   372     int crb_g;
   373     int cb_b;
   374     int cols_2 = cols / 2;
   375 
   376     lum2 = lum + cols;
   377 
   378     mod = (next_row * 3) + (mod/2);
   379 
   380     y = rows / 2;
   381     while( y-- )
   382     {
   383         x = cols_2;
   384         while( x-- )
   385         {
   386             register int L;
   387 
   388             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   389             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   390                                + colortab[ *cb + 2*256 ];
   391             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   392             ++cr; ++cb;
   393 
   394             L = *lum++;
   395             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   396                                         rgb_2_pix[ L + crb_g ] |
   397                                         rgb_2_pix[ L + cb_b ]);
   398             row1++;
   399 
   400             L = *lum++;
   401             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   402                                         rgb_2_pix[ L + crb_g ] |
   403                                         rgb_2_pix[ L + cb_b ]);
   404             row1++;
   405 
   406 
   407             /* Now, do second row. */
   408 
   409             L = *lum2++;
   410             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   411                                         rgb_2_pix[ L + crb_g ] |
   412                                         rgb_2_pix[ L + cb_b ]);
   413             row2++;
   414 
   415             L = *lum2++;
   416             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   417                                         rgb_2_pix[ L + crb_g ] |
   418                                         rgb_2_pix[ L + cb_b ]);
   419             row2++;
   420         }
   421 
   422         /*
   423          * These values are at the start of the next line, (due
   424          * to the ++'s above),but they need to be at the start
   425          * of the line after that.
   426          */
   427         lum  += cols;
   428         lum2 += cols;
   429         row1 += mod;
   430         row2 += mod;
   431     }
   432 }
   433 
   434 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   435                                     unsigned char *lum, unsigned char *cr,
   436                                     unsigned char *cb, unsigned char *out,
   437                                     int rows, int cols, int mod )
   438 {
   439     unsigned int value;
   440     unsigned char* row1 = out;
   441     const int next_row = (cols*2 + mod) * 3;
   442     unsigned char* row2 = row1 + 2*next_row;
   443     unsigned char* lum2;
   444     int x, y;
   445     int cr_r;
   446     int crb_g;
   447     int cb_b;
   448     int cols_2 = cols / 2;
   449 
   450     lum2 = lum + cols;
   451 
   452     mod = next_row*3 + mod*3;
   453 
   454     y = rows / 2;
   455     while( y-- )
   456     {
   457         x = cols_2;
   458         while( x-- )
   459         {
   460             register int L;
   461 
   462             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   463             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   464                                + colortab[ *cb + 2*256 ];
   465             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   466             ++cr; ++cb;
   467 
   468             L = *lum++;
   469             value = (rgb_2_pix[ L + cr_r ] |
   470                      rgb_2_pix[ L + crb_g ] |
   471                      rgb_2_pix[ L + cb_b ]);
   472             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   473                      (value      ) & 0xFF;
   474             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   475                      (value >>  8) & 0xFF;
   476             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   477                      (value >> 16) & 0xFF;
   478             row1 += 2*3;
   479 
   480             L = *lum++;
   481             value = (rgb_2_pix[ L + cr_r ] |
   482                      rgb_2_pix[ L + crb_g ] |
   483                      rgb_2_pix[ L + cb_b ]);
   484             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   485                      (value      ) & 0xFF;
   486             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   487                      (value >>  8) & 0xFF;
   488             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   489                      (value >> 16) & 0xFF;
   490             row1 += 2*3;
   491 
   492 
   493             /* Now, do second row. */
   494 
   495             L = *lum2++;
   496             value = (rgb_2_pix[ L + cr_r ] |
   497                      rgb_2_pix[ L + crb_g ] |
   498                      rgb_2_pix[ L + cb_b ]);
   499             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   500                      (value      ) & 0xFF;
   501             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   502                      (value >>  8) & 0xFF;
   503             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   504                      (value >> 16) & 0xFF;
   505             row2 += 2*3;
   506 
   507             L = *lum2++;
   508             value = (rgb_2_pix[ L + cr_r ] |
   509                      rgb_2_pix[ L + crb_g ] |
   510                      rgb_2_pix[ L + cb_b ]);
   511             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   512                      (value      ) & 0xFF;
   513             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   514                      (value >>  8) & 0xFF;
   515             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   516                      (value >> 16) & 0xFF;
   517             row2 += 2*3;
   518         }
   519 
   520         /*
   521          * These values are at the start of the next line, (due
   522          * to the ++'s above),but they need to be at the start
   523          * of the line after that.
   524          */
   525         lum  += cols;
   526         lum2 += cols;
   527         row1 += mod;
   528         row2 += mod;
   529     }
   530 }
   531 
   532 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   533                                     unsigned char *lum, unsigned char *cr,
   534                                     unsigned char *cb, unsigned char *out,
   535                                     int rows, int cols, int mod )
   536 {
   537     unsigned int* row1 = (unsigned int*) out;
   538     const int next_row = cols*2+mod;
   539     unsigned int* row2 = row1 + 2*next_row;
   540     unsigned char* lum2;
   541     int x, y;
   542     int cr_r;
   543     int crb_g;
   544     int cb_b;
   545     int cols_2 = cols / 2;
   546 
   547     lum2 = lum + cols;
   548 
   549     mod = (next_row * 3) + mod;
   550 
   551     y = rows / 2;
   552     while( y-- )
   553     {
   554         x = cols_2;
   555         while( x-- )
   556         {
   557             register int L;
   558 
   559             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   560             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   561                                + colortab[ *cb + 2*256 ];
   562             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   563             ++cr; ++cb;
   564 
   565             L = *lum++;
   566             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   567                                        (rgb_2_pix[ L + cr_r ] |
   568                                         rgb_2_pix[ L + crb_g ] |
   569                                         rgb_2_pix[ L + cb_b ]);
   570             row1 += 2;
   571 
   572             L = *lum++;
   573             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   574                                        (rgb_2_pix[ L + cr_r ] |
   575                                         rgb_2_pix[ L + crb_g ] |
   576                                         rgb_2_pix[ L + cb_b ]);
   577             row1 += 2;
   578 
   579 
   580             /* Now, do second row. */
   581 
   582             L = *lum2++;
   583             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   584                                        (rgb_2_pix[ L + cr_r ] |
   585                                         rgb_2_pix[ L + crb_g ] |
   586                                         rgb_2_pix[ L + cb_b ]);
   587             row2 += 2;
   588 
   589             L = *lum2++;
   590             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   591                                        (rgb_2_pix[ L + cr_r ] |
   592                                         rgb_2_pix[ L + crb_g ] |
   593                                         rgb_2_pix[ L + cb_b ]);
   594             row2 += 2;
   595         }
   596 
   597         /*
   598          * These values are at the start of the next line, (due
   599          * to the ++'s above),but they need to be at the start
   600          * of the line after that.
   601          */
   602         lum  += cols;
   603         lum2 += cols;
   604         row1 += mod;
   605         row2 += mod;
   606     }
   607 }
   608 
   609 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   610                                     unsigned char *lum, unsigned char *cr,
   611                                     unsigned char *cb, unsigned char *out,
   612                                     int rows, int cols, int mod )
   613 {
   614     unsigned short* row;
   615     int x, y;
   616     int cr_r;
   617     int crb_g;
   618     int cb_b;
   619     int cols_2 = cols / 2;
   620 
   621     row = (unsigned short*) out;
   622 
   623     y = rows;
   624     while( y-- )
   625     {
   626         x = cols_2;
   627         while( x-- )
   628         {
   629             register int L;
   630 
   631             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   632             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   633                                + colortab[ *cb + 2*256 ];
   634             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   635             cr += 4; cb += 4;
   636 
   637             L = *lum; lum += 2;
   638             *row++ = (rgb_2_pix[ L + cr_r ] |
   639                        rgb_2_pix[ L + crb_g ] |
   640                        rgb_2_pix[ L + cb_b ]);
   641 
   642             L = *lum; lum += 2;
   643             *row++ = (rgb_2_pix[ L + cr_r ] |
   644                        rgb_2_pix[ L + crb_g ] |
   645                        rgb_2_pix[ L + cb_b ]);
   646 
   647         }
   648 
   649         row += mod;
   650     }
   651 }
   652 
   653 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   654                                     unsigned char *lum, unsigned char *cr,
   655                                     unsigned char *cb, unsigned char *out,
   656                                     int rows, int cols, int mod )
   657 {
   658     unsigned int value;
   659     unsigned char* row;
   660     int x, y;
   661     int cr_r;
   662     int crb_g;
   663     int cb_b;
   664     int cols_2 = cols / 2;
   665 
   666     row = (unsigned char*) out;
   667     mod *= 3;
   668     y = rows;
   669     while( y-- )
   670     {
   671         x = cols_2;
   672         while( x-- )
   673         {
   674             register int L;
   675 
   676             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   677             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   678                                + colortab[ *cb + 2*256 ];
   679             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   680             cr += 4; cb += 4;
   681 
   682             L = *lum; lum += 2;
   683             value = (rgb_2_pix[ L + cr_r ] |
   684                      rgb_2_pix[ L + crb_g ] |
   685                      rgb_2_pix[ L + cb_b ]);
   686             *row++ = (value      ) & 0xFF;
   687             *row++ = (value >>  8) & 0xFF;
   688             *row++ = (value >> 16) & 0xFF;
   689 
   690             L = *lum; lum += 2;
   691             value = (rgb_2_pix[ L + cr_r ] |
   692                      rgb_2_pix[ L + crb_g ] |
   693                      rgb_2_pix[ L + cb_b ]);
   694             *row++ = (value      ) & 0xFF;
   695             *row++ = (value >>  8) & 0xFF;
   696             *row++ = (value >> 16) & 0xFF;
   697 
   698         }
   699         row += mod;
   700     }
   701 }
   702 
   703 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   704                                     unsigned char *lum, unsigned char *cr,
   705                                     unsigned char *cb, unsigned char *out,
   706                                     int rows, int cols, int mod )
   707 {
   708     unsigned int* row;
   709     int x, y;
   710     int cr_r;
   711     int crb_g;
   712     int cb_b;
   713     int cols_2 = cols / 2;
   714 
   715     row = (unsigned int*) out;
   716     y = rows;
   717     while( y-- )
   718     {
   719         x = cols_2;
   720         while( x-- )
   721         {
   722             register int L;
   723 
   724             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   725             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   726                                + colortab[ *cb + 2*256 ];
   727             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   728             cr += 4; cb += 4;
   729 
   730             L = *lum; lum += 2;
   731             *row++ = (rgb_2_pix[ L + cr_r ] |
   732                        rgb_2_pix[ L + crb_g ] |
   733                        rgb_2_pix[ L + cb_b ]);
   734 
   735             L = *lum; lum += 2;
   736             *row++ = (rgb_2_pix[ L + cr_r ] |
   737                        rgb_2_pix[ L + crb_g ] |
   738                        rgb_2_pix[ L + cb_b ]);
   739 
   740 
   741         }
   742         row += mod;
   743     }
   744 }
   745 
   746 /*
   747  * In this function I make use of a nasty trick. The tables have the lower
   748  * 16 bits replicated in the upper 16. This means I can write ints and get
   749  * the horisontal doubling for free (almost).
   750  */
   751 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   752                                     unsigned char *lum, unsigned char *cr,
   753                                     unsigned char *cb, unsigned char *out,
   754                                     int rows, int cols, int mod )
   755 {
   756     unsigned int* row = (unsigned int*) out;
   757     const int next_row = cols+(mod/2);
   758     int x, y;
   759     int cr_r;
   760     int crb_g;
   761     int cb_b;
   762     int cols_2 = cols / 2;
   763 
   764     y = rows;
   765     while( y-- )
   766     {
   767         x = cols_2;
   768         while( x-- )
   769         {
   770             register int L;
   771 
   772             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   773             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   774                                + colortab[ *cb + 2*256 ];
   775             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   776             cr += 4; cb += 4;
   777 
   778             L = *lum; lum += 2;
   779             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   780                                         rgb_2_pix[ L + crb_g ] |
   781                                         rgb_2_pix[ L + cb_b ]);
   782             row++;
   783 
   784             L = *lum; lum += 2;
   785             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   786                                         rgb_2_pix[ L + crb_g ] |
   787                                         rgb_2_pix[ L + cb_b ]);
   788             row++;
   789 
   790         }
   791         row += next_row;
   792     }
   793 }
   794 
   795 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   796                                     unsigned char *lum, unsigned char *cr,
   797                                     unsigned char *cb, unsigned char *out,
   798                                     int rows, int cols, int mod )
   799 {
   800     unsigned int value;
   801     unsigned char* row = out;
   802     const int next_row = (cols*2 + mod) * 3;
   803     int x, y;
   804     int cr_r;
   805     int crb_g;
   806     int cb_b;
   807     int cols_2 = cols / 2;
   808     y = rows;
   809     while( y-- )
   810     {
   811         x = cols_2;
   812         while( x-- )
   813         {
   814             register int L;
   815 
   816             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   817             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   818                                + colortab[ *cb + 2*256 ];
   819             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   820             cr += 4; cb += 4;
   821 
   822             L = *lum; lum += 2;
   823             value = (rgb_2_pix[ L + cr_r ] |
   824                      rgb_2_pix[ L + crb_g ] |
   825                      rgb_2_pix[ L + cb_b ]);
   826             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   827                      (value      ) & 0xFF;
   828             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   829                      (value >>  8) & 0xFF;
   830             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   831                      (value >> 16) & 0xFF;
   832             row += 2*3;
   833 
   834             L = *lum; lum += 2;
   835             value = (rgb_2_pix[ L + cr_r ] |
   836                      rgb_2_pix[ L + crb_g ] |
   837                      rgb_2_pix[ L + cb_b ]);
   838             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   839                      (value      ) & 0xFF;
   840             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   841                      (value >>  8) & 0xFF;
   842             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   843                      (value >> 16) & 0xFF;
   844             row += 2*3;
   845 
   846         }
   847         row += next_row;
   848     }
   849 }
   850 
   851 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   852                                     unsigned char *lum, unsigned char *cr,
   853                                     unsigned char *cb, unsigned char *out,
   854                                     int rows, int cols, int mod )
   855 {
   856     unsigned int* row = (unsigned int*) out;
   857     const int next_row = cols*2+mod;
   858     int x, y;
   859     int cr_r;
   860     int crb_g;
   861     int cb_b;
   862     int cols_2 = cols / 2;
   863     mod+=mod;
   864     y = rows;
   865     while( y-- )
   866     {
   867         x = cols_2;
   868         while( x-- )
   869         {
   870             register int L;
   871 
   872             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   873             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   874                                + colortab[ *cb + 2*256 ];
   875             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   876             cr += 4; cb += 4;
   877 
   878             L = *lum; lum += 2;
   879             row[0] = row[1] = row[next_row] = row[next_row+1] =
   880                                        (rgb_2_pix[ L + cr_r ] |
   881                                         rgb_2_pix[ L + crb_g ] |
   882                                         rgb_2_pix[ L + cb_b ]);
   883             row += 2;
   884 
   885             L = *lum; lum += 2;
   886             row[0] = row[1] = row[next_row] = row[next_row+1] =
   887                                        (rgb_2_pix[ L + cr_r ] |
   888                                         rgb_2_pix[ L + crb_g ] |
   889                                         rgb_2_pix[ L + cb_b ]);
   890             row += 2;
   891 
   892 
   893         }
   894 
   895         row += next_row;
   896     }
   897 }
   898 
   899 /*
   900  * How many 1 bits are there in the Uint32.
   901  * Low performance, do not call often.
   902  */
   903 static int number_of_bits_set( Uint32 a )
   904 {
   905     if(!a) return 0;
   906     if(a & 1) return 1 + number_of_bits_set(a >> 1);
   907     return(number_of_bits_set(a >> 1));
   908 }
   909 
   910 /*
   911  * How many 0 bits are there at least significant end of Uint32.
   912  * Low performance, do not call often.
   913  */
   914 static int free_bits_at_bottom( Uint32 a )
   915 {
   916       /* assume char is 8 bits */
   917     if(!a) return sizeof(Uint32) * 8;
   918     if(((Sint32)a) & 1l) return 0;
   919     return 1 + free_bits_at_bottom ( a >> 1);
   920 }
   921 
   922 
   923 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
   924 {
   925 	SDL_Overlay *overlay;
   926 	struct private_yuvhwdata *swdata;
   927 	int *Cr_r_tab;
   928 	int *Cr_g_tab;
   929 	int *Cb_g_tab;
   930 	int *Cb_b_tab;
   931 	Uint32 *r_2_pix_alloc;
   932 	Uint32 *g_2_pix_alloc;
   933 	Uint32 *b_2_pix_alloc;
   934 	int i;
   935 	int CR, CB;
   936 	Uint32 Rmask, Gmask, Bmask;
   937 
   938 	/* Only RGB packed pixel conversion supported */
   939 	if ( (display->format->BytesPerPixel != 2) &&
   940 	     (display->format->BytesPerPixel != 3) &&
   941 	     (display->format->BytesPerPixel != 4) ) {
   942 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   943 		return(NULL);
   944 	}
   945 
   946 	/* Verify that we support the format */
   947 	switch (format) {
   948 	    case SDL_YV12_OVERLAY:
   949 	    case SDL_IYUV_OVERLAY:
   950 	    case SDL_YUY2_OVERLAY:
   951 	    case SDL_UYVY_OVERLAY:
   952 	    case SDL_YVYU_OVERLAY:
   953 		break;
   954 	    default:
   955 		SDL_SetError("Unsupported YUV format");
   956 		return(NULL);
   957 	}
   958 
   959 	/* Create the overlay structure */
   960 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
   961 	if ( overlay == NULL ) {
   962 		SDL_OutOfMemory();
   963 		return(NULL);
   964 	}
   965 	SDL_memset(overlay, 0, (sizeof *overlay));
   966 
   967 	/* Fill in the basic members */
   968 	overlay->format = format;
   969 	overlay->w = width;
   970 	overlay->h = height;
   971 
   972 	/* Set up the YUV surface function structure */
   973 	overlay->hwfuncs = &sw_yuvfuncs;
   974 
   975 	/* Create the pixel data and lookup tables */
   976 	swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
   977 	overlay->hwdata = swdata;
   978 	if ( swdata == NULL ) {
   979 		SDL_OutOfMemory();
   980 		SDL_FreeYUVOverlay(overlay);
   981 		return(NULL);
   982 	}
   983 	swdata->stretch = NULL;
   984 	swdata->display = display;
   985 	swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
   986 	swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
   987 	Cr_r_tab = &swdata->colortab[0*256];
   988 	Cr_g_tab = &swdata->colortab[1*256];
   989 	Cb_g_tab = &swdata->colortab[2*256];
   990 	Cb_b_tab = &swdata->colortab[3*256];
   991 	swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
   992 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
   993 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
   994 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
   995 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
   996 		SDL_OutOfMemory();
   997 		SDL_FreeYUVOverlay(overlay);
   998 		return(NULL);
   999 	}
  1000 
  1001 	/* Generate the tables for the display surface */
  1002 	for (i=0; i<256; i++) {
  1003 		/* Gamma correction (luminescence table) and chroma correction
  1004 		   would be done here.  See the Berkeley mpeg_play sources.
  1005 		*/
  1006 		CB = CR = (i-128);
  1007 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
  1008 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
  1009 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
  1010 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
  1011 	}
  1012 
  1013 	/* 
  1014 	 * Set up entries 0-255 in rgb-to-pixel value tables.
  1015 	 */
  1016 	Rmask = display->format->Rmask;
  1017 	Gmask = display->format->Gmask;
  1018 	Bmask = display->format->Bmask;
  1019 	for ( i=0; i<256; ++i ) {
  1020 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
  1021 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
  1022 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
  1023 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
  1024 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
  1025 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
  1026 	}
  1027 
  1028 	/*
  1029 	 * If we have 16-bit output depth, then we double the value
  1030 	 * in the top word. This means that we can write out both
  1031 	 * pixels in the pixel doubling mode with one op. It is 
  1032 	 * harmless in the normal case as storing a 32-bit value
  1033 	 * through a short pointer will lose the top bits anyway.
  1034 	 */
  1035 	if( display->format->BytesPerPixel == 2 ) {
  1036 		for ( i=0; i<256; ++i ) {
  1037 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
  1038 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
  1039 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
  1040 		}
  1041 	}
  1042 
  1043 	/*
  1044 	 * Spread out the values we have to the rest of the array so that
  1045 	 * we do not need to check for overflow.
  1046 	 */
  1047 	for ( i=0; i<256; ++i ) {
  1048 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1049 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
  1050 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1051 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
  1052 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1053 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
  1054 	}
  1055 
  1056 	/* You have chosen wisely... */
  1057 	switch (format) {
  1058 	    case SDL_YV12_OVERLAY:
  1059 	    case SDL_IYUV_OVERLAY:
  1060 		if ( display->format->BytesPerPixel == 2 ) {
  1061 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1062 			/* inline assembly functions */
  1063 			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
  1064 			                     (Gmask == 0x07E0) &&
  1065 				             (Bmask == 0x001F) &&
  1066 			                     (width & 15) == 0) {
  1067 /*printf("Using MMX 16-bit 565 dither\n");*/
  1068 				swdata->Display1X = Color565DitherYV12MMX1X;
  1069 			} else {
  1070 /*printf("Using C 16-bit dither\n");*/
  1071 				swdata->Display1X = Color16DitherYV12Mod1X;
  1072 			}
  1073 #else
  1074 			swdata->Display1X = Color16DitherYV12Mod1X;
  1075 #endif
  1076 			swdata->Display2X = Color16DitherYV12Mod2X;
  1077 		}
  1078 		if ( display->format->BytesPerPixel == 3 ) {
  1079 			swdata->Display1X = Color24DitherYV12Mod1X;
  1080 			swdata->Display2X = Color24DitherYV12Mod2X;
  1081 		}
  1082 		if ( display->format->BytesPerPixel == 4 ) {
  1083 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1084 			/* inline assembly functions */
  1085 			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  1086 			                     (Gmask == 0x0000FF00) &&
  1087 				             (Bmask == 0x000000FF) && 
  1088 			                     (width & 15) == 0) {
  1089 /*printf("Using MMX 32-bit dither\n");*/
  1090 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1091 			} else {
  1092 /*printf("Using C 32-bit dither\n");*/
  1093 				swdata->Display1X = Color32DitherYV12Mod1X;
  1094 			}
  1095 #else
  1096 			swdata->Display1X = Color32DitherYV12Mod1X;
  1097 #endif
  1098 			swdata->Display2X = Color32DitherYV12Mod2X;
  1099 		}
  1100 		break;
  1101 	    case SDL_YUY2_OVERLAY:
  1102 	    case SDL_UYVY_OVERLAY:
  1103 	    case SDL_YVYU_OVERLAY:
  1104 		if ( display->format->BytesPerPixel == 2 ) {
  1105 			swdata->Display1X = Color16DitherYUY2Mod1X;
  1106 			swdata->Display2X = Color16DitherYUY2Mod2X;
  1107 		}
  1108 		if ( display->format->BytesPerPixel == 3 ) {
  1109 			swdata->Display1X = Color24DitherYUY2Mod1X;
  1110 			swdata->Display2X = Color24DitherYUY2Mod2X;
  1111 		}
  1112 		if ( display->format->BytesPerPixel == 4 ) {
  1113 			swdata->Display1X = Color32DitherYUY2Mod1X;
  1114 			swdata->Display2X = Color32DitherYUY2Mod2X;
  1115 		}
  1116 		break;
  1117 	    default:
  1118 		/* We should never get here (caught above) */
  1119 		break;
  1120 	}
  1121 
  1122 	/* Find the pitch and offset values for the overlay */
  1123 	overlay->pitches = swdata->pitches;
  1124 	overlay->pixels = swdata->planes;
  1125 	switch (format) {
  1126 	    case SDL_YV12_OVERLAY:
  1127 	    case SDL_IYUV_OVERLAY:
  1128 		overlay->pitches[0] = overlay->w;
  1129 		overlay->pitches[1] = overlay->pitches[0] / 2;
  1130 		overlay->pitches[2] = overlay->pitches[0] / 2;
  1131 	        overlay->pixels[0] = swdata->pixels;
  1132 	        overlay->pixels[1] = overlay->pixels[0] +
  1133 		                     overlay->pitches[0] * overlay->h;
  1134 	        overlay->pixels[2] = overlay->pixels[1] +
  1135 		                     overlay->pitches[1] * overlay->h / 2;
  1136 		overlay->planes = 3;
  1137 		break;
  1138 	    case SDL_YUY2_OVERLAY:
  1139 	    case SDL_UYVY_OVERLAY:
  1140 	    case SDL_YVYU_OVERLAY:
  1141 		overlay->pitches[0] = overlay->w*2;
  1142 	        overlay->pixels[0] = swdata->pixels;
  1143 		overlay->planes = 1;
  1144 		break;
  1145 	    default:
  1146 		/* We should never get here (caught above) */
  1147 		break;
  1148 	}
  1149 
  1150 	/* We're all done.. */
  1151 	return(overlay);
  1152 }
  1153 
  1154 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
  1155 {
  1156 	return(0);
  1157 }
  1158 
  1159 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
  1160 {
  1161 	return;
  1162 }
  1163 
  1164 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
  1165 {
  1166 	struct private_yuvhwdata *swdata;
  1167 	SDL_Surface *stretch;
  1168 	SDL_Surface *display;
  1169 	int scale_2x;
  1170 	Uint8 *lum, *Cr, *Cb;
  1171 	Uint8 *dst;
  1172 	int mod;
  1173 
  1174 	swdata = overlay->hwdata;
  1175 	scale_2x = 0;
  1176 	stretch = 0;
  1177 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
  1178 		if ( (dstrect->w == 2*overlay->w) &&
  1179 		     (dstrect->h == 2*overlay->h) ) {
  1180 			scale_2x = 1;
  1181 		} else {
  1182 			if ( ! swdata->stretch ) {
  1183 				display = swdata->display;
  1184 				swdata->stretch = SDL_CreateRGBSurface(
  1185 					SDL_SWSURFACE,
  1186 					overlay->w, overlay->h,
  1187 					display->format->BitsPerPixel,
  1188 					display->format->Rmask,
  1189 					display->format->Gmask,
  1190 					display->format->Bmask, 0);
  1191 				if ( ! swdata->stretch ) {
  1192 					return(-1);
  1193 				}
  1194 			}
  1195 			stretch = swdata->stretch;
  1196 		}
  1197 	}
  1198 
  1199 	if ( stretch ) {
  1200 		display = stretch;
  1201 	} else {
  1202 		display = swdata->display;
  1203 	}
  1204 	switch (overlay->format) {
  1205 	    case SDL_YV12_OVERLAY:
  1206 		lum = overlay->pixels[0];
  1207 		Cr =  overlay->pixels[1];
  1208 		Cb =  overlay->pixels[2];
  1209 		break;
  1210 	    case SDL_IYUV_OVERLAY:
  1211 		lum = overlay->pixels[0];
  1212 		Cr =  overlay->pixels[2];
  1213 		Cb =  overlay->pixels[1];
  1214 		break;
  1215 	    case SDL_YUY2_OVERLAY:
  1216 		lum = overlay->pixels[0];
  1217 		Cr = lum + 3;
  1218 		Cb = lum + 1;
  1219 		break;
  1220 	    case SDL_UYVY_OVERLAY:
  1221 		lum = overlay->pixels[0]+1;
  1222 		Cr = lum + 1;
  1223 		Cb = lum - 1;
  1224 		break;
  1225 	    case SDL_YVYU_OVERLAY:
  1226 		lum = overlay->pixels[0];
  1227 		Cr = lum + 1;
  1228 		Cb = lum + 3;
  1229 		break;
  1230 	    default:
  1231 		SDL_SetError("Unsupported YUV format in blit");
  1232 		return(-1);
  1233 	}
  1234 	if ( SDL_MUSTLOCK(display) ) {
  1235         	if ( SDL_LockSurface(display) < 0 ) {
  1236 			return(-1);
  1237 		}
  1238 	}
  1239 	if ( stretch ) {
  1240 		dst = (Uint8 *)stretch->pixels;
  1241 	} else {
  1242 		dst = (Uint8 *)display->pixels
  1243 			+ dstrect->x * display->format->BytesPerPixel
  1244 			+ dstrect->y * display->pitch;
  1245 	}
  1246 	mod = (display->pitch / display->format->BytesPerPixel);
  1247 
  1248 	if ( scale_2x ) {
  1249 		mod -= (overlay->w * 2);
  1250 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1251 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1252 	} else {
  1253 		mod -= overlay->w;
  1254 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1255 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1256 	}
  1257 	if ( SDL_MUSTLOCK(display) ) {
  1258 		SDL_UnlockSurface(display);
  1259 	}
  1260 	if ( stretch ) {
  1261 		display = swdata->display;
  1262 		SDL_SoftStretch(stretch, NULL, display, dstrect);
  1263 	}
  1264 	SDL_UpdateRects(display, 1, dstrect);
  1265 
  1266 	return(0);
  1267 }
  1268 
  1269 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
  1270 {
  1271 	struct private_yuvhwdata *swdata;
  1272 
  1273 	swdata = overlay->hwdata;
  1274 	if ( swdata ) {
  1275 		if ( swdata->stretch ) {
  1276 			SDL_FreeSurface(swdata->stretch);
  1277 		}
  1278 		if ( swdata->pixels ) {
  1279 			SDL_free(swdata->pixels);
  1280 		}
  1281 		if ( swdata->colortab ) {
  1282 			SDL_free(swdata->colortab);
  1283 		}
  1284 		if ( swdata->rgb_2_pix ) {
  1285 			SDL_free(swdata->rgb_2_pix);
  1286 		}
  1287 		SDL_free(swdata);
  1288 	}
  1289 }