src/video/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Tue, 21 Feb 2006 08:46:50 +0000
changeset 1402 d910939febfa
parent 1361 19418e4422cb
child 1413 40edc79b0926
permissions -rw-r--r--
Use consistent identifiers for the various platforms we support.
Make sure every source file includes SDL_config.h, so the proper system
headers are chosen.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This is the software implementation of the YUV video overlay support */
    25 
    26 /* This code was derived from code carrying the following copyright notices:
    27 
    28  * Copyright (c) 1995 The Regents of the University of California.
    29  * All rights reserved.
    30  * 
    31  * Permission to use, copy, modify, and distribute this software and its
    32  * documentation for any purpose, without fee, and without written agreement is
    33  * hereby granted, provided that the above copyright notice and the following
    34  * two paragraphs appear in all copies of this software.
    35  * 
    36  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    37  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    38  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    39  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    40  * 
    41  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    42  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    43  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    44  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    45  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    46 
    47  * Copyright (c) 1995 Erik Corry
    48  * All rights reserved.
    49  * 
    50  * Permission to use, copy, modify, and distribute this software and its
    51  * documentation for any purpose, without fee, and without written agreement is
    52  * hereby granted, provided that the above copyright notice and the following
    53  * two paragraphs appear in all copies of this software.
    54  * 
    55  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    56  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    57  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    58  * OF THE POSSIBILITY OF SUCH DAMAGE.
    59  * 
    60  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    61  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    62  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    63  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    64  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    65 
    66  * Portions of this software Copyright (c) 1995 Brown University.
    67  * All rights reserved.
    68  * 
    69  * Permission to use, copy, modify, and distribute this software and its
    70  * documentation for any purpose, without fee, and without written agreement
    71  * is hereby granted, provided that the above copyright notice and the
    72  * following two paragraphs appear in all copies of this software.
    73  * 
    74  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    75  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    76  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    77  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    78  * 
    79  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    80  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    81  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    82  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    83  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    84  */
    85 
    86 #include "SDL_video.h"
    87 #include "SDL_cpuinfo.h"
    88 #include "SDL_stretch_c.h"
    89 #include "SDL_yuvfuncs.h"
    90 #include "SDL_yuv_sw_c.h"
    91 
/* The functions used to manipulate software video overlays.
 *
 * The table is filled in positionally, so the four entries have to stay in
 * the order in which struct private_yuvhwfuncs (declared outside this file)
 * lists its members: lock, unlock, display, free.
 */
static struct private_yuvhwfuncs sw_yuvfuncs = {
	SDL_LockYUV_SW,
	SDL_UnlockYUV_SW,
	SDL_DisplayYUV_SW,
	SDL_FreeYUV_SW
};
    99 
/* Private state of a software YUV overlay: the RGB conversion lookup
 * tables, the conversion routines to call, and some per-plane storage.
 */
struct private_yuvhwdata {
	/* NOTE(review): the code using the next three members is not part of
	 * this section; going by the names they are a scratch surface for
	 * stretching, the target surface and the overlay pixel buffer --
	 * confirm against the users further down the file.
	 */
	SDL_Surface *stretch;
	SDL_Surface *display;
	Uint8 *pixels;
	/* Lookup tables; they carry the same names and types as the first two
	 * parameters of the conversion routines below.
	 */
	int *colortab;
	Uint32 *rgb_2_pix;
	/* Conversion hooks.  Their signature is the one shared by the
	 * Color*Mod1X and Color*Mod2X routines in this file.
	 */
	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );
	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
	                  unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );

	/* These are just so we don't have to allocate them separately */
	Uint16 pitches[3];
	Uint8 *planes[3];
};
   120 
   121 
   122 /* The colorspace conversion functions */
   123 
/* Converters implemented outside this file.  They take the same argument
 * list as the C converters below.
 * NOTE(review): the names suggest MMX versions of the 16-bit (565) and
 * 32-bit YV12 1:1 converters -- confirm against their definitions.
 */
extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
   132 
   133 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   134                                     unsigned char *lum, unsigned char *cr,
   135                                     unsigned char *cb, unsigned char *out,
   136                                     int rows, int cols, int mod )
   137 {
   138     unsigned short* row1;
   139     unsigned short* row2;
   140     unsigned char* lum2;
   141     int x, y;
   142     int cr_r;
   143     int crb_g;
   144     int cb_b;
   145     int cols_2 = cols / 2;
   146 
   147     row1 = (unsigned short*) out;
   148     row2 = row1 + cols + mod;
   149     lum2 = lum + cols;
   150 
   151     mod += cols + mod;
   152 
   153     y = rows / 2;
   154     while( y-- )
   155     {
   156         x = cols_2;
   157         while( x-- )
   158         {
   159             register int L;
   160 
   161             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   162             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   163                                + colortab[ *cb + 2*256 ];
   164             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   165             ++cr; ++cb;
   166 
   167             L = *lum++;
   168             *row1++ = (rgb_2_pix[ L + cr_r ] |
   169                        rgb_2_pix[ L + crb_g ] |
   170                        rgb_2_pix[ L + cb_b ]);
   171 
   172             L = *lum++;
   173             *row1++ = (rgb_2_pix[ L + cr_r ] |
   174                        rgb_2_pix[ L + crb_g ] |
   175                        rgb_2_pix[ L + cb_b ]);
   176 
   177 
   178             /* Now, do second row.  */
   179 
   180             L = *lum2++;
   181             *row2++ = (rgb_2_pix[ L + cr_r ] |
   182                        rgb_2_pix[ L + crb_g ] |
   183                        rgb_2_pix[ L + cb_b ]);
   184 
   185             L = *lum2++;
   186             *row2++ = (rgb_2_pix[ L + cr_r ] |
   187                        rgb_2_pix[ L + crb_g ] |
   188                        rgb_2_pix[ L + cb_b ]);
   189         }
   190 
   191         /*
   192          * These values are at the start of the next line, (due
   193          * to the ++'s above),but they need to be at the start
   194          * of the line after that.
   195          */
   196         lum  += cols;
   197         lum2 += cols;
   198         row1 += mod;
   199         row2 += mod;
   200     }
   201 }
   202 
   203 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   204                                     unsigned char *lum, unsigned char *cr,
   205                                     unsigned char *cb, unsigned char *out,
   206                                     int rows, int cols, int mod )
   207 {
   208     unsigned int value;
   209     unsigned char* row1;
   210     unsigned char* row2;
   211     unsigned char* lum2;
   212     int x, y;
   213     int cr_r;
   214     int crb_g;
   215     int cb_b;
   216     int cols_2 = cols / 2;
   217 
   218     row1 = out;
   219     row2 = row1 + cols*3 + mod*3;
   220     lum2 = lum + cols;
   221 
   222     mod += cols + mod;
   223     mod *= 3;
   224 
   225     y = rows / 2;
   226     while( y-- )
   227     {
   228         x = cols_2;
   229         while( x-- )
   230         {
   231             register int L;
   232 
   233             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   234             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   235                                + colortab[ *cb + 2*256 ];
   236             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   237             ++cr; ++cb;
   238 
   239             L = *lum++;
   240             value = (rgb_2_pix[ L + cr_r ] |
   241                      rgb_2_pix[ L + crb_g ] |
   242                      rgb_2_pix[ L + cb_b ]);
   243             *row1++ = (value      ) & 0xFF;
   244             *row1++ = (value >>  8) & 0xFF;
   245             *row1++ = (value >> 16) & 0xFF;
   246 
   247             L = *lum++;
   248             value = (rgb_2_pix[ L + cr_r ] |
   249                      rgb_2_pix[ L + crb_g ] |
   250                      rgb_2_pix[ L + cb_b ]);
   251             *row1++ = (value      ) & 0xFF;
   252             *row1++ = (value >>  8) & 0xFF;
   253             *row1++ = (value >> 16) & 0xFF;
   254 
   255 
   256             /* Now, do second row.  */
   257 
   258             L = *lum2++;
   259             value = (rgb_2_pix[ L + cr_r ] |
   260                      rgb_2_pix[ L + crb_g ] |
   261                      rgb_2_pix[ L + cb_b ]);
   262             *row2++ = (value      ) & 0xFF;
   263             *row2++ = (value >>  8) & 0xFF;
   264             *row2++ = (value >> 16) & 0xFF;
   265 
   266             L = *lum2++;
   267             value = (rgb_2_pix[ L + cr_r ] |
   268                      rgb_2_pix[ L + crb_g ] |
   269                      rgb_2_pix[ L + cb_b ]);
   270             *row2++ = (value      ) & 0xFF;
   271             *row2++ = (value >>  8) & 0xFF;
   272             *row2++ = (value >> 16) & 0xFF;
   273         }
   274 
   275         /*
   276          * These values are at the start of the next line, (due
   277          * to the ++'s above),but they need to be at the start
   278          * of the line after that.
   279          */
   280         lum  += cols;
   281         lum2 += cols;
   282         row1 += mod;
   283         row2 += mod;
   284     }
   285 }
   286 
   287 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   288                                     unsigned char *lum, unsigned char *cr,
   289                                     unsigned char *cb, unsigned char *out,
   290                                     int rows, int cols, int mod )
   291 {
   292     unsigned int* row1;
   293     unsigned int* row2;
   294     unsigned char* lum2;
   295     int x, y;
   296     int cr_r;
   297     int crb_g;
   298     int cb_b;
   299     int cols_2 = cols / 2;
   300 
   301     row1 = (unsigned int*) out;
   302     row2 = row1 + cols + mod;
   303     lum2 = lum + cols;
   304 
   305     mod += cols + mod;
   306 
   307     y = rows / 2;
   308     while( y-- )
   309     {
   310         x = cols_2;
   311         while( x-- )
   312         {
   313             register int L;
   314 
   315             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   316             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   317                                + colortab[ *cb + 2*256 ];
   318             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   319             ++cr; ++cb;
   320 
   321             L = *lum++;
   322             *row1++ = (rgb_2_pix[ L + cr_r ] |
   323                        rgb_2_pix[ L + crb_g ] |
   324                        rgb_2_pix[ L + cb_b ]);
   325 
   326             L = *lum++;
   327             *row1++ = (rgb_2_pix[ L + cr_r ] |
   328                        rgb_2_pix[ L + crb_g ] |
   329                        rgb_2_pix[ L + cb_b ]);
   330 
   331 
   332             /* Now, do second row.  */
   333 
   334             L = *lum2++;
   335             *row2++ = (rgb_2_pix[ L + cr_r ] |
   336                        rgb_2_pix[ L + crb_g ] |
   337                        rgb_2_pix[ L + cb_b ]);
   338 
   339             L = *lum2++;
   340             *row2++ = (rgb_2_pix[ L + cr_r ] |
   341                        rgb_2_pix[ L + crb_g ] |
   342                        rgb_2_pix[ L + cb_b ]);
   343         }
   344 
   345         /*
   346          * These values are at the start of the next line, (due
   347          * to the ++'s above),but they need to be at the start
   348          * of the line after that.
   349          */
   350         lum  += cols;
   351         lum2 += cols;
   352         row1 += mod;
   353         row2 += mod;
   354     }
   355 }
   356 
   357 /*
   358  * In this function I make use of a nasty trick. The tables have the lower
   359  * 16 bits replicated in the upper 16. This means I can write ints and get
   360  * the horisontal doubling for free (almost).
   361  */
   362 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   363                                     unsigned char *lum, unsigned char *cr,
   364                                     unsigned char *cb, unsigned char *out,
   365                                     int rows, int cols, int mod )
   366 {
   367     unsigned int* row1 = (unsigned int*) out;
   368     const int next_row = cols+(mod/2);
   369     unsigned int* row2 = row1 + 2*next_row;
   370     unsigned char* lum2;
   371     int x, y;
   372     int cr_r;
   373     int crb_g;
   374     int cb_b;
   375     int cols_2 = cols / 2;
   376 
   377     lum2 = lum + cols;
   378 
   379     mod = (next_row * 3) + (mod/2);
   380 
   381     y = rows / 2;
   382     while( y-- )
   383     {
   384         x = cols_2;
   385         while( x-- )
   386         {
   387             register int L;
   388 
   389             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   390             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   391                                + colortab[ *cb + 2*256 ];
   392             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   393             ++cr; ++cb;
   394 
   395             L = *lum++;
   396             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   397                                         rgb_2_pix[ L + crb_g ] |
   398                                         rgb_2_pix[ L + cb_b ]);
   399             row1++;
   400 
   401             L = *lum++;
   402             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   403                                         rgb_2_pix[ L + crb_g ] |
   404                                         rgb_2_pix[ L + cb_b ]);
   405             row1++;
   406 
   407 
   408             /* Now, do second row. */
   409 
   410             L = *lum2++;
   411             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   412                                         rgb_2_pix[ L + crb_g ] |
   413                                         rgb_2_pix[ L + cb_b ]);
   414             row2++;
   415 
   416             L = *lum2++;
   417             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   418                                         rgb_2_pix[ L + crb_g ] |
   419                                         rgb_2_pix[ L + cb_b ]);
   420             row2++;
   421         }
   422 
   423         /*
   424          * These values are at the start of the next line, (due
   425          * to the ++'s above),but they need to be at the start
   426          * of the line after that.
   427          */
   428         lum  += cols;
   429         lum2 += cols;
   430         row1 += mod;
   431         row2 += mod;
   432     }
   433 }
   434 
   435 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   436                                     unsigned char *lum, unsigned char *cr,
   437                                     unsigned char *cb, unsigned char *out,
   438                                     int rows, int cols, int mod )
   439 {
   440     unsigned int value;
   441     unsigned char* row1 = out;
   442     const int next_row = (cols*2 + mod) * 3;
   443     unsigned char* row2 = row1 + 2*next_row;
   444     unsigned char* lum2;
   445     int x, y;
   446     int cr_r;
   447     int crb_g;
   448     int cb_b;
   449     int cols_2 = cols / 2;
   450 
   451     lum2 = lum + cols;
   452 
   453     mod = next_row*3 + mod*3;
   454 
   455     y = rows / 2;
   456     while( y-- )
   457     {
   458         x = cols_2;
   459         while( x-- )
   460         {
   461             register int L;
   462 
   463             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   464             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   465                                + colortab[ *cb + 2*256 ];
   466             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   467             ++cr; ++cb;
   468 
   469             L = *lum++;
   470             value = (rgb_2_pix[ L + cr_r ] |
   471                      rgb_2_pix[ L + crb_g ] |
   472                      rgb_2_pix[ L + cb_b ]);
   473             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   474                      (value      ) & 0xFF;
   475             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   476                      (value >>  8) & 0xFF;
   477             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   478                      (value >> 16) & 0xFF;
   479             row1 += 2*3;
   480 
   481             L = *lum++;
   482             value = (rgb_2_pix[ L + cr_r ] |
   483                      rgb_2_pix[ L + crb_g ] |
   484                      rgb_2_pix[ L + cb_b ]);
   485             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   486                      (value      ) & 0xFF;
   487             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   488                      (value >>  8) & 0xFF;
   489             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   490                      (value >> 16) & 0xFF;
   491             row1 += 2*3;
   492 
   493 
   494             /* Now, do second row. */
   495 
   496             L = *lum2++;
   497             value = (rgb_2_pix[ L + cr_r ] |
   498                      rgb_2_pix[ L + crb_g ] |
   499                      rgb_2_pix[ L + cb_b ]);
   500             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   501                      (value      ) & 0xFF;
   502             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   503                      (value >>  8) & 0xFF;
   504             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   505                      (value >> 16) & 0xFF;
   506             row2 += 2*3;
   507 
   508             L = *lum2++;
   509             value = (rgb_2_pix[ L + cr_r ] |
   510                      rgb_2_pix[ L + crb_g ] |
   511                      rgb_2_pix[ L + cb_b ]);
   512             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   513                      (value      ) & 0xFF;
   514             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   515                      (value >>  8) & 0xFF;
   516             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   517                      (value >> 16) & 0xFF;
   518             row2 += 2*3;
   519         }
   520 
   521         /*
   522          * These values are at the start of the next line, (due
   523          * to the ++'s above),but they need to be at the start
   524          * of the line after that.
   525          */
   526         lum  += cols;
   527         lum2 += cols;
   528         row1 += mod;
   529         row2 += mod;
   530     }
   531 }
   532 
   533 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   534                                     unsigned char *lum, unsigned char *cr,
   535                                     unsigned char *cb, unsigned char *out,
   536                                     int rows, int cols, int mod )
   537 {
   538     unsigned int* row1 = (unsigned int*) out;
   539     const int next_row = cols*2+mod;
   540     unsigned int* row2 = row1 + 2*next_row;
   541     unsigned char* lum2;
   542     int x, y;
   543     int cr_r;
   544     int crb_g;
   545     int cb_b;
   546     int cols_2 = cols / 2;
   547 
   548     lum2 = lum + cols;
   549 
   550     mod = (next_row * 3) + mod;
   551 
   552     y = rows / 2;
   553     while( y-- )
   554     {
   555         x = cols_2;
   556         while( x-- )
   557         {
   558             register int L;
   559 
   560             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   561             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   562                                + colortab[ *cb + 2*256 ];
   563             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   564             ++cr; ++cb;
   565 
   566             L = *lum++;
   567             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   568                                        (rgb_2_pix[ L + cr_r ] |
   569                                         rgb_2_pix[ L + crb_g ] |
   570                                         rgb_2_pix[ L + cb_b ]);
   571             row1 += 2;
   572 
   573             L = *lum++;
   574             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   575                                        (rgb_2_pix[ L + cr_r ] |
   576                                         rgb_2_pix[ L + crb_g ] |
   577                                         rgb_2_pix[ L + cb_b ]);
   578             row1 += 2;
   579 
   580 
   581             /* Now, do second row. */
   582 
   583             L = *lum2++;
   584             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   585                                        (rgb_2_pix[ L + cr_r ] |
   586                                         rgb_2_pix[ L + crb_g ] |
   587                                         rgb_2_pix[ L + cb_b ]);
   588             row2 += 2;
   589 
   590             L = *lum2++;
   591             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   592                                        (rgb_2_pix[ L + cr_r ] |
   593                                         rgb_2_pix[ L + crb_g ] |
   594                                         rgb_2_pix[ L + cb_b ]);
   595             row2 += 2;
   596         }
   597 
   598         /*
   599          * These values are at the start of the next line, (due
   600          * to the ++'s above),but they need to be at the start
   601          * of the line after that.
   602          */
   603         lum  += cols;
   604         lum2 += cols;
   605         row1 += mod;
   606         row2 += mod;
   607     }
   608 }
   609 
   610 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   611                                     unsigned char *lum, unsigned char *cr,
   612                                     unsigned char *cb, unsigned char *out,
   613                                     int rows, int cols, int mod )
   614 {
   615     unsigned short* row;
   616     int x, y;
   617     int cr_r;
   618     int crb_g;
   619     int cb_b;
   620     int cols_2 = cols / 2;
   621 
   622     row = (unsigned short*) out;
   623 
   624     y = rows;
   625     while( y-- )
   626     {
   627         x = cols_2;
   628         while( x-- )
   629         {
   630             register int L;
   631 
   632             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   633             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   634                                + colortab[ *cb + 2*256 ];
   635             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   636             cr += 4; cb += 4;
   637 
   638             L = *lum; lum += 2;
   639             *row++ = (rgb_2_pix[ L + cr_r ] |
   640                        rgb_2_pix[ L + crb_g ] |
   641                        rgb_2_pix[ L + cb_b ]);
   642 
   643             L = *lum; lum += 2;
   644             *row++ = (rgb_2_pix[ L + cr_r ] |
   645                        rgb_2_pix[ L + crb_g ] |
   646                        rgb_2_pix[ L + cb_b ]);
   647 
   648         }
   649 
   650         row += mod;
   651     }
   652 }
   653 
   654 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   655                                     unsigned char *lum, unsigned char *cr,
   656                                     unsigned char *cb, unsigned char *out,
   657                                     int rows, int cols, int mod )
   658 {
   659     unsigned int value;
   660     unsigned char* row;
   661     int x, y;
   662     int cr_r;
   663     int crb_g;
   664     int cb_b;
   665     int cols_2 = cols / 2;
   666 
   667     row = (unsigned char*) out;
   668     mod *= 3;
   669     y = rows;
   670     while( y-- )
   671     {
   672         x = cols_2;
   673         while( x-- )
   674         {
   675             register int L;
   676 
   677             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   678             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   679                                + colortab[ *cb + 2*256 ];
   680             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   681             cr += 4; cb += 4;
   682 
   683             L = *lum; lum += 2;
   684             value = (rgb_2_pix[ L + cr_r ] |
   685                      rgb_2_pix[ L + crb_g ] |
   686                      rgb_2_pix[ L + cb_b ]);
   687             *row++ = (value      ) & 0xFF;
   688             *row++ = (value >>  8) & 0xFF;
   689             *row++ = (value >> 16) & 0xFF;
   690 
   691             L = *lum; lum += 2;
   692             value = (rgb_2_pix[ L + cr_r ] |
   693                      rgb_2_pix[ L + crb_g ] |
   694                      rgb_2_pix[ L + cb_b ]);
   695             *row++ = (value      ) & 0xFF;
   696             *row++ = (value >>  8) & 0xFF;
   697             *row++ = (value >> 16) & 0xFF;
   698 
   699         }
   700         row += mod;
   701     }
   702 }
   703 
   704 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   705                                     unsigned char *lum, unsigned char *cr,
   706                                     unsigned char *cb, unsigned char *out,
   707                                     int rows, int cols, int mod )
   708 {
   709     unsigned int* row;
   710     int x, y;
   711     int cr_r;
   712     int crb_g;
   713     int cb_b;
   714     int cols_2 = cols / 2;
   715 
   716     row = (unsigned int*) out;
   717     y = rows;
   718     while( y-- )
   719     {
   720         x = cols_2;
   721         while( x-- )
   722         {
   723             register int L;
   724 
   725             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   726             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   727                                + colortab[ *cb + 2*256 ];
   728             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   729             cr += 4; cb += 4;
   730 
   731             L = *lum; lum += 2;
   732             *row++ = (rgb_2_pix[ L + cr_r ] |
   733                        rgb_2_pix[ L + crb_g ] |
   734                        rgb_2_pix[ L + cb_b ]);
   735 
   736             L = *lum; lum += 2;
   737             *row++ = (rgb_2_pix[ L + cr_r ] |
   738                        rgb_2_pix[ L + crb_g ] |
   739                        rgb_2_pix[ L + cb_b ]);
   740 
   741 
   742         }
   743         row += mod;
   744     }
   745 }
   746 
   747 /*
   748  * In this function I make use of a nasty trick. The tables have the lower
   749  * 16 bits replicated in the upper 16. This means I can write ints and get
   750  * the horisontal doubling for free (almost).
   751  */
   752 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   753                                     unsigned char *lum, unsigned char *cr,
   754                                     unsigned char *cb, unsigned char *out,
   755                                     int rows, int cols, int mod )
   756 {
   757     unsigned int* row = (unsigned int*) out;
   758     const int next_row = cols+(mod/2);
   759     int x, y;
   760     int cr_r;
   761     int crb_g;
   762     int cb_b;
   763     int cols_2 = cols / 2;
   764 
   765     y = rows;
   766     while( y-- )
   767     {
   768         x = cols_2;
   769         while( x-- )
   770         {
   771             register int L;
   772 
   773             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   774             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   775                                + colortab[ *cb + 2*256 ];
   776             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   777             cr += 4; cb += 4;
   778 
   779             L = *lum; lum += 2;
   780             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   781                                         rgb_2_pix[ L + crb_g ] |
   782                                         rgb_2_pix[ L + cb_b ]);
   783             row++;
   784 
   785             L = *lum; lum += 2;
   786             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   787                                         rgb_2_pix[ L + crb_g ] |
   788                                         rgb_2_pix[ L + cb_b ]);
   789             row++;
   790 
   791         }
   792         row += next_row;
   793     }
   794 }
   795 
   /*
    * Convert packed 4:2:2 YUV data to 24-bit pixels, doubling the image in
    * both directions: every source pixel is stored as a 2x2 block.
    *
    * colortab  - four 256-entry chroma contribution tables
    * rgb_2_pix - three 768-entry component-to-pixel tables
    * lum/cr/cb - pointers to the first Y, Cr and Cb samples; Y samples are
    *             2 bytes apart, chroma samples 4 bytes apart
    * out       - top-left destination pixel
    * rows/cols - source size in pixels
    * mod       - padding at the end of each destination line, in pixels
    */
   static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                       unsigned char *lum, unsigned char *cr,
                                       unsigned char *cb, unsigned char *out,
                                       int rows, int cols, int mod )
   {
       unsigned int value;
       unsigned char* row = out;
       /* Length of one destination line in bytes */
       const int next_row = (cols*2 + mod) * 3;
       int x, y;
       int cr_r;
       int crb_g;
       int cb_b;
       int cols_2 = cols / 2;
       y = rows;
       while( y-- )
       {
           x = cols_2;
           while( x-- )
           {
               register int L;

               /* One chroma pair is shared by two luminance samples */
               cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
               crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
                                  + colortab[ *cb + 2*256 ];
               cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
               cr += 4; cb += 4;

               L = *lum; lum += 2;
               value = (rgb_2_pix[ L + cr_r ] |
                        rgb_2_pix[ L + crb_g ] |
                        rgb_2_pix[ L + cb_b ]);
               /* Store the three bytes low-byte first, into all four pixels */
               row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
                        (value      ) & 0xFF;
               row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
                        (value >>  8) & 0xFF;
               row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
                        (value >> 16) & 0xFF;
               row += 2*3;

               L = *lum; lum += 2;
               value = (rgb_2_pix[ L + cr_r ] |
                        rgb_2_pix[ L + crb_g ] |
                        rgb_2_pix[ L + cb_b ]);
               row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
                        (value      ) & 0xFF;
               row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
                        (value >>  8) & 0xFF;
               row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
                        (value >> 16) & 0xFF;
               row += 2*3;

           }
           /*
            * We are cols*2 pixels into the first line of the pair: skip its
            * padding bytes, then the whole duplicated line below it.
            */
           row += (mod*3) + next_row;
       }
   }
   851 
   /*
    * Convert packed 4:2:2 YUV data to 32-bit pixels, doubling the image in
    * both directions: every source pixel is stored as a 2x2 block.
    *
    * colortab  - four 256-entry chroma contribution tables
    * rgb_2_pix - three 768-entry component-to-pixel tables
    * lum/cr/cb - pointers to the first Y, Cr and Cb samples; Y samples are
    *             2 bytes apart, chroma samples 4 bytes apart
    * out       - top-left destination pixel
    * rows/cols - source size in pixels
    * mod       - padding at the end of each destination line, in pixels
    */
   static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                       unsigned char *lum, unsigned char *cr,
                                       unsigned char *cb, unsigned char *out,
                                       int rows, int cols, int mod )
   {
       unsigned int* row = (unsigned int*) out;
       /* Length of one destination line in pixels */
       const int next_row = cols*2+mod;
       int x, y;
       int cr_r;
       int crb_g;
       int cb_b;
       int cols_2 = cols / 2;
       y = rows;
       while( y-- )
       {
           x = cols_2;
           while( x-- )
           {
               register int L;

               /* One chroma pair is shared by two luminance samples */
               cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
               crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
                                  + colortab[ *cb + 2*256 ];
               cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
               cr += 4; cb += 4;

               L = *lum; lum += 2;
               row[0] = row[1] = row[next_row] = row[next_row+1] =
                                          (rgb_2_pix[ L + cr_r ] |
                                           rgb_2_pix[ L + crb_g ] |
                                           rgb_2_pix[ L + cb_b ]);
               row += 2;

               L = *lum; lum += 2;
               row[0] = row[1] = row[next_row] = row[next_row+1] =
                                          (rgb_2_pix[ L + cr_r ] |
                                           rgb_2_pix[ L + crb_g ] |
                                           rgb_2_pix[ L + cb_b ]);
               row += 2;

           }

           /*
            * We are cols*2 pixels into the first line of the pair: skip its
            * padding, then the whole duplicated line below it.
            */
           row += mod + next_row;
       }
   }
   899 
   /*
    * Count the 1 bits in a Uint32.
    * Examines one bit per iteration; not meant for hot paths.
    */
   static int number_of_bits_set( Uint32 a )
   {
       int count = 0;

       for ( ; a != 0; a >>= 1 ) {
           if ( a & 1 ) {
               ++count;
           }
       }
       return count;
   }
   910 
   /*
    * Count the consecutive 0 bits at the least significant end of a Uint32.
    * A value of zero yields the full width of the type in bits.
    * Examines one bit per iteration; not meant for hot paths.
    */
   static int free_bits_at_bottom( Uint32 a )
   {
       int zeros = 0;

       if ( a == 0 ) {
           /* assume char is 8 bits */
           return (int)(sizeof(Uint32) * 8);
       }
       while ( (a & 1) == 0 ) {
           a >>= 1;
           ++zeros;
       }
       return zeros;
   }
   922 
   923 
   924 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
   925 {
   926 	SDL_Overlay *overlay;
   927 	struct private_yuvhwdata *swdata;
   928 	int *Cr_r_tab;
   929 	int *Cr_g_tab;
   930 	int *Cb_g_tab;
   931 	int *Cb_b_tab;
   932 	Uint32 *r_2_pix_alloc;
   933 	Uint32 *g_2_pix_alloc;
   934 	Uint32 *b_2_pix_alloc;
   935 	int i;
   936 	int CR, CB;
   937 	Uint32 Rmask, Gmask, Bmask;
   938 
   939 	/* Only RGB packed pixel conversion supported */
   940 	if ( (display->format->BytesPerPixel != 2) &&
   941 	     (display->format->BytesPerPixel != 3) &&
   942 	     (display->format->BytesPerPixel != 4) ) {
   943 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   944 		return(NULL);
   945 	}
   946 
   947 	/* Verify that we support the format */
   948 	switch (format) {
   949 	    case SDL_YV12_OVERLAY:
   950 	    case SDL_IYUV_OVERLAY:
   951 	    case SDL_YUY2_OVERLAY:
   952 	    case SDL_UYVY_OVERLAY:
   953 	    case SDL_YVYU_OVERLAY:
   954 		break;
   955 	    default:
   956 		SDL_SetError("Unsupported YUV format");
   957 		return(NULL);
   958 	}
   959 
   960 	/* Create the overlay structure */
   961 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
   962 	if ( overlay == NULL ) {
   963 		SDL_OutOfMemory();
   964 		return(NULL);
   965 	}
   966 	SDL_memset(overlay, 0, (sizeof *overlay));
   967 
   968 	/* Fill in the basic members */
   969 	overlay->format = format;
   970 	overlay->w = width;
   971 	overlay->h = height;
   972 
   973 	/* Set up the YUV surface function structure */
   974 	overlay->hwfuncs = &sw_yuvfuncs;
   975 
   976 	/* Create the pixel data and lookup tables */
   977 	swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
   978 	overlay->hwdata = swdata;
   979 	if ( swdata == NULL ) {
   980 		SDL_OutOfMemory();
   981 		SDL_FreeYUVOverlay(overlay);
   982 		return(NULL);
   983 	}
   984 	swdata->stretch = NULL;
   985 	swdata->display = display;
   986 	swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
   987 	swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
   988 	Cr_r_tab = &swdata->colortab[0*256];
   989 	Cr_g_tab = &swdata->colortab[1*256];
   990 	Cb_g_tab = &swdata->colortab[2*256];
   991 	Cb_b_tab = &swdata->colortab[3*256];
   992 	swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
   993 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
   994 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
   995 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
   996 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
   997 		SDL_OutOfMemory();
   998 		SDL_FreeYUVOverlay(overlay);
   999 		return(NULL);
  1000 	}
  1001 
  1002 	/* Generate the tables for the display surface */
  1003 	for (i=0; i<256; i++) {
  1004 		/* Gamma correction (luminescence table) and chroma correction
  1005 		   would be done here.  See the Berkeley mpeg_play sources.
  1006 		*/
  1007 		CB = CR = (i-128);
  1008 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
  1009 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
  1010 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
  1011 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
  1012 	}
  1013 
  1014 	/* 
  1015 	 * Set up entries 0-255 in rgb-to-pixel value tables.
  1016 	 */
  1017 	Rmask = display->format->Rmask;
  1018 	Gmask = display->format->Gmask;
  1019 	Bmask = display->format->Bmask;
  1020 	for ( i=0; i<256; ++i ) {
  1021 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
  1022 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
  1023 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
  1024 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
  1025 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
  1026 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
  1027 	}
  1028 
  1029 	/*
  1030 	 * If we have 16-bit output depth, then we double the value
  1031 	 * in the top word. This means that we can write out both
  1032 	 * pixels in the pixel doubling mode with one op. It is 
  1033 	 * harmless in the normal case as storing a 32-bit value
  1034 	 * through a short pointer will lose the top bits anyway.
  1035 	 */
  1036 	if( display->format->BytesPerPixel == 2 ) {
  1037 		for ( i=0; i<256; ++i ) {
  1038 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
  1039 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
  1040 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
  1041 		}
  1042 	}
  1043 
  1044 	/*
  1045 	 * Spread out the values we have to the rest of the array so that
  1046 	 * we do not need to check for overflow.
  1047 	 */
  1048 	for ( i=0; i<256; ++i ) {
  1049 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1050 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
  1051 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1052 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
  1053 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1054 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
  1055 	}
  1056 
  1057 	/* You have chosen wisely... */
  1058 	switch (format) {
  1059 	    case SDL_YV12_OVERLAY:
  1060 	    case SDL_IYUV_OVERLAY:
  1061 		if ( display->format->BytesPerPixel == 2 ) {
  1062 #if defined(__GNUC__) && defined(__i386__) && SDL_ASSEMBLY_ROUTINES
  1063 			/* inline assembly functions */
  1064 			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
  1065 			                     (Gmask == 0x07E0) &&
  1066 				             (Bmask == 0x001F) &&
  1067 			                     (width & 15) == 0) {
  1068 /*printf("Using MMX 16-bit 565 dither\n");*/
  1069 				swdata->Display1X = Color565DitherYV12MMX1X;
  1070 			} else {
  1071 /*printf("Using C 16-bit dither\n");*/
  1072 				swdata->Display1X = Color16DitherYV12Mod1X;
  1073 			}
  1074 #else
  1075 			swdata->Display1X = Color16DitherYV12Mod1X;
  1076 #endif
  1077 			swdata->Display2X = Color16DitherYV12Mod2X;
  1078 		}
  1079 		if ( display->format->BytesPerPixel == 3 ) {
  1080 			swdata->Display1X = Color24DitherYV12Mod1X;
  1081 			swdata->Display2X = Color24DitherYV12Mod2X;
  1082 		}
  1083 		if ( display->format->BytesPerPixel == 4 ) {
  1084 #if defined(__GNUC__) && defined(__i386__) && SDL_ASSEMBLY_ROUTINES
  1085 			/* inline assembly functions */
  1086 			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
  1087 			                     (Gmask == 0x0000FF00) &&
  1088 				             (Bmask == 0x000000FF) && 
  1089 			                     (width & 15) == 0) {
  1090 /*printf("Using MMX 32-bit dither\n");*/
  1091 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1092 			} else {
  1093 /*printf("Using C 32-bit dither\n");*/
  1094 				swdata->Display1X = Color32DitherYV12Mod1X;
  1095 			}
  1096 #else
  1097 			swdata->Display1X = Color32DitherYV12Mod1X;
  1098 #endif
  1099 			swdata->Display2X = Color32DitherYV12Mod2X;
  1100 		}
  1101 		break;
  1102 	    case SDL_YUY2_OVERLAY:
  1103 	    case SDL_UYVY_OVERLAY:
  1104 	    case SDL_YVYU_OVERLAY:
  1105 		if ( display->format->BytesPerPixel == 2 ) {
  1106 			swdata->Display1X = Color16DitherYUY2Mod1X;
  1107 			swdata->Display2X = Color16DitherYUY2Mod2X;
  1108 		}
  1109 		if ( display->format->BytesPerPixel == 3 ) {
  1110 			swdata->Display1X = Color24DitherYUY2Mod1X;
  1111 			swdata->Display2X = Color24DitherYUY2Mod2X;
  1112 		}
  1113 		if ( display->format->BytesPerPixel == 4 ) {
  1114 			swdata->Display1X = Color32DitherYUY2Mod1X;
  1115 			swdata->Display2X = Color32DitherYUY2Mod2X;
  1116 		}
  1117 		break;
  1118 	    default:
  1119 		/* We should never get here (caught above) */
  1120 		break;
  1121 	}
  1122 
  1123 	/* Find the pitch and offset values for the overlay */
  1124 	overlay->pitches = swdata->pitches;
  1125 	overlay->pixels = swdata->planes;
  1126 	switch (format) {
  1127 	    case SDL_YV12_OVERLAY:
  1128 	    case SDL_IYUV_OVERLAY:
  1129 		overlay->pitches[0] = overlay->w;
  1130 		overlay->pitches[1] = overlay->pitches[0] / 2;
  1131 		overlay->pitches[2] = overlay->pitches[0] / 2;
  1132 	        overlay->pixels[0] = swdata->pixels;
  1133 	        overlay->pixels[1] = overlay->pixels[0] +
  1134 		                     overlay->pitches[0] * overlay->h;
  1135 	        overlay->pixels[2] = overlay->pixels[1] +
  1136 		                     overlay->pitches[1] * overlay->h / 2;
  1137 		overlay->planes = 3;
  1138 		break;
  1139 	    case SDL_YUY2_OVERLAY:
  1140 	    case SDL_UYVY_OVERLAY:
  1141 	    case SDL_YVYU_OVERLAY:
  1142 		overlay->pitches[0] = overlay->w*2;
  1143 	        overlay->pixels[0] = swdata->pixels;
  1144 		overlay->planes = 1;
  1145 		break;
  1146 	    default:
  1147 		/* We should never get here (caught above) */
  1148 		break;
  1149 	}
  1150 
  1151 	/* We're all done.. */
  1152 	return(overlay);
  1153 }
  1154 
  1155 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
  1156 {
  1157 	return(0);
  1158 }
  1159 
  1160 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
  1161 {
  1162 	return;
  1163 }
  1164 
  1165 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
  1166 {
  1167 	struct private_yuvhwdata *swdata;
  1168 	SDL_Surface *stretch;
  1169 	SDL_Surface *display;
  1170 	int scale_2x;
  1171 	Uint8 *lum, *Cr, *Cb;
  1172 	Uint8 *dst;
  1173 	int mod;
  1174 
  1175 	swdata = overlay->hwdata;
  1176 	scale_2x = 0;
  1177 	stretch = 0;
  1178 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
  1179 		if ( (dstrect->w == 2*overlay->w) &&
  1180 		     (dstrect->h == 2*overlay->h) ) {
  1181 			scale_2x = 1;
  1182 		} else {
  1183 			if ( ! swdata->stretch ) {
  1184 				display = swdata->display;
  1185 				swdata->stretch = SDL_CreateRGBSurface(
  1186 					SDL_SWSURFACE,
  1187 					overlay->w, overlay->h,
  1188 					display->format->BitsPerPixel,
  1189 					display->format->Rmask,
  1190 					display->format->Gmask,
  1191 					display->format->Bmask, 0);
  1192 				if ( ! swdata->stretch ) {
  1193 					return(-1);
  1194 				}
  1195 			}
  1196 			stretch = swdata->stretch;
  1197 		}
  1198 	}
  1199 
  1200 	if ( stretch ) {
  1201 		display = stretch;
  1202 	} else {
  1203 		display = swdata->display;
  1204 	}
  1205 	switch (overlay->format) {
  1206 	    case SDL_YV12_OVERLAY:
  1207 		lum = overlay->pixels[0];
  1208 		Cr =  overlay->pixels[1];
  1209 		Cb =  overlay->pixels[2];
  1210 		break;
  1211 	    case SDL_IYUV_OVERLAY:
  1212 		lum = overlay->pixels[0];
  1213 		Cr =  overlay->pixels[2];
  1214 		Cb =  overlay->pixels[1];
  1215 		break;
  1216 	    case SDL_YUY2_OVERLAY:
  1217 		lum = overlay->pixels[0];
  1218 		Cr = lum + 3;
  1219 		Cb = lum + 1;
  1220 		break;
  1221 	    case SDL_UYVY_OVERLAY:
  1222 		lum = overlay->pixels[0]+1;
  1223 		Cr = lum + 1;
  1224 		Cb = lum - 1;
  1225 		break;
  1226 	    case SDL_YVYU_OVERLAY:
  1227 		lum = overlay->pixels[0];
  1228 		Cr = lum + 1;
  1229 		Cb = lum + 3;
  1230 		break;
  1231 	    default:
  1232 		SDL_SetError("Unsupported YUV format in blit");
  1233 		return(-1);
  1234 	}
  1235 	if ( SDL_MUSTLOCK(display) ) {
  1236         	if ( SDL_LockSurface(display) < 0 ) {
  1237 			return(-1);
  1238 		}
  1239 	}
  1240 	if ( stretch ) {
  1241 		dst = (Uint8 *)stretch->pixels;
  1242 	} else {
  1243 		dst = (Uint8 *)display->pixels
  1244 			+ dstrect->x * display->format->BytesPerPixel
  1245 			+ dstrect->y * display->pitch;
  1246 	}
  1247 	mod = (display->pitch / display->format->BytesPerPixel);
  1248 
  1249 	if ( scale_2x ) {
  1250 		mod -= (overlay->w * 2);
  1251 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1252 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1253 	} else {
  1254 		mod -= overlay->w;
  1255 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1256 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1257 	}
  1258 	if ( SDL_MUSTLOCK(display) ) {
  1259 		SDL_UnlockSurface(display);
  1260 	}
  1261 	if ( stretch ) {
  1262 		display = swdata->display;
  1263 		SDL_SoftStretch(stretch, NULL, display, dstrect);
  1264 	}
  1265 	SDL_UpdateRects(display, 1, dstrect);
  1266 
  1267 	return(0);
  1268 }
  1269 
  1270 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
  1271 {
  1272 	struct private_yuvhwdata *swdata;
  1273 
  1274 	swdata = overlay->hwdata;
  1275 	if ( swdata ) {
  1276 		if ( swdata->stretch ) {
  1277 			SDL_FreeSurface(swdata->stretch);
  1278 		}
  1279 		if ( swdata->pixels ) {
  1280 			SDL_free(swdata->pixels);
  1281 		}
  1282 		if ( swdata->colortab ) {
  1283 			SDL_free(swdata->colortab);
  1284 		}
  1285 		if ( swdata->rgb_2_pix ) {
  1286 			SDL_free(swdata->rgb_2_pix);
  1287 		}
  1288 		SDL_free(swdata);
  1289 	}
  1290 }