src/video/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 14 Dec 2001 12:38:15 +0000
changeset 252 e8157fcb3114
parent 9 a1c15fa4abb9
child 292 eadc0746dfaf
permissions -rw-r--r--
Updated the source with the correct e-mail address
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997, 1998, 1999, 2000, 2001  Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Library General Public
     7     License as published by the Free Software Foundation; either
     8     version 2 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Library General Public License for more details.
    14 
    15     You should have received a copy of the GNU Library General Public
    16     License along with this library; if not, write to the Free
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #ifdef SAVE_RCSID
    24 static char rcsid =
    25  "@(#) $Id$";
    26 #endif
    27 
    28 /* This is the software implementation of the YUV video overlay support */
    29 
    30 /* This code was derived from code carrying the following copyright notices:
    31 
    32  * Copyright (c) 1995 The Regents of the University of California.
    33  * All rights reserved.
    34  * 
    35  * Permission to use, copy, modify, and distribute this software and its
    36  * documentation for any purpose, without fee, and without written agreement is
    37  * hereby granted, provided that the above copyright notice and the following
    38  * two paragraphs appear in all copies of this software.
    39  * 
    40  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    41  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    42  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    43  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    44  * 
    45  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    46  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    47  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    48  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    49  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    50 
    51  * Copyright (c) 1995 Erik Corry
    52  * All rights reserved.
    53  * 
    54  * Permission to use, copy, modify, and distribute this software and its
    55  * documentation for any purpose, without fee, and without written agreement is
    56  * hereby granted, provided that the above copyright notice and the following
    57  * two paragraphs appear in all copies of this software.
    58  * 
    59  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    60  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    61  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    62  * OF THE POSSIBILITY OF SUCH DAMAGE.
    63  * 
    64  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    65  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    66  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    67  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    68  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    69 
    70  * Portions of this software Copyright (c) 1995 Brown University.
    71  * All rights reserved.
    72  * 
    73  * Permission to use, copy, modify, and distribute this software and its
    74  * documentation for any purpose, without fee, and without written agreement
    75  * is hereby granted, provided that the above copyright notice and the
    76  * following two paragraphs appear in all copies of this software.
    77  * 
    78  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    79  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    80  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    81  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    82  * 
    83  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    84  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    85  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    86  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    87  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    88  */
    89 
    90 #include <stdlib.h>
    91 #include <string.h>
    92 
    93 #include "SDL_error.h"
    94 #include "SDL_video.h"
    95 #include "SDL_stretch_c.h"
    96 #include "SDL_yuvfuncs.h"
    97 #include "SDL_yuv_sw_c.h"
    98 
/* Function to check the CPU flags */
/* Flag bit for MMX support.  NOTE(review): presumably tested against the
   value of CPU_Flags(); the test is not in this part of the file -- confirm */
#define MMX_CPU		0x800000
#ifdef USE_ASMBLIT
/* With USE_ASMBLIT defined, CPU_Flags() calls Hermes_X86_CPU() */
#define CPU_Flags()	Hermes_X86_CPU()
#else
/* Without USE_ASMBLIT, CPU_Flags() is the constant 0L (no flag bits set) */
#define CPU_Flags()	0L
#endif

#ifdef USE_ASMBLIT
/* Definitions made before HeadX86.h is included.  NOTE(review): presumably
   that header relies on them -- confirm against HeadX86.h */
#define X86_ASSEMBLER
#define HermesConverterInterface	void
#define HermesClearInterface		void
#define STACKCALL
typedef Uint32 int32;

#include "HeadX86.h"
#endif
   116 
/* The functions used to manipulate software video overlays */
/* Positional initializer: the four entries must stay in the member order of
   struct private_yuvhwfuncs, which is declared outside this part of the file */
static struct private_yuvhwfuncs sw_yuvfuncs = {
	SDL_LockYUV_SW,
	SDL_UnlockYUV_SW,
	SDL_DisplayYUV_SW,
	SDL_FreeYUV_SW
};
   124 
/* Private overlay data: work surfaces, a pixel buffer, the RGB conversion
   lookup tables and the converter functions that use them */
struct private_yuvhwdata {
	/* NOTE(review): the roles of the two surfaces and of `pixels` are not
	   visible in this part of the file -- confirm at the use sites */
	SDL_Surface *stretch;
	SDL_Surface *display;
	Uint8 *pixels;
	/* Lookup tables; same names and types as the first two parameters of
	   the converter functions in this file */
	int *colortab;
	Uint32 *rgb_2_pix;
	/* Converter callbacks; the signature matches the Color*Dither*Mod1X
	   functions defined in this file */
	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
                          unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );
	/* Same signature as the Color*Dither*Mod2X functions in this file */
	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
	                  unsigned char *lum, unsigned char *cr,
                          unsigned char *cb, unsigned char *out,
                          int rows, int cols, int mod );

	/* These are just so we don't have to allocate them separately */
	Uint16 pitches[3];
	Uint8 *planes[3];
};
   145 
   146 
/* The colorspace conversion functions */

/* Two converters defined outside this file; they take the same parameter
   list as the C converters below.  NOTE(review): the names suggest MMX
   implementations for 16-bit 565 and for RGB output -- confirm */
extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
                                     unsigned char *lum, unsigned char *cr,
                                     unsigned char *cb, unsigned char *out,
                                     int rows, int cols, int mod );
   157 
   158 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   159                                     unsigned char *lum, unsigned char *cr,
   160                                     unsigned char *cb, unsigned char *out,
   161                                     int rows, int cols, int mod )
   162 {
   163     unsigned short* row1;
   164     unsigned short* row2;
   165     unsigned char* lum2;
   166     int x, y;
   167     int cr_r;
   168     int crb_g;
   169     int cb_b;
   170     int cols_2 = cols / 2;
   171 
   172     row1 = (unsigned short*) out;
   173     row2 = row1 + cols + mod;
   174     lum2 = lum + cols;
   175 
   176     mod += cols + mod;
   177 
   178     y = rows / 2;
   179     while( y-- )
   180     {
   181         x = cols_2;
   182         while( x-- )
   183         {
   184             register int L;
   185 
   186             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   187             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   188                                + colortab[ *cb + 2*256 ];
   189             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   190             ++cr; ++cb;
   191 
   192             L = *lum++;
   193             *row1++ = (rgb_2_pix[ L + cr_r ] |
   194                        rgb_2_pix[ L + crb_g ] |
   195                        rgb_2_pix[ L + cb_b ]);
   196 
   197             L = *lum++;
   198             *row1++ = (rgb_2_pix[ L + cr_r ] |
   199                        rgb_2_pix[ L + crb_g ] |
   200                        rgb_2_pix[ L + cb_b ]);
   201 
   202 
   203             /* Now, do second row.  */
   204 
   205             L = *lum2++;
   206             *row2++ = (rgb_2_pix[ L + cr_r ] |
   207                        rgb_2_pix[ L + crb_g ] |
   208                        rgb_2_pix[ L + cb_b ]);
   209 
   210             L = *lum2++;
   211             *row2++ = (rgb_2_pix[ L + cr_r ] |
   212                        rgb_2_pix[ L + crb_g ] |
   213                        rgb_2_pix[ L + cb_b ]);
   214         }
   215 
   216         /*
   217          * These values are at the start of the next line, (due
   218          * to the ++'s above),but they need to be at the start
   219          * of the line after that.
   220          */
   221         lum  += cols;
   222         lum2 += cols;
   223         row1 += mod;
   224         row2 += mod;
   225     }
   226 }
   227 
   228 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   229                                     unsigned char *lum, unsigned char *cr,
   230                                     unsigned char *cb, unsigned char *out,
   231                                     int rows, int cols, int mod )
   232 {
   233     unsigned int value;
   234     unsigned char* row1;
   235     unsigned char* row2;
   236     unsigned char* lum2;
   237     int x, y;
   238     int cr_r;
   239     int crb_g;
   240     int cb_b;
   241     int cols_2 = cols / 2;
   242 
   243     row1 = out;
   244     row2 = row1 + cols*3 + mod*3;
   245     lum2 = lum + cols;
   246 
   247     mod += cols + mod;
   248     mod *= 3;
   249 
   250     y = rows / 2;
   251     while( y-- )
   252     {
   253         x = cols_2;
   254         while( x-- )
   255         {
   256             register int L;
   257 
   258             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   259             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   260                                + colortab[ *cb + 2*256 ];
   261             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   262             ++cr; ++cb;
   263 
   264             L = *lum++;
   265             value = (rgb_2_pix[ L + cr_r ] |
   266                      rgb_2_pix[ L + crb_g ] |
   267                      rgb_2_pix[ L + cb_b ]);
   268             *row1++ = (value      ) & 0xFF;
   269             *row1++ = (value >>  8) & 0xFF;
   270             *row1++ = (value >> 16) & 0xFF;
   271 
   272             L = *lum++;
   273             value = (rgb_2_pix[ L + cr_r ] |
   274                      rgb_2_pix[ L + crb_g ] |
   275                      rgb_2_pix[ L + cb_b ]);
   276             *row1++ = (value      ) & 0xFF;
   277             *row1++ = (value >>  8) & 0xFF;
   278             *row1++ = (value >> 16) & 0xFF;
   279 
   280 
   281             /* Now, do second row.  */
   282 
   283             L = *lum2++;
   284             value = (rgb_2_pix[ L + cr_r ] |
   285                      rgb_2_pix[ L + crb_g ] |
   286                      rgb_2_pix[ L + cb_b ]);
   287             *row2++ = (value      ) & 0xFF;
   288             *row2++ = (value >>  8) & 0xFF;
   289             *row2++ = (value >> 16) & 0xFF;
   290 
   291             L = *lum2++;
   292             value = (rgb_2_pix[ L + cr_r ] |
   293                      rgb_2_pix[ L + crb_g ] |
   294                      rgb_2_pix[ L + cb_b ]);
   295             *row2++ = (value      ) & 0xFF;
   296             *row2++ = (value >>  8) & 0xFF;
   297             *row2++ = (value >> 16) & 0xFF;
   298         }
   299 
   300         /*
   301          * These values are at the start of the next line, (due
   302          * to the ++'s above),but they need to be at the start
   303          * of the line after that.
   304          */
   305         lum  += cols;
   306         lum2 += cols;
   307         row1 += mod;
   308         row2 += mod;
   309     }
   310 }
   311 
   312 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   313                                     unsigned char *lum, unsigned char *cr,
   314                                     unsigned char *cb, unsigned char *out,
   315                                     int rows, int cols, int mod )
   316 {
   317     unsigned int* row1;
   318     unsigned int* row2;
   319     unsigned char* lum2;
   320     int x, y;
   321     int cr_r;
   322     int crb_g;
   323     int cb_b;
   324     int cols_2 = cols / 2;
   325 
   326     row1 = (unsigned int*) out;
   327     row2 = row1 + cols + mod;
   328     lum2 = lum + cols;
   329 
   330     mod += cols + mod;
   331 
   332     y = rows / 2;
   333     while( y-- )
   334     {
   335         x = cols_2;
   336         while( x-- )
   337         {
   338             register int L;
   339 
   340             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   341             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   342                                + colortab[ *cb + 2*256 ];
   343             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   344             ++cr; ++cb;
   345 
   346             L = *lum++;
   347             *row1++ = (rgb_2_pix[ L + cr_r ] |
   348                        rgb_2_pix[ L + crb_g ] |
   349                        rgb_2_pix[ L + cb_b ]);
   350 
   351             L = *lum++;
   352             *row1++ = (rgb_2_pix[ L + cr_r ] |
   353                        rgb_2_pix[ L + crb_g ] |
   354                        rgb_2_pix[ L + cb_b ]);
   355 
   356 
   357             /* Now, do second row.  */
   358 
   359             L = *lum2++;
   360             *row2++ = (rgb_2_pix[ L + cr_r ] |
   361                        rgb_2_pix[ L + crb_g ] |
   362                        rgb_2_pix[ L + cb_b ]);
   363 
   364             L = *lum2++;
   365             *row2++ = (rgb_2_pix[ L + cr_r ] |
   366                        rgb_2_pix[ L + crb_g ] |
   367                        rgb_2_pix[ L + cb_b ]);
   368         }
   369 
   370         /*
   371          * These values are at the start of the next line, (due
   372          * to the ++'s above),but they need to be at the start
   373          * of the line after that.
   374          */
   375         lum  += cols;
   376         lum2 += cols;
   377         row1 += mod;
   378         row2 += mod;
   379     }
   380 }
   381 
   382 /*
   383  * In this function I make use of a nasty trick. The tables have the lower
   384  * 16 bits replicated in the upper 16. This means I can write ints and get
   385  * the horisontal doubling for free (almost).
   386  */
   387 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   388                                     unsigned char *lum, unsigned char *cr,
   389                                     unsigned char *cb, unsigned char *out,
   390                                     int rows, int cols, int mod )
   391 {
   392     unsigned int* row1 = (unsigned int*) out;
   393     const int next_row = cols+(mod/2);
   394     unsigned int* row2 = row1 + 2*next_row;
   395     unsigned char* lum2;
   396     int x, y;
   397     int cr_r;
   398     int crb_g;
   399     int cb_b;
   400     int cols_2 = cols / 2;
   401 
   402     lum2 = lum + cols;
   403 
   404     mod = (next_row * 3) + (mod/2);
   405 
   406     y = rows / 2;
   407     while( y-- )
   408     {
   409         x = cols_2;
   410         while( x-- )
   411         {
   412             register int L;
   413 
   414             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   415             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   416                                + colortab[ *cb + 2*256 ];
   417             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   418             ++cr; ++cb;
   419 
   420             L = *lum++;
   421             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   422                                         rgb_2_pix[ L + crb_g ] |
   423                                         rgb_2_pix[ L + cb_b ]);
   424             row1++;
   425 
   426             L = *lum++;
   427             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   428                                         rgb_2_pix[ L + crb_g ] |
   429                                         rgb_2_pix[ L + cb_b ]);
   430             row1++;
   431 
   432 
   433             /* Now, do second row. */
   434 
   435             L = *lum2++;
   436             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   437                                         rgb_2_pix[ L + crb_g ] |
   438                                         rgb_2_pix[ L + cb_b ]);
   439             row2++;
   440 
   441             L = *lum2++;
   442             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   443                                         rgb_2_pix[ L + crb_g ] |
   444                                         rgb_2_pix[ L + cb_b ]);
   445             row2++;
   446         }
   447 
   448         /*
   449          * These values are at the start of the next line, (due
   450          * to the ++'s above),but they need to be at the start
   451          * of the line after that.
   452          */
   453         lum  += cols;
   454         lum2 += cols;
   455         row1 += mod;
   456         row2 += mod;
   457     }
   458 }
   459 
   460 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   461                                     unsigned char *lum, unsigned char *cr,
   462                                     unsigned char *cb, unsigned char *out,
   463                                     int rows, int cols, int mod )
   464 {
   465     unsigned int value;
   466     unsigned char* row1 = out;
   467     const int next_row = (cols*2 + mod) * 3;
   468     unsigned char* row2 = row1 + 2*next_row;
   469     unsigned char* lum2;
   470     int x, y;
   471     int cr_r;
   472     int crb_g;
   473     int cb_b;
   474     int cols_2 = cols / 2;
   475 
   476     lum2 = lum + cols;
   477 
   478     mod = next_row*3 + mod*3;
   479 
   480     y = rows / 2;
   481     while( y-- )
   482     {
   483         x = cols_2;
   484         while( x-- )
   485         {
   486             register int L;
   487 
   488             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   489             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   490                                + colortab[ *cb + 2*256 ];
   491             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   492             ++cr; ++cb;
   493 
   494             L = *lum++;
   495             value = (rgb_2_pix[ L + cr_r ] |
   496                      rgb_2_pix[ L + crb_g ] |
   497                      rgb_2_pix[ L + cb_b ]);
   498             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   499                      (value      ) & 0xFF;
   500             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   501                      (value >>  8) & 0xFF;
   502             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   503                      (value >> 16) & 0xFF;
   504             row1 += 2*3;
   505 
   506             L = *lum++;
   507             value = (rgb_2_pix[ L + cr_r ] |
   508                      rgb_2_pix[ L + crb_g ] |
   509                      rgb_2_pix[ L + cb_b ]);
   510             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   511                      (value      ) & 0xFF;
   512             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   513                      (value >>  8) & 0xFF;
   514             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   515                      (value >> 16) & 0xFF;
   516             row1 += 2*3;
   517 
   518 
   519             /* Now, do second row. */
   520 
   521             L = *lum2++;
   522             value = (rgb_2_pix[ L + cr_r ] |
   523                      rgb_2_pix[ L + crb_g ] |
   524                      rgb_2_pix[ L + cb_b ]);
   525             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   526                      (value      ) & 0xFF;
   527             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   528                      (value >>  8) & 0xFF;
   529             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   530                      (value >> 16) & 0xFF;
   531             row2 += 2*3;
   532 
   533             L = *lum2++;
   534             value = (rgb_2_pix[ L + cr_r ] |
   535                      rgb_2_pix[ L + crb_g ] |
   536                      rgb_2_pix[ L + cb_b ]);
   537             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   538                      (value      ) & 0xFF;
   539             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   540                      (value >>  8) & 0xFF;
   541             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   542                      (value >> 16) & 0xFF;
   543             row2 += 2*3;
   544         }
   545 
   546         /*
   547          * These values are at the start of the next line, (due
   548          * to the ++'s above),but they need to be at the start
   549          * of the line after that.
   550          */
   551         lum  += cols;
   552         lum2 += cols;
   553         row1 += mod;
   554         row2 += mod;
   555     }
   556 }
   557 
   558 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   559                                     unsigned char *lum, unsigned char *cr,
   560                                     unsigned char *cb, unsigned char *out,
   561                                     int rows, int cols, int mod )
   562 {
   563     unsigned int* row1 = (unsigned int*) out;
   564     const int next_row = cols*2+mod;
   565     unsigned int* row2 = row1 + 2*next_row;
   566     unsigned char* lum2;
   567     int x, y;
   568     int cr_r;
   569     int crb_g;
   570     int cb_b;
   571     int cols_2 = cols / 2;
   572 
   573     lum2 = lum + cols;
   574 
   575     mod = (next_row * 3) + mod;
   576 
   577     y = rows / 2;
   578     while( y-- )
   579     {
   580         x = cols_2;
   581         while( x-- )
   582         {
   583             register int L;
   584 
   585             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   586             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   587                                + colortab[ *cb + 2*256 ];
   588             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   589             ++cr; ++cb;
   590 
   591             L = *lum++;
   592             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   593                                        (rgb_2_pix[ L + cr_r ] |
   594                                         rgb_2_pix[ L + crb_g ] |
   595                                         rgb_2_pix[ L + cb_b ]);
   596             row1 += 2;
   597 
   598             L = *lum++;
   599             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   600                                        (rgb_2_pix[ L + cr_r ] |
   601                                         rgb_2_pix[ L + crb_g ] |
   602                                         rgb_2_pix[ L + cb_b ]);
   603             row1 += 2;
   604 
   605 
   606             /* Now, do second row. */
   607 
   608             L = *lum2++;
   609             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   610                                        (rgb_2_pix[ L + cr_r ] |
   611                                         rgb_2_pix[ L + crb_g ] |
   612                                         rgb_2_pix[ L + cb_b ]);
   613             row2 += 2;
   614 
   615             L = *lum2++;
   616             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   617                                        (rgb_2_pix[ L + cr_r ] |
   618                                         rgb_2_pix[ L + crb_g ] |
   619                                         rgb_2_pix[ L + cb_b ]);
   620             row2 += 2;
   621         }
   622 
   623         /*
   624          * These values are at the start of the next line, (due
   625          * to the ++'s above),but they need to be at the start
   626          * of the line after that.
   627          */
   628         lum  += cols;
   629         lum2 += cols;
   630         row1 += mod;
   631         row2 += mod;
   632     }
   633 }
   634 
   635 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   636                                     unsigned char *lum, unsigned char *cr,
   637                                     unsigned char *cb, unsigned char *out,
   638                                     int rows, int cols, int mod )
   639 {
   640     unsigned short* row;
   641     int x, y;
   642     int cr_r;
   643     int crb_g;
   644     int cb_b;
   645     int cols_2 = cols / 2;
   646 
   647     row = (unsigned short*) out;
   648 
   649     y = rows;
   650     while( y-- )
   651     {
   652         x = cols_2;
   653         while( x-- )
   654         {
   655             register int L;
   656 
   657             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   658             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   659                                + colortab[ *cb + 2*256 ];
   660             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   661             cr += 4; cb += 4;
   662 
   663             L = *lum; lum += 2;
   664             *row++ = (rgb_2_pix[ L + cr_r ] |
   665                        rgb_2_pix[ L + crb_g ] |
   666                        rgb_2_pix[ L + cb_b ]);
   667 
   668             L = *lum; lum += 2;
   669             *row++ = (rgb_2_pix[ L + cr_r ] |
   670                        rgb_2_pix[ L + crb_g ] |
   671                        rgb_2_pix[ L + cb_b ]);
   672 
   673         }
   674 
   675         row += mod;
   676     }
   677 }
   678 
   679 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   680                                     unsigned char *lum, unsigned char *cr,
   681                                     unsigned char *cb, unsigned char *out,
   682                                     int rows, int cols, int mod )
   683 {
   684     unsigned int value;
   685     unsigned char* row;
   686     int x, y;
   687     int cr_r;
   688     int crb_g;
   689     int cb_b;
   690     int cols_2 = cols / 2;
   691 
   692     row = (unsigned char*) out;
   693     mod *= 3;
   694     y = rows;
   695     while( y-- )
   696     {
   697         x = cols_2;
   698         while( x-- )
   699         {
   700             register int L;
   701 
   702             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   703             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   704                                + colortab[ *cb + 2*256 ];
   705             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   706             cr += 4; cb += 4;
   707 
   708             L = *lum; lum += 2;
   709             value = (rgb_2_pix[ L + cr_r ] |
   710                      rgb_2_pix[ L + crb_g ] |
   711                      rgb_2_pix[ L + cb_b ]);
   712             *row++ = (value      ) & 0xFF;
   713             *row++ = (value >>  8) & 0xFF;
   714             *row++ = (value >> 16) & 0xFF;
   715 
   716             L = *lum; lum += 2;
   717             value = (rgb_2_pix[ L + cr_r ] |
   718                      rgb_2_pix[ L + crb_g ] |
   719                      rgb_2_pix[ L + cb_b ]);
   720             *row++ = (value      ) & 0xFF;
   721             *row++ = (value >>  8) & 0xFF;
   722             *row++ = (value >> 16) & 0xFF;
   723 
   724         }
   725         row += mod;
   726     }
   727 }
   728 
   729 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   730                                     unsigned char *lum, unsigned char *cr,
   731                                     unsigned char *cb, unsigned char *out,
   732                                     int rows, int cols, int mod )
   733 {
   734     unsigned int* row;
   735     int x, y;
   736     int cr_r;
   737     int crb_g;
   738     int cb_b;
   739     int cols_2 = cols / 2;
   740 
   741     row = (unsigned int*) out;
   742     y = rows;
   743     while( y-- )
   744     {
   745         x = cols_2;
   746         while( x-- )
   747         {
   748             register int L;
   749 
   750             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   751             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   752                                + colortab[ *cb + 2*256 ];
   753             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   754             cr += 4; cb += 4;
   755 
   756             L = *lum; lum += 2;
   757             *row++ = (rgb_2_pix[ L + cr_r ] |
   758                        rgb_2_pix[ L + crb_g ] |
   759                        rgb_2_pix[ L + cb_b ]);
   760 
   761             L = *lum; lum += 2;
   762             *row++ = (rgb_2_pix[ L + cr_r ] |
   763                        rgb_2_pix[ L + crb_g ] |
   764                        rgb_2_pix[ L + cb_b ]);
   765 
   766 
   767         }
   768         row += mod;
   769     }
   770 }
   771 
   772 /*
   773  * In this function I make use of a nasty trick. The tables have the lower
   774  * 16 bits replicated in the upper 16. This means I can write ints and get
   775  * the horisontal doubling for free (almost).
   776  */
   777 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   778                                     unsigned char *lum, unsigned char *cr,
   779                                     unsigned char *cb, unsigned char *out,
   780                                     int rows, int cols, int mod )
   781 {
   782     unsigned int* row = (unsigned int*) out;
   783     const int next_row = cols+(mod/2);
   784     int x, y;
   785     int cr_r;
   786     int crb_g;
   787     int cb_b;
   788     int cols_2 = cols / 2;
   789 
   790     y = rows;
   791     while( y-- )
   792     {
   793         x = cols_2;
   794         while( x-- )
   795         {
   796             register int L;
   797 
   798             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   799             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   800                                + colortab[ *cb + 2*256 ];
   801             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   802             cr += 4; cb += 4;
   803 
   804             L = *lum; lum += 2;
   805             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   806                                         rgb_2_pix[ L + crb_g ] |
   807                                         rgb_2_pix[ L + cb_b ]);
   808             row++;
   809 
   810             L = *lum; lum += 2;
   811             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   812                                         rgb_2_pix[ L + crb_g ] |
   813                                         rgb_2_pix[ L + cb_b ]);
   814             row++;
   815 
   816         }
   817         row += next_row;
   818     }
   819 }
   820 
   821 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   822                                     unsigned char *lum, unsigned char *cr,
   823                                     unsigned char *cb, unsigned char *out,
   824                                     int rows, int cols, int mod )
   825 {
   826     unsigned int value;
   827     unsigned char* row = out;
   828     const int next_row = (cols*2 + mod) * 3;
   829     int x, y;
   830     int cr_r;
   831     int crb_g;
   832     int cb_b;
   833     int cols_2 = cols / 2;
   834     y = rows;
   835     while( y-- )
   836     {
   837         x = cols_2;
   838         while( x-- )
   839         {
   840             register int L;
   841 
   842             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   843             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   844                                + colortab[ *cb + 2*256 ];
   845             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   846             cr += 4; cb += 4;
   847 
   848             L = *lum; lum += 2;
   849             value = (rgb_2_pix[ L + cr_r ] |
   850                      rgb_2_pix[ L + crb_g ] |
   851                      rgb_2_pix[ L + cb_b ]);
   852             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   853                      (value      ) & 0xFF;
   854             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   855                      (value >>  8) & 0xFF;
   856             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   857                      (value >> 16) & 0xFF;
   858             row += 2*3;
   859 
   860             L = *lum; lum += 2;
   861             value = (rgb_2_pix[ L + cr_r ] |
   862                      rgb_2_pix[ L + crb_g ] |
   863                      rgb_2_pix[ L + cb_b ]);
   864             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   865                      (value      ) & 0xFF;
   866             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   867                      (value >>  8) & 0xFF;
   868             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   869                      (value >> 16) & 0xFF;
   870             row += 2*3;
   871 
   872         }
   873         row += next_row;
   874     }
   875 }
   876 
   877 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   878                                     unsigned char *lum, unsigned char *cr,
   879                                     unsigned char *cb, unsigned char *out,
   880                                     int rows, int cols, int mod )
   881 {
   882     unsigned int* row = (unsigned int*) out;
   883     const int next_row = cols*2+mod;
   884     int x, y;
   885     int cr_r;
   886     int crb_g;
   887     int cb_b;
   888     int cols_2 = cols / 2;
   889     mod+=mod;
   890     y = rows;
   891     while( y-- )
   892     {
   893         x = cols_2;
   894         while( x-- )
   895         {
   896             register int L;
   897 
   898             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   899             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   900                                + colortab[ *cb + 2*256 ];
   901             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   902             cr += 4; cb += 4;
   903 
   904             L = *lum; lum += 2;
   905             row[0] = row[1] = row[next_row] = row[next_row+1] =
   906                                        (rgb_2_pix[ L + cr_r ] |
   907                                         rgb_2_pix[ L + crb_g ] |
   908                                         rgb_2_pix[ L + cb_b ]);
   909             row += 2;
   910 
   911             L = *lum; lum += 2;
   912             row[0] = row[1] = row[next_row] = row[next_row+1] =
   913                                        (rgb_2_pix[ L + cr_r ] |
   914                                         rgb_2_pix[ L + crb_g ] |
   915                                         rgb_2_pix[ L + cb_b ]);
   916             row += 2;
   917 
   918 
   919         }
   920 
   921         row += next_row;
   922     }
   923 }
   924 
   925 /*
   926  * How many 1 bits are there in the Uint32.
   927  * Low performance, do not call often.
   928  */
   929 static int number_of_bits_set( Uint32 a )
   930 {
   931     if(!a) return 0;
   932     if(a & 1) return 1 + number_of_bits_set(a >> 1);
   933     return(number_of_bits_set(a >> 1));
   934 }
   935 
   936 /*
   937  * How many 0 bits are there at least significant end of Uint32.
   938  * Low performance, do not call often.
   939  */
   940 static int free_bits_at_bottom( Uint32 a )
   941 {
   942       /* assume char is 8 bits */
   943     if(!a) return sizeof(Uint32) * 8;
   944     if(((Sint32)a) & 1l) return 0;
   945     return 1 + free_bits_at_bottom ( a >> 1);
   946 }
   947 
   948 
   949 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
   950 {
   951 	SDL_Overlay *overlay;
   952 	struct private_yuvhwdata *swdata;
   953 	int *Cr_r_tab;
   954 	int *Cr_g_tab;
   955 	int *Cb_g_tab;
   956 	int *Cb_b_tab;
   957 	Uint32 *r_2_pix_alloc;
   958 	Uint32 *g_2_pix_alloc;
   959 	Uint32 *b_2_pix_alloc;
   960 	int i, cpu_mmx;
   961 	int CR, CB;
   962 	Uint32 Rmask, Gmask, Bmask;
   963 
   964 	/* Only RGB packed pixel conversion supported */
   965 	if ( (display->format->BytesPerPixel != 2) &&
   966 	     (display->format->BytesPerPixel != 3) &&
   967 	     (display->format->BytesPerPixel != 4) ) {
   968 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   969 		return(NULL);
   970 	}
   971 
   972 	/* Verify that we support the format */
   973 	switch (format) {
   974 	    case SDL_YV12_OVERLAY:
   975 	    case SDL_IYUV_OVERLAY:
   976 	    case SDL_YUY2_OVERLAY:
   977 	    case SDL_UYVY_OVERLAY:
   978 	    case SDL_YVYU_OVERLAY:
   979 		break;
   980 	    default:
   981 		SDL_SetError("Unsupported YUV format");
   982 		return(NULL);
   983 	}
   984 
   985 	/* Create the overlay structure */
   986 	overlay = (SDL_Overlay *)malloc(sizeof *overlay);
   987 	if ( overlay == NULL ) {
   988 		SDL_OutOfMemory();
   989 		return(NULL);
   990 	}
   991 	memset(overlay, 0, (sizeof *overlay));
   992 
   993 	/* Fill in the basic members */
   994 	overlay->format = format;
   995 	overlay->w = width;
   996 	overlay->h = height;
   997 
   998 	/* Set up the YUV surface function structure */
   999 	overlay->hwfuncs = &sw_yuvfuncs;
  1000 
  1001 	/* Create the pixel data and lookup tables */
  1002 	swdata = (struct private_yuvhwdata *)malloc(sizeof *swdata);
  1003 	overlay->hwdata = swdata;
  1004 	if ( swdata == NULL ) {
  1005 		SDL_OutOfMemory();
  1006 		SDL_FreeYUVOverlay(overlay);
  1007 		return(NULL);
  1008 	}
  1009 	swdata->stretch = NULL;
  1010 	swdata->display = display;
  1011 	swdata->pixels = (Uint8 *) malloc(width*height*2);
  1012 	swdata->colortab = (int *)malloc(4*256*sizeof(int));
  1013 	Cr_r_tab = &swdata->colortab[0*256];
  1014 	Cr_g_tab = &swdata->colortab[1*256];
  1015 	Cb_g_tab = &swdata->colortab[2*256];
  1016 	Cb_b_tab = &swdata->colortab[3*256];
  1017 	swdata->rgb_2_pix = (Uint32 *)malloc(3*768*sizeof(Uint32));
  1018 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
  1019 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
  1020 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
  1021 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
  1022 		SDL_OutOfMemory();
  1023 		SDL_FreeYUVOverlay(overlay);
  1024 		return(NULL);
  1025 	}
  1026 
  1027 	/* Generate the tables for the display surface */
  1028 	for (i=0; i<256; i++) {
  1029 		/* Gamma correction (luminescence table) and chroma correction
  1030 		   would be done here.  See the Berkeley mpeg_play sources.
  1031 		*/
  1032 		CB = CR = (i-128);
  1033 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
  1034 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
  1035 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
  1036 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
  1037 	}
  1038 
  1039 	/* 
  1040 	 * Set up entries 0-255 in rgb-to-pixel value tables.
  1041 	 */
  1042 	Rmask = display->format->Rmask;
  1043 	Gmask = display->format->Gmask;
  1044 	Bmask = display->format->Bmask;
  1045 	for ( i=0; i<256; ++i ) {
  1046 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
  1047 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
  1048 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
  1049 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
  1050 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
  1051 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
  1052 	}
  1053 
  1054 	/*
  1055 	 * If we have 16-bit output depth, then we double the value
  1056 	 * in the top word. This means that we can write out both
  1057 	 * pixels in the pixel doubling mode with one op. It is 
  1058 	 * harmless in the normal case as storing a 32-bit value
  1059 	 * through a short pointer will lose the top bits anyway.
  1060 	 */
  1061 	if( display->format->BytesPerPixel == 2 ) {
  1062 		for ( i=0; i<256; ++i ) {
  1063 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
  1064 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
  1065 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
  1066 		}
  1067 	}
  1068 
  1069 	/*
  1070 	 * Spread out the values we have to the rest of the array so that
  1071 	 * we do not need to check for overflow.
  1072 	 */
  1073 	for ( i=0; i<256; ++i ) {
  1074 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1075 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
  1076 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1077 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
  1078 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1079 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
  1080 	}
  1081 
  1082 	/* You have chosen wisely... */
  1083 	switch (format) {
  1084 	    case SDL_YV12_OVERLAY:
  1085 	    case SDL_IYUV_OVERLAY:
  1086 		cpu_mmx = CPU_Flags() & MMX_CPU;
  1087 		if ( display->format->BytesPerPixel == 2 ) {
  1088 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1089 			/* inline assembly functions */
  1090 			if ( cpu_mmx && (Rmask == 0xF800) &&
  1091 			                (Gmask == 0x07E0) &&
  1092 				        (Bmask == 0x001F) &&
  1093 			                (width & 15) == 0) {
  1094 /*printf("Using MMX 16-bit 565 dither\n");*/
  1095 				swdata->Display1X = Color565DitherYV12MMX1X;
  1096 			} else {
  1097 /*printf("Using C 16-bit dither\n");*/
  1098 				swdata->Display1X = Color16DitherYV12Mod1X;
  1099 			}
  1100 #else
  1101 			swdata->Display1X = Color16DitherYV12Mod1X;
  1102 #endif
  1103 			swdata->Display2X = Color16DitherYV12Mod2X;
  1104 		}
  1105 		if ( display->format->BytesPerPixel == 3 ) {
  1106 			swdata->Display1X = Color24DitherYV12Mod1X;
  1107 			swdata->Display2X = Color24DitherYV12Mod2X;
  1108 		}
  1109 		if ( display->format->BytesPerPixel == 4 ) {
  1110 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1111 			/* inline assembly functions */
  1112 			if ( cpu_mmx && (Rmask == 0x00FF0000) &&
  1113 			                (Gmask == 0x0000FF00) &&
  1114 				        (Bmask == 0x000000FF) && 
  1115 			                (width & 15) == 0) {
  1116 /*printf("Using MMX 32-bit dither\n");*/
  1117 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1118 			} else {
  1119 /*printf("Using C 32-bit dither\n");*/
  1120 				swdata->Display1X = Color32DitherYV12Mod1X;
  1121 			}
  1122 #else
  1123 			swdata->Display1X = Color32DitherYV12Mod1X;
  1124 #endif
  1125 			swdata->Display2X = Color32DitherYV12Mod2X;
  1126 		}
  1127 		break;
  1128 	    case SDL_YUY2_OVERLAY:
  1129 	    case SDL_UYVY_OVERLAY:
  1130 	    case SDL_YVYU_OVERLAY:
  1131 		if ( display->format->BytesPerPixel == 2 ) {
  1132 			swdata->Display1X = Color16DitherYUY2Mod1X;
  1133 			swdata->Display2X = Color16DitherYUY2Mod2X;
  1134 		}
  1135 		if ( display->format->BytesPerPixel == 3 ) {
  1136 			swdata->Display1X = Color24DitherYUY2Mod1X;
  1137 			swdata->Display2X = Color24DitherYUY2Mod2X;
  1138 		}
  1139 		if ( display->format->BytesPerPixel == 4 ) {
  1140 			swdata->Display1X = Color32DitherYUY2Mod1X;
  1141 			swdata->Display2X = Color32DitherYUY2Mod2X;
  1142 		}
  1143 		break;
  1144 	    default:
  1145 		/* We should never get here (caught above) */
  1146 		break;
  1147 	}
  1148 
  1149 	/* Find the pitch and offset values for the overlay */
  1150 	overlay->pitches = swdata->pitches;
  1151 	overlay->pixels = swdata->planes;
  1152 	switch (format) {
  1153 	    case SDL_YV12_OVERLAY:
  1154 	    case SDL_IYUV_OVERLAY:
  1155 		overlay->pitches[0] = overlay->w;
  1156 		overlay->pitches[1] = overlay->pitches[0] / 2;
  1157 		overlay->pitches[2] = overlay->pitches[0] / 2;
  1158 	        overlay->pixels[0] = swdata->pixels;
  1159 	        overlay->pixels[1] = overlay->pixels[0] +
  1160 		                     overlay->pitches[0] * overlay->h;
  1161 	        overlay->pixels[2] = overlay->pixels[1] +
  1162 		                     overlay->pitches[1] * overlay->h / 2;
  1163 		overlay->planes = 3;
  1164 		break;
  1165 	    case SDL_YUY2_OVERLAY:
  1166 	    case SDL_UYVY_OVERLAY:
  1167 	    case SDL_YVYU_OVERLAY:
  1168 		overlay->pitches[0] = overlay->w*2;
  1169 	        overlay->pixels[0] = swdata->pixels;
  1170 		overlay->planes = 1;
  1171 		break;
  1172 	    default:
  1173 		/* We should never get here (caught above) */
  1174 		break;
  1175 	}
  1176 
  1177 	/* We're all done.. */
  1178 	return(overlay);
  1179 }
  1180 
  1181 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
  1182 {
  1183 	return(0);
  1184 }
  1185 
  1186 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
  1187 {
  1188 	return;
  1189 }
  1190 
  1191 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
  1192 {
  1193 	struct private_yuvhwdata *swdata;
  1194 	SDL_Surface *stretch;
  1195 	SDL_Surface *display;
  1196 	int scale_2x;
  1197 	Uint8 *lum, *Cr, *Cb;
  1198 	Uint8 *dst;
  1199 	int mod;
  1200 
  1201 	swdata = overlay->hwdata;
  1202 	scale_2x = 0;
  1203 	stretch = 0;
  1204 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
  1205 		if ( (dstrect->w == 2*overlay->w) &&
  1206 		     (dstrect->h == 2*overlay->h) ) {
  1207 			scale_2x = 1;
  1208 		} else {
  1209 			if ( ! swdata->stretch ) {
  1210 				display = swdata->display;
  1211 				swdata->stretch = SDL_CreateRGBSurface(
  1212 					SDL_SWSURFACE,
  1213 					overlay->w, overlay->h,
  1214 					display->format->BitsPerPixel,
  1215 					display->format->Rmask,
  1216 					display->format->Gmask,
  1217 					display->format->Bmask, 0);
  1218 				if ( ! swdata->stretch ) {
  1219 					return(-1);
  1220 				}
  1221 			}
  1222 			stretch = swdata->stretch;
  1223 		}
  1224 	}
  1225 
  1226 	if ( stretch ) {
  1227 		display = stretch;
  1228 	} else {
  1229 		display = swdata->display;
  1230 	}
  1231 	switch (overlay->format) {
  1232 	    case SDL_YV12_OVERLAY:
  1233 		lum = overlay->pixels[0];
  1234 		Cr =  overlay->pixels[1];
  1235 		Cb =  overlay->pixels[2];
  1236 		break;
  1237 	    case SDL_IYUV_OVERLAY:
  1238 		lum = overlay->pixels[0];
  1239 		Cr =  overlay->pixels[2];
  1240 		Cb =  overlay->pixels[1];
  1241 		break;
  1242 	    case SDL_YUY2_OVERLAY:
  1243 		lum = overlay->pixels[0];
  1244 		Cr = lum + 3;
  1245 		Cb = lum + 1;
  1246 		break;
  1247 	    case SDL_UYVY_OVERLAY:
  1248 		lum = overlay->pixels[0]+1;
  1249 		Cr = lum + 1;
  1250 		Cb = lum - 1;
  1251 		break;
  1252 	    case SDL_YVYU_OVERLAY:
  1253 		lum = overlay->pixels[0];
  1254 		Cr = lum + 1;
  1255 		Cb = lum + 3;
  1256 		break;
  1257 	    default:
  1258 		SDL_SetError("Unsupported YUV format in blit (??)");
  1259 		return(-1);
  1260 	}
  1261 	if ( SDL_MUSTLOCK(display) ) {
  1262         	if ( SDL_LockSurface(display) < 0 ) {
  1263 			return(-1);
  1264 		}
  1265 	}
  1266 	if ( stretch ) {
  1267 		dst = (Uint8 *)stretch->pixels;
  1268 	} else {
  1269 		dst = (Uint8 *)display->pixels
  1270 			+ dstrect->x * display->format->BytesPerPixel
  1271 			+ dstrect->y * display->pitch;
  1272 	}
  1273 	mod = (display->pitch / display->format->BytesPerPixel);
  1274 
  1275 	if ( scale_2x ) {
  1276 		mod -= (overlay->w * 2);
  1277 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1278 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1279 	} else {
  1280 		mod -= overlay->w;
  1281 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1282 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1283 	}
  1284 	if ( SDL_MUSTLOCK(display) ) {
  1285 		SDL_UnlockSurface(display);
  1286 	}
  1287 	if ( stretch ) {
  1288 		display = swdata->display;
  1289 		SDL_SoftStretch(stretch, NULL, display, dstrect);
  1290 	}
  1291 	SDL_UpdateRects(display, 1, dstrect);
  1292 
  1293 	return(0);
  1294 }
  1295 
  1296 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
  1297 {
  1298 	struct private_yuvhwdata *swdata;
  1299 
  1300 	swdata = overlay->hwdata;
  1301 	if ( swdata ) {
  1302 		if ( swdata->stretch ) {
  1303 			SDL_FreeSurface(swdata->stretch);
  1304 		}
  1305 		if ( swdata->pixels ) {
  1306 			free(swdata->pixels);
  1307 		}
  1308 		if ( swdata->colortab ) {
  1309 			free(swdata->colortab);
  1310 		}
  1311 		if ( swdata->rgb_2_pix ) {
  1312 			free(swdata->rgb_2_pix);
  1313 		}
  1314 		free(swdata);
  1315 	}
  1316 }