src/video/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Sat, 18 May 2002 11:30:38 +0000
changeset 366 c94b390687d2
parent 297 f6ffac90895c
child 739 22dbf364c017
permissions -rw-r--r--
Avoid a conflict in the definition of int32
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002  Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Library General Public
     7     License as published by the Free Software Foundation; either
     8     version 2 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Library General Public License for more details.
    14 
    15     You should have received a copy of the GNU Library General Public
    16     License along with this library; if not, write to the Free
    17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 #ifdef SAVE_RCSID
    24 static char rcsid =
    25  "@(#) $Id$";
    26 #endif
    27 
    28 /* This is the software implementation of the YUV video overlay support */
    29 
    30 /* This code was derived from code carrying the following copyright notices:
    31 
    32  * Copyright (c) 1995 The Regents of the University of California.
    33  * All rights reserved.
    34  * 
    35  * Permission to use, copy, modify, and distribute this software and its
    36  * documentation for any purpose, without fee, and without written agreement is
    37  * hereby granted, provided that the above copyright notice and the following
    38  * two paragraphs appear in all copies of this software.
    39  * 
    40  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
    41  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    42  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
    43  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    44  * 
    45  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
    46  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    47  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    48  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
    49  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    50 
    51  * Copyright (c) 1995 Erik Corry
    52  * All rights reserved.
    53  * 
    54  * Permission to use, copy, modify, and distribute this software and its
    55  * documentation for any purpose, without fee, and without written agreement is
    56  * hereby granted, provided that the above copyright notice and the following
    57  * two paragraphs appear in all copies of this software.
    58  * 
    59  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
    60  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
    61  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
    62  * OF THE POSSIBILITY OF SUCH DAMAGE.
    63  * 
    64  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    65  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    66  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    67  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
    68  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    69 
    70  * Portions of this software Copyright (c) 1995 Brown University.
    71  * All rights reserved.
    72  * 
    73  * Permission to use, copy, modify, and distribute this software and its
    74  * documentation for any purpose, without fee, and without written agreement
    75  * is hereby granted, provided that the above copyright notice and the
    76  * following two paragraphs appear in all copies of this software.
    77  * 
    78  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
    79  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
    80  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
    81  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    82  * 
    83  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
    84  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    85  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
    86  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
    87  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    88  */
    89 
    90 #include <stdlib.h>
    91 #include <string.h>
    92 
    93 #include "SDL_error.h"
    94 #include "SDL_video.h"
    95 #include "SDL_stretch_c.h"
    96 #include "SDL_yuvfuncs.h"
    97 #include "SDL_yuv_sw_c.h"
    98 
    99 /* Function to check the CPU flags */
   100 #define MMX_CPU		0x800000
   101 #ifdef USE_ASMBLIT
   102 #define CPU_Flags()	Hermes_X86_CPU()
   103 #else
   104 #define CPU_Flags()	0L
   105 #endif
   106 
   107 #ifdef USE_ASMBLIT
   108 #define X86_ASSEMBLER
   109 #define HermesConverterInterface	void
   110 #define HermesClearInterface		void
   111 #define STACKCALL
   112 
   113 #include "HeadX86.h"
   114 #endif
   115 
   116 /* The functions used to manipulate software video overlays */
   117 static struct private_yuvhwfuncs sw_yuvfuncs = {
   118 	SDL_LockYUV_SW,
   119 	SDL_UnlockYUV_SW,
   120 	SDL_DisplayYUV_SW,
   121 	SDL_FreeYUV_SW
   122 };
   123 
   124 /* RGB conversion lookup tables */
   125 struct private_yuvhwdata {
   126 	SDL_Surface *stretch;
   127 	SDL_Surface *display;
   128 	Uint8 *pixels;
   129 	int *colortab;
   130 	Uint32 *rgb_2_pix;
   131 	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
   132                           unsigned char *lum, unsigned char *cr,
   133                           unsigned char *cb, unsigned char *out,
   134                           int rows, int cols, int mod );
   135 	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
   136 	                  unsigned char *lum, unsigned char *cr,
   137                           unsigned char *cb, unsigned char *out,
   138                           int rows, int cols, int mod );
   139 
   140 	/* These are just so we don't have to allocate them separately */
   141 	Uint16 pitches[3];
   142 	Uint8 *planes[3];
   143 };
   144 
   145 
   146 /* The colorspace conversion functions */
   147 
   148 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   149                                      unsigned char *lum, unsigned char *cr,
   150                                      unsigned char *cb, unsigned char *out,
   151                                      int rows, int cols, int mod );
   152 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
   153                                      unsigned char *lum, unsigned char *cr,
   154                                      unsigned char *cb, unsigned char *out,
   155                                      int rows, int cols, int mod );
   156 
   157 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   158                                     unsigned char *lum, unsigned char *cr,
   159                                     unsigned char *cb, unsigned char *out,
   160                                     int rows, int cols, int mod )
   161 {
   162     unsigned short* row1;
   163     unsigned short* row2;
   164     unsigned char* lum2;
   165     int x, y;
   166     int cr_r;
   167     int crb_g;
   168     int cb_b;
   169     int cols_2 = cols / 2;
   170 
   171     row1 = (unsigned short*) out;
   172     row2 = row1 + cols + mod;
   173     lum2 = lum + cols;
   174 
   175     mod += cols + mod;
   176 
   177     y = rows / 2;
   178     while( y-- )
   179     {
   180         x = cols_2;
   181         while( x-- )
   182         {
   183             register int L;
   184 
   185             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   186             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   187                                + colortab[ *cb + 2*256 ];
   188             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   189             ++cr; ++cb;
   190 
   191             L = *lum++;
   192             *row1++ = (rgb_2_pix[ L + cr_r ] |
   193                        rgb_2_pix[ L + crb_g ] |
   194                        rgb_2_pix[ L + cb_b ]);
   195 
   196             L = *lum++;
   197             *row1++ = (rgb_2_pix[ L + cr_r ] |
   198                        rgb_2_pix[ L + crb_g ] |
   199                        rgb_2_pix[ L + cb_b ]);
   200 
   201 
   202             /* Now, do second row.  */
   203 
   204             L = *lum2++;
   205             *row2++ = (rgb_2_pix[ L + cr_r ] |
   206                        rgb_2_pix[ L + crb_g ] |
   207                        rgb_2_pix[ L + cb_b ]);
   208 
   209             L = *lum2++;
   210             *row2++ = (rgb_2_pix[ L + cr_r ] |
   211                        rgb_2_pix[ L + crb_g ] |
   212                        rgb_2_pix[ L + cb_b ]);
   213         }
   214 
   215         /*
   216          * These values are at the start of the next line, (due
   217          * to the ++'s above),but they need to be at the start
   218          * of the line after that.
   219          */
   220         lum  += cols;
   221         lum2 += cols;
   222         row1 += mod;
   223         row2 += mod;
   224     }
   225 }
   226 
   227 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   228                                     unsigned char *lum, unsigned char *cr,
   229                                     unsigned char *cb, unsigned char *out,
   230                                     int rows, int cols, int mod )
   231 {
   232     unsigned int value;
   233     unsigned char* row1;
   234     unsigned char* row2;
   235     unsigned char* lum2;
   236     int x, y;
   237     int cr_r;
   238     int crb_g;
   239     int cb_b;
   240     int cols_2 = cols / 2;
   241 
   242     row1 = out;
   243     row2 = row1 + cols*3 + mod*3;
   244     lum2 = lum + cols;
   245 
   246     mod += cols + mod;
   247     mod *= 3;
   248 
   249     y = rows / 2;
   250     while( y-- )
   251     {
   252         x = cols_2;
   253         while( x-- )
   254         {
   255             register int L;
   256 
   257             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   258             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   259                                + colortab[ *cb + 2*256 ];
   260             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   261             ++cr; ++cb;
   262 
   263             L = *lum++;
   264             value = (rgb_2_pix[ L + cr_r ] |
   265                      rgb_2_pix[ L + crb_g ] |
   266                      rgb_2_pix[ L + cb_b ]);
   267             *row1++ = (value      ) & 0xFF;
   268             *row1++ = (value >>  8) & 0xFF;
   269             *row1++ = (value >> 16) & 0xFF;
   270 
   271             L = *lum++;
   272             value = (rgb_2_pix[ L + cr_r ] |
   273                      rgb_2_pix[ L + crb_g ] |
   274                      rgb_2_pix[ L + cb_b ]);
   275             *row1++ = (value      ) & 0xFF;
   276             *row1++ = (value >>  8) & 0xFF;
   277             *row1++ = (value >> 16) & 0xFF;
   278 
   279 
   280             /* Now, do second row.  */
   281 
   282             L = *lum2++;
   283             value = (rgb_2_pix[ L + cr_r ] |
   284                      rgb_2_pix[ L + crb_g ] |
   285                      rgb_2_pix[ L + cb_b ]);
   286             *row2++ = (value      ) & 0xFF;
   287             *row2++ = (value >>  8) & 0xFF;
   288             *row2++ = (value >> 16) & 0xFF;
   289 
   290             L = *lum2++;
   291             value = (rgb_2_pix[ L + cr_r ] |
   292                      rgb_2_pix[ L + crb_g ] |
   293                      rgb_2_pix[ L + cb_b ]);
   294             *row2++ = (value      ) & 0xFF;
   295             *row2++ = (value >>  8) & 0xFF;
   296             *row2++ = (value >> 16) & 0xFF;
   297         }
   298 
   299         /*
   300          * These values are at the start of the next line, (due
   301          * to the ++'s above),but they need to be at the start
   302          * of the line after that.
   303          */
   304         lum  += cols;
   305         lum2 += cols;
   306         row1 += mod;
   307         row2 += mod;
   308     }
   309 }
   310 
   311 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
   312                                     unsigned char *lum, unsigned char *cr,
   313                                     unsigned char *cb, unsigned char *out,
   314                                     int rows, int cols, int mod )
   315 {
   316     unsigned int* row1;
   317     unsigned int* row2;
   318     unsigned char* lum2;
   319     int x, y;
   320     int cr_r;
   321     int crb_g;
   322     int cb_b;
   323     int cols_2 = cols / 2;
   324 
   325     row1 = (unsigned int*) out;
   326     row2 = row1 + cols + mod;
   327     lum2 = lum + cols;
   328 
   329     mod += cols + mod;
   330 
   331     y = rows / 2;
   332     while( y-- )
   333     {
   334         x = cols_2;
   335         while( x-- )
   336         {
   337             register int L;
   338 
   339             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   340             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   341                                + colortab[ *cb + 2*256 ];
   342             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   343             ++cr; ++cb;
   344 
   345             L = *lum++;
   346             *row1++ = (rgb_2_pix[ L + cr_r ] |
   347                        rgb_2_pix[ L + crb_g ] |
   348                        rgb_2_pix[ L + cb_b ]);
   349 
   350             L = *lum++;
   351             *row1++ = (rgb_2_pix[ L + cr_r ] |
   352                        rgb_2_pix[ L + crb_g ] |
   353                        rgb_2_pix[ L + cb_b ]);
   354 
   355 
   356             /* Now, do second row.  */
   357 
   358             L = *lum2++;
   359             *row2++ = (rgb_2_pix[ L + cr_r ] |
   360                        rgb_2_pix[ L + crb_g ] |
   361                        rgb_2_pix[ L + cb_b ]);
   362 
   363             L = *lum2++;
   364             *row2++ = (rgb_2_pix[ L + cr_r ] |
   365                        rgb_2_pix[ L + crb_g ] |
   366                        rgb_2_pix[ L + cb_b ]);
   367         }
   368 
   369         /*
   370          * These values are at the start of the next line, (due
   371          * to the ++'s above),but they need to be at the start
   372          * of the line after that.
   373          */
   374         lum  += cols;
   375         lum2 += cols;
   376         row1 += mod;
   377         row2 += mod;
   378     }
   379 }
   380 
   381 /*
   382  * In this function I make use of a nasty trick. The tables have the lower
   383  * 16 bits replicated in the upper 16. This means I can write ints and get
   384  * the horisontal doubling for free (almost).
   385  */
   386 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   387                                     unsigned char *lum, unsigned char *cr,
   388                                     unsigned char *cb, unsigned char *out,
   389                                     int rows, int cols, int mod )
   390 {
   391     unsigned int* row1 = (unsigned int*) out;
   392     const int next_row = cols+(mod/2);
   393     unsigned int* row2 = row1 + 2*next_row;
   394     unsigned char* lum2;
   395     int x, y;
   396     int cr_r;
   397     int crb_g;
   398     int cb_b;
   399     int cols_2 = cols / 2;
   400 
   401     lum2 = lum + cols;
   402 
   403     mod = (next_row * 3) + (mod/2);
   404 
   405     y = rows / 2;
   406     while( y-- )
   407     {
   408         x = cols_2;
   409         while( x-- )
   410         {
   411             register int L;
   412 
   413             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   414             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   415                                + colortab[ *cb + 2*256 ];
   416             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   417             ++cr; ++cb;
   418 
   419             L = *lum++;
   420             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   421                                         rgb_2_pix[ L + crb_g ] |
   422                                         rgb_2_pix[ L + cb_b ]);
   423             row1++;
   424 
   425             L = *lum++;
   426             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
   427                                         rgb_2_pix[ L + crb_g ] |
   428                                         rgb_2_pix[ L + cb_b ]);
   429             row1++;
   430 
   431 
   432             /* Now, do second row. */
   433 
   434             L = *lum2++;
   435             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   436                                         rgb_2_pix[ L + crb_g ] |
   437                                         rgb_2_pix[ L + cb_b ]);
   438             row2++;
   439 
   440             L = *lum2++;
   441             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
   442                                         rgb_2_pix[ L + crb_g ] |
   443                                         rgb_2_pix[ L + cb_b ]);
   444             row2++;
   445         }
   446 
   447         /*
   448          * These values are at the start of the next line, (due
   449          * to the ++'s above),but they need to be at the start
   450          * of the line after that.
   451          */
   452         lum  += cols;
   453         lum2 += cols;
   454         row1 += mod;
   455         row2 += mod;
   456     }
   457 }
   458 
   459 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   460                                     unsigned char *lum, unsigned char *cr,
   461                                     unsigned char *cb, unsigned char *out,
   462                                     int rows, int cols, int mod )
   463 {
   464     unsigned int value;
   465     unsigned char* row1 = out;
   466     const int next_row = (cols*2 + mod) * 3;
   467     unsigned char* row2 = row1 + 2*next_row;
   468     unsigned char* lum2;
   469     int x, y;
   470     int cr_r;
   471     int crb_g;
   472     int cb_b;
   473     int cols_2 = cols / 2;
   474 
   475     lum2 = lum + cols;
   476 
   477     mod = next_row*3 + mod*3;
   478 
   479     y = rows / 2;
   480     while( y-- )
   481     {
   482         x = cols_2;
   483         while( x-- )
   484         {
   485             register int L;
   486 
   487             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   488             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   489                                + colortab[ *cb + 2*256 ];
   490             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   491             ++cr; ++cb;
   492 
   493             L = *lum++;
   494             value = (rgb_2_pix[ L + cr_r ] |
   495                      rgb_2_pix[ L + crb_g ] |
   496                      rgb_2_pix[ L + cb_b ]);
   497             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   498                      (value      ) & 0xFF;
   499             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   500                      (value >>  8) & 0xFF;
   501             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   502                      (value >> 16) & 0xFF;
   503             row1 += 2*3;
   504 
   505             L = *lum++;
   506             value = (rgb_2_pix[ L + cr_r ] |
   507                      rgb_2_pix[ L + crb_g ] |
   508                      rgb_2_pix[ L + cb_b ]);
   509             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
   510                      (value      ) & 0xFF;
   511             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
   512                      (value >>  8) & 0xFF;
   513             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
   514                      (value >> 16) & 0xFF;
   515             row1 += 2*3;
   516 
   517 
   518             /* Now, do second row. */
   519 
   520             L = *lum2++;
   521             value = (rgb_2_pix[ L + cr_r ] |
   522                      rgb_2_pix[ L + crb_g ] |
   523                      rgb_2_pix[ L + cb_b ]);
   524             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   525                      (value      ) & 0xFF;
   526             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   527                      (value >>  8) & 0xFF;
   528             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   529                      (value >> 16) & 0xFF;
   530             row2 += 2*3;
   531 
   532             L = *lum2++;
   533             value = (rgb_2_pix[ L + cr_r ] |
   534                      rgb_2_pix[ L + crb_g ] |
   535                      rgb_2_pix[ L + cb_b ]);
   536             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
   537                      (value      ) & 0xFF;
   538             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
   539                      (value >>  8) & 0xFF;
   540             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
   541                      (value >> 16) & 0xFF;
   542             row2 += 2*3;
   543         }
   544 
   545         /*
   546          * These values are at the start of the next line, (due
   547          * to the ++'s above),but they need to be at the start
   548          * of the line after that.
   549          */
   550         lum  += cols;
   551         lum2 += cols;
   552         row1 += mod;
   553         row2 += mod;
   554     }
   555 }
   556 
   557 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
   558                                     unsigned char *lum, unsigned char *cr,
   559                                     unsigned char *cb, unsigned char *out,
   560                                     int rows, int cols, int mod )
   561 {
   562     unsigned int* row1 = (unsigned int*) out;
   563     const int next_row = cols*2+mod;
   564     unsigned int* row2 = row1 + 2*next_row;
   565     unsigned char* lum2;
   566     int x, y;
   567     int cr_r;
   568     int crb_g;
   569     int cb_b;
   570     int cols_2 = cols / 2;
   571 
   572     lum2 = lum + cols;
   573 
   574     mod = (next_row * 3) + mod;
   575 
   576     y = rows / 2;
   577     while( y-- )
   578     {
   579         x = cols_2;
   580         while( x-- )
   581         {
   582             register int L;
   583 
   584             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   585             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   586                                + colortab[ *cb + 2*256 ];
   587             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   588             ++cr; ++cb;
   589 
   590             L = *lum++;
   591             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   592                                        (rgb_2_pix[ L + cr_r ] |
   593                                         rgb_2_pix[ L + crb_g ] |
   594                                         rgb_2_pix[ L + cb_b ]);
   595             row1 += 2;
   596 
   597             L = *lum++;
   598             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
   599                                        (rgb_2_pix[ L + cr_r ] |
   600                                         rgb_2_pix[ L + crb_g ] |
   601                                         rgb_2_pix[ L + cb_b ]);
   602             row1 += 2;
   603 
   604 
   605             /* Now, do second row. */
   606 
   607             L = *lum2++;
   608             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   609                                        (rgb_2_pix[ L + cr_r ] |
   610                                         rgb_2_pix[ L + crb_g ] |
   611                                         rgb_2_pix[ L + cb_b ]);
   612             row2 += 2;
   613 
   614             L = *lum2++;
   615             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
   616                                        (rgb_2_pix[ L + cr_r ] |
   617                                         rgb_2_pix[ L + crb_g ] |
   618                                         rgb_2_pix[ L + cb_b ]);
   619             row2 += 2;
   620         }
   621 
   622         /*
   623          * These values are at the start of the next line, (due
   624          * to the ++'s above),but they need to be at the start
   625          * of the line after that.
   626          */
   627         lum  += cols;
   628         lum2 += cols;
   629         row1 += mod;
   630         row2 += mod;
   631     }
   632 }
   633 
   634 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   635                                     unsigned char *lum, unsigned char *cr,
   636                                     unsigned char *cb, unsigned char *out,
   637                                     int rows, int cols, int mod )
   638 {
   639     unsigned short* row;
   640     int x, y;
   641     int cr_r;
   642     int crb_g;
   643     int cb_b;
   644     int cols_2 = cols / 2;
   645 
   646     row = (unsigned short*) out;
   647 
   648     y = rows;
   649     while( y-- )
   650     {
   651         x = cols_2;
   652         while( x-- )
   653         {
   654             register int L;
   655 
   656             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   657             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   658                                + colortab[ *cb + 2*256 ];
   659             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   660             cr += 4; cb += 4;
   661 
   662             L = *lum; lum += 2;
   663             *row++ = (rgb_2_pix[ L + cr_r ] |
   664                        rgb_2_pix[ L + crb_g ] |
   665                        rgb_2_pix[ L + cb_b ]);
   666 
   667             L = *lum; lum += 2;
   668             *row++ = (rgb_2_pix[ L + cr_r ] |
   669                        rgb_2_pix[ L + crb_g ] |
   670                        rgb_2_pix[ L + cb_b ]);
   671 
   672         }
   673 
   674         row += mod;
   675     }
   676 }
   677 
   678 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   679                                     unsigned char *lum, unsigned char *cr,
   680                                     unsigned char *cb, unsigned char *out,
   681                                     int rows, int cols, int mod )
   682 {
   683     unsigned int value;
   684     unsigned char* row;
   685     int x, y;
   686     int cr_r;
   687     int crb_g;
   688     int cb_b;
   689     int cols_2 = cols / 2;
   690 
   691     row = (unsigned char*) out;
   692     mod *= 3;
   693     y = rows;
   694     while( y-- )
   695     {
   696         x = cols_2;
   697         while( x-- )
   698         {
   699             register int L;
   700 
   701             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   702             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   703                                + colortab[ *cb + 2*256 ];
   704             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   705             cr += 4; cb += 4;
   706 
   707             L = *lum; lum += 2;
   708             value = (rgb_2_pix[ L + cr_r ] |
   709                      rgb_2_pix[ L + crb_g ] |
   710                      rgb_2_pix[ L + cb_b ]);
   711             *row++ = (value      ) & 0xFF;
   712             *row++ = (value >>  8) & 0xFF;
   713             *row++ = (value >> 16) & 0xFF;
   714 
   715             L = *lum; lum += 2;
   716             value = (rgb_2_pix[ L + cr_r ] |
   717                      rgb_2_pix[ L + crb_g ] |
   718                      rgb_2_pix[ L + cb_b ]);
   719             *row++ = (value      ) & 0xFF;
   720             *row++ = (value >>  8) & 0xFF;
   721             *row++ = (value >> 16) & 0xFF;
   722 
   723         }
   724         row += mod;
   725     }
   726 }
   727 
   728 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
   729                                     unsigned char *lum, unsigned char *cr,
   730                                     unsigned char *cb, unsigned char *out,
   731                                     int rows, int cols, int mod )
   732 {
   733     unsigned int* row;
   734     int x, y;
   735     int cr_r;
   736     int crb_g;
   737     int cb_b;
   738     int cols_2 = cols / 2;
   739 
   740     row = (unsigned int*) out;
   741     y = rows;
   742     while( y-- )
   743     {
   744         x = cols_2;
   745         while( x-- )
   746         {
   747             register int L;
   748 
   749             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   750             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   751                                + colortab[ *cb + 2*256 ];
   752             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   753             cr += 4; cb += 4;
   754 
   755             L = *lum; lum += 2;
   756             *row++ = (rgb_2_pix[ L + cr_r ] |
   757                        rgb_2_pix[ L + crb_g ] |
   758                        rgb_2_pix[ L + cb_b ]);
   759 
   760             L = *lum; lum += 2;
   761             *row++ = (rgb_2_pix[ L + cr_r ] |
   762                        rgb_2_pix[ L + crb_g ] |
   763                        rgb_2_pix[ L + cb_b ]);
   764 
   765 
   766         }
   767         row += mod;
   768     }
   769 }
   770 
   771 /*
   772  * In this function I make use of a nasty trick. The tables have the lower
   773  * 16 bits replicated in the upper 16. This means I can write ints and get
   774  * the horisontal doubling for free (almost).
   775  */
   776 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   777                                     unsigned char *lum, unsigned char *cr,
   778                                     unsigned char *cb, unsigned char *out,
   779                                     int rows, int cols, int mod )
   780 {
   781     unsigned int* row = (unsigned int*) out;
   782     const int next_row = cols+(mod/2);
   783     int x, y;
   784     int cr_r;
   785     int crb_g;
   786     int cb_b;
   787     int cols_2 = cols / 2;
   788 
   789     y = rows;
   790     while( y-- )
   791     {
   792         x = cols_2;
   793         while( x-- )
   794         {
   795             register int L;
   796 
   797             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   798             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   799                                + colortab[ *cb + 2*256 ];
   800             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   801             cr += 4; cb += 4;
   802 
   803             L = *lum; lum += 2;
   804             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   805                                         rgb_2_pix[ L + crb_g ] |
   806                                         rgb_2_pix[ L + cb_b ]);
   807             row++;
   808 
   809             L = *lum; lum += 2;
   810             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
   811                                         rgb_2_pix[ L + crb_g ] |
   812                                         rgb_2_pix[ L + cb_b ]);
   813             row++;
   814 
   815         }
   816         row += next_row;
   817     }
   818 }
   819 
   820 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   821                                     unsigned char *lum, unsigned char *cr,
   822                                     unsigned char *cb, unsigned char *out,
   823                                     int rows, int cols, int mod )
   824 {
   825     unsigned int value;
   826     unsigned char* row = out;
   827     const int next_row = (cols*2 + mod) * 3;
   828     int x, y;
   829     int cr_r;
   830     int crb_g;
   831     int cb_b;
   832     int cols_2 = cols / 2;
   833     y = rows;
   834     while( y-- )
   835     {
   836         x = cols_2;
   837         while( x-- )
   838         {
   839             register int L;
   840 
   841             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   842             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   843                                + colortab[ *cb + 2*256 ];
   844             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   845             cr += 4; cb += 4;
   846 
   847             L = *lum; lum += 2;
   848             value = (rgb_2_pix[ L + cr_r ] |
   849                      rgb_2_pix[ L + crb_g ] |
   850                      rgb_2_pix[ L + cb_b ]);
   851             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   852                      (value      ) & 0xFF;
   853             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   854                      (value >>  8) & 0xFF;
   855             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   856                      (value >> 16) & 0xFF;
   857             row += 2*3;
   858 
   859             L = *lum; lum += 2;
   860             value = (rgb_2_pix[ L + cr_r ] |
   861                      rgb_2_pix[ L + crb_g ] |
   862                      rgb_2_pix[ L + cb_b ]);
   863             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
   864                      (value      ) & 0xFF;
   865             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
   866                      (value >>  8) & 0xFF;
   867             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
   868                      (value >> 16) & 0xFF;
   869             row += 2*3;
   870 
   871         }
   872         row += next_row;
   873     }
   874 }
   875 
   876 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
   877                                     unsigned char *lum, unsigned char *cr,
   878                                     unsigned char *cb, unsigned char *out,
   879                                     int rows, int cols, int mod )
   880 {
   881     unsigned int* row = (unsigned int*) out;
   882     const int next_row = cols*2+mod;
   883     int x, y;
   884     int cr_r;
   885     int crb_g;
   886     int cb_b;
   887     int cols_2 = cols / 2;
   888     mod+=mod;
   889     y = rows;
   890     while( y-- )
   891     {
   892         x = cols_2;
   893         while( x-- )
   894         {
   895             register int L;
   896 
   897             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
   898             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
   899                                + colortab[ *cb + 2*256 ];
   900             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
   901             cr += 4; cb += 4;
   902 
   903             L = *lum; lum += 2;
   904             row[0] = row[1] = row[next_row] = row[next_row+1] =
   905                                        (rgb_2_pix[ L + cr_r ] |
   906                                         rgb_2_pix[ L + crb_g ] |
   907                                         rgb_2_pix[ L + cb_b ]);
   908             row += 2;
   909 
   910             L = *lum; lum += 2;
   911             row[0] = row[1] = row[next_row] = row[next_row+1] =
   912                                        (rgb_2_pix[ L + cr_r ] |
   913                                         rgb_2_pix[ L + crb_g ] |
   914                                         rgb_2_pix[ L + cb_b ]);
   915             row += 2;
   916 
   917 
   918         }
   919 
   920         row += next_row;
   921     }
   922 }
   923 
   924 /*
   925  * How many 1 bits are there in the Uint32.
   926  * Low performance, do not call often.
   927  */
   928 static int number_of_bits_set( Uint32 a )
   929 {
   930     if(!a) return 0;
   931     if(a & 1) return 1 + number_of_bits_set(a >> 1);
   932     return(number_of_bits_set(a >> 1));
   933 }
   934 
   935 /*
   936  * How many 0 bits are there at least significant end of Uint32.
   937  * Low performance, do not call often.
   938  */
   939 static int free_bits_at_bottom( Uint32 a )
   940 {
   941       /* assume char is 8 bits */
   942     if(!a) return sizeof(Uint32) * 8;
   943     if(((Sint32)a) & 1l) return 0;
   944     return 1 + free_bits_at_bottom ( a >> 1);
   945 }
   946 
   947 
   948 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
   949 {
   950 	SDL_Overlay *overlay;
   951 	struct private_yuvhwdata *swdata;
   952 	int *Cr_r_tab;
   953 	int *Cr_g_tab;
   954 	int *Cb_g_tab;
   955 	int *Cb_b_tab;
   956 	Uint32 *r_2_pix_alloc;
   957 	Uint32 *g_2_pix_alloc;
   958 	Uint32 *b_2_pix_alloc;
   959 	int i, cpu_mmx;
   960 	int CR, CB;
   961 	Uint32 Rmask, Gmask, Bmask;
   962 
   963 	/* Only RGB packed pixel conversion supported */
   964 	if ( (display->format->BytesPerPixel != 2) &&
   965 	     (display->format->BytesPerPixel != 3) &&
   966 	     (display->format->BytesPerPixel != 4) ) {
   967 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
   968 		return(NULL);
   969 	}
   970 
   971 	/* Verify that we support the format */
   972 	switch (format) {
   973 	    case SDL_YV12_OVERLAY:
   974 	    case SDL_IYUV_OVERLAY:
   975 	    case SDL_YUY2_OVERLAY:
   976 	    case SDL_UYVY_OVERLAY:
   977 	    case SDL_YVYU_OVERLAY:
   978 		break;
   979 	    default:
   980 		SDL_SetError("Unsupported YUV format");
   981 		return(NULL);
   982 	}
   983 
   984 	/* Create the overlay structure */
   985 	overlay = (SDL_Overlay *)malloc(sizeof *overlay);
   986 	if ( overlay == NULL ) {
   987 		SDL_OutOfMemory();
   988 		return(NULL);
   989 	}
   990 	memset(overlay, 0, (sizeof *overlay));
   991 
   992 	/* Fill in the basic members */
   993 	overlay->format = format;
   994 	overlay->w = width;
   995 	overlay->h = height;
   996 
   997 	/* Set up the YUV surface function structure */
   998 	overlay->hwfuncs = &sw_yuvfuncs;
   999 
  1000 	/* Create the pixel data and lookup tables */
  1001 	swdata = (struct private_yuvhwdata *)malloc(sizeof *swdata);
  1002 	overlay->hwdata = swdata;
  1003 	if ( swdata == NULL ) {
  1004 		SDL_OutOfMemory();
  1005 		SDL_FreeYUVOverlay(overlay);
  1006 		return(NULL);
  1007 	}
  1008 	swdata->stretch = NULL;
  1009 	swdata->display = display;
  1010 	swdata->pixels = (Uint8 *) malloc(width*height*2);
  1011 	swdata->colortab = (int *)malloc(4*256*sizeof(int));
  1012 	Cr_r_tab = &swdata->colortab[0*256];
  1013 	Cr_g_tab = &swdata->colortab[1*256];
  1014 	Cb_g_tab = &swdata->colortab[2*256];
  1015 	Cb_b_tab = &swdata->colortab[3*256];
  1016 	swdata->rgb_2_pix = (Uint32 *)malloc(3*768*sizeof(Uint32));
  1017 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
  1018 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
  1019 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
  1020 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
  1021 		SDL_OutOfMemory();
  1022 		SDL_FreeYUVOverlay(overlay);
  1023 		return(NULL);
  1024 	}
  1025 
  1026 	/* Generate the tables for the display surface */
  1027 	for (i=0; i<256; i++) {
  1028 		/* Gamma correction (luminescence table) and chroma correction
  1029 		   would be done here.  See the Berkeley mpeg_play sources.
  1030 		*/
  1031 		CB = CR = (i-128);
  1032 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
  1033 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
  1034 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
  1035 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
  1036 	}
  1037 
  1038 	/* 
  1039 	 * Set up entries 0-255 in rgb-to-pixel value tables.
  1040 	 */
  1041 	Rmask = display->format->Rmask;
  1042 	Gmask = display->format->Gmask;
  1043 	Bmask = display->format->Bmask;
  1044 	for ( i=0; i<256; ++i ) {
  1045 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
  1046 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
  1047 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
  1048 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
  1049 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
  1050 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
  1051 	}
  1052 
  1053 	/*
  1054 	 * If we have 16-bit output depth, then we double the value
  1055 	 * in the top word. This means that we can write out both
  1056 	 * pixels in the pixel doubling mode with one op. It is 
  1057 	 * harmless in the normal case as storing a 32-bit value
  1058 	 * through a short pointer will lose the top bits anyway.
  1059 	 */
  1060 	if( display->format->BytesPerPixel == 2 ) {
  1061 		for ( i=0; i<256; ++i ) {
  1062 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
  1063 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
  1064 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
  1065 		}
  1066 	}
  1067 
  1068 	/*
  1069 	 * Spread out the values we have to the rest of the array so that
  1070 	 * we do not need to check for overflow.
  1071 	 */
  1072 	for ( i=0; i<256; ++i ) {
  1073 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
  1074 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
  1075 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
  1076 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
  1077 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
  1078 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
  1079 	}
  1080 
  1081 	/* You have chosen wisely... */
  1082 	switch (format) {
  1083 	    case SDL_YV12_OVERLAY:
  1084 	    case SDL_IYUV_OVERLAY:
  1085 		cpu_mmx = CPU_Flags() & MMX_CPU;
  1086 		if ( display->format->BytesPerPixel == 2 ) {
  1087 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1088 			/* inline assembly functions */
  1089 			if ( cpu_mmx && (Rmask == 0xF800) &&
  1090 			                (Gmask == 0x07E0) &&
  1091 				        (Bmask == 0x001F) &&
  1092 			                (width & 15) == 0) {
  1093 /*printf("Using MMX 16-bit 565 dither\n");*/
  1094 				swdata->Display1X = Color565DitherYV12MMX1X;
  1095 			} else {
  1096 /*printf("Using C 16-bit dither\n");*/
  1097 				swdata->Display1X = Color16DitherYV12Mod1X;
  1098 			}
  1099 #else
  1100 			swdata->Display1X = Color16DitherYV12Mod1X;
  1101 #endif
  1102 			swdata->Display2X = Color16DitherYV12Mod2X;
  1103 		}
  1104 		if ( display->format->BytesPerPixel == 3 ) {
  1105 			swdata->Display1X = Color24DitherYV12Mod1X;
  1106 			swdata->Display2X = Color24DitherYV12Mod2X;
  1107 		}
  1108 		if ( display->format->BytesPerPixel == 4 ) {
  1109 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
  1110 			/* inline assembly functions */
  1111 			if ( cpu_mmx && (Rmask == 0x00FF0000) &&
  1112 			                (Gmask == 0x0000FF00) &&
  1113 				        (Bmask == 0x000000FF) && 
  1114 			                (width & 15) == 0) {
  1115 /*printf("Using MMX 32-bit dither\n");*/
  1116 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
  1117 			} else {
  1118 /*printf("Using C 32-bit dither\n");*/
  1119 				swdata->Display1X = Color32DitherYV12Mod1X;
  1120 			}
  1121 #else
  1122 			swdata->Display1X = Color32DitherYV12Mod1X;
  1123 #endif
  1124 			swdata->Display2X = Color32DitherYV12Mod2X;
  1125 		}
  1126 		break;
  1127 	    case SDL_YUY2_OVERLAY:
  1128 	    case SDL_UYVY_OVERLAY:
  1129 	    case SDL_YVYU_OVERLAY:
  1130 		if ( display->format->BytesPerPixel == 2 ) {
  1131 			swdata->Display1X = Color16DitherYUY2Mod1X;
  1132 			swdata->Display2X = Color16DitherYUY2Mod2X;
  1133 		}
  1134 		if ( display->format->BytesPerPixel == 3 ) {
  1135 			swdata->Display1X = Color24DitherYUY2Mod1X;
  1136 			swdata->Display2X = Color24DitherYUY2Mod2X;
  1137 		}
  1138 		if ( display->format->BytesPerPixel == 4 ) {
  1139 			swdata->Display1X = Color32DitherYUY2Mod1X;
  1140 			swdata->Display2X = Color32DitherYUY2Mod2X;
  1141 		}
  1142 		break;
  1143 	    default:
  1144 		/* We should never get here (caught above) */
  1145 		break;
  1146 	}
  1147 
  1148 	/* Find the pitch and offset values for the overlay */
  1149 	overlay->pitches = swdata->pitches;
  1150 	overlay->pixels = swdata->planes;
  1151 	switch (format) {
  1152 	    case SDL_YV12_OVERLAY:
  1153 	    case SDL_IYUV_OVERLAY:
  1154 		overlay->pitches[0] = overlay->w;
  1155 		overlay->pitches[1] = overlay->pitches[0] / 2;
  1156 		overlay->pitches[2] = overlay->pitches[0] / 2;
  1157 	        overlay->pixels[0] = swdata->pixels;
  1158 	        overlay->pixels[1] = overlay->pixels[0] +
  1159 		                     overlay->pitches[0] * overlay->h;
  1160 	        overlay->pixels[2] = overlay->pixels[1] +
  1161 		                     overlay->pitches[1] * overlay->h / 2;
  1162 		overlay->planes = 3;
  1163 		break;
  1164 	    case SDL_YUY2_OVERLAY:
  1165 	    case SDL_UYVY_OVERLAY:
  1166 	    case SDL_YVYU_OVERLAY:
  1167 		overlay->pitches[0] = overlay->w*2;
  1168 	        overlay->pixels[0] = swdata->pixels;
  1169 		overlay->planes = 1;
  1170 		break;
  1171 	    default:
  1172 		/* We should never get here (caught above) */
  1173 		break;
  1174 	}
  1175 
  1176 	/* We're all done.. */
  1177 	return(overlay);
  1178 }
  1179 
  1180 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
  1181 {
  1182 	return(0);
  1183 }
  1184 
  1185 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
  1186 {
  1187 	return;
  1188 }
  1189 
  1190 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
  1191 {
  1192 	struct private_yuvhwdata *swdata;
  1193 	SDL_Surface *stretch;
  1194 	SDL_Surface *display;
  1195 	int scale_2x;
  1196 	Uint8 *lum, *Cr, *Cb;
  1197 	Uint8 *dst;
  1198 	int mod;
  1199 
  1200 	swdata = overlay->hwdata;
  1201 	scale_2x = 0;
  1202 	stretch = 0;
  1203 	if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
  1204 		if ( (dstrect->w == 2*overlay->w) &&
  1205 		     (dstrect->h == 2*overlay->h) ) {
  1206 			scale_2x = 1;
  1207 		} else {
  1208 			if ( ! swdata->stretch ) {
  1209 				display = swdata->display;
  1210 				swdata->stretch = SDL_CreateRGBSurface(
  1211 					SDL_SWSURFACE,
  1212 					overlay->w, overlay->h,
  1213 					display->format->BitsPerPixel,
  1214 					display->format->Rmask,
  1215 					display->format->Gmask,
  1216 					display->format->Bmask, 0);
  1217 				if ( ! swdata->stretch ) {
  1218 					return(-1);
  1219 				}
  1220 			}
  1221 			stretch = swdata->stretch;
  1222 		}
  1223 	}
  1224 
  1225 	if ( stretch ) {
  1226 		display = stretch;
  1227 	} else {
  1228 		display = swdata->display;
  1229 	}
  1230 	switch (overlay->format) {
  1231 	    case SDL_YV12_OVERLAY:
  1232 		lum = overlay->pixels[0];
  1233 		Cr =  overlay->pixels[1];
  1234 		Cb =  overlay->pixels[2];
  1235 		break;
  1236 	    case SDL_IYUV_OVERLAY:
  1237 		lum = overlay->pixels[0];
  1238 		Cr =  overlay->pixels[2];
  1239 		Cb =  overlay->pixels[1];
  1240 		break;
  1241 	    case SDL_YUY2_OVERLAY:
  1242 		lum = overlay->pixels[0];
  1243 		Cr = lum + 3;
  1244 		Cb = lum + 1;
  1245 		break;
  1246 	    case SDL_UYVY_OVERLAY:
  1247 		lum = overlay->pixels[0]+1;
  1248 		Cr = lum + 1;
  1249 		Cb = lum - 1;
  1250 		break;
  1251 	    case SDL_YVYU_OVERLAY:
  1252 		lum = overlay->pixels[0];
  1253 		Cr = lum + 1;
  1254 		Cb = lum + 3;
  1255 		break;
  1256 	    default:
  1257 		SDL_SetError("Unsupported YUV format in blit");
  1258 		return(-1);
  1259 	}
  1260 	if ( SDL_MUSTLOCK(display) ) {
  1261         	if ( SDL_LockSurface(display) < 0 ) {
  1262 			return(-1);
  1263 		}
  1264 	}
  1265 	if ( stretch ) {
  1266 		dst = (Uint8 *)stretch->pixels;
  1267 	} else {
  1268 		dst = (Uint8 *)display->pixels
  1269 			+ dstrect->x * display->format->BytesPerPixel
  1270 			+ dstrect->y * display->pitch;
  1271 	}
  1272 	mod = (display->pitch / display->format->BytesPerPixel);
  1273 
  1274 	if ( scale_2x ) {
  1275 		mod -= (overlay->w * 2);
  1276 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
  1277 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1278 	} else {
  1279 		mod -= overlay->w;
  1280 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
  1281 		                  lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
  1282 	}
  1283 	if ( SDL_MUSTLOCK(display) ) {
  1284 		SDL_UnlockSurface(display);
  1285 	}
  1286 	if ( stretch ) {
  1287 		display = swdata->display;
  1288 		SDL_SoftStretch(stretch, NULL, display, dstrect);
  1289 	}
  1290 	SDL_UpdateRects(display, 1, dstrect);
  1291 
  1292 	return(0);
  1293 }
  1294 
  1295 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
  1296 {
  1297 	struct private_yuvhwdata *swdata;
  1298 
  1299 	swdata = overlay->hwdata;
  1300 	if ( swdata ) {
  1301 		if ( swdata->stretch ) {
  1302 			SDL_FreeSurface(swdata->stretch);
  1303 		}
  1304 		if ( swdata->pixels ) {
  1305 			free(swdata->pixels);
  1306 		}
  1307 		if ( swdata->colortab ) {
  1308 			free(swdata->colortab);
  1309 		}
  1310 		if ( swdata->rgb_2_pix ) {
  1311 			free(swdata->rgb_2_pix);
  1312 		}
  1313 		free(swdata);
  1314 	}
  1315 }