For sanity's sake, removed the '&' when passing copy_row array to asm.
2 SDL - Simple DirectMedia Layer
3 Copyright (C) 1997-2004 Sam Lantinga
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 /* This is the software implementation of the YUV video overlay support */
30 /* This code was derived from code carrying the following copyright notices:
32 * Copyright (c) 1995 The Regents of the University of California.
33 * All rights reserved.
35 * Permission to use, copy, modify, and distribute this software and its
36 * documentation for any purpose, without fee, and without written agreement is
37 * hereby granted, provided that the above copyright notice and the following
38 * two paragraphs appear in all copies of this software.
40 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
41 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
42 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
43 * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
46 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
47 * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
48 * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
49 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
51 * Copyright (c) 1995 Erik Corry
52 * All rights reserved.
54 * Permission to use, copy, modify, and distribute this software and its
55 * documentation for any purpose, without fee, and without written agreement is
56 * hereby granted, provided that the above copyright notice and the following
57 * two paragraphs appear in all copies of this software.
59 * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
60 * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
61 * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
62 * OF THE POSSIBILITY OF SUCH DAMAGE.
64 * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
65 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
66 * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
67 * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
68 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
70 * Portions of this software Copyright (c) 1995 Brown University.
71 * All rights reserved.
73 * Permission to use, copy, modify, and distribute this software and its
74 * documentation for any purpose, without fee, and without written agreement
75 * is hereby granted, provided that the above copyright notice and the
76 * following two paragraphs appear in all copies of this software.
78 * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
79 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
80 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
81 * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
83 * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
84 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
85 * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
86 * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
87 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
93 #include "SDL_error.h"
94 #include "SDL_video.h"
95 #include "SDL_cpuinfo.h"
96 #include "SDL_stretch_c.h"
97 #include "SDL_yuvfuncs.h"
98 #include "SDL_yuv_sw_c.h"
100 /* The functions used to manipulate software video overlays */
101 static struct private_yuvhwfuncs sw_yuvfuncs = {
108 /* RGB conversion lookup tables */
109 struct private_yuvhwdata {
110 SDL_Surface *stretch;
111 SDL_Surface *display;
115 void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
116 unsigned char *lum, unsigned char *cr,
117 unsigned char *cb, unsigned char *out,
118 int rows, int cols, int mod );
119 void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
120 unsigned char *lum, unsigned char *cr,
121 unsigned char *cb, unsigned char *out,
122 int rows, int cols, int mod );
124 /* These are just so we don't have to allocate them separately */
130 /* The colorspace conversion functions */
132 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
133 unsigned char *lum, unsigned char *cr,
134 unsigned char *cb, unsigned char *out,
135 int rows, int cols, int mod );
136 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
137 unsigned char *lum, unsigned char *cr,
138 unsigned char *cb, unsigned char *out,
139 int rows, int cols, int mod );
141 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
142 unsigned char *lum, unsigned char *cr,
143 unsigned char *cb, unsigned char *out,
144 int rows, int cols, int mod )
146 unsigned short* row1;
147 unsigned short* row2;
153 int cols_2 = cols / 2;
155 row1 = (unsigned short*) out;
156 row2 = row1 + cols + mod;
169 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
170 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
171 + colortab[ *cb + 2*256 ];
172 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
176 *row1++ = (rgb_2_pix[ L + cr_r ] |
177 rgb_2_pix[ L + crb_g ] |
178 rgb_2_pix[ L + cb_b ]);
181 *row1++ = (rgb_2_pix[ L + cr_r ] |
182 rgb_2_pix[ L + crb_g ] |
183 rgb_2_pix[ L + cb_b ]);
186 /* Now, do second row. */
189 *row2++ = (rgb_2_pix[ L + cr_r ] |
190 rgb_2_pix[ L + crb_g ] |
191 rgb_2_pix[ L + cb_b ]);
194 *row2++ = (rgb_2_pix[ L + cr_r ] |
195 rgb_2_pix[ L + crb_g ] |
196 rgb_2_pix[ L + cb_b ]);
200 * These values are at the start of the next line, (due
201 * to the ++'s above),but they need to be at the start
202 * of the line after that.
211 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
212 unsigned char *lum, unsigned char *cr,
213 unsigned char *cb, unsigned char *out,
214 int rows, int cols, int mod )
224 int cols_2 = cols / 2;
227 row2 = row1 + cols*3 + mod*3;
241 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
242 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
243 + colortab[ *cb + 2*256 ];
244 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
248 value = (rgb_2_pix[ L + cr_r ] |
249 rgb_2_pix[ L + crb_g ] |
250 rgb_2_pix[ L + cb_b ]);
251 *row1++ = (value ) & 0xFF;
252 *row1++ = (value >> 8) & 0xFF;
253 *row1++ = (value >> 16) & 0xFF;
256 value = (rgb_2_pix[ L + cr_r ] |
257 rgb_2_pix[ L + crb_g ] |
258 rgb_2_pix[ L + cb_b ]);
259 *row1++ = (value ) & 0xFF;
260 *row1++ = (value >> 8) & 0xFF;
261 *row1++ = (value >> 16) & 0xFF;
264 /* Now, do second row. */
267 value = (rgb_2_pix[ L + cr_r ] |
268 rgb_2_pix[ L + crb_g ] |
269 rgb_2_pix[ L + cb_b ]);
270 *row2++ = (value ) & 0xFF;
271 *row2++ = (value >> 8) & 0xFF;
272 *row2++ = (value >> 16) & 0xFF;
275 value = (rgb_2_pix[ L + cr_r ] |
276 rgb_2_pix[ L + crb_g ] |
277 rgb_2_pix[ L + cb_b ]);
278 *row2++ = (value ) & 0xFF;
279 *row2++ = (value >> 8) & 0xFF;
280 *row2++ = (value >> 16) & 0xFF;
284 * These values are at the start of the next line, (due
285 * to the ++'s above),but they need to be at the start
286 * of the line after that.
295 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
296 unsigned char *lum, unsigned char *cr,
297 unsigned char *cb, unsigned char *out,
298 int rows, int cols, int mod )
307 int cols_2 = cols / 2;
309 row1 = (unsigned int*) out;
310 row2 = row1 + cols + mod;
323 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
324 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
325 + colortab[ *cb + 2*256 ];
326 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
330 *row1++ = (rgb_2_pix[ L + cr_r ] |
331 rgb_2_pix[ L + crb_g ] |
332 rgb_2_pix[ L + cb_b ]);
335 *row1++ = (rgb_2_pix[ L + cr_r ] |
336 rgb_2_pix[ L + crb_g ] |
337 rgb_2_pix[ L + cb_b ]);
340 /* Now, do second row. */
343 *row2++ = (rgb_2_pix[ L + cr_r ] |
344 rgb_2_pix[ L + crb_g ] |
345 rgb_2_pix[ L + cb_b ]);
348 *row2++ = (rgb_2_pix[ L + cr_r ] |
349 rgb_2_pix[ L + crb_g ] |
350 rgb_2_pix[ L + cb_b ]);
354 * These values are at the start of the next line, (due
355 * to the ++'s above),but they need to be at the start
356 * of the line after that.
366 * In this function I make use of a nasty trick. The tables have the lower
367 * 16 bits replicated in the upper 16. This means I can write ints and get
368 * the horizontal doubling for free (almost).
370 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
371 unsigned char *lum, unsigned char *cr,
372 unsigned char *cb, unsigned char *out,
373 int rows, int cols, int mod )
375 unsigned int* row1 = (unsigned int*) out;
376 const int next_row = cols+(mod/2);
377 unsigned int* row2 = row1 + 2*next_row;
383 int cols_2 = cols / 2;
387 mod = (next_row * 3) + (mod/2);
397 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
398 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
399 + colortab[ *cb + 2*256 ];
400 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
404 row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
405 rgb_2_pix[ L + crb_g ] |
406 rgb_2_pix[ L + cb_b ]);
410 row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
411 rgb_2_pix[ L + crb_g ] |
412 rgb_2_pix[ L + cb_b ]);
416 /* Now, do second row. */
419 row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
420 rgb_2_pix[ L + crb_g ] |
421 rgb_2_pix[ L + cb_b ]);
425 row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
426 rgb_2_pix[ L + crb_g ] |
427 rgb_2_pix[ L + cb_b ]);
432 * These values are at the start of the next line, (due
433 * to the ++'s above),but they need to be at the start
434 * of the line after that.
443 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
444 unsigned char *lum, unsigned char *cr,
445 unsigned char *cb, unsigned char *out,
446 int rows, int cols, int mod )
449 unsigned char* row1 = out;
450 const int next_row = (cols*2 + mod) * 3;
451 unsigned char* row2 = row1 + 2*next_row;
457 int cols_2 = cols / 2;
461 mod = next_row*3 + mod*3;
471 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
472 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
473 + colortab[ *cb + 2*256 ];
474 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
478 value = (rgb_2_pix[ L + cr_r ] |
479 rgb_2_pix[ L + crb_g ] |
480 rgb_2_pix[ L + cb_b ]);
481 row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
483 row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
485 row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
486 (value >> 16) & 0xFF;
490 value = (rgb_2_pix[ L + cr_r ] |
491 rgb_2_pix[ L + crb_g ] |
492 rgb_2_pix[ L + cb_b ]);
493 row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
495 row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
497 row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
498 (value >> 16) & 0xFF;
502 /* Now, do second row. */
505 value = (rgb_2_pix[ L + cr_r ] |
506 rgb_2_pix[ L + crb_g ] |
507 rgb_2_pix[ L + cb_b ]);
508 row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
510 row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
512 row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
513 (value >> 16) & 0xFF;
517 value = (rgb_2_pix[ L + cr_r ] |
518 rgb_2_pix[ L + crb_g ] |
519 rgb_2_pix[ L + cb_b ]);
520 row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
522 row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
524 row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
525 (value >> 16) & 0xFF;
530 * These values are at the start of the next line, (due
531 * to the ++'s above),but they need to be at the start
532 * of the line after that.
541 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
542 unsigned char *lum, unsigned char *cr,
543 unsigned char *cb, unsigned char *out,
544 int rows, int cols, int mod )
546 unsigned int* row1 = (unsigned int*) out;
547 const int next_row = cols*2+mod;
548 unsigned int* row2 = row1 + 2*next_row;
554 int cols_2 = cols / 2;
558 mod = (next_row * 3) + mod;
568 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
569 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
570 + colortab[ *cb + 2*256 ];
571 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
575 row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
576 (rgb_2_pix[ L + cr_r ] |
577 rgb_2_pix[ L + crb_g ] |
578 rgb_2_pix[ L + cb_b ]);
582 row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
583 (rgb_2_pix[ L + cr_r ] |
584 rgb_2_pix[ L + crb_g ] |
585 rgb_2_pix[ L + cb_b ]);
589 /* Now, do second row. */
592 row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
593 (rgb_2_pix[ L + cr_r ] |
594 rgb_2_pix[ L + crb_g ] |
595 rgb_2_pix[ L + cb_b ]);
599 row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
600 (rgb_2_pix[ L + cr_r ] |
601 rgb_2_pix[ L + crb_g ] |
602 rgb_2_pix[ L + cb_b ]);
607 * These values are at the start of the next line, (due
608 * to the ++'s above),but they need to be at the start
609 * of the line after that.
618 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
619 unsigned char *lum, unsigned char *cr,
620 unsigned char *cb, unsigned char *out,
621 int rows, int cols, int mod )
628 int cols_2 = cols / 2;
630 row = (unsigned short*) out;
640 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
641 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
642 + colortab[ *cb + 2*256 ];
643 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
647 *row++ = (rgb_2_pix[ L + cr_r ] |
648 rgb_2_pix[ L + crb_g ] |
649 rgb_2_pix[ L + cb_b ]);
652 *row++ = (rgb_2_pix[ L + cr_r ] |
653 rgb_2_pix[ L + crb_g ] |
654 rgb_2_pix[ L + cb_b ]);
662 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
663 unsigned char *lum, unsigned char *cr,
664 unsigned char *cb, unsigned char *out,
665 int rows, int cols, int mod )
673 int cols_2 = cols / 2;
675 row = (unsigned char*) out;
685 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
686 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
687 + colortab[ *cb + 2*256 ];
688 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
692 value = (rgb_2_pix[ L + cr_r ] |
693 rgb_2_pix[ L + crb_g ] |
694 rgb_2_pix[ L + cb_b ]);
695 *row++ = (value ) & 0xFF;
696 *row++ = (value >> 8) & 0xFF;
697 *row++ = (value >> 16) & 0xFF;
700 value = (rgb_2_pix[ L + cr_r ] |
701 rgb_2_pix[ L + crb_g ] |
702 rgb_2_pix[ L + cb_b ]);
703 *row++ = (value ) & 0xFF;
704 *row++ = (value >> 8) & 0xFF;
705 *row++ = (value >> 16) & 0xFF;
712 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
713 unsigned char *lum, unsigned char *cr,
714 unsigned char *cb, unsigned char *out,
715 int rows, int cols, int mod )
722 int cols_2 = cols / 2;
724 row = (unsigned int*) out;
733 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
734 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
735 + colortab[ *cb + 2*256 ];
736 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
740 *row++ = (rgb_2_pix[ L + cr_r ] |
741 rgb_2_pix[ L + crb_g ] |
742 rgb_2_pix[ L + cb_b ]);
745 *row++ = (rgb_2_pix[ L + cr_r ] |
746 rgb_2_pix[ L + crb_g ] |
747 rgb_2_pix[ L + cb_b ]);
756 * In this function I make use of a nasty trick. The tables have the lower
757 * 16 bits replicated in the upper 16. This means I can write ints and get
758 * the horizontal doubling for free (almost).
760 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
761 unsigned char *lum, unsigned char *cr,
762 unsigned char *cb, unsigned char *out,
763 int rows, int cols, int mod )
765 unsigned int* row = (unsigned int*) out;
766 const int next_row = cols+(mod/2);
771 int cols_2 = cols / 2;
781 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
782 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
783 + colortab[ *cb + 2*256 ];
784 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
788 row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
789 rgb_2_pix[ L + crb_g ] |
790 rgb_2_pix[ L + cb_b ]);
794 row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
795 rgb_2_pix[ L + crb_g ] |
796 rgb_2_pix[ L + cb_b ]);
804 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
805 unsigned char *lum, unsigned char *cr,
806 unsigned char *cb, unsigned char *out,
807 int rows, int cols, int mod )
810 unsigned char* row = out;
811 const int next_row = (cols*2 + mod) * 3;
816 int cols_2 = cols / 2;
825 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
826 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
827 + colortab[ *cb + 2*256 ];
828 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
832 value = (rgb_2_pix[ L + cr_r ] |
833 rgb_2_pix[ L + crb_g ] |
834 rgb_2_pix[ L + cb_b ]);
835 row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
837 row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
839 row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
840 (value >> 16) & 0xFF;
844 value = (rgb_2_pix[ L + cr_r ] |
845 rgb_2_pix[ L + crb_g ] |
846 rgb_2_pix[ L + cb_b ]);
847 row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
849 row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
851 row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
852 (value >> 16) & 0xFF;
860 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
861 unsigned char *lum, unsigned char *cr,
862 unsigned char *cb, unsigned char *out,
863 int rows, int cols, int mod )
865 unsigned int* row = (unsigned int*) out;
866 const int next_row = cols*2+mod;
871 int cols_2 = cols / 2;
881 cr_r = 0*768+256 + colortab[ *cr + 0*256 ];
882 crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
883 + colortab[ *cb + 2*256 ];
884 cb_b = 2*768+256 + colortab[ *cb + 3*256 ];
888 row[0] = row[1] = row[next_row] = row[next_row+1] =
889 (rgb_2_pix[ L + cr_r ] |
890 rgb_2_pix[ L + crb_g ] |
891 rgb_2_pix[ L + cb_b ]);
895 row[0] = row[1] = row[next_row] = row[next_row+1] =
896 (rgb_2_pix[ L + cr_r ] |
897 rgb_2_pix[ L + crb_g ] |
898 rgb_2_pix[ L + cb_b ]);
909 * How many 1 bits are there in the Uint32.
910 * Low performance, do not call often.
912 static int number_of_bits_set( Uint32 a )
915 if(a & 1) return 1 + number_of_bits_set(a >> 1);
916 return(number_of_bits_set(a >> 1));
920 * How many 0 bits are there at least significant end of Uint32.
921 * Low performance, do not call often.
923 static int free_bits_at_bottom( Uint32 a )
925 /* assume char is 8 bits */
926 if(!a) return sizeof(Uint32) * 8;
927 if(((Sint32)a) & 1l) return 0;
928 return 1 + free_bits_at_bottom ( a >> 1);
932 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
934 SDL_Overlay *overlay;
935 struct private_yuvhwdata *swdata;
940 Uint32 *r_2_pix_alloc;
941 Uint32 *g_2_pix_alloc;
942 Uint32 *b_2_pix_alloc;
945 Uint32 Rmask, Gmask, Bmask;
947 /* Only RGB packed pixel conversion supported */
948 if ( (display->format->BytesPerPixel != 2) &&
949 (display->format->BytesPerPixel != 3) &&
950 (display->format->BytesPerPixel != 4) ) {
951 SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
955 /* Verify that we support the format */
957 case SDL_YV12_OVERLAY:
958 case SDL_IYUV_OVERLAY:
959 case SDL_YUY2_OVERLAY:
960 case SDL_UYVY_OVERLAY:
961 case SDL_YVYU_OVERLAY:
964 SDL_SetError("Unsupported YUV format");
968 /* Create the overlay structure */
969 overlay = (SDL_Overlay *)malloc(sizeof *overlay);
970 if ( overlay == NULL ) {
974 memset(overlay, 0, (sizeof *overlay));
976 /* Fill in the basic members */
977 overlay->format = format;
981 /* Set up the YUV surface function structure */
982 overlay->hwfuncs = &sw_yuvfuncs;
984 /* Create the pixel data and lookup tables */
985 swdata = (struct private_yuvhwdata *)malloc(sizeof *swdata);
986 overlay->hwdata = swdata;
987 if ( swdata == NULL ) {
989 SDL_FreeYUVOverlay(overlay);
992 swdata->stretch = NULL;
993 swdata->display = display;
994 swdata->pixels = (Uint8 *) malloc(width*height*2);
995 swdata->colortab = (int *)malloc(4*256*sizeof(int));
996 Cr_r_tab = &swdata->colortab[0*256];
997 Cr_g_tab = &swdata->colortab[1*256];
998 Cb_g_tab = &swdata->colortab[2*256];
999 Cb_b_tab = &swdata->colortab[3*256];
1000 swdata->rgb_2_pix = (Uint32 *)malloc(3*768*sizeof(Uint32));
1001 r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
1002 g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
1003 b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
1004 if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
1006 SDL_FreeYUVOverlay(overlay);
1010 /* Generate the tables for the display surface */
1011 for (i=0; i<256; i++) {
1012 /* Gamma correction (luminance table) and chroma correction
1013 would be done here. See the Berkeley mpeg_play sources.
1016 Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
1017 Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
1018 Cb_g_tab[i] = (int) (-(0.114/0.331) * CB);
1019 Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
1023 * Set up entries 0-255 in rgb-to-pixel value tables.
1025 Rmask = display->format->Rmask;
1026 Gmask = display->format->Gmask;
1027 Bmask = display->format->Bmask;
1028 for ( i=0; i<256; ++i ) {
1029 r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
1030 r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
1031 g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
1032 g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
1033 b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
1034 b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
1038 * If we have 16-bit output depth, then we double the value
1039 * in the top word. This means that we can write out both
1040 * pixels in the pixel doubling mode with one op. It is
1041 * harmless in the normal case as storing a 32-bit value
1042 * through a short pointer will lose the top bits anyway.
1044 if( display->format->BytesPerPixel == 2 ) {
1045 for ( i=0; i<256; ++i ) {
1046 r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
1047 g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
1048 b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
1053 * Spread out the values we have to the rest of the array so that
1054 * we do not need to check for overflow.
1056 for ( i=0; i<256; ++i ) {
1057 r_2_pix_alloc[i] = r_2_pix_alloc[256];
1058 r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
1059 g_2_pix_alloc[i] = g_2_pix_alloc[256];
1060 g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
1061 b_2_pix_alloc[i] = b_2_pix_alloc[256];
1062 b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
1065 /* You have chosen wisely... */
1067 case SDL_YV12_OVERLAY:
1068 case SDL_IYUV_OVERLAY:
1069 if ( display->format->BytesPerPixel == 2 ) {
1070 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
1071 /* inline assembly functions */
1072 if ( SDL_HasMMX() && (Rmask == 0xF800) &&
1073 (Gmask == 0x07E0) &&
1074 (Bmask == 0x001F) &&
1075 (width & 15) == 0) {
1076 /*printf("Using MMX 16-bit 565 dither\n");*/
1077 swdata->Display1X = Color565DitherYV12MMX1X;
1079 /*printf("Using C 16-bit dither\n");*/
1080 swdata->Display1X = Color16DitherYV12Mod1X;
1083 swdata->Display1X = Color16DitherYV12Mod1X;
1085 swdata->Display2X = Color16DitherYV12Mod2X;
1087 if ( display->format->BytesPerPixel == 3 ) {
1088 swdata->Display1X = Color24DitherYV12Mod1X;
1089 swdata->Display2X = Color24DitherYV12Mod2X;
1091 if ( display->format->BytesPerPixel == 4 ) {
1092 #if defined(i386) && defined(__GNUC__) && defined(USE_ASMBLIT)
1093 /* inline assembly functions */
1094 if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
1095 (Gmask == 0x0000FF00) &&
1096 (Bmask == 0x000000FF) &&
1097 (width & 15) == 0) {
1098 /*printf("Using MMX 32-bit dither\n");*/
1099 swdata->Display1X = ColorRGBDitherYV12MMX1X;
1101 /*printf("Using C 32-bit dither\n");*/
1102 swdata->Display1X = Color32DitherYV12Mod1X;
1105 swdata->Display1X = Color32DitherYV12Mod1X;
1107 swdata->Display2X = Color32DitherYV12Mod2X;
1110 case SDL_YUY2_OVERLAY:
1111 case SDL_UYVY_OVERLAY:
1112 case SDL_YVYU_OVERLAY:
1113 if ( display->format->BytesPerPixel == 2 ) {
1114 swdata->Display1X = Color16DitherYUY2Mod1X;
1115 swdata->Display2X = Color16DitherYUY2Mod2X;
1117 if ( display->format->BytesPerPixel == 3 ) {
1118 swdata->Display1X = Color24DitherYUY2Mod1X;
1119 swdata->Display2X = Color24DitherYUY2Mod2X;
1121 if ( display->format->BytesPerPixel == 4 ) {
1122 swdata->Display1X = Color32DitherYUY2Mod1X;
1123 swdata->Display2X = Color32DitherYUY2Mod2X;
1127 /* We should never get here (caught above) */
1131 /* Find the pitch and offset values for the overlay */
1132 overlay->pitches = swdata->pitches;
1133 overlay->pixels = swdata->planes;
1135 case SDL_YV12_OVERLAY:
1136 case SDL_IYUV_OVERLAY:
1137 overlay->pitches[0] = overlay->w;
1138 overlay->pitches[1] = overlay->pitches[0] / 2;
1139 overlay->pitches[2] = overlay->pitches[0] / 2;
1140 overlay->pixels[0] = swdata->pixels;
1141 overlay->pixels[1] = overlay->pixels[0] +
1142 overlay->pitches[0] * overlay->h;
1143 overlay->pixels[2] = overlay->pixels[1] +
1144 overlay->pitches[1] * overlay->h / 2;
1145 overlay->planes = 3;
1147 case SDL_YUY2_OVERLAY:
1148 case SDL_UYVY_OVERLAY:
1149 case SDL_YVYU_OVERLAY:
1150 overlay->pitches[0] = overlay->w*2;
1151 overlay->pixels[0] = swdata->pixels;
1152 overlay->planes = 1;
1155 /* We should never get here (caught above) */
1159 /* We're all done.. */
1163 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
1168 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
1173 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
1175 struct private_yuvhwdata *swdata;
1176 SDL_Surface *stretch;
1177 SDL_Surface *display;
1179 Uint8 *lum, *Cr, *Cb;
1183 swdata = overlay->hwdata;
1186 if ( (overlay->w != dstrect->w) || (overlay->h != dstrect->h) ) {
1187 if ( (dstrect->w == 2*overlay->w) &&
1188 (dstrect->h == 2*overlay->h) ) {
1191 if ( ! swdata->stretch ) {
1192 display = swdata->display;
1193 swdata->stretch = SDL_CreateRGBSurface(
1195 overlay->w, overlay->h,
1196 display->format->BitsPerPixel,
1197 display->format->Rmask,
1198 display->format->Gmask,
1199 display->format->Bmask, 0);
1200 if ( ! swdata->stretch ) {
1204 stretch = swdata->stretch;
1211 display = swdata->display;
1213 switch (overlay->format) {
1214 case SDL_YV12_OVERLAY:
1215 lum = overlay->pixels[0];
1216 Cr = overlay->pixels[1];
1217 Cb = overlay->pixels[2];
1219 case SDL_IYUV_OVERLAY:
1220 lum = overlay->pixels[0];
1221 Cr = overlay->pixels[2];
1222 Cb = overlay->pixels[1];
1224 case SDL_YUY2_OVERLAY:
1225 lum = overlay->pixels[0];
1229 case SDL_UYVY_OVERLAY:
1230 lum = overlay->pixels[0]+1;
1234 case SDL_YVYU_OVERLAY:
1235 lum = overlay->pixels[0];
1240 SDL_SetError("Unsupported YUV format in blit");
1243 if ( SDL_MUSTLOCK(display) ) {
1244 if ( SDL_LockSurface(display) < 0 ) {
1249 dst = (Uint8 *)stretch->pixels;
1251 dst = (Uint8 *)display->pixels
1252 + dstrect->x * display->format->BytesPerPixel
1253 + dstrect->y * display->pitch;
1255 mod = (display->pitch / display->format->BytesPerPixel);
1258 mod -= (overlay->w * 2);
1259 swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
1260 lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
1263 swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
1264 lum, Cr, Cb, dst, overlay->h, overlay->w,mod);
1266 if ( SDL_MUSTLOCK(display) ) {
1267 SDL_UnlockSurface(display);
1270 display = swdata->display;
1271 SDL_SoftStretch(stretch, NULL, display, dstrect);
1273 SDL_UpdateRects(display, 1, dstrect);
1278 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
1280 struct private_yuvhwdata *swdata;
1282 swdata = overlay->hwdata;
1284 if ( swdata->stretch ) {
1285 SDL_FreeSurface(swdata->stretch);
1287 if ( swdata->pixels ) {
1288 free(swdata->pixels);
1290 if ( swdata->colortab ) {
1291 free(swdata->colortab);
1293 if ( swdata->rgb_2_pix ) {
1294 free(swdata->rgb_2_pix);