src/render/SDL_yuv_sw.c
author Sam Lantinga <slouken@libsdl.org>
Sun, 12 Nov 2017 10:59:05 -0800
changeset 11701 d131f3193794
parent 11574 696d0036f442
child 11702 cf166abbde4a
permissions -rw-r--r--
Fixed Android build error on older SDK
slouken@0
     1
/*
slouken@5535
     2
  Simple DirectMedia Layer
slouken@10737
     3
  Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
slouken@0
     4
slouken@5535
     5
  This software is provided 'as-is', without any express or implied
slouken@5535
     6
  warranty.  In no event will the authors be held liable for any damages
slouken@5535
     7
  arising from the use of this software.
slouken@0
     8
slouken@5535
     9
  Permission is granted to anyone to use this software for any purpose,
slouken@5535
    10
  including commercial applications, and to alter it and redistribute it
slouken@5535
    11
  freely, subject to the following restrictions:
slouken@0
    12
slouken@5535
    13
  1. The origin of this software must not be misrepresented; you must not
slouken@5535
    14
     claim that you wrote the original software. If you use this software
slouken@5535
    15
     in a product, an acknowledgment in the product documentation would be
slouken@5535
    16
     appreciated but is not required.
slouken@5535
    17
  2. Altered source versions must be plainly marked as such, and must not be
slouken@5535
    18
     misrepresented as being the original software.
slouken@5535
    19
  3. This notice may not be removed or altered from any source distribution.
slouken@0
    20
*/
icculus@8093
    21
#include "../SDL_internal.h"
slouken@0
    22
slouken@1895
    23
/* This is the software implementation of the YUV texture support */
slouken@0
    24
slouken@0
    25
/* This code was derived from code carrying the following copyright notices:
slouken@0
    26
slouken@0
    27
 * Copyright (c) 1995 The Regents of the University of California.
slouken@0
    28
 * All rights reserved.
slouken@7191
    29
 *
slouken@0
    30
 * Permission to use, copy, modify, and distribute this software and its
slouken@0
    31
 * documentation for any purpose, without fee, and without written agreement is
slouken@0
    32
 * hereby granted, provided that the above copyright notice and the following
slouken@0
    33
 * two paragraphs appear in all copies of this software.
slouken@7191
    34
 *
slouken@0
    35
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
slouken@0
    36
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
slouken@0
    37
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
slouken@0
    38
 * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
slouken@7191
    39
 *
slouken@0
    40
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
slouken@0
    41
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
slouken@0
    42
 * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
slouken@0
    43
 * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
slouken@0
    44
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
slouken@0
    45
slouken@0
    46
 * Copyright (c) 1995 Erik Corry
slouken@0
    47
 * All rights reserved.
slouken@7191
    48
 *
slouken@0
    49
 * Permission to use, copy, modify, and distribute this software and its
slouken@0
    50
 * documentation for any purpose, without fee, and without written agreement is
slouken@0
    51
 * hereby granted, provided that the above copyright notice and the following
slouken@0
    52
 * two paragraphs appear in all copies of this software.
slouken@7191
    53
 *
slouken@0
    54
 * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
slouken@0
    55
 * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
slouken@0
    56
 * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
slouken@0
    57
 * OF THE POSSIBILITY OF SUCH DAMAGE.
slouken@7191
    58
 *
slouken@0
    59
 * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
slouken@0
    60
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
slouken@0
    61
 * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
slouken@0
    62
 * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
slouken@0
    63
 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
slouken@0
    64
slouken@0
    65
 * Portions of this software Copyright (c) 1995 Brown University.
slouken@0
    66
 * All rights reserved.
slouken@7191
    67
 *
slouken@0
    68
 * Permission to use, copy, modify, and distribute this software and its
slouken@0
    69
 * documentation for any purpose, without fee, and without written agreement
slouken@0
    70
 * is hereby granted, provided that the above copyright notice and the
slouken@0
    71
 * following two paragraphs appear in all copies of this software.
slouken@7191
    72
 *
slouken@0
    73
 * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
slouken@0
    74
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
slouken@0
    75
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
slouken@0
    76
 * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
slouken@7191
    77
 *
slouken@0
    78
 * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
slouken@0
    79
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
slouken@0
    80
 * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
slouken@0
    81
 * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
slouken@0
    82
 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
slouken@0
    83
 */
slouken@0
    84
icculus@7488
    85
#include "SDL_assert.h"
slouken@0
    86
#include "SDL_video.h"
slouken@739
    87
#include "SDL_cpuinfo.h"
slouken@0
    88
#include "SDL_yuv_sw_c.h"
slouken@11195
    89
#include "SDL_yuv_mmx_c.h"
slouken@0
    90
slouken@1895
    91
slouken@0
    92
/* The colorspace conversion functions */
slouken@0
    93
icculus@11156
    94
#ifdef USE_MMX_ASSEMBLY
slouken@1895
    95
extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
slouken@0
    96
                                    unsigned char *lum, unsigned char *cr,
slouken@0
    97
                                    unsigned char *cb, unsigned char *out,
slouken@1895
    98
                                    int rows, int cols, int mod);
slouken@1895
    99
extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
slouken@1895
   100
                                    unsigned char *lum, unsigned char *cr,
slouken@1895
   101
                                    unsigned char *cb, unsigned char *out,
slouken@1895
   102
                                    int rows, int cols, int mod);
slouken@1895
   103
#endif
slouken@1895
   104
slouken@1895
   105
/*
 * Convert planar luma/chroma data to 16-bit pixels at 1:1 scale.
 *
 * One cr byte and one cb byte are consumed per 2x2 group of luma bytes, so
 * two output rows are produced on every pass of the outer loop.
 *
 *  colortab   read as four consecutive 256-entry tables (indexed by cr, cr,
 *             cb and cb respectively)
 *  rgb_2_pix  read in three 768-entry sections; the three lookups are OR'ed
 *             together and truncated to 16 bits
 *  lum/cr/cb  source samples; lum rows are cols bytes apart
 *  out        destination, written as unsigned short pixels
 *  mod        extra pixels to skip at the end of every output row
 *
 * Only rows / 2 row pairs and cols / 2 column pairs are processed.
 */
static void
Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned short *dst_top = (unsigned short *) out;
    unsigned short *dst_bottom = dst_top + cols + mod;
    unsigned char *lum_bottom = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Once a row is finished: skip its padding, then the whole other row */
    const int dst_skip = cols + 2 * mod;
    int rows_left;
    int pairs_left;

    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
            /* Chroma-dependent table offsets shared by the 2x2 group */
            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            int luma;

            ++cr;
            ++cb;

            /* Top row, left then right pixel */
            luma = *lum++;
            *dst_top++ = (unsigned short) (rgb_2_pix[luma + r_off] |
                                           rgb_2_pix[luma + g_off] |
                                           rgb_2_pix[luma + b_off]);

            luma = *lum++;
            *dst_top++ = (unsigned short) (rgb_2_pix[luma + r_off] |
                                           rgb_2_pix[luma + g_off] |
                                           rgb_2_pix[luma + b_off]);

            /* Bottom row, left then right pixel */
            luma = *lum_bottom++;
            *dst_bottom++ = (unsigned short) (rgb_2_pix[luma + r_off] |
                                              rgb_2_pix[luma + g_off] |
                                              rgb_2_pix[luma + b_off]);

            luma = *lum_bottom++;
            *dst_bottom++ = (unsigned short) (rgb_2_pix[luma + r_off] |
                                              rgb_2_pix[luma + g_off] |
                                              rgb_2_pix[luma + b_off]);
        }

        /*
         * Every pointer now sits at the start of the row its partner just
         * handled; hop over that row to reach the next unprocessed pair.
         */
        lum += cols;
        lum_bottom += cols;
        dst_top += dst_skip;
        dst_bottom += dst_skip;
    }
}
slouken@0
   174
slouken@1895
   175
/*
 * Convert planar luma/chroma data to packed 24-bit pixels at 1:1 scale.
 *
 * One cr byte and one cb byte are consumed per 2x2 group of luma bytes, so
 * two output rows are produced on every pass of the outer loop.  Each pixel
 * is the OR of three rgb_2_pix lookups, stored as three bytes, least
 * significant byte first.
 *
 *  colortab   read as four consecutive 256-entry tables
 *  rgb_2_pix  read in three 768-entry sections
 *  lum/cr/cb  source samples; lum rows are cols bytes apart
 *  out        destination bytes, 3 per pixel
 *  mod        extra pixels (not bytes) to skip at the end of every row
 *
 * Only rows / 2 row pairs and cols / 2 column pairs are processed.
 */
static void
Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned char *dst_top = out;
    unsigned char *dst_bottom = out + cols * 3 + mod * 3;
    unsigned char *lum_bottom = lum + cols;
    const int pairs_per_row = cols / 2;
    /* Bytes to hop: this row's padding plus the whole other row */
    const int dst_skip = (cols + 2 * mod) * 3;
    int rows_left;
    int pairs_left;

    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
            /* Chroma-dependent table offsets shared by the 2x2 group */
            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            unsigned int pixel;
            int luma;

            ++cr;
            ++cb;

            /* Top row, left pixel */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_top[0] = (pixel) & 0xFF;
            dst_top[1] = (pixel >> 8) & 0xFF;
            dst_top[2] = (pixel >> 16) & 0xFF;
            dst_top += 3;

            /* Top row, right pixel */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_top[0] = (pixel) & 0xFF;
            dst_top[1] = (pixel >> 8) & 0xFF;
            dst_top[2] = (pixel >> 16) & 0xFF;
            dst_top += 3;

            /* Bottom row, left pixel */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_bottom[0] = (pixel) & 0xFF;
            dst_bottom[1] = (pixel >> 8) & 0xFF;
            dst_bottom[2] = (pixel >> 16) & 0xFF;
            dst_bottom += 3;

            /* Bottom row, right pixel */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_bottom[0] = (pixel) & 0xFF;
            dst_bottom[1] = (pixel >> 8) & 0xFF;
            dst_bottom[2] = (pixel >> 16) & 0xFF;
            dst_bottom += 3;
        }

        /*
         * Every pointer now sits at the start of the row its partner just
         * handled; hop over that row to reach the next unprocessed pair.
         */
        lum += cols;
        lum_bottom += cols;
        dst_top += dst_skip;
        dst_bottom += dst_skip;
    }
}
slouken@0
   254
slouken@1895
   255
/*
 * Convert planar luma/chroma data to 32-bit pixels at 1:1 scale.
 *
 * One cr byte and one cb byte are consumed per 2x2 group of luma bytes, so
 * two output rows are produced on every pass of the outer loop.  Unlike a
 * plain rows / 2 by cols / 2 walk, this routine rounds both counts up and
 * then suppresses the pixels of the final pair that fall outside the image,
 * so odd widths and heights are converted completely.
 *
 *  colortab   read as four consecutive 256-entry tables
 *  rgb_2_pix  read in three 768-entry sections; the three lookups are OR'ed
 *  lum        luma samples, rows are cols bytes apart
 *  cr, cb     chroma samples, (cols + 1) / 2 consumed per row pair
 *  out        destination, written as unsigned int pixels
 *  mod        extra pixels to skip at the end of every output row
 */
static void
Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned int *row1 = (unsigned int *) out;
    unsigned int *row2 = row1 + cols + mod;
    unsigned char *lum2 = lum + cols;
    /* Round up so that a trailing odd column still gets a pass */
    const int cols_2 = (cols + 1) / 2;
    /* not even dimensions */
    const int skip_last_col = (cols & 0x1);
    const int skip_last_row = (rows & 0x1);
    int x, y;

    /* Once a row is finished: skip its padding, then the whole other row */
    mod += cols + mod;

    y = (rows + 1) / 2;
    while (y--) {
        /* y was already decremented: 0 means this is the final row pair */
        const int have_second_row = !(y == 0 && skip_last_row);

        x = cols_2;
        while (x--) {
            /*
             * x was already decremented: 0 means this is the final column
             * pair.  The same test must guard the second pixel of BOTH
             * rows; testing a different x for the bottom row would drop a
             * pixel from the previous pair and write one past the row end.
             */
            const int have_second_col = !(x == 0 && skip_last_col);
            const int cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            int L;

            ++cr;
            ++cb;

            L = *lum++;
            *row1++ = (rgb_2_pix[L + cr_r] |
                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);

            if (have_second_col) {
                L = *lum++;
                *row1++ = (rgb_2_pix[L + cr_r] |
                           rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
            }

            if (have_second_row) {
                /* Now, do second row.  */
                L = *lum2++;
                *row2++ = (rgb_2_pix[L + cr_r] |
                           rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);

                if (have_second_col) {
                    L = *lum2++;
                    *row2++ = (rgb_2_pix[L + cr_r] |
                               rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
                }
            }
        }

        /*
         * These values are at the start of the next line, (due
         * to the ++'s above),but they need to be at the start
         * of the line after that.
         */
        lum += cols;
        lum2 += cols;
        row1 += mod;
        row2 += mod;
    }
}
slouken@0
   338
slouken@0
   339
/*
slouken@0
   340
 * In this function I make use of a nasty trick. The tables have the lower
slouken@0
   341
 * 16 bits replicated in the upper 16. This means I can write ints and get
slouken@0
   342
 * the horizontal doubling for free (almost).
slouken@0
   343
 */
slouken@1895
   344
/*
 * Convert planar luma/chroma data to 16-bit pixels at 2x scale.
 *
 * Output is written one unsigned int at a time.  Each rgb_2_pix entry is
 * expected (per the original author's note) to hold the 16-bit pixel in
 * both halves, so a single 32-bit store emits two identical pixels side by
 * side; the same value is stored again one output row down for the
 * vertical doubling.  One cr/cb byte pair is consumed per 2x2 group of
 * luma bytes, which therefore covers a 4x4 area of 16-bit output pixels.
 *
 *  colortab   read as four consecutive 256-entry tables
 *  rgb_2_pix  read in three 768-entry sections; the three lookups are OR'ed
 *  lum/cr/cb  source samples; lum rows are cols bytes apart
 *  out        destination, addressed as unsigned int
 *  mod        row padding in 16-bit pixels (halved to get unsigned ints)
 *
 * Only rows / 2 row pairs and cols / 2 column pairs are processed.
 */
static void
Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned int *dst_top = (unsigned int *) out;
    /* Distance between consecutive output rows, in unsigned ints */
    const int stride = cols + (mod / 2);
    unsigned int *dst_bottom = dst_top + 2 * stride;
    unsigned char *lum_bottom = lum + cols;
    const int pairs_per_row = cols / 2;
    /* From the end of one output row to the start of the row 4 below it */
    const int dst_skip = (stride * 3) + (mod / 2);
    int rows_left;
    int pairs_left;

    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
            /* Chroma-dependent table offsets shared by the 2x2 group */
            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            unsigned int pixel;
            int luma;

            ++cr;
            ++cb;

            /* Top source row, left sample */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_top[stride] = pixel;
            dst_top[0] = pixel;
            ++dst_top;

            /* Top source row, right sample */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_top[stride] = pixel;
            dst_top[0] = pixel;
            ++dst_top;

            /* Bottom source row, left sample */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_bottom[stride] = pixel;
            dst_bottom[0] = pixel;
            ++dst_bottom;

            /* Bottom source row, right sample */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_bottom[stride] = pixel;
            dst_bottom[0] = pixel;
            ++dst_bottom;
        }

        /*
         * Source pointers skip the row their partner just consumed; the
         * destination pointers skip the three output rows already filled.
         */
        lum += cols;
        lum_bottom += cols;
        dst_top += dst_skip;
        dst_bottom += dst_skip;
    }
}
slouken@0
   416
slouken@1895
   417
/*
 * Convert planar luma/chroma data to packed 24-bit pixels at 2x scale.
 *
 * Every luma sample becomes a 2x2 square of identical 3-byte pixels (least
 * significant byte first), and one cr/cb byte pair is shared by a 2x2
 * group of luma samples, i.e. a 4x4 square of output pixels.
 *
 *  colortab   read as four consecutive 256-entry tables
 *  rgb_2_pix  read in three 768-entry sections; the three lookups are OR'ed
 *  lum/cr/cb  source samples; lum rows are cols bytes apart
 *  out        destination bytes, 3 per pixel
 *  mod        extra pixels (not bytes) at the end of every output row
 *
 * Only rows / 2 row pairs and cols / 2 column pairs are processed.
 */
static void
Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned char *dst_top = out;
    /* Distance between consecutive output rows, in bytes */
    const int stride = (cols * 2 + mod) * 3;
    unsigned char *dst_bottom = dst_top + 2 * stride;
    unsigned char *lum_bottom = lum + cols;
    const int pairs_per_row = cols / 2;
    /* From the end of one output row to the start of the row 4 below it */
    const int dst_skip = stride * 3 + mod * 3;
    int rows_left;
    int pairs_left;
    int channel;

    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
            /* Chroma-dependent table offsets shared by the 2x2 group */
            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            unsigned int pixel;
            int luma;

            ++cr;
            ++cb;

            /* Top source row, left sample */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            for (channel = 0; channel < 3; ++channel) {
                const unsigned char byte = (pixel >> (8 * channel)) & 0xFF;
                dst_top[channel] = byte;
                dst_top[3 + channel] = byte;
                dst_top[stride + channel] = byte;
                dst_top[stride + 3 + channel] = byte;
            }
            dst_top += 2 * 3;

            /* Top source row, right sample */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            for (channel = 0; channel < 3; ++channel) {
                const unsigned char byte = (pixel >> (8 * channel)) & 0xFF;
                dst_top[channel] = byte;
                dst_top[3 + channel] = byte;
                dst_top[stride + channel] = byte;
                dst_top[stride + 3 + channel] = byte;
            }
            dst_top += 2 * 3;

            /* Bottom source row, left sample */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            for (channel = 0; channel < 3; ++channel) {
                const unsigned char byte = (pixel >> (8 * channel)) & 0xFF;
                dst_bottom[channel] = byte;
                dst_bottom[3 + channel] = byte;
                dst_bottom[stride + channel] = byte;
                dst_bottom[stride + 3 + channel] = byte;
            }
            dst_bottom += 2 * 3;

            /* Bottom source row, right sample */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            for (channel = 0; channel < 3; ++channel) {
                const unsigned char byte = (pixel >> (8 * channel)) & 0xFF;
                dst_bottom[channel] = byte;
                dst_bottom[3 + channel] = byte;
                dst_bottom[stride + channel] = byte;
                dst_bottom[stride + 3 + channel] = byte;
            }
            dst_bottom += 2 * 3;
        }

        /*
         * Source pointers skip the row their partner just consumed; the
         * destination pointers skip the three output rows already filled.
         */
        lum += cols;
        lum_bottom += cols;
        dst_top += dst_skip;
        dst_bottom += dst_skip;
    }
}
slouken@0
   510
slouken@1895
   511
/*
 * Convert planar luma/chroma data to 32-bit pixels at 2x scale.
 *
 * Every luma sample becomes a 2x2 square of identical output pixels, and
 * one cr/cb byte pair is shared by a 2x2 group of luma samples, i.e. a 4x4
 * square of output pixels.
 *
 *  colortab   read as four consecutive 256-entry tables
 *  rgb_2_pix  read in three 768-entry sections; the three lookups are OR'ed
 *  lum/cr/cb  source samples; lum rows are cols bytes apart
 *  out        destination, written as unsigned int pixels
 *  mod        extra pixels at the end of every output row
 *
 * Only rows / 2 row pairs and cols / 2 column pairs are processed.
 */
static void
Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned int *dst_top = (unsigned int *) out;
    /* Distance between consecutive output rows, in pixels */
    const int stride = cols * 2 + mod;
    unsigned int *dst_bottom = dst_top + 2 * stride;
    unsigned char *lum_bottom = lum + cols;
    const int pairs_per_row = cols / 2;
    /* From the end of one output row to the start of the row 4 below it */
    const int dst_skip = (stride * 3) + mod;
    int rows_left;
    int pairs_left;

    for (rows_left = rows / 2; rows_left != 0; --rows_left) {
        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
            /* Chroma-dependent table offsets shared by the 2x2 group */
            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            unsigned int pixel;
            int luma;

            ++cr;
            ++cb;

            /* Top source row, left sample */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_top[0] = pixel;
            dst_top[1] = pixel;
            dst_top[stride] = pixel;
            dst_top[stride + 1] = pixel;
            dst_top += 2;

            /* Top source row, right sample */
            luma = *lum++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_top[0] = pixel;
            dst_top[1] = pixel;
            dst_top[stride] = pixel;
            dst_top[stride + 1] = pixel;
            dst_top += 2;

            /* Bottom source row, left sample */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_bottom[0] = pixel;
            dst_bottom[1] = pixel;
            dst_bottom[stride] = pixel;
            dst_bottom[stride + 1] = pixel;
            dst_bottom += 2;

            /* Bottom source row, right sample */
            luma = *lum_bottom++;
            pixel = (rgb_2_pix[luma + r_off] |
                     rgb_2_pix[luma + g_off] | rgb_2_pix[luma + b_off]);
            dst_bottom[0] = pixel;
            dst_bottom[1] = pixel;
            dst_bottom[stride] = pixel;
            dst_bottom[stride + 1] = pixel;
            dst_bottom += 2;
        }

        /*
         * Source pointers skip the row their partner just consumed; the
         * destination pointers skip the three output rows already filled.
         */
        lum += cols;
        lum_bottom += cols;
        dst_top += dst_skip;
        dst_bottom += dst_skip;
    }
}
slouken@0
   583
slouken@1895
   584
/*
 * Convert interleaved luma/chroma data to 16-bit pixels at 1:1 scale.
 *
 * For every pair of output pixels the routine reads one byte through cr
 * and one through cb, then two luma bytes; lum advances 2 bytes per pixel
 * and cr/cb advance 4 bytes per pixel pair, so all three pointers walk
 * the source with a 4-byte period per two pixels.
 *
 *  colortab   read as four consecutive 256-entry tables
 *  rgb_2_pix  read in three 768-entry sections; the three lookups are OR'ed
 *             together and truncated to 16 bits
 *  out        destination, written as unsigned short pixels
 *  mod        extra pixels to skip at the end of every output row
 *
 * All rows are processed, cols / 2 pixel pairs per row.  The source
 * pointers are never adjusted between rows.
 */
static void
Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
                       unsigned char *lum, unsigned char *cr,
                       unsigned char *cb, unsigned char *out,
                       int rows, int cols, int mod)
{
    unsigned short *dst = (unsigned short *) out;
    const int pairs_per_row = cols / 2;
    int rows_left;
    int pairs_left;

    for (rows_left = rows; rows_left != 0; --rows_left) {
        for (pairs_left = pairs_per_row; pairs_left != 0; --pairs_left) {
            /* Chroma-dependent table offsets shared by both pixels */
            const int r_off = 0 * 768 + 256 + colortab[*cr + 0 * 256];
            const int g_off = 1 * 768 + 256 + colortab[*cr + 1 * 256]
                + colortab[*cb + 2 * 256];
            const int b_off = 2 * 768 + 256 + colortab[*cb + 3 * 256];
            int luma;

            cr += 4;
            cb += 4;

            /* Left pixel of the pair */
            luma = lum[0];
            *dst++ = (unsigned short) (rgb_2_pix[luma + r_off] |
                                       rgb_2_pix[luma + g_off] |
                                       rgb_2_pix[luma + b_off]);

            /* Right pixel of the pair */
            luma = lum[2];
            *dst++ = (unsigned short) (rgb_2_pix[luma + r_off] |
                                       rgb_2_pix[luma + g_off] |
                                       rgb_2_pix[luma + b_off]);

            lum += 4;
        }

        dst += mod;
    }
}
slouken@0
   629
slouken@1895
   630
static void
slouken@1895
   631
Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
slouken@1895
   632
                       unsigned char *lum, unsigned char *cr,
slouken@1895
   633
                       unsigned char *cb, unsigned char *out,
slouken@1895
   634
                       int rows, int cols, int mod)
slouken@0
   635
{
slouken@0
   636
    unsigned int value;
slouken@1895
   637
    unsigned char *row;
slouken@0
   638
    int x, y;
slouken@0
   639
    int cr_r;
slouken@0
   640
    int crb_g;
slouken@0
   641
    int cb_b;
slouken@0
   642
    int cols_2 = cols / 2;
slouken@0
   643
slouken@1895
   644
    row = (unsigned char *) out;
slouken@0
   645
    mod *= 3;
slouken@0
   646
    y = rows;
slouken@1895
   647
    while (y--) {
slouken@0
   648
        x = cols_2;
slouken@1895
   649
        while (x--) {
slouken@0
   650
            register int L;
slouken@0
   651
slouken@1895
   652
            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
slouken@1895
   653
            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
slouken@1895
   654
                + colortab[*cb + 2 * 256];
slouken@1895
   655
            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
slouken@1895
   656
            cr += 4;
slouken@1895
   657
            cb += 4;
slouken@0
   658
slouken@1895
   659
            L = *lum;
slouken@1895
   660
            lum += 2;
slouken@1895
   661
            value = (rgb_2_pix[L + cr_r] |
slouken@1895
   662
                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@1895
   663
            *row++ = (value) & 0xFF;
slouken@1895
   664
            *row++ = (value >> 8) & 0xFF;
slouken@0
   665
            *row++ = (value >> 16) & 0xFF;
slouken@0
   666
slouken@1895
   667
            L = *lum;
slouken@1895
   668
            lum += 2;
slouken@1895
   669
            value = (rgb_2_pix[L + cr_r] |
slouken@1895
   670
                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@1895
   671
            *row++ = (value) & 0xFF;
slouken@1895
   672
            *row++ = (value >> 8) & 0xFF;
slouken@0
   673
            *row++ = (value >> 16) & 0xFF;
slouken@0
   674
slouken@0
   675
        }
slouken@0
   676
        row += mod;
slouken@0
   677
    }
slouken@0
   678
}
slouken@0
   679
slouken@1895
   680
static void
slouken@1895
   681
Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
slouken@1895
   682
                       unsigned char *lum, unsigned char *cr,
slouken@1895
   683
                       unsigned char *cb, unsigned char *out,
slouken@1895
   684
                       int rows, int cols, int mod)
slouken@0
   685
{
slouken@1895
   686
    unsigned int *row;
slouken@0
   687
    int x, y;
slouken@0
   688
    int cr_r;
slouken@0
   689
    int crb_g;
slouken@0
   690
    int cb_b;
slouken@11574
   691
    int cols_2 = (cols + 1) / 2;
slouken@11574
   692
    /* not even dimensions */
slouken@11574
   693
    int skip_last_col = 0;
slouken@11574
   694
    if ( (cols & 0x1) ) {
slouken@11574
   695
        skip_last_col = 1;
slouken@11574
   696
    }
slouken@0
   697
slouken@1895
   698
    row = (unsigned int *) out;
slouken@0
   699
    y = rows;
slouken@1895
   700
    while (y--) {
slouken@0
   701
        x = cols_2;
slouken@1895
   702
        while (x--) {
slouken@0
   703
            register int L;
slouken@0
   704
slouken@1895
   705
            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
slouken@1895
   706
            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
slouken@1895
   707
                + colortab[*cb + 2 * 256];
slouken@1895
   708
            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
slouken@1895
   709
            cr += 4;
slouken@1895
   710
            cb += 4;
slouken@0
   711
slouken@1895
   712
            L = *lum;
slouken@1895
   713
            lum += 2;
slouken@1895
   714
            *row++ = (rgb_2_pix[L + cr_r] |
slouken@1895
   715
                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@0
   716
slouken@1895
   717
            L = *lum;
slouken@1895
   718
            lum += 2;
slouken@11574
   719
slouken@11574
   720
            if (!(x == 0 && skip_last_col)) {
slouken@1895
   721
            *row++ = (rgb_2_pix[L + cr_r] |
slouken@1895
   722
                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@11574
   723
            } /* skip col */
slouken@0
   724
slouken@0
   725
        }
slouken@0
   726
        row += mod;
slouken@0
   727
    }
slouken@0
   728
}
slouken@0
   729
slouken@0
   730
/*
slouken@0
   731
 * In this function I make use of a nasty trick. The tables have the lower
slouken@0
   732
 * 16 bits replicated in the upper 16. This means I can write ints and get
slouken@0
   733
 * the horisontal doubling for free (almost).
slouken@0
   734
 */
slouken@1895
   735
static void
slouken@1895
   736
Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
slouken@1895
   737
                       unsigned char *lum, unsigned char *cr,
slouken@1895
   738
                       unsigned char *cb, unsigned char *out,
slouken@1895
   739
                       int rows, int cols, int mod)
slouken@0
   740
{
slouken@1895
   741
    unsigned int *row = (unsigned int *) out;
slouken@1895
   742
    const int next_row = cols + (mod / 2);
slouken@0
   743
    int x, y;
slouken@0
   744
    int cr_r;
slouken@0
   745
    int crb_g;
slouken@0
   746
    int cb_b;
slouken@0
   747
    int cols_2 = cols / 2;
slouken@0
   748
slouken@0
   749
    y = rows;
slouken@1895
   750
    while (y--) {
slouken@0
   751
        x = cols_2;
slouken@1895
   752
        while (x--) {
slouken@0
   753
            register int L;
slouken@0
   754
slouken@1895
   755
            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
slouken@1895
   756
            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
slouken@1895
   757
                + colortab[*cb + 2 * 256];
slouken@1895
   758
            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
slouken@1895
   759
            cr += 4;
slouken@1895
   760
            cb += 4;
slouken@0
   761
slouken@1895
   762
            L = *lum;
slouken@1895
   763
            lum += 2;
slouken@1895
   764
            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
slouken@1895
   765
                                      rgb_2_pix[L + crb_g] |
slouken@1895
   766
                                      rgb_2_pix[L + cb_b]);
slouken@0
   767
            row++;
slouken@0
   768
slouken@1895
   769
            L = *lum;
slouken@1895
   770
            lum += 2;
slouken@1895
   771
            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
slouken@1895
   772
                                      rgb_2_pix[L + crb_g] |
slouken@1895
   773
                                      rgb_2_pix[L + cb_b]);
slouken@0
   774
            row++;
slouken@0
   775
slouken@0
   776
        }
slouken@0
   777
        row += next_row;
slouken@0
   778
    }
slouken@0
   779
}
slouken@0
   780
slouken@1895
   781
static void
slouken@1895
   782
Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
slouken@1895
   783
                       unsigned char *lum, unsigned char *cr,
slouken@1895
   784
                       unsigned char *cb, unsigned char *out,
slouken@1895
   785
                       int rows, int cols, int mod)
slouken@0
   786
{
slouken@0
   787
    unsigned int value;
slouken@1895
   788
    unsigned char *row = out;
slouken@1895
   789
    const int next_row = (cols * 2 + mod) * 3;
slouken@0
   790
    int x, y;
slouken@0
   791
    int cr_r;
slouken@0
   792
    int crb_g;
slouken@0
   793
    int cb_b;
slouken@0
   794
    int cols_2 = cols / 2;
slouken@0
   795
    y = rows;
slouken@1895
   796
    while (y--) {
slouken@0
   797
        x = cols_2;
slouken@1895
   798
        while (x--) {
slouken@0
   799
            register int L;
slouken@0
   800
slouken@1895
   801
            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
slouken@1895
   802
            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
slouken@1895
   803
                + colortab[*cb + 2 * 256];
slouken@1895
   804
            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
slouken@1895
   805
            cr += 4;
slouken@1895
   806
            cb += 4;
slouken@0
   807
slouken@1895
   808
            L = *lum;
slouken@1895
   809
            lum += 2;
slouken@1895
   810
            value = (rgb_2_pix[L + cr_r] |
slouken@1895
   811
                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@1895
   812
            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
slouken@1895
   813
                row[next_row + 3 + 0] = (value) & 0xFF;
slouken@1895
   814
            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
slouken@1895
   815
                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
slouken@1895
   816
            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
slouken@1895
   817
                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
slouken@1895
   818
            row += 2 * 3;
slouken@0
   819
slouken@1895
   820
            L = *lum;
slouken@1895
   821
            lum += 2;
slouken@1895
   822
            value = (rgb_2_pix[L + cr_r] |
slouken@1895
   823
                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@1895
   824
            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
slouken@1895
   825
                row[next_row + 3 + 0] = (value) & 0xFF;
slouken@1895
   826
            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
slouken@1895
   827
                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
slouken@1895
   828
            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
slouken@1895
   829
                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
slouken@1895
   830
            row += 2 * 3;
slouken@0
   831
slouken@0
   832
        }
slouken@0
   833
        row += next_row;
slouken@0
   834
    }
slouken@0
   835
}
slouken@0
   836
slouken@1895
   837
static void
slouken@1895
   838
Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
slouken@1895
   839
                       unsigned char *lum, unsigned char *cr,
slouken@1895
   840
                       unsigned char *cb, unsigned char *out,
slouken@1895
   841
                       int rows, int cols, int mod)
slouken@0
   842
{
slouken@1895
   843
    unsigned int *row = (unsigned int *) out;
slouken@1895
   844
    const int next_row = cols * 2 + mod;
slouken@0
   845
    int x, y;
slouken@0
   846
    int cr_r;
slouken@0
   847
    int crb_g;
slouken@0
   848
    int cb_b;
slouken@0
   849
    int cols_2 = cols / 2;
slouken@1895
   850
    mod += mod;
slouken@0
   851
    y = rows;
slouken@1895
   852
    while (y--) {
slouken@0
   853
        x = cols_2;
slouken@1895
   854
        while (x--) {
slouken@0
   855
            register int L;
slouken@0
   856
slouken@1895
   857
            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
slouken@1895
   858
            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
slouken@1895
   859
                + colortab[*cb + 2 * 256];
slouken@1895
   860
            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
slouken@1895
   861
            cr += 4;
slouken@1895
   862
            cb += 4;
slouken@0
   863
slouken@1895
   864
            L = *lum;
slouken@1895
   865
            lum += 2;
slouken@1895
   866
            row[0] = row[1] = row[next_row] = row[next_row + 1] =
slouken@1895
   867
                (rgb_2_pix[L + cr_r] |
slouken@1895
   868
                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@0
   869
            row += 2;
slouken@0
   870
slouken@1895
   871
            L = *lum;
slouken@1895
   872
            lum += 2;
slouken@1895
   873
            row[0] = row[1] = row[next_row] = row[next_row + 1] =
slouken@1895
   874
                (rgb_2_pix[L + cr_r] |
slouken@1895
   875
                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
slouken@0
   876
            row += 2;
slouken@0
   877
slouken@0
   878
slouken@0
   879
        }
slouken@0
   880
slouken@0
   881
        row += next_row;
slouken@0
   882
    }
slouken@0
   883
}
slouken@0
   884
slouken@0
   885
/*
slouken@0
   886
 * How many 1 bits are there in the Uint32.
slouken@0
   887
 * Low performance, do not call often.
slouken@0
   888
 */
slouken@1895
   889
static int
slouken@1895
   890
number_of_bits_set(Uint32 a)
slouken@0
   891
{
slouken@1895
   892
    if (!a)
slouken@1895
   893
        return 0;
slouken@1895
   894
    if (a & 1)
slouken@1895
   895
        return 1 + number_of_bits_set(a >> 1);
slouken@1895
   896
    return (number_of_bits_set(a >> 1));
slouken@0
   897
}
slouken@0
   898
slouken@0
   899
/*
slouken@0
   900
 * How many 0 bits are there at least significant end of Uint32.
slouken@0
   901
 * Low performance, do not call often.
slouken@0
   902
 */
slouken@1895
   903
static int
icculus@10650
   904
free_bits_at_bottom_nonzero(Uint32 a)
icculus@10650
   905
{
icculus@10650
   906
    SDL_assert(a != 0);
icculus@10650
   907
    return (((Sint32) a) & 1l) ? 0 : 1 + free_bits_at_bottom_nonzero(a >> 1);
icculus@10650
   908
}
icculus@10650
   909
icculus@10650
   910
static SDL_INLINE int
slouken@1895
   911
free_bits_at_bottom(Uint32 a)
slouken@0
   912
{
icculus@10650
   913
    return a ? free_bits_at_bottom_nonzero(a) : 32;
slouken@0
   914
}
slouken@0
   915
slouken@1895
   916
static int
slouken@1895
   917
SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
slouken@1895
   918
{
slouken@1895
   919
    Uint32 *r_2_pix_alloc;
slouken@1895
   920
    Uint32 *g_2_pix_alloc;
slouken@1895
   921
    Uint32 *b_2_pix_alloc;
slouken@1895
   922
    int i;
slouken@1895
   923
    int bpp;
slouken@1895
   924
    Uint32 Rmask, Gmask, Bmask, Amask;
icculus@10650
   925
    int freebits;
slouken@0
   926
slouken@1895
   927
    if (!SDL_PixelFormatEnumToMasks
slouken@1895
   928
        (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
icculus@7037
   929
        return SDL_SetError("Unsupported YUV destination format");
slouken@1895
   930
    }
slouken@0
   931
slouken@1895
   932
    swdata->target_format = target_format;
slouken@1895
   933
    r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
slouken@1895
   934
    g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
slouken@1895
   935
    b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
slouken@0
   936
slouken@7191
   937
    /*
slouken@1895
   938
     * Set up entries 0-255 in rgb-to-pixel value tables.
slouken@1895
   939
     */
slouken@1895
   940
    for (i = 0; i < 256; ++i) {
slouken@1895
   941
        r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
icculus@10650
   942
        freebits = free_bits_at_bottom(Rmask);
icculus@10650
   943
        if (freebits < 32) {
icculus@10650
   944
            r_2_pix_alloc[i + 256] <<= freebits;
icculus@10650
   945
        }
slouken@2795
   946
        r_2_pix_alloc[i + 256] |= Amask;
icculus@10650
   947
slouken@1895
   948
        g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
icculus@10650
   949
        freebits = free_bits_at_bottom(Gmask);
icculus@10650
   950
        if (freebits < 32) {
icculus@10650
   951
            g_2_pix_alloc[i + 256] <<= freebits;
icculus@10650
   952
        }
slouken@2795
   953
        g_2_pix_alloc[i + 256] |= Amask;
icculus@10650
   954
slouken@1895
   955
        b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
icculus@10650
   956
        freebits = free_bits_at_bottom(Bmask);
icculus@10650
   957
        if (freebits < 32) {
icculus@10650
   958
            b_2_pix_alloc[i + 256] <<= freebits;
icculus@10650
   959
        }
slouken@2795
   960
        b_2_pix_alloc[i + 256] |= Amask;
slouken@1895
   961
    }
slouken@0
   962
slouken@1895
   963
    /*
slouken@1895
   964
     * If we have 16-bit output depth, then we double the value
slouken@1895
   965
     * in the top word. This means that we can write out both
slouken@7191
   966
     * pixels in the pixel doubling mode with one op. It is
slouken@1895
   967
     * harmless in the normal case as storing a 32-bit value
slouken@1895
   968
     * through a short pointer will lose the top bits anyway.
slouken@1895
   969
     */
slouken@1895
   970
    if (SDL_BYTESPERPIXEL(target_format) == 2) {
slouken@1895
   971
        for (i = 0; i < 256; ++i) {
slouken@1895
   972
            r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
slouken@1895
   973
            g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
slouken@1895
   974
            b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
slouken@1895
   975
        }
slouken@1895
   976
    }
slouken@0
   977
slouken@1895
   978
    /*
slouken@1895
   979
     * Spread out the values we have to the rest of the array so that
slouken@1895
   980
     * we do not need to check for overflow.
slouken@1895
   981
     */
slouken@1895
   982
    for (i = 0; i < 256; ++i) {
slouken@1895
   983
        r_2_pix_alloc[i] = r_2_pix_alloc[256];
slouken@1895
   984
        r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
slouken@1895
   985
        g_2_pix_alloc[i] = g_2_pix_alloc[256];
slouken@1895
   986
        g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
slouken@1895
   987
        b_2_pix_alloc[i] = b_2_pix_alloc[256];
slouken@1895
   988
        b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
slouken@1895
   989
    }
slouken@0
   990
slouken@1895
   991
    /* You have chosen wisely... */
slouken@2781
   992
    switch (swdata->format) {
slouken@1965
   993
    case SDL_PIXELFORMAT_YV12:
slouken@1965
   994
    case SDL_PIXELFORMAT_IYUV:
slouken@1895
   995
        if (SDL_BYTESPERPIXEL(target_format) == 2) {
icculus@11156
   996
#ifdef USE_MMX_ASSEMBLY
slouken@1895
   997
            /* inline assembly functions */
slouken@1895
   998
            if (SDL_HasMMX() && (Rmask == 0xF800) &&
slouken@2172
   999
                (Gmask == 0x07E0) && (Bmask == 0x001F)
slouken@2781
  1000
                && (swdata->w & 15) == 0) {
gabomdq@7678
  1001
/* printf("Using MMX 16-bit 565 dither\n"); */
slouken@1895
  1002
                swdata->Display1X = Color565DitherYV12MMX1X;
slouken@1895
  1003
            } else {
gabomdq@7678
  1004
/* printf("Using C 16-bit dither\n"); */
slouken@1895
  1005
                swdata->Display1X = Color16DitherYV12Mod1X;
slouken@1895
  1006
            }
slouken@1895
  1007
#else
slouken@1895
  1008
            swdata->Display1X = Color16DitherYV12Mod1X;
slouken@1895
  1009
#endif
slouken@1895
  1010
            swdata->Display2X = Color16DitherYV12Mod2X;
slouken@1895
  1011
        }
slouken@1895
  1012
        if (SDL_BYTESPERPIXEL(target_format) == 3) {
slouken@1895
  1013
            swdata->Display1X = Color24DitherYV12Mod1X;
slouken@1895
  1014
            swdata->Display2X = Color24DitherYV12Mod2X;
slouken@1895
  1015
        }
slouken@1895
  1016
        if (SDL_BYTESPERPIXEL(target_format) == 4) {
icculus@11156
  1017
#ifdef USE_MMX_ASSEMBLY
slouken@1895
  1018
            /* inline assembly functions */
slouken@1895
  1019
            if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
slouken@1895
  1020
                (Gmask == 0x0000FF00) &&
slouken@2781
  1021
                (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
gabomdq@7678
  1022
/* printf("Using MMX 32-bit dither\n"); */
slouken@1895
  1023
                swdata->Display1X = ColorRGBDitherYV12MMX1X;
slouken@1895
  1024
            } else {
gabomdq@7678
  1025
/* printf("Using C 32-bit dither\n"); */
slouken@1895
  1026
                swdata->Display1X = Color32DitherYV12Mod1X;
slouken@1895
  1027
            }
slouken@1895
  1028
#else
slouken@1895
  1029
            swdata->Display1X = Color32DitherYV12Mod1X;
slouken@1895
  1030
#endif
slouken@1895
  1031
            swdata->Display2X = Color32DitherYV12Mod2X;
slouken@1895
  1032
        }
slouken@1895
  1033
        break;
slouken@1965
  1034
    case SDL_PIXELFORMAT_YUY2:
slouken@1965
  1035
    case SDL_PIXELFORMAT_UYVY:
slouken@1965
  1036
    case SDL_PIXELFORMAT_YVYU:
slouken@1895
  1037
        if (SDL_BYTESPERPIXEL(target_format) == 2) {
slouken@1895
  1038
            swdata->Display1X = Color16DitherYUY2Mod1X;
slouken@1895
  1039
            swdata->Display2X = Color16DitherYUY2Mod2X;
slouken@1895
  1040
        }
slouken@1895
  1041
        if (SDL_BYTESPERPIXEL(target_format) == 3) {
slouken@1895
  1042
            swdata->Display1X = Color24DitherYUY2Mod1X;
slouken@1895
  1043
            swdata->Display2X = Color24DitherYUY2Mod2X;
slouken@1895
  1044
        }
slouken@1895
  1045
        if (SDL_BYTESPERPIXEL(target_format) == 4) {
slouken@1895
  1046
            swdata->Display1X = Color32DitherYUY2Mod1X;
slouken@1895
  1047
            swdata->Display2X = Color32DitherYUY2Mod2X;
slouken@1895
  1048
        }
slouken@1895
  1049
        break;
slouken@11574
  1050
    case SDL_PIXELFORMAT_NV21:
slouken@11574
  1051
    case SDL_PIXELFORMAT_NV12:
slouken@11574
  1052
        /* no Display{1,2}X function */
slouken@11574
  1053
        swdata->Display1X = NULL;
slouken@11574
  1054
        swdata->Display2X = NULL;
slouken@11574
  1055
        break;
slouken@11574
  1056
slouken@1895
  1057
    default:
slouken@1895
  1058
        /* We should never get here (caught above) */
slouken@1895
  1059
        break;
slouken@1895
  1060
    }
slouken@0
  1061
slouken@7720
  1062
    SDL_FreeSurface(swdata->display);
slouken@7720
  1063
    swdata->display = NULL;
slouken@1895
  1064
    return 0;
slouken@0
  1065
}
slouken@0
  1066
slouken@1895
  1067
SDL_SW_YUVTexture *
slouken@2781
  1068
SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
slouken@0
  1069
{
slouken@1895
  1070
    SDL_SW_YUVTexture *swdata;
slouken@1895
  1071
    int *Cr_r_tab;
slouken@1895
  1072
    int *Cr_g_tab;
slouken@1895
  1073
    int *Cb_g_tab;
slouken@1895
  1074
    int *Cb_b_tab;
slouken@1895
  1075
    int i;
slouken@1895
  1076
    int CR, CB;
slouken@1895
  1077
slouken@2781
  1078
    switch (format) {
slouken@1965
  1079
    case SDL_PIXELFORMAT_YV12:
slouken@1965
  1080
    case SDL_PIXELFORMAT_IYUV:
slouken@1965
  1081
    case SDL_PIXELFORMAT_YUY2:
slouken@1965
  1082
    case SDL_PIXELFORMAT_UYVY:
slouken@1965
  1083
    case SDL_PIXELFORMAT_YVYU:
slouken@11574
  1084
    case SDL_PIXELFORMAT_NV12:
slouken@11574
  1085
    case SDL_PIXELFORMAT_NV21:
slouken@1895
  1086
        break;
slouken@1895
  1087
    default:
slouken@1895
  1088
        SDL_SetError("Unsupported YUV format");
slouken@1895
  1089
        return NULL;
slouken@1895
  1090
    }
slouken@1895
  1091
icculus@7487
  1092
    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
icculus@7487
  1093
    if (!swdata) {
icculus@7487
  1094
        SDL_OutOfMemory();
icculus@7487
  1095
        return NULL;
icculus@7487
  1096
    }
icculus@7487
  1097
slouken@2781
  1098
    swdata->format = format;
slouken@1965
  1099
    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
slouken@2786
  1100
    swdata->w = w;
slouken@2786
  1101
    swdata->h = h;
slouken@11574
  1102
    {
slouken@11574
  1103
        const int sz_plane         = w * h;
slouken@11574
  1104
        const int sz_plane_chroma  = ((w + 1) / 2) * ((h + 1) / 2);
slouken@11574
  1105
        const int sz_plane_packed  = ((w + 1) / 2) * h;
slouken@11574
  1106
        int dst_size = 0;     
slouken@11574
  1107
        switch(format) 
slouken@11574
  1108
        {
slouken@11574
  1109
            case SDL_PIXELFORMAT_YV12: /**< Planar mode: Y + V + U  (3 planes) */
slouken@11574
  1110
            case SDL_PIXELFORMAT_IYUV: /**< Planar mode: Y + U + V  (3 planes) */
slouken@11574
  1111
                dst_size = sz_plane + sz_plane_chroma + sz_plane_chroma;
slouken@11574
  1112
                break;
slouken@11574
  1113
slouken@11574
  1114
            case SDL_PIXELFORMAT_YUY2: /**< Packed mode: Y0+U0+Y1+V0 (1 plane) */
slouken@11574
  1115
            case SDL_PIXELFORMAT_UYVY: /**< Packed mode: U0+Y0+V0+Y1 (1 plane) */
slouken@11574
  1116
            case SDL_PIXELFORMAT_YVYU: /**< Packed mode: Y0+V0+Y1+U0 (1 plane) */
slouken@11574
  1117
                dst_size = 4 * sz_plane_packed;
slouken@11574
  1118
                break;
slouken@11574
  1119
slouken@11574
  1120
            case SDL_PIXELFORMAT_NV12: /**< Planar mode: Y + U/V interleaved  (2 planes) */
slouken@11574
  1121
            case SDL_PIXELFORMAT_NV21: /**< Planar mode: Y + V/U interleaved  (2 planes) */
slouken@11574
  1122
                dst_size = sz_plane + sz_plane_chroma + sz_plane_chroma;
slouken@11574
  1123
                break;
slouken@11574
  1124
slouken@11574
  1125
            default:
slouken@11574
  1126
                SDL_assert(0 && "We should never get here (caught above)");
slouken@11574
  1127
                break;
slouken@11574
  1128
        }
slouken@11574
  1129
        swdata->pixels = (Uint8 *) SDL_malloc(dst_size);
slouken@11574
  1130
    }
slouken@1895
  1131
    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
slouken@1895
  1132
    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
slouken@1895
  1133
    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
icculus@7037
  1134
        SDL_SW_DestroyYUVTexture(swdata);
slouken@1895
  1135
        SDL_OutOfMemory();
slouken@1895
  1136
        return NULL;
slouken@1895
  1137
    }
slouken@1895
  1138
slouken@1895
  1139
    /* Generate the tables for the display surface */
slouken@1895
  1140
    Cr_r_tab = &swdata->colortab[0 * 256];
slouken@1895
  1141
    Cr_g_tab = &swdata->colortab[1 * 256];
slouken@1895
  1142
    Cb_g_tab = &swdata->colortab[2 * 256];
slouken@1895
  1143
    Cb_b_tab = &swdata->colortab[3 * 256];
slouken@1895
  1144
    for (i = 0; i < 256; i++) {
slouken@1895
  1145
        /* Gamma correction (luminescence table) and chroma correction
slouken@1895
  1146
           would be done here.  See the Berkeley mpeg_play sources.
slouken@1895
  1147
         */
slouken@1895
  1148
        CB = CR = (i - 128);
slouken@1895
  1149
        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
slouken@1895
  1150
        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
slouken@1895
  1151
        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
slouken@1895
  1152
        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
slouken@1895
  1153
    }
slouken@1895
  1154
slouken@1895
  1155
    /* Find the pitch and offset values for the overlay */
slouken@2781
  1156
    switch (format) {
slouken@1965
  1157
    case SDL_PIXELFORMAT_YV12:
slouken@1965
  1158
    case SDL_PIXELFORMAT_IYUV:
slouken@2781
  1159
        swdata->pitches[0] = w;
slouken@11574
  1160
        swdata->pitches[1] = (swdata->pitches[0] + 1) / 2;
slouken@11574
  1161
        swdata->pitches[2] = (swdata->pitches[0] + 1) / 2;
slouken@1895
  1162
        swdata->planes[0] = swdata->pixels;
slouken@2786
  1163
        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
slouken@11574
  1164
        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * ((h + 1) / 2);
slouken@1895
  1165
        break;
slouken@1965
  1166
    case SDL_PIXELFORMAT_YUY2:
slouken@1965
  1167
    case SDL_PIXELFORMAT_UYVY:
slouken@1965
  1168
    case SDL_PIXELFORMAT_YVYU:
slouken@11574
  1169
        swdata->pitches[0] = ((w + 1) / 2) * 4;
slouken@1895
  1170
        swdata->planes[0] = swdata->pixels;
slouken@1895
  1171
        break;
slouken@11574
  1172
slouken@11574
  1173
    case SDL_PIXELFORMAT_NV12:
slouken@11574
  1174
    case SDL_PIXELFORMAT_NV21:
slouken@11574
  1175
        swdata->pitches[0] = w;
slouken@11574
  1176
        swdata->pitches[1] = 2 * ((swdata->pitches[0] + 1) / 2);
slouken@11574
  1177
        swdata->planes[0] = swdata->pixels;
slouken@11574
  1178
        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
slouken@11574
  1179
        break;
slouken@11574
  1180
slouken@1895
  1181
    default:
icculus@7487
  1182
        SDL_assert(0 && "We should never get here (caught above)");
slouken@1895
  1183
        break;
slouken@1895
  1184
    }
slouken@1895
  1185
slouken@1895
  1186
    /* We're all done.. */
slouken@1895
  1187
    return (swdata);
slouken@0
  1188
}
slouken@0
  1189
slouken@1895
  1190
int
slouken@1895
  1191
SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
slouken@1895
  1192
                             int *pitch)
slouken@0
  1193
{
slouken@1895
  1194
    *pixels = swdata->planes[0];
slouken@1895
  1195
    *pitch = swdata->pitches[0];
slouken@1895
  1196
    return 0;
slouken@0
  1197
}
slouken@0
  1198
slouken@1895
  1199
int
slouken@1895
  1200
SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
slouken@1895
  1201
                        const void *pixels, int pitch)
slouken@0
  1202
{
slouken@2781
  1203
    switch (swdata->format) {
slouken@1965
  1204
    case SDL_PIXELFORMAT_YV12:
slouken@1965
  1205
    case SDL_PIXELFORMAT_IYUV:
slouken@6136
  1206
        if (rect->x == 0 && rect->y == 0 &&
slouken@6136
  1207
            rect->w == swdata->w && rect->h == swdata->h) {
slouken@6136
  1208
                SDL_memcpy(swdata->pixels, pixels,
slouken@11574
  1209
                           (swdata->h * swdata->w) + 2* ((swdata->h + 1) /2) * ((swdata->w + 1) / 2));
slouken@6136
  1210
        } else {
slouken@6136
  1211
            Uint8 *src, *dst;
slouken@6136
  1212
            int row;
slouken@6136
  1213
            size_t length;
slouken@6136
  1214
slouken@6136
  1215
            /* Copy the Y plane */
slouken@6136
  1216
            src = (Uint8 *) pixels;
slouken@6136
  1217
            dst = swdata->pixels + rect->y * swdata->w + rect->x;
slouken@6136
  1218
            length = rect->w;
slouken@6136
  1219
            for (row = 0; row < rect->h; ++row) {
slouken@6136
  1220
                SDL_memcpy(dst, src, length);
slouken@6136
  1221
                src += pitch;
slouken@6136
  1222
                dst += swdata->w;
slouken@6136
  1223
            }
slouken@11574
  1224
            
slouken@6136
  1225
            /* Copy the next plane */
slouken@6136
  1226
            src = (Uint8 *) pixels + rect->h * pitch;
slouken@6136
  1227
            dst = swdata->pixels + swdata->h * swdata->w;
slouken@11574
  1228
            dst += rect->y/2 * ((swdata->w + 1) / 2) + rect->x/2;
slouken@11574
  1229
            length = (rect->w + 1) / 2;
slouken@11574
  1230
            for (row = 0; row < (rect->h + 1)/2; ++row) {
slouken@6136
  1231
                SDL_memcpy(dst, src, length);
slouken@11574
  1232
                src += (pitch + 1)/2;
slouken@11574
  1233
                dst += (swdata->w + 1)/2;
slouken@6136
  1234
            }
slouken@6136
  1235
slouken@6136
  1236
            /* Copy the next plane */
slouken@11574
  1237
            src = (Uint8 *) pixels + rect->h * pitch + ((rect->h + 1) / 2) * ((pitch + 1) / 2);
slouken@6136
  1238
            dst = swdata->pixels + swdata->h * swdata->w +
slouken@11574
  1239
                  ((swdata->h + 1)/2) * ((swdata->w+1) / 2);
slouken@11574
  1240
            dst += rect->y/2 * ((swdata->w + 1)/2) + rect->x/2;
slouken@11574
  1241
            length = (rect->w + 1) / 2;
slouken@11574
  1242
            for (row = 0; row < (rect->h + 1)/2; ++row) {
slouken@6136
  1243
                SDL_memcpy(dst, src, length);
slouken@11574
  1244
                src += (pitch + 1)/2;
slouken@11574
  1245
                dst += (swdata->w + 1)/2;
slouken@6136
  1246
            }
slouken@1895
  1247
        }
slouken@1895
  1248
        break;
slouken@1965
  1249
    case SDL_PIXELFORMAT_YUY2:
slouken@1965
  1250
    case SDL_PIXELFORMAT_UYVY:
slouken@1965
  1251
    case SDL_PIXELFORMAT_YVYU:
slouken@1895
  1252
        {
slouken@1895
  1253
            Uint8 *src, *dst;
slouken@1895
  1254
            int row;
slouken@1895
  1255
            size_t length;
slouken@0
  1256
slouken@1895
  1257
            src = (Uint8 *) pixels;
slouken@1895
  1258
            dst =
slouken@1895
  1259
                swdata->planes[0] + rect->y * swdata->pitches[0] +
slouken@1895
  1260
                rect->x * 2;
slouken@11574
  1261
            length = 4 * ((rect->w + 1) / 2);
slouken@1895
  1262
            for (row = 0; row < rect->h; ++row) {
slouken@1895
  1263
                SDL_memcpy(dst, src, length);
slouken@1895
  1264
                src += pitch;
slouken@1895
  1265
                dst += swdata->pitches[0];
slouken@1895
  1266
            }
slouken@1895
  1267
        }
slouken@1895
  1268
        break;
slouken@11574
  1269
    case SDL_PIXELFORMAT_NV12:
slouken@11574
  1270
    case SDL_PIXELFORMAT_NV21:
slouken@11574
  1271
        {
slouken@11574
  1272
            if (rect->x == 0 && rect->y == 0 && rect->w == swdata->w && rect->h == swdata->h) {
slouken@11574
  1273
                SDL_memcpy(swdata->pixels, pixels,
slouken@11574
  1274
                        (swdata->h * swdata->w) + 2* ((swdata->h + 1) /2) * ((swdata->w + 1) / 2));
slouken@11574
  1275
            } else {
slouken@11574
  1276
slouken@11574
  1277
                Uint8 *src, *dst;
slouken@11574
  1278
                int row;
slouken@11574
  1279
                size_t length;
slouken@11574
  1280
slouken@11574
  1281
                /* Copy the Y plane */
slouken@11574
  1282
                src = (Uint8 *) pixels;
slouken@11574
  1283
                dst = swdata->pixels + rect->y * swdata->w + rect->x;
slouken@11574
  1284
                length = rect->w;
slouken@11574
  1285
                for (row = 0; row < rect->h; ++row) {
slouken@11574
  1286
                    SDL_memcpy(dst, src, length);
slouken@11574
  1287
                    src += pitch;
slouken@11574
  1288
                    dst += swdata->w;
slouken@11574
  1289
                }
slouken@11574
  1290
                
slouken@11574
  1291
                /* Copy the next plane */
slouken@11574
  1292
                src = (Uint8 *) pixels + rect->h * pitch;
slouken@11574
  1293
                dst = swdata->pixels + swdata->h * swdata->w;
slouken@11574
  1294
                dst += 2 * ((rect->y + 1)/2) * ((swdata->w + 1) / 2) + 2 * (rect->x/2);
slouken@11574
  1295
                length = 2 * ((rect->w + 1) / 2);
slouken@11574
  1296
                for (row = 0; row < (rect->h + 1)/2; ++row) {
slouken@11574
  1297
                    SDL_memcpy(dst, src, length);
slouken@11574
  1298
                    src += 2 * ((pitch + 1)/2);
slouken@11574
  1299
                    dst += 2 * ((swdata->w + 1)/2);
slouken@11574
  1300
                }
slouken@11574
  1301
            }
slouken@11574
  1302
        }
slouken@11574
  1303
        break;
slouken@11574
  1304
slouken@1895
  1305
    }
slouken@1895
  1306
    return 0;
slouken@0
  1307
}
slouken@0
  1308
slouken@1895
  1309
int
slouken@7759
  1310
SDL_SW_UpdateYUVTexturePlanar(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
slouken@7759
  1311
                              const Uint8 *Yplane, int Ypitch,
slouken@7759
  1312
                              const Uint8 *Uplane, int Upitch,
slouken@7759
  1313
                              const Uint8 *Vplane, int Vpitch)
slouken@7759
  1314
{
slouken@7777
  1315
    const Uint8 *src;
slouken@7777
  1316
    Uint8 *dst;
slouken@7759
  1317
    int row;
slouken@7759
  1318
    size_t length;
slouken@7759
  1319
slouken@7759
  1320
    /* Copy the Y plane */
slouken@7759
  1321
    src = Yplane;
slouken@7759
  1322
    dst = swdata->pixels + rect->y * swdata->w + rect->x;
slouken@7759
  1323
    length = rect->w;
slouken@7759
  1324
    for (row = 0; row < rect->h; ++row) {
slouken@7759
  1325
        SDL_memcpy(dst, src, length);
slouken@7759
  1326
        src += Ypitch;
slouken@7759
  1327
        dst += swdata->w;
slouken@7759
  1328
    }
slouken@7759
  1329
slouken@7759
  1330
    /* Copy the U plane */
slouken@7759
  1331
    src = Uplane;
slouken@7759
  1332
    if (swdata->format == SDL_PIXELFORMAT_IYUV) {
slouken@7759
  1333
        dst = swdata->pixels + swdata->h * swdata->w;
slouken@7759
  1334
    } else {
slouken@7759
  1335
        dst = swdata->pixels + swdata->h * swdata->w +
slouken@11574
  1336
              ((swdata->h + 1) / 2) * ((swdata->w + 1) / 2);
slouken@7759
  1337
    }
slouken@11574
  1338
    dst += rect->y/2 * ((swdata->w + 1)/2) + rect->x/2;
slouken@11574
  1339
    length = (rect->w + 1) / 2;
slouken@11574
  1340
    for (row = 0; row < (rect->h + 1)/2; ++row) {
slouken@7759
  1341
        SDL_memcpy(dst, src, length);
slouken@7759
  1342
        src += Upitch;
slouken@11574
  1343
        dst += (swdata->w + 1)/2;
slouken@7759
  1344
    }
slouken@7759
  1345
slouken@7759
  1346
    /* Copy the V plane */
slouken@7759
  1347
    src = Vplane;
slouken@7759
  1348
    if (swdata->format == SDL_PIXELFORMAT_YV12) {
slouken@7759
  1349
        dst = swdata->pixels + swdata->h * swdata->w;
slouken@7759
  1350
    } else {
slouken@7759
  1351
        dst = swdata->pixels + swdata->h * swdata->w +
slouken@11574
  1352
              ((swdata->h + 1) / 2) * ((swdata->w + 1) / 2);
slouken@7759
  1353
    }
slouken@11574
  1354
    dst += rect->y/2 * ((swdata->w + 1)/2) + rect->x/2;
slouken@11574
  1355
    length = (rect->w + 1) / 2;
slouken@11574
  1356
    for (row = 0; row < (rect->h + 1)/2; ++row) {
slouken@7759
  1357
        SDL_memcpy(dst, src, length);
slouken@7759
  1358
        src += Vpitch;
slouken@11574
  1359
        dst += (swdata->w + 1)/2;
slouken@7759
  1360
    }
slouken@7759
  1361
    return 0;
slouken@7759
  1362
}
slouken@7759
  1363
slouken@7759
  1364
int
slouken@1895
  1365
SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
slouken@5156
  1366
                      void **pixels, int *pitch)
slouken@0
  1367
{
slouken@2781
  1368
    switch (swdata->format) {
slouken@1965
  1369
    case SDL_PIXELFORMAT_YV12:
slouken@1965
  1370
    case SDL_PIXELFORMAT_IYUV:
slouken@11574
  1371
    case SDL_PIXELFORMAT_NV12:
slouken@11574
  1372
    case SDL_PIXELFORMAT_NV21:
slouken@1895
  1373
        if (rect
slouken@2781
  1374
            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
slouken@2781
  1375
                || rect->h != swdata->h)) {
icculus@7037
  1376
            return SDL_SetError
slouken@11574
  1377
                ("YV12, IYUV, NV12, NV21 textures only support full surface locks");
slouken@1895
  1378
        }
slouken@1895
  1379
        break;
slouken@1895
  1380
    }
slouken@1895
  1381
slouken@7332
  1382
    if (rect) {
slouken@7332
  1383
        *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
slouken@7332
  1384
    } else {
slouken@7332
  1385
        *pixels = swdata->planes[0];
slouken@7332
  1386
    }
slouken@1895
  1387
    *pitch = swdata->pitches[0];
slouken@1895
  1388
    return 0;
slouken@0
  1389
}
slouken@1895
  1390
slouken@1895
  1391
void
slouken@1895
  1392
SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
slouken@1895
  1393
{
slouken@1895
  1394
}
slouken@1895
  1395
slouken@1895
  1396
int
slouken@1895
  1397
SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
slouken@1895
  1398
                    Uint32 target_format, int w, int h, void *pixels,
slouken@1895
  1399
                    int pitch)
slouken@1895
  1400
{
icculus@8643
  1401
    const int targetbpp = SDL_BYTESPERPIXEL(target_format);
slouken@1895
  1402
    int stretch;
slouken@1895
  1403
    int scale_2x;
slouken@1895
  1404
    Uint8 *lum, *Cr, *Cb;
slouken@1895
  1405
    int mod;
slouken@1895
  1406
icculus@8643
  1407
    if (targetbpp == 0) {
icculus@8643
  1408
        return SDL_SetError("Invalid target pixel format");
icculus@8643
  1409
    }
icculus@8643
  1410
slouken@1895
  1411
    /* Make sure we're set up to display in the desired format */
slouken@1895
  1412
    if (target_format != swdata->target_format) {
slouken@1895
  1413
        if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
slouken@1895
  1414
            return -1;
slouken@1895
  1415
        }
slouken@1895
  1416
    }
slouken@1895
  1417
slouken@1895
  1418
    stretch = 0;
slouken@1895
  1419
    scale_2x = 0;
slouken@2781
  1420
    if (srcrect->x || srcrect->y || srcrect->w < swdata->w
slouken@2781
  1421
        || srcrect->h < swdata->h) {
slouken@1895
  1422
        /* The source rectangle has been clipped.
slouken@1895
  1423
           Using a scratch surface is easier than adding clipped
slouken@1895
  1424
           source support to all the blitters, plus that would
slouken@1895
  1425
           slow them down in the general unclipped case.
slouken@1895
  1426
         */
slouken@1895
  1427
        stretch = 1;
slouken@1895
  1428
    } else if ((srcrect->w != w) || (srcrect->h != h)) {
slouken@1895
  1429
        if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
slouken@1895
  1430
            scale_2x = 1;
slouken@1895
  1431
        } else {
slouken@1895
  1432
            stretch = 1;
slouken@1895
  1433
        }
slouken@1895
  1434
    }
slouken@1895
  1435
    if (stretch) {
slouken@1895
  1436
        int bpp;
slouken@1895
  1437
        Uint32 Rmask, Gmask, Bmask, Amask;
slouken@1895
  1438
slouken@1895
  1439
        if (swdata->display) {
slouken@1895
  1440
            swdata->display->w = w;
slouken@1895
  1441
            swdata->display->h = h;
slouken@1895
  1442
            swdata->display->pixels = pixels;
slouken@1895
  1443
            swdata->display->pitch = pitch;
slouken@1895
  1444
        } else {
slouken@1895
  1445
            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
slouken@1895
  1446
            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
slouken@1895
  1447
                                       &Bmask, &Amask);
slouken@1895
  1448
            swdata->display =
slouken@1895
  1449
                SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
slouken@1895
  1450
                                         Gmask, Bmask, Amask);
slouken@1895
  1451
            if (!swdata->display) {
slouken@1895
  1452
                return (-1);
slouken@1895
  1453
            }
slouken@1895
  1454
        }
slouken@1895
  1455
        if (!swdata->stretch) {
slouken@1895
  1456
            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
slouken@1895
  1457
            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
slouken@1895
  1458
                                       &Bmask, &Amask);
slouken@1895
  1459
            swdata->stretch =
slouken@2781
  1460
                SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
slouken@1895
  1461
                                     Gmask, Bmask, Amask);
slouken@1895
  1462
            if (!swdata->stretch) {
slouken@1895
  1463
                return (-1);
slouken@1895
  1464
            }
slouken@1895
  1465
        }
slouken@1895
  1466
        pixels = swdata->stretch->pixels;
slouken@1895
  1467
        pitch = swdata->stretch->pitch;
slouken@1895
  1468
    }
slouken@2781
  1469
    switch (swdata->format) {
slouken@1965
  1470
    case SDL_PIXELFORMAT_YV12:
slouken@1895
  1471
        lum = swdata->planes[0];
slouken@1895
  1472
        Cr = swdata->planes[1];
slouken@1895
  1473
        Cb = swdata->planes[2];
slouken@1895
  1474
        break;
slouken@1965
  1475
    case SDL_PIXELFORMAT_IYUV:
slouken@1895
  1476
        lum = swdata->planes[0];
slouken@1895
  1477
        Cr = swdata->planes[2];
slouken@1895
  1478
        Cb = swdata->planes[1];
slouken@1895
  1479
        break;
slouken@1965
  1480
    case SDL_PIXELFORMAT_YUY2:
slouken@1895
  1481
        lum = swdata->planes[0];
slouken@1895
  1482
        Cr = lum + 3;
slouken@1895
  1483
        Cb = lum + 1;
slouken@1895
  1484
        break;
slouken@1965
  1485
    case SDL_PIXELFORMAT_UYVY:
slouken@1895
  1486
        lum = swdata->planes[0] + 1;
slouken@1895
  1487
        Cr = lum + 1;
slouken@1895
  1488
        Cb = lum - 1;
slouken@1895
  1489
        break;
slouken@1965
  1490
    case SDL_PIXELFORMAT_YVYU:
slouken@1895
  1491
        lum = swdata->planes[0];
slouken@1895
  1492
        Cr = lum + 1;
slouken@1895
  1493
        Cb = lum + 3;
slouken@1895
  1494
        break;
slouken@11574
  1495
    case SDL_PIXELFORMAT_NV12:
slouken@11574
  1496
    case SDL_PIXELFORMAT_NV21:
slouken@11574
  1497
        return SDL_ConvertPixels(swdata->w, swdata->h, 
slouken@11574
  1498
                swdata->format, swdata->planes[0], swdata->pitches[0], 
slouken@11574
  1499
                target_format, pixels, pitch);
slouken@11574
  1500
        break;
slouken@1895
  1501
    default:
icculus@7037
  1502
        return SDL_SetError("Unsupported YUV format in copy");
slouken@1895
  1503
    }
icculus@8643
  1504
    mod = (pitch / targetbpp);
slouken@1895
  1505
slouken@1895
  1506
    if (scale_2x) {
slouken@2781
  1507
        mod -= (swdata->w * 2);
slouken@1895
  1508
        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
slouken@2781
  1509
                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
slouken@1895
  1510
    } else {
slouken@2781
  1511
        mod -= swdata->w;
slouken@1895
  1512
        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
slouken@2781
  1513
                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
slouken@1895
  1514
    }
slouken@1895
  1515
    if (stretch) {
slouken@1895
  1516
        SDL_Rect rect = *srcrect;
slouken@1895
  1517
        SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
slouken@1895
  1518
    }
slouken@1895
  1519
    return 0;
slouken@1895
  1520
}
slouken@1895
  1521
slouken@1895
  1522
void
slouken@1895
  1523
SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
slouken@1895
  1524
{
slouken@1895
  1525
    if (swdata) {
slouken@7719
  1526
        SDL_free(swdata->pixels);
slouken@7719
  1527
        SDL_free(swdata->colortab);
slouken@7719
  1528
        SDL_free(swdata->rgb_2_pix);
slouken@7720
  1529
        SDL_FreeSurface(swdata->stretch);
slouken@7720
  1530
        SDL_FreeSurface(swdata->display);
slouken@1895
  1531
        SDL_free(swdata);
slouken@1895
  1532
    }
slouken@1895
  1533
}
slouken@1895
  1534
slouken@1895
  1535
/* vi: set ts=4 sw=4 expandtab: */