BlitNtoN BlitNtoNKey: remove non-aligned word read/store (bpp 3<->4) (Bug 4503)
author Sylvain Becker <sylvain.becker@gmail.com>
Fri, 22 Feb 2019 09:30:45 +0100
changeset 12616 8a160ecca90f
parent 12615 6f0eb21dd7d1
child 12617 c005c49beaa9
BlitNtoN BlitNtoNKey: remove non-aligned word read/store (bpp 3<->4) (Bug 4503)

MIPS and (older) ARM CPUs don't allow word reads/writes when the address isn't
4-byte aligned, so just remove those accesses.
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Tue Feb 19 23:46:54 2019 -0500
     1.2 +++ b/src/video/SDL_blit_N.c	Fri Feb 22 09:30:45 2019 +0100
     1.3 @@ -2251,8 +2251,57 @@
     1.4          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
     1.5          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
     1.6  
     1.7 -        Uint32 *src32 = (Uint32*)src;
     1.8 -        Uint32 *dst32 = (Uint32*)dst;
     1.9 +        /* Find the appropriate permutation */
    1.10 +        int alpha_channel, p0, p1, p2, p3;
    1.11 +        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
    1.12 +
    1.13 +        while (height--) {
    1.14 +            /* *INDENT-OFF* */
    1.15 +            DUFFS_LOOP(
    1.16 +            {
    1.17 +                dst[0] = src[p0];
    1.18 +                dst[1] = src[p1];
    1.19 +                dst[2] = src[p2];
    1.20 +                dst[3] = src[p3];
    1.21 +                dst[alpha_channel] = alpha;
    1.22 +                src += 4;
    1.23 +                dst += 4;
    1.24 +            }, width);
    1.25 +            /* *INDENT-ON* */
    1.26 +            src += srcskip;
    1.27 +            dst += dstskip;
    1.28 +        }
    1.29 +        return;
    1.30 +    }
    1.31 +
    1.32 +    /* Blit with permutation: 4->3 */
    1.33 +    if (srcbpp == 4 && dstbpp == 3 &&
    1.34 +        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.35 +
    1.36 +        /* Find the appropriate permutation */
    1.37 +        int p0, p1, p2, p3;
    1.38 +        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
    1.39 +
    1.40 +        while (height--) {
    1.41 +            /* *INDENT-OFF* */
    1.42 +            DUFFS_LOOP(
    1.43 +            {
    1.44 +                dst[0] = src[p0];
    1.45 +                dst[1] = src[p1];
    1.46 +                dst[2] = src[p2];
    1.47 +                src += 4;
    1.48 +                dst += 3;
    1.49 +            }, width);
    1.50 +            /* *INDENT-ON* */
    1.51 +            src += srcskip;
    1.52 +            dst += dstskip;
    1.53 +        }
    1.54 +        return;
    1.55 +    }
    1.56 +
    1.57 +    /* Blit with permutation: 3->4 */
    1.58 +    if (srcbpp == 3 && dstbpp == 4 &&
    1.59 +        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.60  
    1.61          /* Find the appropriate permutation */
    1.62          int alpha_channel, p0, p1, p2, p3;
    1.63 @@ -2262,77 +2311,17 @@
    1.64              /* *INDENT-OFF* */
    1.65              DUFFS_LOOP(
    1.66              {
    1.67 -                Uint8 *s8 = (Uint8 *)src32;
    1.68 -                Uint8 *d8 = (Uint8 *)dst32;
    1.69 -                d8[0] = s8[p0];
    1.70 -                d8[1] = s8[p1];
    1.71 -                d8[2] = s8[p2];
    1.72 -                d8[3] = s8[p3];
    1.73 -                d8[alpha_channel] = alpha;
    1.74 -                ++src32;
    1.75 -                ++dst32;
    1.76 -            }, width);
    1.77 -            /* *INDENT-ON* */
    1.78 -            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
    1.79 -            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
    1.80 -        }
    1.81 -        return;
    1.82 -    }
    1.83 -
    1.84 -    /* Blit with permutation: 4->3 */
    1.85 -    if (srcbpp == 4 && dstbpp == 3 &&
    1.86 -        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
    1.87 -
    1.88 -        Uint32 *src32 = (Uint32*)src;
    1.89 -
    1.90 -        /* Find the appropriate permutation */
    1.91 -        int p0, p1, p2, p3;
    1.92 -        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
    1.93 -
    1.94 -        while (height--) {
    1.95 -            /* *INDENT-OFF* */
    1.96 -            DUFFS_LOOP(
    1.97 -            {
    1.98 -                Uint8 *s8 = (Uint8 *)src32;
    1.99 -                dst[0] = s8[p0];
   1.100 -                dst[1] = s8[p1];
   1.101 -                dst[2] = s8[p2];
   1.102 -                ++src32;
   1.103 -                dst += 3;
   1.104 -            }, width);
   1.105 -            /* *INDENT-ON* */
   1.106 -            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.107 -            dst += dstskip;
   1.108 -        }
   1.109 -        return;
   1.110 -    }
   1.111 -
   1.112 -    /* Blit with permutation: 3->4 */
   1.113 -    if (srcbpp == 3 && dstbpp == 4 &&
   1.114 -        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.115 -
   1.116 -        Uint32 *dst32 = (Uint32*)dst;
   1.117 -
   1.118 -        /* Find the appropriate permutation */
   1.119 -        int alpha_channel, p0, p1, p2, p3;
   1.120 -        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
   1.121 -
   1.122 -        while (height--) {
   1.123 -            /* *INDENT-OFF* */
   1.124 -            DUFFS_LOOP(
   1.125 -            {
   1.126 -                Uint8 *d8 = (Uint8 *)dst32;
   1.127 -                d8[0] = src[p0];
   1.128 -                d8[1] = src[p1];
   1.129 -                d8[2] = src[p2];
   1.130 -                d8[3] = src[p3];
   1.131 -                d8[alpha_channel] = alpha;
   1.132 +                dst[0] = src[p0];
   1.133 +                dst[1] = src[p1];
   1.134 +                dst[2] = src[p2];
   1.135 +                dst[3] = src[p3];
   1.136 +                dst[alpha_channel] = alpha;
   1.137                  src += 3;
   1.138 -                ++dst32;
   1.139 +                dst += 4;
   1.140              }, width);
   1.141              /* *INDENT-ON* */
   1.142              src += srcskip;
   1.143 -            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.144 +            dst += dstskip;
   1.145          }
   1.146          return;
   1.147      }
   1.148 @@ -2377,9 +2366,6 @@
   1.149          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   1.150          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.151  
   1.152 -        Uint32 *src32 = (Uint32*)src;
   1.153 -        Uint32 *dst32 = (Uint32*)dst;
   1.154 -
   1.155          /* Find the appropriate permutation */
   1.156          int p0, p1, p2, p3;
   1.157          get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   1.158 @@ -2388,18 +2374,16 @@
   1.159              /* *INDENT-OFF* */
   1.160              DUFFS_LOOP(
   1.161              {
   1.162 -                Uint8 *s8 = (Uint8 *)src32;
   1.163 -                Uint8 *d8 = (Uint8 *)dst32;
   1.164 -                d8[0] = s8[p0];
   1.165 -                d8[1] = s8[p1];
   1.166 -                d8[2] = s8[p2];
   1.167 -                d8[3] = s8[p3];
   1.168 -                ++src32;
   1.169 -                ++dst32;
   1.170 +                dst[0] = src[p0];
   1.171 +                dst[1] = src[p1];
   1.172 +                dst[2] = src[p2];
   1.173 +                dst[3] = src[p3];
   1.174 +                src += 4;
   1.175 +                dst += 4;
   1.176              }, width);
   1.177              /* *INDENT-ON* */
   1.178 -            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.179 -            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.180 +            src += srcskip;
   1.181 +            dst += dstskip;
   1.182          }
   1.183          return;
   1.184      }
   1.185 @@ -2589,9 +2573,6 @@
   1.186          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   1.187          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.188  
   1.189 -        Uint32 *src32 = (Uint32*)src;
   1.190 -        Uint32 *dst32 = (Uint32*)dst;
   1.191 -
   1.192          /* Find the appropriate permutation */
   1.193          int alpha_channel, p0, p1, p2, p3;
   1.194          get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
   1.195 @@ -2600,21 +2581,21 @@
   1.196              /* *INDENT-OFF* */
   1.197              DUFFS_LOOP(
   1.198              {
   1.199 +                Uint32 *src32 = (Uint32*)src;
   1.200 +
   1.201                  if ((*src32 & rgbmask) != ckey) {
   1.202 -                    Uint8 *s8 = (Uint8 *)src32;
   1.203 -                    Uint8 *d8 = (Uint8 *)dst32;
   1.204 -                    d8[0] = s8[p0];
   1.205 -                    d8[1] = s8[p1];
   1.206 -                    d8[2] = s8[p2];
   1.207 -                    d8[3] = s8[p3];
   1.208 -                    d8[alpha_channel] = alpha;
   1.209 +                    dst[0] = src[p0];
   1.210 +                    dst[1] = src[p1];
   1.211 +                    dst[2] = src[p2];
   1.212 +                    dst[3] = src[p3];
   1.213 +                    dst[alpha_channel] = alpha;
   1.214                  }
   1.215 -                ++src32;
   1.216 -                ++dst32;
   1.217 +                src += 4;
   1.218 +                dst += 4;
   1.219              }, width);
   1.220              /* *INDENT-ON* */
   1.221 -            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.222 -            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.223 +            src += srcskip;
   1.224 +            dst += dstskip;
   1.225          }
   1.226          return;
   1.227      }
   1.228 @@ -2699,8 +2680,6 @@
   1.229      if (srcbpp == 4 && dstbpp == 3 &&
   1.230          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.231  
   1.232 -        Uint32 *src32 = (Uint32*)src;
   1.233 -
   1.234          /* Find the appropriate permutation */
   1.235          int p0, p1, p2, p3;
   1.236          get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   1.237 @@ -2709,17 +2688,17 @@
   1.238              /* *INDENT-OFF* */
   1.239              DUFFS_LOOP(
   1.240              {
   1.241 +                Uint32 *src32 = (Uint32*)src;
   1.242                  if ((*src32 & rgbmask) != ckey) {
   1.243 -                    Uint8 *s8 = (Uint8 *)src32;
   1.244 -                    dst[0] = s8[p0];
   1.245 -                    dst[1] = s8[p1];
   1.246 -                    dst[2] = s8[p2];
   1.247 +                    dst[0] = src[p0];
   1.248 +                    dst[1] = src[p1];
   1.249 +                    dst[2] = src[p2];
   1.250                  }
   1.251 -                ++src32;
   1.252 +                src += 4;
   1.253                  dst += 3;
   1.254              }, width);
   1.255              /* *INDENT-ON* */
   1.256 -            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.257 +            src += srcskip;
   1.258              dst += dstskip;
   1.259          }
   1.260          return;
   1.261 @@ -2729,8 +2708,6 @@
   1.262      if (srcbpp == 3 && dstbpp == 4 &&
   1.263          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.264  
   1.265 -        Uint32 *dst32 = (Uint32*)dst;
   1.266 -
   1.267  #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.268          Uint8 k0 = ckey & 0xFF;
   1.269          Uint8 k1 = (ckey >> 8)  & 0xFF;
   1.270 @@ -2754,20 +2731,18 @@
   1.271                  Uint8 s2 = src[2];
   1.272  
   1.273                  if (k0 != s0 || k1 != s1 || k2 != s2) {
   1.274 -                    Uint8 *d8 = (Uint8 *)dst32;
   1.275 -                    d8[0] = src[p0];
   1.276 -                    d8[1] = src[p1];
   1.277 -                    d8[2] = src[p2];
   1.278 -                    d8[3] = src[p3];
   1.279 -                    d8[alpha_channel] = alpha;
   1.280 +                    dst[0] = src[p0];
   1.281 +                    dst[1] = src[p1];
   1.282 +                    dst[2] = src[p2];
   1.283 +                    dst[3] = src[p3];
   1.284 +                    dst[alpha_channel] = alpha;
   1.285                  }
   1.286                  src += 3;
   1.287 -                ++dst32;
   1.288 +                dst += 4;
   1.289              }, width);
   1.290              /* *INDENT-ON* */
   1.291              src += srcskip;
   1.292 -            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.293 -
   1.294 +            dst += dstskip;
   1.295          }
   1.296          return;
   1.297      }
   1.298 @@ -2853,9 +2828,6 @@
   1.299          srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
   1.300          dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
   1.301  
   1.302 -        Uint32 *src32 = (Uint32*)src;
   1.303 -        Uint32 *dst32 = (Uint32*)dst;
   1.304 -
   1.305          /* Find the appropriate permutation */
   1.306          int p0, p1, p2, p3;
   1.307          get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
   1.308 @@ -2864,20 +2836,19 @@
   1.309              /* *INDENT-OFF* */
   1.310              DUFFS_LOOP(
   1.311              {
   1.312 +                Uint32 *src32 = (Uint32*)src;
   1.313                  if ((*src32 & rgbmask) != ckey) {
   1.314 -                    Uint8 *s8 = (Uint8 *)src32;
   1.315 -                    Uint8 *d8 = (Uint8 *)dst32;
   1.316 -                    d8[0] = s8[p0];
   1.317 -                    d8[1] = s8[p1];
   1.318 -                    d8[2] = s8[p2];
   1.319 -                    d8[3] = s8[p3];
   1.320 +                    dst[0] = src[p0];
   1.321 +                    dst[1] = src[p1];
   1.322 +                    dst[2] = src[p2];
   1.323 +                    dst[3] = src[p3];
   1.324                  }
   1.325 -                ++src32;
   1.326 -                ++dst32;
   1.327 +                src += 4;
   1.328 +                dst += 4;
   1.329              }, width);
   1.330              /* *INDENT-ON* */
   1.331 -            src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
   1.332 -            dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
   1.333 +            src += srcskip;
   1.334 +            dst += dstskip;
   1.335          }
   1.336          return;
   1.337      }
   1.338 @@ -2982,31 +2953,15 @@
   1.339      if (dstfmt->Amask) {
   1.340          /* SET_ALPHA */
   1.341          Uint32 mask = info->a << dstfmt->Ashift;
   1.342 -        int last_line = 0;
   1.343  #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.344 +        int i0 = 0, i1 = 1, i2 = 2;
   1.345  #else
   1.346          int i0 = srcbpp - 1 - 0;
   1.347          int i1 = srcbpp - 1 - 1;
   1.348          int i2 = srcbpp - 1 - 2;
   1.349  #endif
   1.350 -
   1.351 -        if (srcbpp == 3 && height) {
   1.352 -            height -= 1;
   1.353 -            last_line = 1;
   1.354 -        }
   1.355 -
   1.356          while (height--) {
   1.357              /* *INDENT-OFF* */
   1.358 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.359 -            DUFFS_LOOP(
   1.360 -            {
   1.361 -                Uint32  *dst32 = (Uint32*)dst;
   1.362 -                Uint32  *src32 = (Uint32*)src;
   1.363 -                *dst32 = *src32 | mask;
   1.364 -                dst += 4;
   1.365 -                src += srcbpp;
   1.366 -            }, width);
   1.367 -#else
   1.368              DUFFS_LOOP(
   1.369              {
   1.370                  Uint32  *dst32 = (Uint32*)dst;
   1.371 @@ -3017,34 +2972,15 @@
   1.372                  dst += 4;
   1.373                  src += srcbpp;
   1.374              }, width);
   1.375 -#endif
   1.376              /* *INDENT-ON* */
   1.377              src += srcskip;
   1.378              dst += dstskip;
   1.379          }
   1.380 -
   1.381 -        if (last_line) {
   1.382 -            while (width--) {
   1.383 -                Uint32  *dst32 = (Uint32*)dst;
   1.384 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.385 -                Uint8 s0 = src[0];
   1.386 -                Uint8 s1 = src[1];
   1.387 -                Uint8 s2 = src[2];
   1.388 -#else
   1.389 -                Uint8 s0 = src[i0];
   1.390 -                Uint8 s1 = src[i1];
   1.391 -                Uint8 s2 = src[i2];
   1.392 -#endif
   1.393 -                *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
   1.394 -                dst += 4;
   1.395 -                src += srcbpp;
   1.396 -            }
   1.397 -        }
   1.398      } else {
   1.399          /* NO_ALPHA */
   1.400 -        int last_line = 0;
   1.401  #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.402 -        int mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   1.403 +        int i0 = 0, i1 = 1, i2 = 2;
   1.404 +        int j0 = 0, j1 = 1, j2 = 2;
   1.405  #else
   1.406          int i0 = srcbpp - 1 - 0;
   1.407          int i1 = srcbpp - 1 - 1;
   1.408 @@ -3052,72 +2988,23 @@
   1.409          int j0 = dstbpp - 1 - 0;
   1.410          int j1 = dstbpp - 1 - 1;
   1.411          int j2 = dstbpp - 1 - 2;
   1.412 -        int shift0, shift1, shift2;
   1.413 -        if (dstbpp == 4) {
   1.414 -            shift2 = 16;
   1.415 -            shift1 = 8;
   1.416 -            shift0 = 0;
   1.417 -        } else { /* dstbpp 3 */
   1.418 -            shift2 = 24;
   1.419 -            shift1 = 16;
   1.420 -            shift0 = 8;
   1.421 -        }
   1.422  #endif
   1.423 -
   1.424 -        if ((dstbpp == 3 || srcbpp == 3) && height) {
   1.425 -            height -= 1;
   1.426 -            last_line = 1;
   1.427 -        }
   1.428 -
   1.429          while (height--) {
   1.430              /* *INDENT-OFF* */
   1.431 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.432              DUFFS_LOOP(
   1.433              {
   1.434 -                Uint32  *dst32 = (Uint32*)dst;
   1.435 -
   1.436 -                Uint32  *src32 = (Uint32*)src;
   1.437 -                *dst32 = *src32 & mask;
   1.438 -                dst += dstbpp;
   1.439 -                src += srcbpp;
   1.440 -            }, width);
   1.441 -#else
   1.442 -            DUFFS_LOOP(
   1.443 -            {
   1.444 -                Uint32  *dst32 = (Uint32*)dst;
   1.445 -                Uint8 s0 = src[i0];
   1.446 -                Uint8 s1 = src[i1];
   1.447 -                Uint8 s2 = src[i2];
   1.448 -                *dst32 = (s0 << shift0) | (s1 << shift1) | (s2 << shift2);
   1.449 -                dst += dstbpp;
   1.450 -                src += srcbpp;
   1.451 -            }, width);
   1.452 -#endif
   1.453 -            /* *INDENT-ON* */
   1.454 -            src += srcskip;
   1.455 -            dst += dstskip;
   1.456 -        }
   1.457 -
   1.458 -        if (last_line) {
   1.459 -            while (width--) {
   1.460 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.461 -                Uint8 s0 = src[0];
   1.462 -                Uint8 s1 = src[1];
   1.463 -                Uint8 s2 = src[2];
   1.464 -                dst[0] = s0;
   1.465 -                dst[1] = s1;
   1.466 -                dst[2] = s2;
   1.467 -#else
   1.468                  Uint8 s0 = src[i0];
   1.469                  Uint8 s1 = src[i1];
   1.470                  Uint8 s2 = src[i2];
   1.471                  dst[j0] = s0;
   1.472                  dst[j1] = s1;
   1.473                  dst[j2] = s2;
   1.474 -#endif
   1.475                  dst += dstbpp;
   1.476                  src += srcbpp;
   1.477 -            }
   1.478 +            }, width);
   1.479 +            /* *INDENT-ON* */
   1.480 +            src += srcskip;
   1.481 +            dst += dstskip;
   1.482          }
   1.483      }
   1.484      return;
   1.485 @@ -3158,8 +3045,8 @@
   1.486                      Uint32 alphashift = src[i3] << dstfmt->Ashift;
   1.487                      /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   1.488                      *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
   1.489 -                    dst += dstbpp;
   1.490 -                    src += srcbpp;
   1.491 +                    dst += 4;
   1.492 +                    src += 4;
   1.493                  }, width);
   1.494                  /* *INDENT-ON* */
   1.495                  src += srcskip;
   1.496 @@ -3175,7 +3062,6 @@
   1.497              int i1 = srcbpp - 1 - 1;
   1.498              int i2 = srcbpp - 1 - 2;
   1.499  #endif
   1.500 -
   1.501              while (height--) {
   1.502                  /* *INDENT-OFF* */
   1.503                  DUFFS_LOOP(
   1.504 @@ -3186,7 +3072,7 @@
   1.505                      Uint8 s2 = src[i2];
   1.506                      /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   1.507                      *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
   1.508 -                    dst += dstbpp;
   1.509 +                    dst += 4;
   1.510                      src += srcbpp;
   1.511                  }, width);
   1.512                  /* *INDENT-ON* */
   1.513 @@ -3196,41 +3082,28 @@
   1.514          }
   1.515      } else {
   1.516          /* NO_ALPHA */
   1.517 -        int last_line = 0;
   1.518  #if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.519          int i0 = 0, i1 = 1, i2 = 2;
   1.520 -        int shift0 = 16, shift1 = 8, shift2 = 0;
   1.521 +        int j0 = 2, j1 = 1, j2 = 0;
   1.522  #else
   1.523          int i0 = srcbpp - 1 - 0;
   1.524          int i1 = srcbpp - 1 - 1;
   1.525          int i2 = srcbpp - 1 - 2;
   1.526 -        int shift0, shift1, shift2;
   1.527 -        if (dstbpp == 4) {
   1.528 -            shift0 = 16;
   1.529 -            shift1 = 8;
   1.530 -            shift2 = 0;
   1.531 -        } else { /* dstbpp 3 */
   1.532 -            shift0 = 24;
   1.533 -            shift1 = 16;
   1.534 -            shift2 = 8;
   1.535 -        }
   1.536 +        int j0 = dstbpp - 1 - 2;
   1.537 +        int j1 = dstbpp - 1 - 1;
   1.538 +        int j2 = dstbpp - 1 - 0;
   1.539  #endif
   1.540 -
   1.541 -        if (dstbpp == 3 && height) {
   1.542 -            height -= 1;
   1.543 -            last_line = 1;
   1.544 -        }
   1.545 -
   1.546          while (height--) {
   1.547              /* *INDENT-OFF* */
   1.548              DUFFS_LOOP(
   1.549              {
   1.550 -                Uint32 *dst32 = (Uint32*)dst;
   1.551                  Uint8 s0 = src[i0];
   1.552                  Uint8 s1 = src[i1];
   1.553                  Uint8 s2 = src[i2];
   1.554                  /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   1.555 -                *dst32 = (s0 << shift0) | (s1 << shift1) | (s2 << shift2);
   1.556 +                dst[j0] = s0;
   1.557 +                dst[j1] = s1;
   1.558 +                dst[j2] = s2;
   1.559                  dst += dstbpp;
   1.560                  src += srcbpp;
   1.561              }, width);
   1.562 @@ -3238,30 +3111,6 @@
   1.563              src += srcskip;
   1.564              dst += dstskip;
   1.565          }
   1.566 -
   1.567 -        if (last_line) {
   1.568 -            while (width--) {
   1.569 -#if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1.570 -                Uint8 s0 = src[0];
   1.571 -                Uint8 s1 = src[1];
   1.572 -                Uint8 s2 = src[2];
   1.573 -                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   1.574 -                dst[0] = s2;
   1.575 -                dst[1] = s1;
   1.576 -                dst[2] = s0;
   1.577 -#else
   1.578 -                Uint8 s0 = src[i0];
   1.579 -                Uint8 s1 = src[i1];
   1.580 -                Uint8 s2 = src[i2];
   1.581 -                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
   1.582 -                dst[0] = s0;
   1.583 -                dst[1] = s1;
   1.584 -                dst[2] = s2;
   1.585 -#endif
   1.586 -                dst += dstbpp;
   1.587 -                src += srcbpp;
   1.588 -            }
   1.589 -        }
   1.590      }
   1.591      return;
   1.592  }