src/video/SDL_blit_A.c
changeset 5259 6a65c1fc07af
parent 3697 f7b03b6838cb
child 5262 b530ef003506
5258:f26314c20071 5259:6a65c1fc07af
   417     _mm_empty();
   417     _mm_empty();
   418 }
   418 }
   419 
   419 
   420 #endif /* __MMX__ */
   420 #endif /* __MMX__ */
   421 
   421 
   422 #if SDL_ALTIVEC_BLITTERS
       
   423 #if __MWERKS__
       
   424 #pragma altivec_model on
       
   425 #endif
       
   426 #if HAVE_ALTIVEC_H
       
   427 #include <altivec.h>
       
   428 #endif
       
   429 #include <assert.h>
       
   430 
       
   431 #if (defined(__MACOSX__) && (__GNUC__ < 4))
       
   432 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
       
   433         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
       
   434 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
       
   435         (vector unsigned short) ( a,b,c,d,e,f,g,h )
       
   436 #else
       
   437 #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
       
   438         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
       
   439 #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
       
   440         (vector unsigned short) { a,b,c,d,e,f,g,h }
       
   441 #endif
       
   442 
       
   443 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
       
   444 #define VECPRINT(msg, v) do { \
       
   445     vector unsigned int tmpvec = (vector unsigned int)(v); \
       
   446     unsigned int *vp = (unsigned int *)&tmpvec; \
       
   447     printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
       
   448 } while (0)
       
   449 
       
    450 /* the permutation vector that takes the high bytes out of all the appropriate shorts
       
   451     (vector unsigned char)(
       
   452         0x00, 0x10, 0x02, 0x12,
       
   453         0x04, 0x14, 0x06, 0x16,
       
   454         0x08, 0x18, 0x0A, 0x1A,
       
   455         0x0C, 0x1C, 0x0E, 0x1E );
       
   456 */
       
   457 #define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
       
   458 #define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
       
   459 #define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
       
   460 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
       
   461     ? vec_lvsl(0, src) \
       
   462     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
       
   463 
       
   464 
       
   465 #define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
       
   466     /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
       
   467     vector unsigned short vtemp1 = vec_mule(vs, valpha); \
       
   468     /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
       
   469     vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
       
   470     /* valpha2 is 255-alpha */ \
       
   471     vector unsigned char valpha2 = vec_nor(valpha, valpha); \
       
   472     /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
       
   473     vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
       
   474     /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
       
   475     vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
       
   476     /* add source and dest */ \
       
   477     vtemp1 = vec_add(vtemp1, vtemp3); \
       
   478     vtemp2 = vec_add(vtemp2, vtemp4); \
       
   479     /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
       
   480     vtemp1 = vec_add(vtemp1, v1_16); \
       
   481     vtemp3 = vec_sr(vtemp1, v8_16); \
       
   482     vtemp1 = vec_add(vtemp1, vtemp3); \
       
   483     /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
       
   484     vtemp2 = vec_add(vtemp2, v1_16); \
       
   485     vtemp4 = vec_sr(vtemp2, v8_16); \
       
   486     vtemp2 = vec_add(vtemp2, vtemp4); \
       
   487     /* (>>8) and get ARGBARGBARGBARGB */ \
       
   488     vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
       
   489 } while (0)
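/* [editor's note -- illustration only, not part of the changeset]
 * A minimal scalar sketch of what VEC_MULTIPLY_ALPHA computes for one
 * channel.  vec_mule/vec_mulo produce the 16-bit products s*a and
 * d*(255-a) (valpha2 = vec_nor(valpha, valpha) is 255-a per byte); the
 * "(t + 1) + ((t + 1) >> 8)" step approximates division by 255, and the
 * merge permute keeps the high byte of each 16-bit sum (the final >> 8)
 * while interleaving the two product vectors back into ARGB order.
 * blend_channel_sketch is a hypothetical helper, not SDL API.
 */
static unsigned char
blend_channel_sketch(unsigned char s, unsigned char d, unsigned char a)
{
    unsigned int t = (unsigned int) s * a + (unsigned int) d * (255 - a);
    t = t + 1;
    t = t + (t >> 8);
    return (unsigned char) (t >> 8);    /* ~= (s*a + d*(255 - a)) / 255 */
}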
       
   490 
       
   491 /* Calculate the permute vector used for 32->32 swizzling */
       
   492 static vector unsigned char
       
   493 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
       
   494 {
       
   495     /*
       
   496      * We have to assume that the bits that aren't used by other
       
    497      *  colors are alpha, and it's one complete byte, since some formats
       
   498      *  leave alpha with a zero mask, but we should still swizzle the bits.
       
   499      */
       
   500     /* ARGB */
       
   501     const static struct SDL_PixelFormat default_pixel_format = {
       
   502         NULL, 0, 0,
       
   503         0, 0, 0, 0,
       
   504         16, 8, 0, 24,
       
   505         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
       
   506     };
       
   507     if (!srcfmt) {
       
   508         srcfmt = &default_pixel_format;
       
   509     }
       
   510     if (!dstfmt) {
       
   511         dstfmt = &default_pixel_format;
       
   512     }
       
   513     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
       
   514                                                        0x04, 0x04, 0x04, 0x04,
       
   515                                                        0x08, 0x08, 0x08, 0x08,
       
   516                                                        0x0C, 0x0C, 0x0C,
       
   517                                                        0x0C);
       
   518     vector unsigned char vswiz;
       
   519     vector unsigned int srcvec;
       
   520 #define RESHIFT(X) (3 - ((X) >> 3))
       
   521     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
       
   522     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
       
   523     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
       
   524     Uint32 amask;
       
   525     /* Use zero for alpha if either surface doesn't have alpha */
       
   526     if (dstfmt->Amask) {
       
   527         amask =
       
   528             ((srcfmt->Amask) ? RESHIFT(srcfmt->
       
   529                                        Ashift) : 0x10) << (dstfmt->Ashift);
       
   530     } else {
       
   531         amask =
       
   532             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
       
   533                           0xFFFFFFFF);
       
   534     }
       
   535 #undef RESHIFT
       
   536     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
       
   537     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
       
   538     return (vswiz);
       
   539 }
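/* [editor's note -- illustration only, not part of the changeset]
 * A scalar model of the per-pixel pattern calc_swizzle32() builds, assuming
 * byte-aligned 8-bit channels and the big-endian byte order AltiVec uses
 * here.  RESHIFT(shift) = 3 - (shift >> 3) is a channel's byte index inside
 * a 32-bit pixel; that source index is stored at the channel's byte position
 * in the destination pixel, then vec_splat() repeats the 4-byte pattern and
 * the "plus" vector adds 0/4/8/12 so each of the four pixels is permuted
 * from its own bytes.  swizzle_pattern_sketch is a hypothetical helper.
 */
static void
swizzle_pattern_sketch(const SDL_PixelFormat * srcfmt,
                       const SDL_PixelFormat * dstfmt, unsigned char pat[4])
{
    pat[0] = pat[1] = pat[2] = pat[3] = 0;
    pat[3 - (dstfmt->Rshift >> 3)] = (unsigned char) (3 - (srcfmt->Rshift >> 3));
    pat[3 - (dstfmt->Gshift >> 3)] = (unsigned char) (3 - (srcfmt->Gshift >> 3));
    pat[3 - (dstfmt->Bshift >> 3)] = (unsigned char) (3 - (srcfmt->Bshift >> 3));
    /* the alpha byte (the 0x10 "take from the second operand" case) is
       left out of this sketch */
}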
       
   540 
       
   541 static void
       
   542 Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
       
   543 {
       
   544     int height = info->dst_h;
       
   545     Uint8 *src = (Uint8 *) info->src;
       
   546     int srcskip = info->src_skip;
       
   547     Uint8 *dst = (Uint8 *) info->dst;
       
   548     int dstskip = info->dst_skip;
       
   549     SDL_PixelFormat *srcfmt = info->src_fmt;
       
   550 
       
   551     vector unsigned char v0 = vec_splat_u8(0);
       
   552     vector unsigned short v8_16 = vec_splat_u16(8);
       
   553     vector unsigned short v1_16 = vec_splat_u16(1);
       
   554     vector unsigned short v2_16 = vec_splat_u16(2);
       
   555     vector unsigned short v3_16 = vec_splat_u16(3);
       
   556     vector unsigned int v8_32 = vec_splat_u32(8);
       
   557     vector unsigned int v16_32 = vec_add(v8_32, v8_32);
       
   558     vector unsigned short v3f =
       
   559         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
       
   560                           0x003f, 0x003f, 0x003f, 0x003f);
       
   561     vector unsigned short vfc =
       
   562         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
       
   563                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
       
   564 
       
   565     /* 
       
   566        0x10 - 0x1f is the alpha
       
   567        0x00 - 0x0e evens are the red
       
   568        0x01 - 0x0f odds are zero
       
   569      */
       
   570     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
       
   571                                                        0x10, 0x02, 0x01, 0x01,
       
   572                                                        0x10, 0x04, 0x01, 0x01,
       
   573                                                        0x10, 0x06, 0x01,
       
   574                                                        0x01);
       
   575     vector unsigned char vredalpha2 =
       
   576         (vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
       
   577                                         vec_sl(v8_32, v16_32))
       
   578         );
       
   579     /*
       
   580        0x00 - 0x0f is ARxx ARxx ARxx ARxx
       
    581        0x11 - 0x1f odds are blue
       
   582      */
       
   583     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
       
   584                                                    0x04, 0x05, 0x06, 0x13,
       
   585                                                    0x08, 0x09, 0x0a, 0x15,
       
   586                                                    0x0c, 0x0d, 0x0e, 0x17);
       
   587     vector unsigned char vblue2 =
       
   588         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
       
   589         );
       
   590     /*
       
   591        0x00 - 0x0f is ARxB ARxB ARxB ARxB
       
    592        0x10 - 0x1e evens are green
       
   593      */
       
   594     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
       
   595                                                     0x04, 0x05, 0x12, 0x07,
       
   596                                                     0x08, 0x09, 0x14, 0x0b,
       
   597                                                     0x0c, 0x0d, 0x16, 0x0f);
       
   598     vector unsigned char vgreen2 =
       
   599         (vector unsigned
       
   600          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
       
   601         );
       
   602     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
       
   603                                                     0x00, 0x0a, 0x00, 0x0e,
       
   604                                                     0x00, 0x12, 0x00, 0x16,
       
   605                                                     0x00, 0x1a, 0x00, 0x1e);
       
   606     vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
       
   607     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
       
   608     vector unsigned char valphaPermute =
       
   609         vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
       
   610 
       
   611     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
       
   612     vf800 = vec_sl(vf800, vec_splat_u16(8));
       
   613 
       
   614     while (height--) {
       
   615         int extrawidth;
       
   616         vector unsigned char valigner;
       
   617         vector unsigned char vsrc;
       
   618         vector unsigned char voverflow;
       
   619         int width = info->dst_w;
       
   620 
       
   621 #define ONE_PIXEL_BLEND(condition, widthvar) \
       
   622         while (condition) { \
       
   623             Uint32 Pixel; \
       
   624             unsigned sR, sG, sB, dR, dG, dB, sA; \
       
   625             DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
       
   626             if(sA) { \
       
   627                 unsigned short dstpixel = *((unsigned short *)dst); \
       
   628                 dR = (dstpixel >> 8) & 0xf8; \
       
   629                 dG = (dstpixel >> 3) & 0xfc; \
       
   630                 dB = (dstpixel << 3) & 0xf8; \
       
   631                 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
       
   632                 *((unsigned short *)dst) = ( \
       
   633                     ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
       
   634                 ); \
       
   635             } \
       
   636             src += 4; \
       
   637             dst += 2; \
       
   638             widthvar--; \
       
   639         }
       
   640         ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
       
   641         extrawidth = (width % 8);
       
   642         valigner = VEC_ALIGNER(src);
       
   643         vsrc = (vector unsigned char) vec_ld(0, src);
       
   644         width -= extrawidth;
       
   645         while (width) {
       
   646             vector unsigned char valpha;
       
   647             vector unsigned char vsrc1, vsrc2;
       
   648             vector unsigned char vdst1, vdst2;
       
   649             vector unsigned short vR, vG, vB;
       
   650             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
       
   651 
       
   652             /* Load 8 pixels from src as ARGB */
       
   653             voverflow = (vector unsigned char) vec_ld(15, src);
       
   654             vsrc = vec_perm(vsrc, voverflow, valigner);
       
   655             vsrc1 = vec_perm(vsrc, vsrc, vpermute);
       
   656             src += 16;
       
   657             vsrc = (vector unsigned char) vec_ld(15, src);
       
   658             voverflow = vec_perm(voverflow, vsrc, valigner);
       
   659             vsrc2 = vec_perm(voverflow, voverflow, vpermute);
       
   660             src += 16;
       
   661 
       
   662             /* Load 8 pixels from dst as XRGB */
       
   663             voverflow = vec_ld(0, dst);
       
   664             vR = vec_and((vector unsigned short) voverflow, vf800);
       
   665             vB = vec_sl((vector unsigned short) voverflow, v3_16);
       
   666             vG = vec_sl(vB, v2_16);
       
   667             vdst1 =
       
   668                 (vector unsigned char) vec_perm((vector unsigned char) vR,
       
   669                                                 (vector unsigned char) vR,
       
   670                                                 vredalpha1);
       
   671             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
       
   672             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
       
   673             vdst2 =
       
   674                 (vector unsigned char) vec_perm((vector unsigned char) vR,
       
   675                                                 (vector unsigned char) vR,
       
   676                                                 vredalpha2);
       
   677             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
       
   678             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
       
   679 
       
   680             /* Alpha blend 8 pixels as ARGB */
       
   681             valpha = vec_perm(vsrc1, v0, valphaPermute);
       
   682             VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
       
   683                                v8_16);
       
   684             valpha = vec_perm(vsrc2, v0, valphaPermute);
       
   685             VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
       
   686                                v8_16);
       
   687 
       
   688             /* Convert 8 pixels to 565 */
       
   689             vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
       
   690                                                         vdst1,
       
   691                                                         (vector unsigned int)
       
   692                                                         vdst2);
       
   693             vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
       
   694             vgpixel = vec_and(vgpixel, vfc);
       
   695             vgpixel = vec_sl(vgpixel, v3_16);
       
   696             vrpixel = vec_sl(vpixel, v1_16);
       
   697             vrpixel = vec_and(vrpixel, vf800);
       
   698             vbpixel = vec_and(vpixel, v3f);
       
   699             vdst1 =
       
   700                 vec_or((vector unsigned char) vrpixel,
       
   701                        (vector unsigned char) vgpixel);
       
   702             vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
       
   703 
       
   704             /* Store 8 pixels */
       
   705             vec_st(vdst1, 0, dst);
       
   706 
       
   707             width -= 8;
       
   708             dst += 16;
       
   709         }
       
   710         ONE_PIXEL_BLEND((extrawidth), extrawidth);
       
   711 #undef ONE_PIXEL_BLEND
       
   712         src += srcskip;
       
   713         dst += dstskip;
       
   714     }
       
   715 }
       
   716 
       
   717 static void
       
   718 Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
       
   719 {
       
   720     int height = info->dst_h;
       
   721     Uint32 *srcp = (Uint32 *) info->src;
       
   722     int srcskip = info->src_skip >> 2;
       
   723     Uint32 *dstp = (Uint32 *) info->dst;
       
   724     int dstskip = info->dst_skip >> 2;
       
   725     SDL_PixelFormat *srcfmt = info->src_fmt;
       
   726     SDL_PixelFormat *dstfmt = info->dst_fmt;
       
   727     unsigned sA = info->a;
       
   728     unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
       
   729     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
       
   730     Uint32 ckey = info->colorkey;
       
   731     vector unsigned char mergePermute;
       
   732     vector unsigned char vsrcPermute;
       
   733     vector unsigned char vdstPermute;
       
   734     vector unsigned char vsdstPermute;
       
   735     vector unsigned char valpha;
       
   736     vector unsigned char valphamask;
       
   737     vector unsigned char vbits;
       
   738     vector unsigned char v0;
       
   739     vector unsigned short v1;
       
   740     vector unsigned short v8;
       
   741     vector unsigned int vckey;
       
   742     vector unsigned int vrgbmask;
       
   743 
       
   744     mergePermute = VEC_MERGE_PERMUTE();
       
   745     v0 = vec_splat_u8(0);
       
   746     v1 = vec_splat_u16(1);
       
   747     v8 = vec_splat_u16(8);
       
   748 
       
   749     /* set the alpha to 255 on the destination surf */
       
   750     valphamask = VEC_ALPHA_MASK();
       
   751 
       
   752     vsrcPermute = calc_swizzle32(srcfmt, NULL);
       
   753     vdstPermute = calc_swizzle32(NULL, dstfmt);
       
   754     vsdstPermute = calc_swizzle32(dstfmt, NULL);
       
   755 
       
   756     /* set a vector full of alpha and 255-alpha */
       
   757     ((unsigned char *) &valpha)[0] = sA;
       
   758     valpha = vec_splat(valpha, 0);
       
   759     vbits = (vector unsigned char) vec_splat_s8(-1);
       
   760 
       
   761     ckey &= rgbmask;
       
   762     ((unsigned int *) (char *) &vckey)[0] = ckey;
       
   763     vckey = vec_splat(vckey, 0);
       
   764     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
       
   765     vrgbmask = vec_splat(vrgbmask, 0);
       
   766 
       
   767     while (height--) {
       
   768         int width = info->dst_w;
       
   769 #define ONE_PIXEL_BLEND(condition, widthvar) \
       
   770         while (condition) { \
       
   771             Uint32 Pixel; \
       
   772             unsigned sR, sG, sB, dR, dG, dB; \
       
   773             RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
       
   774             if(sA && Pixel != ckey) { \
       
   775                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
       
   776                 DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
       
   777                 ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
       
   778                 ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
       
   779             } \
       
   780             dstp++; \
       
   781             srcp++; \
       
   782             widthvar--; \
       
   783         }
       
   784         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
       
   785         if (width > 0) {
       
   786             int extrawidth = (width % 4);
       
   787             vector unsigned char valigner = VEC_ALIGNER(srcp);
       
   788             vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
       
   789             width -= extrawidth;
       
   790             while (width) {
       
   791                 vector unsigned char vsel;
       
   792                 vector unsigned char voverflow;
       
   793                 vector unsigned char vd;
       
   794                 vector unsigned char vd_orig;
       
   795 
       
   796                 /* s = *srcp */
       
   797                 voverflow = (vector unsigned char) vec_ld(15, srcp);
       
   798                 vs = vec_perm(vs, voverflow, valigner);
       
   799 
       
   800                 /* vsel is set for items that match the key */
       
   801                 vsel =
       
   802                     (vector unsigned char) vec_and((vector unsigned int) vs,
       
   803                                                    vrgbmask);
       
   804                 vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
       
   805                                                         vsel, vckey);
       
   806 
       
   807                 /* permute to source format */
       
   808                 vs = vec_perm(vs, valpha, vsrcPermute);
       
   809 
       
   810                 /* d = *dstp */
       
   811                 vd = (vector unsigned char) vec_ld(0, dstp);
       
   812                 vd_orig = vd = vec_perm(vd, v0, vsdstPermute);
       
   813 
       
   814                 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
       
   815 
       
   816                 /* set the alpha channel to full on */
       
   817                 vd = vec_or(vd, valphamask);
       
   818 
       
   819                 /* mask out color key */
       
   820                 vd = vec_sel(vd, vd_orig, vsel);
       
   821 
       
   822                 /* permute to dest format */
       
   823                 vd = vec_perm(vd, vbits, vdstPermute);
       
   824 
       
   825                 /* *dstp = res */
       
   826                 vec_st((vector unsigned int) vd, 0, dstp);
       
   827 
       
   828                 srcp += 4;
       
   829                 dstp += 4;
       
   830                 width -= 4;
       
   831                 vs = voverflow;
       
   832             }
       
   833             ONE_PIXEL_BLEND((extrawidth), extrawidth);
       
   834         }
       
   835 #undef ONE_PIXEL_BLEND
       
   836 
       
   837         srcp += srcskip;
       
   838         dstp += dstskip;
       
   839     }
       
   840 }
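/* [editor's note -- illustration only, not part of the changeset]
 * Scalar model of the vec_sel() colorkey step above: blend every pixel
 * unconditionally, then use an all-ones/all-zeros mask to put the saved
 * destination back wherever the source RGB matched the colorkey.
 * colorkey_select_sketch is a hypothetical helper.
 */
static Uint32
colorkey_select_sketch(Uint32 s, Uint32 blended, Uint32 d_orig,
                       Uint32 rgbmask, Uint32 ckey)
{
    Uint32 mask = ((s & rgbmask) == ckey) ? 0xFFFFFFFFu : 0x00000000u;
    return (blended & ~mask) | (d_orig & mask); /* == vec_sel(blended, d_orig, mask) */
}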
       
   841 
       
   842 
       
   843 static void
       
   844 Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
       
   845 {
       
   846     int width = info->dst_w;
       
   847     int height = info->dst_h;
       
   848     Uint32 *srcp = (Uint32 *) info->src;
       
   849     int srcskip = info->src_skip >> 2;
       
   850     Uint32 *dstp = (Uint32 *) info->dst;
       
   851     int dstskip = info->dst_skip >> 2;
       
   852     SDL_PixelFormat *srcfmt = info->src_fmt;
       
   853     SDL_PixelFormat *dstfmt = info->dst_fmt;
       
   854     vector unsigned char mergePermute;
       
   855     vector unsigned char valphaPermute;
       
   856     vector unsigned char vsrcPermute;
       
   857     vector unsigned char vdstPermute;
       
   858     vector unsigned char vsdstPermute;
       
   859     vector unsigned char valphamask;
       
   860     vector unsigned char vpixelmask;
       
   861     vector unsigned char v0;
       
   862     vector unsigned short v1;
       
   863     vector unsigned short v8;
       
   864 
       
   865     v0 = vec_splat_u8(0);
       
   866     v1 = vec_splat_u16(1);
       
   867     v8 = vec_splat_u16(8);
       
   868     mergePermute = VEC_MERGE_PERMUTE();
       
   869     valphamask = VEC_ALPHA_MASK();
       
   870     valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
       
   871     vpixelmask = vec_nor(valphamask, v0);
       
   872     vsrcPermute = calc_swizzle32(srcfmt, NULL);
       
   873     vdstPermute = calc_swizzle32(NULL, dstfmt);
       
   874     vsdstPermute = calc_swizzle32(dstfmt, NULL);
       
   875 
       
   876     while (height--) {
       
   877         width = info->dst_w;
       
   878 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
       
   879             Uint32 Pixel; \
       
   880             unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
       
   881             DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
       
   882             if(sA) { \
       
   883               DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
       
   884               ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
       
   885               ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
       
   886             } \
       
   887             ++srcp; \
       
   888             ++dstp; \
       
   889             widthvar--; \
       
   890         }
       
   891         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
       
   892         if (width > 0) {
       
   893             /* vsrcPermute */
       
   894             /* vdstPermute */
       
   895             int extrawidth = (width % 4);
       
   896             vector unsigned char valigner = VEC_ALIGNER(srcp);
       
   897             vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
       
   898             width -= extrawidth;
       
   899             while (width) {
       
   900                 vector unsigned char voverflow;
       
   901                 vector unsigned char vd;
       
   902                 vector unsigned char valpha;
       
   903                 vector unsigned char vdstalpha;
       
   904                 /* s = *srcp */
       
   905                 voverflow = (vector unsigned char) vec_ld(15, srcp);
       
   906                 vs = vec_perm(vs, voverflow, valigner);
       
   907                 vs = vec_perm(vs, v0, vsrcPermute);
       
   908 
       
   909                 valpha = vec_perm(vs, v0, valphaPermute);
       
   910 
       
   911                 /* d = *dstp */
       
   912                 vd = (vector unsigned char) vec_ld(0, dstp);
       
   913                 vd = vec_perm(vd, v0, vsdstPermute);
       
   914                 vdstalpha = vec_and(vd, valphamask);
       
   915 
       
   916                 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
       
   917 
       
   918                 /* set the alpha to the dest alpha */
       
   919                 vd = vec_and(vd, vpixelmask);
       
   920                 vd = vec_or(vd, vdstalpha);
       
   921                 vd = vec_perm(vd, v0, vdstPermute);
       
   922 
       
   923                 /* *dstp = res */
       
   924                 vec_st((vector unsigned int) vd, 0, dstp);
       
   925 
       
   926                 srcp += 4;
       
   927                 dstp += 4;
       
   928                 width -= 4;
       
   929                 vs = voverflow;
       
   930 
       
   931             }
       
   932             ONE_PIXEL_BLEND((extrawidth), extrawidth);
       
   933         }
       
   934         srcp += srcskip;
       
   935         dstp += dstskip;
       
   936 #undef ONE_PIXEL_BLEND
       
   937     }
       
   938 }
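/* [editor's note -- illustration only, not part of the changeset]
 * The "set the alpha to the dest alpha" step above in scalar form: the
 * blended pixel keeps its RGB (vpixelmask is ~Amask) and the alpha byte is
 * restored from the destination value saved before blending (vdstalpha is
 * dst & Amask).  keep_dst_alpha_sketch is a hypothetical helper.
 */
static Uint32
keep_dst_alpha_sketch(Uint32 blended, Uint32 dst, Uint32 Amask)
{
    return (blended & ~Amask) | (dst & Amask);
}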
       
   939 
       
   940 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
       
   941 static void
       
   942 BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
       
   943 {
       
   944     int width = info->dst_w;
       
   945     int height = info->dst_h;
       
   946     Uint32 *srcp = (Uint32 *) info->src;
       
   947     int srcskip = info->src_skip >> 2;
       
   948     Uint32 *dstp = (Uint32 *) info->dst;
       
   949     int dstskip = info->dst_skip >> 2;
       
   950     vector unsigned char mergePermute;
       
   951     vector unsigned char valphaPermute;
       
   952     vector unsigned char valphamask;
       
   953     vector unsigned char vpixelmask;
       
   954     vector unsigned char v0;
       
   955     vector unsigned short v1;
       
   956     vector unsigned short v8;
       
   957     v0 = vec_splat_u8(0);
       
   958     v1 = vec_splat_u16(1);
       
   959     v8 = vec_splat_u16(8);
       
   960     mergePermute = VEC_MERGE_PERMUTE();
       
   961     valphamask = VEC_ALPHA_MASK();
       
   962     valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
       
   963 
       
   964 
       
   965     vpixelmask = vec_nor(valphamask, v0);
       
   966     while (height--) {
       
   967         width = info->dst_w;
       
   968 #define ONE_PIXEL_BLEND(condition, widthvar) \
       
   969         while ((condition)) { \
       
   970             Uint32 dalpha; \
       
   971             Uint32 d; \
       
   972             Uint32 s1; \
       
   973             Uint32 d1; \
       
   974             Uint32 s = *srcp; \
       
   975             Uint32 alpha = s >> 24; \
       
   976             if(alpha) { \
       
   977               if(alpha == SDL_ALPHA_OPAQUE) { \
       
   978                 *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
       
   979               } else { \
       
   980                 d = *dstp; \
       
   981                 dalpha = d & 0xff000000; \
       
   982                 s1 = s & 0xff00ff; \
       
   983                 d1 = d & 0xff00ff; \
       
   984                 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
       
   985                 s &= 0xff00; \
       
   986                 d &= 0xff00; \
       
   987                 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
       
   988                 *dstp = d1 | d | dalpha; \
       
   989               } \
       
   990             } \
       
   991             ++srcp; \
       
   992             ++dstp; \
       
   993             widthvar--; \
       
   994 	    }
       
   995         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
       
   996         if (width > 0) {
       
   997             int extrawidth = (width % 4);
       
   998             vector unsigned char valigner = VEC_ALIGNER(srcp);
       
   999             vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
       
  1000             width -= extrawidth;
       
  1001             while (width) {
       
  1002                 vector unsigned char voverflow;
       
  1003                 vector unsigned char vd;
       
  1004                 vector unsigned char valpha;
       
  1005                 vector unsigned char vdstalpha;
       
  1006                 /* s = *srcp */
       
  1007                 voverflow = (vector unsigned char) vec_ld(15, srcp);
       
  1008                 vs = vec_perm(vs, voverflow, valigner);
       
  1009 
       
  1010                 valpha = vec_perm(vs, v0, valphaPermute);
       
  1011 
       
  1012                 /* d = *dstp */
       
  1013                 vd = (vector unsigned char) vec_ld(0, dstp);
       
  1014                 vdstalpha = vec_and(vd, valphamask);
       
  1015 
       
  1016                 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
       
  1017 
       
  1018                 /* set the alpha to the dest alpha */
       
  1019                 vd = vec_and(vd, vpixelmask);
       
  1020                 vd = vec_or(vd, vdstalpha);
       
  1021 
       
  1022                 /* *dstp = res */
       
  1023                 vec_st((vector unsigned int) vd, 0, dstp);
       
  1024 
       
  1025                 srcp += 4;
       
  1026                 dstp += 4;
       
  1027                 width -= 4;
       
  1028                 vs = voverflow;
       
  1029             }
       
  1030             ONE_PIXEL_BLEND((extrawidth), extrawidth);
       
  1031         }
       
  1032         srcp += srcskip;
       
  1033         dstp += dstskip;
       
  1034     }
       
  1035 #undef ONE_PIXEL_BLEND
       
  1036 }
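/* [editor's note -- illustration only, not part of the changeset]
 * The scalar fallback above blends red and blue with a single multiply by
 * parking both channels in the 0x00ff00ff lanes; the spare byte between the
 * lanes absorbs the intermediate carry/borrow and the closing mask strips
 * anything that leaked across.  Green gets the same treatment in the
 * 0x0000ff00 lane, and the destination alpha byte is passed through
 * untouched.  blend_argb_sketch is a hypothetical helper that restates the
 * ONE_PIXEL_BLEND arithmetic as a standalone function.
 */
static Uint32
blend_argb_sketch(Uint32 s, Uint32 d, Uint32 alpha)     /* alpha: 0..255 */
{
    Uint32 dalpha = d & 0xff000000;
    Uint32 s1 = s & 0x00ff00ff;
    Uint32 d1 = d & 0x00ff00ff;                         /* red and blue */
    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0x00ff00ff;
    s &= 0x0000ff00;
    d &= 0x0000ff00;                                    /* green */
    d = (d + ((s - d) * alpha >> 8)) & 0x0000ff00;
    return d1 | d | dalpha;                             /* keep dst alpha */
}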
       
  1037 
       
  1038 static void
       
  1039 Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
       
  1040 {
       
  1041     /* XXX : 6 */
       
  1042     int height = info->dst_h;
       
  1043     Uint32 *srcp = (Uint32 *) info->src;
       
  1044     int srcskip = info->src_skip >> 2;
       
  1045     Uint32 *dstp = (Uint32 *) info->dst;
       
  1046     int dstskip = info->dst_skip >> 2;
       
  1047     SDL_PixelFormat *srcfmt = info->src_fmt;
       
  1048     SDL_PixelFormat *dstfmt = info->dst_fmt;
       
  1049     unsigned sA = info->a;
       
  1050     unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
       
  1051     vector unsigned char mergePermute;
       
  1052     vector unsigned char vsrcPermute;
       
  1053     vector unsigned char vdstPermute;
       
  1054     vector unsigned char vsdstPermute;
       
  1055     vector unsigned char valpha;
       
  1056     vector unsigned char valphamask;
       
  1057     vector unsigned char vbits;
       
  1058     vector unsigned short v1;
       
  1059     vector unsigned short v8;
       
  1060 
       
  1061     mergePermute = VEC_MERGE_PERMUTE();
       
  1062     v1 = vec_splat_u16(1);
       
  1063     v8 = vec_splat_u16(8);
       
  1064 
       
  1065     /* set the alpha to 255 on the destination surf */
       
  1066     valphamask = VEC_ALPHA_MASK();
       
  1067 
       
  1068     vsrcPermute = calc_swizzle32(srcfmt, NULL);
       
  1069     vdstPermute = calc_swizzle32(NULL, dstfmt);
       
  1070     vsdstPermute = calc_swizzle32(dstfmt, NULL);
       
  1071 
       
  1072     /* set a vector full of alpha and 255-alpha */
       
  1073     ((unsigned char *) &valpha)[0] = sA;
       
  1074     valpha = vec_splat(valpha, 0);
       
  1075     vbits = (vector unsigned char) vec_splat_s8(-1);
       
  1076 
       
  1077     while (height--) {
       
  1078         int width = info->dst_w;
       
  1079 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
       
  1080             Uint32 Pixel; \
       
  1081             unsigned sR, sG, sB, dR, dG, dB; \
       
  1082             DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
       
  1083             DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
       
  1084             ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
       
  1085             ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
       
  1086             ++srcp; \
       
  1087             ++dstp; \
       
  1088             widthvar--; \
       
  1089         }
       
  1090         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
       
  1091         if (width > 0) {
       
  1092             int extrawidth = (width % 4);
       
  1093             vector unsigned char valigner = VEC_ALIGNER(srcp);
       
  1094             vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
       
  1095             width -= extrawidth;
       
  1096             while (width) {
       
  1097                 vector unsigned char voverflow;
       
  1098                 vector unsigned char vd;
       
  1099 
       
  1100                 /* s = *srcp */
       
  1101                 voverflow = (vector unsigned char) vec_ld(15, srcp);
       
  1102                 vs = vec_perm(vs, voverflow, valigner);
       
  1103                 vs = vec_perm(vs, valpha, vsrcPermute);
       
  1104 
       
  1105                 /* d = *dstp */
       
  1106                 vd = (vector unsigned char) vec_ld(0, dstp);
       
  1107                 vd = vec_perm(vd, vd, vsdstPermute);
       
  1108 
       
  1109                 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
       
  1110 
       
  1111                 /* set the alpha channel to full on */
       
  1112                 vd = vec_or(vd, valphamask);
       
  1113                 vd = vec_perm(vd, vbits, vdstPermute);
       
  1114 
       
  1115                 /* *dstp = res */
       
  1116                 vec_st((vector unsigned int) vd, 0, dstp);
       
  1117 
       
  1118                 srcp += 4;
       
  1119                 dstp += 4;
       
  1120                 width -= 4;
       
  1121                 vs = voverflow;
       
  1122             }
       
  1123             ONE_PIXEL_BLEND((extrawidth), extrawidth);
       
  1124         }
       
  1125 #undef ONE_PIXEL_BLEND
       
  1126 
       
  1127         srcp += srcskip;
       
  1128         dstp += dstskip;
       
  1129     }
       
  1130 
       
  1131 }
       
  1132 
       
  1133 
       
  1134 /* fast RGB888->(A)RGB888 blending */
       
  1135 static void
       
  1136 BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
       
  1137 {
       
  1138     unsigned alpha = info->a;
       
  1139     int height = info->dst_h;
       
  1140     Uint32 *srcp = (Uint32 *) info->src;
       
  1141     int srcskip = info->src_skip >> 2;
       
  1142     Uint32 *dstp = (Uint32 *) info->dst;
       
  1143     int dstskip = info->dst_skip >> 2;
       
  1144     vector unsigned char mergePermute;
       
  1145     vector unsigned char valpha;
       
  1146     vector unsigned char valphamask;
       
  1147     vector unsigned short v1;
       
  1148     vector unsigned short v8;
       
  1149 
       
  1150     mergePermute = VEC_MERGE_PERMUTE();
       
  1151     v1 = vec_splat_u16(1);
       
  1152     v8 = vec_splat_u16(8);
       
  1153 
       
  1154     /* set the alpha to 255 on the destination surf */
       
  1155     valphamask = VEC_ALPHA_MASK();
       
  1156 
       
  1157     /* set a vector full of alpha and 255-alpha */
       
  1158     ((unsigned char *) &valpha)[0] = alpha;
       
  1159     valpha = vec_splat(valpha, 0);
       
  1160 
       
  1161     while (height--) {
       
  1162         int width = info->dst_w;
       
  1163 #define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
       
  1164             Uint32 s = *srcp; \
       
  1165             Uint32 d = *dstp; \
       
  1166             Uint32 s1 = s & 0xff00ff; \
       
  1167             Uint32 d1 = d & 0xff00ff; \
       
  1168             d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
       
  1169                  & 0xff00ff; \
       
  1170             s &= 0xff00; \
       
  1171             d &= 0xff00; \
       
  1172             d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
       
  1173             *dstp = d1 | d | 0xff000000; \
       
  1174             ++srcp; \
       
  1175             ++dstp; \
       
  1176             widthvar--; \
       
  1177         }
       
  1178         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
       
  1179         if (width > 0) {
       
  1180             int extrawidth = (width % 4);
       
  1181             vector unsigned char valigner = VEC_ALIGNER(srcp);
       
  1182             vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
       
  1183             width -= extrawidth;
       
  1184             while (width) {
       
  1185                 vector unsigned char voverflow;
       
  1186                 vector unsigned char vd;
       
  1187 
       
  1188                 /* s = *srcp */
       
  1189                 voverflow = (vector unsigned char) vec_ld(15, srcp);
       
  1190                 vs = vec_perm(vs, voverflow, valigner);
       
  1191 
       
  1192                 /* d = *dstp */
       
  1193                 vd = (vector unsigned char) vec_ld(0, dstp);
       
  1194 
       
  1195                 VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
       
  1196 
       
  1197                 /* set the alpha channel to full on */
       
  1198                 vd = vec_or(vd, valphamask);
       
  1199 
       
  1200                 /* *dstp = res */
       
  1201                 vec_st((vector unsigned int) vd, 0, dstp);
       
  1202 
       
  1203                 srcp += 4;
       
  1204                 dstp += 4;
       
  1205                 width -= 4;
       
  1206                 vs = voverflow;
       
  1207             }
       
  1208             ONE_PIXEL_BLEND((extrawidth), extrawidth);
       
  1209         }
       
  1210 #undef ONE_PIXEL_BLEND
       
  1211 
       
  1212         srcp += srcskip;
       
  1213         dstp += dstskip;
       
  1214     }
       
  1215 }
       
  1216 
       
  1217 #if __MWERKS__
       
  1218 #pragma altivec_model off
       
  1219 #endif
       
  1220 #endif /* SDL_ALTIVEC_BLITTERS */
       
  1221 
       
  1222 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
   422 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
  1223 static void
   423 static void
  1224 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
   424 BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
  1225 {
   425 {
  1226     int width = info->dst_w;
   426     int width = info->dst_w;
  1335 	    /* *INDENT-ON* */
   535 	    /* *INDENT-ON* */
  1336         srcp += srcskip;
   536         srcp += srcskip;
  1337         dstp += dstskip;
   537         dstp += dstskip;
  1338     }
   538     }
  1339 }
   539 }
  1340 
       
  1341 #ifdef __3dNOW__
       
  1342 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
       
  1343 static void
       
  1344 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
       
  1345 {
       
  1346     int width = info->dst_w;
       
  1347     int height = info->dst_h;
       
  1348     Uint32 *srcp = (Uint32 *) info->src;
       
  1349     int srcskip = info->src_skip >> 2;
       
  1350     Uint32 *dstp = (Uint32 *) info->dst;
       
  1351     int dstskip = info->dst_skip >> 2;
       
  1352     SDL_PixelFormat *sf = info->src_fmt;
       
  1353     Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
       
  1354     Uint32 amask = sf->Amask;
       
  1355     Uint32 ashift = sf->Ashift;
       
  1356     Uint64 multmask;
       
  1357 
       
  1358     __m64 src1, dst1, mm_alpha, mm_zero, dmask;
       
  1359 
       
  1360     mm_zero = _mm_setzero_si64();       /* 0 -> mm_zero */
       
  1361     multmask = 0xFFFF;
       
  1362     multmask <<= (ashift * 2);
       
  1363     multmask = ~multmask;
       
  1364     dmask = *(__m64 *) & multmask;      /* dst alpha mask -> dmask */
       
  1365 
       
  1366     while (height--) {
       
  1367 	    /* *INDENT-OFF* */
       
  1368 	    DUFFS_LOOP4({
       
  1369 		Uint32 alpha;
       
  1370 
       
  1371 		_m_prefetch(srcp + 16);
       
  1372 		_m_prefetch(dstp + 16);
       
  1373 
       
  1374 		alpha = *srcp & amask;
       
  1375 		if (alpha == 0) {
       
  1376 			/* do nothing */
       
  1377 		} else if (alpha == amask) {
       
  1378 			/* copy RGB, keep dst alpha */
       
  1379 			*dstp = (*srcp & chanmask) | (*dstp & ~chanmask);
       
  1380 		} else {
       
  1381 			src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
       
  1382 			src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
       
  1383 
       
  1384 			dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
       
  1385 			dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
       
  1386 
       
  1387 			mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
       
  1388 			mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
       
  1389 			mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
       
  1390 			mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
       
  1391 			mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */
       
  1392 
       
  1393 			/* blend */		    
       
  1394 			src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */
       
  1395 			src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */
       
  1396 			src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */
       
  1397 			dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */
       
  1398 			dst1 = _mm_packs_pu16(dst1, mm_zero);  /* 0000ARGB -> dst1 */
       
  1399 			
       
  1400 			*dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
       
  1401 		}
       
  1402 		++srcp;
       
  1403 		++dstp;
       
  1404 	    }, width);
       
  1405 	    /* *INDENT-ON* */
       
  1406         srcp += srcskip;
       
  1407         dstp += dstskip;
       
  1408     }
       
  1409     _mm_empty();
       
  1410 }
       
  1411 
       
   1412 #endif /* __3dNOW__ */
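/* [editor's note -- illustration only, not part of the changeset]
 * Why "multmask <<= (ashift * 2)" above: after _mm_unpacklo_pi8 every 8-bit
 * channel widens to a 16-bit lane, so a channel at bit `ashift` in the
 * packed pixel lands at bit `ashift * 2` in the unpacked __m64.  Clearing
 * that lane in the alpha multiplier makes (src - dst) * alpha contribute
 * nothing to the alpha lane, which is how the destination alpha survives
 * the final _mm_add_pi8.  dst_alpha_multmask_sketch is a hypothetical
 * helper.
 */
static Uint64
dst_alpha_multmask_sketch(Uint32 ashift)        /* ashift in bits, e.g. 24 */
{
    Uint64 multmask = 0xFFFF;
    multmask <<= (ashift * 2);  /* select the widened 16-bit alpha lane */
    return ~multmask;           /* 0x0000FFFFFFFFFFFF when ashift == 24 */
}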
       
  1413 
   540 
  1414 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
   541 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
  1415 
   542 
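/* [editor's note -- illustration only; the BLEND16_50 body itself is not
 * part of this hunk]  One standard way to halve packed 565 pixels without
 * unpacking: a + b == 2*(a & b) + (a ^ b), so the average is
 * (a & b) + ((a ^ b) >> 1).  Masking the XOR with 0xF7DE first clears the
 * low bit of each 5/6/5 field so the shift cannot bleed a bit into the
 * field below, and with 0xF7DEF7DE the same expression averages two 16-bit
 * pixels packed in one 32-bit word.  blend16_50_sketch is a hypothetical
 * helper.
 */
static Uint32
blend16_50_sketch(Uint32 d, Uint32 s, Uint32 mask)  /* mask: 0xF7DE or 0xF7DEF7DE */
{
    return (s & d) + (((s ^ d) & mask) >> 1);
}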
  1416 /* blend a single 16 bit pixel at 50% */
   543 /* blend a single 16 bit pixel at 50% */
  1417 #define BLEND16_50(d, s, mask)						\
   544 #define BLEND16_50(d, s, mask)						\
  2128         switch (df->BytesPerPixel) {
  1255         switch (df->BytesPerPixel) {
  2129         case 1:
  1256         case 1:
  2130             return BlitNto1PixelAlpha;
  1257             return BlitNto1PixelAlpha;
  2131 
  1258 
  2132         case 2:
  1259         case 2:
  2133 #if SDL_ALTIVEC_BLITTERS
  1260             if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
  2134             if (sf->BytesPerPixel == 4
  1261                 && sf->Gmask == 0xff00
  2135                 && df->Gmask == 0x7e0 && df->Bmask == 0x1f
  1262                 && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
  2136                 && SDL_HasAltiVec())
  1263                     || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
  2137                 return Blit32to565PixelAlphaAltivec;
       
  2138             else
       
  2139 #endif
       
  2140                 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
       
  2141                     && sf->Gmask == 0xff00
       
  2142                     && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
       
  2143                         || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
       
  2144                 if (df->Gmask == 0x7e0)
  1264                 if (df->Gmask == 0x7e0)
  2145                     return BlitARGBto565PixelAlpha;
  1265                     return BlitARGBto565PixelAlpha;
  2146                 else if (df->Gmask == 0x3e0)
  1266                 else if (df->Gmask == 0x3e0)
  2147                     return BlitARGBto555PixelAlpha;
  1267                     return BlitARGBto555PixelAlpha;
  2148             }
  1268             }
  2150 
  1270 
  2151         case 4:
  1271         case 4:
  2152             if (sf->Rmask == df->Rmask
  1272             if (sf->Rmask == df->Rmask
  2153                 && sf->Gmask == df->Gmask
  1273                 && sf->Gmask == df->Gmask
  2154                 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
  1274                 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
  2155 #if defined(__MMX__) || defined(__3dNOW__)
  1275 #if defined(__MMX__)
  2156                 if (sf->Rshift % 8 == 0
  1276                 if (sf->Rshift % 8 == 0
  2157                     && sf->Gshift % 8 == 0
  1277                     && sf->Gshift % 8 == 0
  2158                     && sf->Bshift % 8 == 0
  1278                     && sf->Bshift % 8 == 0
  2159                     && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
  1279                     && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
  2160 #ifdef __3dNOW__
       
  2161                     if (SDL_Has3DNow())
       
  2162                         return BlitRGBtoRGBPixelAlphaMMX3DNOW;
       
  2163 #endif
       
  2164 #ifdef __MMX__
       
  2165                     if (SDL_HasMMX())
  1280                     if (SDL_HasMMX())
  2166                         return BlitRGBtoRGBPixelAlphaMMX;
  1281                         return BlitRGBtoRGBPixelAlphaMMX;
  2167 #endif
       
  2168                 }
  1282                 }
  2169 #endif /* __MMX__ || __3dNOW__ */
  1283 #endif /* __MMX__ */
  2170                 if (sf->Amask == 0xff000000) {
  1284                 if (sf->Amask == 0xff000000) {
  2171 #if SDL_ALTIVEC_BLITTERS
       
  2172                     if (SDL_HasAltiVec())
       
  2173                         return BlitRGBtoRGBPixelAlphaAltivec;
       
  2174 #endif
       
  2175                     return BlitRGBtoRGBPixelAlpha;
  1285                     return BlitRGBtoRGBPixelAlpha;
  2176                 }
  1286                 }
  2177             }
  1287             }
  2178 #if SDL_ALTIVEC_BLITTERS
  1288             return BlitNtoNPixelAlpha;
  2179             if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec())
       
  2180                 return Blit32to32PixelAlphaAltivec;
       
  2181             else
       
  2182 #endif
       
  2183                 return BlitNtoNPixelAlpha;
       
  2184 
  1289 
  2185         case 3:
  1290         case 3:
  2186         default:
  1291         default:
  2187             return BlitNtoNPixelAlpha;
  1292             return BlitNtoNPixelAlpha;
  2188         }
  1293         }
  2224                         && sf->Gshift % 8 == 0
  1329                         && sf->Gshift % 8 == 0
  2225                         && sf->Bshift % 8 == 0 && SDL_HasMMX())
  1330                         && sf->Bshift % 8 == 0 && SDL_HasMMX())
  2226                         return BlitRGBtoRGBSurfaceAlphaMMX;
  1331                         return BlitRGBtoRGBSurfaceAlphaMMX;
  2227 #endif
  1332 #endif
  2228                     if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
  1333                     if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
  2229 #if SDL_ALTIVEC_BLITTERS
       
  2230                         if (SDL_HasAltiVec())
       
  2231                             return BlitRGBtoRGBSurfaceAlphaAltivec;
       
  2232 #endif
       
  2233                         return BlitRGBtoRGBSurfaceAlpha;
  1334                         return BlitRGBtoRGBSurfaceAlpha;
  2234                     }
  1335                     }
  2235                 }
  1336                 }
  2236 #if SDL_ALTIVEC_BLITTERS
  1337                 return BlitNtoNSurfaceAlpha;
  2237                 if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec())
       
  2238                     return Blit32to32SurfaceAlphaAltivec;
       
  2239                 else
       
  2240 #endif
       
  2241                     return BlitNtoNSurfaceAlpha;
       
  2242 
  1338 
  2243             case 3:
  1339             case 3:
  2244             default:
  1340             default:
  2245                 return BlitNtoNSurfaceAlpha;
  1341                 return BlitNtoNSurfaceAlpha;
  2246             }
  1342             }
  2250     case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND:
  1346     case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND:
  2251         if (sf->Amask == 0) {
  1347         if (sf->Amask == 0) {
  2252             if (df->BytesPerPixel == 1)
  1348             if (df->BytesPerPixel == 1)
  2253                 return BlitNto1SurfaceAlphaKey;
  1349                 return BlitNto1SurfaceAlphaKey;
  2254             else
  1350             else
  2255 #if SDL_ALTIVEC_BLITTERS
       
  2256             if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
       
  2257                     SDL_HasAltiVec())
       
  2258                 return Blit32to32SurfaceAlphaKeyAltivec;
       
  2259             else
       
  2260 #endif
       
  2261                 return BlitNtoNSurfaceAlphaKey;
  1351                 return BlitNtoNSurfaceAlphaKey;
  2262         }
  1352         }
  2263         break;
  1353         break;
  2264     }
  1354     }
  2265 
  1355