Altivec blitter for 555 -> 8888 surface, written by me.
author Ryan C. Gordon <icculus@icculus.org>
Thu, 08 Sep 2005 07:20:59 +0000
changeset 1139 d0ae4dff7208
parent 1138 fcfb783a3ca2
child 1140 af8b0f9ac2f4
Altivec blitter for 555 -> 8888 surface, written by me.

--ryan.
src/video/SDL_blit_N.c
     1.1 --- a/src/video/SDL_blit_N.c	Thu Sep 08 07:15:44 2005 +0000
     1.2 +++ b/src/video/SDL_blit_N.c	Thu Sep 08 07:20:59 2005 +0000
     1.3 @@ -382,6 +382,151 @@
     1.4  
     1.5  }
     1.6  
     1.7 +
     1.8 +static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
     1.9 +    int height = info->d_height;
    1.10 +    Uint8 *src = (Uint8 *) info->s_pixels;
    1.11 +    int srcskip = info->s_skip;
    1.12 +    Uint8 *dst = (Uint8 *) info->d_pixels;
    1.13 +    int dstskip = info->d_skip;
    1.14 +    SDL_PixelFormat *srcfmt = info->src;
    1.15 +    SDL_PixelFormat *dstfmt = info->dst;
    1.16 +    unsigned alpha;
    1.17 +    vector unsigned char valpha;
    1.18 +    vector unsigned char vpermute;
    1.19 +    vector unsigned short vf800;
    1.20 +    vector unsigned int v8 = vec_splat_u32(8);
    1.21 +    vector unsigned int v16 = vec_add(v8, v8);
    1.22 +    vector unsigned short v1 = vec_splat_u16(1);
    1.23 +    vector unsigned short v3 = vec_splat_u16(3);
    1.24 +    /* 
    1.25 +        0x10 - 0x1f is the alpha
    1.26 +        0x00 - 0x0e evens are the red
    1.27 +        0x01 - 0x0f odds are zero
    1.28 +    */
    1.29 +    vector unsigned char vredalpha1 = (vector unsigned char)(
    1.30 +        0x10, 0x00, 0x01, 0x01,
    1.31 +        0x10, 0x02, 0x01, 0x01,
    1.32 +        0x10, 0x04, 0x01, 0x01,
    1.33 +        0x10, 0x06, 0x01, 0x01
    1.34 +    );
    1.35 +    vector unsigned char vredalpha2 = (vector unsigned char)(
    1.36 +        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    1.37 +    );
    1.38 +    /*
    1.39 +        0x00 - 0x0f is ARxx ARxx ARxx ARxx
    1.40 +        0x11 - 0x1f odds are blue
    1.41 +    */
    1.42 +    vector unsigned char vblue1 = (vector unsigned char)(
    1.43 +        0x00, 0x01, 0x02, 0x11,
    1.44 +        0x04, 0x05, 0x06, 0x13,
    1.45 +        0x08, 0x09, 0x0a, 0x15,
    1.46 +        0x0c, 0x0d, 0x0e, 0x17
    1.47 +    );
    1.48 +    vector unsigned char vblue2 = (vector unsigned char)(
    1.49 +        vec_add((vector unsigned int)vblue1, v8)
    1.50 +    );
    1.51 +    /*
    1.52 +        0x00 - 0x0f is ARxB ARxB ARxB ARxB
    1.53 +        0x10 - 0x1e evens are green
    1.54 +    */
    1.55 +    vector unsigned char vgreen1 = (vector unsigned char)(
    1.56 +        0x00, 0x01, 0x10, 0x03,
    1.57 +        0x04, 0x05, 0x12, 0x07,
    1.58 +        0x08, 0x09, 0x14, 0x0b,
    1.59 +        0x0c, 0x0d, 0x16, 0x0f
    1.60 +    );
    1.61 +    vector unsigned char vgreen2 = (vector unsigned char)(
    1.62 +        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    1.63 +    );
    1.64 +    
    1.65 +
    1.66 +    assert(srcfmt->BytesPerPixel == 2);
    1.67 +    assert(dstfmt->BytesPerPixel == 4);
    1.68 +
    1.69 +    vf800 = (vector unsigned short)vec_splat_u8(-7);
    1.70 +    vf800 = vec_sl(vf800, vec_splat_u16(8));
    1.71 +
    1.72 +    if (dstfmt->Amask && srcfmt->alpha) {
    1.73 +        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
    1.74 +        valpha = vec_splat(valpha, 0);
    1.75 +    } else {
    1.76 +        alpha = 0;
    1.77 +        valpha = vec_splat_u8(0);
    1.78 +    }
    1.79 +
    1.80 +    vpermute = calc_swizzle32(NULL, dstfmt);
    1.81 +    while (height--) {
    1.82 +        vector unsigned char valigner;
    1.83 +        vector unsigned char voverflow;
    1.84 +        vector unsigned char vsrc;
    1.85 +
    1.86 +        int width = info->d_width;
    1.87 +        int extrawidth;
    1.88 +
    1.89 +        /* do scalar until we can align... */
    1.90 +#define ONE_PIXEL_BLEND(condition, widthvar) \
    1.91 +        while (condition) { \
    1.92 +            unsigned sR, sG, sB; \
    1.93 +            unsigned short pixel = *((unsigned short *)src); \
    1.94 +            sR = (pixel >> 7) & 0xf8; \
    1.95 +            sG = (pixel >> 2) & 0xf8; \
    1.96 +            sB = (pixel << 3) & 0xf8; \
    1.97 +            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    1.98 +            src += 2; \
    1.99 +            dst += 4; \
   1.100 +            widthvar--; \
   1.101 +        }
   1.102 +        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
   1.103 +
   1.104 +        /* After all that work, here's the vector part! */
   1.105 +        extrawidth = (width % 8);  /* trailing unaligned stores */
   1.106 +        width -= extrawidth;
   1.107 +        vsrc = vec_ld(0, src);
   1.108 +        valigner = VEC_ALIGNER(src);
   1.109 +
   1.110 +        while (width) {
   1.111 +            vector unsigned short vR, vG, vB;
   1.112 +            vector unsigned char vdst1, vdst2;
   1.113 +
   1.114 +            voverflow = vec_ld(15, src);
   1.115 +            vsrc = vec_perm(vsrc, voverflow, valigner);
   1.116 +
   1.117 +            vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
   1.118 +            vB = vec_sl((vector unsigned short)vsrc, v3);
   1.119 +            vG = vec_sl(vB, v3);
   1.120 +
   1.121 +            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
   1.122 +            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
   1.123 +            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
   1.124 +            vdst1 = vec_perm(vdst1, valpha, vpermute);
   1.125 +            vec_st(vdst1, 0, dst);
   1.126 +
   1.127 +            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
   1.128 +            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
   1.129 +            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
   1.130 +            vdst2 = vec_perm(vdst2, valpha, vpermute);
   1.131 +            vec_st(vdst2, 16, dst);
   1.132 +            
   1.133 +            width -= 8;
   1.134 +            dst += 32;
   1.135 +            src += 16;
   1.136 +            vsrc = voverflow;
   1.137 +        }
   1.138 +
   1.139 +        assert(width == 0);
   1.140 +
   1.141 +
   1.142 +        /* do scalar for the leftover trailing pixels... */
   1.143 +        ONE_PIXEL_BLEND((extrawidth), extrawidth);
   1.144 +#undef ONE_PIXEL_BLEND
   1.145 +
   1.146 +        src += srcskip;  /* move to next row, accounting for pitch. */
   1.147 +        dst += dstskip;
   1.148 +    }
   1.149 +
   1.150 +}
   1.151 +
   1.152  static void BlitNtoNKey(SDL_BlitInfo *info);
   1.153  static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
   1.154  static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
   1.155 @@ -2090,6 +2235,8 @@
   1.156      /* has-altivec */
   1.157      { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
   1.158        2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
   1.159 +    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
   1.160 +      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
   1.161  #endif
   1.162      { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
   1.163        0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },