From c53b3008cba1dc669584398d06f0e30c392c4dce Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Thu, 8 Sep 2005 07:20:59 +0000 Subject: [PATCH] Altivec blitter for 555 -> 8888 surface, written by me. --ryan. --- src/video/SDL_blit_N.c | 147 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index 8bf27dd2a..e008e4ff3 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -382,6 +382,151 @@ static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) { } + +static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) { + int height = info->d_height; + Uint8 *src = (Uint8 *) info->s_pixels; + int srcskip = info->s_skip; + Uint8 *dst = (Uint8 *) info->d_pixels; + int dstskip = info->d_skip; + SDL_PixelFormat *srcfmt = info->src; + SDL_PixelFormat *dstfmt = info->dst; + unsigned alpha; + vector unsigned char valpha; + vector unsigned char vpermute; + vector unsigned short vf800; + vector unsigned int v8 = vec_splat_u32(8); + vector unsigned int v16 = vec_add(v8, v8); + vector unsigned short v1 = vec_splat_u16(1); + vector unsigned short v3 = vec_splat_u16(3); + /* + 0x10 - 0x1f is the alpha + 0x00 - 0x0e evens are the red + 0x01 - 0x0f odds are zero + */ + vector unsigned char vredalpha1 = (vector unsigned char)( + 0x10, 0x00, 0x01, 0x01, + 0x10, 0x02, 0x01, 0x01, + 0x10, 0x04, 0x01, 0x01, + 0x10, 0x06, 0x01, 0x01 + ); + vector unsigned char vredalpha2 = (vector unsigned char)( + vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)) + ); + /* + 0x00 - 0x0f is ARxx ARxx ARxx ARxx + 0x11 - 0x0f odds are blue + */ + vector unsigned char vblue1 = (vector unsigned char)( + 0x00, 0x01, 0x02, 0x11, + 0x04, 0x05, 0x06, 0x13, + 0x08, 0x09, 0x0a, 0x15, + 0x0c, 0x0d, 0x0e, 0x17 + ); + vector unsigned char vblue2 = (vector unsigned char)( + vec_add((vector unsigned int)vblue1, v8) + ); + /* + 0x00 - 0x0f is ARxB ARxB ARxB ARxB + 0x10 - 0x0e evens are green + */ + vector unsigned char vgreen1 = (vector unsigned char)( + 0x00, 0x01, 0x10, 0x03, + 0x04, 0x05, 0x12, 0x07, + 0x08, 0x09, 0x14, 0x0b, + 0x0c, 0x0d, 0x16, 0x0f + ); + vector unsigned char vgreen2 = (vector unsigned char)( + vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)) + ); + + + assert(srcfmt->BytesPerPixel == 2); + assert(dstfmt->BytesPerPixel == 4); + + vf800 = (vector unsigned short)vec_splat_u8(-7); + vf800 = vec_sl(vf800, vec_splat_u16(8)); + + if (dstfmt->Amask && srcfmt->alpha) { + ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha; + valpha = vec_splat(valpha, 0); + } else { + alpha = 0; + valpha = vec_splat_u8(0); + } + + vpermute = calc_swizzle32(NULL, dstfmt); + while (height--) { + vector unsigned char valigner; + vector unsigned char voverflow; + vector unsigned char vsrc; + + int width = info->d_width; + int extrawidth; + + /* do scalar until we can align... */ +#define ONE_PIXEL_BLEND(condition, widthvar) \ + while (condition) { \ + unsigned sR, sG, sB; \ + unsigned short pixel = *((unsigned short *)src); \ + sR = (pixel >> 7) & 0xf8; \ + sG = (pixel >> 2) & 0xf8; \ + sB = (pixel << 3) & 0xf8; \ + ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \ + src += 2; \ + dst += 4; \ + widthvar--; \ + } + ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width); + + /* After all that work, here's the vector part! */ + extrawidth = (width % 8); /* trailing unaligned stores */ + width -= extrawidth; + vsrc = vec_ld(0, src); + valigner = VEC_ALIGNER(src); + + while (width) { + vector unsigned short vR, vG, vB; + vector unsigned char vdst1, vdst2; + + voverflow = vec_ld(15, src); + vsrc = vec_perm(vsrc, voverflow, valigner); + + vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800); + vB = vec_sl((vector unsigned short)vsrc, v3); + vG = vec_sl(vB, v3); + + vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1); + vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); + vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); + vdst1 = vec_perm(vdst1, valpha, vpermute); + vec_st(vdst1, 0, dst); + + vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2); + vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); + vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); + vdst2 = vec_perm(vdst2, valpha, vpermute); + vec_st(vdst2, 16, dst); + + width -= 8; + dst += 32; + src += 16; + vsrc = voverflow; + } + + assert(width == 0); + + + /* do scalar until we can align... */ + ONE_PIXEL_BLEND((extrawidth), extrawidth); +#undef ONE_PIXEL_BLEND + + src += srcskip; /* move to next row, accounting for pitch. */ + dst += dstskip; + } + +} + static void BlitNtoNKey(SDL_BlitInfo *info); static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info); static void Blit32to32KeyAltivec(SDL_BlitInfo *info) @@ -2090,6 +2235,8 @@ static const struct blit_table normal_blit_2[] = { /* has-altivec */ { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000, 2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, + { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000, + 2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA }, #endif { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF, 0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },