src/video/ps2gs/SDL_gsyuv.c
changeset 70 f590dd383b5d
child 136 717f739d6ec1
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/video/ps2gs/SDL_gsyuv.c	Sat Jun 16 03:17:45 2001 +0000
     1.3 @@ -0,0 +1,467 @@
     1.4 +/*
     1.5 +    SDL - Simple DirectMedia Layer
     1.6 +    Copyright (C) 1997, 1998, 1999, 2000  Sam Lantinga
     1.7 +
     1.8 +    This library is free software; you can redistribute it and/or
     1.9 +    modify it under the terms of the GNU Library General Public
    1.10 +    License as published by the Free Software Foundation; either
    1.11 +    version 2 of the License, or (at your option) any later version.
    1.12 +
    1.13 +    This library is distributed in the hope that it will be useful,
    1.14 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.15 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.16 +    Library General Public License for more details.
    1.17 +
    1.18 +    You should have received a copy of the GNU Library General Public
    1.19 +    License along with this library; if not, write to the Free
    1.20 +    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    1.21 +
    1.22 +    Sam Lantinga
    1.23 +    slouken@devolution.com
    1.24 +*/
    1.25 +
    #ifdef SAVE_RCSID
    /* Revision identification string, only compiled in when SAVE_RCSID is
       defined.  It has to be a character array: a plain `char' cannot be
       initialized from a string literal. */
    static const char rcsid[] =
     "@(#) $Id$";
    #endif
    1.30 +
    1.31 +/* This is the Playstation 2 implementation of YUV video overlays */
    1.32 +
    1.33 +#include <stdlib.h>
    1.34 +#include <string.h>
    1.35 +#include <fcntl.h>
    1.36 +#include <unistd.h>
    1.37 +#include <sys/ioctl.h>
    1.38 +#include <sys/mman.h>
    1.39 +#include <asm/page.h>		/* For definition of PAGE_SIZE */
    1.40 +
    1.41 +#include "SDL_error.h"
    1.42 +#include "SDL_video.h"
    1.43 +#include "SDL_gsyuv_c.h"
    1.44 +#include "SDL_yuvfuncs.h"
    1.45 +
    1.46 +/* The maximum number of 16x16 pixel block converted at once */
    1.47 +#define MAX_MACROBLOCKS	1024	/* 2^10 macroblocks at once */
    1.48 +
    1.49 +/* The functions used to manipulate video overlays */
    /* Overlay operations table.  GS_CreateYUVOverlay() installs its address
       as overlay->hwfuncs on every overlay it creates.  The initializers are
       positional, so their order has to follow the member order of
       struct private_yuvhwfuncs, which is not declared in this file. */
    static struct private_yuvhwfuncs gs_yuvfuncs = {
    	GS_LockYUVOverlay,	/* no-op here, always returns 0 */
    	GS_UnlockYUVOverlay,	/* no-op here */
    	GS_DisplayYUVOverlay,	/* convert through the IPU and blit */
    	GS_FreeYUVOverlay	/* release the per-overlay hwdata */
    };
    1.56 +
    /* Per-overlay driver state, allocated by GS_CreateYUVOverlay() and
       released by GS_FreeYUVOverlay(). */
    struct private_yuvhwdata {
    	int ipu_fd;		/* descriptor for /dev/ps2ipu, -1 when not open */
    	Uint8 *pixels;		/* malloc'ed buffer holding the planar YUV data */
    	int macroblocks;	/* (width/16) * (height/16) */
    	int dma_len;		/* length in bytes of the dma_mem mapping */
    	caddr_t dma_mem;	/* base address returned by mmap() */
    	caddr_t ipu_imem;	/* macroblock data sent to the IPU (== dma_mem) */
    	caddr_t ipu_omem;	/* converted pixels received from the IPU */
    	caddr_t dma_tags;	/* tag area following the converted pixels */
    	/* The two destination corner values inside the scaling tags;
    	   GS_DisplayYUVOverlay() rewrites them on every call */
    	unsigned long long *stretch_x1y1;
    	unsigned long long *stretch_x2y2;
    	struct ps2_plist plist;	/* packet list passed to PS2IOC_SENDL */

    	/* These are just so we don't have to allocate them separately */
    	Uint16 pitches[3];	/* storage behind overlay->pitches */
    	Uint8 *planes[3];	/* storage behind overlay->pixels */
    };
    1.74 +
    /* Return the smallest shift such that (1 << shift) >= value, i.e. the
       base-2 logarithm of value rounded up.  Values of 1 or less yield 0.

       The comparison is carried out in unsigned arithmetic: with a signed
       `1 << shift' any value above 2^30 would require shifting into the sign
       bit, which is undefined behaviour and may never terminate. */
    static int power_of_2(int value)
    {
    	unsigned int target;
    	unsigned int bit;
    	int shift;

    	if ( value <= 1 ) {
    		return(0);
    	}
    	target = (unsigned int)value;
    	shift = 0;
    	for ( bit = 1; bit < target; bit <<= 1 ) {
    		++shift;
    	}
    	return(shift);
    }
    1.84 +
    1.85 +SDL_Overlay *GS_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
    1.86 +{
    1.87 +	SDL_Overlay *overlay;
    1.88 +	struct private_yuvhwdata *hwdata;
    1.89 +	int map_offset;
    1.90 +	unsigned long long *tags;
    1.91 +	caddr_t base;
    1.92 +	int bpp;
    1.93 +	int fbp, fbw, psm;
    1.94 +	int x, y, w, h;
    1.95 +	int pnum;
    1.96 +	struct ps2_packet *packet;
    1.97 +	struct ps2_packet tex_packet;
    1.98 +
    1.99 +	/* We can only decode blocks of 16x16 pixels */
   1.100 +	if ( (width & 15) || (height & 15) ) {
   1.101 +		SDL_SetError("Overlay width/height must be multiples of 16");
   1.102 +		return(NULL);
   1.103 +	}
   1.104 +	/* Make sure the image isn't too large for a single DMA transfer */
   1.105 +	if ( ((width/16) * (height/16)) > MAX_MACROBLOCKS ) {
   1.106 +		SDL_SetError("Overlay too large (maximum size: %d pixels)",
   1.107 +		             MAX_MACROBLOCKS * 16 * 16);
   1.108 +		return(NULL);
   1.109 +	}
   1.110 +
   1.111 +	/* Double-check the requested format.  For simplicity, we'll only
   1.112 +	   support planar YUV formats.
   1.113 +	 */
   1.114 +	switch (format) {
   1.115 +	    case SDL_YV12_OVERLAY:
   1.116 +	    case SDL_IYUV_OVERLAY:
   1.117 +		/* Supported planar YUV format */
   1.118 +		break;
   1.119 +	    default:
   1.120 +		SDL_SetError("Unsupported YUV format");
   1.121 +		return(NULL);
   1.122 +	}
   1.123 +
   1.124 +	/* Create the overlay structure */
   1.125 +	overlay = (SDL_Overlay *)malloc(sizeof *overlay);
   1.126 +	if ( overlay == NULL ) {
   1.127 +		SDL_OutOfMemory();
   1.128 +		return(NULL);
   1.129 +	}
   1.130 +	memset(overlay, 0, (sizeof *overlay));
   1.131 +
   1.132 +	/* Fill in the basic members */
   1.133 +	overlay->format = format;
   1.134 +	overlay->w = width;
   1.135 +	overlay->h = height;
   1.136 +
   1.137 +	/* Set up the YUV surface function structure */
   1.138 +	overlay->hwfuncs = &gs_yuvfuncs;
   1.139 +	overlay->hw_overlay = 1;
   1.140 +
   1.141 +	/* Create the pixel data */
   1.142 +	hwdata = (struct private_yuvhwdata *)malloc(sizeof *hwdata);
   1.143 +	overlay->hwdata = hwdata;
   1.144 +	if ( hwdata == NULL ) {
   1.145 +		SDL_FreeYUVOverlay(overlay);
   1.146 +		SDL_OutOfMemory();
   1.147 +		return(NULL);
   1.148 +	}
   1.149 +	hwdata->ipu_fd = -1;
   1.150 +	hwdata->pixels = (Uint8 *)malloc(width*height*2);
   1.151 +	if ( hwdata->pixels == NULL ) {
   1.152 +		SDL_FreeYUVOverlay(overlay);
   1.153 +		SDL_OutOfMemory();
   1.154 +		return(NULL);
   1.155 +	}
   1.156 +	hwdata->macroblocks = (width/16) * (height/16);
   1.157 +
   1.158 +	/* Find the pitch and offset values for the overlay */
   1.159 +	overlay->pitches = hwdata->pitches;
   1.160 +	overlay->pixels = hwdata->planes;
   1.161 +	switch (format) {
   1.162 +	    case SDL_YV12_OVERLAY:
   1.163 +	    case SDL_IYUV_OVERLAY:
   1.164 +		overlay->pitches[0] = overlay->w;
   1.165 +		overlay->pitches[1] = overlay->pitches[0] / 2;
   1.166 +		overlay->pitches[2] = overlay->pitches[0] / 2;
   1.167 +	        overlay->pixels[0] = hwdata->pixels;
   1.168 +	        overlay->pixels[1] = overlay->pixels[0] +
   1.169 +		                     overlay->pitches[0] * overlay->h;
   1.170 +	        overlay->pixels[2] = overlay->pixels[1] +
   1.171 +		                     overlay->pitches[1] * overlay->h / 2;
   1.172 +		overlay->planes = 3;
   1.173 +		break;
   1.174 +	    default:
   1.175 +		/* We should never get here (caught above) */
   1.176 +		break;
   1.177 +	}
   1.178 +
   1.179 +	/* Theoretically we could support several concurrent decode
   1.180 +	   streams queueing up on the same file descriptor, but for
   1.181 +	   simplicity we'll support only one.  Opening the IPU more
   1.182 +	   than once will fail with EBUSY.
   1.183 +	*/
   1.184 +	hwdata->ipu_fd = open("/dev/ps2ipu", O_RDWR);
   1.185 +	if ( hwdata->ipu_fd < 0 ) {
   1.186 +		SDL_FreeYUVOverlay(overlay);
   1.187 +		SDL_SetError("Playstation 2 IPU busy");
   1.188 +		return(NULL);
   1.189 +	}
   1.190 +
   1.191 +	/* Allocate a DMA area for pixel conversion */
   1.192 +	bpp = this->screen->format->BytesPerPixel;
   1.193 +	map_offset = (mapped_len + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
   1.194 +	hwdata->dma_len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8) +
   1.195 +	                  width * height * bpp +
   1.196 +	                  hwdata->macroblocks * (16 * sizeof(long long)) +
   1.197 +	                  12 * sizeof(long long);
   1.198 +	hwdata->dma_mem = mmap(0, hwdata->dma_len, PROT_READ|PROT_WRITE,
   1.199 +	                       MAP_SHARED, memory_fd, map_offset);
   1.200 +	if ( hwdata->dma_mem == MAP_FAILED ) {
   1.201 +		hwdata->ipu_imem = (caddr_t)0;
   1.202 +		SDL_FreeYUVOverlay(overlay);
   1.203 +		SDL_SetError("Unable to map %d bytes for DMA", hwdata->dma_len);
   1.204 +		return(NULL);
   1.205 +	}
   1.206 +	hwdata->ipu_imem = hwdata->dma_mem;
   1.207 +	hwdata->ipu_omem = hwdata->ipu_imem +
   1.208 +	                   hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
   1.209 +	hwdata->dma_tags = hwdata->ipu_omem + width * height * bpp;
   1.210 +
   1.211 +	/* Allocate memory for the DMA packets */
   1.212 +	hwdata->plist.num = hwdata->macroblocks * 4 + 1;
   1.213 +	hwdata->plist.packet = (struct ps2_packet *)malloc(
   1.214 +	                       hwdata->plist.num*sizeof(struct ps2_packet));
   1.215 +	if ( ! hwdata->plist.packet ) {
   1.216 +		SDL_FreeYUVOverlay(overlay);
   1.217 +		SDL_OutOfMemory();
   1.218 +		return(NULL);
   1.219 +	}
   1.220 +	pnum = 0;
   1.221 +	packet = hwdata->plist.packet;
   1.222 +
   1.223 +	/* Set up the tags to send the image to the screen */
   1.224 +	tags = (unsigned long long *)hwdata->dma_tags;
   1.225 +	base = hwdata->ipu_omem;
   1.226 +	fbp = screen_image.fbp;
   1.227 +	fbw = screen_image.fbw;
   1.228 +	psm = screen_image.psm;
   1.229 +	y = screen_image.h;	/* Offscreen video memory */
   1.230 +	for ( h=height/16; h; --h ) {
   1.231 +		x = 0;			/* Visible video memory */
   1.232 +		for ( w=width/16; w; --w ) {
   1.233 +			/* The head tag */
   1.234 +			packet[pnum].ptr = &tags[0];
   1.235 +			packet[pnum].len = 10 * sizeof(*tags);
   1.236 +			++pnum;
   1.237 +			tags[0] = 4 | (1LL << 60);	/* GIFtag */
   1.238 +			tags[1] = 0x0e;			/* A+D */
   1.239 +			tags[2] = ((unsigned long long)fbp << 32) |
   1.240 +			          ((unsigned long long)fbw << 48) |
   1.241 +			          ((unsigned long long)psm << 56);
   1.242 +			tags[3] = PS2_GS_BITBLTBUF;
   1.243 +			tags[4] = ((unsigned long long)x << 32) |
   1.244 +			          ((unsigned long long)y << 48);
   1.245 +			tags[5] = PS2_GS_TRXPOS;
   1.246 +			tags[6] = (unsigned long long)16 |
   1.247 +			          ((unsigned long long)16 << 32);
   1.248 +			tags[7] = PS2_GS_TRXREG;
   1.249 +			tags[8] = 0;
   1.250 +			tags[9] = PS2_GS_TRXDIR;
   1.251 +			/* Now the actual image data */
   1.252 +			packet[pnum].ptr = &tags[10];
   1.253 +			packet[pnum].len = 2 * sizeof(*tags);
   1.254 +			++pnum;
   1.255 +			tags[10] = ((16*16*bpp) >> 4) | (2LL << 58);
   1.256 +			tags[11] = 0;
   1.257 +			packet[pnum].ptr = (void *)base;
   1.258 +			packet[pnum].len = 16 * 16 * bpp;
   1.259 +			++pnum;
   1.260 +			packet[pnum].ptr = &tags[12];
   1.261 +			packet[pnum].len = 2 * sizeof(*tags);
   1.262 +			++pnum;
   1.263 +			tags[12] = (0 >> 4) | (1 << 15) | (2LL << 58);
   1.264 +			tags[13] = 0;
   1.265 +
   1.266 +			tags += 16;
   1.267 +			base += 16 * 16 * bpp;
   1.268 +
   1.269 +			x += 16;
   1.270 +		}
   1.271 +		y += 16;
   1.272 +	}
   1.273 +
   1.274 +	/* Set up the texture memory area for the video */
   1.275 +	tex_packet.ptr = tags;
   1.276 +	tex_packet.len = 8 * sizeof(*tags);
   1.277 +	tags[0] = 3 | (1LL << 60);	/* GIFtag */
   1.278 +	tags[1] = 0x0e;			/* A+D */
   1.279 +	tags[2] = (screen_image.h * screen_image.w) / 64 +
   1.280 +	          ((unsigned long long)fbw << 14) +
   1.281 +	          ((unsigned long long)psm << 20) +
   1.282 +	          ((unsigned long long)power_of_2(width) << 26) +
   1.283 +	          ((unsigned long long)power_of_2(height) << 30) +
   1.284 +	          ((unsigned long long)1 << 34) +
   1.285 +	          ((unsigned long long)1 << 35);
   1.286 +	tags[3] = PS2_GS_TEX0_1;
   1.287 +	tags[4] = (1 << 5) + (1 << 6);
   1.288 +	tags[5] = PS2_GS_TEX1_1;
   1.289 +	tags[6] = 0;
   1.290 +	tags[7] = PS2_GS_TEXFLUSH;
   1.291 +	ioctl(console_fd, PS2IOC_SEND, &tex_packet);
   1.292 +
   1.293 +	/* Set up the tags for scaling the image */
   1.294 +	packet[pnum].ptr = tags;
   1.295 +	packet[pnum].len = 12 * sizeof(*tags);
   1.296 +	++pnum;
   1.297 +	tags[0] = 5 | (1LL << 60);	/* GIFtag */
   1.298 +	tags[1] = 0x0e;			/* A+D */
   1.299 +	tags[2] = 6 + (1 << 4) + (1 << 8);
   1.300 +	tags[3] = PS2_GS_PRIM;
   1.301 +	tags[4] = ((unsigned long long)0 * 16) +
   1.302 +	           (((unsigned long long)0 * 16) << 16);
   1.303 +	tags[5] = PS2_GS_UV;
   1.304 +	tags[6] = 0; /* X1, Y1 */
   1.305 +	tags[7] = PS2_GS_XYZ2;
   1.306 +	hwdata->stretch_x1y1 = &tags[6];
   1.307 +	tags[8] = ((unsigned long long)overlay->w * 16) +
   1.308 +	           (((unsigned long long)overlay->h * 16) << 16);
   1.309 +	tags[9] = PS2_GS_UV;
   1.310 +	tags[10] = 0; /* X2, Y2 */
   1.311 +	tags[11] = PS2_GS_XYZ2;
   1.312 +	hwdata->stretch_x2y2 = &tags[10];
   1.313 +
   1.314 +	/* We're all done.. */
   1.315 +	return(overlay);
   1.316 +}
   1.317 +
   1.318 +int GS_LockYUVOverlay(_THIS, SDL_Overlay *overlay)
   1.319 +{
   1.320 +	return(0);
   1.321 +}
   1.322 +
   1.323 +void GS_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay)
   1.324 +{
   1.325 +	return;
   1.326 +}
   1.327 +
   1.328 +int GS_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
   1.329 +{
   1.330 +	struct private_yuvhwdata *hwdata;
   1.331 +	__u32 cmd;
   1.332 +	struct ps2_packet packet;
   1.333 +	int h, w, i;
   1.334 +	Uint32 *lum, *Cr, *Cb;
   1.335 +	int lum_pitch;
   1.336 +	int crb_pitch;
   1.337 +	Uint32 *lum_src, *Cr_src, *Cb_src;
   1.338 +	Uint32 *src, *dst;
   1.339 +	unsigned int x, y;
   1.340 +	SDL_Surface *screen;
   1.341 +
   1.342 +	/* Find out where the various portions of the image are */
   1.343 +	hwdata = overlay->hwdata;
   1.344 +	switch (overlay->format) {
   1.345 +	    case SDL_YV12_OVERLAY:
   1.346 +		lum = (Uint32 *)overlay->pixels[0];
   1.347 +		Cr =  (Uint32 *)overlay->pixels[1];
   1.348 +		Cb =  (Uint32 *)overlay->pixels[2];
   1.349 +		break;
   1.350 +	    case SDL_IYUV_OVERLAY:
   1.351 +		lum = (Uint32 *)overlay->pixels[0];
   1.352 +		Cr =  (Uint32 *)overlay->pixels[2];
   1.353 +		Cb =  (Uint32 *)overlay->pixels[1];
   1.354 +	    default:
   1.355 +		SDL_SetError("Unsupported YUV format in blit (??)");
   1.356 +		return(-1);
   1.357 +	}
   1.358 +	dst = (Uint32 *)hwdata->ipu_imem;
   1.359 +	lum_pitch = overlay->w/4;
   1.360 +	crb_pitch = (overlay->w/2)/4;
   1.361 +
   1.362 +	/* Copy blocks of 16x16 pixels to the DMA area */
   1.363 +	for ( h=overlay->h/16; h; --h ) {
   1.364 +		lum_src = lum;
   1.365 +		Cr_src = Cr;
   1.366 +		Cb_src = Cb;
   1.367 +		for ( w=overlay->w/16; w; --w ) {
   1.368 +			src = lum_src;
   1.369 +			for ( i=0; i<16; ++i ) {
   1.370 +				dst[0] = src[0];
   1.371 +				dst[1] = src[1];
   1.372 +				dst[2] = src[2];
   1.373 +				dst[3] = src[3];
   1.374 +				src += lum_pitch;
   1.375 +				dst += 4;
   1.376 +			}
   1.377 +			src = Cb_src;
   1.378 +			for ( i=0; i<8; ++i ) {
   1.379 +				dst[0] = src[0];
   1.380 +				dst[1] = src[1];
   1.381 +				src += crb_pitch;
   1.382 +				dst += 2;
   1.383 +			}
   1.384 +			src = Cr_src;
   1.385 +			for ( i=0; i<8; ++i ) {
   1.386 +				dst[0] = src[0];
   1.387 +				dst[1] = src[1];
   1.388 +				src += crb_pitch;
   1.389 +				dst += 2;
   1.390 +			}
   1.391 +			lum_src += 16 / 4;
   1.392 +			Cb_src += 8 / 4;
   1.393 +			Cr_src += 8 / 4;
   1.394 +		}
   1.395 +		lum += lum_pitch * 16;
   1.396 +		Cr += crb_pitch * 8;
   1.397 +		Cb += crb_pitch * 8;
   1.398 +	}
   1.399 +
   1.400 +	/* Send the macroblock data to the IPU */
   1.401 +#ifdef DEBUG_YUV
   1.402 +	fprintf(stderr, "Sending data to IPU..\n");
   1.403 +#endif
   1.404 +	packet.ptr = hwdata->ipu_imem;
   1.405 +	packet.len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
   1.406 +	ioctl(hwdata->ipu_fd, PS2IOC_SENDA, &packet);
   1.407 +
   1.408 +	/* Trigger the DMA to the IPU for conversion */
   1.409 +#ifdef DEBUG_YUV
   1.410 +	fprintf(stderr, "Trigging conversion command\n");
   1.411 +#endif
   1.412 +	cmd = (7 << 28) + hwdata->macroblocks;
   1.413 +	if ( screen_image.psm == PS2_GS_PSMCT16 ) {
   1.414 +		cmd += (1 << 27) +	/* Output RGB 555 */
   1.415 +		       (1 << 26);	/* Dither output */
   1.416 +	}
   1.417 +	ioctl(hwdata->ipu_fd, PS2IOC_SIPUCMD, &cmd);
   1.418 +
   1.419 +	/* Retrieve the converted image from the IPU */
   1.420 +#ifdef DEBUG_YUV
   1.421 +	fprintf(stderr, "Retrieving data from IPU..\n");
   1.422 +#endif
   1.423 +	packet.ptr = hwdata->ipu_omem;
   1.424 +	packet.len = overlay->w * overlay->h *
   1.425 +	             this->screen->format->BytesPerPixel;
   1.426 +	ioctl(hwdata->ipu_fd, PS2IOC_RECV, &packet);
   1.427 +
   1.428 +#ifdef DEBUG_YUV
   1.429 +	fprintf(stderr, "Copying image to screen..\n");
   1.430 +#endif
   1.431 +	/* Wait for previous DMA to complete */
   1.432 +	ioctl(console_fd, PS2IOC_SENDQCT, 1);
   1.433 +
   1.434 +	/* Send the current image to the screen and scale it */
   1.435 +	screen = this->screen;
   1.436 +	x = (unsigned int)dstrect->x;
   1.437 +	y = (unsigned int)dstrect->y;
   1.438 +	if ( screen->offset ) {
   1.439 +		x += (screen->offset % screen->pitch) /
   1.440 +		     screen->format->BytesPerPixel;
   1.441 +		y += (screen->offset / screen->pitch);
   1.442 +	}
   1.443 +	*hwdata->stretch_x1y1 = (x * 16) + ((y * 16) << 16);
   1.444 +	x += (unsigned int)dstrect->w;
   1.445 +	y += (unsigned int)dstrect->h;
   1.446 +	*hwdata->stretch_x2y2 = (x * 16) + ((y * 16) << 16);
   1.447 +	return ioctl(console_fd, PS2IOC_SENDL, &hwdata->plist);
   1.448 +}
   1.449 +
   1.450 +void GS_FreeYUVOverlay(_THIS, SDL_Overlay *overlay)
   1.451 +{
   1.452 +	struct private_yuvhwdata *hwdata;
   1.453 +
   1.454 +	hwdata = overlay->hwdata;
   1.455 +	if ( hwdata ) {
   1.456 +		if ( hwdata->ipu_fd >= 0 ) {
   1.457 +			close(hwdata->ipu_fd);
   1.458 +		}
   1.459 +		if ( hwdata->dma_mem ) {
   1.460 +			munmap(hwdata->dma_mem, hwdata->dma_len);
   1.461 +		}
   1.462 +		if ( hwdata->plist.packet ) {
   1.463 +			free(hwdata->plist.packet);
   1.464 +		}
   1.465 +		if ( hwdata->pixels ) {
   1.466 +			free(hwdata->pixels);
   1.467 +		}
   1.468 +		free(hwdata);
   1.469 +	}
   1.470 +}