src/video/ps2gs/SDL_gsyuv.c
author Sam Lantinga <slouken@libsdl.org>
Mon, 17 Apr 2006 06:47:23 +0000
changeset 1643 51038e80ae59
parent 1402 d910939febfa
child 1659 14717b52abc0
permissions -rw-r--r--
More general fix for bug #189

The clipping is done at a higher level, and the low level functions are
passed clipped rectangles. Drivers which don't support source clipping
have not been changed, so the image will be squished instead of clipped,
but at least they will no longer crash when the destination rect was out
of bounds.
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 #include "SDL_config.h"
    23 
    24 /* This is the Playstation 2 implementation of YUV video overlays */
    25 
    26 #include <fcntl.h>
    27 #include <unistd.h>
    28 #include <sys/ioctl.h>
    29 #include <sys/mman.h>
    30 #include <asm/page.h>		/* For definition of PAGE_SIZE */
    31 
    32 #include "SDL_video.h"
    33 #include "SDL_gsyuv_c.h"
    34 #include "../SDL_yuvfuncs.h"
    35 
    36 /* The maximum number of 16x16 pixel blocks converted at once */
    37 #define MAX_MACROBLOCKS	1024	/* 2^10 macroblocks at once */
    38 
    39 /* The functions used to manipulate video overlays */
    40 static struct private_yuvhwfuncs gs_yuvfuncs = {
    41 	GS_LockYUVOverlay,
    42 	GS_UnlockYUVOverlay,
    43 	GS_DisplayYUVOverlay,
    44 	GS_FreeYUVOverlay
    45 };
    46 
    47 struct private_yuvhwdata {
    48 	int ipu_fd;
    49 	Uint8 *pixels;
    50 	int macroblocks;
    51 	int dma_len;
    52 	caddr_t dma_mem;
    53 	caddr_t ipu_imem;
    54 	caddr_t ipu_omem;
    55 	caddr_t dma_tags;
    56 	unsigned long long *stretch_x1y1;
    57 	unsigned long long *stretch_x2y2;
    58 	struct ps2_plist plist;
    59 
    60 	/* These are just so we don't have to allocate them separately */
    61 	Uint16 pitches[3];
    62 	Uint8 *planes[3];
    63 };
    64 
    65 static int power_of_2(int value)
    66 {
    67 	int shift;
    68 
    69 	for ( shift = 0; (1<<shift) < value; ++shift ) {
    70 		/* Keep looking */ ;
    71 	}
    72 	return(shift);
    73 }
    74 
    75 SDL_Overlay *GS_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
    76 {
    77 	SDL_Overlay *overlay;
    78 	struct private_yuvhwdata *hwdata;
    79 	int map_offset;
    80 	unsigned long long *tags;
    81 	caddr_t base;
    82 	int bpp;
    83 	int fbp, fbw, psm;
    84 	int x, y, w, h;
    85 	int pnum;
    86 	struct ps2_packet *packet;
    87 	struct ps2_packet tex_packet;
    88 
    89 	/* We can only decode blocks of 16x16 pixels */
    90 	if ( (width & 15) || (height & 15) ) {
    91 		SDL_SetError("Overlay width/height must be multiples of 16");
    92 		return(NULL);
    93 	}
    94 	/* Make sure the image isn't too large for a single DMA transfer */
    95 	if ( ((width/16) * (height/16)) > MAX_MACROBLOCKS ) {
    96 		SDL_SetError("Overlay too large (maximum size: %d pixels)",
    97 		             MAX_MACROBLOCKS * 16 * 16);
    98 		return(NULL);
    99 	}
   100 
   101 	/* Double-check the requested format.  For simplicity, we'll only
   102 	   support planar YUV formats.
   103 	 */
   104 	switch (format) {
   105 	    case SDL_YV12_OVERLAY:
   106 	    case SDL_IYUV_OVERLAY:
   107 		/* Supported planar YUV format */
   108 		break;
   109 	    default:
   110 		SDL_SetError("Unsupported YUV format");
   111 		return(NULL);
   112 	}
   113 
   114 	/* Create the overlay structure */
   115 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
   116 	if ( overlay == NULL ) {
   117 		SDL_OutOfMemory();
   118 		return(NULL);
   119 	}
   120 	SDL_memset(overlay, 0, (sizeof *overlay));
   121 
   122 	/* Fill in the basic members */
   123 	overlay->format = format;
   124 	overlay->w = width;
   125 	overlay->h = height;
   126 
   127 	/* Set up the YUV surface function structure */
   128 	overlay->hwfuncs = &gs_yuvfuncs;
   129 	overlay->hw_overlay = 1;
   130 
   131 	/* Create the pixel data */
   132 	hwdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *hwdata);
   133 	overlay->hwdata = hwdata;
   134 	if ( hwdata == NULL ) {
   135 		SDL_FreeYUVOverlay(overlay);
   136 		SDL_OutOfMemory();
   137 		return(NULL);
   138 	}
   139 	hwdata->ipu_fd = -1;
   140 	hwdata->pixels = (Uint8 *)SDL_malloc(width*height*2);
   141 	if ( hwdata->pixels == NULL ) {
   142 		SDL_FreeYUVOverlay(overlay);
   143 		SDL_OutOfMemory();
   144 		return(NULL);
   145 	}
   146 	hwdata->macroblocks = (width/16) * (height/16);
   147 
   148 	/* Find the pitch and offset values for the overlay */
   149 	overlay->pitches = hwdata->pitches;
   150 	overlay->pixels = hwdata->planes;
   151 	switch (format) {
   152 	    case SDL_YV12_OVERLAY:
   153 	    case SDL_IYUV_OVERLAY:
   154 		overlay->pitches[0] = overlay->w;
   155 		overlay->pitches[1] = overlay->pitches[0] / 2;
   156 		overlay->pitches[2] = overlay->pitches[0] / 2;
   157 	        overlay->pixels[0] = hwdata->pixels;
   158 	        overlay->pixels[1] = overlay->pixels[0] +
   159 		                     overlay->pitches[0] * overlay->h;
   160 	        overlay->pixels[2] = overlay->pixels[1] +
   161 		                     overlay->pitches[1] * overlay->h / 2;
   162 		overlay->planes = 3;
   163 		break;
   164 	    default:
   165 		/* We should never get here (caught above) */
   166 		break;
   167 	}
   168 
   169 	/* Theoretically we could support several concurrent decode
   170 	   streams queueing up on the same file descriptor, but for
   171 	   simplicity we'll support only one.  Opening the IPU more
   172 	   than once will fail with EBUSY.
   173 	*/
   174 	hwdata->ipu_fd = open("/dev/ps2ipu", O_RDWR);
   175 	if ( hwdata->ipu_fd < 0 ) {
   176 		SDL_FreeYUVOverlay(overlay);
   177 		SDL_SetError("Playstation 2 IPU busy");
   178 		return(NULL);
   179 	}
   180 
   181 	/* Allocate a DMA area for pixel conversion */
   182 	bpp = this->screen->format->BytesPerPixel;
   183 	map_offset = (mapped_len + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
   184 	hwdata->dma_len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8) +
   185 	                  width * height * bpp +
   186 	                  hwdata->macroblocks * (16 * sizeof(long long)) +
   187 	                  12 * sizeof(long long);
   188 	hwdata->dma_mem = mmap(0, hwdata->dma_len, PROT_READ|PROT_WRITE,
   189 	                       MAP_SHARED, memory_fd, map_offset);
   190 	if ( hwdata->dma_mem == MAP_FAILED ) {
   191 		hwdata->ipu_imem = (caddr_t)0;
   192 		SDL_FreeYUVOverlay(overlay);
   193 		SDL_SetError("Unable to map %d bytes for DMA", hwdata->dma_len);
   194 		return(NULL);
   195 	}
   196 	hwdata->ipu_imem = hwdata->dma_mem;
   197 	hwdata->ipu_omem = hwdata->ipu_imem +
   198 	                   hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
   199 	hwdata->dma_tags = hwdata->ipu_omem + width * height * bpp;
   200 
   201 	/* Allocate memory for the DMA packets */
   202 	hwdata->plist.num = hwdata->macroblocks * 4 + 1;
   203 	hwdata->plist.packet = (struct ps2_packet *)SDL_malloc(
   204 	                       hwdata->plist.num*sizeof(struct ps2_packet));
   205 	if ( ! hwdata->plist.packet ) {
   206 		SDL_FreeYUVOverlay(overlay);
   207 		SDL_OutOfMemory();
   208 		return(NULL);
   209 	}
   210 	pnum = 0;
   211 	packet = hwdata->plist.packet;
   212 
   213 	/* Set up the tags to send the image to the screen */
   214 	tags = (unsigned long long *)hwdata->dma_tags;
   215 	base = hwdata->ipu_omem;
   216 	fbp = screen_image.fbp;
   217 	fbw = screen_image.fbw;
   218 	psm = screen_image.psm;
   219 	y = screen_image.y + screen_image.h;	/* Offscreen video memory */
   220 	for ( h=height/16; h; --h ) {
   221 		x = 0;			/* Visible video memory */
   222 		for ( w=width/16; w; --w ) {
   223 			/* The head tag */
   224 			packet[pnum].ptr = &tags[0];
   225 			packet[pnum].len = 10 * sizeof(*tags);
   226 			++pnum;
   227 			tags[0] = 4 | (1LL << 60);	/* GIFtag */
   228 			tags[1] = 0x0e;			/* A+D */
   229 			tags[2] = ((unsigned long long)fbp << 32) |
   230 			          ((unsigned long long)fbw << 48) |
   231 			          ((unsigned long long)psm << 56);
   232 			tags[3] = PS2_GS_BITBLTBUF;
   233 			tags[4] = ((unsigned long long)x << 32) |
   234 			          ((unsigned long long)y << 48);
   235 			tags[5] = PS2_GS_TRXPOS;
   236 			tags[6] = (unsigned long long)16 |
   237 			          ((unsigned long long)16 << 32);
   238 			tags[7] = PS2_GS_TRXREG;
   239 			tags[8] = 0;
   240 			tags[9] = PS2_GS_TRXDIR;
   241 			/* Now the actual image data */
   242 			packet[pnum].ptr = &tags[10];
   243 			packet[pnum].len = 2 * sizeof(*tags);
   244 			++pnum;
   245 			tags[10] = ((16*16*bpp) >> 4) | (2LL << 58);
   246 			tags[11] = 0;
   247 			packet[pnum].ptr = (void *)base;
   248 			packet[pnum].len = 16 * 16 * bpp;
   249 			++pnum;
   250 			packet[pnum].ptr = &tags[12];
   251 			packet[pnum].len = 2 * sizeof(*tags);
   252 			++pnum;
   253 			tags[12] = (0 >> 4) | (1 << 15) | (2LL << 58);
   254 			tags[13] = 0;
   255 
   256 			tags += 16;
   257 			base += 16 * 16 * bpp;
   258 
   259 			x += 16;
   260 		}
   261 		y += 16;
   262 	}
   263 
   264 	/* Set up the texture memory area for the video */
   265 	tex_packet.ptr = tags;
   266 	tex_packet.len = 8 * sizeof(*tags);
   267 	tags[0] = 3 | (1LL << 60);	/* GIFtag */
   268 	tags[1] = 0x0e;			/* A+D */
   269 	tags[2] = ((screen_image.y + screen_image.h) * screen_image.w) / 64 +
   270 	          ((unsigned long long)fbw << 14) +
   271 	          ((unsigned long long)psm << 20) +
   272 	          ((unsigned long long)power_of_2(width) << 26) +
   273 	          ((unsigned long long)power_of_2(height) << 30) +
   274 	          ((unsigned long long)1 << 34) +
   275 	          ((unsigned long long)1 << 35);
   276 	tags[3] = PS2_GS_TEX0_1;
   277 	tags[4] = (1 << 5) + (1 << 6);
   278 	tags[5] = PS2_GS_TEX1_1;
   279 	tags[6] = 0;
   280 	tags[7] = PS2_GS_TEXFLUSH;
   281 	ioctl(console_fd, PS2IOC_SEND, &tex_packet);
   282 
   283 	/* Set up the tags for scaling the image */
   284 	packet[pnum].ptr = tags;
   285 	packet[pnum].len = 12 * sizeof(*tags);
   286 	++pnum;
   287 	tags[0] = 5 | (1LL << 60);	/* GIFtag */
   288 	tags[1] = 0x0e;			/* A+D */
   289 	tags[2] = 6 + (1 << 4) + (1 << 8);
   290 	tags[3] = PS2_GS_PRIM;
   291 	tags[4] = ((unsigned long long)0 * 16) +
   292 	           (((unsigned long long)0 * 16) << 16);
   293 	tags[5] = PS2_GS_UV;
   294 	tags[6] = 0; /* X1, Y1 */
   295 	tags[7] = PS2_GS_XYZ2;
   296 	hwdata->stretch_x1y1 = &tags[6];
   297 	tags[8] = ((unsigned long long)overlay->w * 16) +
   298 	           (((unsigned long long)overlay->h * 16) << 16);
   299 	tags[9] = PS2_GS_UV;
   300 	tags[10] = 0; /* X2, Y2 */
   301 	tags[11] = PS2_GS_XYZ2;
   302 	hwdata->stretch_x2y2 = &tags[10];
   303 
   304 	/* We're all done.. */
   305 	return(overlay);
   306 }
   307 
   308 int GS_LockYUVOverlay(_THIS, SDL_Overlay *overlay)
   309 {
   310 	return(0);
   311 }
   312 
   313 void GS_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay)
   314 {
   315 	return;
   316 }
   317 
   318 int GS_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
   319 {
   320 	struct private_yuvhwdata *hwdata;
   321 	__u32 cmd;
   322 	struct ps2_packet packet;
   323 	int h, w, i;
   324 	Uint32 *lum, *Cr, *Cb;
   325 	int lum_pitch;
   326 	int crb_pitch;
   327 	Uint32 *lum_src, *Cr_src, *Cb_src;
   328 	Uint32 *src, *dst;
   329 	unsigned int x, y;
   330 	SDL_Surface *screen;
   331 
   332 	/* Find out where the various portions of the image are */
   333 	hwdata = overlay->hwdata;
   334 	switch (overlay->format) {
   335 	    case SDL_YV12_OVERLAY:
   336 		lum = (Uint32 *)overlay->pixels[0];
   337 		Cr =  (Uint32 *)overlay->pixels[1];
   338 		Cb =  (Uint32 *)overlay->pixels[2];
   339 		break;
   340 	    case SDL_IYUV_OVERLAY:
   341 		lum = (Uint32 *)overlay->pixels[0];
   342 		Cr =  (Uint32 *)overlay->pixels[2];
   343 		Cb =  (Uint32 *)overlay->pixels[1];
   344 	    default:
   345 		SDL_SetError("Unsupported YUV format in blit (?)");
   346 		return(-1);
   347 	}
   348 	dst = (Uint32 *)hwdata->ipu_imem;
   349 	lum_pitch = overlay->w/4;
   350 	crb_pitch = (overlay->w/2)/4;
   351 
   352 	/* Copy blocks of 16x16 pixels to the DMA area */
   353 	for ( h=overlay->h/16; h; --h ) {
   354 		lum_src = lum;
   355 		Cr_src = Cr;
   356 		Cb_src = Cb;
   357 		for ( w=overlay->w/16; w; --w ) {
   358 			src = lum_src;
   359 			for ( i=0; i<16; ++i ) {
   360 				dst[0] = src[0];
   361 				dst[1] = src[1];
   362 				dst[2] = src[2];
   363 				dst[3] = src[3];
   364 				src += lum_pitch;
   365 				dst += 4;
   366 			}
   367 			src = Cb_src;
   368 			for ( i=0; i<8; ++i ) {
   369 				dst[0] = src[0];
   370 				dst[1] = src[1];
   371 				src += crb_pitch;
   372 				dst += 2;
   373 			}
   374 			src = Cr_src;
   375 			for ( i=0; i<8; ++i ) {
   376 				dst[0] = src[0];
   377 				dst[1] = src[1];
   378 				src += crb_pitch;
   379 				dst += 2;
   380 			}
   381 			lum_src += 16 / 4;
   382 			Cb_src += 8 / 4;
   383 			Cr_src += 8 / 4;
   384 		}
   385 		lum += lum_pitch * 16;
   386 		Cr += crb_pitch * 8;
   387 		Cb += crb_pitch * 8;
   388 	}
   389 
   390 	/* Send the macroblock data to the IPU */
   391 #ifdef DEBUG_YUV
   392 	fprintf(stderr, "Sending data to IPU..\n");
   393 #endif
   394 	packet.ptr = hwdata->ipu_imem;
   395 	packet.len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
   396 	ioctl(hwdata->ipu_fd, PS2IOC_SENDA, &packet);
   397 
   398 	/* Trigger the DMA to the IPU for conversion */
   399 #ifdef DEBUG_YUV
   400 	fprintf(stderr, "Trigging conversion command\n");
   401 #endif
   402 	cmd = (7 << 28) + hwdata->macroblocks;
   403 	if ( screen_image.psm == PS2_GS_PSMCT16 ) {
   404 		cmd += (1 << 27) +	/* Output RGB 555 */
   405 		       (1 << 26);	/* Dither output */
   406 	}
   407 	ioctl(hwdata->ipu_fd, PS2IOC_SIPUCMD, &cmd);
   408 
   409 	/* Retrieve the converted image from the IPU */
   410 #ifdef DEBUG_YUV
   411 	fprintf(stderr, "Retrieving data from IPU..\n");
   412 #endif
   413 	packet.ptr = hwdata->ipu_omem;
   414 	packet.len = overlay->w * overlay->h *
   415 	             this->screen->format->BytesPerPixel;
   416 	ioctl(hwdata->ipu_fd, PS2IOC_RECV, &packet);
   417 
   418 #ifdef DEBUG_YUV
   419 	fprintf(stderr, "Copying image to screen..\n");
   420 #endif
   421 	/* Wait for previous DMA to complete */
   422 	ioctl(console_fd, PS2IOC_SENDQCT, 1);
   423 
   424 	/* Send the current image to the screen and scale it */
   425 	screen = this->screen;
   426 	x = (unsigned int)dst->x;
   427 	y = (unsigned int)dst->y;
   428 	if ( screen->offset ) {
   429 		x += (screen->offset % screen->pitch) /
   430 		     screen->format->BytesPerPixel;
   431 		y += (screen->offset / screen->pitch);
   432 	}
   433 	y += screen_image.y;
   434 	*hwdata->stretch_x1y1 = (x * 16) + ((y * 16) << 16);
   435 	x += (unsigned int)dst->w;
   436 	y += (unsigned int)dst->h;
   437 	*hwdata->stretch_x2y2 = (x * 16) + ((y * 16) << 16);
   438 	return ioctl(console_fd, PS2IOC_SENDL, &hwdata->plist);
   439 }
   440 
   441 void GS_FreeYUVOverlay(_THIS, SDL_Overlay *overlay)
   442 {
   443 	struct private_yuvhwdata *hwdata;
   444 
   445 	hwdata = overlay->hwdata;
   446 	if ( hwdata ) {
   447 		if ( hwdata->ipu_fd >= 0 ) {
   448 			close(hwdata->ipu_fd);
   449 		}
   450 		if ( hwdata->dma_mem ) {
   451 			munmap(hwdata->dma_mem, hwdata->dma_len);
   452 		}
   453 		if ( hwdata->plist.packet ) {
   454 			SDL_free(hwdata->plist.packet);
   455 		}
   456 		if ( hwdata->pixels ) {
   457 			SDL_free(hwdata->pixels);
   458 		}
   459 		SDL_free(hwdata);
   460 	}
   461 }