src/video/ps2gs/SDL_gsyuv.c
author Sam Lantinga <slouken@libsdl.org>
Fri, 10 Feb 2006 06:48:43 +0000
changeset 1358 c71e05b4dc2e
parent 1338 604d73db6802
child 1361 19418e4422cb
permissions -rw-r--r--
More header massaging... works great on Windows. ;-)
     1 /*
     2     SDL - Simple DirectMedia Layer
     3     Copyright (C) 1997-2006 Sam Lantinga
     4 
     5     This library is free software; you can redistribute it and/or
     6     modify it under the terms of the GNU Lesser General Public
     7     License as published by the Free Software Foundation; either
     8     version 2.1 of the License, or (at your option) any later version.
     9 
    10     This library is distributed in the hope that it will be useful,
    11     but WITHOUT ANY WARRANTY; without even the implied warranty of
    12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    13     Lesser General Public License for more details.
    14 
    15     You should have received a copy of the GNU Lesser General Public
    16     License along with this library; if not, write to the Free Software
    17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    18 
    19     Sam Lantinga
    20     slouken@libsdl.org
    21 */
    22 
    23 /* This is the Playstation 2 implementation of YUV video overlays */
    24 
    25 #include <fcntl.h>
    26 #include <unistd.h>
    27 #include <sys/ioctl.h>
    28 #include <sys/mman.h>
    29 #include <asm/page.h>		/* For definition of PAGE_SIZE */
    30 
    31 #include "SDL_video.h"
    32 #include "SDL_gsyuv_c.h"
    33 #include "SDL_yuvfuncs.h"
    34 
    35 /* The maximum number of 16x16 pixel block converted at once */
    36 #define MAX_MACROBLOCKS	1024	/* 2^10 macroblocks at once */
    37 
    38 /* The functions used to manipulate video overlays */
    39 static struct private_yuvhwfuncs gs_yuvfuncs = {
    40 	GS_LockYUVOverlay,
    41 	GS_UnlockYUVOverlay,
    42 	GS_DisplayYUVOverlay,
    43 	GS_FreeYUVOverlay
    44 };
    45 
    46 struct private_yuvhwdata {
    47 	int ipu_fd;
    48 	Uint8 *pixels;
    49 	int macroblocks;
    50 	int dma_len;
    51 	caddr_t dma_mem;
    52 	caddr_t ipu_imem;
    53 	caddr_t ipu_omem;
    54 	caddr_t dma_tags;
    55 	unsigned long long *stretch_x1y1;
    56 	unsigned long long *stretch_x2y2;
    57 	struct ps2_plist plist;
    58 
    59 	/* These are just so we don't have to allocate them separately */
    60 	Uint16 pitches[3];
    61 	Uint8 *planes[3];
    62 };
    63 
    64 static int power_of_2(int value)
    65 {
    66 	int shift;
    67 
    68 	for ( shift = 0; (1<<shift) < value; ++shift ) {
    69 		/* Keep looking */ ;
    70 	}
    71 	return(shift);
    72 }
    73 
    74 SDL_Overlay *GS_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
    75 {
    76 	SDL_Overlay *overlay;
    77 	struct private_yuvhwdata *hwdata;
    78 	int map_offset;
    79 	unsigned long long *tags;
    80 	caddr_t base;
    81 	int bpp;
    82 	int fbp, fbw, psm;
    83 	int x, y, w, h;
    84 	int pnum;
    85 	struct ps2_packet *packet;
    86 	struct ps2_packet tex_packet;
    87 
    88 	/* We can only decode blocks of 16x16 pixels */
    89 	if ( (width & 15) || (height & 15) ) {
    90 		SDL_SetError("Overlay width/height must be multiples of 16");
    91 		return(NULL);
    92 	}
    93 	/* Make sure the image isn't too large for a single DMA transfer */
    94 	if ( ((width/16) * (height/16)) > MAX_MACROBLOCKS ) {
    95 		SDL_SetError("Overlay too large (maximum size: %d pixels)",
    96 		             MAX_MACROBLOCKS * 16 * 16);
    97 		return(NULL);
    98 	}
    99 
   100 	/* Double-check the requested format.  For simplicity, we'll only
   101 	   support planar YUV formats.
   102 	 */
   103 	switch (format) {
   104 	    case SDL_YV12_OVERLAY:
   105 	    case SDL_IYUV_OVERLAY:
   106 		/* Supported planar YUV format */
   107 		break;
   108 	    default:
   109 		SDL_SetError("Unsupported YUV format");
   110 		return(NULL);
   111 	}
   112 
   113 	/* Create the overlay structure */
   114 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
   115 	if ( overlay == NULL ) {
   116 		SDL_OutOfMemory();
   117 		return(NULL);
   118 	}
   119 	SDL_memset(overlay, 0, (sizeof *overlay));
   120 
   121 	/* Fill in the basic members */
   122 	overlay->format = format;
   123 	overlay->w = width;
   124 	overlay->h = height;
   125 
   126 	/* Set up the YUV surface function structure */
   127 	overlay->hwfuncs = &gs_yuvfuncs;
   128 	overlay->hw_overlay = 1;
   129 
   130 	/* Create the pixel data */
   131 	hwdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *hwdata);
   132 	overlay->hwdata = hwdata;
   133 	if ( hwdata == NULL ) {
   134 		SDL_FreeYUVOverlay(overlay);
   135 		SDL_OutOfMemory();
   136 		return(NULL);
   137 	}
   138 	hwdata->ipu_fd = -1;
   139 	hwdata->pixels = (Uint8 *)SDL_malloc(width*height*2);
   140 	if ( hwdata->pixels == NULL ) {
   141 		SDL_FreeYUVOverlay(overlay);
   142 		SDL_OutOfMemory();
   143 		return(NULL);
   144 	}
   145 	hwdata->macroblocks = (width/16) * (height/16);
   146 
   147 	/* Find the pitch and offset values for the overlay */
   148 	overlay->pitches = hwdata->pitches;
   149 	overlay->pixels = hwdata->planes;
   150 	switch (format) {
   151 	    case SDL_YV12_OVERLAY:
   152 	    case SDL_IYUV_OVERLAY:
   153 		overlay->pitches[0] = overlay->w;
   154 		overlay->pitches[1] = overlay->pitches[0] / 2;
   155 		overlay->pitches[2] = overlay->pitches[0] / 2;
   156 	        overlay->pixels[0] = hwdata->pixels;
   157 	        overlay->pixels[1] = overlay->pixels[0] +
   158 		                     overlay->pitches[0] * overlay->h;
   159 	        overlay->pixels[2] = overlay->pixels[1] +
   160 		                     overlay->pitches[1] * overlay->h / 2;
   161 		overlay->planes = 3;
   162 		break;
   163 	    default:
   164 		/* We should never get here (caught above) */
   165 		break;
   166 	}
   167 
   168 	/* Theoretically we could support several concurrent decode
   169 	   streams queueing up on the same file descriptor, but for
   170 	   simplicity we'll support only one.  Opening the IPU more
   171 	   than once will fail with EBUSY.
   172 	*/
   173 	hwdata->ipu_fd = open("/dev/ps2ipu", O_RDWR);
   174 	if ( hwdata->ipu_fd < 0 ) {
   175 		SDL_FreeYUVOverlay(overlay);
   176 		SDL_SetError("Playstation 2 IPU busy");
   177 		return(NULL);
   178 	}
   179 
   180 	/* Allocate a DMA area for pixel conversion */
   181 	bpp = this->screen->format->BytesPerPixel;
   182 	map_offset = (mapped_len + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
   183 	hwdata->dma_len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8) +
   184 	                  width * height * bpp +
   185 	                  hwdata->macroblocks * (16 * sizeof(long long)) +
   186 	                  12 * sizeof(long long);
   187 	hwdata->dma_mem = mmap(0, hwdata->dma_len, PROT_READ|PROT_WRITE,
   188 	                       MAP_SHARED, memory_fd, map_offset);
   189 	if ( hwdata->dma_mem == MAP_FAILED ) {
   190 		hwdata->ipu_imem = (caddr_t)0;
   191 		SDL_FreeYUVOverlay(overlay);
   192 		SDL_SetError("Unable to map %d bytes for DMA", hwdata->dma_len);
   193 		return(NULL);
   194 	}
   195 	hwdata->ipu_imem = hwdata->dma_mem;
   196 	hwdata->ipu_omem = hwdata->ipu_imem +
   197 	                   hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
   198 	hwdata->dma_tags = hwdata->ipu_omem + width * height * bpp;
   199 
   200 	/* Allocate memory for the DMA packets */
   201 	hwdata->plist.num = hwdata->macroblocks * 4 + 1;
   202 	hwdata->plist.packet = (struct ps2_packet *)SDL_malloc(
   203 	                       hwdata->plist.num*sizeof(struct ps2_packet));
   204 	if ( ! hwdata->plist.packet ) {
   205 		SDL_FreeYUVOverlay(overlay);
   206 		SDL_OutOfMemory();
   207 		return(NULL);
   208 	}
   209 	pnum = 0;
   210 	packet = hwdata->plist.packet;
   211 
   212 	/* Set up the tags to send the image to the screen */
   213 	tags = (unsigned long long *)hwdata->dma_tags;
   214 	base = hwdata->ipu_omem;
   215 	fbp = screen_image.fbp;
   216 	fbw = screen_image.fbw;
   217 	psm = screen_image.psm;
   218 	y = screen_image.y + screen_image.h;	/* Offscreen video memory */
   219 	for ( h=height/16; h; --h ) {
   220 		x = 0;			/* Visible video memory */
   221 		for ( w=width/16; w; --w ) {
   222 			/* The head tag */
   223 			packet[pnum].ptr = &tags[0];
   224 			packet[pnum].len = 10 * sizeof(*tags);
   225 			++pnum;
   226 			tags[0] = 4 | (1LL << 60);	/* GIFtag */
   227 			tags[1] = 0x0e;			/* A+D */
   228 			tags[2] = ((unsigned long long)fbp << 32) |
   229 			          ((unsigned long long)fbw << 48) |
   230 			          ((unsigned long long)psm << 56);
   231 			tags[3] = PS2_GS_BITBLTBUF;
   232 			tags[4] = ((unsigned long long)x << 32) |
   233 			          ((unsigned long long)y << 48);
   234 			tags[5] = PS2_GS_TRXPOS;
   235 			tags[6] = (unsigned long long)16 |
   236 			          ((unsigned long long)16 << 32);
   237 			tags[7] = PS2_GS_TRXREG;
   238 			tags[8] = 0;
   239 			tags[9] = PS2_GS_TRXDIR;
   240 			/* Now the actual image data */
   241 			packet[pnum].ptr = &tags[10];
   242 			packet[pnum].len = 2 * sizeof(*tags);
   243 			++pnum;
   244 			tags[10] = ((16*16*bpp) >> 4) | (2LL << 58);
   245 			tags[11] = 0;
   246 			packet[pnum].ptr = (void *)base;
   247 			packet[pnum].len = 16 * 16 * bpp;
   248 			++pnum;
   249 			packet[pnum].ptr = &tags[12];
   250 			packet[pnum].len = 2 * sizeof(*tags);
   251 			++pnum;
   252 			tags[12] = (0 >> 4) | (1 << 15) | (2LL << 58);
   253 			tags[13] = 0;
   254 
   255 			tags += 16;
   256 			base += 16 * 16 * bpp;
   257 
   258 			x += 16;
   259 		}
   260 		y += 16;
   261 	}
   262 
   263 	/* Set up the texture memory area for the video */
   264 	tex_packet.ptr = tags;
   265 	tex_packet.len = 8 * sizeof(*tags);
   266 	tags[0] = 3 | (1LL << 60);	/* GIFtag */
   267 	tags[1] = 0x0e;			/* A+D */
   268 	tags[2] = ((screen_image.y + screen_image.h) * screen_image.w) / 64 +
   269 	          ((unsigned long long)fbw << 14) +
   270 	          ((unsigned long long)psm << 20) +
   271 	          ((unsigned long long)power_of_2(width) << 26) +
   272 	          ((unsigned long long)power_of_2(height) << 30) +
   273 	          ((unsigned long long)1 << 34) +
   274 	          ((unsigned long long)1 << 35);
   275 	tags[3] = PS2_GS_TEX0_1;
   276 	tags[4] = (1 << 5) + (1 << 6);
   277 	tags[5] = PS2_GS_TEX1_1;
   278 	tags[6] = 0;
   279 	tags[7] = PS2_GS_TEXFLUSH;
   280 	ioctl(console_fd, PS2IOC_SEND, &tex_packet);
   281 
   282 	/* Set up the tags for scaling the image */
   283 	packet[pnum].ptr = tags;
   284 	packet[pnum].len = 12 * sizeof(*tags);
   285 	++pnum;
   286 	tags[0] = 5 | (1LL << 60);	/* GIFtag */
   287 	tags[1] = 0x0e;			/* A+D */
   288 	tags[2] = 6 + (1 << 4) + (1 << 8);
   289 	tags[3] = PS2_GS_PRIM;
   290 	tags[4] = ((unsigned long long)0 * 16) +
   291 	           (((unsigned long long)0 * 16) << 16);
   292 	tags[5] = PS2_GS_UV;
   293 	tags[6] = 0; /* X1, Y1 */
   294 	tags[7] = PS2_GS_XYZ2;
   295 	hwdata->stretch_x1y1 = &tags[6];
   296 	tags[8] = ((unsigned long long)overlay->w * 16) +
   297 	           (((unsigned long long)overlay->h * 16) << 16);
   298 	tags[9] = PS2_GS_UV;
   299 	tags[10] = 0; /* X2, Y2 */
   300 	tags[11] = PS2_GS_XYZ2;
   301 	hwdata->stretch_x2y2 = &tags[10];
   302 
   303 	/* We're all done.. */
   304 	return(overlay);
   305 }
   306 
   307 int GS_LockYUVOverlay(_THIS, SDL_Overlay *overlay)
   308 {
   309 	return(0);
   310 }
   311 
   312 void GS_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay)
   313 {
   314 	return;
   315 }
   316 
   317 int GS_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *dstrect)
   318 {
   319 	struct private_yuvhwdata *hwdata;
   320 	__u32 cmd;
   321 	struct ps2_packet packet;
   322 	int h, w, i;
   323 	Uint32 *lum, *Cr, *Cb;
   324 	int lum_pitch;
   325 	int crb_pitch;
   326 	Uint32 *lum_src, *Cr_src, *Cb_src;
   327 	Uint32 *src, *dst;
   328 	unsigned int x, y;
   329 	SDL_Surface *screen;
   330 
   331 	/* Find out where the various portions of the image are */
   332 	hwdata = overlay->hwdata;
   333 	switch (overlay->format) {
   334 	    case SDL_YV12_OVERLAY:
   335 		lum = (Uint32 *)overlay->pixels[0];
   336 		Cr =  (Uint32 *)overlay->pixels[1];
   337 		Cb =  (Uint32 *)overlay->pixels[2];
   338 		break;
   339 	    case SDL_IYUV_OVERLAY:
   340 		lum = (Uint32 *)overlay->pixels[0];
   341 		Cr =  (Uint32 *)overlay->pixels[2];
   342 		Cb =  (Uint32 *)overlay->pixels[1];
   343 	    default:
   344 		SDL_SetError("Unsupported YUV format in blit (?)");
   345 		return(-1);
   346 	}
   347 	dst = (Uint32 *)hwdata->ipu_imem;
   348 	lum_pitch = overlay->w/4;
   349 	crb_pitch = (overlay->w/2)/4;
   350 
   351 	/* Copy blocks of 16x16 pixels to the DMA area */
   352 	for ( h=overlay->h/16; h; --h ) {
   353 		lum_src = lum;
   354 		Cr_src = Cr;
   355 		Cb_src = Cb;
   356 		for ( w=overlay->w/16; w; --w ) {
   357 			src = lum_src;
   358 			for ( i=0; i<16; ++i ) {
   359 				dst[0] = src[0];
   360 				dst[1] = src[1];
   361 				dst[2] = src[2];
   362 				dst[3] = src[3];
   363 				src += lum_pitch;
   364 				dst += 4;
   365 			}
   366 			src = Cb_src;
   367 			for ( i=0; i<8; ++i ) {
   368 				dst[0] = src[0];
   369 				dst[1] = src[1];
   370 				src += crb_pitch;
   371 				dst += 2;
   372 			}
   373 			src = Cr_src;
   374 			for ( i=0; i<8; ++i ) {
   375 				dst[0] = src[0];
   376 				dst[1] = src[1];
   377 				src += crb_pitch;
   378 				dst += 2;
   379 			}
   380 			lum_src += 16 / 4;
   381 			Cb_src += 8 / 4;
   382 			Cr_src += 8 / 4;
   383 		}
   384 		lum += lum_pitch * 16;
   385 		Cr += crb_pitch * 8;
   386 		Cb += crb_pitch * 8;
   387 	}
   388 
   389 	/* Send the macroblock data to the IPU */
   390 #ifdef DEBUG_YUV
   391 	fprintf(stderr, "Sending data to IPU..\n");
   392 #endif
   393 	packet.ptr = hwdata->ipu_imem;
   394 	packet.len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
   395 	ioctl(hwdata->ipu_fd, PS2IOC_SENDA, &packet);
   396 
   397 	/* Trigger the DMA to the IPU for conversion */
   398 #ifdef DEBUG_YUV
   399 	fprintf(stderr, "Trigging conversion command\n");
   400 #endif
   401 	cmd = (7 << 28) + hwdata->macroblocks;
   402 	if ( screen_image.psm == PS2_GS_PSMCT16 ) {
   403 		cmd += (1 << 27) +	/* Output RGB 555 */
   404 		       (1 << 26);	/* Dither output */
   405 	}
   406 	ioctl(hwdata->ipu_fd, PS2IOC_SIPUCMD, &cmd);
   407 
   408 	/* Retrieve the converted image from the IPU */
   409 #ifdef DEBUG_YUV
   410 	fprintf(stderr, "Retrieving data from IPU..\n");
   411 #endif
   412 	packet.ptr = hwdata->ipu_omem;
   413 	packet.len = overlay->w * overlay->h *
   414 	             this->screen->format->BytesPerPixel;
   415 	ioctl(hwdata->ipu_fd, PS2IOC_RECV, &packet);
   416 
   417 #ifdef DEBUG_YUV
   418 	fprintf(stderr, "Copying image to screen..\n");
   419 #endif
   420 	/* Wait for previous DMA to complete */
   421 	ioctl(console_fd, PS2IOC_SENDQCT, 1);
   422 
   423 	/* Send the current image to the screen and scale it */
   424 	screen = this->screen;
   425 	x = (unsigned int)dstrect->x;
   426 	y = (unsigned int)dstrect->y;
   427 	if ( screen->offset ) {
   428 		x += (screen->offset % screen->pitch) /
   429 		     screen->format->BytesPerPixel;
   430 		y += (screen->offset / screen->pitch);
   431 	}
   432 	y += screen_image.y;
   433 	*hwdata->stretch_x1y1 = (x * 16) + ((y * 16) << 16);
   434 	x += (unsigned int)dstrect->w;
   435 	y += (unsigned int)dstrect->h;
   436 	*hwdata->stretch_x2y2 = (x * 16) + ((y * 16) << 16);
   437 	return ioctl(console_fd, PS2IOC_SENDL, &hwdata->plist);
   438 }
   439 
   440 void GS_FreeYUVOverlay(_THIS, SDL_Overlay *overlay)
   441 {
   442 	struct private_yuvhwdata *hwdata;
   443 
   444 	hwdata = overlay->hwdata;
   445 	if ( hwdata ) {
   446 		if ( hwdata->ipu_fd >= 0 ) {
   447 			close(hwdata->ipu_fd);
   448 		}
   449 		if ( hwdata->dma_mem ) {
   450 			munmap(hwdata->dma_mem, hwdata->dma_len);
   451 		}
   452 		if ( hwdata->plist.packet ) {
   453 			SDL_free(hwdata->plist.packet);
   454 		}
   455 		if ( hwdata->pixels ) {
   456 			SDL_free(hwdata->pixels);
   457 		}
   458 		SDL_free(hwdata);
   459 	}
   460 }