PS3 Linux is no more...
author Sam Lantinga <slouken@libsdl.org>
Wed, 19 Jan 2011 22:25:40 -0800
changeset 5048 187d7d446306
parent 5047 edaf3e364a05
child 5049 28003ba91f57
PS3 Linux is no more...
Makefile.in
README.PS3
configure.in
include/SDL_config.h.in
src/video/SDL_sysvideo.h
src/video/SDL_video.c
src/video/ps3/SDL_ps3events.c
src/video/ps3/SDL_ps3events_c.h
src/video/ps3/SDL_ps3modes.c
src/video/ps3/SDL_ps3modes_c.h
src/video/ps3/SDL_ps3render.c
src/video/ps3/SDL_ps3render_c.h
src/video/ps3/SDL_ps3spe.c
src/video/ps3/SDL_ps3spe_c.h
src/video/ps3/SDL_ps3video.c
src/video/ps3/SDL_ps3video.h
src/video/ps3/spulibs/Makefile
src/video/ps3/spulibs/bilin_scaler.c
src/video/ps3/spulibs/fb_writer.c
src/video/ps3/spulibs/spu_common.h
src/video/ps3/spulibs/yuv2rgb.c
     1.1 --- a/Makefile.in	Wed Jan 19 22:21:31 2011 -0800
     1.2 +++ b/Makefile.in	Wed Jan 19 22:25:40 2011 -0800
     1.3 @@ -37,11 +37,6 @@
     1.4  SDLMAIN_TARGET = libSDLmain.a
     1.5  SDLMAIN_OBJECTS = @SDLMAIN_OBJECTS@
     1.6  
     1.7 -# PS3 SPU programs
     1.8 -SPU_GCC = @SPU_GCC@
     1.9 -EMBEDSPU = @EMBEDSPU@
    1.10 -#include $(srcdir)/src/video/ps3/spulibs/Makefile
    1.11 -
    1.12  DIST = acinclude Android.mk autogen.sh Borland.html Borland.zip BUGS build-scripts configure configure.in COPYING CREDITS include INSTALL Makefile.minimal Makefile.in README* sdl-config.in sdl.m4 sdl.pc.in SDL.spec SDL.spec.in src test TODO VisualC.html VisualC VisualCE Watcom-Win32.zip WhatsNew Xcode Xcode-iPhoneOS
    1.13  
    1.14  HDRS = \
     2.1 --- a/README.PS3	Wed Jan 19 22:21:31 2011 -0800
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,35 +0,0 @@
     2.4 -
     2.5 -SDL on Sony Playstation3
     2.6 -------------------------
     2.7 -
     2.8 -Installation:
     2.9 -  First, you have to install the Cell SDK
    2.10 -  - Download the Cell SDK installer RPM and ISO images to
    2.11 -    a temporary directory such as /tmp/cellsdk.
    2.12 -  - Mount the image: mount -o loop CellSDK-Devel-Fedora_3.1.0.0.0.iso /tmp/cellsdk
    2.13 -  - Install the SDK installer: rpm -ivh cell-install-3.1.0-0.0.noarch.rpm
    2.14 -  - Install the SDK: cd /opt/cell && ./cellsdk --iso /tmp/cellsdkiso install
    2.15 -
    2.16 -  You'll than need to install the SPU-libs
    2.17 -  - Run make ps3-libs && make ps3libs-install
    2.18 -
    2.19 -  Finally, install SDL
    2.20 -  - Go to SDL-1.2/ and build SDL like any other GNU style package.
    2.21 -  e.g.
    2.22 -    - Build the configure-script with ./autogen.sh
    2.23 -    - Configure SDL for your needs: ./configure --enable-video-ps3 ...
    2.24 -    - Build and install it: make && make install
    2.25 -
    2.26 -
    2.27 -Todo:
    2.28 -  - Mouse & Keyboard support
    2.29 -  - On SPU-side the current scaler and converter restrictions are:
    2.30 -    - resolution has to be a multiple of 8 (will work on that)
    2.31 -    - scaler/converter only supports the YV12 and IYUV format
    2.32 -    - the scaler works only bilinear (lanzos would be nice)
    2.33 -  - Optimize the SPU-program handling on the PPE side
    2.34 -  - Integrate spumedia in SDL
    2.35 -
    2.36 -Have fun!
    2.37 -  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot ibm [dot] com>
    2.38 -
     3.1 --- a/configure.in	Wed Jan 19 22:21:31 2011 -0800
     3.2 +++ b/configure.in	Wed Jan 19 22:25:40 2011 -0800
     3.3 @@ -1546,46 +1546,6 @@
     3.4      fi
     3.5  }
     3.6  
     3.7 -dnl See if we're running on PlayStation 3 Cell hardware
     3.8 -CheckPS3()
     3.9 -{
    3.10 -  AC_ARG_ENABLE(video-ps3,
    3.11 -                AC_HELP_STRING([--enable-video-ps3], [use PlayStation 3 Cell driver [[default=yes]]]),
    3.12 -                , enable_video_ps3=yes)
    3.13 -  if test x$enable_video = xyes -a x$enable_video_ps3 = xyes; then 
    3.14 -    video_ps3=no
    3.15 -    AC_CHECK_HEADER([linux/fb.h])
    3.16 -    AC_CHECK_HEADER([asm/ps3fb.h], [have_ps3fb_hdr=yes], [],
    3.17 -            [#ifndef _LINUX_TYPES_H
    3.18 -                #include <linux/types.h>
    3.19 -            #endif])
    3.20 -    AC_CHECK_HEADER([libspe2.h], have_libspe2_hdr=yes)
    3.21 -    AC_CHECK_LIB([spe2], spe_context_create, have_spe2_lib=yes)
    3.22 -
    3.23 -    AC_CHECK_PROGS(SPU_GCC, [spu-gcc])
    3.24 -    AC_CHECK_PROGS(EMBEDSPU, [embedspu])
    3.25 -
    3.26 -    have_spu_libs=yes
    3.27 -    AC_CHECK_LIB([fb_writer_spu], [main], [], [have_spu_libs=no])
    3.28 -    AC_CHECK_LIB([yuv2rgb_spu], [main], [], [have_spu_libs=no])
    3.29 -    AC_CHECK_LIB([bilin_scaler_spu], [main], [], [have_spu_libs=no])
    3.30 -    if test x$have_ps3fb_hdr = xyes -a x$have_libspe2_hdr = xyes -a x$have_spe2_lib = xyes -a "$SPU_GCC" -a "$EMBEDSPU"; then
    3.31 -        AC_DEFINE(SDL_VIDEO_DRIVER_PS3)
    3.32 -        video_ps3=yes
    3.33 -        have_video=yes
    3.34 -        SOURCES="$SOURCES $srcdir/src/video/ps3/*.c"
    3.35 -        EXTRA_CFLAGS="$EXTRA_CFLAGS -I/opt/cell/sdk/usr/include"
    3.36 -        EXTRA_LDFLAGS="$EXTRA_LDFLAGS -L/opt/cell/sdk/usr/lib -lspe2 -lfb_writer_spu -lyuv2rgb_spu -lbilin_scaler_spu"
    3.37 -
    3.38 -        if test x$have_spu_libs = xno; then 
    3.39 -              AC_MSG_WARN([ps3libs missing, please run make ps3libs])
    3.40 -        fi
    3.41 -    fi
    3.42 -    AC_MSG_CHECKING([for PlayStation 3 Cell support])
    3.43 -    AC_MSG_RESULT([$video_ps3])
    3.44 -  fi
    3.45 -}
    3.46 -
    3.47  dnl rcg04172001 Set up the Null video driver.
    3.48  CheckDummyVideo()
    3.49  {
    3.50 @@ -2245,7 +2205,6 @@
    3.51          CheckX11
    3.52          CheckDirectFB
    3.53          CheckFusionSound
    3.54 -        CheckPS3
    3.55          CheckOpenGLX11
    3.56          CheckInputEvents
    3.57          CheckTslib
     4.1 --- a/include/SDL_config.h.in	Wed Jan 19 22:21:31 2011 -0800
     4.2 +++ b/include/SDL_config.h.in	Wed Jan 19 22:25:40 2011 -0800
     4.3 @@ -264,7 +264,6 @@
     4.4  #undef SDL_VIDEO_DRIVER_NDS
     4.5  #undef SDL_VIDEO_DRIVER_PHOTON
     4.6  #undef SDL_VIDEO_DRIVER_QNXGF
     4.7 -#undef SDL_VIDEO_DRIVER_PS3
     4.8  #undef SDL_VIDEO_DRIVER_RISCOS
     4.9  #undef SDL_VIDEO_DRIVER_WIN32
    4.10  #undef SDL_VIDEO_DRIVER_X11
     5.1 --- a/src/video/SDL_sysvideo.h	Wed Jan 19 22:21:31 2011 -0800
     5.2 +++ b/src/video/SDL_sysvideo.h	Wed Jan 19 22:25:40 2011 -0800
     5.3 @@ -411,9 +411,6 @@
     5.4  #if SDL_VIDEO_DRIVER_DIRECTFB
     5.5  extern VideoBootStrap DirectFB_bootstrap;
     5.6  #endif
     5.7 -#if SDL_VIDEO_DRIVER_PS3
     5.8 -extern VideoBootStrap PS3_bootstrap;
     5.9 -#endif
    5.10  #if SDL_VIDEO_DRIVER_WIN32
    5.11  extern VideoBootStrap WIN32_bootstrap;
    5.12  #endif
     6.1 --- a/src/video/SDL_video.c	Wed Jan 19 22:21:31 2011 -0800
     6.2 +++ b/src/video/SDL_video.c	Wed Jan 19 22:25:40 2011 -0800
     6.3 @@ -65,9 +65,6 @@
     6.4  #if SDL_VIDEO_DRIVER_DIRECTFB
     6.5      &DirectFB_bootstrap,
     6.6  #endif
     6.7 -#if SDL_VIDEO_DRIVER_PS3
     6.8 -    &PS3_bootstrap,
     6.9 -#endif
    6.10  #if SDL_VIDEO_DRIVER_WIN32
    6.11      &WIN32_bootstrap,
    6.12  #endif
     7.1 --- a/src/video/ps3/SDL_ps3events.c	Wed Jan 19 22:21:31 2011 -0800
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,36 +0,0 @@
     7.4 -/*
     7.5 -    SDL - Simple DirectMedia Layer
     7.6 -    Copyright (C) 1997-2010 Sam Lantinga
     7.7 -
     7.8 -    This library is free software; you can redistribute it and/or
     7.9 -    modify it under the terms of the GNU Lesser General Public
    7.10 -    License as published by the Free Software Foundation; either
    7.11 -    version 2.1 of the License, or (at your option) any later version.
    7.12 -
    7.13 -    This library is distributed in the hope that it will be useful,
    7.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
    7.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    7.16 -    Lesser General Public License for more details.
    7.17 -
    7.18 -    You should have received a copy of the GNU Lesser General Public
    7.19 -    License along with this library; if not, write to the Free Software
    7.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    7.21 -
    7.22 -    Sam Lantinga
    7.23 -    slouken@libsdl.org
    7.24 -*/
    7.25 -#include "SDL_config.h"
    7.26 -
    7.27 -#include "../../events/SDL_sysevents.h"
    7.28 -#include "../../events/SDL_events_c.h"
    7.29 -
    7.30 -#include "SDL_ps3video.h"
    7.31 -#include "SDL_ps3events_c.h"
    7.32 -
    7.33 -void
    7.34 -PS3_PumpEvents(_THIS)
    7.35 -{
    7.36 -    /* do nothing. */
    7.37 -}
    7.38 -
    7.39 -/* vi: set ts=4 sw=4 expandtab: */
     8.1 --- a/src/video/ps3/SDL_ps3events_c.h	Wed Jan 19 22:21:31 2011 -0800
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,28 +0,0 @@
     8.4 -/*
     8.5 -    SDL - Simple DirectMedia Layer
     8.6 -    Copyright (C) 1997-2010 Sam Lantinga
     8.7 -
     8.8 -    This library is free software; you can redistribute it and/or
     8.9 -    modify it under the terms of the GNU Lesser General Public
    8.10 -    License as published by the Free Software Foundation; either
    8.11 -    version 2.1 of the License, or (at your option) any later version.
    8.12 -
    8.13 -    This library is distributed in the hope that it will be useful,
    8.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
    8.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    8.16 -    Lesser General Public License for more details.
    8.17 -
    8.18 -    You should have received a copy of the GNU Lesser General Public
    8.19 -    License along with this library; if not, write to the Free Software
    8.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    8.21 -
    8.22 -    Sam Lantinga
    8.23 -    slouken@libsdl.org
    8.24 -*/
    8.25 -#include "SDL_config.h"
    8.26 -
    8.27 -#include "SDL_ps3video.h"
    8.28 -
    8.29 -extern void PS3_PumpEvents(_THIS);
    8.30 -
    8.31 -/* vi: set ts=4 sw=4 expandtab: */
     9.1 --- a/src/video/ps3/SDL_ps3modes.c	Wed Jan 19 22:21:31 2011 -0800
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,143 +0,0 @@
     9.4 -/*
     9.5 -    SDL - Simple DirectMedia Layer
     9.6 -    Copyright (C) 1997-2010 Sam Lantinga
     9.7 -
     9.8 -    This library is free software; you can redistribute it and/or
     9.9 -    modify it under the terms of the GNU Lesser General Public
    9.10 -    License as published by the Free Software Foundation; either
    9.11 -    version 2.1 of the License, or (at your option) any later version.
    9.12 -
    9.13 -    This library is distributed in the hope that it will be useful,
    9.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
    9.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    9.16 -    Lesser General Public License for more details.
    9.17 -
    9.18 -    You should have received a copy of the GNU Lesser General Public
    9.19 -    License along with this library; if not, write to the Free Software
    9.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    9.21 -
    9.22 -    Sam Lantinga
    9.23 -    slouken@libsdl.org
    9.24 -*/
    9.25 -#include "SDL_config.h"
    9.26 -
    9.27 -#include "SDL_ps3video.h"
    9.28 -
    9.29 -void
    9.30 -PS3_InitModes(_THIS)
    9.31 -{
    9.32 -    deprintf(1, "+PS3_InitModes()\n");
    9.33 -    SDL_VideoDisplay display;
    9.34 -    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
    9.35 -    SDL_DisplayMode mode;
    9.36 -    PS3_DisplayModeData *modedata;
    9.37 -    unsigned long vid = 0;
    9.38 -
    9.39 -    modedata = (PS3_DisplayModeData *) SDL_malloc(sizeof(*modedata));
    9.40 -    if (!modedata) {
    9.41 -        return;
    9.42 -    }
    9.43 -
    9.44 -    /* Setting up the DisplayMode based on current settings */
    9.45 -    struct ps3fb_ioctl_res res;
    9.46 -    if (ioctl(data->fbdev, PS3FB_IOCTL_SCREENINFO, &res)) {
    9.47 -        SDL_SetError("Can't get PS3FB_IOCTL_SCREENINFO");
    9.48 -    }
    9.49 -    mode.format = SDL_PIXELFORMAT_RGB888;
    9.50 -    mode.refresh_rate = 0;
    9.51 -    mode.w = res.xres;
    9.52 -    mode.h = res.yres;
    9.53 -
    9.54 -    /* Setting up driver specific mode data,
    9.55 -     * Get the current ps3 specific videmode number */
    9.56 -    if (ioctl(data->fbdev, PS3FB_IOCTL_GETMODE, (unsigned long)&vid)) {
    9.57 -        SDL_SetError("Can't get PS3FB_IOCTL_GETMODE");
    9.58 -    }
    9.59 -    deprintf(2, "PS3FB_IOCTL_GETMODE = %u\n", vid);
    9.60 -    modedata->mode = vid;
    9.61 -    mode.driverdata = modedata;
    9.62 -
    9.63 -    /* Set display's videomode and add it */
    9.64 -    SDL_zero(display);
    9.65 -    display.desktop_mode = mode;
    9.66 -    display.current_mode = mode;
    9.67 -
    9.68 -    SDL_AddVideoDisplay(&display);
    9.69 -    deprintf(1, "-PS3_InitModes()\n");
    9.70 -}
    9.71 -
    9.72 -/* DisplayModes available on the PS3 */
    9.73 -static SDL_DisplayMode ps3fb_modedb[] = {
    9.74 -    /* VESA */
    9.75 -    {SDL_PIXELFORMAT_RGB888, 1280, 768, 0, NULL}, // WXGA
    9.76 -    {SDL_PIXELFORMAT_RGB888, 1280, 1024, 0, NULL}, // SXGA
    9.77 -    {SDL_PIXELFORMAT_RGB888, 1920, 1200, 0, NULL}, // WUXGA
    9.78 -    /* Native resolutions (progressive, "fullscreen") */
    9.79 -    {SDL_PIXELFORMAT_RGB888, 720, 480, 0, NULL}, // 480p
    9.80 -    {SDL_PIXELFORMAT_RGB888, 1280, 720, 0, NULL}, // 720p
    9.81 -    {SDL_PIXELFORMAT_RGB888, 1920, 1080, 0, NULL} // 1080p
    9.82 -};
    9.83 -
    9.84 -/* PS3 videomode number according to ps3fb_modedb */
    9.85 -static PS3_DisplayModeData ps3fb_data[] = {
    9.86 -    {11}, {12}, {13}, {130}, {131}, {133}, 
    9.87 -};
    9.88 -
    9.89 -void
    9.90 -PS3_GetDisplayModes(_THIS, SDL_VideoDisplay * display)
    9.91 -{
    9.92 -    deprintf(1, "+PS3_GetDisplayModes()\n");
    9.93 -    SDL_DisplayMode mode;
    9.94 -    unsigned int nummodes;
    9.95 -
    9.96 -    nummodes = sizeof(ps3fb_modedb) / sizeof(SDL_DisplayMode);
    9.97 -
    9.98 -    int n;
    9.99 -    for (n=0; n<nummodes; ++n) {
   9.100 -        /* Get driver specific mode data */
   9.101 -        ps3fb_modedb[n].driverdata = &ps3fb_data[n];
   9.102 -
   9.103 -        /* Add DisplayMode to list */
   9.104 -        deprintf(2, "Adding resolution %u x %u\n", ps3fb_modedb[n].w, ps3fb_modedb[n].h);
   9.105 -        SDL_AddDisplayMode(display, &ps3fb_modedb[n]);
   9.106 -    }
   9.107 -    deprintf(1, "-PS3_GetDisplayModes()\n");
   9.108 -}
   9.109 -
   9.110 -int
   9.111 -PS3_SetDisplayMode(_THIS, SDL_VideoDisplay * display, SDL_DisplayMode * mode)
   9.112 -{
   9.113 -    deprintf(1, "+PS3_SetDisplayMode()\n");
   9.114 -    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
   9.115 -    PS3_DisplayModeData *dispdata = (PS3_DisplayModeData *) mode->driverdata;
   9.116 -
   9.117 -    /* Set the new DisplayMode */
   9.118 -    deprintf(2, "Setting PS3FB_MODE to %u\n", dispdata->mode);
   9.119 -    if (ioctl(data->fbdev, PS3FB_IOCTL_SETMODE, (unsigned long)&dispdata->mode)) {
   9.120 -        deprintf(2, "Could not set PS3FB_MODE\n");
   9.121 -        SDL_SetError("Could not set PS3FB_MODE\n");
   9.122 -        return -1;
   9.123 -    }
   9.124 -
   9.125 -    deprintf(1, "-PS3_SetDisplayMode()\n");
   9.126 -    return 0;
   9.127 -}
   9.128 -
   9.129 -void
   9.130 -PS3_QuitModes(_THIS)
   9.131 -{
   9.132 -    deprintf(1, "+PS3_QuitModes()\n");
   9.133 -
   9.134 -    /* There was no mem allocated for driverdata */
   9.135 -    int i, j;
   9.136 -    for (i = 0; i < SDL_GetNumVideoDisplays(); ++i) {
   9.137 -        SDL_VideoDisplay *display = SDL_GetVideoDisplay(i);
   9.138 -        for (j = display->num_display_modes; j--;) {
   9.139 -            display->display_modes[j].driverdata = NULL;
   9.140 -        }
   9.141 -    }
   9.142 -
   9.143 -    deprintf(1, "-PS3_QuitModes()\n");
   9.144 -}
   9.145 -
   9.146 -/* vi: set ts=4 sw=4 expandtab: */
    10.1 --- a/src/video/ps3/SDL_ps3modes_c.h	Wed Jan 19 22:21:31 2011 -0800
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,34 +0,0 @@
    10.4 -/*
    10.5 -    SDL - Simple DirectMedia Layer
    10.6 -    Copyright (C) 1997-2010 Sam Lantinga
    10.7 -
    10.8 -    This library is free software; you can redistribute it and/or
    10.9 -    modify it under the terms of the GNU Lesser General Public
   10.10 -    License as published by the Free Software Foundation; either
   10.11 -    version 2.1 of the License, or (at your option) any later version.
   10.12 -
   10.13 -    This library is distributed in the hope that it will be useful,
   10.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   10.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   10.16 -    Lesser General Public License for more details.
   10.17 -
   10.18 -    You should have received a copy of the GNU Lesser General Public
   10.19 -    License along with this library; if not, write to the Free Software
   10.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   10.21 -
   10.22 -    Sam Lantinga
   10.23 -    slouken@libsdl.org
   10.24 -*/
   10.25 -#include "SDL_config.h"
   10.26 -
   10.27 -#ifndef _SDL_ps3modes_h
   10.28 -#define _SDL_ps3modes_h
   10.29 -
   10.30 -extern void PS3_InitModes(_THIS);
   10.31 -extern void PS3_GetDisplayModes(_THIS, SDL_VideoDisplay * display);
   10.32 -extern int PS3_SetDisplayMode(_THIS, SDL_VideoDisplay * display, SDL_DisplayMode * mode);
   10.33 -extern void PS3_QuitModes(_THIS);
   10.34 -
   10.35 -#endif /* SDL_ps3modes_h */
   10.36 -
   10.37 -/* vi: set ts=4 sw=4 expandtab: */
    11.1 --- a/src/video/ps3/SDL_ps3render.c	Wed Jan 19 22:21:31 2011 -0800
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,747 +0,0 @@
    11.4 -/*
    11.5 -    SDL - Simple DirectMedia Layer
    11.6 -    Copyright (C) 1997-2010 Sam Lantinga
    11.7 -
    11.8 -    This library is free software; you can redistribute it and/or
    11.9 -    modify it under the terms of the GNU Lesser General Public
   11.10 -    License as published by the Free Software Foundation; either
   11.11 -    version 2.1 of the License, or (at your option) any later version.
   11.12 -
   11.13 -    This library is distributed in the hope that it will be useful,
   11.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   11.16 -    Lesser General Public License for more details.
   11.17 -
   11.18 -    You should have received a copy of the GNU Lesser General Public
   11.19 -    License along with this library; if not, write to the Free Software
   11.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   11.21 -
   11.22 -    Sam Lantinga
   11.23 -    slouken@libsdl.org
   11.24 -*/
   11.25 -#include "SDL_config.h"
   11.26 -
   11.27 -#include "SDL_video.h"
   11.28 -#include "../SDL_sysvideo.h"
   11.29 -#include "../SDL_yuv_sw_c.h"
   11.30 -#include "../SDL_renderer_sw.h"
   11.31 -
   11.32 -#include "SDL_ps3video.h"
   11.33 -#include "SDL_ps3spe_c.h"
   11.34 -
   11.35 -#include <fcntl.h>
   11.36 -#include <stdlib.h>
   11.37 -#include <sys/ioctl.h>
   11.38 -#include <linux/kd.h>
   11.39 -#include <linux/fb.h>
   11.40 -#include <sys/mman.h>
   11.41 -#include <asm/ps3fb.h>
   11.42 -
   11.43 -
   11.44 -/* Stores the executable name */
   11.45 -extern spe_program_handle_t yuv2rgb_spu;
   11.46 -extern spe_program_handle_t bilin_scaler_spu;
   11.47 -
   11.48 -/* SDL surface based renderer implementation */
   11.49 -static SDL_Renderer *SDL_PS3_CreateRenderer(SDL_Window * window,
   11.50 -                                              Uint32 flags);
   11.51 -static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer);
   11.52 -static int SDL_PS3_ActivateRenderer(SDL_Renderer * renderer);
   11.53 -static int SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y);
   11.54 -static int SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1,
   11.55 -                                int x2, int y2);
   11.56 -static int SDL_PS3_RenderFill(SDL_Renderer * renderer,
   11.57 -                                const SDL_Rect * rect);
   11.58 -static int SDL_PS3_RenderCopy(SDL_Renderer * renderer,
   11.59 -                                SDL_Texture * texture,
   11.60 -                                const SDL_Rect * srcrect,
   11.61 -                                const SDL_Rect * dstrect);
   11.62 -static void SDL_PS3_RenderPresent(SDL_Renderer * renderer);
   11.63 -static void SDL_PS3_DestroyRenderer(SDL_Renderer * renderer);
   11.64 -
   11.65 -/* Texture */
   11.66 -static int PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   11.67 -static int PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, void **pixels, int *pitch);
   11.68 -static int PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch);
   11.69 -static int PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, int markDirty, void **pixels, int *pitch);
   11.70 -static void PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   11.71 -static void PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture);
   11.72 -
   11.73 -
   11.74 -SDL_RenderDriver SDL_PS3_RenderDriver = {
   11.75 -    SDL_PS3_CreateRenderer,
   11.76 -    {
   11.77 -     "ps3",
   11.78 -     (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTVSYNC |
   11.79 -      SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTDISCARD |
   11.80 -      SDL_RENDERER_ACCELERATED),
   11.81 -     (SDL_TEXTUREMODULATE_NONE),
   11.82 -     (SDL_BLENDMODE_NONE),
   11.83 -     /* We use bilinear scaling on the SPE for YV12 & IYUV
   11.84 -      * (width and height % 8 = 0) */
   11.85 -     (SDL_SCALEMODE_SLOW)
   11.86 -     }
   11.87 -};
   11.88 -
   11.89 -typedef struct
   11.90 -{
   11.91 -    int current_screen;
   11.92 -    SDL_Surface *screen;
   11.93 -    SDL_VideoDisplay *display;
   11.94 -    /* adress of the centered image in the framebuffer (double buffered) */
   11.95 -    uint8_t *center[2];
   11.96 -
   11.97 -    /* width of input (bounded by writeable width) */
   11.98 -    unsigned int bounded_width;
   11.99 -    /* height of input (bounded by writeable height) */
  11.100 -    unsigned int bounded_height;
  11.101 -    /* offset from the left side (used for centering) */
  11.102 -    unsigned int offset_left;
  11.103 -    /* offset from the upper side (used for centering) */
  11.104 -    unsigned int offset_top;
  11.105 -    /* width of screen which is writeable */
  11.106 -    unsigned int wr_width;
  11.107 -    /* width of screen which is writeable */
  11.108 -    unsigned int wr_height;
  11.109 -    /* size of a screen line: width * bpp/8 */
  11.110 -    unsigned int line_length;
  11.111 -
  11.112 -    /* Is the kernels fb size bigger than ~12MB
  11.113 -     * double buffering will work for 1080p */
  11.114 -    unsigned int double_buffering;
  11.115 -
  11.116 -    /* SPE threading stuff */
  11.117 -    spu_data_t *converter_thread_data;
  11.118 -    spu_data_t *scaler_thread_data;
  11.119 -
  11.120 -    /* YUV converting transfer data */
  11.121 -    volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128)));
  11.122 -    /* Scaler transfer data */
  11.123 -    volatile struct scale_parms_t * scaler_parms __attribute__((aligned(128)));
  11.124 -} SDL_PS3_RenderData;
  11.125 -
  11.126 -typedef struct
  11.127 -{
  11.128 -    int pitch;
  11.129 -    /* Image data */
  11.130 -    volatile void *pixels;
  11.131 -    /* Use software renderer for not supported formats */
  11.132 -    SDL_SW_YUVTexture *yuv;
  11.133 -} PS3_TextureData;
  11.134 -
  11.135 -SDL_Renderer *
  11.136 -SDL_PS3_CreateRenderer(SDL_Window * window, Uint32 flags)
  11.137 -{
  11.138 -    deprintf(1, "+SDL_PS3_CreateRenderer()\n");
  11.139 -    SDL_VideoDisplay *display = window->display;
  11.140 -    SDL_DisplayMode *displayMode = &display->current_mode;
  11.141 -    SDL_VideoData *devdata = display->device->driverdata;
  11.142 -    SDL_Renderer *renderer;
  11.143 -    SDL_PS3_RenderData *data;
  11.144 -    struct ps3fb_ioctl_res res;
  11.145 -    int i, n;
  11.146 -    int bpp;
  11.147 -    Uint32 Rmask, Gmask, Bmask, Amask;
  11.148 -
  11.149 -    if (!SDL_PixelFormatEnumToMasks
  11.150 -        (displayMode->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
  11.151 -        SDL_SetError("Unknown display format");
  11.152 -        return NULL;
  11.153 -    }
  11.154 -
  11.155 -    renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
  11.156 -    if (!renderer) {
  11.157 -        SDL_OutOfMemory();
  11.158 -        return NULL;
  11.159 -    }
  11.160 -
  11.161 -    data = (SDL_PS3_RenderData *) SDL_malloc(sizeof(*data));
  11.162 -    if (!data) {
  11.163 -        SDL_PS3_DestroyRenderer(renderer);
  11.164 -        SDL_OutOfMemory();
  11.165 -        return NULL;
  11.166 -    }
  11.167 -    SDL_zerop(data);
  11.168 -
  11.169 -    renderer->CreateTexture = PS3_CreateTexture;
  11.170 -    renderer->DestroyTexture = PS3_DestroyTexture;
  11.171 -    renderer->QueryTexturePixels = PS3_QueryTexturePixels;
  11.172 -    renderer->UpdateTexture = PS3_UpdateTexture;
  11.173 -    renderer->LockTexture = PS3_LockTexture;
  11.174 -    renderer->UnlockTexture = PS3_UnlockTexture;
  11.175 -    renderer->ActivateRenderer = SDL_PS3_ActivateRenderer;
  11.176 -    renderer->DisplayModeChanged = SDL_PS3_DisplayModeChanged;
  11.177 -    renderer->RenderPoint = SDL_PS3_RenderPoint;
  11.178 -    renderer->RenderLine = SDL_PS3_RenderLine;
  11.179 -    renderer->RenderFill = SDL_PS3_RenderFill;
  11.180 -    renderer->RenderCopy = SDL_PS3_RenderCopy;
  11.181 -    renderer->RenderPresent = SDL_PS3_RenderPresent;
  11.182 -    renderer->DestroyRenderer = SDL_PS3_DestroyRenderer;
  11.183 -    renderer->info.name = SDL_PS3_RenderDriver.info.name;
  11.184 -    renderer->info.flags = 0;
  11.185 -    renderer->window = window;
  11.186 -    renderer->driverdata = data;
  11.187 -
  11.188 -    deprintf(1, "window->w = %u\n", window->w);
  11.189 -    deprintf(1, "window->h = %u\n", window->h);
  11.190 -
  11.191 -    data->double_buffering = 0;
  11.192 -
  11.193 -    /* Get ps3 screeninfo */
  11.194 -    if (ioctl(devdata->fbdev, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res) < 0) {
  11.195 -        SDL_SetError("[PS3] PS3FB_IOCTL_SCREENINFO failed");
  11.196 -    }
  11.197 -    deprintf(2, "res.num_frames = %d\n", res.num_frames);
  11.198 -
  11.199 -    /* Only use double buffering if enough fb memory is available */
  11.200 -    if (res.num_frames > 1) {
  11.201 -        renderer->info.flags |= SDL_RENDERER_PRESENTFLIP2;
  11.202 -        n = 2;
  11.203 -        data->double_buffering = 1;
  11.204 -    } else {
  11.205 -        renderer->info.flags |= SDL_RENDERER_PRESENTCOPY;
  11.206 -        n = 1;
  11.207 -    }
  11.208 -
  11.209 -    data->screen =
  11.210 -        SDL_CreateRGBSurface(0, window->w, window->h, bpp, Rmask, Gmask,
  11.211 -                             Bmask, Amask);
  11.212 -    if (!data->screen) {
  11.213 -        SDL_PS3_DestroyRenderer(renderer);
  11.214 -        return NULL;
  11.215 -    }
  11.216 -    /* Allocate aligned memory for pixels */
  11.217 -    SDL_free(data->screen->pixels);
  11.218 -    data->screen->pixels = (void *)memalign(16, data->screen->h * data->screen->pitch);
  11.219 -    if (!data->screen->pixels) {
  11.220 -        SDL_FreeSurface(data->screen);
  11.221 -        SDL_OutOfMemory();
  11.222 -        return NULL;
  11.223 -    }
  11.224 -    SDL_memset(data->screen->pixels, 0, data->screen->h * data->screen->pitch);
  11.225 -    SDL_SetSurfacePalette(data->screen, display->palette);
  11.226 -
  11.227 -    data->current_screen = 0;
  11.228 -
  11.229 -    /* Create SPU parms structure */
  11.230 -    data->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t));
  11.231 -    data->scaler_parms = (struct scale_parms_t *) memalign(16, sizeof(struct scale_parms_t));
  11.232 -    if (data->converter_parms == NULL || data->scaler_parms == NULL) {
  11.233 -        SDL_PS3_DestroyRenderer(renderer);
  11.234 -        SDL_OutOfMemory();
  11.235 -        return NULL;
  11.236 -    }
  11.237 -
  11.238 -    /* Set up the SPE threading data */
  11.239 -    data->converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
  11.240 -    data->scaler_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
  11.241 -    if (data->converter_thread_data == NULL || data->scaler_thread_data == NULL) {
  11.242 -        SDL_PS3_DestroyRenderer(renderer);
  11.243 -        SDL_OutOfMemory();
  11.244 -        return NULL;
  11.245 -    }
  11.246 -
  11.247 -    /* Set up the SPE scaler (booted) */
  11.248 -    data->scaler_thread_data->program = bilin_scaler_spu;
  11.249 -    data->scaler_thread_data->program_name = "bilin_scaler_spu";
  11.250 -    data->scaler_thread_data->keepalive = 0;
  11.251 -    data->scaler_thread_data->booted = 0;
  11.252 -
  11.253 -    /* Set up the SPE converter (always running) */
  11.254 -    data->converter_thread_data->program = yuv2rgb_spu;
  11.255 -    data->converter_thread_data->program_name = "yuv2rgb_spu";
  11.256 -    data->converter_thread_data->keepalive = 1;
  11.257 -    data->converter_thread_data->booted = 0;
  11.258 -
  11.259 -    SPE_Start(data->converter_thread_data);
  11.260 -
  11.261 -    deprintf(1, "-SDL_PS3_CreateRenderer()\n");
  11.262 -    return renderer;
  11.263 -}
  11.264 -
  11.265 -static int
  11.266 -SDL_PS3_ActivateRenderer(SDL_Renderer * renderer)
  11.267 -{
  11.268 -    deprintf(1, "+PS3_ActivateRenderer()\n");
  11.269 -    SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata;
  11.270 -
  11.271 -    deprintf(1, "-PS3_ActivateRenderer()\n");
  11.272 -    return 0;
  11.273 -}
  11.274 -
  11.275 -static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer) {
  11.276 -    deprintf(1, "+PS3_DisplayModeChanged()\n");
  11.277 -    SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata;
  11.278 -
  11.279 -    deprintf(1, "-PS3_DisplayModeChanged()\n");
  11.280 -    return 0;
  11.281 -}
  11.282 -
  11.283 -static int
  11.284 -PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture) {
  11.285 -    deprintf(1, "+PS3_CreateTexture()\n");
  11.286 -    PS3_TextureData *data;
  11.287 -    data = (PS3_TextureData *) SDL_calloc(1, sizeof(*data));
  11.288 -    if (!data) {
  11.289 -        SDL_OutOfMemory();
  11.290 -        return -1;
  11.291 -    }
  11.292 -    data->pitch = (texture->w * SDL_BYTESPERPIXEL(texture->format));
  11.293 -
  11.294 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  11.295 -        /* Use SDLs SW_YUVTexture */
  11.296 -        data->yuv =
  11.297 -            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
  11.298 -        if (!data->yuv) {
  11.299 -            SDL_OutOfMemory();
  11.300 -            return -1;
  11.301 -        }
  11.302 -        /* but align pixels */
  11.303 -        SDL_free(data->yuv->pixels);
  11.304 -        data->yuv->pixels = (Uint8 *)memalign(16, texture->w * texture->h * 2);
  11.305 -        if (!data->yuv->pixels) {
  11.306 -            SDL_OutOfMemory();
  11.307 -            return -1;
  11.308 -        }
  11.309 -
  11.310 -        /* Redo: Find the pitch and offset values for the overlay */
  11.311 -        SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) data->yuv;
  11.312 -        switch (texture->format) {
  11.313 -            case SDL_PIXELFORMAT_YV12:
  11.314 -            case SDL_PIXELFORMAT_IYUV:
  11.315 -                swdata->pitches[0] = texture->w;
  11.316 -                swdata->pitches[1] = swdata->pitches[0] / 2;
  11.317 -                swdata->pitches[2] = swdata->pitches[0] / 2;
  11.318 -                swdata->planes[0] = swdata->pixels;
  11.319 -                swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * texture->h;
  11.320 -                swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * texture->h / 2;
  11.321 -                break;
  11.322 -            case SDL_PIXELFORMAT_YUY2:
  11.323 -            case SDL_PIXELFORMAT_UYVY:
  11.324 -            case SDL_PIXELFORMAT_YVYU:
  11.325 -                swdata->pitches[0] = texture->w * 2;
  11.326 -                swdata->planes[0] = swdata->pixels;
  11.327 -                break;
  11.328 -            default:
  11.329 -                /* We should never get here (caught above) */
  11.330 -                break;
  11.331 -        }
  11.332 -    } else {
  11.333 -        data->pixels = NULL;
  11.334 -        data->pixels = SDL_malloc(texture->h * data->pitch);
  11.335 -        if (!data->pixels) {
  11.336 -            PS3_DestroyTexture(renderer, texture);
  11.337 -            SDL_OutOfMemory();
  11.338 -            return -1;
  11.339 -        }
  11.340 -    }
  11.341 -    texture->driverdata = data;
  11.342 -    deprintf(1, "-PS3_CreateTexture()\n");
  11.343 -    return 0;
  11.344 -}
  11.345 -
  11.346 -static int
  11.347 -PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
  11.348 -                      void **pixels, int *pitch)
  11.349 -{
  11.350 -    deprintf(1, "+PS3_QueryTexturePixels()\n");
  11.351 -    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
  11.352 -
  11.353 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  11.354 -        return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch);
  11.355 -    } else {
  11.356 -        *pixels = (void *)data->pixels;
  11.357 -        *pitch = data->pitch;
  11.358 -    }
  11.359 -
  11.360 -    deprintf(1, "-PS3_QueryTexturePixels()\n");
  11.361 -    return 0;
  11.362 -}
  11.363 -
  11.364 -static int
  11.365 -PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  11.366 -                       const SDL_Rect * rect, const void *pixels, int pitch)
  11.367 -{
  11.368 -    deprintf(1, "+PS3_UpdateTexture()\n");
  11.369 -    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
  11.370 -
  11.371 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  11.372 -        return SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch);
  11.373 -    } else {
  11.374 -        Uint8 *src, *dst;
  11.375 -        int row;
  11.376 -        size_t length;
  11.377 -        Uint8 *dstpixels;
  11.378 -
  11.379 -        src = (Uint8 *) pixels;
  11.380 -        dst = (Uint8 *) dstpixels + rect->y * data->pitch + rect->x
  11.381 -                        * SDL_BYTESPERPIXEL(texture->format);
  11.382 -        length = rect->w * SDL_BYTESPERPIXEL(texture->format);
  11.383 -        /* Update the texture */
  11.384 -        for (row = 0; row < rect->h; ++row) {
  11.385 -            SDL_memcpy(dst, src, length);
  11.386 -            src += pitch;
  11.387 -            dst += data->pitch;
  11.388 -        }
  11.389 -    }
  11.390 -    deprintf(1, "-PS3_UpdateTexture()\n");
  11.391 -    return 0;
  11.392 -}
  11.393 -
  11.394 -static int
  11.395 -PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
  11.396 -               const SDL_Rect * rect, int markDirty, void **pixels,
  11.397 -               int *pitch)
  11.398 -{
  11.399 -    deprintf(1, "+PS3_LockTexture()\n");
  11.400 -    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
  11.401 -
  11.402 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  11.403 -        deprintf(1, "-PS3_LockTexture()\n");
  11.404 -        return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels, pitch);
  11.405 -    } else {
  11.406 -        *pixels =
  11.407 -            (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
  11.408 -                      rect->x * SDL_BYTESPERPIXEL(texture->format));
  11.409 -        *pitch = data->pitch;
  11.410 -        deprintf(1, "-PS3_LockTexture()\n");
  11.411 -        return 0;
  11.412 -    }
  11.413 -}
  11.414 -
  11.415 -static void
  11.416 -PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  11.417 -{
  11.418 -    deprintf(1, "+PS3_UnlockTexture()\n");
  11.419 -    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
  11.420 -
  11.421 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  11.422 -        SDL_SW_UnlockYUVTexture(data->yuv);
  11.423 -    }
  11.424 -    deprintf(1, "-PS3_UnlockTexture()\n");
  11.425 -}
  11.426 -
  11.427 -static void
  11.428 -PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
  11.429 -{
  11.430 -    deprintf(1, "+PS3_DestroyTexture()\n");
  11.431 -    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
  11.432 -
  11.433 -    if (!data) {
  11.434 -        return;
  11.435 -    }
  11.436 -    if (data->yuv) {
  11.437 -        SDL_SW_DestroyYUVTexture(data->yuv);
  11.438 -    }
  11.439 -    if (data->pixels) {
  11.440 -        SDL_free((void *)data->pixels);
  11.441 -    }
  11.442 -    deprintf(1, "-PS3_DestroyTexture()\n");
  11.443 -}
  11.444 -
  11.445 -static int
  11.446 -SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y)
  11.447 -{
  11.448 -    SDL_PS3_RenderData *data =
  11.449 -        (SDL_PS3_RenderData *) renderer->driverdata;
  11.450 -    SDL_Surface *target = data->screen;
  11.451 -    int status;
  11.452 -
  11.453 -    if (renderer->blendMode == SDL_BLENDMODE_NONE ||
  11.454 -        renderer->blendMode == SDL_BLENDMODE_MASK) {
  11.455 -        Uint32 color =
  11.456 -            SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b,
  11.457 -                        renderer->a);
  11.458 -
  11.459 -        status = SDL_DrawPoint(target, x, y, color);
  11.460 -    } else {
  11.461 -        status =
  11.462 -            SDL_BlendPoint(target, x, y, renderer->blendMode, renderer->r,
  11.463 -                           renderer->g, renderer->b, renderer->a);
  11.464 -    }
  11.465 -    return status;
  11.466 -}
  11.467 -
  11.468 -static int
  11.469 -SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1, int x2, int y2)
  11.470 -{
  11.471 -    SDL_PS3_RenderData *data =
  11.472 -        (SDL_PS3_RenderData *) renderer->driverdata;
  11.473 -    SDL_Surface *target = data->screen;
  11.474 -    int status;
  11.475 -
  11.476 -    if (renderer->blendMode == SDL_BLENDMODE_NONE ||
  11.477 -        renderer->blendMode == SDL_BLENDMODE_MASK) {
  11.478 -        Uint32 color =
  11.479 -            SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b,
  11.480 -                        renderer->a);
  11.481 -
  11.482 -        status = SDL_DrawLine(target, x1, y1, x2, y2, color);
  11.483 -    } else {
  11.484 -        status =
  11.485 -            SDL_BlendLine(target, x1, y1, x2, y2, renderer->blendMode,
  11.486 -                          renderer->r, renderer->g, renderer->b, renderer->a);
  11.487 -    }
  11.488 -    return status;
  11.489 -}
  11.490 -
  11.491 -static int
  11.492 -SDL_PS3_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect)
  11.493 -{
  11.494 -    deprintf(1, "SDL_PS3_RenderFill()\n");
  11.495 -    SDL_PS3_RenderData *data =
  11.496 -        (SDL_PS3_RenderData *) renderer->driverdata;
  11.497 -    SDL_Surface *target = data->screen;
  11.498 -    SDL_Rect real_rect = *rect;
  11.499 -    int status;
  11.500 -
  11.501 -    if (renderer->blendMode == SDL_BLENDMODE_NONE) {
  11.502 -        Uint32 color =
  11.503 -            SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b,
  11.504 -                        renderer->a);
  11.505 -
  11.506 -        status = SDL_FillRect(target, &real_rect, color);
  11.507 -    } else {
  11.508 -        status =
  11.509 -            SDL_BlendFillRect(target, &real_rect, renderer->blendMode,
  11.510 -                              renderer->r, renderer->g, renderer->b,
  11.511 -                              renderer->a);
  11.512 -    }
  11.513 -    return status;
  11.514 -}
  11.515 -
  11.516 -static int
  11.517 -SDL_PS3_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
  11.518 -                     const SDL_Rect * srcrect, const SDL_Rect * dstrect)
  11.519 -{
  11.520 -    deprintf(1, "+SDL_PS3_RenderCopy()\n");
  11.521 -    SDL_PS3_RenderData *data =
  11.522 -        (SDL_PS3_RenderData *) renderer->driverdata;
  11.523 -    SDL_Window *window = SDL_GetWindowFromID(renderer->window);
  11.524 -    SDL_VideoDisplay *display = window->display;
  11.525 -    PS3_TextureData *txdata = (PS3_TextureData *) texture->driverdata;
  11.526 -    SDL_VideoData *devdata = display->device->driverdata;
  11.527 -
  11.528 -    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
  11.529 -        deprintf(1, "Texture is in a FOURCC format\n");
  11.530 -        if ((texture->format == SDL_PIXELFORMAT_YV12 || texture->format == SDL_PIXELFORMAT_IYUV)
  11.531 -                && texture->w % 8 == 0 && texture->h % 8 == 0
  11.532 -                && dstrect->w % 8 == 0 && dstrect->h % 8 == 0) {
  11.533 -            deprintf(1, "Use SPE for scaling/converting\n");
  11.534 -
  11.535 -            SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) txdata->yuv;
  11.536 -            Uint8 *lum, *Cr, *Cb;
  11.537 -            Uint8 *scaler_out = NULL;
  11.538 -            Uint8 *dstpixels;
  11.539 -            switch (texture->format) {
  11.540 -                case SDL_PIXELFORMAT_YV12:
  11.541 -                    lum = swdata->planes[0];
  11.542 -                    Cr = swdata->planes[1];
  11.543 -                    Cb = swdata->planes[2];
  11.544 -                    break;
  11.545 -                case SDL_PIXELFORMAT_IYUV:
  11.546 -                    lum = swdata->planes[0];
  11.547 -                    Cr = swdata->planes[2];
  11.548 -                    Cb = swdata->planes[1];
  11.549 -                    break;
  11.550 -                default:
  11.551 -                    /* We should never get here (caught above) */
  11.552 -                    return -1;
  11.553 -            }
  11.554 -
  11.555 -            if (srcrect->w != dstrect->w || srcrect->h != dstrect->h) {
  11.556 -                deprintf(1, "We need to scale the texture from %u x %u to %u x %u\n",
  11.557 -                        srcrect->w, srcrect->h, dstrect->w, dstrect->h);
  11.558 -                /* Alloc mem for scaled YUV picture */
  11.559 -                scaler_out = (Uint8 *) memalign(16, dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 1));
  11.560 -                if (scaler_out == NULL) {
  11.561 -                    SDL_OutOfMemory();
  11.562 -                    return -1;
  11.563 -                }
  11.564 -
  11.565 -                /* Set parms for scaling */
  11.566 -                data->scaler_parms->src_pixel_width = srcrect->w;
  11.567 -                data->scaler_parms->src_pixel_height = srcrect->h;
  11.568 -                data->scaler_parms->dst_pixel_width = dstrect->w;
  11.569 -                data->scaler_parms->dst_pixel_height = dstrect->h;
  11.570 -                data->scaler_parms->y_plane = lum;
  11.571 -                data->scaler_parms->v_plane = Cr;
  11.572 -                data->scaler_parms->u_plane = Cb;
  11.573 -                data->scaler_parms->dstBuffer = scaler_out;
  11.574 -                data->scaler_thread_data->argp = (void *)data->scaler_parms;
  11.575 -
  11.576 -                /* Scale the YUV overlay to given size */
  11.577 -                SPE_Start(data->scaler_thread_data);
  11.578 -                SPE_Stop(data->scaler_thread_data);
  11.579 -
  11.580 -                /* Set parms for converting after scaling */
  11.581 -                data->converter_parms->y_plane = scaler_out;
  11.582 -                data->converter_parms->v_plane = scaler_out + dstrect->w * dstrect->h;
  11.583 -                data->converter_parms->u_plane = scaler_out + dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 2);
  11.584 -            } else {
  11.585 -                data->converter_parms->y_plane = lum;
  11.586 -                data->converter_parms->v_plane = Cr;
  11.587 -                data->converter_parms->u_plane = Cb;
  11.588 -            }
  11.589 -
  11.590 -            dstpixels = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x
  11.591 -                            * SDL_BYTESPERPIXEL(texture->format);
  11.592 -            data->converter_parms->src_pixel_width = dstrect->w;
  11.593 -            data->converter_parms->src_pixel_height = dstrect->h;
  11.594 -            data->converter_parms->dstBuffer = dstpixels/*(Uint8 *)data->screen->pixels*/;
  11.595 -            data->converter_thread_data->argp = (void *)data->converter_parms;
  11.596 -
  11.597 -            /* Convert YUV texture to RGB */
  11.598 -            SPE_SendMsg(data->converter_thread_data, SPU_START);
  11.599 -            SPE_SendMsg(data->converter_thread_data, (unsigned int)data->converter_thread_data->argp);
  11.600 -
  11.601 -            /* We can probably move that to RenderPresent() */
  11.602 -            SPE_WaitForMsg(data->converter_thread_data, SPU_FIN);
  11.603 -            if (scaler_out) {
  11.604 -                free(scaler_out);
  11.605 -            }
  11.606 -        } else {
  11.607 -            deprintf(1, "Use software for scaling/converting\n");
  11.608 -            Uint8 *dst;
  11.609 -            /* FIXME: Not good */
  11.610 -            dst = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x
  11.611 -                            * SDL_BYTESPERPIXEL(texture->format);
  11.612 -            return SDL_SW_CopyYUVToRGB(txdata->yuv, srcrect, display->current_mode.format,
  11.613 -                                   dstrect->w, dstrect->h, dst/*data->screen->pixels*/,
  11.614 -                                   data->screen->pitch);
  11.615 -        }
  11.616 -    } else {
  11.617 -        deprintf(1, "SDL_ISPIXELFORMAT_FOURCC = false\n");
  11.618 -
  11.619 -        Uint8 *src, *dst;
  11.620 -        int row;
  11.621 -        size_t length;
  11.622 -        Uint8 *dstpixels;
  11.623 -
  11.624 -        src = (Uint8 *) txdata->pixels;
  11.625 -        dst = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x
  11.626 -                        * SDL_BYTESPERPIXEL(texture->format);
  11.627 -        length = dstrect->w * SDL_BYTESPERPIXEL(texture->format);
  11.628 -        for (row = 0; row < dstrect->h; ++row) {
  11.629 -            SDL_memcpy(dst, src, length);
  11.630 -            src += txdata->pitch;
  11.631 -            dst += data->screen->pitch;
  11.632 -        }
  11.633 -    }
  11.634 -
  11.635 -    deprintf(1, "-SDL_PS3_RenderCopy()\n");
  11.636 -    return 0;
  11.637 -}
  11.638 -
  11.639 -static void
  11.640 -SDL_PS3_RenderPresent(SDL_Renderer * renderer)
  11.641 -{
  11.642 -    deprintf(1, "+SDL_PS3_RenderPresent()\n");
  11.643 -    SDL_PS3_RenderData *data =
  11.644 -        (SDL_PS3_RenderData *) renderer->driverdata;
  11.645 -    SDL_Window *window = SDL_GetWindowFromID(renderer->window);
  11.646 -    SDL_VideoDisplay *display = window->display;
  11.647 -    SDL_VideoData *devdata = display->device->driverdata;
  11.648 -
  11.649 -    /* Send the data to the screen */
  11.650 -    /* Get screeninfo */
  11.651 -    struct fb_fix_screeninfo fb_finfo;
  11.652 -    if (ioctl(devdata->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) {
  11.653 -        SDL_SetError("[PS3] Can't get fixed screeninfo");
  11.654 -    }
  11.655 -    struct fb_var_screeninfo fb_vinfo;
  11.656 -    if (ioctl(devdata->fbdev, FBIOGET_VSCREENINFO, &fb_vinfo)) {
  11.657 -        SDL_SetError("[PS3] Can't get VSCREENINFO");
  11.658 -    }
  11.659 -
  11.660 -    /* 16 and 15 bpp is reported as 16 bpp */
  11.661 -    //txdata->bpp = fb_vinfo.bits_per_pixel;
  11.662 -    //if (txdata->bpp == 16)
  11.663 -    //    txdata->bpp = fb_vinfo.red.length + fb_vinfo.green.length + fb_vinfo.blue.length;
  11.664 -
  11.665 -    /* Adjust centering */
  11.666 -    data->bounded_width = window->w < fb_vinfo.xres ? window->w : fb_vinfo.xres;
  11.667 -    data->bounded_height = window->h < fb_vinfo.yres ? window->h : fb_vinfo.yres;
  11.668 -    /* We could use SDL's CENTERED flag for centering */
  11.669 -    data->offset_left = (fb_vinfo.xres - data->bounded_width) >> 1;
  11.670 -    data->offset_top = (fb_vinfo.yres - data->bounded_height) >> 1;
  11.671 -    data->center[0] = devdata->frame_buffer + data->offset_left * /*txdata->bpp/8*/ 4 +
  11.672 -                data->offset_top * fb_finfo.line_length;
  11.673 -    data->center[1] = data->center[0] + fb_vinfo.yres * fb_finfo.line_length;
  11.674 -
  11.675 -    deprintf(1, "offset_left = %u\n", data->offset_left);
  11.676 -    deprintf(1, "offset_top = %u\n", data->offset_top);
  11.677 -
  11.678 -    /* Set SPU parms for copying the surface to framebuffer */
  11.679 -    devdata->fb_parms->data = (unsigned char *)data->screen->pixels;
  11.680 -    devdata->fb_parms->center = data->center[data->current_screen];
  11.681 -    devdata->fb_parms->out_line_stride = fb_finfo.line_length;
  11.682 -    devdata->fb_parms->in_line_stride = window->w * /*txdata->bpp / 8*/4;
  11.683 -    devdata->fb_parms->bounded_input_height = data->bounded_height;
  11.684 -    devdata->fb_parms->bounded_input_width = data->bounded_width;
  11.685 -    //devdata->fb_parms->fb_pixel_size = txdata->bpp / 8;
  11.686 -    devdata->fb_parms->fb_pixel_size = 4;//SDL_BYTESPERPIXEL(window->format);
  11.687 -
  11.688 -    deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", devdata->fb_thread_data->argp);
  11.689 -
  11.690 -    /* Copying.. */
  11.691 -    SPE_SendMsg(devdata->fb_thread_data, SPU_START);
  11.692 -    SPE_SendMsg(devdata->fb_thread_data, (unsigned int)devdata->fb_thread_data->argp);
  11.693 -
  11.694 -    SPE_WaitForMsg(devdata->fb_thread_data, SPU_FIN);
  11.695 -
  11.696 -    /* Wait for vsync */
  11.697 -    if (renderer->info.flags & SDL_RENDERER_PRESENTVSYNC) {
  11.698 -        unsigned long crt = 0;
  11.699 -        deprintf(1, "[PS3] Wait for vsync\n");
  11.700 -        ioctl(devdata->fbdev, FBIO_WAITFORVSYNC, &crt);
  11.701 -    }
  11.702 -
  11.703 -    /* Page flip */
  11.704 -    deprintf(1, "[PS3] Page flip to buffer #%u 0x%x\n", data->current_screen, data->center[data->current_screen]);
  11.705 -    ioctl(devdata->fbdev, PS3FB_IOCTL_FSEL, (unsigned long)&data->current_screen);
  11.706 -
  11.707 -    /* Update the flipping chain, if any */
  11.708 -    if (data->double_buffering) {
  11.709 -        data->current_screen = (data->current_screen + 1) % 2;
  11.710 -    }
  11.711 -    deprintf(1, "-SDL_PS3_RenderPresent()\n");
  11.712 -}
  11.713 -
  11.714 -static void
  11.715 -SDL_PS3_DestroyRenderer(SDL_Renderer * renderer)
  11.716 -{
  11.717 -    deprintf(1, "+SDL_PS3_DestroyRenderer()\n");
  11.718 -    SDL_PS3_RenderData *data =
  11.719 -        (SDL_PS3_RenderData *) renderer->driverdata;
  11.720 -    int i;
  11.721 -
  11.722 -    if (data) {
  11.723 -        for (i = 0; i < SDL_arraysize(data->screen); ++i) {
  11.724 -            if (data->screen) {
  11.725 -                SDL_FreeSurface(data->screen);
  11.726 -            }
  11.727 -        }
  11.728 -
  11.729 -        /* Shutdown SPE and release related resources */
  11.730 -        if (data->scaler_thread_data) {
  11.731 -            free((void *)data->scaler_thread_data);
  11.732 -        }
  11.733 -        if (data->scaler_parms) {
  11.734 -            free((void *)data->scaler_parms);
  11.735 -        }
  11.736 -        if (data->converter_thread_data) {
  11.737 -            SPE_Shutdown(data->converter_thread_data);
  11.738 -            free((void *)data->converter_thread_data);
  11.739 -        }
  11.740 -        if (data->converter_parms) {
  11.741 -            free((void *)data->converter_parms);
  11.742 -        }
  11.743 -
  11.744 -        SDL_free(data);
  11.745 -    }
  11.746 -    SDL_free(renderer);
  11.747 -    deprintf(1, "-SDL_PS3_DestroyRenderer()\n");
  11.748 -}
  11.749 -
  11.750 -/* vi: set ts=4 sw=4 expandtab: */
    12.1 --- a/src/video/ps3/SDL_ps3render_c.h	Wed Jan 19 22:21:31 2011 -0800
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,29 +0,0 @@
    12.4 -/*
    12.5 -    SDL - Simple DirectMedia Layer
    12.6 -    Copyright (C) 1997-2010 Sam Lantinga
    12.7 -
    12.8 -    This library is free software; you can redistribute it and/or
    12.9 -    modify it under the terms of the GNU Lesser General Public
   12.10 -    License as published by the Free Software Foundation; either
   12.11 -    version 2.1 of the License, or (at your option) any later version.
   12.12 -
   12.13 -    This library is distributed in the hope that it will be useful,
   12.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   12.16 -    Lesser General Public License for more details.
   12.17 -
   12.18 -    You should have received a copy of the GNU Lesser General Public
   12.19 -    License along with this library; if not, write to the Free Software
   12.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   12.21 -
   12.22 -    Sam Lantinga
   12.23 -    slouken@libsdl.org
   12.24 -*/
   12.25 -#include "SDL_config.h"
   12.26 -
   12.27 -/* Default framebuffer device on PS3 */
   12.28 -/* SDL surface based renderer implementation */
   12.29 -
   12.30 -extern SDL_RenderDriver SDL_PS3_RenderDriver;
   12.31 -
   12.32 -/* vi: set ts=4 sw=4 expandtab: */
    13.1 --- a/src/video/ps3/SDL_ps3spe.c	Wed Jan 19 22:21:31 2011 -0800
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,166 +0,0 @@
    13.4 -/*
    13.5 -    SDL - Simple DirectMedia Layer
    13.6 -    Copyright (C) 1997-2010 Sam Lantinga
    13.7 -
    13.8 -    This library is free software; you can redistribute it and/or
    13.9 -    modify it under the terms of the GNU Lesser General Public
   13.10 -    License as published by the Free Software Foundation; either
   13.11 -    version 2.1 of the License, or (at your option) any later version.
   13.12 -
   13.13 -    This library is distributed in the hope that it will be useful,
   13.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13.16 -    Lesser General Public License for more details.
   13.17 -
   13.18 -    You should have received a copy of the GNU Lesser General Public
   13.19 -    License along with this library; if not, write to the Free Software
   13.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   13.21 -
   13.22 -    Sam Lantinga
   13.23 -    slouken@libsdl.org
   13.24 -*/
   13.25 -#include "SDL_config.h"
   13.26 -
   13.27 -#include "SDL_video.h"
   13.28 -#include "SDL_ps3spe_c.h"
   13.29 -
   13.30 -#include "SDL_ps3video.h"
   13.31 -#include "SDL_ps3render_c.h"
   13.32 -
   13.33 -/* Start the SPE thread */
   13.34 -int SPE_Start(spu_data_t * spe_data)
   13.35 -{
   13.36 -  deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name);
   13.37 -  if (!(spe_data->booted))
   13.38 -    SPE_Boot(spe_data);
   13.39 -
   13.40 -  /* To allow re-running of context, spe_ctx_entry has to be set before each call */
   13.41 -  spe_data->entry = SPE_DEFAULT_ENTRY;
   13.42 -  spe_data->error_code = 0;
   13.43 -
   13.44 -  /* Create SPE thread and run */
   13.45 -  deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name);
   13.46 -  if (pthread_create
   13.47 -      (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) {
   13.48 -    deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name);
   13.49 -    SDL_SetError("[PS3->SPU] Could not create pthread for spe");
   13.50 -    return -1;
   13.51 -  }
   13.52 -
   13.53 -  if (spe_data->keepalive)
   13.54 -    SPE_WaitForMsg(spe_data, SPU_READY);
   13.55 -}
   13.56 -
   13.57 -/* Stop the SPE thread */
   13.58 -int SPE_Stop(spu_data_t * spe_data)
   13.59 -{
   13.60 -  deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name);
   13.61 -  /* Wait for SPE thread to complete */
   13.62 -  deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name);
   13.63 -  if (pthread_join(spe_data->thread, NULL)) {
   13.64 -    deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name);
   13.65 -    SDL_SetError("[PS3->SPU] Failed joining the thread");
   13.66 -    return -1;
   13.67 -  }
   13.68 -
   13.69 -  return 0;
   13.70 -}
   13.71 -
   13.72 -/* Create SPE context and load program */
   13.73 -int SPE_Boot(spu_data_t * spe_data)
   13.74 -{
   13.75 -  /* Create SPE context */
   13.76 -  deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name);
   13.77 -  spe_data->ctx = spe_context_create(0, NULL);
   13.78 -  if (spe_data->ctx == NULL) {
   13.79 -    deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name);
   13.80 -    SDL_SetError("[PS3->SPU] Failed creating SPE context");
   13.81 -    return -1;
   13.82 -  }
   13.83 -
   13.84 -  /* Load SPE object into SPE local store */
   13.85 -  deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name);
   13.86 -  if (spe_program_load(spe_data->ctx, &spe_data->program)) {
   13.87 -    deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name);
   13.88 -    SDL_SetError
   13.89 -        ("[PS3->SPU] Failed loading program into SPE context");
   13.90 -    return -1;
   13.91 -  }
   13.92 -  spe_data->booted = 1;
   13.93 -  deprintf(2, "[PS3->SPU] SPE boot successful\n");
   13.94 -
   13.95 -  return 0;
   13.96 -}
   13.97 -
   13.98 -/* (Stop and) shutdown the SPE */
   13.99 -int SPE_Shutdown(spu_data_t * spe_data)
  13.100 -{
  13.101 -  if (spe_data->keepalive && spe_data->booted) {
  13.102 -    SPE_SendMsg(spe_data, SPU_EXIT);
  13.103 -    SPE_Stop(spe_data);
  13.104 -  }
  13.105 -
  13.106 -  /* Destroy SPE context */
  13.107 -  deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name);
  13.108 -  if (spe_context_destroy(spe_data->ctx)) {
  13.109 -    deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name);
  13.110 -    SDL_SetError("[PS3->SPU] Failed destroying context");
  13.111 -    return -1;
  13.112 -  }
  13.113 -  deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name);
  13.114 -  return 0;
  13.115 -}
  13.116 -
  13.117 -/* Send message to the SPE via mailboxe */
  13.118 -int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg)
  13.119 -{
  13.120 -  deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name);
  13.121 -  /* Send one message, block until message was sent */
  13.122 -  unsigned int spe_in_mbox_msgs[1];
  13.123 -  spe_in_mbox_msgs[0] = msg;
  13.124 -  int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING);
  13.125 -
  13.126 -  if (1 > in_mbox_write) {
  13.127 -    deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name);
  13.128 -    SDL_SetError("[PS3->SPU] No message could be written");
  13.129 -    return -1;
  13.130 -  }
  13.131 -  return 0;
  13.132 -}
  13.133 -
  13.134 -
  13.135 -/* Read 1 message from SPE, block until at least 1 message was received */
  13.136 -int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg)
  13.137 -{
  13.138 -  deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name);
  13.139 -  unsigned int out_messages[1];
  13.140 -  while (!spe_out_mbox_status(spe_data->ctx));
  13.141 -  int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1);
  13.142 -  deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]);
  13.143 -  if (out_messages[0] == msg)
  13.144 -    return 0;
  13.145 -  else
  13.146 -    return -1;
  13.147 -}
  13.148 -
  13.149 -/* Re-runnable invocation of the spe_context_run call */
  13.150 -void SPE_RunContext(void *thread_argp)
  13.151 -{
  13.152 -  /* argp is the pointer to argument to be passed to the SPE program */
  13.153 -  spu_data_t *args = (spu_data_t *) thread_argp;
  13.154 -  deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp);
  13.155 -
  13.156 -  /* Run it.. */
  13.157 -  deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name);
  13.158 -  if (spe_context_run
  13.159 -      (args->ctx, &args->entry, 0, (void *)args->argp, NULL,
  13.160 -       NULL) < 0) {
  13.161 -    deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name);
  13.162 -    SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name);
  13.163 -    exit(1);
  13.164 -  }
  13.165 -
  13.166 -  pthread_exit(NULL);
  13.167 -}
  13.168 -
  13.169 -/* vi: set ts=4 sw=4 expandtab: */
    14.1 --- a/src/video/ps3/SDL_ps3spe_c.h	Wed Jan 19 22:21:31 2011 -0800
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,87 +0,0 @@
    14.4 -/*
    14.5 -    SDL - Simple DirectMedia Layer
    14.6 -    Copyright (C) 1997-2010 Sam Lantinga
    14.7 -
    14.8 -    This library is free software; you can redistribute it and/or
    14.9 -    modify it under the terms of the GNU Lesser General Public
   14.10 -    License as published by the Free Software Foundation; either
   14.11 -    version 2.1 of the License, or (at your option) any later version.
   14.12 -
   14.13 -    This library is distributed in the hope that it will be useful,
   14.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   14.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14.16 -    Lesser General Public License for more details.
   14.17 -
   14.18 -    You should have received a copy of the GNU Lesser General Public
   14.19 -    License along with this library; if not, write to the Free Software
   14.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   14.21 -
   14.22 -    Sam Lantinga
   14.23 -    slouken@libsdl.org
   14.24 -*/
   14.25 -
   14.26 -/* This SPE API basically provides 3 ways to run and control a program
   14.27 - * on the SPE:
   14.28 - * - Start and stop the program (keepalive=0).
   14.29 - *   SPE_Start() will implicitly boot up the program, create a thread and run
   14.30 - *   the context.
   14.31 - *   SPE_Stop() will join the (terminated) thread (may block) and return.
   14.32 - * - Boot the program and run it (keepalive=0).
   14.33 - *   SPE_Boot() will create a context and load the program and finally start
   14.34 - *   the context with SPE_Start().
   14.35 - *   SPE_Stop() will savely end the program.
   14.36 - * - Boot, Run and send messages to the program (keepalive=1).
   14.37 - *   Start the program by using one of the methods described above. Once
   14.38 - *   the READY message has been received, the program is in its infinite
   14.39 - *   loop waiting for new messages.
   14.40 - *   Every time you run the program, send SPU_START and the address of the
   14.41 - *   corresponding struct using SPE_SendMsg().
   14.42 - *   SPE_WaitForMsg() will then wait for SPU_FIN; the call is blocking.
   14.43 - *   SPE_Shutdown() sends SPU_EXIT and finally stops the program.
   14.44 - *
   14.45 - * Therefore the SPE program
   14.46 - * - either runs once and returns
   14.47 - * - or runs in an infinite loop and is controlled by messages.
   14.48 - */
   14.49 -
   14.50 -#include "SDL_config.h"
   14.51 -
   14.52 -#include "spulibs/spu_common.h"
   14.53 -
   14.54 -#include <libspe2.h>
   14.55 -
   14.56 -#ifndef _SDL_ps3spe_h
   14.57 -#define _SDL_ps3spe_h
   14.58 -
   14.59 -/* SPU handling data: the state of one SPE program, handed to the SPE_*() calls below */
   14.60 -typedef struct spu_data {
   14.61 -    /* Context to be executed, and the program that gets loaded into it */
   14.62 -    spe_context_ptr_t ctx;
   14.63 -    spe_program_handle_t program;
   14.64 -    /* Thread running the context */
   14.65 -    pthread_t thread;
   14.66 -    /* Program name, used for debugging */
   14.67 -    char * program_name;
   14.68 -    /* Non-zero once SPE_Start() or SPE_Boot() has been called */
   14.69 -    unsigned int booted;
   14.70 -    /* Non-zero if the program runs in an infinite, message-controlled loop */
   14.71 -    unsigned int keepalive;
   14.72 -    unsigned int entry; /* NOTE(review): presumably the SPE entry point used when running the context -- confirm in SDL_ps3spe.c */
   14.73 -    /* Exit code of the program */
   14.74 -    int error_code;
   14.75 -    /* Arguments passed to the program */
   14.76 -    void * argp;
   14.77 -} spu_data_t;
   14.78 -
   14.79 -/* SPU specific API functions (behaviour as described in the overview comment at the top of this header) */
   14.80 -int SPE_Start(spu_data_t * spe_data);       /* implicitly boots the program, creates a thread and runs the context */
   14.81 -int SPE_Stop(spu_data_t * spe_data);        /* joins the (terminated) thread; may block */
   14.82 -int SPE_Boot(spu_data_t * spe_data);        /* creates a context and loads the program */
   14.83 -int SPE_Shutdown(spu_data_t * spe_data);    /* sends SPU_EXIT and finally stops the program */
   14.84 -int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg);       /* e.g. SPU_START, then the address of the parameter struct */
   14.85 -int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg);    /* blocking wait, e.g. for SPU_FIN */
   14.86 -void SPE_RunContext(void *thread_argp);     /* NOTE(review): presumably the body of the thread created by SPE_Start() -- confirm */
   14.87 -
   14.88 -#endif /* _SDL_ps3spe_h */
   14.89 -
   14.90 -/* vi: set ts=4 sw=4 expandtab: */
    15.1 --- a/src/video/ps3/SDL_ps3video.c	Wed Jan 19 22:21:31 2011 -0800
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,228 +0,0 @@
    15.4 -/*
    15.5 -    SDL - Simple DirectMedia Layer
    15.6 -    Copyright (C) 1997-2010 Sam Lantinga
    15.7 -
    15.8 -    This library is free software; you can redistribute it and/or
    15.9 -    modify it under the terms of the GNU Lesser General Public
   15.10 -    License as published by the Free Software Foundation; either
   15.11 -    version 2.1 of the License, or (at your option) any later version.
   15.12 -
   15.13 -    This library is distributed in the hope that it will be useful,
   15.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   15.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   15.16 -    Lesser General Public License for more details.
   15.17 -
   15.18 -    You should have received a copy of the GNU Lesser General Public
   15.19 -    License along with this library; if not, write to the Free Software
   15.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   15.21 -
   15.22 -    Sam Lantinga
   15.23 -    slouken@libsdl.org
   15.24 -*/
   15.25 -#include "SDL_config.h"
   15.26 -
   15.27 -/* SDL PS3 video driver implementation based on dummy video driver
   15.28 - *
   15.29 - * Initial work by Ryan C. Gordon (icculus@icculus.org). A good portion
   15.30 - *  of this was cut-and-pasted from Stephane Peter's work in the AAlib
   15.31 - *  SDL video driver.  Renamed to "DUMMY" by Sam Lantinga.
   15.32 - */
   15.33 -
   15.34 -#include "SDL_video.h"
   15.35 -#include "SDL_mouse.h"
   15.36 -#include "../SDL_sysvideo.h"
   15.37 -#include "../SDL_pixels_c.h"
   15.38 -#include "../../events/SDL_events_c.h"
   15.39 -
   15.40 -#include "SDL_ps3video.h"
   15.41 -#include "SDL_ps3spe_c.h"
   15.42 -#include "SDL_ps3events_c.h"
   15.43 -#include "SDL_ps3render_c.h"
   15.44 -#include "SDL_ps3modes_c.h"
   15.45 -
   15.46 -#include <fcntl.h>
   15.47 -#include <linux/fb.h>
   15.48 -#include <asm/ps3fb.h>
   15.49 -#include <sys/mman.h>
   15.50 -
   15.51 -#define PS3VID_DRIVER_NAME "ps3"
   15.52 -
   15.53 -/* Initialization/Query functions */
   15.54 -static int PS3_VideoInit(_THIS);
   15.55 -static void PS3_VideoQuit(_THIS);
   15.56 -
   15.57 -/* Stores the SPE executable name of fb_writer_spu */
   15.58 -extern spe_program_handle_t fb_writer_spu;
   15.59 -
   15.60 -/* PS3 driver bootstrap functions */
   15.61 -/* Availability check: returns 1 only when the SDL_VIDEODRIVER environment variable equals "ps3", otherwise 0. */
   15.62 -static int
   15.63 -PS3_Available(void)
   15.64 -{
   15.65 -    deprintf(1, "+PS3_Available()\n");
   15.66 -    const char *envr = SDL_getenv("SDL_VIDEODRIVER");
   15.67 -    if ((envr) && (SDL_strcmp(envr, PS3VID_DRIVER_NAME) == 0)) {
   15.68 -        return (1);
   15.69 -    }
   15.70 -    /* Driver was not requested; the "-" exit trace is only printed on this path */
   15.71 -    deprintf(1, "-PS3_Available()\n");
   15.72 -    return (0);
   15.73 -}
   15.74 -/* Frees the driver-private data and then the device itself; PS3_CreateDevice() installs this as device->free. */
   15.75 -static void
   15.76 -PS3_DeleteDevice(SDL_VideoDevice * device)
   15.77 -{
   15.78 -    deprintf(1, "+PS3_DeleteDevice()\n");
   15.79 -    SDL_free(device->driverdata);   /* the SDL_VideoData allocated in PS3_CreateDevice() */
   15.80 -    SDL_free(device);
   15.81 -    deprintf(1, "-PS3_DeleteDevice()\n");
   15.82 -}
   15.83 -/* Allocates a zeroed SDL_VideoDevice plus its SDL_VideoData and installs the PS3 callbacks; returns NULL when out of memory (devindex is unused). */
   15.84 -static SDL_VideoDevice *
   15.85 -PS3_CreateDevice(int devindex)
   15.86 -{
   15.87 -    deprintf(1, "+PS3_CreateDevice()\n");
   15.88 -    SDL_VideoDevice *device;
   15.89 -    SDL_VideoData *data;
   15.90 -
   15.91 -    /* Initialize all variables that we clean on shutdown */
   15.92 -    device = (SDL_VideoDevice *) SDL_calloc(1, sizeof(SDL_VideoDevice));
   15.93 -    if (!device) {
   15.94 -        SDL_OutOfMemory();
   15.95 -        /* device is NULL on this path, so there is nothing to
   15.96 -         * release before reporting the failure to the caller.
   15.97 -         */
   15.98 -        return NULL;
   15.99 -    }
  15.100 -    data = (struct SDL_VideoData *) SDL_calloc(1, sizeof(SDL_VideoData));
  15.101 -    if (!data) {
  15.102 -        SDL_OutOfMemory();
  15.103 -        SDL_free(device);
  15.104 -        return NULL;
  15.105 -    }
  15.106 -    device->driverdata = data;      /* released again by PS3_DeleteDevice() */
  15.107 -
  15.108 -    /* Set the function pointers */
  15.109 -    device->VideoInit = PS3_VideoInit;
  15.110 -    device->VideoQuit = PS3_VideoQuit;
  15.111 -    device->SetDisplayMode = PS3_SetDisplayMode;
  15.112 -    device->GetDisplayModes = PS3_GetDisplayModes;
  15.113 -    device->PumpEvents = PS3_PumpEvents;
  15.114 -
  15.115 -    device->free = PS3_DeleteDevice;
  15.116 -
  15.117 -    deprintf(1, "-PS3_CreateDevice()\n");
  15.118 -    return device;
  15.119 -}
  15.120 -/* Bootstrap entry for this driver: its name and description, then the availability check and the device factory. */
  15.121 -VideoBootStrap PS3_bootstrap = {
  15.122 -    PS3VID_DRIVER_NAME, "SDL PS3 Cell video driver",
  15.123 -    PS3_Available, PS3_CreateDevice
  15.124 -};
  15.125 -
  15.126 -
  15.127 -int
  15.128 -PS3_VideoInit(_THIS)
  15.129 -{
  15.130 -    int i;
  15.131 -
  15.132 -    deprintf(1, "PS3_VideoInit()\n");
  15.133 -
  15.134 -    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
  15.135 -    SDL_DisplayMode mode;
  15.136 -
  15.137 -    /* Create SPU fb_parms and thread structure */
  15.138 -    data->fb_parms = (struct fb_writer_parms_t *)
  15.139 -        memalign(16, sizeof(struct fb_writer_parms_t));
  15.140 -    data->fb_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
  15.141 -    if (data->fb_parms == NULL || data->fb_thread_data == NULL) {
  15.142 -        SDL_OutOfMemory();
  15.143 -        return -1;
  15.144 -    }
  15.145 -    data->fb_thread_data->program = fb_writer_spu;
  15.146 -    data->fb_thread_data->program_name = "fb_writer_spu";
  15.147 -    data->fb_thread_data->argp = (void *)data->fb_parms;
  15.148 -    data->fb_thread_data->keepalive = 1;
  15.149 -    data->fb_thread_data->booted = 0;
  15.150 -
  15.151 -    SPE_Start(data->fb_thread_data);
  15.152 -
  15.153 -    /* Open the device */
  15.154 -    data->fbdev = open(PS3DEV, O_RDWR);
  15.155 -    if (data->fbdev < 0) {
  15.156 -        SDL_SetError("[PS3] Unable to open device %s", PS3DEV);
  15.157 -        return -1;
  15.158 -    }
  15.159 -
  15.160 -    /* Take control of frame buffer from kernel, for details see
  15.161 -     * http://felter.org/wesley/files/ps3/linux-20061110-docs/ApplicationProgrammingEnvironment.html
  15.162 -     * kernel will no longer flip the screen itself
  15.163 -     */
  15.164 -    ioctl(data->fbdev, PS3FB_IOCTL_ON, 0);
  15.165 -
  15.166 -    /* Unblank screen */
  15.167 -    ioctl(data->fbdev, FBIOBLANK, 0);
  15.168 -
  15.169 -    struct fb_fix_screeninfo fb_finfo;
  15.170 -    if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) {
  15.171 -        SDL_SetError("[PS3] Can't get fixed screeninfo");
  15.172 -        return (0);
  15.173 -    }
  15.174 -
  15.175 -    /* Note: on PS3, fb_finfo.smem_len is enough for double buffering */
  15.176 -    if ((data->frame_buffer = (uint8_t *)mmap(0, fb_finfo.smem_len,
  15.177 -        PROT_READ | PROT_WRITE, MAP_SHARED,
  15.178 -        data->fbdev, 0)) == (uint8_t *) - 1) {
  15.179 -        SDL_SetError("[PS3] Can't mmap for %s", PS3DEV);
  15.180 -        return (0);
  15.181 -    } else {
  15.182 -        /* Enable double buffering */
  15.183 -    }
  15.184 -
  15.185 -    /* Blank screen */
  15.186 -    memset(data->frame_buffer, 0x00, fb_finfo.smem_len);
  15.187 -
  15.188 -    PS3_InitModes(_this);
  15.189 -    for (i = 0; i < _this->num_displays; ++i) {
  15.190 -        SDL_AddRenderDriver(&_this->displays[i], &SDL_PS3_RenderDriver);
  15.191 -    }
  15.192 -
  15.193 -    /* We're done! */
  15.194 -    return 0;
  15.195 -}
  15.196 -/* Releases what PS3_VideoInit() acquired: display modes, the framebuffer mapping, the SPE program with its parameter block, and the framebuffer device. */
  15.197 -void
  15.198 -PS3_VideoQuit(_THIS)
  15.199 -{
  15.200 -    deprintf(1, "PS3_VideoQuit()\n");
  15.201 -    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
  15.202 -
  15.203 -    PS3_QuitModes(_this);
  15.204 -
  15.205 -    /* Unmap framebuffer (skipped when the mapping was never established) */
  15.206 -    if (data->frame_buffer && data->frame_buffer != (uint8_t *) MAP_FAILED) {
  15.207 -        struct fb_fix_screeninfo fb_finfo;
  15.208 -        if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo) != -1) {
  15.209 -            munmap(data->frame_buffer, fb_finfo.smem_len);
  15.210 -            data->frame_buffer = 0;
  15.211 -        }
  15.212 -    }
  15.213 -
  15.214 -    /* Stop the SPE program before freeing fb_parms, which was handed to it as argp */
  15.215 -    if (data->fb_thread_data) {
  15.216 -        SPE_Shutdown(data->fb_thread_data);
  15.217 -        free((void *)data->fb_thread_data);
  15.218 -        data->fb_thread_data = NULL;
  15.219 -    }
  15.220 -    free((void *)data->fb_parms);
  15.221 -    data->fb_parms = NULL;
  15.222 -    /* Close device: fbdev is 0 if never opened (zeroed driverdata) and -1 if open() failed */
  15.223 -    if (data->fbdev > 0) {
  15.224 -        /* Give control of frame buffer back to kernel */
  15.225 -        ioctl(data->fbdev, PS3FB_IOCTL_OFF, 0);
  15.226 -        close(data->fbdev);
  15.227 -        data->fbdev = -1;
  15.228 -    }
  15.229 -}
  15.230 -
  15.231 -/* vi: set ts=4 sw=4 expandtab: */
    16.1 --- a/src/video/ps3/SDL_ps3video.h	Wed Jan 19 22:21:31 2011 -0800
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,79 +0,0 @@
    16.4 -/*
    16.5 -    SDL - Simple DirectMedia Layer
    16.6 -    Copyright (C) 1997-2010 Sam Lantinga
    16.7 -
    16.8 -    This library is free software; you can redistribute it and/or
    16.9 -    modify it under the terms of the GNU Lesser General Public
   16.10 -    License as published by the Free Software Foundation; either
   16.11 -    version 2.1 of the License, or (at your option) any later version.
   16.12 -
   16.13 -    This library is distributed in the hope that it will be useful,
   16.14 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
   16.15 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   16.16 -    Lesser General Public License for more details.
   16.17 -
   16.18 -    You should have received a copy of the GNU Lesser General Public
   16.19 -    License along with this library; if not, write to the Free Software
   16.20 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   16.21 -
   16.22 -    Sam Lantinga
   16.23 -    slouken@libsdl.org
   16.24 -*/
   16.25 -#include "SDL_config.h"
   16.26 -
   16.27 -#ifndef _SDL_ps3video_h
   16.28 -#define _SDL_ps3video_h
   16.29 -
   16.30 -#include "../SDL_sysvideo.h"
   16.31 -#include "SDL_ps3spe_c.h"
   16.32 -
   16.33 -#include <linux/fb.h>
   16.34 -#include <asm/ps3fb.h>
   16.35 -
   16.36 -/* Debugging: deprintf(level, fmt, ...) prints to stdout when level <= DEBUG_LEVEL
   16.37 - * 0: No debug messages
   16.38 - * 1: Video debug messages
   16.39 - * 2: SPE debug messages
   16.40 - * 3: Memory addresses
   16.41 - */
   16.42 -#define DEBUG_LEVEL 0
   16.43 -/* DEBUG_LEVEL is always defined just above, so the empty fallback in the #else branch is never selected */
   16.44 -#ifdef DEBUG_LEVEL
   16.45 -#define deprintf( level, fmt, args... ) \
   16.46 -    do \
   16.47 -{ \
   16.48 -    if ( (unsigned)(level) <= DEBUG_LEVEL ) \
   16.49 -    { \
   16.50 -        fprintf( stdout, fmt, ##args ); \
   16.51 -        fflush( stdout ); \
   16.52 -    } \
   16.53 -} while ( 0 )
   16.54 -#else
   16.55 -#define deprintf( level, fmt, args... )
   16.56 -#endif
   16.57 -
   16.58 -/* Default framebuffer device on PS3 */
   16.59 -#define PS3DEV "/dev/fb0"
   16.60 -
   16.61 -/* Private display data; PS3_CreateDevice() stores one instance in device->driverdata */
   16.62 -typedef struct SDL_VideoData
   16.63 -{
   16.64 -    /* Framebuffer device descriptor (result of open() on PS3DEV) */
   16.65 -    int fbdev;
   16.66 -    /* mmap'd access to fbdev */
   16.67 -    uint8_t * frame_buffer;
   16.68 -    /* SPE threading stuff of the framebuffer */
   16.69 -    spu_data_t * fb_thread_data;
   16.70 -    /* Framebuffer transfer data; the attribute aligns this pointer member itself, not the struct it points to */
   16.71 -    volatile struct fb_writer_parms_t * fb_parms __attribute__((aligned(128)));
   16.72 -} SDL_VideoData;
   16.73 -/* Per-display-mode driver data; the struct tag is SDL_DisplayModeData, the typedef name is PS3_DisplayModeData */
   16.74 -typedef struct SDL_DisplayModeData
   16.75 -{
   16.76 -    unsigned long mode;     /* NOTE(review): presumably the ps3fb video mode id -- confirm in SDL_ps3modes.c */
   16.77 -    //struct ps3fb_ioctl_res res;
   16.78 -} PS3_DisplayModeData;
   16.79 -
   16.80 -#endif /* _SDL_ps3video_h */
   16.81 -
   16.82 -/* vi: set ts=4 sw=4 expandtab: */
    17.1 --- a/src/video/ps3/spulibs/Makefile	Wed Jan 19 22:21:31 2011 -0800
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,47 +0,0 @@
    17.4 -# This Makefile is for building the CELL BE SPU libs
    17.5 -# libfb_writer_spu.so, libyuv2rgb_spu.so, libbilin_scaler_spu.so
    17.6 -
    17.7 -# Toolchain
    17.8 -PPU_LD=/usr/bin/ld
    17.9 -SPU_SRCDIR=$(srcdir)/src/video/ps3/spulibs
   17.10 -SPU_LIBDIR=$(srcdir)/src/video/ps3/spulibs/libs
   17.11 -SPU_CFLAGS=-g -W -Wall -Winline -Wno-main -I. -I /usr/spu/include -I /opt/cell/sdk/usr/spu/include -finline-limit=10000 -Winline -ftree-vectorize -funroll-loops -fmodulo-sched -ffast-math -fPIC -O2
   17.12 -
   17.13 -DEPS = $(SPU_SRCDIR)/spu_common.h
   17.14 -LIBS= fb_writer yuv2rgb bilin_scaler
   17.15 -
   17.16 -OBJLIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.a)
   17.17 -SHALIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.so)
   17.18 -
   17.19 -
   17.20 -ps3libs: $(foreach lib,$(OBJLIBS),$(SPU_LIBDIR)/$(lib)) $(foreach lib,$(SHALIBS),$(SPU_LIBDIR)/$(lib))
   17.21 -
   17.22 -# Build chain per library: %.c -> %.o -> %-embed.o -> lib%_spu.a (archive) and lib%_spu.so (shared)
   17.23 -$(SPU_LIBDIR)/lib%_spu.a: $(SPU_LIBDIR)/%-embed.o
   17.24 -	$(AR) -qcs $@ $<
   17.25 -# Shared variant of the same embedded object, linked with $(PPU_LD); soname is the bare file name
   17.26 -$(SPU_LIBDIR)/lib%_spu.so: $(SPU_LIBDIR)/%-embed.o
   17.27 -	$(PPU_LD) -o $@ -shared -soname=$(notdir $@) $<
   17.28 -# Run $(EMBEDSPU) on the SPU output; its first argument is the name <lib>_spu (e.g. fb_writer_spu)
   17.29 -$(SPU_LIBDIR)/%-embed.o: $(SPU_LIBDIR)/%.o
   17.30 -	$(EMBEDSPU) -m32 $(subst -embed.o,,$(notdir $@))_spu $< $@
   17.31 -# Build one SPU source with $(SPU_GCC); no -c is passed, so this step also links (hence -lm)
   17.32 -$(SPU_LIBDIR)/%.o: $(SPU_SRCDIR)/%.c $(DEPS)
   17.33 -	$(SPU_GCC) $(SPU_CFLAGS) -o $@ $< -lm
   17.34 -
   17.35 -# Install the static archives (mode 0644) and the shared objects (mode 0755) into $(DESTDIR)$(libdir); stop at the first failure
   17.36 -ps3libs-install: $(foreach obj,$(OBJLIBS),$(SPU_LIBDIR)/$(obj)) $(foreach obj,$(SHALIBS),$(SPU_LIBDIR)/$(obj))
   17.37 -	for file in $(OBJLIBS); do \
   17.38 -		$(INSTALL) -c -m 0644 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file || exit 1; \
   17.39 -	done
   17.40 -	for file in $(SHALIBS); do \
   17.41 -		$(INSTALL) -c -m 0755 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file || exit 1; \
   17.42 -	done
   17.43 -
   17.44 -ps3libs-uninstall:
   17.45 -	for file in $(OBJLIBS) $(SHALIBS); do \
   17.46 -		rm -f $(DESTDIR)$(libdir)/$$file; \
   17.47 -	done
   17.48 -
   17.49 -ps3libs-clean:
   17.50 -	rm -f $(SPU_LIBDIR)/*
    18.1 --- a/src/video/ps3/spulibs/bilin_scaler.c	Wed Jan 19 22:21:31 2011 -0800
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,2050 +0,0 @@
    18.4 -/*
    18.5 - * SDL - Simple DirectMedia Layer
    18.6 - * CELL BE Support for PS3 Framebuffer
    18.7 - * Copyright (C) 2008, 2009 International Business Machines Corporation
    18.8 - *
    18.9 - * This library is free software; you can redistribute it and/or modify it
   18.10 - * under the terms of the GNU Lesser General Public License as published
   18.11 - * by the Free Software Foundation; either version 2.1 of the License, or
   18.12 - * (at your option) any later version.
   18.13 - *
   18.14 - * This library is distributed in the hope that it will be useful, but
   18.15 - * WITHOUT ANY WARRANTY; without even the implied warranty of
   18.16 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   18.17 - * Lesser General Public License for more details.
   18.18 - *
   18.19 - * You should have received a copy of the GNU Lesser General Public
   18.20 - * License along with this library; if not, write to the Free Software
   18.21 - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
   18.22 - * USA
   18.23 - *
   18.24 - *  Martin Lowinski  <lowinski [at] de [dot] ibm [ibm] com>
   18.25 - *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
   18.26 - *  SPE code based on research by:
   18.27 - *  Rene Becker
   18.28 - *  Thimo Emmerich
   18.29 - */
   18.30 -
   18.31 -#include "spu_common.h"
   18.32 -
   18.33 -#include <spu_intrinsics.h>
   18.34 -#include <spu_mfcio.h>
   18.35 -
   18.36 -// Debugging
   18.37 -//#define DEBUG
   18.38 -
   18.39 -#ifdef DEBUG
   18.40 -#define deprintf(fmt, args... ) \
   18.41 -	fprintf( stdout, fmt, ##args ); \
   18.42 -	fflush( stdout );
   18.43 -#else
   18.44 -#define deprintf( fmt, args... )
   18.45 -#endif
   18.46 -
   18.47 -struct scale_parms_t parms __attribute__((aligned(128)));
   18.48 -
   18.49 -/* A maximum of 8 lines Y, and therefore 4 lines V and 4 lines U, are stored.
   18.50 - * Misaligned data may have to be retrieved, so the incoming v and u
   18.51 - * plane buffers are enlarged to be able to handle this (add 128).
   18.52 - */
   18.53 -unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128)));
   18.54 -unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
   18.55 -unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
   18.56 -
   18.57 -/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */
   18.58 -unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128)));
   18.59 -unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
   18.60 -unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
   18.61 -
   18.62 -/* some vectors needed by the float to int conversion */
   18.63 -static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
   18.64 -static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
   18.65 -
   18.66 -void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
   18.67 -void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
   18.68 -
   18.69 -void scale_srcw16_dstw16();
   18.70 -void scale_srcw16_dstw32();
   18.71 -void scale_srcw32_dstw16();
   18.72 -void scale_srcw32_dstw32();
   18.73 -/* SPU entry point: fetches the scale parameters from the address in argp by DMA, runs one of the four scale_srcw*_dstw* variants, and returns 0. */
   18.74 -int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp )
   18.75 -{
   18.76 -	deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id);
   18.77 -	/* DMA transfer for the input parameters: get them into the local parms struct and wait for completion */
   18.78 -	spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD);
   18.79 -	DMA_WAIT_TAG(TAG_INIT);
   18.80 -
   18.81 -	deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height,
   18.82 -			parms.dst_pixel_width, parms.dst_pixel_height);
   18.83 -	/* Dispatch: a width with any of its low five bits set (not a multiple of 32) takes the "w16" variant */
   18.84 -	if(parms.src_pixel_width & 0x1f) {
   18.85 -		if(parms.dst_pixel_width & 0x1F) {
   18.86 -			deprintf("[SPU] Using scale_srcw16_dstw16\n");
   18.87 -			scale_srcw16_dstw16();
   18.88 -		} else {
   18.89 -			deprintf("[SPU] Using scale_srcw16_dstw32\n");
   18.90 -			scale_srcw16_dstw32();
   18.91 -		}
   18.92 -	} else {
   18.93 -		if(parms.dst_pixel_width & 0x1F) {
   18.94 -			deprintf("[SPU] Using scale_srcw32_dstw16\n");
   18.95 -			scale_srcw32_dstw16();
   18.96 -		} else {
   18.97 -			deprintf("[SPU] Using scale_srcw32_dstw32\n");
   18.98 -			scale_srcw32_dstw32();
   18.99 -		}
  18.100 -	}
  18.101 -	deprintf("[SPU] bilin_scaler_spu... done!\n");
  18.102 -
  18.103 -	return 0;
  18.104 -}
  18.105 -
  18.106 -
  18.107 -/*
  18.108 - * vfloat_to_vuint()
  18.109 - *
  18.110 - * converts a float vector to an unsigned int vector using saturated
  18.111 - * arithmetic (each lane is limited to the range 0.1 .. 255.0 before conversion)
  18.112 - *
  18.113 - * @param vec_s float vector for conversion
  18.114 - * @returns converted unsigned int vector
  18.115 - */
  18.116 -inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
  18.117 -	vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);	/* mask of lanes where 0.1 > vec_s */
  18.118 -	vec_s = spu_sel(vec_s, vec_0_1, select_1);	/* those lanes take the value 0.1 */
  18.119 -
  18.120 -	vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);	/* mask of lanes where vec_s > 255 */
  18.121 -	vec_s = spu_sel(vec_s, vec_255, select_2);	/* those lanes take the value 255 */
  18.122 -	return spu_convtu(vec_s,0);	/* float -> unsigned int with scale exponent 0 */
  18.123 -}
  18.124 -
  18.125 -
  18.126 -/*
  18.127 - * scale_srcw16_dstw16()
  18.128 - *
  18.129 - * processes an input image of width 16
  18.130 - * scaling is done to a width 16
  18.131 - * result stored in RAM
  18.132 - */
  18.133 -void scale_srcw16_dstw16() {
  18.134 -	// extract parameters
  18.135 -	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
  18.136 -
  18.137 -	unsigned int src_width = parms.src_pixel_width;
  18.138 -	unsigned int src_height = parms.src_pixel_height;
  18.139 -	unsigned int dst_width = parms.dst_pixel_width;
  18.140 -	unsigned int dst_height = parms.dst_pixel_height;
  18.141 -
  18.142 -	// YVU
  18.143 -	unsigned int src_linestride_y = src_width;
  18.144 -	unsigned int src_dbl_linestride_y = src_width<<1;
  18.145 -	unsigned int src_linestride_vu = src_width>>1;
  18.146 -	unsigned int src_dbl_linestride_vu = src_width;
  18.147 -
  18.148 -	// scaled YVU
  18.149 -	unsigned int scaled_src_linestride_y = dst_width;
  18.150 -
  18.151 -	// ram addresses
  18.152 -	unsigned char* src_addr_y = parms.y_plane;
  18.153 -	unsigned char* src_addr_v = parms.v_plane;
  18.154 -	unsigned char* src_addr_u = parms.u_plane;
  18.155 -
  18.156 -	// for handling misalignment, addresses are precalculated
  18.157 -	unsigned char* precalc_src_addr_v = src_addr_v;
  18.158 -	unsigned char* precalc_src_addr_u = src_addr_u;
  18.159 -
  18.160 -	unsigned int dst_picture_size = dst_width*dst_height;
  18.161 -
  18.162 -	// Sizes for destination
  18.163 -	unsigned int dst_dbl_linestride_y = dst_width<<1;
  18.164 -	unsigned int dst_dbl_linestride_vu = dst_width>>1;
  18.165 -
  18.166 -	// Perform address calculation for Y, V and U in main memory with dst_addr as base
  18.167 -	unsigned char* dst_addr_main_memory_y = dst_addr;
  18.168 -	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
  18.169 -	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
  18.170 -
  18.171 -	// calculate scale factors
  18.172 -	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
  18.173 -	float y_scale = (float)src_height/(float)dst_height;
  18.174 -
  18.175 -	// double buffered processing
  18.176 -	// buffer switching
  18.177 -	unsigned int curr_src_idx = 0;
  18.178 -	unsigned int curr_dst_idx = 0;
  18.179 -	unsigned int next_src_idx, next_dst_idx;
  18.180 -
  18.181 -	// 2 lines y as output, upper and lowerline
  18.182 -	unsigned int curr_interpl_y_upper = 0;
  18.183 -	unsigned int next_interpl_y_upper;
  18.184 -	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
  18.185 -	// only 1 line v/u output, both planes have the same dimension
  18.186 -	unsigned int curr_interpl_vu = 0;
  18.187 -	unsigned int next_interpl_vu;
  18.188 -
  18.189 -	// weights, calculated in every loop iteration
  18.190 -	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
  18.191 -	vector float vf_next_NSweight_y_upper;
  18.192 -	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
  18.193 -	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
  18.194 -	vector float vf_next_NSweight_vu;
  18.195 -
  18.196 -	// line indices for the src picture
  18.197 -	float curr_src_y_upper = 0.0f, next_src_y_upper;
  18.198 -	float curr_src_y_lower, next_src_y_lower;
  18.199 -	float curr_src_vu = 0.0f, next_src_vu;
  18.200 -
  18.201 -	// line indices for the dst picture
  18.202 -	unsigned int dst_y=0, dst_vu=0;
  18.203 -
  18.204 -	// offset for the v and u plane to handle misalignement
  18.205 -	unsigned int curr_lsoff_v = 0, next_lsoff_v;
  18.206 -	unsigned int curr_lsoff_u = 0, next_lsoff_u;
  18.207 -
  18.208 -	// calculate lower line indices
  18.209 -	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
  18.210 -	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
  18.211 -	// lower line weight
  18.212 -	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
  18.213 -
  18.214 -
  18.215 -	// start partially double buffered processing
  18.216 -	// get initial data, 2 sets of y, 1 set v, 1 set u
  18.217 -	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
  18.218 -	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.219 -			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
  18.220 -			src_dbl_linestride_y,
  18.221 -			RETR_BUF,
  18.222 -			0, 0 );
  18.223 -	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
  18.224 -	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
  18.225 -
  18.226 -	/* iteration loop
  18.227 -	 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
  18.228 -	 * the scaled output is 2 lines y, 1 line v, 1 line u
  18.229 -	 * the yuv2rgb-converted output is stored to RAM
  18.230 -	 */
  18.231 -	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
  18.232 -		dst_y = dst_vu<<1;
  18.233 -
  18.234 -		// calculate next indices
  18.235 -		next_src_vu = ((float)dst_vu+1)*y_scale;
  18.236 -		next_src_y_upper = ((float)dst_y+2)*y_scale;
  18.237 -		next_src_y_lower = ((float)dst_y+3)*y_scale;
  18.238 -
  18.239 -		next_interpl_vu = (unsigned int) next_src_vu;
  18.240 -		next_interpl_y_upper = (unsigned int) next_src_y_upper;
  18.241 -		next_interpl_y_lower = (unsigned int) next_src_y_lower;
  18.242 -
  18.243 -		// calculate weight NORTH-SOUTH
  18.244 -		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
  18.245 -		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
  18.246 -		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
  18.247 -
  18.248 -		// get next lines
  18.249 -		next_src_idx = curr_src_idx^1;
  18.250 -		next_dst_idx = curr_dst_idx^1;
  18.251 -
  18.252 -		// 4 lines y
  18.253 -		mfc_get( y_plane[next_src_idx],
  18.254 -				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
  18.255 -				src_dbl_linestride_y,
  18.256 -				RETR_BUF+next_src_idx,
  18.257 -				0, 0 );
  18.258 -		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
  18.259 -				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
  18.260 -				src_dbl_linestride_y,
  18.261 -				RETR_BUF+next_src_idx,
  18.262 -				0, 0 );
  18.263 -
  18.264 -		// 2 lines v
  18.265 -		precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
  18.266 -		next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
  18.267 -		mfc_get( v_plane[next_src_idx],
  18.268 -				((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
  18.269 -				src_dbl_linestride_vu+(next_lsoff_v<<1),
  18.270 -				RETR_BUF+next_src_idx,
  18.271 -				0, 0 );
  18.272 -		// 2 lines u
  18.273 -		precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
  18.274 -		next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
  18.275 -		mfc_get( u_plane[next_src_idx],
  18.276 -				((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
  18.277 -				src_dbl_linestride_vu+(next_lsoff_v<<1),
  18.278 -				RETR_BUF+next_src_idx,
  18.279 -				0, 0 );
  18.280 -
  18.281 -		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
  18.282 -
  18.283 -		// scaling
  18.284 -		// work line y_upper
  18.285 -		bilinear_scale_line_w16( y_plane[curr_src_idx],
  18.286 -				scaled_y_plane[curr_src_idx],
  18.287 -				dst_width,
  18.288 -				vf_x_scale,
  18.289 -				vf_curr_NSweight_y_upper,
  18.290 -				src_linestride_y );
  18.291 -		// work line y_lower
  18.292 -		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.293 -				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
  18.294 -				dst_width,
  18.295 -				vf_x_scale,
  18.296 -				vf_curr_NSweight_y_lower,
  18.297 -				src_linestride_y );
  18.298 -		// work line v
  18.299 -		bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
  18.300 -				scaled_v_plane[curr_src_idx],
  18.301 -				dst_width>>1,
  18.302 -				vf_x_scale,
  18.303 -				vf_curr_NSweight_vu,
  18.304 -				src_linestride_vu );
  18.305 -		// work line u
  18.306 -		bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
  18.307 -				scaled_u_plane[curr_src_idx],
  18.308 -				dst_width>>1,
  18.309 -				vf_x_scale,
  18.310 -				vf_curr_NSweight_vu,
  18.311 -				src_linestride_vu );
  18.312 -
  18.313 -
  18.314 -		// Store the result back to main memory into a destination buffer in YUV format
  18.315 -		//---------------------------------------------------------------------------------------------
  18.316 -		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.317 -
  18.318 -		// Perform three DMA transfers to 3 different locations in the main memory!
  18.319 -		// dst_width:	Pixel width of destination image
  18.320 -		// dst_addr:	Destination address in main memory
  18.321 -		// dst_vu:	Counter which is incremented one by one
  18.322 -		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
  18.323 -		mfc_put(	scaled_y_plane[curr_src_idx],					// What from local store (addr)
  18.324 -				(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
  18.325 -				dst_dbl_linestride_y,						// Two Y lines (depending on the widht of the destination resolution)
  18.326 -				STR_BUF+curr_dst_idx,						// Tag
  18.327 -				0, 0 );
  18.328 -
  18.329 -		mfc_put(	scaled_v_plane[curr_src_idx],					// What from local store (addr)
  18.330 -				(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.331 -				dst_dbl_linestride_vu,						// Two V lines (depending on the widht of the destination resolution)
  18.332 -				STR_BUF+curr_dst_idx,						// Tag
  18.333 -				0, 0 );
  18.334 -
  18.335 -		mfc_put(	scaled_u_plane[curr_src_idx],					// What from local store (addr)
  18.336 -				(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.337 -				dst_dbl_linestride_vu,						// Two U lines (depending on the widht of the destination resolution)
  18.338 -				STR_BUF+curr_dst_idx,						// Tag
  18.339 -				0, 0 );
  18.340 -		//---------------------------------------------------------------------------------------------
  18.341 -
  18.342 -
  18.343 -		// update for next cycle
  18.344 -		curr_src_idx = next_src_idx;
  18.345 -		curr_dst_idx = next_dst_idx;
  18.346 -
  18.347 -		curr_interpl_y_upper = next_interpl_y_upper;
  18.348 -		curr_interpl_y_lower = next_interpl_y_lower;
  18.349 -		curr_interpl_vu = next_interpl_vu;
  18.350 -
  18.351 -		vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
  18.352 -		vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
  18.353 -		vf_curr_NSweight_vu = vf_next_NSweight_vu;
  18.354 -
  18.355 -		curr_src_y_upper = next_src_y_upper;
  18.356 -		curr_src_y_lower = next_src_y_lower;
  18.357 -		curr_src_vu = next_src_vu;
  18.358 -
  18.359 -		curr_lsoff_v = next_lsoff_v;
  18.360 -		curr_lsoff_u = next_lsoff_u;
  18.361 -	}
  18.362 -
  18.363 -
  18.364 -
  18.365 -	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
  18.366 -
  18.367 -	// scaling
  18.368 -	// work line y_upper
  18.369 -	bilinear_scale_line_w16( y_plane[curr_src_idx],
  18.370 -			scaled_y_plane[curr_src_idx],
  18.371 -			dst_width,
  18.372 -			vf_x_scale,
  18.373 -			vf_curr_NSweight_y_upper,
  18.374 -			src_linestride_y );
  18.375 -	// work line y_lower
  18.376 -	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.377 -			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
  18.378 -			dst_width,
  18.379 -			vf_x_scale,
  18.380 -			vf_curr_NSweight_y_lower,
  18.381 -			src_linestride_y );
  18.382 -	// work line v
  18.383 -	bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
  18.384 -			scaled_v_plane[curr_src_idx],
  18.385 -			dst_width>>1,
  18.386 -			vf_x_scale,
  18.387 -			vf_curr_NSweight_vu,
  18.388 -			src_linestride_vu );
  18.389 -	// work line u
  18.390 -	bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
  18.391 -			scaled_u_plane[curr_src_idx],
  18.392 -			dst_width>>1,
  18.393 -			vf_x_scale,
  18.394 -			vf_curr_NSweight_vu,
  18.395 -			src_linestride_vu );
  18.396 -
  18.397 -
  18.398 -	// Store the result back to main memory into a destination buffer in YUV format
  18.399 -	//---------------------------------------------------------------------------------------------
  18.400 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.401 -
  18.402 -	// Perform three DMA transfers to 3 different locations in the main memory!
  18.403 -	// dst_width:	Pixel width of destination image
  18.404 -	// dst_addr:	Destination address in main memory
  18.405 -	// dst_vu:	Counter which is incremented one by one
  18.406 -	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
  18.407 -	mfc_put(	scaled_y_plane[curr_src_idx],					// What from local store (addr)
  18.408 -			(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
  18.409 -			dst_dbl_linestride_y,						// Two Y lines (depending on the widht of the destination resolution)
  18.410 -			STR_BUF+curr_dst_idx,						// Tag
  18.411 -			0, 0 );
  18.412 -
  18.413 -	mfc_put(	scaled_v_plane[curr_src_idx],					// What from local store (addr)
  18.414 -			(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.415 -			dst_dbl_linestride_vu,						// Two V lines (depending on the widht of the destination resolution)
  18.416 -			STR_BUF+curr_dst_idx,						// Tag
  18.417 -			0, 0 );
  18.418 -
  18.419 -	mfc_put(	scaled_u_plane[curr_src_idx],					// What from local store (addr)
  18.420 -			(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.421 -			dst_dbl_linestride_vu,						// Two U lines (depending on the widht of the destination resolution)
  18.422 -			STR_BUF+curr_dst_idx,						// Tag
  18.423 -			0, 0 );
  18.424 -
  18.425 -	// wait for completion
  18.426 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.427 -	//---------------------------------------------------------------------------------------------
  18.428 -}
  18.429 -
  18.430 -
  18.431 -/*
  18.432 - * scale_srcw16_dstw32()
  18.433 - *
  18.434 - * processes an input image of width 16
  18.435 - * scaling is done to a width 32
  18.436 - * no colour conversion is done here: the scaled Y, V and U planes
  18.437 - * are stored back to RAM, packed one after another at parms.dstBuffer
  18.438 - */
  18.439 -void scale_srcw16_dstw32() {
  18.440 -	// extract parameters
  18.441 -	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
  18.442 -
  18.443 -	unsigned int src_width = parms.src_pixel_width;
  18.444 -	unsigned int src_height = parms.src_pixel_height;
  18.445 -	unsigned int dst_width = parms.dst_pixel_width;
  18.446 -	unsigned int dst_height = parms.dst_pixel_height;
  18.447 -
  18.448 -	// YVU
  18.449 -	unsigned int src_linestride_y = src_width;
  18.450 -	unsigned int src_dbl_linestride_y = src_width<<1;
  18.451 -	unsigned int src_linestride_vu = src_width>>1;
  18.452 -	unsigned int src_dbl_linestride_vu = src_width;
  18.453 -	// scaled YVU
  18.454 -	unsigned int scaled_src_linestride_y = dst_width;
  18.455 -
  18.456 -	// ram addresses
  18.457 -	unsigned char* src_addr_y = parms.y_plane;
  18.458 -	unsigned char* src_addr_v = parms.v_plane;
  18.459 -	unsigned char* src_addr_u = parms.u_plane;
  18.460 -
  18.461 -	unsigned int dst_picture_size = dst_width*dst_height;
  18.462 -
  18.463 -	// Sizes for destination
  18.464 -	unsigned int dst_dbl_linestride_y = dst_width<<1;
  18.465 -	unsigned int dst_dbl_linestride_vu = dst_width>>1;
  18.466 -
  18.467 -	// Perform address calculation for Y, V and U in main memory with dst_addr as base
  18.468 -	unsigned char* dst_addr_main_memory_y = dst_addr;
  18.469 -	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
  18.470 -	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
  18.471 -
  18.472 -
  18.473 -	// for handling misalignment, addresses are precalculated
  18.474 -	unsigned char* precalc_src_addr_v = src_addr_v;
  18.475 -	unsigned char* precalc_src_addr_u = src_addr_u;
  18.476 -
  18.477 -	// calculate scale factors
  18.478 -	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
  18.479 -	float y_scale = (float)src_height/(float)dst_height;
  18.480 -
  18.481 -	// double buffered processing
  18.482 -	// buffer switching
  18.483 -	unsigned int curr_src_idx = 0;
  18.484 -	unsigned int curr_dst_idx = 0;
  18.485 -	unsigned int next_src_idx, next_dst_idx;
  18.486 -
  18.487 -	// 2 lines y as output, upper and lowerline
  18.488 -	unsigned int curr_interpl_y_upper = 0;
  18.489 -	unsigned int next_interpl_y_upper;
  18.490 -	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
  18.491 -	// only 1 line v/u output, both planes have the same dimension
  18.492 -	unsigned int curr_interpl_vu = 0;
  18.493 -	unsigned int next_interpl_vu;
  18.494 -
  18.495 -	// weights, calculated in every loop iteration
  18.496 -	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
  18.497 -	vector float vf_next_NSweight_y_upper;
  18.498 -	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
  18.499 -	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
  18.500 -	vector float vf_next_NSweight_vu;
  18.501 -
  18.502 -	// line indices for the src picture
  18.503 -	float curr_src_y_upper = 0.0f, next_src_y_upper;
  18.504 -	float curr_src_y_lower, next_src_y_lower;
  18.505 -	float curr_src_vu = 0.0f, next_src_vu;
  18.506 -
  18.507 -	// line indices for the dst picture
  18.508 -	unsigned int dst_y=0, dst_vu=0;
  18.509 -
  18.510 -	// offset for the v and u plane to handle misalignment
  18.511 -	unsigned int curr_lsoff_v = 0, next_lsoff_v;
  18.512 -	unsigned int curr_lsoff_u = 0, next_lsoff_u;
  18.513 -
  18.514 -	// calculate lower line indices
  18.515 -	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
  18.516 -	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
  18.517 -	// lower line weight
  18.518 -	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
  18.519 -
  18.520 -
  18.521 -	// start partially double buffered processing
  18.522 -	// get initial data, 2 sets of y, 1 set v, 1 set u
  18.523 -	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
  18.524 -	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.525 -			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
  18.526 -			src_dbl_linestride_y,
  18.527 -			RETR_BUF,
  18.528 -			0, 0 );
  18.529 -	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
  18.530 -	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
  18.531 -
  18.532 -	// iteration loop
  18.533 -	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
  18.534 -	// the scaled output is 2 lines y, 1 line v, 1 line u
  18.535 -	// the scaled YUV output is stored to RAM
  18.536 -	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
  18.537 -		dst_y = dst_vu<<1;
  18.538 -
  18.539 -		// calculate next indices
  18.540 -		next_src_vu = ((float)dst_vu+1)*y_scale;
  18.541 -		next_src_y_upper = ((float)dst_y+2)*y_scale;
  18.542 -		next_src_y_lower = ((float)dst_y+3)*y_scale;
  18.543 -
  18.544 -		next_interpl_vu = (unsigned int) next_src_vu;
  18.545 -		next_interpl_y_upper = (unsigned int) next_src_y_upper;
  18.546 -		next_interpl_y_lower = (unsigned int) next_src_y_lower;
  18.547 -
  18.548 -		// calculate weight NORTH-SOUTH
  18.549 -		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
  18.550 -		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
  18.551 -		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
  18.552 -
  18.553 -		// get next lines
  18.554 -		next_src_idx = curr_src_idx^1;
  18.555 -		next_dst_idx = curr_dst_idx^1;
  18.556 -
  18.557 -		// 4 lines y
  18.558 -		mfc_get( y_plane[next_src_idx],
  18.559 -				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
  18.560 -				src_dbl_linestride_y,
  18.561 -				RETR_BUF+next_src_idx,
  18.562 -				0, 0 );
  18.563 -		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
  18.564 -				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
  18.565 -				src_dbl_linestride_y,
  18.566 -				RETR_BUF+next_src_idx,
  18.567 -				0, 0 );
  18.568 -
  18.569 -		// 2 lines v
  18.570 -		precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
  18.571 -		next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
  18.572 -		mfc_get( v_plane[next_src_idx],
  18.573 -				((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
  18.574 -				src_dbl_linestride_vu+(next_lsoff_v<<1),
  18.575 -				RETR_BUF+next_src_idx,
  18.576 -				0, 0 );
  18.577 -		// 2 lines u
  18.578 -		precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
  18.579 -		next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
  18.580 -		mfc_get( u_plane[next_src_idx],
  18.581 -				((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
  18.582 -				src_dbl_linestride_vu+(next_lsoff_u<<1),	// size follows the u offset (was mistakenly next_lsoff_v)
  18.583 -				RETR_BUF+next_src_idx,
  18.584 -				0, 0 );
  18.585 -
  18.586 -		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
  18.587 -
  18.588 -		// scaling
  18.589 -		// work line y_upper
  18.590 -		bilinear_scale_line_w16( y_plane[curr_src_idx],
  18.591 -				scaled_y_plane[curr_src_idx],
  18.592 -				dst_width,
  18.593 -				vf_x_scale,
  18.594 -				vf_curr_NSweight_y_upper,
  18.595 -				src_linestride_y );
  18.596 -		// work line y_lower
  18.597 -		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.598 -				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
  18.599 -				dst_width,
  18.600 -				vf_x_scale,
  18.601 -				vf_curr_NSweight_y_lower,
  18.602 -				src_linestride_y );
  18.603 -		// work line v
  18.604 -		bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
  18.605 -				scaled_v_plane[curr_src_idx],
  18.606 -				dst_width>>1,
  18.607 -				vf_x_scale,
  18.608 -				vf_curr_NSweight_vu,
  18.609 -				src_linestride_vu );
  18.610 -		// work line u
  18.611 -		bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
  18.612 -				scaled_u_plane[curr_src_idx],
  18.613 -				dst_width>>1,
  18.614 -				vf_x_scale,
  18.615 -				vf_curr_NSweight_vu,
  18.616 -				src_linestride_vu );
  18.617 -
  18.618 -		//---------------------------------------------------------------------------------------------
  18.619 -		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.620 -
  18.621 -		// Perform three DMA transfers to 3 different locations in the main memory!
  18.622 -		// dst_width:	Pixel width of destination image
  18.623 -		// dst_addr:	Destination address in main memory
  18.624 -		// dst_vu:	Counter which is incremented one by one
  18.625 -		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
  18.626 -
  18.627 -		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
  18.628 -				(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
  18.629 -				dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
  18.630 -				STR_BUF+curr_dst_idx,								// Tag
  18.631 -				0, 0 );
  18.632 -
  18.633 -		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
  18.634 -				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.635 -				dst_dbl_linestride_vu,								// Two V lines (depending on the width of the destination resolution)
  18.636 -				STR_BUF+curr_dst_idx,								// Tag
  18.637 -				0, 0 );
  18.638 -
  18.639 -		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
  18.640 -				(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.641 -				dst_dbl_linestride_vu,								// Two U lines (depending on the width of the destination resolution)
  18.642 -				STR_BUF+curr_dst_idx,								// Tag
  18.643 -				0, 0 );
  18.644 -		//---------------------------------------------------------------------------------------------
  18.645 -
  18.646 -
  18.647 -		// update for next cycle
  18.648 -		curr_src_idx = next_src_idx;
  18.649 -		curr_dst_idx = next_dst_idx;
  18.650 -
  18.651 -		curr_interpl_y_upper = next_interpl_y_upper;
  18.652 -		curr_interpl_y_lower = next_interpl_y_lower;
  18.653 -		curr_interpl_vu = next_interpl_vu;
  18.654 -
  18.655 -		vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;	// was a self-assignment: the y weights never advanced
  18.656 -		vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;	// was a self-assignment: the y weights never advanced
  18.657 -		vf_curr_NSweight_vu = vf_next_NSweight_vu;
  18.658 -
  18.659 -		curr_src_y_upper = next_src_y_upper;
  18.660 -		curr_src_y_lower = next_src_y_lower;
  18.661 -		curr_src_vu = next_src_vu;
  18.662 -
  18.663 -		curr_lsoff_v = next_lsoff_v;
  18.664 -		curr_lsoff_u = next_lsoff_u;
  18.665 -	}
  18.666 -
  18.667 -
  18.668 -
  18.669 -	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
  18.670 -
  18.671 -	// scaling
  18.672 -	// work line y_upper
  18.673 -	bilinear_scale_line_w16( y_plane[curr_src_idx],
  18.674 -			scaled_y_plane[curr_src_idx],
  18.675 -			dst_width,
  18.676 -			vf_x_scale,
  18.677 -			vf_curr_NSweight_y_upper,
  18.678 -			src_linestride_y );
  18.679 -	// work line y_lower
  18.680 -	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.681 -			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
  18.682 -			dst_width,
  18.683 -			vf_x_scale,
  18.684 -			vf_curr_NSweight_y_lower,
  18.685 -			src_linestride_y );
  18.686 -	// work line v
  18.687 -	bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
  18.688 -			scaled_v_plane[curr_src_idx],
  18.689 -			dst_width>>1,
  18.690 -			vf_x_scale,
  18.691 -			vf_curr_NSweight_vu,
  18.692 -			src_linestride_vu );
  18.693 -	// work line u
  18.694 -	bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
  18.695 -			scaled_u_plane[curr_src_idx],
  18.696 -			dst_width>>1,
  18.697 -			vf_x_scale,
  18.698 -			vf_curr_NSweight_vu,
  18.699 -			src_linestride_vu );
  18.700 -
  18.701 -	//---------------------------------------------------------------------------------------------
  18.702 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.703 -
  18.704 -	// Perform three DMA transfers to 3 different locations in the main memory!
  18.705 -	// dst_width:	Pixel width of destination image
  18.706 -	// dst_addr:	Destination address in main memory
  18.707 -	// dst_vu:	Counter which is incremented one by one
  18.708 -	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
  18.709 -
  18.710 -	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
  18.711 -			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
  18.712 -			dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
  18.713 -			STR_BUF+curr_dst_idx,								// Tag
  18.714 -			0, 0 );
  18.715 -
  18.716 -	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
  18.717 -			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.718 -			dst_dbl_linestride_vu,								// Two V lines (depending on the width of the destination resolution)
  18.719 -			STR_BUF+curr_dst_idx,								// Tag
  18.720 -			0, 0 );
  18.721 -
  18.722 -	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
  18.723 -			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.724 -			dst_dbl_linestride_vu,								// Two U lines (depending on the width of the destination resolution)
  18.725 -			STR_BUF+curr_dst_idx,								// Tag
  18.726 -			0, 0 );
  18.727 -
  18.728 -	// wait for completion
  18.729 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.730 -	//---------------------------------------------------------------------------------------------
  18.731 -}
  18.732 -
  18.733 -
  18.734 -/*
  18.735 - * scale_srcw32_dstw16()
  18.736 - *
  18.737 - * processes an input image of width 32
  18.738 - * scaling is done to a width 16
  18.739 - * no colour conversion is done here: the scaled Y, V and U planes
  18.740 - * are stored back to RAM, packed one after another at parms.dstBuffer
  18.741 - */
  18.742 -void scale_srcw32_dstw16() {
  18.743 -	// extract parameters
  18.744 -	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
  18.745 -
  18.746 -	unsigned int src_width = parms.src_pixel_width;
  18.747 -	unsigned int src_height = parms.src_pixel_height;
  18.748 -	unsigned int dst_width = parms.dst_pixel_width;
  18.749 -	unsigned int dst_height = parms.dst_pixel_height;
  18.750 -
  18.751 -	// YVU
  18.752 -	unsigned int src_linestride_y = src_width;
  18.753 -	unsigned int src_dbl_linestride_y = src_width<<1;
  18.754 -	unsigned int src_linestride_vu = src_width>>1;
  18.755 -	unsigned int src_dbl_linestride_vu = src_width;
  18.756 -	// scaled YVU
  18.757 -	unsigned int scaled_src_linestride_y = dst_width;
  18.758 -
  18.759 -	// ram addresses
  18.760 -	unsigned char* src_addr_y = parms.y_plane;
  18.761 -	unsigned char* src_addr_v = parms.v_plane;
  18.762 -	unsigned char* src_addr_u = parms.u_plane;
  18.763 -
  18.764 -	unsigned int dst_picture_size = dst_width*dst_height;
  18.765 -
  18.766 -	// Sizes for destination
  18.767 -	unsigned int dst_dbl_linestride_y = dst_width<<1;
  18.768 -	unsigned int dst_dbl_linestride_vu = dst_width>>1;
  18.769 -
  18.770 -	// Perform address calculation for Y, V and U in main memory with dst_addr as base
  18.771 -	unsigned char* dst_addr_main_memory_y = dst_addr;
  18.772 -	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
  18.773 -	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
  18.774 -
  18.775 -	// calculate scale factors
  18.776 -	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
  18.777 -	float y_scale = (float)src_height/(float)dst_height;
  18.778 -
  18.779 -	// double buffered processing
  18.780 -	// buffer switching
  18.781 -	unsigned int curr_src_idx = 0;
  18.782 -	unsigned int curr_dst_idx = 0;
  18.783 -	unsigned int next_src_idx, next_dst_idx;
  18.784 -
  18.785 -	// 2 lines y as output, upper and lowerline
  18.786 -	unsigned int curr_interpl_y_upper = 0;
  18.787 -	unsigned int next_interpl_y_upper;
  18.788 -	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
  18.789 -	// only 1 line v/u output, both planes have the same dimension
  18.790 -	unsigned int curr_interpl_vu = 0;
  18.791 -	unsigned int next_interpl_vu;
  18.792 -
  18.793 -	// weights, calculated in every loop iteration
  18.794 -	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
  18.795 -	vector float vf_next_NSweight_y_upper;
  18.796 -	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
  18.797 -	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
  18.798 -	vector float vf_next_NSweight_vu;
  18.799 -
  18.800 -	// line indices for the src picture
  18.801 -	float curr_src_y_upper = 0.0f, next_src_y_upper;
  18.802 -	float curr_src_y_lower, next_src_y_lower;
  18.803 -	float curr_src_vu = 0.0f, next_src_vu;
  18.804 -
  18.805 -	// line indices for the dst picture
  18.806 -	unsigned int dst_y=0, dst_vu=0;
  18.807 -
  18.808 -	// calculate lower line indices
  18.809 -	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
  18.810 -	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
  18.811 -	// lower line weight
  18.812 -	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
  18.813 -
  18.814 -
  18.815 -	// start partially double buffered processing
  18.816 -	// get initial data, 2 sets of y, 1 set v, 1 set u
  18.817 -	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
  18.818 -	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.819 -			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
  18.820 -			src_dbl_linestride_y,
  18.821 -			RETR_BUF,
  18.822 -			0, 0 );
  18.823 -	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
  18.824 -	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
  18.825 -
  18.826 -	// iteration loop
  18.827 -	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
  18.828 -	// the scaled output is 2 lines y, 1 line v, 1 line u
  18.829 -	// the scaled YUV output is stored to RAM
  18.830 -	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
  18.831 -		dst_y = dst_vu<<1;
  18.832 -
  18.833 -		// calculate next indices
  18.834 -		next_src_vu = ((float)dst_vu+1)*y_scale;
  18.835 -		next_src_y_upper = ((float)dst_y+2)*y_scale;
  18.836 -		next_src_y_lower = ((float)dst_y+3)*y_scale;
  18.837 -
  18.838 -		next_interpl_vu = (unsigned int) next_src_vu;
  18.839 -		next_interpl_y_upper = (unsigned int) next_src_y_upper;
  18.840 -		next_interpl_y_lower = (unsigned int) next_src_y_lower;
  18.841 -
  18.842 -		// calculate weight NORTH-SOUTH
  18.843 -		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
  18.844 -		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
  18.845 -		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
  18.846 -
  18.847 -		// get next lines
  18.848 -		next_src_idx = curr_src_idx^1;
  18.849 -		next_dst_idx = curr_dst_idx^1;
  18.850 -
  18.851 -		// 4 lines y
  18.852 -		mfc_get( y_plane[next_src_idx],
  18.853 -				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
  18.854 -				src_dbl_linestride_y,
  18.855 -				RETR_BUF+next_src_idx,
  18.856 -				0, 0 );
  18.857 -		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
  18.858 -				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
  18.859 -				src_dbl_linestride_y,
  18.860 -				RETR_BUF+next_src_idx,
  18.861 -				0, 0 );
  18.862 -
  18.863 -		// 2 lines v
  18.864 -		mfc_get( v_plane[next_src_idx],
  18.865 -				(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
  18.866 -				src_dbl_linestride_vu,
  18.867 -				RETR_BUF+next_src_idx,
  18.868 -				0, 0 );
  18.869 -		// 2 lines u
  18.870 -		mfc_get( u_plane[next_src_idx],
  18.871 -				(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
  18.872 -				src_dbl_linestride_vu,
  18.873 -				RETR_BUF+next_src_idx,
  18.874 -				0, 0 );
  18.875 -
  18.876 -		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
  18.877 -
  18.878 -		// scaling
  18.879 -		// work line y_upper
  18.880 -		bilinear_scale_line_w16( y_plane[curr_src_idx],
  18.881 -				scaled_y_plane[curr_src_idx],
  18.882 -				dst_width,
  18.883 -				vf_x_scale,
  18.884 -				vf_curr_NSweight_y_upper,
  18.885 -				src_linestride_y );
  18.886 -		// work line y_lower
  18.887 -		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.888 -				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
  18.889 -				dst_width,
  18.890 -				vf_x_scale,
  18.891 -				vf_curr_NSweight_y_lower,
  18.892 -				src_linestride_y );
  18.893 -		// work line v
  18.894 -		bilinear_scale_line_w16( v_plane[curr_src_idx],
  18.895 -				scaled_v_plane[curr_src_idx],
  18.896 -				dst_width>>1,
  18.897 -				vf_x_scale,
  18.898 -				vf_curr_NSweight_vu,
  18.899 -				src_linestride_vu );
  18.900 -		// work line u
  18.901 -		bilinear_scale_line_w16( u_plane[curr_src_idx],
  18.902 -				scaled_u_plane[curr_src_idx],
  18.903 -				dst_width>>1,
  18.904 -				vf_x_scale,
  18.905 -				vf_curr_NSweight_vu,
  18.906 -				src_linestride_vu );
  18.907 -
  18.908 -		//---------------------------------------------------------------------------------------------
  18.909 -		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.910 -
  18.911 -		// Perform three DMA transfers to 3 different locations in the main memory!
  18.912 -		// dst_width:	Pixel width of destination image
  18.913 -		// dst_addr:	Destination address in main memory
  18.914 -		// dst_vu:	Counter which is incremented one by one
  18.915 -		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
  18.916 -
  18.917 -		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
  18.918 -				(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
  18.919 -				dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
  18.920 -				STR_BUF+curr_dst_idx,								// Tag
  18.921 -				0, 0 );
  18.922 -
  18.923 -		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
  18.924 -				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
  18.925 -				dst_dbl_linestride_vu,								// Two V lines (depending on the width of the destination resolution)
  18.926 -				STR_BUF+curr_dst_idx,								// Tag
  18.927 -				0, 0 );
  18.928 -
  18.929 -		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
  18.930 -				(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
  18.931 -				dst_dbl_linestride_vu,								// Two U lines (depending on the width of the destination resolution)
  18.932 -				STR_BUF+curr_dst_idx,								// Tag
  18.933 -				0, 0 );
  18.934 -		//---------------------------------------------------------------------------------------------
  18.935 -
  18.936 -
  18.937 -		// update for next cycle
  18.938 -		curr_src_idx = next_src_idx;
  18.939 -		curr_dst_idx = next_dst_idx;
  18.940 -
  18.941 -		curr_interpl_y_upper = next_interpl_y_upper;
  18.942 -		curr_interpl_y_lower = next_interpl_y_lower;
  18.943 -		curr_interpl_vu = next_interpl_vu;
  18.944 -
  18.945 -		vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;	// was a self-assignment: the y weights never advanced
  18.946 -		vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;	// was a self-assignment: the y weights never advanced
  18.947 -		vf_curr_NSweight_vu = vf_next_NSweight_vu;
  18.948 -
  18.949 -		curr_src_y_upper = next_src_y_upper;
  18.950 -		curr_src_y_lower = next_src_y_lower;
  18.951 -		curr_src_vu = next_src_vu;
  18.952 -	}
  18.953 -
  18.954 -
  18.955 -
  18.956 -	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
  18.957 -
  18.958 -	// scaling
  18.959 -	// work line y_upper
  18.960 -	bilinear_scale_line_w16( y_plane[curr_src_idx],
  18.961 -			scaled_y_plane[curr_src_idx],
  18.962 -			dst_width,
  18.963 -			vf_x_scale,
  18.964 -			vf_curr_NSweight_y_upper,
  18.965 -			src_linestride_y );
  18.966 -	// work line y_lower
  18.967 -	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
  18.968 -			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
  18.969 -			dst_width,
  18.970 -			vf_x_scale,
  18.971 -			vf_curr_NSweight_y_lower,
  18.972 -			src_linestride_y );
  18.973 -	// work line v
  18.974 -	bilinear_scale_line_w16( v_plane[curr_src_idx],
  18.975 -			scaled_v_plane[curr_src_idx],
  18.976 -			dst_width>>1,
  18.977 -			vf_x_scale,
  18.978 -			vf_curr_NSweight_vu,
  18.979 -			src_linestride_vu );
  18.980 -	// work line u
  18.981 -	bilinear_scale_line_w16( u_plane[curr_src_idx],
  18.982 -			scaled_u_plane[curr_src_idx],
  18.983 -			dst_width>>1,
  18.984 -			vf_x_scale,
  18.985 -			vf_curr_NSweight_vu,
  18.986 -			src_linestride_vu );
  18.987 -
  18.988 -
  18.989 -	//---------------------------------------------------------------------------------------------
  18.990 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
  18.991 -
  18.992 -	// Perform three DMA transfers to 3 different locations in the main memory!
  18.993 -	// dst_width:	Pixel width of destination image
  18.994 -	// dst_addr:	Destination address in main memory
  18.995 -	// dst_vu:	Counter which is incremented one by one
  18.996 -	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
  18.997 -
  18.998 -	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
  18.999 -			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
 18.1000 -			dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
 18.1001 -			STR_BUF+curr_dst_idx,								// Tag
 18.1002 -			0, 0 );
 18.1003 -
 18.1004 -	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
 18.1005 -			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
 18.1006 -			dst_dbl_linestride_vu,								// Two V lines (depending on the width of the destination resolution)
 18.1007 -			STR_BUF+curr_dst_idx,								// Tag
 18.1008 -			0, 0 );
 18.1009 -
 18.1010 -	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
 18.1011 -			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
 18.1012 -			dst_dbl_linestride_vu,								// Two U lines (depending on the width of the destination resolution)
 18.1013 -			STR_BUF+curr_dst_idx,								// Tag
 18.1014 -			0, 0 );
 18.1015 -
 18.1016 -	// wait for completion
 18.1017 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
 18.1018 -	//---------------------------------------------------------------------------------------------
 18.1019 -}
 18.1020 -
 18.1021 -
 18.1022 -/**
 18.1023 - * scale_srcw32_dstw32()
 18.1024 - *
 18.1025 - * processes an input image of width 32
 18.1026 - * scaling is done to a width 32
 18.1027 - * no colour conversion is done here: the scaled Y, V and U planes are kept as YUV
 18.1028 - * result stored in RAM (three DMA puts per iteration, based on parms.dstBuffer)
 18.1029 - */
 18.1030 -void scale_srcw32_dstw32() {
 18.1031 -	// extract parameters
 18.1032 -	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
 18.1033 -
 18.1034 -	unsigned int src_width = parms.src_pixel_width;
 18.1035 -	unsigned int src_height = parms.src_pixel_height;
 18.1036 -	unsigned int dst_width = parms.dst_pixel_width;
 18.1037 -	unsigned int dst_height = parms.dst_pixel_height;
 18.1038 -
 18.1039 -	// YVU
 18.1040 -	unsigned int src_linestride_y = src_width;
 18.1041 -	unsigned int src_dbl_linestride_y = src_width<<1;
 18.1042 -	unsigned int src_linestride_vu = src_width>>1;
 18.1043 -	unsigned int src_dbl_linestride_vu = src_width;
 18.1044 -
 18.1045 -	// scaled YVU
 18.1046 -	unsigned int scaled_src_linestride_y = dst_width;
 18.1047 -
 18.1048 -	// ram addresses
 18.1049 -	unsigned char* src_addr_y = parms.y_plane;
 18.1050 -	unsigned char* src_addr_v = parms.v_plane;
 18.1051 -	unsigned char* src_addr_u = parms.u_plane;
 18.1052 -
 18.1053 -	unsigned int dst_picture_size = dst_width*dst_height;
 18.1054 -
 18.1055 -	// Sizes for destination
 18.1056 -	unsigned int dst_dbl_linestride_y = dst_width<<1;
 18.1057 -	unsigned int dst_dbl_linestride_vu = dst_width>>1;
 18.1058 -
 18.1059 -	// Perform address calculation for Y, V and U in main memory with dst_addr as base
 18.1060 -	unsigned char* dst_addr_main_memory_y = dst_addr;
 18.1061 -	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
 18.1062 -	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
 18.1063 -
 18.1064 -	// calculate scale factors
 18.1065 -	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
 18.1066 -	float y_scale = (float)src_height/(float)dst_height;
 18.1067 -
 18.1068 -	// double buffered processing
 18.1069 -	// buffer switching
 18.1070 -	unsigned int curr_src_idx = 0;
 18.1071 -	unsigned int curr_dst_idx = 0;
 18.1072 -	unsigned int next_src_idx, next_dst_idx;
 18.1073 -
 18.1074 -	// 2 lines y as output, upper and lowerline
 18.1075 -	unsigned int curr_interpl_y_upper = 0;
 18.1076 -	unsigned int next_interpl_y_upper;
 18.1077 -	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
 18.1078 -	// only 1 line v/u output, both planes have the same dimension
 18.1079 -	unsigned int curr_interpl_vu = 0;
 18.1080 -	unsigned int next_interpl_vu;
 18.1081 -
 18.1082 -	// weights, calculated in every loop iteration
 18.1083 -	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
 18.1084 -	vector float vf_next_NSweight_y_upper;
 18.1085 -	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
 18.1086 -	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
 18.1087 -	vector float vf_next_NSweight_vu;
 18.1088 -
 18.1089 -	// line indices for the src picture
 18.1090 -	float curr_src_y_upper = 0.0f, next_src_y_upper;
 18.1091 -	float curr_src_y_lower, next_src_y_lower;
 18.1092 -	float curr_src_vu = 0.0f, next_src_vu;
 18.1093 -
 18.1094 -	// line indices for the dst picture
 18.1095 -	unsigned int dst_y=0, dst_vu=0;
 18.1096 -
 18.1097 -	// calculate lower line indices
 18.1098 -	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
 18.1099 -	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
 18.1100 -	// lower line weight
 18.1101 -	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
 18.1102 -
 18.1103 -
 18.1104 -	// start partially double buffered processing
 18.1105 -	// get initial data, 2 sets of y, 1 set v, 1 set u
 18.1106 -	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
 18.1107 -	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
 18.1108 -			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
 18.1109 -			src_dbl_linestride_y,
 18.1110 -			RETR_BUF,
 18.1111 -			0, 0 );
 18.1112 -	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
 18.1113 -	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
 18.1114 -
 18.1115 -	// iteration loop
 18.1116 -	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
 18.1117 -	// the scaled output is 2 lines y, 1 line v, 1 line u
 18.1118 -	// the scaled YUV output is stored to RAM (no yuv2rgb conversion in this function)
 18.1119 -	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
 18.1120 -		dst_y = dst_vu<<1;
 18.1121 -
 18.1122 -		// calculate next indices
 18.1123 -		next_src_vu = ((float)dst_vu+1)*y_scale;
 18.1124 -		next_src_y_upper = ((float)dst_y+2)*y_scale;
 18.1125 -		next_src_y_lower = ((float)dst_y+3)*y_scale;
 18.1126 -
 18.1127 -		next_interpl_vu = (unsigned int) next_src_vu;
 18.1128 -		next_interpl_y_upper = (unsigned int) next_src_y_upper;
 18.1129 -		next_interpl_y_lower = (unsigned int) next_src_y_lower;
 18.1130 -
 18.1131 -		// calculate weight NORTH-SOUTH
 18.1132 -		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
 18.1133 -		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
 18.1134 -		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
 18.1135 -
 18.1136 -		// get next lines
 18.1137 -		next_src_idx = curr_src_idx^1;
 18.1138 -		next_dst_idx = curr_dst_idx^1;
 18.1139 -
 18.1140 -		// 4 lines y
 18.1141 -		mfc_get( y_plane[next_src_idx],
 18.1142 -				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
 18.1143 -				src_dbl_linestride_y,
 18.1144 -				RETR_BUF+next_src_idx,
 18.1145 -				0, 0 );
 18.1146 -		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
 18.1147 -				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
 18.1148 -				src_dbl_linestride_y,
 18.1149 -				RETR_BUF+next_src_idx,
 18.1150 -				0, 0 );
 18.1151 -
 18.1152 -		// 2 lines v
 18.1153 -		mfc_get( v_plane[next_src_idx],
 18.1154 -				(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
 18.1155 -				src_dbl_linestride_vu,
 18.1156 -				RETR_BUF+next_src_idx,
 18.1157 -				0, 0 );
 18.1158 -		// 2 lines u
 18.1159 -		mfc_get( u_plane[next_src_idx],
 18.1160 -				(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
 18.1161 -				src_dbl_linestride_vu,
 18.1162 -				RETR_BUF+next_src_idx,
 18.1163 -				0, 0 );
 18.1164 -
 18.1165 -		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
 18.1166 -
 18.1167 -		// scaling
 18.1168 -		// work line y_upper
 18.1169 -		bilinear_scale_line_w16( y_plane[curr_src_idx],
 18.1170 -				scaled_y_plane[curr_src_idx],
 18.1171 -				dst_width,
 18.1172 -				vf_x_scale,
 18.1173 -				vf_curr_NSweight_y_upper,
 18.1174 -				src_linestride_y );
 18.1175 -		// work line y_lower
 18.1176 -		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
 18.1177 -				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
 18.1178 -				dst_width,
 18.1179 -				vf_x_scale,
 18.1180 -				vf_curr_NSweight_y_lower,
 18.1181 -				src_linestride_y );
 18.1182 -		// work line v
 18.1183 -		bilinear_scale_line_w16( v_plane[curr_src_idx],
 18.1184 -				scaled_v_plane[curr_src_idx],
 18.1185 -				dst_width>>1,
 18.1186 -				vf_x_scale,
 18.1187 -				vf_curr_NSweight_vu,
 18.1188 -				src_linestride_vu );
 18.1189 -		// work line u
 18.1190 -		bilinear_scale_line_w16( u_plane[curr_src_idx],
 18.1191 -				scaled_u_plane[curr_src_idx],
 18.1192 -				dst_width>>1,
 18.1193 -				vf_x_scale,
 18.1194 -				vf_curr_NSweight_vu,
 18.1195 -				src_linestride_vu );
 18.1196 -
 18.1197 -
 18.1198 -
 18.1199 -		// Store the result back to main memory into a destination buffer in YUV format
 18.1200 -		//---------------------------------------------------------------------------------------------
 18.1201 -		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
 18.1202 -
 18.1203 -		// Perform three DMA transfers to 3 different locations in the main memory!
 18.1204 -		// dst_width:	Pixel width of destination image
 18.1205 -		// dst_addr:	Destination address in main memory
 18.1206 -		// dst_vu:	Counter which is incremented one by one
 18.1207 -		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
 18.1208 -
 18.1209 -		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
 18.1210 -				(unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
 18.1211 -				dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
 18.1212 -				STR_BUF+curr_dst_idx,								// Tag
 18.1213 -				0, 0 );
 18.1214 -
 18.1215 -		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
 18.1216 -				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
 18.1217 -				dst_dbl_linestride_vu,								// One V line, dst_width>>1 bytes (depending on the width of the destination resolution)
 18.1218 -				STR_BUF+curr_dst_idx,								// Tag
 18.1219 -				0, 0 );
 18.1220 -
 18.1221 -		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
 18.1222 -				(unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
 18.1223 -				dst_dbl_linestride_vu,								// One U line, dst_width>>1 bytes (depending on the width of the destination resolution)
 18.1224 -				STR_BUF+curr_dst_idx,								// Tag
 18.1225 -				0, 0 );
 18.1226 -		//---------------------------------------------------------------------------------------------
 18.1227 -
 18.1228 -
 18.1229 -		// update for next cycle: the prefetched "next" state becomes the current one
 18.1230 -		curr_src_idx = next_src_idx;
 18.1231 -		curr_dst_idx = next_dst_idx;
 18.1232 -
 18.1233 -		curr_interpl_y_upper = next_interpl_y_upper;
 18.1234 -		curr_interpl_y_lower = next_interpl_y_lower;
 18.1235 -		curr_interpl_vu = next_interpl_vu;
 18.1236 -
 18.1237 -		vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;	// was a self-assignment: luma weights must follow the prefetched lines
 18.1238 -		vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;	// same as for the chroma weight below
 18.1239 -		vf_curr_NSweight_vu = vf_next_NSweight_vu;
 18.1240 -
 18.1241 -		curr_src_y_upper = next_src_y_upper;
 18.1242 -		curr_src_y_lower = next_src_y_lower;
 18.1243 -		curr_src_vu = next_src_vu;
 18.1244 -	}
 18.1245 -
 18.1246 -
 18.1247 -
 18.1248 -	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
 18.1249 -
 18.1250 -	// scaling
 18.1251 -	// work line y_upper
 18.1252 -	bilinear_scale_line_w16( y_plane[curr_src_idx],
 18.1253 -			scaled_y_plane[curr_src_idx],
 18.1254 -			dst_width,
 18.1255 -			vf_x_scale,
 18.1256 -			vf_curr_NSweight_y_upper,
 18.1257 -			src_linestride_y );
 18.1258 -	// work line y_lower
 18.1259 -	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
 18.1260 -			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
 18.1261 -			dst_width,
 18.1262 -			vf_x_scale,
 18.1263 -			vf_curr_NSweight_y_lower,
 18.1264 -			src_linestride_y );
 18.1265 -	// work line v
 18.1266 -	bilinear_scale_line_w16( v_plane[curr_src_idx],
 18.1267 -			scaled_v_plane[curr_src_idx],
 18.1268 -			dst_width>>1,
 18.1269 -			vf_x_scale,
 18.1270 -			vf_curr_NSweight_vu,
 18.1271 -			src_linestride_vu );
 18.1272 -	// work line u
 18.1273 -	bilinear_scale_line_w16( u_plane[curr_src_idx],
 18.1274 -			scaled_u_plane[curr_src_idx],
 18.1275 -			dst_width>>1,
 18.1276 -			vf_x_scale,
 18.1277 -			vf_curr_NSweight_vu,
 18.1278 -			src_linestride_vu );
 18.1279 -
 18.1280 -
 18.1281 -	// Store the result back to main memory into a destination buffer in YUV format
 18.1282 -	//---------------------------------------------------------------------------------------------
 18.1283 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
 18.1284 -
 18.1285 -	// Perform three DMA transfers to 3 different locations in the main memory!
 18.1286 -	// dst_width:	Pixel width of destination image
 18.1287 -	// dst_addr:	Destination address in main memory
 18.1288 -	// dst_vu:	Counter which is incremented one by one
 18.1289 -	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
 18.1290 -
 18.1291 -	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
 18.1292 -			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
 18.1293 -			dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
 18.1294 -			STR_BUF+curr_dst_idx,								// Tag
 18.1295 -			0, 0 );
 18.1296 -
 18.1297 -	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
 18.1298 -			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
 18.1299 -			dst_dbl_linestride_vu,								// One V line, dst_width>>1 bytes (depending on the width of the destination resolution)
 18.1300 -			STR_BUF+curr_dst_idx,								// Tag
 18.1301 -			0, 0 );
 18.1302 -
 18.1303 -	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
 18.1304 -			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
 18.1305 -			dst_dbl_linestride_vu,								// One U line, dst_width>>1 bytes (depending on the width of the destination resolution)
 18.1306 -			STR_BUF+curr_dst_idx,								// Tag
 18.1307 -			0, 0 );
 18.1308 -
 18.1309 -	// wait for completion
 18.1310 -	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
 18.1311 -	//---------------------------------------------------------------------------------------------
 18.1312 -}
 18.1313 -
 18.1314 -
 18.1315 -/*
 18.1316 - * bilinear_scale_line_w8()
 18.1317 - *
 18.1318 - * bilinearly scales one line of yuv-input; 8 destination pixels are produced per loop pass, so the width has to be a multiple of 8
 18.1319 - * scaled yuv-output is written to local store buffer (dst_)
 18.1320 - *
 18.1321 - * @param src buffer for 2 lines input; the second (SOUTH) line is read src_linestride bytes after the first
 18.1322 - * @param dst_ buffer for 1 line output
 18.1323 - * @param dst_width the width of the destination line
 18.1324 - * @param vf_x_scale a float vector, at each entry is the x_scale-factor (destination x times this factor gives source x)
 18.1325 - * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line: result = EWtop + NSweight*(EWbottom-EWtop)
 18.1326 - * @param src_linestride the stride of the srcline
 18.1327 - */
 18.1328 -void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
 18.1329 -
 18.1330 -	unsigned char* dst = dst_;
 18.1331 -
 18.1332 -	unsigned int dst_x;
 18.1333 -	for( dst_x=0; dst_x<dst_width; dst_x+=8) {
 18.1334 -		// address calculation for loading the 4 surrounding pixels of each calculated
 18.1335 -		// destination pixel
 18.1336 -		vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
 18.1337 -		// lower range->first 4 pixels (dst_x+0 .. dst_x+3)
 18.1338 -		// upper range->next 4 pixels (dst_x+4 .. dst_x+7)
 18.1339 -		vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 };
 18.1340 -		vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 };
 18.1341 -		vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range );
 18.1342 -		vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range );
 18.1343 -
 18.1344 -		// calculate weight EAST-WEST: fractional part of the source x position (src_x - truncated src_x)
 18.1345 -		vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 );
 18.1346 -		vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 );
 18.1347 -		vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale );
 18.1348 -		vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale );
 18.1349 -		vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 );
 18.1350 -		vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 );
 18.1351 -		vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 );
 18.1352 -		vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 );
 18.1353 -		vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range );
 18.1354 -		vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range );
 18.1355 -
 18.1356 -		// calculate address offset
 18.1357 -		//
 18.1358 -		// pixel NORTH WEST
 18.1359 -		vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range;
 18.1360 -		vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range;
 18.1361 -
 18.1362 -		// pixel NORTH EAST-->(offpixelNW+1)
 18.1363 -		vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
 18.1364 -		vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 );
 18.1365 -		vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 );
 18.1366 -
 18.1367 -		// SOUTH-WEST-->(offpixelNW+src_linestride)
 18.1368 -		vector unsigned int vui_srclinestride = spu_splats( src_linestride );
 18.1369 -		vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range );
 18.1370 -		vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range );
 18.1371 -
 18.1372 -		// SOUTH-EAST-->(offpixelNW+src_linestride+1)
 18.1373 -		vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range );
 18.1374 -		vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range );
 18.1375 -
 18.1376 -		// calculate each address (base address src + offset)
 18.1377 -		vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
 18.1378 -		vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range );
 18.1379 -		vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range );
 18.1380 -		vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range );
 18.1381 -		vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range );
 18.1382 -
 18.1383 -		vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range );
 18.1384 -		vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range );
 18.1385 -		vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range );
 18.1386 -		vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range );
 18.1387 -
 18.1388 -		// get each pixel
 18.1389 -		//
 18.1390 -		// scalar byte load, afterwards insertion at byte 3, 7, 11 or 15 of a zeroed vector, i.e. one byte per 32-bit element
 18.1391 -		// NORTH WEST
 18.1392 -		vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 18.1393 -		vector unsigned char vuc_pixel_NW_lower_range = spu_insert(
 18.1394 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 );
 18.1395 -		vuc_pixel_NW_lower_range = spu_insert(
 18.1396 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )),
 18.1397 -				vuc_pixel_NW_lower_range, 7 );
 18.1398 -		vuc_pixel_NW_lower_range = spu_insert(
 18.1399 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )),
 18.1400 -				vuc_pixel_NW_lower_range, 11 );
 18.1401 -		vuc_pixel_NW_lower_range = spu_insert(
 18.1402 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )),
 18.1403 -				vuc_pixel_NW_lower_range, 15 );
 18.1404 -
 18.1405 -		vector unsigned char vuc_pixel_NW_upper_range = spu_insert(
 18.1406 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 );
 18.1407 -		vuc_pixel_NW_upper_range = spu_insert(
 18.1408 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )),
 18.1409 -				vuc_pixel_NW_upper_range, 7 );
 18.1410 -		vuc_pixel_NW_upper_range = spu_insert(
 18.1411 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )),
 18.1412 -				vuc_pixel_NW_upper_range, 11 );
 18.1413 -		vuc_pixel_NW_upper_range = spu_insert(
 18.1414 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )),
 18.1415 -				vuc_pixel_NW_upper_range, 15 );
 18.1416 -
 18.1417 -		// NORTH EAST
 18.1418 -		vector unsigned char vuc_pixel_NE_lower_range = spu_insert(
 18.1419 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 );
 18.1420 -		vuc_pixel_NE_lower_range = spu_insert(
 18.1421 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )),
 18.1422 -				vuc_pixel_NE_lower_range, 7 );
 18.1423 -		vuc_pixel_NE_lower_range = spu_insert(
 18.1424 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )),
 18.1425 -				vuc_pixel_NE_lower_range, 11 );
 18.1426 -		vuc_pixel_NE_lower_range = spu_insert(
 18.1427 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )),
 18.1428 -				vuc_pixel_NE_lower_range, 15 );
 18.1429 -
 18.1430 -		vector unsigned char vuc_pixel_NE_upper_range = spu_insert(
 18.1431 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 );
 18.1432 -		vuc_pixel_NE_upper_range = spu_insert(
 18.1433 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )),
 18.1434 -				vuc_pixel_NE_upper_range, 7 );
 18.1435 -		vuc_pixel_NE_upper_range = spu_insert(
 18.1436 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )),
 18.1437 -				vuc_pixel_NE_upper_range, 11 );
 18.1438 -		vuc_pixel_NE_upper_range = spu_insert(
 18.1439 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )),
 18.1440 -				vuc_pixel_NE_upper_range, 15 );
 18.1441 -
 18.1442 -
 18.1443 -		// SOUTH WEST
 18.1444 -		vector unsigned char vuc_pixel_SW_lower_range = spu_insert(
 18.1445 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 );
 18.1446 -		vuc_pixel_SW_lower_range = spu_insert(
 18.1447 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )),
 18.1448 -				vuc_pixel_SW_lower_range, 7 );
 18.1449 -		vuc_pixel_SW_lower_range = spu_insert(
 18.1450 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )),
 18.1451 -				vuc_pixel_SW_lower_range, 11 );
 18.1452 -		vuc_pixel_SW_lower_range = spu_insert(
 18.1453 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )),
 18.1454 -				vuc_pixel_SW_lower_range, 15 );
 18.1455 -
 18.1456 -		vector unsigned char vuc_pixel_SW_upper_range = spu_insert(
 18.1457 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 );
 18.1458 -		vuc_pixel_SW_upper_range = spu_insert(
 18.1459 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )),
 18.1460 -				vuc_pixel_SW_upper_range, 7 );
 18.1461 -		vuc_pixel_SW_upper_range = spu_insert(
 18.1462 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )),
 18.1463 -				vuc_pixel_SW_upper_range, 11 );
 18.1464 -		vuc_pixel_SW_upper_range = spu_insert(
 18.1465 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )),
 18.1466 -				vuc_pixel_SW_upper_range, 15 );
 18.1467 -
 18.1468 -		// SOUTH EAST
 18.1469 -		vector unsigned char vuc_pixel_SE_lower_range = spu_insert(
 18.1470 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 );
 18.1471 -		vuc_pixel_SE_lower_range = spu_insert(
 18.1472 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )),
 18.1473 -				vuc_pixel_SE_lower_range, 7 );
 18.1474 -		vuc_pixel_SE_lower_range = spu_insert(
 18.1475 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )),
 18.1476 -				vuc_pixel_SE_lower_range, 11 );
 18.1477 -		vuc_pixel_SE_lower_range = spu_insert(
 18.1478 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )),
 18.1479 -				vuc_pixel_SE_lower_range, 15 );
 18.1480 -
 18.1481 -		vector unsigned char vuc_pixel_SE_upper_range = spu_insert(
 18.1482 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 );
 18.1483 -		vuc_pixel_SE_upper_range = spu_insert(
 18.1484 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )),
 18.1485 -				vuc_pixel_SE_upper_range, 7 );
 18.1486 -		vuc_pixel_SE_upper_range = spu_insert(
 18.1487 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )),
 18.1488 -				vuc_pixel_SE_upper_range, 11 );
 18.1489 -		vuc_pixel_SE_upper_range = spu_insert(
 18.1490 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )),
 18.1491 -				vuc_pixel_SE_upper_range, 15 );
 18.1492 -
 18.1493 -
 18.1494 -		// convert to float (the byte vectors are reinterpreted as 4 unsigned ints first)
 18.1495 -		vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 );
 18.1496 -		vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 );
 18.1497 -
 18.1498 -		vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 );
 18.1499 -		vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 );
 18.1500 -
 18.1501 -		vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 );
 18.1502 -		vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 );
 18.1503 -
 18.1504 -		vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 );
 18.1505 -		vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 );
 18.1506 -
 18.1507 -
 18.1508 -
 18.1509 -		// first linear interpolation: EWtop
 18.1510 -		// EWtop = NW + EWweight*(NE-NW)
 18.1511 -		//
 18.1512 -		// lower range
 18.1513 -		vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range );
 18.1514 -		vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range,
 18.1515 -								vf_EWtop_lower_range_tmp,
 18.1516 -								vf_pixel_NW_lower_range );
 18.1517 -
 18.1518 -		// upper range
 18.1519 -		vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range );
 18.1520 -		vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range,
 18.1521 -								vf_EWtop_upper_range_tmp,
 18.1522 -								vf_pixel_NW_upper_range );
 18.1523 -
 18.1524 -
 18.1525 -
 18.1526 -		// second linear interpolation: EWbottom
 18.1527 -		// EWbottom = SW + EWweight*(SE-SW)
 18.1528 -		//
 18.1529 -		// lower range
 18.1530 -		vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range );
 18.1531 -		vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range,
 18.1532 -								vf_EWbottom_lower_range_tmp,
 18.1533 -								vf_pixel_SW_lower_range );
 18.1534 -
 18.1535 -		// upper range
 18.1536 -		vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range );
 18.1537 -		vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range,
 18.1538 -								vf_EWbottom_upper_range_tmp,
 18.1539 -								vf_pixel_SW_upper_range );
 18.1540 -
 18.1541 -
 18.1542 -
 18.1543 -		// third linear interpolation: the bilinear interpolated value
 18.1544 -		// result = EWtop + NSweight*(EWbottom-EWtop);
 18.1545 -		//
 18.1546 -		// lower range
 18.1547 -		vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range );
 18.1548 -		vector float vf_result_lower_range = spu_madd( vf_NSweight,
 18.1549 -								vf_result_lower_range_tmp,
 18.1550 -								vf_EWtop_lower_range );
 18.1551 -
 18.1552 -		// upper range
 18.1553 -		vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range );
 18.1554 -		vector float vf_result_upper_range = spu_madd( vf_NSweight,
 18.1555 -								vf_result_upper_range_tmp,
 18.1556 -								vf_EWtop_upper_range );
 18.1557 -
 18.1558 -
 18.1559 -		// convert back: using saturated arithmetic (per the helper vfloat_to_vuint, defined elsewhere - TODO confirm)
 18.1560 -		vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range );
 18.1561 -		vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range );
 18.1562 -
 18.1563 -		// merge results->lower,upper: bytes 3,7,11,15 of the lower result (0x03..0x0F) and of the upper result (0x13..0x1F) go to bytes 0..7
 18.1564 -		vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F,
 18.1565 -							       0x13, 0x17, 0x1B, 0x1F,
 18.1566 -							       0x00, 0x00, 0x00, 0x00,
 18.1567 -							       0x00, 0x00, 0x00, 0x00 };
 18.1568 -
 18.1569 -		vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range,
 18.1570 -								(vector unsigned char) vui_result_upper_range,
 18.1571 -								vuc_mask_merge_result );
 18.1572 -
 18.1573 -		// partial storing: the mask keeps 8 of the 16 bytes already stored at dst, the other 8 receive the new pixels
 18.1574 -		vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00,
 18.1575 -						      0x00, 0x00, 0x00, 0x00,
 18.1576 -						      0xFF, 0xFF, 0xFF, 0xFF,
 18.1577 -						      0xFF, 0xFF, 0xFF, 0xFF };
 18.1578 -
 18.1579 -
 18.1580 -		// get currently stored data
 18.1581 -		vector unsigned char vuc_orig = *((vector unsigned char*)dst);
 18.1582 -
 18.1583 -		// clear the bytes that will be overwritten (mask rotated by the low 4 bits of the dst address)
 18.1584 -		vuc_orig = spu_and( vuc_orig,
 18.1585 -				spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) );
 18.1586 -
 18.1587 -		// rotate result according to storing address
 18.1588 -		vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F );
 18.1589 -
 18.1590 -		// store result merged with the preserved bytes, then advance by the 8 pixels just written
 18.1591 -		*((vector unsigned char*)dst) = spu_or( vuc_result,
 18.1592 -							vuc_orig );
 18.1593 -		dst += 8;
 18.1594 -	}
 18.1595 -}
 18.1596 -
 18.1597 -
 18.1598 -/*
 18.1599 - * bilinear_scale_line_w16()
 18.1600 - *
 18.1601 - * processes a line of yuv-input, width has to be a multiple of 16
 18.1602 - * scaled yuv-output is written to local store buffer
 18.1603 - *
 18.1604 - * @param src buffer for 2 lines input
 18.1605 - * @param dst_ buffer for 1 line output
 18.1606 - * @param dst_width the width of the destination line
 18.1607 - * @param vf_x_scale a float vector, at each entry is the x_scale-factor
 18.1608 - * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
 18.1609 - * @param src_linestride the stride of the srcline
 18.1610 - */
 18.1611 -void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
 18.1612 -
 18.1613 -	unsigned char* dst = dst_;
 18.1614 -
 18.1615 -	unsigned int dst_x;
 18.1616 -	for( dst_x=0; dst_x<dst_width; dst_x+=16) {
 18.1617 -		// address calculation for loading the 4 surrounding pixel of each calculated
 18.1618 -		// destination pixel
 18.1619 -		vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
 18.1620 -		// parallelised processing
 18.1621 -		// first range->pixel 1 2 3 4
 18.1622 -		// second range->pixel 5 6 7 8
 18.1623 -		// third range->pixel 9 10 11 12
 18.1624 -		// fourth range->pixel 13 14 15 16
 18.1625 -		vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 };
 18.1626 -		vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 };
 18.1627 -		vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 };
 18.1628 -		vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 };
 18.1629 -		vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range );
 18.1630 -		vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range );
 18.1631 -		vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range );
 18.1632 -		vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range );
 18.1633 -
 18.1634 -		// calculate weight EAST-WEST
 18.1635 -		vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 );
 18.1636 -		vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 );
 18.1637 -		vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 );
 18.1638 -		vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 );
 18.1639 -		vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale );
 18.1640 -		vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale );
 18.1641 -		vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale );
 18.1642 -		vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale );
 18.1643 -		vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 );
 18.1644 -		vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 );
 18.1645 -		vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 );
 18.1646 -		vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 );
 18.1647 -		vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 );
 18.1648 -		vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 );
 18.1649 -		vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 );
 18.1650 -		vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 );
 18.1651 -		vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range );
 18.1652 -		vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range );
 18.1653 -		vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range );
 18.1654 -		vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range );
 18.1655 -
 18.1656 -		// calculate address offset
 18.1657 -		//
 18.1658 -		// pixel NORTH WEST
 18.1659 -		vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range;
 18.1660 -		vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range;
 18.1661 -		vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range;
 18.1662 -		vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range;
 18.1663 -
 18.1664 -		// pixel NORTH EAST-->(offpixelNW+1)
 18.1665 -		vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
 18.1666 -		vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 );
 18.1667 -		vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 );
 18.1668 -		vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 );
 18.1669 -		vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 );
 18.1670 -
 18.1671 -		// SOUTH-WEST-->(offpixelNW+src_linestride)
 18.1672 -		vector unsigned int vui_srclinestride = spu_splats( src_linestride );
 18.1673 -		vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range );
 18.1674 -		vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range );
 18.1675 -		vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range );
 18.1676 -		vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range );
 18.1677 -
 18.1678 -		// SOUTH-EAST-->(offpixelNW+src_linestride+1)
 18.1679 -		vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range );
 18.1680 -		vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range );
 18.1681 -		vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range );
 18.1682 -		vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range );
 18.1683 -
 18.1684 -		// calculate each address
 18.1685 -		vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
 18.1686 -		vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range );
 18.1687 -		vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range );
 18.1688 -		vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range );
 18.1689 -		vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range );
 18.1690 -
 18.1691 -		vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range );
 18.1692 -		vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range );
 18.1693 -		vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range );
 18.1694 -		vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range );
 18.1695 -
 18.1696 -		vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range );
 18.1697 -		vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range );
 18.1698 -		vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range );
 18.1699 -		vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range );
 18.1700 -
 18.1701 -		vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range );
 18.1702 -		vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range );
 18.1703 -		vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range );
 18.1704 -		vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range );
 18.1705 -
 18.1706 -
 18.1707 -		// get each pixel
 18.1708 -		//
 18.1709 -		// scalar load, afterwards insertion into the right position
 18.1710 -		// NORTH WEST
 18.1711 -		// first range
 18.1712 -		vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 18.1713 -		vector unsigned char vuc_pixel_NW_first_range = spu_insert(
 18.1714 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 );
 18.1715 -		vuc_pixel_NW_first_range = spu_insert(
 18.1716 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )),
 18.1717 -				vuc_pixel_NW_first_range, 7 );
 18.1718 -		vuc_pixel_NW_first_range = spu_insert(
 18.1719 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )),
 18.1720 -				vuc_pixel_NW_first_range, 11 );
 18.1721 -		vuc_pixel_NW_first_range = spu_insert(
 18.1722 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )),
 18.1723 -				vuc_pixel_NW_first_range, 15 );
 18.1724 -		// second range
 18.1725 -		vector unsigned char vuc_pixel_NW_second_range = spu_insert(
 18.1726 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 );
 18.1727 -		vuc_pixel_NW_second_range = spu_insert(
 18.1728 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )),
 18.1729 -				vuc_pixel_NW_second_range, 7 );
 18.1730 -		vuc_pixel_NW_second_range = spu_insert(
 18.1731 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )),
 18.1732 -				vuc_pixel_NW_second_range, 11 );
 18.1733 -		vuc_pixel_NW_second_range = spu_insert(
 18.1734 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )),
 18.1735 -				vuc_pixel_NW_second_range, 15 );
 18.1736 -		// third range
 18.1737 -		vector unsigned char vuc_pixel_NW_third_range = spu_insert(
 18.1738 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 );
 18.1739 -		vuc_pixel_NW_third_range = spu_insert(
 18.1740 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )),
 18.1741 -				vuc_pixel_NW_third_range, 7 );
 18.1742 -		vuc_pixel_NW_third_range = spu_insert(
 18.1743 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )),
 18.1744 -				vuc_pixel_NW_third_range, 11 );
 18.1745 -		vuc_pixel_NW_third_range = spu_insert(
 18.1746 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )),
 18.1747 -				vuc_pixel_NW_third_range, 15 );
 18.1748 -		// fourth range
 18.1749 -		vector unsigned char vuc_pixel_NW_fourth_range = spu_insert(
 18.1750 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 );
 18.1751 -		vuc_pixel_NW_fourth_range = spu_insert(
 18.1752 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )),
 18.1753 -				vuc_pixel_NW_fourth_range, 7 );
 18.1754 -		vuc_pixel_NW_fourth_range = spu_insert(
 18.1755 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )),
 18.1756 -				vuc_pixel_NW_fourth_range, 11 );
 18.1757 -		vuc_pixel_NW_fourth_range = spu_insert(
 18.1758 -				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )),
 18.1759 -				vuc_pixel_NW_fourth_range, 15 );
 18.1760 -
 18.1761 -		// NORTH EAST
 18.1762 -		// first range
 18.1763 -		vector unsigned char vuc_pixel_NE_first_range = spu_insert(
 18.1764 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 );
 18.1765 -		vuc_pixel_NE_first_range = spu_insert(
 18.1766 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )),
 18.1767 -				vuc_pixel_NE_first_range, 7 );
 18.1768 -		vuc_pixel_NE_first_range = spu_insert(
 18.1769 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )),
 18.1770 -				vuc_pixel_NE_first_range, 11 );
 18.1771 -		vuc_pixel_NE_first_range = spu_insert(
 18.1772 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )),
 18.1773 -				vuc_pixel_NE_first_range, 15 );
 18.1774 -		// second range
 18.1775 -		vector unsigned char vuc_pixel_NE_second_range = spu_insert(
 18.1776 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 );
 18.1777 -		vuc_pixel_NE_second_range = spu_insert(
 18.1778 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )),
 18.1779 -				vuc_pixel_NE_second_range, 7 );
 18.1780 -		vuc_pixel_NE_second_range = spu_insert(
 18.1781 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )),
 18.1782 -				vuc_pixel_NE_second_range, 11 );
 18.1783 -		vuc_pixel_NE_second_range = spu_insert(
 18.1784 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )),
 18.1785 -				vuc_pixel_NE_second_range, 15 );
 18.1786 -		// third range
 18.1787 -		vector unsigned char vuc_pixel_NE_third_range = spu_insert(
 18.1788 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 );
 18.1789 -		vuc_pixel_NE_third_range = spu_insert(
 18.1790 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )),
 18.1791 -				vuc_pixel_NE_third_range, 7 );
 18.1792 -		vuc_pixel_NE_third_range = spu_insert(
 18.1793 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )),
 18.1794 -				vuc_pixel_NE_third_range, 11 );
 18.1795 -		vuc_pixel_NE_third_range = spu_insert(
 18.1796 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )),
 18.1797 -				vuc_pixel_NE_third_range, 15 );
 18.1798 -		// fourth range
 18.1799 -		vector unsigned char vuc_pixel_NE_fourth_range = spu_insert(
 18.1800 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 );
 18.1801 -		vuc_pixel_NE_fourth_range = spu_insert(
 18.1802 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )),
 18.1803 -				vuc_pixel_NE_fourth_range, 7 );
 18.1804 -		vuc_pixel_NE_fourth_range = spu_insert(
 18.1805 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )),
 18.1806 -				vuc_pixel_NE_fourth_range, 11 );
 18.1807 -		vuc_pixel_NE_fourth_range = spu_insert(
 18.1808 -				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )),
 18.1809 -				vuc_pixel_NE_fourth_range, 15 );
 18.1810 -
 18.1811 -		// SOUTH WEST
 18.1812 -		// first range
 18.1813 -		vector unsigned char vuc_pixel_SW_first_range = spu_insert(
 18.1814 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 );
 18.1815 -		vuc_pixel_SW_first_range = spu_insert(
 18.1816 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )),
 18.1817 -				vuc_pixel_SW_first_range, 7 );
 18.1818 -		vuc_pixel_SW_first_range = spu_insert(
 18.1819 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )),
 18.1820 -				vuc_pixel_SW_first_range, 11 );
 18.1821 -		vuc_pixel_SW_first_range = spu_insert(
 18.1822 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )),
 18.1823 -				vuc_pixel_SW_first_range, 15 );
 18.1824 -		// second range
 18.1825 -		vector unsigned char vuc_pixel_SW_second_range = spu_insert(
 18.1826 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 );
 18.1827 -		vuc_pixel_SW_second_range = spu_insert(
 18.1828 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )),
 18.1829 -				vuc_pixel_SW_second_range, 7 );
 18.1830 -		vuc_pixel_SW_second_range = spu_insert(
 18.1831 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )),
 18.1832 -				vuc_pixel_SW_second_range, 11 );
 18.1833 -		vuc_pixel_SW_second_range = spu_insert(
 18.1834 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )),
 18.1835 -				vuc_pixel_SW_second_range, 15 );
 18.1836 -		// third range
 18.1837 -		vector unsigned char vuc_pixel_SW_third_range = spu_insert(
 18.1838 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 );
 18.1839 -		vuc_pixel_SW_third_range = spu_insert(
 18.1840 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )),
 18.1841 -				vuc_pixel_SW_third_range, 7 );
 18.1842 -		vuc_pixel_SW_third_range = spu_insert(
 18.1843 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )),
 18.1844 -				vuc_pixel_SW_third_range, 11 );
 18.1845 -		vuc_pixel_SW_third_range = spu_insert(
 18.1846 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )),
 18.1847 -				vuc_pixel_SW_third_range, 15 );
 18.1848 -		// fourth range
 18.1849 -		vector unsigned char vuc_pixel_SW_fourth_range = spu_insert(
 18.1850 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 );
 18.1851 -		vuc_pixel_SW_fourth_range = spu_insert(
 18.1852 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )),
 18.1853 -				vuc_pixel_SW_fourth_range, 7 );
 18.1854 -		vuc_pixel_SW_fourth_range = spu_insert(
 18.1855 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )),
 18.1856 -				vuc_pixel_SW_fourth_range, 11 );
 18.1857 -		vuc_pixel_SW_fourth_range = spu_insert(
 18.1858 -				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )),
 18.1859 -				vuc_pixel_SW_fourth_range, 15 );
 18.1860 -
 18.1861 -		// SOUTH EAST
 18.1862 -		// first range
 18.1863 -		vector unsigned char vuc_pixel_SE_first_range = spu_insert(
 18.1864 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 );
 18.1865 -		vuc_pixel_SE_first_range = spu_insert(
 18.1866 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )),
 18.1867 -				vuc_pixel_SE_first_range, 7 );
 18.1868 -		vuc_pixel_SE_first_range = spu_insert(
 18.1869 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )),
 18.1870 -				vuc_pixel_SE_first_range, 11 );
 18.1871 -		vuc_pixel_SE_first_range = spu_insert(
 18.1872 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )),
 18.1873 -				vuc_pixel_SE_first_range, 15 );
 18.1874 -		// second range
 18.1875 -		vector unsigned char vuc_pixel_SE_second_range = spu_insert(
 18.1876 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 );
 18.1877 -		vuc_pixel_SE_second_range = spu_insert(
 18.1878 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )),
 18.1879 -				vuc_pixel_SE_second_range, 7 );
 18.1880 -		vuc_pixel_SE_second_range = spu_insert(
 18.1881 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )),
 18.1882 -				vuc_pixel_SE_second_range, 11 );
 18.1883 -		vuc_pixel_SE_second_range = spu_insert(
 18.1884 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )),
 18.1885 -				vuc_pixel_SE_second_range, 15 );
 18.1886 -		// third range
 18.1887 -		vector unsigned char vuc_pixel_SE_third_range = spu_insert(
 18.1888 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 );
 18.1889 -		vuc_pixel_SE_third_range = spu_insert(
 18.1890 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )),
 18.1891 -				vuc_pixel_SE_third_range, 7 );
 18.1892 -		vuc_pixel_SE_third_range = spu_insert(
 18.1893 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )),
 18.1894 -				vuc_pixel_SE_third_range, 11 );
 18.1895 -		vuc_pixel_SE_third_range = spu_insert(
 18.1896 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )),
 18.1897 -				vuc_pixel_SE_third_range, 15 );
 18.1898 -		// fourth range
 18.1899 -		vector unsigned char vuc_pixel_SE_fourth_range = spu_insert(
 18.1900 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 );
 18.1901 -		vuc_pixel_SE_fourth_range = spu_insert(
 18.1902 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )),
 18.1903 -				vuc_pixel_SE_fourth_range, 7 );
 18.1904 -		vuc_pixel_SE_fourth_range = spu_insert(
 18.1905 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )),
 18.1906 -				vuc_pixel_SE_fourth_range, 11 );
 18.1907 -		vuc_pixel_SE_fourth_range = spu_insert(
 18.1908 -				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )),
 18.1909 -				vuc_pixel_SE_fourth_range, 15 );
 18.1910 -
 18.1911 -
 18.1912 -
 18.1913 -		// convert to float
 18.1914 -		vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 );
 18.1915 -		vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 );
 18.1916 -		vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 );
 18.1917 -		vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 );
 18.1918 -
 18.1919 -		vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 );
 18.1920 -		vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 );
 18.1921 -		vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 );
 18.1922 -		vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 );
 18.1923 -
 18.1924 -		vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 );
 18.1925 -		vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 );
 18.1926 -		vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 );
 18.1927 -		vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 );
 18.1928 -
 18.1929 -		vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 );
 18.1930 -		vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 );
 18.1931 -		vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 );
 18.1932 -		vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 );
 18.1933 -
 18.1934 -		// first linear interpolation: EWtop
 18.1935 -		// EWtop = NW + EWweight*(NE-NW)
 18.1936 -		//
 18.1937 -		// first range
 18.1938 -		vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range );
 18.1939 -		vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range,
 18.1940 -								vf_EWtop_first_range_tmp,
 18.1941 -								vf_pixel_NW_first_range );
 18.1942 -
 18.1943 -		// second range
 18.1944 -		vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range );
 18.1945 -		vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range,
 18.1946 -								vf_EWtop_second_range_tmp,
 18.1947 -								vf_pixel_NW_second_range );
 18.1948 -
 18.1949 -		// third range
 18.1950 -		vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range );
 18.1951 -		vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range,
 18.1952 -								vf_EWtop_third_range_tmp,
 18.1953 -								vf_pixel_NW_third_range );
 18.1954 -
 18.1955 -		// fourth range
 18.1956 -		vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range );
 18.1957 -		vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range,
 18.1958 -								vf_EWtop_fourth_range_tmp,
 18.1959 -								vf_pixel_NW_fourth_range );
 18.1960 -
 18.1961 -
 18.1962 -
 18.1963 -		// second linear interpolation: EWbottom
 18.1964 -		// EWbottom = SW + EWweight*(SE-SW)
 18.1965 -		//
 18.1966 -		// first range
 18.1967 -		vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range );
 18.1968 -		vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range,
 18.1969 -								vf_EWbottom_first_range_tmp,
 18.1970 -								vf_pixel_SW_first_range );
 18.1971 -
 18.1972 -		// second range
 18.1973 -		vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range );
 18.1974 -		vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range,
 18.1975 -								vf_EWbottom_second_range_tmp,
 18.1976 -								vf_pixel_SW_second_range );
 18.1977 -		// third range
 18.1978 -		vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range );
 18.1979 -		vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range,
 18.1980 -								vf_EWbottom_third_range_tmp,
 18.1981 -								vf_pixel_SW_third_range );
 18.1982 -
 18.1983 -		// fourth range
 18.1984 -		vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range );
 18.1985 -		vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range,
 18.1986 -								vf_EWbottom_fourth_range_tmp,
 18.1987 -								vf_pixel_SW_fourth_range );
 18.1988 -
 18.1989 -
 18.1990 -
 18.1991 -		// third linear interpolation: the bilinear interpolated value
 18.1992 -		// result = EWtop + NSweight*(EWbottom-EWtop);
 18.1993 -		//
 18.1994 -		// first range
 18.1995 -		vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range );
 18.1996 -		vector float vf_result_first_range = spu_madd( vf_NSweight,
 18.1997 -								vf_result_first_range_tmp,
 18.1998 -								vf_EWtop_first_range );
 18.1999 -
 18.2000 -		// second range
 18.2001 -		vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range );
 18.2002 -		vector float vf_result_second_range = spu_madd( vf_NSweight,
 18.2003 -								vf_result_second_range_tmp,
 18.2004 -								vf_EWtop_second_range );
 18.2005 -
 18.2006 -		// third range
 18.2007 -		vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range );
 18.2008 -		vector float vf_result_third_range = spu_madd( vf_NSweight,
 18.2009 -								vf_result_third_range_tmp,
 18.2010 -								vf_EWtop_third_range );
 18.2011 -
 18.2012 -		// fourth range
 18.2013 -		vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range );
 18.2014 -		vector float vf_result_fourth_range = spu_madd( vf_NSweight,
 18.2015 -								vf_result_fourth_range_tmp,
 18.2016 -								vf_EWtop_fourth_range );
 18.2017 -
 18.2018 -
 18.2019 -
 18.2020 -		// convert back: using saturated arithmetic
 18.2021 -		vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range );
 18.2022 -		vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range );
 18.2023 -		vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range );
 18.2024 -		vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range );
 18.2025 -
 18.2026 -		// merge results->lower,upper
 18.2027 -		vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F,
 18.2028 -							       		    0x13, 0x17, 0x1B, 0x1F,
 18.2029 -							       		    0x00, 0x00, 0x00, 0x00,
 18.2030 -							       		    0x00, 0x00, 0x00, 0x00 };
 18.2031 -
 18.2032 -		vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00,
 18.2033 -							       		    0x00, 0x00, 0x00, 0x00,
 18.2034 -									    0x03, 0x07, 0x0B, 0x0F,
 18.2035 -							       		    0x13, 0x17, 0x1B, 0x1F };
 18.2036 -
 18.2037 -		vector unsigned char vuc_result_first_second =
 18.2038 -						spu_shuffle( (vector unsigned char) vui_result_first_range,
 18.2039 -								 (vector unsigned char) vui_result_second_range,
 18.2040 -								vuc_mask_merge_result_first_second );
 18.2041 -
 18.2042 -		vector unsigned char vuc_result_third_fourth =
 18.2043 -						spu_shuffle( (vector unsigned char) vui_result_third_range,
 18.2044 -								 (vector unsigned char) vui_result_fourth_range,
 18.2045 -								vuc_mask_merge_result_third_fourth );
 18.2046 -
 18.2047 -		// store result
 18.2048 -		*((vector unsigned char*)dst) = spu_or( vuc_result_first_second,
 18.2049 -							vuc_result_third_fourth );
 18.2050 -		dst += 16;
 18.2051 -	}
 18.2052 -}
 18.2053 -
    19.1 --- a/src/video/ps3/spulibs/fb_writer.c	Wed Jan 19 22:21:31 2011 -0800
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,193 +0,0 @@
    19.4 -/*
    19.5 - * SDL - Simple DirectMedia Layer
    19.6 - * CELL BE Support for PS3 Framebuffer
    19.7 - * Copyright (C) 2008, 2009 International Business Machines Corporation
    19.8 - *
    19.9 - * This library is free software; you can redistribute it and/or modify it
   19.10 - * under the terms of the GNU Lesser General Public License as published
   19.11 - * by the Free Software Foundation; either version 2.1 of the License, or
   19.12 - * (at your option) any later version.
   19.13 - *
   19.14 - * This library is distributed in the hope that it will be useful, but
   19.15 - * WITHOUT ANY WARRANTY; without even the implied warranty of
   19.16 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   19.17 - * Lesser General Public License for more details.
   19.18 - *
   19.19 - * You should have received a copy of the GNU Lesser General Public
   19.20 - * License along with this library; if not, write to the Free Software
   19.21 - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
   19.22 - * USA
   19.23 - *
   19.24 - *  Martin Lowinski  <lowinski [at] de [dot] ibm [dot] com>
   19.25 - *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
   19.26 - *  SPE code based on research by:
   19.27 - *  Rene Becker
   19.28 - *  Thimo Emmerich
   19.29 - */
   19.30 -
   19.31 -#include "spu_common.h"
   19.32 -
   19.33 -#include <spu_intrinsics.h>
   19.34 -#include <spu_mfcio.h>
   19.35 -#include <stdio.h>
   19.36 -#include <string.h>
   19.37 -
   19.38 -// Debugging
   19.39 -//#define DEBUG
   19.40 -
   19.41 -#ifdef DEBUG
   19.42 -#define deprintf(fmt, args... ) \
   19.43 -	fprintf( stdout, fmt, ##args ); \
   19.44 -	fflush( stdout );
   19.45 -#else
   19.46 -#define deprintf( fmt, args... )
   19.47 -#endif
   19.48 -
   19.49 -void cpy_to_fb(unsigned int);
   19.50 -
   19.51 -/* fb_writer_spu parms */
   19.52 -static volatile struct fb_writer_parms_t parms __attribute__ ((aligned(128)));
   19.53 -
   19.54 -/* Code running on SPU */
   19.55 -int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused)))
   19.56 -{
   19.57 -	deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id);
   19.58 -	uint32_t ea_mfc, mbox;
   19.59 -	// send ready message
   19.60 -	spu_write_out_mbox(SPU_READY);
   19.61 -
   19.62 -	while (1) {
   19.63 -		/* Check mailbox */
   19.64 -		mbox = spu_read_in_mbox();
   19.65 -		deprintf("[SPU] Message is %u\n", mbox);
   19.66 -		switch (mbox) {
   19.67 -			case SPU_EXIT:
   19.68 -				deprintf("[SPU] fb_writer goes down...\n");
   19.69 -				return 0;
   19.70 -			case SPU_START:
   19.71 -				break;
   19.72 -			default:
   19.73 -				deprintf("[SPU] Cannot handle message\n");
   19.74 -				continue;
   19.75 -		}
   19.76 -
   19.77 -		/* Tag Manager setup */
   19.78 -		unsigned int tags;
   19.79 -		tags = mfc_multi_tag_reserve(5);
   19.80 -		if (tags == MFC_TAG_INVALID) {
   19.81 -			deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n");
   19.82 -			return 0;
   19.83 -		}
   19.84 -
   19.85 -		/* Framebuffer parms */
   19.86 -		ea_mfc = spu_read_in_mbox();
   19.87 -		deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc);
   19.88 -		spu_mfcdma32(&parms, (unsigned int)ea_mfc,
   19.89 -				sizeof(struct fb_writer_parms_t), tags,
   19.90 -				MFC_GET_CMD);
   19.91 -		deprintf("[SPU] argp = %u\n", (unsigned int)argp);
   19.92 -		DMA_WAIT_TAG(tags);
   19.93 -
   19.94 -		/* Copy parms->data to framebuffer */
   19.95 -		deprintf("[SPU] Copying to framebuffer started\n");
   19.96 -		cpy_to_fb(tags);
   19.97 -		deprintf("[SPU] Copying to framebuffer done!\n");
   19.98 -
   19.99 -		mfc_multi_tag_release(tags, 5);
  19.100 -		deprintf("[SPU] fb_writer_spu... done!\n");
  19.101 -		/* Send FIN msg */
  19.102 -		spu_write_out_mbox(SPU_FIN);
  19.103 -	}
  19.104 -
  19.105 -	return 0;
  19.106 -}
  19.107 -
  19.108 -void cpy_to_fb(unsigned int tag_id_base)
  19.109 -{	/* Copies the image at parms.data line by line to the framebuffer, staging each line in local store; uses DMA tags tag_id_base + 1 .. tag_id_base + 4 */
  19.110 -	unsigned int i;
  19.111 -	unsigned char current_buf;	/* NOTE(review): only assigned 0 below, never read in this function */
  19.112 -	uint8_t *in = parms.data;
  19.113 -
  19.114 -	/* Round the (previously centered) fb pointer down to a 16-byte boundary */
  19.115 -	uint8_t *fb =
  19.116 -	    (unsigned char *)((unsigned int)parms.center & 0xFFFFFFF0);
  19.117 -
  19.118 -	uint32_t bounded_input_height = parms.bounded_input_height;
  19.119 -	uint32_t bounded_input_width = parms.bounded_input_width;
  19.120 -	uint32_t fb_pixel_size = parms.fb_pixel_size;
  19.121 -
  19.122 -	uint32_t out_line_stride = parms.out_line_stride;
  19.123 -	uint32_t in_line_stride = parms.in_line_stride;
  19.124 -	uint32_t in_line_size = bounded_input_width * fb_pixel_size;	/* bytes transferred per line */
  19.125 -
  19.126 -	current_buf = 0;
  19.127 -
  19.128 -	/* Four local-store line buffers of BUFFER_SIZE bytes each */
  19.129 -	static volatile uint8_t buf[4][BUFFER_SIZE]
  19.130 -	    __attribute__ ((aligned(128)));
  19.131 -	/* Four lines per iteration, one buffer and one tag per line: fetch (GETB), wait, write out (PUTB). The original comment mentions a DMA list, but only plain spu_mfcdma32 transfers are issued here. NOTE(review): the bounded_input_height & 3 trailing lines are not copied by this loop. */
  19.132 -	for (i = 0; i < bounded_input_height >> 2; i++) {
  19.133 -		/* first buffer: wait for the put issued from buf[0] in the previous iteration before refilling it */
  19.134 -		DMA_WAIT_TAG(tag_id_base + 1);
  19.135 -		// fetch one input line into buf[0]
  19.136 -		spu_mfcdma32(buf[0], (unsigned int)in, in_line_size,
  19.137 -			     tag_id_base + 1, MFC_GETB_CMD);
  19.138 -		DMA_WAIT_TAG(tag_id_base + 1);
  19.139 -		// write buf[0] out to the current framebuffer line
  19.140 -		spu_mfcdma32(buf[0], (unsigned int)fb, in_line_size,
  19.141 -			     tag_id_base + 1, MFC_PUTB_CMD);
  19.142 -		in += in_line_stride;
  19.143 -		fb += out_line_stride;
  19.144 -		deprintf("[SPU] 1st buffer copied in=0x%x, fb=0x%x\n", in,
  19.145 -		       fb);
  19.146 -
  19.147 -		/* second buffer: same sequence with buf[1] and tag_id_base + 2 */
  19.148 -		DMA_WAIT_TAG(tag_id_base + 2);
  19.149 -		// fetch one input line into buf[1]
  19.150 -		spu_mfcdma32(buf[1], (unsigned int)in, in_line_size,
  19.151 -			     tag_id_base + 2, MFC_GETB_CMD);
  19.152 -		DMA_WAIT_TAG(tag_id_base + 2);
  19.153 -		// write buf[1] out to the current framebuffer line
  19.154 -		spu_mfcdma32(buf[1], (unsigned int)fb, in_line_size,
  19.155 -			     tag_id_base + 2, MFC_PUTB_CMD);
  19.156 -		in += in_line_stride;
  19.157 -		fb += out_line_stride;
  19.158 -		deprintf("[SPU] 2nd buffer copied in=0x%x, fb=0x%x\n", in,
  19.159 -		       fb);
  19.160 -
  19.161 -		/* third buffer: same sequence with buf[2] and tag_id_base + 3 */
  19.162 -		DMA_WAIT_TAG(tag_id_base + 3);
  19.163 -		// fetch one input line into buf[2]
  19.164 -		spu_mfcdma32(buf[2], (unsigned int)in, in_line_size,
  19.165 -			     tag_id_base + 3, MFC_GETB_CMD);
  19.166 -		DMA_WAIT_TAG(tag_id_base + 3);
  19.167 -		// write buf[2] out to the current framebuffer line
  19.168 -		spu_mfcdma32(buf[2], (unsigned int)fb, in_line_size,
  19.169 -			     tag_id_base + 3, MFC_PUTB_CMD);
  19.170 -		in += in_line_stride;
  19.171 -		fb += out_line_stride;
  19.172 -		deprintf("[SPU] 3rd buffer copied in=0x%x, fb=0x%x\n", in,
  19.173 -		       fb);
  19.174 -
  19.175 -		/* fourth buffer: same sequence with buf[3] and tag_id_base + 4 */
  19.176 -		DMA_WAIT_TAG(tag_id_base + 4);
  19.177 -		// fetch one input line into buf[3]
  19.178 -		spu_mfcdma32(buf[3], (unsigned int)in, in_line_size,
  19.179 -			     tag_id_base + 4, MFC_GETB_CMD);
  19.180 -		DMA_WAIT_TAG(tag_id_base + 4);
  19.181 -		// write buf[3] out to the current framebuffer line
  19.182 -		spu_mfcdma32(buf[3], (unsigned int)fb, in_line_size,
  19.183 -			     tag_id_base + 4, MFC_PUTB_CMD);
  19.184 -		in += in_line_stride;
  19.185 -		fb += out_line_stride;
  19.186 -		deprintf("[SPU] 4th buffer copied in=0x%x, fb=0x%x\n", in,
  19.187 -		       fb);
  19.188 -		deprintf("[SPU] Loop #%i, bounded_input_height=%i\n", i,
  19.189 -		       bounded_input_height >> 2);
  19.190 -	}
  19.191 -	DMA_WAIT_TAG(tag_id_base + 2);	/* drain the last puts. NOTE(review): tag_id_base + 1 is not waited on here - confirm whether that is intentional */
  19.192 -	DMA_WAIT_TAG(tag_id_base + 3);
  19.193 -	DMA_WAIT_TAG(tag_id_base + 4);
  19.194 -}
  19.195 -
  19.196 -
    20.1 --- a/src/video/ps3/spulibs/spu_common.h	Wed Jan 19 22:21:31 2011 -0800
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,108 +0,0 @@
    20.4 -/*
    20.5 - * SDL - Simple DirectMedia Layer
    20.6 - * CELL BE Support for PS3 Framebuffer
    20.7 - * Copyright (C) 2008, 2009 International Business Machines Corporation
    20.8 - *
    20.9 - * This library is free software; you can redistribute it and/or modify it
   20.10 - * under the terms of the GNU Lesser General Public License as published
   20.11 - * by the Free Software Foundation; either version 2.1 of the License, or
   20.12 - * (at your option) any later version.
   20.13 - *
   20.14 - * This library is distributed in the hope that it will be useful, but
   20.15 - * WITHOUT ANY WARRANTY; without even the implied warranty of
   20.16 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   20.17 - * Lesser General Public License for more details.
   20.18 - *
   20.19 - * You should have received a copy of the GNU Lesser General Public
   20.20 - * License along with this library; if not, write to the Free Software
   20.21 - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
   20.22 - * USA
   20.23 - *
   20.24 - *  Martin Lowinski  <lowinski [at] de [dot] ibm [ibm] com>
   20.25 - *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
   20.26 - *  SPE code based on research by:
   20.27 - *  Rene Becker
   20.28 - *  Thimo Emmerich
   20.29 - */
   20.30 -
   20.31 -/* Common definitions/macros for SPUs */
   20.32 -
   20.33 -#ifndef _SPU_COMMON_H
   20.34 -#define _SPU_COMMON_H
   20.35 -
   20.36 -#include <stdio.h>
   20.37 -#include <stdint.h>
   20.38 -#include <string.h>
   20.39 -
   20.40 -/* Tag management: select tag _tag in the MFC tag mask, then read the tag status with mfc_read_tag_status_all(). NOTE(review): expands to two separate statements, so it is not safe as the body of an unbraced if/else. */
   20.41 -#define DMA_WAIT_TAG(_tag)     \
   20.42 -    mfc_write_tag_mask(1<<(_tag)); \
   20.43 -    mfc_read_tag_status_all();
   20.44 -
   20.45 -/* SPU mailbox message codes (sent with spu_write_out_mbox / received with spu_read_in_mbox by the SPU programs) */
   20.46 -#define SPU_READY	0
   20.47 -#define SPU_START	1
   20.48 -#define SPU_FIN		2
   20.49 -#define SPU_EXIT	3
   20.50 -
   20.51 -/* Fixed DMA tag ids. yuv2rgb.c uses RETR_BUF plus a 0/1 buffer index, so RETR_BUF + 1 has the same value as STR_BUF. */
   20.52 -#define RETR_BUF	0
   20.53 -#define STR_BUF		1
   20.54 -#define TAG_INIT	2
   20.55 -
   20.56 -/* Buffer sizes */
   20.57 -#define MAX_HDTV_WIDTH 1920
   20.58 -#define MAX_HDTV_HEIGHT 1080
   20.59 -/* One stride of HDTV: 7680 = MAX_HDTV_WIDTH * 4 (presumably 4 bytes per pixel) */
   20.60 -#define BUFFER_SIZE 7680
   20.61 -
   20.62 -/* fb_writer ppu/spu exchange parms; the struct itself is declared 128-byte aligned */
   20.63 -struct fb_writer_parms_t {
   20.64 -	uint8_t *data;
   20.65 -	uint8_t *center;
   20.66 -	uint32_t out_line_stride;
   20.67 -	uint32_t in_line_stride;
   20.68 -	uint32_t bounded_input_height;
   20.69 -	uint32_t bounded_input_width;
   20.70 -	uint32_t fb_pixel_size;
   20.71 -
   20.72 -	/* Explicit 4 bytes of tail padding; per the original note this is to fulfill the need for 16 byte alignment. On parm change, update! */
   20.73 -	char padding[4];
   20.74 -} __attribute__((aligned(128)));
   20.75 -
   20.76 -/* yuv2rgb ppu/spu exchange parms; fetched into parms_converter by yuv2rgb.c */
   20.77 -struct yuv2rgb_parms_t {
   20.78 -	uint8_t* y_plane;	/* source address of the Y plane */
   20.79 -	uint8_t* v_plane;	/* source address of the V plane */
   20.80 -	uint8_t* u_plane;	/* source address of the U plane */
   20.81 -
   20.82 -	uint8_t* dstBuffer;	/* destination address for the converted BGRA lines */
   20.83 -
   20.84 -	unsigned int src_pixel_width;
   20.85 -	unsigned int src_pixel_height;
   20.86 -
   20.87 -	/* Adds 128 - (summed size of the six members above, modulo 128) bytes, so members plus padding total a multiple of 128 bytes (the original note speaks of 16 byte alignment). On parm change, update! */
   20.88 -	char padding[128 - ((4 * sizeof(uint8_t *) + 2 * sizeof(unsigned int)) & 0x7F)];
   20.89 -} __attribute__((aligned(128)));
   20.90 -
   20.91 -/* bilin_scaler ppu/spu exchange parms */
   20.92 -struct scale_parms_t {
   20.93 -	uint8_t* y_plane;
   20.94 -	uint8_t* v_plane;
   20.95 -	uint8_t* u_plane;
   20.96 -
   20.97 -	uint8_t* dstBuffer;
   20.98 -
   20.99 -	unsigned int src_pixel_width;
  20.100 -	unsigned int src_pixel_height;
  20.101 -
  20.102 -	unsigned int dst_pixel_width;
  20.103 -	unsigned int dst_pixel_height;
  20.104 -
  20.105 -	/* Adds 128 - (summed size of the eight members above, modulo 128) bytes, so members plus padding total a multiple of 128 bytes (the original note speaks of 16 byte alignment). On parm change, update! */
  20.106 -	char padding[128 - ((4 * sizeof(uint8_t *) + 4 * sizeof(unsigned int)) & 0x7F)];
  20.107 -} __attribute__((aligned(128)));
  20.108 -
  20.109 -#endif /* _SPU_COMMON_H */
  20.110 -
  20.111 -
    21.1 --- a/src/video/ps3/spulibs/yuv2rgb.c	Wed Jan 19 22:21:31 2011 -0800
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,662 +0,0 @@
    21.4 -/*
    21.5 - * SDL - Simple DirectMedia Layer
    21.6 - * CELL BE Support for PS3 Framebuffer
    21.7 - * Copyright (C) 2008, 2009 International Business Machines Corporation
    21.8 - *
    21.9 - * This library is free software; you can redistribute it and/or modify it
   21.10 - * under the terms of the GNU Lesser General Public License as published
   21.11 - * by the Free Software Foundation; either version 2.1 of the License, or
   21.12 - * (at your option) any later version.
   21.13 - *
   21.14 - * This library is distributed in the hope that it will be useful, but
   21.15 - * WITHOUT ANY WARRANTY; without even the implied warranty of
   21.16 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   21.17 - * Lesser General Public License for more details.
   21.18 - *
   21.19 - * You should have received a copy of the GNU Lesser General Public
   21.20 - * License along with this library; if not, write to the Free Software
   21.21 - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
   21.22 - * USA
   21.23 - *
   21.24 - *  Martin Lowinski  <lowinski [at] de [dot] ibm [ibm] com>
   21.25 - *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
   21.26 - *  SPE code based on research by:
   21.27 - *  Rene Becker
   21.28 - *  Thimo Emmerich
   21.29 - */
   21.30 -
   21.31 -#include "spu_common.h"
   21.32 -
   21.33 -#include <spu_intrinsics.h>
   21.34 -#include <spu_mfcio.h>
   21.35 -
   21.36 -// Debugging: define DEBUG to make deprintf() print to stdout and flush; otherwise deprintf() expands to nothing
   21.37 -//#define DEBUG
   21.38 -
   21.39 -// Test environment for /2 resolutions: define TESTING to compile yuv_to_rgb_w2_line() and use it inside yuv_to_rgb_w16()
   21.40 -//#define TESTING
   21.41 -
   21.42 -#ifdef DEBUG
   21.43 -#define deprintf(fmt, args... ) \
   21.44 -	fprintf( stdout, fmt, ##args ); \
   21.45 -	fflush( stdout );
   21.46 -#else
   21.47 -#define deprintf( fmt, args... )
   21.48 -#endif
   21.49 -
   21.50 -struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128)));	/* filled in main() by a DMA get from the address received via the inbound mailbox */
   21.51 -
   21.52 -/* Two buffer slots per plane (indexed by buf_idx): each slot is sized for
   21.53 - * 4 rows (Y) resp. 2 rows (V, U) of MAX_HDTV_WIDTH + 128 bytes. The extra
   21.54 - * 128 bytes are there because misaligned data might have to be retrieved.
   21.55 - */
   21.56 -unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128)));
   21.57 -unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
   21.58 -unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
   21.59 -
   21.60 -/* A maximum of 4 lines BGRA are stored, 4 byte per pixel */
   21.61 -unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128)));
   21.62 -
   21.63 -/* Lower (0.1) and upper (255) clamp bounds used by float_to_char() and vfloat_to_vuint() */
   21.64 -static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
   21.65 -static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
   21.66 -
   21.67 -void yuv_to_rgb_w16();	/* whole-frame drivers; both read their input from parms_converter */
   21.68 -void yuv_to_rgb_w32();
   21.69 -/* Kernels converting two lines at a time. NOTE(review): yuv_to_rgb_w16_line(), which yuv_to_rgb_w16() calls, has no prototype in this list. */
   21.70 -void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width);
   21.71 -void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width);
   21.72 -
   21.73 -
   21.74 -int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused)))
   21.75 -{	/* Mailbox-driven service loop: announce SPU_READY, then for every SPU_START fetch the parameter block, convert the frame and answer SPU_FIN; SPU_EXIT ends the program */
   21.76 -	deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id);
   21.77 -	uint32_t ea_mfc, mbox;
   21.78 -	// announce that this SPU program is up
   21.79 -	spu_write_out_mbox(SPU_READY);
   21.80 -
   21.81 -	while (1) {
   21.82 -		/* Read the next command word from the inbound mailbox */
   21.83 -		mbox = spu_read_in_mbox();
   21.84 -		deprintf("[SPU] Message is %u\n", mbox);
   21.85 -		switch (mbox) {
   21.86 -			case SPU_EXIT:
   21.87 -				deprintf("[SPU] yuv2rgb_converter goes down...\n");
   21.88 -				return 0;
   21.89 -			case SPU_START:
   21.90 -				break;
   21.91 -			default:
   21.92 -				deprintf("[SPU] Cannot handle message\n");
   21.93 -				continue;
   21.94 -		}
   21.95 -
   21.96 -		/* Tag Manager setup: reserve one tag; it is used for the parameter DMA below. NOTE(review): on failure the program returns without writing anything to the outbound mailbox. */
   21.97 -		unsigned int tag_id;
   21.98 -		tag_id = mfc_multi_tag_reserve(1);
   21.99 -		if (tag_id == MFC_TAG_INVALID) {
  21.100 -			deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n");
  21.101 -			return 0;
  21.102 -		}
  21.103 -
  21.104 -		/* DMA transfer for the input parameters: a second mailbox word carries the address of the yuv2rgb_parms_t block */
  21.105 -		ea_mfc = spu_read_in_mbox();
  21.106 -		deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc);
  21.107 -		spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD);
  21.108 -		DMA_WAIT_TAG(tag_id);
  21.109 -
  21.110 -		/* There are alignment issues that involve handling of special cases:
  21.111 -		 * a width of 32 results in a width of 16 in the chrominance
  21.112 -		 * --> widths that are a multiple of 32 use yuv_to_rgb_w16(), all others yuv_to_rgb_w32()
  21.113 -		 */
  21.114 -		deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height);
  21.115 -		if (!(parms_converter.src_pixel_width & 0x1f)) {
  21.116 -			deprintf("[SPU] Using yuv_to_rgb_w16\n");
  21.117 -			yuv_to_rgb_w16();
  21.118 -		} else {
  21.119 -			deprintf("[SPU] Using yuv_to_rgb_w32\n");
  21.120 -			yuv_to_rgb_w32();
  21.121 -		}
  21.122 -
  21.123 -		mfc_multi_tag_release(tag_id, 1);
  21.124 -		deprintf("[SPU] yuv2rgb_spu... done!\n");
  21.125 -		/* Send FIN message */
  21.126 -		spu_write_out_mbox(SPU_FIN);
  21.127 -	}
  21.128 -
  21.129 -	return 0;
  21.130 -}
  21.131 -
  21.132 -
  21.133 -/*
  21.134 - * float_to_char()
  21.135 - *
  21.136 - * clamps a float to the range [0.1, 255] (vec_0_1 / vec_255) and
  21.137 - * converts the clamped value to an unsigned char with a C cast
  21.138 - *
  21.139 - * @param s float for conversion
  21.140 - * @returns converted character
  21.141 - */
  21.142 -inline static unsigned char float_to_char(float s) {
  21.143 -	vector float vec_s = spu_splats(s);
  21.144 -	vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
  21.145 -	vec_s = spu_sel(vec_s, vec_0_1, select_1);
  21.146 -
  21.147 -	vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
  21.148 -	vec_s = spu_sel(vec_s, vec_255, select_2);
  21.149 -	return (unsigned char) spu_extract(vec_s,0);
  21.150 -}
  21.151 -
  21.152 -
  21.153 -/*
  21.154 - * vfloat_to_vuint()
  21.155 - *
  21.156 - * clamps each element of a float vector to [0.1, 255] and converts the
  21.157 - * result to an unsigned int vector with spu_convtu (scale 0)
  21.158 - *
  21.159 - * @param vec_s float vector for conversion
  21.160 - * @returns converted unsigned int vector
  21.161 - */
  21.162 -inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
  21.163 -	vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
  21.164 -	vec_s = spu_sel(vec_s, vec_0_1, select_1);
  21.165 -
  21.166 -	vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
  21.167 -	vec_s = spu_sel(vec_s, vec_255, select_2);
  21.168 -	return spu_convtu(vec_s,0);
  21.169 -}
  21.170 -
  21.171 -
  21.172 -void yuv_to_rgb_w16() {	// Converts the frame described by parms_converter to BGRA in groups of 4 Y lines (2 V + 2 U lines); height>>2 groups are handled, so height & 3 trailing lines are not converted
  21.173 -	// Pixel dimensions of the picture
  21.174 -	uint32_t width, height;
  21.175 -
  21.176 -	// Extract parameters
  21.177 -	width = parms_converter.src_pixel_width;
  21.178 -	height = parms_converter.src_pixel_height;
  21.179 -
  21.180 -	// Plane data management
  21.181 -	// Y
  21.182 -	unsigned char* ram_addr_y = parms_converter.y_plane;
  21.183 -	// V
  21.184 -	unsigned char* ram_addr_v = parms_converter.v_plane;
  21.185 -	// U
  21.186 -	unsigned char* ram_addr_u = parms_converter.u_plane;
  21.187 -
  21.188 -	// BGRA
  21.189 -	unsigned char* ram_addr_bgra = parms_converter.dstBuffer;
  21.190 -
  21.191 -	// Strides: chroma lines are half as wide as luma lines
  21.192 -	unsigned int stride_y = width;
  21.193 -	unsigned int stride_vu = width>>1;
  21.194 -
  21.195 -	// Buffer management
  21.196 -	unsigned int buf_idx = 0;
  21.197 -	unsigned int size_4lines_y = stride_y<<2;
  21.198 -	unsigned int size_2lines_y = stride_y<<1;
  21.199 -	unsigned int size_2lines_vu = stride_vu<<1;
  21.200 -
  21.201 -	// 2*width*4byte_per_pixel
  21.202 -	unsigned int size_2lines_bgra = width<<3;
  21.203 -
  21.204 -
  21.205 -	// start double-buffered processing; DMA tags are the fixed ids RETR_BUF + buf_idx and STR_BUF, not the tag reserved in main()
  21.206 -	// 4 lines y
  21.207 -	spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD);
  21.208 -
  21.209 -	// 2 lines v
  21.210 -	spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
  21.211 -
  21.212 -	// 2 lines u
  21.213 -	spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
  21.214 -
  21.215 -	// Wait for these transfers to be completed
  21.216 -	DMA_WAIT_TAG((RETR_BUF + buf_idx));
  21.217 -
  21.218 -	unsigned int i;
  21.219 -	for(i=0; i<(height>>2)-1; i++) {
  21.220 -		// NOTE(review): height and i are unsigned, so for height < 4 the bound (height>>2)-1 wraps around instead of giving zero iterations
  21.221 -		buf_idx^=1;
  21.222 -
  21.223 -		// 4 lines y of the next group, into the other buffer slot
  21.224 -		spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD);
  21.225 -
  21.226 -		// 2 lines v
  21.227 -		spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
  21.228 -
  21.229 -		// 2 lines u
  21.230 -		spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
  21.231 -		// NOTE(review): the wait below follows the prefetch immediately, so the conversion does not run while that fetch is in flight
  21.232 -		DMA_WAIT_TAG((RETR_BUF + buf_idx));
  21.233 -
  21.234 -		buf_idx^=1;
  21.235 -
  21.236 -
  21.237 -		// Convert YUV to BGRA, store it back (first two lines)
  21.238 -#ifndef TESTING
  21.239 -		yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
  21.240 -
  21.241 -		// Next two lines
  21.242 -		yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
  21.243 -				v_plane[buf_idx] + stride_vu,
  21.244 -				u_plane[buf_idx] + stride_vu,
  21.245 -				bgra + size_2lines_bgra,
  21.246 -				width);
  21.247 -#else
  21.248 -		yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
  21.249 -
  21.250 -		// Next two lines
  21.251 -		yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y,
  21.252 -				v_plane[buf_idx] + stride_vu,
  21.253 -				u_plane[buf_idx] + stride_vu,
  21.254 -				bgra + size_2lines_bgra,
  21.255 -				width);
  21.256 -#endif
  21.257 -
  21.258 -		// Wait for previous storing transfer to be completed (NOTE(review): bgra was already rewritten by the conversion above before this wait - confirm)
  21.259 -		DMA_WAIT_TAG(STR_BUF);
  21.260 -
  21.261 -		// Store converted lines in two steps->max transfer size 16384
  21.262 -		spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.263 -		ram_addr_bgra += size_2lines_bgra;
  21.264 -		spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.265 -		ram_addr_bgra += size_2lines_bgra;
  21.266 -
  21.267 -		// Move 4 lines
  21.268 -		ram_addr_y += size_4lines_y;
  21.269 -		ram_addr_v += size_2lines_vu;
  21.270 -		ram_addr_u += size_2lines_vu;
  21.271 -
  21.272 -		buf_idx^=1;
  21.273 -	}
  21.274 -
  21.275 -#ifndef TESTING
  21.276 -	// Last group: convert YUV to BGRA, store it back (first two lines)
  21.277 -	yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
  21.278 -
  21.279 -	// Next two lines
  21.280 -	yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
  21.281 -			v_plane[buf_idx] + stride_vu,
  21.282 -			u_plane[buf_idx] + stride_vu,
  21.283 -			bgra + size_2lines_bgra,
  21.284 -			width);
  21.285 -#else
  21.286 -	// Last group: convert YUV to BGRA, store it back (first two lines)
  21.287 -	yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
  21.288 -
  21.289 -	// Next two lines
  21.290 -	yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y,
  21.291 -			v_plane[buf_idx] + stride_vu,
  21.292 -			u_plane[buf_idx] + stride_vu,
  21.293 -			bgra + size_2lines_bgra,
  21.294 -			width);
  21.295 -#endif
  21.296 -
  21.297 -	// Wait for previous storing transfer to be completed
  21.298 -	DMA_WAIT_TAG(STR_BUF);
  21.299 -	spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.300 -	ram_addr_bgra += size_2lines_bgra;
  21.301 -	spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.302 -
  21.303 -	// wait for the final store to be completed before returning
  21.304 -	DMA_WAIT_TAG(STR_BUF);
  21.305 -
  21.306 -}
  21.307 -
  21.308 -
  21.309 -void yuv_to_rgb_w32() {	// Same driver loop as yuv_to_rgb_w16(), but every line pair is converted with yuv_to_rgb_w32_line(); height>>2 groups of 4 Y lines are handled
  21.310 -	// Pixel dimensions of the picture
  21.311 -	uint32_t width, height;
  21.312 -
  21.313 -	// Extract parameters
  21.314 -	width = parms_converter.src_pixel_width;
  21.315 -	height = parms_converter.src_pixel_height;
  21.316 -
  21.317 -	// Plane data management
  21.318 -	// Y
  21.319 -	unsigned char* ram_addr_y = parms_converter.y_plane;
  21.320 -	// V
  21.321 -	unsigned char* ram_addr_v = parms_converter.v_plane;
  21.322 -	// U
  21.323 -	unsigned char* ram_addr_u = parms_converter.u_plane;
  21.324 -
  21.325 -	// BGRA
  21.326 -	unsigned char* ram_addr_bgra = parms_converter.dstBuffer;
  21.327 -
  21.328 -	// Strides: chroma lines are half as wide as luma lines
  21.329 -	unsigned int stride_y = width;
  21.330 -	unsigned int stride_vu = width>>1;
  21.331 -
  21.332 -	// Buffer management
  21.333 -	unsigned int buf_idx = 0;
  21.334 -	unsigned int size_4lines_y = stride_y<<2;
  21.335 -	unsigned int size_2lines_y = stride_y<<1;
  21.336 -	unsigned int size_2lines_vu = stride_vu<<1;
  21.337 -
  21.338 -	// 2*width*4byte_per_pixel
  21.339 -	unsigned int size_2lines_bgra = width<<3;
  21.340 -
  21.341 -	// start double-buffered processing; DMA tags are the fixed ids RETR_BUF + buf_idx and STR_BUF, not the tag reserved in main()
  21.342 -	// 4 lines y
  21.343 -	spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
  21.344 -	// 2 lines v
  21.345 -	spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
  21.346 -	// 2 lines u
  21.347 -	spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
  21.348 -
  21.349 -	// Wait for these transfers to be completed
  21.350 -	DMA_WAIT_TAG((RETR_BUF + buf_idx));
  21.351 -
  21.352 -	unsigned int i;	// NOTE(review): height and i are unsigned, so for height < 4 the loop bound (height>>2)-1 wraps around instead of giving zero iterations
  21.353 -	for(i=0; i < (height>>2)-1; i++) {
  21.354 -		buf_idx^=1;
  21.355 -		// 4 lines y of the next group, into the other buffer slot
  21.356 -		spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
  21.357 -		deprintf("4lines = %d\n", size_4lines_y);
  21.358 -		// 2 lines v
  21.359 -		spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
  21.360 -		deprintf("2lines = %d\n", size_2lines_vu);
  21.361 -		// 2 lines u
  21.362 -		spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
  21.363 -		deprintf("2lines = %d\n", size_2lines_vu);
  21.364 -		// NOTE(review): the wait below follows the prefetch immediately, so the conversion does not run while that fetch is in flight
  21.365 -		DMA_WAIT_TAG((RETR_BUF + buf_idx));
  21.366 -
  21.367 -		buf_idx^=1;
  21.368 -
  21.369 -		// Convert YUV to BGRA, store it back (first two lines)
  21.370 -		yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
  21.371 -
  21.372 -		// Next two lines
  21.373 -		yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
  21.374 -				v_plane[buf_idx] + stride_vu,
  21.375 -				u_plane[buf_idx] + stride_vu,
  21.376 -				bgra + size_2lines_bgra,
  21.377 -				width);
  21.378 -
  21.379 -		// Wait for previous storing transfer to be completed (NOTE(review): bgra was already rewritten by the conversion above before this wait - confirm)
  21.380 -		DMA_WAIT_TAG(STR_BUF);
  21.381 -
  21.382 -		// Store converted lines in two steps->max transfer size 16384
  21.383 -		spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.384 -		ram_addr_bgra += size_2lines_bgra;
  21.385 -		spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.386 -		ram_addr_bgra += size_2lines_bgra;
  21.387 -
  21.388 -		// Move 4 lines
  21.389 -		ram_addr_y += size_4lines_y;
  21.390 -		ram_addr_v += size_2lines_vu;
  21.391 -		ram_addr_u += size_2lines_vu;
  21.392 -
  21.393 -		buf_idx^=1;
  21.394 -	}
  21.395 -
  21.396 -	// Last group: convert YUV to BGRA, store it back (first two lines)
  21.397 -	yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
  21.398 -
  21.399 -	// Next two lines
  21.400 -	yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
  21.401 -			v_plane[buf_idx] + stride_vu,
  21.402 -			u_plane[buf_idx] + stride_vu,
  21.403 -			bgra + size_2lines_bgra,
  21.404 -			width);
  21.405 -
  21.406 -	// Wait for previous storing transfer to be completed
  21.407 -	DMA_WAIT_TAG(STR_BUF);
  21.408 -	spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.409 -	ram_addr_bgra += size_2lines_bgra;
  21.410 -	spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
  21.411 -
  21.412 -	// Wait for the final store to be completed before returning
  21.413 -	DMA_WAIT_TAG(STR_BUF);
  21.414 -}
  21.415 -
  21.416 -
  21.417 -/* Some vectors needed by the yuv 2 rgb conversion algorithm. The vec_char2int_* patterns are used as spu_shuffle(vec_null, bytes, pattern): each picks four consecutive bytes of the second operand (indices 0x10..0x1F) into the low byte of a 32-bit word and takes the other three bytes from vec_null. */
  21.418 -const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f };
  21.419 -const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  21.420 -const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 };
  21.421 -const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 };
  21.422 -const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B };
  21.423 -const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F };
  21.424 -/* Conversion coefficients; the same values (1.403, 0.344, 0.714, 1.773) appear in the scalar path of yuv_to_rgb_w2_line() */
  21.425 -const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f};
  21.426 -const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f};
  21.427 -const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f};
  21.428 -const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f};
  21.429 -/* 255 in the top byte of every 32-bit word */
  21.430 -const vector unsigned int vec_alpha =  { 255 << 24, 255 << 24, 255 << 24, 255 << 24 };
  21.431 -/* Byte patterns selecting words 0,0,1,1 (upper) resp. 2,2,3,3 (lower) of the first shuffle operand, i.e. each float of that half twice */
  21.432 -const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 };
  21.433 -const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F };
  21.434 -
  21.435 -
  21.436 -#ifdef TESTING
  21.437 -/*
  21.438 - * yuv_to_rgb_w2_line()
  21.439 - *
  21.440 - * - scalar path (only compiled with TESTING): converts 2x2 pixels per step
  21.441 - * - two lines of YUV are taken as input, two lines of BGRA are written
  21.442 - * - width has to be a multiple of 2 (= 4 pixel)
  21.443 - *
  21.444 - * @param y_addr address of the y plane (local store), second line at y_addr + width
  21.445 - * @param v_addr address of the v plane (local store)
  21.446 - * @param u_addr address of the u plane (local store)
  21.447 - * @param bgra_addr_char address of the bgra output buffer (local store)
  21.448 - * @param width the width of a line in pixel
  21.449 - */
  21.450 -void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_char, unsigned int width) {
  21.451 -	// each pixel is stored as an integer
  21.452 -	unsigned int* bgra_addr = (unsigned int*) bgra_addr_char;
  21.453 -
  21.454 -	unsigned int x;
  21.455 -	// Go through each line in steps of 2, because every U and V value is connected to 4 pixels Y (YUV 4:2:0)
  21.456 -	for(x = 0; x < width; x+=2) {
  21.457 -		// Get the 4 Y values (2 from this line, 2 from the next) and the 1 U and 1 V value they share
  21.458 -		const unsigned char Y_1 = *(y_addr + x);
  21.459 -		const unsigned char Y_2 = *(y_addr + x + 1);
  21.460 -		const unsigned char Y_3 = *(y_addr + x + width);
  21.461 -		const unsigned char Y_4 = *(y_addr + x + width + 1);
  21.462 -		const unsigned char U = *(u_addr + (x >> 1));
  21.463 -		const unsigned char V = *(v_addr + (x >> 1));
  21.464 -
  21.465 -		// Start converting: center the chroma values around zero
  21.466 -		float V_minus_128 = (float)((float)V - 128.0f);
  21.467 -		float U_minus_128 = (float)((float)U - 128.0f);
  21.468 -
  21.469 -		float R_precalculate = 1.403f * V_minus_128;
  21.470 -		float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128);
  21.471 -		float B_precalculate = 1.773f * U_minus_128;
  21.472 -
  21.473 -		// Add the per-pixel Y to the shared chroma terms and clamp/cast via float_to_char()
  21.474 -		const unsigned char R_1 = float_to_char((Y_1 + R_precalculate));
  21.475 -		const unsigned char R_2 = float_to_char((Y_2 + R_precalculate));
  21.476 -		const unsigned char R_3 = float_to_char((Y_3 + R_precalculate));
  21.477 -		const unsigned char R_4 = float_to_char((Y_4 + R_precalculate));
  21.478 -		const unsigned char G_1 = float_to_char((Y_1 + G_precalculate));
  21.479 -		const unsigned char G_2 = float_to_char((Y_2 + G_precalculate));
  21.480 -		const unsigned char G_3 = float_to_char((Y_3 + G_precalculate));
  21.481 -		const unsigned char G_4 = float_to_char((Y_4 + G_precalculate));
  21.482 -		const unsigned char B_1 = float_to_char((Y_1 + B_precalculate));
  21.483 -		const unsigned char B_2 = float_to_char((Y_2 + B_precalculate));
  21.484 -		const unsigned char B_3 = float_to_char((Y_3 + B_precalculate));
  21.485 -		const unsigned char B_4 = float_to_char((Y_4 + B_precalculate));
  21.486 -
  21.487 -		// Write back: B in bits 0-7, G in bits 8-15, R in bits 16-23, 255 in bits 24-31
  21.488 -		*(bgra_addr + x) = (B_1 << 0)| (G_1 << 8) | (R_1 << 16) | (255 << 24);
  21.489 -		*(bgra_addr + x + 1) = (B_2 << 0)| (G_2 << 8) | (R_2 << 16) | (255 << 24);
  21.490 -		*(bgra_addr + x + width) = (B_3 << 0)| (G_3 << 8) | (R_3 << 16) | (255 << 24);
  21.491 -		*(bgra_addr + x + width + 1) = (B_4 << 0)| (G_4 << 8) | (R_4 << 16) | (255 << 24);
  21.492 -	}
  21.493 -}
  21.494 -#endif
  21.495 -
  21.496 -
  21.497 -/*
  21.498 - * yuv_to_rgb_w32()
  21.499 - *
  21.500 - * processes to line of yuv-input, width has to be a multiple of 32
  21.501 - * two lines of yuv are taken as input
  21.502 - *
  21.503 - * @param y_addr address of the y plane in local store
  21.504 - * @param v_addr address of the v plane in local store
  21.505 - * @param u_addr address of the u plane in local store
  21.506 - * @param bgra_addr_ address of the bgra output buffer
  21.507 - * @param width the width in pixel
  21.508 - */
  21.509 -void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
  21.510 -	// each pixel is stored as an integer
  21.511 -	unsigned int* bgra_addr = (unsigned int*) bgra_addr_;
  21.512 -
  21.513 -	unsigned int x;
  21.514 -	for(x = 0; x < width; x+=32) {
  21.515 -		// Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt
  21.516 -
  21.517 -		const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x));
  21.518 -		const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16));
  21.519 -		const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width));
  21.520 -		const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16));
  21.521 -		const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1)));
  21.522 -		const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1)));
  21.523 -
  21.524 -		const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128);
  21.525 -		const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128);
  21.526 -		const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128);
  21.527 -		const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128);
  21.528 -
  21.529 -		const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128);
  21.530 -		const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128);
  21.531 -		const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128);
  21.532 -		const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128);
  21.533 -
  21.534 -		vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0);
  21.535 -		vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0);
  21.536 -		vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0);
  21.537 -		vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0);
  21.538 -		vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0);
  21.539 -		vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0);
  21.540 -		vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0);
  21.541 -		vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0);
  21.542 -		vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0);
  21.543 -		vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0);
  21.544 -		vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0);
  21.545 -		vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0);
  21.546 -		vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0);
  21.547 -		vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0);
  21.548 -		vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0);
  21.549 -		vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0);
  21.550 -
  21.551 -		const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1);
  21.552 -		const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2);
  21.553 -		const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3);
  21.554 -		const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4);
  21.555 -
  21.556 -		const vector float R1_precalculate = spu_shuffle(R1a_precalculate,  R1a_precalculate, vec_select_floats_upper);
  21.557 -		const vector float R2_precalculate = spu_shuffle(R1a_precalculate,  R1a_precalculate, vec_select_floats_lower);
  21.558 -		const vector float R3_precalculate = spu_shuffle(R2a_precalculate,  R2a_precalculate, vec_select_floats_upper);
  21.559 -		const vector float R4_precalculate = spu_shuffle(R2a_precalculate,  R2a_precalculate, vec_select_floats_lower);
  21.560 -		const vector float R5_precalculate = spu_shuffle(R3a_precalculate,  R3a_precalculate, vec_select_floats_upper);
  21.561 -		const vector float R6_precalculate = spu_shuffle(R3a_precalculate,  R3a_precalculate, vec_select_floats_lower);
  21.562 -		const vector float R7_precalculate = spu_shuffle(R4a_precalculate,  R4a_precalculate, vec_select_floats_upper);
  21.563 -		const vector float R8_precalculate = spu_shuffle(R4a_precalculate,  R4a_precalculate, vec_select_floats_lower);
  21.564 -
  21.565 -
  21.566 -		const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff));
  21.567 -		const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff));
  21.568 -		const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff));
  21.569 -		const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff));
  21.570 -
  21.571 -		const vector float G1_precalculate = spu_shuffle(G1a_precalculate,  G1a_precalculate, vec_select_floats_upper);
  21.572 -		const vector float G2_precalculate = spu_shuffle(G1a_precalculate,  G1a_precalculate, vec_select_floats_lower);
  21.573 -		const vector float G3_precalculate = spu_shuffle(G2a_precalculate,  G2a_precalculate, vec_select_floats_upper);
  21.574 -		const vector float G4_precalculate = spu_shuffle(G2a_precalculate,  G2a_precalculate, vec_select_floats_lower);
  21.575 -		const vector float G5_precalculate = spu_shuffle(G3a_precalculate,  G3a_precalculate, vec_select_floats_upper);
  21.576 -		const vector float G6_precalculate = spu_shuffle(G3a_precalculate,  G3a_precalculate, vec_select_floats_lower);
  21.577 -		const vector float G7_precalculate = spu_shuffle(G4a_precalculate,  G4a_precalculate, vec_select_floats_upper);
  21.578 -		const vector float G8_precalculate = spu_shuffle(G4a_precalculate,  G4a_precalculate, vec_select_floats_lower);
  21.579 -
  21.580 -
  21.581 -		const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1);
  21.582 -		const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2);
  21.583 -		const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3);
  21.584 -		const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4);
  21.585 -
  21.586 -		const vector float B1_precalculate = spu_shuffle(B1a_precalculate,  B1a_precalculate, vec_select_floats_upper);
  21.587 -		const vector float B2_precalculate = spu_shuffle(B1a_precalculate,  B1a_precalculate, vec_select_floats_lower);
  21.588 -		const vector float B3_precalculate = spu_shuffle(B2a_precalculate,  B2a_precalculate, vec_select_floats_upper);
  21.589 -		const vector float B4_precalculate = spu_shuffle(B2a_precalculate,  B2a_precalculate, vec_select_floats_lower);
  21.590 -		const vector float B5_precalculate = spu_shuffle(B3a_precalculate,  B3a_precalculate, vec_select_floats_upper);
  21.591 -		const vector float B6_precalculate = spu_shuffle(B3a_precalculate,  B3a_precalculate, vec_select_floats_lower);
  21.592 -		const vector float B7_precalculate = spu_shuffle(B4a_precalculate,  B4a_precalculate, vec_select_floats_upper);
  21.593 -		const vector float B8_precalculate = spu_shuffle(B4a_precalculate,  B4a_precalculate, vec_select_floats_lower);
  21.594 -
  21.595 -
  21.596 -		const vector unsigned int  R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate));
  21.597 -		const vector unsigned int  R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate));
  21.598 -		const vector unsigned int  R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate));
  21.599 -		const vector unsigned int  R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate));
  21.600 -		const vector unsigned int  R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate));
  21.601 -		const vector unsigned int  R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate));
  21.602 -		const vector unsigned int  R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate));
  21.603 -		const vector unsigned int  R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate));
  21.604 -		const vector unsigned int  R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate));
  21.605 -		const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate));
  21.606 -		const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate));
  21.607 -		const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate));
  21.608 -		const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate));
  21.609 -		const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate));
  21.610 -		const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate));
  21.611 -		const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate));
  21.612 -
  21.613 -		const vector unsigned int  G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate));
  21.614 -		const vector unsigned int  G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate));
  21.615 -		const vector unsigned int  G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate));
  21.616 -		const vector unsigned int  G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate));
  21.617 -		const vector unsigned int  G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate));
  21.618 -		const vector unsigned int  G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate));
  21.619 -		const vector unsigned int  G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate));
  21.620 -		const vector unsigned int  G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate));
  21.621 -		const vector unsigned int  G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate));
  21.622 -		const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate));
  21.623 -		const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate));
  21.624 -		const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate));
  21.625 -		const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate));
  21.626 -		const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate));
  21.627 -		const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate));
  21.628 -		const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate));
  21.629 -
  21.630 -		const vector unsigned int  B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate));
  21.631 -		const vector unsigned int  B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate));
  21.632 -		const vector unsigned int  B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate));
  21.633 -		const vector unsigned int  B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate));
  21.634 -		const vector unsigned int  B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate));
  21.635 -		const vector unsigned int  B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate));
  21.636 -		const vector unsigned int  B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate));
  21.637 -		const vector unsigned int  B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate));
  21.638 -		const vector unsigned int  B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate));
  21.639 -		const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate));
  21.640 -		const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate));
  21.641 -		const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate));
  21.642 -		const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate));
  21.643 -		const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate));
  21.644 -		const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate));
  21.645 -		const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate));
  21.646 -
  21.647 -		*((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha,  B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1)));
  21.648 -		*((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha,  B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1)));
  21.649 -		*((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha,  B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1)));
  21.650 -		*((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha,  B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1)));
  21.651 -		*((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha,  B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1)));
  21.652 -		*((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha,  B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1)));
  21.653 -		*((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha,  B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1)));
  21.654 -		*((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha,  B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1)));
  21.655 -		*((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha,  B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1)));
  21.656 -		*((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1)));
  21.657 -		*((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1)));
  21.658 -		*((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1)));
  21.659 -		*((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1)));
  21.660 -		*((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1)));
  21.661 -		*((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1)));
  21.662 -		*((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1)));
  21.663 -	}
  21.664 -}
  21.665 -