src/render/SDL_yuv_mmx.c
changeset 11150 2ee7d2fa299b
parent 10737 3406a0f8b041
child 11156 5ba02f3c5a8b
     1.1 --- a/src/render/SDL_yuv_mmx.c	Thu Jul 20 10:46:38 2017 -0700
     1.2 +++ b/src/render/SDL_yuv_mmx.c	Thu Jul 20 10:48:57 2017 -0700
     1.3 @@ -91,22 +91,11 @@
     1.4      mod = (mod+cols+mod)*4;               /* increment for row1 in byte */
     1.5  
     1.6      __asm__ __volatile__ (
     1.7 -        /* tap dance to workaround the inability to use %%ebx at will... */
     1.8 -        /*  move one thing to the stack... */
     1.9 -        "pushl $0\n"  /* save a slot on the stack. */
    1.10 -        "pushl %%ebx\n"  /* save %%ebx. */
    1.11 -        "movl %0, %%ebx\n"  /* put the thing in ebx. */
    1.12 -        "movl %%ebx,4(%%esp)\n"  /* put the thing in the stack slot. */
    1.13 -        "popl %%ebx\n"  /* get back %%ebx (the PIC register). */
    1.14 -
    1.15          ".align 8\n"
    1.16          "1:\n"
    1.17  
    1.18          /* create Cr (result in mm1) */
    1.19 -        "pushl %%ebx\n"
    1.20 -        "movl 4(%%esp),%%ebx\n"
    1.21 -        "movd (%%ebx),%%mm1\n"   /*         0  0  0  0  v3 v2 v1 v0 */
    1.22 -        "popl %%ebx\n"
    1.23 +        "movd (%0),%%mm1\n"   /*         0  0  0  0  v3 v2 v1 v0 */
    1.24          "pxor %%mm7,%%mm7\n"      /*         00 00 00 00 00 00 00 00 */
    1.25          "movd (%2), %%mm2\n"           /*    0  0  0  0 l3 l2 l1 l0 */
    1.26          "punpcklbw %%mm7,%%mm1\n" /*         0  v3 0  v2 00 v1 00 v0 */
    1.27 @@ -214,7 +203,7 @@
    1.28          "addl $4,%2\n"            /* lum+4 */
    1.29          "leal 16(%3),%3\n"        /* row1+16 */
    1.30          "leal 16(%5),%5\n"        /* row2+16 */
    1.31 -        "addl $2,(%%esp)\n"        /* cr+2 */
    1.32 +        "addl $2,%0\n"        /* cr+2 */
    1.33          "addl $2,%1\n"           /* cb+2 */
    1.34  
    1.35          "addl $4,%6\n"            /* x+4 */
    1.36 @@ -228,10 +217,9 @@
    1.37          "cmpl %7,%2\n"
    1.38          "jl 1b\n"
    1.39  
    1.40 -        "addl $4,%%esp\n"  /* get rid of the stack slot we reserved. */
    1.41          "emms\n"  /* reset MMX registers. */
    1.42          :
    1.43 -        : "m" (cr), "r"(cb),"r"(lum),
    1.44 +        : "r" (cr), "r"(cb),"r"(lum),
    1.45            "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
    1.46            "m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
    1.47            "m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
    1.48 @@ -254,23 +242,12 @@
    1.49      mod = (mod+cols+mod)*2;               /* increment for row1 in byte */
    1.50  
    1.51      __asm__ __volatile__(
    1.52 -        /* tap dance to workaround the inability to use %%ebx at will... */
    1.53 -        /*  move one thing to the stack... */
    1.54 -        "pushl $0\n"  /* save a slot on the stack. */
    1.55 -        "pushl %%ebx\n"  /* save %%ebx. */
    1.56 -        "movl %0, %%ebx\n"  /* put the thing in ebx. */
    1.57 -        "movl %%ebx, 4(%%esp)\n"  /* put the thing in the stack slot. */
    1.58 -        "popl %%ebx\n"  /* get back %%ebx (the PIC register). */
    1.59 -
    1.60          ".align 8\n"
    1.61          "1:\n"
    1.62  
    1.63          "movd           (%1),                   %%mm0\n" /* 4 Cb         0  0  0  0 u3 u2 u1 u0 */
    1.64          "pxor           %%mm7,                  %%mm7\n"
    1.65 -        "pushl %%ebx\n"
    1.66 -        "movl 4(%%esp), %%ebx\n"
    1.67 -        "movd (%%ebx), %%mm1\n"   /* 4 Cr                0  0  0  0 v3 v2 v1 v0 */
    1.68 -        "popl %%ebx\n"
    1.69 +        "movd (%0), %%mm1\n"   /* 4 Cr                0  0  0  0 v3 v2 v1 v0 */
    1.70  
    1.71          "punpcklbw      %%mm7,                  %%mm0\n" /* 4 W cb   0 u3  0 u2  0 u1  0 u0 */
    1.72          "punpcklbw      %%mm7,                  %%mm1\n" /* 4 W cr   0 v3  0 v2  0 v1  0 v0 */
    1.73 @@ -400,7 +377,7 @@
    1.74  
    1.75          "addl           $8,                     %6\n"
    1.76          "addl           $8,                     %2\n"
    1.77 -        "addl           $4,                     (%%esp)\n"
    1.78 +        "addl           $4,                     %0\n"
    1.79          "addl           $4,                     %1\n"
    1.80          "cmpl           %4,                     %6\n"
    1.81          "leal           16(%3),                 %3\n"
    1.82 @@ -413,10 +390,9 @@
    1.83          "movl           $0,     %6\n" /* x=0 */
    1.84          "cmpl           %7,     %2\n"
    1.85          "jl             1b\n"
    1.86 -        "addl $4, %%esp\n"  /* get rid of the stack slot we reserved. */
    1.87          "emms\n"
    1.88          :
    1.89 -        : "m" (cr), "r"(cb),"r"(lum),
    1.90 +        : "r" (cr), "r"(cb),"r"(lum),
    1.91            "r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
    1.92            "m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
    1.93            "m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),