diff --git a/configure.in b/configure.in
index e8a6191..22d518c 100644
--- a/configure.in
+++ b/configure.in
@@ -760,6 +760,41 @@ PIXMAN_CFLAGS="$PIXMAN_CFLAGS $MMX_CFLAGS"
 AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
 
 dnl ===========================================================================
+dnl Check for VMX/Altivec
+if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
+    VMX_CFLAGS="-faltivec"
+else
+    VMX_CFLAGS="-maltivec -mabi=altivec"
+fi
+
+have_vmx_intrinsics=no
+AC_MSG_CHECKING(for VMX/Altivec intrinsics in the compiler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS $VMX_CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+#error "Need GCC >= 3.4 for sane altivec support"
+#endif
+#include <altivec.h>
+int main () {
+    vector unsigned int v = vec_splat_u32 (1);
+    v = vec_sub (v, v);
+    return 0;
+}], have_vmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+AC_MSG_RESULT($have_vmx_intrinsics)
+
+if test $have_vmx_intrinsics = yes ; then
+    AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
+else
+    VMX_CFLAGS=
+fi
+AC_SUBST(VMX_CFLAGS)
+
+AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
+
+dnl ===========================================================================
+
 AC_ARG_ENABLE(gcov,
 	      AS_HELP_STRING([--enable-gcov],
diff --git a/pixman/configure.in b/pixman/configure.in
index 4cbb5ba..915d972 100644
--- a/pixman/configure.in
+++ b/pixman/configure.in
@@ -84,6 +84,40 @@ AC_SUBST(MMX_CFLAGS)
 AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
 
 dnl ===========================================================================
+dnl Check for VMX/Altivec
+if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
+    VMX_CFLAGS="-faltivec"
+else
+    VMX_CFLAGS="-maltivec -mabi=altivec"
+fi
+
+have_vmx_intrinsics=no
+AC_MSG_CHECKING(for VMX/Altivec intrinsics in the compiler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS $VMX_CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+#error "Need GCC >= 3.4 for sane altivec support"
+#endif
+#include <altivec.h>
+int main () {
+    vector unsigned int v = vec_splat_u32 (1);
+    v = vec_sub (v, v);
+    return 0;
+}], have_vmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+AC_MSG_RESULT($have_vmx_intrinsics)
+
+if test $have_vmx_intrinsics = yes ; then
+    AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
+else
+    VMX_CFLAGS=
+fi
+AC_SUBST(VMX_CFLAGS)
+
+AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
+
+dnl ===========================================================================
 
 AC_OUTPUT([
 libpixman.pc
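
[Note: both configure.in probes compile the same test program with $VMX_CFLAGS
added (-faltivec on Apple GCC, -maltivec -mabi=altivec on FSF GCC). Shown
standalone below for clarity; this is a sketch, not part of the patch.]

#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
#error "Need GCC >= 3.4 for sane altivec support"
#endif
#include <altivec.h>

int main (void)
{
    vector unsigned int v = vec_splat_u32 (1); /* broadcast immediate 1 to all lanes */
    v = vec_sub (v, v);                        /* exercise a second intrinsic */
    return 0;
}
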
diff --git a/pixman/src/Makefile.am b/pixman/src/Makefile.am
index 106f60b..39c0dbd 100644
--- a/pixman/src/Makefile.am
+++ b/pixman/src/Makefile.am
@@ -42,5 +42,15 @@ libpixman_mmx_la_LIBADD = $(PIXMAN_LDADD)
 libpixman_la_LIBADD += libpixman-mmx.la
 endif
 
+if USE_VMX
+noinst_LTLIBRARIES += libpixman-vmx.la
+libpixman_vmx_la_SOURCES = \
+	fbvmx.c \
+	fbvmx.h
+libpixman_vmx_la_CFLAGS = $(VMX_CFLAGS)
+libpixman_vmx_la_LIBADD = $(VMX_LDADD)
+libpixman_la_LIBADD += libpixman-vmx.la
+endif
+
 TESTS = check-pixmanint.sh
 EXTRA_DIST = $(TESTS)
diff --git a/pixman/src/fbpict.c b/pixman/src/fbpict.c
index 2d71402..6e521b4 100644
--- a/pixman/src/fbpict.c
+++ b/pixman/src/fbpict.c
@@ -29,6 +29,7 @@
 
 #include "fbpict.h"
 #include "fbmmx.h"
+#include "fbvmx.h"
 
 static CARD32
 fbOver (CARD32 x, CARD32 y)
@@ -1410,6 +1411,14 @@ pixman_composite (pixman_operator_t op,
     }
 #endif
 
+#ifdef USE_VMX
+    static Bool vmx_setup = FALSE;
+    if (!vmx_setup) {
+	fbComposeSetupVMX();
+	vmx_setup = TRUE;
+    }
+#endif
+
     xDst += pDst->pDrawable->x;
     yDst += pDst->pDrawable->y;
     if (pSrc->pDrawable) {
@@ -2004,6 +2013,42 @@ CLEANUP_REGION:
 }
 
 /* The CPU detection code needs to be in a file not compiled with
+ * "-maltivec -mabi=altivec", as gcc would try to save vector registers
+ * across function calls, causing SIGILL on CPUs without Altivec/VMX.
+ */
+#ifdef USE_VMX
+
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmp;
+static volatile sig_atomic_t in_test = 0;
+
+static void vmx_test (int sig) {
+    if (!in_test) {
+	signal (sig, SIG_DFL);
+	raise (sig);
+    }
+    in_test = 0;
+    siglongjmp (jmp, 1);
+}
+
+pixman_private
+Bool fbHaveVMX (void) {
+    signal (SIGILL, vmx_test);
+    if (sigsetjmp (jmp, 1)) {
+	signal (SIGILL, SIG_DFL);
+    } else {
+	in_test = 1;
+	asm volatile ( "vor 0, 0, 0" );
+	signal (SIGILL, SIG_DFL);
+	return 1;
+    }
+    return 0;
+}
+#endif //USE_VMX
+
+/* The CPU detection code needs to be in a file not compiled with
  * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
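
[Note: the trap-based probe above is the classic pattern for runtime ISA
detection without OS help: install a SIGILL handler, execute one vector
opcode, and treat the trap as "not supported". A minimal standalone version,
assuming a PowerPC target and a file compiled without -maltivec, might look
like this (a sketch, not part of the patch):]

#include <signal.h>
#include <setjmp.h>
#include <stdio.h>

static sigjmp_buf probe_env;

static void on_sigill (int sig)
{
    (void) sig;
    siglongjmp (probe_env, 1);   /* unwind out of the faulting instruction */
}

int main (void)
{
    int have_vmx = 0;
    signal (SIGILL, on_sigill);
    if (sigsetjmp (probe_env, 1) == 0) {
#ifdef __powerpc__
	__asm__ volatile ("vor 0, 0, 0");  /* a VMX opcode; traps if AltiVec is absent */
	have_vmx = 1;
#endif
    }
    signal (SIGILL, SIG_DFL);
    printf ("VMX: %s\n", have_vmx ? "yes" : "no");
    return 0;
}
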
diff --git a/pixman/src/fbvmx.c b/pixman/src/fbvmx.c
new file mode 100644
index 0000000..b6003e2
--- /dev/null
+++ b/pixman/src/fbvmx.c
@@ -0,0 +1,1107 @@
+/*
+ * Copyright © 2006 Luca Barbato
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Luca Barbato (lu_zero@gentoo.org)
+ *
+ * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
+ */
+
+#include "fbpict.h"
+#include "fbvmx.h"
+#include <altivec.h>
+//#include <stdio.h>
+
+static __inline__ vector unsigned int
+splat_alpha (vector unsigned int pix) {
+    return vec_perm (pix, pix,
+		     (vector unsigned char)AVV(
+			 0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04,
+			 0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C));
+}
+
+static __inline__ vector unsigned int
+pix_multiply (vector unsigned int p, vector unsigned int a)
+{
+    vector unsigned short hi, lo, mod;
+
+    /* unpack to short */
+    hi = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)AVV(0),
+		    (vector unsigned char)p);
+    mod = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)AVV(0),
+		    (vector unsigned char)a);
+
+    hi = vec_mladd (hi, mod, (vector unsigned short)
+		    AVV(0x0080,0x0080,0x0080,0x0080,
+			0x0080,0x0080,0x0080,0x0080));
+
+    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
+
+    hi = vec_sr (hi, vec_splat_u16 (8));
+
+    /* unpack to short */
+    lo = (vector unsigned short)
+	vec_mergel ((vector unsigned char)AVV(0),
+		    (vector unsigned char)p);
+    mod = (vector unsigned short)
+	vec_mergel ((vector unsigned char)AVV(0),
+		    (vector unsigned char)a);
+
+    lo = vec_mladd (lo, mod, (vector unsigned short)
+		    AVV(0x0080,0x0080,0x0080,0x0080,
+			0x0080,0x0080,0x0080,0x0080));
+
+    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
+
+    lo = vec_sr (lo, vec_splat_u16 (8));
+
+    return (vector unsigned int)vec_packsu (hi, lo);
+}
+
+static __inline__ vector unsigned int
+pix_add (vector unsigned int a, vector unsigned int b)
+{
+    return (vector unsigned int)vec_adds ((vector unsigned char)a,
+					  (vector unsigned char)b);
+}
+
+static __inline__ vector unsigned int
+pix_add_mul (vector unsigned int x, vector unsigned int a,
+	     vector unsigned int y, vector unsigned int b)
+{
+    vector unsigned short hi, lo, mod, hiy, loy, mody;
+
+    hi = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)AVV(0),
+		    (vector unsigned char)x);
+    mod = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)AVV(0),
+		    (vector unsigned char)a);
+    hiy = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)AVV(0),
+		    (vector unsigned char)y);
+    mody = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)AVV(0),
+		    (vector unsigned char)b);
+
+    hi = vec_mladd (hi, mod, (vector unsigned short)
+		    AVV(0x0080,0x0080,0x0080,0x0080,
+			0x0080,0x0080,0x0080,0x0080));
+
+    hi = vec_mladd (hiy, mody, hi);
+
+    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
+
+    hi = vec_sr (hi, vec_splat_u16 (8));
+
+    lo = (vector unsigned short)
+	vec_mergel ((vector unsigned char)AVV(0),
+		    (vector unsigned char)x);
+    mod = (vector unsigned short)
+	vec_mergel ((vector unsigned char)AVV(0),
+		    (vector unsigned char)a);
+
+    loy = (vector unsigned short)
+	vec_mergel ((vector unsigned char)AVV(0),
+		    (vector unsigned char)y);
+    mody = (vector unsigned short)
+	vec_mergel ((vector unsigned char)AVV(0),
+		    (vector unsigned char)b);
+
+    lo = vec_mladd (lo, mod, (vector unsigned short)
+		    AVV(0x0080,0x0080,0x0080,0x0080,
+			0x0080,0x0080,0x0080,0x0080));
+
+    lo = vec_mladd (loy, mody, lo);
+
+    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
+
+    lo = vec_sr (lo, vec_splat_u16 (8));
+
+    return (vector unsigned int)vec_packsu (hi, lo);
+}
+
+static __inline__ vector unsigned int
+negate (vector unsigned int src)
+{
+    return vec_nor (src, src);
+}
+
+// dest*~srca + src
+static __inline__ vector unsigned int
+over (vector unsigned int src, vector unsigned int srca,
+      vector unsigned int dest)
+{
+    vector unsigned char tmp = (vector unsigned char)
+	pix_multiply (dest, negate (srca));
+    tmp = vec_adds ((vector unsigned char)src, tmp);
+    return (vector unsigned int)tmp;
+}
+
+// in_over: (src IN mask) OVER dest; IN is pix_multiply
+#define in_over(src, srca, mask, dest) over(pix_multiply(src, mask),\
+					    pix_multiply(srca, mask), dest)
+
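
[Note: a scalar model of the vector math above, for reference; a sketch, not
part of the patch. pix_multiply computes x*a/255 per byte with the classic
round-to-nearest trick t = x*a + 128; result = (t + (t >> 8)) >> 8, and
over() is the premultiplied OVER operator with a saturating add, exactly
like vec_adds.]

#include <stdint.h>

static uint8_t mul8 (uint8_t x, uint8_t a)      /* x*a/255, rounded */
{
    uint16_t t = (uint16_t)(x * a + 0x80);
    return (uint8_t)((t + (t >> 8)) >> 8);
}

static uint8_t over8 (uint8_t s, uint8_t sa, uint8_t d)  /* one channel of over() */
{
    uint16_t t = s + mul8 (d, 255 - sa);        /* negate() is ~x, i.e. 255-x per byte */
    return t > 255 ? 255 : (uint8_t)t;          /* saturate, like vec_adds */
}
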
+#define COMPUTE_SHIFT_MASK(source) \
+    source ## _mask = vec_lvsl (0, source);
+
+#define COMPUTE_SHIFT_MASKS(dest, source) \
+    dest ## _mask = vec_lvsl (0, dest); \
+    source ## _mask = vec_lvsl (0, source); \
+    store_mask = vec_lvsr (0, dest);
+
+#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
+    mask ## _mask = vec_lvsl (0, mask); \
+    dest ## _mask = vec_lvsl (0, dest); \
+    source ## _mask = vec_lvsl (0, source); \
+    store_mask = vec_lvsr (0, dest);
+
+// Note: the caller has to declare the temporary variables (tmp1..tmp4,
+// edges, the *_mask vectors and store_mask), and tmp3/tmp4 must remain
+// untouched between LOAD_VECTORS and STORE_VECTOR.
+#define LOAD_VECTOR(source) \
+    tmp1 = (typeof(v ## source))vec_ld(0, source); \
+    tmp2 = (typeof(v ## source))vec_ld(15, source); \
+    v ## source = (typeof(v ## source)) \
+	vec_perm(tmp1, tmp2, source ## _mask);
+
+#define LOAD_VECTORS(dest, source) \
+    tmp1 = (typeof(tmp1))vec_ld(0, source); \
+    tmp2 = (typeof(tmp2))vec_ld(15, source); \
+    tmp3 = (typeof(tmp3))vec_ld(0, dest); \
+    v ## source = (typeof(v ## source)) \
+	vec_perm(tmp1, tmp2, source ## _mask); \
+    tmp4 = (typeof(tmp4))vec_ld(15, dest); \
+    v ## dest = (typeof(v ## dest)) \
+	vec_perm(tmp3, tmp4, dest ## _mask);
+
+#define LOAD_VECTORSC(dest, source, mask) \
+    tmp1 = (typeof(tmp1))vec_ld(0, source); \
+    tmp2 = (typeof(tmp2))vec_ld(15, source); \
+    tmp3 = (typeof(tmp3))vec_ld(0, dest); \
+    v ## source = (typeof(v ## source)) \
+	vec_perm(tmp1, tmp2, source ## _mask); \
+    tmp4 = (typeof(tmp4))vec_ld(15, dest); \
+    tmp1 = (typeof(tmp1))vec_ld(0, mask); \
+    v ## dest = (typeof(v ## dest)) \
+	vec_perm(tmp3, tmp4, dest ## _mask); \
+    tmp2 = (typeof(tmp2))vec_ld(15, mask); \
+    v ## mask = (typeof(v ## mask)) \
+	vec_perm(tmp1, tmp2, mask ## _mask);
+
+#define STORE_VECTOR(dest) \
+    edges = (typeof(edges))vec_perm(tmp4, tmp3, dest ## _mask); \
+    tmp3 = (typeof(tmp3)) \
+	vec_perm((vector unsigned char)v ## dest, edges, store_mask); \
+    tmp1 = (typeof(tmp1)) \
+	vec_perm(edges, (vector unsigned char)v ## dest, store_mask); \
+    vec_st((vector unsigned int) tmp3, 15, dest ); \
+    vec_st((vector unsigned int) tmp1, 0, dest );
+
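[Note: the LOAD_*/STORE_VECTOR macros above are the standard AltiVec
unaligned-access idiom: vec_ld only loads 16-byte-aligned blocks, so an
unaligned vector is assembled from two aligned loads and a vec_perm driven
by the shift mask vec_lvsl derives from the pointer's misalignment
(STORE_VECTOR does the reverse with vec_lvsr). A minimal sketch of the load
side, assuming <altivec.h> and -maltivec:]

static vector unsigned char
load_unaligned (const unsigned char *p)
{
    vector unsigned char lo  = vec_ld (0, p);    /* aligned block holding p[0]  */
    vector unsigned char hi  = vec_ld (15, p);   /* aligned block holding p[15] */
    vector unsigned char sel = vec_lvsl (0, p);  /* permute mask from misalignment */
    return vec_perm (lo, hi, sel);               /* merge the two halves */
}
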
+static FASTCALL void
+vmxCombineMaskU (CARD32 *src, const CARD32 *msk, int width)
+{
+    int i;
+    vector unsigned int vsrc, vmsk;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 src_mask, msk_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(src, msk)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(src, msk)
+
+	vsrc = pix_multiply(vsrc, splat_alpha(vmsk));
+
+	STORE_VECTOR(src)
+
+	msk+=4;
+	src+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = msk[i] >> 24;
+	CARD32 s = src[i];
+	FbByteMul(s, a);
+	src[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineOverU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = over(vsrc, splat_alpha(vsrc), vdest);
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 ia = Alpha(~s);
+
+	FbByteMulAdd(d, ia, s);
+	dest[i] = d;
+    }
+}
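
[Note: the scalar tails above and below rely on byte-arithmetic macros from
fbpict.h. Their semantics, as assumed here (a reference sketch, not
definitions from this patch):]

/*
 *   Alpha(x)                  x >> 24, the alpha byte of an ARGB pixel
 *   FbByteMul(x, a)           per byte: x = x*a/255, a is one 8-bit factor
 *   FbByteMulC(x, c)          per byte: x = x*c/255, each byte of x scaled
 *                             by the matching byte of c
 *   FbByteAdd(x, y)           per byte: x = x + y, saturating
 *   FbByteMulAdd(x, a, y)     per byte: x = x*a/255 + y, saturating
 *   FbByteAddMul(x, a, y, b)  per byte: x = x*a/255 + y*b/255, saturating
 *   FbByteMulAddC/FbByteAddMulC  component-mask variants of the above
 */
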
+
+static FASTCALL void
+vmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = over(vdest, splat_alpha(vdest), vsrc);
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 ia = Alpha(~d);
+
+	FbByteMulAdd(s, ia, d);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineInU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_multiply(vsrc, splat_alpha(vdest));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 a = Alpha(dest[i]);
+	FbByteMul(s, a);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_multiply(vdest, splat_alpha(vsrc));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 d = dest[i];
+	CARD32 a = Alpha(src[i]);
+	FbByteMul(d, a);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineOutU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_multiply(vsrc, splat_alpha(negate(vdest)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 a = Alpha(~dest[i]);
+	FbByteMul(s, a);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_multiply(vdest, splat_alpha(negate(vsrc)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 d = dest[i];
+	CARD32 a = Alpha(~src[i]);
+	FbByteMul(d, a);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineAtopU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_add_mul(vsrc, splat_alpha(vdest),
+			    vdest, splat_alpha(negate(vsrc)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 dest_a = Alpha(d);
+	CARD32 src_ia = Alpha(~s);
+
+	FbByteAddMul(s, dest_a, d, src_ia);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_add_mul(vdest, splat_alpha(vsrc),
+			    vsrc, splat_alpha(negate(vdest)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 src_a = Alpha(s);
+	CARD32 dest_ia = Alpha(~d);
+
+	FbByteAddMul(s, dest_ia, d, src_a);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineXorU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_add_mul(vsrc, splat_alpha(negate(vdest)),
+			    vdest, splat_alpha(negate(vsrc)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 src_ia = Alpha(~s);
+	CARD32 dest_ia = Alpha(~d);
+
+	FbByteAddMul(s, dest_ia, d, src_ia);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineAddU (CARD32 *dest, const CARD32 *src, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORS(dest, src)
+
+	vdest = pix_add(vsrc, vdest);
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	FbByteAdd(d, s);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_multiply(vsrc, vmask);
+
+	STORE_VECTOR(dest)
+
+	mask+=4;
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	FbByteMulC(s, a);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineOverC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = in_over(vsrc, splat_alpha(vsrc), vmask, vdest);
+
+	STORE_VECTOR(dest)
+
+	mask+=4;
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	FbByteMulC(s, a);
+	FbByteMulAddC(d, ~a, s);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineOverReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = over(vdest, splat_alpha(vdest), pix_multiply(vsrc, vmask));
+
+	STORE_VECTOR(dest)
+
+	mask+=4;
+	src+=4;
+	dest+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 ia = Alpha(~d);
+	FbByteMulC(s, a);
+	FbByteMulAdd(s, ia, d);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineInC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_multiply(pix_multiply(vsrc, vmask), splat_alpha(vdest));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 da = Alpha(dest[i]);
+	FbByteMulC(s, a);
+	FbByteMul(s, da);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineInReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_multiply(vdest, pix_multiply(vmask, splat_alpha(vsrc)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 d = dest[i];
+	CARD32 sa = Alpha(src[i]);
+	FbByteMul(a, sa);
+	FbByteMulC(d, a);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineOutC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	/* OUT: (src IN mask) scaled by ~dest alpha, matching the scalar tail */
+	vdest = pix_multiply(pix_multiply(vsrc, vmask),
+			     splat_alpha(negate(vdest)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 da = Alpha(~d);
+	FbByteMulC(s, a);
+	FbByteMul(s, da);
+	dest[i] = s;
+    }
+}
+
+static FASTCALL void
+vmxCombineOutReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_multiply(vdest,
+			     negate(pix_multiply(vmask, splat_alpha(vsrc))));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 sa = Alpha(s);
+	FbByteMul(a, sa);
+	FbByteMulC(d, ~a);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineAtopC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	/* ATOP: (src IN mask)*da + dest*~(mask*sa), matching the scalar tail */
+	vdest = pix_add_mul(pix_multiply(vsrc, vmask), splat_alpha(vdest),
+			    vdest,
+			    negate(pix_multiply(vmask,
+						splat_alpha(vsrc))));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 sa = Alpha(s);
+	CARD32 da = Alpha(d);
+
+	FbByteMulC(s, a);
+	FbByteMul(a, sa);
+	FbByteAddMulC(d, ~a, s, da);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineAtopReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_add_mul(vdest,
+			    pix_multiply(vmask, splat_alpha(vsrc)),
+			    pix_multiply(vsrc, vmask),
+			    negate(splat_alpha(vdest)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 sa = Alpha(s);
+	CARD32 da = Alpha(d);
+
+	FbByteMulC(s, a);
+	FbByteMul(a, sa);
+	FbByteAddMulC(d, a, s, ~da);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineXorC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_add_mul(vdest,
+			    negate(pix_multiply(vmask, splat_alpha(vsrc))),
+			    pix_multiply(vsrc, vmask),
+			    negate(splat_alpha(vdest)));
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+	CARD32 sa = Alpha(s);
+	CARD32 da = Alpha(d);
+
+	FbByteMulC(s, a);
+	FbByteMul(a, sa);
+	FbByteAddMulC(d, ~a, s, ~da);
+	dest[i] = d;
+    }
+}
+
+static FASTCALL void
+vmxCombineAddC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+    int i;
+    vector unsigned int vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+			 dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    //printf("%s\n",__PRETTY_FUNCTION__);
+    for (i = width/4; i > 0; i--) {
+
+	LOAD_VECTORSC(dest, src, mask)
+
+	vdest = pix_add(pix_multiply(vsrc, vmask), vdest);
+
+	STORE_VECTOR(dest)
+
+	src+=4;
+	dest+=4;
+	mask+=4;
+    }
+
+    for (i = width%4; --i >= 0;) {
+	CARD32 a = mask[i];
+	CARD32 s = src[i];
+	CARD32 d = dest[i];
+
+	FbByteMulC(s, a);
+	FbByteAdd(s, d);
+	dest[i] = s;
+    }
+}
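
[Note: the "C" (component-alpha) combiners above differ from the "U" variants
in that the mask supplies a separate 8-bit factor per channel instead of a
single alpha. A self-contained scalar model of the in_over() used by
vmxCombineOverC, sketched for reference:]

#include <stdint.h>

static uint8_t mul8c (uint8_t x, uint8_t a)   /* x*a/255, rounded as in pix_multiply */
{
    uint16_t t = (uint16_t)(x * a + 0x80);
    return (uint8_t)((t + (t >> 8)) >> 8);
}

static uint32_t in_over_scalar (uint32_t src, uint32_t mask, uint32_t dest)
{
    uint8_t sa = src >> 24;                   /* source alpha, splatted per channel */
    uint32_t out = 0;
    int k;

    for (k = 0; k < 32; k += 8) {
	uint8_t s = src >> k, m = mask >> k, d = dest >> k;
	/* dest = src*mask + dest*(255 - srca*mask), all factors /255 */
	uint16_t t = mul8c (s, m) + mul8c (d, 255 - mul8c (sa, m));
	out |= (uint32_t)(t > 255 ? 255 : t) << k;  /* saturating add */
    }
    return out;
}
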
+
+#if 0
+void
+fbCompositeSolid_nx8888vmx (pixman_operator_t op,
+			    PicturePtr pSrc,
+			    PicturePtr pMask,
+			    PicturePtr pDst,
+			    INT16 xSrc,
+			    INT16 ySrc,
+			    INT16 xMask,
+			    INT16 yMask,
+			    INT16 xDst,
+			    INT16 yDst,
+			    CARD16 width,
+			    CARD16 height)
+{
+    CARD32 src;
+    CARD32 *dstLine, *dst;
+    FbStride dstStride;
+
+    fbComposeGetSolid(pSrc, pDst, src);
+
+    if (src >> 24 == 0)
+	return;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+//	vmxCombineOverU(dst, src, width);
+    }
+}
+
+void
+fbCompositeSolid_nx0565vmx (pixman_operator_t op,
+			    PicturePtr pSrc,
+			    PicturePtr pMask,
+			    PicturePtr pDst,
+			    INT16 xSrc,
+			    INT16 ySrc,
+			    INT16 xMask,
+			    INT16 yMask,
+			    INT16 xDst,
+			    INT16 yDst,
+			    CARD16 width,
+			    CARD16 height)
+{
+    CARD32 src;
+    CARD16 *dstLine, *dst;
+    CARD16 w;
+    FbStride dstStride;
+
+    fbComposeGetSolid(pSrc, pDst, src);
+
+    if (src >> 24 == 0)
+	return;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	vmxCombineOverU565(dst, src, width);
+    }
+}
+
+#endif
+
+extern FbComposeFunctions composeFunctions;
+
+void fbComposeSetupVMX(void)
+{
+    /* check if we have VMX support and initialize accordingly */
+    if (fbHaveVMX()) {
+	composeFunctions.combineU[PIXMAN_OPERATOR_OVER] = vmxCombineOverU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_OVER_REVERSE] = vmxCombineOverReverseU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_IN] = vmxCombineInU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_IN_REVERSE] = vmxCombineInReverseU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_OUT] = vmxCombineOutU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_OUT_REVERSE] = vmxCombineOutReverseU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_ATOP] = vmxCombineAtopU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_ATOP_REVERSE] = vmxCombineAtopReverseU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_XOR] = vmxCombineXorU;
+	composeFunctions.combineU[PIXMAN_OPERATOR_ADD] = vmxCombineAddU;
+
+	composeFunctions.combineC[PIXMAN_OPERATOR_SRC] = vmxCombineSrcC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_OVER] = vmxCombineOverC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_OVER_REVERSE] = vmxCombineOverReverseC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_IN] = vmxCombineInC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_IN_REVERSE] = vmxCombineInReverseC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_OUT] = vmxCombineOutC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_OUT_REVERSE] = vmxCombineOutReverseC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_ATOP] = vmxCombineAtopC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_ATOP_REVERSE] = vmxCombineAtopReverseC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_XOR] = vmxCombineXorC;
+	composeFunctions.combineC[PIXMAN_OPERATOR_ADD] = vmxCombineAddC;
+
+	composeFunctions.combineMaskU = vmxCombineMaskU;
+    }
+}
+
+#if 0
+int main (int argc, char** argv)
+{
+    int data[5][10245] __attribute__((aligned(16)));
+    int i;
+
+    //input data
+    for (i = 0; i < 10240; i++)
+	data[0][i] = data[1][i] = data[2][i] = data[3][i] = (i&0xff) * 0x01010101;
+
+    for (i = 0; i < 10240; i++)
+	data[4][i] = (i&0xff) * 0x01010101;
+
+    for (i = 0; i < 10240; i++)
+	if (data[0][i] != data[1][i]) {
+	    //printf ("wrong byte %d : %d != %d\n", i, data[0][i], data[1][i]);
+	}
+
+    //printf ("combine \n");
+    fbCombineSrcC (data[0], data[2], data[4], 1024);
+    vmxCombineSrcC (data[1], data[3], data[4], 1024);
+
+    for (i = 0; i < 10240; i++)
+	if (data[0][i] != data[1][i]) {
+	    //printf ("wrong byte %0d : %0x != %0x\n", i, data[0][i], data[1][i]);
+	}
+    return 0;
+}
+#endif
diff --git a/pixman/src/fbvmx.h b/pixman/src/fbvmx.h
new file mode 100644
index 0000000..e690dbf
--- /dev/null
+++ b/pixman/src/fbvmx.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright 2006 Luca Barbato
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Luca Barbato (lu_zero@gentoo.org)
+ *
+ * Based on work by Owen Taylor, Søren Sandmann and Lars Knoll
+ */
+#ifdef USE_VMX
+
+pixman_private
+Bool fbHaveVMX(void);
+
+#else
+#define fbHaveVMX() FALSE
+#endif
+
+#ifdef USE_VMX
+
+#define AVV(x...) {x}
+
+pixman_private
+void fbComposeSetupVMX(void);
+
+#if 0
+pixman_private
+void fbCompositeIn_nx8x8vmx (pixman_operator_t op,
+			     PicturePtr pSrc,
+			     PicturePtr pMask,
+			     PicturePtr pDst,
+			     INT16 xSrc,
+			     INT16 ySrc,
+			     INT16 xMask,
+			     INT16 yMask,
+			     INT16 xDst,
+			     INT16 yDst,
+			     CARD16 width,
+			     CARD16 height);
+
+pixman_private
+void fbCompositeSolidMask_nx8888x0565Cvmx (pixman_operator_t op,
+					   PicturePtr pSrc,
+					   PicturePtr pMask,
+					   PicturePtr pDst,
+					   INT16 xSrc,
+					   INT16 ySrc,
+					   INT16 xMask,
+					   INT16 yMask,
+					   INT16 xDst,
+					   INT16 yDst,
+					   CARD16 width,
+					   CARD16 height);
+
+pixman_private
+void fbCompositeSrcAdd_8888x8888vmx (pixman_operator_t op,
+				     PicturePtr pSrc,
+				     PicturePtr pMask,
+				     PicturePtr pDst,
+				     INT16 xSrc,
+				     INT16 ySrc,
+				     INT16 xMask,
+				     INT16 yMask,
+				     INT16 xDst,
+				     INT16 yDst,
+				     CARD16 width,
+				     CARD16 height);
+
+pixman_private
+void fbCompositeSolidMask_nx8888x8888Cvmx (pixman_operator_t op,
+					   PicturePtr pSrc,
+					   PicturePtr pMask,
+					   PicturePtr pDst,
+					   INT16 xSrc,
+					   INT16 ySrc,
+					   INT16 xMask,
+					   INT16 yMask,
+					   INT16 xDst,
+					   INT16 yDst,
+					   CARD16 width,
+					   CARD16 height);
+
+pixman_private
+void fbCompositeSolidMask_nx8x8888vmx (pixman_operator_t op,
+				       PicturePtr pSrc,
+				       PicturePtr pMask,
+				       PicturePtr pDst,
+				       INT16 xSrc,
+				       INT16 ySrc,
+				       INT16 xMask,
+				       INT16 yMask,
+				       INT16 xDst,
+				       INT16 yDst,
+				       CARD16 width,
+				       CARD16 height);
+
+pixman_private
+void fbCompositeSolidMaskSrc_nx8x8888vmx (pixman_operator_t op,
+					  PicturePtr pSrc,
+					  PicturePtr pMask,
+					  PicturePtr pDst,
+					  INT16 xSrc,
+					  INT16 ySrc,
+					  INT16 xMask,
+					  INT16 yMask,
+					  INT16 xDst,
+					  INT16 yDst,
+					  CARD16 width,
+					  CARD16 height);
+
+pixman_private
+void fbCompositeSrcAdd_8888x8x8vmx (pixman_operator_t op,
+				    PicturePtr pSrc,
+				    PicturePtr pMask,
+				    PicturePtr pDst,
+				    INT16 xSrc,
+				    INT16 ySrc,
+				    INT16 xMask,
+				    INT16 yMask,
+				    INT16 xDst,
+				    INT16 yDst,
+				    CARD16 width,
+				    CARD16 height);
+
+pixman_private
+void fbCompositeIn_8x8vmx (pixman_operator_t op,
+			   PicturePtr pSrc,
+			   PicturePtr pMask,
+			   PicturePtr pDst,
+			   INT16 xSrc,
+			   INT16 ySrc,
+			   INT16 xMask,
+			   INT16 yMask,
+			   INT16 xDst,
+			   INT16 yDst,
+			   CARD16 width,
+			   CARD16 height);
+
+pixman_private
+void fbCompositeSrcAdd_8000x8000vmx (pixman_operator_t op,
+				     PicturePtr pSrc,
+				     PicturePtr pMask,
+				     PicturePtr pDst,
+				     INT16 xSrc,
+				     INT16 ySrc,
+				     INT16 xMask,
+				     INT16 yMask,
+				     INT16 xDst,
+				     INT16 yDst,
+				     CARD16 width,
+				     CARD16 height);
+
+pixman_private
+void fbCompositeSrc_8888RevNPx8888vmx (pixman_operator_t op,
+				       PicturePtr pSrc,
+				       PicturePtr pMask,
+				       PicturePtr pDst,
+				       INT16 xSrc,
+				       INT16 ySrc,
+				       INT16 xMask,
+				       INT16 yMask,
+				       INT16 xDst,
+				       INT16 yDst,
+				       CARD16 width,
+				       CARD16 height);
+
+pixman_private
+void fbCompositeSrc_8888x0565vmx (pixman_operator_t op,
+				  PicturePtr pSrc,
+				  PicturePtr pMask,
+				  PicturePtr pDst,
+				  INT16 xSrc,
+				  INT16 ySrc,
+				  INT16 xMask,
+				  INT16 yMask,
+				  INT16 xDst,
+				  INT16 yDst,
+				  CARD16 width,
+				  CARD16 height);
+
+pixman_private
+void fbCompositeSrc_8888RevNPx0565vmx (pixman_operator_t op,
+				       PicturePtr pSrc,
+				       PicturePtr pMask,
+				       PicturePtr pDst,
+				       INT16 xSrc,
+				       INT16 ySrc,
+				       INT16 xMask,
+				       INT16 yMask,
+				       INT16 xDst,
+				       INT16 yDst,
+				       CARD16 width,
+				       CARD16 height);
+
+pixman_private
+void fbCompositeSolid_nx8888vmx (pixman_operator_t op,
+				 PicturePtr pSrc,
+				 PicturePtr pMask,
+				 PicturePtr pDst,
+				 INT16 xSrc,
+				 INT16 ySrc,
+				 INT16 xMask,
+				 INT16 yMask,
+				 INT16 xDst,
+				 INT16 yDst,
+				 CARD16 width,
+				 CARD16 height);
+
+pixman_private
+void fbCompositeSolid_nx0565vmx (pixman_operator_t op,
+				 PicturePtr pSrc,
+				 PicturePtr pMask,
+				 PicturePtr pDst,
+				 INT16 xSrc,
+				 INT16 ySrc,
+				 INT16 xMask,
+				 INT16 yMask,
+				 INT16 xDst,
+				 INT16 yDst,
+				 CARD16 width,
+				 CARD16 height);
+
+pixman_private
+void fbCompositeSolidMask_nx8x0565vmx (pixman_operator_t op,
+				       PicturePtr pSrc,
+				       PicturePtr pMask,
+				       PicturePtr pDst,
+				       INT16 xSrc,
+				       INT16 ySrc,
+				       INT16 xMask,
+				       INT16 yMask,
+				       INT16 xDst,
+				       INT16 yDst,
+				       CARD16 width,
+				       CARD16 height);
+
+pixman_private
+void fbCompositeSrc_x888x8x8888vmx (pixman_operator_t op,
+				    PicturePtr pSrc,
+				    PicturePtr pMask,
+				    PicturePtr pDst,
+				    INT16 xSrc,
+				    INT16 ySrc,
+				    INT16 xMask,
+				    INT16 yMask,
+				    INT16 xDst,
+				    INT16 yDst,
+				    CARD16 width,
+				    CARD16 height);
+
+pixman_private
+void fbCompositeSrc_8888x8x8888vmx (pixman_operator_t op,
+				    PicturePtr pSrc,
+				    PicturePtr pMask,
+				    PicturePtr pDst,
+				    INT16 xSrc,
+				    INT16 ySrc,
+				    INT16 xMask,
+				    INT16 yMask,
+				    INT16 xDst,
+				    INT16 yDst,
+				    CARD16 width,
+				    CARD16 height);
+
+pixman_private
+void fbCompositeSrc_8888x8888vmx (pixman_operator_t op,
+				  PicturePtr pSrc,
+				  PicturePtr pMask,
+				  PicturePtr pDst,
+				  INT16 xSrc,
+				  INT16 ySrc,
+				  INT16 xMask,
+				  INT16 yMask,
+				  INT16 xDst,
+				  INT16 yDst,
+				  CARD16 width,
+				  CARD16 height);
+
+pixman_private
+Bool fbCopyAreavmx (FbPixels *pSrc,
+		    FbPixels *pDst,
+		    int src_x,
+		    int src_y,
+		    int dst_x,
+		    int dst_y,
+		    int width,
+		    int height);
+
+pixman_private
+void fbCompositeCopyAreavmx (pixman_operator_t op,
+			     PicturePtr pSrc,
+			     PicturePtr pMask,
+			     PicturePtr pDst,
+			     INT16 xSrc,
+			     INT16 ySrc,
+			     INT16 xMask,
+			     INT16 yMask,
+			     INT16 xDst,
+			     INT16 yDst,
+			     CARD16 width,
+			     CARD16 height);
+
+pixman_private
+Bool fbSolidFillvmx (FbPixels *pDraw,
+		     int x,
+		     int y,
+		     int width,
+		     int height,
+		     FbBits xor);
+#endif
+#endif /* USE_VMX */