diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7488e925ad5..49c0844ecd7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2013-12-05  Tejas Belagod
+
+	* rtlanal.c (set_noop_p): Return nonzero in case of redundant vec_select
+	for overlapping register lanes.
+
 2013-12-05  Kirill Yukhin
 
 	* config/i386/i386.c (ix86_expand_builtin): Generate
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 0cd0c7e1a74..38f9e36593d 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -1180,6 +1180,27 @@ set_noop_p (const_rtx set)
       dst = SUBREG_REG (dst);
     }
 
+  /* A VEC_SELECT of consecutive lanes of a hard register is a NOOP when
+     the destination hard register overlaps the selected elements.  */
+  if (GET_CODE (src) == VEC_SELECT
+      && REG_P (XEXP (src, 0)) && REG_P (dst)
+      && HARD_REGISTER_P (XEXP (src, 0))
+      && HARD_REGISTER_P (dst))
+    {
+      int i;
+      rtx par = XEXP (src, 1);  /* Vector of selected indices.  */
+      rtx src0 = XEXP (src, 0);  /* Register being selected from.  */
+      int c0 = INTVAL (XVECEXP (par, 0, 0));  /* First selected index.  */
+      HOST_WIDE_INT offset = GET_MODE_UNIT_SIZE (GET_MODE (src0)) * c0;
+      /* Reject selections whose indices are not C0, C0 + 1, C0 + 2, ...  */
+      for (i = 1; i < XVECLEN (par, 0); i++)
+        if (INTVAL (XVECEXP (par, 0, i)) != c0 + i)
+          return 0;
+      return
+        simplify_subreg_regno (REGNO (src0), GET_MODE (src0),
+                               offset, GET_MODE (dst)) == (int) REGNO (dst);
+    }
+
   return (REG_P (src) && REG_P (dst) && REGNO (src) == REGNO (dst));
 }
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2ede52805dd..d9f7de3a38d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-12-05  Tejas Belagod
+
+	* gcc.dg/vect/vect-nop-move.c: New test.
+
 2013-12-05  Max Ostapenko
 
 	* c-c++-common/tsan/atomic_stack.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
new file mode 100644
index 00000000000..19419335097
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -fdump-rtl-combine-details" } */
+/* Run-time check that single lanes read out of float vectors are correct.  */
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+typedef float float32x4_t __attribute__ ((__vector_size__ (16)));
+typedef float float32x2_t __attribute__ ((__vector_size__ (8)));
+/* Return element 3 of the four-float vector X.  */
+NOINLINE float
+foo32x4_be (float32x4_t x)
+{
+  return x[3];
+}
+/* Return element 0 of the four-float vector X.  */
+NOINLINE float
+foo32x4_le (float32x4_t x)
+{
+  return x[0];
+}
+/* Return A unchanged.  NOINLINE stops the callers below inlining it.  */
+NOINLINE float
+bar (float a)
+{
+  return a;
+}
+/* Return element 1 of the two-float vector X, passed through bar.  */
+NOINLINE float
+foo32x2_be (float32x2_t x)
+{
+  return bar (x[1]);
+}
+/* Return element 0 of the two-float vector X, passed through bar.  */
+NOINLINE float
+foo32x2_le (float32x2_t x)
+{
+  return bar (x[0]);
+}
+/* Abort unless every helper returns the element it indexes.  */
+int
+main()
+{
+  float32x4_t a = { 0.0f, 1.0f, 2.0f, 3.0f };
+  float32x2_t b = { 0.0f, 1.0f };
+
+  if (foo32x4_be (a) != 3.0f)
+    abort ();
+
+  if (foo32x4_le (a) != 0.0f)
+    abort ();
+
+  if (foo32x2_be (b) != 1.0f)
+    abort ();
+
+  if (foo32x2_le (b) != 0.0f)
+    abort ();
+
+  return 0;
+}
+/* The combine dump is scanned for a deleted no-op move on aarch64.  */
+/* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "combine" } } */