Reviewer note (placed ahead of the first file header so the hunks below are untouched):
  * New test gcc.dg/vect/bb-slp-77.c: compile-only; its dg-final directive expects
    the "slp1" dump to contain "optimized: basic block" exactly once on targets
    matching vect_int_mult && vect_element_align, excluding powerpc*-*-*.
  * gcc/tree-vectorizer.cc, pass_vectorize::execute: at both sites where an
    internal call is folded with boolean_false_node, loop->dont_vectorize is
    additionally reset to false.
  * NOTE(review): the two #include targets were missing from the text this was
    recovered from; <stdint.h> and <string.h> are inferred from the use of the
    fixed-width integer types and memcpy -- confirm against the committed test.
  * NOTE(review): leading whitespace was lost as well; the indentation below is a
    reconstruction (tabs assumed in the tree-vectorizer.cc context lines) -- confirm
    before applying, or apply with whitespace-insensitive matching.

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-77.c b/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
new file mode 100644
index 00000000000..b2cc1d114f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
@@ -0,0 +1,74 @@
+
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+#include <stdint.h>
+#include <string.h>
+
+
+typedef struct {
+    uint16_t d;
+    uint16_t m;
+    uint8_t val1[4];
+    uint8_t val2[16];
+} st1;
+
+typedef struct {
+    float d;
+    float s;
+    int8_t val2[32];
+} st2;
+
+float table[1 << 16];
+
+inline static float foo(uint16_t f) {
+    uint16_t s;
+    memcpy(&s, &f, sizeof(uint16_t));
+    return table[s];
+}
+
+
+void test(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) {
+    const int nb = n / 32;
+
+
+    const st1 * restrict x = vx;
+    const st2 * restrict y = vy;
+
+    float sumf = 0.0;
+
+    for (int i = 0; i < nb; i++) {
+        uint32_t val1;
+        memcpy(&val1, x[i].val1, sizeof(val1));
+
+        int sumi0 = 0;
+        int sumi1 = 0;
+
+        if (val1) {
+            for (int j = 0; j < 16; ++j) {
+                const uint8_t xh_0 = ((val1 >> (j)) << 4) & 0x10;
+                const uint8_t xh_1 = ((val1 >> (j + 12)) ) & 0x10;
+
+                const int32_t x0 = (x[i].val2[j] & 0xF) | xh_0;
+                const int32_t x1 = (x[i].val2[j] >> 4) | xh_1;
+
+                sumi0 += (x0 * y[i].val2[j]);
+                sumi1 += (x1 * y[i].val2[j + 16]);
+            }
+        } else {
+            for (int j = 0; j < 16; ++j) {
+                const int32_t x0 = (x[i].val2[j] & 0xF);
+                const int32_t x1 = (x[i].val2[j] >> 4);
+
+                sumi0 += (x0 * y[i].val2[j]);
+                sumi1 += (x1 * y[i].val2[j + 16]);
+            }
+        }
+
+        int sumi = sumi0 + sumi1;
+        sumf += (foo(x[i].d)*y[i].d)*sumi + foo(x[i].m)*y[i].s;
+    }
+
+    *s = sumf;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1" { target { { vect_int_mult && vect_element_align } && { ! powerpc*-*-* } } } } } */
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
index af112f212fe..16fa0ec1bb7 100644
--- a/gcc/tree-vectorizer.cc
+++ b/gcc/tree-vectorizer.cc
@@ -1326,6 +1326,7 @@ pass_vectorize::execute (function *fun)
 	if (g)
 	  {
 	    fold_loop_internal_call (g, boolean_false_node);
+	    loop->dont_vectorize = false;
 	    ret |= TODO_cleanup_cfg;
 	    g = NULL;
 	  }
@@ -1335,6 +1336,7 @@ pass_vectorize::execute (function *fun)
 	if (g)
 	  {
 	    fold_loop_internal_call (g, boolean_false_node);
+	    loop->dont_vectorize = false;
 	    ret |= TODO_cleanup_cfg;
 	  }
       }