Optimize VEC_PERM_EXPR with same permutation index and operation
The sequence

  c1 = VEC_PERM_EXPR (a, a, mask)
  c2 = VEC_PERM_EXPR (b, b, mask)
  c3 = c1 op c2

can be optimized to

  c = a op b
  c3 = VEC_PERM_EXPR (c, c, mask)

for all integer vector operations other than division, and for floating-point operations when the mask is a full permutation (a constant mask that selects every vector element exactly once).

gcc/ChangeLog:

  PR target/98167
  * match.pd: New perm + vector op patterns for int and fp vector.

gcc/testsuite/ChangeLog:

  PR target/98167
  * gcc.target/i386/pr98167.c: New test.
This commit is contained in:
parent
73b582a8e3
commit
dc95e1e970
2 changed files with 98 additions and 0 deletions
54
gcc/match.pd
54
gcc/match.pd
|
@ -8245,3 +8245,57 @@ and,
|
|||
(bit_and (negate @0) integer_onep@1)
|
||||
(if (!TYPE_OVERFLOW_SANITIZED (type))
|
||||
(bit_and @0 @1)))
|
||||
|
||||
/* Optimize
|
||||
c1 = VEC_PERM_EXPR (a, a, mask)
|
||||
c2 = VEC_PERM_EXPR (b, b, mask)
|
||||
c3 = c1 op c2
|
||||
-->
|
||||
c = a op b
|
||||
c3 = VEC_PERM_EXPR (c, c, mask)
|
||||
For all integer non-div operations, i.e. the ones listed in the FOR
below: plus, minus, mult, bit_and, bit_ior, bit_xor, lshift, rshift.
Each permute must take a single input twice (@0 @0 and @1 @1) and both
must share the same mask @2; the mask is not required to be constant.
NOTE(review): the replacement spells (op @0 @1) twice; presumably both
copies end up as a single statement -- confirm, or capture the result
once and reuse it.  */
|
||||
(for op (plus minus mult bit_and bit_ior bit_xor
|
||||
lshift rshift)
|
||||
(simplify
|
||||
(op (vec_perm @0 @0 @2) (vec_perm @1 @1 @2))
|
||||
(if (VECTOR_INTEGER_TYPE_P (type))
|
||||
(vec_perm (op @0 @1) (op @0 @1) @2))))
|
||||
|
||||
/* Similar for float arithmetic when permutation constant covers
|
||||
all vector elements. */
|
||||
(for op (plus minus mult)
|
||||
(simplify
|
||||
(op (vec_perm @0 @0 VECTOR_CST@2) (vec_perm @1 @1 VECTOR_CST@2))
|
||||
(if (VECTOR_FLOAT_TYPE_P (type)
|
||||
&& TYPE_VECTOR_SUBPARTS (type).is_constant ())
|
||||
(with
|
||||
{
|
||||
tree perm_cst = @2;
|
||||
vec_perm_builder builder;
|
||||
bool full_perm_p = false;
|
||||
if (tree_to_vec_perm_builder (&builder, perm_cst))
|
||||
{
|
||||
unsigned HOST_WIDE_INT nelts;
|
||||
|
||||
nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
|
||||
/* Create a vec_perm_indices for the VECTOR_CST. */
|
||||
vec_perm_indices sel (builder, 1, nelts);
|
||||
|
||||
/* Check if perm indices covers all vector elements. */
|
||||
if (sel.encoding ().encoded_full_vector_p ())
|
||||
{
|
||||
auto_sbitmap seen (nelts);
|
||||
unsigned HOST_WIDE_INT count = 0, i;
|
||||
|
||||
for (i = 0; i < nelts; i++)
|
||||
{
|
||||
if (!bitmap_set_bit (seen, sel[i].to_constant ()))
|
||||
break;
|
||||
count++;
|
||||
}
|
||||
full_perm_p = count == nelts;
|
||||
}
|
||||
}
|
||||
}
|
||||
(if (full_perm_p)
|
||||
(vec_perm (op @0 @1) (op @0 @1) @2))))))
|
||||
|
|
44
gcc/testsuite/gcc.target/i386/pr98167.c
Normal file
44
gcc/testsuite/gcc.target/i386/pr98167.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* PR target/98167 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times "vpshufd\t" 8 } } */
|
||||
/* { dg-final { scan-assembler-times "vpermilps\t" 3 } } */
|
||||
|
||||
/* Constant shuffle selectors for 4-, 8- and 16-element vectors.  Each list
   names every element index of its vector exactly once.  Only VEC_PERM_4
   is expanded by the INT_PERMS / FP_PERMS instantiations visible below.  */
#define VEC_PERM_4 \
|
||||
2, 3, 1, 0
|
||||
#define VEC_PERM_8 \
|
||||
4, 5, 6, 7, 3, 2, 1, 0
|
||||
#define VEC_PERM_16 \
|
||||
8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
/* Expand to a vector typedef and one test function.
   TYPE - element type; SIZE - element count; OP - binary operator token;
   NAME - suffix for the function name.
   The typedef is v<SIZE>s<TYPE> with vector_size (4 * SIZE) bytes
   (assumes TYPE is 4 bytes wide -- TODO confirm for the target).
   The function <TYPE>foo<SIZE>i_<NAME> shuffles both arguments with the
   same VEC_PERM_<SIZE> selector and returns the shuffled values combined
   with OP.  */
#define TYPE_PERM_OP(type, size, op, name) \
|
||||
typedef type v##size##s##type __attribute__ ((vector_size(4*size))); \
|
||||
v##size##s##type type##foo##size##i_##name (v##size##s##type a, \
|
||||
v##size##s##type b) \
|
||||
{ \
|
||||
v##size##s##type a1 = __builtin_shufflevector (a, a, \
|
||||
VEC_PERM_##size); \
|
||||
v##size##s##type b1 = __builtin_shufflevector (b, b, \
|
||||
VEC_PERM_##size); \
|
||||
return a1 op b1; \
|
||||
}
|
||||
|
||||
/* Instantiate TYPE_PERM_OP for a 4-element int vector.  */
#define INT_PERMS(op, name) \
|
||||
TYPE_PERM_OP (int, 4, op, name) \
|
||||
|
||||
/* Instantiate TYPE_PERM_OP for a 4-element float vector.  */
#define FP_PERMS(op, name) \
|
||||
TYPE_PERM_OP (float, 4, op, name) \
|
||||
|
||||
/* Eight integer test functions, one per operator; this is the same count
   the vpshufd dg-final directive expects (presumably one shuffle per
   function -- confirm).  */
INT_PERMS (+, add)
|
||||
INT_PERMS (-, sub)
|
||||
INT_PERMS (*, mul)
|
||||
INT_PERMS (|, ior)
|
||||
INT_PERMS (^, xor)
|
||||
INT_PERMS (&, and)
|
||||
INT_PERMS (<<, shl)
|
||||
INT_PERMS (>>, shr)
|
||||
/* Three float test functions; this is the same count the vpermilps
   dg-final directive expects.  */
FP_PERMS (+, add)
|
||||
FP_PERMS (-, sub)
|
||||
FP_PERMS (*, mul)
|
||||
|
Loading…
Add table
Reference in a new issue