match.pd: Fold vec_perm with view_convert
This patch improves the codegen for the following test case:

uint64x2_t foo (uint64x2_t r) {
    uint32x4_t a = vreinterpretq_u32_u64 (r);
    uint32_t t;
    t = a[0]; a[0] = a[1]; a[1] = t;
    t = a[2]; a[2] = a[3]; a[3] = t;
    return vreinterpretq_u64_u32 (a);
}

from (-O1):

foo:
        mov     v31.16b, v0.16b
        ins     v0.s[0], v0.s[1]
        ins     v0.s[1], v31.s[0]
        ins     v0.s[2], v31.s[3]
        ins     v0.s[3], v31.s[2]
        ret

to:

foo:
        rev64   v0.4s, v0.4s
        ret

This is achieved by extending the following match.pd pattern to account for type differences between @0 and @1 due to view converts.

/* Simplify vector inserts of other vector extracts to a permute.  */
(simplify
 (bit_insert @0 (BIT_FIELD_REF@2 @1 @rsize @rpos) @ipos)

The patch was bootstrapped and regtested on aarch64-linux-gnu and x86_64-linux-gnu, no regressions.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
Co-authored-by: Richard Biener <rguenther@suse.de>

gcc/
	PR tree-optimization/117093
	* match.pd: Extend
	(bit_insert @0 (BIT_FIELD_REF@2 @1 @rsize @rpos) @ipos) to allow
	type differences between @0 and @1 due to view converts.

gcc/testsuite/
	PR tree-optimization/117093
	* gcc.dg/tree-ssa/pr117093.c: New test.
This commit is contained in:
parent
029c16c15f
commit
c83e2d4757
2 changed files with 25 additions and 5 deletions
13
gcc/match.pd
13
gcc/match.pd
|
@ -9583,7 +9583,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
(if (VECTOR_TYPE_P (type)
|
||||
&& (VECTOR_MODE_P (TYPE_MODE (type))
|
||||
|| optimize_vectors_before_lowering_p ())
|
||||
&& types_match (@0, @1)
|
||||
&& operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
|
||||
TYPE_SIZE (TREE_TYPE (@1)), 0)
|
||||
&& types_match (TREE_TYPE (TREE_TYPE (@0)), TREE_TYPE (@2))
|
||||
&& TYPE_VECTOR_SUBPARTS (type).is_constant ()
|
||||
&& multiple_p (wi::to_poly_offset (@rpos),
|
||||
|
@ -9591,7 +9592,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
(with
|
||||
{
|
||||
unsigned HOST_WIDE_INT elsz
|
||||
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (@1))));
|
||||
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (@0))));
|
||||
poly_uint64 relt = exact_div (tree_to_poly_uint64 (@rpos), elsz);
|
||||
poly_uint64 ielt = exact_div (tree_to_poly_uint64 (@ipos), elsz);
|
||||
unsigned nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
|
||||
|
@ -9602,9 +9603,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
vec_perm_indices sel (builder, 2, nunits);
|
||||
}
|
||||
(if (!VECTOR_MODE_P (TYPE_MODE (type))
|
||||
|| can_vec_perm_const_p (TYPE_MODE (type), TYPE_MODE (type), sel, false))
|
||||
(vec_perm @0 @1 { vec_perm_indices_to_tree
|
||||
(build_vector_type (ssizetype, nunits), sel); })))))
|
||||
|| can_vec_perm_const_p (TYPE_MODE (type),
|
||||
TYPE_MODE (type), sel, false))
|
||||
(vec_perm @0 (view_convert @1)
|
||||
{ vec_perm_indices_to_tree (build_vector_type (ssizetype, nunits),
|
||||
sel); })))))
|
||||
|
||||
(if (canonicalize_math_after_vectorization_p ())
|
||||
(for fmas (FMA)
|
||||
|
|
17
gcc/testsuite/gcc.dg/tree-ssa/pr117093.c
Normal file
17
gcc/testsuite/gcc.dg/tree-ssa/pr117093.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target aarch64*-*-* } } */
/* { dg-final { check-function-bodies "**" "" } } */
/* { dg-options "-O1" } */

/* PR tree-optimization/117093: the element swaps in foo operate on a
   view-converted vector.  They are expected to be folded into a single
   permute and emitted as one REV64 rather than a chain of INS
   instructions.  The test includes <arm_neon.h> and matches AArch64
   assembly, so it is restricted to AArch64 targets.  */

#include <arm_neon.h>

/*
** foo:
**	rev64	v0\.4s, v0\.4s
**	ret
*/
uint64x2_t foo (uint64x2_t r) {
  /* Reinterpret the two 64-bit lanes as four 32-bit lanes.  */
  uint32x4_t a = vreinterpretq_u32_u64 (r);
  uint32_t t;
  /* Swap the two 32-bit lanes within each 64-bit half.  */
  t = a[0]; a[0] = a[1]; a[1] = t;
  t = a[2]; a[2] = a[3]; a[3] = t;
  return vreinterpretq_u64_u32 (a);
}
|
Loading…
Add table
Reference in a new issue