Fold truncations of left shifts in match.pd

Whilst investigating PR 55278, I noticed that the tree-ssa optimizers
aren't eliminating the promotions of shifts to "int" as inserted by the
c-family front-ends, instead leaving this simplification to
the RTL optimizers.  This patch allows match.pd to do this itself earlier,
narrowing (T)(X << C) to (T)X << C when the constant C is known to be
valid for the (narrower) type T.

Hence for this simple test case:
short foo(short x) { return x << 5; }

the .optimized dump currently looks like:

short int foo (short int x)
{
  int _1;
  int _2;
  short int _4;

  <bb 2> [local count: 1073741824]:
  _1 = (int) x_3(D);
  _2 = _1 << 5;
  _4 = (short int) _2;
  return _4;
}

but with this patch, now becomes:

short int foo (short int x)
{
  short int _2;

  <bb 2> [local count: 1073741824]:
  _2 = x_1(D) << 5;
  return _2;
}

This is always reasonable as RTL expansion knows how to use
widening optabs if it makes sense at the RTL level to perform
this shift in a wider mode.

Of course, there's often a catch.  The above simplification not only
reduces the number of statements in gimple, but also allows further
optimizations, for example the recognition of rotate idioms
and bswap16.  Alas, optimizing things earlier than anticipated
requires several testsuite changes [though all these tests have
been confirmed to generate identical assembly code on x86_64].
The only significant change is that the vectorization pass wouldn't
previously lower rotations of signed integer types.  Hence this
patch includes a refinement to tree-vect-patterns to allow signed
types, by using the equivalent unsigned shifts.

2022-06-15  Roger Sayle  <roger@nextmovesoftware.com>
	    Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
	* match.pd (convert (lshift @1 INTEGER_CST@2)): Narrow integer
	left shifts by a constant when the result is truncated, and the
	shift constant is well-defined.
	* tree-vect-patterns.cc (vect_recog_rotate_pattern): Add
	support for rotations of signed integer types, by lowering
	using unsigned vector shifts.

gcc/testsuite/ChangeLog
	* gcc.dg/fold-convlshift-4.c: New test case.
	* gcc.dg/optimize-bswaphi-1.c: Update found bswap count.
	* gcc.dg/tree-ssa/pr61839_3.c: Shift is now optimized before VRP.
	* gcc.dg/vect/vect-over-widen-1-big-array.c: Remove obsolete tests.
	* gcc.dg/vect/vect-over-widen-1.c: Likewise.
	* gcc.dg/vect/vect-over-widen-3-big-array.c: Likewise.
	* gcc.dg/vect/vect-over-widen-3.c: Likewise.
	* gcc.dg/vect/vect-over-widen-4-big-array.c: Likewise.
	* gcc.dg/vect/vect-over-widen-4.c: Likewise.
This commit is contained in:
Roger Sayle 2022-06-15 09:31:13 +02:00
parent 4b1a827f02
commit acb1e6f43d
11 changed files with 49 additions and 44 deletions

View file

@ -3621,17 +3621,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (integer_zerop (@2) || integer_all_onesp (@2))
(cmp @0 @2)))))
/* Both signed and unsigned lshift produce the same result, so use
the form that minimizes the number of conversions. Postpone this
transformation until after shifts by zero have been folded. */
/* Narrow a lshift by constant. */
(simplify
(convert (lshift:s@0 (convert:s@1 @2) INTEGER_CST@3))
(convert (lshift:s@0 @1 INTEGER_CST@2))
(if (INTEGRAL_TYPE_P (type)
&& tree_nop_conversion_p (type, TREE_TYPE (@0))
&& INTEGRAL_TYPE_P (TREE_TYPE (@2))
&& TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type)
&& !integer_zerop (@3))
(lshift (convert @2) @3)))
&& INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& !integer_zerop (@2)
&& TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0)))
(if (TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
|| wi::ltu_p (wi::to_wide (@2), TYPE_PRECISION (type)))
(lshift (convert @1) @2)
(if (wi::ltu_p (wi::to_wide (@2), TYPE_PRECISION (TREE_TYPE (@0))))
{ build_zero_cst (type); }))))
/* Simplifications of conversions. */

View file

@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
/* Left-shift a short by the constant 5 and return the result as a short.
   The dg-final scans in this file require that neither an "(int)" nor a
   "(short int)" conversion remains in the optimized tree dump.  */
short foo(short x)
{
return x << 5;
}
/* { dg-final { scan-tree-dump-not "\\(int\\)" "optimized" } } */
/* { dg-final { scan-tree-dump-not "\\(short int\\)" "optimized" } } */

View file

@ -68,4 +68,4 @@ get_unaligned_16_be (unsigned char *p)
/* { dg-final { scan-tree-dump-times "16 bit load in target endianness found at" 4 "bswap" } } */
/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 5 "bswap" } } */
/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 4 "bswap" } } */

View file

@ -1,6 +1,6 @@
/* PR tree-optimization/61839. */
/* { dg-do run } */
/* { dg-options "-O2 -fdump-tree-vrp -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-threadfull1" } */
/* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-threadfull1" } */
__attribute__ ((noinline))
int foo (int a, unsigned b)
@ -21,6 +21,4 @@ int main ()
foo (-1, b);
}
/* Scan for c [12, 13] << 8 in function foo. */
/* { dg-final { scan-tree-dump-times "3072 : 3328" 1 "vrp1" } } */
/* { dg-final { scan-tree-dump-times "3072" 0 "optimized" } } */

View file

@ -58,9 +58,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

View file

@ -62,9 +62,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

View file

@ -59,9 +59,7 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 8} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 9} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

View file

@ -57,9 +57,7 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 8} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 9} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

View file

@ -62,9 +62,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

View file

@ -66,9 +66,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

View file

@ -2614,8 +2614,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
|| TYPE_PRECISION (TREE_TYPE (lhs)) != 16
|| TYPE_PRECISION (type) <= 16
|| TREE_CODE (oprnd0) != SSA_NAME
|| BITS_PER_UNIT != 8
|| !TYPE_UNSIGNED (TREE_TYPE (lhs)))
|| BITS_PER_UNIT != 8)
return NULL;
stmt_vec_info def_stmt_info;
@ -2688,8 +2687,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
if (TREE_CODE (oprnd0) != SSA_NAME
|| TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
|| !INTEGRAL_TYPE_P (type)
|| !TYPE_UNSIGNED (type))
|| !INTEGRAL_TYPE_P (type))
return NULL;
stmt_vec_info def_stmt_info;
@ -2745,31 +2743,36 @@ vect_recog_rotate_pattern (vec_info *vinfo,
goto use_rotate;
}
tree utype = unsigned_type_for (type);
tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
if (!uvectype)
return NULL;
/* If vector/vector or vector/scalar shifts aren't supported by the target,
don't do anything here either. */
optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
if (!optab1
|| optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
|| optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
|| !optab2
|| optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
|| optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
{
if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
return NULL;
optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
if (!optab1
|| optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
|| optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
|| !optab2
|| optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
|| optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
return NULL;
}
*type_out = vectype;
if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
{
def = vect_recog_temp_ssa_var (type, NULL);
def = vect_recog_temp_ssa_var (utype, NULL);
def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
oprnd0 = def;
@ -2779,7 +2782,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
ext_def = vect_get_external_def_edge (vinfo, oprnd1);
def = NULL_TREE;
scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
def = oprnd1;
else if (def_stmt && gimple_assign_cast_p (def_stmt))
@ -2793,7 +2796,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
if (def == NULL_TREE)
{
def = vect_recog_temp_ssa_var (type, NULL);
def = vect_recog_temp_ssa_var (utype, NULL);
def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
}
@ -2839,13 +2842,13 @@ vect_recog_rotate_pattern (vec_info *vinfo,
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
}
var1 = vect_recog_temp_ssa_var (type, NULL);
var1 = vect_recog_temp_ssa_var (utype, NULL);
def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
? LSHIFT_EXPR : RSHIFT_EXPR,
oprnd0, def);
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
var2 = vect_recog_temp_ssa_var (type, NULL);
var2 = vect_recog_temp_ssa_var (utype, NULL);
def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
? RSHIFT_EXPR : LSHIFT_EXPR,
oprnd0, def2);
@ -2855,9 +2858,15 @@ vect_recog_rotate_pattern (vec_info *vinfo,
vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
/* Pattern supported. Create a stmt to be used to replace the pattern. */
var = vect_recog_temp_ssa_var (type, NULL);
var = vect_recog_temp_ssa_var (utype, NULL);
pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
if (!useless_type_conversion_p (type, utype))
{
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
tree result = vect_recog_temp_ssa_var (type, NULL);
pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
}
return pattern_stmt;
}