re PR tree-optimization/90510 (Unnecessary permutation)
2019-05-21 Richard Biener <rguenther@suse.de> PR middle-end/90510 * fold-const.c (fold_read_from_vector): New function. * fold-const.h (fold_read_from_vector): Declare. * match.pd (VEC_PERM_EXPR): Build BIT_INSERT_EXPRs for single-element insert permutations. Canonicalize selector further and fix issue with last commit. * gcc.target/i386/pr90510.c: New testcase. From-SVN: r271463
This commit is contained in:
parent
3b0657dce5
commit
4f8b89f092
6 changed files with 109 additions and 6 deletions
|
@ -1,3 +1,12 @@
|
|||
2019-05-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/90510
|
||||
* fold-const.c (fold_read_from_vector): New function.
|
||||
* fold-const.h (fold_read_from_vector): Declare.
|
||||
* match.pd (VEC_PERM_EXPR): Build BIT_INSERT_EXPRs for
|
||||
single-element insert permutations. Canonicalize selector
|
||||
further and fix issue with last commit.
|
||||
|
||||
2019-05-21 Vladislav Ivanishin <vlad@ispras.ru>
|
||||
|
||||
* tree-cfg.h (split_critical_edges): Add for_edge_insertion_p
|
||||
|
|
|
@ -13793,6 +13793,28 @@ fold_read_from_constant_string (tree exp)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Folds a read from vector element at IDX of vector ARG. */
|
||||
|
||||
tree
|
||||
fold_read_from_vector (tree arg, poly_uint64 idx)
|
||||
{
|
||||
unsigned HOST_WIDE_INT i;
|
||||
if (known_lt (idx, TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)))
|
||||
&& known_ge (idx, 0u)
|
||||
&& idx.is_constant (&i))
|
||||
{
|
||||
if (TREE_CODE (arg) == VECTOR_CST)
|
||||
return VECTOR_CST_ELT (arg, i);
|
||||
else if (TREE_CODE (arg) == CONSTRUCTOR)
|
||||
{
|
||||
if (i >= CONSTRUCTOR_NELTS (arg))
|
||||
return build_zero_cst (TREE_TYPE (TREE_TYPE (arg)));
|
||||
return CONSTRUCTOR_ELT (arg, i)->value;
|
||||
}
|
||||
}
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Return the tree for neg (ARG0) when ARG0 is known to be either
|
||||
an integer constant, real, or fixed-point constant.
|
||||
|
||||
|
|
|
@ -100,6 +100,7 @@ extern tree fold_bit_and_mask (tree, tree, enum tree_code,
|
|||
tree, enum tree_code, tree, tree,
|
||||
tree, enum tree_code, tree, tree, tree *);
|
||||
extern tree fold_read_from_constant_string (tree);
|
||||
extern tree fold_read_from_vector (tree, poly_uint64);
|
||||
#if GCC_VEC_PERN_INDICES_H
|
||||
extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
|
||||
#endif
|
||||
|
|
56
gcc/match.pd
56
gcc/match.pd
|
@ -5406,6 +5406,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
op0 = op1;
|
||||
sel.rotate_inputs (1);
|
||||
}
|
||||
else if (known_ge (poly_uint64 (sel[0]), nelts))
|
||||
{
|
||||
std::swap (op0, op1);
|
||||
sel.rotate_inputs (1);
|
||||
}
|
||||
}
|
||||
gassign *def;
|
||||
tree cop0 = op0, cop1 = op1;
|
||||
|
@ -5429,9 +5434,46 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
(with
|
||||
{
|
||||
bool changed = (op0 == op1 && !single_arg);
|
||||
tree ins = NULL_TREE;
|
||||
unsigned at = 0;
|
||||
|
||||
/* See if the permutation is performing a single element
|
||||
insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR
|
||||
in that case. But only if the vector mode is supported,
|
||||
otherwise this is invalid GIMPLE. */
|
||||
if (TYPE_MODE (type) != BLKmode
|
||||
&& (TREE_CODE (cop0) == VECTOR_CST
|
||||
|| TREE_CODE (cop0) == CONSTRUCTOR
|
||||
|| TREE_CODE (cop1) == VECTOR_CST
|
||||
|| TREE_CODE (cop1) == CONSTRUCTOR))
|
||||
{
|
||||
if (sel.series_p (1, 1, nelts + 1, 1))
|
||||
{
|
||||
/* After canonicalizing the first elt to come from the
|
||||
first vector we only can insert the first elt from
|
||||
the first vector. */
|
||||
at = 0;
|
||||
ins = fold_read_from_vector (cop0, 0);
|
||||
op0 = op1;
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
|
||||
for (at = 0; at < encoded_nelts; ++at)
|
||||
if (maybe_ne (sel[at], at))
|
||||
break;
|
||||
if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
|
||||
{
|
||||
if (known_lt (at, nelts))
|
||||
ins = fold_read_from_vector (cop0, sel[at]);
|
||||
else
|
||||
ins = fold_read_from_vector (cop1, sel[at] - nelts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate a canonical form of the selector. */
|
||||
if (sel.encoding () != builder)
|
||||
if (!ins && sel.encoding () != builder)
|
||||
{
|
||||
/* Some targets are deficient and fail to expand a single
|
||||
argument permutation while still allowing an equivalent
|
||||
|
@ -5450,10 +5492,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
so use the preferred form. */
|
||||
op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
|
||||
}
|
||||
/* Differences in the encoder do not necessarily mean
|
||||
differences in the resulting vector. */
|
||||
changed = !operand_equal_p (op2, oldop2, 0);
|
||||
if (!operand_equal_p (op2, oldop2, 0))
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
(if (changed)
|
||||
(vec_perm { op0; } { op1; } { op2; })))))))))
|
||||
(if (ins)
|
||||
(bit_insert { op0; } { ins; }
|
||||
{ bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
|
||||
(if (changed)
|
||||
(vec_perm { op0; } { op1; } { op2; }))))))))))
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2019-05-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/90510
|
||||
* gcc.target/i386/pr90510.c: New testcase.
|
||||
|
||||
2019-05-21 Martin Liska <mliska@suse.cz>
|
||||
|
||||
* gcc.target/i386/pr90500-1.c: Add missing '""'.
|
||||
|
|
22
gcc/testsuite/gcc.target/i386/pr90510.c
Normal file
22
gcc/testsuite/gcc.target/i386/pr90510.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
|
||||
|
||||
typedef double __v2df __attribute__ ((__vector_size__ (16)));
|
||||
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__v2df
|
||||
_mm_add_sd_A (__v2df x, __v2df y)
|
||||
{
|
||||
double tem = x[0] + y[0];
|
||||
return __builtin_shuffle ( x, (__v2df) { tem, tem }, (__v2di) { 2, 1 } );
|
||||
}
|
||||
|
||||
__v2df
|
||||
_mm_add_sd_B (__v2df x, __v2df y)
|
||||
{
|
||||
__v2df z = { (x[0] + y[0]), x[1] };
|
||||
return z;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "optimized" } } */
|
||||
/* { dg-final { scan-assembler-not "unpck" } } */
|
Loading…
Add table
Reference in a new issue