Enhance NARROW FLOAT_EXPR vectorization by truncating integer to lower precision.

Similar to WIDEN FLOAT_EXPR, when no direct optab exists, try an
intermediate integer type whenever gimple ranger can tell it's safe.

i.e.
When there's no direct optab for vector long long -> vector float, but
the value range of the integer can be represented as int, try vector int
-> vector float if available.

gcc/ChangeLog:

	PR tree-optimization/108804
	* tree-vect-patterns.cc (vect_get_range_info): Remove static.
	* tree-vect-stmts.cc (vect_create_vectorized_demotion_stmts):
	Add new parameter narrow_src_p.
	(vectorizable_conversion): Enhance NARROW FLOAT_EXPR
	vectorization by truncating to lower precision.
	* tree-vectorizer.h (vect_get_range_info): New declaration.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr108804.c: New test.
This commit is contained in:
liuhongt 2023-02-22 17:54:46 +08:00
parent e4c8f7024f
commit 3279b62230
4 changed files with 121 additions and 32 deletions

View file

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-mavx2 -Ofast -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 1 "vect" } } */
typedef unsigned long long uint64_t;
uint64_t d[512];
float f[1024];
/* Test kernel: for each of the first 512 elements of d, mask the 64-bit
   unsigned value and store the result, converted to float, in f.
   The final-scan directive at the top of this file expects the vect dump
   to report this loop as vectorized.  */
void foo() {
for (int i=0; i<512; ++i) {
uint64_t k = d[i];
/* The mask 0x3F30FFFF is below 2^30, so the masked value always fits in
   a signed 32-bit int even though k is 64 bits wide.  Per the commit
   description, this is the case where the vectorizer may narrow the
   integer before the int -> float conversion.  */
f[i]=(k & 0x3F30FFFF);
}
}

View file

@ -61,7 +61,7 @@ along with GCC; see the file COPYING3. If not see
/* Return true if we have a useful VR_RANGE range for VAR, storing it
in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
static bool
bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
value_range vr;

View file

@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
@ -4794,7 +4795,9 @@ vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
For multi-step conversions store the resulting vectors and call the function
recursively. */
recursively. When NARROW_SRC_P is true, there's still a conversion after
narrowing, don't store the vectors in the SLP_NODE or in vector info of
the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
@ -4802,7 +4805,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
stmt_vec_info stmt_info,
vec<tree> &vec_dsts,
gimple_stmt_iterator *gsi,
slp_tree slp_node, enum tree_code code)
slp_tree slp_node, enum tree_code code,
bool narrow_src_p)
{
unsigned int i;
tree vop0, vop1, new_tmp, vec_dest;
@ -4818,9 +4822,9 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
new_tmp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_tmp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
if (multi_step_cvt)
/* Store the resulting vector for next recursive call. */
if (multi_step_cvt || narrow_src_p)
/* Store the resulting vector for next recursive call,
or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
(*vec_oprnds)[i/2] = new_tmp;
else
{
@ -4846,7 +4850,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
multi_step_cvt - 1,
stmt_info, vec_dsts, gsi,
slp_node, VEC_PACK_TRUNC_EXPR);
slp_node, VEC_PACK_TRUNC_EXPR,
narrow_src_p);
}
vec_dsts.quick_push (vec_dest);
@ -4991,7 +4996,15 @@ vectorizable_conversion (vec_info *vinfo,
tree vectype_out, vectype_in;
int ncopies, i;
tree lhs_type, rhs_type;
enum { NARROW, NONE, WIDEN } modifier;
/* For conversions between floating point and integer, there're 2 NARROW
cases. NARROW_SRC is for FLOAT_EXPR, means
integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
This is safe when the range of the source integer can fit into the lower
precision. NARROW_DST is for FIX_TRUNC_EXPR, means
floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
For other conversions, when there's narrowing, NARROW_DST is used as
default. */
enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
vec<tree> vec_oprnds0 = vNULL;
vec<tree> vec_oprnds1 = vNULL;
tree vop0;
@ -5126,7 +5139,7 @@ vectorizable_conversion (vec_info *vinfo,
else
modifier = NONE;
else if (multiple_p (nunits_out, nunits_in))
modifier = NARROW;
modifier = NARROW_DST;
else
{
gcc_checking_assert (multiple_p (nunits_in, nunits_out));
@ -5138,7 +5151,7 @@ vectorizable_conversion (vec_info *vinfo,
case of SLP. */
if (slp_node)
ncopies = 1;
else if (modifier == NARROW)
else if (modifier == NARROW_DST)
ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
else
ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
@ -5244,29 +5257,63 @@ vectorizable_conversion (vec_info *vinfo,
}
break;
case NARROW:
case NARROW_DST:
gcc_assert (op_type == unary_op);
if (supportable_narrowing_operation (code, vectype_out, vectype_in,
&code1, &multi_step_cvt,
&interm_types))
break;
if (code != FIX_TRUNC_EXPR
|| GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
goto unsupported;
cvt_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
if (cvt_type == NULL_TREE)
goto unsupported;
if (!supportable_convert_operation (code, cvt_type, vectype_in,
&codecvt1))
goto unsupported;
if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
&code1, &multi_step_cvt,
&interm_types))
break;
if (code == FIX_TRUNC_EXPR)
{
cvt_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
if (cvt_type == NULL_TREE)
goto unsupported;
if (!supportable_convert_operation (code, cvt_type, vectype_in,
&codecvt1))
goto unsupported;
if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
&code1, &multi_step_cvt,
&interm_types))
break;
}
/* If op0 can be represented with low precision integer,
truncate it to cvt_type and then do FLOAT_EXPR. */
else if (code == FLOAT_EXPR)
{
wide_int op_min_value, op_max_value;
if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
goto unsupported;
cvt_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
if (cvt_type == NULL_TREE
|| (wi::min_precision (op_max_value, SIGNED)
> TYPE_PRECISION (cvt_type))
|| (wi::min_precision (op_min_value, SIGNED)
> TYPE_PRECISION (cvt_type)))
goto unsupported;
cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
if (cvt_type == NULL_TREE)
goto unsupported;
if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
&code1, &multi_step_cvt,
&interm_types))
goto unsupported;
if (supportable_convert_operation (code, vectype_out,
cvt_type, &codecvt1))
{
modifier = NARROW_SRC;
break;
}
}
goto unsupported;
default:
@ -5291,7 +5338,7 @@ vectorizable_conversion (vec_info *vinfo,
vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
cost_vec);
}
else if (modifier == NARROW)
else if (modifier == NARROW_SRC || modifier == NARROW_DST)
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
/* The final packing step produces one vector result per copy. */
@ -5338,8 +5385,10 @@ vectorizable_conversion (vec_info *vinfo,
from supportable_*_operation, and store them in the correct order
for future use in vect_create_vectorized_*_stmts (). */
auto_vec<tree> vec_dsts (multi_step_cvt + 1);
bool widen_or_narrow_float_p
= cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
vec_dest = vect_create_destination_var (scalar_dest,
(cvt_type && modifier == WIDEN)
widen_or_narrow_float_p
? cvt_type : vectype_out);
vec_dsts.quick_push (vec_dest);
@ -5356,7 +5405,7 @@ vectorizable_conversion (vec_info *vinfo,
if (cvt_type)
vec_dest = vect_create_destination_var (scalar_dest,
modifier == WIDEN
widen_or_narrow_float_p
? vectype_out : cvt_type);
int ninputs = 1;
@ -5364,7 +5413,7 @@ vectorizable_conversion (vec_info *vinfo,
{
if (modifier == WIDEN)
;
else if (modifier == NARROW)
else if (modifier == NARROW_SRC || modifier == NARROW_DST)
{
if (multi_step_cvt)
ninputs = vect_pow2 (multi_step_cvt);
@ -5451,7 +5500,8 @@ vectorizable_conversion (vec_info *vinfo,
}
break;
case NARROW:
case NARROW_SRC:
case NARROW_DST:
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to
generate more than one vector stmt - i.e - we need to "unroll"
@ -5459,7 +5509,7 @@ vectorizable_conversion (vec_info *vinfo,
vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
op0, &vec_oprnds0);
/* Arguments are ready. Create the new vector stmts. */
if (cvt_type)
if (cvt_type && modifier == NARROW_DST)
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
{
gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
@ -5473,7 +5523,30 @@ vectorizable_conversion (vec_info *vinfo,
vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
multi_step_cvt,
stmt_info, vec_dsts, gsi,
slp_node, code1);
slp_node, code1,
modifier == NARROW_SRC);
/* After demoting op0 to cvt_type, convert it to dest. */
if (cvt_type && code == FLOAT_EXPR)
{
for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
{
/* Arguments are ready, create the new vector stmt. */
gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
gassign *new_stmt
= gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
/* This is the last step of the conversion sequence. Store the
vectors in SLP_NODE or in vector info of the scalar statement
(or in STMT_VINFO_RELATED_STMT chain). */
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
else
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
}
}
break;
}
if (!slp_node)

View file

@ -2393,6 +2393,7 @@ extern bool compatible_calls_p (gcall *, gcall *);
/* In tree-vect-patterns.cc. */
extern void
vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
extern bool vect_get_range_info (tree, wide_int*, wide_int*);
/* Pattern recognition functions.
Additional pattern recognition functions can (and will) be added