Add new optabs for reducing vectors to scalars
PR tree-optimization/61114 * doc/md.texi (Standard Names): Add reduc_(plus,[us](min|max))_scal optabs, and note in reduc_[us](plus|min|max) to prefer the former. * expr.c (expand_expr_real_2): Use reduc_..._scal if available, fall back to old reduc_... + BIT_FIELD_REF only if not. * optabs.c (optab_for_tree_code): for REDUC_(MAX,MIN,PLUS)_EXPR, return the reduce-to-scalar (reduc_..._scal) optab. (scalar_reduc_to_vector): New. * optabs.def (reduc_smax_scal_optab, reduc_smin_scal_optab, reduc_plus_scal_optab, reduc_umax_scal_optab, reduc_umin_scal_optab): New. * optabs.h (scalar_reduc_to_vector): Declare. * tree-vect-loop.c (vectorizable_reduction): Look for optabs reducing to either scalar or vector. From-SVN: r216737
This commit is contained in:
parent
99f76d9bac
commit
d43a252e2f
7 changed files with 118 additions and 23 deletions
|
@ -1,3 +1,25 @@
|
|||
2014-10-27 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR tree-optimization/61114
|
||||
* doc/md.texi (Standard Names): Add reduc_(plus,[us](min|max))_scal
|
||||
optabs, and note in reduc_[us](plus|min|max) to prefer the former.
|
||||
|
||||
* expr.c (expand_expr_real_2): Use reduc_..._scal if available, fall
|
||||
back to old reduc_... + BIT_FIELD_REF only if not.
|
||||
|
||||
* optabs.c (optab_for_tree_code): for REDUC_(MAX,MIN,PLUS)_EXPR,
|
||||
return the reduce-to-scalar (reduc_..._scal) optab.
|
||||
(scalar_reduc_to_vector): New.
|
||||
|
||||
* optabs.def (reduc_smax_scal_optab, reduc_smin_scal_optab,
|
||||
reduc_plus_scal_optab, reduc_umax_scal_optab, reduc_umin_scal_optab):
|
||||
New.
|
||||
|
||||
* optabs.h (scalar_reduc_to_vector): Declare.
|
||||
|
||||
* tree-vect-loop.c (vectorizable_reduction): Look for optabs reducing
|
||||
to either scalar or vector.
|
||||
|
||||
2014-10-27 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
PR tree-optimization/61114
|
||||
|
|
|
@ -4724,29 +4724,48 @@ it is unspecified which of the two operands is returned as the result.
|
|||
@cindex @code{reduc_smax_@var{m}} instruction pattern
|
||||
@item @samp{reduc_smin_@var{m}}, @samp{reduc_smax_@var{m}}
|
||||
Find the signed minimum/maximum of the elements of a vector. The vector is
|
||||
operand 1, and the scalar result is stored in the least significant bits of
|
||||
operand 1, and the result is stored in the least significant bits of
|
||||
operand 0 (also a vector). The output and input vector should have the same
|
||||
modes.
|
||||
modes. These are legacy optabs, and platforms should prefer to implement
|
||||
@samp{reduc_smin_scal_@var{m}} and @samp{reduc_smax_scal_@var{m}}.
|
||||
|
||||
@cindex @code{reduc_umin_@var{m}} instruction pattern
|
||||
@cindex @code{reduc_umax_@var{m}} instruction pattern
|
||||
@item @samp{reduc_umin_@var{m}}, @samp{reduc_umax_@var{m}}
|
||||
Find the unsigned minimum/maximum of the elements of a vector. The vector is
|
||||
operand 1, and the scalar result is stored in the least significant bits of
|
||||
operand 1, and the result is stored in the least significant bits of
|
||||
operand 0 (also a vector). The output and input vector should have the same
|
||||
modes.
|
||||
modes. These are legacy optabs, and platforms should prefer to implement
|
||||
@samp{reduc_umin_scal_@var{m}} and @samp{reduc_umax_scal_@var{m}}.
|
||||
|
||||
@cindex @code{reduc_splus_@var{m}} instruction pattern
|
||||
@item @samp{reduc_splus_@var{m}}
|
||||
Compute the sum of the signed elements of a vector. The vector is operand 1,
|
||||
and the scalar result is stored in the least significant bits of operand 0
|
||||
(also a vector). The output and input vector should have the same modes.
|
||||
|
||||
@cindex @code{reduc_uplus_@var{m}} instruction pattern
|
||||
@item @samp{reduc_uplus_@var{m}}
|
||||
Compute the sum of the unsigned elements of a vector. The vector is operand 1,
|
||||
and the scalar result is stored in the least significant bits of operand 0
|
||||
@item @samp{reduc_splus_@var{m}}, @samp{reduc_uplus_@var{m}}
|
||||
Compute the sum of the signed/unsigned elements of a vector. The vector is
|
||||
operand 1, and the result is stored in the least significant bits of operand 0
|
||||
(also a vector). The output and input vector should have the same modes.
|
||||
These are legacy optabs, and platforms should prefer to implement
|
||||
@samp{reduc_plus_scal_@var{m}}.
|
||||
|
||||
@cindex @code{reduc_smin_scal_@var{m}} instruction pattern
|
||||
@cindex @code{reduc_smax_scal_@var{m}} instruction pattern
|
||||
@item @samp{reduc_smin_scal_@var{m}}, @samp{reduc_smax_scal_@var{m}}
|
||||
Find the signed minimum/maximum of the elements of a vector. The vector is
|
||||
operand 1, and operand 0 is the scalar result, with mode equal to the mode of
|
||||
the elements of the input vector.
|
||||
|
||||
@cindex @code{reduc_umin_scal_@var{m}} instruction pattern
|
||||
@cindex @code{reduc_umax_scal_@var{m}} instruction pattern
|
||||
@item @samp{reduc_umin_scal_@var{m}}, @samp{reduc_umax_scal_@var{m}}
|
||||
Find the unsigned minimum/maximum of the elements of a vector. The vector is
|
||||
operand 1, and operand 0 is the scalar result, with mode equal to the mode of
|
||||
the elements of the input vector.
|
||||
|
||||
@cindex @code{reduc_plus_scal_@var{m}} instruction pattern
|
||||
@item @samp{reduc_plus_scal_@var{m}}
|
||||
Compute the sum of the elements of a vector. The vector is operand 1, and
|
||||
operand 0 is the scalar result, with mode equal to the mode of the elements of
|
||||
the input vector.
|
||||
|
||||
@cindex @code{sdot_prod@var{m}} instruction pattern
|
||||
@item @samp{sdot_prod@var{m}}
|
||||
|
|
18
gcc/expr.c
18
gcc/expr.c
|
@ -9052,6 +9052,24 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
|
|||
op0 = expand_normal (treeop0);
|
||||
this_optab = optab_for_tree_code (code, type, optab_default);
|
||||
enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
|
||||
|
||||
if (optab_handler (this_optab, vec_mode) != CODE_FOR_nothing)
|
||||
{
|
||||
struct expand_operand ops[2];
|
||||
enum insn_code icode = optab_handler (this_optab, vec_mode);
|
||||
|
||||
create_output_operand (&ops[0], target, mode);
|
||||
create_input_operand (&ops[1], op0, vec_mode);
|
||||
if (maybe_expand_insn (icode, 2, ops))
|
||||
{
|
||||
target = ops[0].value;
|
||||
if (GET_MODE (target) != mode)
|
||||
return gen_lowpart (tmode, target);
|
||||
return target;
|
||||
}
|
||||
}
|
||||
/* Fall back to optab with vector result, and then extract scalar. */
|
||||
this_optab = scalar_reduc_to_vector (this_optab, type);
|
||||
temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp);
|
||||
gcc_assert (temp);
|
||||
/* The tree code produces a scalar result, but (somewhat by convention)
|
||||
|
|
29
gcc/optabs.c
29
gcc/optabs.c
|
@ -509,13 +509,15 @@ optab_for_tree_code (enum tree_code code, const_tree type,
|
|||
return fma_optab;
|
||||
|
||||
case REDUC_MAX_EXPR:
|
||||
return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
|
||||
return TYPE_UNSIGNED (type)
|
||||
? reduc_umax_scal_optab : reduc_smax_scal_optab;
|
||||
|
||||
case REDUC_MIN_EXPR:
|
||||
return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
|
||||
return TYPE_UNSIGNED (type)
|
||||
? reduc_umin_scal_optab : reduc_smin_scal_optab;
|
||||
|
||||
case REDUC_PLUS_EXPR:
|
||||
return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
|
||||
return reduc_plus_scal_optab;
|
||||
|
||||
case VEC_LSHIFT_EXPR:
|
||||
return vec_shl_optab;
|
||||
|
@ -611,7 +613,26 @@ optab_for_tree_code (enum tree_code code, const_tree type,
|
|||
return unknown_optab;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Given optab UNOPTAB that reduces a vector to a scalar, find instead the old
|
||||
optab that produces a vector with the reduction result in one element,
|
||||
for a tree with type TYPE. */
|
||||
|
||||
optab
|
||||
scalar_reduc_to_vector (optab unoptab, const_tree type)
|
||||
{
|
||||
switch (unoptab)
|
||||
{
|
||||
case reduc_plus_scal_optab:
|
||||
return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
|
||||
|
||||
case reduc_smin_scal_optab: return reduc_smin_optab;
|
||||
case reduc_umin_scal_optab: return reduc_umin_optab;
|
||||
case reduc_smax_scal_optab: return reduc_smax_optab;
|
||||
case reduc_umax_scal_optab: return reduc_umax_optab;
|
||||
default: return unknown_optab;
|
||||
}
|
||||
}
|
||||
|
||||
/* Expand vector widening operations.
|
||||
|
||||
|
|
|
@ -243,12 +243,20 @@ OPTAB_D (sin_optab, "sin$a2")
|
|||
OPTAB_D (sincos_optab, "sincos$a3")
|
||||
OPTAB_D (tan_optab, "tan$a2")
|
||||
|
||||
/* Vector reduction to a scalar. */
|
||||
OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
|
||||
OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
|
||||
OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
|
||||
OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
|
||||
OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
|
||||
/* (Old) Vector reduction, returning a vector with the result in one lane. */
|
||||
OPTAB_D (reduc_smax_optab, "reduc_smax_$a")
|
||||
OPTAB_D (reduc_smin_optab, "reduc_smin_$a")
|
||||
OPTAB_D (reduc_splus_optab, "reduc_splus_$a")
|
||||
OPTAB_D (reduc_umax_optab, "reduc_umax_$a")
|
||||
OPTAB_D (reduc_umin_optab, "reduc_umin_$a")
|
||||
OPTAB_D (reduc_uplus_optab, "reduc_uplus_$a")
|
||||
|
||||
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
|
||||
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
|
||||
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
|
||||
|
|
|
@ -162,6 +162,11 @@ enum optab_subtype
|
|||
vector shifts and rotates */
|
||||
extern optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype);
|
||||
|
||||
/* Given an optab that reduces a vector to a scalar, find instead the old
|
||||
optab that produces a vector with the reduction result in one element,
|
||||
for a tree with the specified type. */
|
||||
extern optab scalar_reduc_to_vector (optab, const_tree type);
|
||||
|
||||
/* The various uses that a comparison can have; used by can_compare_p:
|
||||
jumps, conditional moves, store flag operations. */
|
||||
enum can_compare_purpose
|
||||
|
|
|
@ -5113,15 +5113,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
|
||||
if (reduc_optab
|
||||
&& optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
|
||||
else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"reduc op not supported by target.\n");
|
||||
optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
|
||||
if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"reduc op not supported by target.\n");
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
Loading…
Add table
Reference in a new issue