re PR target/86753 (gcc.target/aarch64/sve/vcond_[45].c fail after recent combine patch)
2019-10-18  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
	    Richard Sandiford  <richard.sandiford@arm.com>

	PR target/86753
	* tree-vectorizer.h (scalar_cond_masked_key): New struct,
	and define hashmap traits for it.
	(loop_vec_info::scalar_cond_masked_set): New member.
	(vect_record_loop_mask): Adjust prototype.
	* tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
	Implement method.
	* tree-vect-loop.c (vectorizable_reduction): Pass NULL as last arg to
	vect_record_loop_mask.
	(vectorizable_live_operation): Likewise.
	(vect_record_loop_mask): New param scalar_mask.  Add entry
	cond, loop_mask to scalar_cond_masked_set if scalar_mask is non NULL.
	* tree-vect-stmts.c (check_load_store_masking): New param scalar_mask.
	Pass it as last arg to vect_record_loop_mask.
	(vectorizable_call): Pass scalar_mask as last arg to
	vect_record_loop_mask.
	(vectorizable_store): Likewise.
	(vectorizable_load): Likewise.
	(vectorizable_condition): Check if another part of vectorized code
	applies loop_mask to the condition or to its inverse, and if so,
	apply loop_mask to the result of the vector comparison.

testsuite/
	* gcc.target/aarch64/sve/cond_cnot_2.c: Remove XFAIL
	from { scan-assembler-not {\tsel\t} }.
	* gcc.target/aarch64/sve/cond_convert_1.c: Adjust to make
	only one load conditional.
	* gcc.target/aarch64/sve/cond_convert_4.c: Likewise.
	* gcc.target/aarch64/sve/cond_unary_2.c: Likewise.
	* gcc.target/aarch64/sve/vcond_4.c: Remove XFAILs.
	* gcc.target/aarch64/sve/vcond_5.c: Likewise.

Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>

From-SVN: r277141
parent 4aa255f525
commit cc1facefe3
12 changed files with 281 additions and 43 deletions
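The pattern this change targets, taken from the comment the patch adds in vectorizable_condition (the loop body is quoted from that comment; the wrapper function f and its signature are illustrative):

    /* The load from z[] is guarded by y[i] != 0, and the select reuses
       the same condition.  Before this patch the vectorizer emitted the
       comparison twice: masked for the .MASK_LOAD and unmasked for the
       VEC_COND_EXPR.  Recording which scalar conditions already have a
       loop mask applied lets both statements share one masked compare.  */
    void
    f (int *x, int *y, int *z)
    {
      for (int i = 0; i < 100; ++i)
        x[i] = y[i] ? z[i] : 10;
    }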
gcc/ChangeLog
@@ -1,3 +1,28 @@
+2019-10-18  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+	    Richard Sandiford  <richard.sandiford@arm.com>
+
+	PR target/86753
+	* tree-vectorizer.h (scalar_cond_masked_key): New struct,
+	and define hashmap traits for it.
+	(loop_vec_info::scalar_cond_masked_set): New member.
+	(vect_record_loop_mask): Adjust prototype.
+	* tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
+	Implement method.
+	* tree-vect-loop.c (vectorizable_reduction): Pass NULL as last arg to
+	vect_record_loop_mask.
+	(vectorizable_live_operation): Likewise.
+	(vect_record_loop_mask): New param scalar_mask.  Add entry
+	cond, loop_mask to scalar_cond_masked_set if scalar_mask is non NULL.
+	* tree-vect-stmts.c (check_load_store_masking): New param scalar_mask.
+	Pass it as last arg to vect_record_loop_mask.
+	(vectorizable_call): Pass scalar_mask as last arg to
+	vect_record_loop_mask.
+	(vectorizable_store): Likewise.
+	(vectorizable_load): Likewise.
+	(vectorizable_condition): Check if another part of vectorized code
+	applies loop_mask to the condition or to its inverse, and if so,
+	apply loop_mask to the result of the vector comparison.
+
 2019-10-17  John David Anglin  <danglin@gcc.gnu.org>
 
 	* config/pa/pa.c (pa_output_indirect_call): Fix typos in last change.

gcc/testsuite/ChangeLog
@@ -1,3 +1,16 @@
+2019-10-18  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+	    Richard Sandiford  <richard.sandiford@arm.com>
+
+	PR target/86753
+	* gcc.target/aarch64/sve/cond_cnot_2.c: Remove XFAIL
+	from { scan-assembler-not {\tsel\t} }.
+	* gcc.target/aarch64/sve/cond_convert_1.c: Adjust to make
+	only one load conditional.
+	* gcc.target/aarch64/sve/cond_convert_4.c: Likewise.
+	* gcc.target/aarch64/sve/cond_unary_2.c: Likewise.
+	* gcc.target/aarch64/sve/vcond_4.c: Remove XFAILs.
+	* gcc.target/aarch64/sve/vcond_5.c: Likewise.
+
 2019-10-18  Jakub Jelinek  <jakub@redhat.com>
 
 	PR tree-optimization/92056

gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c
@@ -32,4 +32,4 @@ TEST_ALL (DEF_LOOP)
 /* { dg-final { scan-assembler-not {\tmov\tz} } } */
 /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
-/* Currently we canonicalize the ?: so that !b[i] is the "false" value. */
-/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */

gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c
@@ -11,7 +11,10 @@
 		   INT_TYPE *__restrict pred, int n)		\
   {								\
     for (int i = 0; i < n; ++i)					\
-      r[i] = pred[i] ? (FLOAT_TYPE) a[i] : b[i];		\
+      {								\
+	FLOAT_TYPE bi = b[i];					\
+	r[i] = pred[i] ? (FLOAT_TYPE) a[i] : bi;		\
+      }								\
   }
 
 #define TEST_ALL(T) \

gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c
@@ -11,7 +11,10 @@
 		   INT_TYPE *__restrict pred, int n)		\
   {								\
     for (int i = 0; i < n; ++i)					\
-      r[i] = pred[i] ? (INT_TYPE) a[i] : b[i];			\
+      {								\
+	INT_TYPE bi = b[i];					\
+	r[i] = pred[i] ? (INT_TYPE) a[i] : bi;			\
+      }								\
   }
 
 #define TEST_ALL(T) \

gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c
@@ -13,7 +13,10 @@
 		  TYPE *__restrict pred, int n)			\
   {								\
     for (int i = 0; i < n; ++i)					\
-      r[i] = pred[i] ? OP (a[i]) : b[i];			\
+      {								\
+	TYPE bi = b[i];						\
+	r[i] = pred[i] ? OP (a[i]) : bi;			\
+      }								\
   }
 
 #define TEST_INT_TYPE(T, TYPE) \

gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
@@ -98,24 +98,24 @@ TEST_CMP (nugt)
 /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
 
 /* 5 for lt, 5 for ult and 5 for nult. */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
 
 /* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
 
 /* 5 for gt, 5 for ugt and 5 for nugt. */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
 
 /* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
 
 /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
 /* 3 loops * 5 invocations for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
 
 /* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
@@ -123,19 +123,19 @@ TEST_CMP (nugt)
 /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
 
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
 
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
 
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
 
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
 
 /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
 /* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
    for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */

gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
@@ -19,16 +19,16 @@
 /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
 
 /* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
 
 /* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */
 /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
 
 /* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
 
 /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
 /* 3 loops * 5 invocations for ordered, unordered and ueq. */
@@ -43,14 +43,14 @@
 /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
 
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
 
 /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
 
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
 
 /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
 /* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,

gcc/tree-vect-loop.c
@@ -6330,7 +6330,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
 	    }
 	  else
 	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
-				   vectype_in);
+				   vectype_in, NULL);
 	}
       if (dump_enabled_p ()
 	  && reduction_type == FOLD_LEFT_REDUCTION)
@@ -7561,7 +7561,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
 	  gcc_assert (ncopies == 1 && !slp_node);
 	  vect_record_loop_mask (loop_vinfo,
 				 &LOOP_VINFO_MASKS (loop_vinfo),
-				 1, vectype);
+				 1, vectype, NULL);
 	}
     }
   return true;
@@ -7760,11 +7760,12 @@ vect_double_mask_nunits (tree type)
 
 /* Record that a fully-masked version of LOOP_VINFO would need MASKS to
    contain a sequence of NVECTORS masks that each control a vector of type
-   VECTYPE. */
+   VECTYPE.  If SCALAR_MASK is nonnull, the fully-masked loop would AND
+   these vector masks with the vector version of SCALAR_MASK.  */
 
 void
 vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
-		       unsigned int nvectors, tree vectype)
+		       unsigned int nvectors, tree vectype, tree scalar_mask)
 {
   gcc_assert (nvectors != 0);
   if (masks->length () < nvectors)
@@ -7775,6 +7776,13 @@ vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
   unsigned int nscalars_per_iter
     = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
 		 LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+
+  if (scalar_mask)
+    {
+      scalar_cond_masked_key cond (scalar_mask, nvectors);
+      loop_vinfo->scalar_cond_masked_set.add (cond);
+    }
+
   if (rgm->max_nscalars_per_iter < nscalars_per_iter)
     {
       rgm->max_nscalars_per_iter = nscalars_per_iter;

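A self-contained sketch of the bookkeeping protocol introduced above, with stand-in types (std::unordered_set and std::string here are illustrative; GCC uses hash_set over trees with the traits defined in tree-vectorizer.h): every statement masked by a scalar condition records a normalized (code, op0, op1, ncopies) key, and vectorizable_condition later probes the same set.

    #include <cstddef>
    #include <string>
    #include <unordered_set>

    enum cond_code { LT, GE, NE, EQ };   // stand-in for GCC's tree_code

    struct scalar_cond_key
    {
      cond_code code;
      std::string op0, op1;              // stand-ins for operand trees
      unsigned ncopies;
      bool operator== (const scalar_cond_key &o) const
      {
        return code == o.code && op0 == o.op0 && op1 == o.op1
               && ncopies == o.ncopies;
      }
    };

    struct key_hash
    {
      std::size_t operator() (const scalar_cond_key &k) const
      {
        // Mix the operand strings with the code and copy count.
        return std::hash<std::string>{} (k.op0 + "|" + k.op1)
               ^ (std::size_t (k.code) << 1) ^ k.ncopies;
      }
    };

    int main ()
    {
      std::unordered_set<scalar_cond_key, key_hash> masked_set;

      // Recording side (vect_record_loop_mask): a masked load guarded
      // by "a < b" with ncopies == 1.
      masked_set.insert ({LT, "a", "b", 1});

      // Query side (vectorizable_condition): is the same condition,
      // at the same ncopies, already loop-masked elsewhere?
      bool hit = masked_set.count ({LT, "a", "b", 1}) != 0;
      return hit ? 0 : 1;
    }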
gcc/tree-vect-stmts.c
@@ -1879,7 +1879,8 @@ static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
    says how the load or store is going to be implemented and GROUP_SIZE
    is the number of load or store statements in the containing group.
    If the access is a gather load or scatter store, GS_INFO describes
-   its arguments.
+   its arguments.  If the load or store is conditional, SCALAR_MASK is the
+   condition under which it occurs.
 
    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
    supported, otherwise record the required mask types. */
@@ -1888,7 +1889,7 @@ static void
 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
 			  vec_load_store_type vls_type, int group_size,
 			  vect_memory_access_type memory_access_type,
-			  gather_scatter_info *gs_info)
+			  gather_scatter_info *gs_info, tree scalar_mask)
 {
   /* Invariant loads need no special support. */
   if (memory_access_type == VMAT_INVARIANT)
@@ -1912,7 +1913,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
 	  return;
 	}
       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
-      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
+      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
       return;
     }
 
@@ -1936,7 +1937,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
 	  return;
 	}
       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
-      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
+      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
       return;
     }
 
@@ -1974,7 +1975,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   unsigned int nvectors;
   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
-    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
+    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
   else
     gcc_unreachable ();
 }
@@ -3436,7 +3437,9 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	  unsigned int nvectors = (slp_node
 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
 				   : ncopies);
-	  vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
+	  tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+	  vect_record_loop_mask (loop_vinfo, masks, nvectors,
+				 vectype_out, scalar_mask);
 	}
       return true;
     }
@@ -7390,7 +7393,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (loop_vinfo
       && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
     check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
-			      memory_access_type, &gs_info);
+			      memory_access_type, &gs_info, mask);
 
   STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
   vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
@@ -8637,7 +8640,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   if (loop_vinfo
       && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
     check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
-			      memory_access_type, &gs_info);
+			      memory_access_type, &gs_info, mask);
 
   STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
   vect_model_load_cost (stmt_info, ncopies, memory_access_type,
@@ -10007,6 +10010,35 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* Handle cond expr. */
   for (j = 0; j < ncopies; j++)
     {
+      tree loop_mask = NULL_TREE;
+      bool swap_cond_operands = false;
+
+      /* See whether another part of the vectorized code applies a loop
+	 mask to the condition, or to its inverse. */
+
+      if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+	{
+	  scalar_cond_masked_key cond (cond_expr, ncopies);
+	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+	    {
+	      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+	      loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
+	    }
+	  else
+	    {
+	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+	      cond.code = invert_tree_comparison (cond.code, honor_nans);
+	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+		{
+		  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+		  loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
+						  vectype, j);
+		  cond_code = cond.code;
+		  swap_cond_operands = true;
+		}
+	    }
+	}
+
       stmt_vec_info new_stmt_info = NULL;
       if (j == 0)
 	{
@@ -10084,6 +10116,9 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	  vec_then_clause = vec_oprnds2[i];
 	  vec_else_clause = vec_oprnds3[i];
 
+	  if (swap_cond_operands)
+	    std::swap (vec_then_clause, vec_else_clause);
+
 	  if (masked)
 	    vec_compare = vec_cond_lhs;
 	  else
@@ -10122,6 +10157,50 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	    }
 	}
     }
+
+      /* If we decided to apply a loop mask to the result of the vector
+	 comparison, AND the comparison with the mask now.  Later passes
+	 should then be able to reuse the AND results between multiple
+	 vector statements.
+
+	 For example:
+	 for (int i = 0; i < 100; ++i)
+	   x[i] = y[i] ? z[i] : 10;
+
+	 results in following optimized GIMPLE:
+
+	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
+	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
+	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
+	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
+	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
+					   vect_iftmp.11_47, { 10, ... }>;
+
+	 instead of using masked and unmasked forms of
+	 vec != { 0, ... } (masked in the MASK_LOAD,
+	 unmasked in the VEC_COND_EXPR).  */
+
+      if (loop_mask)
+	{
+	  if (COMPARISON_CLASS_P (vec_compare))
+	    {
+	      tree tmp = make_ssa_name (vec_cmp_type);
+	      tree op0 = TREE_OPERAND (vec_compare, 0);
+	      tree op1 = TREE_OPERAND (vec_compare, 1);
+	      gassign *g = gimple_build_assign (tmp,
+						TREE_CODE (vec_compare),
+						op0, op1);
+	      vect_finish_stmt_generation (stmt_info, g, gsi);
+	      vec_compare = tmp;
+	    }
+
+	  tree tmp2 = make_ssa_name (vec_cmp_type);
+	  gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
+					    vec_compare, loop_mask);
+	  vect_finish_stmt_generation (stmt_info, g, gsi);
+	  vec_compare = tmp2;
+	}
+
 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
 	    {
 	      if (!is_gimple_val (vec_compare))

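The inverse-condition fallback above can be illustrated in isolation. A minimal sketch with stand-in types (the enum, struct, and helper below are illustrative, not GCC's trees or invert_tree_comparison): when only the inverted comparison is known to be loop-masked, flipping the comparison code and swapping the then/else values preserves the select's semantics while allowing the existing masked compare to be reused.

    #include <utility>

    enum cond_code { LT, GE };   // stand-ins for a subset of tree codes

    // Stand-in for invert_tree_comparison on integer comparisons
    // (no NaN concerns in this toy model).
    static cond_code
    invert (cond_code c)
    {
      return c == LT ? GE : LT;
    }

    struct select_stmt
    {
      cond_code code;   // comparison applied to (op0, op1)
      int then_val;
      int else_val;
    };

    // Rewrite "x = (op0 < op1) ? t : e" as "x = (op0 >= op1) ? e : t"
    // so an already-masked inverse comparison can be reused.
    static void
    use_inverted_condition (select_stmt &s)
    {
      s.code = invert (s.code);
      std::swap (s.then_val, s.else_val);
    }

    int main ()
    {
      select_stmt s = {LT, 10, 20};       // (a < b) ? 10 : 20
      use_inverted_condition (s);         // (a >= b) ? 20 : 10
      return (s.code == GE && s.then_val == 20) ? 0 : 1;
    }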
gcc/tree-vectorizer.c
@@ -1515,3 +1515,36 @@ make_pass_ipa_increase_alignment (gcc::context *ctxt)
 {
   return new pass_ipa_increase_alignment (ctxt);
 }
+
+/* If the condition represented by T is a comparison or the SSA name
+   result of a comparison, extract the comparison's operands.  Represent
+   T as NE_EXPR <T, 0> otherwise.  */
+
+void
+scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
+{
+  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
+    {
+      this->code = TREE_CODE (t);
+      this->op0 = TREE_OPERAND (t, 0);
+      this->op1 = TREE_OPERAND (t, 1);
+      return;
+    }
+
+  if (TREE_CODE (t) == SSA_NAME)
+    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
+      {
+	tree_code code = gimple_assign_rhs_code (stmt);
+	if (TREE_CODE_CLASS (code) == tcc_comparison)
+	  {
+	    this->code = code;
+	    this->op0 = gimple_assign_rhs1 (stmt);
+	    this->op1 = gimple_assign_rhs2 (stmt);
+	    return;
+	  }
+      }
+
+  this->code = NE_EXPR;
+  this->op0 = t;
+  this->op1 = build_zero_cst (TREE_TYPE (t));
+}

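A worked example of this canonicalization (SSA names below are hypothetical): a comparison tree a_1 < b_2 yields the key {LT_EXPR, a_1, b_2}; an SSA name _3 defined by _3 = a_1 < b_2 yields the same key, so both spellings of the condition hash equally; and a plain boolean flag_4 with no comparison definition falls through to {NE_EXPR, flag_4, 0}, i.e. it is keyed as flag_4 != 0.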
gcc/tree-vectorizer.h
@@ -177,7 +177,75 @@ public:
 #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
 #define SLP_TREE_DEF_TYPE(S) (S)->def_type
 
+/* Key for map that records association between
+   scalar conditions and corresponding loop mask, and
+   is populated by vect_record_loop_mask.  */
+
+struct scalar_cond_masked_key
+{
+  scalar_cond_masked_key (tree t, unsigned ncopies_)
+    : ncopies (ncopies_)
+  {
+    get_cond_ops_from_tree (t);
+  }
+
+  void get_cond_ops_from_tree (tree);
+
+  unsigned ncopies;
+  tree_code code;
+  tree op0;
+  tree op1;
+};
+
+template<>
+struct default_hash_traits<scalar_cond_masked_key>
+{
+  typedef scalar_cond_masked_key compare_type;
+  typedef scalar_cond_masked_key value_type;
+
+  static inline hashval_t
+  hash (value_type v)
+  {
+    inchash::hash h;
+    h.add_int (v.code);
+    inchash::add_expr (v.op0, h, 0);
+    inchash::add_expr (v.op1, h, 0);
+    h.add_int (v.ncopies);
+    return h.end ();
+  }
+
+  static inline bool
+  equal (value_type existing, value_type candidate)
+  {
+    return (existing.ncopies == candidate.ncopies
+	    && existing.code == candidate.code
+	    && operand_equal_p (existing.op0, candidate.op0, 0)
+	    && operand_equal_p (existing.op1, candidate.op1, 0));
+  }
+
+  static inline void
+  mark_empty (value_type &v)
+  {
+    v.ncopies = 0;
+  }
+
+  static inline bool
+  is_empty (value_type v)
+  {
+    return v.ncopies == 0;
+  }
+
+  static inline void mark_deleted (value_type &) {}
+
+  static inline bool is_deleted (const value_type &)
+  {
+    return false;
+  }
+
+  static inline void remove (value_type &) {}
+};
+
+typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
+
 /* Describes two objects whose addresses must be unequal for the vectorized
    loop to be valid. */
 
@@ -426,6 +494,9 @@ public:
      on inactive scalars. */
   vec_loop_masks masks;
 
+  /* Set of scalar conditions that have loop mask applied.  */
+  scalar_cond_masked_set_type scalar_cond_masked_set;
+
   /* If we are using a loop mask to align memory addresses, this variable
      contains the number of vector elements that we should skip in the
      first iteration of the vector loop (i.e. the number of leading
@@ -1637,7 +1708,7 @@ extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
 extern tree vect_halve_mask_nunits (tree);
 extern tree vect_double_mask_nunits (tree);
 extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
-				   unsigned int, tree);
+				   unsigned int, tree, tree);
 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
 				unsigned int, tree, unsigned int);
 extern stmt_vec_info info_for_reduction (stmt_vec_info);

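One detail worth noting in the traits above: hash_set stores keys by value, so an "empty slot" must be representable in the key itself. The traits reuse ncopies == 0 as that sentinel, which is safe because every real key is constructed with ncopies >= 1. A minimal stand-alone illustration of the same convention (illustrative code, not GCC's hash_table):

    #include <cassert>
    #include <vector>

    struct key
    {
      unsigned ncopies = 0;   // 0 is reserved: it marks an empty slot
      int payload = 0;
    };

    int main ()
    {
      // An open-addressing table can test emptiness without a side bitmap.
      std::vector<key> slots (8);
      assert (slots[3].ncopies == 0);   // empty by construction
      slots[3] = key {1, 42};           // real entries use ncopies >= 1
      assert (slots[3].ncopies != 0);   // now occupied
      return 0;
    }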