Allow different vector types for stmt groups

This allows vectorization (in practice non-loop vectorization) to
have a stmt participate in different vector type vectorizations.
It allows us to remove vect_update_shared_vectype and replace it
by pushing/popping STMT_VINFO_VECTYPE from SLP_TREE_VECTYPE around
vect_analyze_stmt and vect_transform_stmt.

For data-ref the situation is a bit more complicated since we
analyze alignment info with a specific vector type in mind which
doesn't play well when that changes.

So the bulk of the change is passing down the actual vector type
used for a vectorized access to the various accessors of alignment
info, first and foremost dr_misalignment but also aligned_access_p,
known_alignment_for_access_p, vect_known_alignment_in_bytes and
vect_supportable_dr_alignment.  I took the liberty to replace
ALL_CAPS macro accessors with the lower-case function invocations.

The actual changes to the behavior are in dr_misalignment which now
is the place factoring in the negative step adjustment as well as
handling alignment queries for a vector type with bigger alignment
requirements than what we can (or have) analyze(d).

vect_slp_analyze_node_alignment makes use of this and upon receiving
a vector type with a bigger alignment requirement re-analyzes the DR
with respect to it but keeps an older more precise result if possible.
In this context it might be possible to do the analysis just once:
instead of analyzing with respect to a specific desired alignment,
look for the biggest alignment for which we can still compute a known
misalignment.

The ChangeLog includes the functional changes but not the bulk of the
mechanical alignment-accessor API changes - I hope that is acceptable.

2021-09-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/97351
	PR tree-optimization/97352
	PR tree-optimization/82426
	* tree-vectorizer.h (dr_misalignment): Add vector type
	argument.
	(aligned_access_p): Likewise.
	(known_alignment_for_access_p): Likewise.
	(vect_supportable_dr_alignment): Likewise.
	(vect_known_alignment_in_bytes): Likewise.  Refactor.
	(DR_MISALIGNMENT): Remove.
	(vect_update_shared_vectype): Likewise.
	* tree-vect-data-refs.c (dr_misalignment): Refactor, handle
	a vector type with larger alignment requirement and apply
	the negative step adjustment here.
	(vect_calculate_target_alignment): Remove.
	(vect_compute_data_ref_alignment): Get explicit vector type
	argument, do not apply a negative step alignment adjustment
	here.
	(vect_slp_analyze_node_alignment): Re-analyze alignment
	when we re-visit the DR with a bigger desired alignment but
	keep more precise results from smaller alignments.
	* tree-vect-slp.c (vect_update_shared_vectype): Remove.
	(vect_slp_analyze_node_operations_1): Do not update the
	shared vector type on stmts.
	* tree-vect-stmts.c (vect_analyze_stmt): Push/pop the
	vector type of an SLP node to the representative stmt-info.
	(vect_transform_stmt): Likewise.

	* gcc.target/i386/vect-pr82426.c: New testcase.
	* gcc.target/i386/vect-pr97352.c: Likewise.
This commit is contained in:
Richard Biener 2020-11-18 09:36:57 +01:00
parent e7b8d70200
commit 6390c5047a
6 changed files with 235 additions and 213 deletions

View file

@ -0,0 +1,31 @@
/* i?86 does not have V2SF, x32 does though. */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -mavx -mfma" } */
/* A 2D affine transform: 2x2 linear part (m11..m22) plus a
   translation vector (dx, dy).  Six consecutive floats, so the
   struct copy in/out of multiply exposes mixed V4SF/V2SF groups.  */
struct Matrix
{
float m11;
float m12;
float m21;
float m22;
float dx;
float dy;
};
/* Compose two affine transforms, out = a * b.  The four m** stores
   and the dx/dy pair form stmt groups with different vector types;
   the whole kernel should vectorize as V4SF plus V2SF operations
   (checked by the scan-assembler-times directives below).  */
struct Matrix multiply(const struct Matrix *a, const struct Matrix *b)
{
struct Matrix out;
/* 2x2 linear part: each output element is a 2-term dot product.  */
out.m11 = a->m11*b->m11 + a->m12*b->m21;
out.m12 = a->m11*b->m12 + a->m12*b->m22;
out.m21 = a->m21*b->m11 + a->m22*b->m21;
out.m22 = a->m21*b->m12 + a->m22*b->m22;
/* Translation: transformed by the linear part, then offset by b.  */
out.dx = a->dx*b->m11 + a->dy*b->m21 + b->dx;
out.dy = a->dx*b->m12 + a->dy*b->m22 + b->dy;
return out;
}
/* The whole kernel should be vectorized with V4SF and V2SF operations. */
/* { dg-final { scan-assembler-times "vadd" 1 } } */
/* { dg-final { scan-assembler-times "vmul" 2 } } */
/* { dg-final { scan-assembler-times "vfma" 2 } } */

View file

@ -0,0 +1,22 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mavx" } */
/* Global arrays: two 4-element store groups (a, b), one 2-element
   store group (x), all loaded from c.  */
double x[2], a[4], b[4], c[5];
/* Stores to a and b each repeat a pair of loads from c twice; the
   store to x duplicates the single load c[4].  All three store
   groups and the loads from c (apart from c[4], which is duped)
   should be vectorized -- the vmov.pd count below depends on this
   exact statement pattern, so do not restructure.  */
void foo ()
{
/* a = { c[0], c[1], c[0], c[1] }  */
a[0] = c[0];
a[1] = c[1];
a[2] = c[0];
a[3] = c[1];
/* b = { c[2], c[3], c[2], c[3] }  */
b[0] = c[2];
b[1] = c[3];
b[2] = c[2];
b[3] = c[3];
/* x = { c[4], c[4] }  */
x[0] = c[4];
x[1] = c[4];
}
/* We should vectorize all three stores and the load from c apart
from c[4] which should be duped. */
/* { dg-final { scan-assembler-times "vmov.pd" 4 } } */

View file

@ -887,37 +887,53 @@ vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
return res;
}
/* Return the misalignment of DR_INFO. */
/* Return the misalignment of DR_INFO accessed in VECTYPE. */
int
dr_misalignment (dr_vec_info *dr_info)
dr_misalignment (dr_vec_info *dr_info, tree vectype)
{
HOST_WIDE_INT diff = 0;
/* Alignment is only analyzed for the first element of a DR group,
use that but adjust misalignment by the offset of the access. */
if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
{
dr_vec_info *first_dr
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
int misalign = first_dr->misalignment;
gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
if (misalign == DR_MISALIGNMENT_UNKNOWN)
return misalign;
/* vect_analyze_data_ref_accesses guarantees that DR_INIT are
INTEGER_CSTs and the first element in the group has the lowest
address. Likewise vect_compute_data_ref_alignment will
have ensured that target_alignment is constant and otherwise
set misalign to DR_MISALIGNMENT_UNKNOWN. */
HOST_WIDE_INT diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
- TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
address. */
diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
- TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
gcc_assert (diff >= 0);
unsigned HOST_WIDE_INT target_alignment_c
= first_dr->target_alignment.to_constant ();
return (misalign + diff) % target_alignment_c;
}
else
{
int misalign = dr_info->misalignment;
gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
return misalign;
dr_info = first_dr;
}
int misalign = dr_info->misalignment;
gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
if (misalign == DR_MISALIGNMENT_UNKNOWN)
return misalign;
/* If the access is only aligned for a vector type with smaller alignment
requirement the access has unknown misalignment. */
if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
targetm.vectorize.preferred_vector_alignment (vectype)))
return DR_MISALIGNMENT_UNKNOWN;
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
Adjust misalign accordingly. */
poly_int64 misalignment = misalign + diff;
if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) < 0)
misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
* -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
/* vect_compute_data_ref_alignment will have ensured that target_alignment
is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN. */
unsigned HOST_WIDE_INT target_alignment_c
= dr_info->target_alignment.to_constant ();
if (!known_misalignment (misalignment, target_alignment_c, &misalign))
return DR_MISALIGNMENT_UNKNOWN;
return misalign;
}
/* Record the base alignment guarantee given by DRB, which occurs
@ -978,34 +994,26 @@ vect_record_base_alignments (vec_info *vinfo)
}
}
/* Return the target alignment for the vectorized form of DR_INFO. */
static poly_uint64
vect_calculate_target_alignment (dr_vec_info *dr_info)
{
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
return targetm.vectorize.preferred_vector_alignment (vectype);
}
/* Function vect_compute_data_ref_alignment
Compute the misalignment of the data reference DR_INFO.
Compute the misalignment of the data reference DR_INFO when vectorizing
with VECTYPE.
Output:
1. DR_MISALIGNMENT (DR_INFO) is defined.
1. initialized misalignment info for DR_INFO
FOR NOW: No analysis is actually performed. Misalignment is calculated
only for trivial cases. TODO. */
static void
vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info)
vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
tree vectype)
{
stmt_vec_info stmt_info = dr_info->stmt;
vec_base_alignments *base_alignments = &vinfo->base_alignments;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = NULL;
tree ref = DR_REF (dr_info->dr);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@ -1024,7 +1032,8 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info)
bool step_preserves_misalignment_p;
poly_uint64 vector_alignment
= exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
= exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
BITS_PER_UNIT);
SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
/* If the main loop has peeled for alignment we have no way of knowing
@ -1147,14 +1156,6 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info)
poly_int64 misalignment
= base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
Adjust misalign accordingly. */
if (tree_int_cst_sgn (drb->step) < 0)
/* PLUS because STEP is negative. */
misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
* -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
unsigned int const_misalignment;
if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
{
@ -1169,7 +1170,7 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info)
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"misalign = %d bytes of ref %T\n",
DR_MISALIGNMENT (dr_info), ref);
const_misalignment, ref);
return;
}
@ -1237,14 +1238,15 @@ vect_update_misalignment_for_peel (dr_vec_info *dr_info,
}
unsigned HOST_WIDE_INT alignment;
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
&& known_alignment_for_access_p (dr_info)
&& known_alignment_for_access_p (dr_peel_info))
&& known_alignment_for_access_p (dr_info, vectype)
&& known_alignment_for_access_p (dr_peel_info, vectype))
{
int misal = DR_MISALIGNMENT (dr_info);
int misal = dr_misalignment (dr_info, vectype);
misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
misal &= alignment - 1;
SET_DR_MISALIGNMENT (dr_info, misal);
set_dr_misalignment (dr_info, misal);
return;
}
@ -1316,13 +1318,13 @@ vector_alignment_reachable_p (dr_vec_info *dr_info)
int elem_size, mis_in_elements;
/* FORNOW: handle only known alignment. */
if (!known_alignment_for_access_p (dr_info))
if (!known_alignment_for_access_p (dr_info, vectype))
return false;
poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
elem_size = vector_element_size (vector_size, nelements);
mis_in_elements = DR_MISALIGNMENT (dr_info) / elem_size;
mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
return false;
@ -1330,7 +1332,8 @@ vector_alignment_reachable_p (dr_vec_info *dr_info)
/* If misalignment is known at the compile time then allow peeling
only if natural alignment is reachable through peeling. */
if (known_alignment_for_access_p (dr_info) && !aligned_access_p (dr_info))
if (known_alignment_for_access_p (dr_info, vectype)
&& !aligned_access_p (dr_info, vectype))
{
HOST_WIDE_INT elmsize =
int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
@ -1338,9 +1341,9 @@ vector_alignment_reachable_p (dr_vec_info *dr_info)
{
dump_printf_loc (MSG_NOTE, vect_location,
"data size = %wd. misalignment = %d.\n", elmsize,
DR_MISALIGNMENT (dr_info));
dr_misalignment (dr_info, vectype));
}
if (DR_MISALIGNMENT (dr_info) % elmsize)
if (dr_misalignment (dr_info, vectype) % elmsize)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -1349,7 +1352,7 @@ vector_alignment_reachable_p (dr_vec_info *dr_info)
}
}
if (!known_alignment_for_access_p (dr_info))
if (!known_alignment_for_access_p (dr_info, vectype))
{
tree type = TREE_TYPE (DR_REF (dr_info->dr));
bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
@ -1441,8 +1444,9 @@ vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
{
struct _vect_peel_info elem, *slot;
_vect_peel_info **new_slot;
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
bool supportable_dr_alignment
= vect_supportable_dr_alignment (loop_vinfo, dr_info, true);
= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, true);
elem.npeel = npeel;
slot = peeling_htab->find (&elem);
@ -1508,7 +1512,7 @@ vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
continue;
int save_misalignment;
save_misalignment = DR_MISALIGNMENT (dr_info);
save_misalignment = dr_info->misalignment;
if (npeel == 0)
;
else if (unknown_misalignment && dr_info == dr0_info)
@ -1625,10 +1629,11 @@ vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
if (!vect_relevant_for_alignment_p (dr_info))
continue;
save_misalignment = DR_MISALIGNMENT (dr_info);
save_misalignment = dr_info->misalignment;
vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
supportable_dr_alignment
= vect_supportable_dr_alignment (loop_vinfo, dr_info, false);
= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, false);
SET_DR_MISALIGNMENT (dr_info, save_misalignment);
if (!supportable_dr_alignment)
@ -1782,7 +1787,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
bool one_misalignment_unknown = false;
bool one_dr_unsupportable = false;
dr_vec_info *unsupportable_dr_info = NULL;
unsigned int mis, dr0_same_align_drs = 0, first_store_same_align_drs = 0;
unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
hash_table<peel_info_hasher> peeling_htab (1);
DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
@ -1878,12 +1883,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
continue;
stmt_vec_info stmt_info = dr_info->stmt;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
supportable_dr_alignment
= vect_supportable_dr_alignment (loop_vinfo, dr_info, true);
= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, true);
do_peeling = vector_alignment_reachable_p (dr_info);
if (do_peeling)
{
if (known_alignment_for_access_p (dr_info))
if (known_alignment_for_access_p (dr_info, vectype))
{
unsigned int npeel_tmp = 0;
bool negative = tree_int_cst_compare (DR_STEP (dr),
@ -1896,10 +1902,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
unsigned int target_align =
DR_TARGET_ALIGNMENT (dr_info).to_constant ();
unsigned int dr_size = vect_get_scalar_dr_size (dr_info);
mis = (negative
? DR_MISALIGNMENT (dr_info)
: -DR_MISALIGNMENT (dr_info));
if (DR_MISALIGNMENT (dr_info) != 0)
unsigned int mis = dr_misalignment (dr_info, vectype);
mis = negative ? mis : -mis;
if (mis != 0)
npeel_tmp = (mis & (target_align - 1)) / dr_size;
/* For multiple types, it is possible that the bigger type access
@ -1982,7 +1987,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
}
else
{
if (!aligned_access_p (dr_info))
if (!aligned_access_p (dr_info, vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -2152,7 +2157,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (do_peeling)
{
stmt_vec_info stmt_info = dr0_info->stmt;
if (known_alignment_for_access_p (dr0_info))
if (known_alignment_for_access_p (dr0_info,
STMT_VINFO_VECTYPE (stmt_info)))
{
bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
size_zero_node) < 0;
@ -2163,9 +2169,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
updating DR_MISALIGNMENT values. The peeling factor is the
vectorization factor minus the misalignment as an element
count. */
mis = (negative
? DR_MISALIGNMENT (dr0_info)
: -DR_MISALIGNMENT (dr0_info));
unsigned int mis
= dr_misalignment (dr0_info, STMT_VINFO_VECTYPE (stmt_info));
mis = negative ? mis : -mis;
/* If known_alignment_for_access_p then we have set
DR_MISALIGNMENT which is only done if we know it at compiler
time, so it is safe to assume target alignment is constant.
@ -2192,7 +2198,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
do_peeling = false;
/* Check if all datarefs are supportable and log. */
if (do_peeling && known_alignment_for_access_p (dr0_info) && npeel == 0)
if (do_peeling
&& npeel == 0
&& known_alignment_for_access_p (dr0_info,
STMT_VINFO_VECTYPE (stmt_info)))
return opt_result::success ();
/* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
@ -2304,11 +2313,12 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
if (aligned_access_p (dr_info)
stmt_vec_info stmt_info = dr_info->stmt;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
if (aligned_access_p (dr_info, vectype)
|| !vect_relevant_for_alignment_p (dr_info))
continue;
stmt_vec_info stmt_info = dr_info->stmt;
if (STMT_VINFO_STRIDED_P (stmt_info))
{
do_versioning = false;
@ -2316,14 +2326,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
}
supportable_dr_alignment
= vect_supportable_dr_alignment (loop_vinfo, dr_info, false);
= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
false);
if (!supportable_dr_alignment)
{
int mask;
tree vectype;
if (known_alignment_for_access_p (dr_info)
if (known_alignment_for_access_p (dr_info, vectype)
|| LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
>= (unsigned) param_vect_max_version_for_alignment_checks)
{
@ -2331,9 +2338,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
break;
}
vectype = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (vectype);
/* At present we don't support versioning for alignment
with variable VF, since there's no guarantee that the
VF is a power of two. We could relax this if we added
@ -2363,7 +2367,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
Construct the mask needed for this test. For example,
GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
mask must be 15 = 0xf. */
mask = size - 1;
int mask = size - 1;
/* FORNOW: use the same mask to test all potentially unaligned
references in the loop. */
@ -2444,7 +2448,8 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
&& DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
continue;
vect_compute_data_ref_alignment (loop_vinfo, dr_info);
vect_compute_data_ref_alignment (loop_vinfo, dr_info,
STMT_VINFO_VECTYPE (dr_info->stmt));
}
}
@ -2460,21 +2465,30 @@ vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
/* Alignment is maintained in the first element of the group. */
stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
/* We need to commit to a vector type for the group now. */
if (is_a <bb_vec_info> (vinfo)
&& !vect_update_shared_vectype (first_stmt_info, SLP_TREE_VECTYPE (node)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"desired vector type conflicts with earlier one "
"for %G", first_stmt_info->stmt);
return false;
}
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
tree vectype = SLP_TREE_VECTYPE (node);
poly_uint64 vector_alignment
= exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
BITS_PER_UNIT);
if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
vect_compute_data_ref_alignment (vinfo, dr_info);
vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
/* Re-analyze alignment when we're facing a vectorization with a bigger
alignment requirement. */
else if (known_lt (dr_info->target_alignment, vector_alignment))
{
poly_uint64 old_target_alignment = dr_info->target_alignment;
int old_misalignment = dr_info->misalignment;
vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
/* But keep knowledge about a smaller alignment. */
if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
&& dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
{
dr_info->target_alignment = old_target_alignment;
dr_info->misalignment = old_misalignment;
}
}
/* When we ever face unordered target alignments the first one wins in terms
of analyzing and the other will become unknown in dr_misalignment. */
return true;
}
@ -3259,12 +3273,12 @@ vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
}
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
&& (vect_supportable_dr_alignment (vinfo, dr_info, false)
&& (vect_supportable_dr_alignment (vinfo, dr_info, vectype, false)
== dr_explicit_realign_optimized))
{
/* We might access a full vector's worth. */
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
}
return access_size;
@ -4733,7 +4747,7 @@ vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
unshare_expr (DR_REF (dr)));
}
vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
addr_base = force_gimple_operand (addr_base, &seq, true, dest);
gimple_seq_add_seq (new_stmt_list, seq);
@ -6580,17 +6594,16 @@ vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
bool check_aligned_accesses)
tree vectype, bool check_aligned_accesses)
{
data_reference *dr = dr_info->dr;
stmt_vec_info stmt_info = dr_info->stmt;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *vect_loop = NULL;
bool nested_in_vect_loop = false;
if (aligned_access_p (dr_info) && !check_aligned_accesses)
if (aligned_access_p (dr_info, vectype) && !check_aligned_accesses)
return dr_aligned;
/* For now assume all conditional loads/stores support unaligned
@ -6679,8 +6692,6 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
&& (!targetm.vectorize.builtin_mask_for_load
|| targetm.vectorize.builtin_mask_for_load ()))
{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* If we are doing SLP then the accesses need not have the
same alignment, instead it depends on the SLP group size. */
if (loop_vinfo
@ -6698,11 +6709,11 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
else
return dr_explicit_realign_optimized;
}
if (!known_alignment_for_access_p (dr_info))
if (!known_alignment_for_access_p (dr_info, vectype))
is_packed = not_size_aligned (DR_REF (dr));
if (targetm.vectorize.support_vector_misalignment
(mode, type, DR_MISALIGNMENT (dr_info), is_packed))
(mode, type, dr_misalignment (dr_info, vectype), is_packed))
/* Can't software pipeline the loads, but can at least do them. */
return dr_unaligned_supported;
}
@ -6711,11 +6722,11 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
bool is_packed = false;
tree type = (TREE_TYPE (DR_REF (dr)));
if (!known_alignment_for_access_p (dr_info))
if (!known_alignment_for_access_p (dr_info, vectype))
is_packed = not_size_aligned (DR_REF (dr));
if (targetm.vectorize.support_vector_misalignment
(mode, type, DR_MISALIGNMENT (dr_info), is_packed))
(mode, type, dr_misalignment (dr_info, vectype), is_packed))
return dr_unaligned_supported;
}

View file

@ -779,56 +779,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
return 0;
}
/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
Return true if we can, meaning that this choice doesn't conflict with
existing SLP nodes that use STMT_INFO. */
bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
if (old_vectype)
return useless_type_conversion_p (vectype, old_vectype);
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
/* We maintain the invariant that if any statement in the group is
used, all other members of the group have the same vector type. */
stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
stmt_vec_info member_info = first_info;
for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
if (is_pattern_stmt_p (member_info)
&& !useless_type_conversion_p (vectype,
STMT_VINFO_VECTYPE (member_info)))
break;
if (!member_info)
{
for (member_info = first_info; member_info;
member_info = DR_GROUP_NEXT_ELEMENT (member_info))
STMT_VINFO_VECTYPE (member_info) = vectype;
return true;
}
}
else if (!is_pattern_stmt_p (stmt_info))
{
STMT_VINFO_VECTYPE (stmt_info) = vectype;
return true;
}
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: incompatible vector"
" types for: %G", stmt_info->stmt);
dump_printf_loc (MSG_NOTE, vect_location,
" old vector type: %T\n", old_vectype);
dump_printf_loc (MSG_NOTE, vect_location,
" new vector type: %T\n", vectype);
}
return false;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
to be combined into the same SLP group. */
@ -4508,15 +4458,6 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
return vectorizable_slp_permutation (vinfo, NULL, node, cost_vec);
gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
if (is_a <bb_vec_info> (vinfo)
&& !vect_update_shared_vectype (stmt_info, SLP_TREE_VECTYPE (node)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"desired vector type conflicts with earlier one "
"for %G", stmt_info->stmt);
return false;
}
bool dummy;
return vect_analyze_stmt (vinfo, stmt_info, &dummy,

View file

@ -1026,8 +1026,9 @@ vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
stmt_vector_for_cost *body_cost_vec)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int alignment_support_scheme
= vect_supportable_dr_alignment (vinfo, dr_info, false);
= vect_supportable_dr_alignment (vinfo, dr_info, vectype, false);
switch (alignment_support_scheme)
{
@ -1048,7 +1049,7 @@ vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
/* Here, we assign an additional cost for the unaligned store. */
*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
unaligned_store, stmt_info,
DR_MISALIGNMENT (dr_info),
dr_misalignment (dr_info, vectype),
vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@ -1216,8 +1217,9 @@ vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
bool record_prologue_costs)
{
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int alignment_support_scheme
= vect_supportable_dr_alignment (vinfo, dr_info, false);
= vect_supportable_dr_alignment (vinfo, dr_info, vectype, false);
switch (alignment_support_scheme)
{
@ -1237,7 +1239,7 @@ vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
/* Here, we assign an additional cost for the unaligned load. */
*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
unaligned_load, stmt_info,
DR_MISALIGNMENT (dr_info),
dr_misalignment (dr_info, vectype),
vect_body);
if (dump_enabled_p ())
@ -1984,8 +1986,8 @@ get_negative_load_store_type (vec_info *vinfo,
return VMAT_ELEMENTWISE;
}
alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
dr_info, false);
alignment_support_scheme = vect_supportable_dr_alignment (vinfo, dr_info,
vectype, false);
if (alignment_support_scheme != dr_aligned
&& alignment_support_scheme != dr_unaligned_supported)
{
@ -2169,7 +2171,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
be a multiple of B and so we are guaranteed to access a
non-gap element in the same B-sized block. */
if (overrun_p
&& gap < (vect_known_alignment_in_bytes (first_dr_info)
&& gap < (vect_known_alignment_in_bytes (first_dr_info,
vectype)
/ vect_get_scalar_dr_size (first_dr_info)))
overrun_p = false;
@ -2182,8 +2185,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
if (overrun_p
&& !masked_p
&& (((alignment_support_scheme
= vect_supportable_dr_alignment (vinfo,
first_dr_info, false)))
= vect_supportable_dr_alignment (vinfo, first_dr_info,
vectype, false)))
== dr_aligned
|| alignment_support_scheme == dr_unaligned_supported)
&& known_eq (nunits, (group_size - gap) * 2)
@ -2240,7 +2243,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
same B-sized block. */
if (would_overrun_p
&& !masked_p
&& gap < (vect_known_alignment_in_bytes (first_dr_info)
&& gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
/ vect_get_scalar_dr_size (first_dr_info)))
would_overrun_p = false;
@ -2294,7 +2297,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
*alignment_support_scheme = dr_unaligned_supported;
else
*alignment_support_scheme
= vect_supportable_dr_alignment (vinfo, first_dr_info, false);
= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, false);
if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
{
@ -2435,7 +2438,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
*alignment_support_scheme
= vect_supportable_dr_alignment (vinfo,
STMT_VINFO_DR_INFO (stmt_info),
false);
vectype, false);
}
}
@ -7907,7 +7910,7 @@ vectorizable_store (vec_info *vinfo,
alignment_support_scheme = dr_unaligned_supported;
else
alignment_support_scheme
= vect_supportable_dr_alignment (vinfo, first_dr_info, false);
= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, false);
gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
@ -8218,15 +8221,16 @@ vectorizable_store (vec_info *vinfo,
vec_oprnd = result_chain[i];
align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
if (aligned_access_p (first_dr_info))
if (aligned_access_p (first_dr_info, vectype))
misalign = 0;
else if (DR_MISALIGNMENT (first_dr_info) == -1)
else if (dr_misalignment (first_dr_info, vectype)
== DR_MISALIGNMENT_UNKNOWN)
{
align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
misalign = 0;
}
else
misalign = DR_MISALIGNMENT (first_dr_info);
misalign = dr_misalignment (first_dr_info, vectype);
if (dataref_offset == NULL_TREE
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
@ -8303,7 +8307,7 @@ vectorizable_store (vec_info *vinfo,
dataref_offset
? dataref_offset
: build_int_cst (ref_type, 0));
if (aligned_access_p (first_dr_info))
if (aligned_access_p (first_dr_info, vectype))
;
else
TREE_TYPE (data_ref)
@ -9551,17 +9555,17 @@ vectorizable_load (vec_info *vinfo,
known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
if (alignment_support_scheme == dr_aligned)
{
gcc_assert (aligned_access_p (first_dr_info));
gcc_assert (aligned_access_p (first_dr_info, vectype));
misalign = 0;
}
else if (DR_MISALIGNMENT (first_dr_info) == -1)
else if (dr_misalignment (first_dr_info, vectype) == -1)
{
align = dr_alignment
(vect_dr_behavior (vinfo, first_dr_info));
misalign = 0;
}
else
misalign = DR_MISALIGNMENT (first_dr_info);
misalign = dr_misalignment (first_dr_info, vectype);
if (dataref_offset == NULL_TREE
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr),
@ -9624,7 +9628,8 @@ vectorizable_load (vec_info *vinfo,
unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info);
unsigned int vect_align
= vect_known_alignment_in_bytes (first_dr_info);
= vect_known_alignment_in_bytes (first_dr_info,
vectype);
unsigned int scalar_dr_size
= vect_get_scalar_dr_size (first_dr_info);
/* If there's no peeling for gaps but we have a gap
@ -10897,6 +10902,10 @@ vect_analyze_stmt (vec_info *vinfo,
gcc_unreachable ();
}
tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
if (node)
STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
@ -10967,6 +10976,9 @@ vect_analyze_stmt (vec_info *vinfo,
|| vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
}
if (node)
STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
if (!ok)
return opt_result::failure_at (stmt_info->stmt,
"not vectorized:"
@ -11005,6 +11017,10 @@ vect_transform_stmt (vec_info *vinfo,
gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
if (slp_node)
STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
switch (STMT_VINFO_TYPE (stmt_info))
{
case type_demotion_vec_info_type:
@ -11123,16 +11139,19 @@ vect_transform_stmt (vec_info *vinfo,
if (!slp_node && vec_stmt)
gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
return is_store;
if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
{
/* Handle stmts whose DEF is used outside the loop-nest that is
being vectorized. */
done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
slp_node_instance, true, NULL);
gcc_assert (done);
}
/* Handle stmts whose DEF is used outside the loop-nest that is
being vectorized. */
done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
slp_node_instance, true, NULL);
gcc_assert (done);
if (slp_node)
STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
return false;
return is_store;
}

View file

@ -1606,11 +1606,8 @@ set_dr_misalignment (dr_vec_info *dr_info, int val)
dr_info->misalignment = val;
}
extern int dr_misalignment (dr_vec_info *dr_info);
extern int dr_misalignment (dr_vec_info *dr_info, tree vectype);
/* Reflects actual alignment of first access in the vectorized loop,
taking into account peeling/versioning if applied. */
#define DR_MISALIGNMENT(DR) dr_misalignment (DR)
#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
/* Only defined once DR_MISALIGNMENT is defined. */
@ -1630,35 +1627,37 @@ set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
}
#define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
/* Return true if data access DR_INFO is aligned to its target alignment
(which may be less than a full vector). */
/* Return true if data access DR_INFO is aligned to the targets
preferred alignment for VECTYPE (which may be less than a full vector). */
static inline bool
aligned_access_p (dr_vec_info *dr_info)
aligned_access_p (dr_vec_info *dr_info, tree vectype)
{
return (DR_MISALIGNMENT (dr_info) == 0);
return (dr_misalignment (dr_info, vectype) == 0);
}
/* Return TRUE if the alignment of the data access is known, and FALSE
/* Return TRUE if the (mis-)alignment of the data access is known with
respect to the targets preferred alignment for VECTYPE, and FALSE
otherwise. */
static inline bool
known_alignment_for_access_p (dr_vec_info *dr_info)
known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
{
return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN);
return (dr_misalignment (dr_info, vectype) != DR_MISALIGNMENT_UNKNOWN);
}
/* Return the minimum alignment in bytes that the vectorized version
of DR_INFO is guaranteed to have. */
static inline unsigned int
vect_known_alignment_in_bytes (dr_vec_info *dr_info)
vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
{
if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN)
int misalignment = dr_misalignment (dr_info, vectype);
if (misalignment == DR_MISALIGNMENT_UNKNOWN)
return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
if (DR_MISALIGNMENT (dr_info) == 0)
else if (misalignment == 0)
return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info);
return misalignment & -misalignment;
}
/* Return the behavior of DR_INFO with respect to the vectorization context
@ -1971,7 +1970,7 @@ extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
extern enum dr_alignment_support vect_supportable_dr_alignment
(vec_info *, dr_vec_info *, bool);
(vec_info *, dr_vec_info *, tree, bool);
extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
@ -2110,7 +2109,6 @@ extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
const vec<tree> &, unsigned int, vec<tree> &);
extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
extern bool vect_update_shared_vectype (stmt_vec_info, tree);
extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
extern void vect_free_slp_tree (slp_tree);