poly_int: vector_builder element count
This patch changes the number of elements in a vector being built by a vector_builder from unsigned int to poly_uint64. The case in which it isn't a constant is the one that motivated adding the vector encoding in the first place. 2018-01-03 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * vector-builder.h (vector_builder::m_full_nelts): Change from unsigned int to poly_uint64. (vector_builder::full_nelts): Update prototype accordingly. (vector_builder::new_vector): Likewise. (vector_builder::encoded_full_vector_p): Handle polynomial full_nelts. (vector_builder::operator ==): Likewise. (vector_builder::finalize): Likewise. * int-vector-builder.h (int_vector_builder::int_vector_builder): Take the number of elements as a poly_uint64 rather than an unsigned int. * vec-perm-indices.h (vec_perm_indices::m_nelts_per_input): Change from unsigned int to poly_uint64. (vec_perm_indices::vec_perm_indices): Update prototype accordingly. (vec_perm_indices::new_vector): Likewise. (vec_perm_indices::length): Likewise. (vec_perm_indices::nelts_per_input): Likewise. (vec_perm_indices::input_nelts): Likewise. * vec-perm-indices.c (vec_perm_indices::new_vector): Take the number of elements per input as a poly_uint64 rather than an unsigned int. Use the original encoding for variable-length vectors, rather than clamping each individual element. For the second and subsequent elements in each pattern, clamp the step and base before clamping their sum. (vec_perm_indices::series_p): Handle polynomial element counts. (vec_perm_indices::all_in_range_p): Likewise. (vec_perm_indices_to_tree): Likewise. (vec_perm_indices_to_rtx): Likewise. * tree-vect-stmts.c (vect_gen_perm_mask_any): Likewise. * tree-vector-builder.c (tree_vector_builder::new_unary_operation) (tree_vector_builder::new_binary_operation): Handle polynomial element counts. Return false if we need to know the number of elements at compile time. 
* fold-const.c (fold_vec_perm): Punt if the number of elements isn't known at compile time. From-SVN: r256165
This commit is contained in:
parent
6b0630fbe8
commit
0ecc2b7db7
8 changed files with 114 additions and 41 deletions
|
@ -1,3 +1,40 @@
|
|||
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* vector-builder.h (vector_builder::m_full_nelts): Change from
|
||||
unsigned int to poly_uint64.
|
||||
(vector_builder::full_nelts): Update prototype accordingly.
|
||||
(vector_builder::new_vector): Likewise.
|
||||
(vector_builder::encoded_full_vector_p): Handle polynomial full_nelts.
|
||||
(vector_builder::operator ==): Likewise.
|
||||
(vector_builder::finalize): Likewise.
|
||||
* int-vector-builder.h (int_vector_builder::int_vector_builder):
|
||||
Take the number of elements as a poly_uint64 rather than an
|
||||
unsigned int.
|
||||
* vec-perm-indices.h (vec_perm_indices::m_nelts_per_input): Change
|
||||
from unsigned int to poly_uint64.
|
||||
(vec_perm_indices::vec_perm_indices): Update prototype accordingly.
|
||||
(vec_perm_indices::new_vector): Likewise.
|
||||
(vec_perm_indices::length): Likewise.
|
||||
(vec_perm_indices::nelts_per_input): Likewise.
|
||||
(vec_perm_indices::input_nelts): Likewise.
|
||||
* vec-perm-indices.c (vec_perm_indices::new_vector): Take the
|
||||
number of elements per input as a poly_uint64 rather than an
|
||||
unsigned int. Use the original encoding for variable-length
|
||||
vectors, rather than clamping each individual element.
|
||||
For the second and subsequent elements in each pattern,
|
||||
clamp the step and base before clamping their sum.
|
||||
(vec_perm_indices::series_p): Handle polynomial element counts.
|
||||
(vec_perm_indices::all_in_range_p): Likewise.
|
||||
(vec_perm_indices_to_tree): Likewise.
|
||||
(vec_perm_indices_to_rtx): Likewise.
|
||||
* tree-vect-stmts.c (vect_gen_perm_mask_any): Likewise.
|
||||
* tree-vector-builder.c (tree_vector_builder::new_unary_operation)
|
||||
(tree_vector_builder::new_binary_operation): Handle polynomial
|
||||
element counts. Return false if we need to know the number
|
||||
of elements at compile time.
|
||||
* fold-const.c (fold_vec_perm): Punt if the number of elements
|
||||
isn't known at compile time.
|
||||
|
||||
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* vec-perm-indices.h (vec_perm_builder): Change element type
|
||||
|
|
|
@ -8927,9 +8927,11 @@ static tree
|
|||
fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned HOST_WIDE_INT nelts;
|
||||
bool need_ctor = false;
|
||||
|
||||
unsigned int nelts = sel.length ();
|
||||
if (!sel.length ().is_constant (&nelts))
|
||||
return NULL_TREE;
|
||||
gcc_assert (TYPE_VECTOR_SUBPARTS (type) == nelts
|
||||
&& TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
|
||||
&& TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts);
|
||||
|
|
|
@ -33,7 +33,7 @@ class int_vector_builder : public vector_builder<T, int_vector_builder<T> >
|
|||
|
||||
public:
|
||||
int_vector_builder () {}
|
||||
int_vector_builder (unsigned int, unsigned int, unsigned int);
|
||||
int_vector_builder (poly_uint64, unsigned int, unsigned int);
|
||||
|
||||
using parent::new_vector;
|
||||
|
||||
|
@ -53,7 +53,7 @@ private:
|
|||
|
||||
template<typename T>
|
||||
inline
|
||||
int_vector_builder<T>::int_vector_builder (unsigned int full_nelts,
|
||||
int_vector_builder<T>::int_vector_builder (poly_uint64 full_nelts,
|
||||
unsigned int npatterns,
|
||||
unsigned int nelts_per_pattern)
|
||||
{
|
||||
|
|
|
@ -6588,8 +6588,8 @@ vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
|
|||
{
|
||||
tree mask_type;
|
||||
|
||||
unsigned int nunits = sel.length ();
|
||||
gcc_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
|
||||
poly_uint64 nunits = sel.length ();
|
||||
gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
|
||||
|
||||
mask_type = build_vector_type (ssizetype, nunits);
|
||||
return vec_perm_indices_to_tree (mask_type, sel);
|
||||
|
|
|
@ -36,13 +36,15 @@ bool
|
|||
tree_vector_builder::new_unary_operation (tree type, tree t,
|
||||
bool allow_stepped_p)
|
||||
{
|
||||
unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type);
|
||||
gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t)));
|
||||
poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
|
||||
gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t))));
|
||||
unsigned int npatterns = VECTOR_CST_NPATTERNS (t);
|
||||
unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t);
|
||||
if (!allow_stepped_p && nelts_per_pattern > 2)
|
||||
{
|
||||
npatterns = full_nelts;
|
||||
if (!full_nelts.is_constant ())
|
||||
return false;
|
||||
npatterns = full_nelts.to_constant ();
|
||||
nelts_per_pattern = 1;
|
||||
}
|
||||
new_vector (type, npatterns, nelts_per_pattern);
|
||||
|
@ -61,9 +63,9 @@ bool
|
|||
tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
|
||||
bool allow_stepped_p)
|
||||
{
|
||||
unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type);
|
||||
gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))
|
||||
&& full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)));
|
||||
poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
|
||||
gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)))
|
||||
&& known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
|
||||
/* Conceptually we split the patterns in T1 and T2 until we have
|
||||
an equal number for both. Each split pattern requires the same
|
||||
number of elements per pattern as the original. E.g. splitting:
|
||||
|
@ -89,7 +91,9 @@ tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
|
|||
VECTOR_CST_NELTS_PER_PATTERN (t2));
|
||||
if (!allow_stepped_p && nelts_per_pattern > 2)
|
||||
{
|
||||
npatterns = full_nelts;
|
||||
if (!full_nelts.is_constant ())
|
||||
return false;
|
||||
npatterns = full_nelts.to_constant ();
|
||||
nelts_per_pattern = 1;
|
||||
}
|
||||
new_vector (type, npatterns, nelts_per_pattern);
|
||||
|
|
|
@ -38,16 +38,42 @@ along with GCC; see the file COPYING3. If not see
|
|||
void
|
||||
vec_perm_indices::new_vector (const vec_perm_builder &elements,
|
||||
unsigned int ninputs,
|
||||
unsigned int nelts_per_input)
|
||||
poly_uint64 nelts_per_input)
|
||||
{
|
||||
m_ninputs = ninputs;
|
||||
m_nelts_per_input = nelts_per_input;
|
||||
/* Expand the encoding and clamp each element. E.g. { 0, 2, 4, ... }
|
||||
might wrap halfway if there is only one vector input. */
|
||||
unsigned int full_nelts = elements.full_nelts ();
|
||||
m_encoding.new_vector (full_nelts, full_nelts, 1);
|
||||
for (unsigned int i = 0; i < full_nelts; ++i)
|
||||
/* If the vector has a constant number of elements, expand the
|
||||
encoding and clamp each element. E.g. { 0, 2, 4, ... } might
|
||||
wrap halfway if there is only one vector input, and we want
|
||||
the wrapped form to be the canonical one.
|
||||
|
||||
If the vector has a variable number of elements, just copy
|
||||
the encoding. In that case the unwrapped form is canonical
|
||||
and there is no way of representing the wrapped form. */
|
||||
poly_uint64 full_nelts = elements.full_nelts ();
|
||||
unsigned HOST_WIDE_INT copy_nelts;
|
||||
if (full_nelts.is_constant (&copy_nelts))
|
||||
m_encoding.new_vector (full_nelts, copy_nelts, 1);
|
||||
else
|
||||
{
|
||||
copy_nelts = elements.encoded_nelts ();
|
||||
m_encoding.new_vector (full_nelts, elements.npatterns (),
|
||||
elements.nelts_per_pattern ());
|
||||
}
|
||||
unsigned int npatterns = m_encoding.npatterns ();
|
||||
for (unsigned int i = 0; i < npatterns; ++i)
|
||||
m_encoding.quick_push (clamp (elements.elt (i)));
|
||||
/* Use the fact that:
|
||||
|
||||
(a + b) % c == ((a % c) + (b % c)) % c
|
||||
|
||||
to simplify the clamping of variable-length vectors. */
|
||||
for (unsigned int i = npatterns; i < copy_nelts; ++i)
|
||||
{
|
||||
element_type step = clamp (elements.elt (i)
|
||||
- elements.elt (i - npatterns));
|
||||
m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
|
||||
}
|
||||
m_encoding.finalize ();
|
||||
}
|
||||
|
||||
|
@ -98,7 +124,7 @@ vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
|
|||
if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
|
||||
return false;
|
||||
|
||||
unsigned int full_nelts = m_encoding.full_nelts ();
|
||||
element_type full_nelts = m_encoding.full_nelts ();
|
||||
unsigned int npatterns = m_encoding.npatterns ();
|
||||
|
||||
/* Calculate which multiple of OUT_STEP elements we need to get
|
||||
|
@ -112,7 +138,7 @@ vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
|
|||
for (;;)
|
||||
{
|
||||
/* Succeed if we've checked all the elements in the vector. */
|
||||
if (out_base >= full_nelts)
|
||||
if (known_ge (out_base, full_nelts))
|
||||
return true;
|
||||
|
||||
if (out_base >= npatterns)
|
||||
|
@ -156,7 +182,8 @@ vec_perm_indices::all_in_range_p (element_type start, element_type size) const
|
|||
|
||||
/* The number of elements in each pattern beyond the first two
|
||||
that we checked above. */
|
||||
unsigned int step_nelts = (m_encoding.full_nelts () / npatterns) - 2;
|
||||
poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
|
||||
npatterns) - 2;
|
||||
for (unsigned int i = 0; i < npatterns; ++i)
|
||||
{
|
||||
/* BASE1 has been checked but BASE2 hasn't. */
|
||||
|
@ -210,7 +237,7 @@ tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
|
|||
tree
|
||||
vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
|
||||
{
|
||||
gcc_assert (TYPE_VECTOR_SUBPARTS (type) == indices.length ());
|
||||
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
|
||||
tree_vector_builder sel (type, indices.encoding ().npatterns (),
|
||||
indices.encoding ().nelts_per_pattern ());
|
||||
unsigned int encoded_nelts = sel.encoded_nelts ();
|
||||
|
@ -226,7 +253,7 @@ rtx
|
|||
vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
|
||||
{
|
||||
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||||
&& GET_MODE_NUNITS (mode) == indices.length ());
|
||||
&& known_eq (GET_MODE_NUNITS (mode), indices.length ()));
|
||||
rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
|
||||
indices.encoding ().nelts_per_pattern ());
|
||||
unsigned int encoded_nelts = sel.encoded_nelts ();
|
||||
|
|
|
@ -53,9 +53,9 @@ class vec_perm_indices
|
|||
|
||||
public:
|
||||
vec_perm_indices ();
|
||||
vec_perm_indices (const vec_perm_builder &, unsigned int, unsigned int);
|
||||
vec_perm_indices (const vec_perm_builder &, unsigned int, poly_uint64);
|
||||
|
||||
void new_vector (const vec_perm_builder &, unsigned int, unsigned int);
|
||||
void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
|
||||
void new_expanded_vector (const vec_perm_indices &, unsigned int);
|
||||
void rotate_inputs (int delta);
|
||||
|
||||
|
@ -64,16 +64,16 @@ public:
|
|||
|
||||
/* Return the number of output elements. This is called length ()
|
||||
so that we present a more vec-like interface. */
|
||||
unsigned int length () const { return m_encoding.full_nelts (); }
|
||||
poly_uint64 length () const { return m_encoding.full_nelts (); }
|
||||
|
||||
/* Return the number of input vectors being permuted. */
|
||||
unsigned int ninputs () const { return m_ninputs; }
|
||||
|
||||
/* Return the number of elements in each input vector. */
|
||||
unsigned int nelts_per_input () const { return m_nelts_per_input; }
|
||||
poly_uint64 nelts_per_input () const { return m_nelts_per_input; }
|
||||
|
||||
/* Return the total number of input elements. */
|
||||
unsigned int input_nelts () const { return m_ninputs * m_nelts_per_input; }
|
||||
poly_uint64 input_nelts () const { return m_ninputs * m_nelts_per_input; }
|
||||
|
||||
element_type clamp (element_type) const;
|
||||
element_type operator[] (unsigned int i) const;
|
||||
|
@ -86,7 +86,7 @@ private:
|
|||
|
||||
vec_perm_builder m_encoding;
|
||||
unsigned int m_ninputs;
|
||||
unsigned int m_nelts_per_input;
|
||||
poly_uint64 m_nelts_per_input;
|
||||
};
|
||||
|
||||
bool tree_to_vec_perm_builder (vec_perm_builder *, tree);
|
||||
|
@ -107,7 +107,7 @@ vec_perm_indices::vec_perm_indices ()
|
|||
inline
|
||||
vec_perm_indices::vec_perm_indices (const vec_perm_builder &elements,
|
||||
unsigned int ninputs,
|
||||
unsigned int nelts_per_input)
|
||||
poly_uint64 nelts_per_input)
|
||||
{
|
||||
new_vector (elements, ninputs, nelts_per_input);
|
||||
}
|
||||
|
|
|
@ -90,7 +90,7 @@ class vector_builder : public auto_vec<T, 32>
|
|||
public:
|
||||
vector_builder ();
|
||||
|
||||
unsigned int full_nelts () const { return m_full_nelts; }
|
||||
poly_uint64 full_nelts () const { return m_full_nelts; }
|
||||
unsigned int npatterns () const { return m_npatterns; }
|
||||
unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
|
||||
unsigned int encoded_nelts () const;
|
||||
|
@ -103,7 +103,7 @@ public:
|
|||
void finalize ();
|
||||
|
||||
protected:
|
||||
void new_vector (unsigned int, unsigned int, unsigned int);
|
||||
void new_vector (poly_uint64, unsigned int, unsigned int);
|
||||
void reshape (unsigned int, unsigned int);
|
||||
bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
|
||||
bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
|
||||
|
@ -115,7 +115,7 @@ private:
|
|||
Derived *derived () { return static_cast<Derived *> (this); }
|
||||
const Derived *derived () const;
|
||||
|
||||
unsigned int m_full_nelts;
|
||||
poly_uint64 m_full_nelts;
|
||||
unsigned int m_npatterns;
|
||||
unsigned int m_nelts_per_pattern;
|
||||
};
|
||||
|
@ -152,7 +152,7 @@ template<typename T, typename Derived>
|
|||
inline bool
|
||||
vector_builder<T, Derived>::encoded_full_vector_p () const
|
||||
{
|
||||
return m_npatterns * m_nelts_per_pattern == m_full_nelts;
|
||||
return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
|
||||
}
|
||||
|
||||
/* Start building a vector that has FULL_NELTS elements. Initially
|
||||
|
@ -160,7 +160,7 @@ vector_builder<T, Derived>::encoded_full_vector_p () const
|
|||
|
||||
template<typename T, typename Derived>
|
||||
void
|
||||
vector_builder<T, Derived>::new_vector (unsigned int full_nelts,
|
||||
vector_builder<T, Derived>::new_vector (poly_uint64 full_nelts,
|
||||
unsigned int npatterns,
|
||||
unsigned int nelts_per_pattern)
|
||||
{
|
||||
|
@ -178,7 +178,7 @@ template<typename T, typename Derived>
|
|||
bool
|
||||
vector_builder<T, Derived>::operator == (const Derived &other) const
|
||||
{
|
||||
if (m_full_nelts != other.m_full_nelts
|
||||
if (maybe_ne (m_full_nelts, other.m_full_nelts)
|
||||
|| m_npatterns != other.m_npatterns
|
||||
|| m_nelts_per_pattern != other.m_nelts_per_pattern)
|
||||
return false;
|
||||
|
@ -356,14 +356,16 @@ vector_builder<T, Derived>::finalize ()
|
|||
{
|
||||
/* The encoding requires the same number of elements to come from each
|
||||
pattern. */
|
||||
gcc_assert (m_full_nelts % m_npatterns == 0);
|
||||
gcc_assert (multiple_p (m_full_nelts, m_npatterns));
|
||||
|
||||
/* Allow the caller to build more elements than necessary. For example,
|
||||
it's often convenient to build a stepped vector from the natural
|
||||
encoding of three elements even if the vector itself only has two. */
|
||||
if (m_full_nelts <= encoded_nelts ())
|
||||
unsigned HOST_WIDE_INT const_full_nelts;
|
||||
if (m_full_nelts.is_constant (&const_full_nelts)
|
||||
&& const_full_nelts <= encoded_nelts ())
|
||||
{
|
||||
m_npatterns = m_full_nelts;
|
||||
m_npatterns = const_full_nelts;
|
||||
m_nelts_per_pattern = 1;
|
||||
}
|
||||
|
||||
|
@ -435,9 +437,10 @@ vector_builder<T, Derived>::finalize ()
|
|||
would be for 2-bit elements. We'll have treated them as
|
||||
duplicates in the loop above. */
|
||||
if (m_nelts_per_pattern == 1
|
||||
&& this->length () >= m_full_nelts
|
||||
&& m_full_nelts.is_constant (&const_full_nelts)
|
||||
&& this->length () >= const_full_nelts
|
||||
&& (m_npatterns & 3) == 0
|
||||
&& stepped_sequence_p (m_npatterns / 4, m_full_nelts,
|
||||
&& stepped_sequence_p (m_npatterns / 4, const_full_nelts,
|
||||
m_npatterns / 4))
|
||||
{
|
||||
reshape (m_npatterns / 4, 3);
|
||||
|
|
Loading…
Add table
Reference in a new issue