RISC-V: Fix vec_init for simple sequences [PR114028].
For a vec_init (_a, _a, _a, _a) with _a of mode DImode we try to construct a "superword" of two "_a"s. This only works for modes < Pmode when we can "shift and or" both halves into one Pmode register. This patch disallows the optimization for inner_mode == Pmode and emits a simple broadcast in such a case. gcc/ChangeLog: PR target/114028 * config/riscv/riscv-v.cc (rvv_builder::can_duplicate_repeating_sequence_p): Return false if inner mode is already Pmode. (rvv_builder::is_repeating_sequence): New function. (expand_vec_init): Emit broadcast if sequence is all same. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr114028.c: New test.
This commit is contained in:
parent
fdf9df9d55
commit
85c12ae8b8
2 changed files with 49 additions and 1 deletions
|
@ -443,6 +443,7 @@ public:
|
|||
}
|
||||
|
||||
bool can_duplicate_repeating_sequence_p ();
|
||||
bool is_repeating_sequence ();
|
||||
rtx get_merged_repeating_sequence ();
|
||||
|
||||
bool repeating_sequence_use_merge_profitable_p ();
|
||||
|
@ -483,7 +484,8 @@ rvv_builder::can_duplicate_repeating_sequence_p ()
|
|||
{
|
||||
poly_uint64 new_size = exact_div (full_nelts (), npatterns ());
|
||||
unsigned int new_inner_size = m_inner_bits_size * npatterns ();
|
||||
if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode)
|
||||
if (m_inner_mode == Pmode
|
||||
|| !int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode)
|
||||
|| GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD
|
||||
|| !get_vector_mode (m_new_inner_mode, new_size).exists (&m_new_mode))
|
||||
return false;
|
||||
|
@ -492,6 +494,18 @@ rvv_builder::can_duplicate_repeating_sequence_p ()
|
|||
return nelts_per_pattern () == 1;
|
||||
}
|
||||
|
||||
/* Return true if the vector is a simple sequence with one pattern and all
|
||||
elements the same. */
|
||||
bool
|
||||
rvv_builder::is_repeating_sequence ()
|
||||
{
|
||||
if (npatterns () > 1)
|
||||
return false;
|
||||
if (full_nelts ().is_constant ())
|
||||
return repeating_sequence_p (0, full_nelts ().to_constant (), 1);
|
||||
return nelts_per_pattern () == 1;
|
||||
}
|
||||
|
||||
/* Return true if this is a repeating sequence for which the merge
|
||||
approach generates better code than the default approach
|
||||
(slide1down).
|
||||
|
@ -2544,6 +2558,15 @@ expand_vec_init (rtx target, rtx vals)
|
|||
v.quick_push (XVECEXP (vals, 0, i));
|
||||
v.finalize ();
|
||||
|
||||
/* If the sequence is v = { a, a, a, a } just broadcast an element. */
|
||||
if (v.is_repeating_sequence ())
|
||||
{
|
||||
machine_mode mode = GET_MODE (target);
|
||||
rtx dup = expand_vector_broadcast (mode, v.elt (0));
|
||||
emit_move_insn (target, dup);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nelts > 3)
|
||||
{
|
||||
/* Case 1: Convert v = { a, b, a, b } into v = { ab, ab }. */
|
||||
|
|
25
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c
Normal file
25
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114028.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
|
||||
|
||||
/* File-scope state read and written by the loop in main.  NOTE(review):
   presumably kept global so the loop is not folded away at -O3 and still
   reaches the vectorizer -- confirm against PR114028.  */
int a, d = 55003;
|
||||
long c = 0, h;
|
||||
long e = 1;
|
||||
short i;
|
||||
|
||||
/* Run a 16-iteration loop that ANDs (i - 8) into e, then abort if e no
   longer holds its initial value 1.  */
int
|
||||
main ()
|
||||
{
|
||||
for (int g = 0; g < 16; g++)
|
||||
{
|
||||
d |= c;
|
||||
/* l is d narrowed to short.  */
short l = d;
|
||||
/* Parses as (l < 0 || (a >> 4)) ? d : a; the result is narrowed to the
   short i on assignment.  */
i = l < 0 || a >> 4 ? d : a;
|
||||
/* The subtraction is done in long because of the 8L operand.  */
h = i - 8L;
|
||||
e &= h;
|
||||
}
|
||||
|
||||
/* e starts at 1; any other final value is treated as a failure.  */
if (e != 1)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "vmv\.v\.i\tv\[0-9\],0" } } */
|
Loading…
Add table
Reference in a new issue