AArch64: div-by-255, ensure that arguments are registers. [PR107988]

At -O0 (as opposed to e.g. volatile) we can get into the situation where the
in0 and result RTL arguments passed to the division function are memory
locations instead of registers.  I think we could reject these early on by
checking that the gimple values are GIMPLE registers, but I think it's better to
handle it.

As such I force them into registers, emit a move to the memory locations, and
leave it up to reload to handle.  This fixes the ICE and still allows the
optimization in these cases, which improves the code quality a lot.

gcc/ChangeLog:

	PR target/107988
	* config/aarch64/aarch64.cc
	(aarch64_vectorize_can_special_div_by_constant): Ensure input and output
	RTL are registers.

gcc/testsuite/ChangeLog:

	PR target/107988
	* gcc.target/aarch64/pr107988-1.c: New test.
This commit is contained in:
Tamar Christina 2022-12-14 13:54:28 +00:00
parent 81f86cb969
commit 8c2451ba46
2 changed files with 18 additions and 8 deletions

View file

@ -24395,7 +24395,8 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code,
|| !TYPE_UNSIGNED (vectype))
return false;
unsigned int flags = aarch64_classify_vector_mode (TYPE_MODE (vectype));
machine_mode mode = TYPE_MODE (vectype);
unsigned int flags = aarch64_classify_vector_mode (mode);
if ((flags & VEC_ANY_SVE) && !TARGET_SVE2)
return false;
@ -24411,15 +24412,14 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code,
if (in0 == NULL_RTX && in1 == NULL_RTX)
return true;
if (!VECTOR_TYPE_P (vectype))
return false;
gcc_assert (output);
if (!*output)
*output = gen_reg_rtx (TYPE_MODE (vectype));
emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1));
expand_operand ops[3];
create_output_operand (&ops[0], *output, mode);
create_input_operand (&ops[1], in0, mode);
create_fixed_operand (&ops[2], in1);
expand_insn (insn_code, 3, ops);
*output = ops[0].value;
return true;
}

View file

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-additional-options "-O0" } */
/* V is a 16-byte vector of unsigned short elements (GCC vector extension).  */
typedef unsigned short __attribute__((__vector_size__ (16))) V;
/* Regression test for PR107988: divide every element of V by the constant
   255 using a compound assignment on the parameter, then return the result.
   The file is built with -O0 (see dg-additional-options above) and is a
   compile-only test, so there is no run-time check of the quotient.  */
V
foo (V v)
{
/* Keep the in-place form: the statement shape is what the test exercises.  */
v /= 255;
return v;
}