rs6000: Remove useless insns fed into lvx/stvx [PR97019]

This patch extends the existing function find_alignment_op to check
that all definitions of base_reg are AND operations with the mask
-16B that force the alignment.  If they all are, it passes all of
the AND operations and their insns to the functions
recombine_lvx_pattern and recombine_stvx_pattern, which can then
remove all of the now-useless ANDs.
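
On Power8 LE, lvx/stvx themselves ignore the low four bits of the
effective address, so an explicit alignment mask feeding them is
redundant.  As a hedged sketch (register numbers invented), the
useless instruction this removes looks like:

    rldicr 9,9,0,59    # r9 &= -16, i.e. force 16-byte alignment
    lvx 0,0,9          # lvx already clears the low 4 EA bits itself

Previously find_alignment_op gave up as soon as base_reg had more
than one definition; now it only requires that every reaching
definition be such a masking operation.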

Bootstrapped/regtested on powerpc64le-linux-gnu P8.

gcc/ChangeLog:

	PR target/97019
	* config/rs6000/rs6000-p8swap.c (find_alignment_op): Adjust to
	support multiple definitions that are all AND operations with
	the mask -16B.
	(recombine_lvx_pattern): Adjust to handle multiple AND operations
	from find_alignment_op.
	(recombine_stvx_pattern): Likewise.

gcc/testsuite/ChangeLog:

	PR target/97019
	* gcc.target/powerpc/pr97019.c: New test.
Author: Kewen Lin
Date:   2020-09-15 22:32:55 -05:00
Commit: e0d98ffd14 (parent 01d56aeaff)
2 changed files with 174 additions and 52 deletions

--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -2095,11 +2095,15 @@ alignment_mask (rtx_insn *insn)
   return alignment_with_canonical_addr (SET_SRC (body));
 }
 
-/* Given INSN that's a load or store based at BASE_REG, look for a
-   feeding computation that aligns its address on a 16-byte boundary.
-   Return the rtx and its containing AND_INSN.  */
-static rtx
-find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
+/* Given INSN that's a load or store based at BASE_REG, check if
+   all of its feeding computations align its address on a 16-byte
+   boundary.  If so, return true and add all definition insns into
+   AND_INSNS and their corresponding fully-expanded rtxes for the
+   masking operations into AND_OPS.  */
+
+static bool
+find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
+		   vec<rtx> *and_ops)
 {
   df_ref base_use;
   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2111,19 +2115,28 @@ find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
 	continue;
 
       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
-      if (!base_def_link || base_def_link->next)
-	break;
+      if (!base_def_link)
+	return false;
 
-      /* With stack-protector code enabled, and possibly in other
-	 circumstances, there may not be an associated insn for
-	 the def.  */
-      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
-	break;
+      while (base_def_link)
+	{
+	  /* With stack-protector code enabled, and possibly in other
+	     circumstances, there may not be an associated insn for
+	     the def.  */
+	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
+	    return false;
 
-      *and_insn = DF_REF_INSN (base_def_link->ref);
-      and_operation = alignment_mask (*and_insn);
-      if (and_operation != 0)
-	break;
+	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+	  and_operation = alignment_mask (and_insn);
+
+	  /* Stop if we find any one which doesn't align.  */
+	  if (!and_operation)
+	    return false;
+
+	  and_insns->safe_push (and_insn);
+	  and_ops->safe_push (and_operation);
+	  base_def_link = base_def_link->next;
+	}
     }
 
-  return and_operation;
+  return and_operation != 0;
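
Schematically, where the old code required base_reg to have exactly
one reaching definition, the new loop walks the whole DF_REF_CHAIN
and accepts shapes like the following (a hedged sketch; insn and
pseudo numbers are invented):

    (insn 10 ... (set (reg:DI 120) (and:DI (reg:DI 115) (const_int -16))))
    (insn 20 ... (set (reg:DI 120) (and:DI (reg:DI 118) (const_int -16))))
    ...
    (insn 30 ... (set (reg:V16QI 130)
                      (vec_select:V16QI (mem:V16QI (reg:DI 120)) ...)))

Both reaching definitions of reg 120 are alignment masks, so the
function now returns true and records both AND insns and their
operations.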
@@ -2143,11 +2156,14 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
 
   rtx mem = XEXP (SET_SRC (body), 0);
   rtx base_reg = XEXP (mem, 0);
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       df_ref def;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
       FOR_EACH_INSN_INFO_DEF (def, insn_info)
@ -2168,25 +2184,35 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
to_delete[INSN_UID (swap_insn)].replace = true;
to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
/* However, first we must be sure that we make the
base register from the AND operation available
in case the register has been overwritten. Copy
the base register to a new pseudo and use that
as the base register of the AND operation in
the new LVX instruction. */
rtx and_base = XEXP (and_operation, 0);
rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
rtx copy = gen_rtx_SET (new_reg, and_base);
rtx_insn *new_insn = emit_insn_after (copy, and_insn);
set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
df_insn_rescan (new_insn);
rtx new_reg = 0;
rtx and_mask = 0;
for (unsigned i = 0; i < and_insns.length (); i++)
{
/* However, first we must be sure that we make the
base register from the AND operation available
in case the register has been overwritten. Copy
the base register to a new pseudo and use that
as the base register of the AND operation in
the new LVX instruction. */
rtx_insn *and_insn = and_insns[i];
rtx and_op = and_ops[i];
rtx and_base = XEXP (and_op, 0);
if (!new_reg)
{
new_reg = gen_reg_rtx (GET_MODE (and_base));
and_mask = XEXP (and_op, 1);
}
rtx copy = gen_rtx_SET (new_reg, and_base);
rtx_insn *new_insn = emit_insn_after (copy, and_insn);
set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
df_insn_rescan (new_insn);
}
XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
XEXP (and_operation, 1));
XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
SET_SRC (body) = mem;
INSN_CODE (insn) = -1; /* Force re-recognition. */
df_insn_rescan (insn);
if (dump_file)
fprintf (dump_file, "lvx opportunity found at %d\n",
INSN_UID (insn));
@@ -2205,11 +2231,14 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
 
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       rtx src_reg = XEXP (SET_SRC (body), 0);
       df_ref src_use;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@ -2234,25 +2263,35 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
to_delete[INSN_UID (swap_insn)].replace = true;
to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
/* However, first we must be sure that we make the
base register from the AND operation available
in case the register has been overwritten. Copy
the base register to a new pseudo and use that
as the base register of the AND operation in
the new STVX instruction. */
rtx and_base = XEXP (and_operation, 0);
rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
rtx copy = gen_rtx_SET (new_reg, and_base);
rtx_insn *new_insn = emit_insn_after (copy, and_insn);
set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
df_insn_rescan (new_insn);
rtx new_reg = 0;
rtx and_mask = 0;
for (unsigned i = 0; i < and_insns.length (); i++)
{
/* However, first we must be sure that we make the
base register from the AND operation available
in case the register has been overwritten. Copy
the base register to a new pseudo and use that
as the base register of the AND operation in
the new STVX instruction. */
rtx_insn *and_insn = and_insns[i];
rtx and_op = and_ops[i];
rtx and_base = XEXP (and_op, 0);
if (!new_reg)
{
new_reg = gen_reg_rtx (GET_MODE (and_base));
and_mask = XEXP (and_op, 1);
}
rtx copy = gen_rtx_SET (new_reg, and_base);
rtx_insn *new_insn = emit_insn_after (copy, and_insn);
set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
df_insn_rescan (new_insn);
}
XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
XEXP (and_operation, 1));
XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
SET_SRC (body) = src_reg;
INSN_CODE (insn) = -1; /* Force re-recognition. */
df_insn_rescan (insn);
if (dump_file)
fprintf (dump_file, "stvx opportunity found at %d\n",
INSN_UID (insn));
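
The net effect in both routines is the same: the recombined insn's
MEM address is rewritten to embed the masking, roughly (a hedged
sketch; mode and pseudo are invented):

    (mem:V16QI (and:DI (reg:DI new_reg) (const_int -16)))

so the recognized lvx/stvx pattern carries the alignment itself,
each original stand-alone AND loses its only use, and the now-dead
ANDs can be removed afterwards.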

--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
@@ -0,0 +1,83 @@
/* This issue can only exist on little-endian P8 targets: for some
   cases the built-in functions vec_ld/vec_st can be implemented
   with lxvd2x/stxvd2x (P8 big-endian) or lxv/stxv (P9 and later),
   and in those cases the rldicr instructions feeding them are
   necessary.  */
/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
/* { dg-options "-O2 -mdejagnu-cpu=power8" } */

/* Test that there are no useless instructions "rldicr x,y,0,59"
   to align the addresses for lvx/stvx.  */

extern int a, b, c;
extern vector unsigned long long ev5, ev6, ev7, ev8;
extern int dummy (vector unsigned long long);

int test_vec_ld(unsigned char *pe) {
  vector unsigned long long v1, v2, v3, v4, v9;
  vector unsigned long long v5 = ev5;
  vector unsigned long long v6 = ev6;
  vector unsigned long long v7 = ev7;
  vector unsigned long long v8 = ev8;
  unsigned char *e = pe;
  do {
    if (a) {
      v1 = __builtin_vec_ld(16, (unsigned long long *)e);
      v2 = __builtin_vec_ld(32, (unsigned long long *)e);
      v3 = __builtin_vec_ld(48, (unsigned long long *)e);
      e = e + 8;
      for (int i = 0; i < a; i++) {
        v4 = v5;
        v5 = __builtin_crypto_vpmsumd(v1, v6);
        v6 = __builtin_crypto_vpmsumd(v2, v7);
        v7 = __builtin_crypto_vpmsumd(v3, v8);
        e = e + 8;
      }
    }
    v5 = __builtin_vec_ld(16, (unsigned long long *)e);
    v6 = __builtin_vec_ld(32, (unsigned long long *)e);
    v7 = __builtin_vec_ld(48, (unsigned long long *)e);
    if (c)
      b = 1;
  } while (b);
  return dummy(v4);
}

int test_vec_st(unsigned char *pe) {
  vector unsigned long long v1, v2, v3, v4;
  vector unsigned long long v5 = ev5;
  vector unsigned long long v6 = ev6;
  vector unsigned long long v7 = ev7;
  vector unsigned long long v8 = ev8;
  unsigned char *e = pe;
  do {
    if (a) {
      __builtin_vec_st(v1, 16, (unsigned long long *)e);
      __builtin_vec_st(v2, 32, (unsigned long long *)e);
      __builtin_vec_st(v3, 48, (unsigned long long *)e);
      e = e + 8;
      for (int i = 0; i < a; i++) {
        v4 = v5;
        v5 = __builtin_crypto_vpmsumd(v1, v6);
        v6 = __builtin_crypto_vpmsumd(v2, v7);
        v7 = __builtin_crypto_vpmsumd(v3, v8);
        e = e + 8;
      }
    }
    __builtin_vec_st(v5, 16, (unsigned long long *)e);
    __builtin_vec_st(v6, 32, (unsigned long long *)e);
    __builtin_vec_st(v7, 48, (unsigned long long *)e);
    if (c)
      b = 1;
  } while (b);
  return dummy(v4);
}

/* { dg-final { scan-assembler-not {(?n)rldicr.*,0,59} } } */