rs6000: Remove useless insns fed into lvx/stvx [PR97019]
This patch extends the existing function find_alignment_op to check
that all definitions of base_reg are AND operations with the mask
-16B to force the alignment.  If they all are, it passes the AND
operations and their insns on to recombine_lvx_pattern and
recombine_stvx_pattern, which can then remove all of the useless
ANDs.

Bootstrapped/regtested on powerpc64le-linux-gnu P8.

gcc/ChangeLog:

	PR target/97019
	* config/rs6000/rs6000-p8swap.c (find_alignment_op): Adjust to
	support multiple definitions that are all AND operations with
	the mask -16B.
	(recombine_lvx_pattern): Adjust to handle multiple AND
	operations from find_alignment_op.
	(recombine_stvx_pattern): Likewise.

gcc/testsuite/ChangeLog:

	PR target/97019
	* gcc.target/powerpc/pr97019.c: New test.
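To illustrate what gets cleaned up (a hedged sketch, not part of the
commit; the reproducer below is an assumption modeled on the new test):
lvx and stvx ignore the low four bits of their effective address, so
the 16-byte alignment masking GCC emits for vec_ld/vec_st addresses
(an AND with -16, i.e. "rldicr x,y,0,59") is dead weight once the
accesses are rewritten into lvx/stvx form.  Previously
find_alignment_op bailed out unless base_reg had a single reaching
definition, so an address register redefined on some path, as in the
loop below, kept its redundant alignment insns:

    /* Hypothetical reproducer, compile at -O2 -mcpu=power8 on
       powerpc64le: the base address E of the aligned loads has two
       reaching definitions (E = P and E = E + 16), so the address
       used by the loads is fed by more than one alignment AND.  */
    #include <altivec.h>

    extern int n;

    vector unsigned long long
    sum_blocks (unsigned char *p)
    {
      vector unsigned long long acc = (vector unsigned long long) {0, 0};
      unsigned char *e = p;   /* First definition of the base.  */
      do
        {
          acc += __builtin_vec_ld (0, (unsigned long long *) e);
          e = e + 16;         /* Second definition of the base.  */
        }
      while (--n > 0);
      return acc;
    }

With the patch, find_alignment_op walks the whole definition chain of
the base register and reports success only when every definition is
such a masking operation, so the ANDs are removed only when they are
provably redundant on all paths.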
---
 gcc/config/rs6000/rs6000-p8swap.c          | 143 +++++++++++-------
 gcc/testsuite/gcc.target/powerpc/pr97019.c |  83 ++++++++++++
 2 files changed, 174 insertions(+), 52 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -2095,11 +2095,15 @@ alignment_mask (rtx_insn *insn)
   return alignment_with_canonical_addr (SET_SRC (body));
 }
 
-/* Given INSN that's a load or store based at BASE_REG, look for a
-   feeding computation that aligns its address on a 16-byte boundary.
-   Return the rtx and its containing AND_INSN.  */
-static rtx
-find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
+/* Given INSN that's a load or store based at BASE_REG, check if
+   all of its feeding computations align its address on a 16-byte
+   boundary.  If so, return true and add all definition insns into
+   AND_INSNS and their corresponding fully-expanded rtxes for the
+   masking operations into AND_OPS.  */
+
+static bool
+find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
+		   vec<rtx> *and_ops)
 {
   df_ref base_use;
   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2111,19 +2115,28 @@ find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
 	continue;
 
       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
-      if (!base_def_link || base_def_link->next)
-	break;
+      if (!base_def_link)
+	return false;
 
-      /* With stack-protector code enabled, and possibly in other
-	 circumstances, there may not be an associated insn for
-	 the def.  */
-      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
-	break;
+      while (base_def_link)
+	{
+	  /* With stack-protector code enabled, and possibly in other
+	     circumstances, there may not be an associated insn for
+	     the def.  */
+	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
+	    return false;
 
-      *and_insn = DF_REF_INSN (base_def_link->ref);
-      and_operation = alignment_mask (*and_insn);
-      if (and_operation != 0)
-	break;
+	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+	  and_operation = alignment_mask (and_insn);
+
+	  /* Stop if we find any one which doesn't align.  */
+	  if (!and_operation)
+	    return false;
+
+	  and_insns->safe_push (and_insn);
+	  and_ops->safe_push (and_operation);
+	  base_def_link = base_def_link->next;
+	}
     }
 
-  return and_operation;
+  return and_operation != 0;
@@ -2143,11 +2156,14 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = XEXP (SET_SRC (body), 0);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       df_ref def;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
       FOR_EACH_INSN_INFO_DEF (def, insn_info)
@@ -2168,25 +2184,35 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new LVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
+	  rtx new_reg = 0;
+	  rtx and_mask = 0;
+	  for (unsigned i = 0; i < and_insns.length (); i++)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new LVX instruction.  */
+	      rtx_insn *and_insn = and_insns[i];
+	      rtx and_op = and_ops[i];
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		{
+		  new_reg = gen_reg_rtx (GET_MODE (and_base));
+		  and_mask = XEXP (and_op, 1);
+		}
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
 
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
 	  SET_SRC (body) = mem;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
 
 	  if (dump_file)
 	    fprintf (dump_file, "lvx opportunity found at %d\n",
 		     INSN_UID (insn));
@@ -2205,11 +2231,14 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       rtx src_reg = XEXP (SET_SRC (body), 0);
       df_ref src_use;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2234,25 +2263,35 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new STVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
+	  rtx new_reg = 0;
+	  rtx and_mask = 0;
+	  for (unsigned i = 0; i < and_insns.length (); i++)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new STVX instruction.  */
+	      rtx_insn *and_insn = and_insns[i];
+	      rtx and_op = and_ops[i];
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		{
+		  new_reg = gen_reg_rtx (GET_MODE (and_base));
+		  and_mask = XEXP (and_op, 1);
+		}
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
 
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
 	  SET_SRC (body) = src_reg;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
 
 	  if (dump_file)
 	    fprintf (dump_file, "stvx opportunity found at %d\n",
 		     INSN_UID (insn));
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97019.c b/gcc/testsuite/gcc.target/powerpc/pr97019.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
@@ -0,0 +1,83 @@
+/* This issue can only exist on little-endian P8 targets, since
+   the built-in functions vec_ld/vec_st can use lxvd2x/stxvd2x
+   (P8 big-endian) or lxv/stxv (P9 and later) for some cases,
+   those rldicr instructions fed to them are necessary.  */
+/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Test there are no useless instructions "rldicr x,y,0,59"
+   to align the addresses for lvx/stvx.  */
+
+extern int a, b, c;
+extern vector unsigned long long ev5, ev6, ev7, ev8;
+extern int dummy (vector unsigned long long);
+
+int test_vec_ld(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4, v9;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      v1 = __builtin_vec_ld(16, (unsigned long long *)e);
+      v2 = __builtin_vec_ld(32, (unsigned long long *)e);
+      v3 = __builtin_vec_ld(48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    v5 = __builtin_vec_ld(16, (unsigned long long *)e);
+    v6 = __builtin_vec_ld(32, (unsigned long long *)e);
+    v7 = __builtin_vec_ld(48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+int test_vec_st(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      __builtin_vec_st(v1, 16, (unsigned long long *)e);
+      __builtin_vec_st(v2, 32, (unsigned long long *)e);
+      __builtin_vec_st(v3, 48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    __builtin_vec_st(v5, 16, (unsigned long long *)e);
+    __builtin_vec_st(v6, 32, (unsigned long long *)e);
+    __builtin_vec_st(v7, 48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+/* { dg-final { scan-assembler-not {(?n)rldicr.*,0,59} } } */