RISC-V: Fix ratio in vsetvl fuse rule [PR115703].

In PR115703 we fuse two vsetvls:

    Fuse curr info since prev info compatible with it:
      prev_info: VALID (insn 438, bb 2)
        Demand fields: demand_ge_sew demand_non_zero_avl
        SEW=32, VLMUL=m1, RATIO=32, MAX_SEW=64
        TAIL_POLICY=agnostic, MASK_POLICY=agnostic
        AVL=(reg:DI 0 zero)
        VL=(reg:DI 9 s1 [312])
      curr_info: VALID (insn 92, bb 20)
        Demand fields: demand_ratio_and_ge_sew demand_avl
        SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64
        TAIL_POLICY=agnostic, MASK_POLICY=agnostic
        AVL=(const_int 4 [0x4])
        VL=(nil)
      prev_info after fused: VALID (insn 438, bb 2)
        Demand fields: demand_ratio_and_ge_sew demand_avl
        SEW=64, VLMUL=mf2, RATIO=64, MAX_SEW=64
        TAIL_POLICY=agnostic, MASK_POLICY=agnostic
        AVL=(const_int 4 [0x4])
        VL=(nil).

The result is vsetvl zero, zero, e64, mf2, ta, ma.  The previous vsetvl
set vl = 4 but here we wrongly set it to vl = 2.  As all the following
vsetvls only ever change the ratio we never recover.

The issue is quite difficult to trigger because the compiler can often
deduce the final value of d at compile time.  Then every check of the
value of d is optimized away.

The last known bad commit is r15-3458-g5326306e7d9d36.  With that commit
the output is wrong but -fno-schedule-insns makes it correct.  From the
next commit on, the issue is latent.  I still added the PR's test as scan
and run checks even though they don't trigger right now.  I am not sure
whether the run test will ever fail, but I verified that the patch fixes
the issue when applied on top of r15-3458-g5326306e7d9d36.

	PR target/115703

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc: Use max_sew for calculating the
	new LMUL.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/pr115703-run.c: New test.
	* gcc.target/riscv/rvv/autovec/pr115703.c: New test.
This commit is contained in:
Robin Dapp 2025-02-06 14:43:17 +01:00
parent 8c03fbd776
commit 44d4a1086d
3 changed files with 84 additions and 1 deletions

View file

@ -1756,7 +1756,8 @@ private:
/* Fuse helper: give PREV the LMUL that produces NEXT's ratio, then call
   use_max_sew and copy NEXT's ratio into PREV.

   The LMUL has to be computed from the maximum of both SEWs, not from
   PREV's SEW alone.  Otherwise, when NEXT has the larger SEW, the
   resulting SEW/LMUL pair no longer matches NEXT's ratio (PR
   target/115703: prev SEW=32, next SEW=64 with ratio 64 ended up as
   e64,mf2 instead of e64,m1).

   NOTE(review): assumes use_max_sew raises PREV's SEW to that same
   maximum -- confirm against its definition.  */
inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
						   const vsetvl_info &next)
{
  /* SEW the fused info is expected to carry.  */
  int max_sew = MAX (prev.get_sew (), next.get_sew ());

  /* Derive the LMUL from the fused SEW and the ratio NEXT demands.  */
  prev.set_vlmul (calculate_vlmul (max_sew, next.get_ratio ()));
  use_max_sew (prev, next);
  prev.set_ratio (next.get_ratio ());
}

View file

@ -0,0 +1,44 @@
/* { dg-do run } */
/* { dg-require-effective-target rvv_zvl256b_ok } */
/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -fwhole-program -fwrapv" } */
/* File-scope state updated by the loop nest in main.  Only d starts
   non-zero (1) and only d is passed to check at the end; every other
   object is zero-initialised.  */
int a, i;
unsigned long b;
unsigned c, f;
long long d = 1;
short e, m;
long g, h;
/* Abort unless X equals the value main is expected to compute for d.
   The noipa attribute keeps GCC from using interprocedural knowledge
   about this function when compiling its callers.  */
__attribute__ ((noipa))
void check (unsigned long long x)
{
  const unsigned long long expected = 13667643351234938049ull;

  if (x == expected)
    return;

  __builtin_abort ();
}
/* Execution test for PR target/115703.  The loop shapes are kept
   exactly as reduced from the PR.

   d is multiplied by 7 once per iteration of the u loop (the k loop
   body runs exactly once), i.e. 2 * 2 * 6 * 9 * 11 = 2376 times in
   total.  The test is built with -fwrapv, so the signed overflow
   wraps.  The final d is handed to check.  */
int main() {
for (int q = 0; q < 2; q += 1) {
/* First nest: r, s, t and u loops; only the u loop has a braced body.  */
for (short r = 0; r < 2; r += 1)
for (char s = 0; s < 6; s++)
/* 011 is octal (9) and the step 12081 - 12080 is 1: nine iterations.  */
for (short t = 0; t < 011; t += 12081 - 12080)
for (short u = 0; u < 11; u++) {
/* The ({ ... }) forms are GNU statement expressions computing a max.  */
a = ({ a > 1 ? a : 1; });
b = ({ b > 5 ? b : 5; });
/* Runs once: j is set to 2080 after the first pass.  030 is octal 24.  */
for (short j = 0; j < 2; j = 2080)
c = ({ c > 030 ? c : 030; });
/* Runs once as well: k becomes 2080 after the first pass.  */
for (short k = 0; k < 2; k += 2080)
d *= 7;
e *= 10807;
f = ({ f > 3 ? f : 3; });
}
/* Second nest, a sibling of the r loop inside the q loop.  */
for (int l = 0; l < 21; l += 1)
for (int n = 0; n < 16; n++) {
g = ({ m ? g : m; });
for (char o = 0; o < 7; o += 1)
h *= 3;
i = ({ i < 0 ? i : 0; });
}
}
/* Aborts if d does not hold the expected value.  */
check (d);
}

View file

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -fwhole-program -fwrapv" } */
/* File-scope state updated by the loop nest in main.  Only d starts
   non-zero (1) and only d is printed at the end; every other object is
   zero-initialised.  */
int a, i;
unsigned long b;
unsigned c, f;
long long d = 1;
short e, m;
long g, h;
/* Compile-only variant of the PR target/115703 reproducer.  The loop
   nest is the same as in the run test, but d is printed instead of
   being checked.  The dg-final directive after main scans the
   generated assembly to make sure no vsetvl with "e64,mf2" is
   emitted.  */
int main() {
for (int q = 0; q < 2; q += 1) {
/* First nest: r, s, t and u loops; only the u loop has a braced body.  */
for (short r = 0; r < 2; r += 1)
for (char s = 0; s < 6; s++)
/* 011 is octal (9) and the step 12081 - 12080 is 1: nine iterations.  */
for (short t = 0; t < 011; t += 12081 - 12080)
for (short u = 0; u < 11; u++) {
/* The ({ ... }) forms are GNU statement expressions computing a max.  */
a = ({ a > 1 ? a : 1; });
b = ({ b > 5 ? b : 5; });
/* Runs once: j is set to 2080 after the first pass.  030 is octal 24.  */
for (short j = 0; j < 2; j = 2080)
c = ({ c > 030 ? c : 030; });
/* Runs once as well: k becomes 2080 after the first pass.  */
for (short k = 0; k < 2; k += 2080)
d *= 7;
e *= 10807;
f = ({ f > 3 ? f : 3; });
}
/* Second nest, a sibling of the r loop inside the q loop.  */
for (int l = 0; l < 21; l += 1)
for (int n = 0; n < 16; n++) {
g = ({ m ? g : m; });
for (char o = 0; o < 7; o += 1)
h *= 3;
i = ({ i < 0 ? i : 0; });
}
}
/* Print d so the computation that produces it is not dead code.  */
__builtin_printf ("%llu\n", d);
}
/* { dg-final { scan-assembler-not "vset.*e64,mf2" } } */