RISC-V: relax the VL checks that block VSETVL fusion (pr112713 tests)

modify_or_use_vl_p: an insn that defines the VL register of INFO still
blocks fusion.  An insn that only uses it now blocks fusion when either
def lookup (at that insn, or at INFO's insn) reports a matching set, or
when the two lookups report different previous defs.
compute_lcm_local_properties: a use of the VL register no longer clears
the transparency bit; only a def of a non-VLMAX register AVL does.

Review note: each def_lookup is queried with the insn it was looked up
for, i.e. dl2.prev_def (info.get_insn ()) rather than dl2.prev_def (i).

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 74367ec8d8e..b3e07d4c3aa 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1433,9 +1433,23 @@ private:
 
   inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
   {
-    return info.has_vl ()
-	   && (find_access (i->uses (), REGNO (info.get_vl ()))
-	       || find_access (i->defs (), REGNO (info.get_vl ())));
+    if (info.has_vl ())
+      {
+	if (find_access (i->defs (), REGNO (info.get_vl ())))
+	  return true;
+	if (find_access (i->uses (), REGNO (info.get_vl ())))
+	  {
+	    resource_info resource = full_register (REGNO (info.get_vl ()));
+	    def_lookup dl1 = crtl->ssa->find_def (resource, i);
+	    def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
+	    if (dl1.matching_set () || dl2.matching_set ())
+	      return true;
+	    /* If their VLs come from the same def, we still want to fuse
+	       their VSETVL demand info to gain better performance.  */
+	    return dl1.prev_def (i) != dl2.prev_def (info.get_insn ());
+	  }
+      }
+    return false;
   }
   inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
   {
@@ -1702,7 +1716,7 @@ public:
     for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
 	 i = i->prev_nondebug_insn ())
       {
-	// no def amd use of vl
+	// no def and use of vl
 	if (!ignore_vl && modify_or_use_vl_p (i, info))
 	  return false;
 
@@ -2635,11 +2649,8 @@ pre_vsetvl::compute_lcm_local_properties ()
 
 	      for (const insn_info *insn : bb->real_nondebug_insns ())
 		{
-		  if ((info.has_nonvlmax_reg_avl ()
-		       && find_access (insn->defs (), REGNO (info.get_avl ())))
-		      || (info.has_vl ()
-			  && find_access (insn->uses (),
-					  REGNO (info.get_vl ()))))
+		  if (info.has_nonvlmax_reg_avl ()
+		      && find_access (insn->defs (), REGNO (info.get_avl ())))
 		    {
 		      bitmap_clear_bit (m_transp[bb_index], i);
 		      break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
new file mode 100644
index 00000000000..76402ab6167
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for(; it + step <= end; ) {
+    it += vl;
+    vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+    vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+    sum += __riscv_vcpop_m_b1(m3, vl);
+  }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
new file mode 100644
index 00000000000..04539d998cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+static size_t
+utf8_count_rvv(char const *buf, size_t len)
+{
+  size_t sum = 0;
+  for (size_t vl; len > 0; len -= vl, buf += vl) {
+    vl = __riscv_vsetvl_e8m8(len);
+    vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
+    vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
+    sum += __riscv_vcpop_m_b1(mask, vl);
+  }
+  return sum;
+}
+
+size_t
+utf8_count_rvv_4x_tail(char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for(; it + step <= end; ) {
+    vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+    vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+    vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+    vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+    vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
+    vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
+    vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
+    vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+    sum += __riscv_vcpop_m_b1(m0, vl);
+    sum += __riscv_vcpop_m_b1(m1, vl);
+    sum += __riscv_vcpop_m_b1(m2, vl);
+    sum += __riscv_vcpop_m_b1(m3, vl);
+  }
+  return sum + utf8_count_rvv(it, end - it);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */