[PATCH v2 2/2] RISC-V: Disable by pieces for vector setmem length > UNITS_PER_WORD
For fast unaligned access targets, by pieces uses up to UNITS_PER_WORD size pieces resulting in more store instructions than needed. For example gcc.target/riscv/rvv/base/setmem-2.c:f1 built with `-O3 -march=rv64gcv -mtune=thead-c906`: ``` f1: vsetivli zero,8,e8,mf2,ta,ma vmv.v.x v1,a1 vsetivli zero,0,e32,mf2,ta,ma sb a1,14(a0) vmv.x.s a4,v1 vsetivli zero,8,e16,m1,ta,ma vmv.x.s a5,v1 vse8.v v1,0(a0) sw a4,8(a0) sh a5,12(a0) ret ``` The slow unaligned access version built with `-O3 -march=rv64gcv` used 15 sb instructions: ``` f1: sb a1,0(a0) sb a1,1(a0) sb a1,2(a0) sb a1,3(a0) sb a1,4(a0) sb a1,5(a0) sb a1,6(a0) sb a1,7(a0) sb a1,8(a0) sb a1,9(a0) sb a1,10(a0) sb a1,11(a0) sb a1,12(a0) sb a1,13(a0) sb a1,14(a0) ret ``` After this patch, the following is generated in both cases: ``` f1: vsetivli zero,15,e8,m1,ta,ma vmv.v.x v1,a1 vse8.v v1,0(a0) ret ``` gcc/ChangeLog: * config/riscv/riscv.cc (riscv_use_by_pieces_infrastructure_p): New function. (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Define. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr113469.c: Expect mf2 setmem. * gcc.target/riscv/rvv/base/setmem-2.c: Update f1 to expect straight-line vector memset. * gcc.target/riscv/rvv/base/setmem-3.c: Likewise.
This commit is contained in:
parent
b30c6a5eab
commit
6b315907c0
4 changed files with 35 additions and 11 deletions
|
@ -12695,6 +12695,22 @@ riscv_stack_clash_protection_alloca_probe_range (void)
|
|||
return STACK_CLASH_CALLER_GUARD;
|
||||
}
|
||||
|
||||
static bool
|
||||
riscv_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
|
||||
unsigned alignment,
|
||||
enum by_pieces_operation op, bool speed_p)
|
||||
{
|
||||
/* For set/clear with size > UNITS_PER_WORD, by pieces uses vector broadcasts
|
||||
with UNITS_PER_WORD size pieces. Use setmem<mode> instead which can use
|
||||
bigger chunks. */
|
||||
if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR
|
||||
&& (op == CLEAR_BY_PIECES || op == SET_BY_PIECES)
|
||||
&& speed_p && size > UNITS_PER_WORD)
|
||||
return false;
|
||||
|
||||
return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
|
||||
}
|
||||
|
||||
/* Initialize the GCC target structure. */
|
||||
#undef TARGET_ASM_ALIGNED_HI_OP
|
||||
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
||||
|
@ -13060,6 +13076,9 @@ riscv_stack_clash_protection_alloca_probe_range (void)
|
|||
#undef TARGET_C_MODE_FOR_FLOATING_TYPE
|
||||
#define TARGET_C_MODE_FOR_FLOATING_TYPE riscv_c_mode_for_floating_type
|
||||
|
||||
#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
|
||||
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P riscv_use_by_pieces_infrastructure_p
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-riscv.h"
|
||||
|
|
|
@ -51,4 +51,5 @@ void p(int buf, __builtin_va_list ab, int q) {
|
|||
} while (k);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*8,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
|
||||
|
|
|
@ -5,15 +5,17 @@
|
|||
|
||||
#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
|
||||
|
||||
/* Small memsets shouldn't be vectorised.
|
||||
/* Vectorise with no loop.
|
||||
** f1:
|
||||
** (
|
||||
** sb\s+a1,0\(a0\)
|
||||
** ...
|
||||
** vsetivli\s+zero,\d+,e8,m1,ta,ma
|
||||
** |
|
||||
** li\s+a2,\d+
|
||||
** tail\s+memset
|
||||
** li\s+a\d+,\d+
|
||||
** vsetvli\s+zero,a\d+,e8,m1,ta,ma
|
||||
** )
|
||||
** vmv\.v\.x\s+v\d+,a1
|
||||
** vse8\.v\s+v\d+,0\(a0\)
|
||||
** ret
|
||||
*/
|
||||
void *
|
||||
f1 (void *a, int const b)
|
||||
|
|
|
@ -5,15 +5,17 @@
|
|||
|
||||
#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
|
||||
|
||||
/* Small memsets shouldn't be vectorised.
|
||||
/* Vectorise with no loop.
|
||||
** f1:
|
||||
** (
|
||||
** sb\s+a1,0\(a0\)
|
||||
** ...
|
||||
** vsetivli\s+zero,\d+,e8,m1,ta,ma
|
||||
** |
|
||||
** li\s+a2,\d+
|
||||
** tail\s+memset
|
||||
** li\s+a\d+,\d+
|
||||
** vsetvli\s+zero,a\d+,e8,m1,ta,ma
|
||||
** )
|
||||
** vmv\.v\.x\s+v\d+,a1
|
||||
** vse8\.v\s+v\d+,0\(a0\)
|
||||
** ret
|
||||
*/
|
||||
void *
|
||||
f1 (void *a, int const b)
|
||||
|
|
Loading…
Add table
Reference in a new issue