RISC-V: Refactor gen zero_extend rtx for SAT_* when expand SImode in RV64

Previously, we had some special handling for both .SAT_ADD and
.SAT_SUB for unsigned int.  Similar handling is needed to take care of
SImode in RV64 for zero extension.  Thus, refactor these two helper
functions into one to avoid code duplication.

The test suites below passed for this patch.
* The rv64gcv full regression test.

gcc/ChangeLog:

	* config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Merge
	the zero_extend handing from func riscv_gen_unsigned_xmode_reg.
	(riscv_gen_unsigned_xmode_reg): Remove.
	(riscv_expand_ussub): Leverage riscv_gen_zero_extend_rtx
	instead of riscv_gen_unsigned_xmode_reg.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/sat_u_sub-11.c: Adjust asm check.
	* gcc.target/riscv/sat_u_sub-15.c: Ditto.
	* gcc.target/riscv/sat_u_sub-19.c: Ditto.
	* gcc.target/riscv/sat_u_sub-23.c: Ditto.
	* gcc.target/riscv/sat_u_sub-27.c: Ditto.
	* gcc.target/riscv/sat_u_sub-3.c: Ditto.
	* gcc.target/riscv/sat_u_sub-31.c: Ditto.
	* gcc.target/riscv/sat_u_sub-35.c: Ditto.
	* gcc.target/riscv/sat_u_sub-39.c: Ditto.
	* gcc.target/riscv/sat_u_sub-43.c: Ditto.
	* gcc.target/riscv/sat_u_sub-47.c: Ditto.
	* gcc.target/riscv/sat_u_sub-7.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-11.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-11_1.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-11_2.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-15.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-15_1.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-15_2.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-3.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-3_1.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-3_2.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-7.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-7_1.c: Ditto.
	* gcc.target/riscv/sat_u_sub_imm-7_2.c: Ditto.

Signed-off-by: Pan Li <pan2.li@intel.com>
This commit is contained in:
Pan Li 2024-08-30 14:07:12 +08:00
parent 880834d3e7
commit e96d4bf6a6
25 changed files with 119 additions and 54 deletions

View file

@ -11894,19 +11894,56 @@ riscv_get_raw_result_mode (int regno)
return default_get_reg_raw_mode (regno);
}
/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
The rtx x will be zero extended to Xmode if the mode is HI/QImode, and
the new zero extended Xmode rtx will be returned.
Or the gen_lowpart rtx of Xmode will be returned. */
/* Generate a REG rtx of Xmode from the given rtx and mode.
The rtx x can be REG (QI/HI/SI/DI) or const_int.
The machine_mode mode is the original mode from define pattern.
If rtx is REG and Xmode, the RTX x will be returned directly.
If rtx is REG and non-Xmode, it will be zero extended to a new REG of
Xmode, which is then returned.
If rtx is const_int, a new REG rtx will be created to hold the value of
const_int and then returned.
According to the gccint doc, the constants generated for modes with fewer
bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
will be two cases here, take QImode as example.
For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
mov from const_int to the new REG rtx is good enough here.
For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
That is, 0xfffffffffffffffe in Xmode of RV64, but we actually need 0xfe in
Xmode of RV64.  So we need to clean up the highest 56 bits of the new REG
rtx moved from the (const_int -2).
Then the underlying expanding can perform the code generation based on
the REG rtx of Xmode, instead of taking care of these in expand func. */
static rtx
riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
{
  /* Non-constant operand: a REG already in Xmode needs no work, a
     narrower REG is zero extended into a fresh Xmode pseudo.  */
  if (!CONST_INT_P (x))
    {
      if (mode == Xmode)
	return x;

      rtx xmode_reg = gen_reg_rtx (Xmode);
      riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
      return xmode_reg;
    }

  rtx xmode_reg = gen_reg_rtx (Xmode);

  if (mode == Xmode)
    /* The constant already occupies the full Xmode width; a plain move
       is sufficient.  */
    emit_move_insn (xmode_reg, x);
  else
    {
      /* CONST_INTs narrower than HOST_WIDE_INT are sign extended (see
	 the comment above), so load the constant into a REG of the
	 original MODE first and zero extend to clear the upper bits.  */
      rtx reg_x = gen_reg_rtx (mode);

      emit_move_insn (reg_x, x);
      riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x);
    }

  return xmode_reg;
}
@ -11959,50 +11996,6 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
/* Generate a REG rtx of Xmode from the given rtx and mode.
The rtx x can be REG (QI/HI/SI/DI) or const_int.
The machine_mode mode is the original mode from define pattern.
If rtx is REG, the gen_lowpart of Xmode will be returned.
If rtx is const_int, a new REG rtx will be created to hold the value of
const_int and then returned.
According to the gccint doc, the constants generated for modes with fewer
bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
will be two cases here, take QImode as example.
For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
mov from const_int to the new REG rtx is good enough here.
For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved
from the (const_int -2).
Then the underlying expanding can perform the code generation based on
the REG rtx of Xmode, instead of taking care of these in expand func. */
static rtx
riscv_gen_unsigned_xmode_reg (rtx x, machine_mode mode)
{
  /* A non-constant operand needs no fixup: its Xmode low part is
     already what the expander consumes.  */
  if (!CONST_INT_P (x))
    return gen_lowpart (Xmode, x);

  rtx result = gen_reg_rtx (Xmode);

  if (mode != Xmode)
    {
      /* Load the constant in its narrow mode first, then zero extend
	 so the sign-extended upper bits are discarded.  */
      rtx narrow = gen_reg_rtx (mode);

      emit_move_insn (narrow, x);
      riscv_emit_unary (ZERO_EXTEND, result, narrow);
    }
  else
    /* Full-width constant: a simple move suffices.  */
    emit_move_insn (result, x);

  return result;
}
/* Implements the unsigned saturation sub standard name usadd for int mode.
z = SAT_SUB(x, y).
@ -12016,8 +12009,8 @@ void
riscv_expand_ussub (rtx dest, rtx x, rtx y)
{
machine_mode mode = GET_MODE (dest);
rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode);
rtx xmode_y = riscv_gen_unsigned_xmode_reg (y, mode);
rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
rtx xmode_lt = gen_reg_rtx (Xmode);
rtx xmode_minus = gen_reg_rtx (Xmode);
rtx xmode_dest = gen_reg_rtx (Xmode);

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_3:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_4:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_5:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_6:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_7:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_1:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_8:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_9:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_10:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_11:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_12:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -6,6 +6,10 @@
/*
** sat_u_sub_uint32_t_fmt_2:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** slli\s+a1,\s*a1,\s*32
** srli\s+a1,\s*a1,\s*32
** sub\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*a0,\s*a1
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1

View file

@ -7,6 +7,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_3:
** li\s+[atx][0-9]+,\s*255
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1

View file

@ -9,6 +9,8 @@
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1

View file

@ -9,6 +9,8 @@
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1

View file

@ -6,6 +6,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_4:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** addi\s+[atx][0-9]+,\s*a0,\s*-255
** sltiu\s+a0,\s*[atx][0-9]+,\s*255
** addi\s+a0,\s*a0,\s*-1

View file

@ -6,6 +6,8 @@
/*
** sat_u_sub_imm2147483648_uint32_t_fmt_2:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** sub\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+

View file

@ -6,6 +6,8 @@
/*
** sat_u_sub_imm68719476732_uint32_t_fmt_2:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4

View file

@ -7,6 +7,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_1:
** li\s+[atx][0-9]+,\s*255
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1

View file

@ -8,6 +8,8 @@
** sat_u_sub_imm2147483648_uint32_t_fmt_1:
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1

View file

@ -9,6 +9,8 @@
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** addi\s+a0,\s*a0,\s*-1

View file

@ -6,6 +6,8 @@
/*
** sat_u_sub_imm255_uint32_t_fmt_2:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** addi\s+[atx][0-9]+,\s*a0,\s*-255
** sltiu\s+a0,\s*[atx][0-9]+,\s*255
** addi\s+a0,\s*a0,\s*-1

View file

@ -6,6 +6,8 @@
/*
** sat_u_sub_imm2147483648_uint32_t_fmt_2:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
** sub\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+

View file

@ -6,6 +6,8 @@
/*
** sat_u_sub_imm68719476732_uint32_t_fmt_2:
** slli\s+a0,\s*a0,\s*32
** srli\s+a0,\s*a0,\s*32
** li\s+[atx][0-9]+,\s*1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-4