rs6000: Improve vsx_init_v4si

This changes vsx_init_v4si to be an expander.  That way, no special
cases are needed anymore for special arguments: the normal RTL passes
can deal with them.


	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
	* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
	* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
	the elements into a register.
	(rs6000_split_v4si_init_di_reg): Delete.
	(rs6000_split_v4si_init): Delete.
	* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
	(vsx_init_v4si): Rewrite as a define_expand.

From-SVN: r262930
This commit is contained in:
Segher Boessenkool 2018-07-23 13:27:38 +02:00 committed by Segher Boessenkool
parent 268e16e89b
commit 9fede15c4d
5 changed files with 45 additions and 109 deletions

View file

@ -1,3 +1,14 @@
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
the elements into a register.
(rs6000_split_v4si_init_di_reg): Delete.
(rs6000_split_v4si_init): Delete.
* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
(vsx_init_v4si): Rewrite as a define_expand.
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.md (splitters for rldimi and rlwimi with the

View file

@ -772,7 +772,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VSX_EXTRACT:
case UNSPEC_VSX_SET:
case UNSPEC_VSX_SLDWI:
case UNSPEC_VSX_VEC_INIT:
case UNSPEC_VSX_VSLO:
case UNSPEC_VUNPACK_HI_SIGN:
case UNSPEC_VUNPACK_HI_SIGN_DIRECT:

View file

@ -61,7 +61,6 @@ extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
extern void rs6000_split_v4si_init (rtx []);
extern void altivec_expand_vec_perm_le (rtx op[4]);
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);

View file

@ -6857,11 +6857,7 @@ rs6000_expand_vector_init (rtx target, rtx vals)
size_t i;
for (i = 0; i < 4; i++)
{
elements[i] = XVECEXP (vals, 0, i);
if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
elements[i] = copy_to_mode_reg (SImode, elements[i]);
}
elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
elements[2], elements[3]));
@ -7568,92 +7564,6 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
gcc_unreachable ();
}
/* Helper function for rs6000_split_v4si_init to build up a DImode value from
two SImode values.
DEST receives the combined value: SI1 ends up in its upper 32 bits and SI2
in its lower 32 bits.  SI1 and SI2 may each be a CONST_INT or a register.
TMP is a scratch that holds the zero-extended SI2 before it is ORed into
DEST.  */
static void
rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
{
const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
/* Both halves are constants: fold them into a single 64-bit constant and
emit one move.  */
if (CONST_INT_P (si1) && CONST_INT_P (si2))
{
unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
emit_move_insn (dest, GEN_INT (const1 | const2));
return;
}
/* Put si1 into upper 32-bits of dest. */
if (CONST_INT_P (si1))
emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
else
{
/* Generate RLDIC. */
/* View the register holding si1 as DImode, shift it left by 32 and keep
only the upper 32 bits.  */
rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
rtx mask_rtx = GEN_INT (mask_32bit << 32);
rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
gcc_assert (!reg_overlap_mentioned_p (dest, si1));
emit_insn (gen_rtx_SET (dest, and_rtx));
}
/* Put si2 into the temporary. */
/* DEST already holds the upper half at this point, so TMP must not
overlap it.  */
gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
if (CONST_INT_P (si2))
emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
else
emit_insn (gen_zero_extendsidi2 (tmp, si2));
/* Combine the two parts. */
emit_insn (gen_iordi3 (dest, dest, tmp));
return;
}
/* Split a V4SI initialization.
OPERANDS[0] is the destination, OPERANDS[1] through OPERANDS[4] are the
four SImode scalars, and OPERANDS[5] and OPERANDS[6] are the two
temporaries.  The destination is handled as two consecutive DImode hard
registers, each built by rs6000_split_v4si_init_di_reg from a pair of
scalars.  Any destination that is not a REG or SUBREG hits
gcc_unreachable.  */
void
rs6000_split_v4si_init (rtx operands[])
{
rtx dest = operands[0];
/* Destination is a GPR, build up the two DImode parts in place. */
if (REG_P (dest) || SUBREG_P (dest))
{
int d_regno = regno_or_subregno (dest);
rtx scalar1 = operands[1];
rtx scalar2 = operands[2];
rtx scalar3 = operands[3];
rtx scalar4 = operands[4];
rtx tmp1 = operands[5];
rtx tmp2 = operands[6];
/* Even though we only need one temporary (plus the destination, which
has an early clobber constraint), try to use two temporaries, one for
each double word created. That way the 2nd insn scheduling pass can
rearrange things so the two parts are done in parallel. */
if (BYTES_BIG_ENDIAN)
{
/* Big endian: scalars 1 and 2 go into register d_regno, scalars 3 and 4
into d_regno + 1; the first scalar of each pair is the upper half.  */
rtx di_lo = gen_rtx_REG (DImode, d_regno);
rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
}
else
{
/* Little endian: the register assignment and the order within each
pair are both reversed relative to the big-endian case.  */
rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
rtx di_hi = gen_rtx_REG (DImode, d_regno);
rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
}
return;
}
else
gcc_unreachable ();
}
/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
selects whether the alignment is abi mandated, optional, or
both abi and optional alignment. */

View file

@ -388,7 +388,6 @@
UNSPEC_VSX_VXSIG
UNSPEC_VSX_VIEXP
UNSPEC_VSX_VTSTDC
UNSPEC_VSX_VEC_INIT
UNSPEC_VSX_VSIGNED2
UNSPEC_LXVL
@ -2946,23 +2945,41 @@
}
[(set_attr "type" "vecperm")])
;; V4SImode initialization splitter
(define_insn_and_split "vsx_init_v4si"
[(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
(unspec:V4SI
[(match_operand:SI 1 "reg_or_cint_operand" "rn")
(match_operand:SI 2 "reg_or_cint_operand" "rn")
(match_operand:SI 3 "reg_or_cint_operand" "rn")
(match_operand:SI 4 "reg_or_cint_operand" "rn")]
UNSPEC_VSX_VEC_INIT))
(clobber (match_scratch:DI 5 "=&r"))
(clobber (match_scratch:DI 6 "=&r"))]
;; Concatenate 4 SImode elements into a V4SImode reg.
(define_expand "vsx_init_v4si"
[(use (match_operand:V4SI 0 "gpc_reg_operand"))
(use (match_operand:SI 1 "gpc_reg_operand"))
(use (match_operand:SI 2 "gpc_reg_operand"))
(use (match_operand:SI 3 "gpc_reg_operand"))
(use (match_operand:SI 4 "gpc_reg_operand"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
[(const_int 0)]
{
rs6000_split_v4si_init (operands);
rtx a = gen_reg_rtx (DImode);
rtx b = gen_reg_rtx (DImode);
rtx c = gen_reg_rtx (DImode);
rtx d = gen_reg_rtx (DImode);
emit_insn (gen_zero_extendsidi2 (a, operands[1]));
emit_insn (gen_zero_extendsidi2 (b, operands[2]));
emit_insn (gen_zero_extendsidi2 (c, operands[3]));
emit_insn (gen_zero_extendsidi2 (d, operands[4]));
if (!BYTES_BIG_ENDIAN)
{
std::swap (a, b);
std::swap (c, d);
}
rtx aa = gen_reg_rtx (DImode);
rtx ab = gen_reg_rtx (DImode);
rtx cc = gen_reg_rtx (DImode);
rtx cd = gen_reg_rtx (DImode);
emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
emit_insn (gen_iordi3 (ab, aa, b));
emit_insn (gen_iordi3 (cd, cc, d));
rtx abcd = gen_reg_rtx (V2DImode);
emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
DONE;
})