rs6000: Improve vsx_init_v4si
This changes vsx_init_v4si to be an expander.  That way, no special cases are needed anymore for special arguments: the normal RTL passes can deal with it.

* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force the elements into a register.
(rs6000_split_v4si_init_di_reg): Delete.
(rs6000_split_v4si_init): Delete.
* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
(vsx_init_v4si): Rewrite as a define_expand.

From-SVN: r262930
This commit is contained in:
parent
268e16e89b
commit
9fede15c4d
5 changed files with 45 additions and 109 deletions
|
@ -1,3 +1,14 @@
|
|||
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
|
||||
|
||||
* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
|
||||
* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
|
||||
* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
|
||||
the elements into a register.
|
||||
(rs6000_split_v4si_init_di_reg): Delete.
|
||||
(rs6000_split_v4si_init): Delete.
|
||||
* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
|
||||
(vsx_init_v4si): Rewrite as a define_expand.
|
||||
|
||||
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
|
||||
|
||||
* config/rs6000/rs6000.md (splitters for rldimi and rlwimi with the
|
||||
|
|
|
@ -772,7 +772,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
|
|||
case UNSPEC_VSX_EXTRACT:
|
||||
case UNSPEC_VSX_SET:
|
||||
case UNSPEC_VSX_SLDWI:
|
||||
case UNSPEC_VSX_VEC_INIT:
|
||||
case UNSPEC_VSX_VSLO:
|
||||
case UNSPEC_VUNPACK_HI_SIGN:
|
||||
case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
|
||||
|
|
|
@ -61,7 +61,6 @@ extern void rs6000_expand_vector_set (rtx, rtx, int);
|
|||
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
|
||||
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
|
||||
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
|
||||
extern void rs6000_split_v4si_init (rtx []);
|
||||
extern void altivec_expand_vec_perm_le (rtx op[4]);
|
||||
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
|
||||
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
|
||||
|
|
|
@ -6857,11 +6857,7 @@ rs6000_expand_vector_init (rtx target, rtx vals)
|
|||
size_t i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
elements[i] = XVECEXP (vals, 0, i);
|
||||
if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
|
||||
elements[i] = copy_to_mode_reg (SImode, elements[i]);
|
||||
}
|
||||
elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
|
||||
|
||||
emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
|
||||
elements[2], elements[3]));
|
||||
|
@ -7568,92 +7564,6 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Helper function for rs6000_split_v4si_init to build up a DImode value from
|
||||
two SImode values. */
|
||||
|
||||
static void
|
||||
rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
|
||||
{
|
||||
const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
|
||||
|
||||
if (CONST_INT_P (si1) && CONST_INT_P (si2))
|
||||
{
|
||||
unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
|
||||
unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
|
||||
|
||||
emit_move_insn (dest, GEN_INT (const1 | const2));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Put si1 into upper 32-bits of dest. */
|
||||
if (CONST_INT_P (si1))
|
||||
emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
|
||||
else
|
||||
{
|
||||
/* Generate RLDIC. */
|
||||
rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
|
||||
rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
|
||||
rtx mask_rtx = GEN_INT (mask_32bit << 32);
|
||||
rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
|
||||
gcc_assert (!reg_overlap_mentioned_p (dest, si1));
|
||||
emit_insn (gen_rtx_SET (dest, and_rtx));
|
||||
}
|
||||
|
||||
/* Put si2 into the temporary. */
|
||||
gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
|
||||
if (CONST_INT_P (si2))
|
||||
emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
|
||||
else
|
||||
emit_insn (gen_zero_extendsidi2 (tmp, si2));
|
||||
|
||||
/* Combine the two parts. */
|
||||
emit_insn (gen_iordi3 (dest, dest, tmp));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Split a V4SI initialization. */
|
||||
|
||||
void
|
||||
rs6000_split_v4si_init (rtx operands[])
|
||||
{
|
||||
rtx dest = operands[0];
|
||||
|
||||
/* Destination is a GPR, build up the two DImode parts in place. */
|
||||
if (REG_P (dest) || SUBREG_P (dest))
|
||||
{
|
||||
int d_regno = regno_or_subregno (dest);
|
||||
rtx scalar1 = operands[1];
|
||||
rtx scalar2 = operands[2];
|
||||
rtx scalar3 = operands[3];
|
||||
rtx scalar4 = operands[4];
|
||||
rtx tmp1 = operands[5];
|
||||
rtx tmp2 = operands[6];
|
||||
|
||||
/* Even though we only need one temporary (plus the destination, which
|
||||
has an early clobber constraint), try to use two temporaries, one for
|
||||
each double word created. That way the 2nd insn scheduling pass can
|
||||
rearrange things so the two parts are done in parallel. */
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
rtx di_lo = gen_rtx_REG (DImode, d_regno);
|
||||
rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
|
||||
rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
|
||||
rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
|
||||
rtx di_hi = gen_rtx_REG (DImode, d_regno);
|
||||
rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
|
||||
rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
|
||||
selects whether the alignment is abi mandated, optional, or
|
||||
both abi and optional alignment. */
|
||||
|
|
|
@ -388,7 +388,6 @@
|
|||
UNSPEC_VSX_VXSIG
|
||||
UNSPEC_VSX_VIEXP
|
||||
UNSPEC_VSX_VTSTDC
|
||||
UNSPEC_VSX_VEC_INIT
|
||||
UNSPEC_VSX_VSIGNED2
|
||||
|
||||
UNSPEC_LXVL
|
||||
|
@ -2946,23 +2945,41 @@
|
|||
}
|
||||
[(set_attr "type" "vecperm")])
|
||||
|
||||
;; V4SImode initialization splitter
|
||||
(define_insn_and_split "vsx_init_v4si"
|
||||
[(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
|
||||
(unspec:V4SI
|
||||
[(match_operand:SI 1 "reg_or_cint_operand" "rn")
|
||||
(match_operand:SI 2 "reg_or_cint_operand" "rn")
|
||||
(match_operand:SI 3 "reg_or_cint_operand" "rn")
|
||||
(match_operand:SI 4 "reg_or_cint_operand" "rn")]
|
||||
UNSPEC_VSX_VEC_INIT))
|
||||
(clobber (match_scratch:DI 5 "=&r"))
|
||||
(clobber (match_scratch:DI 6 "=&r"))]
|
||||
;; Concatenate 4 SImode elements into a V4SImode reg.
|
||||
(define_expand "vsx_init_v4si"
|
||||
[(use (match_operand:V4SI 0 "gpc_reg_operand"))
|
||||
(use (match_operand:SI 1 "gpc_reg_operand"))
|
||||
(use (match_operand:SI 2 "gpc_reg_operand"))
|
||||
(use (match_operand:SI 3 "gpc_reg_operand"))
|
||||
(use (match_operand:SI 4 "gpc_reg_operand"))]
|
||||
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rs6000_split_v4si_init (operands);
|
||||
rtx a = gen_reg_rtx (DImode);
|
||||
rtx b = gen_reg_rtx (DImode);
|
||||
rtx c = gen_reg_rtx (DImode);
|
||||
rtx d = gen_reg_rtx (DImode);
|
||||
emit_insn (gen_zero_extendsidi2 (a, operands[1]));
|
||||
emit_insn (gen_zero_extendsidi2 (b, operands[2]));
|
||||
emit_insn (gen_zero_extendsidi2 (c, operands[3]));
|
||||
emit_insn (gen_zero_extendsidi2 (d, operands[4]));
|
||||
if (!BYTES_BIG_ENDIAN)
|
||||
{
|
||||
std::swap (a, b);
|
||||
std::swap (c, d);
|
||||
}
|
||||
|
||||
rtx aa = gen_reg_rtx (DImode);
|
||||
rtx ab = gen_reg_rtx (DImode);
|
||||
rtx cc = gen_reg_rtx (DImode);
|
||||
rtx cd = gen_reg_rtx (DImode);
|
||||
emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
|
||||
emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
|
||||
emit_insn (gen_iordi3 (ab, aa, b));
|
||||
emit_insn (gen_iordi3 (cd, cc, d));
|
||||
|
||||
rtx abcd = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
|
||||
emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue