Generate XXSPLTIW on power10.

This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
adding support for vector constants that can be used, and adding a
VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.

Add the eP constraint to recognize constants that can be loaded into
vector registers with a single prefixed instruction such as xxspltiw and
xxspltidp.

I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector
constants.

2021-12-14  Michael Meissner  <meissner@linux.ibm.com>

gcc/

	* config/rs6000/constraints.md (eP): Update comment.
	* config/rs6000/predicates.md (easy_fp_constant): Add support for
	generating XXSPLTIW.
	(vsx_prefixed_constant): New predicate.
	(easy_vector_constant): Add support for
	generating XXSPLTIW.
	* config/rs6000/rs6000-protos.h (prefixed_xxsplti_p): New
	declaration.
	(constant_generates_xxspltiw): Likewise.
	* config/rs6000/rs6000.c (xxspltib_constant_p): Generate XXSPLTIW
	if possible instead of XXSPLTIB and sign extending the constant.
	(output_vec_const_move): Add support for XXSPLTIW.
	(prefixed_xxsplti_p): New function.
	(constant_generates_xxspltiw): New function.
	* config/rs6000/rs6000.md (prefixed attribute): Add support to
	mark XXSPLTI* instructions as being prefixed.
	* config/rs6000/rs6000.opt (-msplat-word-constant): New debug
	switch.
	* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
	generating XXSPLTIW or XXSPLTIDP.
	(vsx_mov<mode>_32bit): Likewise.
	* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
	eP constraint.

gcc/testsuite/

	* gcc.target/powerpc/vec-splat-constant-v16qi.c: New test.
	* gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
	* gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
	* gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.
	* gcc.target/powerpc/vec-splati-runnable.c: Update insn count.
This commit is contained in:
Michael Meissner 2021-12-15 01:37:08 -05:00
parent 8ccd8b12de
commit d730aa8a9f
13 changed files with 371 additions and 18 deletions

View file

@ -213,6 +213,12 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
;; A SF/DF scalar constant or a vector constant that can be loaded into vector
;; registers with one prefixed instruction such as XXSPLTIDP or XXSPLTIW.
;; The constraint itself holds no logic: the operand is accepted exactly when
;; the vsx_prefixed_constant predicate matches it.
(define_constraint "eP"
"A constant that can be loaded into a VSX register with one prefixed insn."
(match_operand 0 "vsx_prefixed_constant"))
;; A TF/KF scalar constant or a vector constant that can load certain IEEE
;; 128-bit constants into vector registers using LXVKQ.
(define_constraint "eQ"

View file

@ -605,7 +605,10 @@
vec_const_128bit_type vsx_const;
if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
{
if (constant_generates_lxvkq (&vsx_const) != 0)
if (constant_generates_lxvkq (&vsx_const))
return true;
if (constant_generates_xxspltiw (&vsx_const))
return true;
}
@ -617,6 +620,42 @@
return 0;
})
;; Return 1 if the operand is a 64-bit floating point scalar constant or a
;; vector constant that can be loaded to a VSX register with one prefixed
;; instruction, such as XXSPLTIDP or XXSPLTIW.
;;
;; In addition to regular constants, we also recognize constants formed with
;; the VEC_DUPLICATE insn from scalar constants.
;;
;; We don't handle scalar integer constants here because the assumption is the
;; normal integer constants will be loaded into GPR registers. For the
;; constants that need to be loaded into vector registers, the instructions
;; don't work well with TImode variables assigned a constant. This is because
;; the 64-bit scalar constants are splatted into both halves of the register.
(define_predicate "vsx_prefixed_constant"
(match_code "const_double,const_vector,vec_duplicate")
{
/* If we can generate the constant with a few Altivec instructions, don't
generate a prefixed instruction. */
if (CONST_VECTOR_P (op) && easy_altivec_constant (op, mode))
return false;
/* Do we have prefixed instructions and are VSX registers available? Is the
constant recognized? */
if (!TARGET_PREFIXED || !TARGET_VSX)
return false;
/* Fill in VSX_CONST from OP; give up if the helper cannot describe the
constant (presumably it produces the 128-bit byte layout -- confirm against
vec_const_128bit_to_bytes). */
vec_const_128bit_type vsx_const;
if (!vec_const_128bit_to_bytes (op, mode, &vsx_const))
return false;
/* XXSPLTIW is the only prefixed splat tested in this body; no XXSPLTIDP
check appears here. */
if (constant_generates_xxspltiw (&vsx_const))
return true;
return false;
})
;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
;; via the LXVKQ instruction.
@ -683,7 +722,10 @@
vec_const_128bit_type vsx_const;
if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
{
if (constant_generates_lxvkq (&vsx_const) != 0)
if (constant_generates_lxvkq (&vsx_const))
return true;
if (constant_generates_xxspltiw (&vsx_const))
return true;
}

View file

@ -198,6 +198,7 @@ enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
extern bool prefixed_load_p (rtx_insn *);
extern bool prefixed_store_p (rtx_insn *);
extern bool prefixed_paddi_p (rtx_insn *);
extern bool prefixed_xxsplti_p (rtx_insn *);
extern void rs6000_asm_output_opcode (FILE *);
extern void output_pcrel_opt_reloc (rtx);
extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
@ -251,6 +252,7 @@ typedef struct {
extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
vec_const_128bit_type *);
extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
#endif /* RTX_CODE */
#ifdef TREE_CODE

View file

@ -6649,6 +6649,13 @@ xxspltib_constant_p (rtx op,
else if (IN_RANGE (value, -1, 0))
*num_insns_ptr = 1;
/* Do not generate XXSPLTIB and a sign extend operation if we can generate a
single XXSPLTIW or XXSPLTIDP instruction. */
else if (vsx_prefixed_constant (op, mode))
return false;
/* Return XXSPLTIB followed by a sign extend operation to convert the
constant to V8HImode or V4SImode. */
else
*num_insns_ptr = 2;
@ -6709,6 +6716,13 @@ output_vec_const_move (rtx *operands)
operands[2] = GEN_INT (imm);
return "lxvkq %x0,%2";
}
imm = constant_generates_xxspltiw (&vsx_const);
if (imm)
{
operands[2] = GEN_INT (imm);
return "xxspltiw %x0,%2";
}
}
if (TARGET_P9_VECTOR
@ -26480,6 +26494,41 @@ prefixed_paddi_p (rtx_insn *insn)
return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}
/* Return true if INSN is a single-set instruction that writes a register
   (or subreg) and whose source is either one of the XXSPLTI* unspecs or a
   constant accepted by constant_generates_xxspltiw.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_xxsplti_p (rtx_insn *insn)
{
  rtx pat = single_set (insn);
  if (!pat)
    return false;

  /* Only loads into a register or subreg are of interest.  */
  rtx target = SET_DEST (pat);
  if (!(REG_P (target) || SUBREG_P (target)))
    return false;

  /* An explicit unspec is decided purely by which unspec it is.  */
  rtx value = SET_SRC (pat);
  if (GET_CODE (value) == UNSPEC)
    switch (XINT (value, 1))
      {
      case UNSPEC_XXSPLTIW:
      case UNSPEC_XXSPLTIDP:
      case UNSPEC_XXSPLTI32DX:
        return true;

      default:
        return false;
      }

  /* Otherwise the source has to be a constant XXSPLTIW can generate.  */
  vec_const_128bit_type bits;
  return (vec_const_128bit_to_bytes (value, GET_MODE (target), &bits)
          && constant_generates_xxspltiw (&bits) != 0);
}
/* Whether the next instruction needs a 'p' prefix issued before the
instruction is printed out. */
static bool prepend_p_to_next_insn;
@ -28648,6 +28697,40 @@ constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
return 0;
}
/* Decide whether the vector constant described by VSX_CONST should be loaded
   with XXSPLTIW.  Return the immediate operand for XXSPLTIW when it should,
   and zero when XXSPLTIW cannot (or should not) be used.  */

unsigned
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  /* The option switch, prefixed instructions and VSX must all be enabled.  */
  if (!(TARGET_SPLAT_WORD_CONSTANT && TARGET_PREFIXED && TARGET_VSX))
    return 0;

  /* Every word must match.  If every byte matches as well, XXSPLTIB is used
     instead of XXSPLTIW.  */
  if (!vsx_const->all_words_same || vsx_const->all_bytes_same)
    return 0;

  /* Prefer VSPLTISH when the sign-extended half-word passes
     EASY_VECTOR_15.  */
  if (vsx_const->all_half_words_same)
    {
      unsigned short raw_half = vsx_const->half_words[0];
      short half = ((raw_half & 0xffff) ^ 0x8000) - 0x8000;
      if (EASY_VECTOR_15 (half))
        return 0;
    }

  /* Likewise prefer VSPLTISW when the sign-extended word passes
     EASY_VECTOR_15.  */
  unsigned int raw_word = vsx_const->words[0];
  int signed_word = ((raw_word & 0xffffffff) ^ 0x80000000) - 0x80000000;
  if (EASY_VECTOR_15 (signed_word))
    return 0;

  return vsx_const->words[0];
}
struct gcc_target targetm = TARGET_INITIALIZER;

View file

@ -314,6 +314,11 @@
(eq_attr "type" "integer,add")
(if_then_else (match_test "prefixed_paddi_p (insn)")
(const_string "yes")
(const_string "no"))
(eq_attr "type" "vecperm")
(if_then_else (match_test "prefixed_xxsplti_p (insn)")
(const_string "yes")
(const_string "no"))]

View file

@ -640,6 +640,10 @@ mprivileged
Target Var(rs6000_privileged) Init(0)
Generate code that will run in privileged state.
msplat-word-constant
Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the XXSPLTIW instruction.
mieee128-constant
Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the LXVKQ instruction.

View file

@ -1192,19 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
;; LXVKQ
;; LXVKQ XXSPLTI*
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
wa,
wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
eQ,
eQ, eP,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@ -1216,43 +1216,43 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
vecperm,
vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
*,
*, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
*,
*, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
*,
*, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
p10,
p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; LXVKQ
;; LXVKQ XXSPLTI*
;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa,
wa, wa,
wa, v, ?wa, v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
eQ,
eQ, eP,
wE, jwM, ?jwM, W, <nW>,
v, wZ"))]
@ -1264,17 +1264,17 @@
}
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecperm,
vecperm, vecperm,
vecsimple, vecsimple, vecsimple, *, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
*,
*, *,
*, *, *, 20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p10,
p10, p10,
p9v, *, <VSisa>, *, *,
*, *")])

View file

@ -3336,6 +3336,10 @@ A constant whose negation is a signed 16-bit constant.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
@item eP
A scalar floating point constant or a vector constant that can be
loaded to a VSX register with one prefixed instruction.
@item eQ
An IEEE 128-bit constant that can be loaded into a VSX register with
the @code{lxvkq} instruction.

View file

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <altivec.h>
/* Test whether XXSPLTIW is generated for V16QI vector constants where the
first 4 elements are the same as the next 4 elements, etc. */
/* All 16 bytes are identical, so a byte splat (VSPLTISB or XXSPLTIB per the
   dg-final pattern in this file) is expected instead of XXSPLTIW.  */
vector unsigned char
v16qi_const_1 (void)
{
return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB. */
}
/* The 4-byte group { 1, 2, 3, 4 } repeats four times, so every word of the
   vector is the same while the bytes differ; XXSPLTIW is expected.  */
vector unsigned char
v16qi_const_2 (void)
{
return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW. */
}
/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */
/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */
/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
/* { dg-final { scan-assembler-not {\mplxv\M} } } */

View file

@ -0,0 +1,67 @@
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <altivec.h>
/* Test whether XXSPLTIW is generated for V4SF vector constants. */
/* Vector literal with every element 1.0f; a single XXSPLTIW is expected.  */
vector float
v4sf_const_1 (void)
{
return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f }; /* XXSPLTIW. */
}
/* Vector literal with every element a NaN from __builtin_nanf; a single
   XXSPLTIW is expected.  */
vector float
v4sf_const_nan (void)
{
return (vector float) { __builtin_nanf (""),
__builtin_nanf (""),
__builtin_nanf (""),
__builtin_nanf ("") }; /* XXSPLTIW. */
}
/* Vector literal with every element +infinity; a single XXSPLTIW is
   expected.  */
vector float
v4sf_const_inf (void)
{
return (vector float) { __builtin_inff (),
__builtin_inff (),
__builtin_inff (),
__builtin_inff () }; /* XXSPLTIW. */
}
/* Vector literal with every element -0.0f.  This case is expected to use the
   XXSPLTIB + VSLW pair rather than XXSPLTIW.  */
vector float
v4sf_const_m0 (void)
{
return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f }; /* XXSPLTIB/VSLW. */
}
/* Same constant as the 1.0f vector literal, but built with vec_splats; a
   single XXSPLTIW is expected.  */
vector float
v4sf_splats_1 (void)
{
return vec_splats (1.0f); /* XXSPLTIW. */
}
/* NaN splat built with vec_splats; a single XXSPLTIW is expected.  */
vector float
v4sf_splats_nan (void)
{
return vec_splats (__builtin_nanf ("")); /* XXSPLTIW. */
}
/* +infinity splat built with vec_splats; a single XXSPLTIW is expected.  */
vector float
v4sf_splats_inf (void)
{
return vec_splats (__builtin_inff ()); /* XXSPLTIW. */
}
/* -0.0f splat built with vec_splats; expected to use the XXSPLTIB + VSLW
   pair rather than XXSPLTIW.
   NOTE(review): the name says v8hi but this returns vector float in the V4SF
   test -- looks like a copy/paste leftover; consider v4sf_splats_m0.  */
vector float
v8hi_splats_m0 (void)
{
return vec_splats (-0.0f); /* XXSPLTIB/VSLW. */
}
/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */
/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */
/* { dg-final { scan-assembler-times {\mvslw\M} 2 } } */
/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
/* { dg-final { scan-assembler-not {\mplxv\M} } } */

View file

@ -0,0 +1,51 @@
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <altivec.h>
/* Test whether XXSPLTIW is generated for V4SI vector constants. We make sure
the power9 support (XXSPLTIB/VEXTSB2W) is not done. */
/* Vector literal with every element 1; VSPLTISW is expected instead of
   XXSPLTIW.  */
vector int
v4si_const_1 (void)
{
return (vector int) { 1, 1, 1, 1 }; /* VSPLTISW. */
}
/* Vector literal with every element 126; a single XXSPLTIW is expected, and
   the file's scan-assembler-not checks reject XXSPLTIB and VEXTSB2W.  */
vector int
v4si_const_126 (void)
{
return (vector int) { 126, 126, 126, 126 }; /* XXSPLTIW. */
}
/* Vector literal with every element 1023; a single XXSPLTIW is expected.  */
vector int
v4si_const_1023 (void)
{
return (vector int) { 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */
}
/* Splat of 1 built with vec_splats; VSPLTISW is expected instead of
   XXSPLTIW.  */
vector int
v4si_splats_1 (void)
{
return vec_splats (1); /* VSPLTISW. */
}
/* Splat of 126 built with vec_splats; a single XXSPLTIW is expected.  */
vector int
v4si_splats_126 (void)
{
return vec_splats (126); /* XXSPLTIW. */
}
/* Splat of 1023 built with vec_splats; a single XXSPLTIW is expected.
   NOTE(review): the name says v8hi but this returns vector int in the V4SI
   test -- looks like a copy/paste leftover; consider v4si_splats_1023.  */
vector int
v8hi_splats_1023 (void)
{
return vec_splats (1023); /* XXSPLTIW. */
}
/* { dg-final { scan-assembler-times {\mxxspltiw\M} 4 } } */
/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */
/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
/* { dg-final { scan-assembler-not {\mvextsb2w\M} } } */
/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
/* { dg-final { scan-assembler-not {\mplxv\M} } } */

View file

@ -0,0 +1,62 @@
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <altivec.h>
/* Test whether XXSPLTIW is generated for V8HI vector constants. We make sure
the power9 support (XXSPLTIB/VUPKLSB) is not done. */
/* Vector literal with every element 1; VSPLTISH is expected instead of
   XXSPLTIW.  */
vector short
v8hi_const_1 (void)
{
return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 }; /* VSPLTISH. */
}
/* Vector literal with every element 126; a single XXSPLTIW is expected, and
   the file's scan-assembler-not checks reject XXSPLTIB and VUPKLSB.  */
vector short
v8hi_const_126 (void)
{
return (vector short) { 126, 126, 126, 126,
126, 126, 126, 126 }; /* XXSPLTIW. */
}
/* Vector literal with every element 1023; a single XXSPLTIW is expected.  */
vector short
v8hi_const_1023 (void)
{
return (vector short) { 1023, 1023, 1023, 1023,
1023, 1023, 1023, 1023 }; /* XXSPLTIW. */
}
/* Splat of (short)1 built with vec_splats; VSPLTISH is expected instead of
   XXSPLTIW.  */
vector short
v8hi_splats_1 (void)
{
return vec_splats ((short)1); /* VSPLTISH. */
}
/* Splat of (short)126 built with vec_splats; a single XXSPLTIW is
   expected.  */
vector short
v8hi_splats_126 (void)
{
return vec_splats ((short)126); /* XXSPLTIW. */
}
/* Splat of (short)1023 built with vec_splats; a single XXSPLTIW is
   expected.  */
vector short
v8hi_splats_1023 (void)
{
return vec_splats ((short)1023); /* XXSPLTIW. */
}
/* Test that we can optimize V8HI where all of the even elements are the same
and all of the odd elements are the same. */
/* The half-word pair { 1023, 1000 } repeats four times, so every word of the
   vector is the same while the half-words differ; XXSPLTIW is expected.  */
vector short
v8hi_const_1023_1000 (void)
{
return (vector short) { 1023, 1000, 1023, 1000,
1023, 1000, 1023, 1000 }; /* XXSPLTIW. */
}
/* { dg-final { scan-assembler-times {\mxxspltiw\M} 5 } } */
/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */
/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
/* { dg-final { scan-assembler-not {\mvupklsb\M} } } */
/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
/* { dg-final { scan-assembler-not {\mplxv\M} } } */

View file

@ -149,8 +149,8 @@ main (int argc, char *argv [])
return 0;
}
/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */
/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */
/* { dg-final { scan-assembler-times {\mxxspltiw\M} 3 } } */
/* { dg-final { scan-assembler-times {\mxxspltidp\M} 3 } } */
/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */