Generate XXSPLTIDP for vectors on power10.

This patch implements XXSPLTIDP support for all vector constants.  The
XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector
of two DFmode constants.  The immediate is in SFmode format, so only constants
that fit as SFmode values can be loaded with XXSPLTIDP.

The constraint (eP) added in the previous patch for XXSPLTIW is also used
for XXSPLTIDP.

DImode scalar constants are not handled because the majority of DImode
constants will be in the GPR registers.  With vector registers, you have the
problem that XXSPLTIDP splats the double word into both elements of the
vector.  However, if TImode is loaded with an integer constant, it wants a full
128-bit constant.

SFmode and DFmode scalar constants are not handled in this patch.  The
support for those constants will be in the next patch.

I have added a temporary switch (-msplat-float-constant) to control whether or
not the XXSPLTIDP instruction is generated.

I added 2 new tests to test loading up V2DI and V2DF vector constants.

2021-12-14  Michael Meissner  <meissner@the-meissners.org>

gcc/

	* config/rs6000/predicates.md (easy_fp_constant): Add support for
	generating XXSPLTIDP.
	(vsx_prefixed_constant): Likewise.
	(easy_vector_constant): Likewise.
	* config/rs6000/rs6000-protos.h (constant_generates_xxspltidp):
	New declaration.
	* config/rs6000/rs6000.c (output_vec_const_move): Add support for
	generating XXSPLTIDP.
	(prefixed_xxsplti_p): Likewise.
	(constant_generates_xxspltidp): New function.
	* config/rs6000/rs6000.opt (-msplat-float-constant): New debug option.

gcc/testsuite/

	* gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
	regex for power10.
	* gcc.target/powerpc/vec-splat-constant-v2df.c: New test.
	* gcc.target/powerpc/vec-splat-constant-v2di.c: New test.
This commit is contained in:
Michael Meissner 2021-12-15 02:02:24 -05:00
parent d730aa8a9f
commit 8d443ac032
7 changed files with 241 additions and 4 deletions

View file

@ -610,6 +610,9 @@
if (constant_generates_xxspltiw (&vsx_const))
return true;
if (constant_generates_xxspltidp (&vsx_const))
return true;
}
/* Otherwise consider floating point constants hard, so that the
@ -653,6 +656,9 @@
if (constant_generates_xxspltiw (&vsx_const))
return true;
if (constant_generates_xxspltidp (&vsx_const))
return true;
return false;
})
@ -727,6 +733,9 @@
if (constant_generates_xxspltiw (&vsx_const))
return true;
if (constant_generates_xxspltidp (&vsx_const))
return true;
}
if (TARGET_P9_VECTOR

View file

@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
vec_const_128bit_type *);
extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
#endif /* RTX_CODE */
#ifdef TREE_CODE

View file

@ -6723,6 +6723,13 @@ output_vec_const_move (rtx *operands)
operands[2] = GEN_INT (imm);
return "xxspltiw %x0,%2";
}
imm = constant_generates_xxspltidp (&vsx_const);
if (imm)
{
operands[2] = GEN_INT (imm);
return "xxspltidp %x0,%2";
}
}
if (TARGET_P9_VECTOR
@ -26524,6 +26531,9 @@ prefixed_xxsplti_p (rtx_insn *insn)
{
if (constant_generates_xxspltiw (&vsx_const))
return true;
if (constant_generates_xxspltidp (&vsx_const))
return true;
}
return false;
@ -28731,6 +28741,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
return vsx_const->words[0];
}
/* Return the immediate operand for an XXSPLTIDP instruction that loads the
   128-bit constant described by VSX_CONST, or zero when XXSPLTIDP cannot be
   used for that constant.

   The constant qualifies when both 64-bit halves are identical, no narrower
   element repeats across the vector, and the 64-bit pattern, read as a
   DFmode value, converts exactly to a SFmode value that is not subnormal.  */

unsigned
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  /* The debug switch, prefixed instructions and VSX must all be enabled.  */
  if (!(TARGET_SPLAT_FLOAT_CONSTANT && TARGET_PREFIXED && TARGET_VSX))
    return 0;

  /* Both double words must match.  If the bytes, half words, or words are
     all the same, leave the constant to a simpler instruction (XXSPLTIB,
     VSPLTISB, VSPLTISH, or VSPLTISW).  */
  if (!vsx_const->all_double_words_same
      || vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT dword = vsx_const->double_words[0];

  /* Bit patterns that are accepted even though they have the exponent field
     of a NaN/infinity: the normal quiet NaN, the normal signaling NaN, and
     the two infinities.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  const bool accepted_special = (dword == RS6000_CONST_DF_NAN
				 || dword == RS6000_CONST_DF_NANS
				 || dword == RS6000_CONST_DF_INF
				 || dword == RS6000_CONST_DF_NEG_INF);

  if (!accepted_special)
    {
      /* IEEE 754 64-bit layout: 1 sign bit, 11 exponent bits, 52 mantissa
	 bits (the hidden bit of normal numbers is not stored).  */
      int df_exp = (dword >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT df_frac
	= dword & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      /* A non-zero mantissa with an all-ones exponent is some other NaN; a
	 non-zero mantissa with a zero exponent is a DFmode subnormal.
	 Neither is loaded with XXSPLTIDP.  */
      if (df_frac != 0 && (df_exp == 0x7ff || df_exp == 0))
	return 0;
    }

  /* Build the DFmode image of the constant.  real_from_target takes the
     target words in target order, so the two words are exchanged when the
     target is not big endian.  */
  const int first = BYTES_BIG_ENDIAN ? 0 : 1;
  long df_image[2] = { vsx_const->words[first], vsx_const->words[1 - first] };

  REAL_VALUE_TYPE df_real;
  real_from_target (&df_real, df_image, DFmode);

  /* The value must be storable as a SFmode value without loss.  */
  if (!exact_real_truncate (SFmode, &df_real))
    return 0;

  /* Obtain the SFmode bit image, which is also the immediate to emit.  */
  long sf_image;
  real_to_target (&sf_image, &df_real, SFmode);

  /* IEEE 754 32-bit layout: 1 sign bit, 8 exponent bits, 23 mantissa bits.
     A zero exponent with a non-zero mantissa is a SFmode subnormal, which is
     undefined for the XXSPLTIDP instruction.  */
  const bool sf_exp_is_zero = ((sf_image >> 23) & 0xFF) == 0;
  const bool sf_frac_is_nonzero = (sf_image & 0x7FFFFF) != 0;

  if (sf_exp_is_zero && sf_frac_is_nonzero)
    return 0;

  /* Return the immediate to be used.  */
  return sf_image;
}
struct gcc_target targetm = TARGET_INITIALIZER;

View file

@ -644,6 +644,10 @@ msplat-word-constant
Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the XXSPLTIW instruction.
msplat-float-constant
Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the XXSPLTIDP instruction.
mieee128-constant
Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the LXVKQ instruction.

View file

@ -24,11 +24,12 @@ vector signed long long splats4(void)
return (vector signed long long) vec_sl(mzero, mzero);
}
/* Codegen will consist of splat and shift instructions for most types.
If folding is enabled, the vec_sl tests using vector long long type will
generate a lvx instead of a vspltisw+vsld pair. */
/* Codegen will consist of splat and shift instructions for most types. If
folding is enabled, the vec_sl tests using vector long long type will
generate a lvx instead of a vspltisw+vsld pair. On power10, it will
generate a xxspltidp instruction instead of the lvx. */
/* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */

View file

@ -0,0 +1,64 @@
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <math.h>
/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP
instruction. */
/* Return a V2DF vector with +0.0 in both elements.  This constant is expected
   to be loaded with a simpler instruction (see the trailing comment), not
   XXSPLTIDP, so it does not contribute to the xxspltidp scan count.  */
vector double
v2df_double_0 (void)
{
return (vector double) { 0.0, 0.0 }; /* XXSPLTIB or XXLXOR. */
}
/* Return a V2DF vector with 1.0 in both elements.  1.0 is exactly
   representable as a single precision value, so the test expects it to be
   loaded with XXSPLTIDP.  */
vector double
v2df_double_1 (void)
{
return (vector double) { 1.0, 1.0 }; /* XXSPLTIDP. */
}
#ifndef __FAST_MATH__
/* Return a V2DF vector with -0.0 in both elements.  Only the sign bit is set
   in each double word, and the test expects the constant to be loaded with
   XXSPLTIDP.  */
vector double
v2df_double_m0 (void)
{
return (vector double) { -0.0, -0.0 }; /* XXSPLTIDP. */
}
/* Return a V2DF vector with a NaN in both elements.  The test expects the
   constant to be loaded with XXSPLTIDP.
   NOTE(review): presumably __builtin_nan ("") yields the default quiet NaN
   bit pattern -- confirm.  */
vector double
v2df_double_nan (void)
{
return (vector double) { __builtin_nan (""),
__builtin_nan ("") }; /* XXSPLTIDP. */
}
/* Return a V2DF vector with positive infinity in both elements.  The test
   expects the constant to be loaded with XXSPLTIDP.  */
vector double
v2df_double_inf (void)
{
return (vector double) { __builtin_inf (),
__builtin_inf () }; /* XXSPLTIDP. */
}
/* Return a V2DF vector with negative infinity in both elements.  The test
   expects the constant to be loaded with XXSPLTIDP.  */
vector double
v2df_double_m_inf (void)
{
return (vector double) { - __builtin_inf (),
- __builtin_inf () }; /* XXSPLTIDP. */
}
#endif
/* Return a V2DF vector with M_PI in both elements.  The double precision
   value of pi cannot be represented exactly in single precision, so
   XXSPLTIDP must not be used; the constant is expected to be loaded from
   memory instead.  */
vector double
v2df_double_pi (void)
{
return (vector double) { M_PI, M_PI }; /* PLXV. */
}
/* Return a V2DF vector with 0x1p-149 in both elements.  The value is written
   as a single precision literal and widened to double; in single precision
   it is a subnormal number, so XXSPLTIDP must not be used and the constant is
   expected to be loaded from memory instead.  */
vector double
v2df_double_denorm (void)
{
return (vector double) { (double)0x1p-149f,
(double)0x1p-149f }; /* PLXV. */
}
/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */

View file

@ -0,0 +1,50 @@
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
/* Test generating V2DImode constants that have the same bit pattern as
V2DFmode constants that can be loaded with the XXSPLTIDP instruction with
the ISA 3.1 (power10). */
/* Return a V2DI vector of zeros.  The constant is expected to be loaded with
   a simpler instruction than XXSPLTIDP.  */
vector long long
vector_0 (void)
{
/* XXSPLTIB or XXLXOR. */
return (vector long long) { 0LL, 0LL };
}
/* Return a V2DI vector with 1 in both elements.  Read as a double precision
   bit pattern, 1 has a zero exponent and a non-zero mantissa (a subnormal),
   so XXSPLTIDP is not expected here.  */
vector long long
vector_1 (void)
{
/* XXSPLTIB and VEXTSB2D. */
return (vector long long) { 1LL, 1LL };
}
/* 0x8000000000000000LL is the bit pattern for -0.0, which can be generated
with XXSPLTIDP. */
vector long long
vector_float_neg_0 (void)
{
/* XXSPLTIDP. */
return (vector long long) { 0x8000000000000000LL, 0x8000000000000000LL };
}
/* 0x3ff0000000000000LL is the bit pattern for 1.0 which can be generated with
XXSPLTIDP. */
vector long long
vector_float_1_0 (void)
{
/* XXSPLTIDP. */
return (vector long long) { 0x3ff0000000000000LL, 0x3ff0000000000000LL };
}
/* 0x400921fb54442d18LL is the bit pattern for PI, which cannot be generated
with XXSPLTIDP because the value does not fit exactly in single precision.
The constant is expected to be loaded from memory instead. */
vector long long
scalar_pi (void)
{
/* PLXV. */
return (vector long long) { 0x400921fb54442d18LL, 0x400921fb54442d18LL };
}
/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */