Fix non-conforming expander for floatv2div2sf2,floatunsv2div2sf2,fix_truncv2sfv2di,fixuns_truncv2sfv2di.
2020-05-26 Hongtao Liu <hongtao.liu@intel.com> Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog: PR target/95211 PR target/95256 * config/i386/sse.md (<floatunssuffix>v2div2sf2): New expander. (fix<fixunssuffix>_truncv2sfv2di2): Ditto. (avx512dq_float<floatunssuffix>v2div2sf2): Renaming from float<floatunssuffix>v2div2sf2. (avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>): Renaming from fix<fixunssuffix>_truncv2sfv2di2<mask_name>. (vec_pack<floatprefix>_float_<mode>): Adjust icode name. (vec_unpack_<fixprefix>fix_trunc_lo_<mode>): Ditto. (vec_unpack_<fixprefix>fix_trunc_hi_<mode>): Ditto. * config/i386/i386-builtin.def: Ditto. * emit-rtl.c (validate_subreg): Allow use of *paradoxical* vector subregs when both omode and imode are vector modes and have the same inner mode. gcc/testsuite/ChangeLog * gcc.target/i386/pr95211.c: New test.
This commit is contained in:
parent
0b0d9a77d0
commit
9d0dc47de6
4 changed files with 90 additions and 15 deletions
|
@ -1649,9 +1649,9 @@ BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_not
|
|||
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fixuns_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI)
|
||||
|
|
|
@ -5795,7 +5795,7 @@
|
|||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_expand "float<floatunssuffix>v2div2sf2"
|
||||
(define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(vec_concat:V4SF
|
||||
(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
|
||||
|
@ -5803,7 +5803,7 @@
|
|||
"TARGET_AVX512DQ && TARGET_AVX512VL"
|
||||
"operands[2] = CONST0_RTX (V2SFmode);")
|
||||
|
||||
(define_insn "*float<floatunssuffix>v2div2sf2"
|
||||
(define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(vec_concat:V4SF
|
||||
(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
|
||||
|
@ -5814,6 +5814,17 @@
|
|||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "V4SF")])
|
||||
|
||||
(define_expand "float<floatunssuffix>v2div2sf2"
|
||||
[(set (match_operand:V2SF 0 "register_operand")
|
||||
(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
|
||||
"TARGET_AVX512DQ && TARGET_AVX512VL"
|
||||
{
|
||||
operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
|
||||
emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
|
||||
(operands[0], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_mode_attr vpckfloat_concat_mode
|
||||
[(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
|
||||
(define_mode_attr vpckfloat_temp_mode
|
||||
|
@ -5830,7 +5841,12 @@
|
|||
{
|
||||
rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
|
||||
rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
|
||||
rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
|
||||
rtx (*gen) (rtx, rtx);
|
||||
|
||||
if (<MODE>mode == V2DImode)
|
||||
gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
|
||||
else
|
||||
gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
|
||||
emit_insn (gen (r1, operands[1]));
|
||||
emit_insn (gen (r2, operands[2]));
|
||||
if (<MODE>mode == V2DImode)
|
||||
|
@ -6217,7 +6233,7 @@
|
|||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseintvecmode3>")])
|
||||
|
||||
(define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
|
||||
(define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=v")
|
||||
(any_fix:V2DI
|
||||
(vec_select:V2SF
|
||||
|
@ -6229,6 +6245,18 @@
|
|||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "fix<fixunssuffix>_truncv2sfv2di2"
|
||||
[(set (match_operand:V2DI 0 "register_operand")
|
||||
(any_fix:V2DI
|
||||
(match_operand:V2SF 1 "register_operand")))]
|
||||
"TARGET_AVX512DQ && TARGET_AVX512VL"
|
||||
{
|
||||
operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
|
||||
emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
|
||||
(operands[0], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_mode_attr vunpckfixt_mode
|
||||
[(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
|
||||
(define_mode_attr vunpckfixt_model
|
||||
|
@ -6243,14 +6271,18 @@
|
|||
"TARGET_AVX512DQ"
|
||||
{
|
||||
rtx tem = operands[1];
|
||||
rtx (*gen) (rtx, rtx);
|
||||
|
||||
if (<MODE>mode != V4SFmode)
|
||||
{
|
||||
tem = gen_reg_rtx (<ssehalfvecmode>mode);
|
||||
emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
|
||||
operands[1]));
|
||||
gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
|
||||
}
|
||||
rtx (*gen) (rtx, rtx)
|
||||
= gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
|
||||
else
|
||||
gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
|
||||
|
||||
emit_insn (gen (operands[0], tem));
|
||||
DONE;
|
||||
})
|
||||
|
@ -6262,19 +6294,22 @@
|
|||
"TARGET_AVX512DQ"
|
||||
{
|
||||
rtx tem;
|
||||
rtx (*gen) (rtx, rtx);
|
||||
|
||||
if (<MODE>mode != V4SFmode)
|
||||
{
|
||||
tem = gen_reg_rtx (<ssehalfvecmode>mode);
|
||||
emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
|
||||
operands[1]));
|
||||
gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
|
||||
}
|
||||
else
|
||||
{
|
||||
tem = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
|
||||
gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
|
||||
}
|
||||
rtx (*gen) (rtx, rtx)
|
||||
= gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
|
||||
|
||||
emit_insn (gen (operands[0], tem));
|
||||
DONE;
|
||||
})
|
||||
|
|
|
@ -939,11 +939,13 @@ validate_subreg (machine_mode omode, machine_mode imode,
|
|||
&& GET_MODE_INNER (imode) == omode)
|
||||
;
|
||||
/* ??? x86 sse code makes heavy use of *paradoxical* vector subregs,
|
||||
i.e. (subreg:V4SF (reg:SF) 0). This surely isn't the cleanest way to
|
||||
represent this. It's questionable if this ought to be represented at
|
||||
all -- why can't this all be hidden in post-reload splitters that make
|
||||
arbitrarily mode changes to the registers themselves. */
|
||||
else if (VECTOR_MODE_P (omode) && GET_MODE_INNER (omode) == imode)
|
||||
i.e. (subreg:V4SF (reg:SF) 0) or (subreg:V4SF (reg:V2SF) 0). This
|
||||
surely isn't the cleanest way to represent this. It's questionable
|
||||
if this ought to be represented at all -- why can't this all be hidden
|
||||
in post-reload splitters that make arbitrarily mode changes to the
|
||||
registers themselves. */
|
||||
else if (VECTOR_MODE_P (omode)
|
||||
&& GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
|
||||
;
|
||||
/* Subregs involving floating point modes are not allowed to
|
||||
change size. Therefore (subreg:DI (reg:DF) 0) is fine, but
|
||||
|
|
38
gcc/testsuite/gcc.target/i386/pr95211.c
Normal file
38
gcc/testsuite/gcc.target/i386/pr95211.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* PR target/95211 target/95256 */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -ftree-slp-vectorize -march=skylake-avx512" } */
|
||||
|
||||
extern float f[4];
|
||||
extern long long l[2];
|
||||
extern long long ul[2];
|
||||
|
||||
void
|
||||
fix_128 (void)
|
||||
{
|
||||
l[0] = f[0];
|
||||
l[1] = f[1];
|
||||
}
|
||||
|
||||
void
|
||||
fixuns_128 (void)
|
||||
{
|
||||
ul[0] = f[0];
|
||||
ul[1] = f[1];
|
||||
}
|
||||
|
||||
void
|
||||
float_128 (void)
|
||||
{
|
||||
f[0] = l[0];
|
||||
f[1] = l[1];
|
||||
}
|
||||
|
||||
void
|
||||
floatuns_128 (void)
|
||||
{
|
||||
f[0] = ul[0];
|
||||
f[1] = ul[1];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vcvttps2qq" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vcvtqq2ps" 2 } } */
|
Loading…
Add table
Reference in a new issue