[PATCH, PR target/94954] Fix wrong codegen for vec_pack_to_short_fp32() builtin
Hi, Fix codegen for builtin vec_pack_to_short_fp32. This includes adding a define_insn for xvcvsphp, and adding a new define_expand for convert_4f32_8f16. 2020-06-24 Will Schmidt <will_schmidt@vnet.ibm.com> PR target/94954 gcc * config/rs6000/altivec.h (vec_pack_to_short_fp32): Update. * config/rs6000/altivec.md (UNSPEC_CONVERT_4F32_8F16): New unspec. (convert_4f32_8f16): New define_expand * config/rs6000/rs6000-builtin.def (convert_4f32_8f16): New builtin define and overload. * config/rs6000/rs6000-call.c (P9V_BUILTIN_VEC_CONVERT_4F32_8F16): New overloaded builtin entry. * config/rs6000/vsx.md (UNSPEC_VSX_XVCVSPHP): New unspec. (vsx_xvcvsphp): New define_insn. gcc/testsuite * gcc.target/powerpc/builtins-1-p9-runnable.c: Update.
This commit is contained in:
parent
ef6506e236
commit
58b475a223
6 changed files with 83 additions and 10 deletions
|
@ -433,7 +433,7 @@
|
|||
#define vec_first_match_or_eos_index __builtin_vec_first_match_or_eos_index
|
||||
#define vec_first_mismatch_index __builtin_vec_first_mismatch_index
|
||||
#define vec_first_mismatch_or_eos_index __builtin_vec_first_mismatch_or_eos_index
|
||||
#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16
|
||||
#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8f16
|
||||
#define vec_parity_lsbb __builtin_vec_vparity_lsbb
|
||||
#define vec_vctz __builtin_vec_vctz
|
||||
#define vec_cnttz __builtin_vec_vctz
|
||||
|
|
|
@ -80,6 +80,7 @@
|
|||
UNSPEC_VUPKHPX
|
||||
UNSPEC_VUPKLPX
|
||||
UNSPEC_CONVERT_4F32_8I16
|
||||
UNSPEC_CONVERT_4F32_8F16
|
||||
UNSPEC_DST
|
||||
UNSPEC_DSTT
|
||||
UNSPEC_DSTST
|
||||
|
@ -3217,6 +3218,39 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
|
||||
;; Convert two vectors of F32 values into one packed vector of F16 values.
|
||||
;; This builtin packs 32-bit floating-point values into packed
|
||||
;; 16-bit floating-point values (stored in a 16-bit integer type).
|
||||
;; vector unsigned short r = vec_pack_to_short_fp32 (a, b);
|
||||
;; The expected codegen for this builtin is
|
||||
;; xvcvsphp t, a
|
||||
;; xvcvsphp u, b
|
||||
;; if (little endian)
|
||||
;; vpkuwum r, t, u
|
||||
;; else
|
||||
;; vpkuwum r, u, t
|
||||
|
||||
(define_expand "convert_4f32_8f16"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v")
|
||||
(match_operand:V4SF 2 "register_operand" "v")]
|
||||
UNSPEC_CONVERT_4F32_8F16))]
|
||||
"TARGET_P9_VECTOR"
|
||||
{
|
||||
rtx rtx_tmp_hi = gen_reg_rtx (V4SImode);
|
||||
rtx rtx_tmp_lo = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vsx_xvcvsphp (rtx_tmp_hi, operands[1]));
|
||||
emit_insn (gen_vsx_xvcvsphp (rtx_tmp_lo, operands[2]));
|
||||
if (!BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, rtx_tmp_lo));
|
||||
else
|
||||
emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_lo, rtx_tmp_hi));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
||||
;; Generate
|
||||
;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
|
||||
;; vsubu?m SCRATCH2,SCRATCH1,%1
|
||||
|
|
|
@ -2289,6 +2289,7 @@ BU_P8V_OVERLOAD_3 (VPERMXOR, "vpermxor")
|
|||
BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv)
|
||||
BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv)
|
||||
BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, convert_4f32_8i16)
|
||||
BU_P9V_AV_2 (CONVERT_4F32_8F16, "convert_4f32_8f16", CONST, convert_4f32_8f16)
|
||||
|
||||
BU_P9V_AV_2 (VFIRSTMATCHINDEX_V16QI, "first_match_index_v16qi",
|
||||
CONST, first_match_index_v16qi)
|
||||
|
@ -2319,6 +2320,7 @@ BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V4SI, "first_mismatch_or_eos_index_v4si",
|
|||
BU_P9V_OVERLOAD_2 (VSLV, "vslv")
|
||||
BU_P9V_OVERLOAD_2 (VSRV, "vsrv")
|
||||
BU_P9V_OVERLOAD_2 (CONVERT_4F32_8I16, "convert_4f32_8i16")
|
||||
BU_P9V_OVERLOAD_2 (CONVERT_4F32_8F16, "convert_4f32_8f16")
|
||||
|
||||
/* 2 argument vector functions added in ISA 3.0 (power9). */
|
||||
BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3)
|
||||
|
|
|
@ -1985,6 +1985,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
|
|||
|
||||
{ P9V_BUILTIN_VEC_CONVERT_4F32_8I16, P9V_BUILTIN_CONVERT_4F32_8I16,
|
||||
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
|
||||
{ P9V_BUILTIN_VEC_CONVERT_4F32_8F16, P9V_BUILTIN_CONVERT_4F32_8F16,
|
||||
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
|
||||
|
||||
{ P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI,
|
||||
RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
|
||||
|
|
|
@ -299,6 +299,7 @@
|
|||
UNSPEC_VSX_XVCVBF16SP
|
||||
UNSPEC_VSX_XVCVSPBF16
|
||||
UNSPEC_VSX_XVCVSPSXDS
|
||||
UNSPEC_VSX_XVCVSPHP
|
||||
UNSPEC_VSX_VSLO
|
||||
UNSPEC_VSX_EXTRACT
|
||||
UNSPEC_VSX_SXEXPDP
|
||||
|
@ -2187,6 +2188,15 @@
|
|||
"xvcvhpsp %x0,%x1"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
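;; Generate xvcvsphp.  The input is a V4SF vector and the result is modeled
;; as V4SI; the convert_4f32_8f16 expander packs two such results with vpkuwum.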
|
||||
(define_insn "vsx_xvcvsphp"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=wa")
|
||||
(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
|
||||
UNSPEC_VSX_XVCVSPHP))]
|
||||
"TARGET_P9_VECTOR"
|
||||
"xvcvsphp %x0,%x1"
|
||||
[(set_attr "type" "vecfloat")])
|
||||
|
||||
;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
|
||||
;; format of scalars is actually DF.
|
||||
(define_insn "vsx_xscvdpsp_scalar"
|
||||
|
|
|
@ -1,25 +1,50 @@
|
|||
/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
|
||||
/* { dg-require-effective-target powerpc_p9vector_ok } */
|
||||
/* { dg-do run { target { powerpc*-*-linux* && p9vector_hw } } } */
|
||||
/* { dg-require-effective-target p9vector_hw } */
|
||||
/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void abort (void);
|
||||
|
||||
int main() {
|
||||
int i;
|
||||
vector float vfa, vfb;
|
||||
vector unsigned short vur, vuexpt;
|
||||
vector unsigned short vresult, vexpected;
|
||||
|
||||
vfa = (vector float){3.4, 5.0, 20.0, 50.9 };
|
||||
vfb = (vector float){10.0, 40.0, 70.0, 100.0 };
|
||||
vuexpt = (vector unsigned short){ 3, 5, 20, 50,
|
||||
10, 40, 70, 100};
|
||||
vfa = (vector float){0.4, 1.6, 20.0, 99.9 };
|
||||
vfb = (vector float){10.0, -2.0, 70.0, 999.0 };
|
||||
|
||||
vur = vec_pack_to_short_fp32 (vfa, vfb);
|
||||
/* Expected results. */
|
||||
vexpected = (vector unsigned short) { 0x3666, 0x3e66, 0x4d00, 0x563e,
|
||||
0x4900, 0xc000, 0x5460, 0x63ce};
|
||||
|
||||
/*
|
||||
vresult = vec_pack_to_short_fp32 (vfa, vfb);
|
||||
This built-in converts a pair of vector floats into a single vector of
|
||||
packed half-precision (F16) values. The result type is a vector of
|
||||
unsigned shorts.
|
||||
The expected codegen for this builtin is
|
||||
xvcvsphp t, vfa
|
||||
xvcvsphp u, vfb
|
||||
if (little endian)
|
||||
vpkuwum vresult, t, u
|
||||
else
|
||||
vpkuwum vresult, u, t
|
||||
*/
|
||||
|
||||
vresult = vec_pack_to_short_fp32 (vfa, vfb);
|
||||
|
||||
#ifdef DEBUG
|
||||
for(i = 0; i< 4; i++) { printf("i=[%d] %f \n",i,vfa[i]); }
|
||||
for(i = 0; i< 4; i++) { printf("i=[%d] %f \n",i+4,vfb[i]); }
|
||||
for(i = 0; i< 8; i++) { printf("i=[%d] %d \n",i,vresult[i]); }
|
||||
#endif
|
||||
|
||||
for(i = 0; i< 8; i++) {
|
||||
if (vur[i] != vuexpt[i])
|
||||
if (vresult[i] != vexpected[i]) {
|
||||
printf("i=[%d] 0x%x != 0x%x \n",i,vresult[i],vexpected[i]);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue