[PATCH, rs6000] VSX load/store rightmost element operations
Hi, this adds support for the VSX load/store rightmost element operations. This includes the instructions lxvrbx, lxvrhx, lxvrwx, lxvrdx, stxvrbx, stxvrhx, stxvrwx, stxvrdx; and the builtins vec_xl_sext() /* vector load sign extend */ vec_xl_zext() /* vector load zero extend */ vec_xst_trunc() /* vector store truncate */. Testcase results show that the instructions added with this patch show up at low/no optimization (-O0), with a number of those being replaced with other load and store instructions at higher optimization levels. For consistency I've left the tests at -O0. [v2] Refreshed per review comments. Comments cleaned up, indentation corrected. gcc/ChangeLog: * config/rs6000/altivec.h (vec_xl_zext, vec_xl_sext, vec_xst_trunc): New defines. * config/rs6000/rs6000-builtin.def (BU_P10V_OVERLOAD_X): New builtin macro. (BU_P10V_AV_X): New builtin macro. (se_lxvrbx, se_lxvrhx, se_lxvrwx, se_lxvrdx): Define internal names for load and sign extend vector element. (ze_lxvrbx, ze_lxvrhx, ze_lxvrwx, ze_lxvrdx): Define internal names for load and zero extend vector element. (tr_stxvrbx, tr_stxvrhx, tr_stxvrwx, tr_stxvrdx): Define internal names for truncate and store vector element. (se_lxvrx, ze_lxvrx, tr_stxvrx): Define internal names for overloaded load/store rightmost element. * config/rs6000/rs6000-call.c (altivec_builtin_types): Define the internal monomorphs P10_BUILTIN_SE_LXVRBX, P10_BUILTIN_SE_LXVRHX, P10_BUILTIN_SE_LXVRWX, P10_BUILTIN_SE_LXVRDX, P10_BUILTIN_ZE_LXVRBX, P10_BUILTIN_ZE_LXVRHX, P10_BUILTIN_ZE_LXVRWX, P10_BUILTIN_ZE_LXVRDX, P10_BUILTIN_TR_STXVRBX, P10_BUILTIN_TR_STXVRHX, P10_BUILTIN_TR_STXVRWX, P10_BUILTIN_TR_STXVRDX. (altivec_expand_lxvr_builtin): New expansion for load element builtins. (altivec_expand_stv_builtin): Update for truncate and store builtins. (altivec_expand_builtin): Add cases for load/store rightmost builtins. 
(altivec_init_builtins): Add def_builtin entries for __builtin_altivec_se_lxvrbx, __builtin_altivec_se_lxvrhx, __builtin_altivec_se_lxvrwx, __builtin_altivec_se_lxvrdx, __builtin_altivec_ze_lxvrbx, __builtin_altivec_ze_lxvrhx, __builtin_altivec_ze_lxvrwx, __builtin_altivec_ze_lxvrdx, __builtin_altivec_tr_stxvrbx, __builtin_altivec_tr_stxvrhx, __builtin_altivec_tr_stxvrwx, __builtin_altivec_tr_stxvrdx, __builtin_vec_se_lxvrx, __builtin_vec_ze_lxvrx, __builtin_vec_tr_stxvrx. * config/rs6000/vsx.md (vsx_lxvr<wd>x, vsx_stxvr<wd>x): New define_insn entries. * doc/extend.texi: Add documentation for vec_xl_sext, vec_xl_zext, and vec_xst_trunc. gcc/testsuite/ChangeLog: * gcc.target/powerpc/vsx-load-element-extend-char.c: New test. * gcc.target/powerpc/vsx-load-element-extend-int.c: New test. * gcc.target/powerpc/vsx-load-element-extend-longlong.c: New test. * gcc.target/powerpc/vsx-load-element-extend-short.c: New test. * gcc.target/powerpc/vsx-store-element-truncate-char.c: New test. * gcc.target/powerpc/vsx-store-element-truncate-int.c: New test. * gcc.target/powerpc/vsx-store-element-truncate-longlong.c: New test. * gcc.target/powerpc/vsx-store-element-truncate-short.c: New test.
This commit is contained in:
parent
8732511910
commit
b69c00612d
13 changed files with 1468 additions and 1 deletions
|
@ -236,6 +236,9 @@
|
|||
#define vec_lvebx __builtin_vec_lvebx
|
||||
#define vec_lvehx __builtin_vec_lvehx
|
||||
#define vec_lvewx __builtin_vec_lvewx
|
||||
#define vec_xl_zext __builtin_vec_ze_lxvrx
|
||||
#define vec_xl_sext __builtin_vec_se_lxvrx
|
||||
#define vec_xst_trunc __builtin_vec_tr_stxvrx
|
||||
#define vec_neg __builtin_vec_neg
|
||||
#define vec_pmsum_be __builtin_vec_vpmsum
|
||||
#define vec_shasigma_be __builtin_crypto_vshasigma
|
||||
|
|
|
@ -1145,6 +1145,14 @@
|
|||
CODE_FOR_ ## ICODE) /* ICODE */
|
||||
#endif
|
||||
|
||||
/* Declare an overloaded Power10 vector builtin through RS6000_BUILTIN_X.
   The enumerator is P10_BUILTIN_VEC_<ENUM>, the builtin name is
   "__builtin_vec_" followed by NAME, and the mask is RS6000_BTM_P10.
   The entry is flagged RS6000_BTC_OVERLOADED | RS6000_BTC_SPECIAL and
   carries no insn code (CODE_FOR_nothing).  */
#define BU_P10V_OVERLOAD_X(ENUM, NAME) \
|
||||
RS6000_BUILTIN_X (P10_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_vec_" NAME, /* NAME */ \
|
||||
RS6000_BTM_P10, /* MASK */ \
|
||||
(RS6000_BTC_OVERLOADED /* ATTR */ \
|
||||
| RS6000_BTC_SPECIAL), \
|
||||
CODE_FOR_nothing) /* ICODE */
|
||||
|
||||
/* Power 10 Altivec builtins */
|
||||
|
||||
#define BU_P10V_AV_0(ENUM, NAME, ATTR, ICODE) \
|
||||
|
@ -1179,6 +1187,15 @@
|
|||
| RS6000_BTC_TERNARY), \
|
||||
CODE_FOR_ ## ICODE) /* ICODE */
|
||||
|
||||
/* Declare a Power10 vector builtin through RS6000_BUILTIN_X.
   The enumerator is P10_BUILTIN_<ENUM>, the builtin name is
   "__builtin_altivec_" followed by NAME, and the mask is RS6000_BTM_P10.
   ATTR is pasted onto RS6000_BTC_ (e.g. PURE, MEM) and combined with
   RS6000_BTC_SPECIAL; the entry carries no insn code (CODE_FOR_nothing).  */
#define BU_P10V_AV_X(ENUM, NAME, ATTR) \
|
||||
RS6000_BUILTIN_X (P10_BUILTIN_ ## ENUM, /* ENUM */ \
|
||||
"__builtin_altivec_" NAME, /* NAME */ \
|
||||
RS6000_BTM_P10, /* MASK */ \
|
||||
(RS6000_BTC_ ## ATTR /* ATTR */ \
|
||||
| RS6000_BTC_SPECIAL), \
|
||||
CODE_FOR_nothing) /* ICODE */
|
||||
|
||||
|
||||
|
||||
/* Insure 0 is not a legitimate index. */
|
||||
BU_SPECIAL_X (RS6000_BUILTIN_NONE, NULL, 0, RS6000_BTC_MISC)
|
||||
|
@ -1474,6 +1491,18 @@ BU_ALTIVEC_X (LVSR, "lvsr", PURE)
|
|||
BU_ALTIVEC_X (LVEBX, "lvebx", PURE)
|
||||
BU_ALTIVEC_X (LVEHX, "lvehx", PURE)
|
||||
BU_ALTIVEC_X (LVEWX, "lvewx", PURE)
|
||||
BU_P10V_AV_X (SE_LXVRBX, "se_lxvrbx", PURE)
|
||||
BU_P10V_AV_X (SE_LXVRHX, "se_lxvrhx", PURE)
|
||||
BU_P10V_AV_X (SE_LXVRWX, "se_lxvrwx", PURE)
|
||||
BU_P10V_AV_X (SE_LXVRDX, "se_lxvrdx", PURE)
|
||||
BU_P10V_AV_X (ZE_LXVRBX, "ze_lxvrbx", PURE)
|
||||
BU_P10V_AV_X (ZE_LXVRHX, "ze_lxvrhx", PURE)
|
||||
BU_P10V_AV_X (ZE_LXVRWX, "ze_lxvrwx", PURE)
|
||||
BU_P10V_AV_X (ZE_LXVRDX, "ze_lxvrdx", PURE)
|
||||
BU_P10V_AV_X (TR_STXVRBX, "tr_stxvrbx", MEM)
|
||||
BU_P10V_AV_X (TR_STXVRHX, "tr_stxvrhx", MEM)
|
||||
BU_P10V_AV_X (TR_STXVRWX, "tr_stxvrwx", MEM)
|
||||
BU_P10V_AV_X (TR_STXVRDX, "tr_stxvrdx", MEM)
|
||||
BU_ALTIVEC_X (LVXL, "lvxl", PURE)
|
||||
BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", PURE)
|
||||
BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", PURE)
|
||||
|
@ -1740,6 +1769,9 @@ BU_ALTIVEC_OVERLOAD_X (LDL, "ldl")
|
|||
BU_ALTIVEC_OVERLOAD_X (LVEBX, "lvebx")
|
||||
BU_ALTIVEC_OVERLOAD_X (LVEHX, "lvehx")
|
||||
BU_ALTIVEC_OVERLOAD_X (LVEWX, "lvewx")
|
||||
BU_P10V_OVERLOAD_X (SE_LXVRX, "se_lxvrx")
|
||||
BU_P10V_OVERLOAD_X (ZE_LXVRX, "ze_lxvrx")
|
||||
BU_P10V_OVERLOAD_X (TR_STXVRX, "tr_stxvrx")
|
||||
BU_ALTIVEC_OVERLOAD_X (LVLX, "lvlx")
|
||||
BU_ALTIVEC_OVERLOAD_X (LVLXL, "lvlxl")
|
||||
BU_ALTIVEC_OVERLOAD_X (LVRX, "lvrx")
|
||||
|
|
|
@ -1152,6 +1152,65 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
|
|||
{ ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
|
||||
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
|
||||
|
||||
/* vector signed __int128 vec_xl_sext (signed long long, signed char *);
|
||||
vector signed __int128 vec_xl_sext (signed long long, signed short *);
|
||||
vector signed __int128 vec_xl_sext (signed long long, signed int *);
|
||||
vector signed __int128 vec_xl_sext (signed long long, signed long long *); */
|
||||
{ P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRBX,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
|
||||
{ P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRHX,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
|
||||
{ P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRWX,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
|
||||
{ P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRDX,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
|
||||
{ P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRDX,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
|
||||
|
||||
/* vector unsigned __int128 vec_xl_zext (signed long long, unsigned char *);
|
||||
vector unsigned __int128 vec_xl_zext (signed long long, unsigned short *);
|
||||
vector unsigned __int128 vec_xl_zext (signed long long, unsigned int *);
|
||||
vector unsigned __int128 vec_xl_zext (signed long long, unsigned long long *); */
|
||||
{ P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRBX,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
|
||||
{ P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRHX,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
|
||||
{ P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRWX,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
|
||||
{ P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRDX,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
|
||||
{ P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRDX,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
|
||||
|
||||
/* void vec_xst_trunc (vector signed __int128, signed long long, signed char *);
|
||||
void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *);
|
||||
void vec_xst_trunc (vector signed __int128, signed long long, signed short *);
|
||||
void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned short *);
|
||||
void vec_xst_trunc (vector signed __int128, signed long long, signed int *);
|
||||
void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned int *);
|
||||
void vec_xst_trunc (vector signed __int128, signed long long, signed long long *);
|
||||
void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned long long *); */
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRBX, RS6000_BTI_void,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRBX, RS6000_BTI_void,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRHX, RS6000_BTI_void,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRHX, RS6000_BTI_void,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRWX, RS6000_BTI_void,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRWX, RS6000_BTI_void,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
|
||||
RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI },
|
||||
{ P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void,
|
||||
RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI },
|
||||
|
||||
/* vector float vec_ldl (int, vector float *);
|
||||
vector float vec_ldl (int, float *); */
|
||||
{ ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF,
|
||||
|
@ -9574,6 +9633,85 @@ swap_endian_selector_for_mode (machine_mode mode)
|
|||
gen_rtvec_v (16, perm)));
|
||||
}
|
||||
|
||||
/* For the load and sign extend rightmost elements; load and zero extend
|
||||
rightmost element builtins. */
|
||||
static rtx
|
||||
altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool blk, bool sign_extend)
|
||||
{
|
||||
rtx pat, addr;
|
||||
tree arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
tree arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
machine_mode tmode = insn_data[icode].operand[0].mode;
|
||||
machine_mode smode = insn_data[icode].operand[1].mode;
|
||||
machine_mode mode0 = Pmode;
|
||||
machine_mode mode1 = Pmode;
|
||||
rtx op0 = expand_normal (arg0);
|
||||
rtx op1 = expand_normal (arg1);
|
||||
|
||||
if (icode == CODE_FOR_nothing)
|
||||
/* Builtin not supported on this processor. */
|
||||
return 0;
|
||||
|
||||
/* If we got invalid arguments bail out before generating bad rtl. */
|
||||
if (arg0 == error_mark_node || arg1 == error_mark_node)
|
||||
return const0_rtx;
|
||||
|
||||
if (target == 0
|
||||
|| GET_MODE (target) != tmode
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
|
||||
if (op0 == const0_rtx)
|
||||
addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
|
||||
else
|
||||
{
|
||||
op0 = copy_to_mode_reg (mode0, op0);
|
||||
addr = gen_rtx_MEM (blk ? BLKmode : smode,
|
||||
gen_rtx_PLUS (Pmode, op1, op0));
|
||||
}
|
||||
|
||||
if (sign_extend)
|
||||
{
|
||||
rtx discratch = gen_reg_rtx (DImode);
|
||||
rtx tiscratch = gen_reg_rtx (TImode);
|
||||
|
||||
/* Emit the lxvr*x insn. */
|
||||
pat = GEN_FCN (icode) (tiscratch, addr);
|
||||
if (!pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
|
||||
/* Emit a sign extension from QI,HI,WI to double (DI). */
|
||||
rtx scratch = gen_lowpart (smode, tiscratch);
|
||||
if (icode == CODE_FOR_vsx_lxvrbx)
|
||||
emit_insn (gen_extendqidi2 (discratch, scratch));
|
||||
else if (icode == CODE_FOR_vsx_lxvrhx)
|
||||
emit_insn (gen_extendhidi2 (discratch, scratch));
|
||||
else if (icode == CODE_FOR_vsx_lxvrwx)
|
||||
emit_insn (gen_extendsidi2 (discratch, scratch));
|
||||
/* Assign discratch directly if scratch is already DI. */
|
||||
if (icode == CODE_FOR_vsx_lxvrdx)
|
||||
discratch = scratch;
|
||||
|
||||
/* Emit the sign extension from DI (double) to TI (quad). */
|
||||
emit_insn (gen_extendditi2 (target, discratch));
|
||||
|
||||
return target;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Zero extend. */
|
||||
pat = GEN_FCN (icode) (target, addr);
|
||||
if (!pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static rtx
|
||||
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
|
||||
{
|
||||
|
@ -9692,7 +9830,7 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
|
|||
rtx op0 = expand_normal (arg0);
|
||||
rtx op1 = expand_normal (arg1);
|
||||
rtx op2 = expand_normal (arg2);
|
||||
rtx pat, addr, rawaddr;
|
||||
rtx pat, addr, rawaddr, truncrtx;
|
||||
machine_mode tmode = insn_data[icode].operand[0].mode;
|
||||
machine_mode smode = insn_data[icode].operand[1].mode;
|
||||
machine_mode mode1 = Pmode;
|
||||
|
@ -9731,6 +9869,25 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
|
|||
|
||||
emit_insn (gen_rtx_SET (addr, op0));
|
||||
}
|
||||
else if (icode == CODE_FOR_vsx_stxvrbx
|
||||
|| icode == CODE_FOR_vsx_stxvrhx
|
||||
|| icode == CODE_FOR_vsx_stxvrwx
|
||||
|| icode == CODE_FOR_vsx_stxvrdx)
|
||||
{
|
||||
truncrtx = gen_rtx_TRUNCATE (tmode, op0);
|
||||
op0 = copy_to_mode_reg (E_TImode, truncrtx);
|
||||
|
||||
if (op1 == const0_rtx)
|
||||
addr = gen_rtx_MEM (Pmode, op2);
|
||||
else
|
||||
{
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
|
||||
}
|
||||
pat = GEN_FCN (icode) (addr, op0);
|
||||
if (pat)
|
||||
emit_insn (pat);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
|
||||
|
@ -10750,6 +10907,16 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
|
|||
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
|
||||
case ALTIVEC_BUILTIN_STVEWX:
|
||||
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
|
||||
|
||||
case P10_BUILTIN_TR_STXVRBX:
|
||||
return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrbx, exp);
|
||||
case P10_BUILTIN_TR_STXVRHX:
|
||||
return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrhx, exp);
|
||||
case P10_BUILTIN_TR_STXVRWX:
|
||||
return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrwx, exp);
|
||||
case P10_BUILTIN_TR_STXVRDX:
|
||||
return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrdx, exp);
|
||||
|
||||
case ALTIVEC_BUILTIN_STVXL_V2DF:
|
||||
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
|
||||
case ALTIVEC_BUILTIN_STVXL_V2DI:
|
||||
|
@ -11012,6 +11179,30 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
|
|||
case ALTIVEC_BUILTIN_LVEWX:
|
||||
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
|
||||
exp, target, false);
|
||||
case P10_BUILTIN_SE_LXVRBX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrbx,
|
||||
exp, target, false, true);
|
||||
case P10_BUILTIN_SE_LXVRHX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrhx,
|
||||
exp, target, false, true);
|
||||
case P10_BUILTIN_SE_LXVRWX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrwx,
|
||||
exp, target, false, true);
|
||||
case P10_BUILTIN_SE_LXVRDX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrdx,
|
||||
exp, target, false, true);
|
||||
case P10_BUILTIN_ZE_LXVRBX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrbx,
|
||||
exp, target, false, false);
|
||||
case P10_BUILTIN_ZE_LXVRHX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrhx,
|
||||
exp, target, false, false);
|
||||
case P10_BUILTIN_ZE_LXVRWX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrwx,
|
||||
exp, target, false, false);
|
||||
case P10_BUILTIN_ZE_LXVRDX:
|
||||
return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrdx,
|
||||
exp, target, false, false);
|
||||
case ALTIVEC_BUILTIN_LVXL_V2DF:
|
||||
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
|
||||
exp, target, false);
|
||||
|
@ -13294,6 +13485,18 @@ altivec_init_builtins (void)
|
|||
def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
|
||||
def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
|
||||
def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
|
||||
def_builtin ("__builtin_altivec_se_lxvrbx", v16qi_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRBX);
|
||||
def_builtin ("__builtin_altivec_se_lxvrhx", v8hi_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRHX);
|
||||
def_builtin ("__builtin_altivec_se_lxvrwx", v4si_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRWX);
|
||||
def_builtin ("__builtin_altivec_se_lxvrdx", v2di_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRDX);
|
||||
def_builtin ("__builtin_altivec_ze_lxvrbx", v16qi_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRBX);
|
||||
def_builtin ("__builtin_altivec_ze_lxvrhx", v8hi_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRHX);
|
||||
def_builtin ("__builtin_altivec_ze_lxvrwx", v4si_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRWX);
|
||||
def_builtin ("__builtin_altivec_ze_lxvrdx", v2di_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRDX);
|
||||
def_builtin ("__builtin_altivec_tr_stxvrbx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRBX);
|
||||
def_builtin ("__builtin_altivec_tr_stxvrhx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRHX);
|
||||
def_builtin ("__builtin_altivec_tr_stxvrwx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRWX);
|
||||
def_builtin ("__builtin_altivec_tr_stxvrdx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRDX);
|
||||
def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
|
||||
def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
|
||||
ALTIVEC_BUILTIN_LVXL_V2DF);
|
||||
|
@ -13359,6 +13562,9 @@ altivec_init_builtins (void)
|
|||
def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
|
||||
def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
|
||||
def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
|
||||
def_builtin ("__builtin_vec_se_lxvrx", v1ti_ftype_long_pcvoid, P10_BUILTIN_VEC_SE_LXVRX);
|
||||
def_builtin ("__builtin_vec_ze_lxvrx", v1ti_ftype_long_pcvoid, P10_BUILTIN_VEC_ZE_LXVRX);
|
||||
def_builtin ("__builtin_vec_tr_stxvrx", void_ftype_opaque_long_pvoid, P10_BUILTIN_VEC_TR_STXVRX);
|
||||
def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
|
||||
def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
|
||||
def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
|
||||
|
|
|
@ -1255,6 +1255,24 @@
|
|||
}
|
||||
})
|
||||
|
||||
;; Load rightmost element from load_data
|
||||
;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
|
||||
;; The INT_ISA3-mode memory operand is zero-extended into a TImode
|
||||
;; VSX register.  Only enabled for TARGET_POWER10.
|
||||
(define_insn "vsx_lxvr<wd>x"
|
||||
[(set (match_operand:TI 0 "vsx_register_operand" "=wa")
|
||||
(zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
|
||||
"TARGET_POWER10"
|
||||
"lxvr<wd>x %x0,%y1"
|
||||
[(set_attr "type" "vecload")])
|
||||
|
||||
;; Store rightmost element into store_data
|
||||
;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
|
||||
;; The TImode VSX register operand is truncated to the INT_ISA3 mode
|
||||
;; of the memory destination.  Only enabled for TARGET_POWER10.
|
||||
(define_insn "vsx_stxvr<wd>x"
|
||||
[(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
|
||||
(truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
|
||||
"TARGET_POWER10"
|
||||
"stxvr<wd>x %x1,%y0"
|
||||
[(set_attr "type" "vecstore")])
|
||||
|
||||
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
|
||||
;; when you really want their element-reversing behavior.
|
||||
(define_insn "vsx_ld_elemrev_v2di"
|
||||
|
|
|
@ -17664,6 +17664,37 @@ Perform a 64-bit parallel bits extract operation, as if implemented by the
|
|||
@code{pextd} instruction.
|
||||
@findex __builtin_pextd
|
||||
|
||||
@smallexample
|
||||
@exdent vector signed __int128 vec_xl_sext (signed long long, signed char *);
|
||||
@exdent vector signed __int128 vec_xl_sext (signed long long, signed short *);
|
||||
@exdent vector signed __int128 vec_xl_sext (signed long long, signed int *);
|
||||
@exdent vector signed __int128 vec_xl_sext (signed long long, signed long long *);
|
||||
@exdent vector unsigned __int128 vec_xl_zext (signed long long, unsigned char *);
|
||||
@exdent vector unsigned __int128 vec_xl_zext (signed long long, unsigned short *);
|
||||
@exdent vector unsigned __int128 vec_xl_zext (signed long long, unsigned int *);
|
||||
@exdent vector unsigned __int128 vec_xl_zext (signed long long, unsigned long long *);
|
||||
@end smallexample
|
||||
|
||||
Load an element and sign extend (@code{vec_xl_sext}) or zero extend (@code{vec_xl_zext}) it to an __int128 vector, as if implemented by the ISA 3.1
|
||||
@code{lxvrbx}, @code{lxvrhx}, @code{lxvrwx}, and @code{lxvrdx} instructions.
|
||||
@findex vec_xl_sext
|
||||
@findex vec_xl_zext
|
||||
|
||||
@smallexample
|
||||
@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed char *);
|
||||
@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed short *);
|
||||
@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed int *);
|
||||
@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed long long *);
|
||||
@exdent void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *);
|
||||
@exdent void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned short *);
|
||||
@exdent void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned int *);
|
||||
@exdent void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned long long *);
|
||||
@end smallexample
|
||||
|
||||
Truncate and store the rightmost element of a vector, as if implemented by the
|
||||
ISA 3.1 @code{stxvrbx} @code{stxvrhx} @code{stxvrwx} @code{stxvrdx} instructions.
|
||||
@findex vec_xst_trunc
|
||||
|
||||
@node PowerPC AltiVec/VSX Built-in Functions
|
||||
@subsection PowerPC AltiVec/VSX Built-in Functions
|
||||
|
||||
|
|
170
gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-char.c
Normal file
170
gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-char.c
Normal file
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
Test of vec_xl_sext and vec_xl_zext (load into rightmost
|
||||
vector element and zero/sign extend). */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O3" } */
|
||||
|
||||
/* At the time of writing, the number of lxvrbx instructions is
|
||||
double what we expect because we are generating a
|
||||
.constprop copy of the function. */
|
||||
/* { dg-final { scan-assembler-times {\mlxvrbx\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlbx\M} 0 } } */
|
||||
|
||||
#define NUM_VEC_ELEMS 16
|
||||
#define ITERS 16
|
||||
|
||||
/*
|
||||
Codegen at time of writing is a lxvrbx for both the
|
||||
zero and sign extended tests. The sign extension test
|
||||
also uses mfvsr*d, extsb, mtvsrdd, vextsd2q.
|
||||
|
||||
0000000010000c90 <test_sign_extended_load>:
|
||||
10000c90: 1a 18 04 7c lxvrbx vs0,r4,r3
|
||||
10000c94: 66 00 0b 7c mfvsrd r11,vs0
|
||||
10000c98: 66 02 0a 7c mfvsrld r10,vs0
|
||||
10000c9c: 74 07 4a 7d extsb r10,r10
|
||||
10000ca0: 67 53 40 7c mtvsrdd vs34,0,r10
|
||||
10000ca4: 02 16 5b 10 vextsd2q v2,v2
|
||||
10000ca8: 20 00 80 4e blr
|
||||
|
||||
0000000010000cc0 <test_zero_extended_unsigned_load>:
|
||||
10000cc0: 1b 18 44 7c lxvrbx vs34,r4,r3
|
||||
10000cc4: 20 00 80 4e blr
|
||||
*/
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
long long buffer[8];
|
||||
unsigned long verbose=0;
|
||||
|
||||
char initbuffer[64] = {
|
||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
||||
0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x80,
|
||||
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
|
||||
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0x90,
|
||||
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
|
||||
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xa0,
|
||||
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
||||
0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xb0
|
||||
};
|
||||
|
||||
vector signed __int128 signed_expected[16] = {
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000011},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000012},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000013},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000014},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000015},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000016},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000017},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000000018},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff89},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff8a},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff8b},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff8c},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff8d},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff8e},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff8f},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffffff80}
|
||||
};
|
||||
|
||||
vector unsigned __int128 unsigned_expected[16] = {
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000011},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000012},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000013},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000014},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000015},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000016},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000017},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000018},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000089},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000000000008a},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000000000008b},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000000000008c},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000000000008d},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000000000008e},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000000000008f},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000080}
|
||||
};
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector signed __int128 test_sign_extended_load(int RA, signed char * RB) {
|
||||
return vec_xl_sext (RA, RB);
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector unsigned __int128 test_zero_extended_unsigned_load(int RA, unsigned char * RB) {
|
||||
return vec_xl_zext (RA, RB);
|
||||
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int iteration=0;
|
||||
int mismatch=0;
|
||||
vector signed __int128 signed_result_v;
|
||||
vector unsigned __int128 unsigned_result_v;
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
memcpy(&buffer, &initbuffer, sizeof(buffer));
|
||||
|
||||
if (verbose) {
|
||||
printf("input buffer:\n");
|
||||
for (int k=0;k<64;k++) {
|
||||
printf("%x ",initbuffer[k]);
|
||||
if (k && (k+1)%16==0) printf("\n");
|
||||
}
|
||||
printf("signed_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
printf("unsigned_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
signed_result_v = test_sign_extended_load (iteration, (signed char*)buffer);
|
||||
if (signed_result_v[0] != signed_expected[iteration][0] ) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from signed load. i=%d \n", iteration);
|
||||
printf("got: %llx ",signed_result_v[0]>>64);
|
||||
printf(" %llx \n",signed_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
unsigned_result_v = test_zero_extended_unsigned_load (iteration, (unsigned char*)buffer);
|
||||
if (unsigned_result_v[0] != unsigned_expected[iteration][0]) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from unsigned load. i=%d \n", iteration);
|
||||
printf("got: %llx ",unsigned_result_v[0]>>64);
|
||||
printf(" %llx \n",unsigned_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",unsigned_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",unsigned_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
168
gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
Normal file
168
gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
Test of vec_xl_sext and vec_xl_zext (load into rightmost
|
||||
vector element and zero/sign extend). */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
|
||||
/* Deliberately set optimization to zero for this test to confirm
|
||||
the lxvr*x instruction is generated. At higher optimization levels
|
||||
the instruction we are looking for is sometimes replaced by other
|
||||
load instructions. */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O0" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mlxvrwx\M} 2 } } */
|
||||
|
||||
#define NUM_VEC_ELEMS 4
|
||||
#define ITERS 16
|
||||
|
||||
/*
|
||||
Codegen at time of writing is a single lxvrwx for the zero
|
||||
extended test, and a lwax,mtvsrdd,vextsd2q for the sign
|
||||
extended test.
|
||||
|
||||
0000000010000c90 <test_sign_extended_load>:
|
||||
10000c90: aa 1a 24 7d lwax r9,r4,r3
|
||||
10000c94: 67 4b 40 7c mtvsrdd vs34,0,r9
|
||||
10000c98: 02 16 5b 10 vextsd2q v2,v2
|
||||
10000c9c: 20 00 80 4e blr
|
||||
|
||||
0000000010000cb0 <test_zero_extended_unsigned_load>:
|
||||
10000cb0: 9b 18 44 7c lxvrwx vs34,r4,r3
|
||||
10000cb4: 20 00 80 4e blr
|
||||
*/
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
long long buffer[8];
|
||||
unsigned long verbose=0;
|
||||
|
||||
char initbuffer[64] = {
|
||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
||||
0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x80,
|
||||
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
|
||||
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0x90,
|
||||
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
|
||||
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xa0,
|
||||
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
||||
0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xb0
|
||||
};
|
||||
|
||||
vector signed __int128 signed_expected[16] = {
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000014131211},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000015141312},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000016151413},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000017161514},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000018171615},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff89181716},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff8a891817},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff8b8a8918},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff8c8b8a89},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff8d8c8b8a},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff8e8d8c8b},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff8f8e8d8c},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffff808f8e8d},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000021808f8e},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x000000002221808f},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000023222180}
|
||||
};
|
||||
|
||||
vector unsigned __int128 unsigned_expected[16] = {
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000014131211},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000015141312},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000016151413},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000017161514},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000018171615},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000089181716},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000008a891817},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000008b8a8918},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000008c8b8a89},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000008d8c8b8a},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000008e8d8c8b},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000008f8e8d8c},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000808f8e8d},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000021808f8e},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x000000002221808f},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000023222180}
|
||||
};
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector signed __int128 test_sign_extended_load(int RA, signed int * RB) {
|
||||
return vec_xl_sext (RA, RB);
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector unsigned __int128 test_zero_extended_unsigned_load(int RA, unsigned int * RB) {
|
||||
return vec_xl_zext (RA, RB);
|
||||
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int iteration=0;
|
||||
int mismatch=0;
|
||||
vector signed __int128 signed_result_v;
|
||||
vector unsigned __int128 unsigned_result_v;
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
memcpy(&buffer, &initbuffer, sizeof(buffer));
|
||||
|
||||
if (verbose) {
|
||||
printf("input buffer:\n");
|
||||
for (int k=0;k<64;k++) {
|
||||
printf("%x ",initbuffer[k]);
|
||||
if (k && (k+1)%16==0) printf("\n");
|
||||
}
|
||||
printf("signed_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
printf("unsigned_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
signed_result_v = test_sign_extended_load (iteration, (signed int*)buffer);
|
||||
if (signed_result_v[0] != signed_expected[iteration][0] ) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from signed load. i=%d \n", iteration);
|
||||
printf("got: %llx ",signed_result_v[0]>>64);
|
||||
printf(" %llx \n",signed_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
unsigned_result_v = test_zero_extended_unsigned_load (iteration, (unsigned int*)buffer);
|
||||
if (unsigned_result_v[0] != unsigned_expected[iteration][0]) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from unsigned load. i=%d \n", iteration);
|
||||
printf("got: %llx ",unsigned_result_v[0]>>64);
|
||||
printf(" %llx \n",unsigned_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",unsigned_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",unsigned_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
Test of vec_xl_sext and vec_xl_zext (load into rightmost
|
||||
vector element and zero/sign extend). */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O3" } */
|
||||
|
||||
/* At time of writing, we also generate a .constprop copy
|
||||
of the function, so our instruction hit count is
|
||||
twice of what we would otherwise expect. */
|
||||
/* { dg-final { scan-assembler-times {\mlxvrdx\M} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\mlvdx\M} 0 } } */
|
||||
|
||||
#define NUM_VEC_ELEMS 2
|
||||
#define ITERS 16
|
||||
|
||||
/*
|
||||
Codegen at time of writing uses lxvrdx for both sign and
|
||||
zero extend tests. The sign extended test also uses
|
||||
mfvsr*d, mtvsrdd, vextsd2q.
|
||||
|
||||
0000000010000c90 <test_sign_extended_load>:
|
||||
10000c90: da 18 04 7c lxvrdx vs0,r4,r3
|
||||
10000c94: 66 00 0b 7c mfvsrd r11,vs0
|
||||
10000c98: 66 02 0a 7c mfvsrld r10,vs0
|
||||
10000c9c: 67 53 40 7c mtvsrdd vs34,0,r10
|
||||
10000ca0: 02 16 5b 10 vextsd2q v2,v2
|
||||
10000ca4: 20 00 80 4e blr
|
||||
|
||||
0000000010000cc0 <test_zero_extended_unsigned_load>:
|
||||
10000cc0: db 18 44 7c lxvrdx vs34,r4,r3
|
||||
10000cc4: 20 00 80 4e blr
|
||||
*/
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
long long buffer[8];
|
||||
unsigned long verbose=0;
|
||||
|
||||
char initbuffer[64] = {
|
||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
||||
0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x80,
|
||||
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
|
||||
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0x90,
|
||||
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
|
||||
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xa0,
|
||||
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
||||
0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xb0
|
||||
};
|
||||
|
||||
vector signed __int128 signed_expected[16] = {
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x1817161514131211},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8918171615141312},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8a89181716151413},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8b8a891817161514},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8c8b8a8918171615},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8d8c8b8a89181716},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8e8d8c8b8a891817},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x8f8e8d8c8b8a8918},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0x808f8e8d8c8b8a89},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x21808f8e8d8c8b8a},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x2221808f8e8d8c8b},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x232221808f8e8d8c},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x24232221808f8e8d},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x2524232221808f8e},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x262524232221808f},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x2726252423222180}
|
||||
};
|
||||
|
||||
vector unsigned __int128 unsigned_expected[16] = {
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x1817161514131211},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8918171615141312},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8a89181716151413},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8b8a891817161514},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8c8b8a8918171615},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8d8c8b8a89181716},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8e8d8c8b8a891817},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x8f8e8d8c8b8a8918},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x808f8e8d8c8b8a89},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x21808f8e8d8c8b8a},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x2221808f8e8d8c8b},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x232221808f8e8d8c},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x24232221808f8e8d},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x2524232221808f8e},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x262524232221808f},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x2726252423222180}
|
||||
};
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector signed __int128 test_sign_extended_load(int RA, signed long long * RB) {
|
||||
return vec_xl_sext (RA, RB);
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector unsigned __int128 test_zero_extended_unsigned_load(int RA, unsigned long long * RB) {
|
||||
return vec_xl_zext (RA, RB);
|
||||
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int iteration=0;
|
||||
int mismatch=0;
|
||||
vector signed __int128 signed_result_v;
|
||||
vector unsigned __int128 unsigned_result_v;
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
memcpy(&buffer, &initbuffer, sizeof(buffer));
|
||||
|
||||
if (verbose) {
|
||||
printf("input buffer:\n");
|
||||
for (int k=0;k<64;k++) {
|
||||
printf("%x ",initbuffer[k]);
|
||||
if (k && (k+1)%16==0) printf("\n");
|
||||
}
|
||||
printf("signed_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
printf("unsigned_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
signed_result_v = test_sign_extended_load (iteration, (signed long long*)buffer);
|
||||
if (signed_result_v[0] != signed_expected[iteration][0] ) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from signed load. i=%d \n", iteration);
|
||||
printf("got: %llx ",signed_result_v[0]>>64);
|
||||
printf(" %llx \n",signed_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
unsigned_result_v = test_zero_extended_unsigned_load (iteration, (unsigned long long*)buffer);
|
||||
if (unsigned_result_v[0] != unsigned_expected[iteration][0]) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from unsigned load. i=%d \n", iteration);
|
||||
printf("got: %llx ",unsigned_result_v[0]>>64);
|
||||
printf(" %llx \n",unsigned_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",unsigned_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",unsigned_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
168
gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
Normal file
168
gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
Test of vec_xl_sext and vec_xl_zext (load into rightmost
|
||||
vector element and zero/sign extend). */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
|
||||
/* Deliberately set optimization to zero for this test to confirm
|
||||
the lxvr*x instruction is generated. At higher optimization levels
|
||||
the instruction we are looking for is sometimes replaced by other
|
||||
load instructions. */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O0" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mlxvrhx\M} 2 } } */
|
||||
|
||||
#define NUM_VEC_ELEMS 8
|
||||
#define ITERS 16
|
||||
|
||||
/*
|
||||
Codegen at time of writing uses lxvrhx for the zero
|
||||
extension test and lhax,mtvsrdd,vextsd2q for the
|
||||
sign extended test.
|
||||
|
||||
0000000010001810 <test_sign_extended_load>:
|
||||
10001810: ae 1a 24 7d lhax r9,r4,r3
|
||||
10001814: 67 4b 40 7c mtvsrdd vs34,0,r9
|
||||
10001818: 02 16 5b 10 vextsd2q v2,v2
|
||||
1000181c: 20 00 80 4e blr
|
||||
|
||||
0000000010001830 <test_zero_extended_unsigned_load>:
|
||||
10001830: 5b 18 44 7c lxvrhx vs34,r4,r3
|
||||
10001834: 20 00 80 4e blr
|
||||
*/
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
long long buffer[8];
|
||||
unsigned long verbose=0;
|
||||
|
||||
char initbuffer[64] = {
|
||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
||||
0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x80,
|
||||
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
|
||||
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0x90,
|
||||
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
|
||||
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xa0,
|
||||
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
||||
0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xb0
|
||||
};
|
||||
|
||||
vector signed __int128 signed_expected[16] = {
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001211},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001312},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001413},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001514},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001615},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001716},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000001817},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8918},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8a89},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8b8a},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8c8b},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8d8c},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8e8d},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff8f8e},
|
||||
{ (__int128) 0xffffffffffffffff << 64 | (__int128) 0xffffffffffff808f},
|
||||
{ (__int128) 0x0000000000000000 << 64 | (__int128) 0x0000000000002180}
|
||||
};
|
||||
|
||||
vector unsigned __int128 unsigned_expected[16] = {
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001211},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001312},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001413},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001514},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001615},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001716},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000001817},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008918},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008a89},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008b8a},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008c8b},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008d8c},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008e8d},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000008f8e},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x0000000000000808f},
|
||||
{ (unsigned __int128) 0x0000000000000000 << 64 | (unsigned __int128) 0x00000000000002180}
|
||||
};
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector signed __int128 test_sign_extended_load(int RA, signed short * RB) {
|
||||
return vec_xl_sext (RA, RB);
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
vector unsigned __int128 test_zero_extended_unsigned_load(int RA, unsigned short * RB) {
|
||||
return vec_xl_zext (RA, RB);
|
||||
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int iteration=0;
|
||||
int mismatch=0;
|
||||
vector signed __int128 signed_result_v;
|
||||
vector unsigned __int128 unsigned_result_v;
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
memcpy(&buffer, &initbuffer, sizeof(buffer));
|
||||
|
||||
if (verbose) {
|
||||
printf("input buffer:\n");
|
||||
for (int k=0;k<64;k++) {
|
||||
printf("%x ",initbuffer[k]);
|
||||
if (k && (k+1)%16==0) printf("\n");
|
||||
}
|
||||
printf("signed_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
printf("unsigned_expected:\n");
|
||||
for (int k=0;k<ITERS;k++) {
|
||||
printf("%llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
signed_result_v = test_sign_extended_load (iteration, (signed short*)buffer);
|
||||
if (signed_result_v[0] != signed_expected[iteration][0] ) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from signed load. i=%d \n", iteration);
|
||||
printf("got: %llx ",signed_result_v[0]>>64);
|
||||
printf(" %llx \n",signed_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",signed_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",signed_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
for (iteration = 0; iteration < ITERS ; iteration++ ) {
|
||||
unsigned_result_v = test_zero_extended_unsigned_load (iteration, (unsigned short*)buffer);
|
||||
if (unsigned_result_v[0] != unsigned_expected[iteration][0]) {
|
||||
mismatch++;
|
||||
printf("Unexpected results from unsigned load. i=%d \n", iteration);
|
||||
printf("got: %llx ",unsigned_result_v[0]>>64);
|
||||
printf(" %llx \n",unsigned_result_v[0]&0xffffffffffffffff);
|
||||
printf("expected: %llx ",unsigned_expected[iteration][0]>>64);
|
||||
printf(" %llx \n",unsigned_expected[iteration][0]&0xffffffffffffffff);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
Test of vec_xst_trunc (truncate and store rightmost vector element) */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
/* Deliberately set optimization to zero for this test to confirm
|
||||
the stxvr*x instruction is generated. At higher optimization levels
|
||||
the instruction we are looking for is sometimes replaced by other
|
||||
store instructions. */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O0" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mstxvrbx\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstbx\M} 0 } } */
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
vector signed __int128 store_this_s[4] = {
|
||||
{ (__int128) 0x7000000000000000 << 64 | (__int128) 0x123456789abcdef8ULL},
|
||||
{ (__int128) 0x8000000000000000 << 64 | (__int128) 0xfedcba9876543217ULL},
|
||||
{ (__int128) 0x1000000000000000 << 64 | (__int128) 0xccccccccccccccccULL},
|
||||
{ (__int128) 0xf000000000000000 << 64 | (__int128) 0xaaaaaaaaaaaaaaaaULL}
|
||||
};
|
||||
|
||||
vector unsigned __int128 store_this_us[4] = {
|
||||
{ (unsigned __int128) 0x7000000000000000 << 64 | (unsigned __int128) 0x123456789abcdef8ULL},
|
||||
{ (unsigned __int128) 0x8000000000000000 << 64 | (unsigned __int128) 0xfedcba9876543217ULL},
|
||||
{ (unsigned __int128) 0x1000000000000000 << 64 | (unsigned __int128) 0xeeeeeeeeeeeeeeeeULL},
|
||||
{ (unsigned __int128) 0xf000000000000000 << 64 | (unsigned __int128) 0x5555555555555555ULL}
|
||||
};
|
||||
|
||||
#define NUM_VEC_ELEMS 16
|
||||
|
||||
vector signed char signed_expected[4] = {
|
||||
{ 0xf8, 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0},
|
||||
{ 0x0 , 0x0, 0x0, 0x0, 0x17, 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0},
|
||||
{ 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0xcc, 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0},
|
||||
{ 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0xaa, 0x0, 0x0, 0x0}
|
||||
};
|
||||
vector unsigned char unsigned_expected[4] = {
|
||||
{ 0xf8, 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0},
|
||||
{ 0x0 , 0x0, 0x0, 0x0, 0x17, 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0},
|
||||
{ 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0xee, 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0},
|
||||
{ 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x0 , 0x0, 0x0, 0x0, 0x55, 0x0, 0x0, 0x0}
|
||||
};
|
||||
|
||||
/* Scratch memory that the store tests write into and compare against
   the expected vectors.  */
unsigned long long rawbuffer[32];
/* Signed and unsigned byte views of rawbuffer passed to the store
   wrappers.  The cast names signed char explicitly so it matches the
   declared pointer type regardless of the signedness of plain char.  */
signed char * vsbuffer = (signed char *)rawbuffer;
unsigned char * vubuffer = (unsigned char *)rawbuffer;
|
||||
|
||||
void reset_buffer() {
|
||||
memset (&rawbuffer,0,sizeof(rawbuffer));
|
||||
}
|
||||
|
||||
/* Print the first NUM_VEC_ELEMS elements of V (a vector or a pointer) in
   hex.  Each element is converted to unsigned long so the argument type
   matches the %lx conversion (negative signed elements therefore print
   sign-extended).  The do/while (0) wrapper makes the macro expand to a
   single statement, and V is parenthesized before subscripting.  */
#define PRINT_VEC(V) \
  do { \
    for (int j=0;j<NUM_VEC_ELEMS;j++) { printf ("(0x%lx) ", (unsigned long) (V)[j] ); } \
  } while (0)
|
||||
|
||||
/* Wrapper around the truncating element store: forwards MYVEC, OFFSET
   and STORE_DATA to vec_xst_trunc unchanged.  */
void
test_signed_store (vector signed __int128 myvec, int offset,
                   signed char *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
/* Wrapper around the truncating element store: forwards MYVEC, OFFSET
   and STORE_DATA to vec_xst_trunc unchanged.  */
void
test_unsigned_store (vector unsigned __int128 myvec, int offset,
                     unsigned char *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int i;
|
||||
int memcmpresult;
|
||||
int mismatch=0;
|
||||
int verbose=0;
|
||||
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
if (verbose) {
|
||||
printf("expected results from signed tests:\n");
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
PRINT_VEC(signed_expected[i]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_signed_store (store_this_s[i], 4*i, vsbuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&signed_expected[i],sizeof(vector char));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch signed buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results: ");
|
||||
PRINT_VEC(vsbuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_unsigned_store (store_this_us[i], 4*i, vubuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&unsigned_expected[i],sizeof(vector char));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch unsigned buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results :");
|
||||
PRINT_VEC(vubuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
Test of vec_xst_trunc (truncate and store rightmost vector element) */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
/* Deliberately set optimization to zero for this test to confirm
|
||||
the stxvr*x instruction is generated. At higher optimization levels
|
||||
the instruction we are looking for is sometimes replaced by other
|
||||
store instructions. */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O0" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mstxvrwx\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstwx\M} 0 } } */
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
vector signed __int128 store_this_s[4] = {
|
||||
{ (__int128) 0x7000000000000000 << 64 | (__int128) 0x123456789abcdef8ULL},
|
||||
{ (__int128) 0x8000000000000000 << 64 | (__int128) 0xfedcba9876543217ULL},
|
||||
{ (__int128) 0x1000000000000000 << 64 | (__int128) 0xccccccccccccccccULL},
|
||||
{ (__int128) 0xf000000000000000 << 64 | (__int128) 0xaaaaaaaaaaaaaaaaULL}
|
||||
};
|
||||
|
||||
vector unsigned __int128 store_this_us[4] = {
|
||||
{ (unsigned __int128) 0x7000000000000000 << 64 | (unsigned __int128) 0x123456789abcdef8ULL},
|
||||
{ (unsigned __int128) 0x8000000000000000 << 64 | (unsigned __int128) 0xfedcba9876543217ULL},
|
||||
{ (unsigned __int128) 0x1000000000000000 << 64 | (unsigned __int128) 0xeeeeeeeeeeeeeeeeULL},
|
||||
{ (unsigned __int128) 0xf000000000000000 << 64 | (unsigned __int128) 0x5555555555555555ULL}
|
||||
};
|
||||
|
||||
#define NUM_VEC_ELEMS 4
|
||||
|
||||
vector signed int signed_expected[4] = {
|
||||
{0x9abcdef8, 0x0 , 0x0 , 0x0 },
|
||||
{0x0 , 0x76543217, 0x0 , 0x0 },
|
||||
{0x0 , 0x0 , 0xcccccccc, 0x0 },
|
||||
{0x0 , 0x0 , 0x0 , 0xaaaaaaaa },
|
||||
};
|
||||
vector unsigned int unsigned_expected[4] = {
|
||||
{0x9abcdef8, 0x0 , 0x0 , 0x0 },
|
||||
{0x0 , 0x76543217, 0x0 , 0x0 },
|
||||
{0x0 , 0x0 , 0xeeeeeeee, 0x0 },
|
||||
{0x0 , 0x0 , 0x0 , 0x55555555 },
|
||||
};
|
||||
|
||||
unsigned long long rawbuffer[32];
|
||||
signed int * vsbuffer = (int *)rawbuffer;
|
||||
unsigned int * vubuffer = (unsigned int *)rawbuffer;
|
||||
|
||||
void reset_buffer() {
|
||||
memset (&rawbuffer,0,sizeof(rawbuffer));
|
||||
}
|
||||
|
||||
/* Print the first NUM_VEC_ELEMS elements of V (a vector or a pointer) in
   hex.  Each element is converted to unsigned long so the argument type
   matches the %lx conversion (negative signed elements therefore print
   sign-extended).  The do/while (0) wrapper makes the macro expand to a
   single statement, and V is parenthesized before subscripting.  */
#define PRINT_VEC(V) \
  do { \
    for (int j=0;j<NUM_VEC_ELEMS;j++) { printf ("(0x%lx) ", (unsigned long) (V)[j] ); } \
  } while (0)
|
||||
|
||||
/* Wrapper around the truncating element store: forwards MYVEC, OFFSET
   and STORE_DATA to vec_xst_trunc unchanged.  */
void
test_signed_store (vector signed __int128 myvec, int offset,
                   signed int *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
/* Wrapper around the truncating element store: forwards MYVEC, OFFSET
   and STORE_DATA to vec_xst_trunc unchanged.  */
void
test_unsigned_store (vector unsigned __int128 myvec, int offset,
                     unsigned int *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int i;
|
||||
int memcmpresult;
|
||||
int mismatch=0;
|
||||
int verbose=0;
|
||||
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
if (verbose) {
|
||||
printf("expected results from signed tests:\n");
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
PRINT_VEC(signed_expected[i]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_signed_store (store_this_s[i], 4*i, vsbuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&signed_expected[i],sizeof(vector int));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch signed buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results: ");
|
||||
PRINT_VEC(vsbuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_unsigned_store (store_this_us[i], 4*i, vubuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&unsigned_expected[i],sizeof(vector int));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch unsigned buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results :");
|
||||
PRINT_VEC(vubuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
Test of vec_xst_trunc (truncate and store rightmost vector element) */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
|
||||
/* Deliberately set optimization to zero for this test to confirm
|
||||
the stxvr*x instruction is generated. At higher optimization levels
|
||||
the instruction we are looking for is sometimes replaced by other
|
||||
store instructions. */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O0" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mstxvrdx\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\mstwx\M} 0 } } */
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
vector signed __int128 store_this_s[4] = {
|
||||
{ (__int128) 0x7000000000000000 << 64 | (__int128) 0x123456789abcdef8ULL},
|
||||
{ (__int128) 0x8000000000000000 << 64 | (__int128) 0xfedcba9876543217ULL},
|
||||
{ (__int128) 0x1000000000000000 << 64 | (__int128) 0xccccccccccccccccULL},
|
||||
{ (__int128) 0xf000000000000000 << 64 | (__int128) 0xaaaaaaaaaaaaaaaaULL}
|
||||
};
|
||||
|
||||
vector unsigned __int128 store_this_us[4] = {
|
||||
{ (unsigned __int128) 0x7000000000000000 << 64 | (unsigned __int128) 0x123456789abcdef8ULL},
|
||||
{ (unsigned __int128) 0x8000000000000000 << 64 | (unsigned __int128) 0xfedcba9876543217ULL},
|
||||
{ (unsigned __int128) 0x1000000000000000 << 64 | (unsigned __int128) 0xeeeeeeeeeeeeeeeeULL},
|
||||
{ (unsigned __int128) 0xf000000000000000 << 64 | (unsigned __int128) 0x5555555555555555ULL}
|
||||
};
|
||||
|
||||
#define NUM_VEC_ELEMS 2
|
||||
|
||||
vector signed long long signed_expected[5] = {
|
||||
{ 0x123456789abcdef8, 0x0},
|
||||
{ 0x7654321700000000, 0xfedcba98},
|
||||
{ 0x0000000000000000, 0xcccccccccccccccc},
|
||||
{ 0x0000000000000000, 0xaaaaaaaa00000000} /*note that some data written into the next word */
|
||||
};
|
||||
vector unsigned long long unsigned_expected[5] = {
|
||||
{ 0x123456789abcdef8, 0x0},
|
||||
{ 0x7654321700000000, 0xfedcba98},
|
||||
{ 0x0000000000000000, 0xeeeeeeeeeeeeeeee},
|
||||
{ 0x0000000000000000, 0x5555555500000000}
|
||||
};
|
||||
|
||||
unsigned long long rawbuffer[32];
|
||||
signed long long * vsbuffer = (long long *)rawbuffer;
|
||||
unsigned long long * vubuffer = (unsigned long long *)rawbuffer;
|
||||
|
||||
void reset_buffer() {
|
||||
memset (&rawbuffer,0,sizeof(rawbuffer));
|
||||
}
|
||||
|
||||
/* Print the first NUM_VEC_ELEMS elements of V in hex, each as "(0x...) ".
   V is only ever indexed with [], so it may be a vector value or a pointer.
   Each element is converted to unsigned long long so that the argument
   always matches the %llx conversion (the elements here are long long,
   which %lx does not formally match).  The body is wrapped in
   do { } while (0) so the macro acts as one statement and is safe in an
   unbraced if/else.  */
#define PRINT_VEC(V) \
  do { \
    for (int j = 0; j < NUM_VEC_ELEMS; j++) \
      printf ("(0x%llx) ", (unsigned long long) (V)[j]); \
  } while (0)
|
||||
|
||||
/* Signed variant of the store under test: hand MYVEC, OFFSET and
   STORE_DATA to vec_xst_trunc unchanged.  The destination is addressed
   through a signed long long pointer.  */
void
test_signed_store (vector signed __int128 myvec, int offset,
                   signed long long *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
/* Unsigned variant of the store under test: hand MYVEC, OFFSET and
   STORE_DATA to vec_xst_trunc unchanged.  The destination is addressed
   through an unsigned long long pointer.  */
void
test_unsigned_store (vector unsigned __int128 myvec, int offset,
                     unsigned long long *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int i;
|
||||
int memcmpresult;
|
||||
int mismatch=0;
|
||||
int verbose=0;
|
||||
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
if (verbose) {
|
||||
printf("expected results from signed tests:\n");
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
PRINT_VEC(signed_expected[i]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_signed_store (store_this_s[i], 4*i, vsbuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&signed_expected[i],sizeof(vector long long));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch signed buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results: ");
|
||||
PRINT_VEC(vsbuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_unsigned_store (store_this_us[i], 4*i, vubuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&unsigned_expected[i],sizeof(vector long long));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch unsigned buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results :");
|
||||
PRINT_VEC(vubuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
Test of vec_xst_trunc (truncate and store rightmost vector element) */
|
||||
|
||||
/* { dg-do compile {target power10_ok} } */
|
||||
/* { dg-do run {target power10_hw} } */
|
||||
|
||||
/* Deliberately set optimization to zero for this test to confirm
|
||||
the stxvr*x instruction is generated. At higher optimization levels
|
||||
the instruction we are looking for is sometimes replaced by other
|
||||
store instructions. */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O0" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\mstxvrhx\M} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\msthx\M} 0 } } */
|
||||
|
||||
#include <altivec.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
vector signed __int128 store_this_s[4] = {
|
||||
{ (__int128) 0x7000000000000000 << 64 | (__int128) 0x123456789abcdef8ULL},
|
||||
{ (__int128) 0x8000000000000000 << 64 | (__int128) 0xfedcba9876543217ULL},
|
||||
{ (__int128) 0x1000000000000000 << 64 | (__int128) 0xccccccccccccccccULL},
|
||||
{ (__int128) 0xf000000000000000 << 64 | (__int128) 0xaaaaaaaaaaaaaaaaULL}
|
||||
};
|
||||
|
||||
vector unsigned __int128 store_this_us[4] = {
|
||||
{ (unsigned __int128) 0x7000000000000000 << 64 | (unsigned __int128) 0x123456789abcdef8ULL},
|
||||
{ (unsigned __int128) 0x8000000000000000 << 64 | (unsigned __int128) 0xfedcba9876543217ULL},
|
||||
{ (unsigned __int128) 0x1000000000000000 << 64 | (unsigned __int128) 0xeeeeeeeeeeeeeeeeULL},
|
||||
{ (unsigned __int128) 0xf000000000000000 << 64 | (unsigned __int128) 0x5555555555555555ULL}
|
||||
};
|
||||
|
||||
#define NUM_VEC_ELEMS 8
|
||||
|
||||
vector signed short signed_expected[4] = {
|
||||
{0xdef8, 0x0, 0x0 , 0x0, 0x0 , 0x0, 0x0 , 0x0},
|
||||
{0x0 , 0x0, 0x3217, 0x0, 0x0 , 0x0, 0x0 , 0x0},
|
||||
{0x0 , 0x0, 0x0 , 0x0, 0xcccc, 0x0, 0x0 , 0x0},
|
||||
{0x0 , 0x0, 0x0 , 0x0, 0x0 , 0x0, 0xaaaa, 0x0}
|
||||
};
|
||||
vector unsigned short unsigned_expected[4] = {
|
||||
{0xdef8, 0x0, 0x0 , 0x0, 0x0 , 0x0, 0x0 , 0x0},
|
||||
{0x0 , 0x0, 0x3217, 0x0, 0x0 , 0x0, 0x0 , 0x0},
|
||||
{0x0 , 0x0, 0x0 , 0x0, 0xeeee, 0x0, 0x0 , 0x0},
|
||||
{0x0 , 0x0, 0x0 , 0x0, 0x0 , 0x0, 0x5555, 0x0}
|
||||
};
|
||||
|
||||
unsigned long long rawbuffer[32];
|
||||
signed short * vsbuffer = (short *)rawbuffer;
|
||||
unsigned short * vubuffer = (unsigned short *)rawbuffer;
|
||||
|
||||
void reset_buffer() {
|
||||
memset (&rawbuffer,0,sizeof(rawbuffer));
|
||||
}
|
||||
|
||||
/* Print the first NUM_VEC_ELEMS elements of V in hex, each as "(0x...) ".
   V is only ever indexed with [], so it may be a vector value or a pointer.
   Each element is converted to unsigned long long so that the argument
   always matches the %llx conversion (the elements here are shorts, which
   promote to int and do not match %lx; negative signed elements print
   sign-extended).  The body is wrapped in do { } while (0) so the macro
   acts as one statement and is safe in an unbraced if/else.  */
#define PRINT_VEC(V) \
  do { \
    for (int j = 0; j < NUM_VEC_ELEMS; j++) \
      printf ("(0x%llx) ", (unsigned long long) (V)[j]); \
  } while (0)
|
||||
|
||||
/* Signed variant of the store under test: hand MYVEC, OFFSET and
   STORE_DATA to vec_xst_trunc unchanged.  The destination is addressed
   through a signed short pointer.  */
void
test_signed_store (vector signed __int128 myvec, int offset,
                   signed short *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
/* Unsigned variant of the store under test: hand MYVEC, OFFSET and
   STORE_DATA to vec_xst_trunc unchanged.  The destination is addressed
   through an unsigned short pointer.  */
void
test_unsigned_store (vector unsigned __int128 myvec, int offset,
                     unsigned short *store_data)
{
  vec_xst_trunc (myvec, offset, store_data);
}
|
||||
|
||||
int main (int argc, char *argv [])
|
||||
{
|
||||
int i;
|
||||
int memcmpresult;
|
||||
int mismatch=0;
|
||||
int verbose=0;
|
||||
|
||||
#if VERBOSE
|
||||
verbose=1;
|
||||
printf("%s %s\n", __DATE__, __TIME__);
|
||||
#endif
|
||||
|
||||
if (verbose) {
|
||||
printf("expected results from signed tests:\n");
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
PRINT_VEC(signed_expected[i]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_signed_store (store_this_s[i], 4*i, vsbuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&signed_expected[i],sizeof(vector short));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch signed buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results: ");
|
||||
PRINT_VEC(vsbuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4 ; i++ ) {
|
||||
reset_buffer();
|
||||
test_unsigned_store (store_this_us[i], 4*i, vubuffer);
|
||||
memcmpresult = memcmp(rawbuffer,&unsigned_expected[i],sizeof(vector short));
|
||||
if (memcmpresult) {
|
||||
printf("mismatch unsigned buffer, i %d (memcmpresult:%d) \n",i,memcmpresult);
|
||||
mismatch++;
|
||||
if (verbose) {
|
||||
printf("results :");
|
||||
PRINT_VEC(vubuffer);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mismatch) {
|
||||
printf("%d mismatches. \n",mismatch);
|
||||
abort();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue