LoongArch: Add some vector pack/unpack patterns

gcc/ChangeLog:

	* config/loongarch/lasx.md (vec_unpacks_lo_<mode>): Redefine.
	(vec_unpacku_lo_<mode>): Ditto.
	(lasx_vext2xv_h<u>_b<u>): Replaced by vec_unpack<su>_lo_v32qi.
	(vec_unpack<su>_lo_v32qi): New insn.
	(lasx_vext2xv_w<u>_h<u>): Replaced by vec_unpack<su>_lo_v16hi.
	(vec_unpack<su>_lo_v16qi_internal): New insn, for 128 bits.
	(vec_unpack<su>_lo_v16hi): New insn.
	(lasx_vext2xv_d<u>_w<u>): Replaced by vec_unpack<su>_lo_v8si.
	(vec_unpack<su>_lo_v8hi_internal): New insn, for 128 bits.
	(vec_unpack<su>_lo_v8si): New insn.
	(vec_unpack<su>_lo_v4si_internal): New insn, for 128 bits.
	(vec_packs_float_v4di): New expander.
	(vec_pack_sfix_trunc_v4df): Ditto.
	(vec_unpacks_float_hi_v8si): Ditto.
	(vec_unpacks_float_lo_v8si): Ditto.
	(vec_unpack_sfix_trunc_hi_v8sf): Ditto.
	(vec_unpack_sfix_trunc_lo_v8sf): Ditto.
	* config/loongarch/loongarch-builtins.cc
	(CODE_FOR_lsx_vftintrz_w_d): Rename.
	(CODE_FOR_lsx_vftintrzh_l_s): Ditto.
	(CODE_FOR_lsx_vftintrzl_l_s): Ditto.
	(CODE_FOR_lsx_vffint_s_l): Ditto.
	(CODE_FOR_lsx_vffinth_d_w): Ditto.
	(CODE_FOR_lsx_vffintl_d_w): Ditto.
	(CODE_FOR_lsx_vexth_h_b): Ditto.
	(CODE_FOR_lsx_vexth_w_h): Ditto.
	(CODE_FOR_lsx_vexth_d_w): Ditto.
	(CODE_FOR_lsx_vexth_hu_bu): Ditto.
	(CODE_FOR_lsx_vexth_wu_hu): Ditto.
	(CODE_FOR_lsx_vexth_du_wu): Ditto.
	(CODE_FOR_lsx_vfcvth_d_s): Ditto.
	(CODE_FOR_lsx_vfcvtl_d_s): Ditto.
	(CODE_FOR_lasx_vext2xv_h_b): Ditto.
	(CODE_FOR_lasx_vext2xv_w_h): Ditto.
	(CODE_FOR_lasx_vext2xv_d_w): Ditto.
	(CODE_FOR_lasx_vext2xv_hu_bu): Ditto.
	(CODE_FOR_lasx_vext2xv_wu_hu): Ditto.
	(CODE_FOR_lasx_vext2xv_du_wu): Ditto.
	(loongarch_expand_builtin_insn): Swap source operands in
	CODE_FOR_lsx_vftintrz_w_d and CODE_FOR_lsx_vffint_s_l.
	* config/loongarch/loongarch-protos.h
	(loongarch_expand_vec_unpack): Remove useless parameter high_p.
	* config/loongarch/loongarch.cc (loongarch_expand_vec_unpack):
	Rewrite.
	* config/loongarch/lsx.md (vec_unpacks_hi_v4sf): Redefine.
	(vec_unpacks_lo_v4sf): Ditto.
	(vec_unpacks_hi_<mode>): Ditto.
	(vec_unpacku_hi_<mode>): Ditto.
	(lsx_vfcvth_d_s): Replaced by vec_unpacks_hi_v4sf.
	(lsx_vfcvtl_d_s): Replaced by vec_unpacks_lo_v4sf.
	(lsx_vffint_s_l): Replaced by vec_packs_float_v2di.
	(vec_packs_float_v2di): New insn.
	(lsx_vftintrz_w_d): Replaced by vec_pack_sfix_trunc_v2df.
	(vec_pack_sfix_trunc_v2df): New insn.
	(lsx_vffinth_d_w): Replaced by vec_unpacks_float_hi_v4si.
	(vec_unpacks_float_hi_v4si): New insn.
	(lsx_vffintl_d_w): Replaced by vec_unpacks_float_lo_v4si.
	(vec_unpacks_float_lo_v4si): New insn.
	(lsx_vftintrzh_l_s): Replaced by vec_unpack_sfix_trunc_hi_v4sf.
	(vec_unpack_sfix_trunc_hi_v4sf): New insn.
	(lsx_vftintrzl_l_s): Replaced by vec_unpack_sfix_trunc_lo_v4sf.
	(vec_unpack_sfix_trunc_lo_v4sf): New insn.
	(lsx_vexth_h<u>_b<u>): Replaced by vec_unpack<su>_hi_v16qi.
	(vec_unpack<su>_hi_v16qi): New insn.
	(lsx_vexth_w<u>_h<u>): Replaced by vec_unpack<su>_hi_v8hi.
	(vec_unpack<su>_hi_v8hi): New insn.
	(lsx_vexth_d<u>_w<u>): Replaced by vec_unpack<su>_hi_v4si.
	(vec_unpack<su>_hi_v4si): New insn.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vec_pack_unpack_128.c: New test.
	* gcc.target/loongarch/vec_pack_unpack_256.c: New test.
This commit is contained in:
Guo Jie 2024-12-30 10:38:51 +08:00 committed by Lulu Cheng
parent 66b6e578d9
commit 66a88e0f17
7 changed files with 436 additions and 135 deletions

View file

@ -463,17 +463,7 @@
(match_operand:ILASX_WHB 1 "register_operand")]
"ISA_HAS_LASX"
{
loongarch_expand_vec_unpack (operands, false/*unsigned_p*/,
true/*high_p*/);
DONE;
})
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<VDMODE256> 0 "register_operand")
(match_operand:ILASX_WHB 1 "register_operand")]
"ISA_HAS_LASX"
{
loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/);
loongarch_expand_vec_unpack (operands, false/*unsigned_p*/);
DONE;
})
@ -482,16 +472,7 @@
(match_operand:ILASX_WHB 1 "register_operand")]
"ISA_HAS_LASX"
{
loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/);
DONE;
})
(define_expand "vec_unpacku_lo_<mode>"
[(match_operand:<VDMODE256> 0 "register_operand")
(match_operand:ILASX_WHB 1 "register_operand")]
"ISA_HAS_LASX"
{
loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/);
loongarch_expand_vec_unpack (operands, true/*unsigned_p*/);
DONE;
})
@ -2537,7 +2518,7 @@
(set_attr "mode" "<MODE>")])
;; loongson-asx.
(define_insn "lasx_vext2xv_h<u>_b<u>"
(define_insn "vec_unpack<su>_lo_v32qi"
[(set (match_operand:V16HI 0 "register_operand" "=f")
(any_extend:V16HI
(vec_select:V16QI
@ -2555,7 +2536,21 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V16HI")])
(define_insn "lasx_vext2xv_w<u>_h<u>"
;; 128-bit "unpack low" insn: sign- or zero-extend (any_extend) elements
;; 0..7 of the V16QI operand 1 into the V8HI operand 0.
;; Although the operands are 128-bit modes, the pattern is only enabled
;; under ISA_HAS_LASX and is emitted as vext2xv.h<u>.b<u>.
;; NOTE(review): the %u operand modifier presumably prints the 256-bit
;; register name for the same register number -- confirm against the
;; back end's print_operand.
(define_insn "vec_unpack<su>_lo_v16qi_internal"
[(set (match_operand:V8HI 0 "register_operand" "=f")
(any_extend:V8HI
(vec_select:V8QI
(match_operand:V16QI 1 "register_operand" "f")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"ISA_HAS_LASX"
"vext2xv.h<u>.b<u>\t%u0,%u1"
[(set_attr "type" "simd_shift")
(set_attr "mode" "V8HI")])
(define_insn "vec_unpack<su>_lo_v16hi"
[(set (match_operand:V8SI 0 "register_operand" "=f")
(any_extend:V8SI
(vec_select:V8HI
@ -2569,7 +2564,19 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V8SI")])
(define_insn "lasx_vext2xv_d<u>_w<u>"
;; 128-bit "unpack low" insn: sign- or zero-extend (any_extend) elements
;; 0..3 of the V8HI operand 1 into the V4SI operand 0.
;; Enabled only under ISA_HAS_LASX; emitted as vext2xv.w<u>.h<u>.
;; NOTE(review): the %u operand modifier presumably prints the 256-bit
;; register name for the same register number -- confirm against the
;; back end's print_operand.
(define_insn "vec_unpack<su>_lo_v8hi_internal"
[(set (match_operand:V4SI 0 "register_operand" "=f")
(any_extend:V4SI
(vec_select:V4HI
(match_operand:V8HI 1 "register_operand" "f")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"ISA_HAS_LASX"
"vext2xv.w<u>.h<u>\t%u0,%u1"
[(set_attr "type" "simd_shift")
(set_attr "mode" "V4SI")])
(define_insn "vec_unpack<su>_lo_v8si"
[(set (match_operand:V4DI 0 "register_operand" "=f")
(any_extend:V4DI
(vec_select:V4SI
@ -2581,6 +2588,17 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V4DI")])
;; 128-bit "unpack low" insn: sign- or zero-extend (any_extend) elements
;; 0..1 of the V4SI operand 1 into the V2DI operand 0.
;; Enabled only under ISA_HAS_LASX; emitted as vext2xv.d<u>.w<u>.
;; NOTE(review): the %u operand modifier presumably prints the 256-bit
;; register name for the same register number -- confirm against the
;; back end's print_operand.
(define_insn "vec_unpack<su>_lo_v4si_internal"
[(set (match_operand:V2DI 0 "register_operand" "=f")
(any_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 1 "register_operand" "f")
(parallel [(const_int 0) (const_int 1)]))))]
"ISA_HAS_LASX"
"vext2xv.d<u>.w<u>\t%u0,%u1"
[(set_attr "type" "simd_shift")
(set_attr "mode" "V2DI")])
(define_insn "lasx_vext2xv_w<u>_b<u>"
[(set (match_operand:V8SI 0 "register_operand" "=f")
(any_extend:V8SI
@ -2972,6 +2990,19 @@
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "V4DF")])
;; Expander for the standard name vec_packs_float_v4di: operands 1 and 2
;; are V4DI inputs, operand 0 is the V8SF result.
;; Two insns are emitted:
;;   1. lasx_xvffint_s_l into a fresh V8SF temporary, with the two
;;      sources passed in swapped order (operands[2], then operands[1]);
;;   2. lasx_xvpermi_d_v8sf with immediate 0xd8 (0b11011000) from the
;;      temporary into operand 0.
;; NOTE(review): the permute presumably reorders the 64-bit chunks to
;; 0,2,1,3 so that the per-128-bit-lane results end up in element order
;; -- confirm against the xvffint.s.l / xvpermi.d instruction semantics.
(define_expand "vec_packs_float_v4di"
[(match_operand:V8SF 0 "register_operand")
(match_operand:V4DI 1 "register_operand")
(match_operand:V4DI 2 "register_operand")]
"ISA_HAS_LASX"
{
rtx tmp;
tmp = gen_reg_rtx (V8SFmode);
emit_insn (gen_lasx_xvffint_s_l (tmp, operands[2], operands[1]));
emit_insn (gen_lasx_xvpermi_d_v8sf (operands[0], tmp, GEN_INT (0xd8)));
DONE;
})
(define_insn "lasx_xvffint_s_l"
[(set (match_operand:V8SF 0 "register_operand" "=f")
(unspec:V8SF [(match_operand:V4DI 1 "register_operand" "f")
@ -2982,6 +3013,19 @@
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "V4DI")])
;; Expander for the standard name vec_pack_sfix_trunc_v4df: operands 1
;; and 2 are V4DF inputs, operand 0 is the V8SI result.
;; Two insns are emitted:
;;   1. lasx_xvftintrz_w_d into a fresh V8SI temporary, with the two
;;      sources passed in swapped order (operands[2], then operands[1]);
;;   2. lasx_xvpermi_d_v8si with immediate 0xd8 (0b11011000) from the
;;      temporary into operand 0.
;; NOTE(review): the permute presumably reorders the 64-bit chunks to
;; 0,2,1,3 so that the per-128-bit-lane results end up in element order
;; -- confirm against the xvftintrz.w.d / xvpermi.d instruction semantics.
(define_expand "vec_pack_sfix_trunc_v4df"
[(match_operand:V8SI 0 "register_operand")
(match_operand:V4DF 1 "register_operand")
(match_operand:V4DF 2 "register_operand")]
"ISA_HAS_LASX"
{
rtx tmp;
tmp = gen_reg_rtx (V8SImode);
emit_insn (gen_lasx_xvftintrz_w_d (tmp, operands[2], operands[1]));
emit_insn (gen_lasx_xvpermi_d_v8si (operands[0], tmp, GEN_INT (0xd8)));
DONE;
})
(define_insn "lasx_xvftintrz_w_d"
[(set (match_operand:V8SI 0 "register_operand" "=f")
(unspec:V8SI [(match_operand:V4DF 1 "register_operand" "f")
@ -3040,6 +3084,30 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V8SF")])
;; Expander for the standard name vec_unpacks_float_hi_v8si: operand 1 is
;; a V8SI input, operand 0 is the V4DF result.
;; Two insns are emitted:
;;   1. lasx_xvpermi_d_v8si with immediate 0xe8 (0b11101000) from
;;      operand 1 into a fresh V8SI temporary;
;;   2. lasx_xvffinth_d_w from the temporary into operand 0.
;; NOTE(review): the permute presumably moves the upper four SI elements
;; into the high half of each 128-bit lane, which is what xvffinth.d.w
;; reads -- confirm against the instruction semantics.
(define_expand "vec_unpacks_float_hi_v8si"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V8SI 1 "register_operand")]
"ISA_HAS_LASX"
{
rtx tmp;
tmp = gen_reg_rtx (V8SImode);
emit_insn (gen_lasx_xvpermi_d_v8si (tmp, operands[1], GEN_INT (0xe8)));
emit_insn (gen_lasx_xvffinth_d_w (operands[0], tmp));
DONE;
})
;; Expander for the standard name vec_unpacks_float_lo_v8si: operand 1 is
;; a V8SI input, operand 0 is the V4DF result.
;; Unlike the _hi variant this needs no permute.  Two insns are emitted:
;;   1. vec_unpacks_lo_v8si from operand 1 into a fresh V4DI temporary
;;      (signed widening of the low elements);
;;   2. floatv4div4df2 from the temporary into operand 0
;;      (integer to floating-point conversion).
(define_expand "vec_unpacks_float_lo_v8si"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V8SI 1 "register_operand")]
"ISA_HAS_LASX"
{
rtx tmp;
tmp = gen_reg_rtx (V4DImode);
emit_insn (gen_vec_unpacks_lo_v8si (tmp, operands[1]));
emit_insn (gen_floatv4div4df2 (operands[0], tmp));
DONE;
})
(define_insn "lasx_xvffinth_d_w"
[(set (match_operand:V4DF 0 "register_operand" "=f")
(unspec:V4DF [(match_operand:V8SI 1 "register_operand" "f")]
@ -3058,6 +3126,18 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V8SI")])
;; Expander for the standard name vec_unpack_sfix_trunc_hi_v8sf: operand 1
;; is a V8SF input, operand 0 is the V4DI result.
;; Two insns are emitted:
;;   1. lasx_xvpermi_d_v8sf with immediate 0xe8 (0b11101000) from
;;      operand 1 into a fresh V8SF temporary;
;;   2. lasx_xvftintrzh_l_s from the temporary into operand 0.
;; NOTE(review): the permute presumably moves the upper four SF elements
;; into the high half of each 128-bit lane, which is what xvftintrzh.l.s
;; reads -- confirm against the instruction semantics.
(define_expand "vec_unpack_sfix_trunc_hi_v8sf"
[(match_operand:V4DI 0 "register_operand")
(match_operand:V8SF 1 "register_operand")]
"ISA_HAS_LASX"
{
rtx tmp;
tmp = gen_reg_rtx (V8SFmode);
emit_insn (gen_lasx_xvpermi_d_v8sf (tmp, operands[1], GEN_INT (0xe8)));
emit_insn (gen_lasx_xvftintrzh_l_s (operands[0], tmp));
DONE;
})
(define_insn "lasx_xvftintrzh_l_s"
[(set (match_operand:V4DI 0 "register_operand" "=f")
(unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]
@ -3067,6 +3147,18 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V8SF")])
;; Expander for the standard name vec_unpack_sfix_trunc_lo_v8sf: operand 1
;; is a V8SF input, operand 0 is the V4DI result.
;; Two insns are emitted:
;;   1. lasx_xvpermi_d_v8sf with immediate 0xd4 (0b11010100) from
;;      operand 1 into a fresh V8SF temporary;
;;   2. lasx_xvftintrzl_l_s from the temporary into operand 0.
;; NOTE(review): the permute presumably moves the lower four SF elements
;; into the low half of each 128-bit lane, which is what xvftintrzl.l.s
;; reads -- confirm against the instruction semantics.
(define_expand "vec_unpack_sfix_trunc_lo_v8sf"
[(match_operand:V4DI 0 "register_operand")
(match_operand:V8SF 1 "register_operand")]
"ISA_HAS_LASX"
{
rtx tmp;
tmp = gen_reg_rtx (V8SFmode);
emit_insn (gen_lasx_xvpermi_d_v8sf (tmp, operands[1], GEN_INT (0xd4)));
emit_insn (gen_lasx_xvftintrzl_l_s (operands[0], tmp));
DONE;
})
(define_insn "lasx_xvftintrzl_l_s"
[(set (match_operand:V4DI 0 "register_operand" "=f")
(unspec:V4DI [(match_operand:V8SF 1 "register_operand" "f")]

View file

@ -282,10 +282,24 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
/* Each macro below aliases the insn code expected for an LSX builtin
   (CODE_FOR_lsx_<insn>) to the insn code of the pattern that actually
   implements it.  */
#define CODE_FOR_lsx_vftintrz_l_d CODE_FOR_fix_truncv2dfv2di2
#define CODE_FOR_lsx_vftintrz_wu_s CODE_FOR_fixuns_truncv4sfv4si2
#define CODE_FOR_lsx_vftintrz_lu_d CODE_FOR_fixuns_truncv2dfv2di2
/* Float-to-int pack/unpack builtins map onto the vec_pack_sfix_trunc /
   vec_unpack_sfix_trunc_{hi,lo} patterns.  */
#define CODE_FOR_lsx_vftintrz_w_d CODE_FOR_vec_pack_sfix_trunc_v2df
#define CODE_FOR_lsx_vftintrzh_l_s CODE_FOR_vec_unpack_sfix_trunc_hi_v4sf
#define CODE_FOR_lsx_vftintrzl_l_s CODE_FOR_vec_unpack_sfix_trunc_lo_v4sf
#define CODE_FOR_lsx_vffint_s_w CODE_FOR_floatv4siv4sf2
#define CODE_FOR_lsx_vffint_d_l CODE_FOR_floatv2div2df2
#define CODE_FOR_lsx_vffint_s_wu CODE_FOR_floatunsv4siv4sf2
#define CODE_FOR_lsx_vffint_d_lu CODE_FOR_floatunsv2div2df2
/* Int-to-float pack/unpack builtins map onto the vec_packs_float /
   vec_unpacks_float_{hi,lo} patterns.  */
#define CODE_FOR_lsx_vffint_s_l CODE_FOR_vec_packs_float_v2di
#define CODE_FOR_lsx_vffinth_d_w CODE_FOR_vec_unpacks_float_hi_v4si
#define CODE_FOR_lsx_vffintl_d_w CODE_FOR_vec_unpacks_float_lo_v4si
/* vexth (signed and unsigned) builtins map onto vec_unpack{s,u}_hi.  */
#define CODE_FOR_lsx_vexth_h_b CODE_FOR_vec_unpacks_hi_v16qi
#define CODE_FOR_lsx_vexth_w_h CODE_FOR_vec_unpacks_hi_v8hi
#define CODE_FOR_lsx_vexth_d_w CODE_FOR_vec_unpacks_hi_v4si
#define CODE_FOR_lsx_vexth_hu_bu CODE_FOR_vec_unpacku_hi_v16qi
#define CODE_FOR_lsx_vexth_wu_hu CODE_FOR_vec_unpacku_hi_v8hi
#define CODE_FOR_lsx_vexth_du_wu CODE_FOR_vec_unpacku_hi_v4si
/* vfcvt{h,l}.d.s builtins map onto the float vec_unpacks_{hi,lo}.  */
#define CODE_FOR_lsx_vfcvth_d_s CODE_FOR_vec_unpacks_hi_v4sf
#define CODE_FOR_lsx_vfcvtl_d_s CODE_FOR_vec_unpacks_lo_v4sf
#define CODE_FOR_lsx_vfsub_s CODE_FOR_subv4sf3
#define CODE_FOR_lsx_vfsub_d CODE_FOR_subv2df3
#define CODE_FOR_lsx_vfmul_s CODE_FOR_mulv4sf3
@ -563,6 +577,12 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
/* Each macro below aliases the insn code expected for a LASX builtin
   (CODE_FOR_lasx_<insn>) to the insn code of the pattern that actually
   implements it.  */
#define CODE_FOR_lasx_xvffint_d_l CODE_FOR_floatv4div4df2
#define CODE_FOR_lasx_xvffint_s_wu CODE_FOR_floatunsv8siv8sf2
#define CODE_FOR_lasx_xvffint_d_lu CODE_FOR_floatunsv4div4df2
/* vext2xv (signed and unsigned) builtins map onto vec_unpack{s,u}_lo.  */
#define CODE_FOR_lasx_vext2xv_h_b CODE_FOR_vec_unpacks_lo_v32qi
#define CODE_FOR_lasx_vext2xv_w_h CODE_FOR_vec_unpacks_lo_v16hi
#define CODE_FOR_lasx_vext2xv_d_w CODE_FOR_vec_unpacks_lo_v8si
#define CODE_FOR_lasx_vext2xv_hu_bu CODE_FOR_vec_unpacku_lo_v32qi
#define CODE_FOR_lasx_vext2xv_wu_hu CODE_FOR_vec_unpacku_lo_v16hi
#define CODE_FOR_lasx_vext2xv_du_wu CODE_FOR_vec_unpacku_lo_v8si
#define CODE_FOR_lasx_xvfsub_s CODE_FOR_subv8sf3
#define CODE_FOR_lasx_xvfsub_d CODE_FOR_subv4df3
#define CODE_FOR_lasx_xvfmul_s CODE_FOR_mulv8sf3
@ -2757,6 +2777,8 @@ loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops,
case CODE_FOR_lsx_vpickod_h:
case CODE_FOR_lsx_vpickod_w:
case CODE_FOR_lsx_vandn_v:
case CODE_FOR_lsx_vftintrz_w_d:
case CODE_FOR_lsx_vffint_s_l:
case CODE_FOR_lasx_xvilvh_b:
case CODE_FOR_lasx_xvilvh_h:
case CODE_FOR_lasx_xvilvh_w:

View file

@ -171,7 +171,7 @@ extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs,
extern void loongarch_expand_vector_group_init (rtx, rtx);
extern void loongarch_expand_vector_init (rtx, rtx);
extern void loongarch_expand_vec_unpack (rtx op[2], bool, bool);
extern void loongarch_expand_vec_unpack (rtx op[2], bool);
extern void loongarch_expand_vec_perm (rtx, rtx, rtx, rtx);
extern void loongarch_expand_vec_perm_1 (rtx[]);
extern void loongarch_expand_vector_extract (rtx, rtx, int);

View file

@ -9759,7 +9759,7 @@ loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
/* Expand an integral vector unpack operation. */
void
loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p)
{
machine_mode imode = GET_MODE (operands[1]);
rtx (*unpack) (rtx, rtx, rtx);
@ -9768,31 +9768,32 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx);
rtx tmp, dest;
/* In LASX, only vec_unpacks_hi_<mode> requires expander. */
if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32)
{
switch (imode)
{
case E_V8SImode:
if (unsigned_p)
extend = gen_lasx_vext2xv_du_wu;
extend = gen_vec_unpacku_lo_v8si;
else
extend = gen_lasx_vext2xv_d_w;
extend = gen_vec_unpacks_lo_v8si;
swap_hi_lo = gen_lasx_xvpermi_q_v8si;
break;
case E_V16HImode:
if (unsigned_p)
extend = gen_lasx_vext2xv_wu_hu;
extend = gen_vec_unpacku_lo_v16hi;
else
extend = gen_lasx_vext2xv_w_h;
extend = gen_vec_unpacks_lo_v16hi;
swap_hi_lo = gen_lasx_xvpermi_q_v16hi;
break;
case E_V32QImode:
if (unsigned_p)
extend = gen_lasx_vext2xv_hu_bu;
extend = gen_vec_unpacku_lo_v32qi;
else
extend = gen_lasx_vext2xv_h_b;
extend = gen_vec_unpacks_lo_v32qi;
swap_hi_lo = gen_lasx_xvpermi_q_v32qi;
break;
@ -9801,46 +9802,28 @@ loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
break;
}
if (high_p)
{
tmp = gen_reg_rtx (imode);
emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx));
emit_insn (extend (operands[0], tmp));
return;
}
emit_insn (extend (operands[0], operands[1]));
tmp = gen_reg_rtx (imode);
emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx));
emit_insn (extend (operands[0], tmp));
return;
}
else if (ISA_HAS_LSX)
/* In LSX, only vec_unpacks_lo_<mode> requires expander. */
else if (ISA_HAS_LSX && !ISA_HAS_LASX)
{
switch (imode)
{
case E_V4SImode:
if (high_p != 0)
unpack = gen_lsx_vilvh_w;
else
unpack = gen_lsx_vilvl_w;
unpack = gen_lsx_vilvl_w;
cmpFunc = gen_lsx_vslt_w;
break;
case E_V8HImode:
if (high_p != 0)
unpack = gen_lsx_vilvh_h;
else
unpack = gen_lsx_vilvl_h;
unpack = gen_lsx_vilvl_h;
cmpFunc = gen_lsx_vslt_h;
break;
case E_V16QImode:
if (high_p != 0)
unpack = gen_lsx_vilvh_b;
else
unpack = gen_lsx_vilvl_b;
unpack = gen_lsx_vilvl_b;
cmpFunc = gen_lsx_vslt_b;
break;

View file

@ -73,16 +73,10 @@
UNSPEC_LSX_VMSKLTZ
UNSPEC_LSX_VSIGNCOV
UNSPEC_LSX_VFTINT_W_D
UNSPEC_LSX_VFFINT_S_L
UNSPEC_LSX_VFTINTRZ_W_D
UNSPEC_LSX_VFTINTRP_W_D
UNSPEC_LSX_VFTINTRM_W_D
UNSPEC_LSX_VFTINTRNE_W_D
UNSPEC_LSX_VFTINTL_L_S
UNSPEC_LSX_VFFINTH_D_W
UNSPEC_LSX_VFFINTL_D_W
UNSPEC_LSX_VFTINTRZL_L_S
UNSPEC_LSX_VFTINTRZH_L_S
UNSPEC_LSX_VFTINTRPL_L_S
UNSPEC_LSX_VFTINTRPH_L_S
UNSPEC_LSX_VFTINTRMH_L_S
@ -336,54 +330,15 @@
[(set_attr "type" "simd_permute")
(set_attr "mode" "<MODE>")])
(define_expand "vec_unpacks_hi_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "=f")
(float_extend:V2DF
(vec_select:V2SF
(match_operand:V4SF 1 "register_operand" "f")
(match_dup 2))))]
"ISA_HAS_LSX"
{
operands[2] = loongarch_lsx_vec_parallel_const_half (V4SFmode,
true/*high_p*/);
})
(define_expand "vec_unpacks_lo_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "=f")
(float_extend:V2DF
(vec_select:V2SF
(match_operand:V4SF 1 "register_operand" "f")
(match_dup 2))))]
"ISA_HAS_LSX"
{
operands[2] = loongarch_lsx_vec_parallel_const_half (V4SFmode,
false/*high_p*/);
})
(define_expand "vec_unpacks_hi_<mode>"
[(match_operand:<VDMODE> 0 "register_operand")
(match_operand:ILSX_WHB 1 "register_operand")]
"ISA_HAS_LSX"
{
loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, true/*high_p*/);
DONE;
})
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<VDMODE> 0 "register_operand")
(match_operand:ILSX_WHB 1 "register_operand")]
"ISA_HAS_LSX"
{
loongarch_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/);
DONE;
})
(define_expand "vec_unpacku_hi_<mode>"
[(match_operand:<VDMODE> 0 "register_operand")
(match_operand:ILSX_WHB 1 "register_operand")]
"ISA_HAS_LSX"
{
loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/);
if (ISA_HAS_LASX)
emit_insn (gen_vec_unpacks_lo_<mode>_internal (operands[0], operands[1]));
else
loongarch_expand_vec_unpack (operands, false/*unsigned_p*/);
DONE;
})
@ -392,7 +347,10 @@
(match_operand:ILSX_WHB 1 "register_operand")]
"ISA_HAS_LSX"
{
loongarch_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/);
if (ISA_HAS_LASX)
emit_insn (gen_vec_unpacku_lo_<mode>_internal (operands[0], operands[1]));
else
loongarch_expand_vec_unpack (operands, true/*unsigned_p*/);
DONE;
})
@ -2093,7 +2051,7 @@
[(set_attr "type" "simd_fcvt")
(set_attr "mode" "V4SF")])
(define_insn "lsx_vfcvth_d_s"
(define_insn "vec_unpacks_hi_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "=f")
(float_extend:V2DF
(vec_select:V2SF
@ -2113,7 +2071,7 @@
[(set_attr "type" "simd_fcvt")
(set_attr "mode" "V4SF")])
(define_insn "lsx_vfcvtl_d_s"
(define_insn "vec_unpacks_lo_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "=f")
(float_extend:V2DF
(vec_select:V2SF
@ -2691,23 +2649,23 @@
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "V2DF")])
(define_insn "lsx_vffint_s_l"
(define_insn "vec_packs_float_v2di"
[(set (match_operand:V4SF 0 "register_operand" "=f")
(unspec:V4SF [(match_operand:V2DI 1 "register_operand" "f")
(match_operand:V2DI 2 "register_operand" "f")]
UNSPEC_LSX_VFFINT_S_L))]
(vec_concat:V4SF
(float:V2SF (match_operand:V2DI 1 "register_operand" "f"))
(float:V2SF (match_operand:V2DI 2 "register_operand" "f"))))]
"ISA_HAS_LSX"
"vffint.s.l\t%w0,%w1,%w2"
"vffint.s.l\t%w0,%w2,%w1"
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "V2DI")])
(define_insn "lsx_vftintrz_w_d"
(define_insn "vec_pack_sfix_trunc_v2df"
[(set (match_operand:V4SI 0 "register_operand" "=f")
(unspec:V4SI [(match_operand:V2DF 1 "register_operand" "f")
(match_operand:V2DF 2 "register_operand" "f")]
UNSPEC_LSX_VFTINTRZ_W_D))]
(vec_concat:V4SI
(fix:V2SI (match_operand:V2DF 1 "register_operand" "f"))
(fix:V2SI (match_operand:V2DF 2 "register_operand" "f"))))]
"ISA_HAS_LSX"
"vftintrz.w.d\t%w0,%w1,%w2"
"vftintrz.w.d\t%w0,%w2,%w1"
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "V2DF")])
@ -2759,37 +2717,45 @@
[(set_attr "type" "simd_shift")
(set_attr "mode" "V4SF")])
(define_insn "lsx_vffinth_d_w"
(define_insn "vec_unpacks_float_hi_v4si"
[(set (match_operand:V2DF 0 "register_operand" "=f")
(unspec:V2DF [(match_operand:V4SI 1 "register_operand" "f")]
UNSPEC_LSX_VFFINTH_D_W))]
(float:V2DF
(vec_select:V2SI
(match_operand:V4SI 1 "register_operand" "f")
(parallel [(const_int 2) (const_int 3)]))))]
"ISA_HAS_LSX"
"vffinth.d.w\t%w0,%w1"
[(set_attr "type" "simd_shift")
(set_attr "mode" "V4SI")])
(define_insn "lsx_vffintl_d_w"
(define_insn "vec_unpacks_float_lo_v4si"
[(set (match_operand:V2DF 0 "register_operand" "=f")
(unspec:V2DF [(match_operand:V4SI 1 "register_operand" "f")]
UNSPEC_LSX_VFFINTL_D_W))]
(float:V2DF
(vec_select:V2SI
(match_operand:V4SI 1 "register_operand" "f")
(parallel [(const_int 0) (const_int 1)]))))]
"ISA_HAS_LSX"
"vffintl.d.w\t%w0,%w1"
[(set_attr "type" "simd_shift")
(set_attr "mode" "V4SI")])
(define_insn "lsx_vftintrzh_l_s"
(define_insn "vec_unpack_sfix_trunc_hi_v4sf"
[(set (match_operand:V2DI 0 "register_operand" "=f")
(unspec:V2DI [(match_operand:V4SF 1 "register_operand" "f")]
UNSPEC_LSX_VFTINTRZH_L_S))]
(fix:V2DI
(vec_select:V2SF
(match_operand:V4SF 1 "register_operand" "f")
(parallel [(const_int 2) (const_int 3)]))))]
"ISA_HAS_LSX"
"vftintrzh.l.s\t%w0,%w1"
[(set_attr "type" "simd_shift")
(set_attr "mode" "V4SF")])
(define_insn "lsx_vftintrzl_l_s"
(define_insn "vec_unpack_sfix_trunc_lo_v4sf"
[(set (match_operand:V2DI 0 "register_operand" "=f")
(unspec:V2DI [(match_operand:V4SF 1 "register_operand" "f")]
UNSPEC_LSX_VFTINTRZL_L_S))]
(fix:V2DI
(vec_select:V2SF
(match_operand:V4SF 1 "register_operand" "f")
(parallel [(const_int 0) (const_int 1)]))))]
"ISA_HAS_LSX"
"vftintrzl.l.s\t%w0,%w1"
[(set_attr "type" "simd_shift")
@ -4015,7 +3981,7 @@
[(set_attr "type" "simd_bit")
(set_attr "mode" "V16QI")])
(define_insn "lsx_vexth_h<u>_b<u>"
(define_insn "vec_unpack<su>_hi_v16qi"
[(set (match_operand:V8HI 0 "register_operand" "=f")
(any_extend:V8HI
(vec_select:V8QI
@ -4029,7 +3995,7 @@
[(set_attr "type" "simd_fcvt")
(set_attr "mode" "V8HI")])
(define_insn "lsx_vexth_w<u>_h<u>"
(define_insn "vec_unpack<su>_hi_v8hi"
[(set (match_operand:V4SI 0 "register_operand" "=f")
(any_extend:V4SI
(vec_select:V4HI
@ -4041,7 +4007,7 @@
[(set_attr "type" "simd_fcvt")
(set_attr "mode" "V4SI")])
(define_insn "lsx_vexth_d<u>_w<u>"
(define_insn "vec_unpack<su>_hi_v4si"
[(set (match_operand:V2DI 0 "register_operand" "=f")
(any_extend:V2DI
(vec_select:V2SI

View file

@ -0,0 +1,120 @@
/* { dg-do compile } */
/* { dg-options "-mlsx -O3" } */
/* Element count shared by every array and loop in this test.  */
#define N 128
/* File-scope arrays used as sources and destinations of the conversion
   loops below; one array per element type, signed first, then unsigned.  */
char c[N];
short int h[N];
int s[N];
long l[N];
float f[N];
double d[N];
unsigned char uc[N];
unsigned short int uh[N];
unsigned int us[N];
unsigned long ul[N];
/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v2df:.*\tvftintrz\\.l\\.d.*-test_vec_pack_sfix_trunc_v2df\n" } } */
/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v2df:.*\tvpickev\\.w.*-test_vec_pack_sfix_trunc_v2df\n" } } */
/* { dg-final { scan-assembler "test_vec_pack_sfix_trunc_v2df:.*\tvftintrz\\.w\\.d.*-test_vec_pack_sfix_trunc_v2df\n" } } */
/* Narrowing conversion: each double d[i] is converted to int and stored
   in s[i].  The dg-final directives preceding this function check the
   assembly generated for this loop, so its form must stay as is.  */
void
test_vec_pack_sfix_trunc_v2df (void)
{
for (int i = 0; i < N; i++)
s[i] = d[i];
}
/* { dg-final { scan-assembler-not "test_vec_packs_float_v2di:.*\tmovgr2fr\\.d.*-test_vec_packs_float_v2di" } } */
/* { dg-final { scan-assembler "test_vec_packs_float_v2di:.*\tvffint\\.s\\.l.*-test_vec_packs_float_v2di" } } */
/* Narrowing conversion: each long l[i] is converted to float and stored
   in f[i].  The dg-final directives preceding this function check the
   assembly generated for this loop, so its form must stay as is.  */
void
test_vec_packs_float_v2di (void)
{
for (int i = 0; i < N; i++)
f[i] = l[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpack_sfix_trunc_hi_lo_v4sf:.*\tftintrz\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v4sf" } } */
/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v4sf:.*\tvftintrzh\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v4sf" } } */
/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v4sf:.*\tvftintrzl\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v4sf" } } */
/* Widening conversion: each float f[i] is converted to long and stored
   in l[i].  The dg-final directives preceding this function expect both
   the high-half and low-half conversion instructions to appear.  */
void
test_vec_unpack_sfix_trunc_hi_lo_v4sf (void)
{
for (int i = 0; i < N; i++)
l[i] = f[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvslti\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */
/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvilvl\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */
/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvilvh\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */
/* { dg-final { scan-assembler-not "test_vec_unpacks_float_hi_lo_v4si:.*\tvffint\\.d\\.l.*-test_vec_unpacks_float_hi_lo_v4si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v4si:.*\tvffinth\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v4si:.*\tvffintl\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v4si" } } */
/* Widening conversion: each int s[i] is converted to double and stored
   in d[i].  The dg-final directives preceding this function expect the
   direct vffinth/vffintl forms and reject the compare/interleave
   sequence.  */
void
test_vec_unpacks_float_hi_lo_v4si (void)
{
for (int i = 0; i < N; i++)
d[i] = s[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v4si:.*\tvilvh\\.w.*-test_vec_unpacks_hi_lo_v4si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v4si:.*\tvexth\\.d\\.w.*-test_vec_unpacks_hi_lo_v4si" } } */
/* Signed widening: each int s[i] is extended to long and stored in l[i].
   The dg-final directives preceding this function expect vexth.d.w and
   reject vilvh.w.  */
void
test_vec_unpacks_hi_lo_v4si (void)
{
for (int i = 0; i < N; i++)
l[i] = s[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v8hi:.*\tvilvh\\.h.*-test_vec_unpacks_hi_lo_v8hi" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8hi:.*\tvexth\\.w\\.h.*-test_vec_unpacks_hi_lo_v8hi" } } */
/* Signed widening: each short h[i] is extended to int and stored in s[i].
   The dg-final directives preceding this function expect vexth.w.h and
   reject vilvh.h.  */
void
test_vec_unpacks_hi_lo_v8hi (void)
{
for (int i = 0; i < N; i++)
s[i] = h[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacks_hi_lo_v16qi:.*\tvilvh\\.b.*-test_vec_unpacks_hi_lo_v16qi" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16qi:.*\tvexth\\.h\\.b.*-test_vec_unpacks_hi_lo_v16qi" } } */
/* Widening: each char c[i] is extended to short and stored in h[i].
   The dg-final directives preceding this function expect the signed
   vexth.h.b and reject vilvh.b.
   NOTE(review): c[] is plain `char', so the signed expectation relies on
   plain char being signed for this target -- confirm.  */
void
test_vec_unpacks_hi_lo_v16qi (void)
{
for (int i = 0; i < N; i++)
h[i] = c[i];
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v4sf:.*\tvfcvtl\\.d\\.s.*-test_vec_unpacks_hi_lo_v4sf" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v4sf:.*\tvfcvth\\.d\\.s.*-test_vec_unpacks_hi_lo_v4sf" } } */
/* Floating-point widening: each float f[i] is converted to double and
   stored in d[i].  The dg-final directives preceding this function
   expect both vfcvtl.d.s and vfcvth.d.s.  */
void
test_vec_unpacks_hi_lo_v4sf (void)
{
for (int i = 0; i < N; i++)
d[i] = f[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v4si:.*\tvilvh\\.w.*-test_vec_unpacku_hi_lo_v4si" } } */
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v4si:.*\tvexth\\.du\\.wu.*-test_vec_unpacku_hi_lo_v4si" } } */
/* Unsigned widening: each unsigned int us[i] is extended to unsigned long
   and stored in ul[i].  The dg-final directives preceding this function
   expect vexth.du.wu and reject vilvh.w.  */
void
test_vec_unpacku_hi_lo_v4si (void)
{
for (int i = 0; i < N; i++)
ul[i] = us[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v8hi:.*\tvilvh\\.h.*-test_vec_unpacku_hi_lo_v8hi" } } */
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8hi:.*\tvexth\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v8hi" } } */
/* Unsigned widening: each unsigned short uh[i] is extended to unsigned int
   and stored in us[i].  The dg-final directives preceding this function
   expect vexth.wu.hu and reject vilvh.h.  */
void
test_vec_unpacku_hi_lo_v8hi (void)
{
for (int i = 0; i < N; i++)
us[i] = uh[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpacku_hi_lo_v16qi:.*\tvilvh\\.b.*-test_vec_unpacku_hi_lo_v16qi" } } */
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16qi:.*\tvexth\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v16qi" } } */
/* Unsigned widening: each unsigned char uc[i] is extended to unsigned
   short and stored in uh[i].  The dg-final directives preceding this
   function expect vexth.hu.bu and reject vilvh.b.  */
void
test_vec_unpacku_hi_lo_v16qi (void)
{
for (int i = 0; i < N; i++)
uh[i] = uc[i];
}

View file

@ -0,0 +1,118 @@
/* { dg-do compile } */
/* { dg-options "-mlasx -O3" } */
/* Element count shared by every array and loop in this test.  */
#define N 128
/* File-scope arrays used as sources and destinations of the conversion
   loops below; one array per element type, signed first, then unsigned.  */
char c[N];
short int h[N];
int s[N];
long l[N];
float f[N];
double d[N];
unsigned char uc[N];
unsigned short int uh[N];
unsigned int us[N];
unsigned long ul[N];
/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v4df:.*\txvftintrz\\.l\\.d.*-test_vec_pack_sfix_trunc_v4df\n" } } */
/* { dg-final { scan-assembler-not "test_vec_pack_sfix_trunc_v4df:.*\txvpickev\\.w.*-test_vec_pack_sfix_trunc_v4df\n" } } */
/* { dg-final { scan-assembler "test_vec_pack_sfix_trunc_v4df:.*\txvftintrz\\.w\\.d.*-test_vec_pack_sfix_trunc_v4df\n" } } */
/* Narrowing conversion: each double d[i] is converted to int and stored
   in s[i].  The dg-final directives preceding this function check the
   assembly generated for this loop, so its form must stay as is.  */
void
test_vec_pack_sfix_trunc_v4df (void)
{
for (int i = 0; i < N; i++)
s[i] = d[i];
}
/* { dg-final { scan-assembler-not "test_vec_packs_float_v4di:.*\tmovgr2fr\\.d.*-test_vec_packs_float_v4di" } } */
/* { dg-final { scan-assembler "test_vec_packs_float_v4di:.*\txvffint\\.s\\.l.*-test_vec_packs_float_v4di" } } */
/* Narrowing conversion: each long l[i] is converted to float and stored
   in f[i].  The dg-final directives preceding this function check the
   assembly generated for this loop, so its form must stay as is.  */
void
test_vec_packs_float_v4di (void)
{
for (int i = 0; i < N; i++)
f[i] = l[i];
}
/* { dg-final { scan-assembler-not "test_vec_unpack_sfix_trunc_hi_lo_v8sf:.*\tftintrz\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v8sf" } } */
/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v8sf:.*\txvftintrzh\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v8sf" } } */
/* { dg-final { scan-assembler "test_vec_unpack_sfix_trunc_hi_lo_v8sf:.*\txvftintrzl\\.l\\.s.*-test_vec_unpack_sfix_trunc_hi_lo_v8sf" } } */
/* Widening conversion: each float f[i] is converted to long and stored
   in l[i].  The dg-final directives preceding this function expect both
   the high-half and low-half conversion instructions to appear.  */
void
test_vec_unpack_sfix_trunc_hi_lo_v8sf (void)
{
for (int i = 0; i < N; i++)
l[i] = f[i];
}
/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\txvpermi\\.d.*-test_vec_unpacks_float_hi_lo_v8si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v8si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\txvffint\\.d\\.l.*-test_vec_unpacks_float_hi_lo_v8si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_float_hi_lo_v8si:.*\txvffinth\\.d\\.w.*-test_vec_unpacks_float_hi_lo_v8si" } } */
/* Widening conversion: each int s[i] is converted to double and stored
   in d[i].  The dg-final directives preceding this function expect
   xvpermi.d, vext2xv.d.w, xvffint.d.l and xvffinth.d.w to all appear.  */
void
test_vec_unpacks_float_hi_lo_v8si (void)
{
for (int i = 0; i < N; i++)
d[i] = s[i];
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\tvext2xv\\.d\\.w.*-test_vec_unpacks_hi_lo_v8si" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v8si" } } */
/* Signed widening: each int s[i] is extended to long and stored in l[i].
   The dg-final directives preceding this function expect vext2xv.d.w
   and xvpermi.q.  */
void
test_vec_unpacks_hi_lo_v8si (void)
{
for (int i = 0; i < N; i++)
l[i] = s[i];
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\tvext2xv\\.w\\.h.*-test_vec_unpacks_hi_lo_v16hi" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v16hi" } } */
/* Signed widening: each short h[i] is extended to int and stored in s[i].
   The dg-final directives preceding this function expect vext2xv.w.h
   and xvpermi.q.  */
void
test_vec_unpacks_hi_lo_v16hi (void)
{
for (int i = 0; i < N; i++)
s[i] = h[i];
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\tvext2xv\\.h\\.b.*-test_vec_unpacks_hi_lo_v32qi" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacks_hi_lo_v32qi" } } */
/* Widening: each char c[i] is extended to short and stored in h[i].
   The dg-final directives preceding this function expect the signed
   vext2xv.h.b and xvpermi.q.
   NOTE(review): c[] is plain `char', so the signed expectation relies on
   plain char being signed for this target -- confirm.  */
void
test_vec_unpacks_hi_lo_v32qi (void)
{
for (int i = 0; i < N; i++)
h[i] = c[i];
}
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8sf:.*\txvfcvtl\\.d\\.s.*-test_vec_unpacks_hi_lo_v8sf" } } */
/* { dg-final { scan-assembler "test_vec_unpacks_hi_lo_v8sf:.*\txvpermi\\.d.*-test_vec_unpacks_hi_lo_v8sf" } } */
/* Floating-point widening: each float f[i] is converted to double and
   stored in d[i].  The dg-final directives preceding this function
   expect xvfcvtl.d.s and xvpermi.d.  */
void
test_vec_unpacks_hi_lo_v8sf (void)
{
for (int i = 0; i < N; i++)
d[i] = f[i];
}
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\tvext2xv\\.du\\.wu.*-test_vec_unpacku_hi_lo_v8si" } } */
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v8si:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v8si" } } */
/* Unsigned widening: each unsigned int us[i] is extended to unsigned long
   and stored in ul[i].  The dg-final directives preceding this function
   expect vext2xv.du.wu and xvpermi.q.  */
void
test_vec_unpacku_hi_lo_v8si (void)
{
for (int i = 0; i < N; i++)
ul[i] = us[i];
}
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\tvext2xv\\.wu\\.hu.*-test_vec_unpacku_hi_lo_v16hi" } } */
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v16hi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v16hi" } } */
/* Unsigned widening: each unsigned short uh[i] is extended to unsigned int
   and stored in us[i].  The dg-final directives preceding this function
   expect vext2xv.wu.hu and xvpermi.q.  */
void
test_vec_unpacku_hi_lo_v16hi (void)
{
for (int i = 0; i < N; i++)
us[i] = uh[i];
}
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\tvext2xv\\.hu\\.bu.*-test_vec_unpacku_hi_lo_v32qi" } } */
/* { dg-final { scan-assembler "test_vec_unpacku_hi_lo_v32qi:.*\txvpermi\\.q.*-test_vec_unpacku_hi_lo_v32qi" } } */
/* Unsigned widening: each unsigned char uc[i] is extended to unsigned
   short and stored in uh[i].  The dg-final directives preceding this
   function expect vext2xv.hu.bu and xvpermi.q.  */
void
test_vec_unpacku_hi_lo_v32qi (void)
{
for (int i = 0; i < N; i++)
uh[i] = uc[i];
}