predicates.md (pmpyshr_operand): New.
* config/ia64/predicates.md (pmpyshr_operand): New. * config/ia64/ia64.c (ia64_expand_unpack): New. (ia64_expand_widen_mul_v4hi): New. (ia64_expand_widen_sum): Update for pattern renames. (ia64_expand_dot_prod_v8qi): Likewise. * config/ia64/ia64-protos.h: Update. * config/ia64/vect.md (vecwider): New mode attribute. (vec_widen_umult_lo_v8qi, vec_widen_umult_hi_v8qi): New. (vec_widen_smult_lo_v8qi, vec_widen_smult_hi_v8qi): New. (pmpyshr2, pmpyshr2_u): New. (vec_widen_smult_lo_v4hi, vec_widen_smult_hi_v4hi): New. (vec_widen_umult_lo_v4hi, vec_widen_umult_hi_v4hi): New. (mulv2si3): New. (vec_pack_ssat_v4hi): Rename from pack2_sss. (vec_pack_usat_v4hi): Rename from *pack2_uss. (vec_pack_ssat_v2si): Rename from pack4_sss. (vec_interleave_lowv8qi): Rename from unpack1_l, use the correct vec_select operation. (vec_interleave_highv8qi): Similarly. (mux1_alt): Rename from *mux1_alt. (vec_extract_evenv8qi, vec_extract_oddv8qi): New. (vec_interleave_lowv4hi): Rename from unpack2_l. (vec_interleave_highv4hi): Rename from unpack2_h. (mix2_r): Rename from *mix2_r. (mix2_l): Similarly. (vec_extract_evenodd_helper): New. (vec_extract_evenv4hi, vec_extract_oddv4hi): New. (vec_interleave_lowv2si): Rename from *unpack4_l. (vec_interleave_highv2si): Rename from *unpack4_h. (vec_extract_evenv2si, vec_extract_oddv2si): New. (vec_interleave_lowv2sf): Rename from fmix_r. (vec_interleave_highv2sf): Rename from *fmix_l. (vec_extract_evenv2sf, vec_extract_oddv2sf): New. (vec_unpacku_lo_<VECINT12>, vec_unpacku_hi_<VECINT12>): New. (vec_unpacks_lo_<VECINT12>, vec_unpacks_hi_<VECINT12>): New. (vec_pack_trunc_v4hi, vec_pack_trunc_v2si): New. testsuite: * lib/target-supports.exp (vect_widen_sum_hi_to_si_pattern, vect_widen_mult_hi_to_si, vect_sdot_qi, vect_udot_qi, vect_sdot_hi, vect_unpack, vect_int_mult, vect_extract_even_odd, vect_extract_even_odd_wide, vect_interleave): Enable for ia64. From-SVN: r167136
This commit is contained in:
parent
5eee6908a1
commit
604e3ff316
7 changed files with 504 additions and 42 deletions
|
@ -1,3 +1,42 @@
|
|||
2010-11-24 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/ia64/predicates.md (pmpyshr_operand): New.
|
||||
* config/ia64/ia64.c (ia64_expand_unpack): New.
|
||||
(ia64_expand_widen_mul_v4hi): New.
|
||||
(ia64_expand_widen_sum): Update for pattern renames.
|
||||
(ia64_expand_dot_prod_v8qi): Likewise.
|
||||
* config/ia64/ia64-protos.h: Update.
|
||||
* config/ia64/vect.md (vecwider): New mode attribute.
|
||||
(vec_widen_umult_lo_v8qi, vec_widen_umult_hi_v8qi): New.
|
||||
(vec_widen_smult_lo_v8qi, vec_widen_smult_hi_v8qi): New.
|
||||
(pmpyshr2, pmpyshr2_u): New.
|
||||
(vec_widen_smult_lo_v4hi, vec_widen_smult_hi_v4hi): New.
|
||||
(vec_widen_umult_lo_v4hi, vec_widen_umult_hi_v4hi): New.
|
||||
(mulv2si3): New.
|
||||
(vec_pack_ssat_v4hi): Rename from pack2_sss.
|
||||
(vec_pack_usat_v4hi): Rename from *pack2_uss.
|
||||
(vec_pack_ssat_v2si): Rename from pack4_sss.
|
||||
(vec_interleave_lowv8qi): Rename from unpack1_l, use the correct
|
||||
vec_select operation.
|
||||
(vec_interleave_highv8qi): Similarly.
|
||||
(mux1_alt): Rename from *mux1_alt.
|
||||
(vec_extract_evenv8qi, vec_extract_oddv8qi): New.
|
||||
(vec_interleave_lowv4hi): Rename from unpack2_l.
|
||||
(vec_interleave_highv4hi): Rename from unpack2_h.
|
||||
(mix2_r): Rename from *mix2_r.
|
||||
(mix2_l): Similarly.
|
||||
(vec_extract_evenodd_helper): New.
|
||||
(vec_extract_evenv4hi, vec_extract_oddv4hi): New.
|
||||
(vec_interleave_lowv2si): Rename from *unpack4_l.
|
||||
(vec_interleave_highv2si): Rename from *unpack4_h.
|
||||
(vec_extract_evenv2si, vec_extract_oddv2si): New.
|
||||
(vec_interleave_lowv2sf): Rename from fmix_r.
|
||||
(vec_interleave_highv2sf): Rename from *fmix_l.
|
||||
(vec_extract_evenv2sf, vec_extract_oddv2sf): New.
|
||||
(vec_unpacku_lo_<VECINT12>, vec_unpacku_hi_<VECINT12>): New.
|
||||
(vec_unpacks_lo_<VECINT12>, vec_unpacks_hi_<VECINT12>): New.
|
||||
(vec_pack_trunc_v4hi, vec_pack_trunc_v2si): New.
|
||||
|
||||
2010-11-24 Nathan Froyd <froydnj@codesourcery.com>
|
||||
|
||||
* targhooks.c (default_except_unwind_info): Remove
|
||||
|
|
|
@ -39,7 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
|
|||
extern void ia64_expand_compare (rtx *, rtx *, rtx *);
|
||||
extern void ia64_expand_vecint_cmov (rtx[]);
|
||||
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
|
||||
extern void ia64_expand_unpack (rtx [], bool, bool);
|
||||
extern void ia64_expand_widen_sum (rtx[], bool);
|
||||
extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);
|
||||
extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
|
||||
extern void ia64_expand_call (rtx, rtx, rtx, int);
|
||||
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
|
||||
|
|
|
@ -1972,6 +1972,44 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Emit an integral vector unpack operation.  OPERANDS[1] is the V8QI or
   V4HI input.  It is interleaved (high half if HIGHP, otherwise low half)
   with a second vector X that is all zeros when UNSIGNEDP, or else the
   result of comparing each input element against zero with LT.  The
   interleaved result is stored into OPERANDS[0] viewed in the input's
   vector mode.  */
|
||||
|
||||
void
|
||||
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
|
||||
{
|
||||
enum machine_mode mode = GET_MODE (operands[1]);
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
rtx x;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V8QImode:
|
||||
gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
|
||||
break;
|
||||
case V4HImode:
|
||||
gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Fill in x with the zero or sign extension of each element in op1. */
|
||||
if (unsignedp)
|
||||
x = CONST0_RTX (mode);
|
||||
else
|
||||
{
|
||||
bool neg;
|
||||
|
||||
x = gen_reg_rtx (mode);
|
||||
|
||||
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
|
||||
CONST0_RTX (mode));
|
||||
gcc_assert (!neg);
|
||||
}
|
||||
|
||||
emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
|
||||
}
|
||||
|
||||
/* Emit an integral vector widening sum operation. */
|
||||
|
||||
void
|
||||
|
@ -1989,13 +2027,13 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
|
|||
switch (mode)
|
||||
{
|
||||
case V8QImode:
|
||||
unpack_l = gen_unpack1_l;
|
||||
unpack_h = gen_unpack1_h;
|
||||
unpack_l = gen_vec_interleave_lowv8qi;
|
||||
unpack_h = gen_vec_interleave_highv8qi;
|
||||
plus = gen_addv4hi3;
|
||||
break;
|
||||
case V4HImode:
|
||||
unpack_l = gen_unpack2_l;
|
||||
unpack_h = gen_unpack2_h;
|
||||
unpack_l = gen_vec_interleave_lowv4hi;
|
||||
unpack_h = gen_vec_interleave_highv4hi;
|
||||
plus = gen_addv2si3;
|
||||
break;
|
||||
default:
|
||||
|
@ -2026,6 +2064,27 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
|
|||
emit_insn (plus (operands[0], h, s));
|
||||
}
|
||||
|
||||
/* Emit a widening multiply of the V4HI vectors OPERANDS[1] and OPERANDS[2].
   The low 16 bits of each product come from mulv4hi3 and the high 16 bits
   from pmpyshr2 with a shift of 16 (pmpyshr2_u when UNSIGNEDP).  The two
   half-product vectors are then interleaved, taking the high interleave if
   HIGHP and the low interleave otherwise, into OPERANDS[0] viewed as
   V4HImode.  */
void
|
||||
ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
|
||||
{
|
||||
rtx l = gen_reg_rtx (V4HImode);
|
||||
rtx h = gen_reg_rtx (V4HImode);
|
||||
rtx (*mulhigh)(rtx, rtx, rtx, rtx);
|
||||
rtx (*interl)(rtx, rtx, rtx);
|
||||
|
||||
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
|
||||
|
||||
/* For signed, pmpy2.r would appear to more closely match this operation.
|
||||
However, the vectorizer is more likely to use the LO and HI patterns
|
||||
in pairs. At which point, with this formulation, the first two insns
|
||||
of each can be CSEd. */
|
||||
mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
|
||||
emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
|
||||
|
||||
interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
|
||||
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
|
||||
}
|
||||
|
||||
/* Emit a signed or unsigned V8QI dot product operation. */
|
||||
|
||||
void
|
||||
|
@ -2056,10 +2115,14 @@ ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
|
|||
h1 = gen_reg_rtx (V4HImode);
|
||||
h2 = gen_reg_rtx (V4HImode);
|
||||
|
||||
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
|
||||
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
|
||||
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
|
||||
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
|
||||
emit_insn (gen_vec_interleave_lowv8qi
|
||||
(gen_lowpart (V8QImode, l1), operands[1], x1));
|
||||
emit_insn (gen_vec_interleave_lowv8qi
|
||||
(gen_lowpart (V8QImode, l2), operands[2], x2));
|
||||
emit_insn (gen_vec_interleave_highv8qi
|
||||
(gen_lowpart (V8QImode, h1), operands[1], x1));
|
||||
emit_insn (gen_vec_interleave_highv8qi
|
||||
(gen_lowpart (V8QImode, h2), operands[2], x2));
|
||||
|
||||
p1 = gen_reg_rtx (V2SImode);
|
||||
p2 = gen_reg_rtx (V2SImode);
|
||||
|
|
|
@ -526,6 +526,12 @@
|
|||
INTVAL (op) == 1 || INTVAL (op) == 4 ||
|
||||
INTVAL (op) == 8 || INTVAL (op) == 16")))
|
||||
|
||||
;; True if OP is one of the immediate values 0, 7, 15, 16
|
||||
(define_predicate "pmpyshr_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "INTVAL (op) == 0 || INTVAL (op) == 7
|
||||
|| INTVAL (op) == 15 || INTVAL (op) == 16")))
|
||||
|
||||
;; True if OP is 0..3.
|
||||
(define_predicate "const_int_2bit_operand"
|
||||
(and (match_code "const_int")
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
(define_mode_iterator VECINT12 [V8QI V4HI])
|
||||
(define_mode_iterator VECINT24 [V4HI V2SI])
|
||||
(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
|
||||
(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")])
|
||||
|
||||
(define_expand "mov<mode>"
|
||||
[(set (match_operand:VECINT 0 "general_operand" "")
|
||||
|
@ -203,6 +204,62 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v8qi"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (V4HImode);
|
||||
rtx op2 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_vec_unpacku_lo_v8qi (op1, operands[1]));
|
||||
emit_insn (gen_vec_unpacku_lo_v8qi (op2, operands[2]));
|
||||
emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
|
||||
DONE;
|
||||
});
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v8qi"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (V4HImode);
|
||||
rtx op2 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_vec_unpacku_hi_v8qi (op1, operands[1]));
|
||||
emit_insn (gen_vec_unpacku_hi_v8qi (op2, operands[2]));
|
||||
emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
|
||||
DONE;
|
||||
});
|
||||
|
||||
(define_expand "vec_widen_smult_lo_v8qi"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (V4HImode);
|
||||
rtx op2 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_vec_unpacks_lo_v8qi (op1, operands[1]));
|
||||
emit_insn (gen_vec_unpacks_lo_v8qi (op2, operands[2]));
|
||||
emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
|
||||
DONE;
|
||||
});
|
||||
|
||||
(define_expand "vec_widen_smult_hi_v8qi"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx op1 = gen_reg_rtx (V4HImode);
|
||||
rtx op2 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_vec_unpacks_hi_v8qi (op1, operands[1]));
|
||||
emit_insn (gen_vec_unpacks_hi_v8qi (op2, operands[2]));
|
||||
emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
|
||||
DONE;
|
||||
});
|
||||
|
||||
(define_insn "mulv4hi3"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(mult:V4HI (match_operand:V4HI 1 "gr_register_operand" "r")
|
||||
|
@ -211,6 +268,34 @@
|
|||
"pmpyshr2 %0 = %1, %2, 0"
|
||||
[(set_attr "itanium_class" "mmmul")])
|
||||
|
||||
(define_insn "pmpyshr2"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(truncate:V4HI
|
||||
(ashiftrt:V4SI
|
||||
(mult:V4SI
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 1 "gr_register_operand" "r"))
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 2 "gr_register_operand" "r")))
|
||||
(match_operand:SI 3 "pmpyshr_operand" "n"))))]
|
||||
""
|
||||
"pmpyshr2 %0 = %1, %2, %3"
|
||||
[(set_attr "itanium_class" "mmmul")])
|
||||
|
||||
(define_insn "pmpyshr2_u"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(truncate:V4HI
|
||||
(lshiftrt:V4SI
|
||||
(mult:V4SI
|
||||
(zero_extend:V4SI
|
||||
(match_operand:V4HI 1 "gr_register_operand" "r"))
|
||||
(zero_extend:V4SI
|
||||
(match_operand:V4HI 2 "gr_register_operand" "r")))
|
||||
(match_operand:SI 3 "pmpyshr_operand" "n"))))]
|
||||
""
|
||||
"pmpyshr2.u %0 = %1, %2, %3"
|
||||
[(set_attr "itanium_class" "mmmul")])
|
||||
|
||||
(define_insn "pmpy2_r"
|
||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||
(mult:V2SI
|
||||
|
@ -241,6 +326,100 @@
|
|||
"pmpy2.l %0 = %1, %2"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_expand "vec_widen_smult_lo_v4hi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_mul_v4hi (operands, false, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_smult_hi_v4hi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_mul_v4hi (operands, false, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v4hi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_mul_v4hi (operands, true, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v4hi"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_widen_mul_v4hi (operands, true, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "mulv2si3"
|
||||
[(set (match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r")
|
||||
(match_operand:V2SI 2 "gr_register_operand" "r")))]
|
||||
""
|
||||
{
|
||||
rtx t0, t1, t2, t3, t4, t5, t6, t7, x;
|
||||
rtx op1h = gen_lowpart (V4HImode, operands[1]);
|
||||
rtx op2h = gen_lowpart (V4HImode, operands[2]);
|
||||
|
||||
t0 = gen_reg_rtx (V4HImode);
|
||||
t1 = gen_reg_rtx (V4HImode);
|
||||
t2 = gen_reg_rtx (V4HImode);
|
||||
t3 = gen_reg_rtx (V4HImode);
|
||||
t4 = gen_reg_rtx (V2SImode);
|
||||
t5 = gen_reg_rtx (V2SImode);
|
||||
t6 = gen_reg_rtx (V2SImode);
|
||||
t7 = gen_reg_rtx (V2SImode);
|
||||
|
||||
/* Consider the HImode components of op1 = DCBA, op2 = ZYXW.
|
||||
Let the .l and .h suffixes below denote the low and high 16 bits
|
||||
of the full 32-bit product. */
|
||||
|
||||
/* T0 = CDAB, i.e. op1 with the two HImode elements of each SImode
   lane swapped (vec_select indices 1, 0, 3, 2). */
|
||||
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
|
||||
GEN_INT (3), const2_rtx));
|
||||
x = gen_rtx_VEC_SELECT (V4HImode, op1h, x);
|
||||
emit_insn (gen_rtx_SET (VOIDmode, t0, x));
|
||||
|
||||
/* T1 = DZ.l, CY.l, BX.l, AW.l. */
|
||||
emit_insn (gen_mulv4hi3 (t1, op1h, op2h));
|
||||
|
||||
/* T2 = DZ.h, CY.h, BX.h, AW.h. */
|
||||
emit_insn (gen_pmpyshr2_u (t2, op1h, op2h, GEN_INT (16)));
|
||||
|
||||
/* T3 = CZ.l, DY.l, AX.l, BW.l. */
|
||||
emit_insn (gen_mulv4hi3 (t3, t0, op2h));
|
||||
|
||||
/* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. */
|
||||
emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2));
|
||||
|
||||
/* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */
|
||||
emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5),
|
||||
CONST0_RTX (V4HImode), t3));
|
||||
|
||||
/* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. */
|
||||
emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6),
|
||||
CONST0_RTX (V4HImode), t3));
|
||||
|
||||
/* Sum the three partial products: op0 = T4 + T5 + T6. */
emit_insn (gen_addv2si3 (t7, t4, t5));
|
||||
emit_insn (gen_addv2si3 (operands[0], t6, t7));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "umax<mode>3"
|
||||
[(set (match_operand:VECINT 0 "gr_register_operand" "")
|
||||
(umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
|
||||
|
@ -486,7 +665,7 @@
|
|||
"pcmp<vecsize>.gt %0 = %r1, %r2"
|
||||
[(set_attr "itanium_class" "mmalua")])
|
||||
|
||||
(define_insn "pack2_sss"
|
||||
(define_insn "vec_pack_ssat_v4hi"
|
||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||
(vec_concat:V8QI
|
||||
(ss_truncate:V4QI
|
||||
|
@ -497,7 +676,7 @@
|
|||
"pack2.sss %0 = %r1, %r2"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "*pack2_uss"
|
||||
(define_insn "vec_pack_usat_v4hi"
|
||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||
(vec_concat:V8QI
|
||||
(us_truncate:V4QI
|
||||
|
@ -508,7 +687,7 @@
|
|||
"pack2.uss %0 = %r1, %r2"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "pack4_sss"
|
||||
(define_insn "vec_pack_ssat_v2si"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(vec_concat:V4HI
|
||||
(ss_truncate:V2HI
|
||||
|
@ -519,38 +698,30 @@
|
|||
"pack4.sss %0 = %r1, %r2"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "unpack1_l"
|
||||
(define_insn "vec_interleave_lowv8qi"
|
||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
|
||||
(match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
|
||||
(parallel [(const_int 0)
|
||||
(const_int 1)
|
||||
(const_int 2)
|
||||
(const_int 3)
|
||||
(const_int 8)
|
||||
(const_int 9)
|
||||
(const_int 10)
|
||||
(const_int 11)])))]
|
||||
(parallel [(const_int 0) (const_int 8)
|
||||
(const_int 1) (const_int 9)
|
||||
(const_int 2) (const_int 10)
|
||||
(const_int 3) (const_int 11)])))]
|
||||
""
|
||||
"unpack1.l %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "unpack1_h"
|
||||
(define_insn "vec_interleave_highv8qi"
|
||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
(match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
|
||||
(match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
|
||||
(parallel [(const_int 4)
|
||||
(const_int 5)
|
||||
(const_int 6)
|
||||
(const_int 7)
|
||||
(const_int 12)
|
||||
(const_int 13)
|
||||
(const_int 14)
|
||||
(const_int 15)])))]
|
||||
(parallel [(const_int 4) (const_int 12)
|
||||
(const_int 5) (const_int 13)
|
||||
(const_int 6) (const_int 14)
|
||||
(const_int 7) (const_int 15)])))]
|
||||
""
|
||||
"unpack1.h %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
@ -639,7 +810,7 @@
|
|||
"mux1 %0 = %1, @shuf"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "*mux1_alt"
|
||||
(define_insn "mux1_alt"
|
||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V8QI
|
||||
(match_operand:V8QI 1 "gr_register_operand" "r")
|
||||
|
@ -679,7 +850,31 @@
|
|||
"mux1 %0 = %1, @brcst"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "unpack2_l"
|
||||
(define_expand "vec_extract_evenv8qi"
|
||||
[(match_operand:V8QI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx temp = gen_reg_rtx (V8QImode);
|
||||
emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
|
||||
emit_insn (gen_mux1_alt (operands[0], temp));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_extract_oddv8qi"
|
||||
[(match_operand:V8QI 0 "gr_register_operand" "")
|
||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx temp = gen_reg_rtx (V8QImode);
|
||||
emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
|
||||
emit_insn (gen_mux1_alt (operands[0], temp));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "vec_interleave_lowv4hi"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
|
@ -693,7 +888,7 @@
|
|||
"unpack2.l %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "unpack2_h"
|
||||
(define_insn "vec_interleave_highv4hi"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
|
@ -707,7 +902,7 @@
|
|||
"unpack2.h %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "*mix2_r"
|
||||
(define_insn "mix2_r"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
|
@ -721,7 +916,7 @@
|
|||
"mix2.r %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "*mix2_l"
|
||||
(define_insn "mix2_l"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V4HI
|
||||
(vec_concat:V8HI
|
||||
|
@ -755,6 +950,40 @@
|
|||
}
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_expand "vec_extract_evenodd_helper"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(vec_select:V4HI
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(parallel [(const_int 0)
|
||||
(const_int 2)
|
||||
(const_int 1)
|
||||
(const_int 3)])))]
|
||||
"")
|
||||
|
||||
(define_expand "vec_extract_evenv4hi"
|
||||
[(match_operand:V4HI 0 "gr_register_operand")
|
||||
(match_operand:V4HI 1 "gr_reg_or_0_operand")
|
||||
(match_operand:V4HI 2 "gr_reg_or_0_operand")]
|
||||
""
|
||||
{
|
||||
rtx temp = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
|
||||
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_extract_oddv4hi"
|
||||
[(match_operand:V4HI 0 "gr_register_operand")
|
||||
(match_operand:V4HI 1 "gr_reg_or_0_operand")
|
||||
(match_operand:V4HI 2 "gr_reg_or_0_operand")]
|
||||
""
|
||||
{
|
||||
rtx temp = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
|
||||
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "*mux2_brcst_hi"
|
||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||
(vec_duplicate:V4HI
|
||||
|
@ -764,7 +993,7 @@
|
|||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
;; Note that mix4.r performs the exact same operation.
|
||||
(define_insn "*unpack4_l"
|
||||
(define_insn "vec_interleave_lowv2si"
|
||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V2SI
|
||||
(vec_concat:V4SI
|
||||
|
@ -777,7 +1006,7 @@
|
|||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
;; Note that mix4.l performs the exact same operation.
|
||||
(define_insn "*unpack4_h"
|
||||
(define_insn "vec_interleave_highv2si"
|
||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V2SI
|
||||
(vec_concat:V4SI
|
||||
|
@ -789,6 +1018,28 @@
|
|||
"unpack4.h %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_expand "vec_extract_evenv2si"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V2SI 1 "gr_register_operand" "")
|
||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_extract_oddv2si"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand:V2SI 1 "gr_register_operand" "")
|
||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_initv2si"
|
||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||
(match_operand 1 "" "")]
|
||||
|
@ -831,7 +1082,6 @@
|
|||
;; padd.uus
|
||||
;; pavg
|
||||
;; pavgsub
|
||||
;; pmpyshr, general form
|
||||
;; psad
|
||||
;; pshladd
|
||||
;; pshradd
|
||||
|
@ -1110,7 +1360,7 @@
|
|||
"fswap %0 = %F1, %F2"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
(define_insn "*fmix_l"
|
||||
(define_insn "vec_interleave_highv2sf"
|
||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||
(vec_select:V2SF
|
||||
(vec_concat:V4SF
|
||||
|
@ -1121,7 +1371,7 @@
|
|||
"fmix.l %0 = %F2, %F1"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
(define_insn "fmix_r"
|
||||
(define_insn "vec_interleave_lowv2sf"
|
||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||
(vec_select:V2SF
|
||||
(vec_concat:V4SF
|
||||
|
@ -1143,6 +1393,29 @@
|
|||
"fmix.lr %0 = %F2, %F1"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
;; Extract the even elements of the concatenation of operands 1 and 2,
;; which for two-element vectors is exactly the low interleave.  The V2SF
;; operands use fr_register_operand, matching vec_interleave_lowv2sf and
;; the other V2SF patterns in this file.
(define_expand "vec_extract_evenv2sf"
|
||||
[(match_operand:V2SF 0 "fr_register_operand" "")
|
||||
(match_operand:V2SF 1 "fr_register_operand" "")
|
||||
(match_operand:V2SF 2 "fr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Extract the odd elements of the concatenation of operands 1 and 2,
;; which for two-element vectors is exactly the high interleave.  The V2SF
;; operands use fr_register_operand, matching vec_interleave_highv2sf and
;; the other V2SF patterns in this file.
(define_expand "vec_extract_oddv2sf"
|
||||
[(match_operand:V2SF 0 "fr_register_operand" "")
|
||||
(match_operand:V2SF 1 "fr_register_operand" "")
|
||||
(match_operand:V2SF 2 "fr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
||||
(define_expand "vec_setv2sf"
|
||||
[(match_operand:V2SF 0 "fr_register_operand" "")
|
||||
(match_operand:SF 1 "fr_register_operand" "")
|
||||
|
@ -1158,7 +1431,7 @@
|
|||
emit_insn (gen_fmix_lr (operands[0], tmp, operands[0]));
|
||||
break;
|
||||
case 1:
|
||||
emit_insn (gen_fmix_r (operands[0], operands[0], tmp));
|
||||
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp));
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
@ -1227,6 +1500,66 @@
|
|||
""
|
||||
"")
|
||||
|
||||
(define_expand "vec_unpacku_lo_<mode>"
|
||||
[(match_operand:<vecwider> 0 "register_operand" "")
|
||||
(match_operand:VECINT12 1 "register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_unpack (operands, true, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_hi_<mode>"
|
||||
[(match_operand:<vecwider> 0 "register_operand" "")
|
||||
(match_operand:VECINT12 1 "register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_unpack (operands, true, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_lo_<mode>"
|
||||
[(match_operand:<vecwider> 0 "register_operand" "")
|
||||
(match_operand:VECINT12 1 "register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_unpack (operands, false, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_hi_<mode>"
|
||||
[(match_operand:<vecwider> 0 "register_operand" "")
|
||||
(match_operand:VECINT12 1 "register_operand" "")]
|
||||
""
|
||||
{
|
||||
ia64_expand_unpack (operands, false, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_pack_trunc_v4hi"
|
||||
[(match_operand:V8QI 0 "gr_register_operand" "")
|
||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
||||
(match_operand:V4HI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx op1 = gen_lowpart(V8QImode, operands[1]);
|
||||
rtx op2 = gen_lowpart(V8QImode, operands[2]);
|
||||
emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_pack_trunc_v2si"
|
||||
[(match_operand:V4HI 0 "gr_register_operand" "")
|
||||
(match_operand:V2SI 1 "gr_register_operand" "")
|
||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
||||
""
|
||||
{
|
||||
rtx op1 = gen_lowpart(V4HImode, operands[1]);
|
||||
rtx op2 = gen_lowpart(V4HImode, operands[2]);
|
||||
emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Missing operations
|
||||
;; fprcpa
|
||||
;; fpsqrta
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2010-11-24 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* lib/target-supports.exp (vect_widen_sum_hi_to_si_pattern,
|
||||
vect_widen_mult_hi_to_si, vect_sdot_qi, vect_udot_qi, vect_sdot_hi,
|
||||
vect_unpack, vect_int_mult, vect_extract_even_odd,
|
||||
vect_extract_even_odd_wide, vect_interleave): Enable for ia64.
|
||||
|
||||
2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/46519
|
||||
|
|
|
@ -2518,7 +2518,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
|
|||
verbose "check_effective_target_vect_widen_sum_hi_to_si_pattern: using cached result" 2
|
||||
} else {
|
||||
set et_vect_widen_sum_hi_to_si_pattern_saved 0
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*] } {
|
||||
set et_vect_widen_sum_hi_to_si_pattern_saved 1
|
||||
}
|
||||
}
|
||||
|
@ -2644,6 +2645,7 @@ proc check_effective_target_vect_widen_mult_hi_to_si { } {
|
|||
}
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_widen_mult_hi_to_si_saved 1
|
||||
|
@ -2665,6 +2667,9 @@ proc check_effective_target_vect_sdot_qi { } {
|
|||
verbose "check_effective_target_vect_sdot_qi: using cached result" 2
|
||||
} else {
|
||||
set et_vect_sdot_qi_saved 0
|
||||
# ia64 supports the signed QImode dot product; record it in this proc's
# own cache variable (the original set the udot variable by mistake).
if { [istarget ia64-*-*] } {
|
||||
set et_vect_sdot_qi_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_sdot_qi: returning $et_vect_sdot_qi_saved" 2
|
||||
return $et_vect_sdot_qi_saved
|
||||
|
@ -2682,7 +2687,8 @@ proc check_effective_target_vect_udot_qi { } {
|
|||
verbose "check_effective_target_vect_udot_qi: using cached result" 2
|
||||
} else {
|
||||
set et_vect_udot_qi_saved 0
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*] } {
|
||||
set et_vect_udot_qi_saved 1
|
||||
}
|
||||
}
|
||||
|
@ -2703,6 +2709,7 @@ proc check_effective_target_vect_sdot_hi { } {
|
|||
} else {
|
||||
set et_vect_sdot_hi_saved 0
|
||||
if { ([istarget powerpc*-*-*] && ![istarget powerpc-*-linux*paired*])
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_sdot_hi_saved 1
|
||||
|
@ -2774,6 +2781,7 @@ proc check_effective_target_vect_unpack { } {
|
|||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
|
||||
set et_vect_unpack_saved 1
|
||||
}
|
||||
|
@ -3050,6 +3058,7 @@ proc check_effective_target_vect_int_mult { } {
|
|||
|| [istarget spu-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [check_effective_target_arm32] } {
|
||||
set et_vect_int_mult_saved 1
|
||||
}
|
||||
|
@ -3071,6 +3080,7 @@ proc check_effective_target_vect_extract_even_odd { } {
|
|||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
set et_vect_extract_even_odd_saved 1
|
||||
}
|
||||
|
@ -3093,6 +3103,7 @@ proc check_effective_target_vect_extract_even_odd_wide { } {
|
|||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
set et_vect_extract_even_odd_wide_saved 1
|
||||
}
|
||||
|
@ -3114,6 +3125,7 @@ proc check_effective_target_vect_interleave { } {
|
|||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
set et_vect_interleave_saved 1
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue