From 604e3ff316fdb7298f3a54912fb6adc2268765f8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 24 Nov 2010 17:16:20 -0800 Subject: [PATCH] predicates.md (pmpyshr_operand): New. * config/ia64/predicates.md (pmpyshr_operand): New. * config/ia64/ia64.c (ia64_expand_unpack): New. (ia64_expand_widen_mul_v4hi): New. (ia64_expand_widen_sum): Update for pattern renames. (ia64_expand_dot_prod_v8qi): Likewise. * config/ia64/ia64-protos.h: Update. * config/ia64/vect.md (vecwider): New mode attribute. (vec_widen_umult_lo_v8qi, vec_widen_umult_hi_v8qi): New. (vec_widen_smult_lo_v8qi, vec_widen_smult_hi_v8qi): New. (pmpyshr2, pmpyshr2_u): New. (vec_widen_smult_lo_v4hi, vec_widen_smult_hi_v4hi): New. (vec_widen_umult_lo_v4hi, vec_widen_umult_hi_v4hi): New. (mulv2si3): New. (vec_pack_ssat_v4hi): Rename from pack2_sss. (vec_pack_usat_v4hi): Rename from *pack2_uss. (vec_pack_ssat_v2si): Rename from pack4_sss. (vec_interleave_lowv8qi): Rename from unpack1_l, use the correct vec_select operation. (vec_interleave_highv8qi): Similarly. (mux1_alt): Rename from *mux1_alt. (vec_extract_evenv8qi, vec_extract_oddv8qi): New. (vec_interleave_lowv4hi): Rename from unpack2_l. (vec_interleave_highv4hi): Rename from unpack2_h. (mix2_r): Rename from *mix2_r. (mix2_l): Similarly. (vec_extract_evenodd_helper): New. (vec_extract_evenv4hi, vec_extract_oddv4hi): New. (vec_interleave_lowv2si): Rename from *unpack4_l. (vec_interleave_highv2si): Rename from *unpack4_h. (vec_extract_evenv2si, vec_extract_oddv2si): New. (vec_interleave_lowv2sf): Rename from fmix_r. (vec_interleave_highv2sf): Rename from *fmix_l. (vec_extract_evenv2sf, vec_extract_oddv2sf): New. (vec_unpacku_lo_<mode>, vec_unpacku_hi_<mode>): New. (vec_unpacks_lo_<mode>, vec_unpacks_hi_<mode>): New. (vec_pack_trunc_v4hi, vec_pack_trunc_v2si): New. 
testsuite: * lib/target-supports.exp (vect_widen_sum_hi_to_si_pattern, vect_widen_mult_hi_to_si, vect_sdot_qi, vect_udot_qi, vect_sdot_hi, vect_unpack, vect_int_mult, vect_extract_even_odd, vect_extract_even_odd_wide, vect_interleave): Enable for ia64. From-SVN: r167136 --- gcc/ChangeLog | 39 +++ gcc/config/ia64/ia64-protos.h | 2 + gcc/config/ia64/ia64.c | 79 ++++- gcc/config/ia64/predicates.md | 6 + gcc/config/ia64/vect.md | 397 +++++++++++++++++++++++--- gcc/testsuite/ChangeLog | 7 + gcc/testsuite/lib/target-supports.exp | 16 +- 7 files changed, 504 insertions(+), 42 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a637c6c9336..275e141e514 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,42 @@ +2010-11-24 Richard Henderson + + * config/ia64/predicates.md (pmpyshr_operand): New. + * config/ia64/ia64.c (ia64_expand_unpack): New. + (ia64_expand_widen_mul_v4hi): New. + (ia64_expand_widen_sum): Update for pattern renames. + (ia64_expand_dot_prod_v8qi): Likewise. + * config/ia64/ia64-protos.h: Update. + * config/ia64/vect.md (vecwider): New mode attribute. + (vec_widen_umult_lo_v8qi, vec_widen_umult_hi_v8qi): New. + (vec_widen_smult_lo_v8qi, vec_widen_smult_hi_v8qi): New. + (pmpyshr2, pmpyshr2_u): New. + (vec_widen_smult_lo_v4hi, vec_widen_smult_hi_v4hi): New. + (vec_widen_umult_lo_v4hi, vec_widen_umult_hi_v4hi): New. + (mulv2si3): New. + (vec_pack_ssat_v4hi): Rename from pack2_sss. + (vec_pack_usat_v4hi): Rename from *pack2_uss. + (vec_pack_ssat_v2si): Rename from pack4_sss. + (vec_interleave_lowv8qi): Rename from unpack1_l, use the correct + vec_select operation. + (vec_interleave_highv8qi): Similarly. + (mux1_alt): Rename from *mux1_alt. + (vec_extract_evenv8qi, vec_extract_oddv8qi): New. + (vec_interleave_lowv4hi): Rename from unpack2_l. + (vec_interleave_highv4hi): Rename from unpack2_h. + (mix2_r): Rename from *mix2_r. + (mix2_l): Similarly. + (vec_extract_evenodd_helper): New. + (vec_extract_evenv4hi, vec_extract_oddv4hi): New. 
+ (vec_interleave_lowv2si): Rename from *unpack4_l. + (vec_interleave_highv2si): Rename from *unpack4_h. + (vec_extract_evenv2si, vec_extract_oddv2si): New. + (vec_interleave_lowv2sf): Rename from fmix_r. + (vec_interleave_highv2sf): Rename from *fmix_l. + (vec_extract_evenv2sf, vec_extract_oddv2sf): New. + (vec_unpacku_lo_<mode>, vec_unpacku_hi_<mode>): New. + (vec_unpacks_lo_<mode>, vec_unpacks_hi_<mode>): New. + (vec_pack_trunc_v4hi, vec_pack_trunc_v2si): New. + 2010-11-24 Nathan Froyd * targhooks.c (default_except_unwind_info): Remove diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index b841152586a..87e04a23d76 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -39,7 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]); extern void ia64_expand_compare (rtx *, rtx *, rtx *); extern void ia64_expand_vecint_cmov (rtx[]); extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); +extern void ia64_expand_unpack (rtx [], bool, bool); extern void ia64_expand_widen_sum (rtx[], bool); +extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool); extern void ia64_expand_dot_prod_v8qi (rtx[], bool); extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index a007743e2e5..bd42f2b6328 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -1972,6 +1972,44 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode, return true; } +/* Emit an integral vector unpack operation. */ + +void +ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) +{ + enum machine_mode mode = GET_MODE (operands[1]); + rtx (*gen) (rtx, rtx, rtx); + rtx x; + + switch (mode) + { + case V8QImode: + gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi; + break; + case V4HImode: + gen = highp ? 
gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi; + break; + default: + gcc_unreachable (); + } + + /* Fill in x with the sign extension of each element in op1. */ + if (unsignedp) + x = CONST0_RTX (mode); + else + { + bool neg; + + x = gen_reg_rtx (mode); + + neg = ia64_expand_vecint_compare (LT, mode, x, operands[1], + CONST0_RTX (mode)); + gcc_assert (!neg); + } + + emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x)); +} + /* Emit an integral vector widening sum operations. */ void @@ -1989,13 +2027,13 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp) switch (mode) { case V8QImode: - unpack_l = gen_unpack1_l; - unpack_h = gen_unpack1_h; + unpack_l = gen_vec_interleave_lowv8qi; + unpack_h = gen_vec_interleave_highv8qi; plus = gen_addv4hi3; break; case V4HImode: - unpack_l = gen_unpack2_l; - unpack_h = gen_unpack2_h; + unpack_l = gen_vec_interleave_lowv4hi; + unpack_h = gen_vec_interleave_highv4hi; plus = gen_addv2si3; break; default: @@ -2026,6 +2064,27 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp) emit_insn (plus (operands[0], h, s)); } +void +ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp) +{ + rtx l = gen_reg_rtx (V4HImode); + rtx h = gen_reg_rtx (V4HImode); + rtx (*mulhigh)(rtx, rtx, rtx, rtx); + rtx (*interl)(rtx, rtx, rtx); + + emit_insn (gen_mulv4hi3 (l, operands[1], operands[2])); + + /* For signed, pmpy2.r would appear to more closely match this operation. + However, the vectorizer is more likely to use the LO and HI patterns + in pairs. At which point, with this formulation, the first two insns + of each can be CSEd. */ + mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2; + emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16))); + + interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi; + emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h)); +} + /* Emit a signed or unsigned V8QI dot product operation. 
*/ void @@ -2056,10 +2115,14 @@ ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp) h1 = gen_reg_rtx (V4HImode); h2 = gen_reg_rtx (V4HImode); - emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1)); - emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2)); - emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1)); - emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2)); + emit_insn (gen_vec_interleave_lowv8qi + (gen_lowpart (V8QImode, l1), operands[1], x1)); + emit_insn (gen_vec_interleave_lowv8qi + (gen_lowpart (V8QImode, l2), operands[2], x2)); + emit_insn (gen_vec_interleave_highv8qi + (gen_lowpart (V8QImode, h1), operands[1], x1)); + emit_insn (gen_vec_interleave_highv8qi + (gen_lowpart (V8QImode, h2), operands[2], x2)); p1 = gen_reg_rtx (V2SImode); p2 = gen_reg_rtx (V2SImode); diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md index af24827a8b5..55bb3f58982 100644 --- a/gcc/config/ia64/predicates.md +++ b/gcc/config/ia64/predicates.md @@ -526,6 +526,12 @@ INTVAL (op) == 1 || INTVAL (op) == 4 || INTVAL (op) == 8 || INTVAL (op) == 16"))) +;; True if OP is one of the immediate values 0, 7, 15, 16 +(define_predicate "pmpyshr_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 0 || INTVAL (op) == 7 + || INTVAL (op) == 15 || INTVAL (op) == 16"))) + ;; True if OP is 0..3. 
(define_predicate "const_int_2bit_operand" (and (match_code "const_int") diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index 6ab1002efdd..9adff69f3c8 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -24,6 +24,7 @@ (define_mode_iterator VECINT12 [V8QI V4HI]) (define_mode_iterator VECINT24 [V4HI V2SI]) (define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")]) +(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")]) (define_expand "mov" [(set (match_operand:VECINT 0 "general_operand" "") @@ -203,6 +204,62 @@ DONE; }) +(define_expand "vec_widen_umult_lo_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacku_lo_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacku_lo_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + +(define_expand "vec_widen_umult_hi_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacku_hi_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacku_hi_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + +(define_expand "vec_widen_smult_lo_v8qi" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacks_lo_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacks_lo_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + +(define_expand "vec_widen_smult_hi_v8qi" + [(match_operand:V4HI 
0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_reg_rtx (V4HImode); + rtx op2 = gen_reg_rtx (V4HImode); + emit_insn (gen_vec_unpacks_hi_v8qi (op1, operands[1])); + emit_insn (gen_vec_unpacks_hi_v8qi (op2, operands[2])); + emit_insn (gen_mulv4hi3 (operands[0], op1, op2)); + DONE; +}); + (define_insn "mulv4hi3" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (mult:V4HI (match_operand:V4HI 1 "gr_register_operand" "r") @@ -211,6 +268,34 @@ "pmpyshr2 %0 = %1, %2, 0" [(set_attr "itanium_class" "mmmul")]) +(define_insn "pmpyshr2" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (truncate:V4HI + (ashiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "gr_register_operand" "r")) + (sign_extend:V4SI + (match_operand:V4HI 2 "gr_register_operand" "r"))) + (match_operand:SI 3 "pmpyshr_operand" "n"))))] + "" + "pmpyshr2 %0 = %1, %2, %3" + [(set_attr "itanium_class" "mmmul")]) + +(define_insn "pmpyshr2_u" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI + (zero_extend:V4SI + (match_operand:V4HI 1 "gr_register_operand" "r")) + (zero_extend:V4SI + (match_operand:V4HI 2 "gr_register_operand" "r"))) + (match_operand:SI 3 "pmpyshr_operand" "n"))))] + "" + "pmpyshr2.u %0 = %1, %2, %3" + [(set_attr "itanium_class" "mmmul")]) + (define_insn "pmpy2_r" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (mult:V2SI @@ -241,6 +326,100 @@ "pmpy2.l %0 = %1, %2" [(set_attr "itanium_class" "mmshf")]) +(define_expand "vec_widen_smult_lo_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_mul_v4hi (operands, false, false); + DONE; +}) + +(define_expand "vec_widen_smult_hi_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 
"gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_mul_v4hi (operands, false, true); + DONE; +}) + +(define_expand "vec_widen_umult_lo_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_mul_v4hi (operands, true, false); + DONE; +}) + +(define_expand "vec_widen_umult_hi_v4hi" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + ia64_expand_widen_mul_v4hi (operands, true, true); + DONE; +}) + +(define_expand "mulv2si3" + [(set (match_operand:V2SI 0 "gr_register_operand" "") + (mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r") + (match_operand:V2SI 2 "gr_register_operand" "r")))] + "" +{ + rtx t0, t1, t2, t3, t4, t5, t6, t7, x; + rtx op1h = gen_lowpart (V4HImode, operands[1]); + rtx op2h = gen_lowpart (V4HImode, operands[2]); + + t0 = gen_reg_rtx (V4HImode); + t1 = gen_reg_rtx (V4HImode); + t2 = gen_reg_rtx (V4HImode); + t3 = gen_reg_rtx (V4HImode); + t4 = gen_reg_rtx (V2SImode); + t5 = gen_reg_rtx (V2SImode); + t6 = gen_reg_rtx (V2SImode); + t7 = gen_reg_rtx (V2SImode); + + /* Consider the HImode components of op1 = DCBA, op2 = ZYXW. + Consider .l and .h suffixes below the low and high 16 bits + of the full 32-bit product. */ + + /* T0 = CDAB. */ + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx, + GEN_INT (3), const2_rtx)); + x = gen_rtx_VEC_SELECT (V4HImode, op1h, x); + emit_insn (gen_rtx_SET (VOIDmode, t0, x)); + + /* T1 = DZ.l, CY.l, BX.l, AW.l. */ + emit_insn (gen_mulv4hi3 (t1, op1h, op2h)); + + /* T2 = DZ.h, CY.h, BX.h, AW.h. */ + emit_insn (gen_pmpyshr2_u (t2, op1h, op2h, GEN_INT (16))); + + /* T3 = CZ.l, DY.l, AX.l, BW.l. */ + emit_insn (gen_mulv4hi3 (t3, t0, op2h)); + + /* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. 
*/ + emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2)); + + /* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */ + emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5), + CONST0_RTX (V4HImode), t3)); + + /* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. */ + emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6), + CONST0_RTX (V4HImode), t3)); + + emit_insn (gen_addv2si3 (t7, t4, t5)); + emit_insn (gen_addv2si3 (operands[0], t6, t7)); + DONE; +}) + (define_expand "umax3" [(set (match_operand:VECINT 0 "gr_register_operand" "") (umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "") @@ -486,7 +665,7 @@ "pcmp.gt %0 = %r1, %r2" [(set_attr "itanium_class" "mmalua")]) -(define_insn "pack2_sss" +(define_insn "vec_pack_ssat_v4hi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_concat:V8QI (ss_truncate:V4QI @@ -497,7 +676,7 @@ "pack2.sss %0 = %r1, %r2" [(set_attr "itanium_class" "mmshf")]) -(define_insn "*pack2_uss" +(define_insn "vec_pack_usat_v4hi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_concat:V8QI (us_truncate:V4QI @@ -508,7 +687,7 @@ "pack2.uss %0 = %r1, %r2" [(set_attr "itanium_class" "mmshf")]) -(define_insn "pack4_sss" +(define_insn "vec_pack_ssat_v2si" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_concat:V4HI (ss_truncate:V2HI @@ -519,38 +698,30 @@ "pack4.sss %0 = %r1, %r2" [(set_attr "itanium_class" "mmshf")]) -(define_insn "unpack1_l" +(define_insn "vec_interleave_lowv8qi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3) - (const_int 8) - (const_int 9) - (const_int 10) - (const_int 11)])))] + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] "" "unpack1.l %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) 
-(define_insn "unpack1_h" +(define_insn "vec_interleave_highv8qi" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (vec_concat:V16QI (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) - (parallel [(const_int 4) - (const_int 5) - (const_int 6) - (const_int 7) - (const_int 12) - (const_int 13) - (const_int 14) - (const_int 15)])))] + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] "" "unpack1.h %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) @@ -639,7 +810,7 @@ "mux1 %0 = %1, @shuf" [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mux1_alt" +(define_insn "mux1_alt" [(set (match_operand:V8QI 0 "gr_register_operand" "=r") (vec_select:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") @@ -679,7 +850,31 @@ "mux1 %0 = %1, @brcst" [(set_attr "itanium_class" "mmshf")]) -(define_insn "unpack2_l" +(define_expand "vec_extract_evenv8qi" + [(match_operand:V8QI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx temp = gen_reg_rtx (V8QImode); + emit_insn (gen_mix1_r (temp, operands[1], operands[2])); + emit_insn (gen_mux1_alt (operands[0], temp)); + DONE; +}) + +(define_expand "vec_extract_oddv8qi" + [(match_operand:V8QI 0 "gr_register_operand" "") + (match_operand:V8QI 1 "gr_register_operand" "") + (match_operand:V8QI 2 "gr_register_operand" "")] + "" +{ + rtx temp = gen_reg_rtx (V8QImode); + emit_insn (gen_mix1_l (temp, operands[1], operands[2])); + emit_insn (gen_mux1_alt (operands[0], temp)); + DONE; +}) + +(define_insn "vec_interleave_lowv4hi" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI @@ -693,7 +888,7 @@ "unpack2.l %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) -(define_insn "unpack2_h" +(define_insn "vec_interleave_highv4hi" [(set (match_operand:V4HI 0 
"gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI @@ -707,7 +902,7 @@ "unpack2.h %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mix2_r" +(define_insn "mix2_r" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI @@ -721,7 +916,7 @@ "mix2.r %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) -(define_insn "*mix2_l" +(define_insn "mix2_l" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_select:V4HI (vec_concat:V8HI @@ -755,6 +950,40 @@ } [(set_attr "itanium_class" "mmshf")]) +(define_expand "vec_extract_evenodd_helper" + [(set (match_operand:V4HI 0 "gr_register_operand" "") + (vec_select:V4HI + (match_operand:V4HI 1 "gr_register_operand" "") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])))] + "") + +(define_expand "vec_extract_evenv4hi" + [(match_operand:V4HI 0 "gr_register_operand") + (match_operand:V4HI 1 "gr_reg_or_0_operand") + (match_operand:V4HI 2 "gr_reg_or_0_operand")] + "" +{ + rtx temp = gen_reg_rtx (V4HImode); + emit_insn (gen_mix2_r (temp, operands[1], operands[2])); + emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp)); + DONE; +}) + +(define_expand "vec_extract_oddv4hi" + [(match_operand:V4HI 0 "gr_register_operand") + (match_operand:V4HI 1 "gr_reg_or_0_operand") + (match_operand:V4HI 2 "gr_reg_or_0_operand")] + "" +{ + rtx temp = gen_reg_rtx (V4HImode); + emit_insn (gen_mix2_l (temp, operands[1], operands[2])); + emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp)); + DONE; +}) + (define_insn "*mux2_brcst_hi" [(set (match_operand:V4HI 0 "gr_register_operand" "=r") (vec_duplicate:V4HI @@ -764,7 +993,7 @@ [(set_attr "itanium_class" "mmshf")]) ;; Note that mix4.r performs the exact same operation. 
-(define_insn "*unpack4_l" +(define_insn "vec_interleave_lowv2si" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (vec_select:V2SI (vec_concat:V4SI @@ -777,7 +1006,7 @@ [(set_attr "itanium_class" "mmshf")]) ;; Note that mix4.l performs the exact same operation. -(define_insn "*unpack4_h" +(define_insn "vec_interleave_highv2si" [(set (match_operand:V2SI 0 "gr_register_operand" "=r") (vec_select:V2SI (vec_concat:V4SI @@ -789,6 +1018,28 @@ "unpack4.h %0 = %r2, %r1" [(set_attr "itanium_class" "mmshf")]) +(define_expand "vec_extract_evenv2si" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V2SI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "vec_extract_oddv2si" + [(match_operand:V2SI 0 "gr_register_operand" "") + (match_operand:V2SI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1], + operands[2])); + DONE; +}) + (define_expand "vec_initv2si" [(match_operand:V2SI 0 "gr_register_operand" "") (match_operand 1 "" "")] @@ -831,7 +1082,6 @@ ;; padd.uus ;; pavg ;; pavgsub -;; pmpyshr, general form ;; psad ;; pshladd ;; pshradd @@ -1110,7 +1360,7 @@ "fswap %0 = %F1, %F2" [(set_attr "itanium_class" "fmisc")]) -(define_insn "*fmix_l" +(define_insn "vec_interleave_highv2sf" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") (vec_select:V2SF (vec_concat:V4SF @@ -1121,7 +1371,7 @@ "fmix.l %0 = %F2, %F1" [(set_attr "itanium_class" "fmisc")]) -(define_insn "fmix_r" +(define_insn "vec_interleave_lowv2sf" [(set (match_operand:V2SF 0 "fr_register_operand" "=f") (vec_select:V2SF (vec_concat:V4SF @@ -1143,6 +1393,29 @@ "fmix.lr %0 = %F2, %F1" [(set_attr "itanium_class" "fmisc")]) +(define_expand "vec_extract_evenv2sf" + [(match_operand:V2SF 0 "gr_register_operand" "") + (match_operand:V2SF 1 
"gr_register_operand" "") + (match_operand:V2SF 2 "gr_register_operand" "")] + "" +{ + emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "vec_extract_oddv2sf" + [(match_operand:V2SF 0 "gr_register_operand" "") + (match_operand:V2SF 1 "gr_register_operand" "") + (match_operand:V2SF 2 "gr_register_operand" "")] + "" +{ + emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1], + operands[2])); + DONE; +}) + + (define_expand "vec_setv2sf" [(match_operand:V2SF 0 "fr_register_operand" "") (match_operand:SF 1 "fr_register_operand" "") @@ -1158,7 +1431,7 @@ emit_insn (gen_fmix_lr (operands[0], tmp, operands[0])); break; case 1: - emit_insn (gen_fmix_r (operands[0], operands[0], tmp)); + emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp)); break; default: gcc_unreachable (); @@ -1227,6 +1500,66 @@ "" "") +(define_expand "vec_unpacku_lo_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, true, false); + DONE; +}) + +(define_expand "vec_unpacku_hi_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, true, true); + DONE; +}) + +(define_expand "vec_unpacks_lo_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, false, false); + DONE; +}) + +(define_expand "vec_unpacks_hi_<mode>" + [(match_operand:<vecwider> 0 "register_operand" "") + (match_operand:VECINT12 1 "register_operand" "")] + "" +{ + ia64_expand_unpack (operands, false, true); + DONE; +}) + +(define_expand "vec_pack_trunc_v4hi" + [(match_operand:V8QI 0 "gr_register_operand" "") + (match_operand:V4HI 1 "gr_register_operand" "") + (match_operand:V4HI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_lowpart(V8QImode, operands[1]); + rtx 
op2 = gen_lowpart(V8QImode, operands[2]); + emit_insn 
(gen_vec_extract_evenv8qi (operands[0], op1, op2)); + DONE; +}) + +(define_expand "vec_pack_trunc_v2si" + [(match_operand:V4HI 0 "gr_register_operand" "") + (match_operand:V2SI 1 "gr_register_operand" "") + (match_operand:V2SI 2 "gr_register_operand" "")] + "" +{ + rtx op1 = gen_lowpart(V4HImode, operands[1]); + rtx op2 = gen_lowpart(V4HImode, operands[2]); + emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2)); + DONE; +}) + ;; Missing operations ;; fprcpa ;; fpsqrta diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 019271ae478..efe68e98ef7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2010-11-24 Richard Henderson + + * lib/target-supports.exp (vect_widen_sum_hi_to_si_pattern, + vect_widen_mult_hi_to_si, vect_sdot_qi, vect_udot_qi, vect_sdot_hi, + vect_unpack, vect_int_mult, vect_extract_even_odd, + vect_extract_even_odd_wide, vect_interleave): Enable for ia64. + 2010-11-24 H.J. Lu PR target/46519 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index c4433128989..b2c3d2a9c5b 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2518,7 +2518,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } { verbose "check_effective_target_vect_widen_sum_hi_to_si_pattern: using cached result" 2 } else { set et_vect_widen_sum_hi_to_si_pattern_saved 0 - if { [istarget powerpc*-*-*] } { + if { [istarget powerpc*-*-*] + || [istarget ia64-*-*] } { set et_vect_widen_sum_hi_to_si_pattern_saved 1 } } @@ -2644,6 +2645,7 @@ proc check_effective_target_vect_widen_mult_hi_to_si { } { } if { [istarget powerpc*-*-*] || [istarget spu-*-*] + || [istarget ia64-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] } { set et_vect_widen_mult_hi_to_si_saved 1 @@ -2665,6 +2667,9 @@ proc check_effective_target_vect_sdot_qi { } { verbose "check_effective_target_vect_sdot_qi: using cached result" 2 } else { set et_vect_sdot_qi_saved 0 
+ if { [istarget ia64-*-*] } { + set et_vect_sdot_qi_saved 1 + } } verbose "check_effective_target_vect_sdot_qi: returning $et_vect_sdot_qi_saved" 2 return $et_vect_sdot_qi_saved @@ -2682,7 +2687,8 @@ proc check_effective_target_vect_udot_qi { } { verbose "check_effective_target_vect_udot_qi: using cached result" 2 } else { set et_vect_udot_qi_saved 0 - if { [istarget powerpc*-*-*] } { + if { [istarget powerpc*-*-*] + || [istarget ia64-*-*] } { set et_vect_udot_qi_saved 1 } } @@ -2703,6 +2709,7 @@ proc check_effective_target_vect_sdot_hi { } { } else { set et_vect_sdot_hi_saved 0 if { ([istarget powerpc*-*-*] && ![istarget powerpc-*-linux*paired*]) + || [istarget ia64-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] } { set et_vect_sdot_hi_saved 1 @@ -2774,6 +2781,7 @@ proc check_effective_target_vect_unpack { } { || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget spu-*-*] + || [istarget ia64-*-*] || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { set et_vect_unpack_saved 1 } @@ -3050,6 +3058,7 @@ proc check_effective_target_vect_int_mult { } { || [istarget spu-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget ia64-*-*] || [check_effective_target_arm32] } { set et_vect_int_mult_saved 1 } @@ -3071,6 +3080,7 @@ proc check_effective_target_vect_extract_even_odd { } { if { [istarget powerpc*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget ia64-*-*] || [istarget spu-*-*] } { set et_vect_extract_even_odd_saved 1 } @@ -3093,6 +3103,7 @@ proc check_effective_target_vect_extract_even_odd_wide { } { if { [istarget powerpc*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget ia64-*-*] || [istarget spu-*-*] } { set et_vect_extract_even_odd_wide_saved 1 } @@ -3114,6 +3125,7 @@ proc check_effective_target_vect_interleave { } { if { [istarget powerpc*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget ia64-*-*] || [istarget spu-*-*] } { set et_vect_interleave_saved 1 }