diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 73cc1e46367..e8aff1eeef4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2007-07-30 Julian Brown + + * config/arm/neon.md (V_ext): New mode attribute. + (neon_vget_lane<mode>): Replace with define_expand. + (neon_vget_lane<mode>_sext_internal) + (neon_vget_lane<mode>_zext_internal): New define_insns for double + and quad precision vectors. + (neon_vget_lanedi): Add bounds check. Remove dead comment. + * config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics + have typeless 32-bit result. + 2007-07-30 Andrew Pinski PR tree-opt/32527 diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 06b9b3c762e..1b09ead3af4 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -266,6 +266,14 @@ (V2SF "SF") (V4SF "SF") (DI "DI") (V2DI "DI")]) +;; Element modes for vector extraction, padded up to register size. + +(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI") + (V4HI "SI") (V8HI "SI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + ;; Mode of pair of elements for each vector mode, to define transfer ;; size for structure lane/dup loads and stores. (define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") @@ -2385,27 +2393,107 @@ DONE; }) -;; FIXME: 32-bit element sizes are a bit funky (should be output as .32 not -;; .u32), but the assembler should cope with that. 
+(define_insn "neon_vget_lane<mode>_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select:<V_elem> + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" + "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]" + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) -(define_insn "neon_vget_lane<mode>" - [(set (match_operand:<V_elem> 0 "s_register_operand" "=r") - (unspec:<V_elem> [(match_operand:VD 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] +(define_insn "neon_vget_lane<mode>_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select:<V_elem> + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" + "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]" + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lane<mode>_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select:<V_elem> + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { - neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode)); - return "vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]"; + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; + unsigned int elt = INTVAL (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt % halfelts); + output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops); + + return ""; } [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_bp_simple")] ) -; Operand 2 (lane number) is ignored because we can only extract the zeroth lane -; with this insn. Operand 3 (info word) is ignored because it does nothing -; useful with 64-bit elements. 
+(define_insn "neon_vget_lane<mode>_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select:<V_elem> + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; + unsigned int elt = INTVAL (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt % halfelts); + output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_expand "neon_vget_lane<mode>" + [(match_operand:<V_ext> 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT magic = INTVAL (operands[3]); + rtx insn; + + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode)); + + if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32) + insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]); + else + { + if ((magic & 1) != 0) + insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1], + operands[2]); + else + insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1], + operands[2]); + } + emit_insn (insn); + DONE; +}) + +; Operand 3 (info word) is ignored because it does nothing useful with 64-bit +; elements. 
(define_insn "neon_vget_lanedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -2422,33 +2510,6 @@ (set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vget_lane<mode>" - [(set (match_operand:<V_elem> 0 "s_register_operand" "=r") - (unspec:<V_elem> [(match_operand:VQ 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] - "TARGET_NEON" -{ - rtx ops[4]; - int regno = REGNO (operands[1]); - unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; - unsigned int elt = INTVAL (operands[2]); - - neon_lane_bounds (operands[2], 0, halfelts * 2); - - ops[0] = operands[0]; - ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); - ops[2] = GEN_INT (elt % halfelts); - ops[3] = operands[3]; - output_asm_insn ("vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]", ops); - - return ""; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) - (define_insn "neon_vget_lanev2di" [(set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V2DI 1 "s_register_operand" "w") diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 39807a48d37..a68c64539da 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -611,7 +611,7 @@ let shift_insert shape elt = let get_lane shape elt = let vtype = type_for_elt shape elt in Arity2 (vtype 0, vtype 1, vtype 2), - (match elt with P8 -> U8 | P16 -> U16 | x -> x) + (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x) let set_lane shape elt = let vtype = type_for_elt shape elt in diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f60df69a862..b7681c36194 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-07-30 Julian Brown + + * gcc.target/arm/neon/v*.c: Regenerate. 
+ 2007-07-30 Paolo Carlini PR c++/32108 diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c index aa4dad6ecb8..4d0561b1ed0 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c @@ -15,5 +15,5 @@ void test_vgetQ_lanef32 (void) out_float32_t = vgetq_lane_f32 (arg0_float32x4_t, 1); } -/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c index 551fd28dd37..0f87fdb3b16 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c @@ -15,5 +15,5 @@ void test_vgetQ_lanes32 (void) out_int32_t = vgetq_lane_s32 (arg0_int32x4_t, 1); } -/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c b/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c index e9191726620..5a9344a808a 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c @@ -15,5 +15,5 @@ void test_vgetQ_laneu32 (void) out_uint32_t = vgetq_lane_u32 (arg0_uint32x4_t, 1); } -/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c b/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c index 3f0a02798a4..e469c6ec40b 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c @@ -15,5 +15,5 @@ void test_vget_lanef32 (void) out_float32_t = vget_lane_f32 (arg0_float32x2_t, 1); } -/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c b/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c index 441b623e834..50b8f40cb5a 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c @@ -15,5 +15,5 @@ void test_vget_lanes32 (void) out_int32_t = vget_lane_s32 (arg0_int32x2_t, 1); } -/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c b/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c index 13d33801808..fd09ad4d0d2 100644 --- a/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c +++ b/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c @@ -15,5 +15,5 @@ void test_vget_laneu32 (void) out_uint32_t = vget_lane_u32 (arg0_uint32x2_t, 1); } -/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */