neon.md (V_ext): New mode attribute.
gcc/
    * config/arm/neon.md (V_ext): New mode attribute.
    (neon_vget_lane<mode>): Replace with define_expand.
    (neon_vget_lane<mode>_sext_internal)
    (neon_vget_lane<mode>_zext_internal): New define_insns for double
    and quad precision vectors.
    (neon_vget_lanedi): Add bounds check. Remove dead comment.
    * config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics
    have typeless 32-bit result.

gcc/testsuite/
    * gcc.target/arm/neon/*.c: Regenerate.

From-SVN: r127061
This commit is contained in:
parent
7f7639220a
commit
89ffa8fc47
10 changed files with 123 additions and 47 deletions
|
@ -1,3 +1,14 @@
|
|||
2007-07-30 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* config/arm/neon.md (V_ext): New mode attribute.
|
||||
(neon_vget_lane<mode>): Replace with define_expand.
|
||||
(neon_vget_lane<mode>_sext_internal)
|
||||
(neon_vget_lane<mode>_zext_internal): New define_insns for double
|
||||
and quad precision vectors.
|
||||
(neon_vget_lanedi): Add bounds check. Remove dead comment.
|
||||
* config/arm/neon.ml (get_lane): Make 32-bit get-lane intrinsics
|
||||
have typeless 32-bit result.
|
||||
|
||||
2007-07-30 Andrew Pinski <andrew_pinski@playstation.sony.com>
|
||||
|
||||
PR tree-opt/32527
|
||||
|
|
|
@ -266,6 +266,14 @@
|
|||
(V2SF "SF") (V4SF "SF")
|
||||
(DI "DI") (V2DI "DI")])
|
||||
|
||||
;; Element modes for vector extraction, padded up to register size.
|
||||
|
||||
;; V_ext: for each vector mode, the mode that holds one extracted element.
;; Vectors with QI, HI or SI elements all map to SI, the two float
;; vector modes map to SF, and DI / V2DI map to DI.
(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI")
|
||||
(V4HI "SI") (V8HI "SI")
|
||||
(V2SI "SI") (V4SI "SI")
|
||||
(V2SF "SF") (V4SF "SF")
|
||||
(DI "DI") (V2DI "DI")])
|
||||
|
||||
;; Mode of pair of elements for each vector mode, to define transfer
|
||||
;; size for structure lane/dup loads and stores.
|
||||
(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
|
||||
|
@ -2385,27 +2393,107 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; FIXME: 32-bit element sizes are a bit funky (should be output as .32 not
|
||||
;; .u32), but the assembler should cope with that.
|
||||
;; Sign-extending lane extract for the VD vector modes.
;; Operand 0: SI destination in a core register ("r" constraint).
;; Operand 1: source vector ("w" constraint).
;; Operand 2: immediate lane index used by the vec_select.
;; The selected <V_elem> element is sign-extended to SI and emitted as a
;; single "vmov.s<element size>" instruction.
(define_insn "neon_vget_lane<mode>_sext_internal"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
(sign_extend:SI
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VD 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
||||
"TARGET_NEON"
|
||||
"vmov%?.s<V_sz_elem>\t%0, %P1[%c2]"
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vget_lane<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
|
||||
(unspec:<V_elem> [(match_operand:VD 1 "s_register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")
|
||||
(match_operand:SI 3 "immediate_operand" "i")]
|
||||
UNSPEC_VGET_LANE))]
|
||||
;; Zero-extending lane extract for the VD vector modes.
;; Operand 0: SI destination in a core register ("r" constraint).
;; Operand 1: source vector ("w" constraint).
;; Operand 2: immediate lane index used by the vec_select.
;; The selected <V_elem> element is zero-extended to SI and emitted as a
;; single "vmov.u<element size>" instruction.
(define_insn "neon_vget_lane<mode>_zext_internal"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
(zero_extend:SI
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VD 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
||||
"TARGET_NEON"
|
||||
"vmov%?.u<V_sz_elem>\t%0, %P1[%c2]"
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vget_lane<mode>_sext_internal"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
(sign_extend:SI
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VQ 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
|
||||
return "vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]";
|
||||
rtx ops[3];
|
||||
int regno = REGNO (operands[1]);
|
||||
unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
|
||||
unsigned int elt = INTVAL (operands[2]);
|
||||
|
||||
ops[0] = operands[0];
|
||||
ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
|
||||
ops[2] = GEN_INT (elt % halfelts);
|
||||
output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops);
|
||||
|
||||
return "";
|
||||
}
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
; Operand 2 (lane number) is ignored because we can only extract the zeroth lane
|
||||
; with this insn. Operand 3 (info word) is ignored because it does nothing
|
||||
; useful with 64-bit elements.
|
||||
;; Zero-extending lane extract for the VQ vector modes.
;; Operand 0: SI destination in a core register ("r" constraint).
;; Operand 1: source vector ("w" constraint).
;; Operand 2: immediate lane index over the whole vector.
;; The output code rewrites the operands so that the "vmov.u" it prints
;; addresses a <V_HALF>mode register and a lane index within that half.
(define_insn "neon_vget_lane<mode>_zext_internal"
|
||||
[(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
(zero_extend:SI
|
||||
(vec_select:<V_elem>
|
||||
(match_operand:VQ 1 "s_register_operand" "w")
|
||||
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx ops[3];
|
||||
int regno = REGNO (operands[1]);
|
||||
/* Number of lanes held by each half of the vector.  */
unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
|
||||
unsigned int elt = INTVAL (operands[2]);
|
||||
|
||||
ops[0] = operands[0];
|
||||
/* elt / halfelts is 0 for the first half and 1 for the second; the half
   is addressed as a <V_HALF>mode register at regno + 2 * that value.
   NOTE(review): the stride of 2 presumably reflects how the backend
   numbers these registers -- confirm.  */
ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
|
||||
/* Lane index relative to the selected half.  */
ops[2] = GEN_INT (elt % halfelts);
|
||||
output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops);
|
||||
|
||||
/* The instruction was already printed above, so return an empty
   template.  */
return "";
|
||||
}
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
;; Expander for lane extraction over the VDQW vector modes.
;; Operand 0: destination, in the <V_ext> mode of the source vector.
;; Operand 1: source vector.
;; Operand 2: immediate lane index.
;; Operand 3: immediate info word ("magic"); only its low two bits are
;;            examined here.
;; Dispatch: if the low two bits of the info word are both set, or the
;; element is 32 bits wide, a plain vec_extract<mode> is used.  Otherwise
;; bit 0 set selects the sign-extending insn and bit 0 clear selects the
;; zero-extending insn.
(define_expand "neon_vget_lane<mode>"
|
||||
[(match_operand:<V_ext> 0 "s_register_operand" "")
|
||||
(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:SI 2 "immediate_operand" "")
|
||||
(match_operand:SI 3 "immediate_operand" "")]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
HOST_WIDE_INT magic = INTVAL (operands[3]);
|
||||
rtx insn;
|
||||
|
||||
/* Check the lane index against the number of lanes in the vector.
   NOTE(review): presumably this diagnoses an index outside
   [0, nunits) -- confirm against neon_lane_bounds.  */
neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
|
||||
|
||||
/* NOTE(review): for 8- and 16-bit elements operand 0 is SImode (via
   V_ext); confirm vec_extract<mode> accepts that destination when this
   branch is taken through the (magic & 3) == 3 test.  */
if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
|
||||
insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
|
||||
else
|
||||
{
|
||||
/* Bit 0 of the info word picks sign extension over zero extension.  */
if ((magic & 1) != 0)
|
||||
insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
|
||||
operands[2]);
|
||||
else
|
||||
insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
|
||||
operands[2]);
|
||||
}
|
||||
emit_insn (insn);
|
||||
DONE;
|
||||
})
|
||||
|
||||
; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
|
||||
; elements.
|
||||
|
||||
(define_insn "neon_vget_lanedi"
|
||||
[(set (match_operand:DI 0 "s_register_operand" "=r")
|
||||
|
@ -2422,33 +2510,6 @@
|
|||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vget_lane<mode>"
|
||||
[(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
|
||||
(unspec:<V_elem> [(match_operand:VQ 1 "s_register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")
|
||||
(match_operand:SI 3 "immediate_operand" "i")]
|
||||
UNSPEC_VGET_LANE))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx ops[4];
|
||||
int regno = REGNO (operands[1]);
|
||||
unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
|
||||
unsigned int elt = INTVAL (operands[2]);
|
||||
|
||||
neon_lane_bounds (operands[2], 0, halfelts * 2);
|
||||
|
||||
ops[0] = operands[0];
|
||||
ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
|
||||
ops[2] = GEN_INT (elt % halfelts);
|
||||
ops[3] = operands[3];
|
||||
output_asm_insn ("vmov%?.%t3%#<V_sz_elem>\t%0, %P1[%c2]", ops);
|
||||
|
||||
return "";
|
||||
}
|
||||
[(set_attr "predicable" "yes")
|
||||
(set_attr "neon_type" "neon_bp_simple")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vget_lanev2di"
|
||||
[(set (match_operand:DI 0 "s_register_operand" "=r")
|
||||
(unspec:DI [(match_operand:V2DI 1 "s_register_operand" "w")
|
||||
|
|
|
@ -611,7 +611,7 @@ let shift_insert shape elt =
|
|||
let get_lane shape elt =
|
||||
let vtype = type_for_elt shape elt in
|
||||
Arity2 (vtype 0, vtype 1, vtype 2),
|
||||
(match elt with P8 -> U8 | P16 -> U16 | x -> x)
|
||||
(match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)
|
||||
|
||||
let set_lane shape elt =
|
||||
let vtype = type_for_elt shape elt in
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2007-07-30 Julian Brown <julian@codesourcery.com>
|
||||
|
||||
* gcc.target/arm/neon/v*.c: Regenerate.
|
||||
|
||||
2007-07-30 Paolo Carlini <pcarlini@suse.de>
|
||||
|
||||
PR c++/32108
|
||||
|
|
|
@ -15,5 +15,5 @@ void test_vgetQ_lanef32 (void)
|
|||
out_float32_t = vgetq_lane_f32 (arg0_float32x4_t, 1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
|
|
@ -15,5 +15,5 @@ void test_vgetQ_lanes32 (void)
|
|||
out_int32_t = vgetq_lane_s32 (arg0_int32x4_t, 1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
|
|
@ -15,5 +15,5 @@ void test_vgetQ_laneu32 (void)
|
|||
out_uint32_t = vgetq_lane_u32 (arg0_uint32x4_t, 1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
|
|
@ -15,5 +15,5 @@ void test_vget_lanef32 (void)
|
|||
out_float32_t = vget_lane_f32 (arg0_float32x2_t, 1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
|
|
@ -15,5 +15,5 @@ void test_vget_lanes32 (void)
|
|||
out_int32_t = vget_lane_s32 (arg0_int32x2_t, 1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmov\.s32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
|
|
@ -15,5 +15,5 @@ void test_vget_laneu32 (void)
|
|||
out_uint32_t = vget_lane_u32 (arg0_uint32x2_t, 1);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmov\.u32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
||||
|
|
Loading…
Add table
Reference in a new issue