spu.md (vec_unpacku_hi_v8hi, [...]): Implement.
* config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi, vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi, vec_unpacku_hi_v16qi, vec_unpacku_lo_v16qi, vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi): Implement. From-SVN: r132472
This commit is contained in:
parent
a8971bdb01
commit
76a893cfc4
5 changed files with 195 additions and 3 deletions
|
@ -1,3 +1,9 @@
|
|||
2008-02-20 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi,
|
||||
vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi, vec_unpacku_hi_v16qi,
|
||||
vec_unpacku_lo_v16qi, vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi): Implement.
|
||||
|
||||
2008-02-19 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* predict.c (tree_bb_level_predictions): Remove variable next
|
||||
|
|
|
@ -4455,3 +4455,179 @@ selb\t%0,%4,%0,%3"
|
|||
|
||||
DONE;
|
||||
}")
|
||||
|
||||
;; Unsigned unpack of V8HI elements 0-3: operand 0 (V4SI) receives the
;; zero-extension of the four halfwords selected from operand 1.
(define_expand "vec_unpacku_hi_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
	(zero_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
  ""
{
  /* One selector byte per result byte.  Selectors 0x00-0x07 name bytes of
     operand 1 and land in the last two bytes of each four-byte slot.
     NOTE(review): the 0x80 selectors are assumed to make shufb produce a
     zero byte, which is what the zero_extend above requires; confirm
     against the SPU ISA description of shufb.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x80, 0x00, 0x01,
    0x80, 0x80, 0x02, 0x03,
    0x80, 0x80, 0x04, 0x05,
    0x80, 0x80, 0x06, 0x07
  };
  rtx shuf_ctl = gen_reg_rtx (TImode);

  /* Load the selector table into a register, then shuffle operand 1
     (passed as both shufb sources) straight into the destination.  */
  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], shuf_ctl));

  DONE;
})
|
||||
|
||||
;; Unsigned unpack of V8HI elements 4-7: operand 0 (V4SI) receives the
;; zero-extension of the four halfwords selected from operand 1.
(define_expand "vec_unpacku_lo_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
	(zero_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
  ""
{
  /* One selector byte per result byte.  Selectors 0x08-0x0F name bytes of
     operand 1 and land in the last two bytes of each four-byte slot.
     NOTE(review): the 0x80 selectors are assumed to make shufb produce a
     zero byte, which is what the zero_extend above requires; confirm
     against the SPU ISA description of shufb.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x80, 0x08, 0x09,
    0x80, 0x80, 0x0A, 0x0B,
    0x80, 0x80, 0x0C, 0x0D,
    0x80, 0x80, 0x0E, 0x0F
  };
  rtx shuf_ctl = gen_reg_rtx (TImode);

  /* Load the selector table into a register, then shuffle operand 1
     (passed as both shufb sources) straight into the destination.  */
  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], shuf_ctl));

  DONE;
})
|
||||
|
||||
;; Signed unpack of V8HI elements 0-3: operand 0 (V4SI) receives the
;; sign-extension of the four halfwords selected from operand 1.
(define_expand "vec_unpacks_hi_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
	(sign_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
  ""
{
  rtx spread = gen_reg_rtx (V8HImode);
  rtx widened = gen_reg_rtx (V4SImode);
  rtx shuf_ctl = gen_reg_rtx (TImode);
  /* Selectors 0x00-0x07 place each chosen halfword of operand 1 in the
     last two bytes of a four-byte slot.  NOTE(review): what the 0x80
     selectors produce is defined by the SPU ISA, not by this file.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x80, 0x00, 0x01,
    0x80, 0x80, 0x02, 0x03,
    0x80, 0x80, 0x04, 0x05,
    0x80, 0x80, 0x06, 0x07
  };

  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));

  /* Two steps: spread the halfwords out with shufb, then run spu_xshw on
     the result.  NOTE(review): spu_xshw is defined elsewhere in spu.md;
     it is presumed to perform the halfword-to-word sign extension the
     template above describes.  */
  emit_insn (gen_shufb (spread, operands[1], operands[1], shuf_ctl));
  emit_insn (gen_spu_xshw (widened, spread));
  emit_move_insn (operands[0], widened);

  DONE;
})
|
||||
|
||||
;; Signed unpack of V8HI elements 4-7: operand 0 (V4SI) receives the
;; sign-extension of the four halfwords selected from operand 1.
(define_expand "vec_unpacks_lo_v8hi"
  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
	(sign_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
  ""
{
  rtx spread = gen_reg_rtx (V8HImode);
  rtx widened = gen_reg_rtx (V4SImode);
  rtx shuf_ctl = gen_reg_rtx (TImode);
  /* Selectors 0x08-0x0F place each chosen halfword of operand 1 in the
     last two bytes of a four-byte slot.  NOTE(review): what the 0x80
     selectors produce is defined by the SPU ISA, not by this file.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x80, 0x08, 0x09,
    0x80, 0x80, 0x0A, 0x0B,
    0x80, 0x80, 0x0C, 0x0D,
    0x80, 0x80, 0x0E, 0x0F
  };

  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));

  /* Two steps: spread the halfwords out with shufb, then run spu_xshw on
     the result.  NOTE(review): spu_xshw is defined elsewhere in spu.md;
     it is presumed to perform the halfword-to-word sign extension the
     template above describes.  */
  emit_insn (gen_shufb (spread, operands[1], operands[1], shuf_ctl));
  emit_insn (gen_spu_xshw (widened, spread));
  emit_move_insn (operands[0], widened);

  DONE;
})
|
||||
|
||||
;; Unsigned unpack of V16QI elements 0-7: operand 0 (V8HI) receives the
;; zero-extension of the eight bytes selected from operand 1.
(define_expand "vec_unpacku_hi_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
	(zero_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
		       (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
  ""
{
  /* One selector byte per result byte.  Selectors 0x00-0x07 name bytes of
     operand 1 and land in the second byte of each two-byte slot.
     NOTE(review): the 0x80 selectors are assumed to make shufb produce a
     zero byte, which is what the zero_extend above requires; confirm
     against the SPU ISA description of shufb.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x00, 0x80, 0x01,
    0x80, 0x02, 0x80, 0x03,
    0x80, 0x04, 0x80, 0x05,
    0x80, 0x06, 0x80, 0x07
  };
  rtx shuf_ctl = gen_reg_rtx (TImode);

  /* Load the selector table into a register, then shuffle operand 1
     (passed as both shufb sources) straight into the destination.  */
  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], shuf_ctl));

  DONE;
})
|
||||
|
||||
;; Unsigned unpack of V16QI elements 8-15: operand 0 (V8HI) receives the
;; zero-extension of the eight bytes selected from operand 1.
(define_expand "vec_unpacku_lo_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
	(zero_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
		       (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
  ""
{
  /* One selector byte per result byte.  Selectors 0x08-0x0F name bytes of
     operand 1 and land in the second byte of each two-byte slot.
     NOTE(review): the 0x80 selectors are assumed to make shufb produce a
     zero byte, which is what the zero_extend above requires; confirm
     against the SPU ISA description of shufb.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x08, 0x80, 0x09,
    0x80, 0x0A, 0x80, 0x0B,
    0x80, 0x0C, 0x80, 0x0D,
    0x80, 0x0E, 0x80, 0x0F
  };
  rtx shuf_ctl = gen_reg_rtx (TImode);

  /* Load the selector table into a register, then shuffle operand 1
     (passed as both shufb sources) straight into the destination.  */
  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));
  emit_insn (gen_shufb (operands[0], operands[1], operands[1], shuf_ctl));

  DONE;
})
|
||||
|
||||
;; Signed unpack of V16QI elements 0-7: operand 0 (V8HI) receives the
;; sign-extension of the eight bytes selected from operand 1.
(define_expand "vec_unpacks_hi_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
	(sign_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
		       (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
  ""
{
  rtx spread = gen_reg_rtx (V16QImode);
  rtx widened = gen_reg_rtx (V8HImode);
  rtx shuf_ctl = gen_reg_rtx (TImode);
  /* Selectors 0x00-0x07 place each chosen byte of operand 1 in the second
     byte of a two-byte slot.  NOTE(review): what the 0x80 selectors
     produce is defined by the SPU ISA, not by this file.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x00, 0x80, 0x01,
    0x80, 0x02, 0x80, 0x03,
    0x80, 0x04, 0x80, 0x05,
    0x80, 0x06, 0x80, 0x07
  };

  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));

  /* Two steps: spread the bytes out with shufb, then run spu_xsbh on the
     result.  NOTE(review): spu_xsbh is defined elsewhere in spu.md; it is
     presumed to perform the byte-to-halfword sign extension the template
     above describes.  */
  emit_insn (gen_shufb (spread, operands[1], operands[1], shuf_ctl));
  emit_insn (gen_spu_xsbh (widened, spread));
  emit_move_insn (operands[0], widened);

  DONE;
})
|
||||
|
||||
;; Signed unpack of V16QI elements 8-15: operand 0 (V8HI) receives the
;; sign-extension of the eight bytes selected from operand 1.
(define_expand "vec_unpacks_lo_v16qi"
  [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
	(sign_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "spu_reg_operand" "r")
	    (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
		       (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
  ""
{
  rtx spread = gen_reg_rtx (V16QImode);
  rtx widened = gen_reg_rtx (V8HImode);
  rtx shuf_ctl = gen_reg_rtx (TImode);
  /* Selectors 0x08-0x0F place each chosen byte of operand 1 in the second
     byte of a two-byte slot.  NOTE(review): what the 0x80 selectors
     produce is defined by the SPU ISA, not by this file.  */
  unsigned char ctl_bytes[16] = {
    0x80, 0x08, 0x80, 0x09,
    0x80, 0x0A, 0x80, 0x0B,
    0x80, 0x0C, 0x80, 0x0D,
    0x80, 0x0E, 0x80, 0x0F
  };

  emit_move_insn (shuf_ctl, array_to_constant (TImode, ctl_bytes));

  /* Two steps: spread the bytes out with shufb, then run spu_xsbh on the
     result.  NOTE(review): spu_xsbh is defined elsewhere in spu.md; it is
     presumed to perform the byte-to-halfword sign extension the template
     above describes.  */
  emit_insn (gen_shufb (spread, operands[1], operands[1], shuf_ctl));
  emit_insn (gen_spu_xsbh (widened, spread));
  emit_move_insn (operands[0], widened);

  DONE;
})
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
2008-02-20 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_vect_unpack):
|
||||
Return true for SPU.
|
||||
(check_effective_target_vect_short_mult): Likewise.
|
||||
* gcc.dg/vect/vect-reduc-dot-s16b.c: Expect vectorization of
|
||||
the loop on targets that support vect_unpack.
|
||||
|
||||
2008-02-20 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* g++.dg/compat/struct-layout-1_generate.c (DG_OPTIONS): New define.
|
||||
|
|
|
@ -48,9 +48,9 @@ main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si || vect_unpack } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_short_mult } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_hi_to_si } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { { ! vect_widen_sum_hi_to_si } && { ! vect_unpack } } } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
|
@ -1667,7 +1667,8 @@ proc check_effective_target_vect_unpack { } {
|
|||
set et_vect_unpack_saved 0
|
||||
if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
set et_vect_unpack_saved 1
|
||||
}
|
||||
}
|
||||
|
@ -1876,6 +1877,7 @@ proc check_effective_target_vect_short_mult { } {
|
|||
} else {
|
||||
set et_vect_short_mult_saved 0
|
||||
if { [istarget ia64-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_short_mult_saved 1
|
||||
|
|
Loading…
Add table
Reference in a new issue