diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0bee6c9e681..5a188dcd328 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2008-02-20 Ira Rosen + + * config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi, + vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi, vec_unpacku_hi_v16qi, + vec_unpacku_lo_v16qi, vec_unpacks_lo_v16qi): Implement. + 2008-02-19 Jan Hubicka * predict.c (tree_bb_level_predictions): Remove variable next diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md index f1bfdaff607..14854606b8d 100644 --- a/gcc/config/spu/spu.md +++ b/gcc/config/spu/spu.md @@ -4455,3 +4455,179 @@ selb\t%0,%4,%0,%3" DONE; }") + +(define_expand "vec_unpacku_hi_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] + "" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, + 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacku_lo_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, + 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacks_hi_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))] + "" +{ + rtx tmp1 = gen_reg_rtx (V8HImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, + 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xshw (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + + DONE; +}) + +(define_expand "vec_unpacks_lo_v8hi" + [(set (match_operand:V4SI 0 "spu_reg_operand" "=r") + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "spu_reg_operand" "r") + (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V8HImode); + rtx tmp2 = gen_reg_rtx (V4SImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B, + 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xshw (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + +DONE; +}) + +(define_expand "vec_unpacku_hi_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] + "" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, + 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacku_lo_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] +"" +{ + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, + 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask)); + + DONE; +}) + +(define_expand "vec_unpacks_hi_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3) + (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V16QImode); + rtx tmp2 = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03, + 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xsbh (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + + DONE; +}) + +(define_expand "vec_unpacks_lo_v16qi" + [(set (match_operand:V8HI 0 "spu_reg_operand" "=r") + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "spu_reg_operand" "r") + (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11) + (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))] +"" +{ + rtx tmp1 = gen_reg_rtx (V16QImode); + rtx tmp2 = gen_reg_rtx (V8HImode); + rtx mask = gen_reg_rtx (TImode); + unsigned char arr[16] = { + 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B, + 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F}; + + emit_move_insn (mask, array_to_constant (TImode, arr)); + emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask)); + emit_insn (gen_spu_xsbh (tmp2, tmp1)); + emit_move_insn (operands[0], tmp2); + +DONE; +}) + + + + diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1f728fd362f..4037344d742 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2008-02-20 Ira Rosen + + * lib/target-supports.exp (check_effective_target_vect_unpack): + Return true for SPU. + (check_effective_target_vect_short_mult): Likewise. + * gcc.dg/vect/vect-reduc-dot-s16b.c: Expect vectorization of + the loop on targets that support vect_unpack. + 2008-02-20 Uros Bizjak * g++.dg/compat/struct-layout-1_generate.c (DG_OPTIONS): New define. diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c index 8c636a5f703..587800c032a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c @@ -48,9 +48,9 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si || vect_unpack } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_short_mult } } } } */ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_hi_to_si } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { { ! vect_widen_sum_hi_to_si } && { ! vect_unpack } } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3f8cfaad753..ba1454eaab8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1667,7 +1667,8 @@ proc check_effective_target_vect_unpack { } { set et_vect_unpack_saved 0 if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*]) || [istarget i?86-*-*] - || [istarget x86_64-*-*] } { + || [istarget x86_64-*-*] + || [istarget spu-*-*] } { set et_vect_unpack_saved 1 } } @@ -1876,6 +1877,7 @@ proc check_effective_target_vect_short_mult { } { } else { set et_vect_short_mult_saved 0 if { [istarget ia64-*-*] + || [istarget spu-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] } { set et_vect_short_mult_saved 1