re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right away, to be more efficient for Ryzen and Intel)
PR target/80846
* config/i386/i386.c (ix86_expand_vector_init_general): Handle V2TImode and V4TImode.
(ix86_expand_vector_extract): Likewise.
* config/i386/sse.md (VMOVE): Enable V4TImode even for just TARGET_AVX512F, instead of only for TARGET_AVX512BW.
(ssescalarmode): Handle V4TImode and V2TImode.
(VEC_EXTRACT_MODE): Add V4TImode and V2TImode.
(*vec_extractv2ti, *vec_extractv4ti): New insns.
(VEXTRACTI128_MODE): New mode iterator.
(splitter for *vec_extractv?ti first element): New.
(VEC_INIT_MODE): New mode iterator.
(vec_init<mode>): Consolidate 3 expanders into one using VEC_INIT_MODE mode iterator.
* gcc.target/i386/avx-pr80846.c: New test.
* gcc.target/i386/avx2-pr80846.c: New test.
* gcc.target/i386/avx512f-pr80846.c: New test.
From-SVN: r250397
This commit is contained in:
parent
f0a404561c
commit
2953b72fdd
7 changed files with 155 additions and 22 deletions
|
@ -1,3 +1,20 @@
|
|||
2017-07-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/80846
|
||||
* config/i386/i386.c (ix86_expand_vector_init_general): Handle
|
||||
V2TImode and V4TImode.
|
||||
(ix86_expand_vector_extract): Likewise.
|
||||
* config/i386/sse.md (VMOVE): Enable V4TImode even for just
|
||||
TARGET_AVX512F, instead of only for TARGET_AVX512BW.
|
||||
(ssescalarmode): Handle V4TImode and V2TImode.
|
||||
(VEC_EXTRACT_MODE): Add V4TImode and V2TImode.
|
||||
(*vec_extractv2ti, *vec_extractv4ti): New insns.
|
||||
(VEXTRACTI128_MODE): New mode iterator.
|
||||
(splitter for *vec_extractv?ti first element): New.
|
||||
(VEC_INIT_MODE): New mode iterator.
|
||||
(vec_init<mode>): Consolidate 3 expanders into one using
|
||||
VEC_INIT_MODE mode iterator.
|
||||
|
||||
2017-07-20 Alexander Monakov <amonakov@ispras.ru>
|
||||
|
||||
* lra-assigns.c (pseudo_compare_func): Fix comparison step based on
|
||||
|
|
|
@ -44118,6 +44118,26 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
|
|||
ix86_expand_vector_init_concat (mode, target, ops, n);
|
||||
return;
|
||||
|
||||
case V2TImode:
|
||||
for (i = 0; i < 2; i++)
|
||||
ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
|
||||
op0 = gen_reg_rtx (V4DImode);
|
||||
ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
|
||||
emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
|
||||
return;
|
||||
|
||||
case V4TImode:
|
||||
for (i = 0; i < 4; i++)
|
||||
ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
|
||||
ops[4] = gen_reg_rtx (V4DImode);
|
||||
ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
|
||||
ops[5] = gen_reg_rtx (V4DImode);
|
||||
ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
|
||||
op0 = gen_reg_rtx (V8DImode);
|
||||
ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
|
||||
emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
|
||||
return;
|
||||
|
||||
case V32QImode:
|
||||
half_mode = V16QImode;
|
||||
goto half;
|
||||
|
@ -44659,6 +44679,8 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|||
|
||||
case V2DFmode:
|
||||
case V2DImode:
|
||||
case V2TImode:
|
||||
case V4TImode:
|
||||
use_vec_extr = true;
|
||||
break;
|
||||
|
||||
|
|
|
@ -175,7 +175,7 @@
|
|||
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
|
||||
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
|
||||
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
|
||||
(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
|
||||
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
|
||||
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
|
||||
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
|
||||
|
||||
|
@ -687,7 +687,8 @@
|
|||
(V16SI "SI") (V8SI "SI") (V4SI "SI")
|
||||
(V8DI "DI") (V4DI "DI") (V2DI "DI")
|
||||
(V16SF "SF") (V8SF "SF") (V4SF "SF")
|
||||
(V8DF "DF") (V4DF "DF") (V2DF "DF")])
|
||||
(V8DF "DF") (V4DF "DF") (V2DF "DF")
|
||||
(V4TI "TI") (V2TI "TI")])
|
||||
|
||||
;; Mapping of vector modes to the 128bit modes
|
||||
(define_mode_attr ssexmmmode
|
||||
|
@ -6920,15 +6921,6 @@
|
|||
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
|
||||
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
|
||||
|
||||
(define_expand "vec_init<mode>"
|
||||
[(match_operand:V_128 0 "register_operand")
|
||||
(match_operand 1)]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
ix86_expand_vector_init (false, operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Avoid combining registers from different units in a single alternative,
|
||||
;; see comment above inline_secondary_memory_needed function in i386.c
|
||||
(define_insn "vec_set<mode>_0"
|
||||
|
@ -7886,7 +7878,8 @@
|
|||
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
|
||||
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
|
||||
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
|
||||
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
|
||||
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
|
||||
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
|
||||
|
||||
(define_expand "vec_extract<mode>"
|
||||
[(match_operand:<ssescalarmode> 0 "register_operand")
|
||||
|
@ -13734,6 +13727,50 @@
|
|||
operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
|
||||
})
|
||||
|
||||
(define_insn "*vec_extractv2ti"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
|
||||
(vec_select:TI
|
||||
(match_operand:V2TI 1 "register_operand" "x,v")
|
||||
(parallel
|
||||
[(match_operand:SI 2 "const_0_to_1_operand")])))]
|
||||
"TARGET_AVX"
|
||||
"@
|
||||
vextract%~128\t{%2, %1, %0|%0, %1, %2}
|
||||
vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "vex,evex")
|
||||
(set_attr "mode" "OI")])
|
||||
|
||||
(define_insn "*vec_extractv4ti"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
|
||||
(vec_select:TI
|
||||
(match_operand:V4TI 1 "register_operand" "v")
|
||||
(parallel
|
||||
[(match_operand:SI 2 "const_0_to_3_operand")])))]
|
||||
"TARGET_AVX512F"
|
||||
"vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "XI")])
|
||||
|
||||
(define_mode_iterator VEXTRACTI128_MODE
|
||||
[(V4TI "TARGET_AVX512F") V2TI])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand")
|
||||
(vec_select:TI
|
||||
(match_operand:VEXTRACTI128_MODE 1 "register_operand")
|
||||
(parallel [(const_int 0)])))]
|
||||
"TARGET_AVX
|
||||
&& reload_completed
|
||||
&& (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
|
||||
[(set (match_dup 0) (match_dup 1))]
|
||||
"operands[1] = gen_lowpart (TImode, operands[1]);")
|
||||
|
||||
;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
|
||||
;; vector modes into vec_extract*.
|
||||
(define_split
|
||||
|
@ -18738,19 +18775,20 @@
|
|||
<ssehalfvecmode>mode);
|
||||
})
|
||||
|
||||
(define_expand "vec_init<mode>"
|
||||
[(match_operand:V_256 0 "register_operand")
|
||||
(match_operand 1)]
|
||||
"TARGET_AVX"
|
||||
{
|
||||
ix86_expand_vector_init (false, operands[0], operands[1]);
|
||||
DONE;
|
||||
})
|
||||
;; Modes handled by vec_init patterns.
|
||||
(define_mode_iterator VEC_INIT_MODE
|
||||
[(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
|
||||
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
|
||||
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
|
||||
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
|
||||
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
|
||||
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
|
||||
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
|
||||
|
||||
(define_expand "vec_init<mode>"
|
||||
[(match_operand:VF48_I1248 0 "register_operand")
|
||||
[(match_operand:VEC_INIT_MODE 0 "register_operand")
|
||||
(match_operand 1)]
|
||||
"TARGET_AVX512F"
|
||||
"TARGET_SSE"
|
||||
{
|
||||
ix86_expand_vector_init (false, operands[0], operands[1]);
|
||||
DONE;
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2017-07-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/80846
|
||||
* gcc.target/i386/avx-pr80846.c: New test.
|
||||
* gcc.target/i386/avx2-pr80846.c: New test.
|
||||
* gcc.target/i386/avx512f-pr80846.c: New test.
|
||||
|
||||
2017-07-20 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/81388
|
||||
|
|
39
gcc/testsuite/gcc.target/i386/avx-pr80846.c
Normal file
39
gcc/testsuite/gcc.target/i386/avx-pr80846.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* PR target/80846 */
|
||||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2 -mavx -mno-avx2" } */
|
||||
|
||||
typedef __int128 V __attribute__((vector_size (32)));
|
||||
typedef long long W __attribute__((vector_size (32)));
|
||||
typedef int X __attribute__((vector_size (16)));
|
||||
typedef __int128 Y __attribute__((vector_size (64)));
|
||||
typedef long long Z __attribute__((vector_size (64)));
|
||||
|
||||
W f1 (__int128 x, __int128 y) { return (W) ((V) { x, y }); }
|
||||
__int128 f2 (W x) { return ((V)x)[0]; }
|
||||
__int128 f3 (W x) { return ((V)x)[1]; }
|
||||
W f4 (X x, X y) { union { X x; __int128 i; } u = { .x = x }, v = { .x = y }; return (W) ((V) { u.i, v.i }); }
|
||||
X f5 (W x) { return (X)(((V)x)[0]); }
|
||||
X f6 (W x) { return (X)(((V)x)[1]); }
|
||||
W f7 (void) { return (W) ((V) { 2, 3 }); }
|
||||
W f8 (X x) { union { X x; __int128 i; } u = { .x = x }; return (W) ((V) { u.i, 3 }); }
|
||||
W f9 (X x) { union { X x; __int128 i; } u = { .x = x }; return (W) ((V) { 2, u.i }); }
|
||||
W f10 (X x) { union { X x; __int128 i; } u = { .x = x }; return (W) ((V) { u.i, u.i }); }
|
||||
#ifdef __AVX512F__
|
||||
Z f11 (__int128 x, __int128 y, __int128 z, __int128 a) { return (Z) ((Y) { x, y, z, a }); }
|
||||
__int128 f12 (Z x) { return ((Y)x)[0]; }
|
||||
__int128 f13 (Z x) { return ((Y)x)[1]; }
|
||||
__int128 f14 (Z x) { return ((Y)x)[2]; }
|
||||
__int128 f15 (Z x) { return ((Y)x)[3]; }
|
||||
Z f16 (X x, X y, X z, X a) { union { X x; __int128 i; } u = { .x = x }, v = { .x = y }, w = { .x = z }, t = { .x = a };
|
||||
return (Z) ((Y) { u.i, v.i, w.i, t.i }); }
|
||||
X f17 (Z x) { return (X)(((Y)x)[0]); }
|
||||
X f18 (Z x) { return (X)(((Y)x)[1]); }
|
||||
X f19 (Z x) { return (X)(((Y)x)[2]); }
|
||||
X f20 (Z x) { return (X)(((Y)x)[3]); }
|
||||
Z f21 (void) { return (Z) ((Y) { 2, 3, 4, 5 }); }
|
||||
Z f22 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { u.i, 3, 4, 5 }); }
|
||||
Z f23 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { 2, u.i, 4, 5 }); }
|
||||
Z f24 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { 2, 3, u.i, 5 }); }
|
||||
Z f25 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { 2, 3, 4, u.i }); }
|
||||
Z f26 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { u.i, u.i, u.i, u.i }); }
|
||||
#endif
|
5
gcc/testsuite/gcc.target/i386/avx2-pr80846.c
Normal file
5
gcc/testsuite/gcc.target/i386/avx2-pr80846.c
Normal file
|
@ -0,0 +1,5 @@
|
|||
/* PR target/80846 */
|
||||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
|
||||
|
||||
#include "avx-pr80846.c"
|
5
gcc/testsuite/gcc.target/i386/avx512f-pr80846.c
Normal file
5
gcc/testsuite/gcc.target/i386/avx512f-pr80846.c
Normal file
|
@ -0,0 +1,5 @@
|
|||
/* PR target/80846 */
|
||||
/* { dg-do compile { target int128 } } */
|
||||
/* { dg-options "-O2 -mavx512f" } */
|
||||
|
||||
#include "avx-pr80846.c"
|
Loading…
Add table
Reference in a new issue