re PR middle-end/85090 (wrong code with -O2 -fno-tree-dominator-opts -mavx512f -fira-algorithm=priority)

PR middle-end/85090
	* config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F.
	(V_128_256): New mode iterator.
	(*avx512dq_vextract<shuffletype>64x2_1 splitter): New define_split.
	(*avx512f_vextract<shuffletype>32x4_1 splitter): Likewise.
	(xop_pcmov_<mode><avxsizesuffix>): Use V_128_256 mode iterator instead
	of V.
	* config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and
	V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F.

	* gcc.target/i386/avx512f-pr85090-1.c: New test.
	* gcc.target/i386/avx512f-pr85090-2.c: New test.
	* gcc.target/i386/avx512f-pr85090-3.c: New test.
	* gcc.target/i386/avx512bw-pr85090-2.c: New test.
	* gcc.target/i386/avx512bw-pr85090-3.c: New test.

From-SVN: r258994
This commit is contained in:
Jakub Jelinek 2018-04-01 08:05:01 +02:00
parent 0a4800de9f
commit 7be6ee78d6
9 changed files with 290 additions and 19 deletions

View file

@ -1,3 +1,15 @@
2018-04-01 Jakub Jelinek <jakub@redhat.com>
PR middle-end/85090
* config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F.
(V_128_256): New mode iterator.
(*avx512dq_vextract<shuffletype>64x2_1 splitter): New define_split.
(*avx512f_vextract<shuffletype>32x4_1 splitter): Likewise.
(xop_pcmov_<mode><avxsizesuffix>): Use V_128_256 mode iterator instead
of V.
* config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and
V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F.
2018-03-31 Segher Boessenkool <segher@kernel.crashing.org>
PR target/83315
@ -1123,7 +1135,7 @@
PR target/84807
* config/i386/i386.opt: Replace Enforcment with Enforcement.
2018-03-10 Alexandre Oliva <aoliva@redhat.com>
2018-03-10 Alexandre Oliva <aoliva@redhat.com>
PR debug/84620
* dwarf2out.h (dw_val_class): Add dw_val_class_symview.
@ -1171,7 +1183,7 @@
(builtin_access::generic_overlap): Be prepared to handle non-array
base objects.
2018-03-09 Alexandre Oliva <aoliva@redhat.com>
2018-03-09 Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/84682
* lra-constraints.c (process_address_1): Check is_address flag
@ -1302,7 +1314,7 @@
* doc/gcov.texi: Document usage of profile files.
* gcov-io.h: Document changes in the format.
2018-03-08 Alexandre Oliva <aoliva@redhat.com>
2018-03-08 Alexandre Oliva <aoliva@redhat.com>
PR debug/84404
PR debug/84408

View file

@ -44085,21 +44085,69 @@ half:
break;
case E_V32HImode:
if (TARGET_AVX512F && TARGET_AVX512BW)
if (TARGET_AVX512BW)
{
mmode = SImode;
gen_blendm = gen_avx512bw_blendmv32hi;
}
else if (TARGET_AVX512F)
{
half_mode = E_V8HImode;
n = 8;
goto quarter;
}
break;
case E_V64QImode:
if (TARGET_AVX512F && TARGET_AVX512BW)
if (TARGET_AVX512BW)
{
mmode = DImode;
gen_blendm = gen_avx512bw_blendmv64qi;
}
else if (TARGET_AVX512F)
{
half_mode = E_V16QImode;
n = 16;
goto quarter;
}
break;
quarter:
/* Compute offset. */
i = elt / n;
elt %= n;
gcc_assert (i <= 3);
{
/* Extract the quarter. */
tmp = gen_reg_rtx (V4SImode);
rtx tmp2 = gen_lowpart (V16SImode, target);
rtx mask = gen_reg_rtx (QImode);
emit_move_insn (mask, constm1_rtx);
emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i),
tmp, mask));
tmp2 = gen_reg_rtx (half_mode);
emit_move_insn (tmp2, gen_lowpart (half_mode, tmp));
tmp = tmp2;
/* Put val in tmp at elt. */
ix86_expand_vector_set (false, tmp, val, elt);
/* Put it back. */
tmp2 = gen_reg_rtx (V16SImode);
rtx tmp3 = gen_lowpart (V16SImode, target);
mask = gen_reg_rtx (HImode);
emit_move_insn (mask, constm1_rtx);
tmp = gen_lowpart (V4SImode, tmp);
emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i),
tmp3, mask));
emit_move_insn (target, gen_lowpart (mode, tmp2));
}
return;
default:
break;
}

View file

@ -229,8 +229,8 @@
;; All vector modes
(define_mode_iterator V
[(V32QI "TARGET_AVX") V16QI
(V16HI "TARGET_AVX") V8HI
[(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
@ -244,6 +244,10 @@
(define_mode_iterator V_256
[V32QI V16HI V8SI V4DI V8SF V4DF])
;; All 128bit and 256bit vector modes
(define_mode_iterator V_128_256
[V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
;; All 512bit vector modes
(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
@ -7351,6 +7355,15 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
;; Once reload has completed, selecting elements 0 and 1 of a V8FI
;; register is rewritten as a plain move from the <ssequartermode>
;; lowpart of that register.  Only enabled for TARGET_AVX512DQ.
(define_split
[(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
(vec_select:<ssequartermode>
(match_operand:V8FI 1 "register_operand")
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_AVX512DQ && reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
[(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssequartermode>
@ -7374,6 +7387,16 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
;; Once reload has completed, selecting elements 0 through 3 of a V16FI
;; register is rewritten as a plain move from the <ssequartermode>
;; lowpart of that register.  Only enabled for TARGET_AVX512F.
(define_split
[(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
(vec_select:<ssequartermode>
(match_operand:V16FI 1 "register_operand")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
"TARGET_AVX512F && reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
(define_mode_attr extract_type_2
[(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
@ -16478,11 +16501,11 @@
;; XOP parallel XMM conditional moves
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
[(set (match_operand:V 0 "register_operand" "=x,x")
(if_then_else:V
(match_operand:V 3 "nonimmediate_operand" "x,m")
(match_operand:V 1 "register_operand" "x,x")
(match_operand:V 2 "nonimmediate_operand" "xm,x")))]
[(set (match_operand:V_128_256 0 "register_operand" "=x,x")
(if_then_else:V_128_256
(match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
(match_operand:V_128_256 1 "register_operand" "x,x")
(match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
"TARGET_XOP"
"vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")])

View file

@ -1,4 +1,13 @@
2018-03-31 Alexandre Oliva <aoliva@redhat.com>
2018-04-01 Jakub Jelinek <jakub@redhat.com>
PR middle-end/85090
* gcc.target/i386/avx512f-pr85090-1.c: New test.
* gcc.target/i386/avx512f-pr85090-2.c: New test.
* gcc.target/i386/avx512f-pr85090-3.c: New test.
* gcc.target/i386/avx512bw-pr85090-2.c: New test.
* gcc.target/i386/avx512bw-pr85090-3.c: New test.
2018-03-31 Alexandre Oliva <aoliva@redhat.com>
PR c++/85027
* g++.dg/pr85027.C: New.
@ -101,7 +110,7 @@
PR sanitizer/85081
* g++.dg/asan/pr85081.C: New test.
2018-03-28 Alexandre Oliva <aoliva@redhat.com>
2018-03-28 Alexandre Oliva <aoliva@redhat.com>
PR c++/84789
* g++.dg/template/pr84789.C: Adjust for testing with
@ -376,7 +385,7 @@
PR sanitizer/85029
* g++.dg/ubsan/pr85029.C: New test.
2018-03-23 Alexandre Oliva <aoliva@redhat.com>
2018-03-23 Alexandre Oliva <aoliva@redhat.com>
PR c++/71251
* g++.dg/cpp0x/pr71251.C: New.
@ -440,7 +449,7 @@
* gcc.dg/builtin-tgmath-3.c: New test.
2018-03-21 Alexandre Oliva <aoliva@redhat.com>
2018-03-21 Alexandre Oliva <aoliva@redhat.com>
PR c++/71965
* g++.dg/concepts/pr71965.C: New.
@ -1072,7 +1081,7 @@
* gcc.dg/Wrestrict-10.c: New test.
* gcc.dg/Wrestrict-11.c: New test.
2018-03-09 Alexandre Oliva <aoliva@redhat.com>
2018-03-09 Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/84682
* gcc.dg/torture/pr84682-1.c: New.
@ -1144,7 +1153,7 @@
* gcc.dg/torture/pr84746.c: New testcase.
2018-03-08 Alexandre Oliva <aoliva@redhat.com>
2018-03-08 Alexandre Oliva <aoliva@redhat.com>
PR debug/84404
PR debug/84408
* gcc.dg/graphite/pr84404.c: New.
@ -1236,7 +1245,7 @@
PR tree-optimization/84687
* gcc.dg/pr84687.c: New test.
2018-03-06 Alexandre Oliva <aoliva@redhat.com>
2018-03-06 Alexandre Oliva <aoliva@redhat.com>
PR c++/84231
* g++.dg/pr84231.C: New.

View file

@ -0,0 +1,35 @@
/* PR middle-end/85090 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512bw -mtune=intel -masm=att" } */
typedef short V __attribute__((vector_size (64)));
/* Return X with element 0 (the first element of the 64-byte short
   vector) replaced by Y converted to short.  */
V
f1 (V x, int y)
{
x[0] = y;
return x;
}
/* Return X with element 7 replaced by Y converted to short.  Element 7
   is the last short in the first 128 bits of the vector.  */
V
f2 (V x, int y)
{
x[7] = y;
return x;
}
/* Return X with element 11 replaced by Y converted to short.  Element 11
   falls in the second 128-bit quarter of the vector.  */
V
f3 (V x, int y)
{
x[11] = y;
return x;
}
/* Return X with element 29 replaced by Y converted to short.  Element 29
   falls in the fourth (highest) 128-bit quarter of the vector.  */
V
f4 (V x, int y)
{
x[29] = y;
return x;
}
/* { dg-final { scan-assembler-times "vpbroadcastw\t" 4 } } */

View file

@ -0,0 +1,35 @@
/* PR middle-end/85090 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512bw -mtune=intel -masm=att" } */
typedef signed char V __attribute__((vector_size (64)));
/* Return X with element 0 (the first element of the 64-byte signed char
   vector) replaced by Y converted to signed char.  */
V
f1 (V x, int y)
{
x[0] = y;
return x;
}
/* Return X with element 15 replaced by Y converted to signed char.
   Element 15 is the last byte in the first 128 bits of the vector.  */
V
f2 (V x, int y)
{
x[15] = y;
return x;
}
/* Return X with element 22 replaced by Y converted to signed char.
   Element 22 falls in the second 128-bit quarter of the vector.  */
V
f3 (V x, int y)
{
x[22] = y;
return x;
}
/* Return X with element 59 replaced by Y converted to signed char.
   Element 59 falls in the fourth (highest) 128-bit quarter of the
   vector.  */
V
f4 (V x, int y)
{
x[59] = y;
return x;
}
/* { dg-final { scan-assembler-times "vpbroadcastb\t" 4 } } */

View file

@ -0,0 +1,35 @@
/* PR middle-end/85090 */
/* { dg-do run { target int128 } } */
/* { dg-require-effective-target avx512f } */
/* { dg-options "-O2 -fno-tree-dominator-opts -mavx512f -fira-algorithm=priority" } */
#include "avx512f-check.h"
typedef unsigned short U __attribute__ ((vector_size (64)));
typedef unsigned int V __attribute__ ((vector_size (64)));
typedef unsigned __int128 W __attribute__ ((vector_size (64)));
V h;
W d, e, g;
U f;
/* Reduced reproducer for PR middle-end/85090.  Shifts element 0 of I left
   by one bit and returns I; every other element of I is left untouched.
   The updates of the globals F and E are part of the reduced test case
   and do not feed the return value (NOTE(review): presumably they are
   needed to provoke the original miscompilation -- confirm against the
   PR).  The noipa attribute keeps the call out of interprocedural
   optimization.  */
static __attribute__((noipa)) U
foo (U i)
{
f >>= ((U)d > f) & 1;
i[0] <<= 1;
e = (7 & -d) << (7 & -(g & 7));
return i;
}
/* Test entry point.  Fills X with 0..31, passes it through foo, and aborts
   unless every element still equals its index.  Element 0 starts as 0, so
   the left shift done in foo leaves it 0 and the whole vector is expected
   to come back unchanged.  */
void
avx512f_test (void)
{
U x;
for (unsigned i = 0; i < 32; i++)
x[i] = i;
x = foo (x);
for (unsigned i = 0; i < 32; i++)
if (x[i] != i)
abort ();
}

View file

@ -0,0 +1,37 @@
/* PR middle-end/85090 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f -mno-avx512bw -mtune=intel -masm=att" } */
typedef short V __attribute__((vector_size (64)));
/* Return X with element 0 (the first element of the 64-byte short
   vector) replaced by Y converted to short.  Element 0 lies in the
   lowest 128-bit quarter.  */
V
f1 (V x, int y)
{
x[0] = y;
return x;
}
/* Return X with element 7 replaced by Y converted to short.  Element 7
   is the last short of the lowest 128-bit quarter.  */
V
f2 (V x, int y)
{
x[7] = y;
return x;
}
/* Return X with element 11 replaced by Y converted to short.  Element 11
   lies in the second 128-bit quarter (elements 8..15).  */
V
f3 (V x, int y)
{
x[11] = y;
return x;
}
/* Return X with element 29 replaced by Y converted to short.  Element 29
   lies in the fourth 128-bit quarter (elements 24..31).  */
V
f4 (V x, int y)
{
x[29] = y;
return x;
}
/* { dg-final { scan-assembler-times "vpinsrw\t" 4 } } */
/* { dg-final { scan-assembler-times "vextracti32x4\t" 2 } } */
/* { dg-final { scan-assembler-times "vinserti32x4\t" 4 } } */

View file

@ -0,0 +1,37 @@
/* PR middle-end/85090 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f -mno-avx512bw -mtune=intel -masm=att" } */
typedef signed char V __attribute__((vector_size (64)));
/* Return X with element 0 (the first element of the 64-byte signed char
   vector) replaced by Y converted to signed char.  Element 0 lies in the
   lowest 128-bit quarter.  */
V
f1 (V x, int y)
{
x[0] = y;
return x;
}
/* Return X with element 15 replaced by Y converted to signed char.
   Element 15 is the last byte of the lowest 128-bit quarter.  */
V
f2 (V x, int y)
{
x[15] = y;
return x;
}
/* Return X with element 22 replaced by Y converted to signed char.
   Element 22 lies in the second 128-bit quarter (elements 16..31).  */
V
f3 (V x, int y)
{
x[22] = y;
return x;
}
/* Return X with element 59 replaced by Y converted to signed char.
   Element 59 lies in the fourth 128-bit quarter (elements 48..63).  */
V
f4 (V x, int y)
{
x[59] = y;
return x;
}
/* { dg-final { scan-assembler-times "vpinsrb\t" 4 } } */
/* { dg-final { scan-assembler-times "vextracti32x4\t" 2 } } */
/* { dg-final { scan-assembler-times "vinserti32x4\t" 4 } } */