From 2ccdd0f22312a14ac64bf944fdc4f8e7532eb0eb Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Thu, 20 Jun 2024 12:41:13 +0800
Subject: [PATCH] Optimize a < 0 ? -1 : 0 to (signed)a >> 31.

Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
and x < 0 ? 1 : 0 into (unsigned) x >> 31.

Add define_insn_and_split for the optimization done in
ix86_expand_int_vcond.

gcc/ChangeLog:

	PR target/115517
	* config/i386/sse.md (*ashr<mode>3_1): New
	define_insn_and_split.
	(*avx512_ashr<mode>3_1): Ditto.
	(*avx2_lshr<mode>3_1): Ditto.
	(*avx2_lshr<mode>3_2): Ditto and add 2 combine splitters after
	it.
	* config/i386/mmx.md (mmxscalarsize): New mode attribute.
	(*mmx_ashr<mode>3_1): New define_insn_and_split.
	(mmx_<insn><mode>3): Add a combine splitter after it.
	(*mmx_ashrv2hi3_1): New define_insn_and_split, also add a
	combine splitter after it.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr111023-2.c: Adjust testcase.
	* gcc.target/i386/vect-div-1.c: Ditto.
---
NOTE(review): this copy of the patch arrived with its line breaks
collapsed and every "<...>" token stripped.  Line breaks and indentation
below are reconstructed to agree with the hunk line counts.  In the added
mmx.md lines the iterator substitutions <mode> and <mmxscalarsize> are
restored (the shift count is the element width minus one, cf. the literal
15 used for V2HI).  Context lines are reproduced as found and are probably
still missing substitutions -- regenerate against the tree before applying.

 gcc/config/i386/mmx.md                     | 52 ++++++++++++++
 gcc/config/i386/sse.md                     | 83 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr111023-2.c |  4 +-
 gcc/testsuite/gcc.target/i386/vect-div-1.c |  2 +-
 4 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 359dc90628d..fca28df99a1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -147,6 +147,14 @@
    (V4HI "hi") (V2HI "hi")
    (V8QI "qi")])
 
+(define_mode_attr mmxscalarsize
+  [(V1DI "64")
+   (V2SI "32") (V2SF "32")
+   (V4HF "16") (V4BF "16")
+   (V2HF "16") (V2BF "16")
+   (V4HI "16") (V2HI "16")
+   (V8QI "8")])
+
 (define_mode_attr Yv_Yw
   [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
 
@@ -3620,6 +3628,17 @@
        (const_string "0")))
    (set_attr "mode" "DI,TI,TI")])
 
+(define_insn_and_split "*mmx_ashr<mode>3_1"
+  [(set (match_operand:MMXMODE24 0 "register_operand")
+	(lt:MMXMODE24
+	  (match_operand:MMXMODE24 1 "register_operand")
+	  (match_operand:MMXMODE24 2 "const0_operand")))]
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:MMXMODE24 (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
 (define_expand "ashr3"
   [(set (match_operand:MMXMODE24 0 "register_operand")
 	(ashiftrt:MMXMODE24
@@ -3646,6 +3665,17 @@
        (const_string "0")))
    (set_attr "mode" "DI,TI,TI")])
 
+(define_split
+  [(set (match_operand:MMXMODE248 0 "register_operand")
+	(and:MMXMODE248
+	  (lt:MMXMODE248
+	    (match_operand:MMXMODE248 1 "register_operand")
+	    (match_operand:MMXMODE248 2 "const0_operand"))
+	  (match_operand:MMXMODE248 3 "const1_operand")))]
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:MMXMODE248 (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
 (define_expand "3"
   [(set (match_operand:MMXMODE24 0 "register_operand")
 	(any_lshift:MMXMODE24
@@ -3687,6 +3717,28 @@
        (const_string "0")))
    (set_attr "mode" "TI")])
 
+(define_insn_and_split "*mmx_ashrv2hi3_1"
+  [(set (match_operand:V2HI 0 "register_operand")
+	(lt:V2HI
+	  (match_operand:V2HI 1 "register_operand")
+	  (match_operand:V2HI 2 "const0_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:V2HI (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (15, DImode);")
+
+(define_split
+  [(set (match_operand:V2HI 0 "register_operand")
+	(and:V2HI
+	  (lt:V2HI
+	    (match_operand:V2HI 1 "register_operand")
+	    (match_operand:V2HI 2 "const0_operand"))
+	  (match_operand:V2HI 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:V2HI (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (15, DImode);")
+
 (define_expand "v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
 	(any_shift:V8QI (match_operand:V8QI 1 "register_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3ffa1881c83..1169e93453e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16923,6 +16923,17 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "")])
 
+(define_insn_and_split "*ashr<mode>3_1"
+  [(set (match_operand:VI24_AVX2 0 "register_operand")
+	(lt:VI24_AVX2
+	  (match_operand:VI24_AVX2 1 "register_operand")
+	  (match_operand:VI24_AVX2 2 "const0_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:VI24_AVX2 (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_insn "ashr3"
   [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
 	(ashiftrt:VI248_AVX512BW_AVX512VL
@@ -16937,6 +16948,23 @@
        (const_string "0")))
    (set_attr "mode" "")])
 
+(define_insn_and_split "*avx512_ashr<mode>3_1"
+  [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
+	(vec_merge:VI248_AVX512VLBW
+	  (match_operand:VI248_AVX512VLBW 1 "vector_all_ones_operand")
+	  (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+	     (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+	     (const_int 1)]
+	    UNSPEC_PCMP)))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(ashiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+  "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_expand "ashr3"
   [(set (match_operand:VI248_AVX512BW 0 "register_operand")
 	(ashiftrt:VI248_AVX512BW
@@ -17091,6 +17119,61 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "")])
 
+(define_insn_and_split "*avx2_lshr<mode>3_1"
+  [(set (match_operand:VI8_AVX2 0 "register_operand")
+	(and:VI8_AVX2
+	  (gt:VI8_AVX2
+	    (match_operand:VI8_AVX2 1 "register_operand")
+	    (match_operand:VI8_AVX2 2 "register_operand"))
+	  (match_operand:VI8_AVX2 3 "const1_operand")))]
+  "TARGET_SSE4_2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 5) (gt:VI8_AVX2 (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 5) (match_dup 4)))]
+{
+  operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);
+  operands[5] = gen_reg_rtx (<MODE>mode);
+})
+
+(define_insn_and_split "*avx2_lshr<mode>3_2"
+  [(set (match_operand:VI8_AVX2 0 "register_operand")
+	(and:VI8_AVX2
+	  (lt:VI8_AVX2
+	    (match_operand:VI8_AVX2 1 "register_operand")
+	    (match_operand:VI8_AVX2 2 "const0_operand"))
+	  (match_operand:VI8_AVX2 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 1) (const_int 63)))])
+
+(define_split
+  [(set (match_operand:VI248_AVX2 0 "register_operand")
+	(and:VI248_AVX2
+	  (lt:VI248_AVX2
+	    (match_operand:VI248_AVX2 1 "register_operand")
+	    (match_operand:VI248_AVX2 2 "const0_operand"))
+	  (match_operand:VI248_AVX2 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:VI248_AVX2 (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
+(define_split
+  [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
+	(vec_merge:VI248_AVX512VLBW
+	  (match_operand:VI248_AVX512VLBW 1 "const1_operand")
+	  (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+	     (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+	     (const_int 1)]
+	    UNSPEC_PCMP)))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  [(set (match_dup 0)
+	(lshiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+  "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_insn "3"
   [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
 	(any_lshift:VI248_AVX512BW
diff --git a/gcc/testsuite/gcc.target/i386/pr111023-2.c b/gcc/testsuite/gcc.target/i386/pr111023-2.c
index 6c69f947544..ba52959b357 100644
--- a/gcc/testsuite/gcc.target/i386/pr111023-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr111023-2.c
@@ -36,7 +36,7 @@ v4si_v4hi (v4si *dst, v8hi src)
   dst[0] = *(v4si *) tem;
 }
 
-/* { dg-final { scan-assembler "pcmpgtw" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtw|psraw)" } } */
 /* { dg-final { scan-assembler "punpcklwd" } } */
 
 void
@@ -48,5 +48,5 @@ v2di_v2si (v2di *dst, v4si src)
   dst[0] = *(v2di *) tem;
 }
 
-/* { dg-final { scan-assembler "pcmpgtd" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtd|psrad)" } } */
 /* { dg-final { scan-assembler "punpckldq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-div-1.c b/gcc/testsuite/gcc.target/i386/vect-div-1.c
index f611088d8df..2d92b9cc2f1 100644
--- a/gcc/testsuite/gcc.target/i386/vect-div-1.c
+++ b/gcc/testsuite/gcc.target/i386/vect-div-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-common -msse2" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-common -msse4.1" } */
 
 unsigned short b[1024] = { 0 };
 int a[1024] = { 0 };