i386: Optimize pmovmskb on inverted vector to inversion of pmovmskb result [PR98461]
The following patch adds combine splitters to optimize: - vpcmpeqd %ymm1, %ymm1, %ymm1 - vpandn %ymm1, %ymm0, %ymm0 vpmovmskb %ymm0, %eax + notl %eax etc. (for vectors with less than 32 elements with xorl instead of notl). 2020-12-30 Jakub Jelinek <jakub@redhat.com> PR target/98461 * config/i386/sse.md (<sse2_avx2>_pmovmskb): Add splitters for pmovmskb of NOT vector. * gcc.target/i386/sse2-pr98461.c: New test. * gcc.target/i386/avx2-pr98461.c: New test.
This commit is contained in:
parent
86b3edf1ff
commit
8f7941ca37
3 changed files with 151 additions and 0 deletions
|
@ -16099,6 +16099,53 @@
|
|||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SI 0 "register_operand")
|
||||
(unspec:SI
|
||||
[(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
|
||||
UNSPEC_MOVMSK))]
|
||||
"TARGET_SSE2"
|
||||
[(set (match_dup 2)
|
||||
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
|
||||
(set (match_dup 0) (match_dup 3))]
|
||||
{
|
||||
operands[2] = gen_reg_rtx (SImode);
|
||||
if (GET_MODE_NUNITS (<MODE>mode) == 32)
|
||||
operands[3] = gen_rtx_NOT (SImode, operands[2]);
|
||||
else
|
||||
{
|
||||
operands[3]
|
||||
= gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
|
||||
SImode);
|
||||
operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
|
||||
}
|
||||
})
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SI 0 "register_operand")
|
||||
(unspec:SI
|
||||
[(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
|
||||
UNSPEC_MOVMSK))]
|
||||
"TARGET_SSE2
|
||||
&& GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
|
||||
&& GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
|
||||
[(set (match_dup 2)
|
||||
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
|
||||
(set (match_dup 0) (match_dup 3))]
|
||||
{
|
||||
operands[2] = gen_reg_rtx (SImode);
|
||||
operands[1] = gen_lowpart (<MODE>mode, operands[1]);
|
||||
if (GET_MODE_NUNITS (<MODE>mode) == 32)
|
||||
operands[3] = gen_rtx_NOT (SImode, operands[2]);
|
||||
else
|
||||
{
|
||||
operands[3]
|
||||
= gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
|
||||
SImode);
|
||||
operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(unspec:SI
|
||||
|
|
54
gcc/testsuite/gcc.target/i386/avx2-pr98461.c
Normal file
54
gcc/testsuite/gcc.target/i386/avx2-pr98461.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
/* PR target/98461 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2 -masm=att" } */
|
||||
/* { dg-final { scan-assembler-times "\tvpmovmskb\t" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "\tnotl\t" 6 } } */
|
||||
/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
|
||||
/* { dg-final { scan-assembler-not "\tvpxor" } } */
|
||||
/* { dg-final { scan-assembler-not "\tvpandn" } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
int
|
||||
f1 (__m256i x)
|
||||
{
|
||||
return ~_mm256_movemask_epi8 (x);
|
||||
}
|
||||
|
||||
int
|
||||
f2 (__m256i x)
|
||||
{
|
||||
return _mm256_movemask_epi8 (_mm256_andnot_si256 (x, _mm256_set1_epi8 (255)));
|
||||
}
|
||||
|
||||
int
|
||||
f3 (__v32qi x)
|
||||
{
|
||||
x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
return _mm256_movemask_epi8 ((__m256i) x);
|
||||
}
|
||||
|
||||
long
|
||||
f4 (__m256i x)
|
||||
{
|
||||
return (unsigned) ~_mm256_movemask_epi8 (x);
|
||||
}
|
||||
|
||||
long
|
||||
f5 (__m256i x)
|
||||
{
|
||||
return (unsigned) _mm256_movemask_epi8 (_mm256_andnot_si256 (x, _mm256_set1_epi8 (255)));
|
||||
}
|
||||
|
||||
long
|
||||
f6 (__v32qi x)
|
||||
{
|
||||
x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
return (unsigned) _mm256_movemask_epi8 ((__m256i) x);
|
||||
}
|
50
gcc/testsuite/gcc.target/i386/sse2-pr98461.c
Normal file
50
gcc/testsuite/gcc.target/i386/sse2-pr98461.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* PR target/98461 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2 -mno-sse3 -masm=att" } */
|
||||
/* { dg-final { scan-assembler-times "\tpmovmskb\t" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "\txorl\t" 6 } } */
|
||||
/* { dg-final { scan-assembler-not "\tpcmpeq" } } */
|
||||
/* { dg-final { scan-assembler-not "\tpxor" } } */
|
||||
/* { dg-final { scan-assembler-not "\tpandn" } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
int
|
||||
f1 (__m128i x)
|
||||
{
|
||||
return _mm_movemask_epi8 (x) ^ 65535;
|
||||
}
|
||||
|
||||
int
|
||||
f2 (__m128i x)
|
||||
{
|
||||
return _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (255)));
|
||||
}
|
||||
|
||||
int
|
||||
f3 (__v16qi x)
|
||||
{
|
||||
x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
return _mm_movemask_epi8 ((__m128i) x);
|
||||
}
|
||||
|
||||
long
|
||||
f4 (__m128i x)
|
||||
{
|
||||
return (unsigned) (_mm_movemask_epi8 (x) ^ 65535);
|
||||
}
|
||||
|
||||
long
|
||||
f5 (__m128i x)
|
||||
{
|
||||
return (unsigned) _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (255)));
|
||||
}
|
||||
|
||||
long
|
||||
f6 (__v16qi x)
|
||||
{
|
||||
x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1 };
|
||||
return (unsigned) _mm_movemask_epi8 ((__m128i) x);
|
||||
}
|
Loading…
Add table
Reference in a new issue