i386: Fix wrong optimization for consecutive masked scatters [PR 101472]
gcc/ChangeLog: PR target/101472 * config/i386/sse.md: (<avx512>scattersi<mode>): Add mask operand to UNSPEC_VSIBADDR. (<avx512>scattersi<mode>): Likewise. (*avx512f_scattersi<VI48F:mode>): Merge mask operand to set_dest. (*avx512f_scatterdi<VI48F:mode>): Likewise gcc/testsuite/ChangeLog: PR target/101472 * gcc.target/i386/avx512f-pr101472.c: New test. * gcc.target/i386/avx512vl-pr101472.c: New test.
This commit is contained in:
parent
26f5ea5e14
commit
44a545a6ab
3 changed files with 140 additions and 8 deletions
|
@ -24205,8 +24205,9 @@
|
|||
"TARGET_AVX512F"
|
||||
{
|
||||
operands[5]
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
|
||||
operands[4]), UNSPEC_VSIBADDR);
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
|
||||
operands[4], operands[1]),
|
||||
UNSPEC_VSIBADDR);
|
||||
})
|
||||
|
||||
(define_insn "*avx512f_scattersi<VI48F:mode>"
|
||||
|
@ -24214,10 +24215,11 @@
|
|||
[(unspec:P
|
||||
[(match_operand:P 0 "vsib_address_operand" "Tv")
|
||||
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
|
||||
(match_operand:SI 4 "const1248_operand" "n")]
|
||||
(match_operand:SI 4 "const1248_operand" "n")
|
||||
(match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(unspec:VI48F
|
||||
[(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
|
||||
[(match_dup 6)
|
||||
(match_operand:VI48F 3 "register_operand" "v")]
|
||||
UNSPEC_SCATTER))
|
||||
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
|
||||
|
@ -24243,8 +24245,9 @@
|
|||
"TARGET_AVX512F"
|
||||
{
|
||||
operands[5]
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
|
||||
operands[4]), UNSPEC_VSIBADDR);
|
||||
= gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
|
||||
operands[4], operands[1]),
|
||||
UNSPEC_VSIBADDR);
|
||||
})
|
||||
|
||||
(define_insn "*avx512f_scatterdi<VI48F:mode>"
|
||||
|
@ -24252,10 +24255,11 @@
|
|||
[(unspec:P
|
||||
[(match_operand:P 0 "vsib_address_operand" "Tv")
|
||||
(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
|
||||
(match_operand:SI 4 "const1248_operand" "n")]
|
||||
(match_operand:SI 4 "const1248_operand" "n")
|
||||
(match_operand:QI 6 "register_operand" "1")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(unspec:VI48F
|
||||
[(match_operand:QI 6 "register_operand" "1")
|
||||
[(match_dup 6)
|
||||
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
|
||||
UNSPEC_SCATTER))
|
||||
(clobber (match_scratch:QI 1 "=&Yk"))]
|
||||
|
|
49
gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
Normal file
49
gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* PR target/101472 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
|
||||
__m256i a, __m512i b)
|
||||
{
|
||||
_mm512_mask_i64scatter_epi32(addr, k1, vindex, a, 1);
|
||||
_mm512_mask_i64scatter_epi32(addr, k2, vindex, a, 1);
|
||||
_mm512_mask_i32scatter_epi32(addr, k1, vindex, b, 1);
|
||||
_mm512_mask_i32scatter_epi32(addr, k2, vindex, b, 1);
|
||||
}
|
||||
|
||||
void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
|
||||
__m256i idx, __m512i a)
|
||||
{
|
||||
_mm512_mask_i64scatter_epi64(addr, k1, vindex, a, 1);
|
||||
_mm512_mask_i64scatter_epi64(addr, k2, vindex, a, 1);
|
||||
_mm512_mask_i32scatter_epi64(addr, k1, idx, a, 1);
|
||||
_mm512_mask_i32scatter_epi64(addr, k2, idx, a, 1);
|
||||
}
|
||||
|
||||
void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
|
||||
__m256 a, __m512 b)
|
||||
{
|
||||
_mm512_mask_i64scatter_ps(addr, k1, vindex, a, 1);
|
||||
_mm512_mask_i64scatter_ps(addr, k2, vindex, a, 1);
|
||||
_mm512_mask_i32scatter_ps(addr, k1, vindex, b, 1);
|
||||
_mm512_mask_i32scatter_ps(addr, k2, vindex, b, 1);
|
||||
}
|
||||
|
||||
void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
|
||||
__m256i idx, __m512d a)
|
||||
{
|
||||
_mm512_mask_i64scatter_pd(addr, k1, vindex, a, 1);
|
||||
_mm512_mask_i64scatter_pd(addr, k2, vindex, a, 1);
|
||||
_mm512_mask_i32scatter_pd(addr, k1, idx, a, 1);
|
||||
_mm512_mask_i32scatter_pd(addr, k2, idx, a, 1);
|
||||
}
|
79
gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
Normal file
79
gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
Normal file
|
@ -0,0 +1,79 @@
|
|||
/* PR target/101472 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512vl -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
|
||||
__m256i vindex2, __m128i src_epi32,
|
||||
__m256i src_i32_epi32)
|
||||
{
|
||||
_mm_mask_i64scatter_epi32(addr, k1, vindex1, src_epi32, 1);
|
||||
_mm_mask_i64scatter_epi32(addr, k2, vindex1, src_epi32, 1);
|
||||
_mm256_mask_i64scatter_epi32(addr, k1, vindex2, src_epi32, 1);
|
||||
_mm256_mask_i64scatter_epi32(addr, k2, vindex2, src_epi32, 1);
|
||||
|
||||
_mm_mask_i32scatter_epi32(addr, k1, vindex1, src_epi32, 1);
|
||||
_mm_mask_i32scatter_epi32(addr, k2, vindex1, src_epi32, 1);
|
||||
_mm256_mask_i32scatter_epi32(addr, k1, vindex2, src_i32_epi32, 1);
|
||||
_mm256_mask_i32scatter_epi32(addr, k2, vindex2, src_i32_epi32, 1);
|
||||
}
|
||||
|
||||
void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
|
||||
__m256i vindex2, __m128i src_epi64_mm,
|
||||
__m256i src_epi64)
|
||||
{
|
||||
_mm_mask_i64scatter_epi64(addr, k1, vindex1, src_epi64_mm, 1);
|
||||
_mm_mask_i64scatter_epi64(addr, k2, vindex1, src_epi64_mm, 1);
|
||||
_mm256_mask_i64scatter_epi64(addr, k1, vindex2, src_epi64, 1);
|
||||
_mm256_mask_i64scatter_epi64(addr, k2, vindex2, src_epi64, 1);
|
||||
|
||||
_mm_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64_mm, 8);
|
||||
_mm_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64_mm, 8);
|
||||
_mm256_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64, 1);
|
||||
_mm256_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64, 1);
|
||||
}
|
||||
void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
|
||||
__m256i vindex2, __m128 src_ps, __m256 src_i32_ps)
|
||||
{
|
||||
_mm_mask_i64scatter_ps(addr, k1, vindex1, src_ps, 1);
|
||||
_mm_mask_i64scatter_ps(addr, k2, vindex1, src_ps, 1);
|
||||
_mm256_mask_i64scatter_ps(addr, k1, vindex2, src_ps, 1);
|
||||
_mm256_mask_i64scatter_ps(addr, k2, vindex2, src_ps, 1);
|
||||
|
||||
_mm_mask_i32scatter_ps(addr, k1, vindex1, src_ps, 8);
|
||||
_mm_mask_i32scatter_ps(addr, k2, vindex1, src_ps, 8);
|
||||
_mm256_mask_i32scatter_ps(addr, k1, vindex2, src_i32_ps, 1);
|
||||
_mm256_mask_i32scatter_ps(addr, k2, vindex2, src_i32_ps, 1);
|
||||
}
|
||||
|
||||
void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
|
||||
__m256i vindex2, __m128d src_pd_mm, __m256d src_pd)
|
||||
{
|
||||
_mm_mask_i64scatter_pd(addr, k1, vindex1, src_pd_mm, 1);
|
||||
_mm_mask_i64scatter_pd(addr, k2, vindex1, src_pd_mm, 1);
|
||||
_mm256_mask_i64scatter_pd(addr, k1, vindex2, src_pd, 1);
|
||||
_mm256_mask_i64scatter_pd(addr, k2, vindex2, src_pd, 1);
|
||||
|
||||
_mm_mask_i32scatter_pd(addr, k1, vindex1, src_pd_mm, 8);
|
||||
_mm_mask_i32scatter_pd(addr, k2, vindex1, src_pd_mm, 8);
|
||||
_mm256_mask_i32scatter_pd(addr, k1, vindex1, src_pd, 1);
|
||||
_mm256_mask_i32scatter_pd(addr, k2, vindex1, src_pd, 1);
|
||||
}
|
Loading…
Add table
Reference in a new issue