re PR target/90991 (_mm_loadu_ps intrinsic translates to vmovaps in combination with _mm512_insertf32x4)
PR target/90991 * config/i386/sse.md (*<extract_type>_vinsert<shuffletype><extract_suf>_0): Use vmovupd, vmovups, vmovdqu, vmovdqu32 or vmovdqu64 instead of the aligned insns if operands[2] is misaligned_operand. * gcc.target/i386/avx512dq-pr90991-1.c: New test. From-SVN: r272674
This commit is contained in:
parent
fdfbed383e
commit
d55c1ffd49
4 changed files with 80 additions and 6 deletions
|
@ -1,3 +1,11 @@
|
|||
2019-06-26 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/90991
|
||||
* config/i386/sse.md
|
||||
(*<extract_type>_vinsert<shuffletype><extract_suf>_0): Use vmovupd,
|
||||
vmovups, vmovdqu, vmovdqu32 or vmovdqu64 instead of the aligned
|
||||
insns if operands[2] is misaligned_operand.
|
||||
|
||||
2019-06-26 Li Jia He <helijia@linux.ibm.com>
|
||||
|
||||
* config/rs6000/rs6000.h (TARGET_MADDLD): Remove the restriction of
|
||||
|
|
|
@ -13747,15 +13747,29 @@
|
|||
switch (<MODE>mode)
|
||||
{
|
||||
case E_V8DFmode:
|
||||
return "vmovapd\t{%2, %x0|%x0, %2}";
|
||||
if (misaligned_operand (operands[2], <ssequartermode>mode))
|
||||
return "vmovupd\t{%2, %x0|%x0, %2}";
|
||||
else
|
||||
return "vmovapd\t{%2, %x0|%x0, %2}";
|
||||
case E_V16SFmode:
|
||||
return "vmovaps\t{%2, %x0|%x0, %2}";
|
||||
if (misaligned_operand (operands[2], <ssequartermode>mode))
|
||||
return "vmovups\t{%2, %x0|%x0, %2}";
|
||||
else
|
||||
return "vmovaps\t{%2, %x0|%x0, %2}";
|
||||
case E_V8DImode:
|
||||
return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
|
||||
: "vmovdqa\t{%2, %x0|%x0, %2}";
|
||||
if (misaligned_operand (operands[2], <ssequartermode>mode))
|
||||
return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
|
||||
: "vmovdqu\t{%2, %x0|%x0, %2}";
|
||||
else
|
||||
return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
|
||||
: "vmovdqa\t{%2, %x0|%x0, %2}";
|
||||
case E_V16SImode:
|
||||
return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
|
||||
: "vmovdqa\t{%2, %x0|%x0, %2}";
|
||||
if (misaligned_operand (operands[2], <ssequartermode>mode))
|
||||
return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
|
||||
: "vmovdqu\t{%2, %x0|%x0, %2}";
|
||||
else
|
||||
return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
|
||||
: "vmovdqa\t{%2, %x0|%x0, %2}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2019-06-26 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/90991
|
||||
* gcc.target/i386/avx512dq-pr90991-1.c: New test.
|
||||
|
||||
2019-06-26 Li Jia He <helijia@linux.ibm.com>
|
||||
|
||||
* gcc.target/powerpc/maddld-1.c: New testcase.
|
||||
|
|
47
gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c
Normal file
47
gcc/testsuite/gcc.target/i386/avx512dq-pr90991-1.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* PR target/90991 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx512dq -masm=att" } */
|
||||
/* { dg-final { scan-assembler-times "vmovaps\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovapd\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovdqa\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovups\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovupd\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovdqu\[ \t]\+\\(\[^\n\r]*\\), %xmm0" 1 } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
/* Insert the 128-bit float vector loaded from A with _mm_load_ps into
   position 0 of an all-zero __m512.  Same shape as f4, which uses
   _mm_loadu_ps instead; presumably this is the function expected to
   produce the single "vmovaps" match in the dg-final directives above
   -- confirm against the generated assembly.  */
__m512
|
||||
f1 (void *a)
|
||||
{
|
||||
return _mm512_insertf32x4 (_mm512_set1_ps (0.0f), _mm_load_ps (a), 0);
|
||||
}
|
||||
|
||||
/* Insert the 128-bit double vector loaded from A with _mm_load_pd into
   position 0 of an all-zero __m512d.  Same shape as f5, which uses
   _mm_loadu_pd instead; presumably this is the function expected to
   produce the single "vmovapd" match in the dg-final directives above
   -- confirm against the generated assembly.  */
__m512d
|
||||
f2 (void *a)
|
||||
{
|
||||
return _mm512_insertf64x2 (_mm512_set1_pd (0.0), _mm_load_pd (a), 0);
|
||||
}
|
||||
|
||||
/* Insert the 128-bit integer vector loaded from A with _mm_load_si128
   into position 0 of an all-zero __m512i.  Same shape as f6, which uses
   _mm_loadu_si128 instead; presumably this is the function expected to
   produce the single "vmovdqa" match in the dg-final directives above
   -- confirm against the generated assembly.  */
__m512i
|
||||
f3 (void *a)
|
||||
{
|
||||
return _mm512_inserti32x4 (_mm512_set1_epi32 (0), _mm_load_si128 (a), 0);
|
||||
}
|
||||
|
||||
/* Insert the 128-bit float vector loaded from A with _mm_loadu_ps into
   position 0 of an all-zero __m512.  This is the combination named in
   the PR title (_mm_loadu_ps together with _mm512_insertf32x4);
   presumably it is the function expected to produce the single
   "vmovups" match in the dg-final directives above -- confirm against
   the generated assembly.  */
__m512
|
||||
f4 (void *a)
|
||||
{
|
||||
return _mm512_insertf32x4 (_mm512_set1_ps (0.0f), _mm_loadu_ps (a), 0);
|
||||
}
|
||||
|
||||
/* Insert the 128-bit double vector loaded from A with _mm_loadu_pd into
   position 0 of an all-zero __m512d.  Same shape as f2, which uses
   _mm_load_pd instead; presumably this is the function expected to
   produce the single "vmovupd" match in the dg-final directives above
   -- confirm against the generated assembly.  */
__m512d
|
||||
f5 (void *a)
|
||||
{
|
||||
return _mm512_insertf64x2 (_mm512_set1_pd (0.0), _mm_loadu_pd (a), 0);
|
||||
}
|
||||
|
||||
/* Insert the 128-bit integer vector loaded from A with _mm_loadu_si128
   into position 0 of an all-zero __m512i.  Same shape as f3, which uses
   _mm_load_si128 instead; presumably this is the function expected to
   produce the single "vmovdqu" match in the dg-final directives above
   -- confirm against the generated assembly.  */
__m512i
|
||||
f6 (void *a)
|
||||
{
|
||||
return _mm512_inserti32x4 (_mm512_set1_epi32 (0), _mm_loadu_si128 (a), 0);
|
||||
}
|
Loading…
Add table
Reference in a new issue