[rs6000] PR89338, PR89339: Fix compat vector intrinsics for BE and 32-bit

Tests FAIL: sse2-cvtpd2dq-1, sse2-cvtpd2ps, sse2-cvttpd2dq on powerpc64
(big-endian).

_mm_cvtpd_epi32, _mm_cvtpd_ps, _mm_cvttpd_epi32: In big-endian mode, type
conversion from vector doubleword type to vector word type leaves the
results in the even lanes, so they must be merged with vec_mergee rather
than the vec_mergeo used for little-endian.

Tests FAIL: sse-cvtss2si-1, sse-cvtss2si-2, sse-movmskb-1 on powerpc
(32-bit big-endian).

The result of the mfvsrd instruction was interpreted using an incorrect
type, which leads to incorrect results.  Also, the mfvsrd instruction only
works as expected in 64-bit mode, or for 32-bit quantities in 32-bit mode.
A more general, if slower, solution is needed for 32-bit mode.

2019-02-25  Paul A. Clarke  <pc@us.ibm.com>

[gcc]

	* config/rs6000/emmintrin.h (_mm_cvtpd_epi32): Fix big endian.
	(_mm_cvtpd_ps): Likewise.
	(_mm_cvttpd_epi32): Likewise.

	PR target/89338
	* config/rs6000/xmmintrin.h (_mm_cvtss_si32): Fix type mismatch.
	(_mm_cvtss_si64): Fix type mismatch and 32-bit.

	PR target/89339
	* config/rs6000/xmmintrin.h (_mm_movemask_pi8): Fix 32-bit.

From-SVN: r269195
This commit is contained in:
Paul A. Clarke 2019-02-25 19:36:05 +00:00 committed by Paul Clarke
parent b5c44c5778
commit db739d3ca3
3 changed files with 38 additions and 3 deletions

View file

@ -1,3 +1,19 @@
2019-02-25 Paul A. Clarke <pc@us.ibm.com>
[gcc]
* config/rs6000/emmintrin.h (_mm_cvtpd_epi32): Fix big endian.
(_mm_cvtpd_ps): Likewise.
(_mm_cvttpd_epi32): Likewise.
PR target/89338
* config/rs6000/xmmintrin.h (_mm_cvtss_si32): Fix type mismatch.
(_mm_cvtss_si64): Fix type mismatch and 32-bit.
PR target/89339
* config/rs6000/xmmintrin.h (_mm_movemask_pi8): Fix 32-bit.
2019-02-25 Tamar Christina <tamar.christina@arm.com>
PR target/88530

View file

@ -887,7 +887,11 @@ _mm_cvtpd_epi32 (__m128d __A)
: );
#ifdef _ARCH_PWR8
#ifdef __LITTLE_ENDIAN__
temp = vec_mergeo (temp, temp);
#else
temp = vec_mergee (temp, temp);
#endif
result = (__v4si) vec_vpkudum ((__vector long long) temp,
(__vector long long) vzero);
#else
@ -922,7 +926,11 @@ _mm_cvtpd_ps (__m128d __A)
: );
#ifdef _ARCH_PWR8
#ifdef __LITTLE_ENDIAN__
temp = vec_mergeo (temp, temp);
#else
temp = vec_mergee (temp, temp);
#endif
result = (__v4sf) vec_vpkudum ((__vector long long) temp,
(__vector long long) vzero);
#else
@ -951,7 +959,11 @@ _mm_cvttpd_epi32 (__m128d __A)
: );
#ifdef _ARCH_PWR8
#ifdef __LITTLE_ENDIAN__
temp = vec_mergeo (temp, temp);
#else
temp = vec_mergee (temp, temp);
#endif
result = (__v4si) vec_vpkudum ((__vector long long) temp,
(__vector long long) vzero);
#else

View file

@ -905,7 +905,7 @@ _mm_cvtss_f32 (__m128 __A)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si32 (__m128 __A)
{
__m64 res = 0;
int res;
#ifdef _ARCH_PWR8
double dtmp;
__asm__(
@ -938,8 +938,8 @@ _mm_cvt_ss2si (__m128 __A)
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64 (__m128 __A)
{
__m64 res = 0;
#ifdef _ARCH_PWR8
long long res;
#if defined (_ARCH_PWR8) && defined (__powerpc64__)
double dtmp;
__asm__(
#ifdef __LITTLE_ENDIAN__
@ -1577,6 +1577,7 @@ _m_pminub (__m64 __A, __m64 __B)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
#ifdef __powerpc64__
unsigned long long p =
#ifdef __LITTLE_ENDIAN__
0x0008101820283038UL; // permute control for sign bits
@ -1584,6 +1585,12 @@ _mm_movemask_pi8 (__m64 __A)
0x3830282018100800UL; // permute control for sign bits
#endif
return __builtin_bpermd (p, __A);
#else
unsigned int mask = 0x20283038UL;
unsigned int r1 = __builtin_bpermd (mask, __A) & 0xf;
unsigned int r2 = __builtin_bpermd (mask, __A >> 32) & 0xf;
return (r2 << 4) | r1;
#endif
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))