config.gcc (i[34567]86-*-*): Add nmmintrin.h to extra_headers.
2007-05-31 H.J. Lu <hongjiu.lu@intel.com> * config.gcc (i[34567]86-*-*): Add nmmintrin.h to extra_headers. (x86_64-*-*): Likewise. * config/i386/i386.c (OPTION_MASK_ISA_MMX_UNSET): New. (OPTION_MASK_ISA_3DNOW_UNSET): Likewise. (OPTION_MASK_ISA_SSE_UNSET): Likewise. (OPTION_MASK_ISA_SSE2_UNSET): Likewise. (OPTION_MASK_ISA_SSE3_UNSET): Likewise. (OPTION_MASK_ISA_SSSE3_UNSET): Likewise. (OPTION_MASK_ISA_SSE4_1_UNSET): Likewise. (OPTION_MASK_ISA_SSE4_2_UNSET): Likewise. (OPTION_MASK_ISA_SSE4): Likewise. (OPTION_MASK_ISA_SSE4_UNSET): Likewise. (OPTION_MASK_ISA_SSE4A_UNSET): Likewise. (ix86_handle_option): Use OPTION_MASK_ISA_*_UNSET. Handle SSE4.2. (override_options): Support SSE4.2. (ix86_build_const_vector): Support SImode and DImode. (ix86_build_signbit_mask): Likewise. (ix86_expand_int_vcond): Support V2DImode. (IX86_BUILTIN_CRC32QI): New for SSE4.2. (IX86_BUILTIN_CRC32HI): Likewise. (IX86_BUILTIN_CRC32SI): Likewise. (IX86_BUILTIN_CRC32DI): Likewise. (IX86_BUILTIN_PCMPGTQ): Likewise. (bdesc_crc32): Likewise. (bdesc_sse_3arg): Likewise. (ix86_expand_crc32): Likewise. (ix86_init_mmx_sse_builtins): Support SSE4.2. (ix86_expand_builtin): Likewise. * config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define __SSE4_2__ for -msse4.2. * config/i386/i386.md (UNSPEC_CRC32): New for SSE4.2. (CRC32MODE): Likewise. (crc32modesuffix): Likewise. (crc32modeconstraint): Likewise. (sse4_2_crc32<mode>): Likewise. (sse4_2_crc32di): Likewise. * config/i386/i386.opt (msse4.2): New for SSE4.2. (msse4): Likewise. * config/i386/nmmintrin.h: New. The dummy SSE4.2 intrinsic header file. * config/i386/smmintrin.h: Add SSE4.2 intrinsics. * config/i386/sse.md (sse4_2_gtv2di3): New pattern for SSE4.2. (vcond<mode>): Use SSEMODEI instead of SSEMODE124. (vcondu<mode>): Likewise. * doc/extend.texi: Document SSE4.2 built-in functions. * doc/invoke.texi: Document -msse4.2/-msse4. From-SVN: r125236
This commit is contained in:
parent
ccb4d26be0
commit
3b8dd0716f
11 changed files with 698 additions and 71 deletions
|
@ -1,3 +1,64 @@
|
|||
2007-05-31 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config.gcc (i[34567]86-*-*): Add nmmintrin.h to
|
||||
extra_headers.
|
||||
(x86_64-*-*): Likewise.
|
||||
|
||||
* config/i386/i386.c (OPTION_MASK_ISA_MMX_UNSET): New.
|
||||
(OPTION_MASK_ISA_3DNOW_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE2_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE3_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSSE3_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE4_1_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE4_2_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE4): Likewise.
|
||||
(OPTION_MASK_ISA_SSE4_UNSET): Likewise.
|
||||
(OPTION_MASK_ISA_SSE4A_UNSET): Likewise.
|
||||
(ix86_handle_option): Use OPTION_MASK_ISA_*_UNSET. Handle
|
||||
SSE4.2.
|
||||
(override_options): Support SSE4.2.
|
||||
(ix86_build_const_vector): Support SImode and DImode.
|
||||
(ix86_build_signbit_mask): Likewise.
|
||||
(ix86_expand_int_vcond): Support V2DImode.
|
||||
(IX86_BUILTIN_CRC32QI): New for SSE4.2.
|
||||
(IX86_BUILTIN_CRC32HI): Likewise.
|
||||
(IX86_BUILTIN_CRC32SI): Likewise.
|
||||
(IX86_BUILTIN_CRC32DI): Likewise.
|
||||
(IX86_BUILTIN_PCMPGTQ): Likewise.
|
||||
(bdesc_crc32): Likewise.
|
||||
(bdesc_sse_3arg): Likewise.
|
||||
(ix86_expand_crc32): Likewise.
|
||||
(ix86_init_mmx_sse_builtins): Support SSE4.2.
|
||||
(ix86_expand_builtin): Likewise.
|
||||
|
||||
* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define
|
||||
__SSE4_2__ for -msse4.2.
|
||||
|
||||
* config/i386/i386.md (UNSPEC_CRC32): New for SSE4.2.
|
||||
(CRC32MODE): Likewise.
|
||||
(crc32modesuffix): Likewise.
|
||||
(crc32modeconstraint): Likewise.
|
||||
(sse4_2_crc32<mode>): Likewise.
|
||||
(sse4_2_crc32di): Likewise.
|
||||
|
||||
* config/i386/i386.opt (msse4.2): New for SSE4.2.
|
||||
(msse4): Likewise.
|
||||
|
||||
* config/i386/nmmintrin.h: New. The dummy SSE4.2 intrinsic header
|
||||
file.
|
||||
|
||||
* config/i386/smmintrin.h: Add SSE4.2 intrinsics.
|
||||
|
||||
* config/i386/sse.md (sse4_2_gtv2di3): New pattern for
|
||||
SSE4.2.
|
||||
(vcond<mode>): Use SSEMODEI instead of SSEMODE124.
|
||||
(vcondu<mode>): Likewise.
|
||||
|
||||
* doc/extend.texi: Document SSE4.2 built-in functions.
|
||||
|
||||
* doc/invoke.texi: Document -msse4.2/-msse4.
|
||||
|
||||
2007-05-31 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
PR tree-optimization/32160
|
||||
|
|
|
@ -276,12 +276,14 @@ xscale-*-*)
|
|||
i[34567]86-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h"
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
|
||||
nmmintrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h"
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
|
||||
nmmintrin.h"
|
||||
need_64bit_hwint=yes
|
||||
;;
|
||||
ia64-*-*)
|
||||
|
|
|
@ -1551,6 +1551,33 @@ int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
|
|||
was set or cleared on the command line. */
|
||||
static int ix86_isa_flags_explicit;
|
||||
|
||||
/* Define, for each ISA, the set of ISAs which aren't available once
that ISA is disabled. Each *_UNSET mask names the next ISA in the
chain plus that ISA's own *_UNSET mask, so disabling one ISA also
disables every ISA layered above it. MMX
|
||||
and SSE ISAs are handled separately. */
|
||||
|
||||
#define OPTION_MASK_ISA_MMX_UNSET \
|
||||
(OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
|
||||
#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
|
||||
|
||||
#define OPTION_MASK_ISA_SSE_UNSET \
|
||||
(OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
|
||||
#define OPTION_MASK_ISA_SSE2_UNSET \
|
||||
(OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
|
||||
#define OPTION_MASK_ISA_SSE3_UNSET \
|
||||
(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
|
||||
#define OPTION_MASK_ISA_SSSE3_UNSET \
|
||||
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
|
||||
#define OPTION_MASK_ISA_SSE4_1_UNSET \
|
||||
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
|
||||
#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||||
as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
|
||||
#define OPTION_MASK_ISA_SSE4 \
|
||||
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
|
||||
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
|
||||
|
||||
/* Disabling SSE4A also turns off both SSE4.1 and SSE4.2. */
#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
|
||||
|
||||
/* Implement TARGET_HANDLE_OPTION. */
|
||||
|
||||
static bool
|
||||
|
@ -1562,10 +1589,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags
|
||||
&= ~(OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A);
|
||||
ix86_isa_flags_explicit
|
||||
|= OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A;
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1573,8 +1598,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_A;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_A;
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1585,14 +1610,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags
|
||||
&= ~(OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3
|
||||
| OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
|
||||
| OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags_explicit
|
||||
|= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3
|
||||
| OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
|
||||
| OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1600,12 +1619,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags
|
||||
&= ~(OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
|
||||
| OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags_explicit
|
||||
|= (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
|
||||
| OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1613,12 +1628,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags
|
||||
&= ~(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
|
||||
| OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags_explicit
|
||||
|= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
|
||||
| OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1626,10 +1637,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags
|
||||
&= ~(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
|
||||
ix86_isa_flags_explicit
|
||||
|= OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A;
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1637,17 +1646,36 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
|||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_msse4_2:
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_msse4:
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
|
||||
return true;
|
||||
|
||||
case OPT_mno_sse4:
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
|
||||
return true;
|
||||
|
||||
case OPT_msse4a:
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
|
||||
if (!value)
|
||||
{
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
|
||||
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
|
||||
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -1723,7 +1751,8 @@ override_options (void)
|
|||
PTA_ABM = 1 << 11,
|
||||
PTA_SSE4A = 1 << 12,
|
||||
PTA_NO_SAHF = 1 << 13,
|
||||
PTA_SSE4_1 = 1 << 14
|
||||
PTA_SSE4_1 = 1 << 14,
|
||||
PTA_SSE4_2 = 1 << 15
|
||||
} flags;
|
||||
}
|
||||
const processor_alias_table[] =
|
||||
|
@ -2001,6 +2030,9 @@ override_options (void)
|
|||
if (processor_alias_table[i].flags & PTA_SSE4_1
|
||||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
|
||||
if (processor_alias_table[i].flags & PTA_SSE4_2
|
||||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
|
||||
if (processor_alias_table[i].flags & PTA_SSE4A
|
||||
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
|
||||
|
@ -2202,6 +2234,13 @@ override_options (void)
|
|||
if (!TARGET_80387)
|
||||
target_flags |= MASK_NO_FANCY_MATH_387;
|
||||
|
||||
/* Turn on SSE4.1 builtins and popcnt instruction for -msse4.2. */
|
||||
if (TARGET_SSE4_2)
|
||||
{
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
|
||||
x86_popcnt = true;
|
||||
}
|
||||
|
||||
/* Turn on SSSE3 builtins for -msse4.1. */
|
||||
if (TARGET_SSE4_1)
|
||||
ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
|
||||
|
@ -10481,6 +10520,16 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
|
|||
rtvec v;
|
||||
switch (mode)
|
||||
{
|
||||
case SImode:
|
||||
gcc_assert (vect);
|
||||
v = gen_rtvec (4, value, value, value, value);
|
||||
return gen_rtx_CONST_VECTOR (V4SImode, v);
|
||||
|
||||
case DImode:
|
||||
gcc_assert (vect);
|
||||
v = gen_rtvec (2, value, value);
|
||||
return gen_rtx_CONST_VECTOR (V2DImode, v);
|
||||
|
||||
case SFmode:
|
||||
if (vect)
|
||||
v = gen_rtvec (4, value, value, value, value);
|
||||
|
@ -10501,37 +10550,53 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
|
|||
}
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
|
||||
Create a mask for the sign bit in MODE for an SSE register. If VECT is
|
||||
true, then replicate the mask for all elements of the vector register.
|
||||
If INVERT is true, then create a mask excluding the sign bit. */
|
||||
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
|
||||
and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
|
||||
for an SSE register. If VECT is true, then replicate the mask for
|
||||
all elements of the vector register. If INVERT is true, then create
|
||||
a mask excluding the sign bit. */
|
||||
|
||||
rtx
|
||||
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
|
||||
{
|
||||
enum machine_mode vec_mode;
|
||||
enum machine_mode vec_mode, imode;
|
||||
HOST_WIDE_INT hi, lo;
|
||||
int shift = 63;
|
||||
rtx v;
|
||||
rtx mask;
|
||||
|
||||
/* Find the sign bit, sign extended to 2*HWI. */
|
||||
if (mode == SFmode)
|
||||
lo = 0x80000000, hi = lo < 0;
|
||||
else if (HOST_BITS_PER_WIDE_INT >= 64)
|
||||
lo = (HOST_WIDE_INT)1 << shift, hi = -1;
|
||||
else
|
||||
lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
|
||||
switch (mode)
|
||||
{
|
||||
case SImode:
|
||||
case SFmode:
|
||||
imode = SImode;
|
||||
vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
|
||||
lo = 0x80000000, hi = lo < 0;
|
||||
break;
|
||||
|
||||
case DImode:
|
||||
case DFmode:
|
||||
imode = DImode;
|
||||
vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
|
||||
if (HOST_BITS_PER_WIDE_INT >= 64)
|
||||
lo = (HOST_WIDE_INT)1 << shift, hi = -1;
|
||||
else
|
||||
lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
if (invert)
|
||||
lo = ~lo, hi = ~hi;
|
||||
|
||||
/* Force this value into the low part of a fp vector constant. */
|
||||
mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
|
||||
mask = immed_double_const (lo, hi, imode);
|
||||
mask = gen_lowpart (mode, mask);
|
||||
|
||||
v = ix86_build_const_vector (mode, vect, mask);
|
||||
vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
|
||||
return force_reg (vec_mode, v);
|
||||
}
|
||||
|
||||
|
@ -12739,7 +12804,7 @@ ix86_expand_fp_vcond (rtx operands[])
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Expand a signed integral vector conditional move. */
|
||||
/* Expand a signed/unsigned integral vector conditional move. */
|
||||
|
||||
bool
|
||||
ix86_expand_int_vcond (rtx operands[])
|
||||
|
@ -12783,6 +12848,29 @@ ix86_expand_int_vcond (rtx operands[])
|
|||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Only SSE4.1/SSE4.2 support V2DImode. */
|
||||
if (mode == V2DImode)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case EQ:
|
||||
/* SSE4.1 supports EQ. */
|
||||
if (!TARGET_SSE4_1)
|
||||
return false;
|
||||
break;
|
||||
|
||||
case GT:
|
||||
case GTU:
|
||||
/* SSE4.2 supports GT/GTU. */
|
||||
if (!TARGET_SSE4_2)
|
||||
return false;
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Unsigned parallel compare is not supported by the hardware. Play some
|
||||
tricks to turn this into a signed comparison against 0. */
|
||||
if (code == GTU)
|
||||
|
@ -12792,25 +12880,30 @@ ix86_expand_int_vcond (rtx operands[])
|
|||
switch (mode)
|
||||
{
|
||||
case V4SImode:
|
||||
case V2DImode:
|
||||
{
|
||||
rtx t1, t2, mask;
|
||||
|
||||
/* Perform a parallel modulo subtraction. */
|
||||
t1 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_subv4si3 (t1, cop0, cop1));
|
||||
emit_insn ((mode == V4SImode
|
||||
? gen_subv4si3
|
||||
: gen_subv2di3) (t1, cop0, cop1));
|
||||
|
||||
/* Extract the original sign bit of op0. */
|
||||
mask = GEN_INT (-0x80000000);
|
||||
mask = gen_rtx_CONST_VECTOR (mode,
|
||||
gen_rtvec (4, mask, mask, mask, mask));
|
||||
mask = force_reg (mode, mask);
|
||||
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
|
||||
true, false);
|
||||
t2 = gen_reg_rtx (mode);
|
||||
emit_insn (gen_andv4si3 (t2, cop0, mask));
|
||||
emit_insn ((mode == V4SImode
|
||||
? gen_andv4si3
|
||||
: gen_andv2di3) (t2, cop0, mask));
|
||||
|
||||
/* XOR it back into the result of the subtraction. This results
|
||||
in the sign bit set iff we saw unsigned underflow. */
|
||||
x = gen_reg_rtx (mode);
|
||||
emit_insn (gen_xorv4si3 (x, t1, t2));
|
||||
emit_insn ((mode == V4SImode
|
||||
? gen_xorv4si3
|
||||
: gen_xorv2di3) (x, t1, t2));
|
||||
|
||||
code = GT;
|
||||
}
|
||||
|
@ -16637,6 +16730,14 @@ enum ix86_builtins
|
|||
IX86_BUILTIN_VEC_SET_V4HI,
|
||||
IX86_BUILTIN_VEC_SET_V16QI,
|
||||
|
||||
/* SSE4.2. */
|
||||
IX86_BUILTIN_CRC32QI,
|
||||
IX86_BUILTIN_CRC32HI,
|
||||
IX86_BUILTIN_CRC32SI,
|
||||
IX86_BUILTIN_CRC32DI,
|
||||
|
||||
IX86_BUILTIN_PCMPGTQ,
|
||||
|
||||
IX86_BUILTIN_MAX
|
||||
};
|
||||
|
||||
|
@ -16728,6 +16829,15 @@ static const struct builtin_description bdesc_ptest[] =
|
|||
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
|
||||
};
|
||||
|
||||
/* Descriptors for the SSE4.2 crc32 builtins. ix86_expand_builtin matches
   an entry on its builtin code and expands it via ix86_expand_crc32.
   Only the DImode variant is restricted to 64-bit targets (the
   sse4_2_crc32di pattern requires TARGET_64BIT), so OPTION_MASK_ISA_64BIT
   belongs on that entry and not on the QImode one. */
static const struct builtin_description bdesc_crc32[] =
|
||||
{
|
||||
/* SSE4.2 */
|
||||
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, 0, 0 },
|
||||
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, 0, 0 },
|
||||
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, 0, 0 },
|
||||
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 },
|
||||
};
|
||||
|
||||
/* SSE builtins with 3 arguments where the last argument must be an 8-bit
|
||||
constant or xmm0. */
|
||||
static const struct builtin_description bdesc_sse_3arg[] =
|
||||
|
@ -17050,6 +17160,9 @@ static const struct builtin_description bdesc_2arg[] =
|
|||
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
|
||||
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
|
||||
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
|
||||
|
||||
/* SSE4.2 */
|
||||
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, 0, 0 },
|
||||
};
|
||||
|
||||
static const struct builtin_description bdesc_1arg[] =
|
||||
|
@ -17881,6 +17994,28 @@ ix86_init_mmx_sse_builtins (void)
|
|||
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
|
||||
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
|
||||
|
||||
/* SSE4.2. */
|
||||
ftype = build_function_type_list (unsigned_type_node,
|
||||
unsigned_type_node,
|
||||
unsigned_char_type_node,
|
||||
NULL_TREE);
|
||||
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
|
||||
ftype = build_function_type_list (unsigned_type_node,
|
||||
unsigned_type_node,
|
||||
short_unsigned_type_node,
|
||||
NULL_TREE);
|
||||
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
|
||||
ftype = build_function_type_list (unsigned_type_node,
|
||||
unsigned_type_node,
|
||||
unsigned_type_node,
|
||||
NULL_TREE);
|
||||
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
|
||||
ftype = build_function_type_list (long_long_unsigned_type_node,
|
||||
long_long_unsigned_type_node,
|
||||
long_long_unsigned_type_node,
|
||||
NULL_TREE);
|
||||
def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
|
||||
|
||||
/* AMDFAM10 SSE4A New built-ins */
|
||||
def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
|
||||
def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
|
||||
|
@ -18059,6 +18194,41 @@ ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
|
|||
return target;
|
||||
}
|
||||
|
||||
/* Subroutine of ix86_expand_builtin to take care of crc32 insns.
   ICODE is the insn pattern to emit and EXP is the two-argument builtin
   call. TARGET is a suggested place for the result and is used only when
   it is suitable. Returns the rtx holding the result, or 0 if the
   pattern generator produced no insn. */
|
||||
|
||||
static rtx
|
||||
ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
|
||||
{
|
||||
rtx pat;
|
||||
tree arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
tree arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
rtx op0 = expand_normal (arg0);
|
||||
rtx op1 = expand_normal (arg1);
|
||||
/* Modes the insn pattern declares for its output and two inputs. */
enum machine_mode tmode = insn_data[icode].operand[0].mode;
|
||||
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
|
||||
enum machine_mode mode1 = insn_data[icode].operand[2].mode;
|
||||
|
||||
/* Use a fresh pseudo for the result when optimizing, or when TARGET is
   missing, has the wrong mode, or is rejected by the output predicate. */
if (optimize
|
||||
|| !target
|
||||
|| GET_MODE (target) != tmode
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
|
||||
/* Force each input into a form its operand predicate accepts. */
if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
|
||||
op0 = copy_to_mode_reg (mode0, op0);
|
||||
if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
|
||||
{
|
||||
/* Copy the value into a register in its own mode, then refer to it
   in MODE1 through a subreg at offset 0. */
op1 = copy_to_reg (op1);
|
||||
op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
|
||||
}
|
||||
|
||||
pat = GEN_FCN (icode) (target, op0, op1);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
}
|
||||
|
||||
/* Subroutine of ix86_expand_builtin to take care of binop insns. */
|
||||
|
||||
static rtx
|
||||
|
@ -19218,6 +19388,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
|||
if (d->code == fcode)
|
||||
return ix86_expand_sse_ptest (d, exp, target);
|
||||
|
||||
for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
|
||||
if (d->code == fcode)
|
||||
return ix86_expand_crc32 (d->icode, exp, target);
|
||||
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ Boston, MA 02110-1301, USA. */
|
|||
#define TARGET_SSE3 OPTION_ISA_SSE3
|
||||
#define TARGET_SSSE3 OPTION_ISA_SSSE3
|
||||
#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
|
||||
#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
|
||||
#define TARGET_SSE4A OPTION_ISA_SSE4A
|
||||
|
||||
#include "config/vxworks-dummy.h"
|
||||
|
@ -568,6 +569,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
|||
builtin_define ("__SSSE3__"); \
|
||||
if (TARGET_SSE4_1) \
|
||||
builtin_define ("__SSE4_1__"); \
|
||||
if (TARGET_SSE4_2) \
|
||||
builtin_define ("__SSE4_2__"); \
|
||||
if (TARGET_SSE4A) \
|
||||
builtin_define ("__SSE4A__"); \
|
||||
if (TARGET_SSE_MATH && TARGET_SSE) \
|
||||
|
|
|
@ -173,6 +173,9 @@
|
|||
(UNSPEC_PTEST 140)
|
||||
(UNSPEC_ROUNDP 141)
|
||||
(UNSPEC_ROUNDS 142)
|
||||
|
||||
; For SSE4.2 support
|
||||
(UNSPEC_CRC32 143)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
|
@ -20895,6 +20898,36 @@
|
|||
}
|
||||
[(set_attr "type" "multi")])
|
||||
|
||||
;; Source-operand modes handled by the 32-bit crc32 pattern below, with the
;; assembler mnemonic suffix and the operand-2 constraint used for each mode.
(define_mode_macro CRC32MODE [QI HI SI])
|
||||
(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")])
|
||||
(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")])
|
||||
|
||||
;; crc32 with an SImode result and a QI/HI/SI source operand. Operand 1
;; uses the matching constraint "0", i.e. it is tied to the output operand.
;; Available whenever TARGET_SSE4_2 holds.
(define_insn "sse4_2_crc32<mode>"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(unspec:SI
|
||||
[(match_operand:SI 1 "register_operand" "0")
|
||||
(match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")]
|
||||
UNSPEC_CRC32))]
|
||||
"TARGET_SSE4_2"
|
||||
"crc32<crc32modesuffix>\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; crc32 with a DImode result and DImode source operand (crc32q). Unlike
;; the pattern above, this one additionally requires TARGET_64BIT.
(define_insn "sse4_2_crc32di"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(unspec:DI
|
||||
[(match_operand:DI 1 "register_operand" "0")
|
||||
(match_operand:DI 2 "nonimmediate_operand" "rm")]
|
||||
UNSPEC_CRC32))]
|
||||
"TARGET_SSE4_2 && TARGET_64BIT"
|
||||
"crc32q\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "prefix_rep" "1")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(include "mmx.md")
|
||||
(include "sse.md")
|
||||
(include "sync.md")
|
||||
|
|
|
@ -225,6 +225,18 @@ msse4.1
|
|||
Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation
|
||||
|
||||
msse4.2
|
||||
Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
|
||||
|
||||
msse4
|
||||
Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
|
||||
|
||||
mno-sse4
|
||||
Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists
|
||||
Do not support SSE4.1 and SSE4.2 built-in functions and code generation
|
||||
|
||||
msse4a
|
||||
Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists
|
||||
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
|
||||
|
|
40
gcc/config/i386/nmmintrin.h
Normal file
40
gcc/config/i386/nmmintrin.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* Copyright (C) 2007 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||||
Boston, MA 02110-1301, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 10.0. */
|
||||
|
||||
#ifndef _NMMINTRIN_H_INCLUDED
|
||||
#define _NMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
# error "SSE4.2 instruction set not enabled"
|
||||
#else
|
||||
/* We just include SSE4.1 header file. */
|
||||
#include <smmintrin.h>
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
#endif /* _NMMINTRIN_H_INCLUDED */
|
|
@ -573,6 +573,246 @@ _mm_stream_load_si128 (__m128i *__X)
|
|||
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
|
||||
}
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
|
||||
/* These macros specify the source data format. */
|
||||
#define SIDD_UBYTE_OPS 0x00
|
||||
#define SIDD_UWORD_OPS 0x01
|
||||
#define SIDD_SBYTE_OPS 0x02
|
||||
#define SIDD_SWORD_OPS 0x03
|
||||
|
||||
/* These macros specify the comparison operation. */
|
||||
#define SIDD_CMP_EQUAL_ANY 0x00
|
||||
#define SIDD_CMP_RANGES 0x04
|
||||
#define SIDD_CMP_EQUAL_EACH 0x08
|
||||
#define SIDD_CMP_EQUAL_ORDERED 0x0c
|
||||
|
||||
/* These macros specify the polarity. */
|
||||
#define SIDD_POSITIVE_POLARITY 0x00
|
||||
#define SIDD_NEGATIVE_POLARITY 0x10
|
||||
#define SIDD_MASKED_POSITIVE_POLARITY 0x20
|
||||
#define SIDD_MASKED_NEGATIVE_POLARITY 0x30
|
||||
|
||||
/* These macros specify the output selection in _mm_cmpXstri (). */
|
||||
#define SIDD_LEAST_SIGNIFICANT 0x00
|
||||
#define SIDD_MOST_SIGNIFICANT 0x40
|
||||
|
||||
/* These macros specify the output selection in _mm_cmpXstrm (). */
|
||||
#define SIDD_BIT_MASK 0x00
|
||||
#define SIDD_UNIT_MASK 0x40
|
||||
|
||||
/* Intrinsics for text/string processing. */
|
||||
|
||||
#if 0
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_cmpistrm(X, Y, M) \
|
||||
((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M)))
|
||||
#define _mm_cmpistri(X, Y, M) \
|
||||
__builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M))
|
||||
|
||||
#define _mm_cmpestrm(X, LX, Y, LY, M) \
|
||||
((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \
|
||||
(__v16qi)(Y), (int)(LY), (M)))
|
||||
#define _mm_cmpestri(X, LX, Y, LY, M) \
|
||||
__builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \
|
||||
(__v16qi)(Y), (int)(LY), (M))
|
||||
#endif
|
||||
|
||||
/* Intrinsics for text/string processing and reading values of
|
||||
EFlags. */
|
||||
|
||||
#if 0
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
|
||||
{
|
||||
return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
|
||||
(__v16qi)__Y, __LY,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
/* Macro forms of the flag-reading string-compare intrinsics.  Each one
   casts its vector arguments to __v16qi (and, for the explicit-length
   forms, its length arguments to int) and expands to a single call of
   the matching builtin.  Every expansion is fully parenthesized so
   that it can be used inside a larger expression.  */
#define _mm_cmpistra(X, Y, M) \
  (__builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M)))
#define _mm_cmpistrc(X, Y, M) \
  (__builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M)))
#define _mm_cmpistro(X, Y, M) \
  (__builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M)))
#define _mm_cmpistrs(X, Y, M) \
  (__builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M)))
#define _mm_cmpistrz(X, Y, M) \
  (__builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M)))

#define _mm_cmpestra(X, LX, Y, LY, M) \
  (__builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \
                                 (__v16qi)(Y), (int)(LY), (M)))
#define _mm_cmpestrc(X, LX, Y, LY, M) \
  (__builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \
                                 (__v16qi)(Y), (int)(LY), (M)))
#define _mm_cmpestro(X, LX, Y, LY, M) \
  (__builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \
                                 (__v16qi)(Y), (int)(LY), (M)))
#define _mm_cmpestrs(X, LX, Y, LY, M) \
  (__builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \
                                 (__v16qi)(Y), (int)(LY), (M)))
#define _mm_cmpestrz(X, LX, Y, LY, M) \
  (__builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \
                                 (__v16qi)(Y), (int)(LY), (M)))
|
||||
#endif
|
||||
|
||||
/* Packed integer 64-bit comparison, zeroing or filling with ones
|
||||
corresponding parts of result. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
|
||||
/* Calculate the number of bits set to 1 in the 32-bit value __X.
   Implemented with __builtin_popcount, whose int result is returned
   directly.  */
static __inline int __attribute__((__always_inline__))
_mm_popcnt_u32 (unsigned int __X)
{
  return __builtin_popcount (__X);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Calculate the number of bits set to 1 in the 64-bit value __X.
   Implemented with __builtin_popcountll; the count is returned as a
   long long.  Only declared when __x86_64__ is defined (see the
   enclosing #ifdef).  */
static __inline long long __attribute__((__always_inline__))
_mm_popcnt_u64 (unsigned long long __X)
{
  return __builtin_popcountll (__X);
}
|
||||
#endif
|
||||
|
||||
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  This variant folds
   the 8-bit value __V into the running checksum __C by calling the
   crc32qi builtin and returns the builtin's result.  */
static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
  return __builtin_ia32_crc32qi (__C, __V);
}
|
||||
|
||||
/* Accumulate CRC32: folds the 16-bit value __V into the running
   checksum __C by calling the crc32hi builtin and returns the
   builtin's result.  */
static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u16 (unsigned int __C, unsigned short __V)
{
  return __builtin_ia32_crc32hi (__C, __V);
}
|
||||
|
||||
/* Accumulate CRC32: folds the 32-bit value __V into the running
   checksum __C by calling the crc32si builtin and returns the
   builtin's result.  */
static __inline unsigned int __attribute__((__always_inline__))
_mm_crc32_u32 (unsigned int __C, unsigned int __V)
{
  return __builtin_ia32_crc32si (__C, __V);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Accumulate CRC32: folds the 64-bit value __V into the running
   checksum __C by calling the crc32di builtin and returns the
   builtin's result.  Both the checksum and the return value are
   unsigned long long here.  Only declared when __x86_64__ is defined
   (see the enclosing #ifdef).  */
static __inline unsigned long long __attribute__((__always_inline__))
_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
{
  return __builtin_ia32_crc32di (__C, __V);
}
|
||||
#endif
|
||||
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
#endif /* __SSE4_1__ */
|
||||
|
||||
#endif /* _SMMINTRIN_H_INCLUDED */
|
||||
|
|
|
@ -3633,14 +3633,24 @@
|
|||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Greater-than comparison of two V2DI vectors, emitted as pcmpgtq.
;; Operand 1 is tied to the destination register (constraint "0");
;; operand 2 may be an SSE register or memory ("xm").  The pattern is
;; only enabled when TARGET_SSE4_2 holds.
(define_insn "sse4_2_gtv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (gt:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_2"
  "pcmpgtq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "vcond<mode>"
|
||||
[(set (match_operand:SSEMODE124 0 "register_operand" "")
|
||||
(if_then_else:SSEMODE124
|
||||
[(set (match_operand:SSEMODEI 0 "register_operand" "")
|
||||
(if_then_else:SSEMODEI
|
||||
(match_operator 3 ""
|
||||
[(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
|
||||
(match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
|
||||
(match_operand:SSEMODE124 1 "general_operand" "")
|
||||
(match_operand:SSEMODE124 2 "general_operand" "")))]
|
||||
[(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
|
||||
(match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
|
||||
(match_operand:SSEMODEI 1 "general_operand" "")
|
||||
(match_operand:SSEMODEI 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_int_vcond (operands))
|
||||
|
@ -3650,13 +3660,13 @@
|
|||
})
|
||||
|
||||
(define_expand "vcondu<mode>"
|
||||
[(set (match_operand:SSEMODE124 0 "register_operand" "")
|
||||
(if_then_else:SSEMODE124
|
||||
[(set (match_operand:SSEMODEI 0 "register_operand" "")
|
||||
(if_then_else:SSEMODEI
|
||||
(match_operator 3 ""
|
||||
[(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
|
||||
(match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
|
||||
(match_operand:SSEMODE124 1 "general_operand" "")
|
||||
(match_operand:SSEMODE124 2 "general_operand" "")))]
|
||||
[(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
|
||||
(match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
|
||||
(match_operand:SSEMODEI 1 "general_operand" "")
|
||||
(match_operand:SSEMODEI 2 "general_operand" "")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (ix86_expand_int_vcond (operands))
|
||||
|
|
|
@ -7502,6 +7502,54 @@ Generates the @code{pextrd} machine instruction.
|
|||
Generates the @code{pextrq} machine instruction in 64bit mode.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-msse4.2} is
|
||||
used. All of them generate the machine instruction that is part of the
|
||||
name.
|
||||
|
||||
@smallexample
|
||||
v16qi __builtin_ia32_pcmpestrm128 (v16qi, int, v16qi, int, const int)
|
||||
int __builtin_ia32_pcmpestri128 (v16qi, int, v16qi, int, const int)
|
||||
int __builtin_ia32_pcmpestria128 (v16qi, int, v16qi, int, const int)
|
||||
int __builtin_ia32_pcmpestric128 (v16qi, int, v16qi, int, const int)
|
||||
int __builtin_ia32_pcmpestrio128 (v16qi, int, v16qi, int, const int)
|
||||
int __builtin_ia32_pcmpestris128 (v16qi, int, v16qi, int, const int)
|
||||
int __builtin_ia32_pcmpestriz128 (v16qi, int, v16qi, int, const int)
|
||||
v16qi __builtin_ia32_pcmpistrm128 (v16qi, v16qi, const int)
|
||||
int __builtin_ia32_pcmpistri128 (v16qi, v16qi, const int)
|
||||
int __builtin_ia32_pcmpistria128 (v16qi, v16qi, const int)
|
||||
int __builtin_ia32_pcmpistric128 (v16qi, v16qi, const int)
|
||||
int __builtin_ia32_pcmpistrio128 (v16qi, v16qi, const int)
|
||||
int __builtin_ia32_pcmpistris128 (v16qi, v16qi, const int)
|
||||
int __builtin_ia32_pcmpistriz128 (v16qi, v16qi, const int)
|
||||
v2di __builtin_ia32_pcmpgtq (v2di, v2di)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-msse4.2} is
|
||||
used.
|
||||
|
||||
@table @code
|
||||
@item unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char)
Generates the @code{crc32b} machine instruction.
@item unsigned int __builtin_ia32_crc32hi (unsigned int, unsigned short)
Generates the @code{crc32w} machine instruction.
@item unsigned int __builtin_ia32_crc32si (unsigned int, unsigned int)
Generates the @code{crc32l} machine instruction.
@item unsigned long long __builtin_ia32_crc32di (unsigned long long, unsigned long long)
Generates the @code{crc32q} machine instruction.
|
||||
@end table
|
||||
|
||||
The following built-in functions are changed to generate new SSE4.2
|
||||
instructions when @option{-msse4.2} is used.
|
||||
|
||||
@table @code
|
||||
@item int __builtin_popcount (unsigned int)
Generates the @code{popcntl} machine instruction.
@item int __builtin_popcountl (unsigned long)
Generates the @code{popcntl} or @code{popcntq} machine instruction,
depending on the size of @code{unsigned long}.
@item int __builtin_popcountll (unsigned long long)
Generates the @code{popcntq} machine instruction.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-msse4a} is used.
|
||||
|
||||
@smallexample
|
||||
|
|
|
@ -548,7 +548,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-mno-fp-ret-in-387 -msoft-float @gol
|
||||
-mno-wide-multiply -mrtd -malign-double @gol
|
||||
-mpreferred-stack-boundary=@var{num} -mcx16 -msahf @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
|
||||
-msse4a -m3dnow -mpopcnt -mabm @gol
|
||||
-mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
|
||||
|
@ -10273,6 +10273,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|||
@itemx -mno-ssse3
|
||||
@item -msse4.1
|
||||
@itemx -mno-sse4.1
|
||||
@item -msse4.2
|
||||
@itemx -mno-sse4.2
|
||||
@item -msse4
|
||||
@itemx -mno-sse4
|
||||
@item -msse4a
|
||||
@itemx -mno-sse4a
|
||||
@item -m3dnow
|
||||
|
|
Loading…
Add table
Reference in a new issue