i386-builtin-types.awk (DEF_VECTOR_TYPE): Allow an optional 3rd argument to define the mode.
* config/i386/i386-builtin-types.awk (DEF_VECTOR_TYPE): Allow an optional 3rd argument to define the mode. * config/i386/i386-builtin-types.def (UQI, UHI, USI, UDI): New. (V2UDI, V4USI, V8UHI, V16UQI): New. (V4SF_FTYPE_V4SF_V4SF_V4SI, V2UDI_FTYPE_V2UDI_V2UDI_V2UDI, V4USI_FTYPE_V4USI_V4USI_V4USI, V8UHI_FTYPE_V8UHI_V8UHI_V8UHI, V16UQI_FTYPE_V16UQI_V16UQI_V16UQI): New. * config/i386/i386-modes.def: Rearrange for double-wide AVX. * config/i386/i386-protos.h (ix86_expand_vec_extract_even_odd): New. * config/i386/i386.c (IX86_BUILTIN_VEC_PERM_*): New. (bdesc_args): Add the builtin definitions to match. (ix86_expand_builtin): Expand them. (ix86_builtin_vectorization_cost): Rename from x86_builtin_vectorization_cost. (ix86_vectorize_builtin_vec_perm, struct expand_vec_perm_d, doublesize_vector_mode, expand_vselect, expand_vselect_vconcat, expand_vec_perm_blend, expand_vec_perm_vpermil, expand_vec_perm_pshufb, expand_vec_perm_1, expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_palignr, expand_vec_perm_interleave2, expand_vec_perm_pshufb2, expand_vec_perm_even_odd_1, expand_vec_perm_even_odd, ix86_expand_vec_perm_builtin_1, extract_vec_perm_cst, ix86_expand_vec_perm_builtin, ix86_vectorize_builtin_vec_perm_ok, ix86_expand_vec_extract_even_odd, TARGET_VECTORIZE_BUILTIN_VEC_PERM, TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK): New. * sse.md (SSEMODE_EO): New. (vec_extract_even<mode>): Use SSEMODE_EO and ix86_expand_vec_extract_even_odd. (vec_extract_odd<mode>): Likewise. (mulv16qi3, vec_pack_trunc_v8hi, vec_pack_trunc_v4si, vec_pack_trunc_v2di): Use ix86_expand_vec_extract_even_odd. testsuite/ * gcc.dg/vect/slp-21.c: Succeed with vect_extract_even_odd too. * lib/target-supports.exp (check_effective_target_vect_extract_even_odd): Add x86. * gcc.target/i386/isa-check.h: New. 
* gcc.target/i386/vperm-2-2.inc, gcc.target/i386/vperm-4-1.inc, gcc.target/i386/vperm-4-2.inc, gcc.target/i386/vperm-v2df.c, gcc.target/i386/vperm-v2di.c, gcc.target/i386/vperm-v4sf-1.c, gcc.target/i386/vperm-v4sf-2.c, gcc.target/i386/vperm-v4si-1.c, gcc.target/i386/vperm-v4si-2.c, gcc.target/i386/vperm-v4si-2x.c, gcc.target/i386/vperm.pl: New files. From-SVN: r154667
This commit is contained in:
parent
dac9d53aef
commit
0fac515143
22 changed files with 6204 additions and 165 deletions
|
@ -1,3 +1,37 @@
|
|||
2009-11-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386-builtin-types.awk (DEF_VECTOR_TYPE): Allow an
|
||||
optional 3rd argument to define the mode.
|
||||
* config/i386/i386-builtin-types.def (UQI, UHI, USI, UDI): New.
|
||||
(V2UDI, V4USI, V8UHI, V16UQI): New.
|
||||
(V4SF_FTYPE_V4SF_V4SF_V4SI, V2UDI_FTYPE_V2UDI_V2UDI_V2UDI,
|
||||
V4USI_FTYPE_V4USI_V4USI_V4USI, V8UHI_FTYPE_V8UHI_V8UHI_V8UHI,
|
||||
V16UQI_FTYPE_V16UQI_V16UQI_V16UQI): New.
|
||||
* config/i386/i386-modes.def: Rearrange for double-wide AVX.
|
||||
* config/i386/i386-protos.h (ix86_expand_vec_extract_even_odd): New.
|
||||
* config/i386/i386.c (IX86_BUILTIN_VEC_PERM_*): New.
|
||||
(bdesc_args): Add the builtin definitions to match.
|
||||
(ix86_expand_builtin): Expand them.
|
||||
(ix86_builtin_vectorization_cost): Rename from
|
||||
x86_builtin_vectorization_cost.
|
||||
(ix86_vectorize_builtin_vec_perm, struct expand_vec_perm_d,
|
||||
doublesize_vector_mode, expand_vselect, expand_vselect_vconcat,
|
||||
expand_vec_perm_blend, expand_vec_perm_vpermil,
|
||||
expand_vec_perm_pshufb, expand_vec_perm_1,
|
||||
expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_palignr,
|
||||
expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
|
||||
expand_vec_perm_even_odd_1, expand_vec_perm_even_odd,
|
||||
ix86_expand_vec_perm_builtin_1, extract_vec_perm_cst,
|
||||
ix86_expand_vec_perm_builtin, ix86_vectorize_builtin_vec_perm_ok,
|
||||
ix86_expand_vec_extract_even_odd, TARGET_VECTORIZE_BUILTIN_VEC_PERM,
|
||||
TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK): New.
|
||||
* sse.md (SSEMODE_EO): New.
|
||||
(vec_extract_even<mode>): Use SSEMODE_EO and
|
||||
ix86_expand_vec_extract_even_odd.
|
||||
(vec_extract_odd<mode>): Likewise.
|
||||
(mulv16qi3, vec_pack_trunc_v8hi, vec_pack_trunc_v4si,
|
||||
vec_pack_trunc_v2di): Use ix86_expand_vec_extract_even_odd.
|
||||
|
||||
2009-11-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* target.h (targetm.vectorize.builtin_vec_perm_ok): New.
|
||||
|
|
|
@ -69,11 +69,12 @@ $1 == "DEF_PRIMITIVE_TYPE" {
|
|||
}
|
||||
|
||||
$1 == "DEF_VECTOR_TYPE" {
|
||||
if (NF == 4) {
|
||||
if (NF == 4 || NF == 5) {
|
||||
check_type($3)
|
||||
type_hash[$2] = 1
|
||||
vect_mode[vect_defs] = $2
|
||||
vect_name[vect_defs] = $2
|
||||
vect_base[vect_defs] = $3
|
||||
vect_mode[vect_defs] = (NF == 5 ? $4 : $2)
|
||||
vect_defs++
|
||||
} else
|
||||
# Reached when NF is neither 4 (ENUM, TYPE) nor 5 (ENUM, TYPE, MODE).
do_error("DEF_VECTOR_TYPE expected 2 or 3 arguments")
|
||||
|
@ -152,8 +153,8 @@ END {
|
|||
print " IX86_BT_" prim_name[i] ","
|
||||
print " IX86_BT_LAST_PRIM = IX86_BT_" prim_name[i-1] ","
|
||||
for (i = 0; i < vect_defs; ++i)
|
||||
print " IX86_BT_" vect_mode[i] ","
|
||||
print " IX86_BT_LAST_VECT = IX86_BT_" vect_mode[i-1] ","
|
||||
print " IX86_BT_" vect_name[i] ","
|
||||
print " IX86_BT_LAST_VECT = IX86_BT_" vect_name[i-1] ","
|
||||
for (i = 0; i < ptr_defs; ++i)
|
||||
print " IX86_BT_" ptr_name[i] ","
|
||||
print " IX86_BT_LAST_PTR = IX86_BT_" ptr_name[i-1] ","
|
||||
|
|
|
@ -10,12 +10,12 @@
|
|||
# At present, that's all that's required; revisit if it turns out
|
||||
# that we need more than that.
|
||||
#
|
||||
# DEF_VECTOR_TYPE (ENUM, TYPE)
|
||||
# DEF_VECTOR_TYPE (ENUM, TYPE [, MODE])
|
||||
#
|
||||
# This describes a vector type. ENUM doubles as both the identifier
|
||||
# to define in the enumeration as well as the mode of the vector; TYPE is
|
||||
# the enumeral for the inner type which should of course name a type of
|
||||
# the proper inner mode.
|
||||
# This describes a vector type. ENUM is an identifier as above.
|
||||
# TYPE is the enumeral for the inner type which should of course
|
||||
# name a type of the proper inner mode. If present, MODE is the
|
||||
# machine mode, else the machine mode should be the same as ENUM.
|
||||
#
|
||||
# DEF_POINTER_TYPE (ENUM, TYPE [, CONST])
|
||||
#
|
||||
|
@ -40,10 +40,22 @@
|
|||
DEF_PRIMITIVE_TYPE (VOID, void_type_node)
|
||||
DEF_PRIMITIVE_TYPE (CHAR, char_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
|
||||
DEF_PRIMITIVE_TYPE (QI, intQI_type_node)
|
||||
# ??? Logically this should be intQI_type_node, but that maps to "signed char"
|
||||
# which is a different type than "char" even if "char" is signed. This must
|
||||
# match the usage in emmintrin.h and changing this would change name mangling
|
||||
# and so is not advisable.
|
||||
DEF_PRIMITIVE_TYPE (QI, char_type_node)
|
||||
DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
|
||||
DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
|
||||
# ??? Logically this should be intDI_type_node, but that maps to "long"
|
||||
# with 64-bit, and that's not how the emmintrin.h is written. Again,
|
||||
# changing this would change name mangling.
|
||||
DEF_PRIMITIVE_TYPE (DI, long_long_integer_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UQI, unsigned_intQI_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UHI, unsigned_intHI_type_node)
|
||||
DEF_PRIMITIVE_TYPE (USI, unsigned_intSI_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UDI, long_long_unsigned_type_node)
|
||||
# ??? Some of the types below should use the mode types above.
|
||||
DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
|
||||
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
|
||||
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
|
||||
|
@ -59,23 +71,33 @@ DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node)
|
|||
DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node)
|
||||
DEF_PRIMITIVE_TYPE (FLOAT128, float128_type_node)
|
||||
|
||||
DEF_VECTOR_TYPE (V16HI, HI)
|
||||
DEF_VECTOR_TYPE (V16QI, CHAR)
|
||||
DEF_VECTOR_TYPE (V1DI, DI)
|
||||
DEF_VECTOR_TYPE (V2DF, DOUBLE)
|
||||
DEF_VECTOR_TYPE (V2DI, DI)
|
||||
# MMX vectors
|
||||
DEF_VECTOR_TYPE (V2SF, FLOAT)
|
||||
DEF_VECTOR_TYPE (V1DI, DI)
|
||||
DEF_VECTOR_TYPE (V2SI, SI)
|
||||
DEF_VECTOR_TYPE (V32QI, CHAR)
|
||||
DEF_VECTOR_TYPE (V4DF, DOUBLE)
|
||||
DEF_VECTOR_TYPE (V4DI, DI)
|
||||
DEF_VECTOR_TYPE (V4HI, HI)
|
||||
DEF_VECTOR_TYPE (V8QI, QI)
|
||||
|
||||
# SSE vectors
|
||||
DEF_VECTOR_TYPE (V2DF, DOUBLE)
|
||||
DEF_VECTOR_TYPE (V4SF, FLOAT)
|
||||
DEF_VECTOR_TYPE (V2DI, DI)
|
||||
DEF_VECTOR_TYPE (V4SI, SI)
|
||||
DEF_VECTOR_TYPE (V8HI, HI)
|
||||
DEF_VECTOR_TYPE (V8QI, CHAR)
|
||||
DEF_VECTOR_TYPE (V16QI, QI)
|
||||
DEF_VECTOR_TYPE (V2UDI, UDI, V2DI)
|
||||
DEF_VECTOR_TYPE (V4USI, USI, V4SI)
|
||||
DEF_VECTOR_TYPE (V8UHI, UHI, V8HI)
|
||||
DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
|
||||
|
||||
# AVX vectors
|
||||
DEF_VECTOR_TYPE (V4DF, DOUBLE)
|
||||
DEF_VECTOR_TYPE (V8SF, FLOAT)
|
||||
DEF_VECTOR_TYPE (V4DI, DI)
|
||||
DEF_VECTOR_TYPE (V8SI, SI)
|
||||
DEF_VECTOR_TYPE (V16HI, HI)
|
||||
DEF_VECTOR_TYPE (V32QI, QI)
|
||||
|
||||
|
||||
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
|
||||
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
|
||||
|
@ -323,6 +345,12 @@ DEF_FUNCTION_TYPE (VOID, UINT64, UINT, UINT)
|
|||
DEF_FUNCTION_TYPE (VOID, USHORT, UINT, USHORT)
|
||||
DEF_FUNCTION_TYPE (VOID, V16QI, V16QI, PCHAR)
|
||||
DEF_FUNCTION_TYPE (VOID, V8QI, V8QI, PCHAR)
|
||||
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI)
|
||||
DEF_FUNCTION_TYPE (V2UDI, V2UDI, V2UDI, V2UDI)
|
||||
DEF_FUNCTION_TYPE (V4USI, V4USI, V4USI, V4USI)
|
||||
DEF_FUNCTION_TYPE (V8UHI, V8UHI, V8UHI, V8UHI)
|
||||
DEF_FUNCTION_TYPE (V16UQI, V16UQI, V16UQI, V16UQI)
|
||||
|
||||
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
|
||||
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
|
||||
|
|
|
@ -69,22 +69,20 @@ CC_MODE (CCZ);
|
|||
CC_MODE (CCFP);
|
||||
CC_MODE (CCFPU);
|
||||
|
||||
/* Vector modes. */
|
||||
VECTOR_MODES (INT, 4); /* V4QI V2HI */
|
||||
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
|
||||
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
|
||||
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
|
||||
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
|
||||
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
|
||||
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
|
||||
VECTOR_MODE (INT, DI, 1); /* V1DI */
|
||||
VECTOR_MODE (INT, SI, 1); /* V1SI */
|
||||
VECTOR_MODE (INT, QI, 2); /* V2QI */
|
||||
VECTOR_MODE (INT, DI, 8); /* V8DI */
|
||||
VECTOR_MODE (INT, HI, 32); /* V32HI */
|
||||
VECTOR_MODE (INT, QI, 64); /* V64QI */
|
||||
VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
|
||||
VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
|
||||
/* Vector modes. Note that VEC_CONCAT patterns require vector
|
||||
sizes twice as big as implemented in hardware. */
|
||||
VECTOR_MODES (INT, 4); /* V4QI V2HI */
|
||||
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
|
||||
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
|
||||
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
|
||||
VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */
|
||||
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
|
||||
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
|
||||
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
|
||||
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */
|
||||
VECTOR_MODE (INT, DI, 1); /* V1DI */
|
||||
VECTOR_MODE (INT, SI, 1); /* V1SI */
|
||||
VECTOR_MODE (INT, QI, 2); /* V2QI */
|
||||
|
||||
INT_MODE (OI, 32);
|
||||
|
||||
|
|
|
@ -219,6 +219,8 @@ extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
|
|||
extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool);
|
||||
extern void ix86_expand_fma4_multiple_memory (rtx [], int, enum machine_mode);
|
||||
|
||||
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
|
||||
|
||||
/* In i386-c.c */
|
||||
extern void ix86_target_macros (void);
|
||||
extern void ix86_register_pragmas (void);
|
||||
|
@ -277,4 +279,3 @@ extern int asm_preferred_eh_data_format (int, int);
|
|||
#ifdef HAVE_ATTR_cpu
|
||||
extern enum attr_cpu ix86_schedule;
|
||||
#endif
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -71,6 +71,14 @@
|
|||
(define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
|
||||
(V2DI "TARGET_SSE4_2")])
|
||||
|
||||
;; Modes handled by vec_extract_even/odd pattern.
|
||||
(define_mode_iterator SSEMODE_EO
|
||||
[(V4SF "TARGET_SSE")
|
||||
(V2DF "TARGET_SSE2")
|
||||
(V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
|
||||
(V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
|
||||
(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
|
||||
|
||||
;; Mapping from float mode to required SSE level
|
||||
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
|
||||
|
||||
|
@ -4693,48 +4701,24 @@
|
|||
})
|
||||
|
||||
(define_expand "vec_extract_even<mode>"
|
||||
[(set (match_operand:SSEMODE4S 0 "register_operand" "")
|
||||
(vec_select:SSEMODE4S
|
||||
(vec_concat:<ssedoublesizemode>
|
||||
(match_operand:SSEMODE4S 1 "register_operand" "")
|
||||
(match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
|
||||
(parallel [(const_int 0)
|
||||
(const_int 2)
|
||||
(const_int 4)
|
||||
(const_int 6)])))]
|
||||
"TARGET_SSE")
|
||||
[(match_operand:SSEMODE_EO 0 "register_operand" "")
|
||||
(match_operand:SSEMODE_EO 1 "register_operand" "")
|
||||
(match_operand:SSEMODE_EO 2 "register_operand" "")]
|
||||
""
|
||||
{
|
||||
ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_extract_odd<mode>"
|
||||
[(set (match_operand:SSEMODE4S 0 "register_operand" "")
|
||||
(vec_select:SSEMODE4S
|
||||
(vec_concat:<ssedoublesizemode>
|
||||
(match_operand:SSEMODE4S 1 "register_operand" "")
|
||||
(match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
|
||||
(parallel [(const_int 1)
|
||||
(const_int 3)
|
||||
(const_int 5)
|
||||
(const_int 7)])))]
|
||||
"TARGET_SSE")
|
||||
|
||||
(define_expand "vec_extract_even<mode>"
|
||||
[(set (match_operand:SSEMODE2D 0 "register_operand" "")
|
||||
(vec_select:SSEMODE2D
|
||||
(vec_concat:<ssedoublesizemode>
|
||||
(match_operand:SSEMODE2D 1 "register_operand" "")
|
||||
(match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
|
||||
(parallel [(const_int 0)
|
||||
(const_int 2)])))]
|
||||
"TARGET_SSE2")
|
||||
|
||||
(define_expand "vec_extract_odd<mode>"
|
||||
[(set (match_operand:SSEMODE2D 0 "register_operand" "")
|
||||
(vec_select:SSEMODE2D
|
||||
(vec_concat:<ssedoublesizemode>
|
||||
(match_operand:SSEMODE2D 1 "register_operand" "")
|
||||
(match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
|
||||
(parallel [(const_int 1)
|
||||
(const_int 3)])))]
|
||||
"TARGET_SSE2")
|
||||
[(match_operand:SSEMODE_EO 0 "register_operand" "")
|
||||
(match_operand:SSEMODE_EO 1 "register_operand" "")
|
||||
(match_operand:SSEMODE_EO 2 "register_operand" "")]
|
||||
""
|
||||
{
|
||||
ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; punpcklqdq and punpckhqdq are shorter than shufpd.
|
||||
(define_insn "*avx_punpckhqdq"
|
||||
|
@ -5243,20 +5227,16 @@
|
|||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn_and_split "mulv16qi3"
|
||||
(define_expand "mulv16qi3"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "")
|
||||
(mult:V16QI (match_operand:V16QI 1 "register_operand" "")
|
||||
(match_operand:V16QI 2 "register_operand" "")))]
|
||||
"TARGET_SSE2
|
||||
&& can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx t[12];
|
||||
rtx t[6];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 12; ++i)
|
||||
for (i = 0; i < 6; ++i)
|
||||
t[i] = gen_reg_rtx (V16QImode);
|
||||
|
||||
/* Unpack data such that we've got a source byte in each low byte of
|
||||
|
@ -5278,15 +5258,8 @@
|
|||
gen_lowpart (V8HImode, t[2]),
|
||||
gen_lowpart (V8HImode, t[3])));
|
||||
|
||||
/* Extract the relevant bytes and merge them back together. */
|
||||
emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
|
||||
emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
|
||||
emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
|
||||
emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
|
||||
emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
|
||||
emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
|
||||
|
||||
emit_insn (gen_sse2_punpcklbw (operands[0], t[11], t[10])); /* ABCDEFGHIJKLMNOP */
|
||||
/* Extract the even bytes and merge them back together. */
|
||||
ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -6578,96 +6551,39 @@
|
|||
;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Reduce:
|
||||
;; op1 = abcdefghijklmnop
|
||||
;; op2 = qrstuvwxyz012345
|
||||
;; h1 = aqbrcsdteufvgwhx
|
||||
;; l1 = iyjzk0l1m2n3o4p5
|
||||
;; h2 = aiqybjrzcks0dlt1
|
||||
;; l2 = emu2fnv3gow4hpx5
|
||||
;; h3 = aeimquy2bfjnrvz3
|
||||
;; l3 = cgkosw04dhlptx15
|
||||
;; result = bdfhjlnprtvxz135
|
||||
(define_expand "vec_pack_trunc_v8hi"
|
||||
[(match_operand:V16QI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, h1, l1, h2, l2, h3, l3;
|
||||
|
||||
op1 = gen_lowpart (V16QImode, operands[1]);
|
||||
op2 = gen_lowpart (V16QImode, operands[2]);
|
||||
h1 = gen_reg_rtx (V16QImode);
|
||||
l1 = gen_reg_rtx (V16QImode);
|
||||
h2 = gen_reg_rtx (V16QImode);
|
||||
l2 = gen_reg_rtx (V16QImode);
|
||||
h3 = gen_reg_rtx (V16QImode);
|
||||
l3 = gen_reg_rtx (V16QImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
|
||||
rtx op1 = gen_lowpart (V16QImode, operands[1]);
|
||||
rtx op2 = gen_lowpart (V16QImode, operands[2]);
|
||||
ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Reduce:
|
||||
;; op1 = abcdefgh
|
||||
;; op2 = ijklmnop
|
||||
;; h1 = aibjckdl
|
||||
;; l1 = emfngohp
|
||||
;; h2 = aeimbfjn
|
||||
;; l2 = cgkodhlp
|
||||
;; result = bdfhjlnp
|
||||
(define_expand "vec_pack_trunc_v4si"
|
||||
[(match_operand:V8HI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, h1, l1, h2, l2;
|
||||
|
||||
op1 = gen_lowpart (V8HImode, operands[1]);
|
||||
op2 = gen_lowpart (V8HImode, operands[2]);
|
||||
h1 = gen_reg_rtx (V8HImode);
|
||||
l1 = gen_reg_rtx (V8HImode);
|
||||
h2 = gen_reg_rtx (V8HImode);
|
||||
l2 = gen_reg_rtx (V8HImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
|
||||
rtx op1 = gen_lowpart (V8HImode, operands[1]);
|
||||
rtx op2 = gen_lowpart (V8HImode, operands[2]);
|
||||
ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Reduce:
|
||||
;; op1 = abcd
|
||||
;; op2 = efgh
|
||||
;; h1 = aebf
|
||||
;; l1 = cgdh
|
||||
;; result = bdfh
|
||||
(define_expand "vec_pack_trunc_v2di"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V2DI 1 "register_operand" "")
|
||||
(match_operand:V2DI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, h1, l1;
|
||||
|
||||
op1 = gen_lowpart (V4SImode, operands[1]);
|
||||
op2 = gen_lowpart (V4SImode, operands[2]);
|
||||
h1 = gen_reg_rtx (V4SImode);
|
||||
l1 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
|
||||
rtx op1 = gen_lowpart (V4SImode, operands[1]);
|
||||
rtx op2 = gen_lowpart (V4SImode, operands[2]);
|
||||
ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
|
|
@ -1,3 +1,18 @@
|
|||
2009-11-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* gcc.dg/vect/slp-21.c: Succeed with vect_extract_even_odd too.
|
||||
|
||||
* lib/target-supports.exp
|
||||
(check_effective_target_vect_extract_even_odd): Add x86.
|
||||
|
||||
* gcc.target/i386/isa-check.h: New.
|
||||
* gcc.target/i386/vperm-2-2.inc, gcc.target/i386/vperm-4-1.inc,
|
||||
gcc.target/i386/vperm-4-2.inc, gcc.target/i386/vperm-v2df.c,
|
||||
gcc.target/i386/vperm-v2di.c, gcc.target/i386/vperm-v4sf-1.c,
|
||||
gcc.target/i386/vperm-v4sf-2.c, gcc.target/i386/vperm-v4si-1.c,
|
||||
gcc.target/i386/vperm-v4si-2.c, gcc.target/i386/vperm-v4si-2x.c,
|
||||
gcc.target/i386/vperm.pl: New files.
|
||||
|
||||
2009-11-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* gcc.dg/vect/pr36493.c: Call check_vect.
|
||||
|
|
|
@ -200,8 +200,8 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_strided } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided || vect_extract_even_odd } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided || vect_extract_even_odd } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided } } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
85
gcc/testsuite/gcc.target/i386/isa-check.h
Normal file
85
gcc/testsuite/gcc.target/i386/isa-check.h
Normal file
|
@ -0,0 +1,85 @@
|
|||
#include "cpuid.h"
|
||||
|
||||
extern void exit (int) __attribute__((noreturn));
|
||||
|
||||
/* Determine what instruction set we've been compiled for,
|
||||
and detect that we're running with it. */
|
||||
static void __attribute__((constructor))
|
||||
check_isa (void)
|
||||
{
|
||||
int a, b, c, d;
|
||||
int c1, d1, c1e, d1e;
|
||||
|
||||
c1 = d1 = c1e = d1e = 0;
|
||||
|
||||
#ifdef __MMX__
|
||||
d1 |= bit_MMX;
|
||||
#endif
|
||||
#ifdef __3dNOW__
|
||||
d1e |= bit_3DNOW;
|
||||
#endif
|
||||
#ifdef __3dNOW_A__
|
||||
d1e |= bit_3DNOWP;
|
||||
#endif
|
||||
#ifdef __SSE__
|
||||
d1 |= bit_SSE;
|
||||
#endif
|
||||
#ifdef __SSE2__
|
||||
d1 |= bit_SSE2;
|
||||
#endif
|
||||
#ifdef __SSE3__
|
||||
c1 |= bit_SSE3;
|
||||
#endif
|
||||
#ifdef __SSSE3__
|
||||
c1 |= bit_SSSE3;
|
||||
#endif
|
||||
#ifdef __SSE4_1__
|
||||
c1 |= bit_SSE4_1;
|
||||
#endif
|
||||
#ifdef __SSE4_2__
|
||||
c1 |= bit_SSE4_2;
|
||||
#endif
|
||||
#ifdef __AES__
|
||||
c1 |= bit_AES;
|
||||
#endif
|
||||
#ifdef __PCLMUL__
|
||||
c1 |= bit_PCLMUL;
|
||||
#endif
|
||||
#ifdef __AVX__
|
||||
c1 |= bit_AVX;
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
c1 |= bit_FMA;
|
||||
#endif
|
||||
#ifdef __SSE4A__
|
||||
c1e |= bit_SSE4a;
|
||||
#endif
|
||||
#ifdef __FMA4__
|
||||
c1e |= bit_FMA4;
|
||||
#endif
|
||||
#ifdef __XOP__
|
||||
c1e |= bit_XOP;
|
||||
#endif
|
||||
#ifdef __LWP__
|
||||
c1e |= bit_LWP;
|
||||
#endif
|
||||
|
||||
if (c1 | d1)
|
||||
{
|
||||
if (!__get_cpuid (1, &a, &b, &c, &d))
|
||||
goto fail;
|
||||
if ((c & c1) != c1 || (d & d1) != d1)
|
||||
goto fail;
|
||||
}
|
||||
if (c1e | d1e)
|
||||
{
|
||||
if (!__get_cpuid (0x80000001, &a, &b, &c, &d))
|
||||
goto fail;
|
||||
if ((c & c1e) != c1e || (d & d1e) != d1e)
|
||||
goto fail;
|
||||
}
|
||||
return;
|
||||
|
||||
fail:
|
||||
exit (0);
|
||||
}
|
27
gcc/testsuite/gcc.target/i386/vperm-2-2.inc
Normal file
27
gcc/testsuite/gcc.target/i386/vperm-2-2.inc
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* This file auto-generated with ./vperm.pl 2 2. */
|
||||
|
||||
void check0(void)
|
||||
{
|
||||
TEST (0, 0)
|
||||
TEST (1, 0)
|
||||
TEST (2, 0)
|
||||
TEST (3, 0)
|
||||
TEST (0, 1)
|
||||
TEST (1, 1)
|
||||
TEST (2, 1)
|
||||
TEST (3, 1)
|
||||
TEST (0, 2)
|
||||
TEST (1, 2)
|
||||
TEST (2, 2)
|
||||
TEST (3, 2)
|
||||
TEST (0, 3)
|
||||
TEST (1, 3)
|
||||
TEST (2, 3)
|
||||
TEST (3, 3)
|
||||
}
|
||||
|
||||
void check(void)
|
||||
{
|
||||
check0 ();
|
||||
}
|
||||
|
272
gcc/testsuite/gcc.target/i386/vperm-4-1.inc
Normal file
272
gcc/testsuite/gcc.target/i386/vperm-4-1.inc
Normal file
|
@ -0,0 +1,272 @@
|
|||
/* This file auto-generated with ./vperm.pl 4 1. */
|
||||
|
||||
void check0(void)
|
||||
{
|
||||
TEST (0, 0, 0, 0)
|
||||
TEST (1, 0, 0, 0)
|
||||
TEST (2, 0, 0, 0)
|
||||
TEST (3, 0, 0, 0)
|
||||
TEST (0, 1, 0, 0)
|
||||
TEST (1, 1, 0, 0)
|
||||
TEST (2, 1, 0, 0)
|
||||
TEST (3, 1, 0, 0)
|
||||
TEST (0, 2, 0, 0)
|
||||
TEST (1, 2, 0, 0)
|
||||
TEST (2, 2, 0, 0)
|
||||
TEST (3, 2, 0, 0)
|
||||
TEST (0, 3, 0, 0)
|
||||
TEST (1, 3, 0, 0)
|
||||
TEST (2, 3, 0, 0)
|
||||
TEST (3, 3, 0, 0)
|
||||
TEST (0, 0, 1, 0)
|
||||
TEST (1, 0, 1, 0)
|
||||
TEST (2, 0, 1, 0)
|
||||
TEST (3, 0, 1, 0)
|
||||
TEST (0, 1, 1, 0)
|
||||
TEST (1, 1, 1, 0)
|
||||
TEST (2, 1, 1, 0)
|
||||
TEST (3, 1, 1, 0)
|
||||
TEST (0, 2, 1, 0)
|
||||
TEST (1, 2, 1, 0)
|
||||
TEST (2, 2, 1, 0)
|
||||
TEST (3, 2, 1, 0)
|
||||
TEST (0, 3, 1, 0)
|
||||
TEST (1, 3, 1, 0)
|
||||
TEST (2, 3, 1, 0)
|
||||
TEST (3, 3, 1, 0)
|
||||
TEST (0, 0, 2, 0)
|
||||
TEST (1, 0, 2, 0)
|
||||
TEST (2, 0, 2, 0)
|
||||
TEST (3, 0, 2, 0)
|
||||
TEST (0, 1, 2, 0)
|
||||
TEST (1, 1, 2, 0)
|
||||
TEST (2, 1, 2, 0)
|
||||
TEST (3, 1, 2, 0)
|
||||
TEST (0, 2, 2, 0)
|
||||
TEST (1, 2, 2, 0)
|
||||
TEST (2, 2, 2, 0)
|
||||
TEST (3, 2, 2, 0)
|
||||
TEST (0, 3, 2, 0)
|
||||
TEST (1, 3, 2, 0)
|
||||
TEST (2, 3, 2, 0)
|
||||
TEST (3, 3, 2, 0)
|
||||
TEST (0, 0, 3, 0)
|
||||
TEST (1, 0, 3, 0)
|
||||
TEST (2, 0, 3, 0)
|
||||
TEST (3, 0, 3, 0)
|
||||
TEST (0, 1, 3, 0)
|
||||
TEST (1, 1, 3, 0)
|
||||
TEST (2, 1, 3, 0)
|
||||
TEST (3, 1, 3, 0)
|
||||
TEST (0, 2, 3, 0)
|
||||
TEST (1, 2, 3, 0)
|
||||
TEST (2, 2, 3, 0)
|
||||
TEST (3, 2, 3, 0)
|
||||
TEST (0, 3, 3, 0)
|
||||
TEST (1, 3, 3, 0)
|
||||
TEST (2, 3, 3, 0)
|
||||
TEST (3, 3, 3, 0)
|
||||
TEST (0, 0, 0, 1)
|
||||
TEST (1, 0, 0, 1)
|
||||
TEST (2, 0, 0, 1)
|
||||
TEST (3, 0, 0, 1)
|
||||
TEST (0, 1, 0, 1)
|
||||
TEST (1, 1, 0, 1)
|
||||
TEST (2, 1, 0, 1)
|
||||
TEST (3, 1, 0, 1)
|
||||
TEST (0, 2, 0, 1)
|
||||
TEST (1, 2, 0, 1)
|
||||
TEST (2, 2, 0, 1)
|
||||
TEST (3, 2, 0, 1)
|
||||
TEST (0, 3, 0, 1)
|
||||
TEST (1, 3, 0, 1)
|
||||
TEST (2, 3, 0, 1)
|
||||
TEST (3, 3, 0, 1)
|
||||
TEST (0, 0, 1, 1)
|
||||
TEST (1, 0, 1, 1)
|
||||
TEST (2, 0, 1, 1)
|
||||
TEST (3, 0, 1, 1)
|
||||
TEST (0, 1, 1, 1)
|
||||
TEST (1, 1, 1, 1)
|
||||
TEST (2, 1, 1, 1)
|
||||
TEST (3, 1, 1, 1)
|
||||
TEST (0, 2, 1, 1)
|
||||
TEST (1, 2, 1, 1)
|
||||
TEST (2, 2, 1, 1)
|
||||
TEST (3, 2, 1, 1)
|
||||
TEST (0, 3, 1, 1)
|
||||
TEST (1, 3, 1, 1)
|
||||
TEST (2, 3, 1, 1)
|
||||
TEST (3, 3, 1, 1)
|
||||
TEST (0, 0, 2, 1)
|
||||
TEST (1, 0, 2, 1)
|
||||
TEST (2, 0, 2, 1)
|
||||
TEST (3, 0, 2, 1)
|
||||
TEST (0, 1, 2, 1)
|
||||
TEST (1, 1, 2, 1)
|
||||
TEST (2, 1, 2, 1)
|
||||
TEST (3, 1, 2, 1)
|
||||
TEST (0, 2, 2, 1)
|
||||
TEST (1, 2, 2, 1)
|
||||
TEST (2, 2, 2, 1)
|
||||
TEST (3, 2, 2, 1)
|
||||
TEST (0, 3, 2, 1)
|
||||
TEST (1, 3, 2, 1)
|
||||
TEST (2, 3, 2, 1)
|
||||
TEST (3, 3, 2, 1)
|
||||
TEST (0, 0, 3, 1)
|
||||
TEST (1, 0, 3, 1)
|
||||
TEST (2, 0, 3, 1)
|
||||
TEST (3, 0, 3, 1)
|
||||
TEST (0, 1, 3, 1)
|
||||
TEST (1, 1, 3, 1)
|
||||
TEST (2, 1, 3, 1)
|
||||
TEST (3, 1, 3, 1)
|
||||
TEST (0, 2, 3, 1)
|
||||
TEST (1, 2, 3, 1)
|
||||
TEST (2, 2, 3, 1)
|
||||
TEST (3, 2, 3, 1)
|
||||
TEST (0, 3, 3, 1)
|
||||
TEST (1, 3, 3, 1)
|
||||
TEST (2, 3, 3, 1)
|
||||
TEST (3, 3, 3, 1)
|
||||
}
|
||||
|
||||
void check1(void)
|
||||
{
|
||||
TEST (0, 0, 0, 2)
|
||||
TEST (1, 0, 0, 2)
|
||||
TEST (2, 0, 0, 2)
|
||||
TEST (3, 0, 0, 2)
|
||||
TEST (0, 1, 0, 2)
|
||||
TEST (1, 1, 0, 2)
|
||||
TEST (2, 1, 0, 2)
|
||||
TEST (3, 1, 0, 2)
|
||||
TEST (0, 2, 0, 2)
|
||||
TEST (1, 2, 0, 2)
|
||||
TEST (2, 2, 0, 2)
|
||||
TEST (3, 2, 0, 2)
|
||||
TEST (0, 3, 0, 2)
|
||||
TEST (1, 3, 0, 2)
|
||||
TEST (2, 3, 0, 2)
|
||||
TEST (3, 3, 0, 2)
|
||||
TEST (0, 0, 1, 2)
|
||||
TEST (1, 0, 1, 2)
|
||||
TEST (2, 0, 1, 2)
|
||||
TEST (3, 0, 1, 2)
|
||||
TEST (0, 1, 1, 2)
|
||||
TEST (1, 1, 1, 2)
|
||||
TEST (2, 1, 1, 2)
|
||||
TEST (3, 1, 1, 2)
|
||||
TEST (0, 2, 1, 2)
|
||||
TEST (1, 2, 1, 2)
|
||||
TEST (2, 2, 1, 2)
|
||||
TEST (3, 2, 1, 2)
|
||||
TEST (0, 3, 1, 2)
|
||||
TEST (1, 3, 1, 2)
|
||||
TEST (2, 3, 1, 2)
|
||||
TEST (3, 3, 1, 2)
|
||||
TEST (0, 0, 2, 2)
|
||||
TEST (1, 0, 2, 2)
|
||||
TEST (2, 0, 2, 2)
|
||||
TEST (3, 0, 2, 2)
|
||||
TEST (0, 1, 2, 2)
|
||||
TEST (1, 1, 2, 2)
|
||||
TEST (2, 1, 2, 2)
|
||||
TEST (3, 1, 2, 2)
|
||||
TEST (0, 2, 2, 2)
|
||||
TEST (1, 2, 2, 2)
|
||||
TEST (2, 2, 2, 2)
|
||||
TEST (3, 2, 2, 2)
|
||||
TEST (0, 3, 2, 2)
|
||||
TEST (1, 3, 2, 2)
|
||||
TEST (2, 3, 2, 2)
|
||||
TEST (3, 3, 2, 2)
|
||||
TEST (0, 0, 3, 2)
|
||||
TEST (1, 0, 3, 2)
|
||||
TEST (2, 0, 3, 2)
|
||||
TEST (3, 0, 3, 2)
|
||||
TEST (0, 1, 3, 2)
|
||||
TEST (1, 1, 3, 2)
|
||||
TEST (2, 1, 3, 2)
|
||||
TEST (3, 1, 3, 2)
|
||||
TEST (0, 2, 3, 2)
|
||||
TEST (1, 2, 3, 2)
|
||||
TEST (2, 2, 3, 2)
|
||||
TEST (3, 2, 3, 2)
|
||||
TEST (0, 3, 3, 2)
|
||||
TEST (1, 3, 3, 2)
|
||||
TEST (2, 3, 3, 2)
|
||||
TEST (3, 3, 3, 2)
|
||||
TEST (0, 0, 0, 3)
|
||||
TEST (1, 0, 0, 3)
|
||||
TEST (2, 0, 0, 3)
|
||||
TEST (3, 0, 0, 3)
|
||||
TEST (0, 1, 0, 3)
|
||||
TEST (1, 1, 0, 3)
|
||||
TEST (2, 1, 0, 3)
|
||||
TEST (3, 1, 0, 3)
|
||||
TEST (0, 2, 0, 3)
|
||||
TEST (1, 2, 0, 3)
|
||||
TEST (2, 2, 0, 3)
|
||||
TEST (3, 2, 0, 3)
|
||||
TEST (0, 3, 0, 3)
|
||||
TEST (1, 3, 0, 3)
|
||||
TEST (2, 3, 0, 3)
|
||||
TEST (3, 3, 0, 3)
|
||||
TEST (0, 0, 1, 3)
|
||||
TEST (1, 0, 1, 3)
|
||||
TEST (2, 0, 1, 3)
|
||||
TEST (3, 0, 1, 3)
|
||||
TEST (0, 1, 1, 3)
|
||||
TEST (1, 1, 1, 3)
|
||||
TEST (2, 1, 1, 3)
|
||||
TEST (3, 1, 1, 3)
|
||||
TEST (0, 2, 1, 3)
|
||||
TEST (1, 2, 1, 3)
|
||||
TEST (2, 2, 1, 3)
|
||||
TEST (3, 2, 1, 3)
|
||||
TEST (0, 3, 1, 3)
|
||||
TEST (1, 3, 1, 3)
|
||||
TEST (2, 3, 1, 3)
|
||||
TEST (3, 3, 1, 3)
|
||||
TEST (0, 0, 2, 3)
|
||||
TEST (1, 0, 2, 3)
|
||||
TEST (2, 0, 2, 3)
|
||||
TEST (3, 0, 2, 3)
|
||||
TEST (0, 1, 2, 3)
|
||||
TEST (1, 1, 2, 3)
|
||||
TEST (2, 1, 2, 3)
|
||||
TEST (3, 1, 2, 3)
|
||||
TEST (0, 2, 2, 3)
|
||||
TEST (1, 2, 2, 3)
|
||||
TEST (2, 2, 2, 3)
|
||||
TEST (3, 2, 2, 3)
|
||||
TEST (0, 3, 2, 3)
|
||||
TEST (1, 3, 2, 3)
|
||||
TEST (2, 3, 2, 3)
|
||||
TEST (3, 3, 2, 3)
|
||||
TEST (0, 0, 3, 3)
|
||||
TEST (1, 0, 3, 3)
|
||||
TEST (2, 0, 3, 3)
|
||||
TEST (3, 0, 3, 3)
|
||||
TEST (0, 1, 3, 3)
|
||||
TEST (1, 1, 3, 3)
|
||||
TEST (2, 1, 3, 3)
|
||||
TEST (3, 1, 3, 3)
|
||||
TEST (0, 2, 3, 3)
|
||||
TEST (1, 2, 3, 3)
|
||||
TEST (2, 2, 3, 3)
|
||||
TEST (3, 2, 3, 3)
|
||||
TEST (0, 3, 3, 3)
|
||||
TEST (1, 3, 3, 3)
|
||||
TEST (2, 3, 3, 3)
|
||||
TEST (3, 3, 3, 3)
|
||||
}
|
||||
|
||||
void check(void)
|
||||
{
|
||||
check0 ();
|
||||
check1 ();
|
||||
}
|
||||
|
4262
gcc/testsuite/gcc.target/i386/vperm-4-2.inc
Normal file
4262
gcc/testsuite/gcc.target/i386/vperm-4-2.inc
Normal file
File diff suppressed because it is too large
Load diff
34
gcc/testsuite/gcc.target/i386/vperm-v2df.c
Normal file
34
gcc/testsuite/gcc.target/i386/vperm-v2df.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O -msse2" } */

#include "isa-check.h"

/* S is the scalar element type, V the 16-byte vector of S that is being
   permuted, and IV the integer vector type that carries the selectors.  */
typedef double S;
typedef double V __attribute__((vector_size(16)));
typedef long long IV __attribute__((vector_size(16)));
typedef union { S s[2]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   result of the builtin and c the reference result computed one element
   at a time.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Element N (0..3) of the concatenation of i[0] and i[1].  The union index
   and the element index are computed separately so that s[] is never
   indexed past its two elements.  */
#define ELT(N) (i[(N) / 2].s[(N) % 2])

/* Permute with the builtin, build the expected vector by hand and trap if
   the two differ.  The invocations in the included .inc file carry no
   trailing semicolon, so the expansion must end with one.  The empty asm
   with a "memory" clobber presumably keeps the compiler from folding the
   comparison at compile time.  */
#define TEST(E0, E1) \
  b.v = __builtin_ia32_vec_perm_v2df (i[0].v, i[1].v, (IV){E0, E1}); \
  c.s[0] = ELT (E0); \
  c.s[1] = ELT (E1); \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-2-2.inc"

int main()
{
  /* Give each of the four input elements a distinct value.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[1].s[0] = 2;
  i[1].s[1] = 3;

  check();
  return 0;
}
|
34
gcc/testsuite/gcc.target/i386/vperm-v2di.c
Normal file
34
gcc/testsuite/gcc.target/i386/vperm-v2di.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O -msse2" } */

#include "isa-check.h"

/* S is the scalar element type, V the 16-byte vector of S that is being
   permuted, and IV the integer vector type that carries the selectors.  */
typedef long long S;
typedef long long V __attribute__((vector_size(16)));
typedef long long IV __attribute__((vector_size(16)));
typedef union { S s[2]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   result of the builtin and c the reference result computed one element
   at a time.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Element N (0..3) of the concatenation of i[0] and i[1].  The union index
   and the element index are computed separately so that s[] is never
   indexed past its two elements.  */
#define ELT(N) (i[(N) / 2].s[(N) % 2])

/* Permute with the builtin, build the expected vector by hand and trap if
   the two differ.  The invocations in the included .inc file carry no
   trailing semicolon, so the expansion must end with one.  The empty asm
   with a "memory" clobber presumably keeps the compiler from folding the
   comparison at compile time.  */
#define TEST(E0, E1) \
  b.v = __builtin_ia32_vec_perm_v2di (i[0].v, i[1].v, (IV){E0, E1}); \
  c.s[0] = ELT (E0); \
  c.s[1] = ELT (E1); \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-2-2.inc"

int main()
{
  /* Give each of the four input elements a distinct value.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[1].s[0] = 2;
  i[1].s[1] = 3;

  check();
  return 0;
}
|
40
gcc/testsuite/gcc.target/i386/vperm-v4sf-1.c
Normal file
40
gcc/testsuite/gcc.target/i386/vperm-v4sf-1.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O -msse" } */

#include "isa-check.h"

/* S is the scalar element type, V the 16-byte vector of S that is being
   permuted, and IV the integer vector type that carries the selectors.  */
typedef float S;
typedef float V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   result of the builtin and c the reference result computed one element
   at a time.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Element N (0..7) of the concatenation of i[0] and i[1].  The union index
   and the element index are computed separately so that s[] is never
   indexed past its four elements.  */
#define ELT(N) (i[(N) / 4].s[(N) % 4])

/* Permute with the builtin, build the expected vector by hand and trap if
   the two differ.  The invocations in the included .inc file carry no
   trailing semicolon, so the expansion must end with one.  The empty asm
   with a "memory" clobber presumably keeps the compiler from folding the
   comparison at compile time.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4sf (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = ELT (E0); \
  c.s[1] = ELT (E1); \
  c.s[2] = ELT (E2); \
  c.s[3] = ELT (E3); \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-1.inc"

int main()
{
  /* Give each of the eight input elements a distinct value.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
|
40
gcc/testsuite/gcc.target/i386/vperm-v4sf-2.c
Normal file
40
gcc/testsuite/gcc.target/i386/vperm-v4sf-2.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O -mssse3" } */

#include "isa-check.h"

/* S is the scalar element type, V the 16-byte vector of S that is being
   permuted, and IV the integer vector type that carries the selectors.  */
typedef float S;
typedef float V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   result of the builtin and c the reference result computed one element
   at a time.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Element N (0..7) of the concatenation of i[0] and i[1].  The union index
   and the element index are computed separately so that s[] is never
   indexed past its four elements.  */
#define ELT(N) (i[(N) / 4].s[(N) % 4])

/* Permute with the builtin, build the expected vector by hand and trap if
   the two differ.  The invocations in the included .inc file carry no
   trailing semicolon, so the expansion must end with one.  The empty asm
   with a "memory" clobber presumably keeps the compiler from folding the
   comparison at compile time.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4sf (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = ELT (E0); \
  c.s[1] = ELT (E1); \
  c.s[2] = ELT (E2); \
  c.s[3] = ELT (E3); \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-2.inc"

int main()
{
  /* Give each of the eight input elements a distinct value.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
|
40
gcc/testsuite/gcc.target/i386/vperm-v4si-1.c
Normal file
40
gcc/testsuite/gcc.target/i386/vperm-v4si-1.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O -msse2" } */

#include "isa-check.h"

/* S is the scalar element type, V the 16-byte vector of S that is being
   permuted, and IV the integer vector type that carries the selectors.  */
typedef int S;
typedef int V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   result of the builtin and c the reference result computed one element
   at a time.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Element N (0..7) of the concatenation of i[0] and i[1].  The union index
   and the element index are computed separately so that s[] is never
   indexed past its four elements.  */
#define ELT(N) (i[(N) / 4].s[(N) % 4])

/* Permute with the builtin, build the expected vector by hand and trap if
   the two differ.  The invocations in the included .inc file carry no
   trailing semicolon, so the expansion must end with one.  The empty asm
   with a "memory" clobber presumably keeps the compiler from folding the
   comparison at compile time.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4si (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = ELT (E0); \
  c.s[1] = ELT (E1); \
  c.s[2] = ELT (E2); \
  c.s[3] = ELT (E3); \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-1.inc"

int main()
{
  /* Give each of the eight input elements a distinct value.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
|
40
gcc/testsuite/gcc.target/i386/vperm-v4si-2.c
Normal file
40
gcc/testsuite/gcc.target/i386/vperm-v4si-2.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do run } */
/* { dg-options "-O -mssse3" } */

#include "isa-check.h"

/* S is the scalar element type, V the 16-byte vector of S that is being
   permuted, and IV the integer vector type that carries the selectors.  */
typedef int S;
typedef int V __attribute__((vector_size(16)));
typedef int IV __attribute__((vector_size(16)));
typedef union { S s[4]; V v; } U;

/* i[0] and i[1] are the two inputs of the permutation, b receives the
   result of the builtin and c the reference result computed one element
   at a time.  */
static U i[2], b, c;

extern int memcmp (const void *, const void *, __SIZE_TYPE__);
#define assert(T) ((T) || (__builtin_trap (), 0))

/* Element N (0..7) of the concatenation of i[0] and i[1].  The union index
   and the element index are computed separately so that s[] is never
   indexed past its four elements.  */
#define ELT(N) (i[(N) / 4].s[(N) % 4])

/* Permute with the builtin, build the expected vector by hand and trap if
   the two differ.  The invocations in the included .inc file carry no
   trailing semicolon, so the expansion must end with one.  The empty asm
   with a "memory" clobber presumably keeps the compiler from folding the
   comparison at compile time.  */
#define TEST(E0, E1, E2, E3) \
  b.v = __builtin_ia32_vec_perm_v4si (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
  c.s[0] = ELT (E0); \
  c.s[1] = ELT (E1); \
  c.s[2] = ELT (E2); \
  c.s[3] = ELT (E3); \
  __asm__("" : : : "memory"); \
  assert (memcmp (&b, &c, sizeof(c)) == 0);

#include "vperm-4-2.inc"

int main()
{
  /* Give each of the eight input elements a distinct value.  */
  i[0].s[0] = 0;
  i[0].s[1] = 1;
  i[0].s[2] = 2;
  i[0].s[3] = 3;
  i[1].s[0] = 4;
  i[1].s[1] = 5;
  i[1].s[2] = 6;
  i[1].s[3] = 7;

  check();
  return 0;
}
|
3
gcc/testsuite/gcc.target/i386/vperm-v4si-2x.c
Normal file
3
gcc/testsuite/gcc.target/i386/vperm-v4si-2x.c
Normal file
|
@ -0,0 +1,3 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O -mxop" } */
|
||||
#include "vperm-v4si-2.c"
|
41
gcc/testsuite/gcc.target/i386/vperm.pl
Executable file
41
gcc/testsuite/gcc.target/i386/vperm.pl
Executable file
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/perl
#
# Usage: ./vperm.pl NELT LENG
#
# Prints, on standard output, C source containing one TEST (...) invocation
# for every NELT-tuple of selectors, each selector ranging over
# 0 .. LENG*NELT-1.  The invocations are split into functions check0,
# check1, ... of 128 tests each, followed by a check() function that calls
# all of them in order.

use strict;
use warnings;

# Reject missing, non-numeric or zero arguments up front; without this the
# script would silently emit a single meaningless "TEST ()".
die "usage: $0 NELT LENG (both positive integers)\n"
  unless @ARGV == 2 && !grep { !/^[0-9]+$/ || $_ == 0 } @ARGV;

my $nelt = int($ARGV[0]);
my $leng = int($ARGV[1]);

print "/* This file auto-generated with ./vperm.pl $nelt $leng. */\n\n";

my $range  = $leng * $nelt;    # every selector lies in 0 .. $range-1
my $total  = $range ** $nelt;  # number of distinct selector tuples
my @perm   = (0) x $nelt;      # current tuple, least significant first
my $ncheck = 0;                # number of checkN functions opened so far

for (my $i = 0; $i < $total; ++$i)
  {
    # Open a new checkN function every 128 tests, closing the previous one.
    if ($i % 128 == 0)
      {
        print "}\n\n" if $ncheck > 0;
        print "void check$ncheck(void)\n{\n";
        ++$ncheck;
      }

    print " TEST (", join(", ", @perm), ")\n";

    # Advance @perm like an odometer: bump the first position and carry
    # into the next one whenever a position wraps back to zero.
    for my $j (0 .. $nelt - 1)
      {
        last if ++$perm[$j] < $range;
        $perm[$j] = 0;
      }
  }
print "}\n\n";

print "void check(void)\n{\n";
print " check$_ ();\n" for 0 .. $ncheck - 1;
print "}\n\n";
|
|
@ -2562,7 +2562,9 @@ proc check_effective_target_vect_extract_even_odd { } {
|
|||
verbose "check_effective_target_vect_extract_even_odd: using cached result" 2
|
||||
} else {
|
||||
set et_vect_extract_even_odd_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
set et_vect_extract_even_odd_saved 1
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue