cgraph.h (struct cgraph_simd_clone): Add mask_mode field.

* cgraph.h (struct cgraph_simd_clone): Add mask_mode field.
	* omp-low.c (simd_clone_init_simd_arrays, simd_clone_adjust): Handle
	node->simdclone->mask_mode != VOIDmode masks.
	(simd_clone_adjust_argument_types): Likewise.  Move sc var definition
	earlier, use it instead of node->simdclone.
	* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen):
	Set clonei->mask_mode.

	* c-c++-common/attr-simd.c: Add scan-assembler* directives for AVX512F
	clones.
	* c-c++-common/attr-simd-2.c: Likewise.
	* c-c++-common/attr-simd-4.c: Likewise.
	* gcc.dg/gomp/simd-clones-2.c: Likewise.
	* gcc.dg/gomp/simd-clones-3.c: Likewise.

From-SVN: r234816
This commit is contained in:
Jakub Jelinek 2016-04-07 23:32:05 +02:00 committed by Jakub Jelinek
parent e4ebc74d8c
commit 5f490f9f7a
10 changed files with 164 additions and 42 deletions

View file

@ -1,3 +1,13 @@
2016-04-07 Jakub Jelinek <jakub@redhat.com>
* cgraph.h (struct cgraph_simd_clone): Add mask_mode field.
* omp-low.c (simd_clone_init_simd_arrays, simd_clone_adjust): Handle
node->simdclone->mask_mode != VOIDmode masks.
(simd_clone_adjust_argument_types): Likewise. Move sc var definition
earlier, use it instead of node->simdclone.
* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen):
Set clonei->mask_mode.
2016-04-06 Patrick Palka <ppalka@gcc.gnu.org>
PR c/70436

View file

@ -766,6 +766,11 @@ struct GTY(()) cgraph_simd_clone {
/* Max hardware vector size in bits for floating point vectors. */
unsigned int vecsize_float;
/* Machine mode of the mask argument(s), if they are to be passed
as bitmasks in integer argument(s). VOIDmode if masks are passed
as vectors of characteristic type. */
machine_mode mask_mode;
/* The mangling character for a given vector size. This is used
to determine the ISA mangling bit as specified in the Intel
Vector ABI. */

View file

@ -53747,7 +53747,7 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
return val;
}
/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
or number of vecsize_mangle variants that should be emitted. */
@ -53834,6 +53834,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
clonei->vecsize_mangle = "bcde"[num];
ret = 4;
}
clonei->mask_mode = VOIDmode;
switch (clonei->vecsize_mangle)
{
case 'b':
@ -53851,6 +53852,10 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
case 'e':
clonei->vecsize_int = 512;
clonei->vecsize_float = 512;
if (TYPE_MODE (base_type) == QImode)
clonei->mask_mode = DImode;
else
clonei->mask_mode = SImode;
break;
}
if (clonei->simdlen == 0)

View file

@ -18918,7 +18918,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
adjustments.create (args.length ());
unsigned i, j, veclen;
struct ipa_parm_adjustment adj;
for (i = 0; i < node->simdclone->nargs; ++i)
struct cgraph_simd_clone *sc = node->simdclone;
for (i = 0; i < sc->nargs; ++i)
{
memset (&adj, 0, sizeof (adj));
tree parm = args[i];
@ -18926,10 +18928,10 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
adj.base_index = i;
adj.base = parm;
node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
node->simdclone->args[i].orig_type = parm_type;
sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
sc->args[i].orig_type = parm_type;
switch (node->simdclone->args[i].arg_type)
switch (sc->args[i].arg_type)
{
default:
/* No adjustment necessary for scalar arguments. */
@ -18938,29 +18940,29 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
if (node->definition)
node->simdclone->args[i].simd_array
sc->args[i].simd_array
= create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
TREE_TYPE (parm_type),
node->simdclone->simdlen);
sc->simdlen);
adj.op = IPA_PARM_OP_COPY;
break;
case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
case SIMD_CLONE_ARG_TYPE_VECTOR:
if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
veclen = node->simdclone->vecsize_int;
veclen = sc->vecsize_int;
else
veclen = node->simdclone->vecsize_float;
veclen = sc->vecsize_float;
veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
if (veclen > node->simdclone->simdlen)
veclen = node->simdclone->simdlen;
if (veclen > sc->simdlen)
veclen = sc->simdlen;
adj.arg_prefix = "simd";
if (POINTER_TYPE_P (parm_type))
adj.type = build_vector_type (pointer_sized_int_node, veclen);
else
adj.type = build_vector_type (parm_type, veclen);
node->simdclone->args[i].vector_type = adj.type;
for (j = veclen; j < node->simdclone->simdlen; j += veclen)
sc->args[i].vector_type = adj.type;
for (j = veclen; j < sc->simdlen; j += veclen)
{
adjustments.safe_push (adj);
if (j == veclen)
@ -18969,23 +18971,21 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
adj.op = IPA_PARM_OP_NEW;
adj.arg_prefix = "simd";
adj.base_index = i;
adj.type = node->simdclone->args[i].vector_type;
adj.type = sc->args[i].vector_type;
}
}
if (node->definition)
node->simdclone->args[i].simd_array
sc->args[i].simd_array
= create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
parm_type, node->simdclone->simdlen);
parm_type, sc->simdlen);
}
adjustments.safe_push (adj);
}
if (node->simdclone->inbranch)
if (sc->inbranch)
{
tree base_type
= simd_clone_compute_base_data_type (node->simdclone->origin,
node->simdclone);
tree base_type = simd_clone_compute_base_data_type (sc->origin, sc);
memset (&adj, 0, sizeof (adj));
adj.op = IPA_PARM_OP_NEW;
@ -18993,31 +18993,41 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
adj.base_index = i;
if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
veclen = node->simdclone->vecsize_int;
veclen = sc->vecsize_int;
else
veclen = node->simdclone->vecsize_float;
veclen = sc->vecsize_float;
veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
if (veclen > node->simdclone->simdlen)
veclen = node->simdclone->simdlen;
if (POINTER_TYPE_P (base_type))
if (veclen > sc->simdlen)
veclen = sc->simdlen;
if (sc->mask_mode != VOIDmode)
adj.type
= lang_hooks.types.type_for_mode (sc->mask_mode, 1);
else if (POINTER_TYPE_P (base_type))
adj.type = build_vector_type (pointer_sized_int_node, veclen);
else
adj.type = build_vector_type (base_type, veclen);
adjustments.safe_push (adj);
for (j = veclen; j < node->simdclone->simdlen; j += veclen)
for (j = veclen; j < sc->simdlen; j += veclen)
adjustments.safe_push (adj);
/* We have previously allocated one extra entry for the mask. Use
it and fill it. */
struct cgraph_simd_clone *sc = node->simdclone;
sc->nargs++;
if (sc->mask_mode != VOIDmode)
base_type = boolean_type_node;
if (node->definition)
{
sc->args[i].orig_arg
= build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
sc->args[i].simd_array
= create_tmp_simd_array ("mask", base_type, sc->simdlen);
if (sc->mask_mode == VOIDmode)
sc->args[i].simd_array
= create_tmp_simd_array ("mask", base_type, sc->simdlen);
else if (veclen < sc->simdlen)
sc->args[i].simd_array
= create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen);
else
sc->args[i].simd_array = NULL_TREE;
}
sc->args[i].orig_type = base_type;
sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
@ -19085,6 +19095,27 @@ simd_clone_init_simd_arrays (struct cgraph_node *node,
node->simdclone->args[i].vector_arg = arg;
tree array = node->simdclone->args[i].simd_array;
if (node->simdclone->mask_mode != VOIDmode
&& node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
{
if (array == NULL_TREE)
continue;
unsigned int l
= tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array))));
for (k = 0; k <= l; k++)
{
if (k)
{
arg = DECL_CHAIN (arg);
j++;
}
tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
array, size_int (k), NULL, NULL);
t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
gimplify_and_add (t, &seq);
}
continue;
}
if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen)
{
tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
@ -19455,7 +19486,7 @@ simd_clone_adjust (struct cgraph_node *node)
e->probability = REG_BR_PROB_BASE;
gsi = gsi_last_bb (incr_bb);
gimple *g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
build_int_cst (unsigned_type_node, 1));
build_int_cst (unsigned_type_node, 1));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
/* Mostly annotate the loop for the vectorizer (the rest is done below). */
@ -19471,21 +19502,68 @@ simd_clone_adjust (struct cgraph_node *node)
gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
tree mask_array
= node->simdclone->args[node->simdclone->nargs - 1].simd_array;
tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
tree aref = build4 (ARRAY_REF,
TREE_TYPE (TREE_TYPE (mask_array)),
mask_array, iter1,
NULL, NULL);
g = gimple_build_assign (mask, aref);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
tree mask;
if (node->simdclone->mask_mode != VOIDmode)
{
aref = build1 (VIEW_CONVERT_EXPR,
build_nonstandard_integer_type (bitsize, 0), mask);
mask = make_ssa_name (TREE_TYPE (aref));
tree shift_cnt;
if (mask_array == NULL_TREE)
{
tree arg = node->simdclone->args[node->simdclone->nargs
- 1].vector_arg;
mask = get_or_create_ssa_default_def (cfun, arg);
shift_cnt = iter1;
}
else
{
tree maskt = TREE_TYPE (mask_array);
int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
c = node->simdclone->simdlen / (c + 1);
int s = exact_log2 (c);
gcc_assert (s > 0);
c--;
tree idx = make_ssa_name (TREE_TYPE (iter1));
g = gimple_build_assign (idx, RSHIFT_EXPR, iter1,
build_int_cst (NULL_TREE, s));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
tree aref = build4 (ARRAY_REF,
TREE_TYPE (TREE_TYPE (mask_array)),
mask_array, idx, NULL, NULL);
g = gimple_build_assign (mask, aref);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
shift_cnt = make_ssa_name (TREE_TYPE (iter1));
g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1,
build_int_cst (TREE_TYPE (iter1), c));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
}
g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
RSHIFT_EXPR, mask, shift_cnt);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
mask = gimple_assign_lhs (g);
g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
BIT_AND_EXPR, mask,
build_int_cst (TREE_TYPE (mask), 1));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
mask = gimple_assign_lhs (g);
}
else
{
mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
tree aref = build4 (ARRAY_REF,
TREE_TYPE (TREE_TYPE (mask_array)),
mask_array, iter1, NULL, NULL);
g = gimple_build_assign (mask, aref);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
{
aref = build1 (VIEW_CONVERT_EXPR,
build_nonstandard_integer_type (bitsize, 0),
mask);
mask = make_ssa_name (TREE_TYPE (aref));
g = gimple_build_assign (mask, aref);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
}
}
g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),

View file

@ -1,3 +1,12 @@
2016-04-07 Jakub Jelinek <jakub@redhat.com>
* c-c++-common/attr-simd.c: Add scan-assembler* directives for AVX512F
clones.
* c-c++-common/attr-simd-2.c: Likewise.
* c-c++-common/attr-simd-4.c: Likewise.
* gcc.dg/gomp/simd-clones-2.c: Likewise.
* gcc.dg/gomp/simd-clones-3.c: Likewise.
2016-04-07 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR testsuite/70553

View file

@ -19,3 +19,5 @@ int simd_attr (void)
/* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */

View file

@ -15,9 +15,11 @@ int simd_attr (void)
/* { dg-final { scan-assembler-times "_ZGVbN4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVcN4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVbM4_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVcM4_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVdM8_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVeM16_simd_attr:" { target { i?86-*-* x86_64-*-* } } } } */
extern
#ifdef __cplusplus
@ -33,6 +35,8 @@ int simd_attr2 (void)
/* { dg-final { scan-assembler-not "_ZGVbN4_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVcN4_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVdN8_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-not "_ZGVeN16_simd_attr2:" { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVbM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */

View file

@ -18,6 +18,8 @@ int simd_attr (void)
/* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr:" 1 { target { i?86-*-* x86_64-*-* } } } } */
extern
#ifdef __cplusplus
@ -36,3 +38,5 @@ int simd_attr2 (void)
/* { dg-final { scan-assembler-times "_ZGVcM4_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdN8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVdM8_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeN16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */
/* { dg-final { scan-assembler-times "_ZGVeM16_simd_attr2:" 1 { target { i?86-*-* x86_64-*-* } } } } */

View file

@ -23,3 +23,6 @@ float setArray(float *a, float x, int k)
/* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVeN16ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVeN16vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVeM16vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */

View file

@ -15,3 +15,5 @@ int addit(int a, int b, int c)
/* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVeN16vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVeM16vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */