arm-protos.h (struct cpu_vec_costs): New struct type.

2013-02-05  Christophe Lyon <christophe.lyon@linaro.org>

	* config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.
	(struct tune_params): Add vec_costs field.
	* config/arm/arm.c (arm_builtin_vectorization_cost)
	(arm_add_stmt_cost): New functions.
	(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST)
	(TARGET_VECTORIZE_ADD_STMT_COST): Define.
	(arm_default_vec_cost): New struct of type cpu_vec_costs.
	(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
	(arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
	(arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
	(arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field.

From-SVN: r195977
This commit is contained in:
Christophe Lyon 2013-02-12 14:53:45 +00:00 committed by Christophe Lyon
parent 43320568b2
commit 2597da229a
3 changed files with 170 additions and 0 deletions

View file

@ -1,3 +1,17 @@
2013-02-12 Christophe Lyon <christophe.lyon@linaro.org>
* config/arm/arm-protos.h (struct cpu_vec_costs): New struct type.
(struct tune_params): Add vec_costs field.
* config/arm/arm.c (arm_builtin_vectorization_cost)
(arm_add_stmt_cost): New functions.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST)
(TARGET_VECTORIZE_ADD_STMT_COST): Define.
(arm_default_vec_cost): New struct of type cpu_vec_costs.
(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune)
(arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune)
(arm_cortex_a15_tune, arm_cortex_a5_tune, arm_cortex_a9_tune)
(arm_v6m_tune, arm_fa726te_tune): Define new vec_costs field.
2013-02-12 Richard Biener <rguenther@suse.de>
PR lto/56295

View file

@ -224,6 +224,27 @@ extern const char *arm_mangle_type (const_tree);
extern void arm_order_regs_for_local_alloc (void);
/* Vectorizer cost model implementation.

Per-CPU table of the costs reported by the vectorizer cost hooks. A
pointer to one of these is stored in the vec_costs field of struct
tune_params. Tables of this type are initialized positionally (see
arm_default_vec_cost in arm.c), so the order of the fields below must
be kept in sync with those initializers. */
struct cpu_vec_costs {
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
load and store. */
const int scalar_load_cost; /* Cost of scalar load. */
const int scalar_store_cost; /* Cost of scalar store. */
const int vec_stmt_cost; /* Cost of any vector operation, excluding
load, store, vector-to-scalar and
scalar-to-vector operation. */
const int vec_to_scalar_cost; /* Cost of vector-to-scalar operation. */
const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
const int vec_align_load_cost; /* Cost of aligned vector load. */
const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
const int vec_store_cost; /* Cost of vector store. */
const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
cost model. */
const int cond_not_taken_branch_cost;/* Cost of not taken branch for
vectorizer cost model. */
};
#ifdef RTX_CODE
/* This needs to be here because we need RTX_CODE and similar. */
@ -246,6 +267,8 @@ struct tune_params
performance. The first element covers Thumb state and the second one
is for ARM state. */
bool logical_op_non_short_circuit[2];
/* Vectorizer costs. */
const struct cpu_vec_costs* vec_costs;
};
extern const struct tune_params *current_tune;

View file

@ -268,6 +268,16 @@ static int arm_cortex_a5_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
/* Forward declarations of the vectorizer cost-model hooks; both are
defined later in this file. */
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
tree vectype,
int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
enum vect_cost_for_stmt kind,
struct _stmt_vec_info *stmt_info,
int misalign,
enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
bool op0_preserve_value);
@ -629,6 +639,12 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
arm_vectorize_vec_perm_const_ok
/* Use the ARM implementations for the vectorizer cost hooks
(targetm.vectorize.builtin_vectorization_cost and
targetm.vectorize.add_stmt_cost). */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
arm_canonicalize_comparison
@ -891,6 +907,23 @@ struct processors
l1_size, \
l1_line_size
/* ARM generic vectorizer costs. The entries are positional: each value
initializes the struct cpu_vec_costs field named in the comment beside
it, so the order here must match the field order of that struct. */
static const
struct cpu_vec_costs arm_default_vec_cost = {
1, /* scalar_stmt_cost. */
1, /* scalar_load_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
1, /* vec_unalign_load_cost. */
1, /* vec_unalign_store_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
};
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
@ -902,6 +935,7 @@ const struct tune_params arm_slowmul_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_fastmul_tune =
@ -915,6 +949,7 @@ const struct tune_params arm_fastmul_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@ -931,6 +966,7 @@ const struct tune_params arm_strongarm_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_xscale_tune =
@ -944,6 +980,7 @@ const struct tune_params arm_xscale_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_9e_tune =
@ -957,6 +994,7 @@ const struct tune_params arm_9e_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_v6t2_tune =
@ -970,6 +1008,7 @@ const struct tune_params arm_v6t2_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@ -984,6 +1023,7 @@ const struct tune_params arm_cortex_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_cortex_a15_tune =
@ -997,6 +1037,7 @@ const struct tune_params arm_cortex_a15_tune =
arm_default_branch_cost,
true, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@ -1013,6 +1054,7 @@ const struct tune_params arm_cortex_a5_tune =
arm_cortex_a5_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_cortex_a9_tune =
@ -1026,6 +1068,7 @@ const struct tune_params arm_cortex_a9_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@ -1041,6 +1084,7 @@ const struct tune_params arm_v6m_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
const struct tune_params arm_fa726te_tune =
@ -1054,6 +1098,7 @@ const struct tune_params arm_fa726te_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
&arm_default_vec_cost, /* Vectorizer costs. */
};
@ -8696,6 +8741,94 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
}
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.

   Return the cost of a single statement of kind TYPE_OF_COST, looked up
   in the vec_costs table of the current tuning.  VECTYPE is consulted
   only for vec_construct, whose cost is computed from the number of
   elements in the vector type rather than read from the table.
   MISALIGN is ignored.  Any other kind is treated as unreachable.  */

static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  int cost;

  switch (type_of_cost)
    {
    case scalar_stmt:
      cost = current_tune->vec_costs->scalar_stmt_cost;
      break;

    case scalar_load:
      cost = current_tune->vec_costs->scalar_load_cost;
      break;

    case scalar_store:
      cost = current_tune->vec_costs->scalar_store_cost;
      break;

    /* Permutes and promotions/demotions are charged like any other
       vector statement.  */
    case vector_stmt:
    case vec_perm:
    case vec_promote_demote:
      cost = current_tune->vec_costs->vec_stmt_cost;
      break;

    case vector_load:
      cost = current_tune->vec_costs->vec_align_load_cost;
      break;

    case vector_store:
      cost = current_tune->vec_costs->vec_store_cost;
      break;

    case vec_to_scalar:
      cost = current_tune->vec_costs->vec_to_scalar_cost;
      break;

    case scalar_to_vec:
      cost = current_tune->vec_costs->scalar_to_vec_cost;
      break;

    case unaligned_load:
      cost = current_tune->vec_costs->vec_unalign_load_cost;
      break;

    case unaligned_store:
      cost = current_tune->vec_costs->vec_unalign_store_cost;
      break;

    case cond_branch_taken:
      cost = current_tune->vec_costs->cond_taken_branch_cost;
      break;

    case cond_branch_not_taken:
      cost = current_tune->vec_costs->cond_not_taken_branch_cost;
      break;

    case vec_construct:
      {
	/* Half the element count, plus one.  */
	unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
	cost = nunits / 2 + 1;
      }
      break;

    default:
      gcc_unreachable ();
    }

  return cost;
}
/* Implement targetm.vectorize.add_stmt_cost.

   DATA is used as an array of unsigned running totals indexed by WHERE.
   When the vectorizer cost model is enabled, compute the cost of COUNT
   statements of kind KIND (using the vector type of STMT_INFO, if any,
   and MISALIGN), add it to the total selected by WHERE and return it.
   When the cost model is disabled, leave DATA untouched and return 0.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *totals;
  unsigned added;
  tree vectype;
  int per_stmt;

  if (!flag_vect_cost_model)
    return 0;

  totals = (unsigned *) data;
  vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  per_stmt = arm_builtin_vectorization_cost (kind, vectype, misalign);

  /* Statements in an inner loop relative to the loop being vectorized
     are weighted more heavily.  The factor is arbitrary and could
     potentially be improved with analysis.  */
  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
    count *= 50;  /* FIXME.  */

  added = (unsigned) (count * per_stmt);
  totals[where] += added;
  return added;
}
/* Return true if and only if this insn can dual-issue only as older. */
static bool