arm.c (arm_option_override): enable loop array prefetching at -O3 for suitable targets, and configure params.

2010-11-26  Ian Bolton  <ian.bolton@arm.com>

	* config/arm/arm.c (arm_option_override): Enable loop array
	prefetching at -O3 for suitable targets, and configure params.
	* config/arm/arm-protos.h (struct tune_params): Add fields for
	configuring loop array prefetching.

From-SVN: r167175
This commit is contained in:
Ian Bolton 2010-11-26 13:21:32 +00:00 committed by Ian Bolton
parent 4d00efb6f4
commit 911de8a341
3 changed files with 53 additions and 5 deletions

View file

@ -1,3 +1,10 @@
2010-11-26 Ian Bolton <ian.bolton@arm.com>
* config/arm/arm.c (arm_option_override): Enable loop array
prefetching at -O3 for suitable targets, and configure params.
* config/arm/arm-protos.h (struct tune_params): Add fields for
configuring loop array prefetching.
2010-11-26 Christian Borntraeger <borntraeger@de.ibm.com>
* config/s390/2817.md (z196_crack): Add z196_g3 as possible slot.

View file

@ -218,6 +218,9 @@ struct tune_params
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
int constant_limit;
int num_prefetch_slots;
int l1_cache_size;
int l1_cache_line_size;
};
extern const struct tune_params *current_tune;

View file

@ -835,39 +835,51 @@ struct processors
const struct tune_params *const tune;
};
/* Expands to the three trailing prefetch initializers of a tune_params
   table, in the order num_prefetch_slots, l1_cache_size,
   l1_cache_line_size.  */
#define ARM_PREFETCH_BENEFICIAL(slots, cache_size, line_size) \
slots, cache_size, line_size

/* Prefetch initializers for cores without researched values: zero slots,
   and -1 for both cache parameters so arm_option_override's ">= 0" checks
   leave the default params untouched.  */
#define ARM_PREFETCH_NOT_BENEFICIAL ARM_PREFETCH_BENEFICIAL (0, -1, -1)
/* Tuning table using the arm_slowmul_rtx_costs cost hook.  Initializers are
   positional, following the struct tune_params fields shown earlier in this
   diff: rtx_costs, sched_adjust_cost, constant_limit, then the three
   prefetch fields.
   NOTE(review): the bare "3" followed by "3," looks like the removed and
   added lines of this hunk shown without +/- markers -- confirm.  */
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
NULL, /* No sched_adjust_cost hook.  */
3
3, /* constant_limit.  */
ARM_PREFETCH_NOT_BENEFICIAL /* 0 slots, cache sizes -1.  */
};
/* Tuning table using the arm_fastmul_rtx_costs cost hook.  Initializers are
   positional, following the struct tune_params fields shown earlier in this
   diff: rtx_costs, sched_adjust_cost, constant_limit, then the three
   prefetch fields.
   NOTE(review): the bare "1" followed by "1," looks like the removed and
   added lines of this hunk shown without +/- markers -- confirm.  */
const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL, /* No sched_adjust_cost hook.  */
1
1, /* constant_limit.  */
ARM_PREFETCH_NOT_BENEFICIAL /* 0 slots, cache sizes -1.  */
};
/* Tuning table using the arm_xscale_rtx_costs cost hook and the
   xscale_sched_adjust_cost scheduler hook.  Initializers are positional,
   following the struct tune_params fields shown earlier in this diff:
   rtx_costs, sched_adjust_cost, constant_limit, then the three prefetch
   fields.
   NOTE(review): the bare "2" followed by "2," looks like the removed and
   added lines of this hunk shown without +/- markers -- confirm.  */
const struct tune_params arm_xscale_tune =
{
arm_xscale_rtx_costs,
xscale_sched_adjust_cost,
2
2, /* constant_limit.  */
ARM_PREFETCH_NOT_BENEFICIAL /* 0 slots, cache sizes -1.  */
};
/* Tuning table using the arm_9e_rtx_costs cost hook.  Initializers are
   positional, following the struct tune_params fields shown earlier in this
   diff: rtx_costs, sched_adjust_cost, constant_limit, then the three
   prefetch fields.
   NOTE(review): the bare "1" followed by "1," looks like the removed and
   added lines of this hunk shown without +/- markers -- confirm.  */
const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL, /* No sched_adjust_cost hook.  */
1
1, /* constant_limit.  */
ARM_PREFETCH_NOT_BENEFICIAL /* 0 slots, cache sizes -1.  */
};
/* Tuning table that shares the arm_9e_rtx_costs cost hook but uses the
   cortex_a9_sched_adjust_cost scheduler hook.  It is the only table in this
   hunk that supplies prefetch values.  Initializers are positional,
   following the struct tune_params fields shown earlier in this diff:
   rtx_costs, sched_adjust_cost, constant_limit, then the three prefetch
   fields.
   NOTE(review): the bare "1" followed by "1," looks like the removed and
   added lines of this hunk shown without +/- markers -- confirm.  */
const struct tune_params arm_cortex_a9_tune =
{
arm_9e_rtx_costs,
cortex_a9_sched_adjust_cost,
1
1, /* constant_limit.  */
/* num_prefetch_slots = 4, l1_cache_size = 32, l1_cache_line_size = 32.  */
ARM_PREFETCH_BENEFICIAL(4,32,32)
};
@ -1983,6 +1995,32 @@ arm_option_override (void)
if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
flag_strict_volatile_bitfields = 1;
/* Enable sw prefetching at -O3 for CPUs that have prefetch and for which we
have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
if (flag_prefetch_loop_arrays < 0
&& HAVE_prefetch
&& optimize >= 3
&& current_tune->num_prefetch_slots > 0)
flag_prefetch_loop_arrays = 1;
/* Set up parameters to be used in prefetching algorithm. Do not override the
defaults unless we are tuning for a core we have researched values for. */
if (current_tune->num_prefetch_slots > 0)
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
current_tune->num_prefetch_slots,
global_options.x_param_values,
global_options_set.x_param_values);
if (current_tune->l1_cache_line_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
current_tune->l1_cache_line_size,
global_options.x_param_values,
global_options_set.x_param_values);
if (current_tune->l1_cache_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_SIZE,
current_tune->l1_cache_size,
global_options.x_param_values,
global_options_set.x_param_values);
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}