From 14e444c392d7ab69d0dcd299df8abbac067d07dc Mon Sep 17 00:00:00 2001
From: Changpeng Fang
Date: Fri, 2 Jul 2010 16:34:38 +0000
Subject: [PATCH] Reduce the cost in miss rate computation.

2010-07-02  Changpeng Fang

	* tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to
	is_miss_rate_acceptable.  Pull total_positions computation
	out of the loops.  Early return if miss_positions exceeds
	the acceptable threshold.
	* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call
	is_miss_rate_acceptable after renaming of compute_miss_rate.

From-SVN: r161728
---
 gcc/ChangeLog                |  9 +++++++++
 gcc/tree-ssa-loop-prefetch.c | 39 ++++++++++++++++++++-------------------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7e7944a6618..98f2ab09da2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2010-07-02  Changpeng Fang
+
+	* tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to
+	is_miss_rate_acceptable.  Pull total_positions computation
+	out of the loops.  Early return if miss_positions exceeds
+	the acceptable threshold.
+	* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call
+	is_miss_rate_acceptable after renaming of compute_miss_rate.
+
 2010-07-02  Changpeng Fang
 
 	PR middle-end/44576
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 934b49c0406..c3e90d235ba 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -640,27 +640,29 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
 /* Given a CACHE_LINE_SIZE and two inductive memory references
    with a common STEP greater than CACHE_LINE_SIZE and an address
    difference DELTA, compute the probability that they will fall
-   in different cache lines.  DISTINCT_ITERS is the number of
-   distinct iterations after which the pattern repeats itself.
+   in different cache lines.  Return true if the computed miss rate
+   is not greater than the ACCEPTABLE_MISS_RATE.  DISTINCT_ITERS is the
+   number of distinct iterations after which the pattern repeats itself.
    ALIGN_UNIT is the unit of alignment in bytes.  */
 
-static int
-compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
+static bool
+is_miss_rate_acceptable (unsigned HOST_WIDE_INT cache_line_size,
 		   HOST_WIDE_INT step, HOST_WIDE_INT delta,
 		   unsigned HOST_WIDE_INT distinct_iters,
 		   int align_unit)
 {
   unsigned align, iter;
-  int total_positions, miss_positions, miss_rate;
+  int total_positions, miss_positions, max_allowed_miss_positions;
   int address1, address2, cache_line1, cache_line2;
 
   /* It always misses if delta is greater than or equal to the cache
      line size.  */
-  if (delta >= cache_line_size)
-    return 1000;
+  if (delta >= (HOST_WIDE_INT) cache_line_size)
+    return false;
 
-  total_positions = 0;
   miss_positions = 0;
+  total_positions = (cache_line_size / align_unit) * distinct_iters;
+  max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;
 
   /* Iterate through all possible alignments of the first
      memory reference within its cache line.  */
@@ -673,12 +675,14 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
       address2 = address1 + delta;
       cache_line1 = address1 / cache_line_size;
       cache_line2 = address2 / cache_line_size;
-      total_positions += 1;
       if (cache_line1 != cache_line2)
-	miss_positions += 1;
+	{
+	  miss_positions += 1;
+	  if (miss_positions > max_allowed_miss_positions)
+	    return false;
+	}
     }
-  miss_rate = 1000 * miss_positions / total_positions;
-  return miss_rate;
+  return true;
 }
 
 /* Prune the prefetch candidate REF using the reuse with BY.
@@ -694,7 +698,6 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
   HOST_WIDE_INT delta = delta_b - delta_r;
   HOST_WIDE_INT hit_from;
   unsigned HOST_WIDE_INT prefetch_before, prefetch_block;
-  int miss_rate;
   HOST_WIDE_INT reduced_step;
   unsigned HOST_WIDE_INT reduced_prefetch_block;
   tree ref_type;
@@ -793,9 +796,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
       delta %= step;
       ref_type = TREE_TYPE (ref->mem);
       align_unit = TYPE_ALIGN (ref_type) / 8;
-      miss_rate = compute_miss_rate(prefetch_block, step, delta,
-				    reduced_prefetch_block, align_unit);
-      if (miss_rate <= ACCEPTABLE_MISS_RATE)
+      if (is_miss_rate_acceptable (prefetch_block, step, delta,
+				   reduced_prefetch_block, align_unit))
 	{
 	  /* Do not reduce prefetch_before if we meet beyond cache size.  */
 	  if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK)
@@ -809,9 +811,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
       /* Try also the following iteration.  */
       prefetch_before++;
       delta = step - delta;
-      miss_rate = compute_miss_rate(prefetch_block, step, delta,
-				    reduced_prefetch_block, align_unit);
-      if (miss_rate <= ACCEPTABLE_MISS_RATE)
+      if (is_miss_rate_acceptable (prefetch_block, step, delta,
+				   reduced_prefetch_block, align_unit))
 	{
 	  if (prefetch_before < ref->prefetch_before)
 	    ref->prefetch_before = prefetch_before;
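
Note (not part of the patch): the sketch below is a minimal, self-contained C rendering of the control flow the patch introduces, for readers who want to try it outside the GCC tree.  The function name, the ACCEPTABLE_MISS_RATE value, and the parameters in main are illustrative assumptions; only the hoisted total_positions/max_allowed_miss_positions computation and the early return mirror the change above.

/* Standalone sketch of the early-return miss-rate check; names and
   parameter values are invented for illustration.  */

#include <stdbool.h>
#include <stdio.h>

#define ACCEPTABLE_MISS_RATE 50	/* per mille; assumed value for the demo */

static bool
is_miss_rate_acceptable_sketch (unsigned cache_line_size, int step, int delta,
				unsigned distinct_iters, int align_unit)
{
  unsigned align, iter;
  int total_positions, miss_positions, max_allowed_miss_positions;
  int address1, address2, cache_line1, cache_line2;

  /* Two references whose distance is at least a cache line always miss.  */
  if (delta >= (int) cache_line_size)
    return false;

  /* Hoisted out of the loops: the total number of (alignment, iteration)
     positions and the largest miss count still within the threshold.  */
  miss_positions = 0;
  total_positions = (cache_line_size / align_unit) * distinct_iters;
  max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;

  /* Enumerate every alignment of the first reference within its cache
     line and every distinct iteration of the access pattern.  */
  for (align = 0; align < cache_line_size; align += align_unit)
    for (iter = 0; iter < distinct_iters; iter++)
      {
	address1 = align + step * iter;
	address2 = address1 + delta;
	cache_line1 = address1 / cache_line_size;
	cache_line2 = address2 / cache_line_size;
	if (cache_line1 != cache_line2
	    && ++miss_positions > max_allowed_miss_positions)
	  return false;	/* Early return: threshold already exceeded.  */
      }

  return true;
}

int
main (void)
{
  /* Example parameters (made up): 64-byte lines, step 256, delta 32,
     4 distinct iterations, 8-byte alignment unit.  */
  printf ("acceptable: %s\n",
	  is_miss_rate_acceptable_sketch (64, 256, 32, 4, 8) ? "yes" : "no");
  return 0;
}

With these made-up inputs the two references land on different cache lines for half of the possible alignments, so max_allowed_miss_positions is exceeded after only a few loop iterations and the function returns false without enumerating all positions, which is the cost reduction the patch is after.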