Commit 14e444c3, authored by Changpeng Fang, committed by Sebastian Pop

Reduce the cost in miss rate computation.

2010-07-02  Changpeng Fang  <changpeng.fang@amd.com>

	* tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to
	is_miss_rate_acceptable. Pull total_positions computation
	out of the loops.  Early return if miss_positions exceeds
	the acceptable threshold.
	* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call
	is_miss_rate_acceptable after renaming of compute_miss_rate.

From-SVN: r161728
parent a245c04b
2010-07-02 Changpeng Fang <changpeng.fang@amd.com> 2010-07-02 Changpeng Fang <changpeng.fang@amd.com>
* tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to
is_miss_rate_acceptable. Pull total_positions computation
out of the loops. Early return if miss_positions exceeds
the acceptable threshold.
* tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call
is_miss_rate_acceptable after renaming of compute_miss_rate.
2010-07-02 Changpeng Fang <changpeng.fang@amd.com>
PR middle-end/44576 PR middle-end/44576
* tree-ssa-loop-prefetch.c (compute_miss_rate): Return 1000 (out * tree-ssa-loop-prefetch.c (compute_miss_rate): Return 1000 (out
of 1000) for miss rate if the address difference is greater than or of 1000) for miss rate if the address difference is greater than or
......
...@@ -640,27 +640,29 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by) ...@@ -640,27 +640,29 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
/* Given a CACHE_LINE_SIZE and two inductive memory references /* Given a CACHE_LINE_SIZE and two inductive memory references
with a common STEP greater than CACHE_LINE_SIZE and an address with a common STEP greater than CACHE_LINE_SIZE and an address
difference DELTA, compute the probability that they will fall difference DELTA, compute the probability that they will fall
in different cache lines. DISTINCT_ITERS is the number of in different cache lines. Return true if the computed miss rate
distinct iterations after which the pattern repeats itself. is not greater than the ACCEPTABLE_MISS_RATE. DISTINCT_ITERS is the
number of distinct iterations after which the pattern repeats itself.
ALIGN_UNIT is the unit of alignment in bytes. */ ALIGN_UNIT is the unit of alignment in bytes. */
static int static bool
compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, is_miss_rate_acceptable (unsigned HOST_WIDE_INT cache_line_size,
HOST_WIDE_INT step, HOST_WIDE_INT delta, HOST_WIDE_INT step, HOST_WIDE_INT delta,
unsigned HOST_WIDE_INT distinct_iters, unsigned HOST_WIDE_INT distinct_iters,
int align_unit) int align_unit)
{ {
unsigned align, iter; unsigned align, iter;
int total_positions, miss_positions, miss_rate; int total_positions, miss_positions, max_allowed_miss_positions;
int address1, address2, cache_line1, cache_line2; int address1, address2, cache_line1, cache_line2;
/* It always misses if delta is greater than or equal to the cache /* It always misses if delta is greater than or equal to the cache
line size. */ line size. */
if (delta >= cache_line_size) if (delta >= (HOST_WIDE_INT) cache_line_size)
return 1000; return false;
total_positions = 0;
miss_positions = 0; miss_positions = 0;
total_positions = (cache_line_size / align_unit) * distinct_iters;
max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000;
/* Iterate through all possible alignments of the first /* Iterate through all possible alignments of the first
memory reference within its cache line. */ memory reference within its cache line. */
...@@ -673,12 +675,14 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, ...@@ -673,12 +675,14 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size,
address2 = address1 + delta; address2 = address1 + delta;
cache_line1 = address1 / cache_line_size; cache_line1 = address1 / cache_line_size;
cache_line2 = address2 / cache_line_size; cache_line2 = address2 / cache_line_size;
total_positions += 1;
if (cache_line1 != cache_line2) if (cache_line1 != cache_line2)
{
miss_positions += 1; miss_positions += 1;
if (miss_positions > max_allowed_miss_positions)
return false;
}
} }
miss_rate = 1000 * miss_positions / total_positions; return true;
return miss_rate;
} }
/* Prune the prefetch candidate REF using the reuse with BY. /* Prune the prefetch candidate REF using the reuse with BY.
...@@ -694,7 +698,6 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, ...@@ -694,7 +698,6 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
HOST_WIDE_INT delta = delta_b - delta_r; HOST_WIDE_INT delta = delta_b - delta_r;
HOST_WIDE_INT hit_from; HOST_WIDE_INT hit_from;
unsigned HOST_WIDE_INT prefetch_before, prefetch_block; unsigned HOST_WIDE_INT prefetch_before, prefetch_block;
int miss_rate;
HOST_WIDE_INT reduced_step; HOST_WIDE_INT reduced_step;
unsigned HOST_WIDE_INT reduced_prefetch_block; unsigned HOST_WIDE_INT reduced_prefetch_block;
tree ref_type; tree ref_type;
...@@ -793,9 +796,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, ...@@ -793,9 +796,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
delta %= step; delta %= step;
ref_type = TREE_TYPE (ref->mem); ref_type = TREE_TYPE (ref->mem);
align_unit = TYPE_ALIGN (ref_type) / 8; align_unit = TYPE_ALIGN (ref_type) / 8;
miss_rate = compute_miss_rate(prefetch_block, step, delta, if (is_miss_rate_acceptable (prefetch_block, step, delta,
reduced_prefetch_block, align_unit); reduced_prefetch_block, align_unit))
if (miss_rate <= ACCEPTABLE_MISS_RATE)
{ {
/* Do not reduce prefetch_before if we meet beyond cache size. */ /* Do not reduce prefetch_before if we meet beyond cache size. */
if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK) if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK)
...@@ -809,9 +811,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, ...@@ -809,9 +811,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by,
/* Try also the following iteration. */ /* Try also the following iteration. */
prefetch_before++; prefetch_before++;
delta = step - delta; delta = step - delta;
miss_rate = compute_miss_rate(prefetch_block, step, delta, if (is_miss_rate_acceptable (prefetch_block, step, delta,
reduced_prefetch_block, align_unit); reduced_prefetch_block, align_unit))
if (miss_rate <= ACCEPTABLE_MISS_RATE)
{ {
if (prefetch_before < ref->prefetch_before) if (prefetch_before < ref->prefetch_before)
ref->prefetch_before = prefetch_before; ref->prefetch_before = prefetch_before;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment