Commit 22458c5a by Jan Hubicka Committed by Jan Hubicka

l_fma_float_?.c: Update.


	* gcc.target/i386/l_fma_float_?.c: Update.
	* gcc.target/i386/l_fma_double_?.c: Update.

	* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
	vect_do_peeling_for_alignment): Fix loop bound computation.
	* tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.

From-SVN: r193241
parent c8fef899
2012-11-06 Jan Hubicka <jh@suse.cz>
* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
vect_do_peeling_for_alignment): Fix loop bound computation.
* tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.
2012-11-06 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
2012-11-06 Jan Hubicka <jh@suse.cz>
* gcc.target/i386/l_fma_float_?.c: Update.
* gcc.target/i386/l_fma_double_?.c: Update.
2012-11-06 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
......
......@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
......@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
......@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
......@@ -16,11 +16,11 @@
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
......@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
......@@ -1954,9 +1954,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
by ratio_mult_vf_name steps. */
vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
/* For vectorization factor N, we need to copy the last N-1 values in the
epilogue and this means N-2 loopback edge executions.
PEELING_FOR_GAPS works by subtracting the last iteration and thus the epilogue
will execute at least LOOP_VINFO_VECT_FACTOR times. */
max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
: LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
if (check_profitability)
max_iter = MAX (max_iter, (int) th);
max_iter = MAX (max_iter, (int) th - 1);
record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
dump_printf (MSG_OPTIMIZED_LOCATIONS,
"Setting upper bound of nb iterations for epilogue "
......@@ -2186,9 +2193,11 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
/* For vectorization factor N, we need to copy at most N-1 values
for alignment and this means N-2 loopback edge executions. */
max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
if (check_profitability)
max_iter = MAX (max_iter, (int) th);
max_iter = MAX (max_iter, (int) th - 1);
record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
dump_printf (MSG_OPTIMIZED_LOCATIONS,
"Setting upper bound of nb iterations for prologue "
......
......@@ -5448,10 +5448,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
bool transform_pattern_stmt = false;
bool check_profitability = false;
int th;
/* Record number of iterations before we started tampering with the profile. */
gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
/* If profile is imprecise, we have a chance to fix it up. */
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
/* Use the more conservative vectorization threshold. If the number
of iterations is constant assume the cost check has been performed
by our caller. If the threshold makes all loops profitable that
......@@ -5735,6 +5741,25 @@ vect_transform_loop (loop_vec_info loop_vinfo)
slpeel_make_loop_iterate_ntimes (loop, ratio);
/* Reduce loop iterations by the vectorization factor. */
scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor),
expected_iterations / vectorization_factor);
loop->nb_iterations_upper_bound
= loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
FLOOR_DIV_EXPR);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
&& loop->nb_iterations_upper_bound != double_int_zero)
loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
if (loop->any_estimate)
{
loop->nb_iterations_estimate
= loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
FLOOR_DIV_EXPR);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
&& loop->nb_iterations_estimate != double_int_zero)
loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
}
/* The memory tags and pointers in vectorized statements need to
have their SSA forms updated. FIXME, why can't this be delayed
until all the loops have been transformed? */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment