Commit b61b1f17 by Michael Matz

re PR tree-optimization/41783 (r151561 (PRE fix) regresses zeusmp)

	PR tree-optimization/41783
	* tree-data-ref.c (toplevel): Include flags.h.
	(dump_data_dependence_relation):  Also dump the inputs if the
	result will be unknown.
	(split_constant_offset_1): Look through some conversions.
	* tree-predcom.c (determine_roots_comp): Restart a new chain if
	the offset from last element is too large.
	(ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF.
	(reassociate_to_the_same_stmt): Handle vector registers.
	* tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations
	(e.g. conversions).
	* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add 
	wide_prolog_niters argument, emit widening instructions.
	(vect_do_peeling_for_alignment): Adjust caller, use widened
	variant of the iteration count.
	* Makefile.in (tree-data-ref.o): Add $(FLAGS_H).

testsuite/
	* gfortran.dg/vect/fast-math-mgrid-resid.f: New.

From-SVN: r156043
parent b3d7e191
2010-01-19 Michael Matz <matz@suse.de>
PR tree-optimization/41783
* tree-data-ref.c (toplevel): Include flags.h.
(dump_data_dependence_relation): Also dump the inputs if the
result will be unknown.
(split_constant_offset_1): Look through some conversions.
* tree-predcom.c (determine_roots_comp): Restart a new chain if
the offset from last element is too large.
(ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF.
(reassociate_to_the_same_stmt): Handle vector registers.
* tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations
(e.g. conversions).
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add
wide_prolog_niters argument, emit widening instructions.
(vect_do_peeling_for_alignment): Adjust caller, use widened
variant of the iteration count.
* Makefile.in (tree-data-ref.o): Add $(FLAGS_H).
2010-01-19 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
PR target/38697
* config/arm/neon-testgen.m (emit_automatics): New parameter
PR target/38697
* config/arm/neon-testgen.m (emit_automatics): New parameter
features. Adjust for Fixed_return_reg feature.
(test_intrinsic): Call emit_automatics with new feature.
* config/arm/neon.ml: Update copyright years.
* config/arm/neon.ml: Update copyright years.
(features): New Fixed_return_reg feature.
(ops): Update feature for Vget_low.
......
......@@ -2548,7 +2548,7 @@ tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \
$(TIMEVAR_H) $(CFGLOOP_H) $(SCEV_H) $(TREE_PASS_H) $(FLAGS_H) \
gt-tree-scalar-evolution.h
tree-data-ref.o: tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
$(GGC_H) $(FLAGS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
$(TREE_DATA_REF_H) $(TREE_PASS_H) langhooks.h
sese.o: sese.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
......
2010-01-19 Michael Matz <matz@suse.de>
PR tree-optimization/41783
* gfortran.dg/vect/fast-math-mgrid-resid.f: New.
2010-01-19 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
PR target/38697.
......
! { dg-do compile }
! { dg-require-effective-target vect_double }
! { dg-options "-O3 -ffast-math -fpredictive-commoning -ftree-vectorize -fdump-tree-optimized" }
******* RESID COMPUTES THE RESIDUAL: R = V - AU
*
* THIS SIMPLE IMPLEMENTATION COSTS 27A + 4M PER RESULT, WHERE
* A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND
* MULTIPLICATION, RESPECTIVELY. BY USING SEVERAL TWO-DIMENSIONAL
* BUFFERS ONE CAN REDUCE THIS COST TO 13A + 4M IN THE GENERAL
* CASE, OR 10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO.
*
* ARGUMENTS:
*   U, V  N x N x N REAL*8 ARRAYS; ONLY READ BY THIS ROUTINE.
*   R     N x N x N REAL*8 ARRAY; RECEIVES THE RESULT.
*   N     EXTENT OF EACH ARRAY DIMENSION.
*   A     COEFFICIENTS A(0:3), ONE PER GROUP OF STENCIL POINTS.
*
SUBROUTINE RESID(U,V,R,N,A)
INTEGER N
REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
INTEGER I3, I2, I1
C
* ONLY INDICES 2..N-1 ARE VISITED IN EACH DIMENSION, SO THE OUTERMOST
* LAYER OF R IS NOT WRITTEN. ALL THREE LOOPS TERMINATE ON STATEMENT 600.
*
* THE 27 POINTS OF U AROUND (I1,I2,I3) ARE GROUPED BY HOW MANY INDICES
* ARE OFFSET BY ONE:
*   A(0)  THE POINT ITSELF (1 TERM).
*   A(1)  EXACTLY ONE INDEX OFFSET (6 TERMS).
*   A(2)  EXACTLY TWO INDICES OFFSET (12 TERMS).
*   A(3)  ALL THREE INDICES OFFSET (8 TERMS).
DO 600 I3=2,N-1
DO 600 I2=2,N-1
DO 600 I1=2,N-1
600 R(I1,I2,I3)=V(I1,I2,I3)
> -A(0)*( U(I1, I2, I3 ) )
> -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 )
> + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 )
> + U(I1, I2, I3-1) + U(I1, I2, I3+1) )
> -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 )
> + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 )
> + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1)
> + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1)
> + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1)
> + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) )
> -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
> + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
> + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
> + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
C
RETURN
END
! we want to check that predictive commoning did something on the
! vectorized loop, which means we have to have exactly 13 vector
! additions.
! { dg-final { scan-tree-dump-times "vect_var\[^\\n\]*\\+ " 13 "optimized" } }
! { dg-final { cleanup-tree-dump "vect" } }
! { dg-final { cleanup-tree-dump "optimized" } }
......@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "flags.h"
#include "tree.h"
/* These RTL headers are needed for basic-block.h. */
......@@ -380,6 +381,19 @@ dump_data_dependence_relation (FILE *outf,
if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
if (ddr)
{
dra = DDR_A (ddr);
drb = DDR_B (ddr);
if (dra)
dump_data_reference (outf, dra);
else
fprintf (outf, " (nil)\n");
if (drb)
dump_data_reference (outf, drb);
else
fprintf (outf, " (nil)\n");
}
fprintf (outf, " (don't know)\n)\n");
return;
}
......@@ -631,6 +645,24 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
return split_constant_offset_1 (type, var0, subcode, var1, var, off);
}
CASE_CONVERT:
{
/* We must not introduce undefined overflow, and we must not change the value.
Hence we're okay if the inner type doesn't overflow to start with
(pointer or signed), the outer type also is an integer or pointer
and the outer precision is at least as large as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
|| (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
split_constant_offset (op0, &var0, off);
*var = fold_convert (type, var0);
return true;
}
return false;
}
default:
return false;
......
......@@ -1180,6 +1180,7 @@ determine_roots_comp (struct loop *loop,
unsigned i;
dref a;
chain_p chain = NULL;
double_int last_ofs = double_int_zero;
/* Invariants are handled specially. */
if (comp->comp_step == RS_INVARIANT)
......@@ -1194,13 +1195,20 @@ determine_roots_comp (struct loop *loop,
for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
{
if (!chain || !DR_IS_READ (a->ref))
if (!chain || !DR_IS_READ (a->ref)
|| double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE),
double_int_add (a->offset,
double_int_neg (last_ofs))) <= 0)
{
if (nontrivial_chain_p (chain))
VEC_safe_push (chain_p, heap, *chains, chain);
{
add_looparound_copies (loop, chain);
VEC_safe_push (chain_p, heap, *chains, chain);
}
else
release_chain (chain);
chain = make_rooted_chain (a);
last_ofs = a->offset;
continue;
}
......@@ -1338,9 +1346,11 @@ ref_at_iteration (struct loop *loop, tree ref, int iter)
else if (!INDIRECT_REF_P (ref))
return unshare_expr (ref);
if (TREE_CODE (ref) == INDIRECT_REF)
if (INDIRECT_REF_P (ref))
{
ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
/* Take care for INDIRECT_REF and MISALIGNED_INDIRECT_REF at
the same time. */
ret = copy_node (ref);
idx = TREE_OPERAND (ref, 0);
idx_p = &TREE_OPERAND (ret, 0);
}
......@@ -2205,11 +2215,17 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
/* Insert the new statement combining NAME1 and NAME2 before S1, and
combine it with the rhs of S1. */
var = create_tmp_var (type, "predreastmp");
if (TREE_CODE (type) == COMPLEX_TYPE
|| TREE_CODE (type) == VECTOR_TYPE)
DECL_GIMPLE_REG_P (var) = 1;
add_referenced_var (var);
new_name = make_ssa_name (var, NULL);
new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
var = create_tmp_var (type, "predreastmp");
if (TREE_CODE (type) == COMPLEX_TYPE
|| TREE_CODE (type) == VECTOR_TYPE)
DECL_GIMPLE_REG_P (var) = 1;
add_referenced_var (var);
tmp_name = make_ssa_name (var, NULL);
......
......@@ -294,7 +294,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
static bool
vect_equal_offsets (tree offset1, tree offset2)
{
bool res0, res1;
bool res;
STRIP_NOPS (offset1);
STRIP_NOPS (offset2);
......@@ -303,16 +303,19 @@ vect_equal_offsets (tree offset1, tree offset2)
return true;
if (TREE_CODE (offset1) != TREE_CODE (offset2)
|| !BINARY_CLASS_P (offset1)
|| !BINARY_CLASS_P (offset2))
|| (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
return false;
res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
TREE_OPERAND (offset2, 0));
res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
TREE_OPERAND (offset2, 1));
res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
TREE_OPERAND (offset2, 0));
return (res0 && res1);
if (!res || !BINARY_CLASS_P (offset1))
return res;
res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
TREE_OPERAND (offset2, 1));
return res;
}
......
......@@ -1961,7 +1961,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
use TYPE_VECTOR_SUBPARTS. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
tree *wide_prolog_niters)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
......@@ -2045,6 +2046,19 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
add_referenced_var (var);
stmts = NULL;
iters_name = force_gimple_operand (iters, &stmts, false, var);
if (types_compatible_p (sizetype, niters_type))
*wide_prolog_niters = iters_name;
else
{
gimple_seq seq = NULL;
tree wide_iters = fold_convert (sizetype, iters);
var = create_tmp_var (sizetype, "prolog_loop_niters");
add_referenced_var (var);
*wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
var);
if (seq)
gimple_seq_add_seq (&stmts, seq);
}
/* Insert stmt on loop preheader edge. */
if (stmts)
......@@ -2115,6 +2129,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop, ni_name;
tree n_iters;
tree wide_prolog_niters;
struct loop *new_loop;
unsigned int th = 0;
int min_profitable_iters;
......@@ -2125,7 +2140,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
initialize_original_copy_tables ();
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
&wide_prolog_niters);
/* Get profitability threshold for vectorized loop. */
......@@ -2150,7 +2166,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
/* Update the init conditions of the access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
/* After peeling we have to reset scalar evolution analyzer. */
scev_reset ();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment