Commit bb642979 by Richard Sandiford Committed by Richard Sandiford

Add DR_BASE_ALIGNMENT and DR_BASE_MISALIGNMENT

This patch records the base alignment and misalignment in
innermost_loop_behavior, to avoid the second-guessing that was
previously done in vect_compute_data_ref_alignment.  It also makes
vect_analyze_data_refs use dr_analyze_innermost, instead of having an
almost-copy of the same code.

I wasn't sure whether the alignments should be measured in bits
(for consistency with most other interfaces) or in bytes (for consistency
with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment).
I went for bytes because:

- I think in practice most consumers are going to want bytes.
  E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT
  in vect_compute_data_ref_alignment.

- It means that any bit-level paranoia is dealt with when building
  the innermost_loop_behavior and doesn't get pushed down to consumers.

2017-07-03  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-data-ref.h (innermost_loop_behavior): Add base_alignment
	and base_misalignment fields.
	(DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.
	* tree-data-ref.c: Include builtins.h.
	(dr_analyze_innermost): Set up the new innermost_loop_behavior fields.
	* tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
	(STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.
	* tree-vect-data-refs.c: Include tree-cfg.h.
	(vect_compute_data_ref_alignment): Use the new innermost_loop_behavior
	fields instead of calculating an alignment here.
	(vect_analyze_data_refs): Use dr_analyze_innermost.  Dump the new
	innermost_loop_behavior fields.

From-SVN: r249916
parent 832b4117
2017-07-03 Richard Sandiford <richard.sandiford@linaro.org>
* tree-data-ref.h (innermost_loop_behavior): Add base_alignment
and base_misalignment fields.
(DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.
* tree-data-ref.c: Include builtins.h.
(dr_analyze_innermost): Set up the new innermost_loop_behavior fields.
* tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
(STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.
* tree-vect-data-refs.c: Include tree-cfg.h.
(vect_compute_data_ref_alignment): Use the new innermost_loop_behavior
fields instead of calculating an alignment here.
(vect_analyze_data_refs): Use dr_analyze_innermost. Dump the new
innermost_loop_behavior fields.
2017-07-03 Richard Sandiford <richard.sandiford@linaro.org>
* tree-data-ref.h (innermost_loop_behavior): Add a step_alignment
field.
(DR_STEP_ALIGNMENT): New macro.
......
......@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see
#include "dumpfile.h"
#include "tree-affine.h"
#include "params.h"
#include "builtins.h"
static struct datadep_stats
{
......@@ -802,11 +803,26 @@ dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
return false;
}
/* Calculate the alignment and misalignment for the inner reference. */
unsigned int HOST_WIDE_INT base_misalignment;
unsigned int base_alignment;
get_object_alignment_1 (base, &base_alignment, &base_misalignment);
/* There are no bitfield references remaining in BASE, so the values
we got back must be whole bytes. */
gcc_assert (base_alignment % BITS_PER_UNIT == 0
&& base_misalignment % BITS_PER_UNIT == 0);
base_alignment /= BITS_PER_UNIT;
base_misalignment /= BITS_PER_UNIT;
if (TREE_CODE (base) == MEM_REF)
{
if (!integer_zerop (TREE_OPERAND (base, 1)))
{
/* Subtract MOFF from the base and add it to POFFSET instead.
Adjust the misalignment to reflect the amount we subtracted. */
offset_int moff = mem_ref_offset (base);
base_misalignment -= moff.to_short_addr ();
tree mofft = wide_int_to_tree (sizetype, moff);
if (!poffset)
poffset = mofft;
......@@ -855,20 +871,46 @@ dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
}
init = ssize_int (pbitpos / BITS_PER_UNIT);
/* Subtract any constant component from the base and add it to INIT instead.
Adjust the misalignment to reflect the amount we subtracted. */
split_constant_offset (base_iv.base, &base_iv.base, &dinit);
init = size_binop (PLUS_EXPR, init, dinit);
init = size_binop (PLUS_EXPR, init, dinit);
base_misalignment -= TREE_INT_CST_LOW (dinit);
split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
init = size_binop (PLUS_EXPR, init, dinit);
init = size_binop (PLUS_EXPR, init, dinit);
step = size_binop (PLUS_EXPR,
fold_convert (ssizetype, base_iv.step),
fold_convert (ssizetype, offset_iv.step));
drb->base_address = canonicalize_base_object_address (base_iv.base);
base = canonicalize_base_object_address (base_iv.base);
/* See if get_pointer_alignment can guarantee a higher alignment than
the one we calculated above. */
unsigned int HOST_WIDE_INT alt_misalignment;
unsigned int alt_alignment;
get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
/* As above, these values must be whole bytes. */
gcc_assert (alt_alignment % BITS_PER_UNIT == 0
&& alt_misalignment % BITS_PER_UNIT == 0);
alt_alignment /= BITS_PER_UNIT;
alt_misalignment /= BITS_PER_UNIT;
if (base_alignment < alt_alignment)
{
base_alignment = alt_alignment;
base_misalignment = alt_misalignment;
}
drb->base_address = base;
drb->offset = fold_convert (ssizetype, offset_iv.base);
drb->init = init;
drb->step = step;
drb->base_alignment = base_alignment;
drb->base_misalignment = base_misalignment & (base_alignment - 1);
drb->offset_alignment = highest_pow2_factor (offset_iv.base);
drb->step_alignment = highest_pow2_factor (step);
......@@ -1085,6 +1127,9 @@ create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
fprintf (dump_file, "\n\tstep: ");
print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
fprintf (dump_file, "\n\tbase misalignment: %d",
DR_BASE_MISALIGNMENT (dr));
fprintf (dump_file, "\n\toffset alignment: %d",
DR_OFFSET_ALIGNMENT (dr));
fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
......
......@@ -52,6 +52,42 @@ struct innermost_loop_behavior
tree init;
tree step;
/* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes
from an alignment boundary of BASE_ALIGNMENT bytes. For example,
if we had:
struct S __attribute__((aligned(16))) { ... };
char *ptr;
... *(struct S *) (ptr - 4) ...;
the information would be:
base_address: ptr
base_alignment: 16
base_misalignment: 4
init: -4
where init cancels the base misalignment. If instead we had a
reference to a particular field:
struct S __attribute__((aligned(16))) { ... int f; ... };
char *ptr;
... ((struct S *) (ptr - 4))->f ...;
the information would be:
base_address: ptr
base_alignment: 16
base_misalignment: 4
init: -4 + offsetof (S, f)
where base_address + init might also be misaligned, and by a different
amount from base_address. */
unsigned int base_alignment;
unsigned int base_misalignment;
/* The largest power of two that divides OFFSET, capped to a suitably
high value if the offset is zero. This is a byte rather than a bit
quantity. */
......@@ -147,6 +183,8 @@ struct data_reference
#define DR_INIT(DR) (DR)->innermost.init
#define DR_STEP(DR) (DR)->innermost.step
#define DR_PTR_INFO(DR) (DR)->alias.ptr_info
#define DR_BASE_ALIGNMENT(DR) (DR)->innermost.base_alignment
#define DR_BASE_MISALIGNMENT(DR) (DR)->innermost.base_misalignment
#define DR_OFFSET_ALIGNMENT(DR) (DR)->innermost.offset_alignment
#define DR_STEP_ALIGNMENT(DR) (DR)->innermost.step_alignment
#define DR_INNERMOST(DR) (DR)->innermost
......
......@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see
#include "expr.h"
#include "builtins.h"
#include "params.h"
#include "tree-cfg.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
......@@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
struct loop *loop = NULL;
tree ref = DR_REF (dr);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree base;
unsigned HOST_WIDE_INT alignment;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
......@@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"step doesn't divide the vector-size.\n");
}
tree base_addr = drb->base_address;
/* To look at alignment of the base we have to preserve an inner MEM_REF
as that carries alignment information of the actual access. */
base = ref;
while (handled_component_p (base))
base = TREE_OPERAND (base, 0);
unsigned int base_alignment = 0;
unsigned HOST_WIDE_INT base_bitpos;
get_object_alignment_1 (base, &base_alignment, &base_bitpos);
/* As data-ref analysis strips the MEM_REF down to its base operand
to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
adjust things to make base_alignment valid as the alignment of
DR_BASE_ADDRESS. */
if (TREE_CODE (base) == MEM_REF)
{
/* Note all this only works if DR_BASE_ADDRESS is the same as
MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
in other offsets. We need to rework DR to compute the alignment
of DR_BASE_ADDRESS as long as all information is still available. */
if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
{
base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
base_bitpos &= (base_alignment - 1);
}
else
base_bitpos = BITS_PER_UNIT;
}
if (base_bitpos != 0)
base_alignment = base_bitpos & -base_bitpos;
/* Also look at the alignment of the base address DR analysis
computed. */
unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
if (base_addr_alignment > base_alignment)
base_alignment = base_addr_alignment;
if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
DR_VECT_AUX (dr)->base_element_aligned = true;
alignment = TYPE_ALIGN_UNIT (vectype);
unsigned int base_alignment = drb->base_alignment;
unsigned int base_misalignment = drb->base_misalignment;
unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype);
unsigned HOST_WIDE_INT element_alignment
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
if (base_alignment >= element_alignment
&& (base_misalignment & (element_alignment - 1)) == 0)
DR_VECT_AUX (dr)->base_element_aligned = true;
if (drb->offset_alignment < alignment
if (drb->offset_alignment < vector_alignment
|| !step_preserves_misalignment_p
/* We need to know whether the step wrt the vectorized loop is
negative when computing the starting misalignment below. */
......@@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
return true;
}
if (base_alignment < TYPE_ALIGN (vectype))
if (base_alignment < vector_alignment)
{
base = base_addr;
tree base = drb->base_address;
if (TREE_CODE (base) == ADDR_EXPR)
base = TREE_OPERAND (base, 0);
if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
if (!vect_can_force_dr_alignment_p (base,
vector_alignment * BITS_PER_UNIT))
{
if (dump_enabled_p ())
{
......@@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
DR_VECT_AUX (dr)->base_decl = base;
DR_VECT_AUX (dr)->base_misaligned = true;
DR_VECT_AUX (dr)->base_element_aligned = true;
base_misalignment = 0;
}
unsigned int misalignment = (base_misalignment
+ TREE_INT_CST_LOW (drb->init));
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
Adjust misalign accordingly. */
tree misalign = drb->init;
if (tree_int_cst_sgn (drb->step) < 0)
{
tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
/* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
otherwise we wouldn't be here. */
offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step);
/* PLUS because STEP was negative. */
misalign = size_binop (PLUS_EXPR, misalign, offset);
}
/* PLUS because STEP is negative. */
misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
* TREE_INT_CST_LOW (drb->step));
SET_DR_MISALIGNMENT (dr,
wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));
if (dump_enabled_p ())
{
......@@ -3554,100 +3520,27 @@ again:
the outer-loop. */
if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree outer_step, outer_base, outer_init;
HOST_WIDE_INT pbitsize, pbitpos;
tree poffset;
machine_mode pmode;
int punsignedp, preversep, pvolatilep;
affine_iv base_iv, offset_iv;
tree dinit;
/* Build a reference to the first location accessed by the
inner-loop: *(BASE+INIT). (The first location is actually
BASE+INIT+OFFSET, but we add OFFSET separately later). */
tree inner_base = build_fold_indirect_ref
(fold_build_pointer_plus (base, init));
inner loop: *(BASE + INIT + OFFSET). By construction,
this address must be invariant in the inner loop, so we
can consider it as being used in the outer loop. */
tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
init, offset);
tree init_addr = fold_build_pointer_plus (base, init_offset);
tree init_ref = build_fold_indirect_ref (init_addr);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"analyze in outer-loop: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
"analyze in outer loop: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
dump_printf (MSG_NOTE, "\n");
}
outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
&poffset, &pmode, &punsignedp,
&preversep, &pvolatilep);
gcc_assert (outer_base != NULL_TREE);
if (pbitpos % BITS_PER_UNIT != 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"failed: bit offset alignment.\n");
return false;
}
if (preversep)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"failed: reverse storage order.\n");
return false;
}
outer_base = build_fold_addr_expr (outer_base);
if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
&base_iv, false))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"failed: evolution of base is not affine.\n");
return false;
}
if (offset)
{
if (poffset)
poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
poffset);
else
poffset = offset;
}
if (!poffset)
{
offset_iv.base = ssize_int (0);
offset_iv.step = ssize_int (0);
}
else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
&offset_iv, false))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"evolution of offset is not affine.\n");
return false;
}
outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
split_constant_offset (base_iv.base, &base_iv.base, &dinit);
outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
outer_step = size_binop (PLUS_EXPR,
fold_convert (ssizetype, base_iv.step),
fold_convert (ssizetype, offset_iv.step));
STMT_VINFO_DR_STEP (stmt_info) = outer_step;
/* FIXME: Use canonicalize_base_object_address (base_iv.base); */
STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
STMT_VINFO_DR_INIT (stmt_info) = outer_init;
STMT_VINFO_DR_OFFSET (stmt_info) =
fold_convert (ssizetype, offset_iv.base);
STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)
= highest_pow2_factor (offset_iv.base);
if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
init_ref, loop))
/* dr_analyze_innermost already explained the failure. */
return false;
if (dump_enabled_p ())
{
......@@ -3665,6 +3558,10 @@ again:
dump_printf (MSG_NOTE, "\n\touter step: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM,
STMT_VINFO_DR_STEP (stmt_info));
dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
......
......@@ -709,6 +709,9 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
#define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step
#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
(S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
(S)->dr_wrt_vec_loop.offset_alignment
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment