Commit 550a3380 by Richard Sandiford (committed by Richard Sandiford)

[AArch64] Add partial SVE vector modes

This patch adds extra vector modes that represent a half, quarter or
eighth of what an SVE vector can hold.  This is useful for describing
the memory vector involved in an extending load or truncating store.
It might also be useful in future for representing "unpacked" SVE
registers, i.e. registers that contain values in the low bits of a
wider containing element.

The new modes could have the same width as an Advanced SIMD mode for
certain -msve-vector-bits=N options, so we need to ensure that they
come later in the mode list and that Advanced SIMD modes always "win".
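
For example, with -msve-vector-bits=128 a full SVE vector occupies
16 bytes, so the half-size mode VNx8QI ends up with the same 8-byte
size as the Advanced SIMD mode V8QI:

    V8QI    (Advanced SIMD)          8 bytes
    VNx8QI  (half an SVE vector)     8 bytes when -msve-vector-bits=128

Sorting the partial modes after everything else keeps size-based mode
lookups returning V8QI.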

2019-10-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* genmodes.c (mode_data::order): New field.
	(blank_mode): Update accordingly.
	(VECTOR_MODES_WITH_PREFIX): Add an order parameter.
	(make_vector_modes): Likewise.
	(VECTOR_MODES): Update use accordingly.
	(cmp_modes): Sort by the new order field ahead of sorting by size.
	* config/aarch64/aarch64-modes.def (VNx2QI, VNx2HI, VNx2SI)
	(VNx4QI, VNx4HI, VNx8QI): New partial vector modes.
	* config/aarch64/aarch64.c (VEC_PARTIAL): New flag value.
	(aarch64_classify_vector_mode): Handle the new partial modes.
	(aarch64_vl_bytes): New function.
	(aarch64_hard_regno_nregs): Use it instead of BYTES_PER_SVE_VECTOR
	when counting the number of registers in an SVE mode.
	(aarch64_class_max_nregs): Likewise.
	(aarch64_hard_regno_mode_ok): Don't allow partial vectors
	in registers yet.
	(aarch64_classify_address): Treat partial vectors analogously
	to full vectors.
	(aarch64_print_address_internal): Consolidate the printing of
	MUL VL addresses, using aarch64_vl_bytes as the number of
	bytes represented by "VL".
	(aarch64_vector_mode_supported_p): Reject partial vector modes.

From-SVN: r277062
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -82,8 +82,8 @@ INT_MODE (XI, 64);
    strictly necessary to set the alignment here, since the default would
    be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer.  */
 #define SVE_MODES(NVECS, VB, VH, VS, VD) \
-  VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS); \
-  VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS); \
+  VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \
+  VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \
 \
   ADJUST_NUNITS (VB##QI, aarch64_sve_vg * NVECS * 8); \
   ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \
@@ -108,6 +108,40 @@ SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
 SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
 SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
 
+/* Partial SVE vectors:
+
+      VNx2QI VNx4QI VNx8QI
+      VNx2HI VNx4HI
+      VNx2SI
+
+   In memory they occupy contiguous locations, in the same way as fixed-length
+   vectors.  E.g. VNx8QImode is half the size of VNx16QImode.
+
+   Passing 1 as the final argument ensures that the modes come after all
+   other modes in the GET_MODE_WIDER chain, so that we never pick them
+   in preference to a full vector mode.  */
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);
+
+ADJUST_NUNITS (VNx2QI, aarch64_sve_vg);
+ADJUST_NUNITS (VNx2HI, aarch64_sve_vg);
+ADJUST_NUNITS (VNx2SI, aarch64_sve_vg);
+ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2);
+ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2);
+ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4);
+
+ADJUST_ALIGNMENT (VNx2QI, 1);
+ADJUST_ALIGNMENT (VNx4QI, 1);
+ADJUST_ALIGNMENT (VNx8QI, 1);
+
+ADJUST_ALIGNMENT (VNx2HI, 2);
+ADJUST_ALIGNMENT (VNx4HI, 2);
+
+ADJUST_ALIGNMENT (VNx2SI, 4);
+
 /* Quad float: 128-bit floating mode for long doubles.  */
 FLOAT_MODE (TF, 16, ieee_quad_format);
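
As a worked example of the ADJUST_NUNITS lines above (a sketch; "vg"
stands for aarch64_sve_vg, the number of 64-bit granules in a vector,
so a full SVE vector holds 8 * vg bytes):

    /* With vg = 4 (256-bit SVE), VNx16QI is 32 bytes and:
       VNx8QI: 4 * vg = 16 units * 1 byte  = 16 bytes (half vector)
       VNx4HI: 2 * vg =  8 units * 2 bytes = 16 bytes (half vector)
       VNx2SI: 1 * vg =  4 units * 4 bytes = 16 bytes (half vector)
       VNx4QI: 2 * vg =  8 units * 1 byte  =  8 bytes (quarter)
       VNx2HI: 1 * vg =  4 units * 2 bytes =  8 bytes (quarter)
       VNx2QI: 1 * vg =  4 units * 1 byte  =  4 bytes (eighth)  */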
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1538,6 +1538,9 @@ const unsigned int VEC_SVE_PRED = 4;
 /* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
    a structure of 2, 3 or 4 vectors.  */
 const unsigned int VEC_STRUCT = 8;
+/* Can be used in combination with VEC_SVE_DATA to indicate that the
+   vector has fewer significant bytes than a full SVE vector.  */
+const unsigned int VEC_PARTIAL = 16;
 /* Useful combinations of the above.  */
 const unsigned int VEC_ANY_SVE  = VEC_SVE_DATA | VEC_SVE_PRED;
 const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;
@@ -1558,7 +1561,17 @@ aarch64_classify_vector_mode (machine_mode mode)
      of -msve-vector-bits.  */
   switch (mode)
     {
-    /* Single SVE vectors.  */
+    /* Partial SVE QI vectors.  */
+    case E_VNx2QImode:
+    case E_VNx4QImode:
+    case E_VNx8QImode:
+    /* Partial SVE HI vectors.  */
+    case E_VNx2HImode:
+    case E_VNx4HImode:
+    /* Partial SVE SI vector.  */
+    case E_VNx2SImode:
+      return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0;
+    /* Single SVE vectors.  */
     case E_VNx16QImode:
     case E_VNx8HImode:
     case E_VNx4SImode:
@@ -1641,6 +1654,24 @@ aarch64_sve_data_mode_p (machine_mode mode)
   return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
 }
 
+/* Return the number of defined bytes in one constituent vector of
+   SVE mode MODE, which has vector flags VEC_FLAGS.  */
+static poly_int64
+aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags)
+{
+  if (vec_flags & VEC_PARTIAL)
+    /* A single partial vector.  */
+    return GET_MODE_SIZE (mode);
+
+  if (vec_flags & VEC_SVE_DATA)
+    /* A single vector or a tuple.  */
+    return BYTES_PER_SVE_VECTOR;
+
+  /* A single predicate.  */
+  gcc_assert (vec_flags & VEC_SVE_PRED);
+  return BYTES_PER_SVE_PRED;
+}
+
 /* Implement target hook TARGET_ARRAY_MODE.  */
 static opt_machine_mode
 aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
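
A sketch of what aarch64_vl_bytes returns for representative modes
(assuming the classification in the earlier hunks; "vg" is
aarch64_sve_vg):

    /* VNx16QI  full vector   VEC_SVE_DATA                -> 8 * vg
       VNx32QI  tuple of two  VEC_SVE_DATA | VEC_STRUCT   -> 8 * vg
       VNx8QI   partial       VEC_SVE_DATA | VEC_PARTIAL  -> 4 * vg
       predicates             VEC_SVE_PRED                -> vg  */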
@@ -1769,10 +1800,13 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
     case FP_REGS:
     case FP_LO_REGS:
     case FP_LO8_REGS:
-      if (aarch64_sve_data_mode_p (mode))
-	return exact_div (GET_MODE_SIZE (mode),
-			  BYTES_PER_SVE_VECTOR).to_constant ();
-      return CEIL (lowest_size, UNITS_PER_VREG);
+      {
+	unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+	if (vec_flags & VEC_SVE_DATA)
+	  return exact_div (GET_MODE_SIZE (mode),
+			    aarch64_vl_bytes (mode, vec_flags)).to_constant ();
+	return CEIL (lowest_size, UNITS_PER_VREG);
+      }
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
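
Routing the division through aarch64_vl_bytes matters because a
partial mode occupies a single register even though its size is only
a fraction of BYTES_PER_SVE_VECTOR.  A worked sketch:

    /* VNx32QI: size 16 * vg, vl_bytes 8 * vg -> exact_div gives 2 registers.
       VNx8QI:  size  4 * vg, vl_bytes 4 * vg -> exact_div gives 1 register.
       Dividing VNx8QI's size by BYTES_PER_SVE_VECTOR (8 * vg) would
       not be an exact division.  */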
@@ -1796,6 +1830,11 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
     return mode == DImode;
 
   unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+
+  /* At the moment, partial vector modes are only useful for memory
+     references, but that could change in future.  */
+  if (vec_flags & VEC_PARTIAL)
+    return false;
+
   if (vec_flags & VEC_SVE_PRED)
     return PR_REGNUM_P (regno);
@@ -7441,9 +7480,15 @@ aarch64_classify_address (struct aarch64_address_info *info,
   HOST_WIDE_INT const_size;
 
+  /* Whether a vector mode is partial doesn't affect address legitimacy.
+     Partial vectors like VNx8QImode allow the same indexed addressing
+     mode and MUL VL addressing mode as full vectors like VNx16QImode;
+     in both cases, MUL VL counts multiples of GET_MODE_SIZE.  */
+  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+  vec_flags &= ~VEC_PARTIAL;
+
   /* On BE, we use load/store pair for all large int mode load/stores.
      TI/TFmode may also use a load/store pair.  */
-  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
   bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
   bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
			    || type == ADDR_QUERY_LDP_STP_N
@@ -8948,7 +8993,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
			       aarch64_addr_query_type type)
 {
   struct aarch64_address_info addr;
-  unsigned int size;
+  unsigned int size, vec_flags;
 
   /* Check all addresses are Pmode - including ILP32.  */
   if (GET_MODE (x) != Pmode
@@ -8964,26 +9009,24 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
     {
     case ADDRESS_REG_IMM:
       if (known_eq (addr.const_offset, 0))
-	asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
-      else if (aarch64_sve_data_mode_p (mode))
 	{
-	  HOST_WIDE_INT vnum
-	    = exact_div (addr.const_offset,
-			 BYTES_PER_SVE_VECTOR).to_constant ();
-	  asm_fprintf (f, "[%s, #%wd, mul vl]",
-		       reg_names[REGNO (addr.base)], vnum);
+	  asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]);
+	  return true;
 	}
-      else if (aarch64_sve_pred_mode_p (mode))
+
+      vec_flags = aarch64_classify_vector_mode (mode);
+      if (vec_flags & VEC_ANY_SVE)
 	{
 	  HOST_WIDE_INT vnum
 	    = exact_div (addr.const_offset,
-			 BYTES_PER_SVE_PRED).to_constant ();
+			 aarch64_vl_bytes (mode, vec_flags)).to_constant ();
 	  asm_fprintf (f, "[%s, #%wd, mul vl]",
 		       reg_names[REGNO (addr.base)], vnum);
+	  return true;
 	}
-      else
-	asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
-		     INTVAL (addr.offset));
+
+      asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)],
+		   INTVAL (addr.offset));
       return true;
 
     case ADDRESS_REG_REG:
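
For illustration (a sketch, not output taken from the patch): for the
partial mode VNx8QImode, whose size is 4 * vg bytes, a constant offset
of 8 * vg bytes is exactly two "VL"s of that mode, so an extending
byte load from such an address could print as:

    ld1b    z0.h, p0/z, [x0, #2, mul vl]    // #2 = offset / GET_MODE_SIZE (VNx8QImode)

which matches the architectural convention that MUL VL in this form of
LD1B counts multiples of the transferred memory size, here half a vector.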
@@ -9395,7 +9438,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
      can hold MODE, but at the moment we need to handle all modes.
      Just ignore any runtime parts for registers that can't store them.  */
   HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
-  unsigned int nregs;
+  unsigned int nregs, vec_flags;
   switch (regclass)
     {
     case TAILCALL_ADDR_REGS:
@@ -9406,11 +9449,12 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
     case FP_REGS:
     case FP_LO_REGS:
     case FP_LO8_REGS:
-      if (aarch64_sve_data_mode_p (mode)
+      vec_flags = aarch64_classify_vector_mode (mode);
+      if ((vec_flags & VEC_SVE_DATA)
 	  && constant_multiple_p (GET_MODE_SIZE (mode),
-				  BYTES_PER_SVE_VECTOR, &nregs))
+				  aarch64_vl_bytes (mode, vec_flags), &nregs))
 	return nregs;
-      return (aarch64_vector_data_mode_p (mode)
+      return (vec_flags & VEC_ADVSIMD
 	      ? CEIL (lowest_size, UNITS_PER_VREG)
 	      : CEIL (lowest_size, UNITS_PER_WORD));
     case STACK_REG:
@@ -15057,7 +15101,7 @@ static bool
 aarch64_vector_mode_supported_p (machine_mode mode)
 {
   unsigned int vec_flags = aarch64_classify_vector_mode (mode);
-  return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
+  return vec_flags != 0 && (vec_flags & (VEC_STRUCT | VEC_PARTIAL)) == 0;
 }
 
 /* Return the full-width SVE vector mode for element mode MODE, if one
--- a/gcc/genmodes.c
+++ b/gcc/genmodes.c
@@ -53,6 +53,7 @@ struct mode_data
   const char *name;		/* printable mode name -- SI, not SImode */
   enum mode_class cl;		/* this mode class */
+  unsigned int order;		/* top-level sorting order */
   unsigned int precision;	/* size in bits, equiv to TYPE_PRECISION */
   unsigned int bytesize;	/* storage size in addressable units */
   unsigned int ncomponents;	/* number of subunits */
@@ -85,7 +86,7 @@ static struct mode_data *void_mode;
 static const struct mode_data blank_mode = {
   0, "<unknown>", MAX_MODE_CLASS,
-  -1U, -1U, -1U, -1U,
+  0, -1U, -1U, -1U, -1U,
   0, 0, 0, 0, 0, 0,
   "<unknown>", 0, 0, 0, 0, false, false, 0
 };
@@ -484,14 +485,15 @@ make_complex_modes (enum mode_class cl,
     }
 }
 
-/* For all modes in class CL, construct vector modes of width
-   WIDTH, having as many components as necessary.  */
-#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W) \
-  make_vector_modes (MODE_##C, #PREFIX, W, __FILE__, __LINE__)
-#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W)
+/* For all modes in class CL, construct vector modes of width WIDTH,
+   having as many components as necessary.  ORDER is the sorting order
+   of the mode, with smaller numbers indicating a higher priority.  */
+#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W, ORDER) \
+  make_vector_modes (MODE_##C, #PREFIX, W, ORDER, __FILE__, __LINE__)
+#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W, 0)
 
 static void ATTRIBUTE_UNUSED
 make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
-		   const char *file, unsigned int line)
+		   unsigned int order, const char *file, unsigned int line)
 {
   struct mode_data *m;
   struct mode_data *v;
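
For instance, the three new calls in aarch64-modes.def expand to
exactly the six partial modes, since make_vector_modes builds every
component mode narrower than the requested width that yields at least
two components:

    VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1);  /* VNx2QI */
    VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1);  /* VNx4QI VNx2HI */
    VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);  /* VNx8QI VNx4HI VNx2SI */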
@@ -530,6 +532,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
     }
 
   v = new_mode (vclass, xstrdup (buf), file, line);
+  v->order = order;
   v->component = m;
   v->ncomponents = ncomponents;
 }
@@ -832,6 +835,11 @@ cmp_modes (const void *a, const void *b)
   const struct mode_data *const m = *(const struct mode_data *const*)a;
   const struct mode_data *const n = *(const struct mode_data *const*)b;
 
+  if (m->order > n->order)
+    return 1;
+  else if (m->order < n->order)
+    return -1;
+
   if (m->bytesize > n->bytesize)
     return 1;
   else if (m->bytesize < n->bytesize)
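
The intended effect on the sorted mode list is that, within a vector
class, every order-0 mode precedes every order-1 mode regardless of
byte size (a sketch, abbreviated):

    /* MODE_VECTOR_INT after sorting:
       order 0: V8QI ... V16QI ... VNx16QI ... (all full vector modes)
       order 1: VNx2QI, VNx4QI, VNx2HI, VNx8QI, VNx4HI, VNx2SI  */

so a GET_MODE_WIDER walk reaches every full vector mode before any
partial one, as the aarch64-modes.def comment requires.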