Commit 2f7d18dd by Charles Baylis Committed by Charles Baylis

[ARM] PR63870 Add qualifiers for NEON builtins

2015-11-11  Charles Baylis  <charles.baylis@linaro.org>

	PR target/63870
	* config/arm/arm-builtins.c (enum arm_type_qualifiers): New enumerator
	qualifier_struct_load_store_lane_index.
	(builtin_arg): New enumerator NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX.
	(arm_expand_neon_args): New parameter. Remove ellipsis. Handle NEON
	argument qualifiers.
	(arm_expand_neon_builtin): Handle new NEON argument qualifier.
	* config/arm/arm.h (NEON_ENDIAN_LANE_N): New macro.

From-SVN: r230142
parent 493b929a
2015-11-11 Charles Baylis <charles.baylis@linaro.org>
PR target/63870
* config/arm/arm-builtins.c (enum arm_type_qualifiers): New enumerator
qualifier_struct_load_store_lane_index.
(builtin_arg): New enumerator NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX.
(arm_expand_neon_args): New parameter. Remove ellipsis. Handle NEON
argument qualifiers.
(arm_expand_neon_builtin): Handle new NEON argument qualifier.
* config/arm/arm.h (NEON_ENDIAN_LANE_N): New macro.
2015-11-10 Nathan Sidwell <nathan@codesourcery.com> 2015-11-10 Nathan Sidwell <nathan@codesourcery.com>
* config/nvptx/nvptx.opt (moptimize): New flag. * config/nvptx/nvptx.opt (moptimize): New flag.
...@@ -67,7 +67,9 @@ enum arm_type_qualifiers ...@@ -67,7 +67,9 @@ enum arm_type_qualifiers
/* Polynomial types. */ /* Polynomial types. */
qualifier_poly = 0x100, qualifier_poly = 0x100,
/* Lane indices - must be within range of previous argument = a vector. */ /* Lane indices - must be within range of previous argument = a vector. */
qualifier_lane_index = 0x200 qualifier_lane_index = 0x200,
/* Lane indices for single lane structure loads and stores. */
qualifier_struct_load_store_lane_index = 0x400
}; };
/* The qualifier_internal allows generation of a unary builtin from /* The qualifier_internal allows generation of a unary builtin from
...@@ -1963,6 +1965,7 @@ typedef enum { ...@@ -1963,6 +1965,7 @@ typedef enum {
NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
NEON_ARG_LANE_INDEX, NEON_ARG_LANE_INDEX,
NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
NEON_ARG_MEMORY, NEON_ARG_MEMORY,
NEON_ARG_STOP NEON_ARG_STOP
} builtin_arg; } builtin_arg;
...@@ -2020,9 +2023,9 @@ neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode, ...@@ -2020,9 +2023,9 @@ neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
/* Expand a Neon builtin. */ /* Expand a Neon builtin. */
static rtx static rtx
arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
int icode, int have_retval, tree exp, ...) int icode, int have_retval, tree exp,
builtin_arg *args)
{ {
va_list ap;
rtx pat; rtx pat;
tree arg[SIMD_MAX_BUILTIN_ARGS]; tree arg[SIMD_MAX_BUILTIN_ARGS];
rtx op[SIMD_MAX_BUILTIN_ARGS]; rtx op[SIMD_MAX_BUILTIN_ARGS];
...@@ -2037,13 +2040,11 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, ...@@ -2037,13 +2040,11 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
|| !(*insn_data[icode].operand[0].predicate) (target, tmode))) || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
target = gen_reg_rtx (tmode); target = gen_reg_rtx (tmode);
va_start (ap, exp);
formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode])); formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
for (;;) for (;;)
{ {
builtin_arg thisarg = (builtin_arg) va_arg (ap, int); builtin_arg thisarg = args[argc];
if (thisarg == NEON_ARG_STOP) if (thisarg == NEON_ARG_STOP)
break; break;
...@@ -2079,6 +2080,18 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, ...@@ -2079,6 +2080,18 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
op[argc] = copy_to_mode_reg (mode[argc], op[argc]); op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
break; break;
case NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
gcc_assert (argc > 1);
if (CONST_INT_P (op[argc]))
{
neon_lane_bounds (op[argc], 0,
GET_MODE_NUNITS (map_mode), exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
op[argc] =
GEN_INT (NEON_ENDIAN_LANE_N (map_mode, INTVAL (op[argc])));
}
goto constant_arg;
case NEON_ARG_LANE_INDEX: case NEON_ARG_LANE_INDEX:
/* Previous argument must be a vector, which this indexes. */ /* Previous argument must be a vector, which this indexes. */
gcc_assert (argc > 0); gcc_assert (argc > 0);
...@@ -2089,19 +2102,22 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, ...@@ -2089,19 +2102,22 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
} }
/* Fall through - if the lane index isn't a constant then /* Fall through - if the lane index isn't a constant then
the next case will error. */ the next case will error. */
case NEON_ARG_CONSTANT: case NEON_ARG_CONSTANT:
constant_arg:
if (!(*insn_data[icode].operand[opno].predicate) if (!(*insn_data[icode].operand[opno].predicate)
(op[argc], mode[argc])) (op[argc], mode[argc]))
error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " {
"expected %<const int%>", argc + 1); error ("%Kargument %d must be a constant immediate",
exp, argc + 1);
return const0_rtx;
}
break; break;
case NEON_ARG_MEMORY: case NEON_ARG_MEMORY:
/* Check if expand failed. */ /* Check if expand failed. */
if (op[argc] == const0_rtx) if (op[argc] == const0_rtx)
{
va_end (ap);
return 0; return 0;
}
gcc_assert (MEM_P (op[argc])); gcc_assert (MEM_P (op[argc]));
PUT_MODE (op[argc], mode[argc]); PUT_MODE (op[argc], mode[argc]);
/* ??? arm_neon.h uses the same built-in functions for signed /* ??? arm_neon.h uses the same built-in functions for signed
...@@ -2122,8 +2138,6 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, ...@@ -2122,8 +2138,6 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
} }
} }
va_end (ap);
if (have_retval) if (have_retval)
switch (argc) switch (argc)
{ {
...@@ -2235,6 +2249,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) ...@@ -2235,6 +2249,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
if (d->qualifiers[qualifiers_k] & qualifier_lane_index) if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
args[k] = NEON_ARG_LANE_INDEX; args[k] = NEON_ARG_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
args[k] = NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
else if (d->qualifiers[qualifiers_k] & qualifier_immediate) else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
args[k] = NEON_ARG_CONSTANT; args[k] = NEON_ARG_CONSTANT;
else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
...@@ -2260,11 +2276,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) ...@@ -2260,11 +2276,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
the function is void, and a 1 if it is not. */ the function is void, and a 1 if it is not. */
return arm_expand_neon_args return arm_expand_neon_args
(target, d->mode, fcode, icode, !is_void, exp, (target, d->mode, fcode, icode, !is_void, exp,
args[1], &args[1]);
args[2],
args[3],
args[4],
NEON_ARG_STOP);
} }
/* Expand an expression EXP that calls a built-in function, /* Expand an expression EXP that calls a built-in function,
......
...@@ -30103,4 +30103,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, ...@@ -30103,4 +30103,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
*pri = tmp; *pri = tmp;
return; return;
} }
#include "gt-arm.h" #include "gt-arm.h"
...@@ -284,6 +284,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void); ...@@ -284,6 +284,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_BPABI false #define TARGET_BPABI false
#endif #endif
/* Transform lane numbers on big endian targets. This is used to allow for the
endianness difference between NEON architectural lane numbers and those
used in RTL */
#define NEON_ENDIAN_LANE_N(mode, n) \
(BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
/* Support for a compile-time default CPU, et cetera. The rules are: /* Support for a compile-time default CPU, et cetera. The rules are:
--with-arch is ignored if -march or -mcpu are specified. --with-arch is ignored if -march or -mcpu are specified.
--with-cpu is ignored if -march or -mcpu are specified, and is overridden --with-cpu is ignored if -march or -mcpu are specified, and is overridden
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment