Commit 33b5a38c by Tamar Christina Committed by Tamar Christina

aarch64-builtins.c (aarch64_simd_expand_args): Use correct max nunits for endian swap.

2019-01-16  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use
	correct max nunits for endian swap.
	(aarch64_expand_fcmla_builtin): Correct subreg code.
	* config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>,
	aarch64_fcmla_laneq<rot>v4hf, aarch64_fcmlaq_lane<rot><mode>): Correct
	lane endianness.

From-SVN: r267983
parent 4e7cbff5
2019-01-16 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use
correct max nunits for endian swap.
(aarch64_expand_fcmla_builtin): Correct subreg code.
* config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>,
aarch64_fcmla_laneq<rot>v4hf, aarch64_fcmlaq_lane<rot><mode>): Correct
lane endianness.
2019-01-16 Uroš Bizjak <ubizjak@gmail.com> 2019-01-16 Uroš Bizjak <ubizjak@gmail.com>
* config/alpha/alpha.c (alpha_gimplify_va_arg): * config/alpha/alpha.c (alpha_gimplify_va_arg):
......
...@@ -1197,7 +1197,9 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, ...@@ -1197,7 +1197,9 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
= GET_MODE_NUNITS (vmode).to_constant (); = GET_MODE_NUNITS (vmode).to_constant ();
aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp); aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */ /* Keep to GCC-vector-extension lane indices in the RTL. */
op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); int lane = INTVAL (op[opc]);
op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
SImode);
} }
/* Fall through - if the lane index isn't a constant then /* Fall through - if the lane index isn't a constant then
the next case will error. */ the next case will error. */
...@@ -1443,14 +1445,12 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) ...@@ -1443,14 +1445,12 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
int nunits = GET_MODE_NUNITS (quadmode).to_constant (); int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp); aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
/* Keep to GCC-vector-extension lane indices in the RTL. */
lane_idx = aarch64_endian_lane_rtx (quadmode, INTVAL (lane_idx));
/* Generate the correct register and mode. */ /* Generate the correct register and mode. */
int lane = INTVAL (lane_idx); int lane = INTVAL (lane_idx);
if (lane < nunits / 4) if (lane < nunits / 4)
op2 = simplify_gen_subreg (d->mode, op2, quadmode, 0); op2 = simplify_gen_subreg (d->mode, op2, quadmode,
subreg_lowpart_offset (d->mode, quadmode));
else else
{ {
/* Select the upper 64 bits, either a V2SF or V4HF, this however /* Select the upper 64 bits, either a V2SF or V4HF, this however
...@@ -1460,15 +1460,24 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) ...@@ -1460,15 +1460,24 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
gen_highpart_mode generates code that isn't optimal. */ gen_highpart_mode generates code that isn't optimal. */
rtx temp1 = gen_reg_rtx (d->mode); rtx temp1 = gen_reg_rtx (d->mode);
rtx temp2 = gen_reg_rtx (DImode); rtx temp2 = gen_reg_rtx (DImode);
temp1 = simplify_gen_subreg (d->mode, op2, quadmode, 0); temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
subreg_lowpart_offset (d->mode, quadmode));
temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0); temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
emit_insn (gen_aarch64_get_lanev2di (temp2, temp1 , const1_rtx)); if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
else
emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0); op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
/* And recalculate the index. */ /* And recalculate the index. */
lane -= nunits / 4; lane -= nunits / 4;
} }
/* Keep to GCC-vector-extension lane indices in the RTL, only nunits / 4
(max nunits in range check) are valid. Which means only 0-1, so we
only need to know the order in a V2mode. */
lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
if (!target) if (!target)
target = gen_reg_rtx (d->mode); target = gen_reg_rtx (d->mode);
else else
...@@ -1477,8 +1486,7 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) ...@@ -1477,8 +1486,7 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
rtx pat = NULL_RTX; rtx pat = NULL_RTX;
if (d->lane) if (d->lane)
pat = GEN_FCN (d->icode) (target, op0, op1, op2, pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
gen_int_mode (lane, SImode));
else else
pat = GEN_FCN (d->icode) (target, op0, op1, op2); pat = GEN_FCN (d->icode) (target, op0, op1, op2);
......
...@@ -455,7 +455,10 @@ ...@@ -455,7 +455,10 @@
(match_operand:SI 4 "const_int_operand" "n")] (match_operand:SI 4 "const_int_operand" "n")]
FCMLA)))] FCMLA)))]
"TARGET_COMPLEX" "TARGET_COMPLEX"
"fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>" {
operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
[(set_attr "type" "neon_fcmla")] [(set_attr "type" "neon_fcmla")]
) )
...@@ -467,7 +470,10 @@ ...@@ -467,7 +470,10 @@
(match_operand:SI 4 "const_int_operand" "n")] (match_operand:SI 4 "const_int_operand" "n")]
FCMLA)))] FCMLA)))]
"TARGET_COMPLEX" "TARGET_COMPLEX"
"fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>" {
operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
[(set_attr "type" "neon_fcmla")] [(set_attr "type" "neon_fcmla")]
) )
...@@ -479,7 +485,12 @@ ...@@ -479,7 +485,12 @@
(match_operand:SI 4 "const_int_operand" "n")] (match_operand:SI 4 "const_int_operand" "n")]
FCMLA)))] FCMLA)))]
"TARGET_COMPLEX" "TARGET_COMPLEX"
"fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>" {
int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
operands[4]
= gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
[(set_attr "type" "neon_fcmla")] [(set_attr "type" "neon_fcmla")]
) )
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment