Commit 63e46cce by H.J. Lu, committed by H.J. Lu

i386.c (ix86_expand_vector_init_concat): New.

2008-05-19  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (ix86_expand_vector_init_concat): New.
	(ix86_expand_vector_init_interleave): Likewise.
	(ix86_expand_vector_init_general): Use them.  Assert word_mode
	== SImode when n_words == 4.

From-SVN: r135578
parent 16a55411
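Note on the approach (not part of the commit): the rewritten ix86_expand_vector_init_general dispatches to two new helpers. ix86_expand_vector_init_concat builds a vector by loading two-element halves and concatenating them recursively with VEC_CONCAT; ix86_expand_vector_init_interleave handles V8HImode/V16QImode by widening element pairs and interleaving the low halves. The standalone C sketch below models only the shape of the concat recursion on plain integers; the function name and its printed output are illustrative and do not appear anywhere in GCC.

#include <stdio.h>

/* Toy model of the pairwise-concat strategy: build the "vector" of N
   elements starting at BASE by building each half recursively, then
   emitting one concat of the two results -- the same recursion shape
   ix86_expand_vector_init_concat uses with VEC_CONCAT on progressively
   wider modes (e.g. a V4SF assembled from two V2SF halves).
   Assumes N is a power of two, as in the V2/V4 cases handled here.  */
static void
build_by_concat (int base, int n, int depth)
{
  if (n == 1)
    {
      printf ("%*sload element %d\n", depth * 2, "", base);
      return;
    }

  /* Build the two halves first, then concatenate the results.  */
  build_by_concat (base, n / 2, depth + 1);
  build_by_concat (base + n / 2, n / 2, depth + 1);
  printf ("%*sconcat %d elements starting at %d\n", depth * 2, "", n, base);
}

int
main (void)
{
  build_by_concat (0, 4, 0);   /* e.g. a 4-element vector init */
  return 0;
}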
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2008-05-19  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config/i386/i386.c (ix86_expand_vector_init_concat): New.
+	(ix86_expand_vector_init_interleave): Likewise.
+	(ix86_expand_vector_init_general): Use them.  Assert word_mode
+	== SImode when n_words == 4.
+
 2008-05-19  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.c (ix86_secondary_reload): New static function.
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23869,75 +23869,147 @@ ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
   return true;
 }
 
-/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
-   all values variable, and none identical.  */
-
-static void
-ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
-                                 rtx target, rtx vals)
-{
-  enum machine_mode half_mode = GET_MODE_INNER (mode);
-  rtx op0 = NULL, op1 = NULL;
-  bool use_vec_concat = false;
-
-  switch (mode)
-    {
-    case V2SFmode:
-    case V2SImode:
-      if (!mmx_ok && !TARGET_SSE)
-        break;
-      /* FALLTHRU */
-
-    case V2DFmode:
-    case V2DImode:
-      /* For the two element vectors, we always implement VEC_CONCAT.  */
-      op0 = XVECEXP (vals, 0, 0);
-      op1 = XVECEXP (vals, 0, 1);
-      use_vec_concat = true;
-      break;
-
-    case V4SFmode:
-      half_mode = V2SFmode;
-      goto half;
-    case V4SImode:
-      half_mode = V2SImode;
-      goto half;
-    half:
-      {
-        rtvec v;
-
-        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
-           Recurse to load the two halves.  */
-
-        op1 = gen_reg_rtx (half_mode);
-        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
-        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
-
-        op0 = gen_reg_rtx (half_mode);
-        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
-        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
-
-        use_vec_concat = true;
-      }
-      break;
-
-    case V8HImode:
-      if (TARGET_SSE2)
-        {
-          rtx ops[4];
-          unsigned int i, j;
-
-          for (i = 0; i < ARRAY_SIZE (ops); i++)
-            {
-              /* Extend the odd elment from HImode to SImode using
-                 a paradoxical SUBREG.  */
-              op0 = gen_reg_rtx (SImode);
-              emit_move_insn (op0, gen_lowpart (SImode,
-                                                XVECEXP (vals, 0,
-                                                         i + i)));
-
-              /* Insert the SImode value as low element of V4SImode
-                 vector. */
-              op1 = gen_reg_rtx (V4SImode);
-              op0 = gen_rtx_VEC_MERGE (V4SImode,
-                                       gen_rtx_VEC_DUPLICATE (V4SImode,
+/* A subroutine of ix86_expand_vector_init_general.  Use vector
+   concatenate to handle the most general case: all values variable,
+   and none identical.  */
+
+static void
+ix86_expand_vector_init_concat (enum machine_mode mode,
+                                rtx target, rtx *ops, int n)
+{
+  enum machine_mode cmode, hmode = VOIDmode;
+  rtx first[8], second[4];
+  rtvec v;
+  int i, j;
+
+  switch (n)
+    {
+    case 2:
+      switch (mode)
+        {
+        case V4SImode:
+          cmode = V2SImode;
+          break;
+        case V4SFmode:
+          cmode = V2SFmode;
+          break;
+        case V2DImode:
+          cmode = DImode;
+          break;
+        case V2SImode:
+          cmode = SImode;
+          break;
+        case V2DFmode:
+          cmode = DFmode;
+          break;
+        case V2SFmode:
+          cmode = SFmode;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+
+      if (!register_operand (ops[1], cmode))
+        ops[1] = force_reg (cmode, ops[1]);
+      if (!register_operand (ops[0], cmode))
+        ops[0] = force_reg (cmode, ops[0]);
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+                              gen_rtx_VEC_CONCAT (mode, ops[0],
+                                                  ops[1])));
+      break;
+
+    case 4:
+      switch (mode)
+        {
+        case V4SImode:
+          cmode = V2SImode;
+          break;
+        case V4SFmode:
+          cmode = V2SFmode;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      goto half;
+
+half:
+      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
+      i = n - 1;
+      j = (n >> 1) - 1;
+      for (; i > 0; i -= 2, j--)
+        {
+          first[j] = gen_reg_rtx (cmode);
+          v = gen_rtvec (2, ops[i - 1], ops[i]);
+          ix86_expand_vector_init (false, first[j],
+                                   gen_rtx_PARALLEL (cmode, v));
+        }
+
+      n >>= 1;
+      if (n > 2)
+        {
+          gcc_assert (hmode != VOIDmode);
+          for (i = j = 0; i < n; i += 2, j++)
+            {
+              second[j] = gen_reg_rtx (hmode);
+              ix86_expand_vector_init_concat (hmode, second [j],
+                                              &first [i], 2);
+            }
+          n >>= 1;
+          ix86_expand_vector_init_concat (mode, target, second, n);
+        }
+      else
+        ix86_expand_vector_init_concat (mode, target, first, n);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* A subroutine of ix86_expand_vector_init_general.  Use vector
+   interleave to handle the most general case: all values variable,
+   and none identical.  */
+
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+                                    rtx target, rtx *ops, int n)
+{
+  enum machine_mode first_imode, second_imode, third_imode;
+  int i, j;
+  rtx op0, op1;
+  rtx (*gen_load_even) (rtx, rtx, rtx);
+  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+
+  switch (mode)
+    {
+    case V8HImode:
+      gen_load_even = gen_vec_setv8hi;
+      gen_interleave_first_low = gen_vec_interleave_lowv4si;
+      gen_interleave_second_low = gen_vec_interleave_lowv2di;
+      first_imode = V4SImode;
+      second_imode = V2DImode;
+      third_imode = VOIDmode;
+      break;
+    case V16QImode:
+      gen_load_even = gen_vec_setv16qi;
+      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+      gen_interleave_second_low = gen_vec_interleave_lowv4si;
+      first_imode = V8HImode;
+      second_imode = V4SImode;
+      third_imode = V2DImode;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  for (i = 0; i < n; i++)
+    {
+      /* Extend the odd elment to SImode using a paradoxical SUBREG.  */
+      op0 = gen_reg_rtx (SImode);
+      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
+
+      /* Insert the SImode value as low element of V4SImode vector. */
+      op1 = gen_reg_rtx (V4SImode);
+      op0 = gen_rtx_VEC_MERGE (V4SImode,
+                               gen_rtx_VEC_DUPLICATE (V4SImode,
@@ -23946,117 +24018,121 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
-                                       const1_rtx);
-              emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
-              /* Cast the V4SImode vector back to a V8HImode vector.  */
-              op0 = gen_reg_rtx (mode);
-              emit_move_insn (op0, gen_lowpart (mode, op1));
-
-              /* Load even HI elements into the second positon.  */
-              emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
-                                                        i + i + 1),
-                                          const1_rtx));
-
-              /* Cast V8HImode vector to V4SImode vector.  */
-              ops[i] = gen_reg_rtx (V4SImode);
-              emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
-            }
-
-          /* Interleave low V4SIs.  */
-          for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
-            {
-              op0 = gen_reg_rtx (V4SImode);
-              emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
-                                                     ops[i + 1]));
-
-              /* Cast V4SImode vectors to V2DImode vectors.  */
-              op1 = gen_reg_rtx (V2DImode);
-              emit_move_insn (op1, gen_lowpart (V2DImode, op0));
-              ops[j] = op1;
-            }
-
-          /* Interleave low V2DIs.  */
-          op0 = gen_reg_rtx (V2DImode);
-          emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
-
-          /* Cast the V2DImode vector back to a V8HImode vector.  */
-          emit_insn (gen_rtx_SET (VOIDmode, target,
-                                  gen_lowpart (mode, op0)));
-          return;
-        }
-
-    case V16QImode:
-      if (TARGET_SSE4_1)
-        {
-          rtx ops[8];
-          unsigned int i, j;
-
-          for (i = 0; i < ARRAY_SIZE (ops); i++)
-            {
-              /* Extend the odd elment from QImode to SImode using
-                 a paradoxical SUBREG.  */
-              op0 = gen_reg_rtx (SImode);
-              emit_move_insn (op0, gen_lowpart (SImode,
-                                                XVECEXP (vals, 0,
-                                                         i + i)));
-
-              /* Insert the SImode value as low element of V4SImode
-                 vector. */
-              op1 = gen_reg_rtx (V4SImode);
-              op0 = gen_rtx_VEC_MERGE (V4SImode,
-                                       gen_rtx_VEC_DUPLICATE (V4SImode,
-                                                              op0),
-                                       CONST0_RTX (V4SImode),
-                                       const1_rtx);
-              emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
-              /* Cast the V4SImode vector back to a V16QImode vector.  */
-              op0 = gen_reg_rtx (mode);
-              emit_move_insn (op0, gen_lowpart (mode, op1));
-
-              /* Load even QI elements into the second positon.  */
-              emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
-                                                         i + i + 1),
-                                           const1_rtx));
-
-              /* Cast V16QImode vector to V8HImode vector.  */
-              ops[i] = gen_reg_rtx (V8HImode);
-              emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
-            }
-
-          /* Interleave low V8HIs.  */
-          for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
-            {
-              op0 = gen_reg_rtx (V8HImode);
-              emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
-                                                     ops[i + 1]));
-
-              /* Cast V8HImode vector to V4SImode vector.  */
-              op1 = gen_reg_rtx (V4SImode);
-              emit_move_insn (op1, gen_lowpart (V4SImode, op0));
-              ops[j] = op1;
-            }
-
-          /* Interleave low V4SIs.  */
-          for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++)
-            {
-              op0 = gen_reg_rtx (V4SImode);
-              emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
-                                                     ops[i + 1]));
-
-              /* Cast V4SImode vectors to V2DImode vectors.  */
-              op1 = gen_reg_rtx (V2DImode);
-              emit_move_insn (op1, gen_lowpart (V2DImode, op0));
-              ops[j] = op1;
-            }
-
-          /* Interleave low V2DIs.  */
-          op0 = gen_reg_rtx (V2DImode);
-          emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
-
-          /* Cast the V2DImode vector back to a V8HImode vector.  */
-          emit_insn (gen_rtx_SET (VOIDmode, target,
-                                  gen_lowpart (mode, op0)));
-          return;
-        }
-
+                               const1_rtx);
+      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+      /* Cast the V4SImode vector back to a vector in orignal mode.  */
+      op0 = gen_reg_rtx (mode);
+      emit_move_insn (op0, gen_lowpart (mode, op1));
+
+      /* Load even elements into the second positon.  */
+      emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+                                   const1_rtx));
+
+      /* Cast vector to FIRST_IMODE vector.  */
+      ops[i] = gen_reg_rtx (first_imode);
+      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+    }
+
+  /* Interleave low FIRST_IMODE vectors.  */
+  for (i = j = 0; i < n; i += 2, j++)
+    {
+      op0 = gen_reg_rtx (first_imode);
+      emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
+
+      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
+      ops[j] = gen_reg_rtx (second_imode);
+      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+    }
+
+  /* Interleave low SECOND_IMODE vectors.  */
+  switch (second_imode)
+    {
+    case V4SImode:
+      for (i = j = 0; i < n / 2; i += 2, j++)
+        {
+          op0 = gen_reg_rtx (second_imode);
+          emit_insn ((*gen_interleave_second_low) (op0, ops[i],
+                                                   ops[i + 1]));
+
+          /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+             vector.  */
+          ops[j] = gen_reg_rtx (third_imode);
+          emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+        }
+      second_imode = V2DImode;
+      gen_interleave_second_low = gen_vec_interleave_lowv2di;
+      /* FALLTHRU */
+
+    case V2DImode:
+      op0 = gen_reg_rtx (second_imode);
+      emit_insn ((*gen_interleave_second_low) (op0, ops[0],
+                                               ops[1]));
+
+      /* Cast the SECOND_IMODE vector back to a vector on original
+         mode.  */
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+                              gen_lowpart (mode, op0)));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
+   all values variable, and none identical.  */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+                                 rtx target, rtx vals)
+{
+  rtx ops[32], op0, op1;
+  enum machine_mode half_mode = VOIDmode;
+  int n, i;
+
+  switch (mode)
+    {
+    case V2SFmode:
+    case V2SImode:
+      if (!mmx_ok && !TARGET_SSE)
+        break;
+
+      n = 2;
+      goto vec_concat;
+
+    case V4SFmode:
+    case V4SImode:
+      n = 4;
+      goto vec_concat;
+
+    case V2DFmode:
+    case V2DImode:
+      n = 2;
+      goto vec_concat;
+
+vec_concat:
+      for (i = 0; i < n; i++)
+        ops[i] = XVECEXP (vals, 0, i);
+      ix86_expand_vector_init_concat (mode, target, ops, n);
+      return;
+
+    case V16QImode:
+      if (!TARGET_SSE4_1)
+        break;
+
+      n = 16;
+      goto vec_interleave;
+
+    case V8HImode:
+      if (!TARGET_SSE2)
+        break;
+
+      n = 8;
+      goto vec_interleave;
+
+vec_interleave:
+      for (i = 0; i < n; i++)
+        ops[i] = XVECEXP (vals, 0, i);
+      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+      return;
+
     case V4HImode:
     case V8QImode:
@@ -24066,17 +24142,6 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
       gcc_unreachable ();
     }
 
-  if (use_vec_concat)
-    {
-      if (!register_operand (op1, half_mode))
-        op1 = force_reg (half_mode, op1);
-      if (!register_operand (op0, half_mode))
-        op0 = force_reg (half_mode, op0);
-      emit_insn (gen_rtx_SET (VOIDmode, target,
-                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
-    }
-  else
     {
       int i, j, n_elts, n_words, n_elt_per_word;
       enum machine_mode inner_mode;
@@ -24124,6 +24189,7 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
       else if (n_words == 4)
         {
           rtx tmp = gen_reg_rtx (V4SImode);
+          gcc_assert (word_mode == SImode);
           vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
           ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
           emit_move_insn (target, gen_lowpart (mode, tmp));