Commit d8c84975, authored and committed by Jakub Jelinek

optabs.c (expand_vec_perm): Avoid vector mode punning SUBREGs in SET_DEST.

	* optabs.c (expand_vec_perm): Avoid vector mode punning
	SUBREGs in SET_DEST.
	* expmed.c (store_bit_field_1): Likewise.
	* config/i386/sse.md (movdi_to_sse, vec_pack_sfix_trunc_v2df,
	vec_pack_sfix_v2df, vec_shl_<mode>, vec_shr_<mode>,
	vec_interleave_high<mode>, vec_interleave_low<mode>): Likewise.
	* config/i386/i386.c (ix86_expand_vector_move_misalign,
	ix86_expand_sse_movcc, ix86_expand_int_vcond, ix86_expand_vec_perm,
	ix86_expand_sse_unpack, ix86_expand_args_builtin,
	ix86_expand_vector_init_duplicate, ix86_expand_vector_set,
	emit_reduc_half, expand_vec_perm_blend, expand_vec_perm_pshufb,
	expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
	expand_vec_perm_vpshufb2_vpermq,
	expand_vec_perm_vpshufb2_vpermq_even_odd, expand_vec_perm_even_odd_1,
	expand_vec_perm_broadcast_1, expand_vec_perm_vpshufb4_vpermq2,
	ix86_expand_sse2_mulv4si3, ix86_expand_pinsr): Likewise.
	(expand_vec_perm_palignr): Likewise.  Modify a copy of *d rather
	than *d itself.

From-SVN: r204274
parent 5a9785fb
2013-10-31 Jakub Jelinek <jakub@redhat.com>
* optabs.c (expand_vec_perm): Avoid vector mode punning
SUBREGs in SET_DEST.
* expmed.c (store_bit_field_1): Likewise.
* config/i386/sse.md (movdi_to_sse, vec_pack_sfix_trunc_v2df,
vec_pack_sfix_v2df, vec_shl_<mode>, vec_shr_<mode>,
vec_interleave_high<mode>, vec_interleave_low<mode>): Likewise.
* config/i386/i386.c (ix86_expand_vector_move_misalign,
ix86_expand_sse_movcc, ix86_expand_int_vcond, ix86_expand_vec_perm,
ix86_expand_sse_unpack, ix86_expand_args_builtin,
ix86_expand_vector_init_duplicate, ix86_expand_vector_set,
emit_reduc_half, expand_vec_perm_blend, expand_vec_perm_pshufb,
expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
expand_vec_perm_vpshufb2_vpermq,
expand_vec_perm_vpshufb2_vpermq_even_odd, expand_vec_perm_even_odd_1,
expand_vec_perm_broadcast_1, expand_vec_perm_vpshufb4_vpermq2,
ix86_expand_sse2_mulv4si3, ix86_expand_pinsr): Likewise.
(expand_vec_perm_palignr): Likewise. Modify a copy of *d rather
than *d itself.
2013-10-31 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (ix86_expand_sse2_abs): Rename function arguments.
...@@ -800,10 +800,13 @@ ...@@ -800,10 +800,13 @@
gen_rtx_SUBREG (SImode, operands[1], 4))); gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
operands[2])); operands[2]));
} }
else if (memory_operand (operands[1], DImode)) else if (memory_operand (operands[1], DImode))
emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), {
operands[1], const0_rtx)); rtx tmp = gen_reg_rtx (V2DImode);
emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
}
else else
gcc_unreachable (); gcc_unreachable ();
}) })
...@@ -4208,7 +4211,7 @@ ...@@ -4208,7 +4211,7 @@
(match_operand:V2DF 2 "nonimmediate_operand")] (match_operand:V2DF 2 "nonimmediate_operand")]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx tmp0, tmp1; rtx tmp0, tmp1, tmp2;
if (TARGET_AVX && !TARGET_PREFER_AVX128) if (TARGET_AVX && !TARGET_PREFER_AVX128)
{ {
...@@ -4222,13 +4225,14 @@ ...@@ -4222,13 +4225,14 @@
{ {
tmp0 = gen_reg_rtx (V4SImode); tmp0 = gen_reg_rtx (V4SImode);
tmp1 = gen_reg_rtx (V4SImode); tmp1 = gen_reg_rtx (V4SImode);
tmp2 = gen_reg_rtx (V2DImode);
emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
emit_insn emit_insn (gen_vec_interleave_lowv2di (tmp2,
(gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), gen_lowpart (V2DImode, tmp0),
gen_lowpart (V2DImode, tmp0), gen_lowpart (V2DImode, tmp1)));
gen_lowpart (V2DImode, tmp1))); emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
} }
DONE; DONE;
}) })
...@@ -4289,7 +4293,7 @@ ...@@ -4289,7 +4293,7 @@
(match_operand:V2DF 2 "nonimmediate_operand")] (match_operand:V2DF 2 "nonimmediate_operand")]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx tmp0, tmp1; rtx tmp0, tmp1, tmp2;
if (TARGET_AVX && !TARGET_PREFER_AVX128) if (TARGET_AVX && !TARGET_PREFER_AVX128)
{ {
...@@ -4303,13 +4307,14 @@ ...@@ -4303,13 +4307,14 @@
{ {
tmp0 = gen_reg_rtx (V4SImode); tmp0 = gen_reg_rtx (V4SImode);
tmp1 = gen_reg_rtx (V4SImode); tmp1 = gen_reg_rtx (V4SImode);
tmp2 = gen_reg_rtx (V2DImode);
emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
emit_insn emit_insn (gen_vec_interleave_lowv2di (tmp2,
(gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), gen_lowpart (V2DImode, tmp0),
gen_lowpart (V2DImode, tmp0), gen_lowpart (V2DImode, tmp1)));
gen_lowpart (V2DImode, tmp1))); emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
} }
DONE; DONE;
}) })
...@@ -7328,14 +7333,16 @@ ...@@ -7328,14 +7333,16 @@
(set_attr "mode" "<sseinsnmode>")]) (set_attr "mode" "<sseinsnmode>")])
(define_expand "vec_shl_<mode>" (define_expand "vec_shl_<mode>"
[(set (match_operand:VI_128 0 "register_operand") [(set (match_dup 3)
(ashift:V1TI (ashift:V1TI
(match_operand:VI_128 1 "register_operand") (match_operand:VI_128 1 "register_operand")
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))] (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
(set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
operands[0] = gen_lowpart (V1TImode, operands[0]);
operands[1] = gen_lowpart (V1TImode, operands[1]); operands[1] = gen_lowpart (V1TImode, operands[1]);
operands[3] = gen_reg_rtx (V1TImode);
operands[4] = gen_lowpart (<MODE>mode, operands[3]);
}) })
(define_insn "<sse2_avx2>_ashl<mode>3" (define_insn "<sse2_avx2>_ashl<mode>3"
...@@ -7365,14 +7372,16 @@ ...@@ -7365,14 +7372,16 @@
(set_attr "mode" "<sseinsnmode>")]) (set_attr "mode" "<sseinsnmode>")])
(define_expand "vec_shr_<mode>" (define_expand "vec_shr_<mode>"
[(set (match_operand:VI_128 0 "register_operand") [(set (match_dup 3)
(lshiftrt:V1TI (lshiftrt:V1TI
(match_operand:VI_128 1 "register_operand") (match_operand:VI_128 1 "register_operand")
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))] (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
(set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
operands[0] = gen_lowpart (V1TImode, operands[0]);
operands[1] = gen_lowpart (V1TImode, operands[1]); operands[1] = gen_lowpart (V1TImode, operands[1]);
operands[3] = gen_reg_rtx (V1TImode);
operands[4] = gen_lowpart (<MODE>mode, operands[3]);
}) })
(define_insn "<sse2_avx2>_lshr<mode>3" (define_insn "<sse2_avx2>_lshr<mode>3"
...@@ -8542,12 +8551,13 @@ ...@@ -8542,12 +8551,13 @@
{ {
rtx t1 = gen_reg_rtx (<MODE>mode); rtx t1 = gen_reg_rtx (<MODE>mode);
rtx t2 = gen_reg_rtx (<MODE>mode); rtx t2 = gen_reg_rtx (<MODE>mode);
rtx t3 = gen_reg_rtx (V4DImode);
emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
emit_insn (gen_avx2_permv2ti emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
(gen_lowpart (V4DImode, operands[0]), gen_lowpart (V4DImode, t2),
gen_lowpart (V4DImode, t1), GEN_INT (1 + (3 << 4))));
gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4)))); emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
DONE; DONE;
}) })
...@@ -8559,12 +8569,13 @@ ...@@ -8559,12 +8569,13 @@
{ {
rtx t1 = gen_reg_rtx (<MODE>mode); rtx t1 = gen_reg_rtx (<MODE>mode);
rtx t2 = gen_reg_rtx (<MODE>mode); rtx t2 = gen_reg_rtx (<MODE>mode);
rtx t3 = gen_reg_rtx (V4DImode);
emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
emit_insn (gen_avx2_permv2ti emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
(gen_lowpart (V4DImode, operands[0]), gen_lowpart (V4DImode, t2),
gen_lowpart (V4DImode, t1), GEN_INT (0 + (2 << 4))));
gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4)))); emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
DONE; DONE;
}) })
......
...@@ -624,13 +624,28 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, ...@@ -624,13 +624,28 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
|| (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
{ {
/* Use the subreg machinery either to narrow OP0 to the required /* Use the subreg machinery either to narrow OP0 to the required
words or to cope with mode punning between equal-sized modes. */ words or to cope with mode punning between equal-sized modes.
rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), In the latter case, use subreg on the rhs side, not lhs. */
bitnum / BITS_PER_UNIT); rtx sub;
if (sub)
if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
{ {
emit_move_insn (sub, value); sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
return true; if (sub)
{
emit_move_insn (op0, sub);
return true;
}
}
else
{
sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
bitnum / BITS_PER_UNIT);
if (sub)
{
emit_move_insn (sub, value);
return true;
}
} }
} }
......
...@@ -6624,8 +6624,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) ...@@ -6624,8 +6624,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
icode = direct_optab_handler (vec_perm_const_optab, qimode); icode = direct_optab_handler (vec_perm_const_optab, qimode);
if (icode != CODE_FOR_nothing) if (icode != CODE_FOR_nothing)
{ {
tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target), tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
gen_lowpart (qimode, v0), tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
gen_lowpart (qimode, v1), sel_qi); gen_lowpart (qimode, v1), sel_qi);
if (tmp) if (tmp)
return gen_lowpart (mode, tmp); return gen_lowpart (mode, tmp);
...@@ -6674,7 +6674,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) ...@@ -6674,7 +6674,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
} }
tmp = gen_rtx_CONST_VECTOR (qimode, vec); tmp = gen_rtx_CONST_VECTOR (qimode, vec);
sel = gen_lowpart (qimode, sel); sel = gen_lowpart (qimode, sel);
sel = expand_vec_perm (qimode, sel, sel, tmp, NULL); sel = expand_vec_perm (qimode, gen_reg_rtx (qimode), sel, tmp, NULL);
gcc_assert (sel != NULL); gcc_assert (sel != NULL);
/* Add the byte offset to each byte element. */ /* Add the byte offset to each byte element. */
...@@ -6689,8 +6689,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) ...@@ -6689,8 +6689,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
gcc_assert (sel_qi != NULL); gcc_assert (sel_qi != NULL);
} }
tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target), tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
gen_lowpart (qimode, v0), tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
gen_lowpart (qimode, v1), sel_qi); gen_lowpart (qimode, v1), sel_qi);
if (tmp) if (tmp)
tmp = gen_lowpart (mode, tmp); tmp = gen_lowpart (mode, tmp);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment