Commit 7e7cfcf6, authored by Ulrich Weigand, committed by Ira Rosen

re PR target/48252 (ARM neon: problem with consecutive vzip, vuzp and vtrn)


	PR target/48252
	* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
	to match neon_vzip/vuzp/vtrn_internal.
	* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
	outputs explicitly dependent on both inputs.
	(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.


Co-Authored-By: Ira Rosen <ira.rosen@linaro.org>

From-SVN: r172639
parent 49eab32e
2011-04-18 Ulrich Weigand <ulrich.weigand@linaro.org>
Ira Rosen <ira.rosen@linaro.org>
PR target/48252
* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
to match neon_vzip/vuzp/vtrn_internal.
* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
outputs explicitly dependent on both inputs.
(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
2011-04-18 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/48616
......
......@@ -19632,7 +19632,7 @@ neon_emit_pair_result_insn (enum machine_mode mode,
rtx tmp1 = gen_reg_rtx (mode);
rtx tmp2 = gen_reg_rtx (mode);
emit_insn (intfn (tmp1, op1, tmp2, op2));
emit_insn (intfn (tmp1, op1, op2, tmp2));
emit_move_insn (mem, tmp1);
mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
......
......@@ -4092,13 +4092,14 @@
(define_insn "neon_vtrn<mode>_internal"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
UNSPEC_VTRN1))
(set (match_operand:VDQW 2 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VTRN2))]
"TARGET_NEON"
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 2 "s_register_operand" "w")]
UNSPEC_VTRN1))
(set (match_operand:VDQW 3 "s_register_operand" "=2")
(unspec:VDQW [(match_dup 1) (match_dup 2)]
UNSPEC_VTRN2))]
"TARGET_NEON"
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
(const_string "neon_bp_simple")
......@@ -4118,13 +4119,14 @@
(define_insn "neon_vzip<mode>_internal"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 2 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VZIP2))]
"TARGET_NEON"
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 2 "s_register_operand" "w")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 3 "s_register_operand" "=2")
(unspec:VDQW [(match_dup 1) (match_dup 2)]
UNSPEC_VZIP2))]
"TARGET_NEON"
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
(const_string "neon_bp_simple")
......@@ -4144,13 +4146,14 @@
(define_insn "neon_vuzp<mode>_internal"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
(match_operand:VDQW 2 "s_register_operand" "w")]
UNSPEC_VUZP1))
(set (match_operand:VDQW 2 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VUZP2))]
(set (match_operand:VDQW 3 "s_register_operand" "=2")
(unspec:VDQW [(match_dup 1) (match_dup 2)]
UNSPEC_VUZP2))]
"TARGET_NEON"
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
(const_string "neon_bp_simple")
......
2011-04-18 Ulrich Weigand <ulrich.weigand@linaro.org>
Ira Rosen <ira.rosen@linaro.org>
PR target/48252
* gcc.target/arm/pr48252.c: New test.
2011-04-18 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/48616
......
/* { dg-do run } */
/* { dg-require-effective-target arm_neon_hw } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
#include "arm_neon.h"
#include <stdlib.h>
/* Regression test for PR target/48252: issue two consecutive vzip_u8
   operations and verify the upper half of the second result.
   Calls abort () on a mismatch; returns 0 when the check passes.  */
int main(void)
{
/* Input vectors: every lane of v1 is 1, every lane of v2 is 2.  */
uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
/* Two-vector results of the two zip operations.  */
uint8x8x2_t vd1, vd2;
/* Byte buffers (buf) used as vst1_u8 destinations for the four result halves.  */
union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
int i;
/* Back-to-back zips, each pairing an input vector with an all-zero vector.
   Keep these two calls adjacent: the PR is about consecutive vzip/vuzp/vtrn.  */
vd1 = vzip_u8(v1, vdup_n_u8(0));
vd2 = vzip_u8(v2, vdup_n_u8(0));
/* Store both halves of both results to memory.  Only d4 is inspected below;
   d1, d2 and d3 are written but never read back.  */
vst1_u8(d1.buf, vd1.val[0]);
vst1_u8(d2.buf, vd1.val[1]);
vst1_u8(d3.buf, vd2.val[0]);
vst1_u8(d4.buf, vd2.val[1]);
/* The second half of the second zip must hold 2 at even indices (from v2)
   and 0 at odd indices (from the zero vector); anything else is a failure.  */
for (i = 0; i < 8; i++)
if ((i % 2 == 0 && d4.buf[i] != 2)
|| (i % 2 == 1 && d4.buf[i] != 0))
abort ();
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment