Commit 16122c22 by Michael Meissner Committed by Michael Meissner

re PR target/79799 (Improve vec_insert of float on Power9)

[gcc]
2017-06-20  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/79799
	* config/rs6000/rs6000.c (rs6000_expand_vector_set): Add support
	for doing vector set of SFmode on ISA 3.0.
	* config/rs6000/vsx.md (vsx_set_v4sf_p9): Likewise.
	(vsx_set_v4sf_p9_zero): Special case setting 0.0f to a V4SF
	element.
	(vsx_insert_extract_v4sf_p9): Add an optimization for inserting a
	SFmode value into a V4SF variable that was extracted from another
	V4SF variable without converting the element to double precision
	and back to single precision vector format.
	(vsx_insert_extract_v4sf_p9_2): Likewise.

[gcc/testsuite]
2017-06-20  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/79799
	* gcc.target/powerpc/pr79799-1.c: New test.
	* gcc.target/powerpc/pr79799-2.c: Likewise.
	* gcc.target/powerpc/pr79799-3.c: Likewise.
	* gcc.target/powerpc/pr79799-4.c: Likewise.
	* gcc.target/powerpc/pr79799-5.c: Likewise.

From-SVN: r249395
parent 62be3709
2017-06-20 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/79799
* config/rs6000/rs6000.c (rs6000_expand_vector_set): Add support
for doing vector set of SFmode on ISA 3.0.
* config/rs6000/vsx.md (vsx_set_v4sf_p9): Likewise.
(vsx_set_v4sf_p9_zero): Special case setting 0.0f to a V4SF
element.
(vsx_insert_extract_v4sf_p9): Add an optimization for inserting a
SFmode value into a V4SF variable that was extracted from another
V4SF variable without converting the element to double precision
and back to single precision vector format.
(vsx_insert_extract_v4sf_p9_2): Likewise.
2017-06-19 Jakub Jelinek <jakub@redhat.com>
* tree-ssa-structalias.c (get_constraint_for_ptr_offset): Multiply
......
......@@ -7451,6 +7451,8 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
else if (mode == V16QImode)
insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
else if (mode == V4SFmode)
insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
}
if (insn)
......@@ -3173,6 +3173,134 @@
}
[(set_attr "type" "vecperm")])
;; Set one SFmode element of a V4SF vector (vector set of SFmode on ISA 3.0).
;; Operand 1 is the incoming vector and is tied to the output register
;; (constraint "0"), operand 2 is the scalar SFmode value and operand 3 is the
;; constant element number (0..3).  The insn is emitted as "#" and is split
;; after reload into three sets that go through the SImode scratch (operand 4).
(define_insn_and_split "vsx_set_v4sf_p9"
[(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
(unspec:V4SF
[(match_operand:V4SF 1 "gpc_reg_operand" "0")
(match_operand:SF 2 "gpc_reg_operand" "ww")
(match_operand:QI 3 "const_0_to_3_operand" "n")]
UNSPEC_VSX_SET))
(clobber (match_scratch:SI 4 "=&wJwK"))]
"VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
"#"
"&& reload_completed"
;; Step 1: UNSPEC_VSX_CVDPSPN of the scalar, written to the scratch register
;; viewed as V4SF (operand 5).
[(set (match_dup 5)
(unspec:V4SF [(match_dup 2)]
UNSPEC_VSX_CVDPSPN))
;; Step 2: select element <operand 7> of the scratch register viewed as V4SI
;; (operand 6) back into the SImode scratch.
(parallel [(set (match_dup 4)
(vec_select:SI (match_dup 6)
(parallel [(match_dup 7)])))
(clobber (scratch:SI))])
;; Step 3: UNSPEC_VSX_SET of that SImode value into element <operand 3> of the
;; destination viewed as V4SI (operand 8).
(set (match_dup 8)
(unspec:V4SI [(match_dup 8)
(match_dup 4)
(match_dup 3)]
UNSPEC_VSX_SET))]
{
/* Operands 5 and 6 are the scratch register (operand 4) re-expressed in
   V4SFmode and V4SImode respectively.  */
unsigned int tmp_regno = reg_or_subregno (operands[4]);
operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
/* Element to select from the converted value: 1 when VECTOR_ELT_ORDER_BIG,
   otherwise 2.  */
operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
/* Operand 8 is the destination register re-expressed in V4SImode.  */
operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
[(set_attr "type" "vecperm")
(set_attr "length" "12")])
;; Special case setting 0.0f to a V4SF element.
;; Operand 2 must satisfy zero_fp_constant, so no floating point conversion of
;; the scalar is part of the split: after reload the SImode scratch (operand 4)
;; is set to (const_int 0) and then placed into element <operand 3> of the
;; destination with UNSPEC_VSX_SET.  Operand 1 is tied to the output register.
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
[(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
(unspec:V4SF
[(match_operand:V4SF 1 "gpc_reg_operand" "0")
(match_operand:SF 2 "zero_fp_constant" "j")
(match_operand:QI 3 "const_0_to_3_operand" "n")]
UNSPEC_VSX_SET))
(clobber (match_scratch:SI 4 "=&wJwK"))]
"VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
"#"
"&& reload_completed"
[(set (match_dup 4)
(const_int 0))
(set (match_dup 5)
(unspec:V4SI [(match_dup 5)
(match_dup 4)
(match_dup 3)]
UNSPEC_VSX_SET))]
{
/* Operand 5 is the destination register re-expressed in V4SImode.  */
operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
[(set_attr "type" "vecperm")
(set_attr "length" "8")])
;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian). We just need to do an xxinsertw since the element is in the
;; correct location.
;; Operand 1 (v1) is tied to the output register, operand 2 is v2, operand 3
;; is n and operand 4 is m.  The insn condition only accepts n equal to
;; (VECTOR_ELT_ORDER_BIG ? 1 : 2).
(define_insn "*vsx_insert_extract_v4sf_p9"
[(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
(unspec:V4SF
[(match_operand:V4SF 1 "gpc_reg_operand" "0")
(vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
(parallel
[(match_operand:QI 3 "const_0_to_3_operand" "n")]))
(match_operand:QI 4 "const_0_to_3_operand" "n")]
UNSPEC_VSX_SET))]
"VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64
&& (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
{
int ele = INTVAL (operands[4]);
/* When the element order is not big, mirror the element number
   (nunits - 1 - ele).  */
if (!VECTOR_ELT_ORDER_BIG)
ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
/* Operand 4 is rewritten to ele * GET_MODE_SIZE (SFmode) before it is
   printed through %4 -- presumably the byte offset xxinsertw expects;
   confirm against the ISA 3.0 definition.  */
operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
return "xxinsertw %x0,%x2,%4";
}
[(set_attr "type" "vecperm")])
;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian). Convert the insert/extract to int and avoid doing the conversion.
;; The split rewrites the operation as a V4SI element extract into an SImode
;; temporary (operand 5) followed by a V4SI UNSPEC_VSX_SET.  The split
;; condition is "&& 1", i.e. it is not gated on reload_completed.
(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
[(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
(unspec:V4SF
[(match_operand:V4SF 1 "gpc_reg_operand" "0")
(vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
(parallel
[(match_operand:QI 3 "const_0_to_3_operand" "n")]))
(match_operand:QI 4 "const_0_to_3_operand" "n")]
UNSPEC_VSX_SET))
(clobber (match_scratch:SI 5 "=&wJwK"))]
"VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
&& TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
&& TARGET_UPPER_REGS_DI && TARGET_POWERPC64
&& (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
"#"
"&& 1"
[(parallel [(set (match_dup 5)
(vec_select:SI (match_dup 6)
(parallel [(match_dup 3)])))
(clobber (scratch:SI))])
(set (match_dup 7)
(unspec:V4SI [(match_dup 8)
(match_dup 5)
(match_dup 4)]
UNSPEC_VSX_SET))]
{
/* Operand 5 can still be a bare SCRATCH when the split runs; give it a
   fresh SImode pseudo in that case.  */
if (GET_CODE (operands[5]) == SCRATCH)
operands[5] = gen_reg_rtx (SImode);
/* Operands 6, 7 and 8 are V4SImode views (gen_lowpart) of the source
   vector, the destination and the tied input vector respectively.  */
operands[6] = gen_lowpart (V4SImode, operands[2]);
operands[7] = gen_lowpart (V4SImode, operands[0]);
operands[8] = gen_lowpart (V4SImode, operands[1]);
}
[(set_attr "type" "vecperm")])
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
[(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
......
2017-06-20 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/79799
* gcc.target/powerpc/pr79799-1.c: New test.
* gcc.target/powerpc/pr79799-2.c: Likewise.
* gcc.target/powerpc/pr79799-3.c: Likewise.
* gcc.target/powerpc/pr79799-4.c: Likewise.
* gcc.target/powerpc/pr79799-5.c: Likewise.
2017-06-19 Nathan Sidwell <nathan@acm.org>
PR c++/81124
......
/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-options "-mcpu=power9 -O2" } */
#include <altivec.h>
/* GCC 7.1 did not have a specialized method for inserting 32-bit floating
point on ISA 3.0 (power9) systems. */
/* Return VF with element 0 replaced by the scalar F.  */
vector float
insert_arg_0 (vector float vf, float f)
{
  vector float updated = vec_insert (f, vf, 0);

  return updated;
}
/* Return VF with element 1 replaced by the scalar F.  */
vector float
insert_arg_1 (vector float vf, float f)
{
  vector float updated = vec_insert (f, vf, 1);

  return updated;
}
/* Return VF with element 2 replaced by the scalar F.  */
vector float
insert_arg_2 (vector float vf, float f)
{
  vector float updated = vec_insert (f, vf, 2);

  return updated;
}
/* Return VF with element 3 replaced by the scalar F.  */
vector float
insert_arg_3 (vector float vf, float f)
{
  vector float updated = vec_insert (f, vf, 3);

  return updated;
}
/* { dg-final { scan-assembler {\mxscvdpspn\M} } } */
/* { dg-final { scan-assembler {\mxxinsertw\M} } } */
/* { dg-final { scan-assembler-not {\mlvewx\M} } } */
/* { dg-final { scan-assembler-not {\mlvx\M} } } */
/* { dg-final { scan-assembler-not {\mvperm\M} } } */
/* { dg-final { scan-assembler-not {\mvpermr\M} } } */
/* { dg-final { scan-assembler-not {\mstfs\M} } } */
/* { dg-final { scan-assembler-not {\mstxssp\M} } } */
/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */
/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-options "-mcpu=power9 -O2" } */
#include <altivec.h>
/* Optimize x = vec_insert (vec_extract (v2, N), v1, M) for SFmode if N is the default
scalar position. */
/* ELE is the element that sits in the default scalar position: 2 on little
   endian, 1 on big endian.  __ORDER_LITTLE_ENDIAN__ by itself is a nonzero
   constant on every target, so the byte order has to be detected by comparing
   __BYTE_ORDER__ against it.  */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define ELE 2
#else
#define ELE 1
#endif
/* Copy element ELE of V2 into element 0 of V1 and return the result.  */
vector float
foo (vector float v1, vector float v2)
{
  float picked = vec_extract (v2, ELE);

  return vec_insert (picked, v1, 0);
}
/* { dg-final { scan-assembler {\mxxinsertw\M} } } */
/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
/* { dg-final { scan-assembler-not {\mlvewx\M} } } */
/* { dg-final { scan-assembler-not {\mlvx\M} } } */
/* { dg-final { scan-assembler-not {\mvperm\M} } } */
/* { dg-final { scan-assembler-not {\mvpermr\M} } } */
/* { dg-final { scan-assembler-not {\mstfs\M} } } */
/* { dg-final { scan-assembler-not {\mstxssp\M} } } */
/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */
/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-options "-mcpu=power9 -O2" } */
#include <altivec.h>
/* Optimize x = vec_insert (vec_extract (v2, N), v1, M) for SFmode. */
/* Extract an element of V2 and insert it into element 0 of V1.
   NOTE(review): index 4 is outside 0..3 for a 4-element vector; presumably
   vec_extract reduces it modulo the element count (i.e. element 0) --
   confirm this is the intended element.  */
vector float
foo (vector float v1, vector float v2)
{
  float picked = vec_extract (v2, 4);

  return vec_insert (picked, v1, 0);
}
/* { dg-final { scan-assembler {\mxxinsertw\M} } } */
/* { dg-final { scan-assembler {\mxxextractuw\M} } } */
/* { dg-final { scan-assembler-not {\mlvewx\M} } } */
/* { dg-final { scan-assembler-not {\mlvx\M} } } */
/* { dg-final { scan-assembler-not {\mvperm\M} } } */
/* { dg-final { scan-assembler-not {\mvpermr\M} } } */
/* { dg-final { scan-assembler-not {\mstfs\M} } } */
/* { dg-final { scan-assembler-not {\mstxssp\M} } } */
/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */
/* { dg-do run { target { powerpc*-*-linux* } } } */
/* { dg-require-effective-target vsx_hw } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target p9vector_hw } */
/* { dg-options "-mcpu=power9 -O2" } */
#include <altivec.h>
#include <stdlib.h>
/* Out-of-line helper: return V with element 0 replaced by F.  */
__attribute__ ((__noinline__))
vector float
insert_0 (vector float v, float f)
{
  vector float updated = vec_insert (f, v, 0);

  return updated;
}
/* Out-of-line helper: return V with element 1 replaced by F.  */
__attribute__ ((__noinline__))
vector float
insert_1 (vector float v, float f)
{
  vector float updated = vec_insert (f, v, 1);

  return updated;
}
/* Out-of-line helper: return V with element 2 replaced by F.  */
__attribute__ ((__noinline__))
vector float
insert_2 (vector float v, float f)
{
  vector float updated = vec_insert (f, v, 2);

  return updated;
}
/* Out-of-line helper: return V with element 3 replaced by F.  */
__attribute__ ((__noinline__))
vector float
insert_3 (vector float v, float f)
{
  vector float updated = vec_insert (f, v, 3);

  return updated;
}
__attribute__ ((__noinline__))
void
test_insert (void)
{
vector float v1 = { 1.0f, 2.0f, 3.0f, 4.0f };
vector float v2 = { 5.0f, 6.0f, 7.0f, 8.0f };
v1 = insert_0 (v1, 5.0f);
v1 = insert_1 (v1, 6.0f);
v1 = insert_2 (v1, 7.0f);
v1 = insert_3 (v1, 8.0f);
if (vec_any_ne (v1, v2))
abort ();
}
/* Out-of-line helper: copy element 3 of V2 into element 0 of V1.  */
__attribute__ ((__noinline__))
vector float
insert_extract_0_3 (vector float v1, vector float v2)
{
  float picked = vec_extract (v2, 3);

  return vec_insert (picked, v1, 0);
}
/* Out-of-line helper: copy element 2 of V2 into element 1 of V1.  */
__attribute__ ((__noinline__))
vector float
insert_extract_1_2 (vector float v1, vector float v2)
{
  float picked = vec_extract (v2, 2);

  return vec_insert (picked, v1, 1);
}
/* Out-of-line helper: copy element 1 of V2 into element 2 of V1.  */
__attribute__ ((__noinline__))
vector float
insert_extract_2_1 (vector float v1, vector float v2)
{
  float picked = vec_extract (v2, 1);

  return vec_insert (picked, v1, 2);
}
/* Out-of-line helper: copy element 0 of V2 into element 3 of V1.  */
__attribute__ ((__noinline__))
vector float
insert_extract_3_0 (vector float v1, vector float v2)
{
  float picked = vec_extract (v2, 0);

  return vec_insert (picked, v1, 3);
}
__attribute__ ((__noinline__))
void
test_insert_extract (void)
{
vector float v1 = { 1.0f, 2.0f, 3.0f, 4.0f };
vector float v2 = { 5.0f, 6.0f, 7.0f, 8.0f };
vector float v3 = { 8.0f, 7.0f, 6.0f, 5.0f };
v1 = insert_extract_0_3 (v1, v2);
v1 = insert_extract_1_2 (v1, v2);
v1 = insert_extract_2_1 (v1, v2);
v1 = insert_extract_3_0 (v1, v2);
if (vec_any_ne (v1, v3))
abort ();
}
/* Run both checks; each one calls abort () itself on a mismatch, so reaching
   the return means success.  */
int
main (void)
{
  test_insert ();
  test_insert_extract ();

  return 0;
}
/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-options "-mcpu=power9 -O2" } */
#include <altivec.h>
/* Ensure setting 0.0f to a V4SFmode element does not do a FP conversion. */
/* Return VF with element 0 replaced by the constant 0.0f.  */
vector float
insert_arg_0 (vector float vf)
{
  vector float updated = vec_insert (0.0f, vf, 0);

  return updated;
}
/* { dg-final { scan-assembler {\mxxinsertw\M} } } */
/* { dg-final { scan-assembler-not {\mlvewx\M} } } */
/* { dg-final { scan-assembler-not {\mlvx\M} } } */
/* { dg-final { scan-assembler-not {\mvperm\M} } } */
/* { dg-final { scan-assembler-not {\mvpermr\M} } } */
/* { dg-final { scan-assembler-not {\mstfs\M} } } */
/* { dg-final { scan-assembler-not {\mstxssp\M} } } */
/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */
/* { dg-final { scan-assembler-not {\mxscvdpspn\M} } } */
/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment