Commit d0047a25 by Michael Meissner

rs6000-protos.h (rs6000_adjust_vec_address): New function that takes a vector memory address...

[gcc]
2016-07-30  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/rs6000-protos.h (rs6000_adjust_vec_address): New
	function that takes a vector memory address, a hard register, an
	element number and a temporary base register, and recreates an
	address that points to the appropriate element within the vector.
	* config/rs6000/rs6000.c (rs6000_adjust_vec_address): Likewise.
	(rs6000_split_vec_extract_var): Add support for the source of a
	vec_extract with variable element number being a vector in
	memory.
	(rtx_is_swappable_p): VSLO insns (UNSPEC_VSX_VSLO) are not
	swappable.
	* config/rs6000/vsx.md (vsx_extract_<mode>_load): Replace
	vsx_extract_<mode>_load insn with a new insn that optimizes
	loading either element from a memory location, using a scratch
	base register to form the address of the element.
	(vsx_extract_<P:mode>_<VSX_D:mode>_load): Likewise.
	(vsx_extract_<mode>_store): Rework the alternatives to support
	Altivec registers more correctly.  Add support for the ISA 3.0
	Altivec d-form store instruction (stxsd).
	(vsx_extract_<mode>_var): Add support for extracting the element
	from a vector in memory when the element number is variable.

[gcc/testsuite]
2016-07-30  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/vec-extract-2.c: New tests for vec_extract of
	vector double or vector long where the vector is in memory.
	* gcc.target/powerpc/vec-extract-3.c: Likewise.
	* gcc.target/powerpc/vec-extract-4.c: Likewise.

From-SVN: r238908
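
To illustrate what the patch below enables: extracting an element of a vector that lives in memory can now be done with a single scalar load from the element's address, instead of loading the whole vector and permuting it. A minimal sketch of the affected source pattern (hypothetical functions; the expected codegen is an assumption based on the scan-assembler directives in the new tests at the end of this commit):

#include <altivec.h>

/* Each extract below should compile to one scalar load (lfd or ld)
   from the element's address; previously, extracting element 1 could
   load the full vector (lxvd2x) and permute it (xxpermdi) first.  */
double
first_elem (vector double *p)
{
  return vec_extract (*p, 0);
}

double
second_elem (vector double *p)
{
  return vec_extract (*p, 1);
}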
gcc/config/rs6000/rs6000-protos.h
@@ -63,6 +63,7 @@ extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
 extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
 extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
+extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
 extern bool altivec_expand_vec_perm_const (rtx op[4]);
 extern void altivec_expand_vec_perm_le (rtx op[4]);
 extern bool rs6000_expand_vec_perm_const (rtx op[4]);
gcc/config/rs6000/rs6000.c

@@ -7001,6 +7001,164 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
   emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
 }
+
+/* Adjust a memory address (MEM) of a vector type to point to a scalar field
+   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
+   temporary (BASE_TMP) to fixup the address.  Return the new memory address
+   that is valid for reads or writes to a given register (SCALAR_REG).  */
+
+rtx
+rs6000_adjust_vec_address (rtx scalar_reg,
+                           rtx mem,
+                           rtx element,
+                           rtx base_tmp,
+                           machine_mode scalar_mode)
+{
+  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
+  rtx addr = XEXP (mem, 0);
+  rtx element_offset;
+  rtx new_addr;
+  bool valid_addr_p;
+
+  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
+  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
+
+  /* Calculate what we need to add to the address to get the element
+     address.  */
+  if (CONST_INT_P (element))
+    element_offset = GEN_INT (INTVAL (element) * scalar_size);
+  else
+    {
+      int byte_shift = exact_log2 (scalar_size);
+      gcc_assert (byte_shift >= 0);
+
+      if (byte_shift == 0)
+        element_offset = element;
+      else
+        {
+          if (TARGET_POWERPC64)
+            emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
+          else
+            emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
+
+          element_offset = base_tmp;
+        }
+    }
+
+  /* Create the new address pointing to the element within the vector.  If we
+     are adding 0, we don't have to change the address.  */
+  if (element_offset == const0_rtx)
+    new_addr = addr;
+
+  /* A simple indirect address can be converted into a reg + offset
+     address.  */
+  else if (REG_P (addr) || SUBREG_P (addr))
+    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
+
+  /* Optimize D-FORM addresses with constant offset with a constant element, to
+     include the element offset in the address directly.  */
+  else if (GET_CODE (addr) == PLUS)
+    {
+      rtx op0 = XEXP (addr, 0);
+      rtx op1 = XEXP (addr, 1);
+      rtx insn;
+
+      gcc_assert (REG_P (op0) || SUBREG_P (op0));
+      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
+        {
+          HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
+          rtx offset_rtx = GEN_INT (offset);
+
+          if (IN_RANGE (offset, -32768, 32767)
+              && (scalar_size < 8 || (offset & 0x3) == 0))
+            new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
+          else
+            {
+              emit_move_insn (base_tmp, offset_rtx);
+              new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
+            }
+        }
+      else
+        {
+          if (REG_P (op1) || SUBREG_P (op1))
+            {
+              insn = gen_add3_insn (base_tmp, op1, element_offset);
+              gcc_assert (insn != NULL_RTX);
+              emit_insn (insn);
+            }
+          else if (REG_P (element_offset) || SUBREG_P (element_offset))
+            {
+              insn = gen_add3_insn (base_tmp, element_offset, op1);
+              gcc_assert (insn != NULL_RTX);
+              emit_insn (insn);
+            }
+          else
+            {
+              emit_move_insn (base_tmp, op1);
+              emit_insn (gen_add2_insn (base_tmp, element_offset));
+            }
+
+          new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
+        }
+    }
+  else
+    {
+      emit_move_insn (base_tmp, addr);
+      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
+    }
+
+  /* If we have a PLUS, we need to see whether the particular register class
+     allows for D-FORM or X-FORM addressing.  */
+  if (GET_CODE (new_addr) == PLUS)
+    {
+      rtx op1 = XEXP (new_addr, 1);
+      addr_mask_type addr_mask;
+      int scalar_regno;
+
+      if (REG_P (scalar_reg))
+        scalar_regno = REGNO (scalar_reg);
+      else if (SUBREG_P (scalar_reg))
+        scalar_regno = subreg_regno (scalar_reg);
+      else
+        gcc_unreachable ();
+
+      gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
+      if (INT_REGNO_P (scalar_regno))
+        addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
+      else if (FP_REGNO_P (scalar_regno))
+        addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
+      else if (ALTIVEC_REGNO_P (scalar_regno))
+        addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
+      else
+        gcc_unreachable ();
+
+      if (REG_P (op1) || SUBREG_P (op1))
+        valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
+      else
+        valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
+    }
+  else if (REG_P (new_addr) || SUBREG_P (new_addr))
+    valid_addr_p = true;
+  else
+    valid_addr_p = false;
+
+  if (!valid_addr_p)
+    {
+      emit_move_insn (base_tmp, new_addr);
+      new_addr = base_tmp;
+    }
+
+  return change_address (mem, scalar_mode, new_addr);
+}
 
 /* Split a variable vec_extract operation into the component instructions.  */
 
 void
@@ -7014,7 +7172,18 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
   gcc_assert (byte_shift >= 0);
 
-  if (REG_P (src) || SUBREG_P (src))
+  /* If we are given a memory address, optimize to load just the element.  We
+     don't have to adjust the vector element number on little endian
+     systems.  */
+  if (MEM_P (src))
+    {
+      gcc_assert (REG_P (tmp_gpr));
+      emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
+                                                       tmp_gpr, scalar_mode));
+      return;
+    }
+
+  else if (REG_P (src) || SUBREG_P (src))
     {
       int bit_shift = byte_shift + 3;
       rtx element2;
@@ -38759,6 +38928,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	case UNSPEC_VSX_CVSPDP:
 	case UNSPEC_VSX_CVSPDPN:
 	case UNSPEC_VSX_EXTRACT:
+	case UNSPEC_VSX_VSLO:
 	  return 0;
 	case UNSPEC_VSPLT_DIRECT:
 	  *special = SH_SPLAT;
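
At its core, rs6000_adjust_vec_address is plain address arithmetic: the element number times the scalar size becomes a byte offset, and a reg+constant address keeps the folded offset only if the combined displacement still fits a D-form access (DS-form for 8-byte scalars). A standalone C model of that check, with hypothetical helper names (the real function builds and validates RTL rather than computing pointers):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Model of the constant-offset fold in rs6000_adjust_vec_address
   (illustrative only).  The combined displacement must fit the signed
   16-bit D-form field; 8-byte scalars use DS-form addressing, which
   additionally requires the low 2 bits of the offset to be zero.  */
static bool
dform_offset_ok (int64_t base_offset, size_t element, size_t scalar_size)
{
  int64_t offset = base_offset + (int64_t) (element * scalar_size);
  return offset >= -32768 && offset <= 32767
         && (scalar_size < 8 || (offset & 0x3) == 0);
}

Note also that when the vector is in memory, element N sits at byte offset N * size on both endiannesses, which is why the MEM_P case in rs6000_split_vec_extract_var needs no element renumbering on little endian.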
gcc/config/rs6000/vsx.md

@@ -2174,33 +2174,36 @@
 }
   [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
 
-;; Optimize extracting a single scalar element from memory if the scalar is in
-;; the correct location to use a single load.
-(define_insn "*vsx_extract_<mode>_load"
-  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
-	(vec_select:<VS_scalar>
-	 (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
-	 (parallel [(const_int 0)])))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "@
-   lfd%U1%X1 %0,%1
-   lxsd%U1x %x0,%y1
-   ld%U1%X1 %0,%1"
-  [(set_attr "type" "fpload,fpload,load")
-   (set_attr "length" "4")])
+;; Optimize extracting a single scalar element from memory.
+(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
+  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
+	(vec_select:<VSX_D:VS_scalar>
+	 (match_operand:VSX_D 1 "memory_operand" "m,m")
+	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
+   (clobber (match_scratch:P 3 "=&b,&b"))]
+  "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 4))]
+{
+  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
+					   operands[3], <VSX_D:VS_scalar>mode);
+}
+  [(set_attr "type" "fpload,load")
+   (set_attr "length" "8")])
 
 ;; Optimize storing a single scalar element that is the right location to
 ;; memory
 (define_insn "*vsx_extract_<mode>_store"
-  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
+  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,o")
 	(vec_select:<VS_scalar>
-	 (match_operand:VSX_D 1 "register_operand" "d,wd,<VSa>")
+	 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
 	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
   "@
    stfd%U0%X0 %1,%0
    stxsd%U0x %x1,%y0
-   stxsd%U0x %x1,%y0"
+   stxsd %1,%0"
   [(set_attr "type" "fpstore")
   (set_attr "length" "4")])
 
@@ -2216,12 +2219,12 @@
 
 ;; Variable V2DI/V2DF extract
 (define_insn_and_split "vsx_extract_<mode>_var"
-  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
-	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v")
-			     (match_operand:DI 2 "gpc_reg_operand" "r")]
+  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
+	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
+			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
			    UNSPEC_VSX_EXTRACT))
-   (clobber (match_scratch:DI 3 "=r"))
-   (clobber (match_scratch:V2DI 4 "=&v"))]
+   (clobber (match_scratch:DI 3 "=r,&b,&b"))
+   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
  "&& reload_completed"
...
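
The rewritten load pattern defers to rs6000_adjust_vec_address at split time, so either element of a V2DF/V2DI vector can be extracted straight from memory into either a floating-point or a general-purpose register. A small illustration of the two register-class alternatives (hypothetical example; the instruction-class comments are assumptions based on the pattern's "fpload,load" type attribute):

#include <altivec.h>

/* Element 1 extracted directly from memory into an FPR (fpload class).  */
double
extract_to_fpr (vector double *p)
{
  return vec_extract (*p, 1);
}

/* Element 1 extracted directly from memory into a GPR (load class).  */
long
extract_to_gpr (vector long *p)
{
  return vec_extract (*p, 1);
}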
gcc/testsuite/gcc.target/powerpc/vec-extract-2.c (new file)
/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-options "-O2 -mvsx" } */
#include <altivec.h>

double
add_double_0 (vector double *p, double x)
{
  return vec_extract (*p, 0) + x;
}

double
add_double_1 (vector double *p, double x)
{
  return vec_extract (*p, 1) + x;
}

long
add_long_0 (vector long *p, long x)
{
  return vec_extract (*p, 0) + x;
}

long
add_long_1 (vector long *p, long x)
{
  return vec_extract (*p, 1) + x;
}
/* { dg-final { scan-assembler-not "lxvd2x" } } */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
/* { dg-final { scan-assembler-not "lvx" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
gcc/testsuite/gcc.target/powerpc/vec-extract-3.c (new file)

/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
/* { dg-options "-O2 -mcpu=power8" } */
#include <altivec.h>

double
add_double_n (vector double *p, double x, long n)
{
  return vec_extract (*p, n) + x;
}

long
add_long_n (vector long *p, long x, long n)
{
  return vec_extract (*p, n) + x;
}
/* { dg-final { scan-assembler-not "lxvd2x" } } */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lxvx" } } */
/* { dg-final { scan-assembler-not "lxv" } } */
/* { dg-final { scan-assembler-not "lvx" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
gcc/testsuite/gcc.target/powerpc/vec-extract-4.c (new file)

/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
/* { dg-options "-O2 -mcpu=power9" } */
#include <altivec.h>

#ifdef __LITTLE_ENDIAN__
#define ELEMENT 1
#else
#define ELEMENT 0
#endif

void
foo (double *p, vector double v)
{
  p[10] = vec_extract (v, ELEMENT);
}
/* { dg-final { scan-assembler "stxsd " } } */
/* { dg-final { scan-assembler-not "stxsdx" } } */
/* { dg-final { scan-assembler-not "stfd" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */