Commit 6fa6eb35 by Bill Schmidt Committed by William Schmidt

rs6000.c (context.h): New include.

[gcc]

2014-08-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (context.h): New include.
	(tree-pass.h): Likewise.
	(make_pass_analyze_swaps): New decl.
	(rs6000_option_override): Register pass_analyze_swaps.
	(swap_web_entry): New subclass of web_entry_base (df.h).
	(special_handling_values): New enum.
	(union_defs): New function.
	(union_uses): Likewise.
	(insn_is_load_p): Likewise.
	(insn_is_store_p): Likewise.
	(insn_is_swap_p): Likewise.
	(rtx_is_swappable_p): Likewise.
	(insn_is_swappable_p): Likewise.
	(chain_purpose): New enum.
	(chain_contains_only_swaps): New function.
	(mark_swaps_for_removal): Likewise.
	(swap_const_vector_halves): Likewise.
	(adjust_subreg_index): Likewise.
	(permute_load): Likewise.
	(permute_store): Likewise.
	(handle_special_swappables): Likewise.
	(replace_swap_with_copy): Likewise.
	(dump_swap_insn_table): Likewise.
	(rs6000_analyze_swaps): Likewise.
	(pass_data_analyze_swaps): New pass_data.
	(pass_analyze_swaps): New rtl_opt_pass.
	(make_pass_analyze_swaps): New function.
	* config/rs6000/rs6000.opt (moptimize-swaps): New option.

[gcc/testsuite]

2014-08-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/swaps-p8-1.c: New test.
	* gcc.target/powerpc/swaps-p8-2.c: New test.
	* gcc.target/powerpc/swaps-p8-3.c: New test.
	* gcc.target/powerpc/swaps-p8-4.c: New test.
	* gcc.target/powerpc/swaps-p8-5.c: New test.
	* gcc.target/powerpc/swaps-p8-6.c: New test.
	* gcc.target/powerpc/swaps-p8-7.c: New test.
	* gcc.target/powerpc/swaps-p8-8.c: New test.
	* gcc.target/powerpc/swaps-p8-9.c: New test.
	* gcc.target/powerpc/swaps-p8-10.c: New test.
	* gcc.target/powerpc/swaps-p8-11.c: New test.
	* gcc.target/powerpc/swaps-p8-12.c: New test.

From-SVN: r214254
parent 9c068b73
2014-08-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000.c (context.h): New include.
(tree-pass.h): Likewise.
(make_pass_analyze_swaps): New decl.
(rs6000_option_override): Register pass_analyze_swaps.
(swap_web_entry): New subclass of web_entry_base (df.h).
(special_handling_values): New enum.
(union_defs): New function.
(union_uses): Likewise.
(insn_is_load_p): Likewise.
(insn_is_store_p): Likewise.
(insn_is_swap_p): Likewise.
(rtx_is_swappable_p): Likewise.
(insn_is_swappable_p): Likewise.
(chain_purpose): New enum.
(chain_contains_only_swaps): New function.
(mark_swaps_for_removal): Likewise.
(swap_const_vector_halves): Likewise.
(adjust_subreg_index): Likewise.
(permute_load): Likewise.
(permute_store): Likewise.
(handle_special_swappables): Likewise.
(replace_swap_with_copy): Likewise.
(dump_swap_insn_table): Likewise.
(rs6000_analyze_swaps): Likewise.
(pass_data_analyze_swaps): New pass_data.
(pass_analyze_swaps): New rtl_opt_pass.
(make_pass_analyze_swaps): New function.
* config/rs6000/rs6000.opt (moptimize-swaps): New option.
2014-08-21 David Malcolm <dmalcolm@redhat.com>
* sel-sched-ir.h (create_insn_rtx_from_pattern): Strengthen return
......
......@@ -79,6 +79,8 @@
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h" /* get declarations of xcoff_*_section_name */
#endif
......@@ -1170,6 +1172,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
enum machine_mode,
secondary_reload_info *,
bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
......@@ -4085,6 +4088,15 @@ static void
rs6000_option_override (void)
{
(void) rs6000_option_override_internal (true);
/* Register machine-specific passes. This needs to be done at start-up.
It's convenient to do it here (like i386 does). */
opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
static struct register_pass_info analyze_swaps_info
= { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
register_pass (&analyze_swaps_info);
}
......@@ -33421,7 +33433,1045 @@ emit_fusion_gpr_load (rtx *operands)
return "";
}
/* Analyze vector computations and remove unnecessary doubleword
swaps (xxswapdi instructions). This pass is performed only
for little-endian VSX code generation.
For this specific case, loads and stores of 4x32 and 2x64 vectors
are inefficient. These are implemented using the lxvd2x and
stxvd2x instructions, which invert the order of doublewords in
a vector register. Thus the code generation inserts an xxswapdi
after each such load, and prior to each such store. (For spill
code after register assignment, an additional xxswapdi is inserted
following each store in order to return a hard register to its
unpermuted value.)
The extra xxswapdi instructions reduce performance. This can be
particularly bad for vectorized code. The purpose of this pass
is to reduce the number of xxswapdi instructions required for
correctness.
The primary insight is that much code that operates on vectors
does not care about the relative order of elements in a register,
so long as the correct memory order is preserved. If we have
a computation where all input values are provided by lxvd2x/xxswapdi
sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
and all intermediate computations are pure SIMD (independent of
element order), then all the xxswapdi's associated with the loads
and stores may be removed.
This pass uses some of the infrastructure and logical ideas from
the "web" pass in web.c. We create maximal webs of computations
fitting the description above using union-find. Each such web is
then optimized by removing its unnecessary xxswapdi instructions.
The pass is placed prior to global optimization so that we can
perform the optimization in the safest and simplest way possible;
that is, by replacing each xxswapdi insn with a register copy insn.
Subsequent forward propagation will remove copies where possible.
There are some operations sensitive to element order for which we
can still allow the operation, provided we modify those operations.
These include CONST_VECTORs, for which we must swap the first and
second halves of the constant vector; and SUBREGs, for which we
must adjust the byte offset to account for the swapped doublewords.
A remaining opportunity would be non-immediate-form splats, for
which we should adjust the selected lane of the input. We should
also make code generation adjustments for sum-across operations,
since this is a common vectorizer reduction.
Because we run prior to the first split, we can see loads and stores
here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
vector loads and stores that have not yet been split into a permuting
load/store and a swap. (One way this can happen is with a builtin
call to vec_vsx_{ld,st}.) We can handle these as well, but rather
than deleting a swap, we convert the load/store into a permuting
load/store (which effectively removes the swap). */
/* This is based on the union-find logic in web.c. web_entry_base is
defined in df.h. */
/* Web entry for a single insn, built on the union-find machinery of
   web_entry_base (df.h).  One of these exists per insn UID; the
   bitfields record the properties computed by rs6000_analyze_swaps.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load (plain or permuting).  */
  unsigned int is_load : 1;
  /* Set if insn is a store (plain or permuting).  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value (one of enum special_handling_values) indicates
     what kind of special handling for this insn is required if
     doublewords are swapped.  Undefined if is_swappable is not set.  */
  unsigned int special_handling : 3;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};
/* Values for swap_web_entry::special_handling, indicating the fixup
   (if any) an otherwise-swappable insn needs once the doubleword
   swaps in its web are removed.  */
enum special_handling_values {
  SH_NONE = 0,		/* No special handling required.  */
  SH_CONST_VECTOR,	/* Swap the halves of a CONST_VECTOR.  */
  SH_SUBREG,		/* Adjust the byte index of a narrowing SUBREG.  */
  SH_NOSWAP_LD,		/* Convert a plain load to a permuting load.  */
  SH_NOSWAP_ST		/* Convert a plain store to a permuting store.  */
};
/* Union INSN with every insn whose definition reaches USE, using the
   union-find machinery borrowed from web.c.  If USE has no reaching
   definition inside the function, or an artificial one, flag INSN as
   live-in (which later disqualifies its web).  */
static void
union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
{
  struct df_link *def_link = DF_REF_CHAIN (use);

  /* An empty chain means the value is live on entry.  */
  if (!def_link)
    insn_entry[INSN_UID (insn)].is_live_in = 1;

  for (; def_link; def_link = def_link->next)
    {
      if (DF_REF_IS_ARTIFICIAL (def_link->ref))
	insn_entry[INSN_UID (insn)].is_live_in = 1;

      if (DF_REF_INSN_INFO (def_link->ref))
	{
	  rtx def_insn = DF_REF_INSN (def_link->ref);
	  (void) unionfind_union (insn_entry + INSN_UID (insn),
				  insn_entry + INSN_UID (def_insn));
	}
    }
}
/* Union INSN with every insn containing a use reached from DEF.
   If DEF reaches an artificial use (e.g. an EH use) or no use at
   all, flag INSN as live-out, which later disqualifies its web.  */
static void
union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
{
  struct df_link *use_link = DF_REF_CHAIN (def);

  /* An empty chain means the value is live on exit.  */
  if (!use_link)
    insn_entry[INSN_UID (insn)].is_live_out = 1;

  for (; use_link; use_link = use_link->next)
    {
      /* This could be an eh use or some other artificial use;
	 we treat these all the same (killing the optimization).  */
      if (DF_REF_IS_ARTIFICIAL (use_link->ref))
	insn_entry[INSN_UID (insn)].is_live_out = 1;

      if (DF_REF_INSN_INFO (use_link->ref))
	{
	  rtx use_insn = DF_REF_INSN (use_link->ref);
	  (void) unionfind_union (insn_entry + INSN_UID (insn),
				  insn_entry + INSN_UID (use_insn));
	}
    }
}
/* Return 1 iff INSN is a load insn, including permuting loads that
   represent an lxvd2x instruction; else return 0.  */
static unsigned int
insn_is_load_p (rtx insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) == SET)
    {
      rtx src = SET_SRC (body);
      /* Plain load: (set reg mem).  */
      if (GET_CODE (src) == MEM)
	return 1;
      /* Permuting load: (set reg (vec_select mem par)).  */
      return (GET_CODE (src) == VEC_SELECT
	      && GET_CODE (XEXP (src, 0)) == MEM);
    }

  /* UNSPEC_LVX appears as a PARALLEL whose first element is a SET
     from memory.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx set = XVECEXP (body, 0, 0);
      if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
	return 1;
    }

  return 0;
}
/* Return 1 iff INSN is a store insn, including permuting stores that
   represent an stxvd2x instruction; else return 0.  */
static unsigned int
insn_is_store_p (rtx insn)
{
  rtx body = PATTERN (insn);
  rtx set = body;

  /* UNSPEC_STVX appears as a PARALLEL; its first element carries
     the actual SET to memory.  */
  if (GET_CODE (body) == PARALLEL)
    set = XVECEXP (body, 0, 0);
  else if (GET_CODE (body) != SET)
    return 0;

  return (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM);
}
/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
   a permuting load, or a permuting store: all are a SET whose source
   is a VEC_SELECT with selector {n/2..n-1, 0..n/2-1}.  */
static unsigned int
insn_is_swap_p (rtx insn)
{
  rtx body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return 0;

  rtx rhs = SET_SRC (body);
  if (GET_CODE (rhs) != VEC_SELECT)
    return 0;

  rtx parallel = XEXP (rhs, 1);
  if (GET_CODE (parallel) != PARALLEL)
    return 0;

  unsigned int len = XVECLEN (parallel, 0);
  if (len != 2 && len != 4 && len != 8 && len != 16)
    return 0;

  /* Each selector element must name the element one half-vector
     away, i.e. the two doubleword halves trade places.  */
  unsigned int half = len / 2;
  for (unsigned int i = 0; i < len; ++i)
    {
      rtx op = XVECEXP (parallel, 0, i);
      unsigned int expected = i < half ? i + half : i - half;
      if (GET_CODE (op) != CONST_INT || INTVAL (op) != expected)
	return 0;
    }

  return 1;
}
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  *SPECIAL is set to
   SH_CONST_VECTOR when a constant vector needing half-swapping is
   found.  Return 0 if OP is order-sensitive, or if two conflicting
   kinds of special handling would be needed in the same expression.  */
static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;

  switch (code)
    {
    case LABEL_REF:
    case SYMBOL_REF:
    case CLOBBER:
    case REG:
      return 1;

    case VEC_CONCAT:
    case VEC_SELECT:
    case ASM_INPUT:
    case ASM_OPERANDS:
      return 0;

    case CONST_VECTOR:
      {
	*special = SH_CONST_VECTOR;
	return 1;
      }

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
	 it represents a vector splat for which we can do special
	 handling.  */
      if (GET_CODE (XEXP (op, 0)) == CONST_INT)
	return 1;
      else
	return 0;

    case UNSPEC:
      {
	/* Various operations are unsafe for this optimization, at least
	   without significant additional work.  Permutes are obviously
	   problematic, as both the permute control vector and the ordering
	   of the target values are invalidated by doubleword swapping.
	   Vector pack and unpack modify the number of vector lanes.
	   Merge-high/low will not operate correctly on swapped operands.
	   Vector shifts across element boundaries are clearly uncool,
	   as are vector select and concatenate operations.  Vector
	   sum-across instructions define one operand with a specific
	   order-dependent element, so additional fixup code would be
	   needed to make those work.  Vector set and non-immediate-form
	   vector splat are element-order sensitive.  A few of these
	   cases might be workable with special handling if required.  */
	int val = XINT (op, 1);
	if (val == UNSPEC_VMRGH_DIRECT
	    || val == UNSPEC_VMRGL_DIRECT
	    || val == UNSPEC_VPACK_SIGN_SIGN_SAT
	    || val == UNSPEC_VPACK_SIGN_UNS_SAT
	    || val == UNSPEC_VPACK_UNS_UNS_MOD
	    || val == UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
	    || val == UNSPEC_VPACK_UNS_UNS_SAT
	    || val == UNSPEC_VPERM
	    || val == UNSPEC_VPERM_UNS
	    || val == UNSPEC_VPERMHI
	    || val == UNSPEC_VPERMSI
	    || val == UNSPEC_VPKPX
	    || val == UNSPEC_VSLDOI
	    || val == UNSPEC_VSLO
	    || val == UNSPEC_VSPLT_DIRECT
	    || val == UNSPEC_VSRO
	    || val == UNSPEC_VSUM2SWS
	    || val == UNSPEC_VSUM4S
	    || val == UNSPEC_VSUM4UBS
	    || val == UNSPEC_VSUMSWS
	    || val == UNSPEC_VSUMSWS_DIRECT
	    || val == UNSPEC_VSX_CONCAT
	    || val == UNSPEC_VSX_CVSPDP
	    || val == UNSPEC_VSX_CVSPDPN
	    || val == UNSPEC_VSX_SET
	    || val == UNSPEC_VSX_SLDWI
	    || val == UNSPEC_VSX_XXSPLTW
	    || val == UNSPEC_VUNPACK_HI_SIGN
	    || val == UNSPEC_VUNPACK_HI_SIGN_DIRECT
	    || val == UNSPEC_VUNPACK_LO_SIGN
	    || val == UNSPEC_VUNPACK_LO_SIGN_DIRECT
	    || val == UNSPEC_VUPKHPX
	    || val == UNSPEC_VUPKHS_V4SF
	    || val == UNSPEC_VUPKHU_V4SF
	    || val == UNSPEC_VUPKLPX
	    || val == UNSPEC_VUPKLS_V4SF
	    /* Bug fix: this entry previously repeated UNSPEC_VUPKHU_V4SF,
	       leaving the low unsigned V4SF unpack unguarded.  */
	    || val == UNSPEC_VUPKLU_V4SF)
	  return 0;
      }
      /* Unlisted unspecs fall through to the generic operand walk.  */

    default:
      break;
    }

  /* Generic case: OP is swappable iff all of its operands are, and
     at most one kind of special handling is required among them.  */
  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
	unsigned int special_op = SH_NONE;
	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
	/* Ensure we never have two kinds of special handling
	   for the same insn.  */
	if (*special != SH_NONE && special_op != SH_NONE
	    && *special != special_op)
	  return 0;
	*special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	{
	  unsigned int special_op = SH_NONE;
	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
	  /* Ensure we never have two kinds of special handling
	     for the same insn.  */
	  if (*special != SH_NONE && special_op != SH_NONE
	      && *special != special_op)
	    return 0;
	  *special = special_op;
	}

  return ok;
}
/* Return 1 iff INSN is an insn that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */
static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
		     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  rtx body = PATTERN (insn);
  unsigned int uid = INSN_UID (insn);

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exception:
     UNSPEC_LVX and UNSPEC_STVX, which have a PARALLEL body instead
     of a SET.  */
  if (insn_entry[uid].is_load || insn_entry[uid].is_store)
    {
      if (GET_CODE (body) != SET)
	return 0;
      *special = insn_entry[uid].is_load ? SH_NOSWAP_LD : SH_NOSWAP_ST;
      return 1;
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}
/* Whether a def-use/use-def chain is being examined on behalf of a
   permuting load (FOR_LOADS) or a permuting store (FOR_STORES); see
   chain_contains_only_swaps.  */
enum chain_purpose { FOR_LOADS, FOR_STORES };
/* Return true if the UD or DU chain headed by LINK is non-empty,
   and every entry on the chain references an insn that is a
   register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
   register swap must have only permuting loads as reaching defs.
   If PURPOSE is FOR_STORES, each such register swap must have only
   register swaps or permuting stores as reached uses.  */
static bool
chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
			   enum chain_purpose purpose)
{
  if (!link)
    return false;

  for (; link; link = link->next)
    {
      /* Non-vector references are uses for addressability; skip them.  */
      if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
	continue;

      if (DF_REF_IS_ARTIFICIAL (link->ref))
	return false;

      rtx reached_insn = DF_REF_INSN (link->ref);
      unsigned uid = INSN_UID (reached_insn);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);

      /* The reached insn must be a pure register swap (not itself a
	 permuting load or store).  */
      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
	  || insn_entry[uid].is_store)
	return false;

      if (purpose == FOR_LOADS)
	{
	  df_ref use;
	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *swap_link = DF_REF_CHAIN (use);

	      while (swap_link)
		{
		  /* Bug fix: the artificial-ref test previously checked
		     the outer LINK->ref (already known non-artificial)
		     instead of the inner chain entry, so artificial refs
		     could reach DF_REF_INSN below.  */
		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
		    return false;

		  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
		  unsigned uid2 = INSN_UID (swap_def_insn);

		  /* Only permuting loads are allowed.  */
		  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
		    return false;

		  swap_link = swap_link->next;
		}
	    }
	}
      else if (purpose == FOR_STORES)
	{
	  df_ref def;
	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *swap_link = DF_REF_CHAIN (def);

	      while (swap_link)
		{
		  /* Bug fix: check the inner chain entry, as above.  */
		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
		    return false;

		  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
		  unsigned uid2 = INSN_UID (swap_use_insn);

		  /* Permuting stores or register swaps are allowed.  */
		  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
		    return false;

		  swap_link = swap_link->next;
		}
	    }
	}
    }
  return true;
}
/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */
static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      /* Flag every swap that consumes the loaded value.  We know by
	 now that these are swaps, so we can delete them confidently.  */
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	for (struct df_link *link = DF_REF_CHAIN (def); link;
	     link = link->next)
	  {
	    rtx use_insn = DF_REF_INSN (link->ref);
	    insn_entry[INSN_UID (use_insn)].will_delete = 1;
	  }
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	{
	  /* Ignore uses for addressability.  */
	  if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (use))))
	    continue;

	  /* Flag every swap that feeds the stored value.  We know by
	     now that these are swaps, so we can delete them
	     confidently.  */
	  for (struct df_link *link = DF_REF_CHAIN (use); link;
	       link = link->next)
	    {
	      rtx def_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
	    }
	}
    }
}
/* OP is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx op)
{
  enum rtx_code code = GET_CODE (op);

  if (code == CONST_VECTOR)
    {
      int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
      for (int i = 0; i < half_units; ++i)
	{
	  rtx tmp = CONST_VECTOR_ELT (op, i);
	  CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
	  CONST_VECTOR_ELT (op, i + half_units) = tmp;
	}
      return;
    }

  /* Not a constant vector here: walk all sub-expressions.  */
  const char *fmt = GET_RTX_FORMAT (code);
  for (int i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      swap_const_vector_halves (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (int j = 0; j < XVECLEN (op, i); ++j)
	swap_const_vector_halves (XVECEXP (op, i, j));
}
/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */
static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);

  /* A narrowing subreg selects part of a vector register; move the
     byte offset into the other doubleword to compensate for the
     swapped halves.  */
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int byte = SUBREG_BYTE (op);
      SUBREG_BYTE (op) = byte < 8 ? byte + 8 : byte - 8;
    }

  /* Recurse into all sub-expressions.  */
  const char *fmt = GET_RTX_FORMAT (code);
  for (int i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (int j = 0; j < XVECLEN (op, i); ++j)
	adjust_subreg_index (XVECEXP (op, i, j));
}
/* Convert the non-permuting load INSN to a permuting one by wrapping
   its memory source in a doubleword-swapping VEC_SELECT, which
   effectively absorbs the swap that would otherwise be needed.  */
static void
permute_load (rtx insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  enum machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));

  /* Build the selector {half, ..., n-1, 0, ..., half-1}.  */
  for (int i = 0; i < n_elts; ++i)
    XVECEXP (par, 0, i) = GEN_INT ((i + half_elts) % n_elts);

  SET_SRC (body) = gen_rtx_VEC_SELECT (mode, mem_op, par);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
	     INSN_UID (insn));
}
/* Convert the non-permuting store INSN to a permuting one by wrapping
   its register source in a doubleword-swapping VEC_SELECT, which
   effectively absorbs the swap that would otherwise be needed.  */
static void
permute_store (rtx insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  enum machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));

  /* Build the selector {half, ..., n-1, 0, ..., half-1}.  */
  for (int i = 0; i < n_elts; ++i)
    XVECEXP (par, 0, i) = GEN_INT ((i + half_elts) % n_elts);

  SET_SRC (body) = gen_rtx_VEC_SELECT (mode, src_reg, par);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
	     INSN_UID (insn));
}
/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Dispatch on the recorded handling kind
   and perform the corresponding fixup.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    case SH_CONST_VECTOR:
      /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
      gcc_assert (GET_CODE (body) == SET);
      swap_const_vector_halves (SET_SRC (body));
      if (dump_file)
	fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
      break;

    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
	 select a smaller portion of a reg, adjust the index for
	 swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;

    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;

    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    }
}
/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  /* The swap source is (vec_select X par); X is what we copy.  */
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
  rtx new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    fprintf (dump_file, "Replacing swap %d with copy %d\n", i,
	     INSN_UID (new_insn));

  /* Remove the original swap insn.  */
  df_insn_delete (insn);
  remove_insn (insn);
  INSN_DELETED_P (insn) = 1;
}
/* Dump the swap table to DUMP_FILE.  Caller must ensure dump_file
   is non-null before calling.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int max_uid = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int uid = 0; uid < max_uid; ++uid)
    {
      if (!insn_entry[uid].is_relevant)
	continue;

      /* First column is the insn UID, second its union-find
	 predecessor's UID (0 if it is its own root).  */
      swap_web_entry *pred_entry
	= (swap_web_entry *) insn_entry[uid].pred ();
      fprintf (dump_file, "%6d %6d ", uid,
	       pred_entry && pred_entry->insn
	       ? INSN_UID (pred_entry->insn) : 0);

      if (insn_entry[uid].is_load)
	fputs ("load ", dump_file);
      if (insn_entry[uid].is_store)
	fputs ("store ", dump_file);
      if (insn_entry[uid].is_swap)
	fputs ("swap ", dump_file);
      if (insn_entry[uid].is_live_in)
	fputs ("live-in ", dump_file);
      if (insn_entry[uid].is_live_out)
	fputs ("live-out ", dump_file);
      if (insn_entry[uid].contains_subreg)
	fputs ("subreg ", dump_file);
      if (insn_entry[uid].is_128_int)
	fputs ("int128 ", dump_file);
      if (insn_entry[uid].is_call)
	fputs ("call ", dump_file);

      if (insn_entry[uid].is_swappable)
	{
	  fputs ("swappable ", dump_file);
	  if (insn_entry[uid].special_handling == SH_CONST_VECTOR)
	    fputs ("special:constvec ", dump_file);
	  else if (insn_entry[uid].special_handling == SH_SUBREG)
	    fputs ("special:subreg ", dump_file);
	  else if (insn_entry[uid].special_handling == SH_NOSWAP_LD)
	    fputs ("special:load ", dump_file);
	  else if (insn_entry[uid].special_handling == SH_NOSWAP_ST)
	    fputs ("special:store ", dump_file);
	}

      if (insn_entry[uid].web_not_optimizable)
	fputs ("unoptimizable ", dump_file);
      if (insn_entry[uid].will_delete)
	fputs ("delete ", dump_file);
      fputs ("\n", dump_file);
    }
  fputs ("\n", dump_file);
}
/* Main entry point for this pass.  Build webs of vector computations
   with union-find, mark webs that cannot be optimized, then remove
   the unnecessary xxswapdi insns (replacing each with a register
   copy) and fix up insns needing special handling.  Returns 0.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx insn;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
    {
      unsigned int uid = INSN_UID (insn);
      if (NONDEBUG_INSN_P (insn))
	{
	  insn_entry[uid].insn = insn;

	  if (GET_CODE (insn) == CALL_INSN)
	    insn_entry[uid].is_call = 1;

	  /* Walk the uses and defs to see if we mention vector regs.
	     Record any constraints on optimization of such mentions.  */
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref mention;
	  FOR_EACH_INSN_INFO_USE (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If a use gets its value from a call insn, it will be
		 a hard register and will look like (reg:V4SI 3 3).
		 The df analysis creates two mentions for GPR3 and GPR4,
		 both DImode.  We must recognize this and treat it as a
		 vector mention to ensure the call is unioned with this
		 use.  */
	      if (mode == DImode && DF_REF_INSN_INFO (mention))
		{
		  rtx feeder = DF_REF_INSN (mention);
		  /* FIXME: It is pretty hard to get from the df mention
		     to the mode of the use in the insn.  We arbitrarily
		     pick a vector mode here, even though the use might
		     be a real DImode.  We can be too conservative
		     (create a web larger than necessary) because of
		     this, so consider eventually fixing this.  */
		  if (GET_CODE (feeder) == CALL_INSN)
		    mode = V4SImode;
		}

	      if (VECTOR_MODE_P (mode))
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode)
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  union_defs (insn_entry, insn, mention);
		}
	    }
	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If we're loading up a hard vector register for a call,
		 it looks like (set (reg:V4SI 9 9) (...)).  The df
		 analysis creates two mentions for GPR9 and GPR10, both
		 DImode.  So relying on the mode from the mentions
		 isn't sufficient to ensure we union the call into the
		 web with the parameter setup code.  */
	      /* NOTE(review): GET_CODE (insn) == SET looks suspect --
		 INSN is an insn rtx (code INSN/JUMP_INSN/CALL_INSN), so
		 this condition would seemingly never hold and
		 GET_CODE (PATTERN (insn)) == SET may have been intended.
		 Confirm against upstream before changing.  */
	      if (mode == DImode && GET_CODE (insn) == SET
		  && VECTOR_MODE_P (GET_MODE (SET_DEST (insn))))
		mode = GET_MODE (SET_DEST (insn));

	      if (VECTOR_MODE_P (mode))
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode)
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		  else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		    insn_entry[uid].is_live_out = 1;
		  union_uses (insn_entry, insn, mention);
		}
	    }

	  if (insn_entry[uid].is_relevant)
	    {
	      /* Determine if this is a load or store.  */
	      insn_entry[uid].is_load = insn_is_load_p (insn);
	      insn_entry[uid].is_store = insn_is_store_p (insn);

	      /* Determine if this is a doubleword swap.  If not,
		 determine whether it can legally be swapped.  */
	      if (insn_is_swap_p (insn))
		insn_entry[uid].is_swap = 1;
	      else
		{
		  unsigned int special = SH_NONE;
		  insn_entry[uid].is_swappable
		    = insn_is_swappable_p (insn_entry, insn, &special);
		  /* Special handling and subregs may not mix; a subreg
		     alone is itself a kind of special handling.  */
		  if (special != SH_NONE && insn_entry[uid].contains_subreg)
		    insn_entry[uid].is_swappable = 0;
		  else if (special != SH_NONE)
		    insn_entry[uid].special_handling = special;
		  else if (insn_entry[uid].contains_subreg)
		    insn_entry[uid].special_handling = SH_SUBREG;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      /* Any live-in/live-out value, unhandled subreg, 128-bit integer,
	 call, or order-sensitive insn poisons the whole web.  */
      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;
	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;
	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
/* Pass descriptor for the VSX doubleword-swap removal pass
   (RTL pass, dump-file name "swaps").  */
const pass_data pass_data_analyze_swaps =
{
RTL_PASS, /* type */
"swaps", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_df_finish, /* todo_flags_finish */
};
/* RTL pass that analyzes and removes redundant VSX doubleword swaps;
   the work is done by rs6000_analyze_swaps.  */
class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when optimizing little-endian VSX code, and only if the
     user has not disabled the pass with -mno-optimize-swaps.  */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	    && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }

}; // class pass_analyze_swaps
/* Create an instance of the VSX swap-removal pass; used when the
   pass is registered during option override.  */
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
return new pass_analyze_swaps (ctxt);
}
/* Initialize the GCC target structure.  */
struct gcc_target targetm = TARGET_INITIALIZER;
Allow double variables in upper registers with -mcpu=power7 or -mvsx
mupper-regs-sf
Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
Allow float variables in upper registers with -mcpu=power8 or -mp8-vector
moptimize-swaps
Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
Analyze and remove doubleword swaps from VSX computations.
2014-08-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/swaps-p8-1.c: New test.
* gcc.target/powerpc/swaps-p8-2.c: New test.
* gcc.target/powerpc/swaps-p8-3.c: New test.
* gcc.target/powerpc/swaps-p8-4.c: New test.
* gcc.target/powerpc/swaps-p8-5.c: New test.
* gcc.target/powerpc/swaps-p8-6.c: New test.
* gcc.target/powerpc/swaps-p8-7.c: New test.
* gcc.target/powerpc/swaps-p8-8.c: New test.
* gcc.target/powerpc/swaps-p8-9.c: New test.
* gcc.target/powerpc/swaps-p8-10.c: New test.
* gcc.target/powerpc/swaps-p8-11.c: New test.
* gcc.target/powerpc/swaps-p8-12.c: New test.
2014-08-20 Jan Hubicka <hubicka@ucw.cz>
* g++.dg/ipa/devirt-37.C: Fix testcase.
......
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
void abort();
#define N 16
signed char ca[N] __attribute__((aligned(16)));
signed char cb[] __attribute__((aligned(16)))
= {8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7};
signed char cc[] __attribute__((aligned(16)))
= {1, 1, 2, 2, 3, 3, 2, 2, 1, 1, 0, 0, -1, -1, -2, -2};
/* Vectorizable loop: ca[i] = cb[i] - cc[i] for all N elements.
   noinline keeps the loop isolated for the compiler test.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = cb[i] - cc[i];
}
}
/* Check ca against the precomputed expected differences in cd;
   abort on any mismatch.  */
int main ()
{
signed char cd[] = {7, 6, 4, 3, 1, 0, 0, -1, -1, -2, -2, -3, -3, -4, -4, -5};
int i;
foo ();
for (i = 0; i < N; ++i)
if (ca[i] != cd[i])
abort ();
return 0;
}
/* { dg-do run { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
void abort ();
#define N 4096
int ca[N] __attribute__((aligned(16)));
int cb[N] __attribute__((aligned(16)));
int cc[N] __attribute__((aligned(16)));
int cd[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = ((cb[i] + cc[i]) * cd[i]) >> 3.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = ((cb[i] + cc[i]) * cd[i]) >> 3;
}
}
/* Fill the input arrays with deterministic values; cd alternates
   between -1 (even i) and 1 (odd i).  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = 3 * i - 2048;
cc[i] = -5 * i + 93;
cd[i] = i % 2 ? 1 : -1;
}
}
/* Compare each result against the closed-form expected value,
   which depends on the parity of i; abort on mismatch.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (i % 2 == 1 && ca[i] != (-2 * i - 1955) >> 3)
abort ();
else if (i % 2 == 0 && ca[i] != (1955 + 2 * i) >> 3)
abort ();
return 0;
}
/* { dg-do run { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
#include <altivec.h>
void abort ();
#define N 4096
int ca[N] __attribute__((aligned(16)));
int cb[N] __attribute__((aligned(16)));
int cc[N] __attribute__((aligned(16)));
int cd[N] __attribute__((aligned(16)));
int hey;
/* Explicit VSX vector loop: load four ints at a time with
   vec_vsx_ld, compute ((vb + vc) - vd) >> 3 elementwise, store the
   result, and also extract element 3 into the scalar 'hey' each
   iteration (so only the last iteration's value survives).  */
__attribute__((noinline)) void foo ()
{
int i;
vector int va, vb, vc, vd, tmp;
vector unsigned int threes = vec_splat_u32(3);
for (i = 0; i < N; i+=4) {
vb = vec_vsx_ld (0, &cb[i]);
vc = vec_vsx_ld (0, &cc[i]);
vd = vec_vsx_ld (0, &cd[i]);
tmp = vec_add (vb, vc);
tmp = vec_sub (tmp, vd);
tmp = vec_sra (tmp, threes);
hey = tmp[3];
vec_vsx_st (tmp, 0, &ca[i]);
}
}
/* Fill the input arrays with deterministic linear values.  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = 3 * i - 2048;
cc[i] = -5 * i + 93;
cd[i] = i + 14;
}
}
/* Verify each element against the closed-form expected value, and
   verify that 'hey' holds the final element computed by foo.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (ca[i] != (-3 * i - 1969) >> 3)
abort ();
if (hey != ca[N-1])
abort ();
return 0;
}
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
#include "altivec.h"
void abort ();
#define N 4096
int ca[N] __attribute__((aligned(16)));
int cb[N] __attribute__((aligned(16)));
int cc[N] __attribute__((aligned(16)));
int cd[N] __attribute__((aligned(16)));
int hey;
/* Explicit VSX vector loop (compile-only variant): load four ints at
   a time with vec_vsx_ld, compute ((vb + vc) - vd) >> 3, store the
   result, and extract element 3 into the scalar 'hey'.  */
__attribute__((noinline)) void foo ()
{
int i;
vector int va, vb, vc, vd, tmp;
vector unsigned int threes = vec_splat_u32(3);
for (i = 0; i < N; i+=4) {
vb = vec_vsx_ld (0, &cb[i]);
vc = vec_vsx_ld (0, &cc[i]);
vd = vec_vsx_ld (0, &cd[i]);
tmp = vec_add (vb, vc);
tmp = vec_sub (tmp, vd);
tmp = vec_sra (tmp, threes);
hey = tmp[3];
vec_vsx_st (tmp, 0, &ca[i]);
}
}
/* Fill the input arrays with deterministic linear values.  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = 3 * i - 2048;
cc[i] = -5 * i + 93;
cd[i] = i + 14;
}
}
/* Verify each element against the closed-form expected value, and
   verify that 'hey' holds the final element computed by foo.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (ca[i] != (-3 * i - 1969) >> 3)
abort ();
if (hey != ca[N-1])
abort ();
return 0;
}
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
void abort ();
#define N 256
signed char ca[N] __attribute__((aligned(16)));
signed char cb[N] __attribute__((aligned(16)));
signed char cc[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = cb[i] - cc[i] for all N elements.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = cb[i] - cc[i];
}
}
/* Fill the inputs with deterministic values in signed-char range
   (N is 256, so i - 128 spans -128..127).  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = i - 128;
cc[i] = i/2 - 64;
}
}
/* Verify each element against the closed-form difference.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (ca[i] != i - i/2 - 64)
abort ();
return 0;
}
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
void abort ();
#define N 4096
signed char ca[N] __attribute__((aligned(16)));
signed char cb[N] __attribute__((aligned(16)));
signed char cc[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = cb[i] - cc[i] for all N elements.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = cb[i] - cc[i];
}
}
/* Fill the inputs with values that repeat with period 128, so each
   element stays within signed-char range even though N is 4096.  */
__attribute__((noinline)) void init ()
{
int i, ii;
for (i = 0, ii = 0; i < N; ++i, ii = (ii + 1) % 128) {
cb[i] = ii - 128;
cc[i] = ii/2 - 64;
}
}
/* Verify each element against the closed-form difference, using the
   same period-128 index that init used.  */
int main ()
{
int i, ii;
init ();
foo ();
for (i = 0; i < N; ++i) {
ii = i % 128;
if (ca[i] != ii - ii/2 - 64)
abort ();
}
return 0;
}
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
void abort ();
#define N 4096
int ca[N] __attribute__((aligned(16)));
int cb[N] __attribute__((aligned(16)));
int cc[N] __attribute__((aligned(16)));
int cd[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = (cb[i] + cc[i]) * cd[i].  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = (cb[i] + cc[i]) * cd[i];
}
}
/* Fill the input arrays with deterministic values; cd alternates
   between -1 (even i) and 1 (odd i).  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = 3 * i - 2048;
cc[i] = -5 * i + 93;
cd[i] = i % 2 ? 1 : -1;
}
}
/* Compare each result against the closed-form expected value,
   which depends on the parity of i; abort on mismatch.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (i % 2 == 1 && ca[i] != -2 * i - 1955)
abort ();
else if (i % 2 == 0 && ca[i] != 1955 + 2 * i)
abort ();
return 0;
}
/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler-not "xxpermdi" } } */
void abort ();
#define N 4096
int ca[N] __attribute__((aligned(16)));
int cb[N] __attribute__((aligned(16)));
int cc[N] __attribute__((aligned(16)));
int cd[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = ((cb[i] + cc[i]) * cd[i]) >> 3.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = ((cb[i] + cc[i]) * cd[i]) >> 3;
}
}
/* Fill the input arrays with deterministic values; cd alternates
   between -1 (even i) and 1 (odd i).  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = 3 * i - 2048;
cc[i] = -5 * i + 93;
cd[i] = i % 2 ? 1 : -1;
}
}
/* Compare each result against the closed-form expected value,
   which depends on the parity of i; abort on mismatch.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (i % 2 == 1 && ca[i] != (-2 * i - 1955) >> 3)
abort ();
else if (i % 2 == 0 && ca[i] != (1955 + 2 * i) >> 3)
abort ();
return 0;
}
/* { dg-do run { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
void abort();
#define N 16
signed char ca[N] __attribute__((aligned(16)));
signed char cb[] __attribute__((aligned(16)))
= {8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7};
signed char cc[] __attribute__((aligned(16)))
= {1, 1, 2, 2, 3, 3, 2, 2, 1, 1, 0, 0, -1, -1, -2, -2};
/* Vectorizable loop: ca[i] = cb[i] - cc[i] for all N elements
   (run-time variant of the compile-only test).  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = cb[i] - cc[i];
}
}
/* Check ca against the precomputed expected differences in cd;
   abort on any mismatch.  */
int main ()
{
signed char cd[] = {7, 6, 4, 3, 1, 0, 0, -1, -1, -2, -2, -3, -3, -4, -4, -5};
int i;
foo ();
for (i = 0; i < N; ++i)
if (ca[i] != cd[i])
abort ();
return 0;
}
/* { dg-do run { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
void abort ();
#define N 256
signed char ca[N] __attribute__((aligned(16)));
signed char cb[N] __attribute__((aligned(16)));
signed char cc[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = cb[i] - cc[i] for all N elements.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = cb[i] - cc[i];
}
}
/* Fill the inputs with deterministic values in signed-char range
   (N is 256, so i - 128 spans -128..127).  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = i - 128;
cc[i] = i/2 - 64;
}
}
/* Verify each element against the closed-form difference.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (ca[i] != i - i/2 - 64)
abort ();
return 0;
}
/* { dg-do run { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
void abort ();
#define N 4096
signed char ca[N] __attribute__((aligned(16)));
signed char cb[N] __attribute__((aligned(16)));
signed char cc[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = cb[i] - cc[i] for all N elements.  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = cb[i] - cc[i];
}
}
/* Fill the inputs with values that repeat with period 128, so each
   element stays within signed-char range even though N is 4096.  */
__attribute__((noinline)) void init ()
{
int i, ii;
for (i = 0, ii = 0; i < N; ++i, ii = (ii + 1) % 128) {
cb[i] = ii - 128;
cc[i] = ii/2 - 64;
}
}
/* Verify each element against the closed-form difference, using the
   same period-128 index that init used.  */
int main ()
{
int i, ii;
init ();
foo ();
for (i = 0; i < N; ++i) {
ii = i % 128;
if (ca[i] != ii - ii/2 - 64)
abort ();
}
return 0;
}
/* { dg-do run { target { powerpc64le-*-* } } } */
/* { dg-options "-mcpu=power8 -O3" } */
void abort ();
#define N 4096
int ca[N] __attribute__((aligned(16)));
int cb[N] __attribute__((aligned(16)));
int cc[N] __attribute__((aligned(16)));
int cd[N] __attribute__((aligned(16)));
/* Vectorizable loop: ca[i] = (cb[i] + cc[i]) * cd[i].  */
__attribute__((noinline)) void foo ()
{
int i;
for (i = 0; i < N; i++) {
ca[i] = (cb[i] + cc[i]) * cd[i];
}
}
/* Fill the input arrays with deterministic values; cd alternates
   between -1 (even i) and 1 (odd i).  */
__attribute__((noinline)) void init ()
{
int i;
for (i = 0; i < N; ++i) {
cb[i] = 3 * i - 2048;
cc[i] = -5 * i + 93;
cd[i] = i % 2 ? 1 : -1;
}
}
/* Compare each result against the closed-form expected value,
   which depends on the parity of i; abort on mismatch.  */
int main ()
{
int i;
init ();
foo ();
for (i = 0; i < N; ++i)
if (i % 2 == 1 && ca[i] != -2 * i - 1955)
abort ();
else if (i % 2 == 0 && ca[i] != 1955 + 2 * i)
abort ();
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment