Commit 4d210b07 by Richard Sandiford

2008-09-06  Richard Sandiford  <rdsandiford@googlemail.com>
	    Peter Fuerst  <post@pfrst.de>

gcc/
2008-09-06  Richard Sandiford  <rdsandiford@googlemail.com>
	    Peter Fuerst  <post@pfrst.de>

	* doc/invoke.texi: Document -mr10k-cache-barrier=.
	* doc/extend.texi: Document __builtin_mips_cache.
	* config/mips/mips-ftypes.def: Add a (VOID, SI, CVPOINTER) entry.
	* config/mips/mips.opt (mr10k-cache-barrier=): New option.
	* config/mips/mips.h (TARGET_CPU_CPP_BUILTINS): Define
	__GCC_HAVE_BUILTIN_MIPS_CACHE.
	(TARGET_CACHE_BUILTIN, ISA_HAS_CACHE): New macros.
	* config/mips/mips.c (mips_r10k_cache_barrier_setting): New enum.
	(set_push_mips_isas): New variable.
	(mips_r10k_cache_barrier): New variable.
	(cache): New availability predicate.
	(mips_builtins): Add an entry for __builtin_mips_cache.
	(mips_build_cvpointer_type): New function.
	(MIPS_ATYPE_CVPOINTER): New macro.
	(mips_prepare_builtin_arg): Only use the insn's mode if the rtx's
	mode is VOIDmode.
	(r10k_simplified_address_p, r10k_simplify_address)
	(r10k_uncached_address_p, r10k_safe_address_p)
	(r10k_needs_protection_p_1, r10k_needs_protection_p_store)
	(r10k_needs_protection_p_call, r10k_needs_protection_p)
	(r10k_insert_cache_barriers): New functions.
	(mips_reorg_process_insns): Delete cache barriers after a
	branch-likely instruction.
	(mips_reorg): Call r10k_insert_cache_barriers.
	(mips_handle_option): Handle OPT_mr10k_cache_barrier_.
	* config/mips/mips.md (UNSPEC_MIPS_CACHE): New constant.
	(UNSPEC_R10K_CACHE_BARRIER): Likewise.
	(mips_cache, r10k_cache_barrier): New define_insns.

gcc/testsuite/
	* gcc.target/mips/mips.exp (dg-mips-options): Make
	-mr10k-cache-barrier=* imply -mips3 or above.
	* gcc.target/mips/cache-1.c: New test.
	* gcc.target/mips/r10k-cache-barrier-1.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-2.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-3.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-4.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-5.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-6.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-7.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-8.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-9.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-10.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-11.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-12.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-13.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-14.c: Likewise.
	* gcc.target/mips/r10k-cache-barrier-15.c: Likewise.

From-SVN: r140055
2008-09-06 Richard Sandiford <rdsandiford@googlemail.com>
Peter Fuerst <post@pfrst.de>
* doc/invoke.texi: Document -mr10k-cache-barrier=.
* doc/extend.texi: Document __builtin_mips_cache.
* config/mips/mips-ftypes.def: Add a (VOID, SI, CVPOINTER) entry.
* config/mips/mips.opt (mr10k-cache-barrier=): New option.
* config/mips/mips.h (TARGET_CPU_CPP_BUILTINS): Define
__GCC_HAVE_BUILTIN_MIPS_CACHE.
(TARGET_CACHE_BUILTIN, ISA_HAS_CACHE): New macros.
* config/mips/mips.c (mips_r10k_cache_barrier_setting): New enum.
(set_push_mips_isas): New variable.
(mips_r10k_cache_barrier): New variable.
(cache): New availability predicate.
(mips_builtins): Add an entry for __builtin_mips_cache.
(mips_build_cvpointer_type): New function.
(MIPS_ATYPE_CVPOINTER): New macro.
(mips_prepare_builtin_arg): Only use the insn's mode if the rtx's
mode is VOIDmode.
(r10k_simplified_address_p, r10k_simplify_address)
(r10k_uncached_address_p, r10k_safe_address_p)
(r10k_needs_protection_p_1, r10k_needs_protection_p_store)
(r10k_needs_protection_p_call, r10k_needs_protection_p)
(r10k_insert_cache_barriers): New functions.
(mips_reorg_process_insns): Delete cache barriers after a
branch-likely instruction.
(mips_reorg): Call r10k_insert_cache_barriers.
(mips_handle_option): Handle OPT_mr10k_cache_barrier_.
* config/mips/mips.md (UNSPEC_MIPS_CACHE): New constant.
(UNSPEC_R10K_CACHE_BARRIER): Likewise.
(mips_cache, r10k_cache_barrier): New define_insns.
2008-09-06 Richard Sandiford <rdsandiford@googlemail.com>
* ira-int.h (ira_zero_hard_reg_set, ira_one_hard_reg_set): Delete.
@@ -120,6 +120,7 @@ DEF_MIPS_FTYPE (2, (V8QI, V4HI, V4HI))
DEF_MIPS_FTYPE (1, (V8QI, V8QI))
DEF_MIPS_FTYPE (2, (V8QI, V8QI, V8QI))
DEF_MIPS_FTYPE (2, (VOID, SI, CVPOINTER))
DEF_MIPS_FTYPE (2, (VOID, SI, SI))
DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI))
DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI))
@@ -148,6 +148,13 @@ enum mips_address_type {
ADDRESS_SYMBOLIC
};
/* Enumerates the setting of the -mr10k-cache-barrier option. */
enum mips_r10k_cache_barrier_setting {
R10K_CACHE_BARRIER_NONE,
R10K_CACHE_BARRIER_STORE,
R10K_CACHE_BARRIER_LOAD_STORE
};
/* Macros to create an enumeration identifier for a function prototype. */
#define MIPS_FTYPE_NAME1(A, B) MIPS_##A##_FTYPE_##B
#define MIPS_FTYPE_NAME2(A, B, C) MIPS_##A##_FTYPE_##B##_##C
@@ -456,6 +463,9 @@ static int mips_base_align_functions; /* align_functions */
/* The -mcode-readable setting. */
enum mips_code_readable_setting mips_code_readable = CODE_READABLE_YES;
/* The -mr10k-cache-barrier setting. */
static enum mips_r10k_cache_barrier_setting mips_r10k_cache_barrier;
/* Index [M][R] is true if register R is allowed to hold a value of mode M. */
bool mips_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER];
@@ -10922,6 +10932,7 @@ AVAIL_NON_MIPS16 (dspr2, TARGET_DSPR2)
AVAIL_NON_MIPS16 (dsp_32, !TARGET_64BIT && TARGET_DSP)
AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2)
AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS)
AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
/* Construct a mips_builtin_description from the given arguments.
@@ -11352,7 +11363,10 @@ static const struct mips_builtin_description mips_builtins[] = {
LOONGSON_BUILTIN_SUFFIX (punpcklwd, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
LOONGSON_BUILTIN_SUFFIX (punpcklbh, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
LOONGSON_BUILTIN_SUFFIX (punpcklhw, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI)
LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
/* Sundry other built-in functions. */
DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache)
};
/* MODE is a vector mode whose elements have type TYPE. Return the type
@@ -11374,10 +11388,25 @@ mips_builtin_vector_type (tree type, enum machine_mode mode)
return types[mode_index];
}
/* Return a type for 'const volatile void *'. */
static tree
mips_build_cvpointer_type (void)
{
static tree cache;
if (cache == NULL_TREE)
cache = build_pointer_type (build_qualified_type
(void_type_node,
TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
return cache;
}
/* Source-level argument types. */
#define MIPS_ATYPE_VOID void_type_node
#define MIPS_ATYPE_INT integer_type_node
#define MIPS_ATYPE_POINTER ptr_type_node
#define MIPS_ATYPE_CVPOINTER mips_build_cvpointer_type ()
/* Standard mode-based argument types. */
#define MIPS_ATYPE_UQI unsigned_intQI_type_node
@@ -11477,7 +11506,13 @@ mips_prepare_builtin_arg (enum insn_code icode,
mode = insn_data[icode].operand[opno].mode;
if (!insn_data[icode].operand[opno].predicate (value, mode))
{
value = copy_to_mode_reg (mode, value);
/* Cope with address operands, where MODE is not the mode of
VALUE itself. */
if (GET_MODE (value) == VOIDmode)
value = copy_to_mode_reg (mode, value);
else
value = copy_to_reg (value);
/* Check the predicate again. */
if (!insn_data[icode].operand[opno].predicate (value, mode))
{
@@ -12024,6 +12059,378 @@ mips16_lay_out_constants (void)
mips16_emit_constants (pool.first, get_last_insn ());
}
/* Return true if it is worth r10k_simplify_address's while replacing
an address with X. We are looking for constants, and for addresses
at a known offset from the incoming stack pointer. */
static bool
r10k_simplified_address_p (rtx x)
{
if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
x = XEXP (x, 0);
return x == virtual_incoming_args_rtx || CONSTANT_P (x);
}
/* X is an expression that appears in INSN. Try to use the UD chains
to simplify it, returning the simplified form on success and the
original form otherwise. Replace the incoming value of $sp with
virtual_incoming_args_rtx (which should never occur in X otherwise). */
static rtx
r10k_simplify_address (rtx x, rtx insn)
{
rtx newx, op0, op1, set, def_insn, note;
struct df_ref *use, *def;
struct df_link *defs;
newx = NULL_RTX;
if (UNARY_P (x))
{
op0 = r10k_simplify_address (XEXP (x, 0), insn);
if (op0 != XEXP (x, 0))
newx = simplify_gen_unary (GET_CODE (x), GET_MODE (x),
op0, GET_MODE (XEXP (x, 0)));
}
else if (BINARY_P (x))
{
op0 = r10k_simplify_address (XEXP (x, 0), insn);
op1 = r10k_simplify_address (XEXP (x, 1), insn);
if (op0 != XEXP (x, 0) || op1 != XEXP (x, 1))
newx = simplify_gen_binary (GET_CODE (x), GET_MODE (x), op0, op1);
}
else if (GET_CODE (x) == LO_SUM)
{
/* LO_SUMs can be offset from HIGHs, if we know they won't
overflow. See mips_classify_address for the rationale behind
the lax check. */
op0 = r10k_simplify_address (XEXP (x, 0), insn);
if (GET_CODE (op0) == HIGH)
newx = XEXP (x, 1);
}
else if (REG_P (x))
{
/* Uses are recorded by regno_reg_rtx, not X itself. */
use = df_find_use (insn, regno_reg_rtx[REGNO (x)]);
gcc_assert (use);
defs = DF_REF_CHAIN (use);
/* Require a single definition. */
if (defs && defs->next == NULL)
{
def = defs->ref;
if (DF_REF_IS_ARTIFICIAL (def))
{
/* Replace the incoming value of $sp with
virtual_incoming_args_rtx. */
if (x == stack_pointer_rtx
&& DF_REF_BB (def) == ENTRY_BLOCK_PTR)
newx = virtual_incoming_args_rtx;
}
else if (dominated_by_p (CDI_DOMINATORS, DF_REF_BB (use),
DF_REF_BB (def)))
{
/* Make sure that DEF_INSN is a single set of REG. */
def_insn = DF_REF_INSN (def);
if (NONJUMP_INSN_P (def_insn))
{
set = single_set (def_insn);
if (set && rtx_equal_p (SET_DEST (set), x))
{
/* Prefer to use notes, since the def-use chains
are often shorter. */
note = find_reg_equal_equiv_note (def_insn);
if (note)
newx = XEXP (note, 0);
else
newx = SET_SRC (set);
newx = r10k_simplify_address (newx, def_insn);
}
}
}
}
}
if (newx && r10k_simplified_address_p (newx))
return newx;
return x;
}
/* Return true if ADDRESS is known to be an uncached address
on R10K systems. */
static bool
r10k_uncached_address_p (unsigned HOST_WIDE_INT address)
{
unsigned HOST_WIDE_INT upper;
/* Check for KSEG1. */
if (address + 0x60000000 < 0x20000000)
return true;
/* Check for uncached XKPHYS addresses. */
if (Pmode == DImode)
{
upper = (address >> 40) & 0xf9ffff;
if (upper == 0x900000 || upper == 0xb80000)
return true;
}
return false;
}
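
(Editorial aside, not part of the patch: the following stand-alone sketch mirrors the uncached-address check above, exercised with addresses taken from the r10k-cache-barrier-1.c and r10k-cache-barrier-8.c tests further down; the helper name and the assertions are illustrative only.)

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Stand-alone copy of the check above, assuming a 64-bit Pmode.  */
static bool
uncached_address_p (uint64_t address)
{
  /* KSEG1: 0xffffffffa0000000 ... 0xffffffffbfffffff.  */
  if (address + 0x60000000 < 0x20000000)
    return true;
  /* Uncached XKPHYS regions.  */
  uint64_t upper = (address >> 40) & 0xf9ffff;
  return upper == 0x900000 || upper == 0xb80000;
}

int
main (void)
{
  /* Treated as uncached in r10k-cache-barrier-1.c: no barrier expected.  */
  assert (uncached_address_p (0x9000000000000000ull));
  assert (uncached_address_p (0xffffffffa0000000ull));
  /* Treated as cached in r10k-cache-barrier-8.c: a barrier is expected.  */
  assert (!uncached_address_p (0x9000010000000000ull));
  assert (!uncached_address_p (0xffffffffc0000000ull));
  return 0;
}
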
/* Return true if we can prove that an access to address X in instruction
INSN would be safe from R10K speculation. This X is a general
expression; it might not be a legitimate address. */
static bool
r10k_safe_address_p (rtx x, rtx insn)
{
rtx base, offset;
HOST_WIDE_INT offset_val;
x = r10k_simplify_address (x, insn);
/* Check for references to the stack frame. It doesn't really matter
how much of the frame has been allocated at INSN; -mr10k-cache-barrier
allows us to assume that accesses to any part of the eventual frame
are safe from speculation at any point in the function. */
mips_split_plus (x, &base, &offset_val);
if (base == virtual_incoming_args_rtx
&& offset_val >= -cfun->machine->frame.total_size
&& offset_val < cfun->machine->frame.args_size)
return true;
/* Check for uncached addresses. */
if (CONST_INT_P (x))
return r10k_uncached_address_p (INTVAL (x));
/* Check for accesses to a static object. */
split_const (x, &base, &offset);
return offset_within_block_p (base, INTVAL (offset));
}
/* Return true if a MEM with MEM_EXPR EXPR and MEM_OFFSET OFFSET is
an in-range access to an automatic variable, or to an object with
a link-time-constant address. */
static bool
r10k_safe_mem_expr_p (tree expr, rtx offset)
{
if (expr == NULL_TREE
|| offset == NULL_RTX
|| !CONST_INT_P (offset)
|| INTVAL (offset) < 0
|| INTVAL (offset) >= int_size_in_bytes (TREE_TYPE (expr)))
return false;
while (TREE_CODE (expr) == COMPONENT_REF)
{
expr = TREE_OPERAND (expr, 0);
if (expr == NULL_TREE)
return false;
}
return DECL_P (expr);
}
/* A for_each_rtx callback for which DATA points to the instruction
containing *X. Stop the search if we find a MEM that is not safe
from R10K speculation. */
static int
r10k_needs_protection_p_1 (rtx *loc, void *data)
{
rtx mem;
mem = *loc;
if (!MEM_P (mem))
return 0;
if (r10k_safe_mem_expr_p (MEM_EXPR (mem), MEM_OFFSET (mem)))
return -1;
if (r10k_safe_address_p (XEXP (mem, 0), (rtx) data))
return -1;
return 1;
}
/* A note_stores callback for which DATA points to an instruction pointer.
If *DATA is nonnull, make it null if X contains a MEM that is not
safe from R10K speculation. */
static void
r10k_needs_protection_p_store (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
void *data)
{
rtx *insn_ptr;
insn_ptr = (rtx *) data;
if (*insn_ptr && for_each_rtx (&x, r10k_needs_protection_p_1, *insn_ptr))
*insn_ptr = NULL_RTX;
}
/* A for_each_rtx callback that iterates over the pattern of a CALL_INSN.
Return nonzero if the call is not to a declared function. */
static int
r10k_needs_protection_p_call (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
rtx x;
x = *loc;
if (!MEM_P (x))
return 0;
x = XEXP (x, 0);
if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DECL (x))
return -1;
return 1;
}
/* Return true if instruction INSN needs to be protected by an R10K
cache barrier. */
static bool
r10k_needs_protection_p (rtx insn)
{
if (CALL_P (insn))
return for_each_rtx (&PATTERN (insn), r10k_needs_protection_p_call, NULL);
if (mips_r10k_cache_barrier == R10K_CACHE_BARRIER_STORE)
{
note_stores (PATTERN (insn), r10k_needs_protection_p_store, &insn);
return insn == NULL_RTX;
}
return for_each_rtx (&PATTERN (insn), r10k_needs_protection_p_1, insn);
}
/* Return true if BB is only reached by blocks in PROTECTED_BBS and if every
edge is unconditional. */
static bool
r10k_protected_bb_p (basic_block bb, sbitmap protected_bbs)
{
edge_iterator ei;
edge e;
FOR_EACH_EDGE (e, ei, bb->preds)
if (!single_succ_p (e->src)
|| !TEST_BIT (protected_bbs, e->src->index)
|| (e->flags & EDGE_COMPLEX) != 0)
return false;
return true;
}
/* Implement -mr10k-cache-barrier= for the current function. */
static void
r10k_insert_cache_barriers (void)
{
int *rev_post_order;
unsigned int i, n;
basic_block bb;
sbitmap protected_bbs;
rtx insn, end, unprotected_region;
if (TARGET_MIPS16)
{
sorry ("%qs does not support MIPS16 code", "-mr10k-cache-barrier");
return;
}
/* Restore the BLOCK_FOR_INSN pointers, which are needed by DF. */
compute_bb_for_insn ();
/* Create def-use chains. */
df_set_flags (DF_EQ_NOTES);
df_chain_add_problem (DF_UD_CHAIN);
df_analyze ();
/* Calculate dominators. */
calculate_dominance_info (CDI_DOMINATORS);
/* Bit X of PROTECTED_BBS is set if the last operation in basic block
X is protected by a cache barrier. */
protected_bbs = sbitmap_alloc (last_basic_block);
sbitmap_zero (protected_bbs);
/* Iterate over the basic blocks in reverse post-order. */
rev_post_order = XNEWVEC (int, last_basic_block);
n = pre_and_rev_post_order_compute (NULL, rev_post_order, false);
for (i = 0; i < n; i++)
{
bb = BASIC_BLOCK (rev_post_order[i]);
/* If this block is only reached by unconditional edges, and if the
source of every edge is protected, the beginning of the block is
also protected. */
if (r10k_protected_bb_p (bb, protected_bbs))
unprotected_region = NULL_RTX;
else
unprotected_region = pc_rtx;
end = NEXT_INSN (BB_END (bb));
/* UNPROTECTED_REGION is:
- null if we are processing a protected region,
- pc_rtx if we are processing an unprotected region but have
not yet found the first instruction in it
- the first instruction in an unprotected region otherwise. */
for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn))
{
if (unprotected_region && INSN_P (insn))
{
if (recog_memoized (insn) == CODE_FOR_mips_cache)
/* This CACHE instruction protects the following code. */
unprotected_region = NULL_RTX;
else
{
/* See if INSN is the first instruction in this
unprotected region. */
if (unprotected_region == pc_rtx)
unprotected_region = insn;
/* See if INSN needs to be protected. If so,
we must insert a cache barrier somewhere between
PREV_INSN (UNPROTECTED_REGION) and INSN. It isn't
clear which position is better performance-wise,
but as a tie-breaker, we assume that it is better
to allow delay slots to be back-filled where
possible, and that it is better not to insert
barriers in the middle of already-scheduled code.
We therefore insert the barrier at the beginning
of the region. */
if (r10k_needs_protection_p (insn))
{
emit_insn_before (gen_r10k_cache_barrier (),
unprotected_region);
unprotected_region = NULL_RTX;
}
}
}
if (CALL_P (insn))
/* The called function is not required to protect the exit path.
The code that follows a call is therefore unprotected. */
unprotected_region = pc_rtx;
}
/* Record whether the end of this block is protected. */
if (unprotected_region == NULL_RTX)
SET_BIT (protected_bbs, bb->index);
}
XDELETEVEC (rev_post_order);
sbitmap_free (protected_bbs);
free_dominance_info (CDI_DOMINATORS);
df_finish_pass (false);
free_bb_for_insn ();
}
/* A temporary variable used by for_each_rtx callbacks, etc. */
static rtx mips_sim_insn;
@@ -12675,6 +13082,13 @@ mips_reorg_process_insns (void)
orphaned high-part relocation. */
if (mips_orphaned_high_part_p (htab, insn))
delete_insn (insn);
/* Also delete cache barriers if the last instruction
was an annulled branch. INSN will not be speculatively
executed. */
else if (recog_memoized (insn) == CODE_FOR_r10k_cache_barrier
&& last_insn
&& INSN_ANNULLED_BRANCH_P (SEQ_BEGIN (last_insn)))
delete_insn (insn);
else
{
mips_avoid_hazard (last_insn, insn, &hilo_delay,
@@ -12694,6 +13108,8 @@ static void
mips_reorg (void)
{
mips16_lay_out_constants ();
if (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE)
r10k_insert_cache_barriers ();
if (mips_base_delayed_branch)
dbr_schedule (get_insns ());
mips_reorg_process_insns ();
@@ -13123,6 +13539,17 @@ mips_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
return false;
return true;
case OPT_mr10k_cache_barrier_:
if (strcmp (arg, "load-store") == 0)
mips_r10k_cache_barrier = R10K_CACHE_BARRIER_LOAD_STORE;
else if (strcmp (arg, "store") == 0)
mips_r10k_cache_barrier = R10K_CACHE_BARRIER_STORE;
else if (strcmp (arg, "none") == 0)
mips_r10k_cache_barrier = R10K_CACHE_BARRIER_NONE;
else
return false;
return true;
default:
return true;
}
@@ -13358,6 +13785,14 @@ mips_override_options (void)
warning (0, "the %qs architecture does not support paired-single"
" instructions", mips_arch_info->name);
if (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE
&& !TARGET_CACHE_BUILTIN)
{
error ("%qs requires a target that provides the %qs instruction",
"-mr10k-cache-barrier", "cache");
mips_r10k_cache_barrier = R10K_CACHE_BARRIER_NONE;
}
/* If TARGET_DSPR2, enable MASK_DSP. */
if (TARGET_DSPR2)
target_flags |= MASK_DSP;
@@ -563,6 +563,9 @@ enum mips_code_readable_setting {
\
if (mips_abi == ABI_EABI) \
builtin_define ("__mips_eabi"); \
\
if (TARGET_CACHE_BUILTIN) \
builtin_define ("__GCC_HAVE_BUILTIN_MIPS_CACHE"); \
} \
while (0)
@@ -1020,6 +1023,12 @@ enum mips_code_readable_setting {
/* ISA includes the pop instruction. */
#define ISA_HAS_POP TARGET_OCTEON
/* The CACHE instruction is available in non-MIPS16 code. */
#define TARGET_CACHE_BUILTIN (mips_isa >= 3)
/* The CACHE instruction is available. */
#define ISA_HAS_CACHE (TARGET_CACHE_BUILTIN && !TARGET_MIPS16)
/* Add -G xx support. */
@@ -247,6 +247,9 @@
(UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN 531)
(UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN 532)
(UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN 533)
(UNSPEC_MIPS_CACHE 600)
(UNSPEC_R10K_CACHE_BARRIER 601)
]
)
@@ -4722,6 +4725,25 @@
}
[(set_attr "length" "20")])
;; Cache operations for R4000-style caches.
(define_insn "mips_cache"
[(set (mem:BLK (scratch))
(unspec:BLK [(match_operand:SI 0 "const_int_operand")
(match_operand:QI 1 "address_operand" "p")]
UNSPEC_MIPS_CACHE))]
"ISA_HAS_CACHE"
"cache\t%X0,%a1")
;; Similar, but with the operands hard-coded to an R10K cache barrier
;; operation. We keep the pattern distinct so that we can identify
;; cache operations inserted by -mr10k-cache-barrier=, and so that
;; the operation is never inserted into a delay slot.
(define_insn "r10k_cache_barrier"
[(set (mem:BLK (scratch))
(unspec:BLK [(const_int 0)] UNSPEC_R10K_CACHE_BARRIER))]
"ISA_HAS_CACHE"
"cache\t0x14,0(%$)"
[(set_attr "can_delay" "no")])
;; Block moves, see mips.c for more details.
;; Argument 0 is the destination
@@ -236,6 +236,10 @@ mpaired-single
Target Report Mask(PAIRED_SINGLE_FLOAT)
Use paired-single floating-point instructions
mr10k-cache-barrier=
Target Joined RejectNegative
-mr10k-cache-barrier=SETTING Specify when r10k cache barriers should be inserted
mshared
Target Report Var(TARGET_SHARED) Init(1)
When generating -mabicalls code, make the code suitable for use in shared libraries
@@ -6985,6 +6985,7 @@ instructions, but allow the compiler to schedule those calls.
* MIPS DSP Built-in Functions::
* MIPS Paired-Single Support::
* MIPS Loongson Built-in Functions::
* Other MIPS Built-in Functions::
* picoChip Built-in Functions::
* PowerPC AltiVec Built-in Functions::
* SPARC VIS Built-in Functions::
@@ -9440,6 +9441,18 @@ implementing assertions.
@end table
@node Other MIPS Built-in Functions
@subsection Other MIPS Built-in Functions
GCC provides other MIPS-specific built-in functions:
@table @code
@item void __builtin_mips_cache (int @var{op}, const volatile void *@var{addr})
Insert a @samp{cache} instruction with operands @var{op} and @var{addr}.
GCC defines the preprocessor macro @code{__GCC_HAVE_BUILTIN_MIPS_CACHE}
when this function is available.
@end table
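
(Editorial aside, not from the patch: a minimal usage sketch of the new built-in. The function name is illustrative, and the operation code 0x14 is chosen to match the barrier operation emitted by the new r10k_cache_barrier pattern.)

/* Issue a cache operation on LINE only when the built-in exists.  */
void
touch_line (volatile void *line)
{
#ifdef __GCC_HAVE_BUILTIN_MIPS_CACHE
  __builtin_mips_cache (0x14, line);
#else
  (void) line;
#endif
}
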
@node PowerPC AltiVec Built-in Functions
@subsection PowerPC AltiVec Built-in Functions
@@ -12808,6 +12808,73 @@ Work around certain SB-1 CPU core errata.
(This flag currently works around the SB-1 revision 2
``F1'' and ``F2'' floating point errata.)
@item -mr10k-cache-barrier=@var{setting}
@opindex mr10k-cache-barrier
Specify whether GCC should insert cache barriers to avoid the
side-effects of speculation on R10K processors.
In common with many processors, the R10K tries to predict the outcome
of a conditional branch and speculatively executes instructions from
the ``taken'' branch. It later aborts these instructions if the
predicted outcome was wrong. However, on the R10K, even aborted
instructions can have side effects.
This problem only affects kernel stores and, depending on the system,
kernel loads. As an example, a speculatively-executed store may load
the target memory into cache and mark the cache line as dirty, even if
the store itself is later aborted. If a DMA operation writes to the
same area of memory before the ``dirty'' line is flushed, the cached
data will overwrite the DMA-ed data. See the R10K processor manual
for a full description, including other potential problems.
One workaround is to insert cache barrier instructions before every memory
access that might be speculatively executed and that might have side
effects even if aborted. @option{-mr10k-cache-barrier=@var{setting}}
controls GCC's implementation of this workaround. It assumes that
aborted accesses to any byte in the following regions will not have
side effects:
@enumerate
@item
the memory occupied by the current function's stack frame;
@item
the memory occupied by an incoming stack argument;
@item
the memory occupied by an object with a link-time-constant address.
@end enumerate
It is the kernel's responsibility to ensure that speculative
accesses to these regions are indeed safe.
If the input program contains a function declaration such as:
@smallexample
void foo (void);
@end smallexample
then the implementation of @code{foo} must allow @code{j foo} and
@code{jal foo} to be executed speculatively. GCC honors this
restriction for functions it compiles itself. It expects non-GCC
functions (such as hand-written assembly code) to do the same.
The option has three forms:
@table @gcctabopt
@item -mr10k-cache-barrier=load-store
Insert a cache barrier before a load or store that might be
speculatively executed and that might have side effects even
if aborted.
@item -mr10k-cache-barrier=store
Insert a cache barrier before a store that might be speculatively
executed and that might have side effects even if aborted.
@item -mr10k-cache-barrier=none
Disable the insertion of cache barriers. This is the default setting.
@end table
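
(Editorial aside, not from the manual: as a concrete illustration, compiling the function below with @option{-O2 -mr10k-cache-barrier=store} inserts a single @samp{cache 0x14,0($sp)} barrier before the store, since the store target is an incoming pointer that GCC cannot prove safe and the code after a call is treated as unprotected; this mirrors gcc.target/mips/r10k-cache-barrier-4.c further down.)

int bar (int);

void
foo (int *ptr)
{
  /* The call leaves the following code unprotected, and *PTR is not
     provably a safe address, so a cache barrier precedes this store.  */
  *ptr = bar (*ptr);
}
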
@item -mflush-func=@var{func}
@itemx -mno-flush-func
@opindex mflush-func
2008-09-06 Richard Sandiford <rdsandiford@googlemail.com>
* gcc.target/mips/mips.exp (dg-mips-options): Make
-mr10k-cache-barrier=* imply -mips3 or above.
* gcc.target/mips/cache-1.c: New test.
* gcc.target/mips/r10k-cache-barrier-1.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-2.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-3.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-4.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-5.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-6.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-7.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-8.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-9.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-10.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-11.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-12.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-13.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-14.c: Likewise.
* gcc.target/mips/r10k-cache-barrier-15.c: Likewise.
2008-09-05 Daniel Kraft <d@domob.eu>
PR fortran/35837
/* { dg-mips-options "-O2" } */
void
f1 (int *area)
{
__builtin_mips_cache (20, area);
}
void
f2 (const short *area)
{
__builtin_mips_cache (24, area + 10);
}
void
f3 (volatile unsigned int *area, int offset)
{
__builtin_mips_cache (0, area + offset);
}
void
f4 (const volatile unsigned char *area)
{
__builtin_mips_cache (4, area - 80);
}
/* { dg-final { scan-assembler "\tcache\t0x14,0\\(\\\$4\\)" } } */
/* { dg-final { scan-assembler "\tcache\t0x18,20\\(\\\$4\\)" } } */
/* { dg-final { scan-assembler "\tcache\t0x0,0\\(\\\$.\\)" } } */
/* { dg-final { scan-assembler "\tcache\t0x4,-80\\(\\\$4\\)" } } */
@@ -238,6 +238,10 @@ proc dg-mips-options {args} {
} else {
append flags " -msoft-float"
}
} elseif {[regexp -- {^-mr10k-cache-barrier=(load|store)} $flag]
&& $mips_isa < 3
&& [lsearch -regexp $flags {^(-mips|-march)}] < 0} {
append flags " -mips3"
}
}
foreach flag $flags {
/* { dg-mips-options "-O2 -mabi=64 -mr10k-cache-barrier=store" } */
/* Test that stores to uncached addresses do not get unnecessary
cache barriers. */
#define TEST(ADDR) \
NOMIPS16 void \
test_##ADDR (int n) \
{ \
while (n--) \
{ \
*(volatile char *) (0x##ADDR##UL) = 1; \
*(volatile short *) (0x##ADDR##UL + 2) = 2; \
*(volatile int *) (0x##ADDR##UL + 4) = 0; \
} \
}
TEST (9000000000000000)
TEST (900000fffffffff8)
TEST (9200000000000000)
TEST (920000fffffffff8)
TEST (9400000000000000)
TEST (940000fffffffff8)
TEST (9600000000000000)
TEST (960000fffffffff8)
TEST (b800000000000000)
TEST (b80000fffffffff8)
TEST (ba00000000000000)
TEST (ba0000fffffffff8)
TEST (bc00000000000000)
TEST (bc0000fffffffff8)
TEST (be00000000000000)
TEST (be0000fffffffff8)
TEST (ffffffffa0000000)
TEST (ffffffffbffffff8)
/* { dg-final { scan-assembler-not "\tcache\t" } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mips4 -mbranch-likely -mno-abicalls" } */
int bar (int);
/* Test that code after a branch-likely does not get an unnecessary
cache barrier. */
NOMIPS16 void
foo (int n, int *x)
{
do
n = bar (n * 4 + 1);
while (n);
/* The preceding branch should be a branch likely, with the shift as
its delay slot. We therefore don't need a cache barrier here. */
x[0] = 0;
}
/* { dg-final { scan-assembler-not "\tcache\t" } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */
/* Test that loads are not unnecessarily protected. */
int bar (int);
NOMIPS16 void
foo (int *ptr)
{
*ptr = bar (*ptr);
}
/* { dg-final { scan-assembler-times "\tcache\t" 1 } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=load-store -mno-abicalls" } */
/* Test that loads are correctly protected. */
int bar (int);
NOMIPS16 void
foo (int *ptr)
{
*ptr = bar (*ptr);
}
/* { dg-final { scan-assembler-times "\tcache\t" 2 } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store" } */
/* Test that indirect calls are protected. */
int bar (int);
NOMIPS16 void
foo (void (*fn) (void), int x)
{
if (x)
(*fn) ();
}
/* { dg-final { scan-assembler-times "\tcache\t" 1 } } */
/* { dg-do compile { target mips16_attribute } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store" } */
/* { dg-add-options mips16_attribute } */
/* Test that indirect calls are protected. */
MIPS16 void foo (void) { } /* { dg-message "sorry, unimplemented" } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mips2" } */
/* { dg-error "requires.*cache.*instruction" "" { target *-*-* } 0 } */
/* { dg-mips-options "-O2 -mabi=64 -mr10k-cache-barrier=store" } */
/* Test that stores to constant cached addresses are protected
by cache barriers. */
#define TEST(ADDR) \
NOMIPS16 void \
test_##ADDR (int n) \
{ \
*(volatile int *) (0x##ADDR##UL) = 1; \
}
TEST (8ffffffffffffffc)
TEST (9000010000000000)
TEST (91fffffffffffffc)
TEST (9200010000000000)
TEST (93fffffffffffffc)
TEST (9500010000000000)
TEST (95fffffffffffffc)
TEST (9600010000000000)
TEST (b7fffffffffffffc)
TEST (b800010000000000)
TEST (b9fffffffffffffc)
TEST (ba00010000000000)
TEST (bbfffffffffffffc)
TEST (bc00010000000000)
TEST (bdfffffffffffffc)
TEST (be00010000000000)
TEST (ffffffff9ffffffc)
TEST (ffffffffc0000000)
/* { dg-final { scan-assembler-times "\tcache\t" 18 } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */
/* Test that in-range stores to the frame are not protected by
cache barriers. */
void bar (int *x);
NOMIPS16 void
foo (int v)
{
int x[0x100000];
bar (x);
x[0x20] = v;
bar (x);
}
/* { dg-final { scan-assembler-not "\tcache\t" } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */
void bar (int *x);
/* Test that out-of-range stores to the frame are protected by cache
barriers. */
NOMIPS16 void
foo (int v)
{
int x[8];
bar (x);
if (v & 1)
x[0x100] = 0;
if (v & 2)
x[-0x100] = 0;
bar (x);
}
/* { dg-final { scan-assembler-times "\tcache\t" 2 } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls -mabi=64" } */
/* Test that in-range stores to static objects do not get an unnecessary
cache barrier. */
int x[4];
void bar (void);
NOMIPS16 void
foo (int n)
{
while (n--)
{
x[3] = 1;
bar ();
}
}
/* { dg-final { scan-assembler-not "\tcache\t" } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mabi=64" } */
int x[4];
void bar (void);
/* Test that out-of-range stores to static objects are protected by a
cache barrier. */
NOMIPS16 void
foo (int n)
{
while (n--)
{
x[4] = 1;
bar ();
}
}
/* { dg-final { scan-assembler "\tcache\t" } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -mno-abicalls" } */
void bar1 (void);
void bar2 (void);
void bar3 (void);
NOMIPS16 void
foo (int *x, int sel, int n)
{
if (sel)
{
bar1 ();
x[0] = 1;
}
else
{
bar2 ();
x[1] = 0;
}
/* If there is one copy of this code, reached by two unconditional edges,
then it shouldn't need a third cache barrier. */
x[2] = 2;
while (n--)
bar3 ();
}
/* { dg-final { scan-assembler-times "\tcache\t" 2 } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -G8" } */
/* Test that in-range stores to components of static objects
do not get an unnecessary cache barrier. */
struct { struct { char i[4]; } a; struct { char j[4]; } b; } s;
NOMIPS16 void
foo (int sel)
{
s.a.i[0] = 1;
s.b.j[3] = 100;
}
/* { dg-final { scan-assembler-not "\tcache\t" } } */
/* { dg-mips-options "-O2 -mr10k-cache-barrier=store -G8" } */
/* Test that out-of-range stores to components of static objects
are protected by a cache barrier. */
struct { struct { char i[4]; } a; struct { char j[4]; } b; } s;
NOMIPS16 void
foo (int sel1, int sel2, int sel3)
{
if (sel1)
s.a.i[8] = 1;
if (sel2)
s.b.j[4] = 100;
if (sel3)
s.a.i[-1] = 0;
}
/* { dg-final { scan-assembler-times "\tcache\t" 3 } } */