Commit d88cd9c4 by Nathan Sidwell Committed by Nathan Sidwell

nvptx.h (struct machine_function): Add axis_predicate.

	* config/nvptx/nvptx.h (struct machine_function): Add
	axis_predicate.
	* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
	nvptx_expand_oacc_join): Declare.
	* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
	(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
	UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
	(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
	UNSPECV_JOINING, UNSPECV_JOIN): New.
	(BITS, BITD): New mode iterators.
	(br_true_uni, br_false_uni): New.
	(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
	(oacc_dim_size, oacc_dim_pos): New.
	(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
	(oacc_fork, oacc_join): New.
	(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
	(worker_load<mode>, worker_store<mode>): New.
	(nvptx_barsync): New.
	* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
	(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
	(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
	worker_bcast_sym): New.
	(nvptx_option_override): Initialize worker broadcast buffer.
	(nvptx_emit_forking, nvptx_emit_joining): New.
	(nvptx_init_axis_predicate): New.
	(nvptx_declare_function_name): Init axis predicates.
	(nvptx_expand_call): Add fork/join markers around routine call.
	(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
	(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
	(nvptx_gen_vcast): New.
	(struct wcast_data_t): New.
	(enum propagate_mask): New.
	(nvptx_gen_wcast): New.
	(nvptx_print_operand): Add 'S' case.
	(struct parallel): New.
	(parallel::parallel, parallel::~parallel): New.
	(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
	(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
	nvptx_find_par, nvptx_discover_pars): New.
	(nvptx_propagate): New.
	(vprop_gen, nvptx_vpropagate): New.
	(wprop_gen, nvptx_wpropagate): New.
	(nvptx_wsync): New.
	(nvptx_single, nvptx_skip_par): New.
	(nvptx_process_pars, nvptx_neuter_pars): New.
	(nvptx_reorg): Split blocks, generate parallel structure, apply
	neutering.
	(nvptx_cannot_copy_insn_p): New.
	(nvptx_file_end): Emit worker broadcast decl.
	(nvptx_goacc_fork_join): New.
	(TARGET_CANNOT_COPY_INSN_P): Override.
	(TARGET_GOACC_FORK_JOIN): Override.

From-SVN: r229486
parent 1e355e1d
2015-10-28 Nathan Sidwell <nathan@codesourcery.com>
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
2015-10-28 Richard Biener <rguenther@suse.de>
* fold-const.c (negate_expr_p): Adjust the division case to
......@@ -32,6 +32,8 @@ extern void nvptx_register_pragmas (void);
extern const char *nvptx_section_for_decl (const_tree);
#ifdef RTX_CODE
/* Helpers called from the oacc_fork and oacc_join expanders in nvptx.md.
   Each is passed the integer value of the pattern's const_int operand 2.  */
extern void nvptx_expand_oacc_fork (unsigned);
extern void nvptx_expand_oacc_join (unsigned);
extern void nvptx_expand_call (rtx, rtx);
extern rtx nvptx_expand_compare (rtx);
extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
......
......@@ -230,6 +230,7 @@ struct GTY(()) machine_function
HOST_WIDE_INT outgoing_stdarg_size;
int ret_reg_mode; /* machine_mode not defined yet. */
int punning_buffer_size;
rtx axis_predicate[2];
};
#endif
......
......@@ -49,14 +49,27 @@
UNSPEC_ALLOCA
UNSPEC_NTID
UNSPEC_TID
UNSPEC_DIM_SIZE
UNSPEC_SHARED_DATA
UNSPEC_BIT_CONV
UNSPEC_SHUFFLE
UNSPEC_BR_UNIFIED
])
;; Codes for unspec_volatile operations.
;; UNSPECV_BARSYNC is used by nvptx_barsync, UNSPECV_DIM_POS by oacc_dim_pos,
;; and UNSPECV_FORK / UNSPECV_FORKED / UNSPECV_JOINING / UNSPECV_JOIN by the
;; nvptx_fork, nvptx_forked, nvptx_joining and nvptx_join marker insns.
(define_c_enum "unspecv" [
UNSPECV_LOCK
UNSPECV_CAS
UNSPECV_XCHG
UNSPECV_BARSYNC
UNSPECV_DIM_POS
UNSPECV_FORK
UNSPECV_FORKED
UNSPECV_JOINING
UNSPECV_JOIN
])
(define_attr "subregs_ok" "false,true"
......@@ -246,6 +259,8 @@
(define_mode_iterator QHSIM [QI HI SI])
(define_mode_iterator SDFM [SF DF])
(define_mode_iterator SDCM [SC DC])
;; 32-bit modes handled by nvptx_shuffle<mode>.
(define_mode_iterator BITS [SI SF])
;; 64-bit modes taken apart by unpack<mode>si2 and assembled by packsi<mode>2.
(define_mode_iterator BITD [DI DF])
;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities. Exactly one of the two alternatives will match.
......@@ -817,6 +832,23 @@
""
"%J0\\tbra\\t%l1;")
;; unified conditional branch
;; Jump to the label in operand 1 when the BImode register in operand 0
;; compares not-equal to zero; otherwise fall through.  The predicate is
;; wrapped in UNSPEC_BR_UNIFIED, so this pattern is distinct from the plain
;; conditional branch, and it is output as "bra.uni".
;; NOTE(review): %j0 presumably prints the guarding predicate -- confirm
;; against nvptx_print_operand.
(define_insn "br_true_uni"
[(set (pc) (if_then_else
(ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
UNSPEC_BR_UNIFIED) (const_int 0))
(label_ref (match_operand 1 "" "")) (pc)))]
""
"%j0\\tbra.uni\\t%l1;")
;; Counterpart of br_true_uni: jump to the label in operand 1 when the
;; BImode register in operand 0 compares equal to zero.  Uses the %J0
;; modifier instead of %j0 and is likewise output as "bra.uni".
;; NOTE(review): %J0 presumably prints the negated predicate -- confirm
;; against nvptx_print_operand.
(define_insn "br_false_uni"
[(set (pc) (if_then_else
(eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
UNSPEC_BR_UNIFIED) (const_int 0))
(label_ref (match_operand 1 "" "")) (pc)))]
""
"%J0\\tbra.uni\\t%l1;")
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else (match_operator 0 "nvptx_comparison_operator"
......@@ -1308,36 +1340,134 @@
DONE;
})
(define_insn "*oacc_ntid_insn"
[(set (match_operand:SI 0 "nvptx_register_operand" "=R")
(unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NTID))]
;; Set operand 0 to the size of the dimension selected by the constant
;; operand 1.  Operand 1 indexes the table below directly (0 = gang,
;; 1 = worker, 2 = vector), so it must be one of those three values; the
;; corresponding value is read from %nctaid.x, %ntid.y or %ntid.x.
(define_insn "oacc_dim_size"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
                   UNSPEC_DIM_SIZE))]
  ""
{
  static const char *const asms[] =
    { /* Must match oacc_loop_levels ordering. */
      "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
      "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
      "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
    };
  return asms[INTVAL (operands[1])];
})
(define_expand "oacc_ntid"
;; Set operand 0 to the position within the dimension selected by the
;; constant operand 1.  Operand 1 indexes the table below directly
;; (0 = gang, 1 = worker, 2 = vector), so it must be one of those three
;; values; the value is read from %ctaid.x, %tid.y or %tid.x.  The source
;; is an unspec_volatile (UNSPECV_DIM_POS).
(define_insn "oacc_dim_pos"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
                            UNSPECV_DIM_POS))]
  ""
{
  static const char *const asms[] =
    { /* Must match oacc_loop_levels ordering. */
      "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
      "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
      "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
    };
  return asms[INTVAL (operands[1])];
})
(define_insn "*oacc_tid_insn"
[(set (match_operand:SI 0 "nvptx_register_operand" "=R")
(unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_TID))]
;; Marker insn for a fork point.  It takes a single constant operand and
;; emits only an assembler comment ("// fork N;"); the unspec_volatile
;; wrapper (UNSPECV_FORK) is what identifies it in the insn stream.
(define_insn "nvptx_fork"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_FORK)]
  ""
  "// fork %0;"
)
(define_expand "oacc_tid"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
(unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_TID))]
;; Marker insn tagged UNSPECV_FORKED.  It takes a single constant operand
;; and emits only an assembler comment ("// forked N;").
(define_insn "nvptx_forked"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_FORKED)]
""
"// forked %0;"
)
;; Marker insn tagged UNSPECV_JOINING.  It takes a single constant operand
;; and emits only an assembler comment ("// joining N;").
(define_insn "nvptx_joining"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_JOINING)]
""
"// joining %0;"
)
;; Marker insn tagged UNSPECV_JOIN.  It takes a single constant operand
;; and emits only an assembler comment ("// join N;").
(define_insn "nvptx_join"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_JOIN)]
""
"// join %0;"
)
;; Expander for an OpenACC fork.
;;   operand 0: destination; no copy is emitted when it is const0_rtx.
;;   operand 1: value copied into operand 0.  It is a general operand, not
;;              necessarily a constant, so it must not be read with INTVAL.
;;   operand 2: constant passed on to nvptx_expand_oacc_fork.
;; The expander ends with DONE, so the code is generated entirely by the
;; C body below and mirrors oacc_join.
(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "nvptx_general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_FORKED)]
  ""
{
  if (operands[0] != const0_rtx)
    emit_move_insn (operands[0], operands[1]);
  nvptx_expand_oacc_fork (INTVAL (operands[2]));
  DONE;
})
;; Expander for an OpenACC join.
;;   operand 0: destination; no copy is emitted when it is const0_rtx.
;;   operand 1: value copied into operand 0.
;;   operand 2: constant passed on to nvptx_expand_oacc_join.
;; The expander ends with DONE, so the code is generated entirely by the
;; C body below.
(define_expand "oacc_join"
[(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
(match_operand:SI 1 "nvptx_general_operand" ""))
(unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
UNSPECV_JOIN)]
""
{
if (operands[0] != const0_rtx)
emit_move_insn (operands[0], operands[1]);
nvptx_expand_oacc_join (INTVAL (operands[2]));
DONE;
})
;; only 32-bit shuffles exist.
;; Shuffle a 32-bit value (SI or SF, via the BITS iterator).
;;   operand 0: result register.
;;   operand 1: source register.
;;   operand 2: register or immediate, printed as the third shfl argument.
;;   operand 3: constant printed through the %S modifier as a suffix of
;;              "shfl".
;; The final shfl argument is hard-coded to 31.
;; NOTE(review): operand 3 presumably takes one of the SHUFFLE_UP /
;; SHUFFLE_DOWN / SHUFFLE_BFLY / SHUFFLE_IDX values from nvptx.c -- confirm.
(define_insn "nvptx_shuffle<mode>"
[(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
(unspec:BITS
[(match_operand:BITS 1 "nvptx_register_operand" "R")
(match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_SHUFFLE))]
""
"%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
;; extract parts of a 64 bit object into 2 32-bit ints
;; Split the 64-bit register in operand 2 (DI or DF, via the BITD iterator)
;; into two SImode registers, operands 0 and 1, with a single mov.b64.
;; The two results are distinguished in the RTL by the (const_int 0) and
;; (const_int 1) selectors of UNSPEC_BIT_CONV.
(define_insn "unpack<mode>si2"
[(set (match_operand:SI 0 "nvptx_register_operand" "=R")
(unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
(const_int 0)] UNSPEC_BIT_CONV))
(set (match_operand:SI 1 "nvptx_register_operand" "=R")
(unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
""
"%.\\tmov.b64\\t{%0,%1}, %2;")
;; pack 2 32-bit ints into a 64 bit object
;; Inverse of unpack<mode>si2: combine the two SImode registers in operands
;; 1 and 2 into the 64-bit register in operand 0 (DI or DF, via the BITD
;; iterator) with a single mov.b64.
(define_insn "packsi<mode>2"
[(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
(unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
(match_operand:SI 2 "nvptx_register_operand" "R")]
UNSPEC_BIT_CONV))]
""
"%.\\tmov.b64\\t%0, {%1,%2};")
;; Load register operand 0 from memory operand 1 with an "ld.shared"
;; instruction.  The memory reference is wrapped in UNSPEC_SHARED_DATA, so
;; this pattern is kept distinct from an ordinary load.
(define_insn "worker_load<mode>"
[(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
(unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")]
UNSPEC_SHARED_DATA))]
""
"%.\\tld.shared%u0\\t%0, %1;")
;; Store register operand 0 into memory operand 1 with an "st.shared"
;; instruction.  Note the operand numbering: the destination memory is
;; operand 1 and the source register is operand 0.  The destination is
;; wrapped in UNSPEC_SHARED_DATA, so this pattern is kept distinct from an
;; ordinary store.
(define_insn "worker_store<mode>"
[(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")]
UNSPEC_SHARED_DATA)
(match_operand:SDISDFM 0 "nvptx_register_operand" "R"))]
""
"%.\\tst.shared%u1\\t%1, %0;")
;; Atomic insns.
(define_expand "atomic_compare_and_swap<mode>"
......@@ -1423,3 +1553,9 @@
(match_dup 1))]
"0"
"%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")
;; Emit a "bar.sync" instruction whose argument is the constant operand 0.
;; The pattern is an unspec_volatile (UNSPECV_BARSYNC).  Unlike most
;; templates in this file, the output string has no leading "%." modifier.
(define_insn "nvptx_barsync"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_BARSYNC)]
""
"\\tbar.sync\\t%0;")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment