Commit 930c5599 by Andrew Stubbs Committed by Andrew Stubbs

Remove amdgcn expcnt waits.

2019-07-31  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn-valu.md
	(scatter<mode>_insn_1offset<exec_scatter>): Remove s_waitcnt.
	(scatter<mode>_insn_1offset_ds<exec_scatter>): Likewise.
	(scatter<mode>_insn_2offsets<exec_scatter>): Likewise.
	* config/gcn/gcn.c (gcn_md_reorg): Add delayeduse and reads to
	struct ilist. Add nops for delayeduse insns.
	* config/gcn/gcn.md (delayeduse): New attribute.
	(*movbi): Remove s_waitcnt from stores.
	(*mov<mode>_insn): Likewise.
	(*movti_insn): Likewise. Add delayeduse attribute.
	(sync_compare_and_swap<mode>_insn): Add delayeduse attribute.
	(atomic_store<mode>): Remove or adjust s_waitcnt.

From-SVN: r273931
parent f86c2e71
2019-07-31 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn-valu.md
(scatter<mode>_insn_1offset<exec_scatter>): Remove s_waitcnt.
(scatter<mode>_insn_1offset_ds<exec_scatter>): Likewise.
(scatter<mode>_insn_2offsets<exec_scatter>): Likewise.
* config/gcn/gcn.c (gcn_md_reorg): Add delayeduse and reads to
struct ilist. Add nops for delayeduse insns.
* config/gcn/gcn.md (delayeduse): New attribute.
(*movbi): Remove s_waitcnt from stores.
(*mov<mode>_insn): Likewise.
(*movti_insn): Likewise. Add delayeduse attribute.
(sync_compare_and_swap<mode>_insn): Add delayeduse attribute.
(atomic_store<mode>): Remove or adjust s_waitcnt.
2019-07-31 Richard Biener <rguenther@suse.de> 2019-07-31 Richard Biener <rguenther@suse.de>
* vr-values.h (vr_values::swap_vr_value): New. * vr-values.h (vr_values::swap_vr_value): New.
......
...@@ -863,15 +863,12 @@ ...@@ -863,15 +863,12 @@
if (AS_FLAT_P (as)) if (AS_FLAT_P (as))
{ {
if (TARGET_GCN5_PLUS) if (TARGET_GCN5_PLUS)
sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;" sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
"s_waitcnt\texpcnt(0)", glc);
else else
sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\texpcnt(0)", sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
glc);
} }
else if (AS_GLOBAL_P (as)) else if (AS_GLOBAL_P (as))
sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;" sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
"s_waitcnt\texpcnt(0)", glc);
else else
gcc_unreachable (); gcc_unreachable ();
...@@ -895,7 +892,7 @@ ...@@ -895,7 +892,7 @@
{ {
addr_space_t as = INTVAL (operands[3]); addr_space_t as = INTVAL (operands[3]);
static char buf[200]; static char buf[200];
sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\texpcnt(0)", sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
(AS_GDS_P (as) ? " gds" : "")); (AS_GDS_P (as) ? " gds" : ""));
return buf; return buf;
} }
...@@ -929,8 +926,8 @@ ...@@ -929,8 +926,8 @@
/* Work around assembler bug in which a 64-bit register is expected, /* Work around assembler bug in which a 64-bit register is expected,
but a 32-bit value would be correct. */ but a 32-bit value would be correct. */
int reg = REGNO (operands[1]) - FIRST_VGPR_REG; int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;" sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
"s_waitcnt\texpcnt(0)", reg, reg + 1, glc); reg, reg + 1, glc);
} }
else else
gcc_unreachable (); gcc_unreachable ();
......
...@@ -4516,7 +4516,9 @@ gcn_md_reorg (void) ...@@ -4516,7 +4516,9 @@ gcn_md_reorg (void)
{ {
rtx_insn *insn; rtx_insn *insn;
attr_unit unit; attr_unit unit;
attr_delayeduse delayeduse;
HARD_REG_SET writes; HARD_REG_SET writes;
HARD_REG_SET reads;
int age; int age;
} back[max_waits]; } back[max_waits];
int oldest = 0; int oldest = 0;
...@@ -4535,6 +4537,7 @@ gcn_md_reorg (void) ...@@ -4535,6 +4537,7 @@ gcn_md_reorg (void)
attr_type itype = get_attr_type (insn); attr_type itype = get_attr_type (insn);
attr_unit iunit = get_attr_unit (insn); attr_unit iunit = get_attr_unit (insn);
attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
HARD_REG_SET ireads, iwrites; HARD_REG_SET ireads, iwrites;
CLEAR_HARD_REG_SET (ireads); CLEAR_HARD_REG_SET (ireads);
CLEAR_HARD_REG_SET (iwrites); CLEAR_HARD_REG_SET (iwrites);
...@@ -4610,6 +4613,14 @@ gcn_md_reorg (void) ...@@ -4610,6 +4613,14 @@ gcn_md_reorg (void)
(regs, reg_class_contents[(int) VGPR_REGS])) (regs, reg_class_contents[(int) VGPR_REGS]))
nops_rqd = 2 - prev_insn->age; nops_rqd = 2 - prev_insn->age;
} }
/* Stores require that their input registers are not overwritten by
the following instruction. */
if ((prev_insn->age + nops_rqd) < 1
&& prev_insn->delayeduse == DELAYEDUSE_YES
&& ((hard_reg_set_intersect_p
(prev_insn->reads, iwrites))))
nops_rqd = 1 - prev_insn->age;
} }
/* Insert the required number of NOPs. */ /* Insert the required number of NOPs. */
...@@ -4637,7 +4648,9 @@ gcn_md_reorg (void) ...@@ -4637,7 +4648,9 @@ gcn_md_reorg (void)
/* Track the current instruction as a previous instruction. */ /* Track the current instruction as a previous instruction. */
back[oldest].insn = insn; back[oldest].insn = insn;
back[oldest].unit = iunit; back[oldest].unit = iunit;
back[oldest].delayeduse = idelayeduse;
COPY_HARD_REG_SET (back[oldest].writes, iwrites); COPY_HARD_REG_SET (back[oldest].writes, iwrites);
COPY_HARD_REG_SET (back[oldest].reads, ireads);
back[oldest].age = 0; back[oldest].age = 0;
oldest = (oldest + 1) % max_waits; oldest = (oldest + 1) % max_waits;
......
...@@ -285,6 +285,11 @@ ...@@ -285,6 +285,11 @@
(define_attr "laneselect" "yes,no" (const_string "no")) (define_attr "laneselect" "yes,no" (const_string "no"))
; Identify instructions that require a "Manually Inserted Wait State" if
; their inputs are overwritten by subsequent instructions.
(define_attr "delayeduse" "yes,no" (const_string "no"))
;; }}} ;; }}}
;; {{{ Iterators useful across the whole machine description ;; {{{ Iterators useful across the whole machine description
...@@ -475,15 +480,15 @@ ...@@ -475,15 +480,15 @@
case 6: case 6:
return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)"; return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
case 7: case 7:
return "s_store_dword\t%1, %A0\;s_waitcnt\texpcnt(0)"; return "s_store_dword\t%1, %A0";
case 8: case 8:
return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0"; return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
case 9: case 9:
return "flat_store_dword\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0)"; return "flat_store_dword\t%A0, %1%O0%g0";
case 10: case 10:
return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)"; return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
case 11: case 11:
return "global_store_dword\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0)"; return "global_store_dword\t%A0, %1%O0%g0";
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
...@@ -506,20 +511,20 @@ ...@@ -506,20 +511,20 @@
s_movk_i32\t%0, %1 s_movk_i32\t%0, %1
s_mov_b32\t%0, %1 s_mov_b32\t%0, %1
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0) s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
s_buffer_store%s1\t%1, s[0:3], %0\;s_waitcnt\texpcnt(0) s_buffer_store%s1\t%1, s[0:3], %0
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0) s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_store_dword\t%1, %A0\;s_waitcnt\texpcnt(0) s_store_dword\t%1, %A0
v_mov_b32\t%0, %1 v_mov_b32\t%0, %1
v_readlane_b32\t%0, %1, 0 v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0 v_writelane_b32\t%0, %1, 0
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dword\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0) flat_store_dword\t%A0, %1%O0%g0
v_mov_b32\t%0, %1 v_mov_b32\t%0, %1
ds_write_b32\t%A0, %1%O0\;s_waitcnt\texpcnt(0) ds_write_b32\t%A0, %1%O0
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
s_mov_b32\t%0, %1 s_mov_b32\t%0, %1
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dword\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0)" global_store_dword\t%A0, %1%O0%g0"
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat, [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
flat,vop1,ds,ds,sop1,flat,flat") flat,vop1,ds,ds,sop1,flat,flat")
(set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*") (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
...@@ -541,12 +546,12 @@ ...@@ -541,12 +546,12 @@
v_readlane_b32\t%0, %1, 0 v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0 v_writelane_b32\t%0, %1, 0
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store%s0\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0) flat_store%s0\t%A0, %1%O0%g0
v_mov_b32\t%0, %1 v_mov_b32\t%0, %1
ds_write%b0\t%A0, %1%O0\;s_waitcnt\texpcnt(0) ds_write%b0\t%A0, %1%O0
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store%s0\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0)" global_store%s0\t%A0, %1%O0%g0"
[(set_attr "type" [(set_attr "type"
"sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat") "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
(set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*") (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
...@@ -564,18 +569,18 @@ ...@@ -564,18 +569,18 @@
s_mov_b64\t%0, %1 s_mov_b64\t%0, %1
s_mov_b64\t%0, %1 s_mov_b64\t%0, %1
# #
s_store_dwordx2\t%1, %A0\;s_waitcnt\texpcnt(0) s_store_dwordx2\t%1, %A0
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0) s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
# #
# #
# #
# #
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dwordx2\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0) flat_store_dwordx2\t%A0, %1%O0%g0
ds_write_b64\t%A0, %1%O0\;s_waitcnt\texpcnt(0) ds_write_b64\t%A0, %1%O0
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dwordx2\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0)" global_store_dwordx2\t%A0, %1%O0%g0"
"(reload_completed && !MEM_P (operands[0]) && !MEM_P (operands[1]) "(reload_completed && !MEM_P (operands[0]) && !MEM_P (operands[1])
&& !gcn_sgpr_move_p (operands[0], operands[1])) && !gcn_sgpr_move_p (operands[0], operands[1]))
|| (GET_CODE (operands[1]) == CONST_INT && !gcn_constant64_p (operands[1]))" || (GET_CODE (operands[1]) == CONST_INT && !gcn_constant64_p (operands[1]))"
...@@ -617,16 +622,16 @@ ...@@ -617,16 +622,16 @@
"" ""
"@ "@
# #
s_store_dwordx4\t%1, %A0\;s_waitcnt\texpcnt(0) s_store_dwordx4\t%1, %A0
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0) s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
flat_store_dwordx4\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0) flat_store_dwordx4\t%A0, %1%O0%g0
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
# #
# #
# #
global_store_dwordx4\t%A0, %1%O0%g0\;s_waitcnt\texpcnt(0) global_store_dwordx4\t%A0, %1%O0%g0
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
ds_write_b128\t%A0, %1%O0\;s_waitcnt\texpcnt(0) ds_write_b128\t%A0, %1%O0
ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)" ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
"reload_completed "reload_completed
&& REG_P (operands[0]) && REG_P (operands[0])
...@@ -647,6 +652,7 @@ ...@@ -647,6 +652,7 @@
} }
[(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\ [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
ds,ds") ds,ds")
(set_attr "delayeduse" "*,*,yes,*,*,*,*,*,*,*,*,*")
(set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")]) (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
;; }}} ;; }}}
...@@ -1612,7 +1618,8 @@ ...@@ -1612,7 +1618,8 @@
global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)" global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat") [(set_attr "type" "smem,flat,flat")
(set_attr "length" "12") (set_attr "length" "12")
(set_attr "gcn_version" "gcn5,*,gcn5")]) (set_attr "gcn_version" "gcn5,*,gcn5")
(set_attr "delayeduse" "*,yes,*")])
(define_insn "sync_compare_and_swap<mode>_lds_insn" (define_insn "sync_compare_and_swap<mode>_lds_insn"
[(set (match_operand:SIDI 0 "register_operand" "= v") [(set (match_operand:SIDI 0 "register_operand" "= v")
...@@ -1715,14 +1722,11 @@ ...@@ -1715,14 +1722,11 @@
switch (which_alternative) switch (which_alternative)
{ {
case 0: case 0:
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;" return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
"s_waitcnt\texpcnt(0)";
case 1: case 1:
return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;" return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
"s_waitcnt\texpcnt(0)";
case 2: case 2:
return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;" return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
"s_waitcnt\texpcnt(0)";
} }
break; break;
case MEMMODEL_ACQ_REL: case MEMMODEL_ACQ_REL:
...@@ -1732,13 +1736,13 @@ ...@@ -1732,13 +1736,13 @@
{ {
case 0: case 0:
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;" return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
"s_waitcnt\texpcnt(0)\;s_dcache_inv_vol"; "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1: case 1:
return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;" return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
"s_waitcnt\texpcnt(0)\;buffer_wbinvl1_vol"; "s_waitcnt\t0\;buffer_wbinvl1_vol";
case 2: case 2:
return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;" return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
"s_waitcnt\texpcnt(0)\;buffer_wbinvl1_vol"; "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
} }
break; break;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment