Commit 26f74aa3 by Jan Hubicka Committed by Jan Hubicka

re PR c/10308 ([x86] ICE with -O -fgcse or -O2)

	* cfgbuild.c (make_edges):  Do not use next_nonnote_insn when
	looking for fallthru edge.

	* athlon.md (athlon-agu, athlon-store, athlon-fany, athlon-faddmul):
	Fix.
	(athlon-load2, athlon-store2, athlon-fpsched, athlon-fpload,
	athlon-fvector): New.
	(athlon_*): Revisit to match new optimization guide.
	* i386.c (ix86_adjust_cost):  Fix memory operand costs on Athlon/k8
	* i386.md (cvt??2?? patterns): Fix modes.
	(fistp patterns): Set modes.

	Accidentally committed with my earlier reload patch:
	PR c/10308
	* reload.c (find_reloads_address_1): Reload plus at the place of
	index register.

From-SVN: r66037
parent 796621e8
Thu Apr 24 16:55:26 CEST 2003 Jan Hubicka <jh@suse.cz>
* cfgbuild.c (make_edges): Do not use next_nonnote_insn when
looking for fallthru edge.
* athlon.md (athlon-agu, athlon-store, athlon-fany, athlon-faddmul):
Fix.
(athlon-load2, athlon-store2, athlon-fpsched, athlon-fpload,
athlon-fvector): New.
(athlon_*): Revisit to match new optimization guide.
* i386.c (ix86_adjust_cost): Fix memory operand costs on Athlon/k8
* i386.md (cvt??2?? patterns): Fix modes.
(fistp patterns): Set modes.
Accidentally committed with my earlier reload patch:
PR c/10308
* reload.c (find_reloads_address_1): Reload plus at the place of
index register.
2003-04-24 Nathan Sidwell <nathan@codesourcery.com>
New GCOV_TAG_FUNCTION layout
......
......@@ -439,15 +439,17 @@ make_edges (label_value_list, min, max, update_p)
}
/* Find out if we can drop through to the next block. */
insn = next_nonnote_insn (insn);
insn = NEXT_INSN (insn);
while (insn
&& GET_CODE (insn) == NOTE
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK)
insn = NEXT_INSN (insn);
if (!insn || (bb->next_bb == EXIT_BLOCK_PTR && force_fallthru))
cached_make_edge (edge_cache, bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU);
else if (bb->next_bb != EXIT_BLOCK_PTR)
{
rtx tmp = bb->next_bb->head;
if (GET_CODE (tmp) == NOTE)
tmp = next_nonnote_insn (tmp);
if (force_fallthru || insn == tmp)
if (force_fallthru || insn == bb->next_bb->head)
cached_make_edge (edge_cache, bb, bb->next_bb, EDGE_FALLTHRU);
}
}
......
......@@ -12125,13 +12125,21 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
&& !ix86_agi_dependant (insn, dep_insn, insn_type))
{
/* Claim moves to take one cycle, as core can issue one load
at time and the next load can start cycle later. */
if (dep_insn_type == TYPE_IMOV
|| dep_insn_type == TYPE_FMOV)
cost = 0;
else if (cost >= 3)
cost -= 3;
enum attr_unit unit = get_attr_unit (insn);
int loadcost = 3;
/* Because of the difference between the length of integer and
floating unit pipeline preparation stages, the memory operands
for floating point are cheaper.
??? For Athlon the difference is most probably 2. */
if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
loadcost = 3;
else
loadcost = TARGET_ATHLON ? 2 : 0;
if (cost >= loadcost)
cost -= loadcost;
else
cost = 0;
}
......
......@@ -3990,7 +3990,7 @@
}
[(set_attr "type" "ssecvt,ssecvt,fmov")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "mode" "DF,DF,SF")])
(set_attr "mode" "SF,SF,SF")])
(define_insn "*truncdfsf2_2_nooverlap"
[(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
......@@ -4037,7 +4037,7 @@
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double")
(set_attr "mode" "DF")])
(set_attr "mode" "SF")])
(define_insn "*truncdfsf2_sse_only_nooverlap"
[(set (match_operand:SF 0 "register_operand" "=&Y")
......@@ -4443,7 +4443,8 @@
}
DONE;
}
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "DI")])
(define_insn "fix_truncdi_nomemory"
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
......@@ -4455,7 +4456,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
"#"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "DI")])
(define_insn "fix_truncdi_memory"
[(set (match_operand:DI 0 "memory_operand" "=m")
......@@ -4466,7 +4468,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
"* operands[5] = operands[4]; return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "DI")])
(define_split
[(set (match_operand:DI 0 "register_operand" "")
......@@ -4504,6 +4507,7 @@
"TARGET_64BIT && TARGET_SSE"
"cvttss2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
......@@ -4522,6 +4526,7 @@
"TARGET_64BIT && TARGET_SSE2"
"cvttsd2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt,sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
......@@ -4605,7 +4610,8 @@
}
DONE;
}
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "SI")])
(define_insn "fix_truncsi_nomemory"
[(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r")
......@@ -4616,7 +4622,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"#"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "SI")])
(define_insn "fix_truncsi_memory"
[(set (match_operand:SI 0 "memory_operand" "=m")
......@@ -4626,7 +4633,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"* return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "SI")])
;; When SSE available, it is always faster to use it!
(define_insn "fix_truncsfsi_sse"
......@@ -4635,6 +4643,7 @@
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
......@@ -4653,6 +4662,7 @@
"TARGET_SSE2"
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,vector")])
;; Avoid vector decoded form of the instruction.
......@@ -4743,7 +4753,8 @@
}
DONE;
}
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "HI")])
(define_insn "fix_trunchi_nomemory"
[(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r")
......@@ -4754,7 +4765,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"#"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "HI")])
(define_insn "fix_trunchi_memory"
[(set (match_operand:HI 0 "memory_operand" "=m")
......@@ -4764,7 +4776,8 @@
"TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"* return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")])
[(set_attr "type" "fistp")
(set_attr "mode" "HI")])
(define_split
[(set (match_operand:HI 0 "memory_operand" "")
......@@ -20262,7 +20275,7 @@
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SF")])
(set_attr "mode" "SI")])
(define_insn "cvtss2siq"
[(set (match_operand:DI 0 "register_operand" "=r,r")
......@@ -20273,7 +20286,7 @@
"cvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SF")])
(set_attr "mode" "DI")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
......@@ -21891,22 +21904,24 @@
;; Conversions between SI and DF
(define_insn "cvtsd2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"cvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SI")])
(define_insn "cvtsd2siq"
[(set (match_operand:DI 0 "register_operand" "=r")
(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE2 && TARGET_64BIT"
"cvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SI")])
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "DI")])
(define_insn "cvttsd2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
......
......@@ -4618,6 +4618,18 @@ parameter very large effectively disables garbage collection. Setting
this parameter and @option{ggc-min-expand} to zero causes a full
collection to occur at every opportunity.
@item reorder-blocks-duplicate
@itemx reorder-blocks-duplicate-feedback
Used by the basic block reordering pass to decide whether to use an unconditional
branch or duplicate the code at its destination. Code is duplicated when its
estimated size is smaller than this value multiplied by the estimated size of
an unconditional jump in the hot spots of the program.
The @option{reorder-blocks-duplicate-feedback} parameter is used only when profile
feedback is available and may be set to higher values than
@option{reorder-blocks-duplicate} since information about the hot spots is more
accurate.
@end table
@end table
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment