Commit b38ba463 by Zack Weinberg

ia64.md (UNSPEC_SETF_EXP,UNSPEC_FR_SQRT_RECIP_APPROX): New constants.

	* ia64.md (UNSPEC_SETF_EXP,UNSPEC_FR_SQRT_RECIP_APPROX): New constants.
	(*sqrt_approx): New instruction pattern for approximate square roots.
	(*setf_exp_xf): New instruction pattern for exponentiation.
	(*maddxf4_alts_truncsf): New instruction pattern for truncation.
	(sqrtsf2_internal_thr): New define_and_split implementing
	throughput-optimized inline calculation of SFmode square root.
	(sqrtdf2_internal_thr): Likewise for DFmode.
	(sqrtxf2_internal_thr): Likewise for XFmode.
	(sqrtsf2, sqrtdf2, sqrtxf2): New expanders to choose between
	latency- and throughput-optimized square root algorithms.
	* ia64.h (MASK_INLINE_SQRT_LAT, MASK_INLINE_SQRT_THR,
	TARGET_INLINE_SQRT_LAT, TARGET_INLINE_SQRT_THR, TARGET_INLINE_SQRT):
	New macros.
	(TARGET_SWITCHES): Add -minline-sqrt-min-latency and
	-minline-sqrt-max-throughput.
	* ia64.c (ia64_override_options): If both -minline-sqrt-min-latency
	and -minline-sqrt-max-throughput are given, notify the user
	that both options cannot be used simultaneously.
	If -minline-sqrt-min-latency is given, notify the user that
	this mode is not yet implemented.
	(rtx_needs_barrier): Reformat initial comment to obey
	72-character width limit.  Support UNSPEC_SETF_EXP and
	UNSPEC_FR_SQRT_RECIP_APPROX.

From-SVN: r73027
parent 1e8fee4a
2003-10-28 Zack Weinberg <zack@codesourcery.com>
* ia64.md (UNSPEC_SETF_EXP,UNSPEC_FR_SQRT_RECIP_APPROX): New constants.
(*sqrt_approx): New instruction pattern for approximate square roots.
(*setf_exp_xf): New instruction pattern for exponentiation.
(*maddxf4_alts_truncsf): New instruction pattern for truncation.
(sqrtsf2_internal_thr): New define_and_split implementing
throughput-optimized inline calculation of SFmode square root.
(sqrtdf2_internal_thr): Likewise for DFmode.
(sqrtxf2_internal_thr): Likewise for XFmode.
(sqrtsf2, sqrtdf2, sqrtxf2): New expanders to choose between
latency- and throughput-optimized square root algorithms.
* ia64.h (MASK_INLINE_SQRT_LAT, MASK_INLINE_SQRT_THR,
TARGET_INLINE_SQRT_LAT, TARGET_INLINE_SQRT_THR, TARGET_INLINE_SQRT):
New macros.
(TARGET_SWITCHES): Add -minline-sqrt-min-latency and
-minline-sqrt-max-throughput.
* ia64.c (ia64_override_options): If both -minline-sqrt-min-latency
and -minline-sqrt-max-throughput are given, notify the user
that both options cannot be used simultaneously.
If -minline-sqrt-min-latency is given, notify the user that
this mode is not yet implemented.
(rtx_needs_barrier): Reformat initial comment to obey
72-character width limit. Support UNSPEC_SETF_EXP and
UNSPEC_FR_SQRT_RECIP_APPROX.
2003-10-29 Alan Modra <amodra@bigpond.net.au>
* config/rs6000/rs6000.md (movdf_softfloat64): Allow dummy ctr,ctr
......
...@@ -4487,6 +4487,18 @@ ia64_override_options (void) ...@@ -4487,6 +4487,18 @@ ia64_override_options (void)
target_flags &= ~MASK_INLINE_INT_DIV_THR; target_flags &= ~MASK_INLINE_INT_DIV_THR;
} }
if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
{
warning ("cannot optimize square root for both latency and throughput");
target_flags &= ~MASK_INLINE_SQRT_THR;
}
if (TARGET_INLINE_SQRT_LAT)
{
warning ("not yet implemented: latency-optimized inline square root");
target_flags &= ~MASK_INLINE_SQRT_LAT;
}
if (ia64_fixed_range_string) if (ia64_fixed_range_string)
fix_range (ia64_fixed_range_string); fix_range (ia64_fixed_range_string);
...@@ -4896,9 +4908,9 @@ set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond) ...@@ -4896,9 +4908,9 @@ set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
return need_barrier; return need_barrier;
} }
/* Handle an access to rtx X of type FLAGS using predicate register PRED. /* Handle an access to rtx X of type FLAGS using predicate register
Return 1 is this access creates a dependency with an earlier instruction PRED. Return 1 if this access creates a dependency with an earlier
in the same group. */ instruction in the same group. */
static int static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
...@@ -5124,7 +5136,9 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) ...@@ -5124,7 +5136,9 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
case UNSPEC_FR_SPILL: case UNSPEC_FR_SPILL:
case UNSPEC_FR_RESTORE: case UNSPEC_FR_RESTORE:
case UNSPEC_GETF_EXP: case UNSPEC_GETF_EXP:
case UNSPEC_SETF_EXP:
case UNSPEC_ADDP4: case UNSPEC_ADDP4:
case UNSPEC_FR_SQRT_RECIP_APPROX:
need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
break; break;
......
...@@ -87,6 +87,10 @@ extern int target_flags; ...@@ -87,6 +87,10 @@ extern int target_flags;
#define MASK_INLINE_INT_DIV_THR 0x00001000 /* inline div, max throughput. */ #define MASK_INLINE_INT_DIV_THR 0x00001000 /* inline div, max throughput. */
#define MASK_INLINE_SQRT_LAT 0x00002000 /* inline sqrt, min latency. */
#define MASK_INLINE_SQRT_THR 0x00004000 /* inline sqrt, max throughput. */
#define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. */ #define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. */
#define MASK_EARLY_STOP_BITS 0x00002000 /* tune stop bits for the model. */ #define MASK_EARLY_STOP_BITS 0x00002000 /* tune stop bits for the model. */
...@@ -127,6 +131,13 @@ extern int target_flags; ...@@ -127,6 +131,13 @@ extern int target_flags;
#define TARGET_INLINE_INT_DIV \ #define TARGET_INLINE_INT_DIV \
(target_flags & (MASK_INLINE_INT_DIV_LAT | MASK_INLINE_INT_DIV_THR)) (target_flags & (MASK_INLINE_INT_DIV_LAT | MASK_INLINE_INT_DIV_THR))
#define TARGET_INLINE_SQRT_LAT (target_flags & MASK_INLINE_SQRT_LAT)
#define TARGET_INLINE_SQRT_THR (target_flags & MASK_INLINE_SQRT_THR)
#define TARGET_INLINE_SQRT \
(target_flags & (MASK_INLINE_SQRT_LAT | MASK_INLINE_SQRT_THR))
#define TARGET_DWARF2_ASM (target_flags & MASK_DWARF2_ASM) #define TARGET_DWARF2_ASM (target_flags & MASK_DWARF2_ASM)
extern int ia64_tls_size; extern int ia64_tls_size;
...@@ -186,6 +197,10 @@ extern int ia64_tls_size; ...@@ -186,6 +197,10 @@ extern int ia64_tls_size;
N_("Generate inline integer division, optimize for latency") }, \ N_("Generate inline integer division, optimize for latency") }, \
{ "inline-int-divide-max-throughput", MASK_INLINE_INT_DIV_THR, \ { "inline-int-divide-max-throughput", MASK_INLINE_INT_DIV_THR, \
N_("Generate inline integer division, optimize for throughput") },\ N_("Generate inline integer division, optimize for throughput") },\
{ "inline-sqrt-min-latency", MASK_INLINE_SQRT_LAT, \
N_("Generate inline square root, optimize for latency") }, \
{ "inline-sqrt-max-throughput", MASK_INLINE_SQRT_THR, \
N_("Generate inline square root, optimize for throughput") }, \
{ "dwarf2-asm", MASK_DWARF2_ASM, \ { "dwarf2-asm", MASK_DWARF2_ASM, \
N_("Enable Dwarf 2 line debug info via GNU as")}, \ N_("Enable Dwarf 2 line debug info via GNU as")}, \
{ "no-dwarf2-asm", -MASK_DWARF2_ASM, \ { "no-dwarf2-asm", -MASK_DWARF2_ASM, \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment