Commit 159b4b2f by Oleg Endo

re PR target/53511 (SH Target: Add support for fma patterns)

	PR target/53511
	* config/sh/sh.md (fmasf4): New expander.
	(*macsf3): Rename to fmasf4_i.  Adapt to fma pattern.
	(mac_media): Rename to fmasf4_media.  Adapt to fma pattern.
	* config/sh/sh.opt (mfused-madd): Remove.
	* config/sh/sh.c (sh_option_override): Remove mfused-madd handling.
	(builtin_description bdesc): Remove __builtin_sh_media_FMAC_S.
	* config.gcc (sh[123456789lbe]*-*-* | sh-*-*): Add fused-madd.opt
	as extra options.
	* doc/invoke.texi (SH Options): Update mfused-madd and mno-fused-madd
	descriptions.

	PR target/53511
	* gcc.target/sh/pr53511-1.c: New.

From-SVN: r188396
parent bf27c43e
2012-06-11 Oleg Endo <olegendo@gcc.gnu.org>
PR target/53511
* config/sh/sh.md (fmasf4): New expander.
(*macsf3): Rename to fmasf4_i. Adapt to fma pattern.
(mac_media): Rename to fmasf4_media. Adapt to fma pattern.
* config/sh/sh.opt (mfused-madd): Remove.
* config/sh/sh.c (sh_option_override): Remove mfused-madd handling.
(builtin_description bdesc): Remove __builtin_sh_media_FMAC_S.
* config.gcc (sh[123456789lbe]*-*-* | sh-*-*): Add fused-madd.opt
as extra options.
* doc/invoke.texi (SH Options): Update mfused-madd and mno-fused-madd
descriptions.
2012-06-11 Richard Henderson <rth@redhat.com>
* dwarf2cfi.c (scan_trace): Handle annulled branch-taken delay slots.
......
...@@ -449,6 +449,7 @@ s390*-*-*) ...@@ -449,6 +449,7 @@ s390*-*-*)
sh[123456789lbe]*-*-* | sh-*-*) sh[123456789lbe]*-*-* | sh-*-*)
cpu_type=sh cpu_type=sh
need_64bit_hwint=yes need_64bit_hwint=yes
extra_options="${extra_options} fused-madd.opt"
;; ;;
v850*-*-*) v850*-*-*)
cpu_type=v850 cpu_type=v850
......
...@@ -878,13 +878,6 @@ sh_option_override (void) ...@@ -878,13 +878,6 @@ sh_option_override (void)
if (flag_unsafe_math_optimizations) if (flag_unsafe_math_optimizations)
{ {
/* Enable fmac insn for "a * b + c" SFmode calculations when -ffast-math
is enabled and -mno-fused-madd is not specified by the user.
The fmac insn can't be enabled by default due to the implied
FMA semantics. See also PR target/29100. */
if (global_options_set.x_TARGET_FMAC == 0)
TARGET_FMAC = 1;
/* Enable fsca insn for SH4A if not otherwise specified by the user. */ /* Enable fsca insn for SH4A if not otherwise specified by the user. */
if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP) if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
TARGET_FSCA = 1; TARGET_FSCA = 1;
...@@ -11231,7 +11224,6 @@ static struct builtin_description bdesc[] = ...@@ -11231,7 +11224,6 @@ static struct builtin_description bdesc[] =
{ CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 }, { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
{ CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 }, { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
{ CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 }, { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
{ CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
{ CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 }, { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
{ CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 }, { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
{ CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 }, { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
......
...@@ -10329,6 +10329,9 @@ label: ...@@ -10329,6 +10329,9 @@ label:
"fmul.s %1, %2, %0" "fmul.s %1, %2, %0"
[(set_attr "type" "fparith_media")]) [(set_attr "type" "fparith_media")])
;; FIXME: These fmac combine pass assisting specifics are obsolete since
;; we now use the FMA patterns, which do not depend on the combine
;; pass anymore.
;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR ;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
;; register in feeding fp instructions. Thus, in order to generate fmac, ;; register in feeding fp instructions. Thus, in order to generate fmac,
;; we start out with a mulsf pattern that does not depend on fpscr. ;; we start out with a mulsf pattern that does not depend on fpscr.
...@@ -10359,26 +10362,42 @@ label: ...@@ -10359,26 +10362,42 @@ label:
[(set_attr "type" "fp") [(set_attr "type" "fp")
(set_attr "fp_mode" "single")]) (set_attr "fp_mode" "single")])
(define_insn "mac_media" ;; FMA (fused multiply-add) patterns
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") (define_expand "fmasf4"
(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "f")) (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 3 "fp_arith_reg_operand" "0")))] (match_operand:SF 2 "fp_arith_reg_operand" "")
"TARGET_SHMEDIA_FPU && TARGET_FMAC" (match_operand:SF 3 "fp_arith_reg_operand" "")))]
"fmac.s %1, %2, %0" "TARGET_SH2E || TARGET_SHMEDIA_FPU"
[(set_attr "type" "fparith_media")]) {
if (TARGET_SH2E)
{
emit_sf_insn (gen_fmasf4_i (operands[0], operands[1], operands[2],
operands[3], get_fpscr_rtx ()));
DONE;
}
})
(define_insn "*macsf3" (define_insn "fmasf4_i"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "w")
(match_operand:SF 2 "fp_arith_reg_operand" "f")) (match_operand:SF 2 "fp_arith_reg_operand" "f")
(match_operand:SF 3 "arith_reg_operand" "0"))) (match_operand:SF 3 "fp_arith_reg_operand" "0")))
(use (match_operand:PSI 4 "fpscr_operand" "c"))] (use (match_operand:PSI 4 "fpscr_operand" "c"))]
"TARGET_SH2E && TARGET_FMAC" "TARGET_SH2E"
"fmac fr0,%2,%0" "fmac %1,%2,%0"
[(set_attr "type" "fp") [(set_attr "type" "fp")
(set_attr "fp_mode" "single")]) (set_attr "fp_mode" "single")])
(define_insn "fmasf4_media"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
(match_operand:SF 2 "fp_arith_reg_operand" "f")
(match_operand:SF 3 "fp_arith_reg_operand" "0")))]
"TARGET_SHMEDIA_FPU"
"fmac.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
(define_expand "divsf3" (define_expand "divsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "") [(set (match_operand:SF 0 "arith_reg_operand" "")
(div:SF (match_operand:SF 1 "arith_reg_operand" "") (div:SF (match_operand:SF 1 "arith_reg_operand" "")
......
...@@ -257,10 +257,6 @@ mfixed-range= ...@@ -257,10 +257,6 @@ mfixed-range=
Target RejectNegative Joined Var(sh_fixed_range_str) Target RejectNegative Joined Var(sh_fixed_range_str)
Specify range of registers to make fixed Specify range of registers to make fixed
mfused-madd
Target Var(TARGET_FMAC)
Enable the use of the fused floating point multiply-accumulate operation
mgettrcost= mgettrcost=
Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1) Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1)
Cost to assume for gettr insn Cost to assume for gettr insn
......
...@@ -18295,14 +18295,12 @@ is in effect. ...@@ -18295,14 +18295,12 @@ is in effect.
@itemx -mno-fused-madd @itemx -mno-fused-madd
@opindex mfused-madd @opindex mfused-madd
@opindex mno-fused-madd @opindex mno-fused-madd
-If the processor type supports it, setting @code{-mfused-madd} will allow the
-usage of the @code{fmac} instruction (floating-point multiply-accumulate) for
-regular calculations. Enabling this option might generate faster code but also
-produce different numeric floating-point results compared to strict IEEE 754
-arithmetic. @code{-mfused-madd} is enabled by default by option
-@option{-funsafe-math-optimizations}. Setting @code{-mno-fused-madd} will
-disallow the usage of the @code{fmac} instruction for regular calculations
-even if @option{-funsafe-math-optimizations} is in effect.
+Generate code that uses (does not use) the floating-point multiply and
+accumulate instructions. These instructions are generated by default
+if hardware floating point is used. The machine-dependent
+@option{-mfused-madd} option is now mapped to the machine-independent
+@option{-ffp-contract=fast} option, and @option{-mno-fused-madd} is
+mapped to @option{-ffp-contract=off}.
@item -mfsca @item -mfsca
@itemx -mno-fsca @itemx -mno-fsca
......
2012-06-11 Oleg Endo <olegendo@gcc.gnu.org>
PR target/53511
* gcc.target/sh/pr53511-1.c: New.
2012-06-11 Richard Guenther <rguenther@suse.de>
PR middle-end/53470
......
/* Verify that the fmac insn is used for the standard fmaf function. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O1" } */
/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */
/* { dg-final { scan-assembler "fmac" } } */
#include <math.h>
/* Return the result of calling the standard fmaf function on A, B and C.
   This is the only call in the testcase, so the "fmac" that the dg-final
   scan-assembler directive looks for is expected to come from it.  */
float
test_func_00 (float a, float b, float c)
{
return fmaf (a, b, c);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment