Commit d3b92f35 by Tejas Joshi Committed by Martin Jambor

i386: Roundeven expansion for SSE4.1+

gcc/ChangeLog:

2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>
            Uros Bizjak  <ubizjak@gmail.com>

	* builtins.c (mathfn_built_in_2): Change CASE_MATHFN to
	CASE_MATHFN_FLOATN for roundeven.
	* config/i386/i386.c (ix86_i387_mode_needed): Add case
	I387_ROUNDEVEN.
	(ix86_mode_needed): Likewise.
	(ix86_mode_after): Likewise.
	(ix86_mode_entry): Likewise.
	(ix86_mode_exit): Likewise.
	(ix86_emit_mode_set): Likewise.
	(emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN.
	* config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN.
	(ix86_entry): Add I387_ROUNDEVEN.
	(NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY.
	* config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN.
	(define_int_iterator): Likewise.
	(define_int_attr): Likewise for rounding_insn, rounding and ROUNDING.
	(define_constant): Define ROUND_ROUNDEVEN mode.
	(define_attr): Add roundeven mode for i387_cw.
	(<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN.
	* internal-fn.def (ROUNDEVEN): New builtin function.
	* optabs.def (roundeven_optab): New optab.

gcc/testsuite/ChangeLog:

2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>

	* gcc.target/i386/sse4_1-round-roundeven-1.c: New test.
	* gcc.target/i386/sse4_1-round-roundeven-2.c: New test.


Co-Authored-By: Uros Bizjak <ubizjak@gmail.com>

From-SVN: r274928
parent 7d7b99f9
2019-08-26 Tejas Joshi <tejasjoshi9673@gmail.com> 2019-08-26 Tejas Joshi <tejasjoshi9673@gmail.com>
Uros Bizjak <ubizjak@gmail.com>
* builtins.c (mathfn_built_in_2): Change CASE_MATHFN to
CASE_MATHFN_FLOATN for roundeven.
* config/i386/i386.c (ix86_i387_mode_needed): Add case
I387_ROUNDEVEN.
(ix86_mode_needed): Likewise.
(ix86_mode_after): Likewise.
(ix86_mode_entry): Likewise.
(ix86_mode_exit): Likewise.
(ix86_emit_mode_set): Likewise.
(emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN.
* config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN.
(ix86_entry): Add I387_ROUNDEVEN.
(NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY.
* config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN.
(define_int_iterator): Likewise.
(define_int_attr): Likewise for rounding_insn, rounding and ROUNDING.
(define_constant): Define ROUND_ROUNDEVEN mode.
(define_attr): Add roundeven mode for i387_cw.
(<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN.
* internal-fn.def (ROUNDEVEN): New builtin function.
* optabs.def (roundeven_optab): New optab.
2019-08-26 Tejas Joshi <tejasjoshi9673@gmail.com>
* builtins.c (mathfn_built_in_2): Added CASE_MATHFN_FLOATN * builtins.c (mathfn_built_in_2): Added CASE_MATHFN_FLOATN
for ROUNDEVEN. for ROUNDEVEN.
......
...@@ -13557,6 +13557,11 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) ...@@ -13557,6 +13557,11 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn)
switch (entity) switch (entity)
{ {
case I387_ROUNDEVEN:
if (mode == I387_CW_ROUNDEVEN)
return mode;
break;
case I387_TRUNC: case I387_TRUNC:
if (mode == I387_CW_TRUNC) if (mode == I387_CW_TRUNC)
return mode; return mode;
...@@ -13591,6 +13596,7 @@ ix86_mode_needed (int entity, rtx_insn *insn) ...@@ -13591,6 +13596,7 @@ ix86_mode_needed (int entity, rtx_insn *insn)
return ix86_dirflag_mode_needed (insn); return ix86_dirflag_mode_needed (insn);
case AVX_U128: case AVX_U128:
return ix86_avx_u128_mode_needed (insn); return ix86_avx_u128_mode_needed (insn);
case I387_ROUNDEVEN:
case I387_TRUNC: case I387_TRUNC:
case I387_FLOOR: case I387_FLOOR:
case I387_CEIL: case I387_CEIL:
...@@ -13651,6 +13657,7 @@ ix86_mode_after (int entity, int mode, rtx_insn *insn) ...@@ -13651,6 +13657,7 @@ ix86_mode_after (int entity, int mode, rtx_insn *insn)
return mode; return mode;
case AVX_U128: case AVX_U128:
return ix86_avx_u128_mode_after (mode, insn); return ix86_avx_u128_mode_after (mode, insn);
case I387_ROUNDEVEN:
case I387_TRUNC: case I387_TRUNC:
case I387_FLOOR: case I387_FLOOR:
case I387_CEIL: case I387_CEIL:
...@@ -13703,6 +13710,7 @@ ix86_mode_entry (int entity) ...@@ -13703,6 +13710,7 @@ ix86_mode_entry (int entity)
return ix86_dirflag_mode_entry (); return ix86_dirflag_mode_entry ();
case AVX_U128: case AVX_U128:
return ix86_avx_u128_mode_entry (); return ix86_avx_u128_mode_entry ();
case I387_ROUNDEVEN:
case I387_TRUNC: case I387_TRUNC:
case I387_FLOOR: case I387_FLOOR:
case I387_CEIL: case I387_CEIL:
...@@ -13740,6 +13748,7 @@ ix86_mode_exit (int entity) ...@@ -13740,6 +13748,7 @@ ix86_mode_exit (int entity)
return X86_DIRFLAG_ANY; return X86_DIRFLAG_ANY;
case AVX_U128: case AVX_U128:
return ix86_avx_u128_mode_exit (); return ix86_avx_u128_mode_exit ();
case I387_ROUNDEVEN:
case I387_TRUNC: case I387_TRUNC:
case I387_FLOOR: case I387_FLOOR:
case I387_CEIL: case I387_CEIL:
...@@ -13774,6 +13783,12 @@ emit_i387_cw_initialization (int mode) ...@@ -13774,6 +13783,12 @@ emit_i387_cw_initialization (int mode)
switch (mode) switch (mode)
{ {
case I387_CW_ROUNDEVEN:
/* Round to nearest (ties to even): the rounding-control field of the
   x87 control word (mask 0x0c00, the same field the TRUNC case sets
   to all ones) must be zero.  Clear only those two bits; ANDing with
   0x0c00 itself would instead wipe every other control-word bit
   (exception masks, precision control) and leave the rounding field
   unchanged.  */
emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
slot = SLOT_CW_ROUNDEVEN;
break;
case I387_CW_TRUNC: case I387_CW_TRUNC:
/* round toward zero (truncate) */ /* round toward zero (truncate) */
emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
...@@ -13820,6 +13835,7 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, ...@@ -13820,6 +13835,7 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
if (mode == AVX_U128_CLEAN) if (mode == AVX_U128_CLEAN)
emit_insn (gen_avx_vzeroupper ()); emit_insn (gen_avx_vzeroupper ());
break; break;
case I387_ROUNDEVEN:
case I387_TRUNC: case I387_TRUNC:
case I387_FLOOR: case I387_FLOOR:
case I387_CEIL: case I387_CEIL:
......
...@@ -2511,6 +2511,7 @@ enum ix86_stack_slot ...@@ -2511,6 +2511,7 @@ enum ix86_stack_slot
{ {
SLOT_TEMP = 0, SLOT_TEMP = 0,
SLOT_CW_STORED, SLOT_CW_STORED,
SLOT_CW_ROUNDEVEN,
SLOT_CW_TRUNC, SLOT_CW_TRUNC,
SLOT_CW_FLOOR, SLOT_CW_FLOOR,
SLOT_CW_CEIL, SLOT_CW_CEIL,
...@@ -2522,6 +2523,7 @@ enum ix86_entity ...@@ -2522,6 +2523,7 @@ enum ix86_entity
{ {
X86_DIRFLAG = 0, X86_DIRFLAG = 0,
AVX_U128, AVX_U128,
I387_ROUNDEVEN,
I387_TRUNC, I387_TRUNC,
I387_FLOOR, I387_FLOOR,
I387_CEIL, I387_CEIL,
...@@ -2557,7 +2559,7 @@ enum avx_u128_state ...@@ -2557,7 +2559,7 @@ enum avx_u128_state
#define NUM_MODES_FOR_MODE_SWITCHING \ #define NUM_MODES_FOR_MODE_SWITCHING \
{ X86_DIRFLAG_ANY, AVX_U128_ANY, \ { X86_DIRFLAG_ANY, AVX_U128_ANY, \
I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
/* Avoid renaming of stack registers, as doing so in combination with /* Avoid renaming of stack registers, as doing so in combination with
......
...@@ -141,6 +141,7 @@ ...@@ -141,6 +141,7 @@
UNSPEC_FXAM UNSPEC_FXAM
;; x87 Rounding ;; x87 Rounding
UNSPEC_FRNDINT_ROUNDEVEN
UNSPEC_FRNDINT_FLOOR UNSPEC_FRNDINT_FLOOR
UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_CEIL
UNSPEC_FRNDINT_TRUNC UNSPEC_FRNDINT_TRUNC
...@@ -303,7 +304,8 @@ ...@@ -303,7 +304,8 @@
;; Constants to represent rounding modes in the ROUND instruction ;; Constants to represent rounding modes in the ROUND instruction
(define_constants (define_constants
[(ROUND_FLOOR 0x1) [(ROUND_ROUNDEVEN 0x0)
(ROUND_FLOOR 0x1)
(ROUND_CEIL 0x2) (ROUND_CEIL 0x2)
(ROUND_TRUNC 0x3) (ROUND_TRUNC 0x3)
(ROUND_MXCSR 0x4) (ROUND_MXCSR 0x4)
...@@ -779,7 +781,7 @@ ...@@ -779,7 +781,7 @@
;; Defines rounding mode of an FP operation. ;; Defines rounding mode of an FP operation.
(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any" (define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
(const_string "any")) (const_string "any"))
;; Define attribute to indicate AVX insns with partial XMM register update. ;; Define attribute to indicate AVX insns with partial XMM register update.
...@@ -16212,7 +16214,8 @@ ...@@ -16212,7 +16214,8 @@
}) })
(define_int_iterator FRNDINT_ROUNDING (define_int_iterator FRNDINT_ROUNDING
[UNSPEC_FRNDINT_FLOOR [UNSPEC_FRNDINT_ROUNDEVEN
UNSPEC_FRNDINT_FLOOR
UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_CEIL
UNSPEC_FRNDINT_TRUNC]) UNSPEC_FRNDINT_TRUNC])
...@@ -16222,21 +16225,24 @@ ...@@ -16222,21 +16225,24 @@
;; Base name for define_insn ;; Base name for define_insn
(define_int_attr rounding_insn (define_int_attr rounding_insn
[(UNSPEC_FRNDINT_FLOOR "floor") [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
(UNSPEC_FRNDINT_FLOOR "floor")
(UNSPEC_FRNDINT_CEIL "ceil") (UNSPEC_FRNDINT_CEIL "ceil")
(UNSPEC_FRNDINT_TRUNC "btrunc") (UNSPEC_FRNDINT_TRUNC "btrunc")
(UNSPEC_FIST_FLOOR "floor") (UNSPEC_FIST_FLOOR "floor")
(UNSPEC_FIST_CEIL "ceil")]) (UNSPEC_FIST_CEIL "ceil")])
(define_int_attr rounding (define_int_attr rounding
[(UNSPEC_FRNDINT_FLOOR "floor") [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
(UNSPEC_FRNDINT_FLOOR "floor")
(UNSPEC_FRNDINT_CEIL "ceil") (UNSPEC_FRNDINT_CEIL "ceil")
(UNSPEC_FRNDINT_TRUNC "trunc") (UNSPEC_FRNDINT_TRUNC "trunc")
(UNSPEC_FIST_FLOOR "floor") (UNSPEC_FIST_FLOOR "floor")
(UNSPEC_FIST_CEIL "ceil")]) (UNSPEC_FIST_CEIL "ceil")])
(define_int_attr ROUNDING (define_int_attr ROUNDING
[(UNSPEC_FRNDINT_FLOOR "FLOOR") [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
(UNSPEC_FRNDINT_FLOOR "FLOOR")
(UNSPEC_FRNDINT_CEIL "CEIL") (UNSPEC_FRNDINT_CEIL "CEIL")
(UNSPEC_FRNDINT_TRUNC "TRUNC") (UNSPEC_FRNDINT_TRUNC "TRUNC")
(UNSPEC_FIST_FLOOR "FLOOR") (UNSPEC_FIST_FLOOR "FLOOR")
...@@ -16299,8 +16305,9 @@ ...@@ -16299,8 +16305,9 @@
|| TARGET_MIX_SSE_I387) || TARGET_MIX_SSE_I387)
&& (flag_fp_int_builtin_inexact || !flag_trapping_math)) && (flag_fp_int_builtin_inexact || !flag_trapping_math))
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& (TARGET_SSE4_1 || flag_fp_int_builtin_inexact && (TARGET_SSE4_1
|| !flag_trapping_math))" || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
&& (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
{ {
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math)) && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math))
......
...@@ -238,6 +238,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary) ...@@ -238,6 +238,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary)
DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary) DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary)
DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary) DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary) DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary) DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
/* Binary math functions. */ /* Binary math functions. */
......
...@@ -271,6 +271,7 @@ OPTAB_D (fnms_optab, "fnms$a4") ...@@ -271,6 +271,7 @@ OPTAB_D (fnms_optab, "fnms$a4")
OPTAB_D (rint_optab, "rint$a2") OPTAB_D (rint_optab, "rint$a2")
OPTAB_D (round_optab, "round$a2") OPTAB_D (round_optab, "round$a2")
OPTAB_D (roundeven_optab, "roundeven$a2")
OPTAB_D (floor_optab, "floor$a2") OPTAB_D (floor_optab, "floor$a2")
OPTAB_D (ceil_optab, "ceil$a2") OPTAB_D (ceil_optab, "ceil$a2")
OPTAB_D (btrunc_optab, "btrunc$a2") OPTAB_D (btrunc_optab, "btrunc$a2")
......
...@@ -1817,6 +1817,7 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr regstack, rtx pat) ...@@ -1817,6 +1817,7 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr regstack, rtx pat)
case UNSPEC_FRNDINT: case UNSPEC_FRNDINT:
case UNSPEC_F2XM1: case UNSPEC_F2XM1:
case UNSPEC_FRNDINT_ROUNDEVEN:
case UNSPEC_FRNDINT_FLOOR: case UNSPEC_FRNDINT_FLOOR:
case UNSPEC_FRNDINT_CEIL: case UNSPEC_FRNDINT_CEIL:
case UNSPEC_FRNDINT_TRUNC: case UNSPEC_FRNDINT_TRUNC:
......
2019-08-26 Tejas Joshi <tejasjoshi9673@gmail.com> 2019-08-26 Tejas Joshi <tejasjoshi9673@gmail.com>
* gcc.target/i386/sse4_1-round-roundeven-1.c: New test.
* gcc.target/i386/sse4_1-round-roundeven-2.c: New test.
2019-08-26 Tejas Joshi <tejasjoshi9673@gmail.com>
* gcc.dg/torture/builtin-round-roundeven.c: New test. * gcc.dg/torture/builtin-round-roundeven.c: New test.
* gcc.dg/torture/builtin-round-roundevenf128.c: Likewise. * gcc.dg/torture/builtin-round-roundevenf128.c: Likewise.
......
/* { dg-do compile } */
/* { dg-options "-O2 -msse4.1" } */
/* Round the double X to an integral value with ties going to even, using
   the GCC builtin.  The dg-final directives of this test expect exactly
   one roundsd instruction in the generated assembly, so this function is
   the only place a double-precision rounding may be emitted.
   NOTE(review): noinline/noclone presumably keep the call from being
   folded or duplicated when the runtime test includes this file and calls
   it with constants -- confirm.  */
__attribute__((noinline, noclone)) double
f1 (double x)
{
return __builtin_roundeven (x);
}
/* Single-precision counterpart of f1: round the float X to an integral
   value with ties going to even, using the GCC builtin.  The dg-final
   directives of this test expect exactly one roundss instruction in the
   generated assembly.
   NOTE(review): noinline/noclone presumably keep the call from being
   folded or duplicated when the runtime test includes this file and calls
   it with constants -- confirm.  */
__attribute__((noinline, noclone)) float
f2 (float x)
{
return __builtin_roundevenf (x);
}
/* { dg-final { scan-assembler-times "roundsd\[^\n\r\]*xmm" 1 } } */
/* { dg-final { scan-assembler-times "roundss\[^\n\r\]*xmm" 1 } } */
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#include "sse4_1-check.h"
#include "sse4_1-round-roundeven-1.c"
/* Runtime check for the roundeven expansion: feed halfway cases of both
   signs to f1 (double) and f2 (float) and abort on the first result that
   is not the even neighbour.  The comparisons against 0.0 / 0.0f accept
   either sign of zero, exactly as written in the expected values.  */
static void
sse4_1_test (void)
{
  /* Double precision, checked in order; the first mismatch aborts.  */
  if (f1 (0.5) != 0.0)
    abort ();
  if (f1 (1.5) != 2.0)
    abort ();
  if (f1 (-0.5) != 0.0)
    abort ();
  if (f1 (-1.5) != -2.0)
    abort ();

  /* Single precision, same halfway cases.  */
  if (f2 (0.5f) != 0.0f)
    abort ();
  if (f2 (1.5f) != 2.0f)
    abort ();
  if (f2 (-0.5f) != 0.0f)
    abort ();
  if (f2 (-1.5f) != -2.0f)
    abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment