i386.c (ix86_split_to_parts): Return number of part required; handle TFmodes.

* i386.c (ix86_split_to_parts): Return number of part required; handle TFmodes. (print_operand, ix86_expand_branch, ix86_expand_fp_movcc): Handle TFmodes. (ix86_split_long_move): Use number of part returned by ix86_split_to_parts * i386.h (MASK_128BIT_LONG_DOUBLE, TARGET_128BIT_LONG_DOUBLE): New macros. (TARGET_SWITCHES): Add 128bit-long-double and 96bit-long-double (LONG_DOUBLE_TYPE_SIZE): Change from constant. (MAX_LONG_DOUBLE_TYPE_SIZE): New macro. (INTEL_EXTENDED_IEEE_FORMAT): Likewise. (ALIGN_MODE_128): Add TFmode. (IS_STACK_MODE): Likewise. (HARD_REGNO_NREGS): TFmode needs 3 registers. (HARD_REGNO_OK): Support TFmodes. (ASM_OUTPUT_LONG_DOUBLE): Handle TFmodes. * i386.md (scheduler definitions): Use memory operand to determine fst/fld instructions; use mode attribute to determine real mode of the instruction. (*tf): New patterns, expanders and splitters; based on XFmode patterns. * invoke.texi (128bit-long-double, 96bit-long-double): Document. From-SVN: r38633

i386.c (ix86_split_to_parts): Return number of part required; handle TFmodes.
* i386.c (ix86_split_to_parts): Return number of part required; handle TFmodes. (print_operand, ix86_expand_branch, ix86_expand_fp_movcc): Handle TFmodes. (ix86_split_long_move): Use number of part returned by ix86_split_to_parts * i386.h (MASK_128BIT_LONG_DOUBLE, TARGET_128BIT_LONG_DOUBLE): New macros. (TARGET_SWITCHES): Add 128bit-long-double and 96bit-long-double (LONG_DOUBLE_TYPE_SIZE): Change from constant. (MAX_LONG_DOUBLE_TYPE_SIZE): New macro. (INTEL_EXTENDED_IEEE_FORMAT): Likewise. (ALIGN_MODE_128): Add TFmode. (IS_STACK_MODE): Likewise. (HARD_REGNO_NREGS): TFmode needs 3 registers. (HARD_REGNO_OK): Support TFmodes. (ASM_OUTPUT_LONG_DOUBLE): Handle TFmodes. * i386.md (scheduler definitions): Use memory operand to determine fst/fld instructions; use mode attribute to determine real mode of the instruction. (*tf): New patterns, expanders and splitters; based on XFmode patterns. * invoke.texi (128bit-long-double, 96bit-long-double): Document. From-SVN: r38633
2b589241 · Jan Hubicka · Jan Hubicka · 9e1458e7 · 2b589241 · 2b589241
Commit 2b589241 authored Jan 02, 2001 by Jan Hubicka Committed by Jan Hubicka Jan 02, 2001
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 100 additions and 26 deletions

gcc/ChangeLog
+25 -0

gcc/config/i386/i386.c
+22 -12

gcc/config/i386/i386.h
+35 -10

gcc/config/i386/i386.md
+0 -0

gcc/invoke.texi
+18 -4

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+Tue Jan  2 20:21:31 MET 2001  Jan Hubicka  <jh@suse.cz>
+	* i386.c (ix86_split_to_parts): Return number of part required;
+	handle TFmodes.
+	(print_operand, ix86_expand_branch, ix86_expand_fp_movcc): Handle
+	TFmodes.
+	(ix86_split_long_move): Use number of part returned
+	by ix86_split_to_parts
+	* i386.h (MASK_128BIT_LONG_DOUBLE, TARGET_128BIT_LONG_DOUBLE):
+	New macros.
+	(TARGET_SWITCHES): Add 128bit-long-double and 96bit-long-double
+	(LONG_DOUBLE_TYPE_SIZE): Change from constant.
+	(MAX_LONG_DOUBLE_TYPE_SIZE): New macro.
+	(INTEL_EXTENDED_IEEE_FORMAT): Likewise.
+	(ALIGN_MODE_128): Add TFmode.
+	(IS_STACK_MODE): Likewise.
+	(HARD_REGNO_NREGS): TFmode needs 3 registers.
+	(HARD_REGNO_OK): Support TFmodes.
+	(ASM_OUTPUT_LONG_DOUBLE): Handle TFmodes.
+	* i386.md (scheduler definitions): Use memory operand to determine
+	fst/fld instructions; use mode attribute to determine real mode of
+	the instruction.
+	(*tf): New patterns, expanders and splitters; based on XFmode patterns.
+	* invoke.texi (128bit-long-double, 96bit-long-double): Document.
 2001-01-02  Mark Mitchell  <mark@codesourcery.com>
 	* tree.def (TRUTH_NOT_EXPR): Improve documentation.

--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -404,7 +404,7 @@ static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
 					 rtx));
 static void ix86_init_machine_status PARAMS ((struct function *));
 static void ix86_mark_machine_status PARAMS ((struct function *));
-static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
+static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
 static int ix86_safe_length_prefix PARAMS ((rtx));
 static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
 						     int *, int *, int *));
@@ -3337,6 +3337,7 @@ print_operand (file, x, code)
 	      return;
 	    case 12:
+	    case 16:
 	      putc ('t', file);
 	      return;
@@ -3466,7 +3467,8 @@ print_operand (file, x, code)
      fprintf (file, "%s", dstr);
    }
-  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
+  else if (GET_CODE (x) == CONST_DOUBLE
+	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      REAL_VALUE_TYPE r;
      char dstr[30];
@@ -4769,6 +4771,7 @@ ix86_prepare_fp_compare_args (code, pop0, pop1)
  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
+      || op_mode == TFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
@@ -5048,6 +5051,7 @@ ix86_expand_branch (code, label)
    case SFmode:
    case DFmode:
    case XFmode:
+    case TFmode:
      /* Don't expand the comparison early, so that we get better code
 	 when jump or whoever decides to reverse the comparison.  */
      {
@@ -5633,13 +5637,13 @@ ix86_expand_fp_movcc (operands)
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
-static void
+static int
 ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
 {
-  int size = GET_MODE_SIZE (mode) / 4;
+  int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
@@ -5689,12 +5693,13 @@ ix86_split_to_parts (operand, parts, mode)
 	  else if (GET_CODE (operand) == CONST_DOUBLE)
 	    {
 	      REAL_VALUE_TYPE r;
-	      long l[3];
+	      long l[4];
 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
 	      switch (mode)
 		{
 		case XFmode:
+		case TFmode:
 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
 		  parts[2] = GEN_INT (l[2]);
 		  break;
@@ -5712,7 +5717,7 @@ ix86_split_to_parts (operand, parts, mode)
 	}
    }
-  return;
+  return size;
 }
 /* Emit insns to perform a move or push of DI, DF, and XF values.
@@ -5726,7 +5731,7 @@ ix86_split_long_move (operands1)
 {
  rtx part[2][3];
  rtx operands[2];
-  int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
+  int size;
  int push = 0;
  int collisions = 0;
@@ -5734,9 +5739,6 @@ ix86_split_long_move (operands1)
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);
-  if (size < 2 || size > 3)
-    abort ();
  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
@@ -5744,7 +5746,7 @@ ix86_split_long_move (operands1)
 	   && ! offsettable_memref_p (operands[0]))
    abort ();
-  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
+  size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
  /* When emitting push, take care for source operands on the stack.  */
@@ -5794,7 +5796,15 @@ ix86_split_long_move (operands1)
  if (push)
    {
      if (size == 3)
-	emit_insn (gen_push (part[1][2]));
+	{
+	  /* We use only the first 12 bytes of a TFmode value, but when
+	     pushing we are required to adjust the stack as if we were
+	     pushing a real 16-byte value.  */
+	  if (GET_MODE (operands1[0]) == TFmode)
+	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+				   GEN_INT (-4)));
+	  emit_insn (gen_push (part[1][2]));
+	}
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;

--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -104,6 +104,7 @@ extern int target_flags;
 #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00008000/* Accumulate outgoing args */
 #define MASK_MMX		0x00010000	/* Support MMX regs/builtins */
 #define MASK_SSE		0x00020000	/* Support SSE regs/builtins */
+#define MASK_128BIT_LONG_DOUBLE 0x00040000	/* long double size is 128bit */
 /* Temporary codegen switches */
 #define MASK_INTEL_SYNTAX	0x00000200
@@ -144,6 +145,11 @@ extern int target_flags;
   the 387 to be used, which is compatible with most calling conventions. */
 #define TARGET_FLOAT_RETURNS_IN_80387 (target_flags & MASK_FLOAT_RETURNS)
+/* Long double is 128bit instead of 96bit, even when only 80bits are used.
+   This mode wastes cache, but avoids misaligned data accesses and simplifies
+   address calculations.  */
+#define TARGET_128BIT_LONG_DOUBLE (target_flags & MASK_128BIT_LONG_DOUBLE)
 /* Disable generation of FP sin, cos and sqrt operations for 387.
   This is because FreeBSD lacks these in the math-emulator-code */
 #define TARGET_NO_FANCY_MATH_387 (target_flags & MASK_NO_FANCY_MATH_387)
@@ -295,6 +301,10 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
    N_("Support MMX and SSE builtins") },				      \
  { "no-sse",			-MASK_SSE,				      \
    N_("Do not support MMX and SSE builtins") },			      \
+  { "128bit-long-double",	 MASK_128BIT_LONG_DOUBLE,		      \
+    N_("sizeof(long double) is 16.") },					      \
+  { "96bit-long-double",	-MASK_128BIT_LONG_DOUBLE,		      \
+    N_("sizeof(long double) is 12.") },					      \
  SUBTARGET_SWITCHES							      \
  { "", TARGET_DEFAULT, 0 }}
@@ -446,9 +456,18 @@ extern int ix86_arch;
 /* target machine storage layout */
-/* Define for XFmode extended real floating point support.
+/* Define for XFmode or TFmode extended real floating point support.
-   This will automatically cause REAL_ARITHMETIC to be defined.  */
+   This will automatically cause REAL_ARITHMETIC to be defined.
-#define LONG_DOUBLE_TYPE_SIZE 96
+   The XFmode is specified by i386 ABI, while TFmode may be faster
+   due to alignment and simplifications in the address calculations.
+ */
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_128BIT_LONG_DOUBLE ? 128 : 96)
+#define MAX_LONG_DOUBLE_TYPE_SIZE 128
+/* Tell real.c that this is the 80-bit Intel extended float format
+   packaged in a 128-bit or 96bit entity.  */
+#define INTEL_EXTENDED_IEEE_FORMAT
 /* Define if you don't want extended real, but do want to use the
   software floating point emulator for REAL_ARITHMETIC and
@@ -515,8 +534,8 @@ extern int ix86_arch;
 /* Decide whether a variable of mode MODE must be 128 bit aligned.  */
 #define ALIGN_MODE_128(MODE) \
- ((MODE) == XFmode || ((MODE) == TImode) || (MODE) == V4SFmode	\
+ ((MODE) == XFmode || (MODE) == TFmode || ((MODE) == TImode) \
-  || (MODE) == V4SImode)
+  || (MODE) == V4SFmode	|| (MODE) == V4SImode)
 /* The published ABIs say that doubles should be aligned on word
   boundaries, so lower the aligment for structure fields unless
@@ -596,7 +615,8 @@ extern int ix86_arch;
   for details. */
 #define STACK_REGS
-#define IS_STACK_MODE(mode) (mode==DFmode || mode==SFmode || mode==XFmode)
+#define IS_STACK_MODE(mode) (mode==DFmode || mode==SFmode \
+			     || mode==XFmode || mode==TFmode)
 /* Number of actual hardware registers.
   The hardware registers are assigned numbers for the compiler
@@ -740,7 +760,9 @@ extern int ix86_arch;
 #define HARD_REGNO_NREGS(REGNO, MODE)   \
  (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) ? 1 \
-   : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+   : (MODE == TFmode							\
+      ? 3								\
+      : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
 #define VALID_SSE_REG_MODE(MODE) \
    ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode)
@@ -765,7 +787,7 @@ extern int ix86_arch;
   : FP_REGNO_P (REGNO)						\
   ? ((GET_MODE_CLASS (MODE) == MODE_FLOAT			\
       || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)		\
-      && GET_MODE_UNIT_SIZE (MODE) <= (LONG_DOUBLE_TYPE_SIZE == 96 ? 12 : 8))\
+      && GET_MODE_UNIT_SIZE (MODE) <= 16)\
   : SSE_REGNO_P (REGNO) ? VALID_SSE_REG_MODE (MODE)		\
   : MMX_REGNO_P (REGNO) ? VALID_MMX_REG_MODE (MODE)		\
   /* Only SSE and MMX regs can hold vector modes.  */		\
@@ -2610,9 +2632,12 @@ do { long l[2];								\
 #undef ASM_OUTPUT_LONG_DOUBLE
 #define ASM_OUTPUT_LONG_DOUBLE(FILE,VALUE)  		\
-do { long l[3];						\
+do { long l[4];						\
     REAL_VALUE_TO_TARGET_LONG_DOUBLE (VALUE, l);	\
-     fprintf (FILE, "%s\t0x%lx,0x%lx,0x%lx\n", ASM_LONG, l[0], l[1], l[2]); \
+     if (TARGET_128BIT_LONG_DOUBLE)			\
+       fprintf (FILE, "%s\t0x%lx,0x%lx,0x%lx,0x0\n", ASM_LONG, l[0], l[1], l[2]); \
+     else \
+       fprintf (FILE, "%s\t0x%lx,0x%lx,0x%lx\n", ASM_LONG, l[0], l[1], l[2]); \
   } while (0)
 /* This is how to output an assembler line defining a `float' constant.  */

--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
--- a/gcc/invoke.texi
+++ b/gcc/invoke.texi
@@ -446,7 +446,8 @@ in the following sections.
 -malign-jumps=@var{num}  -malign-loops=@var{num}
 -malign-functions=@var{num} -mpreferred-stack-boundary=@var{num}
 -mthreads -mno-align-stringops -minline-all-stringops
-mpush-args -maccumulate-outgoing-args
+-mpush-args -maccumulate-outgoing-args -m128bit-long-double
+-m96bit-long-double
 @emph{HPPA Options}
 -march=@var{architecture type}
@@ -6344,9 +6345,22 @@ boundary.  Aligning @code{double} variables on a two word boundary will
 produce code that runs somewhat faster on a @samp{Pentium} at the
 expense of more memory.
-@strong{Warning:} if you use the @samp{-malign-double} switch,
+@item -m128bit-long-double
-structures containing the above types will be aligned differently than
+@itemx -m128bit-long-double
-the published application binary interface specifications for the 386.
+Control the size of the @code{long double} type.  The i386 application binary
+interface specifies the size to be 12 bytes, while modern architectures (Pentium
+and newer) prefer @code{long double} aligned to an 8 or 16 byte boundary.  This
+is impossible to achieve with 12 byte long doubles in array accesses.
+@strong{Warning:} if you use the @samp{-m128bit-long-double} switch,
+structures and arrays containing @code{long double} will change their size,
+and the function calling convention for functions taking @code{long double}
+will be modified.
+@item -m96bit-long-double
+@itemx -m96bit-long-double
+Set the size of @code{long double} to 96 bits as required by the i386
+application binary interface.  This is the default.
 @item -msvr3-shlib
 @itemx -mno-svr3-shlib