Commit 993f9e7d by Andreas Krebbel Committed by Andreas Krebbel

S/390: Fix the vec_xl / vec_xst style builtins

This patch fixes several problems with the vec_xl/vec_xst builtins:

- vec_xl/vec_xst needs to use the alignment of the scalar memory
  operand for the vector type reference. This is required to emit the
  proper vl/vst alignment hints.
- vec_xl / vec_xld2 / vec_xlw4 should accept const pointer source operands
- vec_xlw4 / vec_xstw4 needs to accept float memory operands

gcc/ChangeLog:

2019-02-07  Andreas Krebbel  <krebbel@linux.ibm.com>

	* config/s390/s390-builtin-types.def: Add new types.
	* config/s390/s390-builtins.def: (s390_vec_xl, s390_vec_xld2)
	(s390_vec_xlw4): Make the memory operand into a const pointer.
	(s390_vec_xld2, s390_vec_xlw4): Add a variant for single precision
	float.
	* config/s390/s390-c.c (s390_expand_overloaded_builtin): Generate
	a new vector type with the alignment of the scalar memory operand.

gcc/testsuite/ChangeLog:

2019-02-07  Andreas Krebbel  <krebbel@linux.ibm.com>

	* gcc.target/s390/zvector/xl-xst-align-1.c: New test.
	* gcc.target/s390/zvector/xl-xst-align-2.c: New test.

From-SVN: r268651
parent c272bbda
2019-02-07 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390-builtin-types.def: Add new types.
* config/s390/s390-builtins.def: (s390_vec_xl, s390_vec_xld2)
(s390_vec_xlw4): Make the memory operand into a const pointer.
(s390_vec_xld2, s390_vec_xlw4): Add a variant for single precision
float.
* config/s390/s390-c.c (s390_expand_overloaded_builtin): Generate
a new vector type with the alignment of the scalar memory operand.
2019-02-07 Matthew Malcomson <matthew.malcomson@arm.com>
Jakub Jelinek <jakub@redhat.com>
......
......@@ -260,6 +260,7 @@ DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR)
DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDPTR, BT_V4SI, BT_INT, BT_VOIDPTR)
DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
DEF_FN_TYPE_2 (BT_FN_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI)
......@@ -492,6 +493,7 @@ DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, BT_INT)
DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI)
DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR)
DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI)
DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARCONSTPTR, BT_UV16QI, BT_LONG, BT_UCHARCONSTPTR)
DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR)
DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR)
DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR)
......@@ -523,6 +525,7 @@ DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_
DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI)
DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI)
DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI)
DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGCONSTPTR)
DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR)
DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG)
DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR)
......@@ -556,6 +559,8 @@ DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI)
DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI)
DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR)
DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI)
DEF_OV_TYPE (BT_OV_UV4SI_LONG_FLTPTR, BT_UV4SI, BT_LONG, BT_FLTPTR)
DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTCONSTPTR, BT_UV4SI, BT_LONG, BT_UINTCONSTPTR)
DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR)
DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT)
DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR)
......@@ -593,6 +598,7 @@ DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI)
DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI)
DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR)
DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI)
DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTCONSTPTR, BT_UV8HI, BT_LONG, BT_USHORTCONSTPTR)
DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR)
DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT)
DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR)
......@@ -626,6 +632,7 @@ DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, BT_V8HI)
DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_UV8HI, BT_V4SI, BT_V4SI)
DEF_OV_TYPE (BT_OV_UV8HI_V8HI, BT_UV8HI, BT_V8HI)
DEF_OV_TYPE (BT_OV_V16QI_BV16QI_V16QI, BT_V16QI, BT_BV16QI, BT_V16QI)
DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARCONSTPTR, BT_V16QI, BT_LONG, BT_SCHARCONSTPTR)
DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARPTR, BT_V16QI, BT_LONG, BT_SCHARPTR)
DEF_OV_TYPE (BT_OV_V16QI_SCHAR, BT_V16QI, BT_SCHAR)
DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR, BT_V16QI, BT_SCHARCONSTPTR)
......@@ -660,6 +667,7 @@ DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_UINT, BT_V2DF, BT_DBLCONSTPTR, BT_UINT)
DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_USHORT, BT_V2DF, BT_DBLCONSTPTR, BT_USHORT)
DEF_OV_TYPE (BT_OV_V2DF_DBL_INT, BT_V2DF, BT_DBL, BT_INT)
DEF_OV_TYPE (BT_OV_V2DF_DBL_V2DF_INT, BT_V2DF, BT_DBL, BT_V2DF, BT_INT)
DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLCONSTPTR, BT_V2DF, BT_LONG, BT_DBLCONSTPTR)
DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLPTR, BT_V2DF, BT_LONG, BT_DBLPTR)
DEF_OV_TYPE (BT_OV_V2DF_UV2DI, BT_V2DF, BT_UV2DI)
DEF_OV_TYPE (BT_OV_V2DF_UV2DI_INT, BT_V2DF, BT_UV2DI, BT_INT)
......@@ -687,6 +695,7 @@ DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_USHORT, BT_V2DI, BT_LONGLONGCONSTPTR, B
DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_INT, BT_V2DI, BT_LONGLONG, BT_INT)
DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_LONGLONG, BT_V2DI, BT_LONGLONG, BT_LONGLONG)
DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_V2DI_INT, BT_V2DI, BT_LONGLONG, BT_V2DI, BT_INT)
DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGCONSTPTR, BT_V2DI, BT_LONG, BT_LONGLONGCONSTPTR)
DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGPTR, BT_V2DI, BT_LONG, BT_LONGLONGPTR)
DEF_OV_TYPE (BT_OV_V2DI_V16QI, BT_V2DI, BT_V16QI)
DEF_OV_TYPE (BT_OV_V2DI_V2DI, BT_V2DI, BT_V2DI)
......@@ -716,7 +725,7 @@ DEF_OV_TYPE (BT_OV_V4SF_FLTCONSTPTR_UINT, BT_V4SF, BT_FLTCONSTPTR, BT_UINT)
DEF_OV_TYPE (BT_OV_V4SF_FLTCONSTPTR_USHORT, BT_V4SF, BT_FLTCONSTPTR, BT_USHORT)
DEF_OV_TYPE (BT_OV_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT)
DEF_OV_TYPE (BT_OV_V4SF_FLT_V4SF_INT, BT_V4SF, BT_FLT, BT_V4SF, BT_INT)
DEF_OV_TYPE (BT_OV_V4SF_LONG_FLTPTR, BT_V4SF, BT_LONG, BT_FLTPTR)
DEF_OV_TYPE (BT_OV_V4SF_LONG_FLTCONSTPTR, BT_V4SF, BT_LONG, BT_FLTCONSTPTR)
DEF_OV_TYPE (BT_OV_V4SF_V4SF, BT_V4SF, BT_V4SF)
DEF_OV_TYPE (BT_OV_V4SF_V4SF_BV4SI, BT_V4SF, BT_V4SF, BT_BV4SI)
DEF_OV_TYPE (BT_OV_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
......@@ -737,6 +746,7 @@ DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_UINT, BT_V4SI, BT_INTCONSTPTR, BT_UINT)
DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_USHORT, BT_V4SI, BT_INTCONSTPTR, BT_USHORT)
DEF_OV_TYPE (BT_OV_V4SI_INT_INT, BT_V4SI, BT_INT, BT_INT)
DEF_OV_TYPE (BT_OV_V4SI_INT_V4SI_INT, BT_V4SI, BT_INT, BT_V4SI, BT_INT)
DEF_OV_TYPE (BT_OV_V4SI_LONG_INTCONSTPTR, BT_V4SI, BT_LONG, BT_INTCONSTPTR)
DEF_OV_TYPE (BT_OV_V4SI_LONG_INTPTR, BT_V4SI, BT_LONG, BT_INTPTR)
DEF_OV_TYPE (BT_OV_V4SI_UV4SI_V4SI_V4SI, BT_V4SI, BT_UV4SI, BT_V4SI, BT_V4SI)
DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI)
......@@ -764,6 +774,7 @@ DEF_OV_TYPE (BT_OV_V4SI_V8HI, BT_V4SI, BT_V8HI)
DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI, BT_V4SI, BT_V8HI, BT_V8HI)
DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI)
DEF_OV_TYPE (BT_OV_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI)
DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTCONSTPTR, BT_V8HI, BT_LONG, BT_SHORTCONSTPTR)
DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTPTR, BT_V8HI, BT_LONG, BT_SHORTPTR)
DEF_OV_TYPE (BT_OV_V8HI_SHORT, BT_V8HI, BT_SHORT)
DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR, BT_V8HI, BT_SHORTCONSTPTR)
......@@ -802,6 +813,7 @@ DEF_OV_TYPE (BT_OV_VOID_UV16QI_UCHARPTR_UINT, BT_VOID, BT_UV16QI, BT_UCHARPTR, B
DEF_OV_TYPE (BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR, BT_VOID, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR)
DEF_OV_TYPE (BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT, BT_VOID, BT_UV2DI, BT_ULONGLONGPTR, BT_UINT)
DEF_OV_TYPE (BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG)
DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_FLTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_FLTPTR)
DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_UINTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_UINTPTR)
DEF_OV_TYPE (BT_OV_VOID_UV4SI_UINTPTR_UINT, BT_VOID, BT_UV4SI, BT_UINTPTR, BT_UINT)
DEF_OV_TYPE (BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG)
......
......@@ -470,16 +470,30 @@ s390_expand_overloaded_builtin (location_t loc,
case S390_OVERLOADED_BUILTIN_s390_vec_xl:
case S390_OVERLOADED_BUILTIN_s390_vec_xld2:
case S390_OVERLOADED_BUILTIN_s390_vec_xlw4:
return build2 (MEM_REF, return_type,
fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]),
build_int_cst (TREE_TYPE ((*arglist)[1]), 0));
{
/* Build a vector type with the alignment of the source
location in order to enable correct alignment hints to be
generated for vl. */
tree mem_type = build_aligned_type (return_type,
TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[1]))));
return build2 (MEM_REF, mem_type,
fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]),
build_int_cst (TREE_TYPE ((*arglist)[1]), 0));
}
case S390_OVERLOADED_BUILTIN_s390_vec_xst:
case S390_OVERLOADED_BUILTIN_s390_vec_xstd2:
case S390_OVERLOADED_BUILTIN_s390_vec_xstw4:
return build2 (MODIFY_EXPR, TREE_TYPE((*arglist)[0]),
build1 (INDIRECT_REF, TREE_TYPE((*arglist)[0]),
fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])),
(*arglist)[0]);
{
/* Build a vector type with the alignment of the target
location in order to enable correct alignment hints to be
generated for vst. */
tree mem_type = build_aligned_type (TREE_TYPE((*arglist)[0]),
TYPE_ALIGN (TREE_TYPE (TREE_TYPE ((*arglist)[2]))));
return build2 (MODIFY_EXPR, mem_type,
build1 (INDIRECT_REF, mem_type,
fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])),
(*arglist)[0]);
}
case S390_OVERLOADED_BUILTIN_s390_vec_load_pair:
return build_constructor_va (return_type, 2,
NULL_TREE, (*arglist)[0],
......
2019-02-07 Andreas Krebbel <krebbel@linux.ibm.com>
* gcc.target/s390/zvector/xl-xst-align-1.c: New test.
* gcc.target/s390/zvector/xl-xst-align-2.c: New test.
2019-02-07 Matthew Malcomson <matthew.malcomson@arm.com>
Jakub Jelinek <jakub@redhat.com>
......
/* { dg-do compile { target { s390*-*-* } } } */
/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
#include <vecintrin.h>
vector float
foo (float *a)
{
return vec_xl (0, a);
}
vector float
bar (const float *a)
{
return vec_xl (0, a);
}
void
baz (float *f, vector float a)
{
vec_xst (a, 0, f);
}
vector float
foo2 (float *a)
{
return vec_xlw4 (0, a);
}
vector float
bar2 (const float *a)
{
return vec_xlw4 (0, a);
}
void
baz2 (float *f, vector float a)
{
vec_xstw4 (a, 0, f);
}
/* Make sure no alignment hints are generated. */
/* { dg-final { scan-assembler-not "vl.*,3" } } */
/* { dg-final { scan-assembler-not "vst.*,3" } } */
/* { dg-do compile { target { s390*-*-* } } } */
/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
#include <vecintrin.h>
typedef float __attribute__((aligned(8))) float_aligned;
vector float
foo (float_aligned *a)
{
return vec_xl (0, a);
}
vector float
bar (const float_aligned *a)
{
return vec_xl (0, a);
}
void
baz (float_aligned *f, vector float a)
{
vec_xst (a, 0, f);
}
vector float
foo2 (float_aligned *a)
{
return vec_xlw4 (0, a);
}
vector float
bar2 (const float_aligned *a)
{
return vec_xlw4 (0, a);
}
void
baz2 (float_aligned *f, vector float a)
{
vec_xstw4 (a, 0, f);
}
/* Make sure alignment hints are generated if the source or target
operand is properly aligned. */
/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r2\\),3" 4 } } */
/* { dg-final { scan-assembler-times "vst\t%v\[0-9\]*,0\\(%r2\\),3" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment