Commit 43e78fd7 by Michael Zolotukhin Committed by Kirill Yukhin

slp-13.c: Increase array size, add initialization.


	* gcc.dg/vect/slp-13.c: Increase array size, add initialization.
	* gcc.dg/vect/slp-24.c: Ditto.
	* gcc.dg/vect/slp-3.c: Likewise and fix scans.
	* gcc.dg/vect/slp-34.c: Ditto.
	* gcc.dg/vect/slp-4.c: Ditto.
	* gcc.dg/vect/slp-cond-2.c: Ditto.
	* gcc.dg/vect/slp-multitypes-11.c: Ditto.
	* gcc.dg/vect/vect-1.c: Ditto.
	* gcc.dg/vect/vect-10.c: Ditto.
	* gcc.dg/vect/vect-105.c: Ditto.
	* gcc.dg/vect/vect-112.c: Ditto.
	* gcc.dg/vect/vect-15.c: Ditto.
	* gcc.dg/vect/vect-2.c: Ditto.
	* gcc.dg/vect/vect-31.c: Ditto.
	* gcc.dg/vect/vect-32.c: Ditto.
	* gcc.dg/vect/vect-33.c: Ditto.
	* gcc.dg/vect/vect-34.c: Ditto.
	* gcc.dg/vect/vect-35.c: Ditto.
	* gcc.dg/vect/vect-36.c: Ditto.
	* gcc.dg/vect/vect-6.c: Ditto.
	* gcc.dg/vect/vect-73.c: Ditto.
	* gcc.dg/vect/vect-74.c: Ditto.
	* gcc.dg/vect/vect-75.c: Ditto.
	* gcc.dg/vect/vect-76.c: Ditto.
	* gcc.dg/vect/vect-80.c: Ditto.
	* gcc.dg/vect/vect-85.c: Ditto.
	* gcc.dg/vect/vect-89.c: Ditto.
	* gcc.dg/vect/vect-97.c: Ditto.
	* gcc.dg/vect/vect-98.c: Ditto.
	* gcc.dg/vect/vect-all.c: Ditto.
	* gcc.dg/vect/vect-double-reduc-6.c: Ditto.
	* gcc.dg/vect/vect-iv-8.c: Ditto.
	* gcc.dg/vect/vect-iv-8a.c: Ditto.
	* gcc.dg/vect/vect-outer-1.c: Ditto.
	* gcc.dg/vect/vect-outer-1a.c: Ditto.
	* gcc.dg/vect/vect-outer-1b.c: Ditto.
	* gcc.dg/vect/vect-outer-2.c: Ditto.
	* gcc.dg/vect/vect-outer-2a.c: Ditto.
	* gcc.dg/vect/vect-outer-2c.c: Ditto.
	* gcc.dg/vect/vect-outer-3.c: Ditto.
	* gcc.dg/vect/vect-outer-3a.c: Ditto.
	* gcc.dg/vect/vect-outer-4a.c: Ditto.
	* gcc.dg/vect/vect-outer-4b.c: Ditto.
	* gcc.dg/vect/vect-outer-4c.c: Ditto.
	* gcc.dg/vect/vect-outer-4d.c: Ditto.
	* gcc.dg/vect/vect-outer-4m.c: Ditto.
	* gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
	* gcc.dg/vect/vect-outer-fir.c: Ditto.
	* gcc.dg/vect/vect-over-widen-1.c: Ditto.
	* gcc.dg/vect/vect-over-widen-2.c: Ditto.
	* gcc.dg/vect/vect-over-widen-3.c: Ditto.
	* gcc.dg/vect/vect-over-widen-4.c: Ditto.
	* gcc.dg/vect/vect-reduc-1char.c: Ditto.
	* gcc.dg/vect/vect-reduc-2char.c: Ditto.
	* gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
	* gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
	* gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
	* gcc.dg/vect/vect-shift-2.c: Ditto.
	* gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
	* gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
	* gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
	* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
	* gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.

From-SVN: r182165
parent 6d26322f
2011-12-09 Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* gcc.dg/vect/slp-13.c: Array size increase reverted.
* gcc.dg/vect/slp-24.c: Ditto.
* gcc.dg/vect/slp-3.c: Ditto.
* gcc.dg/vect/slp-34.c: Ditto.
* gcc.dg/vect/slp-4.c: Ditto.
* gcc.dg/vect/slp-cond-2.c: Ditto.
* gcc.dg/vect/slp-multitypes-11.c: Ditto.
* gcc.dg/vect/vect-1.c: Ditto.
* gcc.dg/vect/vect-10.c: Ditto.
* gcc.dg/vect/vect-105.c: Ditto.
* gcc.dg/vect/vect-112.c: Ditto.
* gcc.dg/vect/vect-15.c: Ditto.
* gcc.dg/vect/vect-2.c: Ditto.
* gcc.dg/vect/vect-31.c: Ditto.
* gcc.dg/vect/vect-32.c: Ditto.
* gcc.dg/vect/vect-33.c: Ditto.
* gcc.dg/vect/vect-34.c: Ditto.
* gcc.dg/vect/vect-35.c: Ditto.
* gcc.dg/vect/vect-36.c: Ditto.
* gcc.dg/vect/vect-6.c: Ditto.
* gcc.dg/vect/vect-73.c: Ditto.
* gcc.dg/vect/vect-74.c: Ditto.
* gcc.dg/vect/vect-75.c: Ditto.
* gcc.dg/vect/vect-76.c: Ditto.
* gcc.dg/vect/vect-80.c: Ditto.
* gcc.dg/vect/vect-85.c: Ditto.
* gcc.dg/vect/vect-89.c: Ditto.
* gcc.dg/vect/vect-97.c: Ditto.
* gcc.dg/vect/vect-98.c: Ditto.
* gcc.dg/vect/vect-all.c: Ditto.
* gcc.dg/vect/vect-double-reduc-6.c: Ditto.
* gcc.dg/vect/vect-iv-8.c: Ditto.
* gcc.dg/vect/vect-iv-8a.c: Ditto.
* gcc.dg/vect/vect-outer-1.c: Ditto.
* gcc.dg/vect/vect-outer-1a.c: Ditto.
* gcc.dg/vect/vect-outer-1b.c: Ditto.
* gcc.dg/vect/vect-outer-2.c: Ditto.
* gcc.dg/vect/vect-outer-2a.c: Ditto.
* gcc.dg/vect/vect-outer-2c.c: Ditto.
* gcc.dg/vect/vect-outer-3.c: Ditto.
* gcc.dg/vect/vect-outer-3a.c: Ditto.
* gcc.dg/vect/vect-outer-4a.c: Ditto.
* gcc.dg/vect/vect-outer-4b.c: Ditto.
* gcc.dg/vect/vect-outer-4c.c: Ditto.
* gcc.dg/vect/vect-outer-4d.c: Ditto.
* gcc.dg/vect/vect-outer-4m.c: Ditto.
* gcc.dg/vect/vect-outer-fir-lb.c: Ditto.
* gcc.dg/vect/vect-outer-fir.c: Ditto.
* gcc.dg/vect/vect-over-widen-1.c: Ditto.
* gcc.dg/vect/vect-over-widen-2.c: Ditto.
* gcc.dg/vect/vect-over-widen-3.c: Ditto.
* gcc.dg/vect/vect-over-widen-4.c: Ditto.
* gcc.dg/vect/vect-reduc-1char.c: Ditto.
* gcc.dg/vect/vect-reduc-2char.c: Ditto.
* gcc.dg/vect/vect-reduc-pattern-1b.c: Ditto.
* gcc.dg/vect/vect-reduc-pattern-1c.c: Ditto.
* gcc.dg/vect/vect-reduc-pattern-2b.c: Ditto.
* gcc.dg/vect/vect-shift-2.c: Ditto.
* gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Ditto.
* gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap2.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap7.c: Ditto.
* gcc.dg/vect/slp-13-big-array.c: New test.
* gcc.dg/vect/slp-24-big-array.c: Ditto.
* gcc.dg/vect/slp-3-big-array.c: Ditto.
* gcc.dg/vect/slp-34-big-array.c: Ditto.
* gcc.dg/vect/slp-4-big-array.c: Ditto.
* gcc.dg/vect/slp-cond-2-big-array.c: Ditto.
* gcc.dg/vect/slp-multitypes-11-big-array.c: Ditto.
* gcc.dg/vect/vect-1-big-array.c: Ditto.
* gcc.dg/vect/vect-10-big-array.c: Ditto.
* gcc.dg/vect/vect-105-big-array.c: Ditto.
* gcc.dg/vect/vect-112-big-array.c: Ditto.
* gcc.dg/vect/vect-15-big-array.c: Ditto.
* gcc.dg/vect/vect-2-big-array.c: Ditto.
* gcc.dg/vect/vect-31-big-array.c: Ditto.
* gcc.dg/vect/vect-32-big-array.c: Ditto.
* gcc.dg/vect/vect-33-big-array.c: Ditto.
* gcc.dg/vect/vect-34-big-array.c: Ditto.
* gcc.dg/vect/vect-35-big-array.c: Ditto.
* gcc.dg/vect/vect-36-big-array.c: Ditto.
* gcc.dg/vect/vect-6-big-array.c: Ditto.
* gcc.dg/vect/vect-73-big-array.c: Ditto.
* gcc.dg/vect/vect-74-big-array.c: Ditto.
* gcc.dg/vect/vect-75-big-array.c: Ditto.
* gcc.dg/vect/vect-76-big-array.c: Ditto.
* gcc.dg/vect/vect-80-big-array.c: Ditto.
* gcc.dg/vect/vect-85-big-array.c: Ditto.
* gcc.dg/vect/vect-89-big-array.c: Ditto.
* gcc.dg/vect/vect-97-big-array.c: Ditto.
* gcc.dg/vect/vect-98-big-array.c: Ditto.
* gcc.dg/vect/vect-all-big-array.c: Ditto.
* gcc.dg/vect/vect-double-reduc-6-big-array.c: Ditto.
* gcc.dg/vect/vect-iv-8-big-array.c: Ditto.
* gcc.dg/vect/vect-iv-8a-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-1-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-1a-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-1b-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-2-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-2a-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-2c-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-3-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-3a-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-4a-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-4b-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-4c-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-4d-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-4m-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-fir-lb-big-array.c: Ditto.
* gcc.dg/vect/vect-outer-fir-big-array.c: Ditto.
* gcc.dg/vect/vect-over-widen-1-big-array.c: Ditto.
* gcc.dg/vect/vect-over-widen-2-big-array.c: Ditto.
* gcc.dg/vect/vect-over-widen-3-big-array.c: Ditto.
* gcc.dg/vect/vect-over-widen-4-big-array.c: Ditto.
* gcc.dg/vect/vect-reduc-1char-big-array.c: Ditto.
* gcc.dg/vect/vect-reduc-2char-big-array.c: Ditto.
* gcc.dg/vect/vect-reduc-pattern-1b-big-array.c: Ditto.
* gcc.dg/vect/vect-reduc-pattern-1c-big-array.c: Ditto.
* gcc.dg/vect/vect-reduc-pattern-2b-big-array.c: Ditto.
* gcc.dg/vect/vect-shift-2-big-array.c: Ditto.
* gcc.dg/vect/vect-strided-a-u8-i8-gap2-big-array.c: Ditto.
* gcc.dg/vect/vect-strided-a-u8-i8-gap7-big-array.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap2-big-array.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c: Ditto.
* gcc.dg/vect/vect-strided-u8-i8-gap7-big-array.c: Ditto.
2011-12-09 Richard Guenther <rguenther@suse.de>
PR lto/48042
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
volatile int y = 0;
/* Run three strided copy-and-add loops over locally declared arrays and
   verify each one right after it runs.  abort () is called on the first
   mismatch; 0 is returned when every check passes.  */
int
main1 ()
{
int i;
unsigned short out[N*8];
unsigned short in[N*8];
unsigned int in2[N*8];
unsigned int out2[N*8];
/* Set every input element to its own index.  Reading the volatile Y
   inside the loop keeps this setup loop itself from being vectorized.  */
for (i = 0; i < N*8; i++)
{
in[i] = in2[i] = i;
if (y) /* Avoid vectorization. */
abort ();
}
/* Induction is not SLPable yet. */
/* Groups of 8 unsigned shorts; the loop counter I is the addend.  */
for (i = 0; i < N; i++)
{
out[i*8] = in[i*8] + i;
out[i*8 + 1] = in[i*8 + 1] + i;
out[i*8 + 2] = in[i*8 + 2] + i;
out[i*8 + 3] = in[i*8 + 3] + i;
out[i*8 + 4] = in[i*8 + 4] + i;
out[i*8 + 5] = in[i*8 + 5] + i;
out[i*8 + 6] = in[i*8 + 6] + i;
out[i*8 + 7] = in[i*8 + 7] + i;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (out[i*8] != in[i*8] + i
|| out[i*8 + 1] != in[i*8 + 1] + i
|| out[i*8 + 2] != in[i*8 + 2] + i
|| out[i*8 + 3] != in[i*8 + 3] + i
|| out[i*8 + 4] != in[i*8 + 4] + i
|| out[i*8 + 5] != in[i*8 + 5] + i
|| out[i*8 + 6] != in[i*8 + 6] + i
|| out[i*8 + 7] != in[i*8 + 7] + i)
abort ();
}
/* Induction is not SLPable yet and strided group size must be a power of 2
to get vectorized. */
/* Groups of 12 unsigned ints over N/2 iterations: the highest index
   touched is 6*N - 1, which stays inside the N*8-element arrays.  */
for (i = 0; i < N/2; i++)
{
out2[i*12] = in2[i*12] + i;
out2[i*12 + 1] = in2[i*12 + 1] + i;
out2[i*12 + 2] = in2[i*12 + 2] + i;
out2[i*12 + 3] = in2[i*12 + 3] + i;
out2[i*12 + 4] = in2[i*12 + 4] + i;
out2[i*12 + 5] = in2[i*12 + 5] + i;
out2[i*12 + 6] = in2[i*12 + 6] + i;
out2[i*12 + 7] = in2[i*12 + 7] + i;
out2[i*12 + 8] = in2[i*12 + 8] + i;
out2[i*12 + 9] = in2[i*12 + 9] + i;
out2[i*12 + 10] = in2[i*12 + 10] + i;
out2[i*12 + 11] = in2[i*12 + 11] + i;
}
/* check results: */
for (i = 0; i < N/2; i++)
{
if (out2[i*12] != in2[i*12] + i
|| out2[i*12 + 1] != in2[i*12 + 1] + i
|| out2[i*12 + 2] != in2[i*12 + 2] + i
|| out2[i*12 + 3] != in2[i*12 + 3] + i
|| out2[i*12 + 4] != in2[i*12 + 4] + i
|| out2[i*12 + 5] != in2[i*12 + 5] + i
|| out2[i*12 + 6] != in2[i*12 + 6] + i
|| out2[i*12 + 7] != in2[i*12 + 7] + i
|| out2[i*12 + 8] != in2[i*12 + 8] + i
|| out2[i*12 + 9] != in2[i*12 + 9] + i
|| out2[i*12 + 10] != in2[i*12 + 10] + i
|| out2[i*12 + 11] != in2[i*12 + 11] + i)
abort ();
}
/* Not power of 2 but SLPable. */
/* Same group of 12 as above, but each lane adds its own constant
   (1 .. 12) instead of the loop counter.  */
for (i = 0; i < N/2; i++)
{
out2[i*12] = in2[i*12] + 1;
out2[i*12 + 1] = in2[i*12 + 1] + 2;
out2[i*12 + 2] = in2[i*12 + 2] + 3;
out2[i*12 + 3] = in2[i*12 + 3] + 4;
out2[i*12 + 4] = in2[i*12 + 4] + 5;
out2[i*12 + 5] = in2[i*12 + 5] + 6;
out2[i*12 + 6] = in2[i*12 + 6] + 7;
out2[i*12 + 7] = in2[i*12 + 7] + 8;
out2[i*12 + 8] = in2[i*12 + 8] + 9;
out2[i*12 + 9] = in2[i*12 + 9] + 10;
out2[i*12 + 10] = in2[i*12 + 10] + 11;
out2[i*12 + 11] = in2[i*12 + 11] + 12;
}
/* check results: */
for (i = 0; i < N/2; i++)
{
if (out2[i*12] != in2[i*12] + 1
|| out2[i*12 + 1] != in2[i*12 + 1] + 2
|| out2[i*12 + 2] != in2[i*12 + 2] + 3
|| out2[i*12 + 3] != in2[i*12 + 3] + 4
|| out2[i*12 + 4] != in2[i*12 + 4] + 5
|| out2[i*12 + 5] != in2[i*12 + 5] + 6
|| out2[i*12 + 6] != in2[i*12 + 6] + 7
|| out2[i*12 + 7] != in2[i*12 + 7] + 8
|| out2[i*12 + 8] != in2[i*12 + 8] + 9
|| out2[i*12 + 9] != in2[i*12 + 9] + 10
|| out2[i*12 + 10] != in2[i*12 + 10] + 11
|| out2[i*12 + 11] != in2[i*12 + 11] + 12)
abort ();
}
return 0;
}
/* Test driver: calls check_vect () (declared in tree-vect.h; presumably
   a run-time check for vector support -- confirm there), then runs
   main1 ().  The value returned by main1 () is ignored; failures are
   reported through abort ().  */
int main (void)
{
check_vect ();
main1 ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,25 +3,17 @@ ...@@ -3,25 +3,17 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 64 #define N 8
volatile int y = 0;
int int
main1 () main1 ()
{ {
int i; int i;
unsigned short out[N*8]; unsigned short out[N*8];
unsigned short in[N*8]; unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned int in2[N*8]; unsigned int in2[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned int out2[N*8]; unsigned int out2[N*8];
for (i = 0; i < N*8; i++)
{
in[i] = in2[i] = i;
if (y) /* Avoid vectorization. */
abort ();
}
/* Induction is not SLPable yet. */ /* Induction is not SLPable yet. */
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Record of four unsigned char fields; main1 below stores to the fields
   of each element in the order d, b, c, a.  */
typedef struct {
unsigned char a;
unsigned char b;
unsigned char c;
unsigned char d;
} s;
unsigned char ub[N*2];
unsigned char uc[N];
volatile int y = 0;
unsigned char check_diff = 2;
/* One loop that combines three things: a running unsigned char sum of
   ub[i] - uc[i] (seeded with 2), a pairwise copy of UB into a local
   array, and a field-by-field update of the records in ARR into a local
   array OUT.  All three are verified afterwards; abort () is called on
   any mismatch.

   X only seeds the locals UMAX and UMIN, which are not read again in
   this function.  MAX_RESULT and MIN_RESULT are not used in the body.
   ARR must point to at least N elements.  */
void
main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *arr)
{
int i;
unsigned char udiff = 2;
unsigned char umax = x;
unsigned char umin = x;
unsigned char ua1[N*2];
s *pIn = arr;
s out[N];
for (i = 0; i < N; i++) {
/* Accumulation, truncated to unsigned char on every step.  */
udiff += (unsigned char) (ub[i] - uc[i]);
/* Pair copy, written odd element first.  */
ua1[2*i+1] = ub[2*i+1];
ua1[2*i] = ub[2*i];
/* Field stores are issued in the order d, b, c, a, which differs from
   the declaration order a, b, c, d.  */
out[i].d = pIn->d - 1;
out[i].b = pIn->b - 4;
out[i].c = pIn->c - 8;
out[i].a = pIn->a - 3;
pIn++;
}
/* Verify the copied pairs and the updated records.  */
for (i = 0; i < N; i++) {
if (ua1[2*i] != ub[2*i]
|| ua1[2*i+1] != ub[2*i+1]
|| out[i].a != arr[i].a - 3
|| out[i].b != arr[i].b - 4
|| out[i].c != arr[i].c - 8
|| out[i].d != arr[i].d - 1)
abort ();
}
/* check results: */
/* CHECK_DIFF is the reference sum computed by main ().  */
if (udiff != check_diff)
abort ();
}
/* Test driver: builds the global inputs UB and UC, computes the
   reference sum CHECK_DIFF, fills a local record array, and then calls
   main1 () twice with different scalar arguments.  */
int main (void)
{
int i;
s arr[N];
/* Same seed as UDIFF in main1 ().  Element 0 contributes nothing to
   the sum because ub[0] == uc[0].  */
check_diff = 2;
ub[0] = uc[0] = 1;
/* ub[i] is 3*i for multiples of 5 and i otherwise; uc[i] is i.  The
   reference sum is accumulated with the same unsigned char truncation
   that main1 () uses.  */
for (i = 1; i < N; i++) {
ub[i] = (i%5 == 0)?i*3:i;
uc[i] = i;
check_diff += (unsigned char) (ub[i] - uc[i]);
if (y) /* Avoid vectorization. */
abort ();
}
/* Continue from i == N: the upper half of UB is zero.  */
for (; i < 2*N; i++) {
ub[i] = 0;
if (y) /* Avoid vectorization. */
abort ();
}
for (i = 0; i < N; i++)
{
arr[i].a = i + 9;
arr[i].b = i * 2 + 10;
arr[i].c = 17;
arr[i].d = i+34;
/* NOTE(review): this comparison sits in the same position as the
   "Avoid vectorization" guards above and probably serves the same
   purpose -- confirm.  */
if (arr[i].a == 178)
abort ();
}
check_vect ();
main1 (100, 100, 1, arr);
main1 (0, 15, 0, arr);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,8 @@ ...@@ -3,7 +3,8 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 64 #define N 16
#define DIFF 242
typedef struct { typedef struct {
unsigned char a; unsigned char a;
...@@ -12,11 +13,8 @@ typedef struct { ...@@ -12,11 +13,8 @@ typedef struct {
unsigned char d; unsigned char d;
} s; } s;
unsigned char ub[N*2]; unsigned char ub[N*2] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
unsigned char uc[N]; unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
volatile int y = 0;
unsigned char check_diff = 2;
void void
main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *arr) main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *arr)
...@@ -30,7 +28,7 @@ main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *a ...@@ -30,7 +28,7 @@ main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *a
s out[N]; s out[N];
for (i = 0; i < N; i++) { for (i = 0; i < N; i++) {
udiff += (unsigned char) (ub[i] - uc[i]); udiff += (unsigned char)(ub[i] - uc[i]);
ua1[2*i+1] = ub[2*i+1]; ua1[2*i+1] = ub[2*i+1];
ua1[2*i] = ub[2*i]; ua1[2*i] = ub[2*i];
...@@ -54,7 +52,7 @@ main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *a ...@@ -54,7 +52,7 @@ main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *a
} }
/* check results: */ /* check results: */
if (udiff != check_diff) if (udiff != DIFF)
abort (); abort ();
} }
...@@ -63,21 +61,6 @@ int main (void) ...@@ -63,21 +61,6 @@ int main (void)
int i; int i;
s arr[N]; s arr[N];
check_diff = 2;
ub[0] = uc[0] = 1;
for (i = 1; i < N; i++) {
ub[i] = (i%5 == 0)?i*3:i;
uc[i] = i;
check_diff += (unsigned char) (ub[i] - uc[i]);
if (y) /* Avoid vectorization. */
abort ();
}
for (; i < 2*N; i++) {
ub[i] = 0;
if (y) /* Avoid vectorization. */
abort ();
}
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
arr[i].a = i + 9; arr[i].a = i + 9;
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 96
unsigned short in[N*8];
volatile int y = 0;
/* Copy the global IN into a local OUT with four differently grouped
   loops (group sizes 8, 4, 16 and 9) and verify OUT after each one.
   abort () is called on the first mismatch; 0 is returned otherwise.  */
int
main1 ()
{
int i;
unsigned short out[N*8];
/* Input values cycle through 0 .. 63.  Reading the volatile Y keeps
   this setup loop from being vectorized.  */
for (i = 0; i < N*8; i++)
{
in[i] = i&63;
if (y) /* Avoid vectorization. */
abort ();
}
/* Group of 8, N iterations: covers all N*8 elements.  */
for (i = 0; i < N; i++)
{
out[i*8] = in[i*8];
out[i*8 + 1] = in[i*8 + 1];
out[i*8 + 2] = in[i*8 + 2];
out[i*8 + 3] = in[i*8 + 3];
out[i*8 + 4] = in[i*8 + 4];
out[i*8 + 5] = in[i*8 + 5];
out[i*8 + 6] = in[i*8 + 6];
out[i*8 + 7] = in[i*8 + 7];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (out[i*8] != in[i*8]
|| out[i*8 + 1] != in[i*8 + 1]
|| out[i*8 + 2] != in[i*8 + 2]
|| out[i*8 + 3] != in[i*8 + 3]
|| out[i*8 + 4] != in[i*8 + 4]
|| out[i*8 + 5] != in[i*8 + 5]
|| out[i*8 + 6] != in[i*8 + 6]
|| out[i*8 + 7] != in[i*8 + 7])
abort ();
}
/* Group of 4, N*2 iterations: again covers all N*8 elements.  */
for (i = 0; i < N*2; i++)
{
out[i*4] = in[i*4];
out[i*4 + 1] = in[i*4 + 1];
out[i*4 + 2] = in[i*4 + 2];
out[i*4 + 3] = in[i*4 + 3];
}
/* check results: */
for (i = 0; i < N*2; i++)
{
if (out[i*4] != in[i*4]
|| out[i*4 + 1] != in[i*4 + 1]
|| out[i*4 + 2] != in[i*4 + 2]
|| out[i*4 + 3] != in[i*4 + 3])
abort ();
}
/* Group of 16, N/2 iterations.  */
for (i = 0; i < N/2; i++)
{
out[i*16] = in[i*16];
out[i*16 + 1] = in[i*16 + 1];
out[i*16 + 2] = in[i*16 + 2];
out[i*16 + 3] = in[i*16 + 3];
out[i*16 + 4] = in[i*16 + 4];
out[i*16 + 5] = in[i*16 + 5];
out[i*16 + 6] = in[i*16 + 6];
out[i*16 + 7] = in[i*16 + 7];
out[i*16 + 8] = in[i*16 + 8];
out[i*16 + 9] = in[i*16 + 9];
out[i*16 + 10] = in[i*16 + 10];
out[i*16 + 11] = in[i*16 + 11];
out[i*16 + 12] = in[i*16 + 12];
out[i*16 + 13] = in[i*16 + 13];
out[i*16 + 14] = in[i*16 + 14];
out[i*16 + 15] = in[i*16 + 15];
}
/* check results: */
for (i = 0; i < N/2; i++)
{
if (out[i*16] != in[i*16]
|| out[i*16 + 1] != in[i*16 + 1]
|| out[i*16 + 2] != in[i*16 + 2]
|| out[i*16 + 3] != in[i*16 + 3]
|| out[i*16 + 4] != in[i*16 + 4]
|| out[i*16 + 5] != in[i*16 + 5]
|| out[i*16 + 6] != in[i*16 + 6]
|| out[i*16 + 7] != in[i*16 + 7]
|| out[i*16 + 8] != in[i*16 + 8]
|| out[i*16 + 9] != in[i*16 + 9]
|| out[i*16 + 10] != in[i*16 + 10]
|| out[i*16 + 11] != in[i*16 + 11]
|| out[i*16 + 12] != in[i*16 + 12]
|| out[i*16 + 13] != in[i*16 + 13]
|| out[i*16 + 14] != in[i*16 + 14]
|| out[i*16 + 15] != in[i*16 + 15])
abort ();
}
/* SLP with unrolling by 8. */
/* Group of 9 (not a power of two), N/4 iterations.  */
for (i = 0; i < N/4; i++)
{
out[i*9] = in[i*9];
out[i*9 + 1] = in[i*9 + 1];
out[i*9 + 2] = in[i*9 + 2];
out[i*9 + 3] = in[i*9 + 3];
out[i*9 + 4] = in[i*9 + 4];
out[i*9 + 5] = in[i*9 + 5];
out[i*9 + 6] = in[i*9 + 6];
out[i*9 + 7] = in[i*9 + 7];
out[i*9 + 8] = in[i*9 + 8];
}
/* check results: */
for (i = 0; i < N/4; i++)
{
if (out[i*9] != in[i*9]
|| out[i*9 + 1] != in[i*9 + 1]
|| out[i*9 + 2] != in[i*9 + 2]
|| out[i*9 + 3] != in[i*9 + 3]
|| out[i*9 + 4] != in[i*9 + 4]
|| out[i*9 + 5] != in[i*9 + 5]
|| out[i*9 + 6] != in[i*9 + 6]
|| out[i*9 + 7] != in[i*9 + 7]
|| out[i*9 + 8] != in[i*9 + 8])
abort ();
}
return 0;
}
/* Test driver: calls check_vect () (declared in tree-vect.h; presumably
   a run-time check for vector support -- confirm there), then runs
   main1 ().  The value returned by main1 () is ignored; failures are
   reported through abort ().  */
int main (void)
{
check_vect ();
main1 ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,10 +3,9 @@ ...@@ -3,10 +3,9 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 96 #define N 12
unsigned short in[N*8]; unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
volatile int y = 0;
int int
main1 () main1 ()
...@@ -14,13 +13,6 @@ main1 () ...@@ -14,13 +13,6 @@ main1 ()
int i; int i;
unsigned short out[N*8]; unsigned short out[N*8];
for (i = 0; i < N*8; i++)
{
in[i] = i&63;
if (y) /* Avoid vectorization. */
abort ();
}
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
out[i*8] = in[i*8]; out[i*8] = in[i*8];
...@@ -149,7 +141,7 @@ int main (void) ...@@ -149,7 +141,7 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
unsigned short in[N*8];
unsigned short in2[N*8];
volatile int y = 0;
/* A single loop that updates two independent groups at once: a group of
   3 from IN to OUT and a group of 5 from IN2 to OUT2, each lane adding
   its own constant.  Results are verified afterwards; abort () is
   called on a mismatch and 0 is returned otherwise.  */
int
main1 ()
{
int i;
unsigned short out[N*8];
unsigned short out2[N*8];
/* Set every input element to its own index.  Reading the volatile Y
   keeps this setup loop from being vectorized.  */
for (i = 0; i < N*8; i++)
{
in[i] = in2[i] = i;
if (y) /* Avoid vectorization. */
abort ();
}
/* SLP with unrolling by 8. */
/* With N iterations the highest indices touched are 3*N - 1 in OUT and
   5*N - 1 in OUT2, both inside the N*8-element arrays.  */
for (i = 0; i < N; i++)
{
out[i*3] = in[i*3] + 5;
out[i*3 + 1] = in[i*3 + 1] + 6;
out[i*3 + 2] = in[i*3 + 2] + 16;
out2[i*5] = in2[i*5] + 2;
out2[i*5 + 1] = in2[i*5 + 1] + 2;
out2[i*5 + 2] = in2[i*5 + 2] + 1;
out2[i*5 + 3] = in2[i*5 + 3] + 3;
out2[i*5 + 4] = in2[i*5 + 4] + 13;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (out[i*3] != in[i*3] + 5
|| out[i*3 + 1] != in[i*3 + 1] + 6
|| out[i*3 + 2] != in[i*3 + 2] + 16
|| out2[i*5] != in2[i*5] + 2
|| out2[i*5 + 1] != in2[i*5 + 1] + 2
|| out2[i*5 + 2] != in2[i*5 + 2] + 1
|| out2[i*5 + 3] != in2[i*5 + 3] + 3
|| out2[i*5 + 4] != in2[i*5 + 4] + 13)
abort ();
}
return 0;
}
/* Test driver: calls check_vect () (declared in tree-vect.h; presumably
   a run-time check for vector support -- confirm there), then runs
   main1 ().  The value returned by main1 () is ignored; failures are
   reported through abort ().  */
int main (void)
{
check_vect ();
main1 ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,11 +3,10 @@ ...@@ -3,11 +3,10 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 64 #define N 8
unsigned short in[N*8]; unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned short in2[N*8]; unsigned short in2[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
volatile int y = 0;
int int
main1 () main1 ()
...@@ -16,13 +15,6 @@ main1 () ...@@ -16,13 +15,6 @@ main1 ()
unsigned short out[N*8]; unsigned short out[N*8];
unsigned short out2[N*8]; unsigned short out2[N*8];
for (i = 0; i < N*8; i++)
{
in[i] = in2[i] = i;
if (y) /* Avoid vectorization. */
abort ();
}
/* SLP with unrolling by 8. */ /* SLP with unrolling by 8. */
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
volatile int y = 0;
/* Three grouped copy loops (group sizes 8, 4 and 16) from IN to OUT.
   Each loop additionally stores a constant into the unsigned int array
   IA, so every loop body mixes a grouped short copy with a plain int
   store.  Each loop is verified right after it runs; abort () is called
   on a mismatch and 0 is returned otherwise.  */
int
main1 ()
{
int i;
unsigned short out[N*8];
unsigned short in[N*8];
unsigned int ia[N*2];
/* Set every input element to its own index.  Reading the volatile Y
   keeps this setup loop from being vectorized.  */
for (i = 0; i < N*8; i++)
{
in[i] = i;
if (y) /* Avoid vectorization. */
abort ();
}
/* Group of 8; writes ia[0 .. N-1].  */
for (i = 0; i < N; i++)
{
out[i*8] = in[i*8];
out[i*8 + 1] = in[i*8 + 1];
out[i*8 + 2] = in[i*8 + 2];
out[i*8 + 3] = in[i*8 + 3];
out[i*8 + 4] = in[i*8 + 4];
out[i*8 + 5] = in[i*8 + 5];
out[i*8 + 6] = in[i*8 + 6];
out[i*8 + 7] = in[i*8 + 7];
ia[i] = 7;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (out[i*8] != in[i*8]
|| out[i*8 + 1] != in[i*8 + 1]
|| out[i*8 + 2] != in[i*8 + 2]
|| out[i*8 + 3] != in[i*8 + 3]
|| out[i*8 + 4] != in[i*8 + 4]
|| out[i*8 + 5] != in[i*8 + 5]
|| out[i*8 + 6] != in[i*8 + 6]
|| out[i*8 + 7] != in[i*8 + 7]
|| ia[i] != 7)
abort ();
}
/* Group of 4; N*2 iterations, so this is the loop that uses the full
   N*2 length of IA.  */
for (i = 0; i < N*2; i++)
{
out[i*4] = in[i*4];
out[i*4 + 1] = in[i*4 + 1];
out[i*4 + 2] = in[i*4 + 2];
out[i*4 + 3] = in[i*4 + 3];
ia[i] = 12;
}
/* check results: */
for (i = 0; i < N*2; i++)
{
if (out[i*4] != in[i*4]
|| out[i*4 + 1] != in[i*4 + 1]
|| out[i*4 + 2] != in[i*4 + 2]
|| out[i*4 + 3] != in[i*4 + 3]
|| ia[i] != 12)
abort ();
}
/* Group of 16; writes ia[0 .. N/2-1].  */
for (i = 0; i < N/2; i++)
{
out[i*16] = in[i*16];
out[i*16 + 1] = in[i*16 + 1];
out[i*16 + 2] = in[i*16 + 2];
out[i*16 + 3] = in[i*16 + 3];
out[i*16 + 4] = in[i*16 + 4];
out[i*16 + 5] = in[i*16 + 5];
out[i*16 + 6] = in[i*16 + 6];
out[i*16 + 7] = in[i*16 + 7];
out[i*16 + 8] = in[i*16 + 8];
out[i*16 + 9] = in[i*16 + 9];
out[i*16 + 10] = in[i*16 + 10];
out[i*16 + 11] = in[i*16 + 11];
out[i*16 + 12] = in[i*16 + 12];
out[i*16 + 13] = in[i*16 + 13];
out[i*16 + 14] = in[i*16 + 14];
out[i*16 + 15] = in[i*16 + 15];
ia[i] = 21;
}
/* check results: */
for (i = 0; i < N/2; i++)
{
if (out[i*16] != in[i*16]
|| out[i*16 + 1] != in[i*16 + 1]
|| out[i*16 + 2] != in[i*16 + 2]
|| out[i*16 + 3] != in[i*16 + 3]
|| out[i*16 + 4] != in[i*16 + 4]
|| out[i*16 + 5] != in[i*16 + 5]
|| out[i*16 + 6] != in[i*16 + 6]
|| out[i*16 + 7] != in[i*16 + 7]
|| out[i*16 + 8] != in[i*16 + 8]
|| out[i*16 + 9] != in[i*16 + 9]
|| out[i*16 + 10] != in[i*16 + 10]
|| out[i*16 + 11] != in[i*16 + 11]
|| out[i*16 + 12] != in[i*16 + 12]
|| out[i*16 + 13] != in[i*16 + 13]
|| out[i*16 + 14] != in[i*16 + 14]
|| out[i*16 + 15] != in[i*16 + 15]
|| ia[i] != 21)
abort ();
}
return 0;
}
/* Test driver: calls check_vect () (declared in tree-vect.h; presumably
   a run-time check for vector support -- confirm there), then runs
   main1 ().  The value returned by main1 () is ignored; failures are
   reported through abort ().  */
int main (void)
{
check_vect ();
main1 ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,24 +3,16 @@ ...@@ -3,24 +3,16 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
volatile int y = 0;
int int
main1 () main1 ()
{ {
int i; int i;
unsigned short out[N*8]; unsigned short out[N*8];
unsigned short in[N*8]; unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned int ia[N*2]; unsigned int ia[N*2];
for (i = 0; i < N*8; i++)
{
in[i] = i;
if (y) /* Avoid vectorization. */
abort ();
}
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
out[i*8] = in[i*8]; out[i*8] = in[i*8];
......
/* { dg-require-effective-target vect_cond_mixed } */
#include "tree-vect.h"
#define N 128
int d[N], e[N], f[N];
unsigned char k[N];
float a[N], b[N];
/* Group of 4: every element of K becomes 17 where a[j] < b[j] and 0
   otherwise.  The comparison is on floats while the result is stored
   as unsigned char.  */
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N/4; i++)
{
k[4*i] = a[4*i] < b[4*i] ? 17 : 0;
k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0;
k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0;
k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0;
}
}
/* Group of 2 with the same comparison (a < b) in both lanes but
   different result constants: even elements select 0 or 24, odd
   elements select 7 or 4.  */
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N/2; ++i)
{
k[2*i] = a[2*i] < b[2*i] ? 0 : 24;
k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4;
}
}
/* Group of 2 with the same result constants (51 or 12) but different
   comparisons: even elements use a < b, odd elements use a > b.  Note
   that main () calls this function but does not inspect K afterwards.  */
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N/2; ++i)
{
k[2*i] = a[2*i] < b[2*i] ? 51 : 12;
k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12;
}
}
/* Group of 2 selecting between values loaded from D and E: F gets the
   element of D where a >= b and the element of E otherwise.  The
   candidates are loaded into locals before the selection.  */
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N/2; ++i)
{
int d0 = d[2*i], e0 = e[2*i];
int d1 = d[2*i+1], e1 = e[2*i+1];
f[2*i] = a[2*i] >= b[2*i] ? d0 : e0;
f[2*i+1] = a[2*i+1] >= b[2*i+1] ? d1 : e1;
}
}
/* Test driver: fills A and B with a pattern that repeats every nine
   elements, fills D and E, then runs f1 .. f4 and checks the results
   of f1, f2 and f4.  abort () is called on the first mismatch.  */
int
main ()
{
int i;
check_vect ();
/* Within each run of nine elements a < b holds for i % 9 in {0, 3, 6},
   a == b for {1, 4, 7} and a > b for {2, 5, 8}.  */
for (i = 0; i < N; i++)
{
switch (i % 9)
{
case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
case 1: a[i] = 0; b[i] = 0; break;
case 2: a[i] = i + 1; b[i] = - i - 1; break;
case 3: a[i] = i; b[i] = i + 7; break;
case 4: a[i] = i; b[i] = i; break;
case 5: a[i] = i + 16; b[i] = i + 3; break;
case 6: a[i] = - i - 5; b[i] = - i; break;
case 7: a[i] = - i; b[i] = - i; break;
case 8: a[i] = - i; b[i] = - i - 7; break;
}
d[i] = i;
e[i] = 2 * i;
}
f1 ();
/* a < b exactly when i is a multiple of 3.  */
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 ? 17 : 0))
abort ();
f2 ();
/* f2 uses different constants for even and odd elements.  Nine is
   odd, so the parity of an element with a given i % 9 flips from one
   run of nine to the next; (i/9 % 2) selects which constant pair
   applies.  */
for (i = 0; i < N; i++)
{
switch (i % 9)
{
case 0:
case 6:
if (k[i] != ((i/9 % 2) == 0 ? 0 : 7))
abort ();
break;
case 1:
case 5:
case 7:
if (k[i] != ((i/9 % 2) == 0 ? 4 : 24))
abort ();
break;
case 2:
case 4:
case 8:
if (k[i] != ((i/9 % 2) == 0 ? 24 : 4))
abort ();
break;
case 3:
if (k[i] != ((i/9 % 2) == 0 ? 7 : 0))
abort ();
break;
}
}
/* The output of f3 is overwritten/ignored: no check follows it.  */
f3 ();
f4 ();
/* a >= b fails exactly when i is a multiple of 3, where E is chosen.  */
for (i = 0; i < N; i++)
if (f[i] != ((i % 3) == 0 ? e[i] : d[i]))
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_cond_mixed } */ /* { dg-require-effective-target vect_cond_mixed } */
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 32
int d[N], e[N], f[N]; int d[N], e[N], f[N];
unsigned char k[N]; unsigned char k[N];
float a[N], b[N]; float a[N], b[N];
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 144
/* Three int fields; main1 below fills one element from three
   consecutive chars of the global array IN.  */
struct s
{
int a;
int b;
int c;
};
char in[N*3];
volatile int y = 0;
/* Widen groups of three chars from the global array IN into the three
   int fields of struct s, adding 1, 2 and 3 respectively, and verify
   the result.  abort () is called on a mismatch; 0 is returned
   otherwise.  */
__attribute__ ((noinline)) int
main1 ()
{
  int i;
  struct s out[N];

  /* Initialize all N*3 input elements, since the loop under test reads
     in[0 .. N*3-1].  Filling only the first N would leave the remaining
     two thirds at their static zero value and exercise the conversion
     on constant data only.  Values cycle through 0 .. 127 so they fit
     in char regardless of its signedness.  */
  for (i = 0; i < N*3; i++)
    {
      in[i] = i&127;
      if (y) /* Avoid vectorization.  */
        abort ();
    }

  /* Loop under test: char -> int conversion in a group of 3.  */
  for (i = 0; i < N; i++)
    {
      out[i].a = (int) in[i*3] + 1;
      out[i].b = (int) in[i*3 + 1] + 2;
      out[i].c = (int) in[i*3 + 2] + 3;
    }

  /* check results:  */
  for (i = 0; i < N; i++)
    {
      if (out[i].a != (int) in[i*3] + 1
          || out[i].b != (int) in[i*3 + 1] + 2
          || out[i].c != (int) in[i*3 + 2] + 3)
        abort ();
    }

  return 0;
}
/* Test driver: calls check_vect () (declared in tree-vect.h; presumably
   a run-time check for vector support -- confirm there), then runs
   main1 ().  The value returned by main1 () is ignored; failures are
   reported through abort ().  */
int main (void)
{
check_vect ();
main1 ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 144 #define N 18
struct s struct s
{ {
...@@ -12,8 +12,7 @@ struct s ...@@ -12,8 +12,7 @@ struct s
int c; int c;
}; };
char in[N*3]; char in[N*3] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53};
volatile int y = 0;
__attribute__ ((noinline)) int __attribute__ ((noinline)) int
main1 () main1 ()
...@@ -23,13 +22,6 @@ main1 () ...@@ -23,13 +22,6 @@ main1 ()
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
in[i] = i&127;
if (y) /* Avoid vectorization. */
abort ();
}
for (i = 0; i < N; i++)
{
out[i].a = (int) in[i*3] + 1; out[i].a = (int) in[i*3] + 1;
out[i].b = (int) in[i*3 + 1] + 2; out[i].b = (int) in[i*3 + 1] + 2;
out[i].c = (int) in[i*3 + 2] + 3; out[i].c = (int) in[i*3 + 2] + 3;
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_float } */
#define N 128
void fbar (float *);
void ibar (int *);
void sbar (short *);
/* multiple loops */
/* Compile-only collection of loops for the vectorizer.  The local
   arrays are deliberately left without initializers; their contents do
   not matter because the test is never run.  Results are passed to the
   external functions ibar and fbar so the loops are not removed as
   dead code.  The parameter N is not used.

   The return type is spelled out as int (the original relied on
   implicit int, which C99 removed) and a value is returned so the
   function is well-formed under current language standards.  */
int
foo (int n)
{
  float a[N+1];
  float b[N];
  float c[N];
  float d[N];
  int ia[N];
  int ib[N];
  int ic[N];
  int i,j;
  int diff = 0;
  char cb[N];
  char cc[N];
  char image[N][N];
  char block[N][N];

  /* Vectorizable.  */
  diff = 0;
  for (i = 0; i < N; i++) {
    diff += (cb[i] - cc[i]);
  }
  ibar (&diff);

  /* Vectorizable.  */
  diff = 0;
  for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++) {
      diff += (image[i][j] - block[i][j]);
    }
  }
  ibar (&diff);

  /* Vectorizable.  */
  for (i = 0; i < N; i++){
    a[i] = b[i];
  }
  fbar (a);

  /* Vectorizable.  */
  for (i = 0; i < N; i++){
    a[i] = b[i] + c[i] + d[i];
  }
  fbar (a);

  /* Strided access.  Vectorizable on platforms that support load of strided
     accesses (extract of even/odd vector elements).  */
  for (i = 0; i < N/2; i++){
    a[i] = b[2*i+1] * c[2*i+1] - b[2*i] * c[2*i];
    d[i] = b[2*i] * c[2*i+1] + b[2*i+1] * c[2*i];
  }
  fbar (a);

  /* Vectorizable.  */
  for (i = 0; i < N; i++){
    a[i] = b[i] + c[i];
    d[i] = b[i] + c[i];
    ia[i] = ib[i] + ic[i];
  }
  ibar (ia);
  fbar (a);
  fbar (d);

  /* Not vectorizable yet (too conservative dependence test).  A has
     N+1 elements so that the store to a[i+1] stays in bounds.  */
  for (i = 0; i < N; i++){
    a[i] = b[i] + c[i];
    a[i+1] = b[i] + c[i];
  }
  fbar (a);

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_strided2 } } } */
/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_strided2 } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
/* { dg-require-effective-target vect_int } */ /* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_float } */ /* { dg-require-effective-target vect_float } */
#define N 128 #define N 16
void fbar (float *); void fbar (float *);
void ibar (int *); void ibar (int *);
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#include <stdlib.h>
#define N 128
short a[N];
short d[N];
volatile int y = 0;
/* Strided-access test body.  Fills the local arrays b and c in a loop
   that also checks the volatile y on every iteration, then combines
   their even/odd elements into the global arrays a and d.
   Always returns 0.  */
int foo ()
{
  int i;
  short b[N];
  short c[N];

  /* Initialize every element: the strided loop below reads indices up to
     N-1, so stopping at N/2 would leave the upper halves of b and c
     indeterminate.  */
  for (i = 0; i < N; i++)
    {
      b[i] = i*3;
      c[i] = i;

      /* Avoid vectorization.  */
      if (y)
        abort ();
    }

  /* Strided access pattern.  */
  for (i = 0; i < N/2; i++)
    {
      a[i] = b[2*i+1] * c[2*i+1] - b[2*i] * c[2*i];
      d[i] = b[2*i] * c[2*i+1] + b[2*i+1] * c[2*i];
    }

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_strided2 } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-require-effective-target vect_int } */ /* { dg-require-effective-target vect_int } */
#include <stdlib.h>
#define N 128 #define N 16
short a[N]; short a[N];
short d[N]; short d[N];
volatile int y = 0;
int foo () int foo ()
{ {
int i; int i;
short b[N]; short b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
short c[N]; short c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
for (i = 0; i < N/2; i++)
{
b[i] = i*3;
c[i] = i;
/* Avoid vectorization. */
if (y)
abort ();
}
/* Strided access pattern. */ /* Strided access pattern. */
for (i = 0; i < N/2; i++) for (i = 0; i < N/2; i++)
......
/* { dg-require-effective-target vect_int } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
/* Two consecutive N x N int matrices.  main1 casts a pointer to this struct
   to int * and indexes across both members as one flat run of ints.  */
struct extraction
{
int a[N][N];
int b[N][N];
};
static int a[N][N];
static int b[N][N];
static int c[N][N];
volatile int y;
/* Checks a copy within a heap-allocated struct extraction where source and
   destination are N+1 ints apart.

   x is the int offset from the start of *p at which the shifted copy
   begins (main passes N).  The expected result is first computed into
   the global c by loops that check the volatile y on every iteration;
   the same shifted copy is then applied to *p and p->a is compared with c.

   Aborts if the allocation fails or if any element differs; otherwise
   releases the allocation and returns 0.  */
__attribute__ ((noinline))
int main1 (int x) {
  int i,j, off;
  struct extraction *p;

  p = malloc (sizeof *p);
  /* The loops below dereference p unconditionally.  */
  if (!p)
    abort ();

  /* Fill the input matrices.  */
  for (i = 0; i < N; i++)
    {
      for (j = 0; j < N; j++)
        {
          a[i][j] = (i*7 + j*17)%53;
          b[i][j] = (i*11+ j*13)%41;
          if (y)
            abort (); /* to avoid vectorization.  */
        }
    }

  /* Start the expected result as a copy of a.  */
  for (i = 0; i < N; i++)
    {
      for (j = 0; j < N; j++)
        {
          c[i][j] = a[i][j];
          if (y)
            abort (); /* to avoid vectorization.  */
        }
    }

  /* Apply the shifted copy to c.  A source index past the end of a
     (off > N*N-1) is taken from b instead, mirroring how b follows a
     inside struct extraction.  Destinations past the end of c are
     skipped.  */
  for (i = 1; i < N; i++)
    {
      for (j = 0; j < N; j++)
        {
          off = x + i + j + N+1;
          if (x + i + j > N*N-1)
            break;
          if (off > N*N-1)
            *(&c[0][0]+x+i+j) = *(&b[0][0] + off - N*N);
          else
            *(&c[0][0]+x+i+j) = *(&a[0][0] + off);
          if (y)
            abort (); /* to avoid vectorization.  */
        }
    }

  /* Copy the inputs into the heap object.  */
  for (i = 0; i < N; i++)
    {
      for (j = 0; j < N; j++)
        {
          p->a[i][j] = a[i][j];
          p->b[i][j] = b[i][j];
          /* Because Y is volatile, the compiler cannot move this check out
             of the loop.  */
          if (y)
            abort (); /* to avoid vectorization.  */
        }
    }

  /* Vectorizable: distance > number of iterations.  */
  for (i = 1; i < N; i++)
    {
      for (j = 0; j < N; j++)
        {
          *((int *)p + x + i + j) = *((int *)p + x + i + j + N+1);
        }
    }

  /* check results: */
  for (i = 0; i < N; i++)
    {
      for (j = 0; j < N; j++)
        {
          if (p->a[i][j] != c[i][j])
            abort ();
        }
    }

  free (p);
  return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1 with
   x = N.  */
int main (void)
{
check_vect ();
return main1 (N);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 2 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 16 #define N 4
struct extraction struct extraction
{ {
...@@ -12,51 +12,17 @@ struct extraction ...@@ -12,51 +12,17 @@ struct extraction
int b[N][N]; int b[N][N];
}; };
static int a[N][N]; static int a[N][N] = {{1,2,3,11},{4,5,6,12},{7,8,9,13},{34,45,67,83}};
static int b[N][N]; static int b[N][N] = {{17,28,15,23},{0,2,3,24},{4,31,82,25},{29,31,432,256}};
static int c[N][N]; static int c[N][N] = {{1,2,3,11},{4,9,13,34},{45,67,83,13},{34,45,67,83}};
volatile int y; volatile int y;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 (int x) { int main1 (int x) {
int i,j, off; int i,j;
struct extraction *p; struct extraction *p;
p = (struct extraction *) malloc (sizeof (struct extraction)); p = (struct extraction *) malloc (sizeof (struct extraction));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
a[i][j] = (i*7 + j*17)%53;
b[i][j] = (i*11+ j*13)%41;
if (y)
abort (); /* to avoid vectorization. */
}
}
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
c[i][j] = a[i][j];
if (y)
abort (); /* to avoid vectorization. */
}
}
for (i = 1; i < N; i++)
{
for (j = 0; j < N; j++)
{
off = x + i + j + N+1;
if (x + i + j > N*N-1)
break;
if (off > N*N-1)
*(&c[0][0]+x+i+j) = *(&b[0][0] + off - N*N);
else
*(&c[0][0]+x+i+j) = *(&a[0][0] + off);
if (y)
abort (); /* to avoid vectorization. */
}
}
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
...@@ -67,7 +33,7 @@ int main1 (int x) { ...@@ -67,7 +33,7 @@ int main1 (int x) {
/* Because Y is volatile, the compiler cannot move this check out /* Because Y is volatile, the compiler cannot move this check out
of the loop. */ of the loop. */
if (y) if (y)
abort (); /* to avoid vectorization. */ abort (); /* to avoid vectorization */
} }
} }
...@@ -76,7 +42,7 @@ int main1 (int x) { ...@@ -76,7 +42,7 @@ int main1 (int x) {
{ {
for (j = 0; j < N; j++) for (j = 0; j < N; j++)
{ {
*((int *)p + x + i + j) = *((int *)p + x + i + j + N+1); *((int *)p + x + i + j) = *((int *)p + x + i + j + 5);
} }
} }
...@@ -86,7 +52,7 @@ int main1 (int x) { ...@@ -86,7 +52,7 @@ int main1 (int x) {
for (j = 0; j < N; j++) for (j = 0; j < N; j++)
{ {
if (p->a[i][j] != c[i][j]) if (p->a[i][j] != c[i][j])
abort (); abort();
} }
} }
return 0; return 0;
...@@ -100,7 +66,7 @@ int main (void) ...@@ -100,7 +66,7 @@ int main (void)
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 2 "vect" { target vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 2 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
char cb[N];
char cc[N];
volatile int y = 0;
/* Fills cb and cc element by element while accumulating the expected sum
   of differences in check_diff, then recomputes that sum in a separate
   reduction loop and aborts if the two values disagree.
   Returns 0 on success.  */
__attribute__ ((noinline)) int
main1 (void)
{
int i;
int diff = 0;
int check_diff = 0;
/* Setup loop; it also produces the reference value used by the final check.  */
for (i = 0; i < N; i++) {
cb[i] = i + 2;
cc[i] = i + 1;
check_diff += (cb[i] - cc[i]);
/* Avoid vectorization. */
if (y)
abort ();
}
/* Cross-iteration cycle. */
diff = 0;
for (i = 0; i < N; i++) {
diff += (cb[i] - cc[i]);
}
/* Check results. */
if (diff != check_diff)
abort ();
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,27 +3,16 @@ ...@@ -3,27 +3,16 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
char cb[N]; char cb[N] = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17};
char cc[N]; char cc[N] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
volatile int y = 0;
__attribute__ ((noinline)) int __attribute__ ((noinline)) int
main1 (void) main1 (void)
{ {
int i; int i;
int diff = 0; int diff = 0;
int check_diff = 0;
for (i = 0; i < N; i++) {
cb[i] = i + 2;
cc[i] = i + 1;
check_diff += (cb[i] - cc[i]);
/* Avoid vectorization. */
if (y)
abort ();
}
/* Cross-iteration cycle. */ /* Cross-iteration cycle. */
diff = 0; diff = 0;
...@@ -32,8 +21,8 @@ main1 (void) ...@@ -32,8 +21,8 @@ main1 (void)
} }
/* Check results. */ /* Check results. */
if (diff != check_diff) if (diff != 16)
abort (); abort();
return 0; return 0;
} }
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
volatile int y = 0;
/* Fills b with multiples of 3, copies it into a in reverse order, and
   aborts if a is not the element-wise reverse of b.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
int a[N];
int b[N];
/* Setup loop; the volatile y is checked on every iteration.  */
for (i = 0; i <N; i++)
{
b[i] = i*3;
if (y)
abort ();
}
/* Not vectorizable yet (reverse access and forward access). */
/* i counts down, so a is written front to back while b is read back to front.  */
for (i = N; i > 0; i--)
{
a[N-i] = b[i-1];
}
/* check results: */
for (i = 0; i <N; i++)
{
if (a[i] != b[N-1-i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm && vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,23 +3,14 @@ ...@@ -3,23 +3,14 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
{ {
int i; int i;
int a[N]; int a[N];
int b[N]; int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
for (i = 0; i <N; i++)
{
b[i] = i*3;
if (y)
abort ();
}
/* Not vectorizable yet (reverse access and forward access). */ /* Not vectorizable yet (reverse access and forward access). */
for (i = N; i > 0; i--) for (i = N; i > 0; i--)
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
char cb[N];
char ca[N];
volatile int y = 0;
/* Fills the global cb, copies it element by element into the global ca,
   and aborts if any element of ca differs from cb.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
/* Setup loop.  */
for (i = 0; i < N; i++)
{
cb[i] = i*3;
/* To avoid vectorization. */
if (y)
abort ();
}
/* The copy loop itself.  */
for (i = 0; i < N; i++)
{
ca[i] = cb[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ca[i] != cb[i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,13 +3,11 @@ ...@@ -3,13 +3,11 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
char cb[N]; char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
char ca[N]; char ca[N];
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
{ {
...@@ -17,14 +15,6 @@ int main1 () ...@@ -17,14 +15,6 @@ int main1 ()
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
cb[i] = i*3;
/* To avoid vectorization. */
if (y)
abort ();
}
for (i = 0; i < N; i++)
{
ca[i] = cb[i]; ca[i] = cb[i];
} }
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 256
/* N ints followed by one extra int.  Used as the type of members d and e
   of struct s.  */
struct t{
int k[N];
int l;
};
/* Layout fixture: char and int arrays placed at different offsets within a
   single object.  The per-member comments give the intended alignment of
   each member.  */
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
/* Stores a constant into the first N/2 elements of four array members of
   a local struct s (b, c, d.k and e.k), checking each array right after
   it is written.  Aborts on the first wrong element; returns 0 otherwise.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
struct s tmp;
/* unaligned */
for (i = 0; i < N/2; i++)
{
tmp.b[i] = 5;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.b[i] != 5)
abort ();
}
/* aligned */
for (i = 0; i < N/2; i++)
{
tmp.c[i] = 6;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.c[i] != 6)
abort ();
}
/* aligned */
for (i = 0; i < N/2; i++)
{
tmp.d.k[i] = 7;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.d.k[i] != 7)
abort ();
}
/* unaligned */
for (i = 0; i < N/2; i++)
{
tmp.e.k[i] = 8;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.e.k[i] != 8)
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 256 #define N 32
struct t{ struct t{
int k[N]; int k[N];
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Stores 5 into every element of a char array that is a member of a local
   struct, then aborts if any element is not 5.  Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
struct {
char ca[N];
} s;
int i;
/* Constant store into the struct member.  */
for (i = 0; i < N; i++)
{
s.ca[i] = 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.ca[i] != 5)
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Wrapper around a char array of N elements; the object s of this type is
   only declared extern in this file.  */
struct test {
char ca[N];
};
extern struct test s;
/* Stores 5 into every element of s.ca, where s is an object declared
   extern in this file, then aborts if any element is not 5.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
/* Constant store into the external object.  */
for (i = 0; i < N; i++)
{
s.ca[i] = 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.ca[i] != 5)
abort ();
}
return 0;
}
/* Entry point: returns the result of main1.  Unlike the other drivers in
   this listing it does not call check_vect ().  */
int main (void)
{
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target vector_alignment_reachable } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { {! vector_alignment_reachable} && {! vect_hw_misalign} } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
struct test { struct test {
char ca[N]; char ca[N];
}; };
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Global object of an unnamed struct type holding the destination array
   written by main1.  */
struct {
char ca[N];
} s;
char cb[N];
volatile int y = 0;
/* Fills the global cb, copies it element by element into the struct member
   s.ca, and aborts if any element of s.ca differs from cb.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
/* Setup loop.  */
for (i = 0; i < N; i++)
{
cb[i] = i*3;
/* To avoid vectorization. */
if (y)
abort ();
}
/* The copy loop itself.  */
for (i = 0; i < N; i++)
{
s.ca[i] = cb[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.ca[i] != cb[i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,15 +3,13 @@ ...@@ -3,15 +3,13 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
struct { struct {
char ca[N]; char ca[N];
} s; } s;
char cb[N]; char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
{ {
...@@ -19,13 +17,6 @@ int main1 () ...@@ -19,13 +17,6 @@ int main1 ()
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
cb[i] = i*3;
/* To avoid vectorization. */
if (y)
abort ();
}
for (i = 0; i < N; i++)
{
s.ca[i] = cb[i]; s.ca[i] = cb[i];
} }
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Uses a local union whose two members are unsigned char arrays of the
   same size.  Writes i into s.b[i], then computes s.a[i] = s.b[i] + 1 and
   aborts unless every s.a[i] equals i + 1.  Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
union {
unsigned char a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
unsigned char b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
} s;
int i;
/* Initialization. */
for (i = 0; i < N; i++)
{
s.b[i] = i;
}
/* Dependence analysis fails cause s.a and s.b may overlap.
Use runtime aliasing test with versioning. */
for (i = 0; i < N; i++)
{
s.a[i] = s.b[i] + 1;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.a[i] != i + 1)
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail { ia64-*-* sparc*-*-* } } } } */
/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Copies one char array member of a local struct (cb) into another member
   of the same struct (ca) and aborts if the two differ afterwards.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
struct {
char ca[N];
char cb[N];
} s;
/* Setup loop.  The empty volatile asm is presumably there to keep this
   loop from being vectorized - TODO confirm.  */
for (i = 0; i < N; i++)
{
s.cb[i] = 3*i;
__asm__ volatile ("");
}
/* The member-to-member copy.  */
for (i = 0; i < N; i++)
{
s.ca[i] = s.cb[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.ca[i] != s.cb[i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
float results1[N];
float results2[N];
float a[N] = {0};
float e[N] = {0};
float b[N];
float c[N];
volatile int y = 0;
/* Two-part float test.
   Part 1: computes products of the lower and upper halves of b and c into
   a[0..N/2-1] and e[N/2..N-1], and compares them with reference values
   produced by an identical loop that also checks the volatile y.
   Part 2: copies b[i-1] into a[i+3] for i in 1..N-4 and checks the copy.
   Aborts on any mismatch; returns 0 otherwise.  */
__attribute__ ((noinline))
int main1 ()
{
int i;
/* Fill the inputs and clear the reference arrays.  */
for (i=0; i<N; i++)
{
b[i] = i*3;
c[i] = i;
results1[i] = 0;
results2[i] = 0;
/* Avoid vectorization. */
if (y)
abort ();
}
/* Reference computation; same expressions as the loop that follows.  */
for (i=0; i<N/2; i++)
{
results1[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
results2[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i];
/* Avoid vectorization. */
if (y)
abort ();
}
/* Loop under test, part 1.  */
for (i = 0; i < N/2; i++)
{
a[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
e[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i];
}
/* check results: */
/* Elements the loops above never write are zero in both a/e (from their
   initializers) and results1/results2 (cleared in the first loop).  */
for (i=0; i<N; i++)
{
if (a[i] != results1[i] || e[i] != results2[i])
abort ();
}
/* Loop under test, part 2: copy with the destination index 4 ahead of the
   source index.  */
for (i = 1; i <=N-4; i++)
{
a[i+3] = b[i-1];
}
/* check results: */
for (i = 1; i <=N-4; i++)
{
if (a[i+3] != b[i-1])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,41 +3,20 @@ ...@@ -3,41 +3,20 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
float results1[N]; float results1[N] = {192.00,240.00,288.00,336.00,384.00,432.00,480.00,528.00,0.00};
float results2[N]; float results2[N] = {0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,54.00,120.00,198.00,288.00,390.00,504.00,630.00};
float a[N] = {0}; float a[N] = {0};
float e[N] = {0}; float e[N] = {0};
float b[N]; float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
float c[N]; float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
{ {
int i; int i;
for (i=0; i<N; i++)
{
b[i] = i*3;
c[i] = i;
results1[i] = 0;
results2[i] = 0;
/* Avoid vectorization. */
if (y)
abort ();
}
for (i=0; i<N/2; i++)
{
results1[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
results2[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i];
/* Avoid vectorization. */
if (y)
abort ();
}
for (i = 0; i < N/2; i++) for (i = 0; i < N/2; i++)
{ {
a[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i]; a[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
int ic[N*2];
int ib[N];
#define ia (ic+N)
volatile int y = 0;
/* Fills ib, copies it into ia, and aborts if any element differs.  ia is
   a macro for (ic+N), so the stores land in the second half of ic.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 ()
{
/* NOTE(review): j is declared but not used in this function.  */
int i, j;
/* Setup loop; the volatile y is checked on every iteration.  */
for (i = 0; i < N; i++)
{
ib[i] = i*3;
if (y)
abort ();
}
/* The copy loop itself.  */
for (i = 0; i < N; i++)
{
ia[i] = ib[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ia[i] != ib[i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), then returns the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,15 +3,13 @@ ...@@ -3,15 +3,13 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
int ic[N*2]; int ic[N*2];
int ib[N]; int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
#define ia (ic+N) #define ia (ic+N)
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
{ {
...@@ -19,13 +17,6 @@ int main1 () ...@@ -19,13 +17,6 @@ int main1 ()
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
ib[i] = i*3;
if (y)
abort ();
}
for (i = 0; i < N; i++)
{
ia[i] = ib[i]; ia[i] = ib[i];
} }
...@@ -33,7 +24,7 @@ int main1 () ...@@ -33,7 +24,7 @@ int main1 ()
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
if (ia[i] != ib[i]) if (ia[i] != ib[i])
abort (); abort();
} }
return 0; return 0;
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float b[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0};
float c[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5};
volatile int y = 0;
/* Multiplies pb[i+4] by pc[i] into pa[i] for i in 0..N-1 and aborts if any
   stored product differs from the recomputed one.

   pa: destination, at least N floats.
   pb: first input; read through q = pb + 4, so N+4 floats are needed.
   pc: second input, at least N floats.

   The setup loops write the globals b and c directly rather than through
   pb and pc; main passes those same globals as the arguments.
   Returns 0 on success.  */
__attribute__ ((noinline)) int
main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc)
{
int i;
float *q = pb + 4;
/* Fill the first N elements of b and c.  */
for (i = 0; i < N; i++)
{
b[i] = i;
c[i] = 0.5 + i;
if (y)
abort ();
}
/* i continues from N: fill the 4 extra elements of b.  */
for (; i < N+4; i++)
{
b[i] = i;
if (y)
abort ();
}
/* Loop under test.  */
for (i = 0; i < N; i++)
{
pa[i] = q[i] * pc[i];
}
/* Check results.  */
for (i = 0; i < N; i++)
{
if (pa[i] != q[i] * pc[i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), runs main1 on the global arrays a, b
   and c, and returns 0 regardless of main1's return value.  */
int main (void)
{
check_vect ();
main1 (a, b, c);
return 0;
}
/* Xfail until handling restrict is refined. See pr29145. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* Uncomment when this testcase gets vectorized again:
dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target vect_no_align } }
dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } }
dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } }
*/
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
/* Check handling of accesses for which the "initial condition" - /* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is the expression that represents the first location accessed - is
...@@ -13,8 +13,6 @@ float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); ...@@ -13,8 +13,6 @@ float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float b[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0}; float b[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0};
float c[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5}; float c[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5};
volatile int y = 0;
__attribute__ ((noinline)) int __attribute__ ((noinline)) int
main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc) main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc)
{ {
...@@ -23,27 +21,13 @@ main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc ...@@ -23,27 +21,13 @@ main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
b[i] = i;
c[i] = 0.5 + i;
if (y)
abort ();
}
for (; i < N+4; i++)
{
b[i] = i;
if (y)
abort ();
}
for (i = 0; i < N; i++)
{
pa[i] = q[i] * pc[i]; pa[i] = q[i] * pc[i];
} }
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
if (pa[i] != q[i] * pc[i]) if (pa[i] != q[i] * pc[i])
abort (); abort();
} }
return 0; return 0;
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 120
#define OFF 8
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17};
volatile int y = 0;
/* Copies ib[OFF..N+OFF-1] into a local array and aborts if any copied
   element differs from its source.

   ib: array of at least N+OFF ints.  This parameter hides the global of
   the same name inside the function.  Elements from OFF onward are
   overwritten by the setup loop.
   Returns 0 on success.  */
__attribute__ ((noinline))
int main1 (int *ib)
{
int i;
int ia[N];
/* Setup loop: derive each element from one of the first OFF elements.  */
for (i = OFF; i < N+OFF; i++)
{
ib[i] = ib[i%OFF]*(i/OFF);
if (y)
abort ();
}
/* Loop under test: the read starts OFF elements into ib.  */
for (i = 0; i < N; i++)
{
ia[i] = ib[i+OFF];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ia[i] != ib[i+OFF])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), runs main1 on the global ib, and
   returns 0 regardless of main1's return value.  */
int main (void)
{
check_vect ();
main1 (ib);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,16 +3,14 @@ ...@@ -3,16 +3,14 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 120 #define N 8
#define OFF 8 #define OFF 8
/* Check handling of accesses for which the "initial condition" - /* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is the expression that represents the first location accessed - is
more involved than just an ssa_name. */ more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17}; int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10, 14, 22, 26, 34};
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 (int *ib) int main1 (int *ib)
...@@ -20,12 +18,6 @@ int main1 (int *ib) ...@@ -20,12 +18,6 @@ int main1 (int *ib)
int i; int i;
int ia[N]; int ia[N];
for (i = OFF; i < N+OFF; i++)
{
ib[i] = ib[i%OFF]*(i/OFF);
if (y)
abort ();
}
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
ia[i] = ib[i+OFF]; ia[i] = ib[i+OFF];
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 124
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17};
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17};
volatile int y = 0;
/* Runs three copy loops into the local ia, each reading its source at
   index i - OFF, and checks every copy right after it is made.

   pib: pointer into an int array.  It is read at indices -OFF..N-OFF-1,
   so it must point at least OFF elements past the start of its array
   (main passes &ib[OFF]).
   Aborts on any mismatch; returns 0 otherwise.  */
__attribute__ ((noinline))
int main1 (int *pib)
{
int i;
int ia[N+OFF];
/* Setup loop: fills the globals ib and ic from index OFF onward.  */
for (i = OFF; i < N+OFF; i++)
{
ib[i] = ib[i%8]*(i/8);
ic[i] = ic[i%8]*(i/8);
if (y)
abort ();
}
/* Copy 1: from pib, indices OFF..N-1.  */
for (i = OFF; i < N; i++)
{
ia[i] = pib[i - OFF];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (ia[i] != pib[i - OFF])
abort ();
}
/* Copy 2: from pib, indices 0..N-1 (reads start OFF elements before pib).  */
for (i = 0; i < N; i++)
{
ia[i] = pib[i - OFF];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ia[i] != pib[i - OFF])
abort ();
}
/* Copy 3: from the global ic, indices OFF..N-1.  */
for (i = OFF; i < N; i++)
{
ia[i] = ic[i - OFF];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (ia[i] != ic[i - OFF])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), runs main1 with a pointer OFF elements
   into the global ib, and returns 0 regardless of main1's return value.  */
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,30 +3,21 @@ ...@@ -3,30 +3,21 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 124 #define N 24
#define OFF 4 #define OFF 4
/* Check handling of accesses for which the "initial condition" - /* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is the expression that represents the first location accessed - is
more involved than just an ssa_name. */ more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17}; int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17}; int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 (int *pib) int main1 (int *pib)
{ {
int i; int i;
int ia[N+OFF]; int ia[N+OFF];
for (i = OFF; i < N+OFF; i++)
{
ib[i] = ib[i%8]*(i/8);
ic[i] = ic[i%8]*(i/8);
if (y)
abort ();
}
for (i = OFF; i < N; i++) for (i = OFF; i < N; i++)
{ {
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
float fa[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float fb[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float fc[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
volatile int y = 0;
/* Multiplies pb[i+4] by pc[i] into pa[i] for i in 0..N-1 and aborts if any
   stored product differs from the recomputed one.

   pa: destination, at least N floats.
   pb: first input; read through q = pb + 4, so N+4 floats are needed.
   pc: second input, at least N floats.

   The setup loops write the globals fb and fc directly rather than through
   pb and pc; main passes those same globals as the arguments.
   Returns 0 on success.  */
__attribute__ ((noinline)) int
main1 (float * __restrict__ pa, float * __restrict__ pb, float *__restrict__ pc)
{
int i;
float *q = pb + 4;
/* Fill the first N elements of fb and fc.  */
for (i = 0; i < N; i++)
{
fb[i] = i;
fc[i] = 0.5+i;
if (y)
abort ();
}
/* i continues from N: fill the 4 extra elements of fb.  */
for (; i < N+4; i++)
{
fb[i] = i;
if (y)
abort ();
}
/* Loop under test.  */
for (i = 0; i < N; i++)
{
pa[i] = q[i] * pc[i];
}
/* Check results.  */
for (i = 0; i < N; i++)
{
if (pa[i] != q[i] * pc[i])
abort ();
}
return 0;
}
/* Test driver: calls check_vect (), runs main1 on the global arrays fa, fb
   and fc, and returns 0 regardless of main1's return value.  */
int main (void)
{
check_vect ();
main1 (fa, fb, fc);
return 0;
}
/* For targets that don't support misaligned loads we version for
all three accesses (peeling to align the store will not force the
two loads to be aligned). */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* Uncomment when this testcase gets vectorized again:
dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } }
dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } }
dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target vect_no_align } }
*/
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,36 +3,21 @@ ...@@ -3,36 +3,21 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
float fa[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float fa[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float fb[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float fb[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0};
float fc[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float fc[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5};
/* Check handling of accesses for which the "initial condition" - /* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is the expression that represents the first location accessed - is
more involved than just an ssa_name. */ more involved than just an ssa_name. */
volatile int y = 0;
__attribute__ ((noinline)) int __attribute__ ((noinline)) int
main1 (float * __restrict__ pa, float * __restrict__ pb, float *__restrict__ pc) main1 (float * __restrict__ pa, float * __restrict__ pb, float *__restrict__ pc)
{ {
int i; int i;
float *q = pb + 4; float *q = pb + 4;
for (i = 0; i < N; i++)
{
fb[i] = i;
fc[i] = 0.5+i;
if (y)
abort ();
}
for (; i < N+4; i++)
{
fb[i] = i;
if (y)
abort ();
}
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Run a doubly nested loop that stores I + N into every element of A on
   each outer iteration and records the last value written in a local
   array B, then verify both arrays.  Calls abort () on any mismatch and
   returns 0 otherwise.  */
__attribute__ ((noinline))
int main1 (int *a)
{
int i, j, k;
int b[N];
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
k = i + N;
a[j] = k;
}
/* K still holds the value written by the inner loop: I + N.  */
b[i] = k;
}
/* The outer loop exited with I == N, so I + N - 1 is the value stored
   during the final outer iteration (I == N - 1).  */
for (j = 0; j < N; j++)
if (a[j] != i + N - 1)
abort ();
for (j = 0; j < N; j++)
if (b[j] != j + N)
abort ();
return 0;
}
/* Test driver: declare an N-element buffer aligned to
   __BIGGEST_ALIGNMENT__, call check_vect () (declared outside this
   chunk) and run main1 on the buffer.  Always returns 0.  */
int main (void)
{
int a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
check_vect ();
main1 (a);
return 0;
}
/* Fails for targets that don't vectorize PLUS (e.g alpha). */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 (int *a) int main1 (int *a)
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Aggregate whose array member Y is the target of the stores in main1.  */
struct tmp_struct
{
/* Not accessed by main1.  NOTE(review): presumably present only to
   offset Y from the start of the struct -- confirm.  */
int x;
int y[N];
};
/* Store the constant 5 into every element of the Y member of a local
   struct through a post-incremented pointer, then read the elements back
   through the struct pointer.  Calls abort () on a mismatch, otherwise
   returns 0.  */
__attribute__ ((noinline))
int main1 ()
{
int i, *q;
struct tmp_struct tmp, *p;
p = &tmp;
/* Q walks the array member; P is kept for the verification loop.  */
q = p->y;
for (i = 0; i < N; i++)
{
*q++ = 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (p->y[i] != 5)
{
abort ();
}
}
return 0;
}
/* Test driver: call check_vect () (declared outside this chunk) and
   return the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
struct tmp_struct struct tmp_struct
{ {
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
char x[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
char cb[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
volatile int y = 0;
/* Copy the global array CB into the global array X twice: first reading
   CB directly and writing through the struct member S.P, then reading
   through S.Q and writing through S.P.  Each copy is followed by a
   verification loop that calls abort () on a mismatch.  Returns 0.  */
__attribute__ ((noinline))
int main1 ()
{
struct {
char *p;
char *q;
} s;
int i;
/* Fill the source array.  The int value i*3 is converted to char on the
   store.  Y is a volatile int initialized to 0, so the abort () call is
   not expected to run.  NOTE(review): the conditional call presumably
   keeps this setup loop from being vectorized -- confirm.  */
for (i = 0; i < N; i++)
{
cb[i] = i*3;
if (y)
abort ();
}
/* Check that datarefs analysis can determine that the access via pointer
s.p is based off array x, which enables us to antialias this access from
the access to array cb. */
s.p = x;
for (i = 0; i < N; i++)
{
s.p[i] = cb[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.p[i] != cb[i])
abort ();
}
/* Check that datarefs analysis can determine that the access via pointer
s.p is based off array x, and that the access via pointer s.q is based off
array cb, which enables us to antialias these two accesses. */
s.q = cb;
for (i = 0; i < N; i++)
{
s.p[i] = s.q[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.p[i] != s.q[i])
abort ();
}
return 0;
}
/* Test driver: call check_vect () (declared outside this chunk) and
   return the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,12 +3,10 @@ ...@@ -3,12 +3,10 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
char x[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); char x[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
char cb[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); char cb[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 () int main1 ()
...@@ -18,12 +16,6 @@ int main1 () ...@@ -18,12 +16,6 @@ int main1 ()
char *q; char *q;
} s; } s;
int i; int i;
for (i = 0; i < N; i++)
{
cb[i] = i*3;
if (y)
abort ();
}
/* Check that datarefs analysis can determine that the access via pointer /* Check that datarefs analysis can determine that the access via pointer
s.p is based off array x, which enables us to antialias this access from s.p is based off array x, which enables us to antialias this access from
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
/* Fully unrolled 16-term dot product of A and B over indices 0..15.
   The arguments are expanded textually and without extra parentheses,
   so callers pass plain array or pointer expressions.  */
#define DOT16( a, b) ( a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + \
a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7] + \
a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11] + \
a[12]*b[12] + a[13]*b[13] + a[14]*b[14] + a[15]*b[15])
volatile int y = 0;
/* For each row I of IA, compute the 16-term dot product of that row with
   the local vector IB and store it in ic[0][i]; then recompute each
   product and compare.  Calls abort () on a mismatch, otherwise
   returns 0.  */
__attribute__ ((noinline))
int main1 (int ia[][N])
{
int i, j;
/* Only the first four elements are given; the rest are implicitly 0.  */
int ib[N] = {0,3,6,9};
/* Only row 0 of IC is written and read below.  */
int ic[N][N];
for (i = 0; i < N; i++)
{
ic[0][i] = DOT16 (ia[i], ib);
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ic[0][i] != DOT16 (ia[i], ib))
abort ();
}
return 0;
}
/* Test driver: fill the N x N matrix IA with i + j + 1, call
   check_vect () (declared outside this chunk) and return the result of
   main1 on the matrix.  */
int main (void)
{
int ia[N][N];
int i,j;
for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
{
ia[i][j] = i + j + 1;
/* Avoid vectorization. */
/* Y is a volatile int initialized to 0.  */
if (y)
abort ();
}
check_vect ();
return main1 (ia);
}
/* Needs interleaving support. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail vect_strided4 } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,13 +3,8 @@ ...@@ -3,13 +3,8 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 16 #define N 4
#define DOT16( a, b) ( a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + \ #define DOT4( a, b ) ( a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] )
a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7] + \
a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11] + \
a[12]*b[12] + a[13]*b[13] + a[14]*b[14] + a[15]*b[15])
volatile int y = 0;
__attribute__ ((noinline)) __attribute__ ((noinline))
int main1 (int ia[][N]) int main1 (int ia[][N])
...@@ -20,14 +15,14 @@ int main1 (int ia[][N]) ...@@ -20,14 +15,14 @@ int main1 (int ia[][N])
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
ic[0][i] = DOT16 (ia[i], ib); ic[0][i] = DOT4 (ia[i], ib);
} }
/* check results: */ /* check results: */
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
{ {
if (ic[0][i] != DOT16 (ia[i], ib)) if (ic[0][i] != DOT4 (ia[i], ib))
abort (); abort();
} }
return 0; return 0;
...@@ -35,16 +30,7 @@ int main1 (int ia[][N]) ...@@ -35,16 +30,7 @@ int main1 (int ia[][N])
int main (void) int main (void)
{ {
int ia[N][N]; int ia[N][N] = {{1,2,3,4},{2,3,5,7},{2,4,6,8},{22,43,55,77}};
int i,j;
for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
{
ia[i][j] = i + j + 1;
/* Avoid vectorization. */
if (y)
abort ();
}
check_vect (); check_vect ();
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
int iadd_results[N];
float fadd_results[N];
float fmul_results[N];
float fresults1[N];
float fresults2[N];
/****************************************************/
/* Compare the first N elements of the int arrays A and RESULTS and call
   abort () at the first difference.  */
__attribute__ ((noinline))
void icheck_results (int *a, int *results)
{
int i;
for (i = 0; i < N; i++)
{
if (a[i] != results[i])
abort ();
}
}
/* Compare the first N elements of the float arrays A and RESULTS for
   exact equality and call abort () at the first difference.  */
__attribute__ ((noinline))
void fcheck_results (float *a, float *results)
{
int i;
for (i = 0; i < N; i++)
{
if (a[i] != results[i])
abort ();
}
}
/* Check A against the expected values in fmul_results.  */
__attribute__ ((noinline)) void
fbar_mul (float *a)
{
fcheck_results (a, fmul_results);
}
/* Check A against the expected values in fadd_results.  */
__attribute__ ((noinline)) void
fbar_add (float *a)
{
fcheck_results (a, fadd_results);
}
/* Check the int array A against the expected values in iadd_results.  */
__attribute__ ((noinline)) void
ibar_add (int *a)
{
icheck_results (a, iadd_results);
}
/* Check A against the expected values in fresults1.  */
__attribute__ ((noinline)) void
fbar1 (float *a)
{
fcheck_results (a, fresults1);
}
/* Check A against the expected values in fresults2.  */
__attribute__ ((noinline)) void
fbar2 (float *a)
{
fcheck_results (a, fresults2);
}
/* Working arrays for main1.  A, E, IA, CA and SA are outputs of the test
   loops.  B, C, D, IC, IB and CB are inputs: their initializers list only
   16 values, and main1's initialization loop assigns every index below N
   before the inputs are read.  */
float a[N];
float e[N];
float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
float d[N] = {0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30};
int ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
int ia[N];
char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
char ca[N];
short sa[N];
/* Volatile zero tested inside the initialization loops of main1.  */
volatile int y = 0;
/* All of the loops below are currently vectorizable, except
initialization ones. */
/* Run a sequence of small loops over the global arrays (char copy, float
   multiply, mixed int/float add, offset accesses, strided induction,
   downward-counting loop, constant stores) and verify each one, either
   inline or through the fbar and ibar helpers.  The initialization loops
   also compute the expected-result arrays.  Calls abort () on any
   mismatch and returns 0 otherwise.  */
__attribute__ ((noinline)) int
main1 ()
{
int i,j;
/* Initialization. */
/* Fills the inputs and the expected results of tests 2 and 3.  The test
   of the volatile Y is what distinguishes the initialization loops from
   the test loops (see the comment above this function).  */
for (i = 0; i < N; i++)
{
b[i] = i*3;
c[i] = i;
d[i] = i*2;
ic[i] = i*3;
ib[i] = i*3;
cb[i] = i*3;
fadd_results[i] = b[i] + c[i] + d[i];
iadd_results[i] = ib[i] + ic[i];
fmul_results[i] = b[i] * c[i];
fresults1[i] = 0;
fresults2[i] = 0;
if (y)
abort ();
}
/* Test 1: copy chars. */
for (i = 0; i < N; i++)
{
ca[i] = cb[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ca[i] != cb[i])
abort ();
}
/* Test 2: fp mult. */
for (i = 0; i < N; i++)
{
a[i] = b[i] * c[i];
}
fbar_mul (a);
/* Test 3: mixed types (int, fp), same nunits in vector. */
for (i = 0; i < N; i++)
{
a[i] = b[i] + c[i] + d[i];
e[i] = b[i] + c[i] + d[i];
ia[i] = ib[i] + ic[i];
}
ibar_add (ia);
fbar_add (a);
fbar_add (e);
/* Initialization. */
/* Expected results for test 4 start as copies of the current A and E,
   because test 4 only overwrites half of each array.  */
for (i = 0; i < N; i++)
{
fresults1[i] = a[i];
fresults2[i] = e[i];
if (y)
abort ();
}
/* Overwrite the lower half of fresults1 and the upper half of fresults2
   with the values test 4 is expected to produce.  */
for (i = 0; i < N/2; i++)
{
fresults1[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
fresults2[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i];
if (y)
abort ();
}
/* Test 4: access with offset. */
for (i = 0; i < N/2; i++)
{
a[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
e[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i];
}
fbar1 (a);
fbar2 (e);
/* Test 5: access with offset. */
/* Copies b[0 .. N-5] into a[4 .. N-1].  */
for (i = 1; i <=N-4; i++)
{
a[i+3] = b[i-1];
}
/* check results: */
for (i = 1; i <=N-4; i++)
{
if (a[i+3] != b[i-1])
abort ();
}
/* Test 6 - loop induction with stride != 1. */
/* I advances by 5 up to 5*N while J advances by 1, so the body runs N
   times and J covers 0 .. N-1.  */
i = 0;
j = 0;
while (i < 5*N)
{
a[j] = c[j];
i += 5;
j++;
}
/* check results: */
for (i = 0; i <N; i++)
{
if (a[i] != c[i])
abort ();
}
/* Test 7 - reverse access. */
/* I counts down from N to 1; the index N-i therefore runs 0 .. N-1.  */
for (i = N; i > 0; i--)
{
a[N-i] = d[N-i];
}
/* check results: */
for (i = 0; i <N; i++)
{
if (a[i] != d[i])
abort ();
}
/* Tests 8,9,10 - constants. */
for (i = 0; i < N; i++)
{
a[i] = 5.0;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (a[i] != 5.0)
abort ();
}
for (i = 0; i < N; i++)
{
sa[i] = 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (sa[i] != 5)
abort ();
}
for (i = 0; i < N; i++)
{
ia[i] = ib[i] + 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (ia[i] != ib[i] + 5)
abort ();
}
return 0;
}
/* Test driver: call check_vect () (declared outside this chunk) and
   return the result of main1.  */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 10 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 16
int iadd_results[N]; int iadd_results[N] = {0,6,12,18,24,30,36,42,48,54,60,66,72,78,84,90};
float fadd_results[N]; float fadd_results[N] = {0.0,6.0,12.0,18.0,24.0,30.0,36.0,42.0,48.0,54.0,60.0,66.0,72.0,78.0,84.0,90.0};
float fmul_results[N]; float fmul_results[N] = {0.0,3.0,12.0,27.0,48.0,75.0,108.0,147.0,192.0,243.0,300.0,363.0,432.0,507.0,588.0,675.0};
float fresults1[N]; float fresults1[N] = {192.00,240.00,288.00,336.00,384.00,432.00,480.00,528.00,48.00,54.00,60.00,66.00,72.00,78.00,84.00,90.00};
float fresults2[N]; float fresults2[N] = {0.00,6.00,12.00,18.00,24.00,30.00,36.00,42.00,0.00,54.00,120.00,198.00,288.00,390.00,504.00,630.00};
/****************************************************/ /****************************************************/
__attribute__ ((noinline)) __attribute__ ((noinline))
...@@ -77,32 +77,12 @@ char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; ...@@ -77,32 +77,12 @@ char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
char ca[N]; char ca[N];
short sa[N]; short sa[N];
volatile int y = 0; /* All of the loops below are currently vectorizable. */
/* All of the loops below are currently vectorizable, except
initialization ones. */
__attribute__ ((noinline)) int __attribute__ ((noinline)) int
main1 () main1 ()
{ {
int i,j; int i,j;
/* Initialization. */
for (i = 0; i < N; i++)
{
b[i] = i*3;
c[i] = i;
d[i] = i*2;
ic[i] = i*3;
ib[i] = i*3;
cb[i] = i*3;
fadd_results[i] = b[i] + c[i] + d[i];
iadd_results[i] = ib[i] + ic[i];
fmul_results[i] = b[i] * c[i];
fresults1[i] = 0;
fresults2[i] = 0;
if (y)
abort ();
}
/* Test 1: copy chars. */ /* Test 1: copy chars. */
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
...@@ -136,21 +116,7 @@ main1 () ...@@ -136,21 +116,7 @@ main1 ()
fbar_add (a); fbar_add (a);
fbar_add (e); fbar_add (e);
/* Initialization. */
for (i = 0; i < N; i++)
{
fresults1[i] = a[i];
fresults2[i] = e[i];
if (y)
abort ();
}
for (i = 0; i < N/2; i++)
{
fresults1[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i];
fresults2[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i];
if (y)
abort ();
}
/* Test 4: access with offset. */ /* Test 4: access with offset. */
for (i = 0; i < N/2; i++) for (i = 0; i < N/2; i++)
{ {
......
/* { dg-require-effective-target vect_int_mult } */
#include <stdarg.h>
#include "tree-vect.h"
#define K 16
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K];
int check_result[K];
volatile int y = 0;
/* For each K below the macro K's bound, multiply together the K x K
   window of IN that starts at row K (rows k .. k+K-1, all columns).  The
   first loop nest stores the products in check_result as the reference;
   the second nest repeats the same computation and stores into out.  */
__attribute__ ((noinline)) void
foo ()
{
int sum;
int i, j, k;
/* Reference computation.  */
for (k = 0; k < K; k++)
{
sum = 1;
for (j = 0; j < K; j++)
for (i = 0; i < K; i++)
{
sum *= in[i+k][j];
/* Avoid vectorization. */
/* Y is a volatile int initialized to 0.  */
if (y)
abort ();
}
check_result[k] = sum;
}
/* Same computation without the volatile test; main compares its results
   in out against check_result.  */
for (k = 0; k < K; k++)
{
sum = 1;
for (j = 0; j < K; j++)
for (i = 0; i < K; i++)
sum *= in[i+k][j];
out[k] = sum;
}
}
/* Test driver: call check_vect () (declared outside this chunk), fill
   IN so that every element of row I is (i+2)/3, run foo and compare out
   with check_result element by element.  Calls abort () on a mismatch,
   otherwise returns 0.  */
int main ()
{
int i, j, k;
check_vect ();
for (i = 0; i < 2*K; i++)
for (j = 0; j < K; j++)
in[i][j] = (i+2)/3;
foo ();
for (k = 0; k < K; k++)
if (out[k] != check_result[k])
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,15 +3,13 @@ ...@@ -3,15 +3,13 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define K 16 #define K 4
int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
int out[K]; int out[K];
int check_result[K]; int check_result[K] = {0,16,256,4096};
volatile int y = 0; __attribute__ ((noinline)) void
__attribute__ ((noinline)) void
foo () foo ()
{ {
int sum; int sum;
...@@ -20,21 +18,7 @@ foo () ...@@ -20,21 +18,7 @@ foo ()
for (k = 0; k < K; k++) for (k = 0; k < K; k++)
{ {
sum = 1; sum = 1;
for (j = 0; j < K; j++) for (j = 0; j < K; j++)
for (i = 0; i < K; i++)
{
sum *= in[i+k][j];
/* Avoid vectorization. */
if (y)
abort ();
}
check_result[k] = sum;
}
for (k = 0; k < K; k++)
{
sum = 1;
for (j = 0; j < K; j++)
for (i = 0; i < K; i++) for (i = 0; i < K; i++)
sum *= in[i+k][j]; sum *= in[i+k][j];
out[k] = sum; out[k] = sum;
...@@ -59,7 +43,7 @@ int main () ...@@ -59,7 +43,7 @@ int main ()
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Store the running value of the short induction variable X into three
   local arrays of different unsigned element types (char, short, int),
   incrementing X once per iteration, then verify the arrays against a
   saved copy of the initial value.  Calls abort () on a mismatch,
   otherwise returns 0.  */
__attribute__ ((noinline)) int main1 (short X)
{
unsigned char a[N];
unsigned short b[N];
unsigned int c[N];
/* Saved starting value, replayed by the verification loop.  */
short myX = X;
int i;
/* vectorization of induction with type conversions. */
for (i = 0; i < N; i++)
{
a[i] = (unsigned char)X;
b[i] = X;
c[i] = (unsigned int)X;
X++;
}
/* check results: */
for (i = 0; i < N; i++)
{
/* myX is advanced by the post-increment in the last operand, which is
   only evaluated when the first two comparisons are false; if either is
   true abort () is called anyway.  */
if (a[i] != (unsigned char)myX || b[i] != myX || c[i] != (unsigned int)myX++)
abort ();
}
return 0;
}
/* Test driver: call check_vect () (declared outside this chunk) and
   return the result of main1 with a starting value of 3.  */
int main (void)
{
check_vect ();
return main1 (3);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_pack_trunc && vect_unpack } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 26
__attribute__ ((noinline)) int main1 (short X) __attribute__ ((noinline)) int main1 (short X)
{ {
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 128
/* Store the running value of the short induction variable X into three
   local arrays of different signed element types (signed char, short,
   int), incrementing X once per iteration, then verify the arrays
   against a saved copy of the initial value.  Calls abort () on a
   mismatch, otherwise returns 0.  */
__attribute__ ((noinline)) int main1 (short X)
{
signed char a[N];
short b[N];
int c[N];
/* Saved starting value, replayed by the verification loop.  */
short myX = X;
int i;
/* vectorization of induction with type conversions. */
for (i = 0; i < N; i++)
{
a[i] = (signed char)X;
b[i] = X;
c[i] = (int)X;
X++;
}
/* check results: */
for (i = 0; i < N; i++)
{
/* myX is advanced by the post-increment in the last operand, which is
   only evaluated when the first two comparisons are false; if either is
   true abort () is called anyway.  */
if (a[i] != (signed char)myX || b[i] != myX || c[i] != (int)myX++)
abort ();
}
return 0;
}
/* Test driver: call check_vect () (declared outside this chunk) and
   return the result of main1 with a starting value of 3.  */
int main (void)
{
check_vect ();
return main1 (3);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_pack_trunc && vect_unpack } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 128 #define N 26
__attribute__ ((noinline)) int main1 (short X) __attribute__ ((noinline)) int main1 (short X)
{ {
......
/* { dg-do compile } */
#define N 256
signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short out[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
/* Can't do outer-loop vectorization because of non-consecutive access. */
/* For each row I, sum image[i][j] - block[i][j] over every 8th column J
   and store the int sum into out[i], whose element type is signed
   short.  */
void
foo (){
int i,j;
int diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop steps by 8 columns.  */
for (j = 0; j < N; j+=8) {
diff += (image[i][j] - block[i][j]);
}
out[i]=diff;
}
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 256 #define N 64
signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short out[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short out[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
......
/* { dg-do compile } */
#define N 256
signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
/* Can't do outer-loop vectorization because of non-consecutive access. */
/* Sum image[i][j] - block[i][j] over all rows I and every 8th column J
   into a single accumulator and return the total.  */
int
foo (){
int i,j;
/* Accumulated across the whole loop nest; never reset per row.  */
int diff = 0;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j+=8) {
diff += (image[i][j] - block[i][j]);
}
}
return diff;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 256 #define N 64
signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); signed short block[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
......
/* { dg-do compile } */
#define N 160
signed short image[N][N];
signed short block[N][N];
signed short out[N];
/* Outer-loop cannot get vectorized because of non-consecutive access. */
/* For each row I, sum image[i][j] - block[i][j] over every 4th column J
   and store the int sum into out[i], whose element type is signed
   short.  */
void
foo (){
int i,j;
int diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop steps by 4 columns.  */
for (j = 0; j < N; j+=4) {
diff += (image[i][j] - block[i][j]);
}
out[i]=diff;
}
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 160 #define N 40
signed short image[N][N]; signed short image[N][N];
signed short block[N][N]; signed short block[N][N];
signed short out[N]; signed short out[N];
......
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_intfloat_cvt } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 160
float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float out[N];
/* Outer-loop vectorization. */
/* Fill the global float matrix so that image[j][i] == j + i.  The inner
   loop varies the first (row) index, the outer loop the second.  */
__attribute__ ((noinline)) void
foo (){
int i,j;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
/* The int sum j+i is converted to float on the store.  */
image[j][i] = j+i;
}
}
}
/* Test driver: call check_vect () (declared outside this chunk), run
   foo and verify that every image[j][i] equals j + i.  Calls abort () on
   a mismatch, otherwise returns 0.  */
int main (void)
{
check_vect ();
int i, j;
foo ();
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
if (image[j][i] != j+i)
abort ();
}
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 160 #define N 40
float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float out[N]; float out[N];
......
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_intfloat_cvt } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 160
float image[N][N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
/* Fill the global three-dimensional float array so that
   image[k][j][i] == j + i + k.  The innermost loop varies the middle
   index J, the middle loop varies the last index I.  */
__attribute__ ((noinline)) void
foo (){
int i,j,k;
for (k=0; k<N; k++) {
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
/* The int sum is converted to float on the store.  */
image[k][j][i] = j+i+k;
}
}
}
}
/* Test driver: call check_vect () (declared outside this chunk), run
   foo and verify that every image[k][j][i] equals j + i + k.  Calls
   abort () on a mismatch, otherwise returns 0.  */
int main (void)
{
check_vect ();
int i, j, k;
foo ();
for (k=0; k<N; k++) {
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
if (image[k][j][i] != j+i+k)
abort ();
}
}
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 160 #define N 40
float image[N][N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float image[N][N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
__attribute__ ((noinline)) void __attribute__ ((noinline)) void
......
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_intfloat_cvt } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 160
float image[2*N][2*N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
/* Write j + i + k into image[k][j][i] for K and I below N and for every
   second J below N.  The array is declared with 2*N entries in its first
   two dimensions, so only part of it is touched.  */
__attribute__ ((noinline)) void
foo (){
int i,j,k;
for (k=0; k<N; k++) {
for (i = 0; i < N; i++) {
/* Innermost loop steps the middle index by 2.  */
for (j = 0; j < N; j+=2) {
image[k][j][i] = j+i+k;
}
}
}
}
/* Test driver: call check_vect () (declared outside this chunk), run
   foo and verify the elements foo wrote (same index pattern, J stepping
   by 2).  Calls abort () on a mismatch, otherwise returns 0.  */
int main (void)
{
check_vect ();
int i, j, k;
foo ();
for (k=0; k<N; k++) {
for (i = 0; i < N; i++) {
for (j = 0; j < N; j+=2) {
if (image[k][j][i] != j+i+k)
abort ();
}
}
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 160 #define N 40
float image[2*N][2*N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float image[2*N][2*N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
__attribute__ ((noinline)) void __attribute__ ((noinline)) void
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 320
float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float out[N];
/* Outer-loop vectorization. */
/* Compute the sum of each column of the global matrix image:
   out[i] = image[0][i] + image[1][i] + ... + image[N-1][i].  */
__attribute__ ((noinline)) void
foo (){
int i,j;
float diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop walks down column I (first index varies).  */
for (j = 0; j < N; j++) {
diff += image[j][i];
}
out[i]=diff;
}
}
/* Test driver: call check_vect () (declared outside this chunk), fill
   image[i][j] with i + j, run foo, then recompute each column sum with
   the same loop shape and require exact float equality with out[i].
   Calls abort () on a mismatch, otherwise returns 0.  */
int main (void)
{
check_vect ();
int i, j;
float diff;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
image[i][j]=i+j;
}
}
foo ();
for (i = 0; i < N; i++) {
diff = 0;
for (j = 0; j < N; j++) {
diff += image[j][i];
}
if (out[i] != diff)
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 320 #define N 40
float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float out[N]; float out[N];
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 320
float image[N][N+1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float out[N];
/* Outer-loop vectorization with misaligned accesses in the inner-loop. */
/* Compute the sum of the first N entries of each of the first N columns
   of the global matrix image, whose rows are declared with N+1
   elements: out[i] = image[0][i] + ... + image[N-1][i].  */
__attribute__ ((noinline)) void
foo (){
int i,j;
float diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop walks down column I (first index varies).  */
for (j = 0; j < N; j++) {
diff += image[j][i];
}
out[i]=diff;
}
}
/* Test driver: call check_vect () (declared outside this chunk), fill
   image[i][j] with i + j for I and J below N, run foo, then recompute
   each column sum with the same loop shape and require exact float
   equality with out[i].  Calls abort () on a mismatch, otherwise
   returns 0.  */
int main (void)
{
check_vect ();
int i, j;
float diff;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
image[i][j]=i+j;
}
}
foo ();
for (i = 0; i < N; i++) {
diff = 0;
for (j = 0; j < N; j++) {
diff += image[j][i];
}
if (out[i] != diff)
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 320 #define N 40
float image[N][N+1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float image[N][N+1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float out[N]; float out[N];
......
/* { dg-do compile } */
#define N 512
#define M 1024
signed short in[N+M];
signed short coeff[M];
signed short out[N];
/* Outer-loop vectorization. */
/* For each output index I, accumulate in[j+i] * coeff[j] over every 8th
   J below M in an int and store the sum into out[i], whose element type
   is signed short.  */
void
foo (){
int i,j;
int diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop steps J by 8.  */
for (j = 0; j < M; j+=8) {
diff += in[j+i]*coeff[j];
}
out[i]=diff;
}
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target { vect_widen_mult_hi_to_si && vect_pack_trunc } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 512 #define N 40
#define M 1024 #define M 128
signed short in[N+M]; signed short in[N+M];
signed short coeff[M]; signed short coeff[M];
signed short out[N]; signed short out[N];
......
/* { dg-do compile } */
#define N 320
#define M 1024
signed short in[N+M];
signed short coeff[M];
int out[N];
/* Outer-loop vectorization. */
/* For each output index I, accumulate in[j+i] * coeff[j] over every 8th
   J below M in an int and store the sum into out[i], whose element type
   is int.  */
void
foo (){
int i,j;
int diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop steps J by 8.  */
for (j = 0; j < M; j+=8) {
diff += in[j+i]*coeff[j];
}
out[i]=diff;
}
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 320 #define N 40
#define M 1024 #define M 128
signed short in[N+M]; signed short in[N+M];
signed short coeff[M]; signed short coeff[M];
int out[N]; int out[N];
......
/* { dg-do compile } */
#define N 320
#define M 1024
unsigned short in[N+M];
unsigned short coeff[M];
unsigned int out[N];
/* Outer-loop vectorization. */
/* For each output index I, accumulate in[j+i] * coeff[j] over every 8th
   J below M in an unsigned short accumulator and store it into out[i],
   whose element type is unsigned int.  */
void
foo (){
int i,j;
/* Accumulator is narrower than int, so each update is converted back to
   unsigned short.  */
unsigned short diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop steps J by 8.  */
for (j = 0; j < M; j+=8) {
diff += in[j+i]*coeff[j];
}
out[i]=diff;
}
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target { vect_short_mult && { ! vect_no_align } } } } } */
/* { dg-final { scan-tree-dump-times "zero step in outer loop." 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */ /* { dg-do compile } */
#define N 320 #define N 40
#define M 1024 #define M 128
unsigned short in[N+M]; unsigned short in[N+M];
unsigned short coeff[M]; unsigned short coeff[M];
unsigned int out[N]; unsigned int out[N];
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 320
#define M 1024
float in[N+M];
float out[N];
/* Outer-loop vectorization. */
/* For each output index I, sum in[j+i] over every 4th J below M and
   store the float sum into out[i].  */
__attribute__ ((noinline)) void
foo (){
int i,j;
float diff;
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop steps J by 4.  */
for (j = 0; j < M; j+=4) {
diff += in[j+i];
}
out[i]=diff;
}
}
/* Test driver: call check_vect () (declared outside this chunk),
   initialize the input, run foo, then recompute each sum with the same
   loop shape and require exact float equality with out[i].  Calls
   abort () on a mismatch, otherwise returns 0.  */
int main (void)
{
check_vect ();
int i, j;
float diff;
/* Only in[0 .. N-1] is assigned here, although foo reads indices
   beyond N.  IN is a file-scope array, so the remaining elements keep
   their zero initialization.  */
for (i = 0; i < N; i++)
in[i] = i;
foo ();
for (i = 0; i < N; i++) {
diff = 0;
for (j = 0; j < M; j+=4) {
diff += in[j+i];
}
if (out[i] != diff)
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 320 #define N 40
#define M 1024 #define M 128
float in[N+M]; float in[N+M];
float out[N]; float out[N];
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 320
#define M 1024
unsigned short in[N+M];
unsigned int out[N];
/* Outer-loop vectorization. */
/* Not vectorized due to multiple-types in the inner-loop. */
/* For each I below N, sum in[j+i] over every 8th J below M into an
   unsigned int, then add that sum, truncated to unsigned short and
   shifted right by 3, to a running total.  Returns the total.  */
__attribute__ ((noinline)) unsigned int
foo (){
int i,j;
unsigned int diff;
unsigned int s=0;
for (i = 0; i < N; i++) {
diff = 0;
for (j = 0; j < M; j+=8) {
diff += in[j+i];
}
/* The cast applies to DIFF before the shift: truncate first, then
   shift.  */
s+=((unsigned short)diff>>3);
}
return s;
}
/* Test driver: call check_vect () (declared outside this chunk), set
   in[i] = i for all N+M elements, run foo, recompute the same total
   with an identical loop nest and compare.  Calls abort () on a
   mismatch, otherwise returns 0.  */
int main (void)
{
int i, j;
unsigned int diff;
unsigned int s=0,sum=0;
check_vect ();
for (i = 0; i < N+M; i++) {
in[i] = i;
}
sum=foo ();
for (i = 0; i < N; i++) {
diff = 0;
for (j = 0; j < M; j+=8) {
diff += in[j+i];
}
s += ((unsigned short)diff>>3);
}
if (s != sum)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 320 #define N 40
#define M 1024 #define M 128
unsigned short in[N+M]; unsigned short in[N+M];
unsigned int out[N]; unsigned int out[N];
......
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 80
#define M 256
float in[N+M];
float coeff[M];
float out[N];
float fir_out[N];
/* Should be vectorized. Fixed misalignment in the inner-loop. */
/* Compute out[i] as the sum over J below M of in[j+i] * coeff[j], split
   into four passes: pass K accumulates the terms J = k, k+4, k+8, ...
   and adds the partial sum to out[i].  OUT is cleared first.  */
__attribute__ ((noinline))
void foo (){
int i,j,k;
float diff;
for (i = 0; i < N; i++) {
out[i] = 0;
}
for (k = 0; k < 4; k++) {
for (i = 0; i < N; i++) {
diff = 0;
/* J starts at K and steps by 4.  */
for (j = k; j < M; j+=4) {
diff += in[j+i]*coeff[j];
}
out[i] += diff;
}
}
}
/* Vectorized. Changing misalignment in the inner-loop. */
/* Compute fir_out[i] as the sum over all J below M of
   in[j+i] * coeff[j], in a single pass with unit step.  */
__attribute__ ((noinline))
void fir (){
int i,j,k;
float diff;
for (i = 0; i < N; i++) {
diff = 0;
for (j = 0; j < M; j++) {
diff += in[j+i]*coeff[j];
}
fir_out[i] = diff;
}
}
/* Test driver: call check_vect () (declared outside this chunk), set
   coeff[i] = i and in[i] = i, run foo and fir, and require out[i] and
   fir_out[i] to compare equal for every I below N.  Calls abort () on a
   mismatch, otherwise returns 0.  */
int main (void)
{
check_vect ();
int i, j;
float diff;
for (i = 0; i < M; i++)
coeff[i] = i;
for (i = 0; i < N+M; i++)
in[i] = i;
foo ();
fir ();
for (i = 0; i < N; i++) {
if (out[i] != fir_out[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
/* N is the number of output elements computed; M is the number of
   coefficients.  */
#define N 80
#define M 128
/* Input samples.  Sized N+M because the kernels read in[j+i] with
   i < N and j < M.  */
float in[N+M];
/* Coefficients multiplied against the input.  */
float coeff[M];
/* Result written by foo ().  */
float out[N];
/* Result written by fir (); main () compares it against out[].  */
float fir_out[N];
/* Vectorized.  Fixed misalignment in the inner-loop.

   Computes out[i] = sum over j in [0, M) of in[j+i]*coeff[j] for every
   i in [0, N), split into four passes: pass k accumulates only the terms
   with j = k, k+4, k+8, ... and adds that partial sum to out[i].  The
   inner loop is written as a do-while, so its body runs once before the
   j < M test is first evaluated.  */
__attribute__ ((noinline))
void foo (){
int i,j,k;
float diff;
/* Clear the accumulators; the passes below add into out[].  */
for (i = 0; i < N; i++) {
out[i] = 0;
}
/* One pass per starting offset k of the stride-4 inner loop.  */
for (k = 0; k < 4; k++) {
for (i = 0; i < N; i++) {
diff = 0;
/* Inner loop always starts at offset k and advances by 4.  */
j = k;
do {
diff += in[j+i]*coeff[j];
j+=4;
} while (j < M);
out[i] += diff;
}
}
}
/* Vectorized.  Changing misalignment in the inner-loop.

   Computes fir_out[i] = sum over j in [0, M) of in[j+i]*coeff[j] for
   every i in [0, N) in a single unit-stride pass.  The local k is
   declared but not used.  */
__attribute__ ((noinline))
void fir (){
int i,j,k;
float diff;
for (i = 0; i < N; i++) {
diff = 0;
/* The inner loop starts at in[i], so its start address moves with i.  */
for (j = 0; j < M; j++) {
diff += in[j+i]*coeff[j];
}
fir_out[i] = diff;
}
}
/* Test driver: initializes coeff[] and in[] with their own indices, runs
   foo () and fir (), and aborts if out[] and fir_out[] differ in any of
   the N elements.  Returns 0 on success.  The locals j and diff are
   declared but not used.  */
int main (void)
{
check_vect ();
int i, j;
float diff;
for (i = 0; i < M; i++)
coeff[i] = i;
for (i = 0; i < N+M; i++)
in[i] = i;
foo ();
fir ();
/* Exact comparison.  With N = 80 and M = 128 every product and partial
   sum is a non-negative integer no larger than 1332992 (< 2^24), so,
   assuming IEEE single-precision float, the results are exact
   regardless of the order in which the terms are added.  */
for (i = 0; i < N; i++) {
if (out[i] != fir_out[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 80 #define N 40
#define M 128 #define M 64
float in[N+M]; float in[N+M];
float coeff[M]; float coeff[M];
float out[N]; float out[N];
......
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 80 #define N 40
#define M 256 #define M 128
float in[N+M]; float in[N+M];
float coeff[M]; float coeff[M];
float out[N]; float out[N];
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
/* Size in bytes of the in[] and out[] buffers in main (); foo () processes
   N/4 four-byte groups.  */
#define N 512
/* Modified rgb to rgb conversion from FFmpeg.

   Reads N/4 groups of four bytes (b, g, r, a) from SRC, combines each
   group into one value and stores it through DST viewed as
   unsigned short *.  A second pass over the same data recomputes each
   value and aborts on the first mismatch.

   NOTE(review): DST is an unsigned char * accessed through a wider
   pointer type; this function does not establish that the caller's
   buffer is suitably aligned.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst;
int i;
/* Conversion pass: one unsigned short written per four input bytes.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* The expression is evaluated in int and converted to unsigned short
   by the store.  */
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
/* Rewind both pointers for the verification pass.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
/* Test driver: fills the local in[] with i converted to unsigned char and
   out[] with 255, then calls foo (), which performs the conversion and
   its own verification.  Returns 0 on success.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* NOTE(review): the empty volatile asm presumably keeps this
   initialization loop from being vectorized, consistent with the
   "vectorized 1 loops" scan at the end of the test -- confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "tree-vect.h" #include "tree-vect.h"
#define N 512 #define N 64
/* Modified rgb to rgb conversion from FFmpeg. */ /* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) void __attribute__ ((noinline)) void
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
/* Size in bytes of the in[] and out[] buffers in main (); foo () processes
   N/4 four-byte groups.  */
#define N 512
/* Modified rgb to rgb conversion from FFmpeg.

   Reads N/4 groups of four bytes (b, g, r, a) from SRC, combines each
   group into one value and stores it through DST viewed as int *.  A
   second pass over the same data recomputes each value and aborts on
   the first mismatch.

   NOTE(review): DST is an unsigned char * accessed through a wider
   pointer type; this function does not establish that the caller's
   buffer is suitably aligned, and the N/4 int stores cover N bytes only
   when int is 4 bytes wide.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
int *d = (int *)dst;
int i;
/* Conversion pass: one int written per four input bytes.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* The expression is evaluated in int and stored as int.  */
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
/* Rewind both pointers for the verification pass.  */
s = src;
d = (int *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
/* Test driver: fills the local in[] with i converted to unsigned char and
   out[] with 255, then calls foo (), which performs the conversion and
   its own verification.  Returns 0 on success.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* NOTE(review): the empty volatile asm presumably keeps this
   initialization loop from being vectorized, consistent with the
   "vectorized 1 loops" scan at the end of the test -- confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* Final value stays in int, so no over-widening is detected at the moment. */
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment