Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
e9619a8a
Commit
e9619a8a
authored
Dec 13, 2011
by
Richard Henderson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
arm: Implement vec_perm and vec_perm_const for NEON.
From-SVN: r182299
parent
8c75d41b
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
619 additions
and
4 deletions
+619
-4
gcc/config/arm/arm-protos.h
+3
-0
gcc/config/arm/arm.c
+525
-1
gcc/config/arm/neon.md
+59
-0
gcc/config/arm/vec-common.md
+26
-0
gcc/testsuite/lib/target-supports.exp
+6
-3
No files found.
gcc/config/arm/arm-protos.h
View file @
e9619a8a
...
...
@@ -244,4 +244,7 @@ extern const struct tune_params *current_tune;
extern
int
vfp3_const_double_for_fract_bits
(
rtx
);
#endif
/* RTX_CODE */
extern
void
arm_expand_vec_perm
(
rtx
target
,
rtx
op0
,
rtx
op1
,
rtx
sel
);
extern
bool
arm_expand_vec_perm_const
(
rtx
target
,
rtx
op0
,
rtx
op1
,
rtx
sel
);
#endif
/* ! GCC_ARM_PROTOS_H */
gcc/config/arm/arm.c
View file @
e9619a8a
...
...
@@ -267,6 +267,9 @@ static unsigned int arm_autovectorize_vector_sizes (void);
static
int
arm_default_branch_cost
(
bool
,
bool
);
static
int
arm_cortex_a5_branch_cost
(
bool
,
bool
);
static
bool
arm_vectorize_vec_perm_const_ok
(
enum
machine_mode
vmode
,
const
unsigned
char
*
sel
);
/* Table of machine attributes. */
static
const
struct
attribute_spec
arm_attribute_table
[]
=
...
...
@@ -604,6 +607,10 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_PREFERRED_RENAME_CLASS \
arm_preferred_rename_class
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
arm_vectorize_vec_perm_const_ok
struct
gcc_target
targetm
=
TARGET_INITIALIZER
;
/* Obstack for minipool constant handling. */
...
...
@@ -25065,5 +25072,522 @@ vfp3_const_double_for_fract_bits (rtx operand)
return
0
;
}
#include "gt-arm.h"
/* Maximum number of bytes in a single vector handled here: a V16QImode
   NEON quad register holds 16 QImode elements.  */
#define MAX_VECT_LEN 16

/* Working state for matching and expanding one VEC_PERM(_CONST)
   request: the operand rtxes, the constant selector, and flags that
   control how the arm_evpc_neon_* matchers behave.  */
struct expand_vec_perm_d
{
  rtx target, op0, op1;		/* Destination and the two source vectors.  */
  unsigned char perm[MAX_VECT_LEN];	/* Constant selector, one index per
					   element; first NELT entries used.  */
  enum machine_mode vmode;	/* Vector mode of the operands.  */
  unsigned char nelt;		/* Number of elements in VMODE.  */
  bool one_vector_p;		/* True when op0 and op1 are the same vector,
				   so indices are taken modulo NELT.  */
  bool testing_p;		/* True: only test whether the permutation is
				   supported; emit no instructions.  */
};
/* Generate a variable permutation.  Emit a NEON VTBL (table lookup) of
   OP0/OP1 under the byte selector SEL into TARGET.  SEL must already be
   masked to valid indices; this routine applies no modulo.  Only the
   QImode vector modes are handled — wider element modes are lowered to
   bytes by the generic vec_perm machinery first.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      /* A single input vector: one-register table lookup.  */
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      enum machine_mode mode1, mode2;
      rtx pair, part;

      /* Two distinct inputs: VTBL wants the table as one contiguous
	 register pair, so build a double-width pseudo (MODE2) holding
	 OP0 in the low half and OP1 in the high half.  */
      if (vmode == V8QImode)
	mode1 = DImode, mode2 = TImode;
      else
	mode1 = TImode, mode2 = OImode;

      pair = gen_reg_rtx (mode2);
      /* Clobber the pair first so the partial writes below do not look
	 like uses of an uninitialized register.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, pair));

      part = simplify_gen_subreg (mode1, pair, mode2,
				  subreg_lowpart_offset (mode1, mode2));
      emit_move_insn (part, gen_lowpart (mode1, op0));

      part = simplify_gen_subreg (mode1, pair, mode2,
				  subreg_highpart_offset (mode1, mode2));
      emit_move_insn (part, gen_lowpart (mode1, op1));

      /* Two-register table lookup on the assembled pair.  */
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
      else
	emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
    }
}
/* Expand a variable (non-constant) VEC_PERM_EXPR: permute OP0/OP1 into
   TARGET under the run-time byte selector SEL.  Called from the
   vec_perm<mode> expander in vec-common.md.  */

void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  Build a constant vector of NELT-1 (or 2*NELT-1
     for a two-vector table) and AND it into the selector.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  VUZP de-interleaves: output 0
   takes the even-indexed elements of the operand pair, output 1 the odd
   ones.  Return true (and emit the insn unless d->testing_p) when
   d->perm selects { odd, odd+2, odd+4, ... } for odd in {0, 1}.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  /* VUZP has no variant for 64-bit elements.  */
  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      /* Swap inputs and parity to compensate for reversed element
	 numbering on big-endian.  */
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  /* VUZP produces both halves at once; the unwanted half goes into a
     scratch register.  Choose which output is TARGET by parity.  */
  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  VZIP interleaves: output 0
   alternates the low halves of the two operands, output 1 their high
   halves.  Return true (and emit the insn unless d->testing_p) when
   d->perm is { h, h+nelt, h+1, h+1+nelt, ... } for h in {0, nelt/2}.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  /* VZIP has no variant for 64-bit elements.  */
  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      /* Even positions come from the chosen half of op0 ...  */
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      /* ... odd positions from the matching element of op1.  */
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      /* Swap inputs and half selection to compensate for reversed
	 element numbering on big-endian.  */
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  /* VZIP produces both interleaved halves; route the requested one to
     TARGET and the other to a scratch register.  */
  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns, which reverse elements within
   each 16-, 32- or 64-bit chunk of a single vector.  d->perm[0] gives
   DIFF, the highest index within a chunk (7, 3 or 1); DIFF together
   with the element mode selects the specific VREV variant.  Return
   true (and emit the insn unless d->testing_p) on a match.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx, rtx);

  /* VREV is a one-input operation.  */
  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      /* 8 elements per chunk: VREV64 on bytes.  */
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      /* 4 elements per chunk: VREV32 on bytes, VREV64 on halfwords.  */
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      /* 2 elements per chunk: VREV16/32/64 depending on element size.  */
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  /* Verify every chunk of DIFF + 1 elements is exactly reversed.
     Each group spans DIFF + 1 entries, so advance I by that amount;
     stepping by DIFF alone would overlap groups, compare PERM entries
     against impossible values (rejecting valid VREV selectors) and
     read d->perm past NELT on the last group.  */
  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      if (d->perm[i + j] != i + diff - j)
	return false;

  /* Success! */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction. */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}
/* Recognize patterns for the VTRN insns, which transpose pairs of
   adjacent elements between the two operands: output 0 gets the even
   elements of op0 paired with the even elements of op1 (odd elements
   for output 1).  Return true (and emit the insn unless d->testing_p)
   when d->perm matches { odd, odd+nelt, odd+2, odd+2+nelt, ... }.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  /* VTRN has no variant for 64-bit elements.  */
  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      /* Swap inputs and parity to compensate for reversed element
	 numbering on big-endian.  */
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  /* VTRN produces both transposed results; route the requested one to
     TARGET and the other to a scratch register.  */
  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  Fallback matcher: handles any constant byte
   permutation the specialized VUZP/VZIP/VREV/VTRN matchers rejected.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order. */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves. */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  /* Materialize the constant selector as a register vector, already in
     range, so no masking AND is needed before the table lookup.  */
  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
/* Try each constant-permutation matcher in turn on the normalized
   request D.  Return true when one of them matched (and, unless
   d->testing_p, emitted the expansion).  */

static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands. */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      /* Rotate every index by NELT (mod 2*NELT) so references to op1
	 become references to op0 and vice versa ...  */
      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      /* ... then swap the operands to match.  */
      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      /* Prefer the cheap single-insn structured permutes; fall back to
	 the fully general VTBL expansion last.  */
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  TARGET, OP0, OP1 are the vector
   operands and SEL is the CONST_VECTOR selector.  Returns true and
   emits the expansion on success; false lets the expander FAIL.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  /* Copy the selector, reducing each index mod 2*NELT, and record in
     WHICH (bit 0 / bit 1) whether op0 / op1 is referenced at all.  */
  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      /* Both operands referenced.  */
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector. */
      /* FALLTHRU */
    case 2:
      /* Only op1 referenced: fold indices into op1 alone.  */
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      /* Only op0 referenced: duplicate it as the second operand.  */
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  Report to the
   vectorizer whether selector SEL is supported for VMODE, without
   emitting any code: run the matchers with testing_p set, on dummy
   raw registers, inside a discarded insn sequence.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector. */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first. */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type. */
  d.one_vector_p = (which != 3);

  /* Fake operand registers beyond the virtual-register range; they are
     never emitted since testing_p suppresses code generation and the
     sequence below is thrown away regardless.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#include "gt-arm.h"
gcc/config/arm/neon.md
View file @
e9619a8a
...
...
@@ -3876,6 +3876,65 @@
[
(set_attr "neon_type" "neon_bp_3cycle")
]
)
;; These two are used by the vec_perm infrastructure for V16QImode.
;; A quad-register VTBL does not exist as a single instruction, so after
;; reload this splits into two double-register VTBL2 lookups: the whole
;; 128-bit table (viewed as a TImode register pair) is indexed once for
;; the low half of the selector and once for the high half.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  ;; Treat the single V16QI table as a TImode register pair for VTBL2.
  op1 = gen_lowpart (TImode, operands[1]);
  op2 = operands[2];

  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
  DONE;
})
;; Two-table V16QI lookup: operand 1 is the OImode register quadruple
;; holding both source vectors.  As above, split after reload into two
;; double-register lookups, one per half of destination and selector.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
  DONE;
})
(define_insn "neon_vtbx1v8qi"
[
(set (match_operand:V8QI 0 "s_register_operand" "=w")
(unspec:V8QI
[
(match_operand:V8QI 1 "s_register_operand" "0")
...
...
gcc/config/arm/vec-common.md
View file @
e9619a8a
...
...
@@ -108,3 +108,29 @@
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (
<MODE>
mode))"
{
})
;; Constant-selector permutation.  Operand 3 is the CONST_VECTOR
;; selector.  Defer entirely to arm_expand_vec_perm_const, which FAILs
;; back to the generic expansion when no NEON pattern matches.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VALL 0 "s_register_operand" "")
   (match_operand:VALL 1 "s_register_operand" "")
   (match_operand:VALL 2 "s_register_operand" "")
   (match_operand:<V_cmp_result> 3 "" "")]
  "TARGET_NEON
   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
  if (arm_expand_vec_perm_const (operands[0], operands[1],
				 operands[2], operands[3]))
    DONE;
  else
    FAIL;
})
;; Variable-selector permutation, expanded via NEON VTBL.  Limited to
;; little-endian because VTBL byte numbering disagrees with GCC's
;; big-endian element numbering (see arm_expand_vec_perm).
(define_expand "vec_perm<mode>"
  [(match_operand:VE 0 "s_register_operand" "")
   (match_operand:VE 1 "s_register_operand" "")
   (match_operand:VE 2 "s_register_operand" "")
   (match_operand:VE 3 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  arm_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]);
  DONE;
})
gcc/testsuite/lib/target-supports.exp
View file @
e9619a8a
...
...
@@ -2725,7 +2725,8 @@ proc check_effective_target_vect_perm { } {
verbose "check_effective_target_vect_perm: using cached result" 2
} else {
set et_vect_perm_saved 0
if { [istarget powerpc*-*-*]
if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
...
...
@@ -2748,7 +2749,8 @@ proc check_effective_target_vect_perm_byte { } {
verbose "check_effective_target_vect_perm_byte: using cached result" 2
} else {
set et_vect_perm_byte_saved 0
if { [istarget powerpc*-*-*]
if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] } {
set et_vect_perm_byte_saved 1
}
...
...
@@ -2769,7 +2771,8 @@ proc check_effective_target_vect_perm_short { } {
verbose "check_effective_target_vect_perm_short: using cached result" 2
} else {
set et_vect_perm_short_saved 0
if { [istarget powerpc*-*-*]
if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] } {
set et_vect_perm_short_saved 1
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment