Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
e75b54a2
Commit
e75b54a2
authored
13 years ago
by
Richard Earnshaw
Committed by
Richard Earnshaw
13 years ago
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
* lex.c (search_line_fast): Provide Neon-optimized version for ARM.
From-SVN: r185702
parent
b31a2c5a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
67 additions
and
0 deletions
+67
-0
libcpp/ChangeLog
+4
-0
libcpp/lex.c
+63
-0
No files found.
libcpp/ChangeLog
View file @
e75b54a2
2012
-
03
-
22
Richard
Earnshaw
<
rearnsha
@
arm.com
>
*
lex.c
(
search_line_fast
):
Provide
Neon
-optimized
version
for
ARM.
2012
-
03
-
14
Rainer
Orth
<
ro
@
CeBiTec.Uni
-Bielefeld.DE
>
*
lex.c
:
Remove
Solaris
8
reference.
...
...
This diff is collapsed.
Click to expand it.
libcpp/lex.c
View file @
e75b54a2
...
...
@@ -629,6 +629,69 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
}
}
#elif defined (__ARM_NEON__)
#include "arm_neon.h"
/* ARM Neon implementation of search_line_fast.

   Starting at S, examine memory in 16-byte aligned chunks and return a
   pointer to the first byte equal to '\n', '\r', '\\' or '?'.

   END is accepted for interface compatibility but is never consulted:
   the scan only stops once one of those four bytes has been seen.
   NOTE(review): presumably the caller guarantees that such a byte
   exists at or after S -- confirm against the callers.  */

static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  /* Each of the four interesting characters, replicated to all lanes.  */
  const uint8x16_t newline_x16 = vdupq_n_u8 ('\n');
  const uint8x16_t return_x16 = vdupq_n_u8 ('\r');
  const uint8x16_t backslash_x16 = vdupq_n_u8 ('\\');
  const uint8x16_t question_x16 = vdupq_n_u8 ('?');

  /* A different single bit for every byte of each 8-byte half, so that
     summing the bytes of a half cannot carry.  */
  const uint8x16_t lane_bits
    = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);

  /* Round S down to a 16-byte boundary and load the chunk containing
     it.  LEAD is the number of bytes in that chunk that precede S.  */
  const unsigned int lead = (uintptr_t) s & 15;
  const uint8_t *chunk = (const uint8_t *) ((uintptr_t) s & -16);
  uint8x16_t bytes = vld1q_u8 (chunk);

  /* Bits of VALID mark the bytes of the current chunk that may be
     reported.  For the first chunk the bytes before S are excluded;
     every later chunk is fully valid.  The AND is kept inside the loop
     because some flag-setting instruction is needed there anyway.  */
  unsigned int valid = (-1u << lead) & 0xffff;
  unsigned int hits;

  /* Process one 16-byte chunk per iteration.  */
  for (;;)
    {
      /* All-ones in every lane that holds one of the four characters.  */
      uint8x16_t eq_nl = vceqq_u8 (bytes, newline_x16);
      uint8x16_t eq_cr = vceqq_u8 (bytes, return_x16);
      uint8x16_t nl_or_bs = vorrq_u8 (eq_nl, vceqq_u8 (bytes, backslash_x16));
      uint8x16_t cr_or_qm = vorrq_u8 (eq_cr, vceqq_u8 (bytes, question_x16));

      /* Keep only the per-lane identifying bit of each matching byte.  */
      uint8x16_t tagged = vandq_u8 (vorrq_u8 (nl_or_bs, cr_or_qm), lane_bits);

      /* Successive pairwise additions fold each 8-byte half down to a
	 single 32-bit lane holding that half's 8-bit match mask.  */
      uint8x8_t sum8 = vpadd_u8 (vget_low_u8 (tagged), vget_high_u8 (tagged));
      uint16x4_t sum16 = vpaddl_u8 (sum8);
      uint32x2_t sum32 = vpaddl_u16 (sum16);

      /* Shift the 64-bit view right by 24 and OR it back in, so that
	 both halves' masks sit side by side in lane 0.  */
      uint64x1_t packed = (uint64x1_t) sum32;
      hits = vget_lane_u32 ((uint32x2_t) vorr_u64 (packed,
						   vshr_n_u64 (packed, 24)),
			    0);
      hits &= valid;
      if (hits)
	break;

      /* Nothing in this chunk: advance to the next one.  */
      chunk += 16;
      bytes = vld1q_u8 (chunk);
      valid = 0xffff;
    }

  /* HITS has a 1 bit for every byte that matched; the lowest set bit
     gives the byte offset of the first match within the chunk.  */
  return (const uchar *) chunk + __builtin_ctz (hits);
}
#else
/* We only have one accelerated alternative. Use a direct call so that
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment