Commit 0ccaaab0 by Bill Schmidt Committed by William Schmidt

lex.c (search_line_fast): Add new version to be used for Power8 and later…

lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled.

2014-10-03  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* lex.c (search_line_fast): Add new version to be used for Power8
	and later targets when Altivec is enabled.  Restrict the existing
	Altivec version to big-endian systems so that lvsr is not used on
	little endian, where it is deprecated.  Remove LE-specific code
	from the now-BE-only version.

From-SVN: r215873
parent 3c9aabbd
2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* lex.c (search_line_fast): Add new version to be used for Power8
and later targets when Altivec is enabled. Restrict the existing
Altivec version to big-endian systems so that lvsr is not used on
little endian, where it is deprecated. Remove LE-specific code
from the now-BE-only version.
2014-10-02 Bernd Edlinger <bernd.edlinger@hotmail.de> 2014-10-02 Bernd Edlinger <bernd.edlinger@hotmail.de>
Jeff Law <law@redhat.com> Jeff Law <law@redhat.com>
......
...@@ -513,9 +513,111 @@ init_vectorized_lexer (void) ...@@ -513,9 +513,111 @@ init_vectorized_lexer (void)
search_line_fast = impl; search_line_fast = impl;
} }
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
/* A version of the fast scanner using AltiVec vectorized byte compares. */ /* A version of the fast scanner using AltiVec vectorized byte compares
and VSX unaligned loads (when VSX is available). This is otherwise
the same as the pre-GCC 5 version. */
/* Power8 variant of the line scanner.  Starting at S, examine the input
   16 bytes at a time and return a pointer to the first byte that is
   equal to '\n', '\r', '\\' or '?'.

   END is marked unused and is never consulted: the main loop stops only
   when one of those four bytes is found.  NOTE(review): this presumably
   relies on the caller guaranteeing that such a byte follows S -- confirm
   against the callers.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
typedef __attribute__((altivec(vector))) unsigned char vc;
/* One vector per character of interest, with that character replicated
   into all 16 byte lanes so a single compare tests every input byte.  */
const vc repl_nl = {
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
};
const vc repl_cr = {
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
};
const vc repl_bs = {
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
};
const vc repl_qm = {
'?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?', '?', '?',
};
const vc zero = { 0 };
vc data, t;
/* Main loop processing 16 bytes at a time. */
do
{
vc m_nl, m_cr, m_bs, m_qm;
/* Load 16 bytes straight from S.  S is not aligned beforehand, so this
   variant depends on unaligned vector loads being usable.  */
data = *((const vc *)s);
s += 16;
m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
t = (m_nl | m_cr) | (m_bs | m_qm);
/* T now contains 0xff in bytes for which we matched one of the relevant
characters. We want to exit the loop if any byte in T is non-zero.
Below is the expansion of vec_any_ne(t, zero). */
}
while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
/* Restore S to point to the 16 bytes we just processed. */
s -= 16;
{
/* N is the number of unsigned longs that make up one vector.  */
#define N (sizeof(vc) / sizeof(long))
/* View the match mask T as N words so it can be scanned a word at a
   time with ordinary integer operations.  */
union {
vc v;
/* Statically assert that N is 2 or 4. */
unsigned long l[(N == 2 || N == 4) ? N : -1];
} u;
unsigned long l, i = 0;
u.v = t;
/* Find the first word of T that is non-zero.  S is advanced past each
   all-zero word so that it ends up pointing at the start of the word
   held in L.  */
switch (N)
{
case 4:
l = u.l[i++];
if (l != 0)
break;
s += sizeof(unsigned long);
l = u.l[i++];
if (l != 0)
break;
s += sizeof(unsigned long);
/* FALLTHRU: the remaining two words are handled exactly like N == 2.  */
case 2:
l = u.l[i++];
if (l != 0)
break;
s += sizeof(unsigned long);
/* Last word: no test is needed, because the loop above exited only
   once some byte of T was non-zero and every earlier word was zero.  */
l = u.l[i];
}
/* L now contains 0xff in bytes for which we matched one of the
relevant characters. We can find the byte index by finding
its bit index and dividing by 8. */
#ifdef __BIG_ENDIAN__
/* Big endian: the lowest-addressed byte is the most significant one, so
   the first match corresponds to the leading set bits.  */
l = __builtin_clzl(l) >> 3;
#else
/* Little endian: the lowest-addressed byte is the least significant one,
   so the first match corresponds to the trailing set bits.  */
l = __builtin_ctzl(l) >> 3;
#endif
return s + l;
#undef N
}
}
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
/* A version of the fast scanner using AltiVec vectorized byte compares.
This cannot be used for little endian because vec_lvsl/lvsr are
deprecated for little endian and the code won't work properly. */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported, /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
so we can't compile this function without -maltivec on the command line so we can't compile this function without -maltivec on the command line
(or implied by some other switch). */ (or implied by some other switch). */
...@@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) ...@@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
beginning with all ones and shifting in zeros according to the beginning with all ones and shifting in zeros according to the
mis-alignment. The LVSR instruction pulls the exact shift we mis-alignment. The LVSR instruction pulls the exact shift we
want from the address. */ want from the address. */
#ifdef __BIG_ENDIAN__
mask = __builtin_vec_lvsr(0, s); mask = __builtin_vec_lvsr(0, s);
mask = __builtin_vec_perm(zero, ones, mask); mask = __builtin_vec_perm(zero, ones, mask);
#else
mask = __builtin_vec_lvsl(0, s);
mask = __builtin_vec_perm(ones, zero, mask);
#endif
data &= mask; data &= mask;
/* While altivec loads mask addresses, we still need to align S so /* While altivec loads mask addresses, we still need to align S so
...@@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) ...@@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
/* L now contains 0xff in bytes for which we matched one of the /* L now contains 0xff in bytes for which we matched one of the
relevant characters. We can find the byte index by finding relevant characters. We can find the byte index by finding
its bit index and dividing by 8. */ its bit index and dividing by 8. */
#ifdef __BIG_ENDIAN__
l = __builtin_clzl(l) >> 3; l = __builtin_clzl(l) >> 3;
#else
l = __builtin_ctzl(l) >> 3;
#endif
return s + l; return s + l;
#undef N #undef N
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment