Commit 9ceafb57 by Edward Thomson

regexec: use pcre as our fallback/builtin regex

Use PCRE 8.42 as the builtin regex implementation, using its POSIX
compatibility layer.  PCRE uses ASCII by default and the users locale
will not influence its behavior, so its `regcomp` implementation is
similar to `regcomp_l` with a C locale.
parent d3a440ca
INCLUDE(CheckIncludeFile)
INCLUDE(CheckFunctionExists)
INCLUDE(CheckTypeSize)
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
CHECK_INCLUDE_FILE(inttypes.h HAVE_INTTYPES_H)
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
CHECK_FUNCTION_EXISTS(strtoll HAVE_STRTOLL)
CHECK_FUNCTION_EXISTS(strtoq HAVE_STRTOQ)
CHECK_FUNCTION_EXISTS(_strtoi64 HAVE__STRTOI64)
CHECK_TYPE_SIZE("long long" LONG_LONG)
CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
# User-configurable options
SET(SUPPORT_PCRE8 1)
SET(PCRE_LINK_SIZE "2")
SET(PCRE_PARENS_NEST_LIMIT "250")
SET(PCRE_MATCH_LIMIT "10000000")
SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT")
SET(PCRE_NEWLINE "LF")
SET(NO_RECURSE 1)
SET(PCRE_POSIX_MALLOC_THRESHOLD "10")
SET(BSR_ANYCRLF 0)
IF (MINGW)
OPTION(NON_STANDARD_LIB_PREFIX
"ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc."
OFF)
OPTION(NON_STANDARD_LIB_SUFFIX
"ON=Shared libraries built in mingw will be named libpcre-0.dll, etc., instead of libpcre.dll, etc."
OFF)
ENDIF(MINGW)
# Prepare build configuration
SET(pcre_have_long_long 0)
SET(pcre_have_ulong_long 0)
IF(HAVE_LONG_LONG)
SET(pcre_have_long_long 1)
ENDIF(HAVE_LONG_LONG)
IF(HAVE_UNSIGNED_LONG_LONG)
SET(pcre_have_ulong_long 1)
ENDIF(HAVE_UNSIGNED_LONG_LONG)
SET(NEWLINE "")
IF(PCRE_NEWLINE STREQUAL "LF")
SET(NEWLINE "10")
ENDIF(PCRE_NEWLINE STREQUAL "LF")
IF(PCRE_NEWLINE STREQUAL "CR")
SET(NEWLINE "13")
ENDIF(PCRE_NEWLINE STREQUAL "CR")
IF(PCRE_NEWLINE STREQUAL "CRLF")
SET(NEWLINE "3338")
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
IF(PCRE_NEWLINE STREQUAL "ANY")
SET(NEWLINE "-1")
ENDIF(PCRE_NEWLINE STREQUAL "ANY")
IF(PCRE_NEWLINE STREQUAL "ANYCRLF")
SET(NEWLINE "-2")
ENDIF(PCRE_NEWLINE STREQUAL "ANYCRLF")
IF(NEWLINE STREQUAL "")
MESSAGE(FATAL_ERROR "The PCRE_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
ENDIF(NEWLINE STREQUAL "")
# Output files
CONFIGURE_FILE(config.h.in
${PROJECT_BINARY_DIR}/src/pcre/config.h
@ONLY)
# Source code
SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/src/pcre/config.h)
SET(PCRE_SOURCES
pcre_byte_order.c
pcre_chartables.c
pcre_compile.c
pcre_config.c
pcre_dfa_exec.c
pcre_exec.c
pcre_fullinfo.c
pcre_get.c
pcre_globals.c
pcre_jit_compile.c
pcre_maketables.c
pcre_newline.c
pcre_ord2utf8.c
pcre_refcount.c
pcre_string_utils.c
pcre_study.c
pcre_tables.c
pcre_ucd.c
pcre_valid_utf8.c
pcre_version.c
pcre_xclass.c
)
SET(PCREPOSIX_HEADERS pcreposix.h)
SET(PCREPOSIX_SOURCES pcreposix.c)
# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681
# This code was taken from the CMake wiki, not from WebM.
# Build setup
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
IF(MSVC)
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
ENDIF(MSVC)
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
SET(targets)
# Libraries
# pcre
INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR}/src/pcre)
ADD_LIBRARY(pcre OBJECT ${PCRE_HEADERS} ${PCRE_SOURCES} ${PCREPOSIX_SOURCES})
# end CMakeLists.txt
PCRE LICENCE
Please see the file LICENCE in the PCRE distribution for licensing details.
End
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Modified from FindReadline.cmake (PH Feb 2012)
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
set(EDITLINE_FOUND TRUE)
else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
/usr/include/editline
/usr/include/edit/readline
/usr/include/readline
)
FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
# This macro is intended to be used in FindXXX.cmake modules files.
# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
# it also sets the <UPPERCASED_NAME>_FOUND variable.
# The package is found if all variables listed are TRUE.
# Example:
#
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
#
# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
# independent whether QUIET was used or not.
# If it is found, the location is reported using the VAR1 argument, so
# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
# If the second argument is DEFAULT_MSG, the message in the failure case will
# be "Could NOT find LibXml2", if you don't like this message you can specify
# your own custom failure message there.
MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
IF (${_NAME}_FIND_REQUIRED)
SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
ELSE (${_NAME}_FIND_REQUIRED)
SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
ENDIF (${_NAME}_FIND_REQUIRED)
ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
SET(_FAIL_MESSAGE "${_FAIL_MSG}")
ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
STRING(TOUPPER ${_NAME} _NAME_UPPER)
SET(${_NAME_UPPER}_FOUND TRUE)
IF(NOT ${_VAR1})
SET(${_NAME_UPPER}_FOUND FALSE)
ENDIF(NOT ${_VAR1})
FOREACH(_CURRENT_VAR ${ARGN})
IF(NOT ${_CURRENT_VAR})
SET(${_NAME_UPPER}_FOUND FALSE)
ENDIF(NOT ${_CURRENT_VAR})
ENDFOREACH(_CURRENT_VAR)
IF (${_NAME_UPPER}_FOUND)
IF (NOT ${_NAME}_FIND_QUIETLY)
MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
ELSE (${_NAME_UPPER}_FOUND)
IF (${_NAME}_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
ELSE (${_NAME}_FIND_REQUIRED)
IF (NOT ${_NAME}_FIND_QUIETLY)
MESSAGE(STATUS "${_FAIL_MESSAGE}")
ENDIF (NOT ${_NAME}_FIND_QUIETLY)
ENDIF (${_NAME}_FIND_REQUIRED)
ENDIF (${_NAME_UPPER}_FOUND)
ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
# --> BSD licensed
#
# GNU Readline library finder
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
set(READLINE_FOUND TRUE)
else(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
FIND_PATH(READLINE_INCLUDE_DIR readline/readline.h
/usr/include/readline
)
# 2008-04-22 The next clause used to read like this:
#
# FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
# FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
# include(FindPackageHandleStandardArgs)
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
#
# I was advised to modify it such that it will find an ncurses library if
# required, but not if one was explicitly given, that is, it allows the
# default to be overridden. PH
FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY )
MARK_AS_ADVANCED(READLINE_INCLUDE_DIR READLINE_LIBRARY)
endif(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
/* config.h for CMake builds */
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine HAVE_STDINT_H 1
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_TYPE_TRAITS_H 1
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
#cmakedefine HAVE_BCOPY 1
#cmakedefine HAVE_MEMMOVE 1
#cmakedefine HAVE_STRERROR 1
#cmakedefine HAVE_STRTOLL 1
#cmakedefine HAVE_STRTOQ 1
#cmakedefine HAVE__STRTOI64 1
#cmakedefine PCRE_STATIC 1
#cmakedefine SUPPORT_PCRE8 1
#cmakedefine SUPPORT_PCRE16 1
#cmakedefine SUPPORT_PCRE32 1
#cmakedefine SUPPORT_JIT 1
#cmakedefine SUPPORT_PCREGREP_JIT 1
#cmakedefine SUPPORT_UTF 1
#cmakedefine SUPPORT_UCP 1
#cmakedefine EBCDIC 1
#cmakedefine EBCDIC_NL25 1
#cmakedefine BSR_ANYCRLF 1
#cmakedefine NO_RECURSE 1
#cmakedefine HAVE_LONG_LONG 1
#cmakedefine HAVE_UNSIGNED_LONG_LONG 1
#cmakedefine SUPPORT_LIBBZ2 1
#cmakedefine SUPPORT_LIBZ 1
#cmakedefine SUPPORT_LIBEDIT 1
#cmakedefine SUPPORT_LIBREADLINE 1
#cmakedefine SUPPORT_VALGRIND 1
#cmakedefine SUPPORT_GCOV 1
#define NEWLINE @NEWLINE@
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
#define LINK_SIZE @PCRE_LINK_SIZE@
#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
#define MAX_NAME_SIZE 32
#define MAX_NAME_COUNT 10000
/* end config.h for CMake builds */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Swap byte functions *
*************************************************/
/* The following functions swap the bytes of a pcre_uint16
and pcre_uint32 value.
Arguments:
value any number
Returns: the byte swapped value
*/
static pcre_uint32
swap_uint32(pcre_uint32 value)
{
return ((value & 0x000000ff) << 24) |
((value & 0x0000ff00) << 8) |
((value & 0x00ff0000) >> 8) |
(value >> 24);
}
static pcre_uint16
swap_uint16(pcre_uint16 value)
{
return (value >> 8) | (value << 8);
}
/*************************************************
* Test for a byte-flipped compiled regex *
*************************************************/
/* This function swaps the bytes of a compiled pattern usually
loaded form the disk. It also sets the tables pointer, which
is likely an invalid pointer after reload.
Arguments:
argument_re points to the compiled expression
extra_data points to extra data or is NULL
tables points to the character tables or NULL
Returns: 0 if the swap is successful, negative on error
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re,
pcre_extra *extra_data, const unsigned char *tables)
#elif defined COMPILE_PCRE16
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re,
pcre16_extra *extra_data, const unsigned char *tables)
#elif defined COMPILE_PCRE32
PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *argument_re,
pcre32_extra *extra_data, const unsigned char *tables)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
pcre_study_data *study;
#ifndef COMPILE_PCRE8
pcre_uchar *ptr;
int length;
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
BOOL utf;
BOOL utf16_char;
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
#endif /* !COMPILE_PCRE8 */
if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number == MAGIC_NUMBER)
{
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->tables = tables;
return 0;
}
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->magic_number = MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
#elif defined COMPILE_PCRE32
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
#endif
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{
study = (pcre_study_data *)extra_data->study_data;
study->size = swap_uint32(study->size);
study->flags = swap_uint32(study->flags);
study->minlength = swap_uint32(study->minlength);
}
#ifndef COMPILE_PCRE8
ptr = (pcre_uchar *)re + re->name_table_offset;
length = re->name_count * re->name_entry_size;
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
utf = (re->options & PCRE_UTF16) != 0;
utf16_char = FALSE;
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
while(TRUE)
{
/* Swap previous characters. */
while (length-- > 0)
{
#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32
*ptr = swap_uint32(*ptr);
#endif
ptr++;
}
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (utf16_char)
{
if (HAS_EXTRALEN(ptr[-1]))
{
/* We know that there is only one extra character in UTF-16. */
*ptr = swap_uint16(*ptr);
ptr++;
}
}
utf16_char = FALSE;
#endif /* SUPPORT_UTF */
/* Get next opcode. */
length = 0;
#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32
*ptr = swap_uint32(*ptr);
#endif
switch (*ptr)
{
case OP_END:
return 0;
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_STAR:
case OP_MINSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_UPTO:
case OP_MINUPTO:
case OP_EXACT:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_POSQUERY:
case OP_POSUPTO:
case OP_STARI:
case OP_MINSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_UPTOI:
case OP_MINUPTOI:
case OP_EXACTI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_POSQUERYI:
case OP_POSUPTOI:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTEXACT:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTPOSQUERY:
case OP_NOTPOSUPTO:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTEXACTI:
case OP_NOTPOSSTARI:
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
case OP_NOTPOSUPTOI:
if (utf) utf16_char = TRUE;
#endif
/* Fall through. */
default:
length = PRIV(OP_lengths)[*ptr] - 1;
break;
case OP_CLASS:
case OP_NCLASS:
/* Skip the character bit map. */
ptr += 32/sizeof(pcre_uchar);
length = 0;
break;
case OP_XCLASS:
/* Reverse the size of the XCLASS instance. */
ptr++;
#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32
*ptr = swap_uint32(*ptr);
#endif
#ifndef COMPILE_PCRE32
if (LINK_SIZE > 1)
{
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */
ptr++;
*ptr = swap_uint16(*ptr);
}
#endif
ptr++;
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
#if defined COMPILE_PCRE16
*ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32
*ptr = swap_uint32(*ptr);
#endif
if ((*ptr & XCL_MAP) != 0)
{
/* Skip the character bit map. */
ptr += 32/sizeof(pcre_uchar);
length -= 32/sizeof(pcre_uchar);
}
break;
}
ptr++;
}
/* Control should never reach here in 16/32 bit mode. */
#else /* In 8-bit mode, the pattern does not need to be processed. */
return 0;
#endif /* !COMPILE_PCRE8 */
}
/* End of pcre_byte_order.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file contains character tables that are used when no external tables
are passed to PCRE by the application that calls it. The tables are used only
for characters whose code values are less than 256.
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE is configured with --enable-rebuild-chartables, this
happens automatically.
The following #includes are present because without them gcc 4.x may remove the
array definition from the final binary if PCRE is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
const pcre_uint8 PRIV(default_tables)[] = {
/* This table is a lower casing table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table is a case flipping table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of pcre_chartables.c */
This source diff could not be displayed because it is too large. You can view the blob instead.
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_config(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Keep the original link size. */
static int real_link_size = LINK_SIZE;
#include "pcre_internal.h"
/*************************************************
* Return info about what features are configured *
*************************************************/
/* This function has an extensible interface so that additional items can be
added compatibly.
Arguments:
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_config(int what, void *where)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_config(int what, void *where)
#endif
{
switch (what)
{
case PCRE_CONFIG_UTF8:
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UTF16:
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UTF32:
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
*((int *)where) = 0;
return PCRE_ERROR_BADOPTION;
#else
#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
#endif
case PCRE_CONFIG_UNICODE_PROPERTIES:
#ifdef SUPPORT_UCP
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_JITTARGET:
#ifdef SUPPORT_JIT
*((const char **)where) = PRIV(jit_get_target)();
#else
*((const char **)where) = NULL;
#endif
break;
case PCRE_CONFIG_NEWLINE:
*((int *)where) = NEWLINE;
break;
case PCRE_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_LINK_SIZE:
*((int *)where) = real_link_size;
break;
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
*((int *)where) = POSIX_MALLOC_THRESHOLD;
break;
case PCRE_CONFIG_PARENS_LIMIT:
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned long int *)where) = MATCH_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
break;
case PCRE_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE
*((int *)where) = 0;
#else
*((int *)where) = 1;
#endif
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcre_config.c */
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_fullinfo(), which returns
information about a compiled pattern. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/* This is a newer "info" function which has an extensible interface so
that additional items can be added compatibly.
Arguments:
argument_re points to compiled code
extra_data points extra data, or NULL
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
int what, void *where)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
int what, void *where)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data,
int what, void *where)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
const pcre_study_data *study = NULL;
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
/* Check that this pattern was compiled in the correct bit mode */
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
switch (what)
{
case PCRE_INFO_OPTIONS:
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
break;
case PCRE_INFO_SIZE:
*((size_t *)where) = re->size;
break;
case PCRE_INFO_STUDYSIZE:
*((size_t *)where) = (study == NULL)? 0 : study->size;
break;
case PCRE_INFO_JITSIZE:
#ifdef SUPPORT_JIT
*((size_t *)where) =
(extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra_data->executable_jit != NULL)?
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
#else
*((size_t *)where) = 0;
#endif
break;
case PCRE_INFO_CAPTURECOUNT:
*((int *)where) = re->top_bracket;
break;
case PCRE_INFO_BACKREFMAX:
*((int *)where) = re->top_backref;
break;
case PCRE_INFO_FIRSTBYTE:
*((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
break;
case PCRE_INFO_FIRSTCHARACTER:
*((pcre_uint32 *)where) =
(re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0;
break;
case PCRE_INFO_FIRSTCHARACTERFLAGS:
*((int *)where) =
((re->flags & PCRE_FIRSTSET) != 0) ? 1 :
((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0;
break;
/* Make sure we pass back the pointer to the bit vector in the external
block, not the internal copy (with flipped integer fields). */
case PCRE_INFO_FIRSTTABLE:
*((const pcre_uint8 **)where) =
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
break;
case PCRE_INFO_MINLENGTH:
*((int *)where) =
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
(int)(study->minlength) : -1;
break;
case PCRE_INFO_JIT:
*((int *)where) = extra_data != NULL &&
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra_data->executable_jit != NULL;
break;
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1;
break;
case PCRE_INFO_REQUIREDCHAR:
*((pcre_uint32 *)where) =
((re->flags & PCRE_REQCHSET) != 0) ? re->req_char : 0;
break;
case PCRE_INFO_REQUIREDCHARFLAGS:
*((int *)where) =
((re->flags & PCRE_REQCHSET) != 0);
break;
case PCRE_INFO_NAMEENTRYSIZE:
*((int *)where) = re->name_entry_size;
break;
case PCRE_INFO_NAMECOUNT:
*((int *)where) = re->name_count;
break;
case PCRE_INFO_NAMETABLE:
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
break;
case PCRE_INFO_DEFAULT_TABLES:
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
break;
/* From release 8.00 this will always return TRUE because NOPARTIAL is
no longer ever set (the restrictions have been removed). */
case PCRE_INFO_OKPARTIAL:
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
break;
case PCRE_INFO_JCHANGED:
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
break;
case PCRE_INFO_HASCRORLF:
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
break;
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
case PCRE_INFO_MATCHLIMIT:
if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_match;
break;
case PCRE_INFO_RECURSIONLIMIT:
if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_recursion;
break;
case PCRE_INFO_MATCH_EMPTY:
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcre_fullinfo.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains global variables that are exported by the PCRE library.
PCRE is thread-clean and doesn't use any global variables in the normal sense.
However, it calls memory allocation and freeing functions via the four
indirections below, and it can optionally do callouts, using the fifth
indirection. These values can be changed by the caller, but are shared between
all threads.
For MS Visual Studio and Symbian OS, there are problems in initializing these
variables to non-local functions. In these cases, therefore, an indirection via
a local function is used.
Also, when compiling for Virtual Pascal, things are done differently, and
global variables are not used. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#if defined _MSC_VER || defined __SYMBIAN32__
static void* LocalPcreMalloc(size_t aSize)
{
return malloc(aSize);
}
static void LocalPcreFree(void* aPtr)
{
free(aPtr);
}
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
#elif !defined VPCOMPAT
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
#endif
/* End of pcre_globals.c */
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_maketables(), which builds
character tables for PCRE in the current locale. The file is compiled on its
own as part of the PCRE library. However, it is also included in the
compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
# include "pcre_internal.h"
#endif
/*************************************************
* Create PCRE character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE and returns
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via PUBL(malloc)(), but when
compiled inside dftables, use malloc().
Arguments: none
Returns: pointer to the contiguous block of data
*/
#if defined COMPILE_PCRE8
const unsigned char *
pcre_maketables(void)
#elif defined COMPILE_PCRE16
const unsigned char *
pcre16_maketables(void)
#elif defined COMPILE_PCRE32
const unsigned char *
pcre32_maketables(void)
#endif
{
unsigned char *yield, *p;
int i;
#ifndef DFTABLES
yield = (unsigned char*)(PUBL(malloc))(tables_length);
#else
yield = (unsigned char*)malloc(tables_length);
#endif
if (yield == NULL) return NULL;
p = yield;
/* First comes the lower casing table */
for (i = 0; i < 256; i++) *p++ = tolower(i);
/* Next the case-flipping table */
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different.
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
From release 8.34 is is also correct for Perl space, because Perl added VT at
release 5.18.
Note also that it is possible for a character to be alnum or alpha without
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
at release 8.34. */
for (i = 0; i < 256; i++)
{
int x = 0;
if (isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
/* Note: strchr includes the terminating zero in the characters it considers.
In this instance, that is ok because we want binary zero to be flagged as a
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
return yield;
}
/* End of pcre_maketables.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the
string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
BOOL utf)
{
pcre_uint32 c;
(void)utf;
#ifdef SUPPORT_UTF
if (utf)
{
GETCHAR(c, ptr);
}
else
#endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF: *lenptr = 1; return TRUE;
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default: return FALSE;
}
/* NLTYPE_ANY */
else switch(c)
{
#ifdef EBCDIC
case CHAR_NEL:
#endif
case CHAR_LF:
case CHAR_VT:
case CHAR_FF: *lenptr = 1; return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
#ifndef EBCDIC
#ifdef COMPILE_PCRE8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
#endif /* COMPILE_PCRE8 */
#endif /* Not EBCDIC */
default: return FALSE;
}
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of
the string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
BOOL
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
BOOL utf)
{
pcre_uint32 c;
(void)utf;
ptr--;
#ifdef SUPPORT_UTF
if (utf)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else
#endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
case CHAR_CR: *lenptr = 1; return TRUE;
default: return FALSE;
}
/* NLTYPE_ANY */
else switch(c)
{
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
#ifdef EBCDIC
case CHAR_NEL:
#endif
case CHAR_VT:
case CHAR_FF:
case CHAR_CR: *lenptr = 1; return TRUE;
#ifndef EBCDIC
#ifdef COMPILE_PCRE8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
#endif /* COMPILE_PCRE8 */
#endif /* NotEBCDIC */
default: return FALSE;
}
}
/* End of pcre_newline.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define COMPILE_PCRE8
#include "pcre_internal.h"
/*************************************************
* Convert character value to UTF-8 *
*************************************************/
/* This function takes an integer value in the range 0 - 0x10ffff
and encodes it as a UTF-8 character in 1 to 4 pcre_uchars.
Arguments:
cvalue the character value
buffer pointer to buffer for result - at least 6 pcre_uchars long
Returns: number of characters placed in the buffer
*/
unsigned
int
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
{
#ifdef SUPPORT_UTF
register int i, j;
for (i = 0; i < PRIV(utf8_table1_size); i++)
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
buffer += i;
for (j = i; j > 0; j--)
{
*buffer-- = 0x80 | (cvalue & 0x3f);
cvalue >>= 6;
}
*buffer = PRIV(utf8_table2)[i] | cvalue;
return i + 1;
#else
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
#endif
}
/* End of pcre_ord2utf8.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_refcount(), which is an
auxiliary function that can be used to maintain a reference count in a compiled
pattern data block. This might be helpful in applications where the block is
shared by different users. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Maintain reference count *
*************************************************/
/* The reference count is a 16-bit field, initialized to zero. It is not
possible to transfer a non-zero count from one host to a different host that
has a different byte order - though I can't see why anyone in their right mind
would ever want to do that!
Arguments:
argument_re points to compiled code
adjust value to add to the count
Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_refcount(pcre16 *argument_re, int adjust)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_refcount(pcre32 *argument_re, int adjust)
#endif
{
REAL_PCRE *re = (REAL_PCRE *)argument_re;
if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->ref_count = (-adjust > re->ref_count)? 0 :
(adjust + re->ref_count > 65535)? 65535 :
re->ref_count + adjust;
return re->ref_count;
}
/* End of pcre_refcount.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for comparing and finding the length
of strings for different data item sizes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
#ifndef COMPILE_PCRE8
/*************************************************
* Compare string utilities *
*************************************************/
/* The following two functions compares two strings. Basically a strcmp
for non 8 bit characters.
Arguments:
str1 first string
str2 second string
Returns: 0 if both string are equal (like strcmp), 1 otherwise
*/
int
PRIV(strcmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2)
{
pcre_uchar c1;
pcre_uchar c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
#ifdef COMPILE_PCRE32
int
PRIV(strcmp_uc_uc_utf)(const pcre_uchar *str1, const pcre_uchar *str2)
{
pcre_uchar c1;
pcre_uchar c2;
while (*str1 != '\0' || *str2 != '\0')
{
c1 = UCHAR21INC(str1);
c2 = UCHAR21INC(str2);
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
#endif /* COMPILE_PCRE32 */
int
PRIV(strcmp_uc_c8)(const pcre_uchar *str1, const char *str2)
{
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
pcre_uchar c1;
pcre_uchar c2;
while (*str1 != '\0' || *ustr2 != '\0')
{
c1 = *str1++;
c2 = (pcre_uchar)*ustr2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
#ifdef COMPILE_PCRE32
int
PRIV(strcmp_uc_c8_utf)(const pcre_uchar *str1, const char *str2)
{
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
pcre_uchar c1;
pcre_uchar c2;
while (*str1 != '\0' || *ustr2 != '\0')
{
c1 = UCHAR21INC(str1);
c2 = (pcre_uchar)*ustr2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
#endif /* COMPILE_PCRE32 */
/* The following two functions compares two, fixed length
strings. Basically an strncmp for non 8 bit characters.
Arguments:
str1 first string
str2 second string
num size of the string
Returns: 0 if both string are equal (like strcmp), 1 otherwise
*/
int
PRIV(strncmp_uc_uc)(const pcre_uchar *str1, const pcre_uchar *str2, unsigned int num)
{
pcre_uchar c1;
pcre_uchar c2;
while (num-- > 0)
{
c1 = *str1++;
c2 = *str2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
int
PRIV(strncmp_uc_c8)(const pcre_uchar *str1, const char *str2, unsigned int num)
{
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
pcre_uchar c1;
pcre_uchar c2;
while (num-- > 0)
{
c1 = *str1++;
c2 = (pcre_uchar)*ustr2++;
if (c1 != c2)
return ((c1 > c2) << 1) - 1;
}
/* Both length and characters must be equal. */
return 0;
}
/* The following function returns with the length of
a zero terminated string. Basically an strlen for non 8 bit characters.
Arguments:
str string
Returns: length of the string
*/
unsigned int
PRIV(strlen_uc)(const pcre_uchar *str)
{
unsigned int len = 0;
while (*str++ != 0)
len++;
return len;
}
#endif /* !COMPILE_PCRE8 */
/* End of pcre_string_utils.c */
This source diff could not be displayed because it is too large. You can view the blob instead.
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre_version(), which returns a
string that identifies the PCRE version that is in use. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Return version string *
*************************************************/
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE_MAJOR to be defined without quotes, which is
convenient for user programs that want to test its value. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
production releases. Originally, it was used naively in this code:
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what we
really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. To cope with both ways of
handling this, I had resort to some messy hackery that does a test at run time.
I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre16_version(void)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre32_version(void)
#endif
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
}
/* End of pcre_version.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match an extended
class. It is used by both pcre_exec() and pcre_def_exec(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain values > 255 and/or Unicode properties.
Arguments:
c the character
data points to the flag byte of the XCLASS data
Returns: TRUE if character matches, else FALSE
*/
BOOL
PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
{
pcre_uchar t;
BOOL negated = (*data & XCL_NOT) != 0;
(void)utf;
#ifdef COMPILE_PCRE8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif
/* Character values < 256 are matched against a bitmap, if one is present. If
not, we still carry on, because there may be ranges that start below 256 in the
additional data. */
if (c < 256)
{
if ((*data & XCL_HASPROP) == 0)
{
if ((*data & XCL_MAP) == 0) return negated;
return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
}
if ((*data & XCL_MAP) != 0 &&
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
while ((t = *data++) != XCL_END)
{
pcre_uint32 x, y;
if (t == XCL_SINGLE)
{
#ifdef SUPPORT_UTF
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
}
else
#endif
x = *data++;
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
#ifdef SUPPORT_UTF
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
GETCHARINC(y, data); /* macro generates multiple statements */
}
else
#endif
{
x = *data++;
y = *data++;
}
if (c >= x && c <= y) return !negated;
}
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
const ucd_record *prop = GET_UCD(c);
BOOL isprop = t == XCL_PROP;
switch(*data)
{
case PT_ANY:
if (isprop) return !negated;
break;
case PT_LAMP:
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == isprop) return !negated;
break;
case PT_GC:
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
return !negated;
break;
case PT_PC:
if ((data[1] == prop->chartype) == isprop) return !negated;
break;
case PT_SC:
if ((data[1] == prop->script) == isprop) return !negated;
break;
case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
return !negated;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
if (isprop) return !negated;
break;
default:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
return !negated;
break;
}
break;
case PT_WORD:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
== isprop)
return !negated;
break;
case PT_UCNC:
if (c < 0xa0)
{
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT) == isprop)
return !negated;
}
else
{
if ((c < 0xd800 || c > 0xdfff) == isprop)
return !negated;
}
break;
/* The following three properties can occur only in an XCLASS, as there
is no \p or \P coding for them. */
/* Graphic character. Implement this as not Z (space or separator) and
not C (other), except for Cf (format) with a few exceptions. This seems
to be what Perl does. The exceptional characters are:
U+061C Arabic Letter Mark
U+180E Mongolian Vowel Separator
U+2066 - U+2069 Various "isolate"s
*/
case PT_PXGRAPH:
if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
(prop->chartype == ucp_Cf &&
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
)) == isprop)
return !negated;
break;
/* Printable character: same as graphic, with the addition of Zs, i.e.
not Zl and not Zp, and U+180E. */
case PT_PXPRINT:
if ((prop->chartype != ucp_Zl &&
prop->chartype != ucp_Zp &&
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
(prop->chartype == ucp_Cf &&
c != 0x061c && (c < 0x2066 || c > 0x2069))
)) == isprop)
return !negated;
break;
/* Punctuation: all Unicode punctuation, plus ASCII characters that
Unicode treats as symbols rather than punctuation, for Perl
compatibility (these are $+<=>^`|~). */
case PT_PXPUNCT:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
default:
return FALSE;
}
data += 2;
}
#endif /* SUPPORT_UCP */
}
return negated; /* char did not match */
}
/* End of pcre_xclass.c */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.
Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* Have to include stdlib.h in order to ensure that size_t is defined. */
#include <stdlib.h>
#define PCREPOSIX_EXP_DEFN extern
#define PCREPOSIX_EXP_DECL extern
/* Options, mostly defined by POSIX, but with some extras. */
#define PCRE_REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
#define PCRE_REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
#define PCRE_REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
#define PCRE_REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */
#define PCRE_REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
#define PCRE_REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
#define PCRE_REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
#define PCRE_REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
#define PCRE_REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
#define PCRE_REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
#define PCRE_REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
#define PCRE_REG_EXTENDED 0
/* Error values. Not all these are relevant or used by the wrapper. */
enum {
PCRE_REG_ASSERT = 1, /* internal error ? */
PCRE_REG_BADBR, /* invalid repeat counts in {} */
PCRE_REG_BADPAT, /* pattern error */
PCRE_REG_BADRPT, /* ? * + invalid */
PCRE_REG_EBRACE, /* unbalanced {} */
PCRE_REG_EBRACK, /* unbalanced [] */
PCRE_REG_ECOLLATE, /* collation error - not relevant */
PCRE_REG_ECTYPE, /* bad class */
PCRE_REG_EESCAPE, /* bad escape sequence */
PCRE_REG_EMPTY, /* empty expression */
PCRE_REG_EPAREN, /* unbalanced () */
PCRE_REG_ERANGE, /* bad range inside [] */
PCRE_REG_ESIZE, /* expression too big */
PCRE_REG_ESPACE, /* failed to get memory */
PCRE_REG_ESUBREG, /* bad back reference */
PCRE_REG_INVARG, /* bad argument */
PCRE_REG_NOMATCH /* match failed */
};
/* The structure representing a compiled regular expression. */
typedef struct {
void *re_pcre;
size_t re_nsub;
size_t re_erroffset;
} pcre_regex_t;
/* The structure in which a captured offset is returned. */
typedef int pcre_regoff_t;
typedef struct {
pcre_regoff_t rm_so;
pcre_regoff_t rm_eo;
} pcre_regmatch_t;
/* The functions */
PCREPOSIX_EXP_DECL int pcre_regcomp(pcre_regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int pcre_regexec(const pcre_regex_t *, const char *, size_t,
pcre_regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t pcre_regerror(int, const pcre_regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void pcre_regfree(pcre_regex_t *);
#endif /* End of pcreposix.h */
/*************************************************
* Unicode Property Table handler *
*************************************************/
#ifndef _UCP_H
#define _UCP_H
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.
IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.
ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c. */
/* These are the general character categories. */
enum {
ucp_C, /* Other */
ucp_L, /* Letter */
ucp_M, /* Mark */
ucp_N, /* Number */
ucp_P, /* Punctuation */
ucp_S, /* Symbol */
ucp_Z /* Separator */
};
/* These are the particular character categories. */
enum {
ucp_Cc, /* Control */
ucp_Cf, /* Format */
ucp_Cn, /* Unassigned */
ucp_Co, /* Private use */
ucp_Cs, /* Surrogate */
ucp_Ll, /* Lower case letter */
ucp_Lm, /* Modifier letter */
ucp_Lo, /* Other letter */
ucp_Lt, /* Title case letter */
ucp_Lu, /* Upper case letter */
ucp_Mc, /* Spacing mark */
ucp_Me, /* Enclosing mark */
ucp_Mn, /* Non-spacing mark */
ucp_Nd, /* Decimal number */
ucp_Nl, /* Letter number */
ucp_No, /* Other number */
ucp_Pc, /* Connector punctuation */
ucp_Pd, /* Dash punctuation */
ucp_Pe, /* Close punctuation */
ucp_Pf, /* Final punctuation */
ucp_Pi, /* Initial punctuation */
ucp_Po, /* Other punctuation */
ucp_Ps, /* Open punctuation */
ucp_Sc, /* Currency symbol */
ucp_Sk, /* Modifier symbol */
ucp_Sm, /* Mathematical symbol */
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
ucp_Zs /* Space separator */
};
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. */
enum {
ucp_gbCR, /* 0 */
ucp_gbLF, /* 1 */
ucp_gbControl, /* 2 */
ucp_gbExtend, /* 3 */
ucp_gbPrepend, /* 4 */
ucp_gbSpacingMark, /* 5 */
ucp_gbL, /* 6 Hangul syllable type L */
ucp_gbV, /* 7 Hangul syllable type V */
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
ucp_gbOther /* 12 */
};
/* These are the script identifications. */
enum {
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
/* New for Unicode 5.0: */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
/* New for Unicode 5.1: */
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
ucp_Lepcha,
ucp_Lycian,
ucp_Lydian,
ucp_Ol_Chiki,
ucp_Rejang,
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai,
/* New for Unicode 5.2: */
ucp_Avestan,
ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
ucp_Imperial_Aramaic,
ucp_Inscriptional_Pahlavi,
ucp_Inscriptional_Parthian,
ucp_Javanese,
ucp_Kaithi,
ucp_Lisu,
ucp_Meetei_Mayek,
ucp_Old_South_Arabian,
ucp_Old_Turkic,
ucp_Samaritan,
ucp_Tai_Tham,
ucp_Tai_Viet,
/* New for Unicode 6.0.0: */
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic,
/* New for Unicode 6.1.0: */
ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
ucp_Miao,
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri,
/* New for Unicode 7.0.0: */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Mende_Kikakui,
ucp_Modi,
ucp_Mro,
ucp_Nabataean,
ucp_Old_North_Arabian,
ucp_Old_Permic,
ucp_Pahawh_Hmong,
ucp_Palmyrene,
ucp_Psalter_Pahlavi,
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi
};
#endif
/* End of ucp.h */
INCLUDE_DIRECTORIES(".")
ADD_LIBRARY(regex OBJECT "regex.c" "regex.h")
#ifndef _REGEX_CONFIG_H_
#define _REGEX_CONFIG_H_
# define GAWK
# define NO_MBSUPPORT
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
/* Extended regular expression matching and search library.
Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA. */
#include "config.h"
/* Make sure noone compiles this code with a C++ compiler. */
#ifdef __cplusplus
# error "This is C code, use a C compiler"
#endif
#ifdef _LIBC
/* We have to keep the namespace clean. */
# define regfree(preg) __regfree (preg)
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
# define regerror(errcode, preg, errbuf, errbuf_size) \
__regerror(errcode, preg, errbuf, errbuf_size)
# define re_set_registers(bu, re, nu, st, en) \
__re_set_registers (bu, re, nu, st, en)
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
# define re_match(bufp, string, size, pos, regs) \
__re_match (bufp, string, size, pos, regs)
# define re_search(bufp, string, size, startpos, range, regs) \
__re_search (bufp, string, size, startpos, range, regs)
# define re_compile_pattern(pattern, length, bufp) \
__re_compile_pattern (pattern, length, bufp)
# define re_set_syntax(syntax) __re_set_syntax (syntax)
# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
# include "../locale/localeinfo.h"
#endif
#if defined (_MSC_VER)
#include <stdio.h> /* for size_t */
#endif
/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
GNU regex allows. Include it before <regex.h>, which correctly
#undefs RE_DUP_MAX and sets it to the right value. */
#include <limits.h>
#ifdef GAWK
#undef alloca
#define alloca alloca_is_bad_you_should_never_use_it
#endif
#include <regex.h>
#include "regex_internal.h"
#include "regex_internal.c"
#ifdef GAWK
# define bool int
# ifndef true
# define true (1)
# endif
# ifndef false
# define false (0)
# endif
#endif
#include "regcomp.c"
#include "regexec.c"
/* Binary backward compatibility. */
#if _LIBC
# include <shlib-compat.h>
# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
int re_max_failures = 2000;
# endif
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -306,6 +306,16 @@ ELSE()
MESSAGE(FATAL_ERROR "Asked for unknown SHA1 backend ${SHA1_BACKEND}")
ENDIF()
# Include PCRE and its POSIX regex compatibility layer when it is required
IF (HAVE_REGCOMP_L)
ADD_FEATURE_INFO(regex ON "using system regcomp_l")
ELSE()
ADD_SUBDIRECTORY("${libgit2_SOURCE_DIR}/deps/pcre" "${libgit2_BINARY_DIR}/deps/pcre")
LIST(APPEND LIBGIT2_INCLUDES "${libgit2_SOURCE_DIR}/deps/pcre")
LIST(APPEND LIBGIT2_OBJECTS $<TARGET_OBJECTS:pcre>)
ADD_FEATURE_INFO(regex ON "using bundled PCRE")
ENDIF()
# Optional external dependency: http-parser
FIND_PACKAGE(HTTP_Parser)
IF (USE_EXT_HTTP_PARSER AND HTTP_PARSER_FOUND AND HTTP_PARSER_VERSION_MAJOR EQUAL 2)
......
......@@ -8,14 +8,17 @@
#define INCLUDE_posix_regex_h__
#include "common.h"
#include <regex.h>
/*
* Regular expressions: if the operating system has p_regcomp_l,
* use that as our p_regcomp implementation, otherwise fall back
* to standard regcomp.
* use it so that we can override the locale environment variable.
* Otherwise, use our bundled PCRE implementation.
*/
#ifdef GIT_USE_REGCOMP_L
# include <regex.h>
# include <xlocale.h>
#define P_REG_EXTENDED REG_EXTENDED
#define P_REG_ICASE REG_ICASE
#define P_REG_NOMATCH REG_NOMATCH
......@@ -23,19 +26,28 @@
#define p_regex_t regex_t
#define p_regmatch_t regmatch_t
#define p_regerror regerror
#define p_regexec regexec
#define p_regfree regfree
#ifdef GIT_USE_REGCOMP_L
#include <xlocale.h>
GIT_INLINE(int) p_regcomp(p_regex_t *preg, const char *pattern, int cflags)
{
return regcomp_l(preg, pattern, cflags, (locale_t) 0);
}
#define p_regerror regerror
#define p_regexec regexec
#define p_regfree regfree
#else
# define p_regcomp regcomp
# include "pcreposix.h"
# define P_REG_EXTENDED PCRE_REG_EXTENDED
# define P_REG_ICASE PCRE_REG_ICASE
# define P_REG_NOMATCH PCRE_REG_NOMATCH
# define p_regex_t pcre_regex_t
# define p_regmatch_t pcre_regmatch_t
# define p_regcomp pcre_regcomp
# define p_regerror pcre_regerror
# define p_regexec pcre_regexec
# define p_regfree pcre_regfree
#endif
#endif
......@@ -13,8 +13,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <regex.h>
#include <io.h>
#include <direct.h>
#ifdef GIT_THREADS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment