Unverified Commit 63307cba by Edward Thomson Committed by GitHub

Merge pull request #5226 from pks-t/pks/regexp-api

regexp: implement a new regular expression API
parents 70325370 f585b129
......@@ -20,15 +20,14 @@ FIND_PATH(PCRE2_INCLUDE_DIR NAMES pcre2posix.h)
# Look for the library.
FIND_LIBRARY(PCRE2_LIBRARY NAMES pcre2-8)
FIND_LIBRARY(PCRE2_POSIX_LIBRARY NAMES pcre2-posix)
# Handle the QUIETLY and REQUIRED arguments and set PCRE2_FOUND to TRUE if all listed variables are TRUE.
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE2 DEFAULT_MSG PCRE2_LIBRARY PCRE2_POSIX_LIBRARY PCRE2_INCLUDE_DIR)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE2 DEFAULT_MSG PCRE2_LIBRARY PCRE2_INCLUDE_DIR)
# Copy the results to the output variables.
IF(PCRE2_FOUND)
SET(PCRE2_LIBRARIES ${PCRE2_LIBRARY} ${PCRE2_POSIX_LIBRARY})
SET(PCRE2_LIBRARIES ${PCRE2_LIBRARY})
SET(PCRE2_INCLUDE_DIRS ${PCRE2_INCLUDE_DIR})
ELSE(PCRE2_FOUND)
SET(PCRE2_LIBRARIES)
......
......@@ -88,7 +88,6 @@
#include "git2/deprecated.h"
#include "posix.h"
#include "posix_regex.h"
#define DEFAULT_BUFSIZE 65536
#define FILEIO_BUFSIZE DEFAULT_BUFSIZE
......
......@@ -7,13 +7,15 @@
#include "config.h"
#include "sysdir.h"
#include "git2/config.h"
#include "git2/sys/config.h"
#include "vector.h"
#include "buf_text.h"
#include "config_backend.h"
#include "regexp.h"
#include "sysdir.h"
#include "transaction.h"
#include "vector.h"
#if GIT_WIN32
# include <windows.h>
#endif
......@@ -345,7 +347,7 @@ typedef struct {
git_config_iterator parent;
git_config_iterator *current;
const git_config *cfg;
p_regex_t regex;
git_regexp regex;
size_t i;
} all_iter;
......@@ -423,7 +425,7 @@ static int all_iter_glob_next(git_config_entry **entry, git_config_iterator *_it
*/
while ((error = all_iter_next(entry, _iter)) == 0) {
/* skip non-matching keys if regexp was provided */
if (p_regexec(&iter->regex, (*entry)->name, 0, NULL, 0) != 0)
if (git_regexp_match(&iter->regex, (*entry)->name) != 0)
continue;
/* and simply return if we like the entry's name */
......@@ -447,7 +449,7 @@ static void all_iter_glob_free(git_config_iterator *_iter)
{
all_iter *iter = (all_iter *) _iter;
p_regfree(&iter->regex);
git_regexp_dispose(&iter->regex);
all_iter_free(_iter);
}
......@@ -480,8 +482,7 @@ int git_config_iterator_glob_new(git_config_iterator **out, const git_config *cf
iter = git__calloc(1, sizeof(all_iter));
GIT_ERROR_CHECK_ALLOC(iter);
if ((result = p_regcomp(&iter->regex, regexp, P_REG_EXTENDED)) != 0) {
git_error_set_regex(&iter->regex, result);
if ((result = git_regexp_compile(&iter->regex, regexp, 0)) < 0) {
git__free(iter);
return -1;
}
......@@ -510,18 +511,13 @@ int git_config_backend_foreach_match(
{
git_config_entry *entry;
git_config_iterator* iter;
p_regex_t regex;
git_regexp regex;
int error = 0;
assert(backend && cb);
if (regexp != NULL) {
if ((error = p_regcomp(&regex, regexp, P_REG_EXTENDED)) != 0) {
git_error_set_regex(&regex, error);
p_regfree(&regex);
return -1;
}
}
if (regexp && git_regexp_compile(&regex, regexp, 0) < 0)
return -1;
if ((error = backend->iterator(&iter, backend)) < 0) {
iter = NULL;
......@@ -530,7 +526,7 @@ int git_config_backend_foreach_match(
while (!(iter->next(&entry, iter) < 0)) {
/* skip non-matching keys if regexp was provided */
if (regexp && p_regexec(&regex, entry->name, 0, NULL, 0) != 0)
if (regexp && git_regexp_match(&regex, entry->name) != 0)
continue;
/* abort iterator on non-zero return value */
......@@ -541,7 +537,7 @@ int git_config_backend_foreach_match(
}
if (regexp != NULL)
p_regfree(&regex);
git_regexp_dispose(&regex);
iter->free(iter);
......@@ -981,7 +977,7 @@ typedef struct {
git_config_iterator parent;
git_config_iterator *iter;
char *name;
p_regex_t regex;
git_regexp regex;
int have_regex;
} multivar_iter;
......@@ -997,7 +993,7 @@ static int multivar_iter_next(git_config_entry **entry, git_config_iterator *_it
if (!iter->have_regex)
return 0;
if (p_regexec(&iter->regex, (*entry)->value, 0, NULL, 0) == 0)
if (git_regexp_match(&iter->regex, (*entry)->value) == 0)
return 0;
}
......@@ -1012,7 +1008,7 @@ void multivar_iter_free(git_config_iterator *_iter)
git__free(iter->name);
if (iter->have_regex)
p_regfree(&iter->regex);
git_regexp_dispose(&iter->regex);
git__free(iter);
}
......@@ -1032,13 +1028,8 @@ int git_config_multivar_iterator_new(git_config_iterator **out, const git_config
goto on_error;
if (regexp != NULL) {
error = p_regcomp(&iter->regex, regexp, P_REG_EXTENDED);
if (error != 0) {
git_error_set_regex(&iter->regex, error);
error = -1;
p_regfree(&iter->regex);
if ((error = git_regexp_compile(&iter->regex, regexp, 0)) < 0)
goto on_error;
}
iter->have_regex = 1;
}
......
......@@ -18,6 +18,7 @@
#include "config_entries.h"
#include "config_parse.h"
#include "filebuf.h"
#include "regexp.h"
#include "strmap.h"
#include "sysdir.h"
#include "wildmatch.h"
......@@ -61,7 +62,7 @@ typedef struct {
static int config_read(git_config_entries *entries, const git_repository *repo, config_file *file, git_config_level_t level, int depth);
static int config_read_buffer(git_config_entries *entries, const git_repository *repo, config_file *file, git_config_level_t level, int depth, const char *buf, size_t buflen);
static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const p_regex_t *preg, const char *value);
static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const git_regexp *preg, const char *value);
static char *escape_value(const char *ptr);
/**
......@@ -350,21 +351,17 @@ static int config_set_multivar(
git_config_backend *cfg, const char *name, const char *regexp, const char *value)
{
config_file_backend *b = GIT_CONTAINER_OF(cfg, config_file_backend, parent);
char *key;
p_regex_t preg;
git_regexp preg;
int result;
char *key;
assert(regexp);
if ((result = git_config__normalize_name(name, &key)) < 0)
return result;
result = p_regcomp(&preg, regexp, P_REG_EXTENDED);
if (result != 0) {
git_error_set_regex(&preg, result);
result = -1;
if ((result = git_regexp_compile(&preg, regexp, 0)) < 0)
goto out;
}
/* If we do have it, set call config_write() and reload */
if ((result = config_write(b, name, key, &preg, value)) < 0)
......@@ -372,7 +369,7 @@ static int config_set_multivar(
out:
git__free(key);
p_regfree(&preg);
git_regexp_dispose(&preg);
return result;
}
......@@ -412,7 +409,7 @@ static int config_delete_multivar(git_config_backend *cfg, const char *name, con
config_file_backend *b = GIT_CONTAINER_OF(cfg, config_file_backend, parent);
git_config_entries *entries = NULL;
git_config_entry *entry = NULL;
p_regex_t preg = { 0 };
git_regexp preg = GIT_REGEX_INIT;
char *key = NULL;
int result;
......@@ -430,11 +427,8 @@ static int config_delete_multivar(git_config_backend *cfg, const char *name, con
goto out;
}
if ((result = p_regcomp(&preg, regexp, P_REG_EXTENDED)) != 0) {
git_error_set_regex(&preg, result);
result = -1;
if ((result = git_regexp_compile(&preg, regexp, 0)) < 0)
goto out;
}
if ((result = config_write(b, name, key, &preg, NULL)) < 0)
goto out;
......@@ -442,7 +436,7 @@ static int config_delete_multivar(git_config_backend *cfg, const char *name, con
out:
git_config_entries_free(entries);
git__free(key);
p_regfree(&preg);
git_regexp_dispose(&preg);
return result;
}
......@@ -928,7 +922,7 @@ struct write_data {
const char *section;
const char *orig_name;
const char *name;
const p_regex_t *preg;
const git_regexp *preg;
const char *value;
};
......@@ -1033,7 +1027,7 @@ static int write_on_variable(
/* If we have a regex to match the value, see if it matches */
if (has_matched && write_data->preg != NULL)
has_matched = (p_regexec(write_data->preg, var_value, 0, NULL, 0) == 0);
has_matched = (git_regexp_match(write_data->preg, var_value) == 0);
/* If this isn't the name/value we're looking for, simply dump the
* existing data back out and continue on.
......@@ -1094,7 +1088,8 @@ static int write_on_eof(
/*
* This is pretty much the parsing, except we write out anything we don't have
*/
static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const p_regex_t *preg, const char* value)
static int config_write(config_file_backend *cfg, const char *orig_key, const char *key, const git_regexp *preg, const char* value)
{
char *orig_section = NULL, *section = NULL, *orig_name, *name, *ldot;
git_buf buf = GIT_BUF_INIT, contents = GIT_BUF_INIT;
......
......@@ -15,6 +15,7 @@
#include "map.h"
#include "buf_text.h"
#include "config.h"
#include "regexp.h"
#include "repository.h"
typedef enum {
......@@ -25,7 +26,7 @@ typedef enum {
} git_diff_driver_t;
typedef struct {
p_regex_t re;
git_regexp re;
int flags;
} git_diff_driver_pattern;
......@@ -39,7 +40,7 @@ struct git_diff_driver {
uint32_t binary_flags;
uint32_t other_flags;
git_array_t(git_diff_driver_pattern) fn_patterns;
p_regex_t word_pattern;
git_regexp word_pattern;
char name[GIT_FLEX_ARRAY];
};
......@@ -113,7 +114,7 @@ static int diff_driver_add_patterns(
if (error < 0)
break;
if ((error = p_regcomp(&pat->re, buf.ptr, regex_flags)) != 0) {
if ((error = git_regexp_compile(&pat->re, buf.ptr, regex_flags)) != 0) {
/*
* TODO: issue a warning
*/
......@@ -130,7 +131,7 @@ static int diff_driver_add_patterns(
static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
{
return diff_driver_add_patterns(payload, entry->value, P_REG_EXTENDED);
return diff_driver_add_patterns(payload, entry->value, 0);
}
static int diff_driver_funcname(const git_config_entry *entry, void *payload)
......@@ -205,16 +206,12 @@ static int git_diff_driver_builtin(
if (ddef->fns &&
(error = diff_driver_add_patterns(
drv, ddef->fns, ddef->flags | P_REG_EXTENDED)) < 0)
drv, ddef->fns, ddef->flags)) < 0)
goto done;
if (ddef->words &&
(error = p_regcomp(
&drv->word_pattern, ddef->words, ddef->flags | P_REG_EXTENDED)))
{
error = git_error_set_regex(&drv->word_pattern, error);
(error = git_regexp_compile(&drv->word_pattern, ddef->words, ddef->flags)) < 0)
goto done;
}
if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
goto done;
......@@ -316,11 +313,10 @@ static int git_diff_driver_load(
goto done;
if (!ce || !ce->value)
/* no diff.<driver>.wordregex, so just continue */;
else if (!(error = p_regcomp(&drv->word_pattern, ce->value, P_REG_EXTENDED)))
else if (!(error = git_regexp_compile(&drv->word_pattern, ce->value, 0)))
found_driver = true;
else {
/* TODO: warn about bad regex instead of failure */
error = git_error_set_regex(&drv->word_pattern, error);
goto done;
}
......@@ -400,10 +396,10 @@ void git_diff_driver_free(git_diff_driver *driver)
return;
for (i = 0; i < git_array_size(driver->fn_patterns); ++i)
p_regfree(& git_array_get(driver->fn_patterns, i)->re);
git_regexp_dispose(& git_array_get(driver->fn_patterns, i)->re);
git_array_clear(driver->fn_patterns);
p_regfree(&driver->word_pattern);
git_regexp_dispose(&driver->word_pattern);
git__free(driver);
}
......@@ -451,19 +447,19 @@ static int diff_context_line__pattern_match(
git_diff_driver *driver, git_buf *line)
{
size_t i, maxi = git_array_size(driver->fn_patterns);
p_regmatch_t pmatch[2];
git_regmatch pmatch[2];
for (i = 0; i < maxi; ++i) {
git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, i);
if (!p_regexec(&pat->re, line->ptr, 2, pmatch, 0)) {
if (!git_regexp_search(&pat->re, line->ptr, 2, pmatch)) {
if (pat->flags & REG_NEGATE)
return false;
/* use pmatch data to trim line data */
i = (pmatch[1].rm_so >= 0) ? 1 : 0;
git_buf_consume(line, git_buf_cstr(line) + pmatch[i].rm_so);
git_buf_truncate(line, pmatch[i].rm_eo - pmatch[i].rm_so);
i = (pmatch[1].start >= 0) ? 1 : 0;
git_buf_consume(line, git_buf_cstr(line) + pmatch[i].start);
git_buf_truncate(line, pmatch[i].end - pmatch[i].start);
git_buf_rtrim(line);
return true;
......
......@@ -110,21 +110,6 @@ void git_error_set_str(int error_class, const char *string)
set_error_from_buffer(error_class);
}
int git_error_set_regex(const p_regex_t *regex, int error_code)
{
char error_buf[1024];
assert(error_code);
p_regerror(error_code, regex, error_buf, sizeof(error_buf));
git_error_set_str(GIT_ERROR_REGEX, error_buf);
if (error_code == P_REG_NOMATCH)
return GIT_ENOTFOUND;
return GIT_EINVALIDSPEC;
}
void git_error_clear(void)
{
if (GIT_GLOBAL->last_error != NULL) {
......
......@@ -8,7 +8,6 @@
#ifndef INCLUDE_errors_h__
#define INCLUDE_errors_h__
#include "posix_regex.h"
#include "common.h"
/*
......@@ -18,12 +17,6 @@ void git_error_set(int error_class, const char *fmt, ...) GIT_FORMAT_PRINTF(2, 3
void git_error_vset(int error_class, const char *fmt, va_list ap);
/**
* Set the error message for a regex failure, using the internal regex
* error code lookup and return a libgit error code.
*/
int git_error_set_regex(const p_regex_t *regex, int error_code);
/**
* Set error message for user callback if needed.
*
* If the error code in non-zero and no error message is set, this
......
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_posix_regex_h__
#define INCLUDE_posix_regex_h__
#include "common.h"
/*
* Regular expressions: if we were asked to use PCRE (either our
* bundled version or a system version) then use their regcomp
* compatible implementation.
*/
#ifdef GIT_REGEX_BUILTIN
# include "pcreposix.h"
# define P_REG_EXTENDED PCRE_REG_EXTENDED
# define P_REG_ICASE PCRE_REG_ICASE
# define P_REG_NOMATCH PCRE_REG_NOMATCH
# define p_regex_t pcre_regex_t
# define p_regmatch_t pcre_regmatch_t
# define p_regcomp pcre_regcomp
# define p_regerror pcre_regerror
# define p_regexec pcre_regexec
# define p_regfree pcre_regfree
/*
* Use the system-provided `regex` routines, whether that's via the
* PCRE emulation layer, or libc, preferring `regcomp_l` it's available.
*/
#else
# if defined(GIT_REGEX_PCRE2)
# include <pcre2posix.h>
# elif defined(GIT_REGEX_PCRE)
# include <pcreposix.h>
# else
# include <regex.h>
# endif
# define P_REG_EXTENDED REG_EXTENDED
# define P_REG_ICASE REG_ICASE
# define P_REG_NOMATCH REG_NOMATCH
# define p_regex_t regex_t
# define p_regmatch_t regmatch_t
# define p_regerror regerror
# define p_regexec regexec
# define p_regfree regfree
# ifdef GIT_REGEX_REGCOMP_L
# include <xlocale.h>
/*
 * Compile `pattern` into `preg` through regcomp_l() instead of regcomp().
 * A null locale_t is passed explicitly.
 * NOTE(review): presumably the null locale makes compilation independent of
 * the process-wide locale -- confirm against the platform's xlocale docs.
 * Returns whatever regcomp_l() returns.
 */
GIT_INLINE(int) p_regcomp(p_regex_t *preg, const char *pattern, int cflags)
{
return regcomp_l(preg, pattern, cflags, (locale_t) 0);
}
# else
# define p_regcomp regcomp
# endif /* GIT_REGEX_REGCOMP_L */
#endif
#endif
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "regexp.h"
#if defined(GIT_REGEX_BUILTIN) || defined(GIT_REGEX_PCRE)
/*
 * PCRE backend: compile `pattern` and store the resulting handle in `*r`.
 *
 * GIT_REGEXP_ICASE in `flags` is translated to PCRE_CASELESS; no other
 * option is passed to PCRE.  On failure `*r` is left NULL, the message
 * reported by PCRE is recorded as a GIT_ERROR_REGEX error and
 * GIT_EINVALIDSPEC is returned.  Returns 0 on success.
 */
int git_regexp_compile(git_regexp *r, const char *pattern, int flags)
{
	const int options = (flags & GIT_REGEXP_ICASE) ? PCRE_CASELESS : 0;
	const char *message = NULL;
	int offset = 0;

	*r = pcre_compile(pattern, options, &message, &offset, NULL);
	if (*r != NULL)
		return 0;

	git_error_set_str(GIT_ERROR_REGEX, message);
	return GIT_EINVALIDSPEC;
}
/*
 * PCRE backend: release the compiled pattern referenced by `*r` and reset
 * the handle to NULL so that it no longer points at freed memory.
 */
void git_regexp_dispose(git_regexp *r)
{
pcre_free(*r);
*r = NULL;
}
/*
 * PCRE backend: test whether `string` matches the compiled expression.
 *
 * No capture offsets are requested.  Returns 0 on a match, GIT_ENOTFOUND
 * when PCRE reports PCRE_ERROR_NOMATCH and GIT_EINVALIDSPEC for any other
 * negative PCRE result.
 */
int git_regexp_match(const git_regexp *r, const char *string)
{
	const int rc = pcre_exec(*r, NULL, string, (int) strlen(string), 0, 0, NULL, 0);

	if (rc >= 0)
		return 0;
	if (rc == PCRE_ERROR_NOMATCH)
		return GIT_ENOTFOUND;
	return GIT_EINVALIDSPEC;
}
/*
 * PCRE backend: match `string` and report capture group offsets.
 *
 * Up to `nmatches` entries of `matches` are filled: groups reported by PCRE
 * receive their start/end offsets (or -1 when PCRE reports a negative
 * offset), every remaining entry is set to -1/-1.  Small requests use an
 * on-stack offset vector, larger ones a temporary heap allocation.
 *
 * Returns 0 on a match, GIT_ENOTFOUND when nothing matched,
 * GIT_EINVALIDSPEC on any other PCRE error and -1 if the temporary
 * allocation fails.
 */
int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches)
{
	int stack_ovec[9], *offsets = stack_ovec;
	int rc, result = 0;
	size_t filled, n;

	/* The offset vector length always needs to be a multiple of three */
	if (nmatches > ARRAY_SIZE(stack_ovec) / 3) {
		offsets = git__calloc(nmatches * 3, sizeof(*offsets));
		GIT_ERROR_CHECK_ALLOC(offsets);
	}

	rc = pcre_exec(*r, NULL, string, (int) strlen(string), 0, 0, offsets, (int) nmatches * 3);

	if (rc < 0) {
		result = (rc == PCRE_ERROR_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC;
	} else {
		/* A zero result means the vector was too small: all slots are in use */
		if (rc == 0)
			rc = (int) nmatches;
		filled = (unsigned int) rc;

		for (n = 0; n < filled; n++) {
			matches[n].start = (offsets[n * 2] < 0) ? -1 : offsets[n * 2];
			matches[n].end = (offsets[n * 2 + 1] < 0) ? -1 : offsets[n * 2 + 1];
		}

		/* Groups beyond the ones PCRE reported did not take part in the match */
		for (; n < nmatches; n++)
			matches[n].start = matches[n].end = -1;
	}

	if (offsets != stack_ovec)
		git__free(offsets);

	return result;
}
#elif defined(GIT_REGEX_PCRE2)
/*
 * PCRE2 backend: compile `pattern` and store the resulting handle in `*r`.
 *
 * GIT_REGEXP_ICASE in `flags` is translated to PCRE2_CASELESS.  On failure
 * `*r` is left NULL, PCRE2's message for the error code is recorded as a
 * GIT_ERROR_REGEX error and GIT_EINVALIDSPEC is returned.  Returns 0 on
 * success.
 */
int git_regexp_compile(git_regexp *r, const char *pattern, int flags)
{
	unsigned char errmsg[1024];
	/*
	 * pcre2_compile() writes the error offset through a `PCRE2_SIZE *`;
	 * `unsigned long` has a different width on LLP64 platforms, so use
	 * the exact type.
	 */
	PCRE2_SIZE erroff;
	int error, cflags = 0;

	if (flags & GIT_REGEXP_ICASE)
		cflags |= PCRE2_CASELESS;

	if ((*r = pcre2_compile((const unsigned char *) pattern, PCRE2_ZERO_TERMINATED,
				cflags, &error, &erroff, NULL)) == NULL) {
		pcre2_get_error_message(error, errmsg, sizeof(errmsg));
		git_error_set_str(GIT_ERROR_REGEX, (char *) errmsg);
		return GIT_EINVALIDSPEC;
	}

	return 0;
}
/*
 * PCRE2 backend: release the compiled pattern referenced by `*r` and reset
 * the handle to NULL so that it no longer points at freed memory.
 */
void git_regexp_dispose(git_regexp *r)
{
pcre2_code_free(*r);
*r = NULL;
}
/*
 * PCRE2 backend: test whether `string` matches the compiled expression.
 *
 * A single-pair match data block is allocated for the call and released
 * again before returning, regardless of the outcome.
 *
 * Returns 0 on a match, GIT_ENOTFOUND when PCRE2 reports
 * PCRE2_ERROR_NOMATCH, GIT_EINVALIDSPEC for any other negative PCRE2
 * result and -1 if the match data cannot be allocated.
 */
int git_regexp_match(const git_regexp *r, const char *string)
{
	pcre2_match_data *data;
	int error;

	data = pcre2_match_data_create(1, NULL);
	GIT_ERROR_CHECK_ALLOC(data);

	error = pcre2_match(*r, (const unsigned char *) string, strlen(string),
			    0, 0, data, NULL);

	/* Free before inspecting the result so that non-matches do not leak */
	pcre2_match_data_free(data);

	if (error < 0)
		return (error == PCRE2_ERROR_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC;

	return 0;
}
/*
 * PCRE2 backend: match `string` and report capture group offsets.
 *
 * Up to `nmatches` entries of `matches` are filled: groups reported by
 * PCRE2 receive their start/end offsets (or -1 when the offset is
 * PCRE2_UNSET), every remaining entry is set to -1/-1.
 *
 * Returns 0 on a match, GIT_ENOTFOUND when nothing matched,
 * GIT_EINVALIDSPEC on any other PCRE2 error and -1 (with the out-of-memory
 * error set) if the match data cannot be allocated.
 */
int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches)
{
	pcre2_match_data *data;
	PCRE2_SIZE *ovec;
	int error;
	size_t i;

	/*
	 * Bail out directly on allocation failure: there is nothing to free
	 * yet and `error` has not been assigned, so it must not be inspected.
	 */
	if ((data = pcre2_match_data_create(nmatches, NULL)) == NULL) {
		git_error_set_oom();
		return -1;
	}

	if ((error = pcre2_match(*r, (const unsigned char *) string, strlen(string),
				 0, 0, data, NULL)) < 0)
		goto out;

	/*
	 * Zero means the offset vector was too small; never report more
	 * groups than the caller provided storage for.
	 */
	if (error == 0 || (unsigned int) error > nmatches)
		error = (int) nmatches;

	ovec = pcre2_get_ovector_pointer(data);

	for (i = 0; i < (unsigned int) error; i++) {
		matches[i].start = (ovec[i * 2] == PCRE2_UNSET) ? -1 : (ssize_t) ovec[i * 2];
		matches[i].end = (ovec[i * 2 + 1] == PCRE2_UNSET) ? -1 : (ssize_t) ovec[i * 2 + 1];
	}

	/* Groups beyond the ones PCRE2 reported did not take part in the match */
	for (i = (unsigned int) error; i < nmatches; i++)
		matches[i].start = matches[i].end = -1;

out:
	pcre2_match_data_free(data);

	if (error < 0)
		return (error == PCRE2_ERROR_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC;

	return 0;
}
#elif defined(GIT_REGEX_REGCOMP) || defined(GIT_REGEX_REGCOMP_L)
#if defined(GIT_REGEX_REGCOMP_L)
# include <xlocale.h>
#endif
/*
 * regcomp backend: compile `pattern` into `r`.
 *
 * Patterns are always compiled with REG_EXTENDED; GIT_REGEXP_ICASE in
 * `flags` additionally enables REG_ICASE.  On failure the regerror()
 * message is recorded as a GIT_ERROR_REGEX error and GIT_EINVALIDSPEC is
 * returned.  Returns 0 on success.
 */
int git_regexp_compile(git_regexp *r, const char *pattern, int flags)
{
int cflags = REG_EXTENDED, error;
char errmsg[1024];
if (flags & GIT_REGEXP_ICASE)
cflags |= REG_ICASE;
/*
 * Both preprocessor branches provide the condition for the shared error
 * block below; only the compile call differs between them.
 */
# if defined(GIT_REGEX_REGCOMP)
if ((error = regcomp(r, pattern, cflags)) != 0)
# else
/* GIT_REGEX_REGCOMP_L: compile with an explicit (null) locale_t argument */
if ((error = regcomp_l(r, pattern, cflags, (locale_t) 0)) != 0)
# endif
{
regerror(error, r, errmsg, sizeof(errmsg));
git_error_set_str(GIT_ERROR_REGEX, errmsg);
return GIT_EINVALIDSPEC;
}
return 0;
}
/*
 * regcomp backend: release all memory held by the compiled expression.
 *
 * The structure is zeroed afterwards, returning it to the GIT_REGEX_INIT
 * state.  This mirrors the PCRE backends, which reset their handle to
 * NULL, and keeps a repeated dispose of the same object from handing
 * already-freed state to regfree() a second time.
 */
void git_regexp_dispose(git_regexp *r)
{
	regfree(r);
	memset(r, 0, sizeof(*r));
}
/*
 * regcomp backend: test whether `string` matches the compiled expression.
 *
 * No match offsets are requested.  Returns 0 on a match, GIT_ENOTFOUND
 * when regexec() reports REG_NOMATCH and GIT_EINVALIDSPEC for any other
 * non-zero regexec() result.
 */
int git_regexp_match(const git_regexp *r, const char *string)
{
	const int rc = regexec(r, string, 0, NULL, 0);

	if (rc == 0)
		return 0;
	if (rc == REG_NOMATCH)
		return GIT_ENOTFOUND;
	return GIT_EINVALIDSPEC;
}
/*
 * regcomp backend: match `string` and report capture group offsets.
 *
 * On a match, all `nmatches` entries of `matches` are filled from the
 * regmatch_t array produced by regexec(); negative offsets are normalized
 * to -1.  Small requests use an on-stack array, larger ones a temporary
 * heap allocation.
 *
 * Returns 0 on a match, GIT_ENOTFOUND when nothing matched,
 * GIT_EINVALIDSPEC on any other regexec() error and -1 if the temporary
 * allocation fails.
 */
int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches)
{
	regmatch_t static_m[3], *m;
	int error;
	size_t i;

	if (nmatches <= ARRAY_SIZE(static_m))
		m = static_m;
	else
		m = git__calloc(nmatches, sizeof(*m));
	/* Without this check regexec() would write through a NULL pointer on OOM */
	GIT_ERROR_CHECK_ALLOC(m);

	if ((error = regexec(r, string, nmatches, m, 0)) != 0)
		goto out;

	for (i = 0; i < nmatches; i++) {
		matches[i].start = (m[i].rm_so < 0) ? -1 : m[i].rm_so;
		matches[i].end = (m[i].rm_eo < 0) ? -1 : m[i].rm_eo;
	}

out:
	if (nmatches > ARRAY_SIZE(static_m))
		git__free(m);

	if (error)
		return (error == REG_NOMATCH) ? GIT_ENOTFOUND : GIT_EINVALIDSPEC;

	return 0;
}
#endif
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_regexp_h__
#define INCLUDE_regexp_h__
#include "common.h"
/*
 * Select the regular expression backend at build time.  Each branch defines
 * the `git_regexp` type and `GIT_REGEX_INIT`, the initializer for a regexp
 * that has not been compiled yet.  The build fails if no backend macro is
 * defined.
 */
#if defined(GIT_REGEX_BUILTIN) || defined(GIT_REGEX_PCRE)
/* PCRE (bundled or system): git_regexp is a pointer to the compiled pattern */
# include "pcre.h"
typedef pcre *git_regexp;
# define GIT_REGEX_INIT NULL
#elif defined(GIT_REGEX_PCRE2)
/* PCRE2 with 8-bit code units: git_regexp is a pointer to the compiled pattern */
# define PCRE2_CODE_UNIT_WIDTH 8
# include <pcre2.h>
typedef pcre2_code *git_regexp;
# define GIT_REGEX_INIT NULL
#elif defined(GIT_REGEX_REGCOMP) || defined(GIT_REGEX_REGCOMP_L)
/* regcomp()/regcomp_l(): git_regexp is the regex_t structure itself */
# include <regex.h>
typedef regex_t git_regexp;
# define GIT_REGEX_INIT { 0 }
#else
# error "No regex backend"
#endif
/** Options supported by @git_regexp_compile. */
typedef enum {
/** Enable case-insensitive matching */
GIT_REGEXP_ICASE = (1 << 0)
} git_regexp_flags_t;
/** Structure containing information about regular expression matching groups */
typedef struct {
/** Start of the given match. -1 if the group didn't match anything */
ssize_t start;
/** End of the given match. -1 if the group didn't match anything */
ssize_t end;
} git_regmatch;
/**
* Compile a regular expression. The compiled expression needs to
* be cleaned up afterwards with `git_regexp_dispose`.
*
* @param r Pointer to the storage where to initialize the regular expression.
* @param pattern The pattern that shall be compiled.
* @param flags Flags to alter how the pattern shall be handled.
* 0 for defaults, otherwise see @git_regexp_flags_t.
* @return 0 on success, otherwise a negative return value.
*/
int git_regexp_compile(git_regexp *r, const char *pattern, int flags);
/**
* Free memory associated with the regular expression
*
* @param r The regular expression structure to dispose.
*/
void git_regexp_dispose(git_regexp *r);
/**
* Test whether a given string matches a compiled regular
* expression.
*
* @param r Compiled regular expression.
* @param string String to match against the regular expression.
* @return 0 if the string matches, a negative error code
* otherwise. GIT_ENOTFOUND if no match was found,
* GIT_EINVALIDSPEC if the regular expression matching
* was invalid.
*/
int git_regexp_match(const git_regexp *r, const char *string);
/**
* Search for matches inside of a given string.
*
* Given a regular expression with capturing groups, this
* function will populate provided @git_regmatch structures with
* offsets for each of the given matches. Non-matching groups
* will have start and end values of the respective @git_regmatch
* structure set to -1.
*
* @param r Compiled regular expression.
* @param string String to match against the regular expression.
* @param nmatches Number of @git_regmatch structures provided by
* the user.
* @param matches Pointer to an array of @git_regmatch structures.
* @return 0 if the string matches, a negative error code
* otherwise. GIT_ENOTFOUND if no match was found,
* GIT_EINVALIDSPEC if the regular expression matching
* was invalid.
*/
int git_regexp_search(const git_regexp *r, const char *string, size_t nmatches, git_regmatch *matches);
#endif
......@@ -12,6 +12,7 @@
#include "buffer.h"
#include "tree.h"
#include "refdb.h"
#include "regexp.h"
#include "git2.h"
......@@ -42,7 +43,7 @@ static int maybe_abbrev(git_object** out, git_repository *repo, const char *spec
return maybe_sha_or_abbrev(out, repo, spec, speclen);
}
static int build_regex(p_regex_t *regex, const char *pattern)
static int build_regex(git_regexp *regex, const char *pattern)
{
int error;
......@@ -51,13 +52,11 @@ static int build_regex(p_regex_t *regex, const char *pattern)
return GIT_EINVALIDSPEC;
}
error = p_regcomp(regex, pattern, P_REG_EXTENDED);
error = git_regexp_compile(regex, pattern, 0);
if (!error)
return 0;
error = git_error_set_regex(regex, error);
p_regfree(regex);
git_regexp_dispose(regex);
return error;
}
......@@ -66,7 +65,7 @@ static int maybe_describe(git_object**out, git_repository *repo, const char *spe
{
const char *substr;
int error;
p_regex_t regex;
git_regexp regex;
substr = strstr(spec, "-g");
......@@ -76,8 +75,8 @@ static int maybe_describe(git_object**out, git_repository *repo, const char *spe
if (build_regex(&regex, ".+-[0-9]+-g[0-9a-fA-F]+") < 0)
return -1;
error = p_regexec(&regex, spec, 0, NULL, 0);
p_regfree(&regex);
error = git_regexp_match(&regex, spec);
git_regexp_dispose(&regex);
if (error)
return GIT_ENOTFOUND;
......@@ -143,12 +142,11 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out,
{
git_reference *ref = NULL;
git_reflog *reflog = NULL;
p_regex_t preg;
git_regexp preg;
int error = -1;
size_t i, numentries, cur;
const git_reflog_entry *entry;
const char *msg;
p_regmatch_t regexmatches[2];
git_buf buf = GIT_BUF_INIT;
cur = position;
......@@ -168,12 +166,14 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out,
numentries = git_reflog_entrycount(reflog);
for (i = 0; i < numentries; i++) {
git_regmatch regexmatches[2];
entry = git_reflog_entry_byindex(reflog, i);
msg = git_reflog_entry_message(entry);
if (!msg)
continue;
if (p_regexec(&preg, msg, 2, regexmatches, 0))
if (git_regexp_search(&preg, msg, 2, regexmatches) < 0)
continue;
cur--;
......@@ -181,7 +181,8 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out,
if (cur > 0)
continue;
git_buf_put(&buf, msg+regexmatches[1].rm_so, regexmatches[1].rm_eo - regexmatches[1].rm_so);
if ((git_buf_put(&buf, msg+regexmatches[1].start, regexmatches[1].end - regexmatches[1].start)) < 0)
goto cleanup;
if ((error = git_reference_dwim(base_ref, repo, git_buf_cstr(&buf))) == 0)
goto cleanup;
......@@ -199,7 +200,7 @@ static int retrieve_previously_checked_out_branch_or_revision(git_object **out,
cleanup:
git_reference_free(ref);
git_buf_dispose(&buf);
p_regfree(&preg);
git_regexp_dispose(&preg);
git_reflog_free(reflog);
return error;
}
......@@ -448,7 +449,7 @@ cleanup:
return error;
}
static int walk_and_search(git_object **out, git_revwalk *walk, p_regex_t *regex)
static int walk_and_search(git_object **out, git_revwalk *walk, git_regexp *regex)
{
int error;
git_oid oid;
......@@ -460,7 +461,7 @@ static int walk_and_search(git_object **out, git_revwalk *walk, p_regex_t *regex
if ((error < 0) && (error != GIT_ENOTFOUND))
return -1;
if (!p_regexec(regex, git_commit_message((git_commit*)obj), 0, NULL, 0)) {
if (!git_regexp_match(regex, git_commit_message((git_commit*)obj))) {
*out = obj;
return 0;
}
......@@ -476,7 +477,7 @@ static int walk_and_search(git_object **out, git_revwalk *walk, p_regex_t *regex
static int handle_grep_syntax(git_object **out, git_repository *repo, const git_oid *spec_oid, const char *pattern)
{
p_regex_t preg;
git_regexp preg;
git_revwalk *walk = NULL;
int error;
......@@ -497,7 +498,7 @@ static int handle_grep_syntax(git_object **out, git_repository *repo, const git_
error = walk_and_search(out, walk, &preg);
cleanup:
p_regfree(&preg);
git_regexp_dispose(&preg);
git_revwalk_free(walk);
return error;
......
......@@ -7,6 +7,8 @@
#ifndef INCLUDE_userdiff_h__
#define INCLUDE_userdiff_h__
#include "regexp.h"
/*
* This file isolates the built in diff driver function name patterns.
* Most of these patterns are taken from Git (with permission from the
......@@ -29,7 +31,7 @@ typedef struct {
#define PATTERNS(NAME, FN_PATS, WORD_PAT) \
{ NAME, FN_PATS, WORD_PAT WORD_DEFAULT, 0 }
#define IPATTERN(NAME, FN_PATS, WORD_PAT) \
{ NAME, FN_PATS, WORD_PAT WORD_DEFAULT, P_REG_ICASE }
{ NAME, FN_PATS, WORD_PAT WORD_DEFAULT, GIT_REGEXP_ICASE }
/*
* The table of diff driver patterns
......
......@@ -9,23 +9,12 @@
# endif
#endif
#include <locale.h>
#include "clar_libgit2.h"
#include "futils.h"
#include "posix.h"
#include "userdiff.h"
#if LC_ALL > 0
static const char *old_locales[LC_ALL];
#endif
void test_core_posix__initialize(void)
{
#if LC_ALL > 0
memset(&old_locales, 0, sizeof(old_locales));
#endif
#ifdef GIT_WIN32
/* on win32, the WSA context needs to be initialized
* before any socket calls can be performed */
......@@ -156,115 +145,6 @@ void test_core_posix__utimes(void)
cl_must_pass(p_unlink("foo"));
}
static void try_set_locale(int category)
{
#if LC_ALL > 0
old_locales[category] = setlocale(category, NULL);
#endif
if (!setlocale(category, "UTF-8") &&
!setlocale(category, "c.utf8") &&
!setlocale(category, "en_US.UTF-8"))
cl_skip();
if (MB_CUR_MAX == 1)
cl_fail("Expected locale to be switched to multibyte");
}
void test_core_posix__p_regcomp_ignores_global_locale_ctype(void)
{
p_regex_t preg;
try_set_locale(LC_CTYPE);
cl_assert(!p_regcomp(&preg, "[\xc0-\xff][\x80-\xbf]", P_REG_EXTENDED));
p_regfree(&preg);
}
void test_core_posix__p_regcomp_ignores_global_locale_collate(void)
{
p_regex_t preg;
#ifdef GIT_WIN32
cl_skip();
#endif
try_set_locale(LC_COLLATE);
cl_assert(!p_regcomp(&preg, "[\xc0-\xff][\x80-\xbf]", P_REG_EXTENDED));
p_regfree(&preg);
}
void test_core_posix__p_regcomp_matches_digits_with_locale(void)
{
p_regex_t preg;
char c, str[2];
#ifdef GIT_WIN32
cl_skip();
#endif
try_set_locale(LC_COLLATE);
try_set_locale(LC_CTYPE);
cl_assert(!p_regcomp(&preg, "[[:digit:]]", P_REG_EXTENDED));
str[1] = '\0';
for (c = '0'; c <= '9'; c++) {
str[0] = c;
cl_assert(!p_regexec(&preg, str, 0, NULL, 0));
}
p_regfree(&preg);
}
void test_core_posix__p_regcomp_matches_alphabet_with_locale(void)
{
p_regex_t preg;
char c, str[2];
#ifdef GIT_WIN32
cl_skip();
#endif
try_set_locale(LC_COLLATE);
try_set_locale(LC_CTYPE);
cl_assert(!p_regcomp(&preg, "[[:alpha:]]", P_REG_EXTENDED));
str[1] = '\0';
for (c = 'a'; c <= 'z'; c++) {
str[0] = c;
cl_assert(!p_regexec(&preg, str, 0, NULL, 0));
}
for (c = 'A'; c <= 'Z'; c++) {
str[0] = c;
cl_assert(!p_regexec(&preg, str, 0, NULL, 0));
}
p_regfree(&preg);
}
void test_core_posix__p_regcomp_compile_userdiff_regexps(void)
{
size_t idx;
for (idx = 0; idx < ARRAY_SIZE(builtin_defs); ++idx) {
git_diff_driver_definition ddef = builtin_defs[idx];
int error = 0;
p_regex_t preg;
error = p_regcomp(&preg, ddef.fns, P_REG_EXTENDED | ddef.flags);
p_regfree(&preg);
cl_assert(!error);
error = p_regcomp(&preg, ddef.words, P_REG_EXTENDED);
p_regfree(&preg);
cl_assert(!error);
}
}
void test_core_posix__unlink_removes_symlink(void)
{
if (!git_path_supports_symlinks(clar_sandbox_path()))
......
#include "clar_libgit2.h"
#include <locale.h>
#include "regexp.h"
#include "userdiff.h"
#if LC_ALL > 0
static const char *old_locales[LC_ALL];
#endif
static git_regexp regex;
/*
 * Test fixture setup: clear the table of saved locale names (only present
 * when LC_ALL > 0) before each test.
 */
void test_core_regexp__initialize(void)
{
#if LC_ALL > 0
memset(&old_locales, 0, sizeof(old_locales));
#endif
}
/*
 * Test fixture teardown: dispose the file-level `regex` shared by the tests.
 * NOTE(review): the locale names saved in `old_locales` by try_set_locale()
 * are not restored here, so a locale switched by one test appears to stay
 * in effect for later tests -- confirm whether that is intended.
 */
void test_core_regexp__cleanup(void)
{
git_regexp_dispose(&regex);
}
/*
 * Switch `category` to a UTF-8 locale for the current test.
 *
 * The current locale name for the category is remembered in `old_locales`
 * (when LC_ALL > 0).  The names "UTF-8", "c.utf8" and "en_US.UTF-8" are
 * tried in that order; if none can be set the test is skipped.  The test
 * fails if MB_CUR_MAX is still 1 afterwards, i.e. the locale is not
 * multibyte.
 */
static void try_set_locale(int category)
{
#if LC_ALL > 0
/* NOTE(review): the stored pointer is whatever setlocale() returned; it is not copied */
old_locales[category] = setlocale(category, NULL);
#endif
if (!setlocale(category, "UTF-8") &&
!setlocale(category, "c.utf8") &&
!setlocale(category, "en_US.UTF-8"))
cl_skip();
if (MB_CUR_MAX == 1)
cl_fail("Expected locale to be switched to multibyte");
}
/*
 * A pattern made of raw high-byte ranges must still compile after LC_CTYPE
 * has been switched to a multibyte locale.
 */
void test_core_regexp__compile_ignores_global_locale_ctype(void)
{
try_set_locale(LC_CTYPE);
cl_git_pass(git_regexp_compile(&regex, "[\xc0-\xff][\x80-\xbf]", 0));
}
/*
 * A pattern made of raw high-byte ranges must still compile after
 * LC_COLLATE has been switched to a multibyte locale.  Skipped on Windows.
 */
void test_core_regexp__compile_ignores_global_locale_collate(void)
{
#ifdef GIT_WIN32
cl_skip();
#endif
try_set_locale(LC_COLLATE);
cl_git_pass(git_regexp_compile(&regex, "[\xc0-\xff][\x80-\xbf]", 0));
}
/*
 * With LC_COLLATE and LC_CTYPE switched to a multibyte locale, the
 * [[:digit:]] class must match each of the characters '0' through '9'.
 * Skipped on Windows.
 */
void test_core_regexp__regex_matches_digits_with_locale(void)
{
char c, str[2];
#ifdef GIT_WIN32
cl_skip();
#endif
try_set_locale(LC_COLLATE);
try_set_locale(LC_CTYPE);
cl_git_pass(git_regexp_compile(&regex, "[[:digit:]]", 0));
/* str is a one-character string whose first byte is replaced per iteration */
str[1] = '\0';
for (c = '0'; c <= '9'; c++) {
str[0] = c;
cl_git_pass(git_regexp_match(&regex, str));
}
}
/*
 * With LC_COLLATE and LC_CTYPE switched to a multibyte locale, the
 * [[:alpha:]] class must match every character in 'a'..'z' and 'A'..'Z'.
 * Skipped on Windows.
 */
void test_core_regexp__regex_matches_alphabet_with_locale(void)
{
char c, str[2];
#ifdef GIT_WIN32
cl_skip();
#endif
try_set_locale(LC_COLLATE);
try_set_locale(LC_CTYPE);
cl_git_pass(git_regexp_compile(&regex, "[[:alpha:]]", 0));
/* str is a one-character string whose first byte is replaced per iteration */
str[1] = '\0';
for (c = 'a'; c <= 'z'; c++) {
str[0] = c;
cl_git_pass(git_regexp_match(&regex, str));
}
for (c = 'A'; c <= 'Z'; c++) {
str[0] = c;
cl_git_pass(git_regexp_match(&regex, str));
}
}
/*
 * Every entry of `builtin_defs` must compile: the function-name pattern
 * with the entry's own flags, the word pattern with default flags.
 */
void test_core_regexp__compile_userdiff_regexps(void)
{
size_t idx;
for (idx = 0; idx < ARRAY_SIZE(builtin_defs); ++idx) {
git_diff_driver_definition ddef = builtin_defs[idx];
cl_git_pass(git_regexp_compile(&regex, ddef.fns, ddef.flags));
/* dispose between compiles so `regex` can be reused for the next pattern */
git_regexp_dispose(&regex);
cl_git_pass(git_regexp_compile(&regex, ddef.words, 0));
/* NOTE(review): the fixture cleanup disposes `regex` once more after this */
git_regexp_dispose(&regex);
}
}
/*
 * Searching with zero match slots and a NULL match array must succeed when
 * the string matches.
 */
void test_core_regexp__simple_search_matches(void)
{
cl_git_pass(git_regexp_compile(&regex, "a", 0));
cl_git_pass(git_regexp_search(&regex, "a", 0, NULL));
}
/*
 * A pattern compiled with GIT_REGEXP_ICASE must match input that differs
 * only in case.
 */
void test_core_regexp__case_insensitive_search_matches(void)
{
cl_git_pass(git_regexp_compile(&regex, "a", GIT_REGEXP_ICASE));
cl_git_pass(git_regexp_search(&regex, "A", 0, NULL));
}
/* Searching a string that does not match must report a failure. */
void test_core_regexp__nonmatching_search_returns_error(void)
{
cl_git_pass(git_regexp_compile(&regex, "a", 0));
cl_git_fail(git_regexp_search(&regex, "b", 0, NULL));
}
/*
 * With a single match slot, entry 0 must describe the whole match:
 * offsets 0..3 for "abc" against "abc".
 */
void test_core_regexp__search_finds_complete_match(void)
{
git_regmatch matches[1];
cl_git_pass(git_regexp_compile(&regex, "abc", 0));
cl_git_pass(git_regexp_search(&regex, "abc", 1, matches));
cl_assert_equal_i(matches[0].start, 0);
cl_assert_equal_i(matches[0].end, 3);
}
/*
 * Entry 0 must cover the whole match and entries 1 and 2 the two capture
 * groups: "(a*)(b*)" against "ab" gives 0..2, 0..1 and 1..2.
 */
void test_core_regexp__search_finds_correct_offsets(void)
{
git_regmatch matches[3];
cl_git_pass(git_regexp_compile(&regex, "(a*)(b*)", 0));
cl_git_pass(git_regexp_search(&regex, "ab", 3, matches));
cl_assert_equal_i(matches[0].start, 0);
cl_assert_equal_i(matches[0].end, 2);
cl_assert_equal_i(matches[1].start, 0);
cl_assert_equal_i(matches[1].end, 1);
cl_assert_equal_i(matches[2].start, 1);
cl_assert_equal_i(matches[2].end, 2);
}
/*
 * A group that matches the empty string must be reported with equal start
 * and end offsets (1..1 for "(b*)" in "ac"), not as unmatched.
 */
void test_core_regexp__search_finds_empty_group(void)
{
git_regmatch matches[3];
cl_git_pass(git_regexp_compile(&regex, "(a*)(b*)c", 0));
cl_git_pass(git_regexp_search(&regex, "ac", 3, matches));
cl_assert_equal_i(matches[0].start, 0);
cl_assert_equal_i(matches[0].end, 2);
cl_assert_equal_i(matches[1].start, 0);
cl_assert_equal_i(matches[1].end, 1);
cl_assert_equal_i(matches[2].start, 1);
cl_assert_equal_i(matches[2].end, 1);
}
/*
 * When the pattern has more groups than match slots were provided, the
 * search must still succeed and fill the available slots with the whole
 * match followed by the first group.
 */
void test_core_regexp__search_fills_matches_with_first_matching_groups(void)
{
git_regmatch matches[2];
cl_git_pass(git_regexp_compile(&regex, "(a)(b)(c)", 0));
cl_git_pass(git_regexp_search(&regex, "abc", 2, matches));
cl_assert_equal_i(matches[0].start, 0);
cl_assert_equal_i(matches[0].end, 3);
cl_assert_equal_i(matches[1].start, 0);
cl_assert_equal_i(matches[1].end, 1);
}
/*
 * An optional group that did not take part in the match must be reported
 * as -1/-1 while the groups after it keep their real offsets.
 */
void test_core_regexp__search_skips_nonmatching_group(void)
{
git_regmatch matches[4];
cl_git_pass(git_regexp_compile(&regex, "(a)(b)?(c)", 0));
cl_git_pass(git_regexp_search(&regex, "ac", 4, matches));
cl_assert_equal_i(matches[0].start, 0);
cl_assert_equal_i(matches[0].end, 2);
cl_assert_equal_i(matches[1].start, 0);
cl_assert_equal_i(matches[1].end, 1);
cl_assert_equal_i(matches[2].start, -1);
cl_assert_equal_i(matches[2].end, -1);
cl_assert_equal_i(matches[3].start, 1);
cl_assert_equal_i(matches[3].end, 2);
}
/*
 * When more match slots are provided than the pattern has groups, the
 * surplus slots must be initialized to -1/-1.
 */
void test_core_regexp__search_initializes_trailing_nonmatching_groups(void)
{
git_regmatch matches[3];
cl_git_pass(git_regexp_compile(&regex, "(a)bc", 0));
cl_git_pass(git_regexp_search(&regex, "abc", 3, matches));
cl_assert_equal_i(matches[0].start, 0);
cl_assert_equal_i(matches[0].end, 3);
cl_assert_equal_i(matches[1].start, 0);
cl_assert_equal_i(matches[1].end, 1);
cl_assert_equal_i(matches[2].start, -1);
cl_assert_equal_i(matches[2].end, -1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment