Commit 5dc98298 by Russell Belfer

Implement regex pattern diff driver

This implements the loading of regular expression pattern lists
for diff drivers that search for function context in that way.
This also changes the way that diff drivers update options and
interface with xdiff APIs to make them a little more flexible.
parent 3eadfecd
......@@ -86,4 +86,3 @@ Internal Objects
for hunk headers
** At some point, the logic for getting a filtered version of file content
or calculating the OID of a file may be moved into the driver.
......@@ -148,6 +148,9 @@ typedef enum {
* Of course, ignore rules are still checked for the directory itself.
*/
GIT_DIFF_FAST_UNTRACKED_DIRS = (1 << 19),
/** Treat all files as binary, disabling text diffs */
GIT_DIFF_FORCE_BINARY = (1 << 20),
} git_diff_option_t;
/**
......
......@@ -12,17 +12,17 @@
#include "diff_patch.h"
#include "diff_driver.h"
#include "strmap.h"
#include "pool.h"
#include "map.h"
#include "buf_text.h"
#include "repository.h"
GIT__USE_STRMAP;
typedef enum {
DIFF_DRIVER_AUTO = 0,
DIFF_DRIVER_FALSE = 1,
DIFF_DRIVER_TRUE = 2,
DIFF_DRIVER_NAMED = 3,
DIFF_DRIVER_BINARY = 1,
DIFF_DRIVER_TEXT = 2,
DIFF_DRIVER_PATTERNLIST = 3,
} git_diff_driver_t;
enum {
......@@ -34,19 +34,22 @@ enum {
/* data for finding function context for a given file type */
struct git_diff_driver {
git_diff_driver_t type;
git_strarray fn_patterns;
int binary; /* 0 => treat as text, 1 => treat as binary, -1 => auto */
uint32_t binary_flags;
uint32_t other_flags;
git_array_t(regex_t) fn_patterns;
regex_t word_pattern;
};
struct git_diff_driver_registry {
git_strmap *drivers;
git_pool strings;
};
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)
static git_diff_driver global_drivers[3] = {
{ DIFF_DRIVER_AUTO, { NULL, 0 }, -1 },
{ DIFF_DRIVER_FALSE, { NULL, 0 }, 1 },
{ DIFF_DRIVER_TRUE, { NULL, 0 }, 0 },
{ DIFF_DRIVER_AUTO, 0, 0, },
{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
{ DIFF_DRIVER_TEXT, GIT_DIFF_FORCE_TEXT, 0 },
};
git_diff_driver_registry *git_diff_driver_registry_new()
......@@ -56,9 +59,7 @@ git_diff_driver_registry *git_diff_driver_registry_new()
if (!reg)
return NULL;
if (git_pool_init(&reg->strings, 1, 0) < 0 ||
(reg->drivers = git_strmap_alloc()) == NULL)
{
if ((reg->drivers = git_strmap_alloc()) == NULL) {
git_diff_driver_registry_free(reg);
return NULL;
}
......@@ -68,22 +69,165 @@ git_diff_driver_registry *git_diff_driver_registry_new()
/*
 * Release a diff driver registry: free every driver stored in its map,
 * then the map, then the registry structure itself.
 *
 * Passing NULL is allowed and does nothing.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *drv;

	if (!reg)
		return;

	/* git_diff_driver_registry_new() calls this when the map allocation
	 * fails, so `drivers` may still be NULL and must not be iterated
	 */
	if (reg->drivers) {
		git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv));
		git_strmap_free(reg->drivers);
	}

	git__free(reg);
}
/*
 * Compile `name` as a regular expression with `regex_flags` and append
 * the compiled pattern to the driver's list of function-context patterns.
 *
 * Returns 0 on success, the result of giterr_set_regex() when the
 * pattern does not compile, or -1 when the pattern list cannot grow.
 */
static int diff_driver_add_funcname(
	git_diff_driver *drv, const char *name, int regex_flags)
{
	int error;
	regex_t re, *re_ptr;

	if ((error = regcomp(&re, name, regex_flags)) != 0) {
		/* TODO: warning about bad regex instead of failure */
		error = giterr_set_regex(&re, error);
		regfree(&re);
		return error;
	}

	git_array_alloc(drv->fn_patterns, re_ptr);
	if (!re_ptr) {
		/* the array never took ownership of the compiled pattern, so
		 * it has to be released here or it leaks
		 */
		regfree(&re);
		return -1;
	}

	/* the array slot now owns the compiled state held in `re` */
	memcpy(re_ptr, &re, sizeof(re));
	return 0;
}
static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
{
return diff_driver_add_funcname(payload, entry->value, REG_EXTENDED);
}
static int diff_driver_funcname(const git_config_entry *entry, void *payload)
{
return diff_driver_add_funcname(payload, entry->value, 0);
}
/*
 * Return the diff driver registry attached to `repo`, creating it on
 * first use.  Returns NULL (with an error message set) if the repository
 * still has no registry afterwards.
 */
static git_diff_driver_registry *git_repository_driver_registry(
	git_repository *repo)
{
	if (repo->diff_drivers == NULL) {
		git_diff_driver_registry *fresh = git_diff_driver_registry_new();
		git_diff_driver_registry *loser =
			git__compare_and_swap(&repo->diff_drivers, NULL, fresh);

		/* a non-NULL result means the swap lost a race, so the
		 * allocation that was not installed is released
		 */
		if (loser != NULL)
			git_diff_driver_registry_free(loser);
	}

	if (repo->diff_drivers == NULL)
		giterr_set(GITERR_REPOSITORY, "Unable to create diff driver registry");

	return repo->diff_drivers;
}
/*
 * Find or build the diff driver called `driver_name` for `repo`.
 *
 * A driver already present in the repository's registry is returned as
 * is.  Otherwise the driver is assembled from these config entries:
 *   diff.<driver>.binary     true  => the shared binary driver is returned
 *                            false => binary detection is forced off
 *   diff.<driver>.xfuncname  function-context patterns (extended regex)
 *   diff.<driver>.funcname   function-context patterns (basic regex)
 *   diff.<driver>.wordregex  word pattern (extended regex)
 *
 * Returns 0 with `*out` set on success, -1 if no registry is available,
 * GIT_ENOTFOUND if the repository config cannot be read (`*out` is left
 * untouched), or another error code on failure, in which case `*out`
 * is pointed at the shared "auto" driver.
 *
 * NOTE(review): nothing here inserts the new driver into reg->drivers,
 * so the registry lookup below can only hit if some other code path
 * stores it -- confirm who owns the driver returned through `*out`.
 */
static int git_diff_driver_load(
	git_diff_driver **out, git_repository *repo, const char *driver_name)
{
	int error = 0, bval;
	git_diff_driver_registry *reg;
	git_diff_driver *drv;
	git_config *cfg;
	git_buf name = GIT_BUF_INIT;
	const char *val;

	reg = git_repository_driver_registry(repo);
	if (!reg)
		return -1;
	else {
		khiter_t pos = git_strmap_lookup_index(reg->drivers, driver_name);
		if (git_strmap_valid_index(reg->drivers, pos)) {
			*out = git_strmap_value_at(reg->drivers, pos);
			return 0;
		}
	}

	/* if you can't read config for repo, just use default driver */
	if (git_repository_config__weakptr(&cfg, repo) < 0) {
		giterr_clear();
		return GIT_ENOTFOUND;
	}

	drv = git__calloc(1, sizeof(git_diff_driver));
	GITERR_CHECK_ALLOC(drv);
	drv->type = DIFF_DRIVER_AUTO;

	if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0)
		goto fail;
	if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto fail;
		/* diff.<driver>.binary unspecified, so just continue */
		giterr_clear();
	} else if (git_config_parse_bool(&bval, val) < 0) {
		/* TODO: warn that diff.<driver>.binary has invalid value */
		giterr_clear();
	} else if (bval) {
		/* if diff.<driver>.binary is true, just return the binary driver */
		git__free(drv);
		git_buf_free(&name);
		*out = &global_drivers[DIFF_DRIVER_BINARY];
		return 0;
	} else {
		/* if diff.<driver>.binary is false, force binary checks off */
		/* but still may have custom function context patterns, etc. */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
	}

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

	/* git_buf_printf appends, so the previous key has to be dropped
	 * before each new key is formatted into the same buffer
	 */
	git_buf_clear(&name);
	if ((error = git_buf_printf(&name, "diff.%s.xfuncname", driver_name)) < 0)
		goto fail;
	if ((error = git_config_get_multivar(
			cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto fail;
		/* no diff.<driver>.xfuncname values, so just continue */
		giterr_clear();
	}

	git_buf_clear(&name);
	if ((error = git_buf_printf(&name, "diff.%s.funcname", driver_name)) < 0)
		goto fail;
	if ((error = git_config_get_multivar(
			cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto fail;
		/* no diff.<driver>.funcname values, so just continue */
		giterr_clear();
	}

	/* if we found any patterns, set driver type to use correct callback */
	if (git_array_size(drv->fn_patterns) > 0)
		drv->type = DIFF_DRIVER_PATTERNLIST;

	git_buf_clear(&name);
	if ((error = git_buf_printf(&name, "diff.%s.wordregex", driver_name)) < 0)
		goto fail;
	if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto fail;
		/* no diff.<driver>.wordregex, so just continue */
		giterr_clear();
	} else if ((error = regcomp(&drv->word_pattern, val, REG_EXTENDED)) != 0) {
		/* TODO: warning about bad regex instead of failure */
		error = giterr_set_regex(&drv->word_pattern, error);
		goto fail;
	}

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

	git_buf_free(&name);
	*out = drv;
	return 0;

fail:
	/* release the scratch key buffer as well as the half-built driver */
	git_buf_free(&name);
	git_diff_driver_free(drv);
	*out = &global_drivers[DIFF_DRIVER_AUTO];
	return error;
}
int git_diff_driver_lookup(
......@@ -101,12 +245,12 @@ int git_diff_driver_lookup(
return error;
if (GIT_ATTR_FALSE(value)) {
*out = &global_drivers[DIFF_DRIVER_FALSE];
*out = &global_drivers[DIFF_DRIVER_BINARY];
return 0;
}
else if (GIT_ATTR_TRUE(value)) {
*out = &global_drivers[DIFF_DRIVER_TRUE];
*out = &global_drivers[DIFF_DRIVER_TEXT];
return 0;
}
......@@ -125,13 +269,27 @@ use_auto:
/*
 * Free a heap-allocated diff driver: every compiled function-context
 * pattern, the pattern array, the word pattern and the driver itself.
 *
 * Passing NULL is allowed and does nothing.
 *
 * NOTE(review): the driver memory is handed to git__free(), so this
 * must not be given one of the static `global_drivers` entries --
 * confirm callers never do so.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	size_t i;

	if (!driver)
		return;

	/* walk every stored pattern; the index must stay below the size */
	for (i = 0; i < git_array_size(driver->fn_patterns); ++i)
		regfree(git_array_get(driver->fn_patterns, i));
	git_array_clear(driver->fn_patterns);

	regfree(&driver->word_pattern);

	git__free(driver);
}
int git_diff_driver_is_binary(git_diff_driver *driver)
void git_diff_driver_update_options(
uint32_t *option_flags, git_diff_driver *driver)
{
return driver ? driver->binary : -1;
if ((*option_flags & FORCE_DIFFABLE) == 0)
*option_flags |= driver->binary_flags;
*option_flags |= driver->other_flags;
}
int git_diff_driver_content_is_binary(
......@@ -153,6 +311,29 @@ int git_diff_driver_content_is_binary(
return 0;
}
/*
 * Default function-context test: a line qualifies when its first
 * character is alphabetic, an underscore or a dollar sign.  The driver
 * and the line length are not consulted.
 */
static int diff_context_line__simple(
	git_diff_driver *driver, const char *line, long line_len)
{
	GIT_UNUSED(driver);
	GIT_UNUSED(line_len);

	if (git__isalpha(*line))
		return 1;

	switch (*line) {
	case '_':
	case '$':
		return 1;
	default:
		return 0;
	}
}
/*
 * Pattern-list function-context test: returns true when `line` matches
 * at least one of the driver's compiled function patterns, and false
 * otherwise (including when the driver has no patterns).
 *
 * `line` is handed to regexec(), so it must be NUL-terminated; the
 * `line_len` argument is not used.
 */
static int diff_context_line__pattern_match(
	git_diff_driver *driver, const char *line, long line_len)
{
	size_t i;

	GIT_UNUSED(line_len);

	/* try each stored pattern in order; regexec() returns 0 on a match */
	for (i = 0; i < git_array_size(driver->fn_patterns); ++i) {
		if (!regexec(git_array_get(driver->fn_patterns, i), line, 0, NULL, 0))
			return true;
	}

	return false;
}
static long diff_context_find(
const char *line,
long line_len,
......@@ -160,37 +341,46 @@ static long diff_context_find(
long out_size,
void *payload)
{
git_diff_driver *driver = payload;
const char *scan;
git_diff_find_context_payload *ctxt = payload;
GIT_UNUSED(driver);
if (line_len > 0 && line[line_len - 1] == '\n')
line_len--;
if (line_len > 0 && line[line_len - 1] == '\r')
line_len--;
if (!line_len)
if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
return -1;
git_buf_rtrim(&ctxt->line);
if (!git__isalpha(*line) && *line != '_' && *line != '$')
if (!ctxt->line.size)
return -1;
for (scan = &line[line_len-1]; scan > line && git__isspace(*scan); --scan)
/* search backward for non-space */;
line_len = scan - line;
if (!ctxt->match_line ||
!ctxt->match_line(ctxt->driver, ctxt->line.ptr, ctxt->line.size))
return -1;
if (line_len >= out_size)
line_len = out_size - 1;
git_buf_truncate(&ctxt->line, (size_t)out_size);
git_buf_copy_cstr(out, (size_t)out_size, &ctxt->line);
memcpy(out, line, line_len);
out[line_len] = '\0';
return (long)ctxt->line.size;
}
return line_len;
/*
 * Prepare the function-context callback and its payload for `driver`.
 *
 * With a NULL driver the callback is set to NULL and the payload is
 * simply zeroed.  Otherwise the callback is diff_context_find and the
 * payload records the driver, the line matcher chosen from the driver
 * type (pattern list vs. simple first-character test) and an
 * initialised scratch buffer for the line text.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	if (driver)
		*findfn_out = diff_context_find;
	else
		*findfn_out = NULL;

	memset(payload_out, 0, sizeof(*payload_out));

	if (!driver)
		return;

	payload_out->driver = driver;

	if (driver->type == DIFF_DRIVER_PATTERNLIST)
		payload_out->match_line = diff_context_line__pattern_match;
	else
		payload_out->match_line = diff_context_line__simple;

	git_buf_init(&payload_out->line, 0);
}
git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *driver)
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
GIT_UNUSED(driver);
return diff_context_find;
if (payload) {
git_buf_free(&payload->line);
payload->driver = NULL;
}
}
......@@ -8,6 +8,7 @@
#define INCLUDE_diff_driver_h__
#include "common.h"
#include "buffer.h"
typedef struct git_diff_driver_registry git_diff_driver_registry;
......@@ -19,8 +20,8 @@ typedef struct git_diff_driver git_diff_driver;
int git_diff_driver_lookup(git_diff_driver **, git_repository *, const char *);
void git_diff_driver_free(git_diff_driver *);
/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */
int git_diff_driver_is_binary(git_diff_driver *);
/* diff option flags to force off and on for this driver */
void git_diff_driver_update_options(uint32_t *option_flags, git_diff_driver *);
/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */
int git_diff_driver_content_is_binary(
......@@ -29,6 +30,20 @@ int git_diff_driver_content_is_binary(
typedef long (*git_diff_find_context_fn)(
const char *, long, char *, long, void *);
git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *);
typedef int (*git_diff_find_context_line)(
git_diff_driver *, const char *, long);
typedef struct {
git_diff_driver *driver;
git_diff_find_context_line match_line;
git_buf line;
} git_diff_find_context_payload;
void git_diff_find_context_init(
git_diff_find_context_fn *findfn_out,
git_diff_find_context_payload *payload_out,
git_diff_driver *driver);
void git_diff_find_context_clear(git_diff_find_context_payload *);
#endif
......@@ -19,14 +19,9 @@ static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
{
/* if we have diff opts, check max_size vs file size */
if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) == 0 &&
fc->opts && fc->opts->max_size >= 0)
{
git_off_t threshold = DIFF_MAX_FILESIZE;
if (fc->opts->max_size > 0)
threshold = fc->opts->max_size;
if (fc->file.size > threshold)
fc->opts_max_size > 0 &&
fc->file.size > fc->opts_max_size)
fc->file.flags |= GIT_DIFF_FLAG_BINARY;
}
return ((fc->file.flags & GIT_DIFF_FLAG_BINARY) != 0);
}
......@@ -44,9 +39,14 @@ static void diff_file_content_binary_by_content(git_diff_file_content *fc)
}
}
static int diff_file_content_init_common(git_diff_file_content *fc)
static int diff_file_content_init_common(
git_diff_file_content *fc, const git_diff_options *opts)
{
uint32_t flags = fc->opts ? fc->opts->flags : GIT_DIFF_NORMAL;
fc->opts_flags = opts ? opts->flags : GIT_DIFF_NORMAL;
if (opts && opts->max_size >= 0)
fc->opts_max_size = opts->max_size ?
opts->max_size : DIFF_MAX_FILESIZE;
if (!fc->driver) {
if (git_diff_driver_lookup(&fc->driver, fc->repo, "") < 0)
......@@ -54,19 +54,21 @@ static int diff_file_content_init_common(git_diff_file_content *fc)
fc->src = GIT_ITERATOR_TYPE_TREE;
}
/* give driver a chance to modify options */
git_diff_driver_update_options(&fc->opts_flags, fc->driver);
/* make sure file is conceivably mmap-able */
if ((git_off_t)((size_t)fc->file.size) != fc->file.size)
fc->file.flags |= GIT_DIFF_FLAG_BINARY;
/* check if user is forcing is to text diff the file */
else if (flags & GIT_DIFF_FORCE_TEXT)
/* check if user is forcing text diff the file */
else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) {
fc->file.flags &= ~GIT_DIFF_FLAG_BINARY;
fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY;
/* otherwise see if diff driver forces a behavior */
else switch (git_diff_driver_is_binary(fc->driver)) {
case 0: fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; break;
case 1: fc->file.flags |= GIT_DIFF_FLAG_BINARY; break;
default: break;
}
/* check if user is forcing binary diff the file */
else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) {
fc->file.flags &= ~GIT_DIFF_FLAG_NOT_BINARY;
fc->file.flags |= GIT_DIFF_FLAG_BINARY;
}
diff_file_content_binary_by_size(fc);
......@@ -95,7 +97,6 @@ int diff_file_content_init_from_diff(
memset(fc, 0, sizeof(*fc));
fc->repo = diff->repo;
fc->opts = &diff->opts;
fc->src = use_old ? diff->old_src : diff->new_src;
memcpy(&fc->file, file, sizeof(fc->file));
......@@ -123,7 +124,7 @@ int diff_file_content_init_from_diff(
if (!has_data)
fc->file.flags |= GIT_DIFF_FLAG__NO_DATA;
return diff_file_content_init_common(fc);
return diff_file_content_init_common(fc, &diff->opts);
}
int diff_file_content_init_from_blob(
......@@ -134,7 +135,6 @@ int diff_file_content_init_from_blob(
{
memset(fc, 0, sizeof(*fc));
fc->repo = repo;
fc->opts = opts;
fc->blob = blob;
if (!blob) {
......@@ -149,7 +149,7 @@ int diff_file_content_init_from_blob(
fc->map.data = (char *)git_blob_rawcontent(blob);
}
return diff_file_content_init_common(fc);
return diff_file_content_init_common(fc, opts);
}
int diff_file_content_init_from_raw(
......@@ -161,7 +161,6 @@ int diff_file_content_init_from_raw(
{
memset(fc, 0, sizeof(*fc));
fc->repo = repo;
fc->opts = opts;
if (!buf) {
fc->file.flags |= GIT_DIFF_FLAG__NO_DATA;
......@@ -175,7 +174,7 @@ int diff_file_content_init_from_raw(
fc->map.data = (char *)buf;
}
return diff_file_content_init_common(fc);
return diff_file_content_init_common(fc, opts);
}
static int diff_file_content_commit_to_str(
......
......@@ -15,9 +15,10 @@
/* expanded information for one side of a delta */
typedef struct {
git_repository *repo;
const git_diff_options *opts;
git_diff_file file;
git_diff_driver *driver;
uint32_t opts_flags;
git_off_t opts_max_size;
git_iterator_type_t src;
const git_blob *blob;
git_map map;
......
......@@ -96,8 +96,7 @@ static int diff_patch_load(git_diff_patch *patch, git_diff_output *output)
/* if no hunk and data callbacks and user doesn't care if data looks
* binary, then there is no need to actually load the data
*/
if (patch->ofile.opts &&
(patch->ofile.opts->flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 &&
if ((patch->ofile.opts_flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 &&
output && !output->hunk_cb && !output->data_cb)
return 0;
......@@ -718,6 +717,6 @@ static void diff_output_init(
static void diff_output_to_patch(git_diff_output *out, git_diff_patch *patch)
{
diff_output_init(
out, patch->ofile.opts,
out, NULL,
diff_patch_file_cb, diff_patch_hunk_cb, diff_patch_line_cb, patch);
}
......@@ -109,6 +109,7 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch)
{
git_xdiff_output *xo = (git_xdiff_output *)output;
git_xdiff_info info;
git_diff_find_context_payload findctxt;
mmfile_t old_xdiff_data, new_xdiff_data;
memset(&info, 0, sizeof(info));
......@@ -117,15 +118,18 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch)
xo->callback.priv = &info;
xo->config.find_func_priv = patch->ofile.driver;
xo->config.find_func = patch->ofile.driver ?
git_diff_driver_find_content_fn(patch->ofile.driver) : NULL;
git_diff_find_context_init(
&xo->config.find_func, &findctxt, patch->ofile.driver);
xo->config.find_func_priv = &findctxt;
if (xo->config.find_func != NULL)
xo->config.flags |= XDL_EMIT_FUNCNAMES;
else
xo->config.flags &= ~XDL_EMIT_FUNCNAMES;
/* TODO: check ofile.opts_flags to see if driver-specific per-file
* updates are needed to xo->params.flags
*/
old_xdiff_data.ptr = patch->ofile.map.data;
old_xdiff_data.size = patch->ofile.map.len;
......@@ -135,6 +139,8 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch)
xdl_diff(&old_xdiff_data, &new_xdiff_data,
&xo->params, &xo->config, &xo->callback);
git_diff_find_context_clear(&findctxt);
return xo->output.error;
}
......
......@@ -543,7 +543,7 @@ void test_diff_patch__line_counts_with_eofnl(void)
"index 378a7d9..3d0154e 100644\n"
"--- a/songof7cities.txt\n"
"+++ b/songof7cities.txt\n"
"@@ -42,7 +42,7 @@ With peoples undefeated of the dark, enduring blood\n"
"@@ -42,7 +42,7 @@ With peoples undefeated of the dark, enduring blood.\n"
" \n"
" To the sound of trumpets shall their seed restore my Cities\n"
" Wealthy and well-weaponed, that once more may I behold\n"
......
......@@ -558,7 +558,7 @@ void test_diff_rename__patch(void)
git_diff_patch *patch;
const git_diff_delta *delta;
char *text;
const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@ She sends'em abroad on her own affairs\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n";
const char *expected = "diff --git a/sixserving.txt b/ikeepsix.txt\nindex ad0a8e5..36020db 100644\n--- a/sixserving.txt\n+++ b/ikeepsix.txt\n@@ -1,3 +1,6 @@\n+I Keep Six Honest Serving-Men\n+=============================\n+\n I KEEP six honest serving-men\n (They taught me all I knew);\n Their names are What and Why and When\n@@ -21,4 +24,4 @@ She sends'em abroad on her own affairs,\n One million Hows, two million Wheres,\n And seven million Whys!\n \n- -- Rudyard Kipling\n+ -- Rudyard Kipling\n";
old_tree = resolve_commit_oid_to_tree(g_repo, sha0);
new_tree = resolve_commit_oid_to_tree(g_repo, sha1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment