Commit e4acc3ba by Russell Belfer

Fix rename looped reference issues

This makes the diff rename tracking code more careful about the
order in which it processes renames and more thorough in updating
the mapping of correct renames when an earlier rename update
alters the index of a later matched pair.
parent 3b334075
...@@ -46,7 +46,7 @@ static char diff_pick_suffix(int mode) ...@@ -46,7 +46,7 @@ static char diff_pick_suffix(int mode)
{ {
if (S_ISDIR(mode)) if (S_ISDIR(mode))
return '/'; return '/';
else if (mode & 0100) //-V536 else if (mode & 0100) /* -V536 */
/* in git, modes are very regular, so we must have 0100755 mode */ /* in git, modes are very regular, so we must have 0100755 mode */
return '*'; return '*';
else else
...@@ -162,7 +162,7 @@ static int diff_print_one_raw( ...@@ -162,7 +162,7 @@ static int diff_print_one_raw(
if (delta->similarity > 0) if (delta->similarity > 0)
git_buf_printf(out, "%03u", delta->similarity); git_buf_printf(out, "%03u", delta->similarity);
if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED) if (delta->old_file.path != delta->new_file.path)
git_buf_printf( git_buf_printf(
out, "\t%s %s\n", delta->old_file.path, delta->new_file.path); out, "\t%s %s\n", delta->old_file.path, delta->new_file.path);
else else
......
...@@ -673,6 +673,15 @@ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) ...@@ -673,6 +673,15 @@ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
delta->status == GIT_DELTA_IGNORED); delta->status == GIT_DELTA_IGNORED);
} }
GIT_INLINE(void) delta_make_rename(
git_diff_delta *to, const git_diff_delta *from, uint32_t similarity)
{
to->status = GIT_DELTA_RENAMED;
to->similarity = similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
}
typedef struct { typedef struct {
uint32_t idx; uint32_t idx;
uint32_t similarity; uint32_t similarity;
...@@ -682,13 +691,15 @@ int git_diff_find_similar( ...@@ -682,13 +691,15 @@ int git_diff_find_similar(
git_diff_list *diff, git_diff_list *diff,
git_diff_find_options *given_opts) git_diff_find_options *given_opts)
{ {
size_t i, j, cache_size; size_t i, j, sigcache_size;
int error = 0, similarity; int error = 0, similarity;
git_diff_delta *from, *to; git_diff_delta *from, *to;
git_diff_find_options opts; git_diff_find_options opts;
size_t num_rewrites = 0, num_updates = 0; size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
void **cache; /* cache of similarity metric file signatures */ size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
diff_find_match *match_sources, *match_targets; /* cache of best matches */ void **sigcache; /* cache of similarity metric file signatures */
diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
git_diff_file swap;
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error; return error;
...@@ -697,70 +708,109 @@ int git_diff_find_similar( ...@@ -697,70 +708,109 @@ int git_diff_find_similar(
if (!git__is_uint32(diff->deltas.length)) if (!git__is_uint32(diff->deltas.length))
return 0; return 0;
cache_size = diff->deltas.length * 2; /* must store b/c length may change */ sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
cache = git__calloc(cache_size, sizeof(void *)); sigcache = git__calloc(sigcache_size, sizeof(void *));
GITERR_CHECK_ALLOC(cache); GITERR_CHECK_ALLOC(sigcache);
/* Label rename sources and targets
*
* This will also set self-similarity scores for MODIFIED files and
* mark them for splitting if break-rewrites is enabled
*/
git_vector_foreach(&diff->deltas, i, to) {
if (is_rename_source(diff, &opts, i, sigcache))
++num_srcs;
if (is_rename_target(diff, &opts, i, sigcache))
++num_tgts;
}
match_sources = git__calloc(diff->deltas.length, sizeof(diff_find_match)); /* if there are no candidate srcs or tgts, we're done */
match_targets = git__calloc(diff->deltas.length, sizeof(diff_find_match)); if (!num_srcs || !num_tgts)
GITERR_CHECK_ALLOC(match_sources); goto cleanup;
GITERR_CHECK_ALLOC(match_targets);
/* next find the most similar delta for each rename / copy candidate */ match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_tgts);
match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_srcs);
git_vector_foreach(&diff->deltas, i, to) { /*
size_t tried_sources = 0; * Find best-fit matches for rename / copy candidates
*/
match_targets[i].idx = (uint32_t)i; find_best_matches:
match_targets[i].similarity = 0; tried_tgts = num_bumped = 0;
git_vector_foreach(&diff->deltas, i, to) {
/* skip things that are not rename targets */ /* skip things that are not rename targets */
if (!is_rename_target(diff, &opts, i, cache)) if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue; continue;
git_vector_foreach(&diff->deltas, j, from) { tried_srcs = 0;
if (i == j)
continue;
git_vector_foreach(&diff->deltas, j, from) {
/* skip things that are not rename sources */ /* skip things that are not rename sources */
if (!is_rename_source(diff, &opts, j, cache)) if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
continue; continue;
/* cap on maximum targets we'll examine (per "to" file) */
if (++tried_sources > opts.rename_limit)
break;
/* calculate similarity for this pair and find best match */ /* calculate similarity for this pair and find best match */
if ((error = similarity_measure( if (i == j)
&similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0) similarity = -1; /* don't measure self-similarity here */
else if ((error = similarity_measure(
&similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
goto cleanup; goto cleanup;
if (similarity < 0) { /* not actually comparable */ /* if this pairing is better for the src and the tgt, keep it */
--tried_sources; if (similarity > 0 &&
continue; match_tgts[i].similarity < (uint32_t)similarity &&
} match_srcs[j].similarity < (uint32_t)similarity)
{
if (match_tgts[i].similarity > 0) {
match_tgts[match_srcs[j].idx].similarity = 0;
match_srcs[match_tgts[i].idx].similarity = 0;
++num_bumped;
}
match_tgts[i].similarity = (uint32_t)similarity;
match_tgts[i].idx = (uint32_t)j;
if (match_targets[i].similarity < (uint32_t)similarity && match_srcs[j].similarity = (uint32_t)similarity;
match_sources[j].similarity < (uint32_t)similarity) { match_srcs[j].idx = (uint32_t)i;
match_targets[i].similarity = (uint32_t)similarity;
match_sources[j].similarity = (uint32_t)similarity;
match_targets[i].idx = (uint32_t)j;
match_sources[j].idx = (uint32_t)i;
} }
if (++tried_srcs >= num_srcs)
break;
/* cap on maximum targets we'll examine (per "to" file) */
if (tried_srcs > opts.rename_limit)
break;
} }
if (++tried_tgts >= num_tgts)
break;
} }
/* next rewrite the diffs with renames / copies */ if (num_bumped > 0) /* try again if we bumped some items */
goto find_best_matches;
/*
* Rewrite the diffs with renames / copies
*/
tried_tgts = 0;
git_vector_foreach(&diff->deltas, i, to) { git_vector_foreach(&diff->deltas, i, to) {
/* check if this delta was the target of a similarity */ /* skip things that are not rename targets */
if ((similarity = (int)match_targets[i].similarity) <= 0) if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue; continue;
assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0); /* check if this delta was the target of a similarity */
best_match = &match_tgts[i];
if (!best_match->similarity)
continue;
from = GIT_VECTOR_GET(&diff->deltas, match_targets[i].idx); j = best_match->idx;
assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0); from = GIT_VECTOR_GET(&diff->deltas, j);
/* possible scenarios: /* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
...@@ -774,99 +824,84 @@ int git_diff_find_similar( ...@@ -774,99 +824,84 @@ int git_diff_find_similar(
if (delta_is_new_only(to)) { if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold) if (best_match->similarity < opts.rename_threshold)
continue; continue;
from->status = GIT_DELTA_RENAMED; delta_make_rename(to, from, best_match->similarity);
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
from->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++; num_rewrites++;
} else { } else {
assert(delta_is_split(to)); assert(delta_is_split(to));
if (similarity < (int)opts.rename_from_rewrite_threshold) if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue; continue;
from->status = GIT_DELTA_RENAMED; memcpy(&swap, &to->old_file, sizeof(swap));
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->status = GIT_DELTA_DELETED; delta_make_rename(to, from, best_match->similarity);
memset(&to->new_file, 0, sizeof(to->new_file)); num_rewrites--;
to->new_file.path = to->old_file.path;
to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; from->status = GIT_DELTA_DELETED;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) { memcpy(&from->old_file, &swap, sizeof(from->old_file));
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; memset(&from->new_file, 0, sizeof(from->new_file));
num_rewrites--; from->new_file.path = from->old_file.path;
} from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
num_updates++; num_updates++;
} }
} }
else if (delta_is_split(from)) { else if (delta_is_split(from)) {
git_diff_file swap;
if (delta_is_new_only(to)) { if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold) if (best_match->similarity < opts.rename_threshold)
continue; continue;
memcpy(&swap, &from->new_file, sizeof(swap)); delta_make_rename(to, from, best_match->similarity);
from->status = GIT_DELTA_RENAMED; from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
memcpy(&to->new_file, &swap, sizeof(to->new_file)); memset(&from->old_file, 0, sizeof(from->old_file));
to->old_file.path = to->new_file.path; from->old_file.path = from->new_file.path;
from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
num_updates++; num_updates++;
} else { } else {
assert(delta_is_split(from)); assert(delta_is_split(from));
if (similarity < (int)opts.rename_from_rewrite_threshold) if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue; continue;
memcpy(&swap, &to->new_file, sizeof(swap)); memcpy(&swap, &to->old_file, sizeof(swap));
to->status = GIT_DELTA_RENAMED; delta_make_rename(to, from, best_match->similarity);
to->similarity = (uint32_t)similarity; num_rewrites--;
memcpy(&to->new_file, &from->new_file, sizeof(to->new_file)); num_updates++;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
memcpy(&from->new_file, &swap, sizeof(from->new_file)); memcpy(&from->old_file, &swap, sizeof(from->old_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
/* in the off chance that we've just swapped the new /* if we've just swapped the new element into the correct
* element into the correct place, clear the SPLIT flag * place, clear the SPLIT flag
*/ */
if (match_targets[match_targets[i].idx].idx == i && if (match_tgts[j].idx == i &&
match_targets[match_targets[i].idx].similarity > match_tgts[j].similarity >
opts.rename_from_rewrite_threshold) { opts.rename_from_rewrite_threshold) {
from->status = GIT_DELTA_RENAMED; from->status = GIT_DELTA_RENAMED;
from->similarity = from->similarity = match_tgts[j].similarity;
(uint32_t)match_targets[match_targets[i].idx].similarity; match_tgts[j].similarity = 0;
match_targets[match_targets[i].idx].similarity = 0;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--; num_rewrites--;
} }
/* otherwise, if we just overwrote a source, update mapping */
else if (j > i && match_srcs[i].similarity > 0) {
match_tgts[match_srcs[i].idx].idx = j;
}
num_updates++; num_updates++;
} }
...@@ -874,31 +909,35 @@ int git_diff_find_similar( ...@@ -874,31 +909,35 @@ int git_diff_find_similar(
else if (delta_is_new_only(to)) { else if (delta_is_new_only(to)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) || if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
similarity < (int)opts.copy_threshold) best_match->similarity < opts.copy_threshold)
continue; continue;
to->status = GIT_DELTA_COPIED; to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity; to->similarity = best_match->similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
num_updates++; num_updates++;
} }
} }
/*
* Actually split and delete entries as needed
*/
if (num_rewrites > 0 || num_updates > 0) if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes( error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites, diff, diff->deltas.length - num_rewrites,
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES)); FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup: cleanup:
git__free(match_sources); git__free(match_srcs);
git__free(match_targets); git__free(match_tgts);
for (i = 0; i < cache_size; ++i) { for (i = 0; i < sigcache_size; ++i) {
if (cache[i] != NULL) if (sigcache[i] != NULL)
opts.metric->free_signature(cache[i], opts.metric->payload); opts.metric->free_signature(sigcache[i], opts.metric->payload);
} }
git__free(cache); git__free(sigcache);
if (!given_opts || !given_opts->metric) if (!given_opts || !given_opts->metric)
git__free(opts.metric); git__free(opts.metric);
......
...@@ -755,7 +755,7 @@ void test_diff_rename__file_partial_exchange(void) ...@@ -755,7 +755,7 @@ void test_diff_rename__file_partial_exchange(void)
git_buf_free(&c2); git_buf_free(&c2);
} }
void test_diff_rename__file_split(void) void test_diff_rename__rename_and_copy_from_same_source(void)
{ {
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT; git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index; git_index *index;
...@@ -947,6 +947,7 @@ void test_diff_rename__rejected_match_can_match_others(void) ...@@ -947,6 +947,7 @@ void test_diff_rename__rejected_match_can_match_others(void)
cl_git_pass( cl_git_pass(
git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts)); git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
cl_git_pass(git_diff_find_similar(diff, &findopts)); cl_git_pass(git_diff_find_similar(diff, &findopts));
cl_git_pass( cl_git_pass(
...@@ -967,10 +968,10 @@ static void write_similarity_file_two(const char *filename, size_t b_lines) ...@@ -967,10 +968,10 @@ static void write_similarity_file_two(const char *filename, size_t b_lines)
size_t i; size_t i;
for (i = 0; i < b_lines; i++) for (i = 0; i < b_lines; i++)
git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (i+1)); git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (int)(i+1));
for (i = b_lines; i < 50; i++) for (i = b_lines; i < 50; i++)
git_buf_printf(&contents, "%0.2d - aaaaa%s", (i+1), (i == 49 ? "" : "\r\n")); git_buf_printf(&contents, "%0.2d - aaaaa%s", (int)(i+1), (i == 49 ? "" : "\r\n"));
cl_git_pass( cl_git_pass(
git_futils_writebuffer(&contents, filename, O_RDWR|O_CREAT, 0777)); git_futils_writebuffer(&contents, filename, O_RDWR|O_CREAT, 0777));
...@@ -1018,6 +1019,7 @@ void test_diff_rename__rejected_match_can_match_others_two(void) ...@@ -1018,6 +1019,7 @@ void test_diff_rename__rejected_match_can_match_others_two(void)
cl_git_pass( cl_git_pass(
git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts)); git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
cl_git_pass(git_diff_find_similar(diff, &findopts)); cl_git_pass(git_diff_find_similar(diff, &findopts));
cl_git_pass( cl_git_pass(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment