Commit 84ba4944 by Vicent Martí

Merge pull request #1659 from arrbee/rename-cycle-fixes

Diff rename detection cycle fixes
parents ffb762fe e4acc3ba
...@@ -46,7 +46,7 @@ static char diff_pick_suffix(int mode) ...@@ -46,7 +46,7 @@ static char diff_pick_suffix(int mode)
{ {
if (S_ISDIR(mode)) if (S_ISDIR(mode))
return '/'; return '/';
else if (mode & 0100) //-V536 else if (mode & 0100) /* -V536 */
/* in git, modes are very regular, so we must have 0100755 mode */ /* in git, modes are very regular, so we must have 0100755 mode */
return '*'; return '*';
else else
...@@ -162,7 +162,7 @@ static int diff_print_one_raw( ...@@ -162,7 +162,7 @@ static int diff_print_one_raw(
if (delta->similarity > 0) if (delta->similarity > 0)
git_buf_printf(out, "%03u", delta->similarity); git_buf_printf(out, "%03u", delta->similarity);
if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED) if (delta->old_file.path != delta->new_file.path)
git_buf_printf( git_buf_printf(
out, "\t%s %s\n", delta->old_file.path, delta->new_file.path); out, "\t%s %s\n", delta->old_file.path, delta->new_file.path);
else else
......
...@@ -673,6 +673,15 @@ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) ...@@ -673,6 +673,15 @@ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
delta->status == GIT_DELTA_IGNORED); delta->status == GIT_DELTA_IGNORED);
} }
GIT_INLINE(void) delta_make_rename(
git_diff_delta *to, const git_diff_delta *from, uint32_t similarity)
{
to->status = GIT_DELTA_RENAMED;
to->similarity = similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
}
typedef struct { typedef struct {
uint32_t idx; uint32_t idx;
uint32_t similarity; uint32_t similarity;
...@@ -682,13 +691,15 @@ int git_diff_find_similar( ...@@ -682,13 +691,15 @@ int git_diff_find_similar(
git_diff_list *diff, git_diff_list *diff,
git_diff_find_options *given_opts) git_diff_find_options *given_opts)
{ {
size_t i, j, cache_size; size_t i, j, sigcache_size;
int error = 0, similarity; int error = 0, similarity;
git_diff_delta *from, *to; git_diff_delta *from, *to;
git_diff_find_options opts; git_diff_find_options opts;
size_t num_rewrites = 0, num_updates = 0; size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
void **cache; /* cache of similarity metric file signatures */ size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
diff_find_match *match_sources, *match_targets; /* cache of best matches */ void **sigcache; /* cache of similarity metric file signatures */
diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
git_diff_file swap;
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error; return error;
...@@ -697,70 +708,109 @@ int git_diff_find_similar( ...@@ -697,70 +708,109 @@ int git_diff_find_similar(
if (!git__is_uint32(diff->deltas.length)) if (!git__is_uint32(diff->deltas.length))
return 0; return 0;
cache_size = diff->deltas.length * 2; /* must store b/c length may change */ sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
cache = git__calloc(cache_size, sizeof(void *)); sigcache = git__calloc(sigcache_size, sizeof(void *));
GITERR_CHECK_ALLOC(cache); GITERR_CHECK_ALLOC(sigcache);
/* Label rename sources and targets
*
* This will also set self-similarity scores for MODIFIED files and
* mark them for splitting if break-rewrites is enabled
*/
git_vector_foreach(&diff->deltas, i, to) {
if (is_rename_source(diff, &opts, i, sigcache))
++num_srcs;
if (is_rename_target(diff, &opts, i, sigcache))
++num_tgts;
}
match_sources = git__calloc(diff->deltas.length, sizeof(diff_find_match)); /* if there are no candidate srcs or tgts, we're done */
match_targets = git__calloc(diff->deltas.length, sizeof(diff_find_match)); if (!num_srcs || !num_tgts)
GITERR_CHECK_ALLOC(match_sources); goto cleanup;
GITERR_CHECK_ALLOC(match_targets);
/* next find the most similar delta for each rename / copy candidate */ match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_tgts);
match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_srcs);
git_vector_foreach(&diff->deltas, i, to) { /*
size_t tried_sources = 0; * Find best-fit matches for rename / copy candidates
*/
match_targets[i].idx = (uint32_t)i; find_best_matches:
match_targets[i].similarity = 0; tried_tgts = num_bumped = 0;
git_vector_foreach(&diff->deltas, i, to) {
/* skip things that are not rename targets */ /* skip things that are not rename targets */
if (!is_rename_target(diff, &opts, i, cache)) if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue; continue;
git_vector_foreach(&diff->deltas, j, from) { tried_srcs = 0;
if (i == j)
continue;
git_vector_foreach(&diff->deltas, j, from) {
/* skip things that are not rename sources */ /* skip things that are not rename sources */
if (!is_rename_source(diff, &opts, j, cache)) if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
continue; continue;
/* cap on maximum targets we'll examine (per "to" file) */
if (++tried_sources > opts.rename_limit)
break;
/* calculate similarity for this pair and find best match */ /* calculate similarity for this pair and find best match */
if ((error = similarity_measure( if (i == j)
&similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0) similarity = -1; /* don't measure self-similarity here */
else if ((error = similarity_measure(
&similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
goto cleanup; goto cleanup;
if (similarity < 0) { /* not actually comparable */ /* if this pairing is better for the src and the tgt, keep it */
--tried_sources; if (similarity > 0 &&
continue; match_tgts[i].similarity < (uint32_t)similarity &&
} match_srcs[j].similarity < (uint32_t)similarity)
{
if (match_tgts[i].similarity > 0) {
match_tgts[match_srcs[j].idx].similarity = 0;
match_srcs[match_tgts[i].idx].similarity = 0;
++num_bumped;
}
match_tgts[i].similarity = (uint32_t)similarity;
match_tgts[i].idx = (uint32_t)j;
if (match_targets[i].similarity < (uint32_t)similarity && match_srcs[j].similarity = (uint32_t)similarity;
match_sources[j].similarity < (uint32_t)similarity) { match_srcs[j].idx = (uint32_t)i;
match_targets[i].similarity = (uint32_t)similarity;
match_sources[j].similarity = (uint32_t)similarity;
match_targets[i].idx = (uint32_t)j;
match_sources[j].idx = (uint32_t)i;
} }
if (++tried_srcs >= num_srcs)
break;
/* cap on maximum targets we'll examine (per "to" file) */
if (tried_srcs > opts.rename_limit)
break;
} }
if (++tried_tgts >= num_tgts)
break;
} }
/* next rewrite the diffs with renames / copies */ if (num_bumped > 0) /* try again if we bumped some items */
goto find_best_matches;
/*
* Rewrite the diffs with renames / copies
*/
tried_tgts = 0;
git_vector_foreach(&diff->deltas, i, to) { git_vector_foreach(&diff->deltas, i, to) {
/* check if this delta was the target of a similarity */ /* skip things that are not rename targets */
if ((similarity = (int)match_targets[i].similarity) <= 0) if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue; continue;
assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0); /* check if this delta was the target of a similarity */
best_match = &match_tgts[i];
if (!best_match->similarity)
continue;
from = GIT_VECTOR_GET(&diff->deltas, match_targets[i].idx); j = best_match->idx;
assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0); from = GIT_VECTOR_GET(&diff->deltas, j);
/* possible scenarios: /* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
...@@ -774,99 +824,84 @@ int git_diff_find_similar( ...@@ -774,99 +824,84 @@ int git_diff_find_similar(
if (delta_is_new_only(to)) { if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold) if (best_match->similarity < opts.rename_threshold)
continue; continue;
from->status = GIT_DELTA_RENAMED; delta_make_rename(to, from, best_match->similarity);
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
from->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++; num_rewrites++;
} else { } else {
assert(delta_is_split(to)); assert(delta_is_split(to));
if (similarity < (int)opts.rename_from_rewrite_threshold) if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue; continue;
from->status = GIT_DELTA_RENAMED; memcpy(&swap, &to->old_file, sizeof(swap));
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->status = GIT_DELTA_DELETED; delta_make_rename(to, from, best_match->similarity);
memset(&to->new_file, 0, sizeof(to->new_file)); num_rewrites--;
to->new_file.path = to->old_file.path;
to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; from->status = GIT_DELTA_DELETED;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) { memcpy(&from->old_file, &swap, sizeof(from->old_file));
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; memset(&from->new_file, 0, sizeof(from->new_file));
num_rewrites--; from->new_file.path = from->old_file.path;
} from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
num_updates++; num_updates++;
} }
} }
else if (delta_is_split(from)) { else if (delta_is_split(from)) {
git_diff_file swap;
if (delta_is_new_only(to)) { if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold) if (best_match->similarity < opts.rename_threshold)
continue; continue;
memcpy(&swap, &from->new_file, sizeof(swap)); delta_make_rename(to, from, best_match->similarity);
from->status = GIT_DELTA_RENAMED; from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
memcpy(&to->new_file, &swap, sizeof(to->new_file)); memset(&from->old_file, 0, sizeof(from->old_file));
to->old_file.path = to->new_file.path; from->old_file.path = from->new_file.path;
from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
num_updates++; num_updates++;
} else { } else {
assert(delta_is_split(from)); assert(delta_is_split(from));
if (similarity < (int)opts.rename_from_rewrite_threshold) if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue; continue;
memcpy(&swap, &to->new_file, sizeof(swap)); memcpy(&swap, &to->old_file, sizeof(swap));
to->status = GIT_DELTA_RENAMED; delta_make_rename(to, from, best_match->similarity);
to->similarity = (uint32_t)similarity; num_rewrites--;
memcpy(&to->new_file, &from->new_file, sizeof(to->new_file)); num_updates++;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
memcpy(&from->new_file, &swap, sizeof(from->new_file)); memcpy(&from->old_file, &swap, sizeof(from->old_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
/* in the off chance that we've just swapped the new /* if we've just swapped the new element into the correct
* element into the correct place, clear the SPLIT flag * place, clear the SPLIT flag
*/ */
if (match_targets[match_targets[i].idx].idx == i && if (match_tgts[j].idx == i &&
match_targets[match_targets[i].idx].similarity > match_tgts[j].similarity >
opts.rename_from_rewrite_threshold) { opts.rename_from_rewrite_threshold) {
from->status = GIT_DELTA_RENAMED; from->status = GIT_DELTA_RENAMED;
from->similarity = from->similarity = match_tgts[j].similarity;
(uint32_t)match_targets[match_targets[i].idx].similarity; match_tgts[j].similarity = 0;
match_targets[match_targets[i].idx].similarity = 0;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--; num_rewrites--;
} }
/* otherwise, if we just overwrote a source, update mapping */
else if (j > i && match_srcs[i].similarity > 0) {
match_tgts[match_srcs[i].idx].idx = j;
}
num_updates++; num_updates++;
} }
...@@ -874,31 +909,35 @@ int git_diff_find_similar( ...@@ -874,31 +909,35 @@ int git_diff_find_similar(
else if (delta_is_new_only(to)) { else if (delta_is_new_only(to)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) || if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
similarity < (int)opts.copy_threshold) best_match->similarity < opts.copy_threshold)
continue; continue;
to->status = GIT_DELTA_COPIED; to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity; to->similarity = best_match->similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
num_updates++; num_updates++;
} }
} }
/*
* Actually split and delete entries as needed
*/
if (num_rewrites > 0 || num_updates > 0) if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes( error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites, diff, diff->deltas.length - num_rewrites,
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES)); FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup: cleanup:
git__free(match_sources); git__free(match_srcs);
git__free(match_targets); git__free(match_tgts);
for (i = 0; i < cache_size; ++i) { for (i = 0; i < sigcache_size; ++i) {
if (cache[i] != NULL) if (sigcache[i] != NULL)
opts.metric->free_signature(cache[i], opts.metric->payload); opts.metric->free_signature(sigcache[i], opts.metric->payload);
} }
git__free(cache); git__free(sigcache);
if (!given_opts || !given_opts->metric) if (!given_opts || !given_opts->metric)
git__free(opts.metric); git__free(opts.metric);
......
...@@ -650,6 +650,59 @@ void test_diff_rename__file_exchange(void) ...@@ -650,6 +650,59 @@ void test_diff_rename__file_exchange(void)
git_buf_free(&c2); git_buf_free(&c2);
} }
void test_diff_rename__file_exchange_three(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT, c3 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
cl_git_pass(git_futils_readbuffer(&c1, "renames/untimely.txt"));
cl_git_pass(git_futils_readbuffer(&c2, "renames/songof7cities.txt"));
cl_git_pass(git_futils_readbuffer(&c3, "renames/ikeepsix.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/ikeepsix.txt", 0, 0));
cl_git_pass(git_futils_writebuffer(&c2, "renames/untimely.txt", 0, 0));
cl_git_pass(git_futils_writebuffer(&c3, "renames/songof7cities.txt", 0, 0));
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
cl_git_pass(git_index_add_bypath(index, "ikeepsix.txt"));
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(3, exp.files);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_MODIFIED]);
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(3, exp.files);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
git_buf_free(&c2);
git_buf_free(&c3);
}
void test_diff_rename__file_partial_exchange(void) void test_diff_rename__file_partial_exchange(void)
{ {
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT; git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
...@@ -702,7 +755,7 @@ void test_diff_rename__file_partial_exchange(void) ...@@ -702,7 +755,7 @@ void test_diff_rename__file_partial_exchange(void)
git_buf_free(&c2); git_buf_free(&c2);
} }
void test_diff_rename__file_split(void) void test_diff_rename__rename_and_copy_from_same_source(void)
{ {
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT; git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index; git_index *index;
...@@ -894,6 +947,7 @@ void test_diff_rename__rejected_match_can_match_others(void) ...@@ -894,6 +947,7 @@ void test_diff_rename__rejected_match_can_match_others(void)
cl_git_pass( cl_git_pass(
git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts)); git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
cl_git_pass(git_diff_find_similar(diff, &findopts)); cl_git_pass(git_diff_find_similar(diff, &findopts));
cl_git_pass( cl_git_pass(
...@@ -908,6 +962,77 @@ void test_diff_rename__rejected_match_can_match_others(void) ...@@ -908,6 +962,77 @@ void test_diff_rename__rejected_match_can_match_others(void)
git_buf_free(&two); git_buf_free(&two);
} }
static void write_similarity_file_two(const char *filename, size_t b_lines)
{
git_buf contents = GIT_BUF_INIT;
size_t i;
for (i = 0; i < b_lines; i++)
git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (int)(i+1));
for (i = b_lines; i < 50; i++)
git_buf_printf(&contents, "%0.2d - aaaaa%s", (int)(i+1), (i == 49 ? "" : "\r\n"));
cl_git_pass(
git_futils_writebuffer(&contents, filename, O_RDWR|O_CREAT, 0777));
git_buf_free(&contents);
}
void test_diff_rename__rejected_match_can_match_others_two(void)
{
git_reference *head, *selfsimilar;
git_index *index;
git_tree *tree;
git_checkout_opts opts = GIT_CHECKOUT_OPTS_INIT;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
const char *sources[] = { "a.txt", "b.txt" };
const char *targets[] = { "c.txt", "d.txt" };
struct rename_expected expect = { 2, sources, targets };
opts.checkout_strategy = GIT_CHECKOUT_FORCE;
cl_git_pass(git_reference_lookup(&head, g_repo, "HEAD"));
cl_git_pass(git_reference_symbolic_set_target(
&selfsimilar, head, "refs/heads/renames_similar_two"));
cl_git_pass(git_checkout_head(g_repo, &opts));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(p_unlink("renames/a.txt"));
cl_git_pass(p_unlink("renames/b.txt"));
cl_git_pass(git_index_remove_bypath(index, "a.txt"));
cl_git_pass(git_index_remove_bypath(index, "b.txt"));
write_similarity_file_two("renames/c.txt", 7);
write_similarity_file_two("renames/d.txt", 8);
cl_git_pass(git_index_add_bypath(index, "c.txt"));
cl_git_pass(git_index_add_bypath(index, "d.txt"));
cl_git_pass(git_index_write(index));
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(
git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
cl_git_pass(git_diff_find_similar(diff, &findopts));
cl_git_pass(
git_diff_foreach(diff, test_names_expected, NULL, NULL, &expect));
cl_assert(expect.idx > 0);
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_reference_free(head);
git_reference_free(selfsimilar);
}
void test_diff_rename__case_changes_are_split(void) void test_diff_rename__case_changes_are_split(void)
{ {
git_index *index; git_index *index;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment