Commit e4acc3ba by Russell Belfer

Fix rename looped reference issues

This makes the diff rename tracking code more careful about the
order in which it processes renames and more thorough in updating
the mapping of correct renames when an earlier rename update
alters the index of a later matched pair.
parent 3b334075
......@@ -46,7 +46,7 @@ static char diff_pick_suffix(int mode)
{
if (S_ISDIR(mode))
return '/';
else if (mode & 0100) //-V536
else if (mode & 0100) /* -V536 */
/* in git, modes are very regular, so we must have 0100755 mode */
return '*';
else
......@@ -162,7 +162,7 @@ static int diff_print_one_raw(
if (delta->similarity > 0)
git_buf_printf(out, "%03u", delta->similarity);
if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED)
if (delta->old_file.path != delta->new_file.path)
git_buf_printf(
out, "\t%s %s\n", delta->old_file.path, delta->new_file.path);
else
......
......@@ -673,6 +673,15 @@ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
delta->status == GIT_DELTA_IGNORED);
}
GIT_INLINE(void) delta_make_rename(
git_diff_delta *to, const git_diff_delta *from, uint32_t similarity)
{
to->status = GIT_DELTA_RENAMED;
to->similarity = similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
}
typedef struct {
uint32_t idx;
uint32_t similarity;
......@@ -682,13 +691,15 @@ int git_diff_find_similar(
git_diff_list *diff,
git_diff_find_options *given_opts)
{
size_t i, j, cache_size;
size_t i, j, sigcache_size;
int error = 0, similarity;
git_diff_delta *from, *to;
git_diff_find_options opts;
size_t num_rewrites = 0, num_updates = 0;
void **cache; /* cache of similarity metric file signatures */
diff_find_match *match_sources, *match_targets; /* cache of best matches */
size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
void **sigcache; /* cache of similarity metric file signatures */
diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
git_diff_file swap;
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error;
......@@ -697,70 +708,109 @@ int git_diff_find_similar(
if (!git__is_uint32(diff->deltas.length))
return 0;
cache_size = diff->deltas.length * 2; /* must store b/c length may change */
cache = git__calloc(cache_size, sizeof(void *));
GITERR_CHECK_ALLOC(cache);
sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
sigcache = git__calloc(sigcache_size, sizeof(void *));
GITERR_CHECK_ALLOC(sigcache);
/* Label rename sources and targets
*
* This will also set self-similarity scores for MODIFIED files and
* mark them for splitting if break-rewrites is enabled
*/
git_vector_foreach(&diff->deltas, i, to) {
if (is_rename_source(diff, &opts, i, sigcache))
++num_srcs;
if (is_rename_target(diff, &opts, i, sigcache))
++num_tgts;
}
match_sources = git__calloc(diff->deltas.length, sizeof(diff_find_match));
match_targets = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_sources);
GITERR_CHECK_ALLOC(match_targets);
/* if there are no candidate srcs or tgts, we're done */
if (!num_srcs || !num_tgts)
goto cleanup;
/* next find the most similar delta for each rename / copy candidate */
match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_tgts);
match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(match_srcs);
git_vector_foreach(&diff->deltas, i, to) {
size_t tried_sources = 0;
/*
* Find best-fit matches for rename / copy candidates
*/
match_targets[i].idx = (uint32_t)i;
match_targets[i].similarity = 0;
find_best_matches:
tried_tgts = num_bumped = 0;
git_vector_foreach(&diff->deltas, i, to) {
/* skip things that are not rename targets */
if (!is_rename_target(diff, &opts, i, cache))
if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue;
git_vector_foreach(&diff->deltas, j, from) {
if (i == j)
continue;
tried_srcs = 0;
git_vector_foreach(&diff->deltas, j, from) {
/* skip things that are not rename sources */
if (!is_rename_source(diff, &opts, j, cache))
if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
continue;
/* cap on maximum targets we'll examine (per "to" file) */
if (++tried_sources > opts.rename_limit)
break;
/* calculate similarity for this pair and find best match */
if ((error = similarity_measure(
&similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0)
if (i == j)
similarity = -1; /* don't measure self-similarity here */
else if ((error = similarity_measure(
&similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
goto cleanup;
if (similarity < 0) { /* not actually comparable */
--tried_sources;
continue;
}
/* if this pairing is better for the src and the tgt, keep it */
if (similarity > 0 &&
match_tgts[i].similarity < (uint32_t)similarity &&
match_srcs[j].similarity < (uint32_t)similarity)
{
if (match_tgts[i].similarity > 0) {
match_tgts[match_srcs[j].idx].similarity = 0;
match_srcs[match_tgts[i].idx].similarity = 0;
++num_bumped;
}
match_tgts[i].similarity = (uint32_t)similarity;
match_tgts[i].idx = (uint32_t)j;
if (match_targets[i].similarity < (uint32_t)similarity &&
match_sources[j].similarity < (uint32_t)similarity) {
match_targets[i].similarity = (uint32_t)similarity;
match_sources[j].similarity = (uint32_t)similarity;
match_targets[i].idx = (uint32_t)j;
match_sources[j].idx = (uint32_t)i;
match_srcs[j].similarity = (uint32_t)similarity;
match_srcs[j].idx = (uint32_t)i;
}
if (++tried_srcs >= num_srcs)
break;
/* cap on maximum targets we'll examine (per "to" file) */
if (tried_srcs > opts.rename_limit)
break;
}
if (++tried_tgts >= num_tgts)
break;
}
/* next rewrite the diffs with renames / copies */
if (num_bumped > 0) /* try again if we bumped some items */
goto find_best_matches;
/*
* Rewrite the diffs with renames / copies
*/
tried_tgts = 0;
git_vector_foreach(&diff->deltas, i, to) {
/* check if this delta was the target of a similarity */
if ((similarity = (int)match_targets[i].similarity) <= 0)
/* skip things that are not rename targets */
if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue;
assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0);
/* check if this delta was the target of a similarity */
best_match = &match_tgts[i];
if (!best_match->similarity)
continue;
from = GIT_VECTOR_GET(&diff->deltas, match_targets[i].idx);
assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0);
j = best_match->idx;
from = GIT_VECTOR_GET(&diff->deltas, j);
/* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
......@@ -774,99 +824,84 @@ int git_diff_find_similar(
if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold)
if (best_match->similarity < opts.rename_threshold)
continue;
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
delta_make_rename(to, from, best_match->similarity);
from->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++;
} else {
assert(delta_is_split(to));
if (similarity < (int)opts.rename_from_rewrite_threshold)
if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue;
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
memcpy(&swap, &to->old_file, sizeof(swap));
to->status = GIT_DELTA_DELETED;
memset(&to->new_file, 0, sizeof(to->new_file));
to->new_file.path = to->old_file.path;
to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
delta_make_rename(to, from, best_match->similarity);
num_rewrites--;
from->status = GIT_DELTA_DELETED;
memcpy(&from->old_file, &swap, sizeof(from->old_file));
memset(&from->new_file, 0, sizeof(from->new_file));
from->new_file.path = from->old_file.path;
from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
num_updates++;
}
}
else if (delta_is_split(from)) {
git_diff_file swap;
if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold)
if (best_match->similarity < opts.rename_threshold)
continue;
memcpy(&swap, &from->new_file, sizeof(swap));
delta_make_rename(to, from, best_match->similarity);
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
memcpy(&to->new_file, &swap, sizeof(to->new_file));
to->old_file.path = to->new_file.path;
memset(&from->old_file, 0, sizeof(from->old_file));
from->old_file.path = from->new_file.path;
from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
num_updates++;
} else {
assert(delta_is_split(from));
if (similarity < (int)opts.rename_from_rewrite_threshold)
if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue;
memcpy(&swap, &to->new_file, sizeof(swap));
memcpy(&swap, &to->old_file, sizeof(swap));
to->status = GIT_DELTA_RENAMED;
to->similarity = (uint32_t)similarity;
memcpy(&to->new_file, &from->new_file, sizeof(to->new_file));
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
delta_make_rename(to, from, best_match->similarity);
num_rewrites--;
num_updates++;
memcpy(&from->new_file, &swap, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
memcpy(&from->old_file, &swap, sizeof(from->old_file));
/* in the off chance that we've just swapped the new
* element into the correct place, clear the SPLIT flag
/* if we've just swapped the new element into the correct
* place, clear the SPLIT flag
*/
if (match_targets[match_targets[i].idx].idx == i &&
match_targets[match_targets[i].idx].similarity >
if (match_tgts[j].idx == i &&
match_tgts[j].similarity >
opts.rename_from_rewrite_threshold) {
from->status = GIT_DELTA_RENAMED;
from->similarity =
(uint32_t)match_targets[match_targets[i].idx].similarity;
match_targets[match_targets[i].idx].similarity = 0;
from->status = GIT_DELTA_RENAMED;
from->similarity = match_tgts[j].similarity;
match_tgts[j].similarity = 0;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
/* otherwise, if we just overwrote a source, update mapping */
else if (j > i && match_srcs[i].similarity > 0) {
match_tgts[match_srcs[i].idx].idx = j;
}
num_updates++;
}
......@@ -874,31 +909,35 @@ int git_diff_find_similar(
else if (delta_is_new_only(to)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
similarity < (int)opts.copy_threshold)
best_match->similarity < opts.copy_threshold)
continue;
to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity;
to->status = GIT_DELTA_COPIED;
to->similarity = best_match->similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
num_updates++;
}
}
/*
* Actually split and delete entries as needed
*/
if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites,
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup:
git__free(match_sources);
git__free(match_targets);
git__free(match_srcs);
git__free(match_tgts);
for (i = 0; i < cache_size; ++i) {
if (cache[i] != NULL)
opts.metric->free_signature(cache[i], opts.metric->payload);
for (i = 0; i < sigcache_size; ++i) {
if (sigcache[i] != NULL)
opts.metric->free_signature(sigcache[i], opts.metric->payload);
}
git__free(cache);
git__free(sigcache);
if (!given_opts || !given_opts->metric)
git__free(opts.metric);
......
......@@ -755,7 +755,7 @@ void test_diff_rename__file_partial_exchange(void)
git_buf_free(&c2);
}
void test_diff_rename__file_split(void)
void test_diff_rename__rename_and_copy_from_same_source(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index;
......@@ -947,6 +947,7 @@ void test_diff_rename__rejected_match_can_match_others(void)
cl_git_pass(
git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
cl_git_pass(git_diff_find_similar(diff, &findopts));
cl_git_pass(
......@@ -967,10 +968,10 @@ static void write_similarity_file_two(const char *filename, size_t b_lines)
size_t i;
for (i = 0; i < b_lines; i++)
git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (i+1));
git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (int)(i+1));
for (i = b_lines; i < 50; i++)
git_buf_printf(&contents, "%0.2d - aaaaa%s", (i+1), (i == 49 ? "" : "\r\n"));
git_buf_printf(&contents, "%0.2d - aaaaa%s", (int)(i+1), (i == 49 ? "" : "\r\n"));
cl_git_pass(
git_futils_writebuffer(&contents, filename, O_RDWR|O_CREAT, 0777));
......@@ -1018,6 +1019,7 @@ void test_diff_rename__rejected_match_can_match_others_two(void)
cl_git_pass(
git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
cl_git_pass(git_diff_find_similar(diff, &findopts));
cl_git_pass(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment