Commit 96fb6a64 by Vicent Martí

Merge pull request #1979 from libgit2/rb/diff-find-delete-unmod

Add GIT_DIFF_FIND_REMOVE_UNMODIFIED flag and fix copy detection bug
parents da02ebe3 f62c174d
......@@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)(
* Flags to control the behavior of diff rename/copy detection.
*/
typedef enum {
/** look for renames? (`--find-renames`) */
/** Look for renames? (`--find-renames`) */
GIT_DIFF_FIND_RENAMES = (1u << 0),
/** consider old side of modified for renames? (`--break-rewrites=N`) */
/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
/** look for copies? (a la `--find-copies`) */
/** Look for copies? (a la `--find-copies`). */
GIT_DIFF_FIND_COPIES = (1u << 2),
/** consider unmodified as copy sources? (`--find-copies-harder`) */
/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
*
* For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
* the initial `git_diff` is being generated.
*/
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
/** mark large rewrites for split (`--break-rewrites=/M`) */
/** Mark significant rewrites for split (`--break-rewrites=/M`) */
GIT_DIFF_FIND_REWRITES = (1u << 4),
/** actually split large rewrites into delete/add pairs */
/** Actually split large rewrites into delete/add pairs */
GIT_DIFF_BREAK_REWRITES = (1u << 5),
/** mark rewrites for split and break into delete/add pairs */
/** Mark rewrites for split and break into delete/add pairs */
GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
/** find renames/copies for untracked items in working directory */
/** Find renames/copies for UNTRACKED items in working directory.
*
* For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
* initial `git_diff` is being generated (and obviously the diff must
* be against the working directory for this to make sense).
*/
GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
/** turn on all finding features */
/** Turn on all finding features. */
GIT_DIFF_FIND_ALL = (0x0ff),
/** measure similarity ignoring leading whitespace (default) */
/** Measure similarity ignoring leading whitespace (default) */
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
/** measure similarity ignoring all whitespace */
/** Measure similarity ignoring all whitespace */
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
/** measure similarity including all data */
/** Measure similarity including all data */
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
/** measure similarity only by comparing SHAs (fast and cheap) */
/** Measure similarity only by comparing SHAs (fast and cheap) */
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
/** do not break rewrites unless they contribute to a rename */
/** Do not break rewrites unless they contribute to a rename.
*
* Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
* similarity of modified files and split the ones that have changed a
* lot into a DELETE / ADD pair. Then the sides of that pair will be
* considered candidates for rename and copy detection.
*
* If you add this flag in and the split pair is *not* used for an
* actual rename or copy, then the modified record will be restored to
* a regular MODIFIED record instead of being split.
*/
GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY = (1u << 15),
/** Remove any UNMODIFIED deltas after find_similar is done.
*
* Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
* --find-copies-harder behavior requires building a diff with the
* GIT_DIFF_INCLUDE_UNMODIFIED flag. If you do not want UNMODIFIED
* records in the final result, pass this flag to have them removed.
*/
GIT_DIFF_FIND_REMOVE_UNMODIFIED = (1u << 16),
} git_diff_find_t;
/**
......
......@@ -366,12 +366,28 @@ static int normalize_find_opts(
return 0;
}
static int insert_delete_side_of_split(
git_diff *diff, git_vector *onto, const git_diff_delta *delta)
{
/* make new record for DELETED side of split */
git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool);
GITERR_CHECK_ALLOC(deleted);
deleted->status = GIT_DELTA_DELETED;
deleted->nfiles = 1;
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
deleted->new_file.path = deleted->old_file.path;
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
return git_vector_insert(onto, deleted);
}
static int apply_splits_and_deletes(
git_diff *diff, size_t expected_size, bool actually_split)
{
git_vector onto = GIT_VECTOR_INIT;
size_t i;
git_diff_delta *delta, *deleted;
git_diff_delta *delta;
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
return -1;
......@@ -384,17 +400,7 @@ static int apply_splits_and_deletes(
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
delta->similarity = 0;
/* make new record for DELETED side of split */
if (!(deleted = diff_delta__dup(delta, &diff->pool)))
goto on_error;
deleted->status = GIT_DELTA_DELETED;
deleted->nfiles = 1;
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
deleted->new_file.path = deleted->old_file.path;
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if (git_vector_insert(&onto, deleted) < 0)
if (insert_delete_side_of_split(diff, &onto, delta) < 0)
goto on_error;
if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
......@@ -740,6 +746,8 @@ static bool is_rename_source(
case GIT_DELTA_UNMODIFIED:
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
return false;
if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED))
delta->flags |= GIT_DIFF_FLAG__TO_DELETE;
break;
default: /* MODIFIED, RENAMED, COPIED */
......@@ -1058,10 +1066,7 @@ find_best_matches:
}
}
else if (delta_is_new_only(tgt)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES))
continue;
else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) {
if (tgt2src_copy[t].similarity < opts.copy_threshold)
continue;
......@@ -1069,10 +1074,21 @@ find_best_matches:
best_match = &tgt2src_copy[t];
src = GIT_VECTOR_GET(&diff->deltas, best_match->idx);
if (delta_is_split(tgt)) {
error = insert_delete_side_of_split(diff, &diff->deltas, tgt);
if (error < 0)
goto cleanup;
num_rewrites--;
}
if (!delta_is_split(tgt) && !delta_is_new_only(tgt))
continue;
tgt->status = GIT_DELTA_COPIED;
tgt->similarity = best_match->similarity;
tgt->nfiles = 2;
memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file));
tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_updates++;
}
......
......@@ -1284,3 +1284,100 @@ void test_diff_rename__rewrite_on_single_file(void)
git_diff_free(diff);
git_index_free(index);
}
void test_diff_rename__can_find_copy_to_split(void)
{
git_buf c1 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
git_diff_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
}
void test_diff_rename__can_delete_unmodified_deltas(void)
{
git_buf c1 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_REMOVE_UNMODIFIED;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment