Commit 96fb6a64 by Vicent Martí

Merge pull request #1979 from libgit2/rb/diff-find-delete-unmod

Add GIT_DIFF_FIND_REMOVE_UNMODIFIED flag and fix copy detection bug
parents da02ebe3 f62c174d
...@@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)( ...@@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)(
* Flags to control the behavior of diff rename/copy detection. * Flags to control the behavior of diff rename/copy detection.
*/ */
typedef enum { typedef enum {
/** look for renames? (`--find-renames`) */ /** Look for renames? (`--find-renames`) */
GIT_DIFF_FIND_RENAMES = (1u << 0), GIT_DIFF_FIND_RENAMES = (1u << 0),
/** consider old side of modified for renames? (`--break-rewrites=N`) */
/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1), GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
/** look for copies? (a la `--find-copies`) */ /** Look for copies? (a la `--find-copies`). */
GIT_DIFF_FIND_COPIES = (1u << 2), GIT_DIFF_FIND_COPIES = (1u << 2),
/** consider unmodified as copy sources? (`--find-copies-harder`) */
/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
*
* For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
* the initial `git_diff` is being generated.
*/
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3), GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
/** mark large rewrites for split (`--break-rewrites=/M`) */ /** Mark significant rewrites for split (`--break-rewrites=/M`) */
GIT_DIFF_FIND_REWRITES = (1u << 4), GIT_DIFF_FIND_REWRITES = (1u << 4),
/** actually split large rewrites into delete/add pairs */ /** Actually split large rewrites into delete/add pairs */
GIT_DIFF_BREAK_REWRITES = (1u << 5), GIT_DIFF_BREAK_REWRITES = (1u << 5),
/** mark rewrites for split and break into delete/add pairs */ /** Mark rewrites for split and break into delete/add pairs */
GIT_DIFF_FIND_AND_BREAK_REWRITES = GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES), (GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
/** find renames/copies for untracked items in working directory */ /** Find renames/copies for UNTRACKED items in working directory.
*
* For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
* initial `git_diff` is being generated (and obviously the diff must
* be against the working directory for this to make sense).
*/
GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6), GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
/** turn on all finding features */ /** Turn on all finding features. */
GIT_DIFF_FIND_ALL = (0x0ff), GIT_DIFF_FIND_ALL = (0x0ff),
/** measure similarity ignoring leading whitespace (default) */ /** Measure similarity ignoring leading whitespace (default) */
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0, GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
/** measure similarity ignoring all whitespace */ /** Measure similarity ignoring all whitespace */
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12), GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
/** measure similarity including all data */ /** Measure similarity including all data */
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13), GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
/** measure similarity only by comparing SHAs (fast and cheap) */ /** Measure similarity only by comparing SHAs (fast and cheap) */
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14), GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
/** do not break rewrites unless they contribute to a rename */ /** Do not break rewrites unless they contribute to a rename.
*
* Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
* similarity of modified files and split the ones that have changed a
* lot into a DELETE / ADD pair. Then the sides of that pair will be
* considered candidates for rename and copy detection.
*
* If you add this flag in and the split pair is *not* used for an
* actual rename or copy, then the modified record will be restored to
* a regular MODIFIED record instead of being split.
*/
GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY = (1u << 15), GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY = (1u << 15),
/** Remove any UNMODIFIED deltas after find_similar is done.
*
* Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
* --find-copies-harder behavior requires building a diff with the
* GIT_DIFF_INCLUDE_UNMODIFIED flag. If you do not want UNMODIFIED
* records in the final result, pass this flag to have them removed.
*/
GIT_DIFF_FIND_REMOVE_UNMODIFIED = (1u << 16),
} git_diff_find_t; } git_diff_find_t;
/** /**
......
...@@ -366,12 +366,28 @@ static int normalize_find_opts( ...@@ -366,12 +366,28 @@ static int normalize_find_opts(
return 0; return 0;
} }
/*
 * Build the DELETED half of a split delta and insert it into `onto`.
 *
 * A copy of `delta` is taken from the diff's pool via diff_delta__dup(),
 * rewritten as a one-file DELETED record whose new side is cleared except
 * for the path (shared with the old side) and the VALID_OID flag, and then
 * handed to git_vector_insert(), whose result is returned.
 */
static int insert_delete_side_of_split(
	git_diff *diff, git_vector *onto, const git_diff_delta *delta)
{
	git_diff_delta *del_side;

	del_side = diff_delta__dup(delta, &diff->pool);
	/* NOTE(review): presumably bails out with an error when the dup is NULL */
	GITERR_CHECK_ALLOC(del_side);

	/* the record now describes a deletion touching a single file */
	del_side->nfiles = 1;
	del_side->status = GIT_DELTA_DELETED;

	/* clear the new side, then restore the path and flag it VALID_OID */
	memset(&del_side->new_file, 0, sizeof(del_side->new_file));
	del_side->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
	del_side->new_file.path = del_side->old_file.path;

	return git_vector_insert(onto, del_side);
}
static int apply_splits_and_deletes( static int apply_splits_and_deletes(
git_diff *diff, size_t expected_size, bool actually_split) git_diff *diff, size_t expected_size, bool actually_split)
{ {
git_vector onto = GIT_VECTOR_INIT; git_vector onto = GIT_VECTOR_INIT;
size_t i; size_t i;
git_diff_delta *delta, *deleted; git_diff_delta *delta;
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
return -1; return -1;
...@@ -384,17 +400,7 @@ static int apply_splits_and_deletes( ...@@ -384,17 +400,7 @@ static int apply_splits_and_deletes(
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
delta->similarity = 0; delta->similarity = 0;
/* make new record for DELETED side of split */ if (insert_delete_side_of_split(diff, &onto, delta) < 0)
if (!(deleted = diff_delta__dup(delta, &diff->pool)))
goto on_error;
deleted->status = GIT_DELTA_DELETED;
deleted->nfiles = 1;
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
deleted->new_file.path = deleted->old_file.path;
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if (git_vector_insert(&onto, deleted) < 0)
goto on_error; goto on_error;
if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
...@@ -740,6 +746,8 @@ static bool is_rename_source( ...@@ -740,6 +746,8 @@ static bool is_rename_source(
case GIT_DELTA_UNMODIFIED: case GIT_DELTA_UNMODIFIED:
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
return false; return false;
if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED))
delta->flags |= GIT_DIFF_FLAG__TO_DELETE;
break; break;
default: /* MODIFIED, RENAMED, COPIED */ default: /* MODIFIED, RENAMED, COPIED */
...@@ -1058,10 +1066,7 @@ find_best_matches: ...@@ -1058,10 +1066,7 @@ find_best_matches:
} }
} }
else if (delta_is_new_only(tgt)) { else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES))
continue;
if (tgt2src_copy[t].similarity < opts.copy_threshold) if (tgt2src_copy[t].similarity < opts.copy_threshold)
continue; continue;
...@@ -1069,10 +1074,21 @@ find_best_matches: ...@@ -1069,10 +1074,21 @@ find_best_matches:
best_match = &tgt2src_copy[t]; best_match = &tgt2src_copy[t];
src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); src = GIT_VECTOR_GET(&diff->deltas, best_match->idx);
if (delta_is_split(tgt)) {
error = insert_delete_side_of_split(diff, &diff->deltas, tgt);
if (error < 0)
goto cleanup;
num_rewrites--;
}
if (!delta_is_split(tgt) && !delta_is_new_only(tgt))
continue;
tgt->status = GIT_DELTA_COPIED; tgt->status = GIT_DELTA_COPIED;
tgt->similarity = best_match->similarity; tgt->similarity = best_match->similarity;
tgt->nfiles = 2; tgt->nfiles = 2;
memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file));
tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_updates++; num_updates++;
} }
......
...@@ -1284,3 +1284,100 @@ void test_diff_rename__rewrite_on_single_file(void) ...@@ -1284,3 +1284,100 @@ void test_diff_rename__rewrite_on_single_file(void)
git_diff_free(diff); git_diff_free(diff);
git_index_free(index); git_index_free(index);
} }
/*
 * Verifies that git_diff_find_similar() with GIT_DIFF_FIND_ALL can turn a
 * MODIFIED delta into a DELETED + COPIED pair while the UNMODIFIED deltas
 * stay in the diff.
 *
 * Setup: the contents of renames/songof7cities.txt are written over
 * renames/untimely.txt, and the index is rebuilt from HEAD's tree with
 * untimely.txt re-added, so the tree-to-index diff initially shows one
 * MODIFIED entry plus three UNMODIFIED ones.
 */
void test_diff_rename__can_find_copy_to_split(void)
{
git_buf c1 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* overwrite untimely.txt with the full contents of songof7cities.txt */
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
/* load HEAD's tree into the index, then stage the rewritten file */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
/* UNMODIFIED deltas are requested when the diff is generated */
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* before similarity detection: 1 MODIFIED + 3 UNMODIFIED */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
/* after detection: one extra delta, now 1 DELETED + 1 COPIED + 3 UNMODIFIED */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
/* release everything acquired above */
git_diff_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
}
/*
 * Verifies that adding GIT_DIFF_FIND_REMOVE_UNMODIFIED to GIT_DIFF_FIND_ALL
 * makes git_diff_find_similar() drop the UNMODIFIED deltas, leaving only
 * the DELETED + COPIED pair.
 *
 * Setup: the contents of renames/songof7cities.txt are written over
 * renames/untimely.txt, and the index is rebuilt from HEAD's tree with
 * untimely.txt re-added, so the tree-to-index diff initially shows one
 * MODIFIED entry plus three UNMODIFIED ones.
 */
void test_diff_rename__can_delete_unmodified_deltas(void)
{
git_buf c1 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* overwrite untimely.txt with the full contents of songof7cities.txt */
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
/* load HEAD's tree into the index, then stage the rewritten file */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
/* UNMODIFIED deltas are requested when the diff is generated */
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* before similarity detection: 1 MODIFIED + 3 UNMODIFIED */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_REMOVE_UNMODIFIED;
cl_git_pass(git_diff_find_similar(diff, &opts));
/* after detection only the DELETED and COPIED deltas remain */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
/* release everything acquired above */
git_diff_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment