Commit 97ad85b8 by Russell Belfer

Add GIT_DIFF_FIND_DELETE_UNMODIFIED flag

When doing copy detection, it is often necessary to include
UNMODIFIED records in the git_diff so they are available as source
records for GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED.  Yet in the final
diff, often you will not want to have these UNMODIFIED records.
This adds a flag which marks these UNMODIFIED records for deletion
from the diff list so they will be removed after the rename detect
phase is over.
parent 2123a17f
...@@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)( ...@@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)(
* Flags to control the behavior of diff rename/copy detection. * Flags to control the behavior of diff rename/copy detection.
*/ */
typedef enum { typedef enum {
/** look for renames? (`--find-renames`) */ /** Look for renames? (`--find-renames`) */
GIT_DIFF_FIND_RENAMES = (1u << 0), GIT_DIFF_FIND_RENAMES = (1u << 0),
/** consider old side of modified for renames? (`--break-rewrites=N`) */
/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1), GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
/** look for copies? (a la `--find-copies`) */ /** Look for copies? (a la `--find-copies`). */
GIT_DIFF_FIND_COPIES = (1u << 2), GIT_DIFF_FIND_COPIES = (1u << 2),
/** consider unmodified as copy sources? (`--find-copies-harder`) */
/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
*
* For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
* the initial `git_diff` is being generated.
*/
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3), GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
/** mark large rewrites for split (`--break-rewrites=/M`) */ /** Mark significant rewrites for split (`--break-rewrites=/M`) */
GIT_DIFF_FIND_REWRITES = (1u << 4), GIT_DIFF_FIND_REWRITES = (1u << 4),
/** actually split large rewrites into delete/add pairs */ /** Actually split large rewrites into delete/add pairs */
GIT_DIFF_BREAK_REWRITES = (1u << 5), GIT_DIFF_BREAK_REWRITES = (1u << 5),
/** mark rewrites for split and break into delete/add pairs */ /** Mark rewrites for split and break into delete/add pairs */
GIT_DIFF_FIND_AND_BREAK_REWRITES = GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES), (GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
/** find renames/copies for untracked items in working directory */ /** Find renames/copies for UNTRACKED items in working directory.
*
* For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
* initial `git_diff` is being generated (and obviously the diff must
* be against the working directory for this to make sense).
*/
GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6), GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
/** turn on all finding features */ /** Turn on all finding features. */
GIT_DIFF_FIND_ALL = (0x0ff), GIT_DIFF_FIND_ALL = (0x0ff),
/** measure similarity ignoring leading whitespace (default) */ /** Measure similarity ignoring leading whitespace (default) */
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0, GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
/** measure similarity ignoring all whitespace */ /** Measure similarity ignoring all whitespace */
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12), GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
/** measure similarity including all data */ /** Measure similarity including all data */
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13), GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
/** measure similarity only by comparing SHAs (fast and cheap) */ /** Measure similarity only by comparing SHAs (fast and cheap) */
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14), GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
/** do not break rewrites unless they contribute to a rename */ /** Do not break rewrites unless they contribute to a rename.
*
* Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
* similarity of modified files and split the ones that have changed a
* lot into a DELETE / ADD pair. Then the sides of that pair will be
* considered candidates for rename and copy detection.
*
* If you add this flag in and the split pair is *not* used for an
* actual rename or copy, then the modified record will be restored to
* a regular MODIFIED record instead of being split.
*/
GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY = (1u << 15), GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY = (1u << 15),
/** Delete any UNMODIFIED records after find_similar is done.
*
* Using GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED to emulate the
* --find-copies-harder behavior requires building a diff with the
* GIT_DIFF_INCLUDE_UNMODIFIED flag. If you do not want UNMODIFIED
* records in the final result, pass this flag to have them removed.
*/
GIT_DIFF_FIND_DELETE_UNMODIFIED = (1u << 16),
} git_diff_find_t; } git_diff_find_t;
/** /**
......
...@@ -746,6 +746,8 @@ static bool is_rename_source( ...@@ -746,6 +746,8 @@ static bool is_rename_source(
case GIT_DELTA_UNMODIFIED: case GIT_DELTA_UNMODIFIED:
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
return false; return false;
if (FLAG_SET(opts, GIT_DIFF_FIND_DELETE_UNMODIFIED))
delta->flags |= GIT_DIFF_FLAG__TO_DELETE;
break; break;
default: /* MODIFIED, RENAMED, COPIED */ default: /* MODIFIED, RENAMED, COPIED */
......
...@@ -1333,3 +1333,51 @@ void test_diff_rename__can_find_copy_to_split(void) ...@@ -1333,3 +1333,51 @@ void test_diff_rename__can_find_copy_to_split(void)
git_buf_free(&c1); git_buf_free(&c1);
} }
/*
 * Check that adding GIT_DIFF_FIND_DELETE_UNMODIFIED to the find options
 * leaves no UNMODIFIED records in the diff after git_diff_find_similar().
 */
void test_diff_rename__can_delete_unmodified_deltas(void)
{
	diff_expects counts;
	git_diff_find_options find_opts = GIT_DIFF_FIND_OPTIONS_INIT;
	git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT;
	git_diff *changes;
	git_tree *head_tree;
	git_index *idx;
	git_buf content = GIT_BUF_INIT;

	/* Write the contents of songof7cities.txt into untimely.txt. */
	cl_git_pass(git_futils_readbuffer(&content, "renames/songof7cities.txt"));
	cl_git_pass(git_futils_writebuffer(&content, "renames/untimely.txt", 0, 0));

	/* Load the HEAD tree into the index, then stage the rewritten file. */
	cl_git_pass(
		git_revparse_single((git_object **)&head_tree, g_repo, "HEAD^{tree}"));
	cl_git_pass(git_repository_index(&idx, g_repo));
	cl_git_pass(git_index_read_tree(idx, head_tree));
	cl_git_pass(git_index_add_bypath(idx, "untimely.txt"));

	/* Build the tree-to-index diff with UNMODIFIED records included. */
	diff_opts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
	cl_git_pass(
		git_diff_tree_to_index(&changes, g_repo, head_tree, idx, &diff_opts));

	/* Before similarity detection: one MODIFIED plus three UNMODIFIED. */
	memset(&counts, 0, sizeof(counts));
	cl_git_pass(git_diff_foreach(
		changes, diff_file_cb, diff_hunk_cb, diff_line_cb, &counts));

	cl_assert_equal_i(4, counts.files);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_MODIFIED]);
	cl_assert_equal_i(3, counts.file_status[GIT_DELTA_UNMODIFIED]);

	/* Run every finding feature and ask for UNMODIFIED records to be dropped. */
	find_opts.flags = GIT_DIFF_FIND_DELETE_UNMODIFIED | GIT_DIFF_FIND_ALL;
	cl_git_pass(git_diff_find_similar(changes, &find_opts));

	/*
	 * Afterwards only two records are expected: one DELETED and one COPIED
	 * (presumably the rewritten file split into a delete/add pair whose add
	 * side was matched as a copy -- confirm against find_similar).
	 */
	memset(&counts, 0, sizeof(counts));
	cl_git_pass(git_diff_foreach(
		changes, diff_file_cb, diff_hunk_cb, diff_line_cb, &counts));

	cl_assert_equal_i(2, counts.files);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_DELETED]);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_COPIED]);

	git_diff_free(changes);
	git_tree_free(head_tree);
	git_index_free(idx);
	git_buf_free(&content);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment