Commit db106d01 by Russell Belfer

Move rename detection into new file

This improves the naming of the rename-related functionality,
moving it to a function called `git_diff_find_similar()` and renaming
all the associated constants, etc. so that they make more sense.

I also moved the new code (plus the existing `git_diff_merge`)
into a new file `diff_tform.c` where I can put new functions
related to manipulating git diff lists.

This also updates the implementation significantly since the
last revision, fixing some ordering issues (break-rewrite
needs to be handled prior to copy and rename detection) and
improving config option handling.
parent b4f5bb07
......@@ -263,31 +263,41 @@ typedef struct git_diff_patch git_diff_patch;
* Flags to control the behavior of diff rename/copy detection.
*/
typedef enum {
/** should we look for renames */
GIT_DIFF_DETECT_RENAMES = (1 << 0),
/** should we look for copies */
GIT_DIFF_DETECT_COPIES = (1 << 1),
/** should we consider unmodified files as possible copy sources */
GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED = (1 << 2),
/** should we split large rewrites into delete / add pairs */
GIT_DIFF_DETECT_BREAK_REWRITES = (1 << 3),
} git_diff_detect_t;
/** look for renames? (`--find-renames`) */
GIT_DIFF_FIND_RENAMES = (1 << 0),
/** consider old size of modified for renames? (`--break-rewrites=N`) */
GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1 << 1),
/** look for copies? (a la `--find-copies`) */
GIT_DIFF_FIND_COPIES = (1 << 2),
/** consider unmodified as copy sources? (`--find-copies-harder`) */
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),
/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
} git_diff_find_t;
/**
* Control behavior of rename and copy detection
*/
typedef struct {
/** Combination of git_diff_detect_t values */
/** Combination of git_diff_find_t values (default FIND_RENAMES) */
unsigned int flags;
/** Threshold on similarity index to consider a file renamed. */
/** Similarity to consider a file renamed (default 50) */
unsigned int rename_threshold;
/** Threshold on similarity index to consider a file a copy. */
/** Similarity of modified to be eligible rename source (default 50) */
unsigned int rename_from_rewrite_threshold;
/** Similarity to consider a file a copy (default 50) */
unsigned int copy_threshold;
/** Threshold on change % to split modify into delete/add pair. */
/** Similarity to split modify into delete/add pair (default 60) */
unsigned int break_rewrite_threshold;
/** Maximum rename/copy targets to check (diff.renameLimit) */
/** Maximum similarity sources to examine (a la diff's `-l` option or
* the `diff.renameLimit` config) (default 200)
*/
unsigned int target_limit;
} git_diff_detect_options;
} git_diff_find_options;
/** @name Diff List Generator Functions
......@@ -405,18 +415,20 @@ GIT_EXTERN(int) git_diff_merge(
const git_diff_list *from);
/**
* Update a diff list with file renames, copies, etc.
* Transform a diff list marking file renames, copies, etc.
*
* This modifies a diff list in place, replacing old entries that look
* like renames or copies with new entries reflecting those changes.
* This also will, if requested, break modified files into add/remove
* pairs if the amount of change is above a threshold.
*
* @param diff Diff list to run detection algorithms on
* @param options Control how detection should be run, NULL for defaults
* @return 0 on success, -1 on failure
*/
GIT_EXTERN(int) git_diff_detect(
GIT_EXTERN(int) git_diff_find_similar(
git_diff_list *diff,
git_diff_detect_options *options);
git_diff_find_options *options);
/**@}*/
......
......@@ -48,6 +48,8 @@ extern void git_diff__cleanup_modes(
extern void git_diff_list_addref(git_diff_list *diff);
extern int git_diff_delta__cmp(const void *a, const void *b);
extern bool git_diff_delta__should_skip(
const git_diff_options *opts, const git_diff_delta *delta);
......
......@@ -241,3 +241,33 @@ void git_vector_swap(git_vector *a, git_vector *b)
memcpy(a, b, sizeof(t));
memcpy(b, &t, sizeof(t));
}
/**
 * Extend a vector so that its length becomes `new_length`.
 *
 * Requests that do not exceed the current length are ignored; this
 * function never shrinks the vector. Slots added by the extension are
 * filled with zero bytes.
 *
 * @param v          Vector to extend
 * @param new_length Desired number of entries
 * @return 0 on success (including the no-op case), -1 if growing the
 *         backing storage fails
 */
int git_vector_resize_to(git_vector *v, size_t new_length)
{
	size_t added;

	/* Nothing to do unless the caller asks for more entries. */
	if (v->length >= new_length)
		return 0;

	/* Keep enlarging the storage until the allocated size is strictly
	 * greater than the requested length. */
	for (;;) {
		if (v->_alloc_size > new_length)
			break;

		if (resize_vector(v) < 0)
			return -1;
	}

	/* Clear the freshly exposed tail before publishing the new length. */
	added = new_length - v->length;
	memset(v->contents + v->length, 0, added * sizeof(void *));

	v->length = new_length;

	return 0;
}
/**
 * Store `value` at index `position`, extending the vector if needed.
 *
 * The vector is first extended with git_vector_resize_to() so that
 * `position` is a valid index. When `old` is non-NULL, the pointer held
 * in that slot before the store is written to `*old`.
 *
 * @param old      Optional out-parameter receiving the slot's prior content
 * @param v        Vector to modify
 * @param position Zero-based index to write
 * @param value    Pointer to store in the slot
 * @return 0 on success, -1 if `position` is too large to be followed by
 *         a valid length or if extending the vector fails
 */
int git_vector_set(void **old, git_vector *v, size_t position, void *value)
{
	/* `position + 1` wraps to 0 for the largest size_t value; the resize
	 * below would then be a no-op and the store would land far outside
	 * the array. Reject that index before doing any arithmetic on it. */
	if (position == (size_t)-1)
		return -1;

	if (git_vector_resize_to(v, position + 1) < 0)
		return -1;

	/* Hand back the previous occupant before it is overwritten. */
	if (old != NULL)
		*old = v->contents[position];

	v->contents[position] = value;

	return 0;
}
......@@ -76,4 +76,7 @@ int git_vector_remove(git_vector *v, unsigned int idx);
void git_vector_pop(git_vector *v);
void git_vector_uniq(git_vector *v);
/* Extend `v` to `new_length` entries, zero-filling the added slots; a
 * `new_length` not greater than the current length is a no-op.
 * Returns 0 on success, -1 on failure. */
int git_vector_resize_to(git_vector *v, size_t new_length);
/* Store `value` at index `position`, extending `v` first if needed. If
 * `old` is non-NULL it receives the slot's previous content.
 * Returns 0 on success, -1 on failure. */
int git_vector_set(void **old, git_vector *v, size_t position, void *value);
#endif
......@@ -34,14 +34,14 @@ void test_diff_rename__match_oid(void)
git_tree *old_tree, *new_tree;
git_diff_list *diff;
git_diff_options diffopts = {0};
git_diff_detect_options opts;
git_diff_find_options opts;
diff_expects exp;
old_tree = resolve_commit_oid_to_tree(g_repo, old_sha);
new_tree = resolve_commit_oid_to_tree(g_repo, new_sha);
/* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate
* --find-copies-harder during rename detection...
* --find-copies-harder during rename transformation...
*/
memset(&diffopts, 0, sizeof(diffopts));
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
......@@ -65,7 +65,7 @@ void test_diff_rename__match_oid(void)
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/
cl_git_pass(git_diff_detect(diff, NULL));
cl_git_pass(git_diff_find_similar(diff, NULL));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
......@@ -86,8 +86,8 @@ void test_diff_rename__match_oid(void)
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/
memset(&opts, 0, sizeof(opts));
opts.flags = GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED;
cl_git_pass(git_diff_detect(diff, &opts));
opts.flags = GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment