Commit db106d01 by Russell Belfer

Move rename detection into new file

This improves the naming for the rename related functionality
moving it to be called `git_diff_find_similar()` and renaming
all the associated constants, etc. to make more sense.

I also moved the new code (plus the existing `git_diff_merge`)
into a new file `diff_tform.c` where I can put new functions
related to manipulating git diff lists.

This also updates the implementation significantly from the
last revision fixing some ordering issues (where break-rewrite
needs to be handled prior to copy and rename detection) and
improving config option handling.
parent b4f5bb07
...@@ -263,31 +263,41 @@ typedef struct git_diff_patch git_diff_patch; ...@@ -263,31 +263,41 @@ typedef struct git_diff_patch git_diff_patch;
* Flags to control the behavior of diff rename/copy detection. * Flags to control the behavior of diff rename/copy detection.
*/ */
typedef enum { typedef enum {
/** should we look for renames */ /** look for renames? (`--find-renames`) */
GIT_DIFF_DETECT_RENAMES = (1 << 0), GIT_DIFF_FIND_RENAMES = (1 << 0),
/** should we look for copies */ /** consider old size of modified for renames? (`--break-rewrites=N`) */
GIT_DIFF_DETECT_COPIES = (1 << 1), GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1 << 1),
/** should we consider unmodified files as possible copy sources */
GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED = (1 << 2), /** look for copies? (a la `--find-copies`) */
/** should we split large rewrites into delete / add pairs */ GIT_DIFF_FIND_COPIES = (1 << 2),
GIT_DIFF_DETECT_BREAK_REWRITES = (1 << 3), /** consider unmodified as copy sources? (`--find-copies-harder`) */
} git_diff_detect_t; GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),
/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
} git_diff_find_t;
/** /**
* Control behavior of rename and copy detection * Control behavior of rename and copy detection
*/ */
typedef struct { typedef struct {
/** Combination of git_diff_detect_t values */ /** Combination of git_diff_find_t values (default FIND_RENAMES) */
unsigned int flags; unsigned int flags;
/** Threshold on similarity index to consider a file renamed. */
/** Similarity to consider a file renamed (default 50) */
unsigned int rename_threshold; unsigned int rename_threshold;
/** Threshold on similarity index to consider a file a copy. */ /** Similarity of modified to be eligible rename source (default 50) */
unsigned int rename_from_rewrite_threshold;
/** Similarity to consider a file a copy (default 50) */
unsigned int copy_threshold; unsigned int copy_threshold;
/** Threshold on change % to split modify into delete/add pair. */ /** Similarity to split modify into delete/add pair (default 60) */
unsigned int break_rewrite_threshold; unsigned int break_rewrite_threshold;
/** Maximum rename/copy targets to check (diff.renameLimit) */
/** Maximum similarity sources to examine (a la diff's `-l` option or
* the `diff.renameLimit` config) (default 200)
*/
unsigned int target_limit; unsigned int target_limit;
} git_diff_detect_options; } git_diff_find_options;
/** @name Diff List Generator Functions /** @name Diff List Generator Functions
...@@ -405,18 +415,20 @@ GIT_EXTERN(int) git_diff_merge( ...@@ -405,18 +415,20 @@ GIT_EXTERN(int) git_diff_merge(
const git_diff_list *from); const git_diff_list *from);
/** /**
* Update a diff list with file renames, copies, etc. * Transform a diff list marking file renames, copies, etc.
* *
* This modifies a diff list in place, replacing old entries that look * This modifies a diff list in place, replacing old entries that look
* like renames or copies with new entries reflecting those changes. * like renames or copies with new entries reflecting those changes.
* This also will, if requested, break modified files into add/remove
* pairs if the amount of change is above a threshold.
* *
* @param diff Diff list to run detection algorithms on * @param diff Diff list to run detection algorithms on
* @param options Control how detection should be run, NULL for defaults * @param options Control how detection should be run, NULL for defaults
* @return 0 on success, -1 on failure * @return 0 on success, -1 on failure
*/ */
GIT_EXTERN(int) git_diff_detect( GIT_EXTERN(int) git_diff_find_similar(
git_diff_list *diff, git_diff_list *diff,
git_diff_detect_options *options); git_diff_find_options *options);
/**@}*/ /**@}*/
......
...@@ -48,6 +48,8 @@ extern void git_diff__cleanup_modes( ...@@ -48,6 +48,8 @@ extern void git_diff__cleanup_modes(
extern void git_diff_list_addref(git_diff_list *diff); extern void git_diff_list_addref(git_diff_list *diff);
extern int git_diff_delta__cmp(const void *a, const void *b);
extern bool git_diff_delta__should_skip( extern bool git_diff_delta__should_skip(
const git_diff_options *opts, const git_diff_delta *delta); const git_diff_options *opts, const git_diff_delta *delta);
......
...@@ -241,3 +241,33 @@ void git_vector_swap(git_vector *a, git_vector *b) ...@@ -241,3 +241,33 @@ void git_vector_swap(git_vector *a, git_vector *b)
memcpy(a, b, sizeof(t)); memcpy(a, b, sizeof(t));
memcpy(b, &t, sizeof(t)); memcpy(b, &t, sizeof(t));
} }
/*
 * Extend `v` so that it holds `new_length` entries.
 *
 * The vector is never shrunk: a `new_length` at or below the current
 * length leaves it untouched. Otherwise the backing storage is enlarged
 * (via resize_vector) until its allocated size is strictly greater than
 * `new_length`, every newly exposed slot is zero-filled, and the length
 * is updated.
 *
 * @param v          vector to extend
 * @param new_length number of entries the vector should hold afterwards
 * @return 0 on success (including the no-op case), -1 if resize_vector fails
 */
int git_vector_resize_to(git_vector *v, size_t new_length)
{
	size_t added;

	/* Requests that would not grow the vector are a successful no-op. */
	if (v->length >= new_length)
		return 0;

	/* Keep enlarging until the allocation exceeds the requested length. */
	while (v->_alloc_size <= new_length) {
		if (resize_vector(v) < 0)
			return -1;
	}

	/* Zero only the slots between the old length and the new one. */
	added = new_length - v->length;
	memset(v->contents + v->length, 0, added * sizeof(void *));

	v->length = new_length;

	return 0;
}
/*
 * Store `value` at index `position` of `v`, growing the vector first when
 * that index lies beyond its current length.
 *
 * @param old      if non-NULL, receives the entry held at `position` before
 *                 the store (a zero-filled slot when the index was only just
 *                 created by growing the vector)
 * @param v        vector to modify
 * @param position zero-based index to write
 * @param value    pointer to store at `position`
 * @return 0 on success, -1 if `position + 1` is not representable in a
 *         size_t or the vector could not be grown
 */
int git_vector_set(void **old, git_vector *v, size_t position, void *value)
{
	size_t needed = position + 1;

	/*
	 * If `position` is the largest size_t, `needed` wraps around to 0.
	 * git_vector_resize_to() would treat that as "nothing to do" and the
	 * store below would land far outside the allocation, so reject it.
	 */
	if (needed == 0)
		return -1;

	if (git_vector_resize_to(v, needed) < 0)
		return -1;

	/* Hand back the previous occupant before overwriting it. */
	if (old != NULL)
		*old = v->contents[position];

	v->contents[position] = value;

	return 0;
}
...@@ -76,4 +76,7 @@ int git_vector_remove(git_vector *v, unsigned int idx); ...@@ -76,4 +76,7 @@ int git_vector_remove(git_vector *v, unsigned int idx);
void git_vector_pop(git_vector *v); void git_vector_pop(git_vector *v);
void git_vector_uniq(git_vector *v); void git_vector_uniq(git_vector *v);
/*
 * Grow `v` to hold `new_length` entries, zero-filling the newly added
 * slots. Does nothing when `new_length` does not exceed the current
 * length. Returns 0 on success, -1 if growing the vector fails.
 */
int git_vector_resize_to(git_vector *v, size_t new_length);
/*
 * Store `value` at index `position`, first growing `v` to `position + 1`
 * entries if it is shorter than that. When `old` is non-NULL it receives
 * the entry held at that index before the store. Returns 0 on success,
 * -1 if growing the vector fails.
 */
int git_vector_set(void **old, git_vector *v, size_t position, void *value);
#endif #endif
...@@ -34,14 +34,14 @@ void test_diff_rename__match_oid(void) ...@@ -34,14 +34,14 @@ void test_diff_rename__match_oid(void)
git_tree *old_tree, *new_tree; git_tree *old_tree, *new_tree;
git_diff_list *diff; git_diff_list *diff;
git_diff_options diffopts = {0}; git_diff_options diffopts = {0};
git_diff_detect_options opts; git_diff_find_options opts;
diff_expects exp; diff_expects exp;
old_tree = resolve_commit_oid_to_tree(g_repo, old_sha); old_tree = resolve_commit_oid_to_tree(g_repo, old_sha);
new_tree = resolve_commit_oid_to_tree(g_repo, new_sha); new_tree = resolve_commit_oid_to_tree(g_repo, new_sha);
/* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate /* Must pass GIT_DIFF_INCLUDE_UNMODIFIED if you expect to emulate
* --find-copies-harder during rename detection... * --find-copies-harder during rename transformation...
*/ */
memset(&diffopts, 0, sizeof(diffopts)); memset(&diffopts, 0, sizeof(diffopts));
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED; diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED;
...@@ -65,7 +65,7 @@ void test_diff_rename__match_oid(void) ...@@ -65,7 +65,7 @@ void test_diff_rename__match_oid(void)
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \ /* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a * 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/ */
cl_git_pass(git_diff_detect(diff, NULL)); cl_git_pass(git_diff_find_similar(diff, NULL));
memset(&exp, 0, sizeof(exp)); memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach( cl_git_pass(git_diff_foreach(
...@@ -86,8 +86,8 @@ void test_diff_rename__match_oid(void) ...@@ -86,8 +86,8 @@ void test_diff_rename__match_oid(void)
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a * 2bc7f351d20b53f1c72c16c4b036e491c478c49a
*/ */
memset(&opts, 0, sizeof(opts)); memset(&opts, 0, sizeof(opts));
opts.flags = GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED; opts.flags = GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
cl_git_pass(git_diff_detect(diff, &opts)); cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp)); memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach( cl_git_pass(git_diff_foreach(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment