Commit a21cbb12 by Russell Belfer

Significant rename detection rewrite

This flips rename detection around: instead of creating a
forward mapping from deltas to possible rename targets, it now
creates a reverse mapping, looking at each possible target and
trying to find a source that it could have been renamed or
copied from.  This is important because each output can only
have a single source, but a given source could map to multiple
outputs (in the form of COPIED records).

Additionally, this makes a couple of tweaks to the public rename
detection APIs, mostly renaming a couple of options that control
the behavior to make more sense and to be more like core Git.

I walked through the tests looking at the exact results and
updated the expectations based on what I saw.  The new code is
different from the old because it cannot give some nonsense
results (like A was renamed to both B and C) which were part of
the outputs previously.
parent 4742148d
......@@ -429,8 +429,8 @@ typedef enum {
GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
/** consider untracked files as rename/copy targets */
GIT_DIFF_FIND_FROM_UNTRACKED = (1 << 6),
/** find renames/copies for untracked items in working directory */
GIT_DIFF_FIND_FOR_UNTRACKED = (1 << 6),
/** turn on all finding features */
GIT_DIFF_FIND_ALL = (0x0ff),
......@@ -469,7 +469,10 @@ typedef struct {
* - `copy_threshold` is the same as the -C option with a value
* - `rename_from_rewrite_threshold` matches the top of the -B option
* - `break_rewrite_threshold` matches the bottom of the -B option
* - `target_limit` matches the -l option (approximately)
* - `rename_limit` is the maximum number of matches to consider for
* a particular file. This is a little different from the `-l` option
* to regular Git because we will still process up to this many matches
* before abandoning the search.
*
* The `metric` option allows you to plug in a custom similarity metric.
* Set it to NULL for the default internal metric which is based on sampling
......@@ -492,10 +495,10 @@ typedef struct {
/** Similarity to split modify into delete/add pair (default 60) */
uint16_t break_rewrite_threshold;
/** Maximum similarity sources to examine (a la diff's `-l` option or
* the `diff.renameLimit` config) (default 200)
/** Maximum similarity sources to examine for a file (somewhat like
* git-diff's `-l` option or `diff.renameLimit` config) (default 200)
*/
size_t target_limit;
size_t rename_limit;
/** Pluggable similarity metric; pass NULL to use internal metric */
git_diff_similarity_metric *metric;
......
......@@ -34,10 +34,16 @@ enum {
GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */
GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */
GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */
GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */
GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. */
GIT_DIFF_FLAG__IS_RENAME_TARGET = (1 << 18),
GIT_DIFF_FLAG__IS_RENAME_SOURCE = (1 << 19),
GIT_DIFF_FLAG__HAS_SELF_SIMILARITY = (1 << 20),
};
#define GIT_DIFF_FLAG__CLEAR_INTERNAL(F) (F) = ((F) & 0x00FFFF)
struct git_diff_list {
git_refcount rc;
git_repository *repo;
......
......@@ -222,7 +222,7 @@ int git_diff_find_similar__calc_similarity(
#define DEFAULT_THRESHOLD 50
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
#define DEFAULT_TARGET_LIMIT 200
#define DEFAULT_RENAME_LIMIT 200
static int normalize_find_opts(
git_diff_list *diff,
......@@ -290,15 +290,15 @@ static int normalize_find_opts(
#undef USE_DEFAULT
if (!opts->target_limit) {
if (!opts->rename_limit) {
int32_t limit = 0;
opts->target_limit = DEFAULT_TARGET_LIMIT;
opts->rename_limit = DEFAULT_RENAME_LIMIT;
if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
giterr_clear();
else if (limit > 0)
opts->target_limit = limit;
opts->rename_limit = limit;
}
/* assign the internal metric with whitespace flag as payload */
......@@ -322,27 +322,6 @@ static int normalize_find_opts(
return 0;
}
static void validate_delta(git_diff_delta *delta)
{
assert(delta);
return;
/*
switch (delta->status) {
case GIT_DELTA_ADDED:
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_IGNORED:
assert(delta->new_file.path);
break;
case GIT_DELTA_DELETED:
assert(delta->old_file.path);
break;
default:
assert(delta->old_file.path && delta->new_file.path);
break;
}
*/
}
static int apply_splits_and_deletes(
git_diff_list *diff, size_t expected_size, bool actually_split)
{
......@@ -358,16 +337,7 @@ static int apply_splits_and_deletes(
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
continue;
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
/* just leave delta flagged with score if not actually splitting */
if (!actually_split) {
delta->flags = (delta->flags & ~GIT_DIFF_FLAG__TO_SPLIT);
if (delta->status != GIT_DELTA_MODIFIED)
delta->similarity = 0;
continue;
}
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
delta->similarity = 0;
/* make new record for DELETED side of split */
......@@ -378,7 +348,6 @@ static int apply_splits_and_deletes(
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
deleted->new_file.path = deleted->old_file.path;
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
validate_delta(deleted);
if (git_vector_insert(&onto, deleted) < 0)
goto on_error;
......@@ -390,7 +359,6 @@ static int apply_splits_and_deletes(
memset(&delta->old_file, 0, sizeof(delta->old_file));
delta->old_file.path = delta->new_file.path;
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
validate_delta(delta);
}
if (git_vector_insert(&onto, delta) < 0)
......@@ -398,13 +366,22 @@ static int apply_splits_and_deletes(
}
/* cannot return an error past this point */
git_vector_foreach(&diff->deltas, i, delta)
git_vector_foreach(&diff->deltas, i, delta) {
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
git__free(delta);
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
if (delta->status != GIT_DELTA_COPIED &&
delta->status != GIT_DELTA_RENAMED &&
(delta->status != GIT_DELTA_MODIFIED || actually_split))
delta->similarity = 0;
}
/* swap new delta list into place */
git_vector_swap(&diff->deltas, &onto);
git_vector_free(&onto);
git_vector_sort(&diff->deltas);
return 0;
......@@ -424,7 +401,7 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
static int similarity_calc(
git_diff_list *diff,
git_diff_find_options *opts,
const git_diff_find_options *opts,
size_t file_idx,
void **cache)
{
......@@ -473,7 +450,7 @@ static int similarity_calc(
return error;
}
#define FLAG_SET(opts,flag_name) (((opts).flags & flag_name) != 0)
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
/* - score < 0 means files cannot be compared
* - score >= 100 means files are exact match
......@@ -482,14 +459,14 @@ static int similarity_calc(
static int similarity_measure(
int *score,
git_diff_list *diff,
git_diff_find_options *opts,
const git_diff_find_options *opts,
void **cache,
size_t a_idx,
size_t b_idx)
{
git_diff_file *a_file = similarity_get_file(diff, a_idx);
git_diff_file *b_file = similarity_get_file(diff, b_idx);
bool exact_match = FLAG_SET(*opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
*score = -1;
......@@ -539,28 +516,152 @@ static int similarity_measure(
score, cache[a_idx], cache[b_idx], opts->metric->payload);
}
static void convert_to_rename_and_add(
static int calc_self_similarity(
git_diff_list *diff,
git_diff_delta *from,
git_diff_delta *to,
int similarity)
const git_diff_find_options *opts,
size_t delta_idx,
void **cache)
{
to->status = GIT_DELTA_RENAMED;
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; /* ensure no split */
to->similarity = (uint32_t)similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
validate_delta(to);
if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
from->status = GIT_DELTA_UNTRACKED;
else
from->status = GIT_DELTA_ADDED;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; /* ensure no split */
from->similarity = 0;
memset(&from->old_file, 0, sizeof(from->old_file));
from->old_file.path = from->new_file.path;
from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
validate_delta(from);
int error, similarity = -1;
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0)
return 0;
error = similarity_measure(
&similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1);
if (error < 0)
return error;
if (similarity >= 0) {
delta->similarity = (uint32_t)similarity;
delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY;
}
return 0;
}
/*
 * Decide whether the delta at `delta_idx` may act as the destination of a
 * rename or copy.
 *
 * Returns true and sets GIT_DIFF_FLAG__IS_RENAME_TARGET on the delta when
 * it qualifies; returns false without setting that flag otherwise.  A
 * MODIFIED delta whose self-similarity falls below
 * `break_rewrite_threshold` is also flagged GIT_DIFF_FLAG__TO_SPLIT when
 * GIT_DIFF_BREAK_REWRITES is requested.
 *
 * NOTE: a negative return from calc_self_similarity() is reported simply
 * as "not a target"; the error code itself is not passed on to the caller.
 */
static bool is_rename_target(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);

	/* the new side has to be a plain blob */
	if (!GIT_MODE_ISBLOB(delta->new_file.mode))
		return false;

	if (delta->status == GIT_DELTA_UNMODIFIED ||
		delta->status == GIT_DELTA_DELETED) {
		/* these never receive a rename or copy */
		return false;
	}
	else if (delta->status == GIT_DELTA_MODIFIED) {
		/* a MODIFIED entry only counts if rewrite handling is enabled */
		if (!(FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) ||
			FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)))
			return false;

		if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
			return false;

		if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
			delta->similarity < opts->break_rewrite_threshold)
			delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
		else if (!(FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
			delta->similarity < opts->rename_from_rewrite_threshold))
			return false;
	}
	else if (delta->status == GIT_DELTA_UNTRACKED ||
		delta->status == GIT_DELTA_IGNORED) {
		/* only considered when the caller asked for it */
		if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED))
			return false;
	}
	/* every other status value is accepted without further checks */

	delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET;
	return true;
}
/*
 * Decide whether the delta at `delta_idx` may act as the origin of a
 * rename or copy.
 *
 * Returns true and sets GIT_DIFF_FLAG__IS_RENAME_SOURCE on the delta when
 * it qualifies; returns false without setting that flag otherwise.  When
 * copy detection is off, a MODIFIED/RENAMED/COPIED delta qualifies only
 * if it can be split: its self-similarity must fall below the relevant
 * rewrite threshold, and GIT_DIFF_FLAG__TO_SPLIT is set in the
 * GIT_DIFF_BREAK_REWRITES case.
 *
 * NOTE: a negative return from calc_self_similarity() is reported simply
 * as "not a source"; the error code itself is not passed on to the caller.
 */
static bool is_rename_source(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);

	/* the old side has to be a blob */
	if (!GIT_MODE_ISBLOB(delta->old_file.mode))
		return false;

	if (delta->status == GIT_DELTA_ADDED ||
		delta->status == GIT_DELTA_UNTRACKED ||
		delta->status == GIT_DELTA_IGNORED) {
		/* these statuses are never treated as a source */
		return false;
	}
	else if (delta->status == GIT_DELTA_DELETED ||
		delta->status == GIT_DELTA_TYPECHANGE) {
		/* always usable as a source */
	}
	else if (delta->status == GIT_DELTA_UNMODIFIED) {
		if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
			return false;
	}
	else if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) {
		/* MODIFIED, RENAMED, COPIED without copy detection: this is
		 * only a source if the entry can be split
		 */
		if (!(FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) ||
			FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)))
			return false;

		if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
			return false;

		if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
			delta->similarity < opts->break_rewrite_threshold)
			delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
		else if (!(FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
			delta->similarity < opts->rename_from_rewrite_threshold))
			return false;
	}

	delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE;
	return true;
}
/* True when the delta is a TYPECHANGE or carries the internal
 * GIT_DIFF_FLAG__TO_SPLIT marker.
 */
GIT_INLINE(bool) delta_is_split(git_diff_delta *delta)
{
	if (delta->status == GIT_DELTA_TYPECHANGE)
		return true;

	return (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0;
}
/* True when the delta's status is ADDED, UNTRACKED or IGNORED. */
GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
{
	switch (delta->status) {
	case GIT_DELTA_ADDED:
	case GIT_DELTA_UNTRACKED:
	case GIT_DELTA_IGNORED:
		return true;
	default:
		return false;
	}
}
typedef struct {
......@@ -583,7 +684,7 @@ int git_diff_find_similar(
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error;
/* TODO: maybe abort if deltas.length > target_limit ??? */
/* TODO: maybe abort if deltas.length > rename_limit ??? */
if (!git__is_uint32(diff->deltas.length))
return 0;
......@@ -594,103 +695,40 @@ int git_diff_find_similar(
matches = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(matches);
/* first mark MODIFIED deltas to split if too different (if requested) */
if (FLAG_SET(opts, GIT_DIFF_FIND_REWRITES)) {
git_vector_foreach(&diff->deltas, i, from) {
if (from->status != GIT_DELTA_MODIFIED)
continue;
/* skip things that aren't plain blobs */
if (!GIT_MODE_ISBLOB(from->old_file.mode))
continue;
/* measure similarity from old_file to new_file */
if ((error = similarity_measure(
&similarity, diff, &opts, cache, 2 * i, 2 * i + 1)) < 0)
goto cleanup;
if (similarity < 0)
continue;
if (similarity < (int)opts.break_rewrite_threshold) {
from->similarity = (uint32_t)similarity;
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
}
}
/* next find the most similar delta for each rename / copy candidate */
git_vector_foreach(&diff->deltas, i, from) {
size_t tried_targets = 0;
git_vector_foreach(&diff->deltas, i, to) {
size_t tried_sources = 0;
matches[i].idx = i;
matches[i].similarity = 0;
/* skip things that aren't plain blobs */
if (!GIT_MODE_ISBLOB(from->old_file.mode))
continue;
/* don't check UNMODIFIED files as source unless given option */
if (from->status == GIT_DELTA_UNMODIFIED &&
!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
continue;
/* don't check UNTRACKED files as source unless given option */
if ((from->status == GIT_DELTA_UNTRACKED ||
from->status == GIT_DELTA_IGNORED) &&
!FLAG_SET(opts, GIT_DIFF_FIND_FROM_UNTRACKED))
/* skip things that are not rename targets */
if (!is_rename_target(diff, &opts, i, cache))
continue;
/* only use DELETED (or split MODIFIED) unless copy detection on */
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) &&
from->status != GIT_DELTA_DELETED &&
(from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
continue;
git_vector_foreach(&diff->deltas, j, to) {
git_vector_foreach(&diff->deltas, j, from) {
if (i == j)
continue;
/* skip things that aren't blobs */
if (!GIT_MODE_ISBLOB(to->new_file.mode))
/* skip things that are not rename sources */
if (!is_rename_source(diff, &opts, j, cache))
continue;
/* only consider ADDED, RENAMED, COPIED, and split MODIFIED as
* targets; maybe include UNTRACKED and IGNORED if requested.
*/
switch (to->status) {
case GIT_DELTA_ADDED:
case GIT_DELTA_RENAMED:
case GIT_DELTA_COPIED:
break;
case GIT_DELTA_MODIFIED:
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
continue;
break;
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_IGNORED:
if (!FLAG_SET(opts, GIT_DIFF_FIND_FROM_UNTRACKED))
continue;
break;
default:
/* all other status values will be skipped */
continue;
}
/* cap on maximum targets we'll examine (per "from" file) */
if (++tried_targets > opts.target_limit)
/* cap on maximum sources we'll examine (per "to" file) */
if (++tried_sources > opts.rename_limit)
break;
/* calculate similarity for this pair and find best match */
if ((error = similarity_measure(
&similarity, diff, &opts, cache, 2 * i, 2 * j + 1)) < 0)
&similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0)
goto cleanup;
if (similarity < 0) {
--tried_targets;
if (similarity < 0) { /* not actually comparable */
--tried_sources;
continue;
}
if (matches[i].similarity < (uint32_t)similarity) {
matches[i].similarity = (uint32_t)similarity;
matches[i].idx = j;
......@@ -700,97 +738,128 @@ int git_diff_find_similar(
/* next rewrite the diffs with renames / copies */
git_vector_foreach(&diff->deltas, i, from) {
if (!matches[i].similarity)
continue;
git_vector_foreach(&diff->deltas, i, to) {
to = GIT_VECTOR_GET(&diff->deltas, matches[i].idx);
assert(to);
/* check if this delta was matched to another one */
if ((similarity = (int)matches[i].similarity) <= 0)
continue;
assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0);
similarity = (int)matches[i].similarity;
from = GIT_VECTOR_GET(&diff->deltas, matches[i].idx);
assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0);
/*
* Four possible outcomes here:
/* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
* 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE
* 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME
* 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
*/
/* 1. DELETED "from" with match over rename threshold becomes
* RENAMED "from" record (and "to" record goes away)
*/
if (from->status == GIT_DELTA_DELETED) {
if (similarity < (int)opts.rename_threshold)
continue;
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
if (delta_is_new_only(to)) {
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(to->new_file));
validate_delta(from);
if (similarity < (int)opts.rename_threshold)
continue;
num_rewrites++;
continue;
}
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
/* 2. SPLIT MODIFIED "from" with match over rename threshold becomes
* ADDED "from" record (with no SPLIT) and RENAMED "to" record
*/
if (from->status == GIT_DELTA_MODIFIED &&
(from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
if (similarity < (int)opts.rename_threshold)
continue;
num_rewrites++;
} else {
assert(delta_is_split(from));
convert_to_rename_and_add(diff, from, to, similarity);
num_rewrites--;
num_updates++;
continue;
}
if (similarity < (int)opts.rename_from_rewrite_threshold)
continue;
/* 3. MODIFIED "from" with FIND_RENAMES_FROM_REWRITES with similar
* "to" and self-similarity below rename_from_rewrite_threshold
* becomes newly ADDED "from" and RENAMED "to".
*/
if (from->status == GIT_DELTA_MODIFIED &&
FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
similarity > (int)opts.rename_threshold)
{
int self_similarity;
if ((error = similarity_measure(&self_similarity,
diff, &opts, cache, 2 * i, 2 * i + 1)) < 0)
goto cleanup;
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if (self_similarity >= 0 &&
self_similarity < (int)opts.rename_from_rewrite_threshold) {
to->status = GIT_DELTA_DELETED;
memset(&to->new_file, 0, sizeof(to->new_file));
to->new_file.path = to->old_file.path;
to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
convert_to_rename_and_add(diff, from, to, similarity);
num_updates++;
continue;
}
}
/* 4. if "from" -> "to" over copy threshold, "to" becomes COPIED */
if (similarity < (int)opts.copy_threshold)
continue;
else if (delta_is_split(from)) {
git_diff_file swap;
if (delta_is_new_only(to)) {
/* convert "to" to a COPIED record */
to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
validate_delta(to);
if (similarity < (int)opts.rename_threshold)
continue;
validate_delta(from);
memcpy(&swap, &from->new_file, sizeof(swap));
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
num_updates++;
to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
memcpy(&to->new_file, &swap, sizeof(to->new_file));
to->old_file.path = to->new_file.path;
num_updates++;
} else {
assert(delta_is_split(from));
if (similarity < (int)opts.rename_from_rewrite_threshold)
continue;
memcpy(&swap, &from->new_file, sizeof(swap));
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
memcpy(&to->new_file, &swap, sizeof(to->new_file));
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
to->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
num_updates++;
}
}
else if (delta_is_new_only(to)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
similarity < (int)opts.copy_threshold)
continue;
to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
num_updates++;
}
}
if (num_rewrites > 0)
if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites,
FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES));
if (num_rewrites > 0 || num_updates > 0)
git_vector_sort(&diff->deltas);
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup:
git__free(matches);
......
......@@ -15,6 +15,18 @@ void test_diff_rename__cleanup(void)
}
/*
static int debug_print(
const git_diff_delta *delta, const git_diff_range *range, char usage,
const char *line, size_t line_len, void *data)
{
GIT_UNUSED(delta); GIT_UNUSED(range); GIT_UNUSED(usage);
GIT_UNUSED(line_len); GIT_UNUSED(data);
fputs(line, stderr);
return 0;
}
*/
/*
* Renames repo has:
*
* commit 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 -
......@@ -72,8 +84,10 @@ void test_diff_rename__match_oid(void)
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
* don't use NULL opts to avoid config `diff.renames` contamination
*/
cl_git_pass(git_diff_find_similar(diff, NULL));
opts.flags = GIT_DIFF_FIND_RENAMES;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
......@@ -243,8 +257,8 @@ void test_diff_rename__not_exact_match(void)
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_list_free(diff);
......@@ -429,8 +443,8 @@ void test_diff_rename__working_directory_changes(void)
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
opts.flags = GIT_DIFF_FIND_ALL;
......@@ -441,7 +455,8 @@ void test_diff_rename__working_directory_changes(void)
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
......@@ -466,7 +481,8 @@ void test_diff_rename__working_directory_changes(void)
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
......@@ -521,13 +537,19 @@ void test_diff_rename__working_directory_changes(void)
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_EXACT_MATCH_ONLY;
cl_git_pass(git_diff_find_similar(diff, &opts));
/*
fprintf(stderr, "\n\n");
cl_git_pass(git_diff_print_raw(diff, debug_print, NULL));
*/
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
......
......@@ -87,7 +87,6 @@ void test_object_raw_convert__convert_oid_partially(void)
const char *exp = "16a0123456789abcdef4b775213c23a8bd74f5e0";
git_oid in;
char big[GIT_OID_HEXSZ + 1 + 3]; /* note + 4 => big buffer */
char *str;
cl_git_pass(git_oid_fromstr(&in, exp));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment