Commit 30caf0cf by Vicent Martí

Merge pull request #1595 from arrbee/even-more-rename-fixes

Even more rename detection fixes
parents 87a56fe0 49f70f2c
......@@ -84,6 +84,10 @@ static int check_uint16_param(const char *arg, const char *pattern, uint16_t *va
char *endptr = NULL;
if (strncmp(arg, pattern, len))
return 0;
if (arg[len] == '\0' && pattern[len - 1] != '=')
return 1;
if (arg[len] == '=')
len++;
strval = strtoul(arg + len, &endptr, 0);
if (endptr == arg)
return 0;
......@@ -110,13 +114,20 @@ static void usage(const char *message, const char *arg)
exit(1);
}
/* Output styles selectable from the command line. */
enum {
	FORMAT_PATCH,   /* -p, -u, --patch */
	FORMAT_COMPACT, /* --name-status */
	FORMAT_RAW      /* --raw */
};
int main(int argc, char *argv[])
{
git_repository *repo = NULL;
git_tree *t1 = NULL, *t2 = NULL;
git_diff_options opts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
git_diff_list *diff;
int i, color = -1, compact = 0, cached = 0;
int i, color = -1, format = FORMAT_PATCH, cached = 0;
char *a, *treeish1 = NULL, *treeish2 = NULL;
const char *dir = ".";
......@@ -137,11 +148,13 @@ int main(int argc, char *argv[])
}
else if (!strcmp(a, "-p") || !strcmp(a, "-u") ||
!strcmp(a, "--patch"))
compact = 0;
format = FORMAT_PATCH;
else if (!strcmp(a, "--cached"))
cached = 1;
else if (!strcmp(a, "--name-status"))
compact = 1;
format = FORMAT_COMPACT;
else if (!strcmp(a, "--raw"))
format = FORMAT_RAW;
else if (!strcmp(a, "--color"))
color = 0;
else if (!strcmp(a, "--no-color"))
......@@ -160,6 +173,20 @@ int main(int argc, char *argv[])
opts.flags |= GIT_DIFF_INCLUDE_IGNORED;
else if (!strcmp(a, "--untracked"))
opts.flags |= GIT_DIFF_INCLUDE_UNTRACKED;
else if (check_uint16_param(a, "-M", &findopts.rename_threshold) ||
check_uint16_param(a, "--find-renames",
&findopts.rename_threshold))
findopts.flags |= GIT_DIFF_FIND_RENAMES;
else if (check_uint16_param(a, "-C", &findopts.copy_threshold) ||
check_uint16_param(a, "--find-copies",
&findopts.copy_threshold))
findopts.flags |= GIT_DIFF_FIND_COPIES;
else if (!strcmp(a, "--find-copies-harder"))
findopts.flags |= GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
else if (!strncmp(a, "-B", 2) || !strncmp(a, "--break-rewrites", 16)) {
/* TODO: parse thresholds */
findopts.flags |= GIT_DIFF_FIND_REWRITES;
}
else if (!check_uint16_param(a, "-U", &opts.context_lines) &&
!check_uint16_param(a, "--unified=", &opts.context_lines) &&
!check_uint16_param(a, "--inter-hunk-context=",
......@@ -204,13 +231,24 @@ int main(int argc, char *argv[])
else
check(git_diff_index_to_workdir(&diff, repo, NULL, &opts), "Diff");
if ((findopts.flags & GIT_DIFF_FIND_ALL) != 0)
check(git_diff_find_similar(diff, &findopts),
"finding renames and copies ");
if (color >= 0)
fputs(colors[0], stdout);
if (compact)
check(git_diff_print_compact(diff, printer, &color), "Displaying diff");
else
switch (format) {
case FORMAT_PATCH:
check(git_diff_print_patch(diff, printer, &color), "Displaying diff");
break;
case FORMAT_COMPACT:
check(git_diff_print_compact(diff, printer, &color), "Displaying diff");
break;
case FORMAT_RAW:
check(git_diff_print_raw(diff, printer, &color), "Displaying diff");
break;
}
if (color >= 0)
fputs(colors[0], stdout);
......
......@@ -243,6 +243,19 @@ typedef struct {
* `NOT_BINARY` flag set to avoid examining file contents if you do not pass
* in hunk and/or line callbacks to the diff foreach iteration function. It
* will just use the git attributes for those files.
*
* The similarity score is zero unless you call `git_diff_find_similar()`
* which does a similarity analysis of files in the diff. Use that
* function to do rename and copy detection, and to split heavily modified
* files into add/delete pairs. After that call, deltas with a status of
* GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
* between 0 and 100 indicating how similar the old and new sides are.
*
* If you ask `git_diff_find_similar` to find heavily modified files to
* break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
* records may have a non-zero similarity score if the self-similarity is
* below the split threshold. To display this value like core Git, invert
* the score (a la `printf("M%03d", 100 - delta->similarity)`).
*/
typedef struct {
git_diff_file old_file;
......@@ -408,18 +421,28 @@ typedef enum {
/** consider unmodified as copy sources? (`--find-copies-harder`) */
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),
/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
/** mark large rewrites for split (`--break-rewrites=/M`) */
GIT_DIFF_FIND_REWRITES = (1 << 4),
/** actually split large rewrites into delete/add pairs */
GIT_DIFF_BREAK_REWRITES = (1 << 5),
/** mark rewrites for split and break into delete/add pairs */
GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
/** find renames/copies for untracked items in working directory */
GIT_DIFF_FIND_FOR_UNTRACKED = (1 << 6),
/** turn on all finding features */
GIT_DIFF_FIND_ALL = (0x1f),
GIT_DIFF_FIND_ALL = (0x0ff),
/** measure similarity ignoring leading whitespace (default) */
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
/** measure similarity ignoring all whitespace */
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 6),
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 12),
/** measure similarity including all data */
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 7),
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 13),
/** measure similarity only by comparing SHAs (fast and cheap) */
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1 << 14),
} git_diff_find_t;
/**
......@@ -446,7 +469,10 @@ typedef struct {
* - `copy_threshold` is the same as the -C option with a value
* - `rename_from_rewrite_threshold` matches the top of the -B option
* - `break_rewrite_threshold` matches the bottom of the -B option
* - `target_limit` matches the -l option
* - `rename_limit` is the maximum number of matches to consider for
* a particular file. This is a little different from the `-l` option
* to regular Git because we will still process up to this many matches
* before abandoning the search.
*
* The `metric` option allows you to plug in a custom similarity metric.
* Set it to NULL for the default internal metric which is based on sampling
......@@ -458,21 +484,21 @@ typedef struct {
unsigned int version;
/** Combination of git_diff_find_t values (default FIND_RENAMES) */
unsigned int flags;
uint32_t flags;
/** Similarity to consider a file renamed (default 50) */
unsigned int rename_threshold;
uint16_t rename_threshold;
/** Similarity of modified to be eligible rename source (default 50) */
unsigned int rename_from_rewrite_threshold;
uint16_t rename_from_rewrite_threshold;
/** Similarity to consider a file a copy (default 50) */
unsigned int copy_threshold;
uint16_t copy_threshold;
/** Similarity to split modify into delete/add pair (default 60) */
unsigned int break_rewrite_threshold;
uint16_t break_rewrite_threshold;
/** Maximum similarity sources to examine (a la diff's `-l` option or
* the `diff.renameLimit` config) (default 200)
/** Maximum similarity sources to examine for a file (somewhat like
* git-diff's `-l` option or `diff.renameLimit` config) (default 200)
*/
unsigned int target_limit;
size_t rename_limit;
/** Pluggable similarity metric; pass NULL to use internal metric */
git_diff_similarity_metric *metric;
......@@ -689,6 +715,22 @@ GIT_EXTERN(int) git_diff_print_compact(
void *payload);
/**
* Iterate over a diff generating text output like "git diff --raw".
*
* Returning a non-zero value from the callbacks will terminate the
* iteration and cause this function to return `GIT_EUSER`.
*
* @param diff A git_diff_list generated by one of the above functions.
* @param print_cb Callback to make per line of diff text.
* @param payload Reference pointer that will be passed to your callback.
* @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_print_raw(
git_diff_list *diff,
git_diff_data_cb print_cb,
void *payload);
/**
* Look up the single character abbreviation for a delta status code.
*
* When you call `git_diff_print_compact` it prints single letter codes into
......
......@@ -90,6 +90,17 @@ GIT_EXTERN(void) git_oid_fromraw(git_oid *out, const unsigned char *raw);
GIT_EXTERN(void) git_oid_fmt(char *out, const git_oid *id);
/**
* Format a git_oid into a partial hex string.
*
* @param out output hex string; you say how many bytes to write.
* If the number of bytes is > GIT_OID_HEXSZ, extra bytes
* will be zeroed; if not, a '\0' terminator is NOT added.
* @param n number of characters to write into out string
* @param id oid structure to format.
*/
GIT_EXTERN(void) git_oid_nfmt(char *out, size_t n, const git_oid *id);
/**
* Format a git_oid into a loose-object path string.
*
* The resulting string is "aa/...", where "aa" is the first two
......@@ -117,10 +128,12 @@ GIT_EXTERN(char *) git_oid_allocfmt(const git_oid *id);
* Format a git_oid into a buffer as a hex format c-string.
*
* If the buffer is smaller than GIT_OID_HEXSZ+1, then the resulting
* oid c-string will be truncated to n-1 characters. If there are
* any input parameter errors (out == NULL, n == 0, oid == NULL),
* then a pointer to an empty string is returned, so that the return
* value can always be printed.
* oid c-string will be truncated to n-1 characters (but will still be
* NUL-byte terminated).
*
* If there are any input parameter errors (out == NULL, n == 0, oid ==
* NULL), then a pointer to an empty string is returned, so that the
* return value can always be printed.
*
* @param out the buffer into which the oid string is output.
* @param n the size of the out buffer.
......
......@@ -676,33 +676,26 @@ static int buffer_to_file(
int file_open_flags,
mode_t file_mode)
{
int fd, error;
int error;
if ((error = git_futils_mkpath2file(path, dir_mode)) < 0)
return error;
if ((fd = p_open(path, file_open_flags, file_mode)) < 0) {
giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
return fd;
}
if ((error = p_write(fd, git_buf_cstr(buffer), git_buf_len(buffer))) < 0) {
giterr_set(GITERR_OS, "Could not write to '%s'", path);
(void)p_close(fd);
} else {
if ((error = p_close(fd)) < 0)
giterr_set(GITERR_OS, "Error while closing '%s'", path);
if ((error = git_futils_writebuffer(
buffer, path, file_open_flags, file_mode)) < 0)
return error;
if ((error = p_stat(path, st)) < 0)
giterr_set(GITERR_OS, "Error while statting '%s'", path);
if (st != NULL && (error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error while statting '%s'", path);
return error;
}
if (!error &&
(file_mode & 0100) != 0 &&
(error = p_chmod(path, file_mode)) < 0)
if ((file_mode & 0100) != 0 && (error = p_chmod(path, file_mode)) < 0) {
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
return error;
}
return error;
return 0;
}
static int blob_content_to_file(
......
......@@ -231,10 +231,23 @@ static char *diff_strdup_prefix(git_pool *pool, const char *prefix)
return git_pool_strndup(pool, prefix, len + 1);
}
/* Select the representative path of a delta: the new-side path for
 * ADDED, RENAMED and COPIED deltas, or whenever the old-side path is
 * NULL; the old-side path in every other case.
 */
GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
{
	switch (delta->status) {
	case GIT_DELTA_ADDED:
	case GIT_DELTA_RENAMED:
	case GIT_DELTA_COPIED:
		return delta->new_file.path;
	default:
		break;
	}

	/* fall back to the new side when there is no old-side path */
	return delta->old_file.path ?
		delta->old_file.path : delta->new_file.path;
}
/* Comparison callback for ordering git_diff_delta records.
 *
 * Deltas are ordered by the path picked by diff_delta__path() and, when
 * the paths are equal, by the numeric value of their status.
 *
 * @param a pointer to the first git_diff_delta
 * @param b pointer to the second git_diff_delta
 * @return negative, zero or positive, in the manner of strcmp()
 */
int git_diff_delta__cmp(const void *a, const void *b)
{
	const git_diff_delta *da = a, *db = b;

	/* use diff_delta__path() rather than old_file.path directly: it
	 * falls back to the new-side path when the old-side path is NULL
	 */
	int val = strcmp(diff_delta__path(da), diff_delta__path(db));

	return val ? val : ((int)da->status - (int)db->status);
}
......
......@@ -34,10 +34,18 @@ enum {
GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */
GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */
GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */
GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */
GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. */
GIT_DIFF_FLAG__IS_RENAME_TARGET = (1 << 18),
GIT_DIFF_FLAG__IS_RENAME_SOURCE = (1 << 19),
GIT_DIFF_FLAG__HAS_SELF_SIMILARITY = (1 << 20),
};
#define GIT_DIFF_FLAG__CLEAR_INTERNAL(F) (F) = ((F) & 0x00FFFF)
#define GIT_DIFF__VERBOSE (1 << 30)
struct git_diff_list {
git_refcount rc;
git_repository *repo;
......
......@@ -236,9 +236,8 @@ static int get_blob_content(
char oidstr[GIT_OID_HEXSZ+1];
git_buf content = GIT_BUF_INIT;
git_oid_fmt(oidstr, &file->oid);
oidstr[GIT_OID_HEXSZ] = 0;
git_buf_printf(&content, "Subproject commit %s\n", oidstr );
git_oid_tostr(oidstr, sizeof(oidstr), &file->oid);
git_buf_printf(&content, "Subproject commit %s\n", oidstr);
map->data = git_buf_detach(&content);
map->len = strlen(map->data);
......@@ -318,14 +317,13 @@ static int get_workdir_sm_content(
}
}
git_oid_fmt(oidstr, &file->oid);
oidstr[GIT_OID_HEXSZ] = '\0';
git_oid_tostr(oidstr, sizeof(oidstr), &file->oid);
if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
sm_status_text = "-dirty";
git_buf_printf(&content, "Subproject commit %s%s\n",
oidstr, sm_status_text);
git_buf_printf(
&content, "Subproject commit %s%s\n", oidstr, sm_status_text);
map->data = git_buf_detach(&content);
map->len = strlen(map->data);
......@@ -1021,8 +1019,33 @@ typedef struct {
git_diff_data_cb print_cb;
void *payload;
git_buf *buf;
int oid_strlen;
} diff_print_info;
/* Prepare a diff_print_info for a printing pass.
 *
 * Records the diff, the print callback, its payload and the scratch
 * buffer, then reads GIT_CVAR_ABBREV from the diff's repository and
 * converts it into the buffer size used for abbreviated OID strings:
 * the configured value plus one for the NUL byte, clamped so that it is
 * at least 2 and at most GIT_OID_HEXSZ + 1.
 *
 * Returns 0 on success or -1 if the configuration lookup fails.
 */
static int diff_print_info_init(
	diff_print_info *pi,
	git_buf *out, git_diff_list *diff, git_diff_data_cb cb, void *payload)
{
	int len;

	assert(diff && diff->repo);

	pi->diff     = diff;
	pi->print_cb = cb;
	pi->payload  = payload;
	pi->buf      = out;

	if (git_repository__cvar(&pi->oid_strlen, diff->repo, GIT_CVAR_ABBREV) < 0)
		return -1;

	len = pi->oid_strlen + 1; /* room for the NUL byte */

	if (len < 2)
		len = 2;
	else if (len > GIT_OID_HEXSZ + 1)
		len = GIT_OID_HEXSZ + 1;

	pi->oid_strlen = len;

	return 0;
}
static char pick_suffix(int mode)
{
if (S_ISDIR(mode))
......@@ -1106,34 +1129,79 @@ int git_diff_print_compact(
git_buf buf = GIT_BUF_INIT;
diff_print_info pi;
pi.diff = diff;
pi.print_cb = print_cb;
pi.payload = payload;
pi.buf = &buf;
error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi);
if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload)))
error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi);
git_buf_free(&buf);
return error;
}
static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta)
/* Per-delta callback that emits one line of raw-format output.
 *
 * Builds a line of the form
 *   ":<old mode> <new mode> <old oid>... <new oid>... <status>[score]\t<path(s)>\n"
 * in the scratch buffer and hands it to the user's print callback as a
 * GIT_DIFF_LINE_FILE_HDR line. RENAMED and COPIED deltas print both the
 * old and new paths; all other deltas print the old path, or the new
 * path when the old one is NULL.
 *
 * @param delta    delta to print
 * @param progress unused
 * @param data     the diff_print_info for this printing pass
 * @return 0 on success (or when the delta is skipped), -1 if the buffer
 *         ran out of memory, or the result of callback_error() when the
 *         print callback returns non-zero
 */
static int print_raw(
	const git_diff_delta *delta, float progress, void *data)
{
	diff_print_info *pi = data;
	char code = git_diff_status_char(delta->status);
	char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1];

	GIT_UNUSED(progress);

	/* a blank status character means there is nothing to print */
	if (code == ' ')
		return 0;

	git_buf_clear(pi->buf);

	/* oid_strlen is the abbreviated OID buffer size, NUL byte included */
	git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid);
	git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid);

	git_buf_printf(
		pi->buf, ":%06o %06o %s... %s... %c",
		delta->old_file.mode, delta->new_file.mode, start_oid, end_oid, code);

	/* append the similarity score only when one was computed */
	if (delta->similarity > 0)
		git_buf_printf(pi->buf, "%03u", delta->similarity);

	if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED)
		git_buf_printf(
			pi->buf, "\t%s %s\n", delta->old_file.path, delta->new_file.path);
	else
		git_buf_printf(
			pi->buf, "\t%s\n", delta->old_file.path ?
			delta->old_file.path : delta->new_file.path);

	/* the printf calls above are unchecked; detect allocation failure here */
	if (git_buf_oom(pi->buf))
		return -1;

	if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR,
			git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload))
		return callback_error();

	return 0;
}
git_oid_tostr(start_oid, abbrevlen, &delta->old_file.oid);
git_oid_tostr(end_oid, abbrevlen, &delta->new_file.oid);
/* Walk the diff and emit one raw-format line per delta through print_cb.
 *
 * Returns 0 on success, or the error produced while setting up the
 * print state or while iterating over the diff.
 */
int git_diff_print_raw(
	git_diff_list *diff,
	git_diff_data_cb print_cb,
	void *payload)
{
	git_buf buf = GIT_BUF_INIT;
	diff_print_info pi;
	int error = diff_print_info_init(&pi, &buf, diff, print_cb, payload);

	/* only iterate if the print state was set up successfully */
	if (error == 0)
		error = git_diff_foreach(diff, print_raw, NULL, NULL, &pi);

	git_buf_free(&buf);

	return error;
}
static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta)
{
char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1];
git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid);
git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid);
/* TODO: Match git diff more closely */
if (delta->old_file.mode == delta->new_file.mode) {
......@@ -1289,13 +1357,9 @@ int git_diff_print_patch(
git_buf buf = GIT_BUF_INIT;
diff_print_info pi;
pi.diff = diff;
pi.print_cb = print_cb;
pi.payload = payload;
pi.buf = &buf;
error = git_diff_foreach(
diff, print_patch_file, print_patch_hunk, print_patch_line, &pi);
if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload)))
error = git_diff_foreach(
diff, print_patch_file, print_patch_hunk, print_patch_line, &pi);
git_buf_free(&buf);
......@@ -1736,12 +1800,9 @@ int git_diff_patch_print(
assert(patch && print_cb);
pi.diff = patch->diff;
pi.print_cb = print_cb;
pi.payload = payload;
pi.buf = &temp;
error = print_patch_file(patch->delta, 0, &pi);
if (!(error = diff_print_info_init(
&pi, &temp, patch->diff, print_cb, payload)))
error = print_patch_file(patch->delta, 0, &pi);
for (h = 0; h < patch->hunks_size && !error; ++h) {
diff_patch_hunk *hunk = &patch->hunks[h];
......
......@@ -18,12 +18,15 @@ static git_diff_delta *diff_delta__dup(
return NULL;
memcpy(delta, d, sizeof(git_diff_delta));
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
if (delta->old_file.path == NULL)
goto fail;
if (d->old_file.path != NULL) {
delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
if (delta->old_file.path == NULL)
goto fail;
}
if (d->new_file.path != d->old_file.path) {
if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) {
delta->new_file.path = git_pool_strdup(pool, d->new_file.path);
if (delta->new_file.path == NULL)
goto fail;
......@@ -220,7 +223,7 @@ int git_diff_find_similar__calc_similarity(
#define DEFAULT_THRESHOLD 50
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
#define DEFAULT_TARGET_LIMIT 200
#define DEFAULT_RENAME_LIMIT 200
static int normalize_find_opts(
git_diff_list *diff,
......@@ -253,12 +256,25 @@ static int normalize_find_opts(
/* some flags imply others */
if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) {
/* if we are only looking for exact matches, then don't turn
* MODIFIED items into ADD/DELETE pairs because it's too picky
*/
opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES);
/* similarly, don't look for self-rewrites to split */
opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES;
}
if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES)
opts->flags |= GIT_DIFF_FIND_RENAMES;
if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)
opts->flags |= GIT_DIFF_FIND_COPIES;
if (opts->flags & GIT_DIFF_BREAK_REWRITES)
opts->flags |= GIT_DIFF_FIND_REWRITES;
#define USE_DEFAULT(X) ((X) == 0 || (X) > 100)
if (USE_DEFAULT(opts->rename_threshold))
......@@ -275,15 +291,15 @@ static int normalize_find_opts(
#undef USE_DEFAULT
if (!opts->target_limit) {
if (!opts->rename_limit) {
int32_t limit = 0;
opts->target_limit = DEFAULT_TARGET_LIMIT;
opts->rename_limit = DEFAULT_RENAME_LIMIT;
if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
giterr_clear();
else if (limit > 0)
opts->target_limit = limit;
opts->rename_limit = limit;
}
/* assign the internal metric with whitespace flag as payload */
......@@ -307,11 +323,12 @@ static int normalize_find_opts(
return 0;
}
static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
static int apply_splits_and_deletes(
git_diff_list *diff, size_t expected_size, bool actually_split)
{
git_vector onto = GIT_VECTOR_INIT;
size_t i;
git_diff_delta *delta;
git_diff_delta *delta, *deleted;
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
return -1;
......@@ -321,9 +338,11 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
continue;
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool);
if (!deleted)
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
delta->similarity = 0;
/* make new record for DELETED side of split */
if (!(deleted = diff_delta__dup(delta, &diff->pool)))
goto on_error;
deleted->status = GIT_DELTA_DELETED;
......@@ -334,32 +353,46 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
if (git_vector_insert(&onto, deleted) < 0)
goto on_error;
delta->status = GIT_DELTA_ADDED;
if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
delta->status = GIT_DELTA_UNTRACKED;
else
delta->status = GIT_DELTA_ADDED;
memset(&delta->old_file, 0, sizeof(delta->old_file));
delta->old_file.path = delta->new_file.path;
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
}
/* clean up delta before inserting into new list */
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
if (delta->status != GIT_DELTA_COPIED &&
delta->status != GIT_DELTA_RENAMED &&
(delta->status != GIT_DELTA_MODIFIED || actually_split))
delta->similarity = 0;
/* insert into new list */
if (git_vector_insert(&onto, delta) < 0)
goto on_error;
}
/* cannot return an error past this point */
git_vector_foreach(&diff->deltas, i, delta)
/* free deltas from old list that didn't make it to the new one */
git_vector_foreach(&diff->deltas, i, delta) {
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
git__free(delta);
}
/* swap new delta list into place */
git_vector_sort(&onto);
git_vector_swap(&diff->deltas, &onto);
git_vector_free(&onto);
git_vector_sort(&diff->deltas);
return 0;
on_error:
git_vector_foreach(&onto, i, delta)
git__free(delta);
git_vector_free(&onto);
return -1;
......@@ -373,13 +406,13 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
static int similarity_calc(
git_diff_list *diff,
git_diff_find_options *opts,
const git_diff_find_options *opts,
size_t file_idx,
void **cache)
{
int error = 0;
git_diff_file *file = similarity_get_file(diff, file_idx);
git_iterator_type_t src = (file_idx & 1) ? diff->old_src : diff->new_src;
git_iterator_type_t src = (file_idx & 1) ? diff->new_src : diff->old_src;
if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
git_buf path = GIT_BUF_INIT;
......@@ -422,22 +455,56 @@ static int similarity_calc(
return error;
}
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
/* - score < 0 means files cannot be compared
* - score >= 100 means files are exact match
* - score == 0 means files are completely different
*/
static int similarity_measure(
int *score,
git_diff_list *diff,
git_diff_find_options *opts,
const git_diff_find_options *opts,
void **cache,
size_t a_idx,
size_t b_idx)
{
int score = 0;
git_diff_file *a_file = similarity_get_file(diff, a_idx);
git_diff_file *b_file = similarity_get_file(diff, b_idx);
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
*score = -1;
/* don't try to compare files of different types */
if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode))
return 0;
if (git_oid__cmp(&a_file->oid, &b_file->oid) == 0)
return 100;
/* if exact match is requested, force calculation of missing OIDs */
if (exact_match) {
if (git_oid_iszero(&a_file->oid) &&
diff->old_src == GIT_ITERATOR_TYPE_WORKDIR &&
!git_diff__oid_for_file(diff->repo, a_file->path,
a_file->mode, a_file->size, &a_file->oid))
a_file->flags |= GIT_DIFF_FLAG_VALID_OID;
if (git_oid_iszero(&b_file->oid) &&
diff->new_src == GIT_ITERATOR_TYPE_WORKDIR &&
!git_diff__oid_for_file(diff->repo, b_file->path,
b_file->mode, b_file->size, &b_file->oid))
b_file->flags |= GIT_DIFF_FLAG_VALID_OID;
}
/* check OID match as a quick test */
if (git_oid__cmp(&a_file->oid, &b_file->oid) == 0) {
*score = 100;
return 0;
}
/* don't calculate signatures if we are doing exact match */
if (exact_match) {
*score = 0;
return 0;
}
/* update signature cache if needed */
if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
......@@ -450,231 +517,369 @@ static int similarity_measure(
return 0;
/* compare signatures */
if (opts->metric->similarity(
&score, cache[a_idx], cache[b_idx], opts->metric->payload) < 0)
return -1;
return opts->metric->similarity(
score, cache[a_idx], cache[b_idx], opts->metric->payload);
}
/* Measure how similar the two sides of a single delta are to each other
 * and store the result on the delta.
 *
 * The measurement is done at most once per delta: when a usable score
 * (>= 0) is obtained it is saved in delta->similarity and the delta is
 * tagged with GIT_DIFF_FLAG__HAS_SELF_SIMILARITY so later calls return
 * immediately.
 *
 * Returns 0 on success, or the negative error from similarity_measure().
 */
static int calc_self_similarity(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
	size_t first = 2 * delta_idx, second = first + 1;
	int score = -1, error;

	/* already measured on an earlier call */
	if (delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY)
		return 0;

	error = similarity_measure(&score, diff, opts, cache, first, second);
	if (error < 0)
		return error;

	/* a negative score means the two sides could not be compared */
	if (score < 0)
		return 0;

	delta->similarity = (uint32_t)score;
	delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY;

	return 0;
}
/* Decide whether the delta at delta_idx may be the destination of a
 * rename or copy, tagging it with GIT_DIFF_FLAG__IS_RENAME_TARGET if so.
 *
 * Rules, in order:
 *  - the new side must be a blob
 *  - UNMODIFIED and DELETED deltas are never targets
 *  - UNTRACKED and IGNORED deltas are targets only with
 *    GIT_DIFF_FIND_FOR_UNTRACKED
 *  - MODIFIED deltas are targets only when rewrite detection is on and
 *    their self-similarity is below the break threshold (the delta is
 *    then also tagged GIT_DIFF_FLAG__TO_SPLIT) or below the
 *    rename-from-rewrite threshold
 *  - every other status is a target
 *
 * A failure while computing self-similarity is treated as "not a target".
 */
static bool is_rename_target(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
	bool from_rewrites;

	/* skip things that aren't plain blobs */
	if (!GIT_MODE_ISBLOB(delta->new_file.mode))
		return false;

	if (delta->status == GIT_DELTA_UNMODIFIED ||
		delta->status == GIT_DELTA_DELETED)
		return false;

	if (delta->status == GIT_DELTA_UNTRACKED ||
		delta->status == GIT_DELTA_IGNORED) {
		if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED))
			return false;
	}
	else if (delta->status == GIT_DELTA_MODIFIED) {
		from_rewrites =
			FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES);

		if (!from_rewrites && !FLAG_SET(opts, GIT_DIFF_FIND_REWRITES))
			return false;

		if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
			return false;

		if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
			delta->similarity < opts->break_rewrite_threshold)
			delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
		else if (!from_rewrites ||
			delta->similarity >= opts->rename_from_rewrite_threshold)
			return false;
	}

	delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET;
	return true;
}
/* Decide whether the delta at delta_idx may be the origin of a rename or
 * copy, tagging it with GIT_DIFF_FLAG__IS_RENAME_SOURCE if so.
 *
 * Rules, in order:
 *  - the old side must be a blob
 *  - ADDED, UNTRACKED and IGNORED deltas are never sources
 *  - DELETED and TYPECHANGE deltas are always sources
 *  - UNMODIFIED deltas are sources only with
 *    GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED
 *  - any other status (MODIFIED, RENAMED, COPIED) is a source when copy
 *    detection is on; otherwise only when rewrite detection is on and
 *    the self-similarity is below the break threshold (the delta is then
 *    also tagged GIT_DIFF_FLAG__TO_SPLIT) or below the
 *    rename-from-rewrite threshold
 *
 * A failure while computing self-similarity is treated as "not a source".
 */
static bool is_rename_source(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);

	/* skip things that aren't blobs */
	if (!GIT_MODE_ISBLOB(delta->old_file.mode))
		return false;

	switch (delta->status) {
	case GIT_DELTA_ADDED:
	case GIT_DELTA_UNTRACKED:
	case GIT_DELTA_IGNORED:
		return false;

	case GIT_DELTA_DELETED:
	case GIT_DELTA_TYPECHANGE:
		break;

	case GIT_DELTA_UNMODIFIED:
		if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
			return false;
		break;

	default: /* MODIFIED, RENAMED, COPIED */
		/* if we're finding copies, this could be a source */
		if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
			break;

		/* otherwise, this is only a source if we can split it */
		if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) &&
			!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES))
			return false;

		if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
			return false;

		if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
			delta->similarity < opts->break_rewrite_threshold) {
			delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
			break;
		}

		if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
			delta->similarity < opts->rename_from_rewrite_threshold)
			break;

		return false;
	}

	delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE;
	return true;
}
#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0)
/* True for a TYPECHANGE delta or one tagged GIT_DIFF_FLAG__TO_SPLIT. */
GIT_INLINE(bool) delta_is_split(git_diff_delta *delta)
{
	if (delta->status == GIT_DELTA_TYPECHANGE)
		return true;

	return (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0;
}
/* True when the delta's status is ADDED, UNTRACKED or IGNORED. */
GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
{
	switch (delta->status) {
	case GIT_DELTA_ADDED:
	case GIT_DELTA_UNTRACKED:
	case GIT_DELTA_IGNORED:
		return true;
	default:
		return false;
	}
}
/* Best match recorded for one delta while searching for renames/copies. */
typedef struct {
/* index into diff->deltas of the best-matching delta found so far;
 * starts out as the delta's own index
 */
uint32_t idx;
/* similarity score of that match; 0 means no match has been recorded */
uint32_t similarity;
} diff_find_match;
int git_diff_find_similar(
git_diff_list *diff,
git_diff_find_options *given_opts)
{
size_t i, j, cache_size, *matches;
size_t i, j, cache_size;
int error = 0, similarity;
git_diff_delta *from, *to;
git_diff_find_options opts;
size_t tried_targets, num_rewrites = 0;
void **cache;
size_t num_rewrites = 0, num_updates = 0;
void **cache; /* cache of similarity metric file signatures */
diff_find_match *matches; /* cache of best matches */
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error;
/* TODO: maybe abort if deltas.length > target_limit ??? */
/* TODO: maybe abort if deltas.length > rename_limit ??? */
if (!git__is_uint32(diff->deltas.length))
return 0;
cache_size = diff->deltas.length * 2; /* must store b/c length may change */
cache = git__calloc(cache_size, sizeof(void *));
GITERR_CHECK_ALLOC(cache);
matches = git__calloc(diff->deltas.length, sizeof(size_t));
matches = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(matches);
/* first break MODIFIED records that are too different (if requested) */
if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
git_vector_foreach(&diff->deltas, i, from) {
if (from->status != GIT_DELTA_MODIFIED)
continue;
similarity = similarity_measure(
diff, &opts, cache, 2 * i, 2 * i + 1);
if (similarity < 0) {
error = similarity;
goto cleanup;
}
if ((unsigned int)similarity < opts.break_rewrite_threshold) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
}
}
/* next find the most similar delta for each rename / copy candidate */
git_vector_foreach(&diff->deltas, i, from) {
tried_targets = 0;
git_vector_foreach(&diff->deltas, i, to) {
size_t tried_sources = 0;
/* skip things that aren't blobs */
if (GIT_MODE_TYPE(from->old_file.mode) !=
GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
continue;
/* don't check UNMODIFIED files as source unless given option */
if (from->status == GIT_DELTA_UNMODIFIED &&
!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
continue;
matches[i].idx = i;
matches[i].similarity = 0;
/* skip all but DELETED files unless copy detection is on */
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) &&
from->status != GIT_DELTA_DELETED &&
(from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
/* skip things that are not rename targets */
if (!is_rename_target(diff, &opts, i, cache))
continue;
git_vector_foreach(&diff->deltas, j, to) {
git_vector_foreach(&diff->deltas, j, from) {
if (i == j)
continue;
/* skip things that aren't blobs */
if (GIT_MODE_TYPE(to->new_file.mode) !=
GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
/* skip things that are not rename sources */
if (!is_rename_source(diff, &opts, j, cache))
continue;
switch (to->status) {
case GIT_DELTA_ADDED:
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_RENAMED:
case GIT_DELTA_COPIED:
/* cap on maximum targets we'll examine (per "to" file) */
if (++tried_sources > opts.rename_limit)
break;
case GIT_DELTA_MODIFIED:
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
continue;
break;
default:
/* only the above status values should be checked */
continue;
}
/* cap on maximum files we'll examine (per "from" file) */
if (++tried_targets > opts.target_limit)
break;
/* calculate similarity and see if this pair beats the
* similarity score of the current best pair.
*/
similarity = similarity_measure(
diff, &opts, cache, 2 * i, 2 * j + 1);
if (similarity < 0) {
error = similarity;
/* calculate similarity for this pair and find best match */
if ((error = similarity_measure(
&similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0)
goto cleanup;
if (similarity < 0) { /* not actually comparable */
--tried_sources;
continue;
}
if (to->similarity < (unsigned int)similarity) {
to->similarity = (unsigned int)similarity;
matches[j] = i + 1;
if (matches[i].similarity < (uint32_t)similarity) {
matches[i].similarity = (uint32_t)similarity;
matches[i].idx = j;
}
}
}
/* next rewrite the diffs with renames / copies */
git_vector_foreach(&diff->deltas, j, to) {
if (!matches[j]) {
assert(to->similarity == 0);
git_vector_foreach(&diff->deltas, i, to) {
/* check if this delta was matched to another one */
if ((similarity = (int)matches[i].similarity) <= 0)
continue;
}
assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0);
from = GIT_VECTOR_GET(&diff->deltas, matches[i].idx);
assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0);
i = matches[j] - 1;
from = GIT_VECTOR_GET(&diff->deltas, i);
assert(from);
/* four possible outcomes here:
* 1. old DELETED and if over rename threshold,
* new becomes RENAMED and old goes away
* 2. old SPLIT and if over rename threshold,
* new becomes RENAMED and old becomes ADDED (clear SPLIT)
* 3. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
* old is more similar to new than it is to itself, in which
* case, new becomes RENAMED and old becomes ADDED
* 4. otherwise if over copy threshold, new becomes COPIED
/* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
* 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE
* 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME
* 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
*/
if (from->status == GIT_DELTA_DELETED) {
if (to->similarity < opts.rename_threshold) {
to->similarity = 0;
continue;
}
to->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
if (delta_is_new_only(to)) {
from->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++;
if (similarity < (int)opts.rename_threshold)
continue;
continue;
}
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if (from->status == GIT_DELTA_MODIFIED &&
(from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0)
{
if (to->similarity < opts.rename_threshold) {
to->similarity = 0;
continue;
}
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
to->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
num_rewrites++;
} else {
assert(delta_is_split(to));
from->status = GIT_DELTA_ADDED;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
memset(&from->old_file, 0, sizeof(from->old_file));
num_rewrites--;
if (similarity < (int)opts.rename_from_rewrite_threshold)
continue;
continue;
}
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if (from->status == GIT_DELTA_MODIFIED &&
FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
to->similarity > opts.rename_threshold)
{
similarity = similarity_measure(
diff, &opts, cache, 2 * i, 2 * i + 1);
to->status = GIT_DELTA_DELETED;
memset(&to->new_file, 0, sizeof(to->new_file));
to->new_file.path = to->old_file.path;
to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
if (similarity < 0) {
error = similarity;
goto cleanup;
num_updates++;
}
}
if ((unsigned int)similarity < opts.rename_from_rewrite_threshold) {
to->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
else if (delta_is_split(from)) {
git_diff_file swap;
from->status = GIT_DELTA_ADDED;
memset(&from->old_file, 0, sizeof(from->old_file));
from->old_file.path = to->old_file.path;
from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if (delta_is_new_only(to)) {
continue;
if (similarity < (int)opts.rename_threshold)
continue;
memcpy(&swap, &from->new_file, sizeof(swap));
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
memcpy(&to->new_file, &swap, sizeof(to->new_file));
to->old_file.path = to->new_file.path;
num_updates++;
} else {
assert(delta_is_split(from));
if (similarity < (int)opts.rename_from_rewrite_threshold)
continue;
memcpy(&swap, &to->new_file, sizeof(swap));
to->status = GIT_DELTA_RENAMED;
to->similarity = (uint32_t)similarity;
memcpy(&to->new_file, &from->new_file, sizeof(to->new_file));
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
memcpy(&from->new_file, &swap, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
/* in the off chance that we've just swapped the new
* element into the correct place, clear the SPLIT flag
*/
if (matches[matches[i].idx].idx == i &&
matches[matches[i].idx].similarity >
opts.rename_from_rewrite_threshold) {
from->status = GIT_DELTA_RENAMED;
from->similarity =
(uint32_t)matches[matches[i].idx].similarity;
matches[matches[i].idx].similarity = 0;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
num_updates++;
}
}
if (to->similarity < opts.copy_threshold) {
to->similarity = 0;
continue;
}
else if (delta_is_new_only(to)) {
if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
similarity < (int)opts.copy_threshold)
continue;
/* convert "to" to a COPIED record */
to->status = GIT_DELTA_COPIED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
}
to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
if (num_rewrites > 0) {
assert(num_rewrites < diff->deltas.length);
num_updates++;
}
}
if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites);
}
diff, diff->deltas.length - num_rewrites,
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup:
git__free(matches);
......
......@@ -202,6 +202,32 @@ int git_futils_readbuffer(git_buf *buf, const char *path)
return git_futils_readbuffer_updated(buf, path, NULL, NULL, NULL);
}
/**
 * Write the full contents of a buffer to a file.
 *
 * @param buf   buffer whose contents (git_buf_cstr / git_buf_len) are written
 * @param path  file to open and write
 * @param flags flags handed to p_open; a value <= 0 selects
 *              O_CREAT | O_TRUNC | O_WRONLY
 * @param mode  mode handed to p_open; 0 selects GIT_FILEMODE_BLOB
 * @return the result of the final p_close on the normal path, or the
 *         negative value returned by p_open / p_write when one of those
 *         fails; every failure path records a message with giterr_set
 */
int git_futils_writebuffer(
	const git_buf *buf, const char *path, int flags, mode_t mode)
{
	int fd, error = 0;

	if (flags <= 0)
		flags = O_CREAT | O_TRUNC | O_WRONLY;
	if (!mode)
		mode = GIT_FILEMODE_BLOB;

	if ((fd = p_open(path, flags, mode)) < 0) {
		giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
		return fd;
	}

	if ((error = p_write(fd, git_buf_cstr(buf), git_buf_len(buf))) < 0) {
		giterr_set(GITERR_OS, "Could not write to '%s'", path);
		/* Report the write failure and stop here: falling through would
		 * close the descriptor a second time and replace the write error
		 * (and its message) with the result of that second close.
		 */
		(void)p_close(fd);
		return error;
	}

	if ((error = p_close(fd)) < 0)
		giterr_set(GITERR_OS, "Error while closing '%s'", path);

	return error;
}
int git_futils_mv_withpath(const char *from, const char *to, const mode_t dirmode)
{
if (git_futils_mkpath2file(to, dirmode) < 0)
......
......@@ -22,6 +22,9 @@ extern int git_futils_readbuffer_updated(
git_buf *obj, const char *path, time_t *mtime, size_t *size, int *updated);
extern int git_futils_readbuffer_fd(git_buf *obj, git_file fd, size_t len);
/**
 * Write the contents of `buf` to the file at `path`.
 *
 * `open_flags` <= 0 selects O_CREAT | O_TRUNC | O_WRONLY and a `mode` of 0
 * selects GIT_FILEMODE_BLOB. Returns a negative value (with the error
 * message set) when the file cannot be opened, written or closed.
 */
extern int git_futils_writebuffer(
const git_buf *buf, const char *path, int open_flags, mode_t mode);
/**
* File utils
*
......@@ -223,6 +226,7 @@ extern git_off_t git_futils_filesize(git_file fd);
/* mask covering the permission bits of a mode value */
#define GIT_MODE_PERMS_MASK 0777
/* 0755 when the 0100 bit of MODE is set, otherwise 0644 */
#define GIT_CANONICAL_PERMS(MODE) (((MODE) & 0100) ? 0755 : 0644)
/* MODE with the permission bits masked off, leaving the type bits */
#define GIT_MODE_TYPE(MODE) ((MODE) & ~GIT_MODE_PERMS_MASK)
/* non-zero when the type bits of MODE equal those of GIT_FILEMODE_BLOB */
#define GIT_MODE_ISBLOB(MODE) (GIT_MODE_TYPE(MODE) == GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
/**
* Convert a mode_t from the OS to a legal git mode_t value.
......
......@@ -68,12 +68,31 @@ GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
return str;
}
void git_oid_fmt(char *str, const git_oid *oid)
void git_oid_nfmt(char *str, size_t n, const git_oid *oid)
{
size_t i;
size_t i, max_i;
if (!oid) {
memset(str, 0, n);
return;
}
if (n > GIT_OID_HEXSZ) {
memset(&str[GIT_OID_HEXSZ], 0, n - GIT_OID_HEXSZ);
n = GIT_OID_HEXSZ;
}
max_i = n / 2;
for (i = 0; i < sizeof(oid->id); i++)
for (i = 0; i < max_i; i++)
str = fmt_one(str, oid->id[i]);
if (n & 1)
*str++ = to_hex[oid->id[i] >> 4];
}
/* Format all GIT_OID_HEXSZ hex digits of `oid` into `str` by delegating to
 * git_oid_nfmt with n == GIT_OID_HEXSZ.
 */
void git_oid_fmt(char *str, const git_oid *oid)
{
git_oid_nfmt(str, GIT_OID_HEXSZ, oid);
}
void git_oid_pathfmt(char *str, const git_oid *oid)
......@@ -91,31 +110,20 @@ char *git_oid_allocfmt(const git_oid *oid)
char *str = git__malloc(GIT_OID_HEXSZ + 1);
if (!str)
return NULL;
git_oid_fmt(str, oid);
str[GIT_OID_HEXSZ] = '\0';
git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
return str;
}
/**
 * Format an oid as a NUL-terminated hex string in a caller-supplied buffer.
 *
 * At most GIT_OID_HEXSZ digits are produced; a smaller buffer yields a
 * truncated prefix. The digits come from git_oid_nfmt, so a NULL `oid`
 * results in an empty string.
 *
 * @param out destination buffer
 * @param n   size of `out` in bytes, including room for the terminator
 * @param oid oid to format, or NULL
 * @return `out`, or a pointer to an empty string literal when `out` is NULL
 *         or `n` is 0
 */
char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
{
	if (!out || n == 0)
		return "";

	/* never format more than the full hex form plus its terminator */
	if (n > GIT_OID_HEXSZ + 1)
		n = GIT_OID_HEXSZ + 1;

	/* n >= 1 here, so n - 1 cannot wrap around */
	git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
	out[n - 1] = '\0';

	return out;
}
......
......@@ -109,6 +109,13 @@ GIT_INLINE(int) git__is_sizet(git_off_t p)
return p == (git_off_t)r;
}
/** @return non-zero when p is unchanged by a round trip through uint32_t,
 * i.e. when p is representable as a uint32_t
 */
GIT_INLINE(int) git__is_uint32(size_t p)
{
	return (size_t)(uint32_t)p == p;
}
/* 32-bit cross-platform rotl */
#ifdef _MSC_VER /* use built-in method in MSVC */
# define git__rotl(v, s) (uint32_t)_rotl(v, s)
......
......@@ -213,3 +213,8 @@ void diff_print(FILE *fp, git_diff_list *diff)
{
cl_git_pass(git_diff_print_patch(diff, diff_print_cb, fp ? fp : stderr));
}
/* Test helper: print `diff` with git_diff_print_raw through diff_print_cb,
 * writing to `fp`, or to stderr when `fp` is NULL. The call is wrapped in
 * cl_git_pass, so a failing print fails the running test.
 */
void diff_print_raw(FILE *fp, git_diff_list *diff)
{
cl_git_pass(git_diff_print_raw(diff, diff_print_cb, fp ? fp : stderr));
}
......@@ -65,4 +65,4 @@ extern int diff_foreach_via_iterator(
void *data);
extern void diff_print(FILE *fp, git_diff_list *diff);
/* print `diff` in raw format to `fp` (stderr when `fp` is NULL) */
extern void diff_print_raw(FILE *fp, git_diff_list *diff);
#include "clar_libgit2.h"
#include "diff_helpers.h"
#include "buf_text.h"
static git_repository *g_repo = NULL;
......@@ -71,8 +72,10 @@ void test_diff_rename__match_oid(void)
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
* don't use NULL opts to avoid config `diff.renames` contamination
*/
cl_git_pass(git_diff_find_similar(diff, NULL));
opts.flags = GIT_DIFF_FIND_RENAMES;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
......@@ -242,8 +245,8 @@ void test_diff_rename__not_exact_match(void)
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_list_free(diff);
......@@ -377,7 +380,8 @@ void test_diff_rename__handles_small_files(void)
*/
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES | GIT_DIFF_FIND_AND_BREAK_REWRITES;
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES |
GIT_DIFF_FIND_AND_BREAK_REWRITES;
cl_git_pass(git_diff_find_similar(diff, &opts));
git_diff_list_free(diff);
......@@ -387,9 +391,160 @@ void test_diff_rename__handles_small_files(void)
/* Rename detection on tree-to-workdir diffs.
 *
 * Diffs the tree of commit sha0 against the working directory several
 * times, rewriting renames/songof7cities.txt between runs, and checks the
 * per-status delta counts that git_diff_find_similar produces under
 * different find flags.
 */
void test_diff_rename__working_directory_changes(void)
{
/* let's rewrite some files in the working directory on demand */
const char *sha0 = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
const char *blobsha = "66311f5cfbe7836c27510a3ba2f43e282e2c8bba";
git_oid id;
git_tree *tree;
git_blob *blob;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
git_buf old_content = GIT_BUF_INIT, content = GIT_BUF_INIT;;
tree = resolve_commit_oid_to_tree(g_repo, sha0);
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED | GIT_DIFF_INCLUDE_UNTRACKED;
/*
$ git cat-file -p 2bc7f351d20b53f1c72c16c4b036e491c478c49a^{tree}
100644 blob 66311f5cfbe7836c27510a3ba2f43e282e2c8bba sevencities.txt
100644 blob ad0a8e55a104ac54a8a29ed4b84b49e76837a113 sixserving.txt
100644 blob 66311f5cfbe7836c27510a3ba2f43e282e2c8bba songofseven.txt
$ for f in *.txt; do
echo `git hash-object -t blob $f` $f
done
eaf4a3e3bfe68585e90cada20736ace491cd100b ikeepsix.txt
f90d4fc20ecddf21eebe6a37e9225d244339d2b5 sixserving.txt
4210ffd5c390b21dd5483375e75288dea9ede512 songof7cities.txt
9a69d960ae94b060f56c2a8702545e2bb1abb935 untimely.txt
*/
/* baseline: plain tree-to-workdir diff, no similarity detection yet */
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
/* git diff --no-renames 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
/* with GIT_DIFF_FIND_ALL the same diff collapses to 5 deltas, 2 of them
 * renames
 */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* rewrite files in the working directory with / without CRLF changes */
/* songof7cities.txt is read, passed through git_buf_text_lf_to_crlf and
 * written back in place
 */
cl_git_pass(
git_futils_readbuffer(&old_content, "renames/songof7cities.txt"));
cl_git_pass(
git_buf_text_lf_to_crlf(&content, &old_content));
cl_git_pass(
git_futils_writebuffer(&content, "renames/songof7cities.txt", 0, 0));
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
/* the expected counts are the same as before the rewrite */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* try a different whitespace option */
/* with GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE only one rename is expected */
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* try a different matching option */
/* with GIT_DIFF_FIND_EXACT_MATCH_ONLY no renames are expected; the counts
 * match the baseline diff
 */
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_EXACT_MATCH_ONLY;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
git_diff_list_free(diff);
/* again with exact match blob */
/* overwrite songof7cities.txt with the raw content of blob `blobsha`, the
 * id listed above for sevencities.txt and songofseven.txt in the tree
 */
cl_git_pass(git_oid_fromstr(&id, blobsha));
cl_git_pass(git_blob_lookup(&blob, g_repo, &id));
cl_git_pass(git_buf_set(
&content, git_blob_rawcontent(blob), git_blob_rawsize(blob)));
cl_git_rewritefile("renames/songof7cities.txt", content.ptr);
git_blob_free(blob);
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_EXACT_MATCH_ONLY;
cl_git_pass(git_diff_find_similar(diff, &opts));
/*
fprintf(stderr, "\n\n");
diff_print_raw(stderr, diff);
*/
/* exact matching is now expected to report one rename */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* release the tree and both content buffers */
git_tree_free(tree);
git_buf_free(&content);
git_buf_free(&old_content);
}
void test_diff_rename__patch(void)
......@@ -446,3 +601,213 @@ void test_diff_rename__patch(void)
git_tree_free(old_tree);
git_tree_free(new_tree);
}
/* Two files swap contents: untimely.txt and songof7cities.txt are rewritten
 * with each other's data and staged. The tree-to-index diff first shows two
 * MODIFIED deltas; after git_diff_find_similar with GIT_DIFF_FIND_ALL both
 * are expected to be reported as RENAMED.
 */
void test_diff_rename__file_exchange(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* read both files, then write each one's content over the other */
cl_git_pass(git_futils_readbuffer(&c1, "renames/untimely.txt"));
cl_git_pass(git_futils_readbuffer(&c2, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/songof7cities.txt", 0, 0));
cl_git_pass(git_futils_writebuffer(&c2, "renames/untimely.txt", 0, 0));
/* load the HEAD tree into the index, then stage the two rewritten files */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* before similarity detection: two modified files */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
/* after similarity detection: two renames */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
git_buf_free(&c2);
}
/* One-way content move: songof7cities.txt receives the content of
 * untimely.txt, while untimely.txt is replaced by 100 copies of an
 * unrelated line. The tree-to-index diff first shows two MODIFIED deltas;
 * after git_diff_find_similar with GIT_DIFF_FIND_ALL the test expects three
 * deltas: one RENAMED, one ADDED and one DELETED.
 */
void test_diff_rename__file_partial_exchange(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
int i;
/* copy untimely.txt over songof7cities.txt */
cl_git_pass(git_futils_readbuffer(&c1, "renames/untimely.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/songof7cities.txt", 0, 0));
/* replace untimely.txt with generated filler text */
for (i = 0; i < 100; ++i)
cl_git_pass(git_buf_puts(&c2, "this is not the content you are looking for\n"));
cl_git_pass(git_futils_writebuffer(&c2, "renames/untimely.txt", 0, 0));
/* load the HEAD tree into the index, then stage the two rewritten files */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* before similarity detection: two modified files */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
/* after similarity detection: rename + add + delete */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(3, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
git_buf_free(&c2);
}
/* One file split into two overlapping new files: song_a.txt gets the first
 * two thirds of songof7cities.txt and song_b.txt the last two thirds. The
 * tree-to-index diff (unmodified files included) first shows two ADDED
 * deltas; after git_diff_find_similar with GIT_DIFF_FIND_ALL both are
 * expected to be reported as COPIED.
 */
void test_diff_rename__file_split(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* put the first 2/3 of file into one new place
* and the second 2/3 of file into another new place
*/
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
cl_git_pass(git_buf_set(&c2, c1.ptr, c1.size));
/* c1 keeps the leading 2/3; c2 drops its leading 1/3 */
git_buf_truncate(&c1, c1.size * 2 / 3);
git_buf_consume(&c2, ((char *)c2.ptr) + (c2.size / 3));
cl_git_pass(git_futils_writebuffer(&c1, "renames/song_a.txt", 0, 0));
cl_git_pass(git_futils_writebuffer(&c2, "renames/song_b.txt", 0, 0));
/* load the HEAD tree into the index, then stage the two new files */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "song_a.txt"));
cl_git_pass(git_index_add_bypath(index, "song_b.txt"));
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* before similarity detection: two additions plus four unmodified files */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(4, exp.file_status[GIT_DELTA_UNMODIFIED]);
/* after similarity detection: both additions become copies */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_COPIED]);
cl_assert_equal_i(4, exp.file_status[GIT_DELTA_UNMODIFIED]);
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
git_buf_free(&c2);
}
/* Rename onto an existing path: songof7cities.txt is removed from the index
 * and its content is written over untimely.txt. The tree-to-index diff
 * (unmodified files included) first shows one DELETED and one MODIFIED
 * delta; after git_diff_find_similar with GIT_DIFF_FIND_ALL the test
 * expects one DELETED and one RENAMED delta, still four deltas in total.
 */
void test_diff_rename__from_deleted_to_split(void)
{
git_buf c1 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* old file is missing, new file is actually old file renamed */
cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
/* load the HEAD tree into the index, drop the old path and stage the
 * overwritten one
 */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_remove_bypath(index, "songof7cities.txt"));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* before similarity detection: delete + modify + two unmodified */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNMODIFIED]);
/* after similarity detection: delete + rename + two unmodified */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(4, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNMODIFIED]);
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
}
......@@ -73,3 +73,40 @@ void test_object_raw_convert__succeed_on_oid_to_string_conversion_big(void)
cl_assert(str && str == big && *(str+GIT_OID_HEXSZ+2) == 'Y');
cl_assert(str && str == big && *(str+GIT_OID_HEXSZ+3) == 'Z');
}
/* Format the first `count` hex digits of `oid` into `buffer` with
 * git_oid_nfmt, terminate the result at buffer[count], and assert that it
 * equals `expected`. `buffer` therefore needs room for count + 1 bytes.
 */
static void check_partial_oid(
char *buffer, size_t count, const git_oid *oid, const char *expected)
{
git_oid_nfmt(buffer, count, oid);
buffer[count] = '\0';
cl_assert_equal_s(expected, buffer);
}
/* Exercise git_oid_nfmt with buffer sizes above, at and below
 * GIT_OID_HEXSZ, including odd lengths.
 */
void test_object_raw_convert__convert_oid_partially(void)
{
const char *exp = "16a0123456789abcdef4b775213c23a8bd74f5e0";
git_oid in;
char big[GIT_OID_HEXSZ + 1 + 3]; /* note + 4 => big buffer */
cl_git_pass(git_oid_fromstr(&in, exp));
/* sizes larger than GIT_OID_HEXSZ: the result is compared as a C string
 * without adding a terminator here, so the bytes past the hex digits
 * must have been zeroed by git_oid_nfmt
 */
git_oid_nfmt(big, sizeof(big), &in);
cl_assert_equal_s(exp, big);
git_oid_nfmt(big, GIT_OID_HEXSZ + 1, &in);
cl_assert_equal_s(exp, big);
/* short prefixes, odd and even lengths */
check_partial_oid(big, 1, &in, "1");
check_partial_oid(big, 2, &in, "16");
check_partial_oid(big, 3, &in, "16a");
check_partial_oid(big, 4, &in, "16a0");
check_partial_oid(big, 5, &in, "16a01");
/* the full length and the lengths just below it */
check_partial_oid(big, GIT_OID_HEXSZ, &in, exp);
check_partial_oid(
big, GIT_OID_HEXSZ - 1, &in, "16a0123456789abcdef4b775213c23a8bd74f5e");
check_partial_oid(
big, GIT_OID_HEXSZ - 2, &in, "16a0123456789abcdef4b775213c23a8bd74f5");
check_partial_oid(
big, GIT_OID_HEXSZ - 3, &in, "16a0123456789abcdef4b775213c23a8bd74f");
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment