Commit 30caf0cf by Vicent Martí

Merge pull request #1595 from arrbee/even-more-rename-fixes

Even more rename detection fixes
parents 87a56fe0 49f70f2c
...@@ -84,6 +84,10 @@ static int check_uint16_param(const char *arg, const char *pattern, uint16_t *va ...@@ -84,6 +84,10 @@ static int check_uint16_param(const char *arg, const char *pattern, uint16_t *va
char *endptr = NULL; char *endptr = NULL;
if (strncmp(arg, pattern, len)) if (strncmp(arg, pattern, len))
return 0; return 0;
if (arg[len] == '\0' && pattern[len - 1] != '=')
return 1;
if (arg[len] == '=')
len++;
strval = strtoul(arg + len, &endptr, 0); strval = strtoul(arg + len, &endptr, 0);
if (endptr == arg) if (endptr == arg)
return 0; return 0;
...@@ -110,13 +114,20 @@ static void usage(const char *message, const char *arg) ...@@ -110,13 +114,20 @@ static void usage(const char *message, const char *arg)
exit(1); exit(1);
} }
/* Output styles selectable on the command line; the default is a patch. */
enum {
	FORMAT_PATCH,   /* -p / -u / --patch */
	FORMAT_COMPACT, /* --name-status */
	FORMAT_RAW      /* --raw */
};
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
git_repository *repo = NULL; git_repository *repo = NULL;
git_tree *t1 = NULL, *t2 = NULL; git_tree *t1 = NULL, *t2 = NULL;
git_diff_options opts = GIT_DIFF_OPTIONS_INIT; git_diff_options opts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
git_diff_list *diff; git_diff_list *diff;
int i, color = -1, compact = 0, cached = 0; int i, color = -1, format = FORMAT_PATCH, cached = 0;
char *a, *treeish1 = NULL, *treeish2 = NULL; char *a, *treeish1 = NULL, *treeish2 = NULL;
const char *dir = "."; const char *dir = ".";
...@@ -137,11 +148,13 @@ int main(int argc, char *argv[]) ...@@ -137,11 +148,13 @@ int main(int argc, char *argv[])
} }
else if (!strcmp(a, "-p") || !strcmp(a, "-u") || else if (!strcmp(a, "-p") || !strcmp(a, "-u") ||
!strcmp(a, "--patch")) !strcmp(a, "--patch"))
compact = 0; format = FORMAT_PATCH;
else if (!strcmp(a, "--cached")) else if (!strcmp(a, "--cached"))
cached = 1; cached = 1;
else if (!strcmp(a, "--name-status")) else if (!strcmp(a, "--name-status"))
compact = 1; format = FORMAT_COMPACT;
else if (!strcmp(a, "--raw"))
format = FORMAT_RAW;
else if (!strcmp(a, "--color")) else if (!strcmp(a, "--color"))
color = 0; color = 0;
else if (!strcmp(a, "--no-color")) else if (!strcmp(a, "--no-color"))
...@@ -160,6 +173,20 @@ int main(int argc, char *argv[]) ...@@ -160,6 +173,20 @@ int main(int argc, char *argv[])
opts.flags |= GIT_DIFF_INCLUDE_IGNORED; opts.flags |= GIT_DIFF_INCLUDE_IGNORED;
else if (!strcmp(a, "--untracked")) else if (!strcmp(a, "--untracked"))
opts.flags |= GIT_DIFF_INCLUDE_UNTRACKED; opts.flags |= GIT_DIFF_INCLUDE_UNTRACKED;
else if (check_uint16_param(a, "-M", &findopts.rename_threshold) ||
check_uint16_param(a, "--find-renames",
&findopts.rename_threshold))
findopts.flags |= GIT_DIFF_FIND_RENAMES;
else if (check_uint16_param(a, "-C", &findopts.copy_threshold) ||
check_uint16_param(a, "--find-copies",
&findopts.copy_threshold))
findopts.flags |= GIT_DIFF_FIND_COPIES;
else if (!strcmp(a, "--find-copies-harder"))
findopts.flags |= GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
else if (!strncmp(a, "-B", 2) || !strncmp(a, "--break-rewrites", 16)) {
/* TODO: parse thresholds */
findopts.flags |= GIT_DIFF_FIND_REWRITES;
}
else if (!check_uint16_param(a, "-U", &opts.context_lines) && else if (!check_uint16_param(a, "-U", &opts.context_lines) &&
!check_uint16_param(a, "--unified=", &opts.context_lines) && !check_uint16_param(a, "--unified=", &opts.context_lines) &&
!check_uint16_param(a, "--inter-hunk-context=", !check_uint16_param(a, "--inter-hunk-context=",
...@@ -204,13 +231,24 @@ int main(int argc, char *argv[]) ...@@ -204,13 +231,24 @@ int main(int argc, char *argv[])
else else
check(git_diff_index_to_workdir(&diff, repo, NULL, &opts), "Diff"); check(git_diff_index_to_workdir(&diff, repo, NULL, &opts), "Diff");
if ((findopts.flags & GIT_DIFF_FIND_ALL) != 0)
check(git_diff_find_similar(diff, &findopts),
"finding renames and copies ");
if (color >= 0) if (color >= 0)
fputs(colors[0], stdout); fputs(colors[0], stdout);
if (compact) switch (format) {
check(git_diff_print_compact(diff, printer, &color), "Displaying diff"); case FORMAT_PATCH:
else
check(git_diff_print_patch(diff, printer, &color), "Displaying diff"); check(git_diff_print_patch(diff, printer, &color), "Displaying diff");
break;
case FORMAT_COMPACT:
check(git_diff_print_compact(diff, printer, &color), "Displaying diff");
break;
case FORMAT_RAW:
check(git_diff_print_raw(diff, printer, &color), "Displaying diff");
break;
}
if (color >= 0) if (color >= 0)
fputs(colors[0], stdout); fputs(colors[0], stdout);
......
...@@ -243,6 +243,19 @@ typedef struct { ...@@ -243,6 +243,19 @@ typedef struct {
* `NOT_BINARY` flag set to avoid examining file contents if you do not pass * `NOT_BINARY` flag set to avoid examining file contents if you do not pass
* in hunk and/or line callbacks to the diff foreach iteration function. It * in hunk and/or line callbacks to the diff foreach iteration function. It
* will just use the git attributes for those files. * will just use the git attributes for those files.
*
* The similarity score is zero unless you call `git_diff_find_similar()`
* which does a similarity analysis of files in the diff. Use that
* function to do rename and copy detection, and to split heavily modified
* files into add/delete pairs. After that call, deltas with a status of
* GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
* between 0 and 100 indicating how similar the old and new sides are.
*
* If you ask `git_diff_find_similar` to find heavily modified files to
* break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
* records may have a non-zero similarity score if the self-similarity is
* below the split threshold. To display this value like core Git, invert
* the score (a la `printf("M%03d", 100 - delta->similarity)`).
*/ */
typedef struct { typedef struct {
git_diff_file old_file; git_diff_file old_file;
...@@ -408,18 +421,28 @@ typedef enum { ...@@ -408,18 +421,28 @@ typedef enum {
/** consider unmodified as copy sources? (`--find-copies-harder`) */ /** consider unmodified as copy sources? (`--find-copies-harder`) */
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3), GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),
/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */ /** mark large rewrites for split (`--break-rewrites=/M`) */
GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4), GIT_DIFF_FIND_REWRITES = (1 << 4),
/** actually split large rewrites into delete/add pairs */
GIT_DIFF_BREAK_REWRITES = (1 << 5),
/** mark rewrites for split and break into delete/add pairs */
GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
/** find renames/copies for untracked items in working directory */
GIT_DIFF_FIND_FOR_UNTRACKED = (1 << 6),
/** turn on all finding features */ /** turn on all finding features */
GIT_DIFF_FIND_ALL = (0x1f), GIT_DIFF_FIND_ALL = (0x0ff),
/** measure similarity ignoring leading whitespace (default) */ /** measure similarity ignoring leading whitespace (default) */
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0, GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
/** measure similarity ignoring all whitespace */ /** measure similarity ignoring all whitespace */
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 6), GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 12),
/** measure similarity including all data */ /** measure similarity including all data */
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 7), GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 13),
/** measure similarity only by comparing SHAs (fast and cheap) */
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1 << 14),
} git_diff_find_t; } git_diff_find_t;
/** /**
...@@ -446,7 +469,10 @@ typedef struct { ...@@ -446,7 +469,10 @@ typedef struct {
* - `copy_threshold` is the same as the -C option with a value * - `copy_threshold` is the same as the -C option with a value
* - `rename_from_rewrite_threshold` matches the top of the -B option * - `rename_from_rewrite_threshold` matches the top of the -B option
* - `break_rewrite_threshold` matches the bottom of the -B option * - `break_rewrite_threshold` matches the bottom of the -B option
* - `target_limit` matches the -l option * - `rename_limit` is the maximum number of matches to consider for
* a particular file. This is a little different from the `-l` option
* to regular Git because we will still process up to this many matches
* before abandoning the search.
* *
* The `metric` option allows you to plug in a custom similarity metric. * The `metric` option allows you to plug in a custom similarity metric.
* Set it to NULL for the default internal metric which is based on sampling * Set it to NULL for the default internal metric which is based on sampling
...@@ -458,21 +484,21 @@ typedef struct { ...@@ -458,21 +484,21 @@ typedef struct {
unsigned int version; unsigned int version;
/** Combination of git_diff_find_t values (default FIND_RENAMES) */ /** Combination of git_diff_find_t values (default FIND_RENAMES) */
unsigned int flags; uint32_t flags;
/** Similarity to consider a file renamed (default 50) */ /** Similarity to consider a file renamed (default 50) */
unsigned int rename_threshold; uint16_t rename_threshold;
/** Similarity of modified to be eligible rename source (default 50) */ /** Similarity of modified to be eligible rename source (default 50) */
unsigned int rename_from_rewrite_threshold; uint16_t rename_from_rewrite_threshold;
/** Similarity to consider a file a copy (default 50) */ /** Similarity to consider a file a copy (default 50) */
unsigned int copy_threshold; uint16_t copy_threshold;
/** Similarity to split modify into delete/add pair (default 60) */ /** Similarity to split modify into delete/add pair (default 60) */
unsigned int break_rewrite_threshold; uint16_t break_rewrite_threshold;
/** Maximum similarity sources to examine (a la diff's `-l` option or /** Maximum similarity sources to examine for a file (somewhat like
* the `diff.renameLimit` config) (default 200) * git-diff's `-l` option or `diff.renameLimit` config) (default 200)
*/ */
unsigned int target_limit; size_t rename_limit;
/** Pluggable similarity metric; pass NULL to use internal metric */ /** Pluggable similarity metric; pass NULL to use internal metric */
git_diff_similarity_metric *metric; git_diff_similarity_metric *metric;
...@@ -689,6 +715,22 @@ GIT_EXTERN(int) git_diff_print_compact( ...@@ -689,6 +715,22 @@ GIT_EXTERN(int) git_diff_print_compact(
void *payload); void *payload);
/** /**
* Iterate over a diff generating text output like "git diff --raw".
*
* Returning a non-zero value from the callbacks will terminate the
* iteration and cause this function to return `GIT_EUSER`.
*
* @param diff A git_diff_list generated by one of the above functions.
* @param print_cb Callback to make per line of diff text.
* @param payload Reference pointer that will be passed to your callback.
* @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_print_raw(
git_diff_list *diff,
git_diff_data_cb print_cb,
void *payload);
/**
* Look up the single character abbreviation for a delta status code. * Look up the single character abbreviation for a delta status code.
* *
* When you call `git_diff_print_compact` it prints single letter codes into * When you call `git_diff_print_compact` it prints single letter codes into
......
...@@ -90,6 +90,17 @@ GIT_EXTERN(void) git_oid_fromraw(git_oid *out, const unsigned char *raw); ...@@ -90,6 +90,17 @@ GIT_EXTERN(void) git_oid_fromraw(git_oid *out, const unsigned char *raw);
GIT_EXTERN(void) git_oid_fmt(char *out, const git_oid *id); GIT_EXTERN(void) git_oid_fmt(char *out, const git_oid *id);
/** /**
* Format a git_oid into a partial hex string.
*
* @param out output hex string; you say how many bytes to write.
* If the number of bytes is > GIT_OID_HEXSZ, extra bytes
* will be zeroed; if not, a '\0' terminator is NOT added.
* @param n number of characters to write into out string
* @param id oid structure to format.
*/
GIT_EXTERN(void) git_oid_nfmt(char *out, size_t n, const git_oid *id);
/**
* Format a git_oid into a loose-object path string. * Format a git_oid into a loose-object path string.
* *
* The resulting string is "aa/...", where "aa" is the first two * The resulting string is "aa/...", where "aa" is the first two
...@@ -117,10 +128,12 @@ GIT_EXTERN(char *) git_oid_allocfmt(const git_oid *id); ...@@ -117,10 +128,12 @@ GIT_EXTERN(char *) git_oid_allocfmt(const git_oid *id);
* Format a git_oid into a buffer as a hex format c-string. * Format a git_oid into a buffer as a hex format c-string.
* *
* If the buffer is smaller than GIT_OID_HEXSZ+1, then the resulting * If the buffer is smaller than GIT_OID_HEXSZ+1, then the resulting
* oid c-string will be truncated to n-1 characters. If there are * oid c-string will be truncated to n-1 characters (but will still be
* any input parameter errors (out == NULL, n == 0, oid == NULL), * NUL-byte terminated).
* then a pointer to an empty string is returned, so that the return *
* value can always be printed. * If there are any input parameter errors (out == NULL, n == 0, oid ==
* NULL), then a pointer to an empty string is returned, so that the
* return value can always be printed.
* *
* @param out the buffer into which the oid string is output. * @param out the buffer into which the oid string is output.
* @param n the size of the out buffer. * @param n the size of the out buffer.
......
...@@ -676,33 +676,26 @@ static int buffer_to_file( ...@@ -676,33 +676,26 @@ static int buffer_to_file(
int file_open_flags, int file_open_flags,
mode_t file_mode) mode_t file_mode)
{ {
int fd, error; int error;
if ((error = git_futils_mkpath2file(path, dir_mode)) < 0) if ((error = git_futils_mkpath2file(path, dir_mode)) < 0)
return error; return error;
if ((fd = p_open(path, file_open_flags, file_mode)) < 0) { if ((error = git_futils_writebuffer(
giterr_set(GITERR_OS, "Could not open '%s' for writing", path); buffer, path, file_open_flags, file_mode)) < 0)
return fd; return error;
}
if ((error = p_write(fd, git_buf_cstr(buffer), git_buf_len(buffer))) < 0) {
giterr_set(GITERR_OS, "Could not write to '%s'", path);
(void)p_close(fd);
} else {
if ((error = p_close(fd)) < 0)
giterr_set(GITERR_OS, "Error while closing '%s'", path);
if ((error = p_stat(path, st)) < 0) if (st != NULL && (error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error while statting '%s'", path); giterr_set(GITERR_OS, "Error while statting '%s'", path);
return error;
} }
if (!error && if ((file_mode & 0100) != 0 && (error = p_chmod(path, file_mode)) < 0) {
(file_mode & 0100) != 0 &&
(error = p_chmod(path, file_mode)) < 0)
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path); giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
return error; return error;
}
return 0;
} }
static int blob_content_to_file( static int blob_content_to_file(
......
...@@ -231,10 +231,23 @@ static char *diff_strdup_prefix(git_pool *pool, const char *prefix) ...@@ -231,10 +231,23 @@ static char *diff_strdup_prefix(git_pool *pool, const char *prefix)
return git_pool_strndup(pool, prefix, len + 1); return git_pool_strndup(pool, prefix, len + 1);
} }
/*
 * Select the path that identifies a delta.
 *
 * Added, renamed and copied deltas are identified by their new-side path.
 * Every other status uses the old-side path, falling back to the new-side
 * path when the old-side path is NULL.
 */
GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
{
	switch (delta->status) {
	case GIT_DELTA_ADDED:
	case GIT_DELTA_RENAMED:
	case GIT_DELTA_COPIED:
		return delta->new_file.path;
	default:
		break;
	}

	return delta->old_file.path ?
		delta->old_file.path : delta->new_file.path;
}
int git_diff_delta__cmp(const void *a, const void *b) int git_diff_delta__cmp(const void *a, const void *b)
{ {
const git_diff_delta *da = a, *db = b; const git_diff_delta *da = a, *db = b;
int val = strcmp(da->old_file.path, db->old_file.path); int val = strcmp(diff_delta__path(da), diff_delta__path(db));
return val ? val : ((int)da->status - (int)db->status); return val ? val : ((int)da->status - (int)db->status);
} }
......
...@@ -34,10 +34,18 @@ enum { ...@@ -34,10 +34,18 @@ enum {
GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */ GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */
GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */ GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */
GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */ GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */
GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */ GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. */
GIT_DIFF_FLAG__IS_RENAME_TARGET = (1 << 18),
GIT_DIFF_FLAG__IS_RENAME_SOURCE = (1 << 19),
GIT_DIFF_FLAG__HAS_SELF_SIMILARITY = (1 << 20),
}; };
#define GIT_DIFF_FLAG__CLEAR_INTERNAL(F) (F) = ((F) & 0x00FFFF)
#define GIT_DIFF__VERBOSE (1 << 30)
struct git_diff_list { struct git_diff_list {
git_refcount rc; git_refcount rc;
git_repository *repo; git_repository *repo;
......
...@@ -236,9 +236,8 @@ static int get_blob_content( ...@@ -236,9 +236,8 @@ static int get_blob_content(
char oidstr[GIT_OID_HEXSZ+1]; char oidstr[GIT_OID_HEXSZ+1];
git_buf content = GIT_BUF_INIT; git_buf content = GIT_BUF_INIT;
git_oid_fmt(oidstr, &file->oid); git_oid_tostr(oidstr, sizeof(oidstr), &file->oid);
oidstr[GIT_OID_HEXSZ] = 0; git_buf_printf(&content, "Subproject commit %s\n", oidstr);
git_buf_printf(&content, "Subproject commit %s\n", oidstr );
map->data = git_buf_detach(&content); map->data = git_buf_detach(&content);
map->len = strlen(map->data); map->len = strlen(map->data);
...@@ -318,14 +317,13 @@ static int get_workdir_sm_content( ...@@ -318,14 +317,13 @@ static int get_workdir_sm_content(
} }
} }
git_oid_fmt(oidstr, &file->oid); git_oid_tostr(oidstr, sizeof(oidstr), &file->oid);
oidstr[GIT_OID_HEXSZ] = '\0';
if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status)) if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
sm_status_text = "-dirty"; sm_status_text = "-dirty";
git_buf_printf(&content, "Subproject commit %s%s\n", git_buf_printf(
oidstr, sm_status_text); &content, "Subproject commit %s%s\n", oidstr, sm_status_text);
map->data = git_buf_detach(&content); map->data = git_buf_detach(&content);
map->len = strlen(map->data); map->len = strlen(map->data);
...@@ -1021,8 +1019,33 @@ typedef struct { ...@@ -1021,8 +1019,33 @@ typedef struct {
git_diff_data_cb print_cb; git_diff_data_cb print_cb;
void *payload; void *payload;
git_buf *buf; git_buf *buf;
int oid_strlen;
} diff_print_info; } diff_print_info;
/*
 * Fill in a diff_print_info for one of the diff printing functions.
 *
 * Stores the diff, callback, payload and scratch buffer, then reads the
 * GIT_CVAR_ABBREV setting of the diff's repository to size abbreviated
 * OID strings.  The stored `oid_strlen` includes room for the NUL byte
 * and is kept within [2, GIT_OID_HEXSZ + 1].
 *
 * Returns 0 on success or -1 if the abbreviation setting cannot be read.
 */
static int diff_print_info_init(
	diff_print_info *pi,
	git_buf *out, git_diff_list *diff, git_diff_data_cb cb, void *payload)
{
	assert(diff && diff->repo);

	pi->buf      = out;
	pi->payload  = payload;
	pi->print_cb = cb;
	pi->diff     = diff;

	if (git_repository__cvar(&pi->oid_strlen, diff->repo, GIT_CVAR_ABBREV) < 0)
		return -1;

	/* one extra slot for the NUL byte, then clamp to a usable size */
	pi->oid_strlen++;

	if (pi->oid_strlen > GIT_OID_HEXSZ + 1)
		pi->oid_strlen = GIT_OID_HEXSZ + 1;
	else if (pi->oid_strlen < 2)
		pi->oid_strlen = 2;

	return 0;
}
static char pick_suffix(int mode) static char pick_suffix(int mode)
{ {
if (S_ISDIR(mode)) if (S_ISDIR(mode))
...@@ -1106,11 +1129,7 @@ int git_diff_print_compact( ...@@ -1106,11 +1129,7 @@ int git_diff_print_compact(
git_buf buf = GIT_BUF_INIT; git_buf buf = GIT_BUF_INIT;
diff_print_info pi; diff_print_info pi;
pi.diff = diff; if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload)))
pi.print_cb = print_cb;
pi.payload = payload;
pi.buf = &buf;
error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi); error = git_diff_foreach(diff, print_compact, NULL, NULL, &pi);
git_buf_free(&buf); git_buf_free(&buf);
...@@ -1118,22 +1137,71 @@ int git_diff_print_compact( ...@@ -1118,22 +1137,71 @@ int git_diff_print_compact(
return error; return error;
} }
static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta) static int print_raw(
const git_diff_delta *delta, float progress, void *data)
{ {
int abbrevlen; diff_print_info *pi = data;
char code = git_diff_status_char(delta->status);
char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1]; char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1];
if (git_repository__cvar(&abbrevlen, pi->diff->repo, GIT_CVAR_ABBREV) < 0) GIT_UNUSED(progress);
if (code == ' ')
return 0;
git_buf_clear(pi->buf);
git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid);
git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid);
git_buf_printf(
pi->buf, ":%06o %06o %s... %s... %c",
delta->old_file.mode, delta->new_file.mode, start_oid, end_oid, code);
if (delta->similarity > 0)
git_buf_printf(pi->buf, "%03u", delta->similarity);
if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED)
git_buf_printf(
pi->buf, "\t%s %s\n", delta->old_file.path, delta->new_file.path);
else
git_buf_printf(
pi->buf, "\t%s\n", delta->old_file.path ?
delta->old_file.path : delta->new_file.path);
if (git_buf_oom(pi->buf))
return -1; return -1;
abbrevlen += 1; /* for NUL byte */ if (pi->print_cb(delta, NULL, GIT_DIFF_LINE_FILE_HDR,
if (abbrevlen < 2) git_buf_cstr(pi->buf), git_buf_len(pi->buf), pi->payload))
abbrevlen = 2; return callback_error();
else if (abbrevlen > (int)sizeof(start_oid))
abbrevlen = (int)sizeof(start_oid); return 0;
}
/*
 * Print a diff by running `print_raw` over every delta.
 *
 * The print state is set up with diff_print_info_init(); iteration only
 * happens if that succeeds.  The scratch buffer is released on every
 * path, and the result of the setup or of git_diff_foreach() is returned.
 */
int git_diff_print_raw(
	git_diff_list *diff,
	git_diff_data_cb print_cb,
	void *payload)
{
	diff_print_info pi;
	git_buf buf = GIT_BUF_INIT;
	int error = diff_print_info_init(&pi, &buf, diff, print_cb, payload);

	if (error == 0)
		error = git_diff_foreach(diff, print_raw, NULL, NULL, &pi);

	git_buf_free(&buf);

	return error;
}
static int print_oid_range(diff_print_info *pi, const git_diff_delta *delta)
{
char start_oid[GIT_OID_HEXSZ+1], end_oid[GIT_OID_HEXSZ+1];
git_oid_tostr(start_oid, abbrevlen, &delta->old_file.oid); git_oid_tostr(start_oid, pi->oid_strlen, &delta->old_file.oid);
git_oid_tostr(end_oid, abbrevlen, &delta->new_file.oid); git_oid_tostr(end_oid, pi->oid_strlen, &delta->new_file.oid);
/* TODO: Match git diff more closely */ /* TODO: Match git diff more closely */
if (delta->old_file.mode == delta->new_file.mode) { if (delta->old_file.mode == delta->new_file.mode) {
...@@ -1289,11 +1357,7 @@ int git_diff_print_patch( ...@@ -1289,11 +1357,7 @@ int git_diff_print_patch(
git_buf buf = GIT_BUF_INIT; git_buf buf = GIT_BUF_INIT;
diff_print_info pi; diff_print_info pi;
pi.diff = diff; if (!(error = diff_print_info_init(&pi, &buf, diff, print_cb, payload)))
pi.print_cb = print_cb;
pi.payload = payload;
pi.buf = &buf;
error = git_diff_foreach( error = git_diff_foreach(
diff, print_patch_file, print_patch_hunk, print_patch_line, &pi); diff, print_patch_file, print_patch_hunk, print_patch_line, &pi);
...@@ -1736,11 +1800,8 @@ int git_diff_patch_print( ...@@ -1736,11 +1800,8 @@ int git_diff_patch_print(
assert(patch && print_cb); assert(patch && print_cb);
pi.diff = patch->diff; if (!(error = diff_print_info_init(
pi.print_cb = print_cb; &pi, &temp, patch->diff, print_cb, payload)))
pi.payload = payload;
pi.buf = &temp;
error = print_patch_file(patch->delta, 0, &pi); error = print_patch_file(patch->delta, 0, &pi);
for (h = 0; h < patch->hunks_size && !error; ++h) { for (h = 0; h < patch->hunks_size && !error; ++h) {
......
...@@ -18,12 +18,15 @@ static git_diff_delta *diff_delta__dup( ...@@ -18,12 +18,15 @@ static git_diff_delta *diff_delta__dup(
return NULL; return NULL;
memcpy(delta, d, sizeof(git_diff_delta)); memcpy(delta, d, sizeof(git_diff_delta));
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
if (d->old_file.path != NULL) {
delta->old_file.path = git_pool_strdup(pool, d->old_file.path); delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
if (delta->old_file.path == NULL) if (delta->old_file.path == NULL)
goto fail; goto fail;
}
if (d->new_file.path != d->old_file.path) { if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) {
delta->new_file.path = git_pool_strdup(pool, d->new_file.path); delta->new_file.path = git_pool_strdup(pool, d->new_file.path);
if (delta->new_file.path == NULL) if (delta->new_file.path == NULL)
goto fail; goto fail;
...@@ -220,7 +223,7 @@ int git_diff_find_similar__calc_similarity( ...@@ -220,7 +223,7 @@ int git_diff_find_similar__calc_similarity(
#define DEFAULT_THRESHOLD 50 #define DEFAULT_THRESHOLD 50
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60 #define DEFAULT_BREAK_REWRITE_THRESHOLD 60
#define DEFAULT_TARGET_LIMIT 200 #define DEFAULT_RENAME_LIMIT 200
static int normalize_find_opts( static int normalize_find_opts(
git_diff_list *diff, git_diff_list *diff,
...@@ -253,12 +256,25 @@ static int normalize_find_opts( ...@@ -253,12 +256,25 @@ static int normalize_find_opts(
/* some flags imply others */ /* some flags imply others */
if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) {
/* if we are only looking for exact matches, then don't turn
* MODIFIED items into ADD/DELETE pairs because it's too picky
*/
opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES);
/* similarly, don't look for self-rewrites to split */
opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES;
}
if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES)
opts->flags |= GIT_DIFF_FIND_RENAMES; opts->flags |= GIT_DIFF_FIND_RENAMES;
if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)
opts->flags |= GIT_DIFF_FIND_COPIES; opts->flags |= GIT_DIFF_FIND_COPIES;
if (opts->flags & GIT_DIFF_BREAK_REWRITES)
opts->flags |= GIT_DIFF_FIND_REWRITES;
#define USE_DEFAULT(X) ((X) == 0 || (X) > 100) #define USE_DEFAULT(X) ((X) == 0 || (X) > 100)
if (USE_DEFAULT(opts->rename_threshold)) if (USE_DEFAULT(opts->rename_threshold))
...@@ -275,15 +291,15 @@ static int normalize_find_opts( ...@@ -275,15 +291,15 @@ static int normalize_find_opts(
#undef USE_DEFAULT #undef USE_DEFAULT
if (!opts->target_limit) { if (!opts->rename_limit) {
int32_t limit = 0; int32_t limit = 0;
opts->target_limit = DEFAULT_TARGET_LIMIT; opts->rename_limit = DEFAULT_RENAME_LIMIT;
if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0) if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
giterr_clear(); giterr_clear();
else if (limit > 0) else if (limit > 0)
opts->target_limit = limit; opts->rename_limit = limit;
} }
/* assign the internal metric with whitespace flag as payload */ /* assign the internal metric with whitespace flag as payload */
...@@ -307,11 +323,12 @@ static int normalize_find_opts( ...@@ -307,11 +323,12 @@ static int normalize_find_opts(
return 0; return 0;
} }
static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) static int apply_splits_and_deletes(
git_diff_list *diff, size_t expected_size, bool actually_split)
{ {
git_vector onto = GIT_VECTOR_INIT; git_vector onto = GIT_VECTOR_INIT;
size_t i; size_t i;
git_diff_delta *delta; git_diff_delta *delta, *deleted;
if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
return -1; return -1;
...@@ -321,9 +338,11 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) ...@@ -321,9 +338,11 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
continue; continue;
if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) { if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool); delta->similarity = 0;
if (!deleted)
/* make new record for DELETED side of split */
if (!(deleted = diff_delta__dup(delta, &diff->pool)))
goto on_error; goto on_error;
deleted->status = GIT_DELTA_DELETED; deleted->status = GIT_DELTA_DELETED;
...@@ -334,32 +353,46 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) ...@@ -334,32 +353,46 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
if (git_vector_insert(&onto, deleted) < 0) if (git_vector_insert(&onto, deleted) < 0)
goto on_error; goto on_error;
if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
delta->status = GIT_DELTA_UNTRACKED;
else
delta->status = GIT_DELTA_ADDED; delta->status = GIT_DELTA_ADDED;
memset(&delta->old_file, 0, sizeof(delta->old_file)); memset(&delta->old_file, 0, sizeof(delta->old_file));
delta->old_file.path = delta->new_file.path; delta->old_file.path = delta->new_file.path;
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
} }
/* clean up delta before inserting into new list */
GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
if (delta->status != GIT_DELTA_COPIED &&
delta->status != GIT_DELTA_RENAMED &&
(delta->status != GIT_DELTA_MODIFIED || actually_split))
delta->similarity = 0;
/* insert into new list */
if (git_vector_insert(&onto, delta) < 0) if (git_vector_insert(&onto, delta) < 0)
goto on_error; goto on_error;
} }
/* cannot return an error past this point */ /* cannot return an error past this point */
git_vector_foreach(&diff->deltas, i, delta)
/* free deltas from old list that didn't make it to the new one */
git_vector_foreach(&diff->deltas, i, delta) {
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
git__free(delta); git__free(delta);
}
/* swap new delta list into place */ /* swap new delta list into place */
git_vector_sort(&onto);
git_vector_swap(&diff->deltas, &onto); git_vector_swap(&diff->deltas, &onto);
git_vector_free(&onto); git_vector_free(&onto);
git_vector_sort(&diff->deltas);
return 0; return 0;
on_error: on_error:
git_vector_foreach(&onto, i, delta) git_vector_foreach(&onto, i, delta)
git__free(delta); git__free(delta);
git_vector_free(&onto); git_vector_free(&onto);
return -1; return -1;
...@@ -373,13 +406,13 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx) ...@@ -373,13 +406,13 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
static int similarity_calc( static int similarity_calc(
git_diff_list *diff, git_diff_list *diff,
git_diff_find_options *opts, const git_diff_find_options *opts,
size_t file_idx, size_t file_idx,
void **cache) void **cache)
{ {
int error = 0; int error = 0;
git_diff_file *file = similarity_get_file(diff, file_idx); git_diff_file *file = similarity_get_file(diff, file_idx);
git_iterator_type_t src = (file_idx & 1) ? diff->old_src : diff->new_src; git_iterator_type_t src = (file_idx & 1) ? diff->new_src : diff->old_src;
if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */ if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
git_buf path = GIT_BUF_INIT; git_buf path = GIT_BUF_INIT;
...@@ -422,22 +455,56 @@ static int similarity_calc( ...@@ -422,22 +455,56 @@ static int similarity_calc(
return error; return error;
} }
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
/* - score < 0 means files cannot be compared
* - score >= 100 means files are exact match
* - score == 0 means files are completely different
*/
static int similarity_measure( static int similarity_measure(
int *score,
git_diff_list *diff, git_diff_list *diff,
git_diff_find_options *opts, const git_diff_find_options *opts,
void **cache, void **cache,
size_t a_idx, size_t a_idx,
size_t b_idx) size_t b_idx)
{ {
int score = 0;
git_diff_file *a_file = similarity_get_file(diff, a_idx); git_diff_file *a_file = similarity_get_file(diff, a_idx);
git_diff_file *b_file = similarity_get_file(diff, b_idx); git_diff_file *b_file = similarity_get_file(diff, b_idx);
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
*score = -1;
/* don't try to compare files of different types */
if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode)) if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode))
return 0; return 0;
if (git_oid__cmp(&a_file->oid, &b_file->oid) == 0) /* if exact match is requested, force calculation of missing OIDs */
return 100; if (exact_match) {
if (git_oid_iszero(&a_file->oid) &&
diff->old_src == GIT_ITERATOR_TYPE_WORKDIR &&
!git_diff__oid_for_file(diff->repo, a_file->path,
a_file->mode, a_file->size, &a_file->oid))
a_file->flags |= GIT_DIFF_FLAG_VALID_OID;
if (git_oid_iszero(&b_file->oid) &&
diff->new_src == GIT_ITERATOR_TYPE_WORKDIR &&
!git_diff__oid_for_file(diff->repo, b_file->path,
b_file->mode, b_file->size, &b_file->oid))
b_file->flags |= GIT_DIFF_FLAG_VALID_OID;
}
/* check OID match as a quick test */
if (git_oid__cmp(&a_file->oid, &b_file->oid) == 0) {
*score = 100;
return 0;
}
/* don't calculate signatures if we are doing exact match */
if (exact_match) {
*score = 0;
return 0;
}
/* update signature cache if needed */ /* update signature cache if needed */
if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0) if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
...@@ -450,231 +517,369 @@ static int similarity_measure( ...@@ -450,231 +517,369 @@ static int similarity_measure(
return 0; return 0;
/* compare signatures */ /* compare signatures */
if (opts->metric->similarity( return opts->metric->similarity(
&score, cache[a_idx], cache[b_idx], opts->metric->payload) < 0) score, cache[a_idx], cache[b_idx], opts->metric->payload);
return -1; }
/* Measure how similar the two sides of one delta are to each other and
 * remember the result on the delta.
 *
 * The measurement compares signature slots `2 * delta_idx` and
 * `2 * delta_idx + 1` via similarity_measure().  It is performed at most
 * once per delta: GIT_DIFF_FLAG__HAS_SELF_SIMILARITY marks a delta whose
 * `similarity` field has already been filled in.
 *
 * Returns 0 on success (including when the two sides turn out not to be
 * comparable, in which case the delta is left untouched) or the negative
 * error reported by similarity_measure().
 */
static int calc_self_similarity(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
	size_t first_slot = 2 * delta_idx;
	int score = -1;
	int rc;

	/* already computed on an earlier call */
	if (delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY)
		return 0;

	rc = similarity_measure(
		&score, diff, opts, cache, first_slot, first_slot + 1);
	if (rc < 0)
		return rc;

	/* a negative score means "not comparable": record nothing */
	if (score < 0)
		return 0;

	delta->similarity = (uint32_t)score;
	delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY;
	return 0;
}
/* Decide whether the delta at `delta_idx` may be the destination of a
 * rename or copy, and tag it with GIT_DIFF_FLAG__IS_RENAME_TARGET if so.
 *
 * Candidates are ADDED, RENAMED, COPIED and other unlisted statuses;
 * MODIFIED deltas qualify only when rewrite handling is enabled and their
 * self-similarity falls under the relevant threshold; UNTRACKED and IGNORED
 * deltas qualify only when GIT_DIFF_FIND_FOR_UNTRACKED is set.  A MODIFIED
 * delta that falls under the break-rewrite threshold is additionally
 * flagged GIT_DIFF_FLAG__TO_SPLIT.
 *
 * A failure while computing self-similarity is treated as "not a target".
 */
static bool is_rename_target(
	git_diff_list *diff,
	const git_diff_find_options *opts,
	size_t delta_idx,
	void **cache)
{
	git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
	bool is_target = false;

	/* only plain blobs can be rename targets */
	if (!GIT_MODE_ISBLOB(delta->new_file.mode))
		return false;

	switch (delta->status) {
	case GIT_DELTA_UNMODIFIED:
	case GIT_DELTA_DELETED:
		/* never a target */
		break;

	case GIT_DELTA_MODIFIED:
		/* a modified file is only a target if it may be split */
		if (!(FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) ||
			  FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)))
			break;

		if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
			break;

		if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
			delta->similarity < opts->break_rewrite_threshold) {
			delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
			is_target = true;
		} else if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
			delta->similarity < opts->rename_from_rewrite_threshold) {
			is_target = true;
		}
		break;

	case GIT_DELTA_UNTRACKED:
	case GIT_DELTA_IGNORED:
		/* opt-in only */
		if (FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED))
			is_target = true;
		break;

	default:
		/* every other status value is a candidate */
		is_target = true;
		break;
	}

	if (is_target)
		delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET;

	return is_target;
}
static bool is_rename_source(
git_diff_list *diff,
const git_diff_find_options *opts,
size_t delta_idx,
void **cache)
{
git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
/* skip things that aren't blobs */
if (!GIT_MODE_ISBLOB(delta->old_file.mode))
return false;
switch (delta->status) {
case GIT_DELTA_ADDED:
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_IGNORED:
return false;
case GIT_DELTA_DELETED:
case GIT_DELTA_TYPECHANGE:
break;
case GIT_DELTA_UNMODIFIED:
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
return false;
break;
default: /* MODIFIED, RENAMED, COPIED */
/* if we're finding copies, this could be a source */
if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
break;
/* otherwise, this is only a source if we can split it */
if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) &&
!FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES))
return false;
if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
return false;
if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
delta->similarity < opts->break_rewrite_threshold) {
delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
break;
}
/* clip score */ if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
if (score < 0) delta->similarity < opts->rename_from_rewrite_threshold)
score = 0; break;
else if (score > 100)
score = 100; return false;
}
return score; delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE;
return true;
} }
#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0) GIT_INLINE(bool) delta_is_split(git_diff_delta *delta)
{
return (delta->status == GIT_DELTA_TYPECHANGE ||
(delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0);
}
/* True when the delta's status is ADDED, UNTRACKED or IGNORED. */
GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
{
	switch (delta->status) {
	case GIT_DELTA_ADDED:
	case GIT_DELTA_UNTRACKED:
	case GIT_DELTA_IGNORED:
		return true;
	default:
		return false;
	}
}
/* Best-match record kept per delta while searching for renames / copies.
 * As used by git_diff_find_similar: entry i describes the best candidate
 * found so far for delta i.
 */
typedef struct {
/* index into diff->deltas of the best-matching delta (initialised to the
 * entry's own index before any candidate is examined) */
uint32_t idx;
/* similarity score of that match; 0 is treated as "no match recorded" */
uint32_t similarity;
} diff_find_match;
int git_diff_find_similar( int git_diff_find_similar(
git_diff_list *diff, git_diff_list *diff,
git_diff_find_options *given_opts) git_diff_find_options *given_opts)
{ {
size_t i, j, cache_size, *matches; size_t i, j, cache_size;
int error = 0, similarity; int error = 0, similarity;
git_diff_delta *from, *to; git_diff_delta *from, *to;
git_diff_find_options opts; git_diff_find_options opts;
size_t tried_targets, num_rewrites = 0; size_t num_rewrites = 0, num_updates = 0;
void **cache; void **cache; /* cache of similarity metric file signatures */
diff_find_match *matches; /* cache of best matches */
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error; return error;
/* TODO: maybe abort if deltas.length > target_limit ??? */ /* TODO: maybe abort if deltas.length > rename_limit ??? */
if (!git__is_uint32(diff->deltas.length))
return 0;
cache_size = diff->deltas.length * 2; /* must store b/c length may change */ cache_size = diff->deltas.length * 2; /* must store b/c length may change */
cache = git__calloc(cache_size, sizeof(void *)); cache = git__calloc(cache_size, sizeof(void *));
GITERR_CHECK_ALLOC(cache); GITERR_CHECK_ALLOC(cache);
matches = git__calloc(diff->deltas.length, sizeof(size_t)); matches = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(matches); GITERR_CHECK_ALLOC(matches);
/* first break MODIFIED records that are too different (if requested) */
if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
git_vector_foreach(&diff->deltas, i, from) {
if (from->status != GIT_DELTA_MODIFIED)
continue;
similarity = similarity_measure(
diff, &opts, cache, 2 * i, 2 * i + 1);
if (similarity < 0) {
error = similarity;
goto cleanup;
}
if ((unsigned int)similarity < opts.break_rewrite_threshold) {
from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
}
}
/* next find the most similar delta for each rename / copy candidate */ /* next find the most similar delta for each rename / copy candidate */
git_vector_foreach(&diff->deltas, i, from) { git_vector_foreach(&diff->deltas, i, to) {
tried_targets = 0; size_t tried_sources = 0;
/* skip things that aren't blobs */
if (GIT_MODE_TYPE(from->old_file.mode) !=
GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
continue;
/* don't check UNMODIFIED files as source unless given option */ matches[i].idx = i;
if (from->status == GIT_DELTA_UNMODIFIED && matches[i].similarity = 0;
!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
continue;
/* skip all but DELETED files unless copy detection is on */ /* skip things that are not rename targets */
if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) && if (!is_rename_target(diff, &opts, i, cache))
from->status != GIT_DELTA_DELETED &&
(from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
continue; continue;
git_vector_foreach(&diff->deltas, j, to) { git_vector_foreach(&diff->deltas, j, from) {
if (i == j) if (i == j)
continue; continue;
/* skip things that aren't blobs */ /* skip things that are not rename sources */
if (GIT_MODE_TYPE(to->new_file.mode) != if (!is_rename_source(diff, &opts, j, cache))
GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
continue; continue;
switch (to->status) { /* cap on maximum targets we'll examine (per "to" file) */
case GIT_DELTA_ADDED: if (++tried_sources > opts.rename_limit)
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_RENAMED:
case GIT_DELTA_COPIED:
break;
case GIT_DELTA_MODIFIED:
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
continue;
break; break;
default:
/* only the above status values should be checked */
continue;
}
/* cap on maximum files we'll examine (per "from" file) */
if (++tried_targets > opts.target_limit)
break;
/* calculate similarity and see if this pair beats the
* similarity score of the current best pair.
*/
similarity = similarity_measure(
diff, &opts, cache, 2 * i, 2 * j + 1);
if (similarity < 0) { /* calculate similarity for this pair and find best match */
error = similarity; if ((error = similarity_measure(
&similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0)
goto cleanup; goto cleanup;
if (similarity < 0) { /* not actually comparable */
--tried_sources;
continue;
} }
if (to->similarity < (unsigned int)similarity) { if (matches[i].similarity < (uint32_t)similarity) {
to->similarity = (unsigned int)similarity; matches[i].similarity = (uint32_t)similarity;
matches[j] = i + 1; matches[i].idx = j;
} }
} }
} }
/* next rewrite the diffs with renames / copies */ /* next rewrite the diffs with renames / copies */
git_vector_foreach(&diff->deltas, j, to) { git_vector_foreach(&diff->deltas, i, to) {
if (!matches[j]) {
assert(to->similarity == 0); /* check if this delta was matched to another one */
if ((similarity = (int)matches[i].similarity) <= 0)
continue; continue;
} assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0);
i = matches[j] - 1; from = GIT_VECTOR_GET(&diff->deltas, matches[i].idx);
from = GIT_VECTOR_GET(&diff->deltas, i); assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0);
assert(from);
/* four possible outcomes here: /* possible scenarios:
* 1. old DELETED and if over rename threshold, * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
* new becomes RENAMED and old goes away * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE
* 2. old SPLIT and if over rename threshold, * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME
* new becomes RENAMED and old becomes ADDED (clear SPLIT) * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT
* 3. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
* old is more similar to new than it is to itself, in which
* case, new becomes RENAMED and old becomed ADDED
* 4. otherwise if over copy threshold, new becomes COPIED
*/ */
if (from->status == GIT_DELTA_DELETED) { if (from->status == GIT_DELTA_DELETED) {
if (to->similarity < opts.rename_threshold) {
to->similarity = 0; if (delta_is_new_only(to)) {
if (similarity < (int)opts.rename_threshold)
continue; continue;
}
to->status = GIT_DELTA_RENAMED; from->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->flags |= GIT_DIFF_FLAG__TO_DELETE;
from->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++; num_rewrites++;
} else {
assert(delta_is_split(to));
if (similarity < (int)opts.rename_from_rewrite_threshold)
continue; continue;
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
to->status = GIT_DELTA_DELETED;
memset(&to->new_file, 0, sizeof(to->new_file));
to->new_file.path = to->old_file.path;
to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
} }
if (from->status == GIT_DELTA_MODIFIED && num_updates++;
(from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) }
{
if (to->similarity < opts.rename_threshold) {
to->similarity = 0;
continue;
} }
to->status = GIT_DELTA_RENAMED; else if (delta_is_split(from)) {
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); git_diff_file swap;
if (delta_is_new_only(to)) {
from->status = GIT_DELTA_ADDED; if (similarity < (int)opts.rename_threshold)
continue;
memcpy(&swap, &from->new_file, sizeof(swap));
from->status = GIT_DELTA_RENAMED;
from->similarity = (uint32_t)similarity;
memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
memset(&from->old_file, 0, sizeof(from->old_file));
num_rewrites--; num_rewrites--;
}
to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
memcpy(&to->new_file, &swap, sizeof(to->new_file));
to->old_file.path = to->new_file.path;
num_updates++;
} else {
assert(delta_is_split(from));
if (similarity < (int)opts.rename_from_rewrite_threshold)
continue; continue;
}
if (from->status == GIT_DELTA_MODIFIED && memcpy(&swap, &to->new_file, sizeof(swap));
FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
to->similarity > opts.rename_threshold)
{
similarity = similarity_measure(
diff, &opts, cache, 2 * i, 2 * i + 1);
if (similarity < 0) { to->status = GIT_DELTA_RENAMED;
error = similarity; to->similarity = (uint32_t)similarity;
goto cleanup; memcpy(&to->new_file, &from->new_file, sizeof(to->new_file));
if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
} }
if ((unsigned int)similarity < opts.rename_from_rewrite_threshold) { memcpy(&from->new_file, &swap, sizeof(from->new_file));
to->status = GIT_DELTA_RENAMED; if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites++;
}
from->status = GIT_DELTA_ADDED; /* in the off chance that we've just swapped the new
memset(&from->old_file, 0, sizeof(from->old_file)); * element into the correct place, clear the SPLIT flag
from->old_file.path = to->old_file.path; */
from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; if (matches[matches[i].idx].idx == i &&
matches[matches[i].idx].similarity >
opts.rename_from_rewrite_threshold) {
from->status = GIT_DELTA_RENAMED;
from->similarity =
(uint32_t)matches[matches[i].idx].similarity;
matches[matches[i].idx].similarity = 0;
from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
continue; num_updates++;
} }
} }
if (to->similarity < opts.copy_threshold) { else if (delta_is_new_only(to)) {
to->similarity = 0; if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
similarity < (int)opts.copy_threshold)
continue; continue;
}
/* convert "to" to a COPIED record */
to->status = GIT_DELTA_COPIED; to->status = GIT_DELTA_COPIED;
to->similarity = (uint32_t)similarity;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
}
if (num_rewrites > 0) { num_updates++;
assert(num_rewrites < diff->deltas.length); }
}
if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes( error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites); diff, diff->deltas.length - num_rewrites,
} FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup: cleanup:
git__free(matches); git__free(matches);
......
...@@ -202,6 +202,32 @@ int git_futils_readbuffer(git_buf *buf, const char *path) ...@@ -202,6 +202,32 @@ int git_futils_readbuffer(git_buf *buf, const char *path)
return git_futils_readbuffer_updated(buf, path, NULL, NULL, NULL); return git_futils_readbuffer_updated(buf, path, NULL, NULL, NULL);
} }
/* Write the contents of `buf` to the file at `path`.
 *
 * @param buf   buffer whose bytes (git_buf_cstr / git_buf_len) are written
 * @param path  file to open
 * @param flags open(2)-style flags; a value <= 0 selects the default
 *              O_CREAT | O_TRUNC | O_WRONLY
 * @param mode  permission bits used if the file is created; 0 selects
 *              GIT_FILEMODE_BLOB
 * @return the (negative) result of p_open or p_write if either fails,
 *         otherwise the result of p_close; an error message is set via
 *         giterr_set on every failure path
 */
int git_futils_writebuffer(
	const git_buf *buf, const char *path, int flags, mode_t mode)
{
	int fd, error = 0;

	if (flags <= 0)
		flags = O_CREAT | O_TRUNC | O_WRONLY;
	if (!mode)
		mode = GIT_FILEMODE_BLOB;

	if ((fd = p_open(path, flags, mode)) < 0) {
		giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
		return fd;
	}

	if ((error = p_write(fd, git_buf_cstr(buf), git_buf_len(buf))) < 0) {
		giterr_set(GITERR_OS, "Could not write to '%s'", path);
		(void)p_close(fd);
		/* Return now: falling through would close the descriptor a second
		 * time and replace the write error (and its message) with whatever
		 * that second close reports.
		 */
		return error;
	}

	if ((error = p_close(fd)) < 0)
		giterr_set(GITERR_OS, "Error while closing '%s'", path);

	return error;
}
int git_futils_mv_withpath(const char *from, const char *to, const mode_t dirmode) int git_futils_mv_withpath(const char *from, const char *to, const mode_t dirmode)
{ {
if (git_futils_mkpath2file(to, dirmode) < 0) if (git_futils_mkpath2file(to, dirmode) < 0)
......
...@@ -22,6 +22,9 @@ extern int git_futils_readbuffer_updated( ...@@ -22,6 +22,9 @@ extern int git_futils_readbuffer_updated(
git_buf *obj, const char *path, time_t *mtime, size_t *size, int *updated); git_buf *obj, const char *path, time_t *mtime, size_t *size, int *updated);
extern int git_futils_readbuffer_fd(git_buf *obj, git_file fd, size_t len); extern int git_futils_readbuffer_fd(git_buf *obj, git_file fd, size_t len);
extern int git_futils_writebuffer(
const git_buf *buf, const char *path, int open_flags, mode_t mode);
/** /**
* File utils * File utils
* *
...@@ -223,6 +226,7 @@ extern git_off_t git_futils_filesize(git_file fd); ...@@ -223,6 +226,7 @@ extern git_off_t git_futils_filesize(git_file fd);
#define GIT_MODE_PERMS_MASK 0777 #define GIT_MODE_PERMS_MASK 0777
#define GIT_CANONICAL_PERMS(MODE) (((MODE) & 0100) ? 0755 : 0644) #define GIT_CANONICAL_PERMS(MODE) (((MODE) & 0100) ? 0755 : 0644)
#define GIT_MODE_TYPE(MODE) ((MODE) & ~GIT_MODE_PERMS_MASK) #define GIT_MODE_TYPE(MODE) ((MODE) & ~GIT_MODE_PERMS_MASK)
#define GIT_MODE_ISBLOB(MODE) (GIT_MODE_TYPE(MODE) == GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
/** /**
* Convert a mode_t from the OS to a legal git mode_t value. * Convert a mode_t from the OS to a legal git mode_t value.
......
...@@ -68,12 +68,31 @@ GIT_INLINE(char) *fmt_one(char *str, unsigned int val) ...@@ -68,12 +68,31 @@ GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
return str; return str;
} }
void git_oid_fmt(char *str, const git_oid *oid) void git_oid_nfmt(char *str, size_t n, const git_oid *oid)
{ {
size_t i; size_t i, max_i;
for (i = 0; i < sizeof(oid->id); i++) if (!oid) {
memset(str, 0, n);
return;
}
if (n > GIT_OID_HEXSZ) {
memset(&str[GIT_OID_HEXSZ], 0, n - GIT_OID_HEXSZ);
n = GIT_OID_HEXSZ;
}
max_i = n / 2;
for (i = 0; i < max_i; i++)
str = fmt_one(str, oid->id[i]); str = fmt_one(str, oid->id[i]);
if (n & 1)
*str++ = to_hex[oid->id[i] >> 4];
}
/* Format `oid` as hex into `str` by delegating to git_oid_nfmt with a
 * length of GIT_OID_HEXSZ.
 * NOTE(review): with n == GIT_OID_HEXSZ, git_oid_nfmt appears to write
 * exactly GIT_OID_HEXSZ bytes and no terminating NUL, so `str` presumably
 * must hold at least GIT_OID_HEXSZ bytes -- confirm against callers.
 */
void git_oid_fmt(char *str, const git_oid *oid)
{
git_oid_nfmt(str, GIT_OID_HEXSZ, oid);
}
void git_oid_pathfmt(char *str, const git_oid *oid) void git_oid_pathfmt(char *str, const git_oid *oid)
...@@ -91,31 +110,20 @@ char *git_oid_allocfmt(const git_oid *oid) ...@@ -91,31 +110,20 @@ char *git_oid_allocfmt(const git_oid *oid)
char *str = git__malloc(GIT_OID_HEXSZ + 1); char *str = git__malloc(GIT_OID_HEXSZ + 1);
if (!str) if (!str)
return NULL; return NULL;
git_oid_fmt(str, oid); git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
str[GIT_OID_HEXSZ] = '\0';
return str; return str;
} }
char *git_oid_tostr(char *out, size_t n, const git_oid *oid) char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
{ {
char str[GIT_OID_HEXSZ];
if (!out || n == 0) if (!out || n == 0)
return ""; return "";
n--; /* allow room for terminating NUL */ if (n > GIT_OID_HEXSZ + 1)
n = GIT_OID_HEXSZ + 1;
if (oid == NULL)
n = 0;
if (n > 0) {
git_oid_fmt(str, oid);
if (n > GIT_OID_HEXSZ)
n = GIT_OID_HEXSZ;
memcpy(out, str, n);
}
out[n] = '\0'; git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
out[n - 1] = '\0';
return out; return out;
} }
......
...@@ -109,6 +109,13 @@ GIT_INLINE(int) git__is_sizet(git_off_t p) ...@@ -109,6 +109,13 @@ GIT_INLINE(int) git__is_sizet(git_off_t p)
return p == (git_off_t)r; return p == (git_off_t)r;
} }
/**
 * Check that a size_t survives a round trip through uint32_t unchanged.
 *
 * @return true if p fits into the range of a uint32_t
 */
GIT_INLINE(int) git__is_uint32(size_t p)
{
	return (size_t)(uint32_t)p == p;
}
/* 32-bit cross-platform rotl */ /* 32-bit cross-platform rotl */
#ifdef _MSC_VER /* use built-in method in MSVC */ #ifdef _MSC_VER /* use built-in method in MSVC */
# define git__rotl(v, s) (uint32_t)_rotl(v, s) # define git__rotl(v, s) (uint32_t)_rotl(v, s)
......
...@@ -213,3 +213,8 @@ void diff_print(FILE *fp, git_diff_list *diff) ...@@ -213,3 +213,8 @@ void diff_print(FILE *fp, git_diff_list *diff)
{ {
cl_git_pass(git_diff_print_patch(diff, diff_print_cb, fp ? fp : stderr)); cl_git_pass(git_diff_print_patch(diff, diff_print_cb, fp ? fp : stderr));
} }
/* Test helper: print `diff` through git_diff_print_raw using diff_print_cb,
 * writing to `fp`, or to stderr when `fp` is NULL.  The call is wrapped in
 * cl_git_pass, so a failing print is reported as a test failure.
 */
void diff_print_raw(FILE *fp, git_diff_list *diff)
{
cl_git_pass(git_diff_print_raw(diff, diff_print_cb, fp ? fp : stderr));
}
...@@ -65,4 +65,4 @@ extern int diff_foreach_via_iterator( ...@@ -65,4 +65,4 @@ extern int diff_foreach_via_iterator(
void *data); void *data);
extern void diff_print(FILE *fp, git_diff_list *diff); extern void diff_print(FILE *fp, git_diff_list *diff);
extern void diff_print_raw(FILE *fp, git_diff_list *diff);
#include "clar_libgit2.h" #include "clar_libgit2.h"
#include "diff_helpers.h" #include "diff_helpers.h"
#include "buf_text.h"
static git_repository *g_repo = NULL; static git_repository *g_repo = NULL;
...@@ -71,8 +72,10 @@ void test_diff_rename__match_oid(void) ...@@ -71,8 +72,10 @@ void test_diff_rename__match_oid(void)
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \ /* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a * 2bc7f351d20b53f1c72c16c4b036e491c478c49a
* don't use NULL opts to avoid config `diff.renames` contamination
*/ */
cl_git_pass(git_diff_find_similar(diff, NULL)); opts.flags = GIT_DIFF_FIND_RENAMES;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp)); memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach( cl_git_pass(git_diff_foreach(
...@@ -242,8 +245,8 @@ void test_diff_rename__not_exact_match(void) ...@@ -242,8 +245,8 @@ void test_diff_rename__not_exact_match(void)
cl_assert_equal_i(5, exp.files); cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]); cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]); cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]); cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]); cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_list_free(diff); git_diff_list_free(diff);
...@@ -377,7 +380,8 @@ void test_diff_rename__handles_small_files(void) ...@@ -377,7 +380,8 @@ void test_diff_rename__handles_small_files(void)
*/ */
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts)); cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES | GIT_DIFF_FIND_AND_BREAK_REWRITES; opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES |
GIT_DIFF_FIND_AND_BREAK_REWRITES;
cl_git_pass(git_diff_find_similar(diff, &opts)); cl_git_pass(git_diff_find_similar(diff, &opts));
git_diff_list_free(diff); git_diff_list_free(diff);
...@@ -387,9 +391,160 @@ void test_diff_rename__handles_small_files(void) ...@@ -387,9 +391,160 @@ void test_diff_rename__handles_small_files(void)
void test_diff_rename__working_directory_changes(void) void test_diff_rename__working_directory_changes(void)
{ {
/* let's rewrite some files in the working directory on demand */ const char *sha0 = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
const char *blobsha = "66311f5cfbe7836c27510a3ba2f43e282e2c8bba";
git_oid id;
git_tree *tree;
git_blob *blob;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
git_buf old_content = GIT_BUF_INIT, content = GIT_BUF_INIT;;
tree = resolve_commit_oid_to_tree(g_repo, sha0);
diffopts.flags |= GIT_DIFF_INCLUDE_UNMODIFIED | GIT_DIFF_INCLUDE_UNTRACKED;
/*
$ git cat-file -p 2bc7f351d20b53f1c72c16c4b036e491c478c49a^{tree}
100644 blob 66311f5cfbe7836c27510a3ba2f43e282e2c8bba sevencities.txt
100644 blob ad0a8e55a104ac54a8a29ed4b84b49e76837a113 sixserving.txt
100644 blob 66311f5cfbe7836c27510a3ba2f43e282e2c8bba songofseven.txt
$ for f in *.txt; do
echo `git hash-object -t blob $f` $f
done
eaf4a3e3bfe68585e90cada20736ace491cd100b ikeepsix.txt
f90d4fc20ecddf21eebe6a37e9225d244339d2b5 sixserving.txt
4210ffd5c390b21dd5483375e75288dea9ede512 songof7cities.txt
9a69d960ae94b060f56c2a8702545e2bb1abb935 untimely.txt
*/
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
/* git diff --no-renames 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
/* and with / without CRLF changes */ cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* rewrite files in the working directory with / without CRLF changes */
cl_git_pass(
git_futils_readbuffer(&old_content, "renames/songof7cities.txt"));
cl_git_pass(
git_buf_text_lf_to_crlf(&content, &old_content));
cl_git_pass(
git_futils_writebuffer(&content, "renames/songof7cities.txt", 0, 0));
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* try a different whitespace option */
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
/* try a different matching option */
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_EXACT_MATCH_ONLY;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
git_diff_list_free(diff);
/* again with exact match blob */
cl_git_pass(git_oid_fromstr(&id, blobsha));
cl_git_pass(git_blob_lookup(&blob, g_repo, &id));
cl_git_pass(git_buf_set(
&content, git_blob_rawcontent(blob), git_blob_rawsize(blob)));
cl_git_rewritefile("renames/songof7cities.txt", content.ptr);
git_blob_free(blob);
cl_git_pass(git_diff_tree_to_workdir(&diff, g_repo, tree, &diffopts));
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_EXACT_MATCH_ONLY;
cl_git_pass(git_diff_find_similar(diff, &opts));
/*
fprintf(stderr, "\n\n");
diff_print_raw(stderr, diff);
*/
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
git_tree_free(tree);
git_buf_free(&content);
git_buf_free(&old_content);
} }
void test_diff_rename__patch(void) void test_diff_rename__patch(void)
...@@ -446,3 +601,213 @@ void test_diff_rename__patch(void) ...@@ -446,3 +601,213 @@ void test_diff_rename__patch(void)
git_tree_free(old_tree); git_tree_free(old_tree);
git_tree_free(new_tree); git_tree_free(new_tree);
} }
/* Two files swap contents: before similarity detection the tree-to-index
 * diff shows two MODIFIED entries; after git_diff_find_similar with
 * GIT_DIFF_FIND_ALL both entries are expected to be reported as RENAMED.
 */
void test_diff_rename__file_exchange(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
/* read both files, then write each one's content over the other */
cl_git_pass(git_futils_readbuffer(&c1, "renames/untimely.txt"));
cl_git_pass(git_futils_readbuffer(&c2, "renames/songof7cities.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/songof7cities.txt", 0, 0));
cl_git_pass(git_futils_writebuffer(&c2, "renames/untimely.txt", 0, 0));
/* load the HEAD tree into the index, then stage the two swapped files */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* without rename detection: two plain modifications */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
/* with all find options enabled: both entries become renames */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
/* release everything acquired above */
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
git_buf_free(&c2);
}
/* One file takes over another file's content while that other file is
 * replaced with unrelated filler: before similarity detection the diff shows
 * two MODIFIED entries; after git_diff_find_similar with GIT_DIFF_FIND_ALL
 * the test expects three entries -- one RENAMED, one ADDED, one DELETED.
 */
void test_diff_rename__file_partial_exchange(void)
{
git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
git_index *index;
git_tree *tree;
git_diff_list *diff;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
diff_expects exp;
int i;
/* songof7cities.txt receives the content of untimely.txt */
cl_git_pass(git_futils_readbuffer(&c1, "renames/untimely.txt"));
cl_git_pass(git_futils_writebuffer(&c1, "renames/songof7cities.txt", 0, 0));
/* untimely.txt is overwritten with 100 copies of a filler line */
for (i = 0; i < 100; ++i)
cl_git_pass(git_buf_puts(&c2, "this is not the content you are looking for\n"));
cl_git_pass(git_futils_writebuffer(&c2, "renames/untimely.txt", 0, 0));
/* load the HEAD tree into the index, then stage the two rewritten files */
cl_git_pass(
git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_read_tree(index, tree));
cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
/* without rename detection: two plain modifications */
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(2, exp.files);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_MODIFIED]);
/* with all find options enabled: rename + add + delete */
opts.flags = GIT_DIFF_FIND_ALL;
cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(3, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
/* release everything acquired above */
git_diff_list_free(diff);
git_tree_free(tree);
git_index_free(index);
git_buf_free(&c1);
git_buf_free(&c2);
}
/*
 * Write two overlapping portions of an existing file to two new paths and
 * check that similarity detection reports both new files as copies.
 */
void test_diff_rename__file_split(void)
{
	git_buf front = GIT_BUF_INIT, back = GIT_BUF_INIT;
	git_index *idx;
	git_tree *head_tree;
	git_diff_list *changes;
	git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT;
	git_diff_find_options find_opts = GIT_DIFF_FIND_OPTIONS_INIT;
	diff_expects counts;

	/* song_a.txt gets the first 2/3 of the file,
	 * song_b.txt gets the last 2/3 of the file
	 */
	cl_git_pass(git_futils_readbuffer(&front, "renames/songof7cities.txt"));
	cl_git_pass(git_buf_set(&back, front.ptr, front.size));
	git_buf_truncate(&front, front.size * 2 / 3);
	git_buf_consume(&back, ((char *)back.ptr) + (back.size / 3));
	cl_git_pass(git_futils_writebuffer(&front, "renames/song_a.txt", 0, 0));
	cl_git_pass(git_futils_writebuffer(&back, "renames/song_b.txt", 0, 0));

	/* load HEAD's tree into the index and stage the two new files */
	cl_git_pass(git_revparse_single(
		(git_object **)&head_tree, g_repo, "HEAD^{tree}"));
	cl_git_pass(git_repository_index(&idx, g_repo));
	cl_git_pass(git_index_read_tree(idx, head_tree));
	cl_git_pass(git_index_add_bypath(idx, "song_a.txt"));
	cl_git_pass(git_index_add_bypath(idx, "song_b.txt"));

	/* unmodified entries are requested in the diff as well */
	diff_opts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
	cl_git_pass(git_diff_tree_to_index(
		&changes, g_repo, head_tree, idx, &diff_opts));

	/* before similarity detection: two additions, four unmodified */
	memset(&counts, 0, sizeof(counts));
	cl_git_pass(git_diff_foreach(
		changes, diff_file_cb, diff_hunk_cb, diff_line_cb, &counts));
	cl_assert_equal_i(6, counts.files);
	cl_assert_equal_i(2, counts.file_status[GIT_DELTA_ADDED]);
	cl_assert_equal_i(4, counts.file_status[GIT_DELTA_UNMODIFIED]);

	find_opts.flags = GIT_DIFF_FIND_ALL;
	cl_git_pass(git_diff_find_similar(changes, &find_opts));

	/* after similarity detection: the two additions are reported as copies */
	memset(&counts, 0, sizeof(counts));
	cl_git_pass(git_diff_foreach(
		changes, diff_file_cb, diff_hunk_cb, diff_line_cb, &counts));
	cl_assert_equal_i(6, counts.files);
	cl_assert_equal_i(2, counts.file_status[GIT_DELTA_COPIED]);
	cl_assert_equal_i(4, counts.file_status[GIT_DELTA_UNMODIFIED]);

	git_diff_list_free(changes);
	git_tree_free(head_tree);
	git_index_free(idx);
	git_buf_free(&front);
	git_buf_free(&back);
}
/*
 * Remove one file from the index and overwrite a second file with the
 * removed file's content, then check that similarity detection reports
 * one rename alongside one deletion.
 */
void test_diff_rename__from_deleted_to_split(void)
{
	git_buf song = GIT_BUF_INIT;
	git_index *idx;
	git_tree *head_tree;
	git_diff_list *changes;
	git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT;
	git_diff_find_options find_opts = GIT_DIFF_FIND_OPTIONS_INIT;
	diff_expects counts;

	/* the old file goes missing; the new file is really the old one renamed */
	cl_git_pass(git_futils_readbuffer(&song, "renames/songof7cities.txt"));
	cl_git_pass(git_futils_writebuffer(&song, "renames/untimely.txt", 0, 0));

	/* load HEAD's tree into the index, drop the old path, stage the new one */
	cl_git_pass(git_revparse_single(
		(git_object **)&head_tree, g_repo, "HEAD^{tree}"));
	cl_git_pass(git_repository_index(&idx, g_repo));
	cl_git_pass(git_index_read_tree(idx, head_tree));
	cl_git_pass(git_index_remove_bypath(idx, "songof7cities.txt"));
	cl_git_pass(git_index_add_bypath(idx, "untimely.txt"));

	/* unmodified entries are requested in the diff as well */
	diff_opts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
	cl_git_pass(git_diff_tree_to_index(
		&changes, g_repo, head_tree, idx, &diff_opts));

	/* before similarity detection: one deletion and one modification */
	memset(&counts, 0, sizeof(counts));
	cl_git_pass(git_diff_foreach(
		changes, diff_file_cb, diff_hunk_cb, diff_line_cb, &counts));
	cl_assert_equal_i(4, counts.files);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_DELETED]);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_MODIFIED]);
	cl_assert_equal_i(2, counts.file_status[GIT_DELTA_UNMODIFIED]);

	find_opts.flags = GIT_DIFF_FIND_ALL;
	cl_git_pass(git_diff_find_similar(changes, &find_opts));

	/* after similarity detection: a rename replaces the modification while
	 * one deletion remains.
	 * NOTE(review): presumably the modified entry is split and its new side
	 * is paired with the deleted file -- confirm against the
	 * git_diff_find_similar implementation.
	 */
	memset(&counts, 0, sizeof(counts));
	cl_git_pass(git_diff_foreach(
		changes, diff_file_cb, diff_hunk_cb, diff_line_cb, &counts));
	cl_assert_equal_i(4, counts.files);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_DELETED]);
	cl_assert_equal_i(1, counts.file_status[GIT_DELTA_RENAMED]);
	cl_assert_equal_i(2, counts.file_status[GIT_DELTA_UNMODIFIED]);

	git_diff_list_free(changes);
	git_tree_free(head_tree);
	git_index_free(idx);
	git_buf_free(&song);
}
...@@ -73,3 +73,40 @@ void test_object_raw_convert__succeed_on_oid_to_string_conversion_big(void) ...@@ -73,3 +73,40 @@ void test_object_raw_convert__succeed_on_oid_to_string_conversion_big(void)
cl_assert(str && str == big && *(str+GIT_OID_HEXSZ+2) == 'Y'); cl_assert(str && str == big && *(str+GIT_OID_HEXSZ+2) == 'Y');
cl_assert(str && str == big && *(str+GIT_OID_HEXSZ+3) == 'Z'); cl_assert(str && str == big && *(str+GIT_OID_HEXSZ+3) == 'Z');
} }
/*
 * Format `oid` into `buffer` via git_oid_nfmt using a length of `count`,
 * then assert that the buffer holds exactly `expected`.
 *
 * A NUL is written at buffer[count], so `buffer` must have room for at
 * least `count + 1` bytes.
 */
static void check_partial_oid(
	char *buffer, size_t count, const git_oid *oid, const char *expected)
{
	git_oid_nfmt(buffer, count, oid);

	/* terminate explicitly so the result can be compared as a C string */
	*(buffer + count) = '\0';

	cl_assert_equal_s(expected, buffer);
}
/*
 * Format one oid with git_oid_nfmt at a range of lengths and check that
 * each result matches the corresponding prefix of the full hex string.
 */
void test_object_raw_convert__convert_oid_partially(void)
{
	/* expected output for lengths 1 through 5, in that order */
	static const char *short_prefixes[] = {
		"1", "16", "16a", "16a0", "16a01"
	};
	const char *hex = "16a0123456789abcdef4b775213c23a8bd74f5e0";
	git_oid oid;
	char out[GIT_OID_HEXSZ + 4]; /* 4 bytes beyond GIT_OID_HEXSZ => big buffer */
	size_t len;

	cl_git_pass(git_oid_fromstr(&oid, hex));

	/* lengths above GIT_OID_HEXSZ; these two comparisons add no
	 * terminator of their own
	 */
	git_oid_nfmt(out, sizeof(out), &oid);
	cl_assert_equal_s(hex, out);

	git_oid_nfmt(out, GIT_OID_HEXSZ + 1, &oid);
	cl_assert_equal_s(hex, out);

	/* very short prefixes */
	for (len = 1; len <= 5; ++len)
		check_partial_oid(out, len, &oid, short_prefixes[len - 1]);

	/* exactly the full length, then one, two and three digits short of it */
	check_partial_oid(out, GIT_OID_HEXSZ, &oid, hex);
	check_partial_oid(
		out, GIT_OID_HEXSZ - 1, &oid,
		"16a0123456789abcdef4b775213c23a8bd74f5e");
	check_partial_oid(
		out, GIT_OID_HEXSZ - 2, &oid,
		"16a0123456789abcdef4b775213c23a8bd74f5");
	check_partial_oid(
		out, GIT_OID_HEXSZ - 3, &oid,
		"16a0123456789abcdef4b775213c23a8bd74f");
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment