Commit 145e696b by Russell Belfer

Minor fixes, cleanups, and clarifications

There are three actual changes in this commit:

1. When the trailing newline of a file is removed in a diff, the
   change will now be reported with `GIT_DIFF_LINE_DEL_EOFNL` passed
   to the callback.  Previously, the `ADD_EOFNL` constant was passed
   instead, because I had misunderstood which circumstances produce
   each value.  `GIT_DIFF_LINE_ADD_EOFNL` is now deprecated and
   should never be generated.  A newly added newline is simply an `ADD`.
2. Rewrote the `diff_delta__merge_like_cgit` function that contains
   the core logic of the `git_diff_merge` implementation.  The new
   version doesn't actually have significantly different behavior,
   but the logic should be much more obvious, I think.
3. Fixed a bug in `git_diff_merge` where it freed a string pool
   while some of the string data was still in use.  This led to
   `git_diff_print_patch` accessing memory that had been freed.

The rest of this commit contains improved documentation in `diff.h`
to make the behavior and the equivalencies with core git clearer,
and a bunch of new tests to cover the various cases, oh and a minor
simplification of `examples/diff.c`.
parent 0abd7244
......@@ -185,9 +185,7 @@ int main(int argc, char *argv[])
/* open repo */
check(git_repository_discover(path, sizeof(path), dir, 0, "/"),
"Could not discover repository");
check(git_repository_open(&repo, path),
check(git_repository_open_ext(&repo, dir, 0, NULL),
"Could not open repository");
if (treeish1)
......
......@@ -29,6 +29,10 @@
*/
GIT_BEGIN_DECL
/**
* Flags for diff options. A combination of these flags can be passed
* in via the `flags` value in the `git_diff_options`.
*/
enum {
GIT_DIFF_NORMAL = 0,
GIT_DIFF_REVERSE = (1 << 0),
......@@ -160,15 +164,16 @@ typedef int (*git_diff_hunk_fn)(
* the file or hunk headers.
*/
enum {
/* these values will be sent to `git_diff_data_fn` along with the line */
/* These values will be sent to `git_diff_data_fn` along with the line */
GIT_DIFF_LINE_CONTEXT = ' ',
GIT_DIFF_LINE_ADDITION = '+',
GIT_DIFF_LINE_DELETION = '-',
GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< LF was added at end of file */
GIT_DIFF_LINE_ADD_EOFNL = '\n', /**< DEPRECATED */
GIT_DIFF_LINE_DEL_EOFNL = '\0', /**< LF was removed at end of file */
/* these values will only be sent to a `git_diff_data_fn` when the content
* of a diff is being formatted (eg. through git_diff_print_patch() or
* git_diff_print_compact(), for instance).
/* The following values will only be sent to a `git_diff_data_fn` when
* the content of a diff is being formatted (eg. through
* git_diff_print_patch() or git_diff_print_compact(), for instance).
*/
GIT_DIFF_LINE_FILE_HDR = 'F',
GIT_DIFF_LINE_HUNK_HDR = 'H',
......@@ -206,6 +211,8 @@ GIT_EXTERN(void) git_diff_list_free(git_diff_list *diff);
/**
* Compute a difference between two tree objects.
*
* This is equivalent to `git diff <treeish> <treeish>`
*
* @param repo The repository containing the trees.
* @param opts Structure with options to influence diff or NULL for defaults.
* @param old_tree A git_tree object to diff from.
......@@ -222,6 +229,9 @@ GIT_EXTERN(int) git_diff_tree_to_tree(
/**
* Compute a difference between a tree and the index.
*
* This is equivalent to `git diff --cached <treeish>` or if you pass
* the HEAD tree, then like `git diff --cached`.
*
* @param repo The repository containing the tree and index.
* @param opts Structure with options to influence diff or NULL for defaults.
* @param old_tree A git_tree object to diff from.
......@@ -236,6 +246,11 @@ GIT_EXTERN(int) git_diff_index_to_tree(
/**
* Compute a difference between the working directory and the index.
*
* This matches the `git diff` command. See the note below on
* `git_diff_workdir_to_tree` for a discussion of the difference between
* `git diff` and `git diff HEAD` and how to emulate a `git diff <treeish>`
* using libgit2.
*
* @param repo The repository.
* @param opts Structure with options to influence diff or NULL for defaults.
* @param diff A pointer to a git_diff_list pointer that will be allocated.
......@@ -248,14 +263,24 @@ GIT_EXTERN(int) git_diff_workdir_to_index(
/**
* Compute a difference between the working directory and a tree.
*
* This returns strictly the differences between the tree and the
* files contained in the working directory, regardless of the state
* of files in the index. There is no direct equivalent in C git.
* This is *NOT* the same as `git diff <treeish>`. Running `git diff HEAD`
* or the like actually uses information from the index, along with the tree
* and workdir info.
*
* This is *NOT* the same as 'git diff HEAD' or 'git diff <SHA>'. Those
* commands diff the tree, the index, and the workdir. To emulate those
* functions, call `git_diff_index_to_tree` and `git_diff_workdir_to_index`,
* then call `git_diff_merge` on the results.
* This function returns strictly the differences between the tree and the
* files contained in the working directory, regardless of the state of
* files in the index. It may come as a surprise, but there is no direct
* equivalent in core git.
*
* To emulate `git diff <treeish>`, you should call both
* `git_diff_index_to_tree` and `git_diff_workdir_to_index`, then call
* `git_diff_merge` on the results. That will yield a `git_diff_list` that
* matches the git output.
*
* If this seems confusing, take the case of a file with a staged deletion
* where the file has then been put back into the working dir and modified.
* The tree-to-workdir diff for that file is 'modified', but core git would
* show status 'deleted' since there is a pending deletion in the index.
*
* @param repo The repository containing the tree.
* @param opts Structure with options to influence diff or NULL for defaults.
......@@ -298,10 +323,23 @@ GIT_EXTERN(int) git_diff_merge(
/**
* Iterate over a diff list issuing callbacks.
*
* If the hunk and/or line callbacks are not NULL, then this will calculate
* text diffs for all files it thinks are not binary. If those are both
* NULL, then this will not bother with the text diffs, so it can be
* efficient.
* This will iterate through all of the files described in a diff. You
* should provide a file callback to learn about each file.
*
* The "hunk" and "line" callbacks are optional, and the text diff of the
* files will only be calculated if they are not NULL. Of course, these
* callbacks will not be invoked for binary files on the diff list or for
* files whose only change is a file mode change.
*
* @param diff A git_diff_list generated by one of the above functions.
* @param cb_data Reference pointer that will be passed to your callbacks.
* @param file_cb Callback function to make per file in the diff.
* @param hunk_cb Optional callback to make per hunk of text diff. This
* callback is called to describe a range of lines in the
* diff. It will not be issued for binary files.
* @param line_cb Optional callback to make per line of diff text. This
* same callback will be made for context lines, added, and
* removed lines, and even for a deleted trailing newline.
*/
GIT_EXTERN(int) git_diff_foreach(
git_diff_list *diff,
......@@ -322,6 +360,14 @@ GIT_EXTERN(int) git_diff_print_compact(
* Iterate over a diff generating text output like "git diff".
*
* This is a super easy way to generate a patch from a diff.
*
* @param diff A git_diff_list generated by one of the above functions.
* @param cb_data Reference pointer that will be passed to your callbacks.
* @param print_cb Callback function to output lines of the diff. This
* same function will be called for file headers, hunk
* headers, and diff lines. Fortunately, you can probably
* use various GIT_DIFF_LINE constants to determine what
* text you are given.
*/
GIT_EXTERN(int) git_diff_print_patch(
git_diff_list *diff,
......@@ -338,13 +384,14 @@ GIT_EXTERN(int) git_diff_print_patch(
/**
* Directly run a text diff on two blobs.
*
* Compared to a file, a blob lacks some contextual information. As such, the
* `git_diff_file` parameters of the callbacks will be filled accordingly to the following:
* `mode` will be set to 0, `path` will be set to NULL. When dealing with a NULL blob, `oid`
* will be set to 0.
* Compared to a file, a blob lacks some contextual information. As such,
* the `git_diff_file` parameters of the callbacks will be filled
* accordingly to the following: `mode` will be set to 0, `path` will be set
* to NULL. When dealing with a NULL blob, `oid` will be set to 0.
*
* When at least one of the blobs being dealt with is binary, the `git_diff_delta` binary
* attribute will be set to 1 and no call to the hunk_cb nor line_cb will be made.
* When at least one of the blobs being dealt with is binary, the
* `git_diff_delta` binary attribute will be set to 1 and no call to the
* hunk_cb nor line_cb will be made.
*/
GIT_EXTERN(int) git_diff_blobs(
git_blob *old_blob,
......
......@@ -130,37 +130,50 @@ fail:
static git_diff_delta *diff_delta__merge_like_cgit(
const git_diff_delta *a, const git_diff_delta *b, git_pool *pool)
{
git_diff_delta *dup = diff_delta__dup(a, pool);
if (!dup)
return NULL;
if (git_oid_cmp(&dup->new_file.oid, &b->new_file.oid) == 0)
return dup;
git_oid_cpy(&dup->new_file.oid, &b->new_file.oid);
dup->new_file.mode = b->new_file.mode;
dup->new_file.size = b->new_file.size;
dup->new_file.flags = b->new_file.flags;
git_diff_delta *dup;
/* Emulate C git for merging two diffs (a la 'git diff <sha>').
*
* When C git does a diff between the work dir and a tree, it actually
* diffs with the index but uses the workdir contents. This emulates
* those choices so we can emulate the type of diff.
*
* We have three file descriptions here, let's call them:
* f1 = a->old_file
* f2 = a->new_file AND b->old_file
* f3 = b->new_file
*/
if (git_oid_cmp(&dup->old_file.oid, &dup->new_file.oid) == 0) {
if (dup->status == GIT_DELTA_DELETED)
/* preserve pending delete info */;
else if (b->status == GIT_DELTA_UNTRACKED ||
b->status == GIT_DELTA_IGNORED)
dup->status = b->status;
else
/* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */
if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED)
return diff_delta__dup(a, pool);
/* otherwise, base this diff on the 'b' diff */
if ((dup = diff_delta__dup(b, pool)) == NULL)
return NULL;
/* If 'a' status is uninteresting, then we're done */
if (a->status == GIT_DELTA_UNMODIFIED)
return dup;
assert(a->status != GIT_DELTA_UNMODIFIED);
assert(b->status != GIT_DELTA_UNMODIFIED);
/* A cgit exception is that the diff of a file that is only in the
* index (i.e. not in HEAD nor workdir) is given as empty.
*/
if (dup->status == GIT_DELTA_DELETED) {
if (a->status == GIT_DELTA_ADDED)
dup->status = GIT_DELTA_UNMODIFIED;
/* else don't overwrite DELETE status */
} else {
dup->status = a->status;
}
else if (dup->status == GIT_DELTA_UNMODIFIED ||
b->status == GIT_DELTA_DELETED)
dup->status = b->status;
git_oid_cpy(&dup->old_file.oid, &a->old_file.oid);
dup->old_file.mode = a->old_file.mode;
dup->old_file.size = a->old_file.size;
dup->old_file.flags = a->old_file.flags;
return dup;
}
......@@ -783,6 +796,12 @@ int git_diff_merge(
git_vector_swap(&onto->deltas, &onto_new);
git_pool_swap(&onto->pool, &onto_pool);
onto->new_src = from->new_src;
/* prefix strings also come from old pool, so recreate those.*/
onto->opts.old_prefix =
git_pool_strdup(&onto->pool, onto->opts.old_prefix);
onto->opts.new_prefix =
git_pool_strdup(&onto->pool, onto->opts.new_prefix);
}
git_vector_foreach(&onto_new, i, delta)
......
......@@ -83,12 +83,13 @@ static int diff_output_cb(void *priv, mmbuffer_t *bufs, int len)
info->cb_data, info->delta, &info->range, origin, bufs[1].ptr, bufs[1].size) < 0)
return -1;
/* deal with adding and removing newline at EOF */
/* This should only happen if we are adding a line that does not
* have a newline at the end and the old code did. In that case,
* we have an ADD with a DEL_EOFNL as a pair.
*/
if (len == 3) {
if (origin == GIT_DIFF_LINE_ADDITION)
origin = GIT_DIFF_LINE_ADD_EOFNL;
else
origin = GIT_DIFF_LINE_DEL_EOFNL;
origin = (origin == GIT_DIFF_LINE_ADDITION) ?
GIT_DIFF_LINE_DEL_EOFNL : GIT_DIFF_LINE_ADD_EOFNL;
return info->line_cb(
info->cb_data, info->delta, &info->range, origin, bufs[2].ptr, bufs[2].size);
......
......@@ -85,11 +85,16 @@ int diff_line_fn(
e->line_ctxt++;
break;
case GIT_DIFF_LINE_ADDITION:
case GIT_DIFF_LINE_ADD_EOFNL:
e->line_adds++;
break;
case GIT_DIFF_LINE_ADD_EOFNL:
assert(0);
break;
case GIT_DIFF_LINE_DELETION:
e->line_dels++;
break;
case GIT_DIFF_LINE_DEL_EOFNL:
/* technically not a line delete, but we'll count it as such */
e->line_dels++;
break;
default:
......
......@@ -116,7 +116,7 @@ void test_diff_tree__options(void)
{ 5, 3, 0, 2, 0, 0, 0, 4, 0, 0, 51, 2, 46, 3 },
{ 5, 3, 0, 2, 0, 0, 0, 4, 0, 0, 53, 4, 46, 3 },
{ 5, 0, 3, 2, 0, 0, 0, 4, 0, 0, 52, 3, 3, 46 },
{ 5, 3, 0, 2, 0, 0, 0, 5, 0, 0, 54, 3, 48, 3 },
{ 5, 3, 0, 2, 0, 0, 0, 5, 0, 0, 54, 3, 47, 4 },
/* c vs d tests */
{ 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 22, 9, 10, 3 },
{ 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 19, 12, 7, 0 },
......
......@@ -92,12 +92,12 @@ void test_diff_workdir__to_tree(void)
cl_git_pass(git_diff_foreach(
diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
cl_assert(exp.files == 14);
cl_assert(exp.file_adds == 0);
cl_assert(exp.file_dels == 4);
cl_assert(exp.file_mods == 4);
cl_assert(exp.file_ignored == 1);
cl_assert(exp.file_untracked == 5);
cl_assert_equal_i(14, exp.files);
cl_assert_equal_i(0, exp.file_adds);
cl_assert_equal_i(4, exp.file_dels);
cl_assert_equal_i(4, exp.file_mods);
cl_assert_equal_i(1, exp.file_ignored);
cl_assert_equal_i(5, exp.file_untracked);
/* Since there is no git diff equivalent, let's just assume that the
* text diffs produced by git_diff_foreach are accurate here. We will
......@@ -120,19 +120,19 @@ void test_diff_workdir__to_tree(void)
cl_git_pass(git_diff_foreach(
diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
cl_assert(exp.files == 15);
cl_assert(exp.file_adds == 2);
cl_assert(exp.file_dels == 5);
cl_assert(exp.file_mods == 4);
cl_assert(exp.file_ignored == 1);
cl_assert(exp.file_untracked == 3);
cl_assert_equal_i(15, exp.files);
cl_assert_equal_i(2, exp.file_adds);
cl_assert_equal_i(5, exp.file_dels);
cl_assert_equal_i(4, exp.file_mods);
cl_assert_equal_i(1, exp.file_ignored);
cl_assert_equal_i(3, exp.file_untracked);
cl_assert(exp.hunks == 11);
cl_assert_equal_i(11, exp.hunks);
cl_assert(exp.lines == 17);
cl_assert(exp.line_ctxt == 4);
cl_assert(exp.line_adds == 8);
cl_assert(exp.line_dels == 5);
cl_assert_equal_i(17, exp.lines);
cl_assert_equal_i(4, exp.line_ctxt);
cl_assert_equal_i(8, exp.line_adds);
cl_assert_equal_i(5, exp.line_dels);
git_diff_list_free(diff);
diff = NULL;
......@@ -149,19 +149,19 @@ void test_diff_workdir__to_tree(void)
cl_git_pass(git_diff_foreach(
diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
cl_assert(exp.files == 16);
cl_assert(exp.file_adds == 5);
cl_assert(exp.file_dels == 4);
cl_assert(exp.file_mods == 3);
cl_assert(exp.file_ignored == 1);
cl_assert(exp.file_untracked == 3);
cl_assert_equal_i(16, exp.files);
cl_assert_equal_i(5, exp.file_adds);
cl_assert_equal_i(4, exp.file_dels);
cl_assert_equal_i(3, exp.file_mods);
cl_assert_equal_i(1, exp.file_ignored);
cl_assert_equal_i(3, exp.file_untracked);
cl_assert(exp.hunks == 12);
cl_assert_equal_i(12, exp.hunks);
cl_assert(exp.lines == 19);
cl_assert(exp.line_ctxt == 3);
cl_assert(exp.line_adds == 12);
cl_assert(exp.line_dels == 4);
cl_assert_equal_i(19, exp.lines);
cl_assert_equal_i(3, exp.line_ctxt);
cl_assert_equal_i(12, exp.line_adds);
cl_assert_equal_i(4, exp.line_dels);
git_diff_list_free(diff);
......@@ -340,6 +340,145 @@ void test_diff_workdir__filemode_changes_with_filemode_false(void)
git_config_free(cfg);
}
/* Verify that, for a file whose HEAD, index, and workdir contents all
 * differ, each diff function reports its own result and that merging
 * the tree->index diff with the index->workdir diff yields the
 * expected combined counts.
 */
void test_diff_workdir__head_index_and_workdir_all_differ(void)
{
	git_diff_options opts = {0};
	git_diff_list *diff_i2t = NULL, *diff_w2i = NULL;
	diff_expects exp;
	char *pathspec = "staged_changes_modified_file";
	git_tree *tree;

	/* For this file,
	 * - head->index diff has 1 line of context, 1 line of diff
	 * - index->workdir diff has 2 lines of context, 1 line of diff
	 * but
	 * - head->workdir diff has 1 line of context, 2 lines of diff
	 * Let's make sure the right one is returned from each fn.
	 */

	g_repo = cl_git_sandbox_init("status");

	tree = resolve_commit_oid_to_tree(g_repo, "26a125ee1bfc5df1e1b2e9441bbe63c8a7ae989f");

	/* restrict both diffs to the single file of interest */
	opts.pathspec.strings = &pathspec;
	opts.pathspec.count = 1;

	cl_git_pass(git_diff_index_to_tree(g_repo, &opts, tree, &diff_i2t));
	cl_git_pass(git_diff_workdir_to_index(g_repo, &opts, &diff_w2i));

	/* tree -> index: 1 context line + 1 added line */
	memset(&exp, 0, sizeof(exp));
	cl_git_pass(git_diff_foreach(
		diff_i2t, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
	cl_assert_equal_i(1, exp.files);
	cl_assert_equal_i(0, exp.file_adds);
	cl_assert_equal_i(0, exp.file_dels);
	cl_assert_equal_i(1, exp.file_mods);
	cl_assert_equal_i(1, exp.hunks);
	cl_assert_equal_i(2, exp.lines);
	cl_assert_equal_i(1, exp.line_ctxt);
	cl_assert_equal_i(1, exp.line_adds);
	cl_assert_equal_i(0, exp.line_dels);

	/* index -> workdir: 2 context lines + 1 added line */
	memset(&exp, 0, sizeof(exp));
	cl_git_pass(git_diff_foreach(
		diff_w2i, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
	cl_assert_equal_i(1, exp.files);
	cl_assert_equal_i(0, exp.file_adds);
	cl_assert_equal_i(0, exp.file_dels);
	cl_assert_equal_i(1, exp.file_mods);
	cl_assert_equal_i(1, exp.hunks);
	cl_assert_equal_i(3, exp.lines);
	cl_assert_equal_i(2, exp.line_ctxt);
	cl_assert_equal_i(1, exp.line_adds);
	cl_assert_equal_i(0, exp.line_dels);

	/* merge the index->workdir diff onto the tree->index diff */
	cl_git_pass(git_diff_merge(diff_i2t, diff_w2i));

	/* merged result: 1 context line + 2 added lines */
	memset(&exp, 0, sizeof(exp));
	cl_git_pass(git_diff_foreach(
		diff_i2t, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
	cl_assert_equal_i(1, exp.files);
	cl_assert_equal_i(0, exp.file_adds);
	cl_assert_equal_i(0, exp.file_dels);
	cl_assert_equal_i(1, exp.file_mods);
	cl_assert_equal_i(1, exp.hunks);
	cl_assert_equal_i(3, exp.lines);
	cl_assert_equal_i(1, exp.line_ctxt);
	cl_assert_equal_i(2, exp.line_adds);
	cl_assert_equal_i(0, exp.line_dels);

	git_diff_list_free(diff_i2t);
	git_diff_list_free(diff_w2i);

	/* release the tree looked up above so the test does not leak it */
	git_tree_free(tree);
}
/* Run the workdir-to-index diff for "current_file" three times, changing
 * the end of the file between runs, and check the counts gathered by the
 * diff_file_fn / diff_hunk_fn / diff_line_fn callbacks after each run.
 */
void test_diff_workdir__eof_newline_changes(void)
{
git_diff_options opts = {0};
git_diff_list *diff = NULL;
diff_expects exp;
char *pathspec = "current_file";
g_repo = cl_git_sandbox_init("status");
/* limit the diff to the single file under test */
opts.pathspec.strings = &pathspec;
opts.pathspec.count = 1;
/* Step 1: file untouched; the diff is expected to be empty. */
cl_git_pass(git_diff_workdir_to_index(g_repo, &opts, &diff));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
cl_assert_equal_i(0, exp.files);
cl_assert_equal_i(0, exp.file_adds);
cl_assert_equal_i(0, exp.file_dels);
cl_assert_equal_i(0, exp.file_mods);
cl_assert_equal_i(0, exp.hunks);
cl_assert_equal_i(0, exp.lines);
cl_assert_equal_i(0, exp.line_ctxt);
cl_assert_equal_i(0, exp.line_adds);
cl_assert_equal_i(0, exp.line_dels);
git_diff_list_free(diff);
/* Step 2: append an extra "\n"; expect one modified file with one hunk
 * made of 1 context line and 1 added line.
 */
cl_git_append2file("status/current_file", "\n");
cl_git_pass(git_diff_workdir_to_index(g_repo, &opts, &diff));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
cl_assert_equal_i(1, exp.files);
cl_assert_equal_i(0, exp.file_adds);
cl_assert_equal_i(0, exp.file_dels);
cl_assert_equal_i(1, exp.file_mods);
cl_assert_equal_i(1, exp.hunks);
cl_assert_equal_i(2, exp.lines);
cl_assert_equal_i(1, exp.line_ctxt);
cl_assert_equal_i(1, exp.line_adds);
cl_assert_equal_i(0, exp.line_dels);
git_diff_list_free(diff);
/* Step 3: rewrite the file with content that has no trailing newline;
 * expect 1 added line and 2 deletions with no context.
 * NOTE(review): presumably the second deletion is the removed EOF
 * newline being counted as a line delete by diff_line_fn -- confirm
 * against the indexed content of current_file.
 */
cl_git_rewritefile("status/current_file", "current_file");
cl_git_pass(git_diff_workdir_to_index(g_repo, &opts, &diff));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, &exp, diff_file_fn, diff_hunk_fn, diff_line_fn));
cl_assert_equal_i(1, exp.files);
cl_assert_equal_i(0, exp.file_adds);
cl_assert_equal_i(0, exp.file_dels);
cl_assert_equal_i(1, exp.file_mods);
cl_assert_equal_i(1, exp.hunks);
cl_assert_equal_i(3, exp.lines);
cl_assert_equal_i(0, exp.line_ctxt);
cl_assert_equal_i(1, exp.line_adds);
cl_assert_equal_i(2, exp.line_dels);
git_diff_list_free(diff);
}
/* PREPARATION OF TEST DATA
*
* Since there is no command line equivalent of git_diff_workdir_to_tree,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment