Commit 5f69a31f by Russell Belfer

Initial implementation of new diff patch API

Replacing the `git_diff_iterator` object, this creates a simple API
for accessing the "patch" for any file pair in a diff list and
then gives indexed access to the hunks in the patch and the lines
in the hunk.  This is the initial implementation of this revised
API - it is still broken, but at least builds cleanly.
parent 9a12a625
......@@ -98,6 +98,9 @@ enum {
GIT_DIFF_FILE_FREE_PATH = (1 << 1),
GIT_DIFF_FILE_BINARY = (1 << 2),
GIT_DIFF_FILE_NOT_BINARY = (1 << 3),
GIT_DIFF_FILE_FREE_DATA = (1 << 4),
GIT_DIFF_FILE_UNMAP_DATA = (1 << 5),
GIT_DIFF_FILE_NO_DATA = (1 << 6),
};
/**
......@@ -425,7 +428,7 @@ GIT_EXTERN(int) git_diff_print_patch(
* @param diff A git_diff_list generated by one of the above functions
* @return Count of number of deltas in the list
*/
GIT_EXTERN(size_t) git_diff_entrycount(git_diff_list *diff);
GIT_EXTERN(size_t) git_diff_num_deltas(git_diff_list *diff);
/**
* Query how many diff deltas are there in a diff list filtered by type.
......@@ -438,7 +441,7 @@ GIT_EXTERN(size_t) git_diff_entrycount(git_diff_list *diff);
* @param type A git_delta_t value to filter the count
* @return Count of number of deltas matching delta_t type
*/
GIT_EXTERN(size_t) git_diff_entrycount_of_type(
GIT_EXTERN(size_t) git_diff_num_deltas_of_type(
git_diff_list *diff,
git_delta_t type);
......@@ -469,7 +472,7 @@ GIT_EXTERN(size_t) git_diff_entrycount_of_type(
*/
GIT_EXTERN(int) git_diff_get_patch(
git_diff_patch **patch,
const git_diff_delta **delta,
git_diff_delta **delta,
git_diff_list *diff,
size_t idx);
......@@ -482,43 +485,76 @@ GIT_EXTERN(void) git_diff_patch_free(
/**
* Get the delta associated with a patch
*/
GIT_EXTERN(void) git_diff_patch_get_delta(
const git_diff_delta **delta,
GIT_EXTERN(const git_diff_delta *) git_diff_patch_delta(
git_diff_patch *patch);
/**
* Get the number of hunks in a patch
*/
GIT_EXTERN(size_t) git_diff_patch_hunks(
GIT_EXTERN(size_t) git_diff_patch_num_hunks(
git_diff_patch *patch);
/**
* Get the information about a hunk in a patch
*
* Given a patch and a hunk index into the patch, this returns detailed
* information about that hunk. Any of the output pointers can be passed
* as NULL if you don't care about that particular piece of information.
*
* @param range Output pointer to git_diff_range of hunk
* @param header Output pointer to header string for hunk. Unlike the
* content pointer for each line, this will be NUL-terminated
* @param header_len Output count of characters in header string
* @param lines_in_hunk Output count of total lines in this hunk
* @param patch Input pointer to patch object
* @param hunk_idx Input index of hunk to get information about
* @return 0 on success, GIT_ENOTFOUND if hunk_idx out of range, <0 on error
*/
GIT_EXTERN(int) git_diff_patch_get_hunk(
const git_diff_range **range,
git_diff_range **range,
const char **header,
size_t *header_len,
size_t *lines_in_hunk,
git_diff_patch *patch,
size_t hunk);
size_t hunk_idx);
/**
* Get the number of lines in a hunk
* Get the number of lines in a hunk.
*
* @param patch The git_diff_patch object
* @param hunk_idx Index of the hunk
* @return Number of lines in hunk or -1 if invalid hunk index
*/
GIT_EXTERN(size_t) git_diff_patch_lines_in_hunk(
GIT_EXTERN(int) git_diff_patch_num_lines_in_hunk(
git_diff_patch *patch,
size_t hunk);
size_t hunk_idx);
/**
* Get a line in a hunk of a patch
* Get data about a line in a hunk of a patch.
*
* Given a patch, a hunk index, and a line index in the hunk, this
* will return a lot of details about that line. If you pass a hunk
* index that is not less than the number of hunks, or a line index that
* is not less than the number of lines in the hunk, this will return -1.
*
* @param line_origin A GIT_DIFF_LINE constant from above
* @param content Pointer to content of diff line, not NUL-terminated
* @param content_len Number of characters in content
* @param old_lineno Line number in old file or -1 if line is added
* @param new_lineno Line number in new file or -1 if line is deleted
* @param patch The patch to look in
* @param hunk_idx The index of the hunk
* @param line_of_hunk The index of the line in the hunk
* @return 0 on success, <0 on failure
*/
GIT_EXTERN(int) git_diff_patch_get_line_in_hunk(
char *line_origin,
const char **content,
size_t *content_len,
int *old_lineno,
int *new_lineno,
git_diff_patch *patch,
size_t hunk,
size_t hunk_idx,
size_t line_of_hunk);
/**@}*/
......
......@@ -5,8 +5,6 @@
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
#include "git2/diff.h"
#include "git2/oid.h"
#include "diff.h"
#include "fileops.h"
#include "config.h"
......@@ -268,9 +266,17 @@ static int diff_delta__from_two(
delta->old_file.mode = old_mode;
delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
git_oid_cpy(&delta->new_file.oid, new_oid ? new_oid : &new_entry->oid);
git_oid_cpy(&delta->new_file.oid, &new_entry->oid);
delta->new_file.size = new_entry->file_size;
delta->new_file.mode = new_mode;
if (new_oid) {
if ((diff->opts.flags & GIT_DIFF_REVERSE) != 0)
git_oid_cpy(&delta->old_file.oid, new_oid);
else
git_oid_cpy(&delta->new_file.oid, new_oid);
}
if (new_oid || !git_oid_iszero(&new_entry->oid))
delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
......@@ -425,6 +431,11 @@ void git_diff_list_free(git_diff_list *diff)
GIT_REFCOUNT_DEC(diff, diff_list_free);
}
/* Take an additional reference on a diff list by incrementing its
 * refcount; this is the counterpart of the GIT_REFCOUNT_DEC performed
 * in git_diff_list_free.
 */
void git_diff_list_addref(git_diff_list *diff)
{
GIT_REFCOUNT_INC(diff);
}
static int oid_for_workdir_item(
git_repository *repo,
const git_index_entry *item,
......@@ -519,17 +530,17 @@ static int maybe_modified(
omode == nmode)
status = GIT_DELTA_UNMODIFIED;
/* if modes match and we have an unknown OID and a workdir iterator,
* then check deeper for matching
/* if we have an unknown OID and a workdir iterator, then check some
* circumstances that can accelerate things or need special handling
*/
else if (omode == nmode &&
git_oid_iszero(&nitem->oid) &&
new_iter->type == GIT_ITERATOR_WORKDIR)
else if (git_oid_iszero(&nitem->oid) &&
new_iter->type == GIT_ITERATOR_WORKDIR)
{
/* TODO: add check against index file st_mtime to avoid racy-git */
/* if they files look exactly alike, then we'll assume the same */
if (oitem->file_size == nitem->file_size &&
/* if the stat data looks exactly alike, then assume the same */
if (omode == nmode &&
oitem->file_size == nitem->file_size &&
(!(diff->diffcaps & GIT_DIFFCAPS_TRUST_CTIME) ||
(oitem->ctime.seconds == nitem->ctime.seconds)) &&
oitem->mtime.seconds == nitem->mtime.seconds &&
......@@ -554,16 +565,15 @@ static int maybe_modified(
status = GIT_DELTA_UNMODIFIED;
}
}
}
/* TODO: check git attributes so we will not have to read the file
* in if it is marked binary.
*/
else if (oid_for_workdir_item(diff->repo, nitem, &noid) < 0)
/* if we got here and decided that the files are modified, but we
* haven't calculated the OID of the new item, then calculate it now
*/
if (status == GIT_DELTA_MODIFIED && git_oid_iszero(&nitem->oid)) {
if (oid_for_workdir_item(diff->repo, nitem, &noid) < 0)
return -1;
else if (git_oid_cmp(&oitem->oid, &noid) == 0 &&
omode == nmode)
else if (omode == nmode && git_oid_equal(&oitem->oid, &noid))
status = GIT_DELTA_UNMODIFIED;
/* store calculated oid so we don't have to recalc later */
......
......@@ -7,6 +7,9 @@
#ifndef INCLUDE_diff_h__
#define INCLUDE_diff_h__
#include "git2/diff.h"
#include "git2/oid.h"
#include <stdio.h>
#include "vector.h"
#include "buffer.h"
......@@ -25,14 +28,17 @@ enum {
GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? */
};
#define MAX_DIFF_FILESIZE 0x20000000
/* a git_diff_delta bundled with its own reference count */
typedef struct {
git_refcount rc; /* reference count for this delta */
git_diff_delta delta; /* the delta itself */
} git_diff_delta_refcounted;
struct git_diff_list {
git_refcount rc;
git_repository *repo;
git_diff_options opts;
git_vector pathspec;
git_vector deltas; /* vector of git_diff_file_delta */
git_vector deltas; /* vector of git_diff_delta_refcounted */
git_pool pool;
git_iterator_type_t old_src;
git_iterator_type_t new_src;
......@@ -42,27 +48,7 @@ struct git_diff_list {
extern void git_diff__cleanup_modes(
uint32_t diffcaps, uint32_t *omode, uint32_t *nmode);
/**
* Return the maximum possible number of files in the diff.
*
* NOTE: This number has to be treated as an upper bound on the number of
* files that have changed if the diff is with the working directory.
*
* Why?! For efficiency, we defer loading the file contents as long as
* possible, so if a file has been "touched" in the working directory and
* then reverted to the original content, it may get stored in the diff list
* as MODIFIED along with a flag that the status should be reconfirmed when
* it is actually loaded into memory. When that load happens, it could get
* flipped to UNMODIFIED. If unmodified files are being skipped, then the
* iterator will skip that file and this number may be too high.
*
* This behavior is true of `git_diff_foreach` as well, but the only
* implication there is that the `progress` value would not advance evenly.
*
* @param iterator The iterator object
* @return The maximum number of files to be iterated over
*/
int git_diff_iterator__max_files(git_diff_iterator *iterator);
extern void git_diff_list_addref(git_diff_list *diff);
#endif
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_diff_output_h__
#define INCLUDE_diff_output_h__
#include "git2/blob.h"
#include "diff.h"
#include "map.h"
#include "xdiff/xdiff.h"
#define MAX_DIFF_FILESIZE 0x20000000
/* GIT_DIFF_PATCH_* bit flags: each constant occupies a distinct single
 * bit, so values can be OR'd together into one word.
 * NOTE(review): presumably stored in git_diff_patch.flags - confirm
 * against the code that sets them.
 */
enum {
GIT_DIFF_PATCH_ALLOCATED = (1 << 0),
GIT_DIFF_PATCH_PREPPED = (1 << 1),
GIT_DIFF_PATCH_LOADED = (1 << 2),
GIT_DIFF_PATCH_DIFFABLE = (1 << 3),
GIT_DIFF_PATCH_DIFFED = (1 << 4),
};
/* context for performing diffs: groups the repository, diff list and
 * options with the user callbacks and the xdiff configuration
 */
typedef struct {
git_repository *repo;
git_diff_list *diff;
git_diff_options *opts;
/* per-file, per-hunk and per-line-data callbacks */
git_diff_file_fn file_cb;
git_diff_hunk_fn hunk_cb;
git_diff_data_fn data_cb;
void *cb_data; /* presumably the opaque payload handed to the callbacks - TODO confirm */
int cb_error; /* presumably the error recorded from a callback - TODO confirm */
git_diff_range cb_range; /* presumably the range of the hunk being reported - TODO confirm */
/* xdiff settings (types declared in xdiff/xdiff.h) */
xdemitconf_t xdiff_config;
xpparam_t xdiff_params;
} diff_context;
/* cached information about a single span in a diff
 *
 * NOTE(review): the fields look like the backing data for
 * git_diff_patch_get_line_in_hunk (origin, content pointer and length,
 * old/new line numbers) - confirm against the implementation.
 */
typedef struct diff_patch_line diff_patch_line;
struct diff_patch_line {
const char *ptr; /* start of the span's content */
size_t len; /* length of the content at ptr */
int lines, oldno, newno; /* presumably line count and old/new line numbers - TODO confirm */
char origin; /* presumably a GIT_DIFF_LINE_* origin character - TODO confirm */
};
/* cached information about a hunk in a diff */
typedef struct diff_patch_hunk diff_patch_hunk;
struct diff_patch_hunk {
git_diff_range range;
char header[128]; /* fixed-size buffer holding the hunk header text */
size_t header_len; /* presumably the used length of header - TODO confirm */
/* presumably the index of the hunk's first line in the owning patch's
 * lines array, and how many lines belong to the hunk - TODO confirm
 */
size_t line_start;
size_t line_count;
};
/* A reference-counted patch for one delta: holds the delta, the old and
 * new sides (blob handles and mapped data), and arrays of cached hunks
 * and lines.
 */
struct git_diff_patch {
git_refcount rc;
git_diff_list *diff; /* for refcount purposes, may be NULL for blob diffs */
git_diff_delta *delta;
diff_context *ctxt; /* only valid while generating patch */
git_iterator_type_t old_src;
git_iterator_type_t new_src;
git_blob *old_blob;
git_blob *new_blob;
git_map old_data;
git_map new_data;
uint32_t flags; /* presumably a mask of GIT_DIFF_PATCH_* bits - TODO confirm */
diff_patch_hunk *hunks;
/* presumably allocated capacity (asize) and used count (size) of hunks - TODO confirm */
size_t hunks_asize, hunks_size;
diff_patch_line *lines;
/* presumably allocated capacity (asize) and used count (size) of lines - TODO confirm */
size_t lines_asize, lines_size;
};
/* context for performing diff on a single delta
 *
 * Wraps the patch being worked on together with four one-bit state
 * flags; the flag names parallel GIT_DIFF_PATCH_PREPPED, _LOADED,
 * _DIFFABLE and _DIFFED.
 */
typedef struct {
git_diff_patch *patch;
uint32_t prepped : 1;
uint32_t loaded : 1;
uint32_t diffable : 1;
uint32_t diffed : 1;
} diff_delta_context;
#endif
......@@ -1457,7 +1457,7 @@ static int submodule_wd_status(unsigned int *status, git_submodule *sm)
error = git_diff_index_to_tree(sm_repo, &opt, sm_head, &diff);
if (!error) {
if (git_diff_entrycount(diff, -1) > 0)
if (git_diff_num_deltas(diff) > 0)
*status |= GIT_SUBMODULE_STATUS_WD_INDEX_MODIFIED;
git_diff_list_free(diff);
......@@ -1474,12 +1474,13 @@ static int submodule_wd_status(unsigned int *status, git_submodule *sm)
error = git_diff_workdir_to_index(sm_repo, &opt, &diff);
if (!error) {
int untracked = git_diff_entrycount(diff, GIT_DELTA_UNTRACKED);
int untracked =
git_diff_num_deltas_of_type(diff, GIT_DELTA_UNTRACKED);
if (untracked > 0)
*status |= GIT_SUBMODULE_STATUS_WD_UNTRACKED;
if (git_diff_entrycount(diff, -1) - untracked > 0)
if ((git_diff_num_deltas(diff) - untracked) > 0)
*status |= GIT_SUBMODULE_STATUS_WD_WD_MODIFIED;
git_diff_list_free(diff);
......
......@@ -112,64 +112,65 @@ int diff_foreach_via_iterator(
git_diff_hunk_fn hunk_cb,
git_diff_data_fn line_cb)
{
int error;
git_diff_iterator *iter;
git_diff_delta *delta;
size_t d, num_d = git_diff_num_deltas(diff);
if ((error = git_diff_iterator_new(&iter, diff)) < 0)
return error;
for (d = 0; d < num_d; ++d) {
git_diff_patch *patch;
git_diff_delta *delta;
size_t h, num_h;
while (!(error = git_diff_iterator_next_file(&delta, iter))) {
git_diff_range *range;
const char *hdr;
size_t hdr_len;
float progress = git_diff_iterator_progress(iter);
cl_git_pass(git_diff_get_patch(&patch, &delta, diff, d));
cl_assert(delta && patch);
/* call file_cb for this file */
if (file_cb != NULL && file_cb(data, delta, progress) != 0)
if (file_cb != NULL && file_cb(data, delta, (float)d / num_d) != 0) {
git_diff_patch_free(patch);
goto abort;
}
if (!hunk_cb && !line_cb)
if (!hunk_cb && !line_cb) {
git_diff_patch_free(patch);
continue;
}
num_h = git_diff_patch_num_hunks(patch);
while (!(error = git_diff_iterator_next_hunk(
&range, &hdr, &hdr_len, iter))) {
char origin;
const char *line;
size_t line_len;
for (h = 0; h < num_h; h++) {
git_diff_range *range;
const char *hdr;
size_t hdr_len, l, num_l;
if (hunk_cb && hunk_cb(data, delta, range, hdr, hdr_len) != 0)
cl_git_pass(git_diff_patch_get_hunk(
&range, &hdr, &hdr_len, &num_l, patch, h));
if (hunk_cb && hunk_cb(data, delta, range, hdr, hdr_len) != 0) {
git_diff_patch_free(patch);
goto abort;
}
if (!line_cb)
continue;
for (l = 0; l < num_l; ++l) {
char origin;
const char *line;
size_t line_len;
int old_lineno, new_lineno;
while (!(error = git_diff_iterator_next_line(
&origin, &line, &line_len, iter))) {
cl_git_pass(git_diff_patch_get_line_in_hunk(
&origin, &line, &line_len, &old_lineno, &new_lineno,
patch, h, l));
if (line_cb(data, delta, range, origin, line, line_len) != 0)
if (line_cb(data, delta, range, origin, line, line_len) != 0) {
git_diff_patch_free(patch);
goto abort;
}
}
if (error && error != GIT_ITEROVER)
goto done;
}
if (error && error != GIT_ITEROVER)
goto done;
git_diff_patch_free(patch);
}
done:
git_diff_iterator_free(iter);
if (error == GIT_ITEROVER)
error = 0;
return error;
return 0;
abort:
git_diff_iterator_free(iter);
giterr_clear();
return GIT_EUSER;
}
......@@ -264,10 +264,12 @@ void test_diff_tree__larger_hunks(void)
git_tree *a, *b;
git_diff_options opts = {0};
git_diff_list *diff = NULL;
git_diff_iterator *iter = NULL;
size_t d, num_d, h, num_h, l, num_l, header_len, line_len;
git_diff_delta *delta;
diff_expects exp;
int error, num_files = 0;
git_diff_patch *patch;
git_diff_range *range;
const char *header, *line;
char origin;
g_repo = cl_git_sandbox_init("diff");
......@@ -277,61 +279,38 @@ void test_diff_tree__larger_hunks(void)
opts.context_lines = 1;
opts.interhunk_lines = 0;
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_tree_to_tree(g_repo, &opts, a, b, &diff));
cl_git_pass(git_diff_iterator_new(&iter, diff));
/* this should be exact */
cl_assert(git_diff_iterator_progress(iter) == 0.0f);
num_d = git_diff_num_deltas(diff);
for (d = 0; d < num_d; ++d) {
cl_git_pass(git_diff_get_patch(&patch, &delta, diff, d));
cl_assert(patch && delta);
/* You wouldn't actually structure an iterator loop this way, but
* I have here for testing purposes of the return value
*/
while (!(error = git_diff_iterator_next_file(&delta, iter))) {
git_diff_range *range;
const char *header;
size_t header_len;
int actual_hunks = 0, num_hunks;
float expected_progress;
num_files++;
expected_progress = (float)num_files / 2.0f;
cl_assert(expected_progress == git_diff_iterator_progress(iter));
num_hunks = git_diff_iterator_num_hunks_in_file(iter);
while (!(error = git_diff_iterator_next_hunk(
&range, &header, &header_len, iter)))
{
int actual_lines = 0;
int num_lines = git_diff_iterator_num_lines_in_hunk(iter);
char origin;
const char *line;
size_t line_len;
while (!(error = git_diff_iterator_next_line(
&origin, &line, &line_len, iter)))
{
actual_lines++;
}
num_h = git_diff_patch_num_hunks(patch);
for (h = 0; h < num_h; h++) {
cl_git_pass(git_diff_patch_get_hunk(
&range, &header, &header_len, &num_l, patch, h));
cl_assert_equal_i(GIT_ITEROVER, error);
cl_assert_equal_i(actual_lines, num_lines);
for (l = 0; l < num_l; ++l) {
cl_git_pass(git_diff_patch_get_line_in_hunk(
&origin, &line, &line_len, NULL, NULL, patch, h, l));
cl_assert(line);
}
actual_hunks++;
cl_git_fail(git_diff_patch_get_line_in_hunk(
&origin, &line, &line_len, NULL, NULL, patch, h, num_l));
}
cl_assert_equal_i(GIT_ITEROVER, error);
cl_assert_equal_i(actual_hunks, num_hunks);
cl_git_fail(git_diff_patch_get_hunk(
&range, &header, &header_len, &num_l, patch, num_h));
git_diff_patch_free(patch);
}
cl_assert_equal_i(GIT_ITEROVER, error);
cl_assert_equal_i(2, num_files);
cl_assert(git_diff_iterator_progress(iter) == 1.0f);
cl_git_fail(git_diff_get_patch(&patch, &delta, diff, num_d));
cl_assert_equal_i(2, num_d);
git_diff_iterator_free(iter);
git_diff_list_free(diff);
diff = NULL;
......
......@@ -678,7 +678,7 @@ void test_diff_workdir__larger_hunks(void)
const char *b_commit = "7a9e0b02e63179929fed24f0a3e0f19168114d10";
git_tree *a, *b;
git_diff_options opts = {0};
int i, error;
size_t i, d, num_d, h, num_h, l, num_l, header_len, line_len;
g_repo = cl_git_sandbox_init("diff");
......@@ -690,9 +690,10 @@ void test_diff_workdir__larger_hunks(void)
for (i = 0; i <= 2; ++i) {
git_diff_list *diff = NULL;
git_diff_iterator *iter = NULL;
git_diff_delta *delta;
int num_files = 0;
git_diff_patch *patch;
git_diff_range *range;
const char *header, *line;
char origin;
/* okay, this is a bit silly, but oh well */
switch (i) {
......@@ -707,54 +708,36 @@ void test_diff_workdir__larger_hunks(void)
break;
}
cl_git_pass(git_diff_iterator_new(&iter, diff));
num_d = git_diff_num_deltas(diff);
cl_assert_equal_i(2, (int)num_d);
cl_assert(git_diff_iterator_progress(iter) == 0.0f);
for (d = 0; d < num_d; ++d) {
cl_git_pass(git_diff_get_patch(&patch, NULL, diff, d));
cl_assert(patch);
while (!(error = git_diff_iterator_next_file(&delta, iter))) {
git_diff_range *range;
const char *header;
size_t header_len;
int actual_hunks = 0, num_hunks;
float expected_progress;
num_h = git_diff_patch_num_hunks(patch);
for (h = 0; h < num_h; h++) {
cl_git_pass(git_diff_patch_get_hunk(
&range, &header, &header_len, &num_l, patch, h));
num_files++;
expected_progress = (float)num_files / 2.0f;
cl_assert(expected_progress == git_diff_iterator_progress(iter));
num_hunks = git_diff_iterator_num_hunks_in_file(iter);
while (!(error = git_diff_iterator_next_hunk(
&range, &header, &header_len, iter)))
{
int actual_lines = 0;
int num_lines = git_diff_iterator_num_lines_in_hunk(iter);
char origin;
const char *line;
size_t line_len;
while (!(error = git_diff_iterator_next_line(
&origin, &line, &line_len, iter)))
{
actual_lines++;
for (l = 0; l < num_l; ++l) {
cl_git_pass(git_diff_patch_get_line_in_hunk(
&origin, &line, &line_len, NULL, NULL, patch, h, l));
cl_assert(line);
}
cl_assert_equal_i(GIT_ITEROVER, error);
cl_assert_equal_i(actual_lines, num_lines);
actual_hunks++;
/* confirm fail after the last item */
cl_git_fail(git_diff_patch_get_line_in_hunk(
&origin, &line, &line_len, NULL, NULL, patch, h, num_l));
}
cl_assert_equal_i(GIT_ITEROVER, error);
cl_assert_equal_i(actual_hunks, num_hunks);
}
/* confirm fail after the last item */
cl_git_fail(git_diff_patch_get_hunk(
&range, &header, &header_len, &num_l, patch, num_h));
cl_assert_equal_i(GIT_ITEROVER, error);
cl_assert_equal_i(2, num_files);
cl_assert(git_diff_iterator_progress(iter) == 1.0f);
git_diff_patch_free(patch);
}
git_diff_iterator_free(iter);
git_diff_list_free(diff);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment