Commit 55cbd05b by Russell Belfer

Some diff refactorings to help code reuse

There are some diff functions that are useful in a rewritten
checkout and this lays some groundwork for that.  This contains
three main things:

1. Share the function diff uses to calculate the OID for a file
   in the working directory (now named `git_diff__oid_for_file`)
2. Add a `git_diff__paired_foreach` function to iterate over
   two diff lists concurrently.  Convert status to use it.
3. Move all the string/prefix/index entry comparisons into
   function pointers inside the `git_diff_list` object so they
   can be switched between case sensitive and insensitive
   versions.  This makes them easier to reuse in various
   functions without replicating logic.  As part of this, move
   a couple of index functions out of diff.c and into index.c.
parent 2e3d4b96
......@@ -7,6 +7,7 @@
#ifndef INCLUDE_attr_file_h__
#define INCLUDE_attr_file_h__
#include "git2/oid.h"
#include "git2/attr.h"
#include "vector.h"
#include "pool.h"
......
......@@ -327,24 +327,39 @@ void git_diff_list_addref(git_diff_list *diff)
GIT_REFCOUNT_INC(diff);
}
static int oid_for_workdir_item(
int git_diff__oid_for_file(
git_repository *repo,
const git_index_entry *item,
const char *path,
uint16_t mode,
git_off_t size,
git_oid *oid)
{
int result = 0;
git_buf full_path = GIT_BUF_INIT;
if (git_buf_joinpath(
&full_path, git_repository_workdir(repo), item->path) < 0)
&full_path, git_repository_workdir(repo), path) < 0)
return -1;
if (!mode) {
struct stat st;
if (p_stat(path, &st) < 0) {
giterr_set(GITERR_OS, "Could not stat '%s'", path);
result = -1;
goto cleanup;
}
mode = st.st_mode;
size = st.st_size;
}
/* calculate OID for file if possible */
if (S_ISGITLINK(item->mode)) {
if (S_ISGITLINK(mode)) {
git_submodule *sm;
const git_oid *sm_oid;
if (!git_submodule_lookup(&sm, repo, item->path) &&
if (!git_submodule_lookup(&sm, repo, path) &&
(sm_oid = git_submodule_wd_oid(sm)) != NULL)
git_oid_cpy(oid, sm_oid);
else {
......@@ -354,23 +369,22 @@ static int oid_for_workdir_item(
giterr_clear();
memset(oid, 0, sizeof(*oid));
}
} else if (S_ISLNK(item->mode))
} else if (S_ISLNK(mode)) {
result = git_odb__hashlink(oid, full_path.ptr);
else if (!git__is_sizet(item->file_size)) {
giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
} else if (!git__is_sizet(size)) {
giterr_set(GITERR_OS, "File size overflow (for 32-bits) on '%s'", path);
result = -1;
} else {
git_vector filters = GIT_VECTOR_INIT;
result = git_filters_load(
&filters, repo, item->path, GIT_FILTER_TO_ODB);
result = git_filters_load(&filters, repo, path, GIT_FILTER_TO_ODB);
if (result >= 0) {
int fd = git_futils_open_ro(full_path.ptr);
if (fd < 0)
result = fd;
else {
result = git_odb__hashfd_filtered(
oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB, &filters);
oid, fd, (size_t)size, GIT_OBJ_BLOB, &filters);
p_close(fd);
}
}
......@@ -378,8 +392,8 @@ static int oid_for_workdir_item(
git_filters_free(&filters);
}
cleanup:
git_buf_free(&full_path);
return result;
}
......@@ -439,8 +453,7 @@ static int maybe_modified(
}
/* if oids and modes match, then file is unmodified */
else if (git_oid_cmp(&oitem->oid, &nitem->oid) == 0 &&
omode == nmode)
else if (git_oid_equal(&oitem->oid, &nitem->oid) && omode == nmode)
status = GIT_DELTA_UNMODIFIED;
/* if we have an unknown OID and a workdir iterator, then check some
......@@ -493,12 +506,14 @@ static int maybe_modified(
/* if we got here and decided that the files are modified, but we
* haven't calculated the OID of the new item, then calculate it now
*/
if (status != GIT_DELTA_UNMODIFIED && git_oid_iszero(&nitem->oid)) {
if (oid_for_workdir_item(diff->repo, nitem, &noid) < 0)
if (status != GIT_DELTA_UNMODIFIED &&
git_oid_iszero(&nitem->oid) && !use_noid)
{
if (git_diff__oid_for_file(diff->repo,
nitem->path, nitem->mode, nitem->file_size, &noid) < 0)
return -1;
else if (omode == nmode && git_oid_equal(&oitem->oid, &noid))
if (omode == nmode && git_oid_equal(&oitem->oid, &noid))
status = GIT_DELTA_UNMODIFIED;
/* store calculated oid so we don't have to recalc later */
use_noid = &noid;
}
......@@ -507,31 +522,14 @@ static int maybe_modified(
diff, status, oitem, omode, nitem, nmode, use_noid);
}
/* Comparator over two `git_index_entry` pointers (passed as `const void *`):
 * orders entries by `path` using case-sensitive `strcmp`, returning its result
 * unchanged (<0, 0, >0). */
static int git_index_entry_cmp_case(const void *a, const void *b)
{
const git_index_entry *entry_a = a;
const git_index_entry *entry_b = b;
return strcmp(entry_a->path, entry_b->path);
}
/* Comparator over two `git_index_entry` pointers (passed as `const void *`):
 * orders entries by `path` using case-insensitive `strcasecmp`, returning its
 * result unchanged (<0, 0, >0). */
static int git_index_entry_cmp_icase(const void *a, const void *b)
{
const git_index_entry *entry_a = a;
const git_index_entry *entry_b = b;
return strcasecmp(entry_a->path, entry_b->path);
}
static bool entry_is_prefixed(
git_diff_list *diff,
const git_index_entry *item,
git_iterator *prefix_iterator,
const git_index_entry *prefix_item)
{
size_t pathlen;
if (!prefix_item ||
ITERATOR_PREFIXCMP(*prefix_iterator, prefix_item->path, item->path))
if (!prefix_item || diff->prefixcmp(prefix_item->path, item->path))
return false;
pathlen = strlen(item->path);
......@@ -541,6 +539,35 @@ static bool entry_is_prefixed(
prefix_item->path[pathlen] == '/');
}
/*
 * Prime a diff list from the pair of iterators it is about to consume.
 *
 * Copies each iterator's `type` into `old_src` / `new_src`, then installs the
 * string, prefix and index-entry comparison callbacks on the diff list.  The
 * case-insensitive set is chosen (and GIT_DIFF_DELTAS_ARE_ICASE is set in
 * `opts.flags`) when either iterator has `ignore_case` set; otherwise the
 * case-sensitive set is chosen and the flag is cleared.
 *
 * Always returns 0.
 */
static int diff_list_init_from_iterators(
	git_diff_list *diff,
	git_iterator *old_iter,
	git_iterator *new_iter)
{
	/* Use case-insensitive compare if either iterator has
	 * the ignore_case bit set */
	const bool icase = old_iter->ignore_case || new_iter->ignore_case;

	diff->old_src = old_iter->type;
	diff->new_src = new_iter->type;

	if (icase) {
		diff->opts.flags |= GIT_DIFF_DELTAS_ARE_ICASE;

		diff->strcmp    = strcasecmp;
		diff->strncmp   = strncasecmp;
		diff->prefixcmp = git__prefixcmp_icase;
		diff->entrycmp  = git_index_entry__cmp_icase;
	} else {
		diff->opts.flags &= ~GIT_DIFF_DELTAS_ARE_ICASE;

		diff->strcmp    = strcmp;
		diff->strncmp   = strncmp;
		diff->prefixcmp = git__prefixcmp;
		diff->entrycmp  = git_index_entry__cmp;
	}

	return 0;
}
static int diff_from_iterators(
git_repository *repo,
const git_diff_options *opts, /**< can be NULL for defaults */
......@@ -548,37 +575,31 @@ static int diff_from_iterators(
git_iterator *new_iter,
git_diff_list **diff_ptr)
{
int error = 0;
const git_index_entry *oitem, *nitem;
git_buf ignore_prefix = GIT_BUF_INIT;
git_diff_list *diff = git_diff_list_alloc(repo, opts);
git_vector_cmp entry_compare;
if (!diff)
goto fail;
diff->old_src = old_iter->type;
diff->new_src = new_iter->type;
*diff_ptr = NULL;
/* Use case-insensitive compare if either iterator has
* the ignore_case bit set */
if (!old_iter->ignore_case && !new_iter->ignore_case) {
entry_compare = git_index_entry_cmp_case;
diff->opts.flags &= ~GIT_DIFF_DELTAS_ARE_ICASE;
} else {
entry_compare = git_index_entry_cmp_icase;
diff->opts.flags |= GIT_DIFF_DELTAS_ARE_ICASE;
if (!diff ||
diff_list_init_from_iterators(diff, old_iter, new_iter) < 0)
goto fail;
if (diff->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) {
/* If one of the iterators doesn't have ignore_case set,
* then that's unfortunate because we'll have to spool
* its data, sort it icase, and then use that for our
* merge join to the other iterator that is icase sorted */
if (!old_iter->ignore_case) {
if (git_iterator_spoolandsort(&old_iter, old_iter, git_index_entry_cmp_icase, true) < 0)
goto fail;
} else if (!new_iter->ignore_case) {
if (git_iterator_spoolandsort(&new_iter, new_iter, git_index_entry_cmp_icase, true) < 0)
goto fail;
}
if (!old_iter->ignore_case &&
git_iterator_spoolandsort(
&old_iter, old_iter, diff->entrycmp, true) < 0)
goto fail;
if (!new_iter->ignore_case &&
git_iterator_spoolandsort(
&new_iter, new_iter, diff->entrycmp, true) < 0)
goto fail;
}
if (git_iterator_current(old_iter, &oitem) < 0 ||
......@@ -589,7 +610,7 @@ static int diff_from_iterators(
while (oitem || nitem) {
/* create DELETED records for old items not matched in new */
if (oitem && (!nitem || entry_compare(oitem, nitem) < 0)) {
if (oitem && (!nitem || diff->entrycmp(oitem, nitem) < 0)) {
if (diff_delta__from_one(diff, GIT_DELTA_DELETED, oitem) < 0)
goto fail;
......@@ -597,7 +618,7 @@ static int diff_from_iterators(
* instead of just generating a DELETE record
*/
if ((diff->opts.flags & GIT_DIFF_INCLUDE_TYPECHANGE_TREES) != 0 &&
entry_is_prefixed(oitem, new_iter, nitem))
entry_is_prefixed(diff, oitem, nitem))
{
/* this entry has become a tree! convert to TYPECHANGE */
git_diff_delta *last = diff_delta__last_for_item(diff, oitem);
......@@ -614,13 +635,12 @@ static int diff_from_iterators(
/* create ADDED, TRACKED, or IGNORED records for new items not
* matched in old (and/or descend into directories as needed)
*/
else if (nitem && (!oitem || entry_compare(oitem, nitem) > 0)) {
else if (nitem && (!oitem || diff->entrycmp(oitem, nitem) > 0)) {
git_delta_t delta_type = GIT_DELTA_UNTRACKED;
/* check if contained in ignored parent directory */
if (git_buf_len(&ignore_prefix) &&
ITERATOR_PREFIXCMP(*old_iter, nitem->path,
git_buf_cstr(&ignore_prefix)) == 0)
diff->prefixcmp(nitem->path, git_buf_cstr(&ignore_prefix)) == 0)
delta_type = GIT_DELTA_IGNORED;
if (S_ISDIR(nitem->mode)) {
......@@ -629,7 +649,7 @@ static int diff_from_iterators(
* directories and it is not under an ignored directory.
*/
bool contains_tracked =
entry_is_prefixed(nitem, old_iter, oitem);
entry_is_prefixed(diff, nitem, oitem);
bool recurse_untracked =
(delta_type == GIT_DELTA_UNTRACKED &&
(diff->opts.flags & GIT_DIFF_RECURSE_UNTRACKED_DIRS) != 0);
......@@ -693,7 +713,7 @@ static int diff_from_iterators(
*/
if (delta_type != GIT_DELTA_IGNORED &&
(diff->opts.flags & GIT_DIFF_INCLUDE_TYPECHANGE_TREES) != 0 &&
entry_is_prefixed(nitem, old_iter, oitem))
entry_is_prefixed(diff, nitem, oitem))
{
/* this entry was a tree! convert to TYPECHANGE */
git_diff_delta *last = diff_delta__last_for_item(diff, oitem);
......@@ -711,7 +731,7 @@ static int diff_from_iterators(
* (or ADDED and DELETED pair if type changed)
*/
else {
assert(oitem && nitem && entry_compare(oitem, nitem) == 0);
assert(oitem && nitem && diff->entrycmp(oitem, nitem) == 0);
if (maybe_modified(old_iter, oitem, new_iter, nitem, diff) < 0 ||
git_iterator_advance(old_iter, &oitem) < 0 ||
......@@ -720,21 +740,19 @@ static int diff_from_iterators(
}
}
git_iterator_free(old_iter);
git_iterator_free(new_iter);
git_buf_free(&ignore_prefix);
*diff_ptr = diff;
return 0;
fail:
if (!*diff_ptr) {
git_diff_list_free(diff);
error = -1;
}
git_iterator_free(old_iter);
git_iterator_free(new_iter);
git_buf_free(&ignore_prefix);
git_diff_list_free(diff);
*diff_ptr = NULL;
return -1;
return error;
}
......
......@@ -41,6 +41,11 @@ struct git_diff_list {
git_iterator_type_t old_src;
git_iterator_type_t new_src;
uint32_t diffcaps;
int (*strcmp)(const char *, const char *);
int (*strncmp)(const char *, const char *, size_t);
int (*prefixcmp)(const char *str, const char *pfx);
int (*entrycmp)(const void *a, const void *b);
};
extern void git_diff__cleanup_modes(
......@@ -53,5 +58,8 @@ extern int git_diff_delta__cmp(const void *a, const void *b);
extern bool git_diff_delta__should_skip(
const git_diff_options *opts, const git_diff_delta *delta);
/**
 * Compute the object id for a file in the repository's working directory.
 *
 * Arguments, in order: the repository; the file path, which is joined onto
 * the repository workdir; the file mode (when 0, the implementation stats the
 * file to obtain both mode and size); the file size; and the output oid.
 *
 * Returns 0 on success or a negative value on failure.
 */
extern int git_diff__oid_for_file(
git_repository *, const char *, uint16_t, git_off_t, git_oid *);
#endif
......@@ -1573,3 +1573,58 @@ int git_diff_patch_to_str(
return error;
}
/*
 * Walk two delta lists side by side, merge-join style, keyed on each delta's
 * `old_file.path`, and hand every step to `cb`.
 *
 * Either list may be NULL, in which case only the other one is walked.  For
 * each step `cb(cbref, i2h, w2i)` receives:
 *   - (i2h, NULL) when the path is only in `idx2head`,
 *   - (NULL, w2i) when the path is only in `wd2idx`,
 *   - (i2h, w2i)  when both lists contain the path.
 *
 * Returns GIT_EUSER as soon as `cb` returns non-zero, otherwise 0 once both
 * lists are exhausted.
 */
int git_diff__paired_foreach(
	git_diff_list *idx2head,
	git_diff_list *wd2idx,
	int (*cb)(void *cbref, git_diff_delta *i2h, git_diff_delta *w2i),
	void *cbref)
{
	size_t h_pos = 0, w_pos = 0;
	size_t h_count = idx2head ? idx2head->deltas.length : 0;
	size_t w_count = wd2idx ? wd2idx->deltas.length : 0;
	bool icase = false;

	if (idx2head && wd2idx) {
		bool h_icase =
			(idx2head->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0;
		bool w_icase =
			(wd2idx->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0;

		if (h_icase || w_icase) {
			/* Then use the ignore-case sorter... */
			icase = true;

			/* and assert that both are ignore-case sorted. If this
			 * function ever needs to support merge joining result sets
			 * that are not sorted by the same function, then it will need
			 * to be extended to do a spool and sort on one of the results
			 * before merge joining */
			assert(h_icase && w_icase);
		}
	}

	while (h_pos < h_count || w_pos < w_count) {
		git_diff_delta *i2h =
			idx2head ? GIT_VECTOR_GET(&idx2head->deltas, h_pos) : NULL;
		git_diff_delta *w2i =
			wd2idx ? GIT_VECTOR_GET(&wd2idx->deltas, w_pos) : NULL;
		int order;

		/* a missing entry on one side means the other side goes first */
		if (!w2i)
			order = -1;
		else if (!i2h)
			order = 1;
		else
			order = STRCMP_CASESELECT(
				icase, i2h->old_file.path, w2i->old_file.path);

		if (order < 0) {
			if (cb(cbref, i2h, NULL))
				return GIT_EUSER;
			h_pos++;
		} else if (order > 0) {
			if (cb(cbref, NULL, w2i))
				return GIT_EUSER;
			w_pos++;
		} else {
			if (cb(cbref, i2h, w2i))
				return GIT_EUSER;
			h_pos++;
			w_pos++;
		}
	}

	return 0;
}
......@@ -83,4 +83,10 @@ typedef struct {
uint32_t diffed : 1;
} diff_delta_context;
/**
 * Iterate two diff lists concurrently, pairing deltas by `old_file.path`.
 *
 * `cb` is invoked with `cbref` plus the delta from each list; the delta for a
 * list is NULL when the current path is not present in that list.  Either
 * list pointer may itself be NULL.
 *
 * Returns 0 on completion, or GIT_EUSER if `cb` returned non-zero.
 */
extern int git_diff__paired_foreach(
git_diff_list *idx2head,
git_diff_list *wd2idx,
int (*cb)(void *cbref, git_diff_delta *i2h, git_diff_delta *w2i),
void *cbref);
#endif
......@@ -516,7 +516,7 @@ git_index_entry *git_index_get_bypath(git_index *index, const char *path, int st
return git_index_get_byindex(index, pos);
}
void git_index__init_entry_from_stat(struct stat *st, git_index_entry *entry)
void git_index_entry__init_from_stat(git_index_entry *entry, struct stat *st)
{
entry->ctime.seconds = (git_time_t)st->st_ctime;
entry->mtime.seconds = (git_time_t)st->st_mtime;
......@@ -530,6 +530,22 @@ void git_index__init_entry_from_stat(struct stat *st, git_index_entry *entry)
entry->file_size = st->st_size;
}
/*
 * Comparator for two `git_index_entry` values handed in as untyped pointers.
 * Entries are ordered by `path` with a case-sensitive strcmp(); the strcmp()
 * result is returned as-is.
 */
int git_index_entry__cmp(const void *a, const void *b)
{
	const char *path_a = ((const git_index_entry *)a)->path;
	const char *path_b = ((const git_index_entry *)b)->path;

	return strcmp(path_a, path_b);
}
/*
 * Comparator for two `git_index_entry` values handed in as untyped pointers.
 * Entries are ordered by `path` with a case-insensitive strcasecmp(); the
 * strcasecmp() result is returned as-is.
 */
int git_index_entry__cmp_icase(const void *a, const void *b)
{
	const char *path_a = ((const git_index_entry *)a)->path;
	const char *path_b = ((const git_index_entry *)b)->path;

	return strcasecmp(path_a, path_b);
}
static int index_entry_init(git_index_entry **entry_out, git_index *index, const char *rel_path)
{
git_index_entry *entry = NULL;
......@@ -568,7 +584,7 @@ static int index_entry_init(git_index_entry **entry_out, git_index *index, const
entry = git__calloc(1, sizeof(git_index_entry));
GITERR_CHECK_ALLOC(entry);
git_index__init_entry_from_stat(&st, entry);
git_index_entry__init_from_stat(entry, &st);
entry->oid = oid;
entry->path = git__strdup(rel_path);
......
......@@ -41,8 +41,11 @@ struct git_index {
git_vector_cmp reuc_search;
};
extern void git_index__init_entry_from_stat(struct stat *st, git_index_entry *entry);
extern void git_index_entry__init_from_stat(git_index_entry *entry, struct stat *st);
extern unsigned int git_index__prefix_position(git_index *index, const char *path);
/* Index-entry comparators taking `git_index_entry` pointers as `const void *`.
 * Both order by entry path: `git_index_entry__cmp` is case-sensitive
 * (strcmp), `git_index_entry__cmp_icase` is case-insensitive (strcasecmp). */
extern int git_index_entry__cmp(const void *a, const void *b);
extern int git_index_entry__cmp_icase(const void *a, const void *b);
#endif
......@@ -648,7 +648,7 @@ static int workdir_iterator__update_entry(workdir_iterator *wi)
wi->is_ignored = -1;
git_index__init_entry_from_stat(&ps->st, &wi->entry);
git_index_entry__init_from_stat(&wi->entry, &ps->st);
/* need different mode here to keep directories during iteration */
wi->entry.mode = git_futils_canonical_mode(ps->st.st_mode);
......
......@@ -17,6 +17,7 @@
#include "git2/diff.h"
#include "diff.h"
#include "diff_output.h"
static unsigned int index_delta2status(git_delta_t index_status)
{
......@@ -76,21 +77,43 @@ static unsigned int workdir_delta2status(git_delta_t workdir_status)
return st;
}
/* Carries the caller's status callback and its payload through
 * git_diff__paired_foreach() so status_invoke_cb() can forward to it. */
typedef struct {
/* user callback: receives (path, status flags, cbdata) */
int (*cb)(const char *, unsigned int, void *);
/* opaque payload passed back to `cb` as its last argument */
void *cbdata;
} status_user_callback;
/*
 * Adapter between the paired-delta callback shape and the user's status
 * callback.
 *
 * `cbref` is a `status_user_callback`.  The reported path is taken from the
 * index-to-head delta when there is one, otherwise from the workdir-to-index
 * delta (NULL if neither is given).  The status value is the OR of the
 * workdir and index status bits for whichever deltas are present.
 *
 * Returns whatever the user callback returns.
 */
static int status_invoke_cb(
	void *cbref, git_diff_delta *i2h, git_diff_delta *w2i)
{
	status_user_callback *user = cbref;
	const char *path =
		i2h ? i2h->old_file.path : (w2i ? w2i->old_file.path : NULL);
	unsigned int wd_bits = w2i ? workdir_delta2status(w2i->status) : 0;
	unsigned int idx_bits = i2h ? index_delta2status(i2h->status) : 0;

	return user->cb(path, wd_bits | idx_bits, user->cbdata);
}
int git_status_foreach_ext(
git_repository *repo,
const git_status_options *opts,
int (*cb)(const char *, unsigned int, void *),
void *cbdata)
{
int err = 0, cmp;
int err = 0;
git_diff_options diffopt;
git_diff_list *idx2head = NULL, *wd2idx = NULL;
git_tree *head = NULL;
git_status_show_t show =
opts ? opts->show : GIT_STATUS_SHOW_INDEX_AND_WORKDIR;
git_diff_delta *i2h, *w2i;
size_t i, j, i_max, j_max;
bool ignore_case = false;
status_user_callback usercb;
assert(show <= GIT_STATUS_SHOW_INDEX_THEN_WORKDIR);
......@@ -126,55 +149,19 @@ int git_status_foreach_ext(
(err = git_diff_workdir_to_index(repo, &diffopt, &wd2idx)) < 0)
goto cleanup;
usercb.cb = cb;
usercb.cbdata = cbdata;
if (show == GIT_STATUS_SHOW_INDEX_THEN_WORKDIR) {
for (i = 0; !err && i < idx2head->deltas.length; i++) {
i2h = GIT_VECTOR_GET(&idx2head->deltas, i);
if (cb(i2h->old_file.path, index_delta2status(i2h->status), cbdata))
err = GIT_EUSER;
}
if ((err = git_diff__paired_foreach(
idx2head, NULL, status_invoke_cb, &usercb)) < 0)
goto cleanup;
git_diff_list_free(idx2head);
idx2head = NULL;
}
i_max = idx2head ? idx2head->deltas.length : 0;
j_max = wd2idx ? wd2idx->deltas.length : 0;
if (idx2head && wd2idx &&
(0 != (idx2head->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) ||
0 != (wd2idx->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE)))
{
/* Then use the ignore-case sorter... */
ignore_case = true;
/* and assert that both are ignore-case sorted. If this function
* ever needs to support merge joining result sets that are not sorted
* by the same function, then it will need to be extended to do a spool
* and sort on one of the results before merge joining */
assert(0 != (idx2head->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) &&
0 != (wd2idx->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE));
}
for (i = 0, j = 0; !err && (i < i_max || j < j_max); ) {
i2h = idx2head ? GIT_VECTOR_GET(&idx2head->deltas,i) : NULL;
w2i = wd2idx ? GIT_VECTOR_GET(&wd2idx->deltas,j) : NULL;
cmp = !w2i ? -1 : !i2h ? 1 : STRCMP_CASESELECT(ignore_case, i2h->old_file.path, w2i->old_file.path);
if (cmp < 0) {
if (cb(i2h->old_file.path, index_delta2status(i2h->status), cbdata))
err = GIT_EUSER;
i++;
} else if (cmp > 0) {
if (cb(w2i->old_file.path, workdir_delta2status(w2i->status), cbdata))
err = GIT_EUSER;
j++;
} else {
if (cb(i2h->old_file.path, index_delta2status(i2h->status) |
workdir_delta2status(w2i->status), cbdata))
err = GIT_EUSER;
i++; j++;
}
}
err = git_diff__paired_foreach(idx2head, wd2idx, status_invoke_cb, &usercb);
cleanup:
git_tree_free(head);
......
......@@ -371,7 +371,7 @@ int git_submodule_add_to_index(git_submodule *sm, int write_index)
memset(&entry, 0, sizeof(entry));
entry.path = sm->path;
git_index__init_entry_from_stat(&st, &entry);
git_index_entry__init_from_stat(&entry, &st);
/* calling git_submodule_open will have set sm->wd_oid if possible */
if ((sm->flags & GIT_SUBMODULE_STATUS__WD_OID_VALID) == 0) {
......
......@@ -81,6 +81,11 @@ extern int git__prefixcmp(const char *str, const char *prefix);
extern int git__prefixcmp_icase(const char *str, const char *prefix);
extern int git__suffixcmp(const char *str, const char *suffix);
/* Sign of `val`: 1 when positive, -1 when negative, 0 when zero. */
GIT_INLINE(int) git__signum(int val)
{
	if (val > 0)
		return 1;
	if (val < 0)
		return -1;
	return 0;
}
extern int git__strtol32(int32_t *n, const char *buff, const char **end_buf, int base);
extern int git__strtol64(int64_t *n, const char *buff, const char **end_buf, int base);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment