Commit bf9a2e98 by Vicent Martí

Merge pull request #296 from kiryl/index-optimization

Index optimization
parents c68dee2a 245adf4f
...@@ -173,6 +173,13 @@ GIT_EXTERN(int) git_index_write(git_index *index); ...@@ -173,6 +173,13 @@ GIT_EXTERN(int) git_index_write(git_index *index);
GIT_EXTERN(int) git_index_find(git_index *index, const char *path); GIT_EXTERN(int) git_index_find(git_index *index, const char *path);
/** /**
* Remove all entries with equal path except last added
*
* @param index an existing index object
*/
GIT_EXTERN(void) git_index_uniq(git_index *index);
/**
* Add or update an index entry from a file in disk * Add or update an index entry from a file in disk
* *
* The file `path` must be relative to the repository's * The file `path` must be relative to the repository's
......
...@@ -348,6 +348,7 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i ...@@ -348,6 +348,7 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i
git_index_entry *entry; git_index_entry *entry;
size_t path_length; size_t path_length;
int position; int position;
git_index_entry **entry_array;
assert(index && source_entry); assert(index && source_entry);
...@@ -375,27 +376,28 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i ...@@ -375,27 +376,28 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i
else else
entry->flags |= GIT_IDXENTRY_NAMEMASK;; entry->flags |= GIT_IDXENTRY_NAMEMASK;;
/*
* replacing is not requested: just insert entry at the end;
* the index is no longer sorted
*/
if (!replace)
return git_vector_insert(&index->entries, entry);
/* look if an entry with this path already exists */ /* look if an entry with this path already exists */
position = git_index_find(index, source_entry->path); position = git_index_find(index, source_entry->path);
/* if no entry exists and replace is not set, /*
* add the entry at the end; * if no entry exists add the entry at the end;
* the index is no longer sorted */ * the index is no longer sorted
if (!replace || position == GIT_ENOTFOUND) { */
if (git_vector_insert(&index->entries, entry) < GIT_SUCCESS) if (position == GIT_ENOTFOUND)
return GIT_ENOMEM; return git_vector_insert(&index->entries, entry);
/* if a previous entry exists and replace is set,
* replace it */
} else {
git_index_entry **entry_array = (git_index_entry **)index->entries.contents;
/* exists, replace it */
entry_array = (git_index_entry **) index->entries.contents;
free((char *)entry_array[position]->path); free((char *)entry_array[position]->path);
free(entry_array[position]); free(entry_array[position]);
entry_array[position] = entry; entry_array[position] = entry;
}
return GIT_SUCCESS; return GIT_SUCCESS;
} }
...@@ -485,6 +487,11 @@ int git_index_find(git_index *index, const char *path) ...@@ -485,6 +487,11 @@ int git_index_find(git_index *index, const char *path)
return git_vector_bsearch2(&index->entries, index_srch, path); return git_vector_bsearch2(&index->entries, index_srch, path);
} }
void git_index_uniq(git_index *index)
{
git_vector_uniq(&index->entries);
}
const git_index_entry_unmerged *git_index_get_unmerged_bypath(git_index *index, const char *path) const git_index_entry_unmerged *git_index_get_unmerged_bypath(git_index *index, const char *path)
{ {
int pos; int pos;
......
...@@ -330,3 +330,67 @@ uint32_t git__hash(const void *key, int len, uint32_t seed) ...@@ -330,3 +330,67 @@ uint32_t git__hash(const void *key, int len, uint32_t seed)
return h1; return h1;
} }
#endif #endif
/*
* A merge sort implementation, simplified from the qsort implementation
* by Mike Haertel, which is a part of the GNU C Library.
*/
static void msort_with_tmp(void *b, size_t n, size_t s,
int (*cmp)(const void *, const void *),
char *t)
{
char *tmp;
char *b1, *b2;
size_t n1, n2;
if (n <= 1)
return;
n1 = n / 2;
n2 = n - n1;
b1 = b;
b2 = (char *)b + (n1 * s);
msort_with_tmp(b1, n1, s, cmp, t);
msort_with_tmp(b2, n2, s, cmp, t);
tmp = t;
while (n1 > 0 && n2 > 0) {
if (cmp(b1, b2) <= 0) {
memcpy(tmp, b1, s);
tmp += s;
b1 += s;
--n1;
} else {
memcpy(tmp, b2, s);
tmp += s;
b2 += s;
--n2;
}
}
if (n1 > 0)
memcpy(tmp, b1, n1 * s);
memcpy(b, t, (n - n2) * s);
}
int git__msort(void *b, size_t n, size_t s,
int (*cmp)(const void *, const void *))
{
const size_t size = n * s;
char buf[1024];
if (size < sizeof(buf)) {
/* The temporary array fits on the small on-stack buffer. */
msort_with_tmp(b, n, s, cmp, buf);
} else {
/* It's somewhat large, so malloc it. */
char *tmp = git__malloc(size);
if (tmp == NULL)
return GIT_ENOMEM;
msort_with_tmp(b, n, s, cmp, tmp);
free(tmp);
}
return GIT_SUCCESS;
}
...@@ -118,4 +118,7 @@ extern int git__fnmatch(const char *pattern, const char *name, int flags); ...@@ -118,4 +118,7 @@ extern int git__fnmatch(const char *pattern, const char *name, int flags);
} \ } \
} while (0) } while (0)
extern int git__msort(void *b, size_t n, size_t s,
int (*cmp)(const void *, const void *));
#endif /* INCLUDE_util_h__ */ #endif /* INCLUDE_util_h__ */
...@@ -94,7 +94,7 @@ void git_vector_sort(git_vector *v) ...@@ -94,7 +94,7 @@ void git_vector_sort(git_vector *v)
if (v->sorted || v->_cmp == NULL) if (v->sorted || v->_cmp == NULL)
return; return;
qsort(v->contents, v->length, sizeof(void *), v->_cmp); git__msort(v->contents, v->length, sizeof(void *), v->_cmp);
v->sorted = 1; v->sorted = 1;
} }
...@@ -162,6 +162,26 @@ int git_vector_remove(git_vector *v, unsigned int idx) ...@@ -162,6 +162,26 @@ int git_vector_remove(git_vector *v, unsigned int idx)
return GIT_SUCCESS; return GIT_SUCCESS;
} }
void git_vector_uniq(git_vector *v)
{
git_vector_cmp cmp;
unsigned int i, j;
if (v->length <= 1)
return;
git_vector_sort(v);
cmp = v->_cmp ? v->_cmp : strict_comparison;
for (i = 0, j = 1 ; j < v->length; ++j)
if (!cmp(v->contents + i, v->contents + j))
v->contents[i] = v->contents[j];
else
v->contents[++i] = v->contents[j];
v->length -= j - i - 1;
}
void git_vector_clear(git_vector *v) void git_vector_clear(git_vector *v)
{ {
assert(v); assert(v);
......
...@@ -32,5 +32,5 @@ GIT_INLINE(void *) git_vector_get(git_vector *v, unsigned int position) ...@@ -32,5 +32,5 @@ GIT_INLINE(void *) git_vector_get(git_vector *v, unsigned int position)
int git_vector_insert(git_vector *v, void *element); int git_vector_insert(git_vector *v, void *element);
int git_vector_remove(git_vector *v, unsigned int idx); int git_vector_remove(git_vector *v, unsigned int idx);
void git_vector_uniq(git_vector *v);
#endif #endif
...@@ -73,6 +73,28 @@ BEGIN_TEST(vector1, "don't read past array bounds on remove()") ...@@ -73,6 +73,28 @@ BEGIN_TEST(vector1, "don't read past array bounds on remove()")
git_vector_free(&x); git_vector_free(&x);
END_TEST END_TEST
static int test_cmp(const void *a, const void *b)
{
int n1 = *(int *)a;
int n2 = *(int *)b;
return n1 - n2;
}
BEGIN_TEST(vector2, "remove duplicates")
git_vector x;
must_pass(git_vector_init(&x, 5, test_cmp));
must_pass(git_vector_insert(&x, (void *) 0xdeadbeef));
must_pass(git_vector_insert(&x, (void *) 0xcafebabe));
must_pass(git_vector_insert(&x, (void *) 0xcafebabe));
must_pass(git_vector_insert(&x, (void *) 0xdeadbeef));
must_pass(git_vector_insert(&x, (void *) 0xcafebabe));
must_be_true(x.length == 5);
git_vector_uniq(&x);
must_be_true(x.length == 2);
git_vector_free(&x);
END_TEST
BEGIN_TEST(path0, "get the dirname of a path") BEGIN_TEST(path0, "get the dirname of a path")
char dir[64], *dir2; char dir[64], *dir2;
...@@ -531,6 +553,7 @@ BEGIN_SUITE(core) ...@@ -531,6 +553,7 @@ BEGIN_SUITE(core)
ADD_TEST(vector0); ADD_TEST(vector0);
ADD_TEST(vector1); ADD_TEST(vector1);
ADD_TEST(vector2);
ADD_TEST(path0); ADD_TEST(path0);
ADD_TEST(path1); ADD_TEST(path1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment