Commit 9fd4c9c8 by Edward Thomson

Merge pull request #3366 from libgit2/cmn/index-hashmap

Use a hashmap for path-based lookups in the index
parents 1cef6b9f 81b76367
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_idxmap_h__
#define INCLUDE_idxmap_h__
#include <ctype.h>
#include "common.h"
#include "git2/index.h"
/*
 * Map the k* allocator names onto the git__* allocation functions. These are
 * defined before khash.h is included.
 * NOTE(review): presumably khash.h uses these names for all of its
 * allocations when they are already defined -- confirm against khash.h.
 */
#define kmalloc git__malloc
#define kcalloc git__calloc
#define krealloc git__realloc
#define kreallocarray git__reallocarray
#define kfree git__free
#include "khash.h"
/*
 * Declare two hash table types, `idx` and `idxicase`. Both use a
 * `const git_index_entry *` as the key and a `git_index_entry *` as the value.
 */
__KHASH_TYPE(idx, const git_index_entry *, git_index_entry *)
__KHASH_TYPE(idxicase, const git_index_entry *, git_index_entry *)
/* Public names for the two table types and for a position within a table. */
typedef khash_t(idx) git_idxmap;
typedef khash_t(idxicase) git_idxmap_icase;
typedef khiter_t git_idxmap_iter;
/*
 * Hash an index entry for the index maps.
 *
 * The entry's path is folded to lower case one character at a time and
 * combined with a multiply-by-31 string hash (`(h << 5) - h` is `h * 31`);
 * the entry's stage is then added to the result. Because the path is
 * lower-cased, paths that differ only in case hash to the same value, which
 * lets the same function back both the `idx` and `idxicase` tables.
 */
static kh_inline khint_t idxentry_hash(const git_index_entry *e)
{
	const char *p = e->path;
	khint_t hash = (khint_t)git__tolower(*p);

	/* An empty path leaves the hash at zero; otherwise fold in the rest. */
	if (hash != 0) {
		while (*++p != '\0')
			hash = (hash << 5) - hash + (khint_t)git__tolower(*p);
	}

	return hash + GIT_IDXENTRY_STAGE(e);
}
/*
 * Key equality predicates for the index maps.
 *
 * Two entries are equal when they are at the same stage and their paths
 * match: byte-for-byte for idxentry_equal, ignoring case for
 * idxentry_icase_equal.
 *
 * Both arguments are pointers to entries. They are parenthesized before
 * being dereferenced so that any pointer expression (for example `&key`)
 * can be passed safely. Each argument is evaluated twice, so it must not
 * have side effects.
 */
#define idxentry_equal(a, b) \
	(GIT_IDXENTRY_STAGE(a) == GIT_IDXENTRY_STAGE(b) && \
	 strcmp((a)->path, (b)->path) == 0)
#define idxentry_icase_equal(a, b) \
	(GIT_IDXENTRY_STAGE(a) == GIT_IDXENTRY_STAGE(b) && \
	 strcasecmp((a)->path, (b)->path) == 0)
/*
 * Implementation instantiation macros. A source file that uses the index
 * maps expands these once at file scope to get the `static kh_inline`
 * implementations of the two tables.
 *
 * Both tables share idxentry_hash as their hash function; they differ only
 * in the key comparison: idxentry_equal for `idx` (case-sensitive paths) and
 * idxentry_icase_equal for `idxicase` (case-insensitive paths).
 */
#define GIT__USE_IDXMAP \
__KHASH_IMPL(idx, static kh_inline, const git_index_entry *, git_index_entry *, 1, idxentry_hash, idxentry_equal)
#define GIT__USE_IDXMAP_ICASE \
__KHASH_IMPL(idxicase, static kh_inline, const git_index_entry *, git_index_entry *, 1, idxentry_hash, idxentry_icase_equal)
/*
 * Allocate a new, empty index map and store it through `hp`.
 *
 * Evaluates to 0 on success. If kh_init() returns NULL, the out-of-memory
 * error is set via giterr_set_oom() and the macro evaluates to -1.
 *
 * The whole expansion is parenthesized so the result can be used inside a
 * larger expression (e.g. `git_idxmap_alloc(&map) < 0 || ...`) without the
 * surrounding operators being absorbed into the conditional's branches.
 */
#define git_idxmap_alloc(hp) \
	((((*(hp)) = kh_init(idx)) == NULL) ? (giterr_set_oom(), -1) : 0)
#define git_idxmap_icase_alloc(hp) \
	((((*(hp)) = kh_init(idxicase)) == NULL) ? (giterr_set_oom(), -1) : 0)
/*
 * Insert `val` under `key` into map `h`.
 *
 * `rval` must be an lvalue; it receives the result of kh_put(). When that
 * result is negative nothing is stored. When it is zero the slot's key is
 * overwritten with `key` as well, and in every non-negative case the slot's
 * value is set to `val`.
 *
 * Arguments are parenthesized wherever they are used in an expression, and
 * the macro-local variable avoids the reserved `__` prefix. `h`, `key` and
 * `rval` are evaluated more than once, so they must be free of side effects.
 */
#define git_idxmap_insert(h, key, val, rval) do { \
	khiter_t idxmap_pos_ = kh_put(idx, (h), (key), &(rval)); \
	if ((rval) >= 0) { \
		if ((rval) == 0) \
			kh_key((h), idxmap_pos_) = (key); \
		kh_val((h), idxmap_pos_) = (val); \
	} \
} while (0)

/* Same as git_idxmap_insert, but for the case-insensitive `idxicase` table. */
#define git_idxmap_icase_insert(h, key, val, rval) do { \
	khiter_t idxmap_pos_ = kh_put(idxicase, (h), (key), &(rval)); \
	if ((rval) >= 0) { \
		if ((rval) == 0) \
			kh_key((h), idxmap_pos_) = (key); \
		kh_val((h), idxmap_pos_) = (val); \
	} \
} while (0)
/*
 * Thin wrappers over the khash accessors for the index maps.
 *
 * The lookup macros return a position in the table; a position equal to
 * kh_end(h) means "not found", which is what git_idxmap_valid_index tests.
 */
#define git_idxmap_lookup_index(h, k) kh_get(idx, h, k)
#define git_idxmap_icase_lookup_index(h, k) kh_get(idxicase, h, k)
#define git_idxmap_value_at(h, pos) kh_val(h, pos)
/* `pos` is parenthesized so that expressions such as `i & mask` compare correctly. */
#define git_idxmap_valid_index(h, pos) ((pos) != kh_end(h))
#define git_idxmap_has_data(h, pos) kh_exist(h, pos)
/*
 * Destroy the map and reset the caller's pointer to NULL. `h` must be an
 * lvalue and is evaluated twice. The expansion is a single parenthesized
 * expression so it stays one unit wherever it is used.
 */
#define git_idxmap_free(h) (kh_destroy(idx, h), (h) = NULL)
#define git_idxmap_clear(h) kh_clear(idx, h)
#define git_idxmap_delete_at(h, id) kh_del(idx, h, id)
#define git_idxmap_icase_delete_at(h, id) kh_del(idxicase, h, id)
/*
 * Remove the entry matching `key` from map `h`, if there is one.
 *
 * The key is looked up first; the slot is deleted only when the lookup
 * produced a valid position, so deleting a missing key is a no-op.
 */
#define git_idxmap_delete(h, key) do { \
	khiter_t idxmap_del_pos_ = git_idxmap_lookup_index(h, key); \
	if (git_idxmap_valid_index(h, idxmap_del_pos_)) { \
		git_idxmap_delete_at(h, idxmap_del_pos_); \
	} \
} while (0)

/* Case-insensitive counterpart of git_idxmap_delete for `idxicase` tables. */
#define git_idxmap_icase_delete(h, key) do { \
	khiter_t idxmap_del_pos_ = git_idxmap_icase_lookup_index(h, key); \
	if (git_idxmap_valid_index(h, idxmap_del_pos_)) { \
		git_idxmap_icase_delete_at(h, idxmap_del_pos_); \
	} \
} while (0)

/* Iteration bounds are khash's own begin/end under index-map names. */
#define git_idxmap_begin kh_begin
#define git_idxmap_end kh_end
#endif
......@@ -17,6 +17,7 @@
#include "pathspec.h"
#include "ignore.h"
#include "blob.h"
#include "idxmap.h"
#include "git2/odb.h"
#include "git2/oid.h"
......@@ -24,6 +25,32 @@
#include "git2/config.h"
#include "git2/sys/index.h"
/*
 * Expand the khash implementations of the case-sensitive (`idx`) and
 * case-insensitive (`idxicase`) index maps into this file.
 */
GIT__USE_IDXMAP
GIT__USE_IDXMAP_ICASE
/*
 * Insert entry `e` (used as both key and value) into `map`, choosing the
 * case-sensitive or case-insensitive table operations according to
 * `(idx)->ignore_case`. `err` receives the insertion result.
 *
 * `map` is declared with the case-sensitive type, so it is cast to the
 * `idxicase` table type when the index ignores case.
 */
#define INSERT_IN_MAP_EX(idx, map, e, err) do { \
	if (!(idx)->ignore_case) { \
		git_idxmap_insert((map), (e), (e), (err)); \
	} else { \
		git_idxmap_icase_insert((khash_t(idxicase) *) (map), (e), (e), (err)); \
	} \
} while (0)

/* Insert `e` into the index's own entries map. */
#define INSERT_IN_MAP(idx, e, err) \
	INSERT_IN_MAP_EX(idx, (idx)->entries_map, e, err)
/*
 * Look up key `k` in the entries map of index `idx` and store the resulting
 * position in `p`. The case-insensitive table operations are used when
 * `(idx)->ignore_case` is set, the case-sensitive ones otherwise.
 *
 * The map is always taken from the `idx` argument, so the macro works no
 * matter what the caller's index variable is named.
 */
#define LOOKUP_IN_MAP(p, idx, k) do { \
	if ((idx)->ignore_case) \
		(p) = git_idxmap_icase_lookup_index((khash_t(idxicase) *) (idx)->entries_map, (k)); \
	else \
		(p) = git_idxmap_lookup_index((idx)->entries_map, (k)); \
} while (0)
/*
 * Remove the entry matching `e` from the entries map of index `idx`, using
 * the case-insensitive table operations when `(idx)->ignore_case` is set and
 * the case-sensitive ones otherwise.
 */
#define DELETE_IN_MAP(idx, e) do { \
	if (!(idx)->ignore_case) { \
		git_idxmap_delete((idx)->entries_map, (e)); \
	} else { \
		git_idxmap_icase_delete((khash_t(idxicase) *) (idx)->entries_map, (e)); \
	} \
} while (0)
static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths,
unsigned int flags,
git_index_matched_path_cb cb, void *payload);
......@@ -425,6 +452,7 @@ int git_index_open(git_index **index_out, const char *index_path)
}
if (git_vector_init(&index->entries, 32, git_index_entry_cmp) < 0 ||
git_idxmap_alloc(&index->entries_map) < 0 ||
git_vector_init(&index->names, 8, conflict_name_cmp) < 0 ||
git_vector_init(&index->reuc, 8, reuc_cmp) < 0 ||
git_vector_init(&index->deleted, 8, git_index_entry_cmp) < 0)
......@@ -462,6 +490,7 @@ static void index_free(git_index *index)
assert(!git_atomic_get(&index->readers));
git_index_clear(index);
git_idxmap_free(index->entries_map);
git_vector_free(&index->entries);
git_vector_free(&index->names);
git_vector_free(&index->reuc);
......@@ -508,6 +537,7 @@ static int index_remove_entry(git_index *index, size_t pos)
if (entry != NULL)
git_tree_cache_invalidate_path(index->tree, entry->path);
DELETE_IN_MAP(index, entry);
error = git_vector_remove(&index->entries, pos);
if (!error) {
......@@ -535,6 +565,7 @@ int git_index_clear(git_index *index)
return -1;
}
git_idxmap_clear(index->entries_map);
while (!error && index->entries.length > 0)
error = index_remove_entry(index, index->entries.length - 1);
index_free_deleted(index);
......@@ -805,16 +836,21 @@ const git_index_entry *git_index_get_byindex(
const git_index_entry *git_index_get_bypath(
git_index *index, const char *path, int stage)
{
size_t pos;
khiter_t pos;
git_index_entry key = {{ 0 }};
assert(index);
if (index_find(&pos, index, path, 0, stage, true) < 0) {
key.path = path;
GIT_IDXENTRY_STAGE_SET(&key, stage);
LOOKUP_IN_MAP(pos, index, &key);
if (git_idxmap_valid_index(index->entries_map, pos))
return git_idxmap_value_at(index->entries_map, pos);
giterr_set(GITERR_INDEX, "Index does not contain %s", path);
return NULL;
}
return git_index_get_byindex(index, pos);
}
void git_index_entry__init_from_stat(
......@@ -1140,6 +1176,10 @@ static int index_insert(
* check for dups, this is actually cheaper in the long run.)
*/
error = git_vector_insert_sorted(&index->entries, entry, index_no_dups);
if (error == 0) {
INSERT_IN_MAP(index, entry, error);
}
}
if (error < 0) {
......@@ -1365,12 +1405,18 @@ int git_index_remove(git_index *index, const char *path, int stage)
{
int error;
size_t position;
git_index_entry remove_key = {{ 0 }};
if (git_mutex_lock(&index->lock) < 0) {
giterr_set(GITERR_OS, "Failed to lock index");
return -1;
}
remove_key.path = path;
GIT_IDXENTRY_STAGE_SET(&remove_key, stage);
DELETE_IN_MAP(index, &remove_key);
if (index_find(&position, index, path, 0, stage, false) < 0) {
giterr_set(
GITERR_INDEX, "Index does not contain %s at stage %d", path, stage);
......@@ -2181,6 +2227,11 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
assert(!index->entries.length);
if (index->ignore_case)
kh_resize(idxicase, (khash_t(idxicase) *) index->entries_map, header.entry_count);
else
kh_resize(idx, index->entries_map, header.entry_count);
/* Parse all the entries */
for (i = 0; i < header.entry_count && buffer_size > INDEX_FOOTER_SIZE; ++i) {
git_index_entry *entry;
......@@ -2197,6 +2248,13 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
goto done;
}
INSERT_IN_MAP(index, entry, error);
if (error < 0) {
index_entry_free(entry);
goto done;
}
seek_forward(entry_size);
}
......@@ -2611,7 +2669,13 @@ int git_index_read_tree(git_index *index, const git_tree *tree)
{
int error = 0;
git_vector entries = GIT_VECTOR_INIT;
git_idxmap *entries_map;
read_tree_data data;
size_t i;
git_index_entry *e;
if (git_idxmap_alloc(&entries_map) < 0)
return -1;
git_vector_set_cmp(&entries, index->entries._cmp); /* match sort */
......@@ -2626,9 +2690,25 @@ int git_index_read_tree(git_index *index, const git_tree *tree)
if (index_sort_if_needed(index, true) < 0)
return -1;
error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data);
if ((error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data)) < 0)
goto cleanup;
if (index->ignore_case)
kh_resize(idxicase, (khash_t(idxicase) *) entries_map, entries.length);
else
kh_resize(idx, entries_map, entries.length);
git_vector_foreach(&entries, i, e) {
INSERT_IN_MAP_EX(index, entries_map, e, error);
if (error < 0) {
giterr_set(GITERR_INDEX, "failed to insert entry into map");
return error;
}
}
error = 0;
if (!error) {
git_vector_sort(&entries);
if ((error = git_index_clear(index)) < 0)
......@@ -2638,11 +2718,13 @@ int git_index_read_tree(git_index *index, const git_tree *tree)
error = -1;
} else {
git_vector_swap(&entries, &index->entries);
entries_map = git__swap(index->entries_map, entries_map);
git_mutex_unlock(&index->lock);
}
}
cleanup:
git_vector_free(&entries);
git_idxmap_free(entries_map);
if (error < 0)
return error;
......
......@@ -10,6 +10,7 @@
#include "fileops.h"
#include "filebuf.h"
#include "vector.h"
#include "idxmap.h"
#include "tree-cache.h"
#include "git2/odb.h"
#include "git2/index.h"
......@@ -25,6 +26,7 @@ struct git_index {
git_oid checksum; /* checksum at the end of the file */
git_vector entries;
git_idxmap *entries_map;
git_mutex lock; /* lock held while entries is being changed */
git_vector deleted; /* deleted entries if readers > 0 */
......
......@@ -239,6 +239,7 @@ void test_index_filemodes__invalid(void)
cl_git_pass(git_repository_index(&index, g_repo));
GIT_IDXENTRY_STAGE_SET(&entry, 0);
entry.path = "foo";
entry.mode = GIT_OBJ_BLOB;
cl_git_fail(git_index_add(index, &entry));
......
......@@ -792,10 +792,43 @@ void test_index_tests__reload_while_ignoring_case(void)
cl_git_pass(git_index_set_caps(index, caps &= ~GIT_INDEXCAP_IGNORE_CASE));
cl_git_pass(git_index_read(index, true));
cl_git_pass(git_vector_verify_sorted(&index->entries));
cl_assert(git_index_get_bypath(index, ".HEADER", 0));
cl_assert_equal_p(NULL, git_index_get_bypath(index, ".header", 0));
cl_git_pass(git_index_set_caps(index, caps | GIT_INDEXCAP_IGNORE_CASE));
cl_git_pass(git_index_read(index, true));
cl_git_pass(git_vector_verify_sorted(&index->entries));
cl_assert(git_index_get_bypath(index, ".HEADER", 0));
cl_assert(git_index_get_bypath(index, ".header", 0));
git_index_free(index);
}
/*
 * Check that changing GIT_INDEXCAP_IGNORE_CASE on an already-open index
 * changes how git_index_get_bypath() matches paths: with the capability
 * cleared only the exact-case path is found, and with it set a differently
 * cased path resolves to the same stored entry.
 */
void test_index_tests__change_icase_on_instance(void)
{
git_index *index;
unsigned int caps;
const git_index_entry *e;
cl_git_pass(git_index_open(&index, TEST_INDEX_PATH));
cl_git_pass(git_vector_verify_sorted(&index->entries));
/* Case-sensitive mode. `caps` itself is updated here, so it no longer carries the ignore-case bit. */
caps = git_index_caps(index);
cl_git_pass(git_index_set_caps(index, caps &= ~GIT_INDEXCAP_IGNORE_CASE));
cl_assert_equal_i(false, index->ignore_case);
cl_git_pass(git_vector_verify_sorted(&index->entries));
/* Exact-case lookups succeed; lookups with different casing must miss. */
cl_assert(e = git_index_get_bypath(index, "src/common.h", 0));
cl_assert_equal_p(NULL, e = git_index_get_bypath(index, "SRC/Common.h", 0));
cl_assert(e = git_index_get_bypath(index, "COPYING", 0));
cl_assert_equal_p(NULL, e = git_index_get_bypath(index, "copying", 0));
/* Case-insensitive mode: turn the ignore-case bit back on. */
cl_git_pass(git_index_set_caps(index, caps | GIT_INDEXCAP_IGNORE_CASE));
cl_assert_equal_i(true, index->ignore_case);
cl_git_pass(git_vector_verify_sorted(&index->entries));
/* Either casing now finds the entry, and the entry keeps its stored path. */
cl_assert(e = git_index_get_bypath(index, "COPYING", 0));
cl_assert_equal_s("COPYING", e->path);
cl_assert(e = git_index_get_bypath(index, "copying", 0));
cl_assert_equal_s("COPYING", e->path);
git_index_free(index);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment