Commit ad7a51d8 by Colin Stolley

refs: Speed up packed lookups.

Currently ref lookups require loading the entire packed-refs file into
a hashmap in memory. For repos with large numbers of refs this can be
painfully slow.

This patch replaces the existing lookup code and instead mmap()'s the
packed-refs file and performs a binary search to locate the ref entry.
Git uses a similiar approach.

The old hash table codepath is still used for unsorted packed-refs files.

This patch also fixes a minor bug where the "peeled" trait is never
parsed correctly from the packed-refs header.
parent 12b53eb0
...@@ -1963,3 +1963,8 @@ v0.22 ...@@ -1963,3 +1963,8 @@ v0.22
functions. This is not something which we can know to do. A functions. This is not something which we can know to do. A
last-resort convenience function is provided in sys/openssl.h, last-resort convenience function is provided in sys/openssl.h,
`git_openssl_set_locking()` which can be used to set the locking. `git_openssl_set_locking()` which can be used to set the locking.
* `git_reference_*()` functions use mmap() + binary search for packed
refs lookups when using the fs backend. Previously all entries were
read into a hashtable, which could be slow for repositories with a
large number of refs.
...@@ -65,9 +65,13 @@ typedef struct refdb_fs_backend { ...@@ -65,9 +65,13 @@ typedef struct refdb_fs_backend {
git_iterator_flag_t iterator_flags; git_iterator_flag_t iterator_flags;
uint32_t direach_flags; uint32_t direach_flags;
int fsync; int fsync;
git_map packed_refs_map;
git_mutex prlock; /* protect packed_refs_map */
bool sorted;
} refdb_fs_backend; } refdb_fs_backend;
static int refdb_reflog_fs__delete(git_refdb_backend *_backend, const char *name); static int refdb_reflog_fs__delete(git_refdb_backend *_backend, const char *name);
static const char * packed_set_peeling_mode(const char *data, size_t data_sz, refdb_fs_backend *backend);
GIT_INLINE(int) loose_path( GIT_INLINE(int) loose_path(
git_str *out, git_str *out,
...@@ -137,27 +141,9 @@ static int packed_reload(refdb_fs_backend *backend) ...@@ -137,27 +141,9 @@ static int packed_reload(refdb_fs_backend *backend)
scan = (char *)packedrefs.ptr; scan = (char *)packedrefs.ptr;
eof = scan + packedrefs.size; eof = scan + packedrefs.size;
backend->peeling_mode = PEELING_NONE; scan = (char *)packed_set_peeling_mode(scan, packedrefs.size, backend);
if (!scan) {
if (*scan == '#') {
static const char *traits_header = "# pack-refs with: ";
if (git__prefixcmp(scan, traits_header) == 0) {
scan += strlen(traits_header);
eol = strchr(scan, '\n');
if (!eol)
goto parse_failed; goto parse_failed;
*eol = '\0';
if (strstr(scan, " fully-peeled ") != NULL) {
backend->peeling_mode = PEELING_FULL;
} else if (strstr(scan, " peeled ") != NULL) {
backend->peeling_mode = PEELING_STANDARD;
}
scan = eol + 1;
}
} }
while (scan < eof && *scan == '#') { while (scan < eof && *scan == '#') {
...@@ -466,7 +452,173 @@ static int ref_error_notfound(const char *name) ...@@ -466,7 +452,173 @@ static int ref_error_notfound(const char *name)
return GIT_ENOTFOUND; return GIT_ENOTFOUND;
} }
static int packed_lookup( static const char *packed_set_peeling_mode(
const char *data,
size_t data_sz,
refdb_fs_backend *backend)
{
static const char *traits_header = "# pack-refs with:";
const char *eol;
backend->peeling_mode = PEELING_NONE;
if (data_sz == 0 || *data != '#') {
return data;
}
if (git__prefixncmp(data, data_sz, traits_header) == 0) {
size_t hdr_sz = strlen(traits_header);
const char *sorted = " sorted ";
const char *peeled = " peeled ";
const char *fully_peeled = " fully-peeled ";
data += hdr_sz;
data_sz -= hdr_sz;
eol = memchr(data, '\n', data_sz);
if (!eol)
return NULL;
if (git__memmem(data, eol - data, fully_peeled, strlen(fully_peeled))) {
backend->peeling_mode = PEELING_FULL;
} else if (git__memmem(data, eol - data, peeled, strlen(peeled))) {
backend->peeling_mode = PEELING_STANDARD;
}
if (git__memmem(data, eol - data, sorted, strlen(sorted))) {
backend->sorted = true;
} else {
backend->sorted = false;
}
return eol + 1;
}
return data;
}
static int packed_map_check(refdb_fs_backend *backend)
{
int error = 0;
git_file fd = -1;
struct stat st;
if ((error = git_mutex_lock(&backend->prlock)) < 0) {
return error;
}
if (backend->packed_refs_map.data) {
git_mutex_unlock(&backend->prlock);
return error;
}
fd = git_futils_open_ro(backend->refcache->path);
if (fd < 0) {
git_mutex_unlock(&backend->prlock);
if (fd == GIT_ENOTFOUND) {
git_error_clear();
return 0;
}
return fd;
}
if (p_fstat(fd, &st) < 0) {
p_close(fd);
git_mutex_unlock(&backend->prlock);
git_error_set(GIT_ERROR_OS, "unable to stat packed-refs '%s'", backend->refcache->path);
return -1;
}
if (st.st_size == 0) {
p_close(fd);
git_mutex_unlock(&backend->prlock);
return 0;
}
error = git_futils_mmap_ro(&backend->packed_refs_map, fd, 0, (size_t)st.st_size);
p_close(fd);
if (error < 0) {
git_mutex_unlock(&backend->prlock);
return error;
}
packed_set_peeling_mode(
backend->packed_refs_map.data, backend->packed_refs_map.len,
backend);
git_mutex_unlock(&backend->prlock);
return error;
}
/*
* Find beginning of packed-ref record pointed to by p.
* buf - a lower-bound pointer to some memory buffer
* p - an upper-bound pointer to the same memory buffer
*/
static const char *start_of_record(const char *buf, const char *p)
{
const char *nl = p;
while (1) {
nl = git__memrchr(buf, '\n', nl - buf);
if (!nl)
return buf;
if (nl[1] == '^' && nl > buf)
--nl;
else
break;
};
return nl + 1;
}
/*
* Find end of packed-ref record pointed to by p.
* end - an upper-bound pointer to some memory buffer
* p - a lower-bound pointer to the same memory buffer
*/
static const char *end_of_record(const char *p, const char *end)
{
while (1) {
size_t sz = end - p;
p = memchr(p, '\n', sz);
if (!p) {
return end;
}
++p;
if (p < end && p[0] == '^')
++p;
else
break;
}
return p;
}
static int
cmp_record_to_refname(const char *rec, size_t data_end, const char *ref_name)
{
const size_t ref_len = strlen(ref_name);
int cmp_val;
const char *end;
rec += GIT_OID_HEXSZ + 1; /* <oid> + space */
if (data_end < GIT_OID_HEXSZ + 3) {
/* an incomplete (corrupt) record is treated as less than ref_name */
return -1;
}
data_end -= GIT_OID_HEXSZ + 1;
end = memchr(rec, '\n', data_end);
if (end) {
data_end = end - rec;
}
cmp_val = memcmp(rec, ref_name, min(ref_len, data_end));
if (cmp_val == 0 && data_end != ref_len) {
return (data_end > ref_len) ? 1 : -1;
}
return cmp_val;
}
static int packed_unsorted_lookup(
git_reference **out, git_reference **out,
refdb_fs_backend *backend, refdb_fs_backend *backend,
const char *ref_name) const char *ref_name)
...@@ -494,6 +646,86 @@ static int packed_lookup( ...@@ -494,6 +646,86 @@ static int packed_lookup(
return error; return error;
} }
static int packed_lookup(
git_reference **out,
refdb_fs_backend *backend,
const char *ref_name)
{
int error = 0;
const char *left, *right, *data_end;
if ((error = packed_map_check(backend)) < 0)
return error;
if (!backend->sorted) {
return packed_unsorted_lookup(out, backend, ref_name);
}
left = backend->packed_refs_map.data;
right = data_end = ((const char *)backend->packed_refs_map.data) +
backend->packed_refs_map.len;
while (left < right && *left == '#') {
if (!(left = memchr(left, '\n', data_end - left)))
goto parse_failed;
left++;
}
while (left < right) {
const char *mid, *rec;
int compare;
mid = left + (right - left) / 2;
rec = start_of_record(left, mid);
compare = cmp_record_to_refname(rec, data_end - rec, ref_name);
if (compare < 0) {
left = end_of_record(mid, right);
} else if (compare > 0) {
right = rec;
} else {
const char *eol;
git_oid oid, peel, *peel_ptr = NULL;
if (data_end - rec < GIT_OID_HEXSZ ||
git_oid_fromstr(&oid, rec) < 0) {
goto parse_failed;
}
rec += GIT_OID_HEXSZ + 1;
if (!(eol = memchr(rec, '\n', data_end - rec))) {
goto parse_failed;
}
/* look for optional "^<OID>\n" */
if (eol + 1 < data_end) {
rec = eol + 1;
if (*rec == '^') {
rec++;
if (data_end - rec < GIT_OID_HEXSZ ||
git_oid_fromstr(&peel, rec) < 0) {
goto parse_failed;
}
peel_ptr = &peel;
}
}
*out = git_reference__alloc(ref_name, &oid, peel_ptr);
if (!*out) {
return -1;
}
return 0;
}
}
return GIT_ENOTFOUND;
parse_failed:
git_error_set(GIT_ERROR_REFERENCE, "corrupted packed references file");
return -1;
}
static int refdb_fs_backend__lookup( static int refdb_fs_backend__lookup(
git_reference **out, git_reference **out,
git_refdb_backend *_backend, git_refdb_backend *_backend,
...@@ -513,7 +745,6 @@ static int refdb_fs_backend__lookup( ...@@ -513,7 +745,6 @@ static int refdb_fs_backend__lookup(
git_error_clear(); git_error_clear();
error = packed_lookup(out, backend, ref_name); error = packed_lookup(out, backend, ref_name);
} }
return error; return error;
} }
...@@ -1081,6 +1312,18 @@ static int packed_write(refdb_fs_backend *backend) ...@@ -1081,6 +1312,18 @@ static int packed_write(refdb_fs_backend *backend)
int error, open_flags = 0; int error, open_flags = 0;
size_t i; size_t i;
/* take lock and close up packed-refs mmap if open */
if ((error = git_mutex_lock(&backend->prlock)) < 0) {
return error;
}
if (backend->packed_refs_map.data) {
git_futils_mmap_free(&backend->packed_refs_map);
backend->packed_refs_map.data = NULL;
}
git_mutex_unlock(&backend->prlock);
/* lock the cache to updates while we do this */ /* lock the cache to updates while we do this */
if ((error = git_sortedcache_wlock(refcache)) < 0) if ((error = git_sortedcache_wlock(refcache)) < 0)
return error; return error;
...@@ -1568,6 +1811,15 @@ static void refdb_fs_backend__free(git_refdb_backend *_backend) ...@@ -1568,6 +1811,15 @@ static void refdb_fs_backend__free(git_refdb_backend *_backend)
return; return;
git_sortedcache_free(backend->refcache); git_sortedcache_free(backend->refcache);
git_mutex_lock(&backend->prlock);
if (backend->packed_refs_map.data) {
git_futils_mmap_free(&backend->packed_refs_map);
backend->packed_refs_map.data = NULL;
}
git_mutex_unlock(&backend->prlock);
git_mutex_free(&backend->prlock);
git__free(backend->gitpath); git__free(backend->gitpath);
git__free(backend->commonpath); git__free(backend->commonpath);
git__free(backend); git__free(backend);
...@@ -2121,6 +2373,11 @@ int git_refdb_backend_fs( ...@@ -2121,6 +2373,11 @@ int git_refdb_backend_fs(
backend = git__calloc(1, sizeof(refdb_fs_backend)); backend = git__calloc(1, sizeof(refdb_fs_backend));
GIT_ERROR_CHECK_ALLOC(backend); GIT_ERROR_CHECK_ALLOC(backend);
if (git_mutex_init(&backend->prlock) < 0) {
git__free(backend);
return -1;
}
if (git_refdb_init_backend(&backend->parent, GIT_REFDB_BACKEND_VERSION) < 0) if (git_refdb_init_backend(&backend->parent, GIT_REFDB_BACKEND_VERSION) < 0)
goto fail; goto fail;
...@@ -2183,6 +2440,7 @@ int git_refdb_backend_fs( ...@@ -2183,6 +2440,7 @@ int git_refdb_backend_fs(
return 0; return 0;
fail: fail:
git_mutex_free(&backend->prlock);
git_str_dispose(&gitpath); git_str_dispose(&gitpath);
git__free(backend->gitpath); git__free(backend->gitpath);
git__free(backend->commonpath); git__free(backend->commonpath);
......
#include "clar_libgit2.h" #include "clar_libgit2.h"
#include "refs.h"
void test_refs_crashes__double_free(void) void test_refs_crashes__double_free(void)
{ {
...@@ -18,3 +19,26 @@ void test_refs_crashes__double_free(void) ...@@ -18,3 +19,26 @@ void test_refs_crashes__double_free(void)
cl_git_sandbox_cleanup(); cl_git_sandbox_cleanup();
} }
void test_refs_crashes__empty_packedrefs(void)
{
git_repository *repo;
git_reference *ref;
const char *REFNAME = "refs/heads/xxx";
git_str temp_path = GIT_STR_INIT;
int fd = 0;
repo = cl_git_sandbox_init("empty_bare.git");
/* create zero-length packed-refs file */
cl_git_pass(git_str_joinpath(&temp_path, git_repository_path(repo), GIT_PACKEDREFS_FILE));
cl_git_pass(((fd = p_creat(temp_path.ptr, 0644)) < 0));
cl_git_pass(p_close(fd));
/* should fail gracefully */
cl_git_fail_with(
GIT_ENOTFOUND, git_reference_lookup(&ref, repo, REFNAME));
cl_git_sandbox_cleanup();
git_str_dispose(&temp_path);
}
# pack-refs with: peeled fully-peeled # pack-refs with: peeled fully-peeled sorted
c2596aa0151888587ec5c0187f261e63412d9e11 refs/foo/tag-outside-tags c2596aa0151888587ec5c0187f261e63412d9e11 refs/foo/tag-outside-tags
^0df1a5865c8abfc09f1f2182e6a31be550e99f07 ^0df1a5865c8abfc09f1f2182e6a31be550e99f07
0df1a5865c8abfc09f1f2182e6a31be550e99f07 refs/heads/master 0df1a5865c8abfc09f1f2182e6a31be550e99f07 refs/heads/master
......
# pack-refs with: peeled # pack-refs with: peeled sorted
41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed 41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed
5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test 5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test
# pack-refs with: peeled # pack-refs with: peeled sorted
41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed 41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/heads/packed
5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test 5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/heads/packed-test
b25fa35b38051e4ae45d4222e795f9df2e43f1d1 refs/tags/packed-tag b25fa35b38051e4ae45d4222e795f9df2e43f1d1 refs/tags/packed-tag
# pack-refs with: peeled fully-peeled # pack-refs with: peeled fully-peeled sorted
36060c58702ed4c2a40832c51758d5344201d89a refs/remotes/origin/master 36060c58702ed4c2a40832c51758d5344201d89a refs/remotes/origin/master
41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/remotes/origin/packed 41bc8c69075bbdb46c5c6f0566cc8cc5b46e8bd9 refs/remotes/origin/packed
5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/tags/v0.9 5b5b025afb0b4c913b4c338a42934a3863bf3644 refs/tags/v0.9
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment