Commit 0e040c03 by Carlos Martín Nieto

indexer: use a hashtable for keeping track of offsets

These offsets are needed for REF_DELTA objects, which encode which
object they use as a base, but not where it lies in the packfile, so
we need a list.

These objects are mostly from older packfiles, before OFS_DELTA was
widely spread. The time spent in indexing these packfiles is greatly
reduced, though remains above what git is able to do.
parent 29ab8774
......@@ -17,6 +17,7 @@
#include "posix.h"
#include "pack.h"
#include "filebuf.h"
#include "oidmap.h"
#define UINT31_MAX (0x7FFFFFFF)
......@@ -122,14 +123,6 @@ static int objects_cmp(const void *a, const void *b)
return git_oid_cmp(&entrya->oid, &entryb->oid);
}
static int cache_cmp(const void *a, const void *b)
{
const struct git_pack_entry *ea = a;
const struct git_pack_entry *eb = b;
return git_oid_cmp(&ea->sha1, &eb->sha1);
}
int git_indexer_stream_new(
git_indexer_stream **out,
const char *prefix,
......@@ -271,7 +264,8 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
static int store_object(git_indexer_stream *idx)
{
int i;
int i, error;
khiter_t k;
git_oid oid;
struct entry *entry;
git_off_t entry_size;
......@@ -296,11 +290,15 @@ static int store_object(git_indexer_stream *idx)
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
if (!error) {
git__free(pentry);
goto on_error;
}
kh_value(idx->pack->idx_cache, k) = pentry;
git_oid_cpy(&entry->oid, &oid);
if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
......@@ -324,7 +322,8 @@ on_error:
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
int i;
int i, error;
khiter_t k;
git_oid oid;
size_t entry_size;
struct entry *entry;
......@@ -351,11 +350,14 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
if (!error) {
git__free(pentry);
goto on_error;
}
kh_value(idx->pack->idx_cache, k) = pentry;
git_oid_cpy(&entry->oid, &oid);
entry->crc = crc32(0L, Z_NULL, 0);
......@@ -426,8 +428,8 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
/* for now, limit to 2^32 objects */
assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0)
return -1;
idx->pack->idx_cache = git_oidmap_alloc();
GITERR_CHECK_ALLOC(idx->pack->idx_cache);
idx->pack->has_cache = 1;
if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0)
......@@ -718,9 +720,9 @@ on_error:
void git_indexer_stream_free(git_indexer_stream *idx)
{
khiter_t k;
unsigned int i;
struct entry *e;
struct git_pack_entry *pe;
struct delta_info *delta;
if (idx == NULL)
......@@ -729,11 +731,16 @@ void git_indexer_stream_free(git_indexer_stream *idx)
git_vector_foreach(&idx->objects, i, e)
git__free(e);
git_vector_free(&idx->objects);
if (idx->pack) {
git_vector_foreach(&idx->pack->cache, i, pe)
git__free(pe);
git_vector_free(&idx->pack->cache);
for (k = kh_begin(idx->pack->idx_cache); k != kh_end(idx->pack->idx_cache); k++) {
if (kh_exist(idx->pack->idx_cache, k))
git__free(kh_value(idx->pack->idx_cache, k));
}
git_oidmap_free(idx->pack->idx_cache);
}
git_vector_foreach(&idx->deltas, i, delta)
git__free(delta);
git_vector_free(&idx->deltas);
......
......@@ -21,8 +21,6 @@
#include "git2/indexer.h"
#include "git2/config.h"
GIT__USE_OIDMAP;
struct unpacked {
git_pobject *object;
void *data;
......
......@@ -760,13 +760,14 @@ git_off_t get_delta_base(
} else if (type == GIT_OBJ_REF_DELTA) {
/* If we have the cooperative cache, search in it first */
if (p->has_cache) {
size_t pos;
struct git_pack_entry key;
khiter_t k;
git_oid oid;
git_oid_fromraw(&key.sha1, base_info);
if (!git_vector_bsearch(&pos, &p->cache, &key)) {
git_oid_fromraw(&oid, base_info);
k = kh_get(oid, p->idx_cache, &oid);
if (k != kh_end(p->idx_cache)) {
*curpos += 20;
return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset;
return ((struct git_pack_entry *)kh_value(p->idx_cache, k))->offset;
}
}
/* The base entry _must_ be in the same pack */
......
......@@ -16,6 +16,7 @@
#include "map.h"
#include "mwindow.h"
#include "odb.h"
#include "oidmap.h"
#define GIT_PACK_FILE_MODE 0444
......@@ -62,6 +63,7 @@ typedef struct git_pack_cache_entry {
#include "offmap.h"
GIT__USE_OFFMAP;
GIT__USE_OIDMAP;
#define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024
#define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */
......@@ -86,7 +88,7 @@ struct git_pack_file {
git_time_t mtime;
unsigned pack_local:1, pack_keep:1, has_cache:1;
git_oid sha1;
git_vector cache;
git_oidmap *idx_cache;
git_oid **oids;
git_pack_cache bases; /* delta base cache */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment