Commit a332e91c by Carlos Martín Nieto

pack: use a cache for delta bases when unpacking

Bring back the use of the delta base cache for unpacking objects. When
generating the delta chain, we stop when we find a delta base in the
pack's cache and use that as the starting point.
parent 2acdf4b8
......@@ -42,8 +42,9 @@ static int pack_entry_find_offset(
/**
* Generate the chain of dependencies which we need to get to the
* object at `off`. As we use a stack, the latest is the base object,
* the rest are deltas.
* object at `off`. `chain` is used a stack, popping gives the right
* order to apply deltas on. If an object is found in the pack's base
* cache, we stop calculating there.
*/
static int pack_dependency_chain(git_dependency_chain *chain, struct git_pack_file *p, git_off_t off);
......@@ -521,67 +522,6 @@ int git_packfile_resolve_header(
return error;
}
static int packfile_unpack_delta(
git_rawobj *obj,
struct git_pack_file *p,
git_mwindow **w_curs,
git_off_t *curpos,
size_t delta_size,
git_otype delta_type,
git_off_t obj_offset)
{
git_off_t base_offset, base_key;
git_rawobj base, delta;
git_pack_cache_entry *cached = NULL;
int error, found_base = 0;
base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
git_mwindow_close(w_curs);
if (base_offset == 0)
return packfile_error("delta offset is zero");
if (base_offset < 0) /* must actually be an error code */
return (int)base_offset;
if (!p->bases.entries && (cache_init(&p->bases) < 0))
return -1;
base_key = base_offset; /* git_packfile_unpack modifies base_offset */
if ((cached = cache_get(&p->bases, base_offset)) != NULL) {
memcpy(&base, &cached->raw, sizeof(git_rawobj));
found_base = 1;
}
if (!cached) { /* have to inflate it */
error = git_packfile_unpack(&base, p, &base_offset);
if (error < 0)
return error;
}
error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
git_mwindow_close(w_curs);
if (error < 0) {
if (!found_base)
git__free(base.data);
return error;
}
obj->type = base.type;
error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
if (error < 0)
goto on_error;
if (found_base)
git_atomic_dec(&cached->refcount);
else if (cache_add(&p->bases, &base, base_key) < 0)
git__free(base.data);
on_error:
git__free(delta.data);
return error; /* error set by git__delta_apply */
}
int git_packfile_unpack(
git_rawobj *obj,
struct git_pack_file *p,
......@@ -589,10 +529,10 @@ int git_packfile_unpack(
{
git_mwindow *w_curs = NULL;
git_off_t curpos = *obj_offset;
int error;
git_dependency_chain chain;
int error, free_base = 0;
git_dependency_chain chain = GIT_ARRAY_INIT;
struct pack_chain_elem *elem;
git_pack_cache_entry *cached = NULL;
git_otype base_type;
/*
......@@ -609,16 +549,38 @@ int git_packfile_unpack(
/* the first one is the base, so we expand that one */
elem = git_array_pop(chain);
curpos = elem->offset;
error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
git_mwindow_close(&w_curs);
if (elem->cached) {
cached = elem->cached_entry;
memcpy(obj, &cached->raw, sizeof(git_rawobj));
base_type = obj->type;
} else {
curpos = elem->offset;
error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
git_mwindow_close(&w_curs);
base_type = elem->type;
free_base = 1;
}
if (error < 0)
goto cleanup;
base_type = elem->type;
/*
* Finding the object we want as the base element is
* problematic, as we need to make sure we don't accidentally
* give the caller the cached object, which it would then feel
* free to free, so we need to copy the data.
*/
if (cached && git_array_size(chain) == 0) {
void *data = obj->data;
obj->data = git__malloc(obj->len + 1);
GITERR_CHECK_ALLOC(obj->data);
memcpy(obj->data, data, obj->len + 1);
git_atomic_dec(&cached->refcount);
goto cleanup;
}
/* we now apply each consecutive delta until we run out */
while (git_array_size(chain) > 0) {
while (git_array_size(chain) > 0 && !error) {
git_rawobj base, delta;
elem = git_array_pop(chain);
......@@ -636,16 +598,39 @@ int git_packfile_unpack(
obj->type = GIT_OBJ_BAD;
error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
obj->type = base_type;
/*
* We usually don't want to free the base at this
* point, as we put it into the cache in the previous
* iteration. free_base lets us know that we got the
* base object directly from the packfile, so we can free it.
*/
git__free(delta.data);
git__free(base.data);
if (free_base) {
free_base = 0;
git__free(base.data);
}
if (cached) {
git_atomic_dec(&cached->refcount);
cached = NULL;
}
if (error < 0)
break;
obj->type = base_type;
/* only try to cache if we're not handing this buffer off to the caller */
if (git_array_size(chain) > 0 &&
(error = cache_add(&p->bases, obj, elem->base_key)) < 0)
goto cleanup;
}
cleanup:
if (error < 0)
git__free(obj->data);
*obj_offset = elem->offset;
git_array_clear(chain);
return error;
}
......@@ -1248,8 +1233,12 @@ static int pack_dependency_chain(git_dependency_chain *chain_out, struct git_pac
size_t size;
git_otype type;
if (!p->bases.entries && (cache_init(&p->bases) < 0))
return -1;
while (!found_base && error == 0) {
struct pack_chain_elem *elem;
git_pack_cache_entry *cached = NULL;
curpos = obj_offset;
elem = git_array_alloc(chain);
......@@ -1262,13 +1251,23 @@ static int pack_dependency_chain(git_dependency_chain *chain_out, struct git_pac
if (error < 0)
return error;
elem->cached = 0;
elem->offset = curpos;
elem->size = size;
elem->type = type;
elem->base_key = obj_offset;
switch (type) {
case GIT_OBJ_OFS_DELTA:
case GIT_OBJ_REF_DELTA:
/* if we have a base cached, we can stop here instead */
if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
elem->cached_entry = cached;
elem->cached = 1;
found_base = 1;
break;
}
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
git_mwindow_close(&w_curs);
......
......@@ -62,9 +62,14 @@ typedef struct git_pack_cache_entry {
} git_pack_cache_entry;
struct pack_chain_elem {
int cached;
git_off_t base_key;
/* if we don't have it cached we have this */
git_off_t offset;
size_t size;
git_otype type;
/* if cached, we have this instead */
git_pack_cache_entry *cached_entry;
};
typedef git_array_t(struct pack_chain_elem) git_dependency_chain;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment