Commit 2acdf4b8 by Carlos Martín Nieto

pack: unpack using a loop

We currently make use of recursive function calls to unpack an object,
resolving the deltas as we come back down the chain. This means that we
have unbounded stack growth as we look up objects in a pack.

This is now done in two steps: first we figure out what the dependency
chain is by looking up the delta bases until we reach a non-delta
object, pushing the information we need onto a stack and then we pop
from that stack and apply the deltas until there are no more left.

This version of the code does not make use of the delta base cache so it
is slower than what's in the mainline. A later commit will reintroduce
it.
parent ae081739
...@@ -40,6 +40,13 @@ static int pack_entry_find_offset( ...@@ -40,6 +40,13 @@ static int pack_entry_find_offset(
const git_oid *short_oid, const git_oid *short_oid,
size_t len); size_t len);
/**
* Generate the chain of dependencies which we need to get to the
* object at `off`. As we use a stack, the latest is the base object,
* the rest are deltas.
*/
static int pack_dependency_chain(git_dependency_chain *chain, struct git_pack_file *p, git_off_t off);
static int packfile_error(const char *message) static int packfile_error(const char *message)
{ {
giterr_set(GITERR_ODB, "Invalid pack file - %s", message); giterr_set(GITERR_ODB, "Invalid pack file - %s", message);
...@@ -583,47 +590,63 @@ int git_packfile_unpack( ...@@ -583,47 +590,63 @@ int git_packfile_unpack(
git_mwindow *w_curs = NULL; git_mwindow *w_curs = NULL;
git_off_t curpos = *obj_offset; git_off_t curpos = *obj_offset;
int error; int error;
git_dependency_chain chain;
struct pack_chain_elem *elem;
size_t size = 0; git_otype base_type;
git_otype type;
/* /*
* TODO: optionally check the CRC on the packfile * TODO: optionally check the CRC on the packfile
*/ */
error = pack_dependency_chain(&chain, p, *obj_offset);
if (error < 0)
return error;
obj->data = NULL; obj->data = NULL;
obj->len = 0; obj->len = 0;
obj->type = GIT_OBJ_BAD; obj->type = GIT_OBJ_BAD;
error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos); /* the first one is the base, so we expand that one */
elem = git_array_pop(chain);
curpos = elem->offset;
error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
git_mwindow_close(&w_curs); git_mwindow_close(&w_curs);
if (error < 0) if (error < 0)
return error; goto cleanup;
switch (type) { base_type = elem->type;
case GIT_OBJ_OFS_DELTA: /* we now apply each consecutive delta until we run out */
case GIT_OBJ_REF_DELTA: while (git_array_size(chain) > 0) {
error = packfile_unpack_delta( git_rawobj base, delta;
obj, p, &w_curs, &curpos,
size, type, *obj_offset);
break;
case GIT_OBJ_COMMIT: elem = git_array_pop(chain);
case GIT_OBJ_TREE: curpos = elem->offset;
case GIT_OBJ_BLOB: error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
case GIT_OBJ_TAG: git_mwindow_close(&w_curs);
error = packfile_unpack_compressed(
obj, p, &w_curs, &curpos, if (error < 0)
size, type);
break; break;
default: /* the current object becomes the new base, on which we apply the delta */
error = packfile_error("invalid packfile type in header");; base = *obj;
obj->data = NULL;
obj->len = 0;
obj->type = GIT_OBJ_BAD;
error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
git__free(delta.data);
git__free(base.data);
if (error < 0)
break; break;
obj->type = base_type;
} }
*obj_offset = curpos; cleanup:
git_array_clear(chain);
return error; return error;
} }
...@@ -1215,3 +1238,74 @@ int git_pack_entry_find( ...@@ -1215,3 +1238,74 @@ int git_pack_entry_find(
git_oid_cpy(&e->sha1, &found_oid); git_oid_cpy(&e->sha1, &found_oid);
return 0; return 0;
} }
static int pack_dependency_chain(git_dependency_chain *chain_out, struct git_pack_file *p, git_off_t obj_offset)
{
git_dependency_chain chain = GIT_ARRAY_INIT;
git_mwindow *w_curs = NULL;
git_off_t curpos = obj_offset, base_offset;
int error = 0, found_base = 0;
size_t size;
git_otype type;
while (!found_base && error == 0) {
struct pack_chain_elem *elem;
curpos = obj_offset;
elem = git_array_alloc(chain);
if (!elem)
return -1;
error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
git_mwindow_close(&w_curs);
if (error < 0)
return error;
elem->offset = curpos;
elem->size = size;
elem->type = type;
switch (type) {
case GIT_OBJ_OFS_DELTA:
case GIT_OBJ_REF_DELTA:
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
git_mwindow_close(&w_curs);
if (base_offset == 0)
return packfile_error("delta offset is zero");
if (base_offset < 0) /* must actually be an error code */
return (int)base_offset;
/* we need to pass the pos *after* the delta-base bit */
elem->offset = curpos;
/* go through the loop again, but with the new object */
obj_offset = base_offset;
break;
/* one of these means we've found the final object in the chain */
case GIT_OBJ_COMMIT:
case GIT_OBJ_TREE:
case GIT_OBJ_BLOB:
case GIT_OBJ_TAG:
found_base = 1;
break;
default:
error = packfile_error("invalid packfile type in header");
break;
}
}
if (!found_base) {
git_array_clear(chain);
return packfile_error("after dependency chain loop; cannot happen");
}
if (error < 0)
git_array_clear(chain);
*chain_out = chain;
return error;
}
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "mwindow.h" #include "mwindow.h"
#include "odb.h" #include "odb.h"
#include "oidmap.h" #include "oidmap.h"
#include "array.h"
#define GIT_PACK_FILE_MODE 0444 #define GIT_PACK_FILE_MODE 0444
...@@ -60,6 +61,14 @@ typedef struct git_pack_cache_entry { ...@@ -60,6 +61,14 @@ typedef struct git_pack_cache_entry {
git_rawobj raw; git_rawobj raw;
} git_pack_cache_entry; } git_pack_cache_entry;
struct pack_chain_elem {
git_off_t offset;
size_t size;
git_otype type;
};
typedef git_array_t(struct pack_chain_elem) git_dependency_chain;
#include "offmap.h" #include "offmap.h"
GIT__USE_OFFMAP; GIT__USE_OFFMAP;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment