Commit c0f4a011 by Carlos Martín Nieto

pack: introduce a delta base cache

Many delta bases are re-used. Cache them to avoid inflating the same
data repeatedly.

This version doesn't limit the number of entries to store, so it can
end up using a considerable amount of memory.
parent 2086e1ba
/*
* Copyright (C) 2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_offmap_h__
#define INCLUDE_offmap_h__
#include "common.h"
#include "git2/types.h"
#define kmalloc git__malloc
#define kcalloc git__calloc
#define krealloc git__realloc
#define kfree git__free
#include "khash.h"
/*
 * Declare the khash table type named `off`, parameterised on git_off_t and
 * void * (presumably key type and value type -- confirm against khash.h),
 * and publish it under the name git_offmap.
 */
__KHASH_TYPE(off, git_off_t, void *);
typedef khash_t(off) git_offmap;
/*
 * Expands to the khash implementation for the `off` table, with the
 * generated functions declared `static kh_inline` and using the 64-bit
 * integer hash/equality helpers.
 *
 * NOTE(review): the expansion already ends in ';', so writing
 * `GIT__USE_OFFMAP;` at a use site leaves an extra empty declaration --
 * confirm this is acceptable for the compilers in use.
 */
#define GIT__USE_OFFMAP \
__KHASH_IMPL(off, static kh_inline, git_off_t, void *, 1, kh_int64_hash_func, kh_int64_hash_equal);
/*
 * Thin wrappers over the khash primitives for the `off` table.
 *
 * Macros that expand to a comparison, assignment or comma expression are
 * fully parenthesised, and their operands are parenthesised where used, so
 * that they keep their meaning when given a compound argument or when used
 * inside a larger expression.
 */

/* Create a new, empty map. */
#define git_offmap_alloc() kh_init(off)
/* Destroy the map and reset the caller's handle `h` (an lvalue) to NULL. */
#define git_offmap_free(h) (kh_destroy(off, h), (h) = NULL)
/* Forward to kh_clear() for map `h`. */
#define git_offmap_clear(h) kh_clear(off, h)
/* Forward to kh_size() for map `h`. */
#define git_offmap_num_entries(h) kh_size(h)
/* Look up key `k`; the result is to be checked with git_offmap_valid_index(). */
#define git_offmap_lookup_index(h, k) kh_get(off, h, k)
/* Non-zero when `idx` differs from the map's end marker. */
#define git_offmap_valid_index(h, idx) ((idx) != kh_end(h))
/* Non-zero when a lookup of `k` does not yield the map's end marker. */
#define git_offmap_exists(h, k) (kh_get(off, h, k) != kh_end(h))
/* Value stored at index `idx`. */
#define git_offmap_value_at(h, idx) kh_val(h, idx)
/* Store `v` at index `idx`; the expression yields the stored value. */
#define git_offmap_set_value_at(h, idx, v) (kh_val(h, idx) = (v))
/* Forward to kh_del() for index `idx` of map `h`. */
#define git_offmap_delete_at(h, idx) kh_del(off, h, idx)
/*
 * Store `val` under `key` in map `h`.
 *
 * `rval` must be an int lvalue: it receives the status written by kh_put().
 * A negative status leaves the map untouched; a status of 0 additionally
 * rewrites the stored key before the value is replaced.
 * Note that `key` may be evaluated more than once.
 */
#define git_offmap_insert(h, key, val, rval) do { \
	khiter_t offmap_slot_ = kh_put(off, h, key, &(rval)); \
	if ((rval) < 0) \
		break; \
	if ((rval) == 0) \
		kh_key(h, offmap_slot_) = (key); \
	kh_val(h, offmap_slot_) = (val); \
} while (0)
/*
 * Store `val` under `key` in map `h` and report the value it replaced.
 *
 * h    - the map
 * key  - key to store under (may be evaluated more than once)
 * val  - value to store
 * oldv - lvalue receiving the value previously stored for `key`; set to
 *        NULL when kh_put() reports a status other than 0, including
 *        failure, so it is never left unassigned
 * rval - int lvalue receiving the status written by kh_put(); a negative
 *        status leaves the map untouched
 */
#define git_offmap_insert2(h, key, val, oldv, rval) do { \
	khiter_t offmap_slot_ = kh_put(off, h, key, &(rval)); \
	if ((rval) < 0) { \
		(oldv) = NULL; \
	} else { \
		if ((rval) == 0) { \
			(oldv) = kh_val(h, offmap_slot_); \
			kh_key(h, offmap_slot_) = (key); \
		} else { \
			(oldv) = NULL; \
		} \
		kh_val(h, offmap_slot_) = (val); \
	} \
} while (0)
/* Remove `key` from map `h`; does nothing when the lookup finds no entry. */
#define git_offmap_delete(h, key) do { \
	khiter_t offmap_slot_ = git_offmap_lookup_index(h, key); \
	if (!git_offmap_valid_index(h, offmap_slot_)) \
		break; \
	git_offmap_delete_at(h, offmap_slot_); \
} while (0)
/* Iteration helpers: direct aliases for khash's kh_foreach and kh_foreach_value. */
#define git_offmap_foreach kh_foreach
#define git_offmap_foreach_value kh_foreach_value
#endif
...@@ -46,6 +46,29 @@ static int packfile_error(const char *message) ...@@ -46,6 +46,29 @@ static int packfile_error(const char *message)
return -1; return -1;
} }
/*
 * Allocate a cache entry for the object located at offset `off`.
 *
 * `source` is copied by value, so the new entry refers to the same `data`
 * buffer as `source` does; no deep copy is made.
 *
 * Returns the new entry, or NULL when the allocation fails.
 */
static git_pack_cache_entry *new_cache_object(git_off_t off, git_rawobj *source)
{
	git_pack_cache_entry *entry;

	entry = git__malloc(sizeof(*entry));
	if (entry == NULL)
		return NULL;

	entry->off = off;
	entry->raw = *source;

	return entry;
}
/*
 * Release a cache entry along with the raw object buffer it refers to.
 *
 * `o` is a git_pack_cache_entry passed as void *; NULL is ignored.
 */
static void free_cache_object(void *o)
{
	git_pack_cache_entry *entry = o;

	if (entry == NULL)
		return;

	git__free(entry->raw.data);
	git__free(entry);
}
/*********************************************************** /***********************************************************
* *
* PACK INDEX METHODS * PACK INDEX METHODS
...@@ -336,9 +359,11 @@ static int packfile_unpack_delta( ...@@ -336,9 +359,11 @@ static int packfile_unpack_delta(
git_otype delta_type, git_otype delta_type,
git_off_t obj_offset) git_off_t obj_offset)
{ {
git_off_t base_offset; git_off_t base_offset, base_key;
git_rawobj base, delta; git_rawobj base, delta;
int error; git_pack_cache_entry *cached;
int error, found_base = 0;
khiter_t k;
base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset); base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
git_mwindow_close(w_curs); git_mwindow_close(w_curs);
...@@ -347,6 +372,18 @@ static int packfile_unpack_delta( ...@@ -347,6 +372,18 @@ static int packfile_unpack_delta(
if (base_offset < 0) /* must actually be an error code */ if (base_offset < 0) /* must actually be an error code */
return (int)base_offset; return (int)base_offset;
if (!p->bases) {
p->bases = git_offmap_alloc();
GITERR_CHECK_ALLOC(p->bases);
}
base_key = base_offset; /* git_packfile_unpack modifies base_offset */
k = kh_get(off, p->bases, base_offset);
if (k != kh_end(p->bases)) { /* found it */
cached = kh_value(p->bases, k);
found_base = 1;
memcpy(&base, &cached->raw, sizeof(git_rawobj));
} else { /* have to inflate it */
error = git_packfile_unpack(&base, p, &base_offset); error = git_packfile_unpack(&base, p, &base_offset);
/* /*
...@@ -357,23 +394,34 @@ static int packfile_unpack_delta( ...@@ -357,23 +394,34 @@ static int packfile_unpack_delta(
*/ */
if (error < 0) if (error < 0)
return error; return error;
}
error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
git_mwindow_close(w_curs); git_mwindow_close(w_curs);
if (error < 0) { if (error < 0) {
if (!found_base)
git__free(base.data); git__free(base.data);
return error; return error;
} }
obj->type = base.type; obj->type = base.type;
error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len); error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
if (error < 0)
goto on_error;
if (!found_base) {
cached = new_cache_object(base_key, &base);
if (cached) {
k = kh_put(off, p->bases, base_key, &error);
assert(error != 0);
kh_value(p->bases, k) = cached;
}
}
git__free(base.data); on_error:
git__free(delta.data); git__free(delta.data);
/* TODO: we might want to cache this. eventually */
//add_delta_base_cache(p, base_offset, base, base_size, *type);
return error; /* error set by git__delta_apply */ return error; /* error set by git__delta_apply */
} }
...@@ -651,9 +699,19 @@ static struct git_pack_file *packfile_alloc(size_t extra) ...@@ -651,9 +699,19 @@ static struct git_pack_file *packfile_alloc(size_t extra)
void packfile_free(struct git_pack_file *p) void packfile_free(struct git_pack_file *p)
{ {
khiter_t k;
assert(p); assert(p);
/* clear_delta_base_cache(); */ if (p->bases) {
for (k = kh_begin(p->bases); k != kh_end(p->bases); k++) {
if (kh_exist(p->bases, k))
free_cache_object(kh_value(p->bases, k));
}
git_offmap_free(p->bases);
}
git_mwindow_free_all(&p->mwf); git_mwindow_free_all(&p->mwf);
git_mwindow_file_deregister(&p->mwf); git_mwindow_file_deregister(&p->mwf);
...@@ -678,6 +736,9 @@ static int packfile_open(struct git_pack_file *p) ...@@ -678,6 +736,9 @@ static int packfile_open(struct git_pack_file *p)
if (!p->index_map.data && pack_index_open(p) < 0) if (!p->index_map.data && pack_index_open(p) < 0)
return git_odb__error_notfound("failed to open packfile", NULL); return git_odb__error_notfound("failed to open packfile", NULL);
p->bases = git_offmap_alloc();
GITERR_CHECK_ALLOC(p->bases);
/* TODO: open with noatime */ /* TODO: open with noatime */
p->mwf.fd = git_futils_open_ro(p->pack_name); p->mwf.fd = git_futils_open_ro(p->pack_name);
if (p->mwf.fd < 0) { if (p->mwf.fd < 0) {
......
...@@ -53,6 +53,15 @@ struct git_pack_idx_header { ...@@ -53,6 +53,15 @@ struct git_pack_idx_header {
uint32_t idx_version; uint32_t idx_version;
}; };
/* One entry of a packfile's delta base cache. */
typedef struct git_pack_cache_entry {
git_off_t off; /* offset the entry was created for; also used as its map key */
git_rawobj raw; /* the cached object; raw.data is freed together with the entry */
} git_pack_cache_entry;
#include "offmap.h"
GIT__USE_OFFMAP;
struct git_pack_file { struct git_pack_file {
git_mwindow_file mwf; git_mwindow_file mwf;
git_map index_map; git_map index_map;
...@@ -68,6 +77,8 @@ struct git_pack_file { ...@@ -68,6 +77,8 @@ struct git_pack_file {
git_vector cache; git_vector cache;
git_oid **oids; git_oid **oids;
git_offmap *bases; /* delta base cache */
/* something like ".git/objects/pack/xxxxx.pack" */ /* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[GIT_FLEX_ARRAY]; /* more */ char pack_name[GIT_FLEX_ARRAY]; /* more */
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment