Commit 0b33fca0 by Carlos Martín Nieto

indexer: fix thin packs

When given an ODB from which to read objects, the indexer will attempt
to inject the missing bases at the end of the pack and update the
header and trailer to reflect the new contents.
parent 51e82492
......@@ -46,7 +46,7 @@ int index_pack(git_repository *repo, int argc, char **argv)
return EXIT_FAILURE;
}
if (git_indexer_stream_new(&idx, ".", NULL, NULL) < 0) {
if (git_indexer_stream_new(&idx, ".", NULL, NULL, NULL) < 0) {
puts("bad idx");
return -1;
}
......
......@@ -20,12 +20,16 @@ typedef struct git_indexer_stream git_indexer_stream;
*
* @param out where to store the indexer instance
* @param path to the directory where the packfile should be stored
* @param odb object database from which to read base objects when
* fixing thin packs. Pass NULL if no thin pack is expected (an error
* will be returned if there are bases missing)
* @param progress_cb function to call with progress information
* @param progress_cb_payload payload for the progress callback
*/
GIT_EXTERN(int) git_indexer_stream_new(
git_indexer_stream **out,
const char *path,
git_odb *odb,
git_transfer_progress_callback progress_cb,
void *progress_cb_payload);
......
......@@ -80,7 +80,7 @@ struct git_odb_backend {
git_odb_backend *, git_odb_foreach_cb cb, void *payload);
int (* writepack)(
git_odb_writepack **, git_odb_backend *,
git_odb_writepack **, git_odb_backend *, git_odb *odb,
git_transfer_progress_callback progress_cb, void *progress_payload);
void (* free)(git_odb_backend *);
......
......@@ -217,6 +217,7 @@ typedef struct git_transfer_progress {
unsigned int total_objects;
unsigned int indexed_objects;
unsigned int received_objects;
unsigned int local_objects;
size_t received_bytes;
} git_transfer_progress;
......
......@@ -18,6 +18,7 @@
#include "filebuf.h"
#include "oid.h"
#include "oidmap.h"
#include "compress.h"
#define UINT31_MAX (0x7FFFFFFF)
......@@ -33,6 +34,7 @@ struct git_indexer_stream {
opened_pack :1,
have_stream :1,
have_delta :1;
struct git_pack_header hdr;
struct git_pack_file *pack;
git_filebuf pack_file;
git_off_t off;
......@@ -48,6 +50,9 @@ struct git_indexer_stream {
void *progress_payload;
char objbuf[8*1024];
/* Needed to look up objects which we want to inject to fix a thin pack */
git_odb *odb;
/* Fields for calculating the packfile trailer (hash of everything before it) */
char inbuf[GIT_OID_RAWSZ];
int inbuf_len;
......@@ -114,6 +119,7 @@ static int objects_cmp(const void *a, const void *b)
int git_indexer_stream_new(
git_indexer_stream **out,
const char *prefix,
git_odb *odb,
git_transfer_progress_callback progress_cb,
void *progress_payload)
{
......@@ -124,6 +130,7 @@ int git_indexer_stream_new(
idx = git__calloc(1, sizeof(git_indexer_stream));
GITERR_CHECK_ALLOC(idx);
idx->odb = odb;
idx->progress_cb = progress_cb;
idx->progress_payload = progress_payload;
git_hash_ctx_init(&idx->trailer);
......@@ -309,17 +316,10 @@ on_error:
return -1;
}
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
static int save_entry(git_indexer_stream *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
{
int i, error;
khiter_t k;
git_oid oid;
size_t entry_size;
struct entry *entry;
struct git_pack_entry *pentry;
entry = git__calloc(1, sizeof(*entry));
GITERR_CHECK_ALLOC(entry);
if (entry_start > UINT31_MAX) {
entry->offset = UINT32_MAX;
......@@ -328,6 +328,34 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
entry->offset = (uint32_t)entry_start;
}
pentry->offset = entry_start;
k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
if (!error)
return -1;
kh_value(idx->pack->idx_cache, k) = pentry;
/* Add the object to the list */
if (git_vector_insert(&idx->objects, entry) < 0)
return -1;
for (i = entry->oid.id[0]; i < 256; ++i) {
idx->fanout[i]++;
}
return 0;
}
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
git_oid oid;
size_t entry_size;
struct entry *entry;
struct git_pack_entry *pentry;
entry = git__calloc(1, sizeof(*entry));
GITERR_CHECK_ALLOC(entry);
if (git_odb__hashobj(&oid, obj) < 0) {
giterr_set(GITERR_INDEXER, "Failed to hash object");
goto on_error;
......@@ -337,15 +365,6 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
GITERR_CHECK_ALLOC(pentry);
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
if (!error) {
git__free(pentry);
goto on_error;
}
kh_value(idx->pack->idx_cache, k) = pentry;
git_oid_cpy(&entry->oid, &oid);
entry->crc = crc32(0L, Z_NULL, 0);
......@@ -353,15 +372,7 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
goto on_error;
/* Add the object to the list */
if (git_vector_insert(&idx->objects, entry) < 0)
goto on_error;
for (i = oid.id[0]; i < 256; ++i) {
idx->fanout[i]++;
}
return 0;
return save_entry(idx, entry, pentry, entry_start);
on_error:
git__free(entry);
......@@ -415,8 +426,8 @@ static void hash_partially(git_indexer_stream *idx, const uint8_t *data, size_t
int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats)
{
int error = -1;
struct git_pack_header hdr;
size_t processed;
struct git_pack_header *hdr = &idx->hdr;
git_mwindow_file *mwf = &idx->pack->mwf;
assert(idx && data && stats);
......@@ -443,14 +454,14 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
if (!idx->parsed_header) {
unsigned int total_objects;
if ((unsigned)idx->pack->mwf.size < sizeof(hdr))
if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
return 0;
if (parse_header(&hdr, idx->pack) < 0)
if (parse_header(&idx->hdr, idx->pack) < 0)
return -1;
idx->parsed_header = 1;
idx->nr_objects = ntohl(hdr.hdr_entries);
idx->nr_objects = ntohl(hdr->hdr_entries);
idx->off = sizeof(struct git_pack_header);
/* for now, limit to 2^32 objects */
......@@ -471,6 +482,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
return -1;
stats->received_objects = 0;
stats->local_objects = 0;
processed = stats->indexed_objects = 0;
stats->total_objects = total_objects;
do_progress_callback(idx, stats);
......@@ -590,6 +602,135 @@ static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char
return git_buf_oom(path) ? -1 : 0;
}
/**
 * Step the pack's file descriptor back over the trailer (one raw OID)
 * so that objects injected to fix a thin pack overwrite it.
 *
 * The memory-window view of the pack is shrunk by the same amount and
 * every open window is dropped.
 *
 * @param idx the indexer whose pack is being rewound
 * @return the file offset after seeking, or -1 if the seek failed
 */
static git_off_t seek_back_trailer(git_indexer_stream *idx)
{
	git_mwindow_file *mwf = &idx->pack->mwf;
	git_off_t pos = p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_CUR);

	if (pos < 0)
		return -1;

	/* The trailer no longer counts as part of the pack contents */
	mwf->size -= GIT_OID_RAWSZ;
	git_mwindow_free_all(mwf);

	return pos;
}
/**
 * Append the object `id`, read from the indexer's ODB, to the end of
 * the pack so that a delta depending on it can be resolved.
 *
 * The current trailer is rewound over, the object is written as a pack
 * header followed by its compressed contents, and a zeroed placeholder
 * trailer is written after it. The new entry is then recorded through
 * save_entry().
 *
 * @param idx the indexer whose pack is being fixed
 * @param id id of the base object to inject
 * @return 0 on success, a negative value on failure
 */
static int inject_object(git_indexer_stream *idx, git_oid *id)
{
	git_odb_object *obj = NULL;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;
	git_oid foo = {{0}};
	unsigned char hdr[64];
	git_buf buf = GIT_BUF_INIT;
	git_off_t entry_start;
	const void *data;
	size_t len, hdr_len;
	int error = -1;

	/* Nothing else is held yet, so the early return here cannot leak */
	entry = git__calloc(1, sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	/* A failed seek would make every offset computed below meaningless */
	if ((entry_start = seek_back_trailer(idx)) < 0)
		goto cleanup;

	if (git_odb_read(&obj, idx->odb, id) < 0)
		goto cleanup;

	data = git_odb_object_data(obj);
	len = git_odb_object_size(obj);

	entry->crc = crc32(0L, Z_NULL, 0);

	/* Write out the object header */
	hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
	if ((error = git_filebuf_write(&idx->pack_file, hdr, hdr_len)) < 0)
		goto cleanup;

	idx->pack->mwf.size += hdr_len;
	entry->crc = crc32(entry->crc, hdr, hdr_len);

	if ((error = git__compress(&buf, data, len)) < 0)
		goto cleanup;

	/* And then the compressed object */
	if ((error = git_filebuf_write(&idx->pack_file, buf.ptr, buf.size)) < 0)
		goto cleanup;

	idx->pack->mwf.size += buf.size;
	entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, buf.size));

	/* Write a fake trailer so the pack functions play ball */
	if ((error = git_filebuf_write(&idx->pack_file, &foo, GIT_OID_RAWSZ)) < 0)
		goto cleanup;

	idx->pack->mwf.size += GIT_OID_RAWSZ;

	/*
	 * Checked by hand rather than with GITERR_CHECK_ALLOC so that the
	 * cleanup path below still releases entry, buf and obj.
	 */
	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL) {
		error = -1;
		goto cleanup;
	}

	git_oid_cpy(&pentry->sha1, id);
	git_oid_cpy(&entry->oid, id);
	/* NOTE(review): this advances by the uncompressed length, not buf.size -- confirm intended */
	idx->off = entry_start + hdr_len + len;

	error = save_entry(idx, entry, pentry, entry_start);

cleanup:
	if (error < 0) {
		/* save_entry() never leaves entry in idx->objects when it fails */
		git__free(entry);
		git__free(pentry);
	}

	git_buf_free(&buf);
	if (obj != NULL)
		git_odb_object_free(obj);

	return error;
}
/**
 * Try to complete a thin pack by injecting, from the indexer's ODB,
 * the base of every delta that is still unresolved.
 *
 * For each pending delta the entry header is re-read from the pack; it
 * must be a REF_DELTA, whose base id directly follows the header. That
 * base is appended to the pack via inject_object() and counted in both
 * stats->total_objects and stats->local_objects.
 *
 * @param idx the indexer holding the unresolved deltas
 * @param stats transfer statistics to update for each injected object
 * @return 0 on success, a negative value on failure
 */
static int fix_thin_pack(git_indexer_stream *idx, git_transfer_progress *stats)
{
	unsigned int n;
	struct delta_info *pending;

	if (!idx->odb) {
		giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
		return -1;
	}

	git_vector_foreach(&idx->deltas, n, pending) {
		git_mwindow *win = NULL;
		git_off_t pos = pending->delta_off;
		unsigned int avail = 0;
		unsigned char *raw_base;
		git_otype kind;
		size_t obj_size;
		git_oid base_id;
		int rc;

		/* Release the window before looking at the result, whatever it is */
		rc = git_packfile_unpack_header(&obj_size, &kind, &idx->pack->mwf, &win, &pos);
		git_mwindow_close(&win);

		if (rc < 0)
			return rc;

		if (kind != GIT_OBJ_REF_DELTA) {
			giterr_set(GITERR_INDEXER, "delta with missing base is not REF_DELTA");
			return -1;
		}

		/* pos now points at the base information, which is a raw OID */
		raw_base = git_mwindow_open(&idx->pack->mwf, &win, pos, GIT_OID_RAWSZ, &avail);
		if (!raw_base) {
			giterr_set(GITERR_INDEXER, "failed to map delta information");
			return -1;
		}

		git_oid_fromraw(&base_id, raw_base);
		git_mwindow_close(&win);

		if (inject_object(idx, &base_id) < 0)
			return -1;

		stats->total_objects++;
		stats->local_objects++;
	}

	return 0;
}
static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats)
{
unsigned int i;
......@@ -619,13 +760,61 @@ static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats)
* delta.
*/
git_vector_remove(&idx->deltas, i);
git__free(delta);
i--;
}
if (!progressed) {
giterr_set(GITERR_INDEXER, "the packfile is missing bases");
if (!progressed && (fix_thin_pack(idx, stats) < 0))
return -1;
}
return 0;
}
static int update_header_and_rehash(git_indexer_stream *idx, git_transfer_progress *stats)
{
void *ptr;
size_t chunk = 1024*1024;
git_off_t hashed = 0;
git_mwindow *w = NULL;
git_mwindow_file *mwf;
unsigned int left;
git_hash_ctx *ctx;
mwf = &idx->pack->mwf;
ctx = &idx->trailer;
git_hash_ctx_init(ctx);
git_mwindow_free_all(mwf);
/* Update the header to include the number of local objects we injected */
idx->hdr.hdr_entries = htonl(stats->total_objects);
if (p_lseek(idx->pack_file.fd, 0, SEEK_SET) < 0) {
giterr_set(GITERR_OS, "failed to seek to the beginning of the pack");
return -1;
}
if (p_write(idx->pack_file.fd, &idx->hdr, sizeof(struct git_pack_header)) < 0) {
giterr_set(GITERR_OS, "failed to update the pack header");
return -1;
}
/*
* We now use the same technique as before to determine the
* hash. We keep reading up to the end and let
* hash_partially() keep the existing trailer out of the
* calculation.
*/
idx->inbuf_len = 0;
while (hashed < mwf->size) {
ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
if (ptr == NULL)
return -1;
}
hash_partially(idx, ptr, left);
hashed += left;
git_mwindow_close(&w);
}
return 0;
......@@ -668,15 +857,28 @@ int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *
return -1;
}
if (idx->deltas.length > 0)
if (resolve_deltas(idx, stats) < 0)
return -1;
if (resolve_deltas(idx, stats) < 0)
return -1;
if (stats->indexed_objects != stats->total_objects) {
if (stats->indexed_objects + stats->local_objects != stats->total_objects) {
giterr_set(GITERR_INDEXER, "early EOF");
return -1;
}
if (stats->local_objects > 0) {
if (update_header_and_rehash(idx, stats) < 0)
return -1;
git_hash_final(&trailer_hash, &idx->trailer);
if (p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_END) < 0)
return -1;
if (p_write(idx->pack_file.fd, &trailer_hash, GIT_OID_RAWSZ) < 0) {
giterr_set(GITERR_OS, "failed to update pack trailer");
return -1;
}
}
git_vector_sort(&idx->objects);
git_buf_sets(&filename, idx->pack->pack_name);
......
......@@ -988,7 +988,7 @@ int git_odb_write_pack(struct git_odb_writepack **out, git_odb *db, git_transfer
if (b->writepack != NULL) {
++writes;
error = b->writepack(out, b, progress_cb, progress_payload);
error = b->writepack(out, b, db, progress_cb, progress_payload);
}
}
......
......@@ -541,6 +541,7 @@ static void pack_backend__writepack_free(struct git_odb_writepack *_writepack)
static int pack_backend__writepack(struct git_odb_writepack **out,
git_odb_backend *_backend,
git_odb *odb,
git_transfer_progress_callback progress_cb,
void *progress_payload)
{
......@@ -557,7 +558,7 @@ static int pack_backend__writepack(struct git_odb_writepack **out,
GITERR_CHECK_ALLOC(writepack);
if (git_indexer_stream_new(&writepack->indexer_stream,
backend->pack_folder, progress_cb, progress_payload) < 0) {
backend->pack_folder, odb, progress_cb, progress_payload) < 0) {
git__free(writepack);
return -1;
}
......
......@@ -1253,7 +1253,7 @@ int git_packbuilder_write(
PREPARE_PACK;
if (git_indexer_stream_new(
&indexer, path, progress_cb, progress_cb_payload) < 0)
&indexer, path, pb->odb, progress_cb, progress_cb_payload) < 0)
return -1;
ctx.indexer = indexer;
......
#include "clar_libgit2.h"
#include <git2.h>
#include "fileops.h"
#include "hash.h"
#include "iterator.h"
#include "vector.h"
#include "posix.h"
/*
* This is a packfile with three objects. The second is a delta which
* depends on the third, which is also a delta.
......@@ -23,6 +25,24 @@ unsigned char out_of_order_pack[] = {
};
unsigned int out_of_order_pack_len = 112;
/*
* Packfile with two objects. The second is a delta against an object
* which is not in the packfile
*/
unsigned char thin_pack[] = {
0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02,
0x32, 0x78, 0x9c, 0x63, 0x67, 0x00, 0x00, 0x00, 0x10, 0x00, 0x08, 0x76,
0xe6, 0x8f, 0xe8, 0x12, 0x9b, 0x54, 0x6b, 0x10, 0x1a, 0xee, 0x95, 0x10,
0xc5, 0x32, 0x8e, 0x7f, 0x21, 0xca, 0x1d, 0x18, 0x78, 0x9c, 0x63, 0x62,
0x66, 0x4e, 0xcb, 0xcf, 0x07, 0x00, 0x02, 0xac, 0x01, 0x4d, 0x42, 0x52,
0x3a, 0x6f, 0x39, 0xd1, 0xfe, 0x66, 0x68, 0x6b, 0xa5, 0xe5, 0xe2, 0x97,
0xac, 0x94, 0x6c, 0x76, 0x0b, 0x04
};
unsigned int thin_pack_len = 78;
unsigned char base_obj[] = { 07, 076 };
unsigned int base_obj_len = 2;
void test_pack_indexer__out_of_order(void)
{
git_indexer_stream *idx;
......@@ -38,3 +58,71 @@ void test_pack_indexer__out_of_order(void)
git_indexer_stream_free(idx);
}
void test_pack_indexer__fix_thin(void)
{
git_indexer_stream *idx;
git_transfer_progress stats;
git_repository *repo;
git_odb *odb;
git_oid id, should_id;
cl_git_pass(git_repository_init(&repo, "thin.git", true));
cl_git_pass(git_repository_odb(&odb, repo));
/* Store the missing base into your ODB so the indexer can fix the pack */
cl_git_pass(git_odb_write(&id, odb, base_obj, base_obj_len, GIT_OBJ_BLOB));
git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18");
cl_assert(!git_oid_cmp(&id, &should_id));
cl_git_pass(git_indexer_stream_new(&idx, ".", odb, NULL, NULL));
cl_git_pass(git_indexer_stream_add(idx, thin_pack, thin_pack_len, &stats));
cl_git_pass(git_indexer_stream_finalize(idx, &stats));
cl_assert_equal_i(stats.total_objects, 3);
cl_assert_equal_i(stats.received_objects, 2);
cl_assert_equal_i(stats.indexed_objects, 2);
cl_assert_equal_i(stats.local_objects, 1);
git_oid_fromstr(&should_id, "11f0f69b334728fdd8bc86b80499f22f29d85b15");
cl_assert(!git_oid_cmp(git_indexer_stream_hash(idx), &should_id));
git_indexer_stream_free(idx);
git_odb_free(odb);
git_repository_free(repo);
/*
* The pack's name/hash only tells us what objects there are,
* so we need to go through the packfile again in order to
* figure out whether we calculated the trailer correctly.
*/
{
unsigned char buffer[128];
int fd;
ssize_t read;
git_off_t left;
struct stat st;
const char *name = "pack-11f0f69b334728fdd8bc86b80499f22f29d85b15.pack";
fd = p_open(name, O_RDONLY);
cl_assert(fd != -1);
cl_git_pass(p_stat(name, &st));
left = st.st_size;
cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL));
read = p_read(fd, buffer, sizeof(buffer));
cl_assert(read != -1);
p_close(fd);
cl_git_pass(git_indexer_stream_add(idx, buffer, read, &stats));
cl_git_pass(git_indexer_stream_finalize(idx, &stats));
cl_assert_equal_i(stats.total_objects, 3);
cl_assert_equal_i(stats.received_objects, 3);
cl_assert_equal_i(stats.indexed_objects, 3);
cl_assert_equal_i(stats.local_objects, 0);
git_indexer_stream_free(idx);
}
}
......@@ -92,7 +92,7 @@ void test_pack_packbuilder__create_pack(void)
seed_packbuilder();
cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL));
cl_git_pass(git_indexer_stream_new(&_indexer, ".", NULL, NULL, NULL));
cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats));
cl_git_pass(git_indexer_stream_finalize(_indexer, &stats));
......@@ -141,7 +141,7 @@ void test_pack_packbuilder__foreach(void)
git_indexer_stream *idx;
seed_packbuilder();
cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL));
cl_git_pass(git_indexer_stream_new(&idx, ".", NULL, NULL, NULL));
cl_git_pass(git_packbuilder_foreach(_packbuilder, foreach_cb, idx));
cl_git_pass(git_indexer_stream_finalize(idx, &stats));
git_indexer_stream_free(idx);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment