Commit 7800048a by Edward Thomson

Merge pull request #2972 from libgit2/cmn/pack-objects-walk

[WIP] Smarter pack-building
parents 828e5959 e68b31a1
...@@ -115,6 +115,19 @@ GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid * ...@@ -115,6 +115,19 @@ GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *
GIT_EXTERN(int) git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *id); GIT_EXTERN(int) git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *id);
/** /**
* Insert objects as given by the walk
*
* The commits produced by the walk, together with all objects they
* reference, will be inserted into the packbuilder.
*
* @param pb the packbuilder
* @param walk the revwalk to use to fill the packbuilder
*
* @return 0 or an error code
*/
GIT_EXTERN(int) git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk);
/**
* Write the contents of the packfile to an in-memory buffer * Write the contents of the packfile to an in-memory buffer
* *
* The contents of the buffer will become a valid packfile, even though there * The contents of the buffer will become a valid packfile, even though there
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
#include "vector.h" #include "vector.h"
#include "repository.h" #include "repository.h"
GIT__USE_OIDMAP;
/* Ported from https://github.com/git/git/blob/89dde7882f71f846ccd0359756d27bebc31108de/builtin/describe.c */ /* Ported from https://github.com/git/git/blob/89dde7882f71f846ccd0359756d27bebc31108de/builtin/describe.c */
struct commit_name { struct commit_name {
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include "oidmap.h" #include "oidmap.h"
#include "zstream.h" #include "zstream.h"
GIT__USE_OIDMAP;
extern git_mutex git__mwindow_mutex; extern git_mutex git__mwindow_mutex;
#define UINT31_MAX (0x7FFFFFFF) #define UINT31_MAX (0x7FFFFFFF)
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include "thread-utils.h" #include "thread-utils.h"
#include "tree.h" #include "tree.h"
#include "util.h" #include "util.h"
#include "revwalk.h"
#include "commit_list.h"
#include "git2/pack.h" #include "git2/pack.h"
#include "git2/commit.h" #include "git2/commit.h"
...@@ -39,6 +41,8 @@ struct pack_write_context { ...@@ -39,6 +41,8 @@ struct pack_write_context {
git_transfer_progress *stats; git_transfer_progress *stats;
}; };
GIT__USE_OIDMAP;
#ifdef GIT_THREADS #ifdef GIT_THREADS
#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \ #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
...@@ -124,10 +128,16 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo) ...@@ -124,10 +128,16 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
GITERR_CHECK_ALLOC(pb); GITERR_CHECK_ALLOC(pb);
pb->object_ix = git_oidmap_alloc(); pb->object_ix = git_oidmap_alloc();
if (!pb->object_ix) if (!pb->object_ix)
goto on_error; goto on_error;
pb->walk_objects = git_oidmap_alloc();
if (!pb->walk_objects)
goto on_error;
if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0)
goto on_error;
pb->repo = repo; pb->repo = repo;
pb->nr_threads = 1; /* do not spawn any thread by default */ pb->nr_threads = 1; /* do not spawn any thread by default */
...@@ -1345,6 +1355,7 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb) ...@@ -1345,6 +1355,7 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb)
return &pb->pack_oid; return &pb->pack_oid;
} }
static int cb_tree_walk( static int cb_tree_walk(
const char *root, const git_tree_entry *entry, void *payload) const char *root, const git_tree_entry *entry, void *payload)
{ {
...@@ -1403,6 +1414,235 @@ uint32_t git_packbuilder_written(git_packbuilder *pb) ...@@ -1403,6 +1414,235 @@ uint32_t git_packbuilder_written(git_packbuilder *pb)
return pb->nr_written; return pb->nr_written;
} }
/*
 * Create a fresh walk object for `id`.
 *
 * The entry is taken from the packbuilder's object pool and `id` is
 * copied into it. Despite the name, this never consults the
 * `walk_objects` map; callers that want de-duplication go through
 * retrieve_object().
 *
 * NOTE(review): the flags are presumably zero on return because
 * git_pool_mallocz() hands back zero-filled memory -- confirm in pool.h.
 *
 * @param out receives the newly allocated entry
 * @param pb the packbuilder owning the pool
 * @param id the object id to store in the entry
 * @return 0 on success, -1 (with the OOM error set) if allocation fails
 */
int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *entry = git_pool_mallocz(&pb->object_pool, 1);

	if (entry == NULL) {
		giterr_set_oom();
		return -1;
	}

	git_oid_cpy(&entry->id, id);
	*out = entry;

	return 0;
}
/*
 * Fetch the walk object tracking `id`, creating and registering it in
 * `pb->walk_objects` on first use.
 *
 * @param out receives the (existing or newly created) entry
 * @param pb the packbuilder whose walk_objects map is consulted
 * @param id the object id to look up
 * @return 0 on success, a negative value if the entry could not be
 *         allocated or could not be added to the map
 */
static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
{
	int error;
	khiter_t pos;
	git_walk_object *obj;

	pos = git_oidmap_lookup_index(pb->walk_objects, id);
	if (git_oidmap_valid_index(pb->walk_objects, pos)) {
		obj = git_oidmap_value_at(pb->walk_objects, pos);
	} else {
		if ((error = lookup_walk_object(&obj, pb, id)) < 0)
			return error;

		/*
		 * The insert reports its status through `error`. Do not
		 * ignore it: if the map could not grow, the object is not
		 * tracked and later lookups would silently create a
		 * duplicate with cleared flags.
		 *
		 * NOTE(review): assumes the khash convention that a
		 * negative status means the insertion failed -- confirm
		 * against oidmap.h.
		 */
		git_oidmap_insert(pb->walk_objects, &obj->id, obj, error);
		if (error < 0) {
			giterr_set_oom();
			return -1;
		}
	}

	*out = obj;
	return 0;
}
/*
 * Flag the blob `id` as uninteresting in the packbuilder's walk state.
 *
 * @return 0 on success, or the error from retrieve_object()
 */
static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *blob;
	int error = retrieve_object(&blob, pb, id);

	if (error < 0)
		return error;

	blob->uninteresting = 1;

	return 0;
}
/*
 * Flag the tree `id`, and recursively every tree and blob below it, as
 * uninteresting.
 *
 * A tree that is already flagged is not descended into again. Entries
 * that are neither trees nor blobs are skipped.
 *
 * @return 0 on success, or the first error hit while looking up or
 *         marking an object
 */
static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
{
	git_walk_object *node;
	git_tree *tree;
	size_t idx, count;
	int error;

	error = retrieve_object(&node, pb, id);
	if (error < 0)
		return error;

	/* already flagged, so its contents were handled as well */
	if (node->uninteresting)
		return 0;

	node->uninteresting = 1;

	error = git_tree_lookup(&tree, pb->repo, id);
	if (error < 0)
		return error;

	count = git_tree_entrycount(tree);

	/* stop at the first failure; the tree is released below either way */
	for (idx = 0; idx < count && error >= 0; idx++) {
		const git_tree_entry *entry = git_tree_entry_byindex(tree, idx);
		const git_oid *child_id = git_tree_entry_id(entry);

		switch (git_tree_entry_type(entry)) {
		case GIT_OBJ_TREE:
			error = mark_tree_uninteresting(pb, child_id);
			break;
		case GIT_OBJ_BLOB:
			error = mark_blob_uninteresting(pb, child_id);
			break;
		default:
			/* a submodule or something unknown: nothing to mark */
			break;
		}
	}

	git_tree_free(tree);

	return error;
}
/*
 * Flag the trees and blobs at the edges of the graph as uninteresting.
 *
 * The input comes from a git_revwalk, so the commits themselves already
 * carry their uninteresting flag; what is still missing is the same
 * information for the root tree of each such commit and for everything
 * reachable from that tree.
 *
 * @param pb the packbuilder whose walk state is updated
 * @param commits the list of commits to inspect
 * @return 0 on success, or the first lookup/marking error
 */
static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
{
	git_commit_list *cur;

	for (cur = commits; cur != NULL; cur = cur->next) {
		git_commit *commit;
		int error;

		if (cur->item->uninteresting) {
			error = git_commit_lookup(&commit, pb->repo, &cur->item->oid);
			if (error < 0)
				return error;

			error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));

			/* release the commit before looking at the result */
			git_commit_free(commit);

			if (error < 0)
				return error;
		}
	}

	return 0;
}
/*
 * Insert `tree` and everything reachable from it into the packbuilder.
 *
 * A tree that has already been inserted (`seen`) or that was flagged
 * `uninteresting` is skipped together with its whole contents, and
 * blobs flagged `uninteresting` are skipped individually. Without these
 * checks the marking done by mark_edges_uninteresting() would have no
 * effect and objects belonging to hidden commits would still be packed.
 *
 * Entries that are neither trees nor blobs are ignored.
 *
 * @param pb the packbuilder to fill
 * @param tree the tree to insert
 * @return 0 on success, or the first error from a lookup or insertion
 */
int insert_tree(git_packbuilder *pb, git_tree *tree)
{
	size_t i;
	int error;
	git_tree *subtree;
	git_walk_object *obj;
	const char *name;

	if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
		return error;

	if (obj->seen || obj->uninteresting)
		return 0;

	/* set the flag before descending so the tree is only handled once */
	obj->seen = 1;

	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
		return error;

	for (i = 0; i < git_tree_entrycount(tree); i++) {
		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
		const git_oid *entry_id = git_tree_entry_id(entry);

		switch (git_tree_entry_type(entry)) {
		case GIT_OBJ_TREE:
			if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
				return error;

			error = insert_tree(pb, subtree);
			git_tree_free(subtree);

			if (error < 0)
				return error;

			break;
		case GIT_OBJ_BLOB:
			if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
				return error;

			/* flagged as uninteresting: leave it out of the pack */
			if (obj->uninteresting)
				break;

			/* the entry name is passed along with the blob id */
			name = git_tree_entry_name(entry);
			if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
				return error;

			break;
		default:
			/* it's a submodule or something unknown, we don't want it */
			break;
		}
	}

	return error;
}
/*
 * Insert the commit tracked by `obj`, followed by its root tree and
 * everything insert_tree() pulls in from there.
 *
 * The entry is flagged as seen before anything else is attempted.
 *
 * @param pb the packbuilder to fill
 * @param obj the walk object holding the commit's id
 * @return 0 on success, or the first error from an insertion or lookup
 */
int insert_commit(git_packbuilder *pb, git_walk_object *obj)
{
	git_commit *commit = NULL;
	git_tree *root = NULL;
	int error;

	obj->seen = 1;

	error = git_packbuilder_insert(pb, &obj->id, NULL);
	if (error < 0)
		return error;

	error = git_commit_lookup(&commit, pb->repo, &obj->id);
	if (error < 0)
		return error;

	error = git_tree_lookup(&root, pb->repo, git_commit_tree_id(commit));
	if (error >= 0)
		error = insert_tree(pb, root);

	/* root is still NULL here when the tree lookup failed */
	git_commit_free(commit);
	git_tree_free(root);

	return error;
}
/*
 * Insert every commit produced by `walk`, together with the trees and
 * blobs those commits reference, into the packbuilder.
 *
 * The trees of the uninteresting commits in the walk's user input are
 * marked first. The walk is then consumed, and each commit that is
 * neither already seen nor uninteresting is inserted.
 *
 * @param pb the packbuilder to fill
 * @param walk the revwalk providing the commits
 * @return 0 once the walk is exhausted, or the first error reported by
 *         the marking step, the revwalk, or an insertion
 */
int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
{
	int error;
	git_oid id;
	git_walk_object *obj;

	assert(pb && walk);

	if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
		return error;

	/*
	 * TODO: git marks the parents of the edges
	 * uninteresting. This may provide a speed advantage, but does
	 * seem to assume the remote does not have a single-commit
	 * history on the other end.
	 */

	/* walk down each tree up to the blobs and insert them, stopping when uninteresting */
	while ((error = git_revwalk_next(&id, walk)) == 0) {
		if ((error = retrieve_object(&obj, pb, &id)) < 0)
			return error;

		if (obj->seen || obj->uninteresting)
			continue;

		if ((error = insert_commit(pb, obj)) < 0)
			return error;
	}

	/*
	 * GIT_ITEROVER is the normal end of the walk. Any other non-zero
	 * value is a genuine revwalk failure and has to reach the caller
	 * rather than being reported as success.
	 */
	if (error == GIT_ITEROVER)
		error = 0;

	return error;
}
int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload) int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
{ {
if (!pb) if (!pb)
...@@ -1436,6 +1676,9 @@ void git_packbuilder_free(git_packbuilder *pb) ...@@ -1436,6 +1676,9 @@ void git_packbuilder_free(git_packbuilder *pb)
if (pb->object_list) if (pb->object_list)
git__free(pb->object_list); git__free(pb->object_list);
git_oidmap_free(pb->walk_objects);
git_pool_clear(&pb->object_pool);
git_hash_ctx_cleanup(&pb->ctx); git_hash_ctx_cleanup(&pb->ctx);
git_zstream_free(&pb->zstream); git_zstream_free(&pb->zstream);
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "oidmap.h" #include "oidmap.h"
#include "netops.h" #include "netops.h"
#include "zstream.h" #include "zstream.h"
#include "pool.h"
#include "git2/oid.h" #include "git2/oid.h"
#include "git2/pack.h" #include "git2/pack.h"
...@@ -50,6 +51,12 @@ typedef struct git_pobject { ...@@ -50,6 +51,12 @@ typedef struct git_pobject {
filled:1; filled:1;
} git_pobject; } git_pobject;
/*
 * Per-object state kept in the packbuilder's `walk_objects` map while
 * the contents of a revwalk are being inserted.
 */
typedef struct {
	git_oid id;                   /* id of the object this entry tracks */
	unsigned int uninteresting:1; /* set by the mark_*_uninteresting helpers */
	unsigned int seen:1;          /* set by insert_tree / insert_commit */
} git_walk_object;
struct git_packbuilder { struct git_packbuilder {
git_repository *repo; /* associated repository */ git_repository *repo; /* associated repository */
git_odb *odb; /* associated object database */ git_odb *odb; /* associated object database */
...@@ -66,6 +73,9 @@ struct git_packbuilder { ...@@ -66,6 +73,9 @@ struct git_packbuilder {
git_oidmap *object_ix; git_oidmap *object_ix;
git_oidmap *walk_objects;
git_pool object_pool;
git_oid pack_oid; /* hash of written pack */ git_oid pack_oid; /* hash of written pack */
/* synchronization objects */ /* synchronization objects */
......
...@@ -16,6 +16,9 @@ ...@@ -16,6 +16,9 @@
#include <zlib.h> #include <zlib.h>
GIT__USE_OFFMAP;
GIT__USE_OIDMAP;
static int packfile_open(struct git_pack_file *p); static int packfile_open(struct git_pack_file *p);
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n); static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
int packfile_unpack_compressed( int packfile_unpack_compressed(
......
...@@ -71,9 +71,7 @@ struct pack_chain_elem { ...@@ -71,9 +71,7 @@ struct pack_chain_elem {
typedef git_array_t(struct pack_chain_elem) git_dependency_chain; typedef git_array_t(struct pack_chain_elem) git_dependency_chain;
#include "offmap.h" #include "offmap.h"
#include "oidmap.h"
GIT__USE_OFFMAP
GIT__USE_OIDMAP
#define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024 #define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024
#define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */ #define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "git2/revparse.h" #include "git2/revparse.h"
#include "merge.h" #include "merge.h"
GIT__USE_OIDMAP;
git_commit_list_node *git_revwalk__commit_lookup( git_commit_list_node *git_revwalk__commit_lookup(
git_revwalk *walk, const git_oid *oid) git_revwalk *walk, const git_oid *oid)
{ {
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "pool.h" #include "pool.h"
#include "vector.h" #include "vector.h"
GIT__USE_OIDMAP #include "oidmap.h"
struct git_revwalk { struct git_revwalk {
git_repository *repo; git_repository *repo;
......
...@@ -513,7 +513,6 @@ static int local_download_pack( ...@@ -513,7 +513,6 @@ static int local_download_pack(
git_remote_head *rhead; git_remote_head *rhead;
unsigned int i; unsigned int i;
int error = -1; int error = -1;
git_oid oid;
git_packbuilder *pack = NULL; git_packbuilder *pack = NULL;
git_odb_writepack *writepack = NULL; git_odb_writepack *writepack = NULL;
git_odb *odb = NULL; git_odb *odb = NULL;
...@@ -539,15 +538,22 @@ static int local_download_pack( ...@@ -539,15 +538,22 @@ static int local_download_pack(
if (git_object_type(obj) == GIT_OBJ_COMMIT) { if (git_object_type(obj) == GIT_OBJ_COMMIT) {
/* Revwalker includes only wanted commits */ /* Revwalker includes only wanted commits */
error = git_revwalk_push(walk, &rhead->oid); error = git_revwalk_push(walk, &rhead->oid);
if (!git_oid_iszero(&rhead->loid)) if (!error && !git_oid_iszero(&rhead->loid)) {
error = git_revwalk_hide(walk, &rhead->loid); error = git_revwalk_hide(walk, &rhead->loid);
if (error == GIT_ENOTFOUND)
error = 0;
}
} else { } else {
/* Tag or some other wanted object. Add it on its own */
error = git_packbuilder_insert(pack, &rhead->oid, rhead->name); error = git_packbuilder_insert(pack, &rhead->oid, rhead->name);
} }
git_object_free(obj); git_object_free(obj);
if (error < 0)
goto cleanup;
} }
if ((error = git_packbuilder_insert_walk(pack, walk)))
goto cleanup;
if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0) if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0)
goto cleanup; goto cleanup;
...@@ -559,35 +565,6 @@ static int local_download_pack( ...@@ -559,35 +565,6 @@ static int local_download_pack(
if ((error = git_repository_odb__weakptr(&odb, repo)) < 0) if ((error = git_repository_odb__weakptr(&odb, repo)) < 0)
goto cleanup; goto cleanup;
while ((error = git_revwalk_next(&oid, walk)) == 0) {
git_commit *commit;
/* Skip commits we already have */
if (git_odb_exists(odb, &oid)) continue;
if (!git_object_lookup((git_object**)&commit, t->repo, &oid, GIT_OBJ_COMMIT)) {
const git_oid *tree_oid = git_commit_tree_id(commit);
/* Add the commit and its tree */
if ((error = git_packbuilder_insert(pack, &oid, NULL)) < 0 ||
(error = git_packbuilder_insert_tree(pack, tree_oid)) < 0) {
git_commit_free(commit);
goto cleanup;
}
git_commit_free(commit);
git_buf_clear(&progress_info);
if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0)
goto cleanup;
if (t->progress_cb &&
(error = t->progress_cb(git_buf_cstr(&progress_info), git_buf_len(&progress_info), t->message_cb_payload)) < 0)
goto cleanup;
}
}
/* One last one with the newline */ /* One last one with the newline */
git_buf_clear(&progress_info); git_buf_clear(&progress_info);
git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack)); git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack));
...@@ -615,6 +592,7 @@ static int local_download_pack( ...@@ -615,6 +592,7 @@ static int local_download_pack(
if ((error = git_packbuilder_foreach(pack, foreach_cb, &data)) != 0) if ((error = git_packbuilder_foreach(pack, foreach_cb, &data)) != 0)
goto cleanup; goto cleanup;
} }
error = writepack->commit(writepack, stats); error = writepack->commit(writepack, stats);
cleanup: cleanup:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment