Commit 2306ba10 by Vicent Martí

Merge pull request #803 from schu/gsoc-pack-objects

[GSoC] RFC: pack objects
parents 21e0d297 0cf49e10
...@@ -51,5 +51,6 @@ ...@@ -51,5 +51,6 @@
#include "git2/notes.h" #include "git2/notes.h"
#include "git2/reset.h" #include "git2/reset.h"
#include "git2/message.h" #include "git2/message.h"
#include "git2/pack.h"
#endif #endif
...@@ -56,6 +56,7 @@ typedef enum { ...@@ -56,6 +56,7 @@ typedef enum {
GITERR_INDEXER, GITERR_INDEXER,
GITERR_SSL, GITERR_SSL,
GITERR_SUBMODULE, GITERR_SUBMODULE,
GITERR_THREAD,
} git_error_t; } git_error_t;
/** /**
......
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_git_pack_h__
#define INCLUDE_git_pack_h__
#include "common.h"
#include "oid.h"
/**
* @file git2/pack.h
* @brief Git pack management routines
* @defgroup git_pack Git pack management routines
* @ingroup Git
* @{
*/
GIT_BEGIN_DECL
/**
* Initialize a new packbuilder
*
* @param out The new packbuilder object
* @param repo The repository
*
* @return 0 or an error code
*/
GIT_EXTERN(int) git_packbuilder_new(git_packbuilder **out, git_repository *repo);
/**
* Set number of threads to spawn
*
* By default, libgit2 won't spawn any threads at all;
* when set to 0, libgit2 will autodetect the number of
* CPUs.
*
* @param pb The packbuilder
* @param n Number of threads to spawn
*/
GIT_EXTERN(void) git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n);
/**
 * Insert a single object
 *
 * For an optimal pack it's mandatory to insert objects in recency order,
 * commits followed by trees and blobs.
 *
 * @param pb The packbuilder
 * @param oid The oid of the object to insert
 * @param name The name; might be NULL
 *
 * @return 0 or an error code
 */
GIT_EXTERN(int) git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid, const char *name);
/**
* Insert a root tree object
*
* This will add the tree as well as all referenced trees and blobs.
*
* @param pb The packbuilder
* @param oid The oid of the root tree
*
* @return 0 or an error code
*/
GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid);
/**
 * Write the new pack and the corresponding index to file
 *
 * @param pb The packbuilder
 * @param file Path of the pack file to write
 *             NOTE(review): this was documented as a directory, but the
 *             parameter is named `file` and the packbuilder test passes a
 *             pack file name -- confirm against the implementation.
 *
 * @return 0 or an error code
 */
GIT_EXTERN(int) git_packbuilder_write(git_packbuilder *pb, const char *file);
/**
* Free the packbuilder and all associated data
*
* @param pb The packbuilder
*/
GIT_EXTERN(void) git_packbuilder_free(git_packbuilder *pb);
/** @} */
GIT_END_DECL
#endif
...@@ -277,6 +277,21 @@ GIT_EXTERN(int) git_tag_list_match( ...@@ -277,6 +277,21 @@ GIT_EXTERN(int) git_tag_list_match(
const char *pattern, const char *pattern,
git_repository *repo); git_repository *repo);
/**
 * Callback type used by git_tag_foreach
 *
 * @param name Name of the tag reference, as reported by the reference iteration
 * @param oid The oid the reference name resolves to
 * @param data The `cb_data` pointer that was given to git_tag_foreach
 */
typedef int (*git_tag_foreach_cb)(const char *name, git_oid *oid, void *data);
/**
 * Call callback `cb' for each tag in the repository
 *
 * Only references whose name starts with GIT_REFS_TAGS_DIR are passed
 * to the callback.
 *
 * @param repo Repository
 * @param cb Callback function
 * @param cb_data Pointer to callback data (optional)
 *
 * @return the value returned by the underlying git_reference_foreach call
 */
GIT_EXTERN(int) git_tag_foreach(
git_repository *repo,
git_tag_foreach_cb cb,
void *cb_data);
/** /**
* Recursively peel a tag until a non tag git_object * Recursively peel a tag until a non tag git_object
* is met * is met
......
...@@ -137,6 +137,9 @@ typedef struct git_reflog git_reflog; ...@@ -137,6 +137,9 @@ typedef struct git_reflog git_reflog;
/** Representation of a git note */ /** Representation of a git note */
typedef struct git_note git_note; typedef struct git_note git_note;
/** Representation of a git packbuilder */
typedef struct git_packbuilder git_packbuilder;
/** Time in a signature */ /** Time in a signature */
typedef struct git_time { typedef struct git_time {
git_time_t time; /** time in seconds from epoch */ git_time_t time; /** time in seconds from epoch */
......
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "compress.h"
#include <zlib.h>
#define BUFFER_SIZE (1024 * 1024)
/*
 * Deflate `len` bytes starting at `buff` using zlib's default compression
 * level and append the compressed stream to `buf`.
 *
 * The input is handed to zlib in a single piece, so `len` has to fit into
 * zlib's `uInt` counter; larger inputs are rejected instead of being
 * silently truncated.
 *
 * Returns 0 on success and -1 on failure. On failure `buf` may already
 * contain part of the compressed output.
 */
int git__compress(git_buf *buf, const void *buff, size_t len)
{
	z_stream zs;
	char *zb;
	size_t have;
	int error = -1;

	/* refuse inputs that would be truncated by the cast to uInt below */
	if ((size_t)(uInt)len != len)
		return -1;

	/*
	 * Allocate the scratch buffer before opening the zlib stream so the
	 * allocation check can bail out without anything else to release.
	 */
	zb = git__malloc(BUFFER_SIZE);
	GITERR_CHECK_ALLOC(zb);

	memset(&zs, 0, sizeof(zs));

	if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) {
		git__free(zb);
		return -1;
	}

	zs.next_in = (void *)buff;
	zs.avail_in = (uInt)len;

	do {
		/* hand zlib a fresh scratch buffer for every pass */
		zs.next_out = (unsigned char *)zb;
		zs.avail_out = BUFFER_SIZE;

		if (deflate(&zs, Z_FINISH) == Z_STREAM_ERROR)
			goto cleanup;

		have = BUFFER_SIZE - (size_t)zs.avail_out;

		if (git_buf_put(buf, zb, have) < 0)
			goto cleanup;

		/* a completely filled scratch buffer means more output is pending */
	} while (zs.avail_out == 0);

	assert(zs.avail_in == 0);
	error = 0;

cleanup:
	/* release the zlib stream and the scratch buffer on every exit path */
	deflateEnd(&zs);
	git__free(zb);

	return error;
}
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_compress_h__
#define INCLUDE_compress_h__
#include "common.h"
#include "buffer.h"
int git__compress(git_buf *buf, const void *buff, size_t len);
#endif /* INCLUDE_compress_h__ */
This diff is collapsed. Click to expand it.
/*
* diff-delta code taken from git.git. See diff-delta.c for details.
*
*/
#ifndef INCLUDE_git_delta_h__
#define INCLUDE_git_delta_h__
#include "common.h"
/* opaque object for delta index */
struct git_delta_index;
/*
 * git_delta_create_index: compute index data from given buffer
 *
 * This returns a pointer to a struct git_delta_index that should be passed
 * to subsequent git_delta_create() calls, or to git_delta_free_index().
 * A NULL pointer is returned on failure. The given buffer must not be freed
 * nor altered before git_delta_free_index() is called. The returned pointer
 * must be freed using git_delta_free_index().
 */
extern struct git_delta_index *
git_delta_create_index(const void *buf, unsigned long bufsize);
/*
 * git_delta_free_index: free the index created by git_delta_create_index()
 *
 * Given pointer must be what git_delta_create_index() returned, or NULL.
 */
extern void git_delta_free_index(struct git_delta_index *index);
/*
 * git_delta_sizeof_index: returns memory usage of delta index
 *
 * Given pointer must be what git_delta_create_index() returned, or NULL.
 */
extern unsigned long git_delta_sizeof_index(struct git_delta_index *index);
/*
 * git_delta_create: create a delta from given index for the given buffer
 *
 * This function may be called multiple times with different buffers using
 * the same git_delta_index pointer. If max_delta_size is non-zero and the
 * resulting delta is to be larger than max_delta_size then NULL is returned.
 * On success, a non-NULL pointer to the buffer with the delta data is
 * returned and *delta_size is updated with its size. The returned buffer
 * must be freed by the caller.
 */
extern void *
git_delta_create(const struct git_delta_index *index,
const void *buf, unsigned long bufsize,
unsigned long *delta_size,
unsigned long max_delta_size);
/*
 * git_delta: create a delta from source buffer to target buffer
 *
 * Convenience wrapper that builds a temporary delta index over the source
 * buffer, creates the delta for the target buffer against it and releases
 * the index again.
 *
 * If max_delta_size is non-zero and the resulting delta is to be larger
 * than max_delta_size then NULL is returned. NULL is also returned when the
 * index cannot be created. On success, a non-NULL pointer to the buffer
 * with the delta data is returned and *delta_size is updated with its size.
 * The returned buffer must be freed by the caller.
 */
GIT_INLINE(void *)
git_delta(const void *src_buf, unsigned long src_bufsize,
	const void *trg_buf, unsigned long trg_bufsize,
	unsigned long *delta_size, unsigned long max_delta_size)
{
	struct git_delta_index *src_index;
	void *result;

	src_index = git_delta_create_index(src_buf, src_bufsize);
	if (!src_index)
		return NULL;

	result = git_delta_create(src_index, trg_buf, trg_bufsize,
		delta_size, max_delta_size);

	/* the index is only needed for this single delta */
	git_delta_free_index(src_index);

	return result;
}
/*
 * git_delta_patch: recreate target buffer given source buffer and delta data
 *
 * On success, a non-NULL pointer to the target buffer is returned and
 * *dst_size is updated with its size. On failure a NULL pointer is
 * returned. The returned buffer must be freed by the caller.
 */
extern void *git_delta_patch(const void *src_buf, unsigned long src_size,
const void *delta_buf, unsigned long delta_size,
unsigned long *dst_size);
/* the smallest possible delta size is 4 bytes */
#define GIT_DELTA_SIZE_MIN 4
/*
 * Decode one variable-length size field from the start of a delta buffer.
 *
 * Every byte contributes its low 7 bits, least significant group first; a
 * set high bit (0x80) announces that another byte follows. Decoding stops
 * at the first byte without the high bit or once `top` has been reached.
 * On return *datap points just past the consumed bytes.
 *
 * This must be called twice on the delta data buffer, first to get the
 * expected source buffer size, and again to get the target buffer size.
 *
 * The first byte is read before any comparison against `top`, so the caller
 * has to make sure at least one byte is readable at *datap.
 */
GIT_INLINE(unsigned long)
git_delta_get_hdr_size(const unsigned char **datap,
	const unsigned char *top)
{
	const unsigned char *data = *datap;
	unsigned long cmd, size = 0;
	unsigned int shift = 0;

	do {
		cmd = *data++;

		/*
		 * Shifting by the width of the type or more is undefined, so
		 * groups that no longer fit are consumed but not accumulated.
		 */
		if (shift < sizeof(size) * 8) {
			size |= (cmd & 0x7f) << shift;
			shift += 7;
		}
	} while (cmd & 0x80 && data < top);

	*datap = data;
	return size;
}
#endif
...@@ -599,11 +599,6 @@ int git_indexer_new(git_indexer **out, const char *packname) ...@@ -599,11 +599,6 @@ int git_indexer_new(git_indexer **out, const char *packname)
assert(out && packname); assert(out && packname);
if (git_path_root(packname) < 0) {
giterr_set(GITERR_INDEXER, "Path is not absolute");
return -1;
}
idx = git__calloc(1, sizeof(git_indexer)); idx = git__calloc(1, sizeof(git_indexer));
GITERR_CHECK_ALLOC(idx); GITERR_CHECK_ALLOC(idx);
......
...@@ -107,6 +107,9 @@ git_otype git_odb_object_type(git_odb_object *object) ...@@ -107,6 +107,9 @@ git_otype git_odb_object_type(git_odb_object *object)
void git_odb_object_free(git_odb_object *object) void git_odb_object_free(git_odb_object *object)
{ {
if (object == NULL)
return;
git_cached_obj_decref((git_cached_obj *)object, &free_odb_object); git_cached_obj_decref((git_cached_obj *)object, &free_odb_object);
} }
......
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_pack_objects_h__
#define INCLUDE_pack_objects_h__
#include "common.h"
#include "buffer.h"
#include "hash.h"
#include "oidmap.h"
#include "git2/oid.h"
#define GIT_PACK_WINDOW 10 /* number of objects to possibly delta against */
#define GIT_PACK_DEPTH 50 /* max delta depth */
#define GIT_PACK_DELTA_CACHE_SIZE (256 * 1024 * 1024)
#define GIT_PACK_DELTA_CACHE_LIMIT 1000
#define GIT_PACK_BIG_FILE_THRESHOLD (512 * 1024 * 1024)
/* A single object entry tracked by the packbuilder. */
typedef struct git_pobject {
	git_oid id;
	git_otype type;
	git_off_t offset;

	size_t size;

	unsigned int hash; /* name hint hash */

	struct git_pobject *delta; /* delta base object */
	struct git_pobject *delta_child; /* deltified objects who bases me */
	struct git_pobject *delta_sibling; /* other deltified objects
					    * who uses the same base as
					    * me */

	void *delta_data;
	unsigned long delta_size;
	unsigned long z_delta_size;

	/*
	 * One-bit flags. They are declared unsigned because a signed 1-bit
	 * field can only represent 0 and -1, so storing 1 and comparing
	 * against 1 would not round-trip.
	 */
	unsigned int written:1,
		     recursing:1,
		     no_try_delta:1,
		     tagged:1,
		     filled:1;
} git_pobject;
/* State of a pack that is being built. */
struct git_packbuilder {
	/* associated repository */
	git_repository *repo;
	/* associated object database */
	git_odb *odb;

	git_hash_ctx *ctx;

	/* object counters */
	uint32_t nr_objects;
	uint32_t nr_alloc;
	uint32_t nr_written;
	uint32_t nr_remaining;

	git_pobject *object_list;

	git_oidmap *object_ix;

	/* hash of written pack */
	git_oid pack_oid;

	/* configs */
	unsigned long delta_cache_size;
	unsigned long max_delta_cache_size;
	unsigned long cache_max_small_delta_size;
	unsigned long big_file_threshold;
	unsigned long window_memory_limit;

	/* nr of threads to use */
	int nr_threads;

	bool done;
};
int git_packbuilder_send(git_packbuilder *pb, git_transport *t);
int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb);
#endif
...@@ -393,20 +393,51 @@ int git_tag__parse(git_tag *tag, git_odb_object *obj) ...@@ -393,20 +393,51 @@ int git_tag__parse(git_tag *tag, git_odb_object *obj)
} }
typedef struct { typedef struct {
git_repository *repo;
git_tag_foreach_cb cb;
void *cb_data;
} tag_cb_data;
/*
 * Reference iteration callback used by git_tag_foreach.
 *
 * References outside GIT_REFS_TAGS_DIR are skipped. For every other
 * reference the name is resolved to an oid and both are handed to the
 * user's callback, whose return value is passed back to the iteration.
 * Returns -1 when the name cannot be resolved.
 */
static int tags_cb(const char *ref, void *data)
{
	tag_cb_data *payload = data;
	git_oid target;

	/* no tag */
	if (git__prefixcmp(ref, GIT_REFS_TAGS_DIR))
		return 0;

	if (git_reference_name_to_oid(&target, payload->repo, ref) < 0)
		return -1;

	return payload->cb(ref, &target, payload->cb_data);
}
/*
 * Invoke `cb` for every tag reference in `repo`.
 *
 * The user's callback and its payload are bundled together with the
 * repository and passed to the reference iteration, which calls tags_cb
 * for each direct (oid) and packed reference. The result of that
 * iteration is returned unchanged.
 */
int git_tag_foreach(git_repository *repo, git_tag_foreach_cb cb, void *cb_data)
{
	tag_cb_data payload;

	assert(repo && cb);

	payload.repo = repo;
	payload.cb = cb;
	payload.cb_data = cb_data;

	return git_reference_foreach(
		repo, GIT_REF_OID | GIT_REF_PACKED, &tags_cb, &payload);
}
typedef struct {
git_vector *taglist; git_vector *taglist;
const char *pattern; const char *pattern;
} tag_filter_data; } tag_filter_data;
#define GIT_REFS_TAGS_DIR_LEN strlen(GIT_REFS_TAGS_DIR) #define GIT_REFS_TAGS_DIR_LEN strlen(GIT_REFS_TAGS_DIR)
static int tag_list_cb(const char *tag_name, void *payload) static int tag_list_cb(const char *tag_name, git_oid *oid, void *data)
{ {
tag_filter_data *filter; tag_filter_data *filter = (tag_filter_data *)data;
GIT_UNUSED(oid);
if (git__prefixcmp(tag_name, GIT_REFS_TAGS_DIR) != 0)
return 0;
filter = (tag_filter_data *)payload;
if (!*filter->pattern || p_fnmatch(filter->pattern, tag_name + GIT_REFS_TAGS_DIR_LEN, 0) == 0) if (!*filter->pattern || p_fnmatch(filter->pattern, tag_name + GIT_REFS_TAGS_DIR_LEN, 0) == 0)
return git_vector_insert(filter->taglist, git__strdup(tag_name + GIT_REFS_TAGS_DIR_LEN)); return git_vector_insert(filter->taglist, git__strdup(tag_name + GIT_REFS_TAGS_DIR_LEN));
...@@ -427,7 +458,7 @@ int git_tag_list_match(git_strarray *tag_names, const char *pattern, git_reposit ...@@ -427,7 +458,7 @@ int git_tag_list_match(git_strarray *tag_names, const char *pattern, git_reposit
filter.taglist = &taglist; filter.taglist = &taglist;
filter.pattern = pattern; filter.pattern = pattern;
error = git_reference_foreach(repo, GIT_REF_OID|GIT_REF_PACKED, &tag_list_cb, (void *)&filter); error = git_tag_foreach(repo, &tag_list_cb, (void *)&filter);
if (error < 0) { if (error < 0) {
git_vector_free(&taglist); git_vector_free(&taglist);
return -1; return -1;
......
...@@ -38,13 +38,13 @@ GIT_INLINE(void) git_atomic_set(git_atomic *a, int val) ...@@ -38,13 +38,13 @@ GIT_INLINE(void) git_atomic_set(git_atomic *a, int val)
#define git_mutex_unlock(a) pthread_mutex_unlock(a) #define git_mutex_unlock(a) pthread_mutex_unlock(a)
#define git_mutex_free(a) pthread_mutex_destroy(a) #define git_mutex_free(a) pthread_mutex_destroy(a)
/* Pthreads condition vars -- disabled by now */ /* Pthreads condition vars */
#define git_cond unsigned int //pthread_cond_t #define git_cond pthread_cond_t
#define git_cond_init(c, a) (void)0 //pthread_cond_init(c, a) #define git_cond_init(c) pthread_cond_init(c, NULL)
#define git_cond_free(c) (void)0 //pthread_cond_destroy(c) #define git_cond_free(c) pthread_cond_destroy(c)
#define git_cond_wait(c, l) (void)0 //pthread_cond_wait(c, l) #define git_cond_wait(c, l) pthread_cond_wait(c, l)
#define git_cond_signal(c) (void)0 //pthread_cond_signal(c) #define git_cond_signal(c) pthread_cond_signal(c)
#define git_cond_broadcast(c) (void)0 //pthread_cond_broadcast(c) #define git_cond_broadcast(c) pthread_cond_broadcast(c)
GIT_INLINE(int) git_atomic_inc(git_atomic *a) GIT_INLINE(int) git_atomic_inc(git_atomic *a)
{ {
......
#include "clar_libgit2.h"
#include "iterator.h"
#include "vector.h"
static git_repository *_repo;
static git_revwalk *_revwalker;
static git_packbuilder *_packbuilder;
static git_indexer *_indexer;
static git_vector _commits;
/*
 * Test fixture setup: opens the "testrepo.git" fixture repository, creates
 * a revision walker and a packbuilder on top of it, and initializes the
 * vector that collects the commit ids. `_indexer` is not set up here; it is
 * only created by the test body.
 */
void test_pack_packbuilder__initialize(void)
{
/* the repository has to be opened first; the next two calls use it */
cl_git_pass(git_repository_open(&_repo, cl_fixture("testrepo.git")));
cl_git_pass(git_revwalk_new(&_revwalker, _repo));
cl_git_pass(git_packbuilder_new(&_packbuilder, _repo));
cl_git_pass(git_vector_init(&_commits, 0, NULL));
}
/*
 * Test fixture teardown: releases the commit ids collected by the test,
 * then the packbuilder, revision walker, indexer and repository.
 *
 * The file-scope handles are reset to NULL after being freed so that a
 * later cleanup run never sees a stale pointer (`_indexer` in particular
 * is only assigned by the test body, not by the initializer).
 */
void test_pack_packbuilder__cleanup(void)
{
	git_oid *o;
	unsigned int i;

	/* the vector owns the heap-allocated oid copies */
	git_vector_foreach(&_commits, i, o) {
		git__free(o);
	}
	git_vector_free(&_commits);

	git_packbuilder_free(_packbuilder);
	_packbuilder = NULL;

	git_revwalk_free(_revwalker);
	_revwalker = NULL;

	git_indexer_free(_indexer);
	_indexer = NULL;

	git_repository_free(_repo);
	_repo = NULL;
}
/*
 * Builds a pack from everything reachable from HEAD and runs the indexer
 * over the result.
 *
 * The commits are collected with a time-sorted revision walk and inserted
 * first; the root tree of every commit is inserted afterwards. The pack is
 * written to "testpack.pack" and then indexed.
 */
void test_pack_packbuilder__create_pack(void)
{
	git_indexer_stats stats;
	git_oid oid, *o;
	unsigned int i;

	git_revwalk_sorting(_revwalker, GIT_SORT_TIME);
	cl_git_pass(git_revwalk_push_ref(_revwalker, "HEAD"));

	/* keep a heap copy of every commit id; the cleanup function frees them */
	while (git_revwalk_next(&oid, _revwalker) == 0) {
		/* size the allocation by the pointee type, not by a constant */
		o = git__malloc(sizeof(*o));
		cl_assert(o != NULL);
		git_oid_cpy(o, &oid);
		cl_git_pass(git_vector_insert(&_commits, o));
	}

	/* first pass: the commits themselves */
	git_vector_foreach(&_commits, i, o) {
		cl_git_pass(git_packbuilder_insert(_packbuilder, o, NULL));
	}

	/* second pass: the root tree of every commit */
	git_vector_foreach(&_commits, i, o) {
		git_object *obj;

		cl_git_pass(git_object_lookup(&obj, _repo, o, GIT_OBJ_COMMIT));
		cl_git_pass(git_packbuilder_insert_tree(_packbuilder,
			git_commit_tree_oid((git_commit *)obj)));
		git_object_free(obj);
	}

	cl_git_pass(git_packbuilder_write(_packbuilder, "testpack.pack"));

	cl_git_pass(git_indexer_new(&_indexer, "testpack.pack"));
	cl_git_pass(git_indexer_run(_indexer, &stats));
	cl_git_pass(git_indexer_write(_indexer));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment