Commit d5930554 by Vicent Martí

Merge remote-tracking branch 'carlosmn/indexer-stream' into new-error-handling

parents 54e4d0f2 1c9c081a
...@@ -13,6 +13,61 @@ int index_cb(const git_indexer_stats *stats, void *data) ...@@ -13,6 +13,61 @@ int index_cb(const git_indexer_stats *stats, void *data)
/**
 * Index the packfile named by argv[1] with the streaming indexer.
 *
 * The file is read in fixed-size chunks which are fed to the indexer one
 * at a time; progress is printed after every chunk. Once the whole file
 * has been consumed the index is finalized and the resulting pack hash
 * is printed in hex.
 *
 * @param repo unused by this function
 * @param argc number of entries in argv; must be at least 2
 * @param argv argv[1] is the path of the packfile to index
 * @return EXIT_FAILURE when no packfile argument was given, -1 when the
 *         indexer could not be created, the file could not be opened or
 *         reading failed, otherwise the result of the last indexer call
 *         (negative on failure)
 */
int index_pack(git_repository *repo, int argc, char **argv)
{
	git_indexer_stream *idx;
	git_indexer_stats stats = {0, 0};
	int error, fd;
	char hash[GIT_OID_HEXSZ + 1] = {0};
	ssize_t read_bytes;
	char buf[512];

	(void)repo;

	if (argc < 2) {
		fprintf(stderr, "I need a packfile\n");
		return EXIT_FAILURE;
	}

	if (git_indexer_stream_new(&idx, ".git") < 0) {
		puts("bad idx");
		return -1;
	}

	if ((fd = open(argv[1], 0)) < 0) {
		perror("open");
		/* The indexer already exists at this point; release it so the
		 * early return does not leak it. */
		git_indexer_stream_free(idx);
		return -1;
	}

	do {
		read_bytes = read(fd, buf, sizeof(buf));
		if (read_bytes < 0)
			break;

		if ((error = git_indexer_stream_add(idx, buf, read_bytes, &stats)) < 0)
			goto cleanup;

		printf("\rIndexing %d of %d", stats.processed, stats.total);
	} while (read_bytes > 0);

	/* The loop is left with a negative count only when read() failed. */
	if (read_bytes < 0) {
		error = -1;
		perror("failed reading");
		goto cleanup;
	}

	if ((error = git_indexer_stream_finalize(idx, &stats)) < 0)
		goto cleanup;

	printf("\rIndexing %d of %d\n", stats.processed, stats.total);

	/* hash is zero-initialised and one byte longer than the hex form,
	 * so it stays NUL-terminated for puts(). */
	git_oid_fmt(hash, git_indexer_stream_hash(idx));
	puts(hash);

cleanup:
	close(fd);
	git_indexer_stream_free(idx);
	return error;
}
int index_pack_old(git_repository *repo, int argc, char **argv)
{
git_indexer *indexer; git_indexer *indexer;
git_indexer_stats stats; git_indexer_stats stats;
int error; int error;
......
...@@ -23,6 +23,51 @@ typedef struct git_indexer_stats { ...@@ -23,6 +23,51 @@ typedef struct git_indexer_stats {
typedef struct git_indexer git_indexer; typedef struct git_indexer git_indexer;
typedef struct git_indexer_stream git_indexer_stream;
/**
* Create a new streaming indexer instance
*
* @param out where to store the indexer instance
* @param gitdir path to the gitdir (metadata directory)
*/
GIT_EXTERN(int) git_indexer_stream_new(git_indexer_stream **out, const char *gitdir);
/**
* Add data to the indexer
*
* @param idx the indexer
* @param data the data to add
* @param size the size of the data
* @param stats stat storage
*/
GIT_EXTERN(int) git_indexer_stream_add(git_indexer_stream *idx, void *data, size_t size, git_indexer_stats *stats);
/**
* Finalize the pack and index
*
* Resolve any pending deltas and write out the index file
*
* @param idx the indexer
*/
GIT_EXTERN(int) git_indexer_stream_finalize(git_indexer_stream *idx, git_indexer_stats *stats);
/**
* Get the packfile's hash
*
* A packfile's name is derived from the sorted hashing of all object
* names. This is only correct after the index has been finalized.
*
* @param idx the indexer instance
*/
GIT_EXTERN(const git_oid *) git_indexer_stream_hash(git_indexer_stream *idx);
/**
* Free the indexer and its resources
*
* @param idx the indexer to free
*/
GIT_EXTERN(void) git_indexer_stream_free(git_indexer_stream *idx);
/** /**
* Create a new indexer instance * Create a new indexer instance
......
...@@ -194,14 +194,19 @@ int git_filebuf_open(git_filebuf *file, const char *path, int flags) ...@@ -194,14 +194,19 @@ int git_filebuf_open(git_filebuf *file, const char *path, int flags)
memset(file, 0x0, sizeof(git_filebuf)); memset(file, 0x0, sizeof(git_filebuf));
if (flags & GIT_FILEBUF_DO_NOT_BUFFER)
file->do_not_buffer = true;
file->buf_size = WRITE_BUFFER_SIZE; file->buf_size = WRITE_BUFFER_SIZE;
file->buf_pos = 0; file->buf_pos = 0;
file->fd = -1; file->fd = -1;
file->last_error = BUFERR_OK; file->last_error = BUFERR_OK;
/* Allocate the main cache buffer */ /* Allocate the main cache buffer */
if (!file->do_not_buffer) {
file->buffer = git__malloc(file->buf_size); file->buffer = git__malloc(file->buf_size);
GITERR_CHECK_ALLOC(file->buffer); GITERR_CHECK_ALLOC(file->buffer);
}
/* If we are hashing on-write, allocate a new hash context */ /* If we are hashing on-write, allocate a new hash context */
if (flags & GIT_FILEBUF_HASH_CONTENTS) { if (flags & GIT_FILEBUF_HASH_CONTENTS) {
...@@ -345,6 +350,9 @@ int git_filebuf_write(git_filebuf *file, const void *buff, size_t len) ...@@ -345,6 +350,9 @@ int git_filebuf_write(git_filebuf *file, const void *buff, size_t len)
ENSURE_BUF_OK(file); ENSURE_BUF_OK(file);
if (file->do_not_buffer)
return file->write(file, (void *)buff, len);
for (;;) { for (;;) {
size_t space_left = file->buf_size - file->buf_pos; size_t space_left = file->buf_size - file->buf_pos;
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
#define GIT_FILEBUF_APPEND (1 << 2) #define GIT_FILEBUF_APPEND (1 << 2)
#define GIT_FILEBUF_FORCE (1 << 3) #define GIT_FILEBUF_FORCE (1 << 3)
#define GIT_FILEBUF_TEMPORARY (1 << 4) #define GIT_FILEBUF_TEMPORARY (1 << 4)
#define GIT_FILEBUF_DEFLATE_SHIFT (5) #define GIT_FILEBUF_DO_NOT_BUFFER (1 << 5)
#define GIT_FILEBUF_DEFLATE_SHIFT (6)
#define GIT_FILELOCK_EXTENSION ".lock\0" #define GIT_FILELOCK_EXTENSION ".lock\0"
#define GIT_FILELOCK_EXTLENGTH 6 #define GIT_FILELOCK_EXTLENGTH 6
...@@ -41,6 +42,7 @@ struct git_filebuf { ...@@ -41,6 +42,7 @@ struct git_filebuf {
size_t buf_size, buf_pos; size_t buf_size, buf_pos;
git_file fd; git_file fd;
bool fd_is_open; bool fd_is_open;
bool do_not_buffer;
int last_error; int last_error;
}; };
......
...@@ -89,6 +89,7 @@ void git_mwindow_scan_lru( ...@@ -89,6 +89,7 @@ void git_mwindow_scan_lru(
{ {
git_mwindow *w, *w_l; git_mwindow *w, *w_l;
puts("LRU");
for (w_l = NULL, w = mwf->windows; w; w = w->next) { for (w_l = NULL, w = mwf->windows; w; w = w->next) {
if (!w->inuse_cnt) { if (!w->inuse_cnt) {
/* /*
...@@ -210,15 +211,17 @@ unsigned char *git_mwindow_open( ...@@ -210,15 +211,17 @@ unsigned char *git_mwindow_open(
git_mwindow_ctl *ctl = &GIT_GLOBAL->mem_ctl; git_mwindow_ctl *ctl = &GIT_GLOBAL->mem_ctl;
git_mwindow *w = *cursor; git_mwindow *w = *cursor;
if (!w || !git_mwindow_contains(w, offset + extra)) { if (!w || !git_mwindow_contains(w, offset + extra)) {
if (w) { if (w) {
w->inuse_cnt--; w->inuse_cnt--;
} }
for (w = mwf->windows; w; w = w->next) { for (w = mwf->windows; w; w = w->next) {
if (git_mwindow_contains(w, offset + extra)) if (git_mwindow_contains(w, offset + extra)) {
break; break;
} }
}
/* /*
* If there isn't a suitable window, we need to create a new * If there isn't a suitable window, we need to create a new
...@@ -246,6 +249,7 @@ unsigned char *git_mwindow_open( ...@@ -246,6 +249,7 @@ unsigned char *git_mwindow_open(
if (left) if (left)
*left = (unsigned int)(w->window_map.len - offset); *left = (unsigned int)(w->window_map.len - offset);
fflush(stdout);
return (unsigned char *) w->window_map.data + offset; return (unsigned char *) w->window_map.data + offset;
} }
......
...@@ -200,7 +200,8 @@ static unsigned char *pack_window_open( ...@@ -200,7 +200,8 @@ static unsigned char *pack_window_open(
return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left); return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
} }
static unsigned long packfile_unpack_header1( static int packfile_unpack_header1(
unsigned long *usedp,
size_t *sizep, size_t *sizep,
git_otype *type, git_otype *type,
const unsigned char *buf, const unsigned char *buf,
...@@ -215,8 +216,13 @@ static unsigned long packfile_unpack_header1( ...@@ -215,8 +216,13 @@ static unsigned long packfile_unpack_header1(
size = c & 15; size = c & 15;
shift = 4; shift = 4;
while (c & 0x80) { while (c & 0x80) {
if (len <= used || bitsizeof(long) <= shift) if (len <= used)
return 0; return GIT_ESHORTBUFFER;
if (bitsizeof(long) <= shift) {
*usedp = 0;
return -1;
}
c = buf[used++]; c = buf[used++];
size += (c & 0x7f) << shift; size += (c & 0x7f) << shift;
...@@ -224,7 +230,8 @@ static unsigned long packfile_unpack_header1( ...@@ -224,7 +230,8 @@ static unsigned long packfile_unpack_header1(
} }
*sizep = (size_t)size; *sizep = (size_t)size;
return used; *usedp = used;
return 0;
} }
int git_packfile_unpack_header( int git_packfile_unpack_header(
...@@ -237,6 +244,7 @@ int git_packfile_unpack_header( ...@@ -237,6 +244,7 @@ int git_packfile_unpack_header(
unsigned char *base; unsigned char *base;
unsigned int left; unsigned int left;
unsigned long used; unsigned long used;
int ret;
/* pack_window_open() assures us we have [base, base + 20) available /* pack_window_open() assures us we have [base, base + 20) available
* as a range that we can look at at. (Its actually the hash * as a range that we can look at at. (Its actually the hash
...@@ -247,10 +255,13 @@ int git_packfile_unpack_header( ...@@ -247,10 +255,13 @@ int git_packfile_unpack_header(
// base = pack_window_open(p, w_curs, *curpos, &left); // base = pack_window_open(p, w_curs, *curpos, &left);
base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left); base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
if (base == NULL) if (base == NULL)
return -1; return GIT_ESHORTBUFFER;
used = packfile_unpack_header1(size_p, type_p, base, left); ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
if (used == 0) git_mwindow_close(w_curs);
if (ret == GIT_ESHORTBUFFER)
return ret;
else if (ret < 0)
return packfile_error("header length is zero"); return packfile_error("header length is zero");
*curpos += used; *curpos += used;
...@@ -271,12 +282,12 @@ static int packfile_unpack_delta( ...@@ -271,12 +282,12 @@ static int packfile_unpack_delta(
int error; int error;
base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset); base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
git_mwindow_close(w_curs);
if (base_offset == 0) if (base_offset == 0)
return packfile_error("delta offset is zero"); return packfile_error("delta offset is zero");
if (base_offset < 0) /* must actually be an error code */ if (base_offset < 0) /* must actually be an error code */
return (int)base_offset; return (int)base_offset;
git_mwindow_close(w_curs);
error = git_packfile_unpack(&base, p, &base_offset); error = git_packfile_unpack(&base, p, &base_offset);
/* /*
...@@ -289,6 +300,7 @@ static int packfile_unpack_delta( ...@@ -289,6 +300,7 @@ static int packfile_unpack_delta(
return error; return error;
error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
git_mwindow_close(w_curs);
if (error < 0) { if (error < 0) {
git__free(base.data); git__free(base.data);
return error; return error;
...@@ -327,6 +339,8 @@ int git_packfile_unpack( ...@@ -327,6 +339,8 @@ int git_packfile_unpack(
obj->type = GIT_OBJ_BAD; obj->type = GIT_OBJ_BAD;
error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos); error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
git_mwindow_close(&w_curs);
if (error < 0) if (error < 0)
return error; return error;
...@@ -352,8 +366,6 @@ int git_packfile_unpack( ...@@ -352,8 +366,6 @@ int git_packfile_unpack(
break; break;
} }
git_mwindow_close(&w_curs);
*obj_offset = curpos; *obj_offset = curpos;
return error; return error;
} }
...@@ -381,6 +393,7 @@ int packfile_unpack_compressed( ...@@ -381,6 +393,7 @@ int packfile_unpack_compressed(
if (st != Z_OK) { if (st != Z_OK) {
git__free(buffer); git__free(buffer);
giterr_set(GITERR_ZLIB, "Failed to inflate packfile"); giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
return -1; return -1;
} }
...@@ -388,10 +401,17 @@ int packfile_unpack_compressed( ...@@ -388,10 +401,17 @@ int packfile_unpack_compressed(
in = pack_window_open(p, w_curs, *curpos, &stream.avail_in); in = pack_window_open(p, w_curs, *curpos, &stream.avail_in);
stream.next_in = in; stream.next_in = in;
st = inflate(&stream, Z_FINISH); st = inflate(&stream, Z_FINISH);
git_mwindow_close(w_curs);
if (!stream.avail_out) if (!stream.avail_out)
break; /* the payload is larger than it should be */ break; /* the payload is larger than it should be */
if (st == Z_BUF_ERROR && in == NULL) {
inflateEnd(&stream);
git__free(buffer);
return GIT_ESHORTBUFFER;
}
*curpos += stream.next_in - in; *curpos += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR); } while (st == Z_OK || st == Z_BUF_ERROR);
...@@ -420,10 +440,15 @@ git_off_t get_delta_base( ...@@ -420,10 +440,15 @@ git_off_t get_delta_base(
git_otype type, git_otype type,
git_off_t delta_obj_offset) git_off_t delta_obj_offset)
{ {
unsigned char *base_info = pack_window_open(p, w_curs, *curpos, NULL); unsigned int left = 0;
unsigned char *base_info;
git_off_t base_offset; git_off_t base_offset;
git_oid unused; git_oid unused;
base_info = pack_window_open(p, w_curs, *curpos, &left);
/* Assumption: the only reason this would fail is because the file is too small */
if (base_info == NULL)
return GIT_ESHORTBUFFER;
/* pack_window_open() assured us we have [base_info, base_info + 20) /* pack_window_open() assured us we have [base_info, base_info + 20)
* as a range that we can look at without walking off the * as a range that we can look at without walking off the
* end of the mapped window. Its actually the hash size * end of the mapped window. Its actually the hash size
...@@ -435,6 +460,8 @@ git_off_t get_delta_base( ...@@ -435,6 +460,8 @@ git_off_t get_delta_base(
unsigned char c = base_info[used++]; unsigned char c = base_info[used++];
base_offset = c & 127; base_offset = c & 127;
while (c & 128) { while (c & 128) {
if (left <= used)
return GIT_ESHORTBUFFER;
base_offset += 1; base_offset += 1;
if (!base_offset || MSB(base_offset, 7)) if (!base_offset || MSB(base_offset, 7))
return 0; /* overflow */ return 0; /* overflow */
......
...@@ -83,6 +83,13 @@ int git_packfile_unpack_header( ...@@ -83,6 +83,13 @@ int git_packfile_unpack_header(
git_off_t *curpos); git_off_t *curpos);
int git_packfile_unpack(git_rawobj *obj, struct git_pack_file *p, git_off_t *obj_offset); int git_packfile_unpack(git_rawobj *obj, struct git_pack_file *p, git_off_t *obj_offset);
int packfile_unpack_compressed(
git_rawobj *obj,
struct git_pack_file *p,
git_mwindow **w_curs,
git_off_t *curpos,
size_t size,
git_otype type);
git_off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs, git_off_t get_delta_base(struct git_pack_file *p, git_mwindow **w_curs,
git_off_t *curpos, git_otype type, git_off_t *curpos, git_otype type,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment