Unverified Commit 0fd0bfe4 by Edward Thomson Committed by GitHub

Merge pull request #4450 from libgit2/ethomson/odb_loose_readstream

Streaming read support for the loose ODB backend
parents d749822c 09df354e
...@@ -357,11 +357,18 @@ GIT_EXTERN(void) git_odb_stream_free(git_odb_stream *stream); ...@@ -357,11 +357,18 @@ GIT_EXTERN(void) git_odb_stream_free(git_odb_stream *stream);
* @see git_odb_stream * @see git_odb_stream
* *
* @param out pointer where to store the stream * @param out pointer where to store the stream
* @param len pointer where to store the length of the object
* @param type pointer where to store the type of the object
* @param db object database where the stream will read from * @param db object database where the stream will read from
* @param oid oid of the object the stream will read from * @param oid oid of the object the stream will read from
* @return 0 if the stream was created; error code otherwise * @return 0 if the stream was created; error code otherwise
*/ */
GIT_EXTERN(int) git_odb_open_rstream(git_odb_stream **out, git_odb *db, const git_oid *oid); GIT_EXTERN(int) git_odb_open_rstream(
git_odb_stream **out,
size_t *len,
git_otype *type,
git_odb *db,
const git_oid *oid);
/** /**
* Open a stream for writing a pack file to the ODB. * Open a stream for writing a pack file to the ODB.
......
...@@ -56,7 +56,8 @@ struct git_odb_backend { ...@@ -56,7 +56,8 @@ struct git_odb_backend {
git_odb_stream **, git_odb_backend *, git_off_t, git_otype); git_odb_stream **, git_odb_backend *, git_off_t, git_otype);
int (* readstream)( int (* readstream)(
git_odb_stream **, git_odb_backend *, const git_oid *); git_odb_stream **, size_t *, git_otype *,
git_odb_backend *, const git_oid *);
int (* exists)( int (* exists)(
git_odb_backend *, const git_oid *); git_odb_backend *, const git_oid *);
......
...@@ -1396,7 +1396,12 @@ void git_odb_stream_free(git_odb_stream *stream) ...@@ -1396,7 +1396,12 @@ void git_odb_stream_free(git_odb_stream *stream)
stream->free(stream); stream->free(stream);
} }
int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid) int git_odb_open_rstream(
git_odb_stream **stream,
size_t *len,
git_otype *type,
git_odb *db,
const git_oid *oid)
{ {
size_t i, reads = 0; size_t i, reads = 0;
int error = GIT_ERROR; int error = GIT_ERROR;
...@@ -1409,7 +1414,7 @@ int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oi ...@@ -1409,7 +1414,7 @@ int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oi
if (b->readstream != NULL) { if (b->readstream != NULL) {
++reads; ++reads;
error = b->readstream(stream, b, oid); error = b->readstream(stream, len, type, b, oid);
} }
} }
......
...@@ -87,9 +87,52 @@ size_t git_zstream_suggest_output_len(git_zstream *zstream) ...@@ -87,9 +87,52 @@ size_t git_zstream_suggest_output_len(git_zstream *zstream)
return ZSTREAM_BUFFER_MIN_EXTRA; return ZSTREAM_BUFFER_MIN_EXTRA;
} }
/*
 * Run a single inflate/deflate pass over the stream's pending input.
 *
 * On entry `*out_len` is the capacity of `out`.  On success it is
 * replaced with the number of bytes produced, and the stream's
 * `in`/`in_len` are advanced past the input that was consumed.
 *
 * Returns 0 on success, or -1 when zstream_seterr() reports a failure;
 * in that case `*out_len`, `in` and `in_len` are not updated.
 */
int git_zstream_get_output_chunk(
	void *out, size_t *out_len, git_zstream *zstream)
{
	const size_t out_capacity = *out_len;
	size_t in_offered, out_offered, in_consumed;

	/*
	 * zlib's counters are uInt, so offer at most UINT_MAX bytes of
	 * input.  Z_FINISH is only requested when all of the pending
	 * input fits in this pass.
	 */
	zstream->z.next_in = (Bytef *)zstream->in;

	if (zstream->in_len <= UINT_MAX) {
		zstream->z.avail_in = (uInt)zstream->in_len;
		zstream->flush = Z_FINISH;
	} else {
		zstream->z.avail_in = UINT_MAX;
		zstream->flush = Z_NO_FLUSH;
	}

	in_offered = (size_t)zstream->z.avail_in;

	/* likewise clamp the output window to what a uInt can express */
	zstream->z.next_out = out;
	zstream->z.avail_out =
		(out_capacity > UINT_MAX) ? UINT_MAX : (uInt)out_capacity;
	out_offered = (size_t)zstream->z.avail_out;

	/* run the pass in the direction this stream was configured for */
	zstream->zerr = (zstream->type == GIT_ZSTREAM_INFLATE) ?
		inflate(&zstream->z, zstream->flush) :
		deflate(&zstream->z, zstream->flush);

	if (zstream_seterr(zstream))
		return -1;

	/* step past what zlib consumed and report what it produced */
	in_consumed = in_offered - zstream->z.avail_in;
	zstream->in += in_consumed;
	zstream->in_len -= in_consumed;

	*out_len = out_offered - zstream->z.avail_out;

	return 0;
}
int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
{ {
int zflush = Z_FINISH;
size_t out_remain = *out_len; size_t out_remain = *out_len;
if (zstream->in_len && zstream->zerr == Z_STREAM_END) { if (zstream->in_len && zstream->zerr == Z_STREAM_END) {
...@@ -98,47 +141,17 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream) ...@@ -98,47 +141,17 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
} }
while (out_remain > 0 && zstream->zerr != Z_STREAM_END) { while (out_remain > 0 && zstream->zerr != Z_STREAM_END) {
size_t out_queued, in_queued, out_used, in_used; size_t out_written = out_remain;
/* set up in data */ if (git_zstream_get_output_chunk(out, &out_written, zstream) < 0)
zstream->z.next_in = (Bytef *)zstream->in;
zstream->z.avail_in = (uInt)zstream->in_len;
if ((size_t)zstream->z.avail_in != zstream->in_len) {
zstream->z.avail_in = UINT_MAX;
zflush = Z_NO_FLUSH;
} else {
zflush = Z_FINISH;
}
in_queued = (size_t)zstream->z.avail_in;
/* set up out data */
zstream->z.next_out = out;
zstream->z.avail_out = (uInt)out_remain;
if ((size_t)zstream->z.avail_out != out_remain)
zstream->z.avail_out = UINT_MAX;
out_queued = (size_t)zstream->z.avail_out;
/* compress next chunk */
if (zstream->type == GIT_ZSTREAM_INFLATE)
zstream->zerr = inflate(&zstream->z, zflush);
else
zstream->zerr = deflate(&zstream->z, zflush);
if (zstream_seterr(zstream))
return -1; return -1;
out_used = (out_queued - zstream->z.avail_out); out_remain -= out_written;
out_remain -= out_used; out = ((char *)out) + out_written;
out = ((char *)out) + out_used;
in_used = (in_queued - zstream->z.avail_in);
zstream->in_len -= in_used;
zstream->in += in_used;
} }
/* either we finished the input or we did not flush the data */ /* either we finished the input or we did not flush the data */
assert(zstream->in_len > 0 || zflush == Z_FINISH); assert(zstream->in_len > 0 || zstream->flush == Z_FINISH);
/* set out_size to number of bytes actually written to output */ /* set out_size to number of bytes actually written to output */
*out_len = *out_len - out_remain; *out_len = *out_len - out_remain;
......
...@@ -23,6 +23,7 @@ typedef struct { ...@@ -23,6 +23,7 @@ typedef struct {
git_zstream_t type; git_zstream_t type;
const char *in; const char *in;
size_t in_len; size_t in_len;
int flush;
int zerr; int zerr;
} git_zstream; } git_zstream;
...@@ -35,6 +36,11 @@ int git_zstream_set_input(git_zstream *zstream, const void *in, size_t in_len); ...@@ -35,6 +36,11 @@ int git_zstream_set_input(git_zstream *zstream, const void *in, size_t in_len);
size_t git_zstream_suggest_output_len(git_zstream *zstream); size_t git_zstream_suggest_output_len(git_zstream *zstream);
/* get as much output as is available in the input buffer */
int git_zstream_get_output_chunk(
void *out, size_t *out_len, git_zstream *zstream);
/* get all the output from the entire input buffer */
int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream); int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream);
bool git_zstream_done(git_zstream *zstream); bool git_zstream_done(git_zstream *zstream);
......
#include "clar_libgit2.h" #include "clar_libgit2.h"
#include "git2/odb_backend.h" #include "git2/odb_backend.h"
#include "hash.h"
#include "odb.h"
#define LARGEFILE_SIZE 5368709122
static git_repository *repo; static git_repository *repo;
static git_odb *odb; static git_odb *odb;
...@@ -25,7 +29,7 @@ static void writefile(git_oid *oid) ...@@ -25,7 +29,7 @@ static void writefile(git_oid *oid)
for (i = 0; i < 3041; i++) for (i = 0; i < 3041; i++)
cl_git_pass(git_buf_puts(&buf, "Hello, world.\n")); cl_git_pass(git_buf_puts(&buf, "Hello, world.\n"));
cl_git_pass(git_odb_open_wstream(&stream, odb, 5368709122, GIT_OBJ_BLOB)); cl_git_pass(git_odb_open_wstream(&stream, odb, LARGEFILE_SIZE, GIT_OBJ_BLOB));
for (i = 0; i < 126103; i++) for (i = 0; i < 126103; i++)
cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size)); cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size));
...@@ -63,6 +67,10 @@ void test_odb_largefiles__streamwrite(void) ...@@ -63,6 +67,10 @@ void test_odb_largefiles__streamwrite(void)
{ {
git_oid expected, oid; git_oid expected, oid;
#ifndef GIT_ARCH_64
cl_skip();
#endif
if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") || if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
!cl_is_env_set("GITTEST_SLOW")) !cl_is_env_set("GITTEST_SLOW"))
cl_skip(); cl_skip();
...@@ -73,6 +81,52 @@ void test_odb_largefiles__streamwrite(void) ...@@ -73,6 +81,52 @@ void test_odb_largefiles__streamwrite(void)
cl_assert_equal_oid(&expected, &oid); cl_assert_equal_oid(&expected, &oid);
} }
/*
 * Stream the large object produced by writefile() back out of the ODB.
 *
 * Verifies that the length and type reported when the read stream is
 * opened match what was written, that exactly LARGEFILE_SIZE bytes are
 * streamed, and that hashing the formatted object header followed by the
 * streamed bytes reproduces the object's id.
 */
void test_odb_largefiles__streamread(void)
{
	git_oid oid, read_oid;
	git_odb_stream *stream;
	char buf[10240];
	char hdr[64];
	size_t len, total = 0;
	git_hash_ctx hash;
	git_otype type;
	int hdr_len, ret;

#ifndef GIT_ARCH_64
	cl_skip();
#endif

	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
		!cl_is_env_set("GITTEST_SLOW"))
		cl_skip();

	writefile(&oid);

	cl_git_pass(git_odb_open_rstream(&stream, &len, &type, odb, &oid));

	cl_assert_equal_sz(LARGEFILE_SIZE, len);
	cl_assert_equal_i(GIT_OBJ_BLOB, type);

	cl_git_pass(git_hash_ctx_init(&hash));
	hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), len, type);

	cl_git_pass(git_hash_update(&hash, hdr, hdr_len));

	/* request a full buffer each time; the size is tied to buf itself */
	while ((ret = git_odb_stream_read(stream, buf, sizeof(buf))) > 0) {
		cl_git_pass(git_hash_update(&hash, buf, ret));
		total += ret;
	}

	/* the loop also ends on a negative result: that is an error, not EOF */
	cl_git_pass(ret);

	cl_assert_equal_sz(LARGEFILE_SIZE, total);

	cl_git_pass(git_hash_final(&read_oid, &hash));

	cl_assert_equal_oid(&oid, &read_oid);

	git_hash_ctx_cleanup(&hash);
	git_odb_stream_free(stream);
}
void test_odb_largefiles__read_into_memory(void) void test_odb_largefiles__read_into_memory(void)
{ {
git_oid oid; git_oid oid;
...@@ -112,3 +166,24 @@ void test_odb_largefiles__read_into_memory_rejected_on_32bit(void) ...@@ -112,3 +166,24 @@ void test_odb_largefiles__read_into_memory_rejected_on_32bit(void)
git_odb_object_free(obj); git_odb_object_free(obj);
} }
/*
 * Reading only the header of the large object written by writefile()
 * must report LARGEFILE_SIZE as its length and GIT_OBJ_BLOB as its type.
 */
void test_odb_largefiles__read_header(void)
{
	git_oid written;
	git_otype obj_type;
	size_t obj_size;

#ifndef GIT_ARCH_64
	cl_skip();
#endif

	/* only run when both environment variables are set */
	if (!(cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") &&
	      cl_is_env_set("GITTEST_SLOW")))
		cl_skip();

	writefile(&written);
	cl_git_pass(git_odb_read_header(&obj_size, &obj_type, odb, &written));

	cl_assert_equal_sz(LARGEFILE_SIZE, obj_size);
	cl_assert_equal_i(GIT_OBJ_BLOB, obj_type);
}
...@@ -55,6 +55,63 @@ static void test_read_object(object_data *data) ...@@ -55,6 +55,63 @@ static void test_read_object(object_data *data)
git_odb_free(odb); git_odb_free(odb);
} }
/*
 * Write the fixture object files, open the "test-objects" database and
 * check that reading just the object's header yields the fixture's
 * length and type.
 */
static void test_read_header(object_data *data)
{
	git_odb *objdb;
	git_oid oid;
	git_otype hdr_type;
	size_t hdr_len;

	write_object_files(data);

	cl_git_pass(git_odb_open(&objdb, "test-objects"));
	cl_git_pass(git_oid_fromstr(&oid, data->id));
	cl_git_pass(git_odb_read_header(&hdr_len, &hdr_type, objdb, &oid));

	cl_assert_equal_sz(data->dlen, hdr_len);
	cl_assert_equal_i(git_object_string2type(data->type), hdr_type);

	git_odb_free(objdb);
}
/*
 * Write the fixture object files, stream the object back requesting
 * `blocksize` bytes per read, and compare the reassembled object against
 * the fixture.
 *
 * `blocksize` is only the length requested from each read and may exceed
 * the size of the object; everything read is accumulated in a fixed local
 * buffer, so the object itself must fit in that buffer.
 */
static void test_readstream_object(object_data *data, size_t blocksize)
{
	git_oid id;
	git_odb *odb;
	git_odb_stream *stream;
	git_rawobj tmp;
	char buf[2048], *ptr = buf;
	size_t remain;
	int ret;

	write_object_files(data);

	cl_git_pass(git_odb_open(&odb, "test-objects"));
	cl_git_pass(git_oid_fromstr(&id, data->id));
	cl_git_pass(git_odb_open_rstream(&stream, &tmp.len, &tmp.type, odb, &id));

	/* fail cleanly instead of overrunning buf if a fixture is too large */
	cl_assert(tmp.len <= sizeof(buf));

	remain = tmp.len;

	while (remain) {
		ret = git_odb_stream_read(stream, ptr, blocksize);
		cl_assert(ret >= 0);

		/* a zero-length read ends the loop; the check below catches a short object */
		if (ret == 0)
			break;

		/* the stream must never hand back more than the advertised length */
		cl_assert(remain >= (size_t)ret);
		remain -= ret;
		ptr += ret;
	}

	cl_assert(remain == 0);

	tmp.data = buf;

	cmp_objects(&tmp, data);

	git_odb_stream_free(stream);
	git_odb_free(odb);
}
void test_odb_loose__initialize(void) void test_odb_loose__initialize(void)
{ {
p_fsync__cnt = 0; p_fsync__cnt = 0;
...@@ -103,6 +160,33 @@ void test_odb_loose__simple_reads(void) ...@@ -103,6 +160,33 @@ void test_odb_loose__simple_reads(void)
test_read_object(&some); test_read_object(&some);
} }
void test_odb_loose__streaming_reads(void)
{
size_t blocksizes[] = { 1, 2, 4, 16, 99, 1024, 123456789 };
size_t i;
for (i = 0; i < ARRAY_SIZE(blocksizes); i++) {
test_readstream_object(&commit, blocksizes[i]);
test_readstream_object(&tree, blocksizes[i]);
test_readstream_object(&tag, blocksizes[i]);
test_readstream_object(&zero, blocksizes[i]);
test_readstream_object(&one, blocksizes[i]);
test_readstream_object(&two, blocksizes[i]);
test_readstream_object(&some, blocksizes[i]);
}
}
void test_odb_loose__read_header(void)
{
test_read_header(&commit);
test_read_header(&tree);
test_read_header(&tag);
test_read_header(&zero);
test_read_header(&one);
test_read_header(&two);
test_read_header(&some);
}
void test_write_object_permission( void test_write_object_permission(
mode_t dir_mode, mode_t file_mode, mode_t dir_mode, mode_t file_mode,
mode_t expected_dir_mode, mode_t expected_file_mode) mode_t expected_dir_mode, mode_t expected_file_mode)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment