Unverified Commit e14bf97e by Edward Thomson Committed by GitHub

Merge pull request #4443 from libgit2/ethomson/large_loose_blobs

Inflate large loose blobs
parents 083b1a2e 456e5218
......@@ -16,6 +16,8 @@ struct git_hash_ctx {
CC_SHA1_CTX c;
};
#define CC_LONG_MAX ((CC_LONG)-1)
#define git_hash_global_init() 0
#define git_hash_ctx_init(ctx) git_hash_init(ctx)
#define git_hash_ctx_cleanup(ctx)
......@@ -27,10 +29,21 @@ GIT_INLINE(int) git_hash_init(git_hash_ctx *ctx)
return 0;
}
GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *data, size_t len)
GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *_data, size_t len)
{
const unsigned char *data = _data;
assert(ctx);
CC_SHA1_Update(&ctx->c, data, len);
while (len > 0) {
CC_LONG chunk = (len > CC_LONG_MAX) ? CC_LONG_MAX : (CC_LONG)len;
CC_SHA1_Update(&ctx->c, data, chunk);
data += chunk;
len -= chunk;
}
return 0;
}
......
......@@ -136,12 +136,21 @@ GIT_INLINE(int) hash_cryptoapi_init(git_hash_ctx *ctx)
return 0;
}
GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *data, size_t len)
GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *_data, size_t len)
{
const BYTE *data = (BYTE *)_data;
assert(ctx->ctx.cryptoapi.valid);
if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, (const BYTE *)data, (DWORD)len, 0))
return -1;
while (len > 0) {
DWORD chunk = (len > MAXDWORD) ? MAXDWORD : (DWORD)len;
if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, data, chunk, 0))
return -1;
data += chunk;
len -= chunk;
}
return 0;
}
......@@ -202,10 +211,19 @@ GIT_INLINE(int) hash_cng_init(git_hash_ctx *ctx)
return 0;
}
GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *data, size_t len)
GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *_data, size_t len)
{
if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, (PBYTE)data, (ULONG)len, 0) < 0)
return -1;
PBYTE data = (PBYTE)_data;
while (len > 0) {
ULONG chunk = (len > ULONG_MAX) ? ULONG_MAX : (ULONG)len;
if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, data, chunk, 0) < 0)
return -1;
data += chunk;
len -= chunk;
}
return 0;
}
......
......@@ -236,13 +236,22 @@ const char *git_object_type2string(git_otype type)
/*
 * Look up an object type from a NUL-terminated name.
 *
 * A NULL `str` yields GIT_OBJ_BAD; otherwise the lookup is delegated to
 * git_object_stringn2type() using strlen(str) as the length.
 */
git_otype git_object_string2type(const char *str)
{
	return str ? git_object_stringn2type(str, strlen(str)) : GIT_OBJ_BAD;
}
git_otype git_object_stringn2type(const char *str, size_t len)
{
size_t i;
if (!str || !*str)
if (!str || !len || !*str)
return GIT_OBJ_BAD;
for (i = 0; i < ARRAY_SIZE(git_objects_table); i++)
if (!strcmp(str, git_objects_table[i].str))
if (*git_objects_table[i].str &&
!git__prefixncmp(str, len, git_objects_table[i].str))
return (git_otype)i;
return GIT_OBJ_BAD;
......
......@@ -30,6 +30,8 @@ int git_object__from_odb_object(
int git_object__resolve_to_type(git_object **obj, git_otype type);
git_otype git_object_stringn2type(const char *str, size_t len);
int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header);
void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid);
......
......@@ -16,6 +16,7 @@
#include "delta.h"
#include "filebuf.h"
#include "object.h"
#include "zstream.h"
#include "git2/odb_backend.h"
#include "git2/types.h"
......@@ -119,53 +120,58 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
return used;
}
static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
static int parse_header(
obj_hdr *out,
size_t *out_len,
const unsigned char *_data,
size_t data_len)
{
char c, typename[10];
size_t size, used = 0;
const char *data = (char *)_data;
size_t i, typename_len, size_idx, size_len;
int64_t size;
/*
* type name string followed by space.
*/
while ((c = data[used]) != ' ') {
typename[used++] = c;
if (used >= sizeof(typename))
return 0;
*out_len = 0;
/* find the object type name */
for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) {
if (data[i] == ' ')
break;
}
typename[used] = 0;
if (used == 0)
return 0;
hdr->type = git_object_string2type(typename);
used++; /* consume the space */
/*
* length follows immediately in decimal (without
* leading zeros).
*/
size = data[used++] - '0';
if (size > 9)
return 0;
if (size) {
while ((c = data[used]) != '\0') {
size_t d = c - '0';
if (d > 9)
break;
used++;
size = size * 10 + d;
}
if (typename_len == data_len)
goto on_error;
out->type = git_object_stringn2type(data, typename_len);
size_idx = typename_len + 1;
for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) {
if (data[i] == '\0')
break;
}
hdr->size = size;
/*
* the length must be followed by a zero byte
*/
if (data[used++] != '\0')
return 0;
if (i == data_len)
goto on_error;
return used;
}
if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
size < 0)
goto on_error;
if ((uint64_t)size > SIZE_MAX) {
giterr_set(GITERR_OBJECT, "object is larger than available memory");
return -1;
}
out->size = size;
if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
goto on_error;
return 0;
on_error:
giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
return -1;
}
/***********************************************************
*
......@@ -269,45 +275,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
return 0;
}
/*
 * Allocate the buffer that will hold an inflated object body
 * (hdr->size bytes plus one spare byte) and fill it.
 *
 * `hb` is the head buffer that has already been inflated into and
 * `used` is the number of its bytes consumed by the object header.
 * Anything inflated past the header is copied to the start of the new
 * buffer, then the rest of the stream is inflated directly behind it.
 *
 * Returns the new buffer, or NULL when the allocation fails or when
 * finish_inflate() reports an error.
 */
static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
{
	unsigned char *head = hb;
	unsigned char *out;
	size_t have, out_size;

	if (GIT_ADD_SIZET_OVERFLOW(&out_size, hdr->size, 1) ||
	    (out = git__malloc(out_size)) == NULL) {
		inflateEnd(s);
		return NULL;
	}

	/* body bytes that were already inflated along with the header */
	have = s->total_out - used;

	if (used > 0 && have > 0) {
		/* never copy more than the size announced by the header */
		if (have > hdr->size)
			have = hdr->size;

		memcpy(out, head + used, have);
	}

	/* more data on hand than the header announced: nothing to inflate */
	if (have > hdr->size) {
		inflateEnd(s);
		return out;
	}

	/* inflate the remainder of the object data, if any */
	set_stream_output(s, out + have, hdr->size - have);

	if (finish_inflate(s)) {
		git__free(out);
		return NULL;
	}

	return out;
}
/*
* At one point, there was a loose object format that was intended to
* mimic the format used in pack-files. This was to allow easy copying
......@@ -354,43 +321,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
{
unsigned char head[64], *buf;
z_stream zs;
git_zstream zstream = GIT_ZSTREAM_INIT;
unsigned char head[64], *body = NULL;
size_t decompressed, head_len, body_len, alloc_size;
obj_hdr hdr;
size_t used;
int error;
/*
* check for a pack-like loose object
*/
/* check for a pack-like loose object */
if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
return inflate_packlike_loose_disk_obj(out, obj);
if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
goto done;
decompressed = sizeof(head);
/*
* inflate the initial part of the io buffer in order
* to parse the object header (type and size).
*/
if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK ||
(used = get_object_header(&hdr, head)) == 0 ||
!git_object_typeisloose(hdr.type))
{
abort_inflate(&zs);
* inflate the initial part of the compressed buffer in order to parse the
* header; read the largest header possible, then push back the remainder.
*/
if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
goto done;
if (!git_object_typeisloose(hdr.type)) {
giterr_set(GITERR_ODB, "failed to inflate disk object");
return -1;
error = -1;
goto done;
}
/*
* allocate a buffer and inflate the object data into it
* (including the initial sequence in the head buffer).
*/
if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
return -1;
buf[hdr.size] = '\0';
if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
(body = git__malloc(alloc_size)) == NULL) {
error = -1;
goto done;
}
out->data = buf;
assert(decompressed >= head_len);
body_len = decompressed - head_len;
if (body_len)
memcpy(body, head + head_len, body_len);
decompressed = hdr.size - body_len;
if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
goto done;
if (!git_zstream_done(&zstream)) {
giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
error = -1;
goto done;
}
body[hdr.size] = '\0';
out->data = body;
out->len = hdr.size;
out->type = hdr.type;
return 0;
done:
if (error < 0)
git__free(body);
git_zstream_free(&zstream);
return error;
}
......@@ -435,6 +433,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
git_file fd;
z_stream zs;
obj_hdr header_obj;
size_t header_len;
unsigned char raw_buffer[16], inflated_buffer[64];
assert(out && loc);
......@@ -460,7 +459,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
}
if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
|| get_object_header(&header_obj, inflated_buffer) == 0
|| parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
|| git_object_typeisloose(header_obj.type) == 0)
{
giterr_set(GITERR_ZLIB, "failed to read loose object header");
......
......@@ -252,35 +252,47 @@ void git__strtolower(char *str)
git__strntolower(str, strlen(str));
}
int git__prefixcmp(const char *str, const char *prefix)
GIT_INLINE(int) prefixcmp(const char *str, size_t str_n, const char *prefix, bool icase)
{
for (;;) {
unsigned char p = *(prefix++), s;
int s, p;
while (str_n--) {
s = (unsigned char)*str++;
p = (unsigned char)*prefix++;
if (icase) {
s = git__tolower(s);
p = git__tolower(p);
}
if (!p)
return 0;
if ((s = *(str++)) != p)
if (s != p)
return s - p;
}
return (0 - *prefix);
}
int git__prefixcmp_icase(const char *str, const char *prefix)
int git__prefixcmp(const char *str, const char *prefix)
{
return strncasecmp(str, prefix, strlen(prefix));
return prefixcmp(str, SIZE_MAX, prefix, false);
}
int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix)
int git__prefixncmp(const char *str, size_t str_n, const char *prefix)
{
int s, p;
while(str_n--) {
s = (unsigned char)git__tolower(*str++);
p = (unsigned char)git__tolower(*prefix++);
return prefixcmp(str, str_n, prefix, false);
}
if (s != p)
return s - p;
}
int git__prefixcmp_icase(const char *str, const char *prefix)
{
return prefixcmp(str, SIZE_MAX, prefix, true);
}
return (0 - *prefix);
int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix)
{
return prefixcmp(str, str_n, prefix, true);
}
int git__suffixcmp(const char *str, const char *suffix)
......
......@@ -180,6 +180,7 @@ GIT_INLINE(void) git__free(void *ptr)
extern int git__prefixcmp(const char *str, const char *prefix);
extern int git__prefixcmp_icase(const char *str, const char *prefix);
extern int git__prefixncmp(const char *str, size_t str_n, const char *prefix);
extern int git__prefixncmp_icase(const char *str, size_t str_n, const char *prefix);
extern int git__suffixcmp(const char *str, const char *suffix);
......
......@@ -14,17 +14,22 @@
#define ZSTREAM_BUFFER_SIZE (1024 * 1024)
#define ZSTREAM_BUFFER_MIN_EXTRA 8
static int zstream_seterr(git_zstream *zs)
GIT_INLINE(int) zstream_seterr(git_zstream *zs)
{
if (zs->zerr == Z_OK || zs->zerr == Z_STREAM_END)
switch (zs->zerr) {
case Z_OK:
case Z_STREAM_END:
case Z_BUF_ERROR: /* not fatal; we retry with a larger buffer */
return 0;
if (zs->zerr == Z_MEM_ERROR)
case Z_MEM_ERROR:
giterr_set_oom();
else if (zs->z.msg)
giterr_set_str(GITERR_ZLIB, zs->z.msg);
else
giterr_set(GITERR_ZLIB, "unknown compression error");
break;
default:
if (zs->z.msg)
giterr_set_str(GITERR_ZLIB, zs->z.msg);
else
giterr_set(GITERR_ZLIB, "unknown compression error");
}
return -1;
}
......@@ -98,8 +103,9 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
/* set up in data */
zstream->z.next_in = (Bytef *)zstream->in;
zstream->z.avail_in = (uInt)zstream->in_len;
if ((size_t)zstream->z.avail_in != zstream->in_len) {
zstream->z.avail_in = INT_MAX;
zstream->z.avail_in = UINT_MAX;
zflush = Z_NO_FLUSH;
} else {
zflush = Z_FINISH;
......@@ -110,7 +116,7 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
zstream->z.next_out = out;
zstream->z.avail_out = (uInt)out_remain;
if ((size_t)zstream->z.avail_out != out_remain)
zstream->z.avail_out = INT_MAX;
zstream->z.avail_out = UINT_MAX;
out_queued = (size_t)zstream->z.avail_out;
/* compress next chunk */
......@@ -119,8 +125,8 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
else
zstream->zerr = deflate(&zstream->z, zflush);
if (zstream->zerr == Z_STREAM_ERROR)
return zstream_seterr(zstream);
if (zstream_seterr(zstream))
return -1;
out_used = (out_queued - zstream->z.avail_out);
out_remain -= out_used;
......
......@@ -40,6 +40,48 @@ void test_core_string__2(void)
cl_assert(git__strcasesort_cmp("fooBar", "foobar") < 0);
}
/* length-bounded, case-sensitive prefix comparison */
void test_core_string__prefixncmp(void)
{
	/* empty string and/or empty prefix */
	cl_assert(git__prefixncmp("", 0, "") == 0);
	cl_assert(git__prefixncmp("a", 1, "") == 0);
	cl_assert(git__prefixncmp("", 0, "a") < 0);

	/* single characters order by byte value */
	cl_assert(git__prefixncmp("a", 1, "b") < 0);
	cl_assert(git__prefixncmp("b", 1, "a") > 0);

	/* one-character prefix, with the full and a truncated length */
	cl_assert(git__prefixncmp("ab", 2, "a") == 0);
	cl_assert(git__prefixncmp("ab", 1, "a") == 0);

	/* two-character prefixes, including strings shorter than the prefix */
	cl_assert(git__prefixncmp("ab", 2, "ac") < 0);
	cl_assert(git__prefixncmp("a", 1, "ac") < 0);
	cl_assert(git__prefixncmp("ab", 1, "ac") < 0);
	cl_assert(git__prefixncmp("ab", 2, "aa") > 0);
	cl_assert(git__prefixncmp("ab", 1, "aa") < 0);
}
/* length-bounded, case-insensitive prefix comparison */
void test_core_string__prefixncmp_icase(void)
{
	/* empty string and/or empty prefix */
	cl_assert(git__prefixncmp_icase("", 0, "") == 0);
	cl_assert(git__prefixncmp_icase("a", 1, "") == 0);
	cl_assert(git__prefixncmp_icase("", 0, "a") < 0);

	/* single characters: ordering must not depend on case */
	cl_assert(git__prefixncmp_icase("a", 1, "b") < 0);
	cl_assert(git__prefixncmp_icase("A", 1, "b") < 0);
	cl_assert(git__prefixncmp_icase("a", 1, "B") < 0);
	cl_assert(git__prefixncmp_icase("b", 1, "a") > 0);
	cl_assert(git__prefixncmp_icase("B", 1, "a") > 0);
	cl_assert(git__prefixncmp_icase("b", 1, "A") > 0);

	/* one-character prefix, in either case and with a truncated length */
	cl_assert(git__prefixncmp_icase("ab", 2, "a") == 0);
	cl_assert(git__prefixncmp_icase("Ab", 2, "a") == 0);
	cl_assert(git__prefixncmp_icase("ab", 2, "A") == 0);
	cl_assert(git__prefixncmp_icase("ab", 1, "a") == 0);

	/* two-character prefixes, including strings shorter than the prefix */
	cl_assert(git__prefixncmp_icase("ab", 2, "ac") < 0);
	cl_assert(git__prefixncmp_icase("Ab", 2, "ac") < 0);
	cl_assert(git__prefixncmp_icase("ab", 2, "Ac") < 0);
	cl_assert(git__prefixncmp_icase("a", 1, "ac") < 0);
	cl_assert(git__prefixncmp_icase("ab", 1, "ac") < 0);
	cl_assert(git__prefixncmp_icase("ab", 2, "aa") > 0);
	cl_assert(git__prefixncmp_icase("ab", 1, "aa") < 0);
}
void test_core_string__strcmp(void)
{
cl_assert(git__strcmp("", "") == 0);
......
#include "clar_libgit2.h"
#include "git2/odb_backend.h"
static git_repository *repo;
static git_odb *odb;
/*
 * Suite setup: initialise the "testrepo.git" sandbox and keep a handle
 * to its object database in the file-level `odb` for the tests to use.
 */
void test_odb_largefiles__initialize(void)
{
	repo = cl_git_sandbox_init("testrepo.git");

	cl_git_pass(git_repository_odb(&odb, repo));
}
/*
 * Suite teardown: release the object database handle first, then tear
 * down the sandbox it was obtained from.
 */
void test_odb_largefiles__cleanup(void)
{
	git_odb_free(odb);

	cl_git_sandbox_cleanup();
}
/*
 * Stream a blob of 5368709122 bytes (5 GiB + 2) into the object
 * database and store the resulting object id in `oid`.
 *
 * The payload is the 14-byte line "Hello, world.\n" repeated: 3041
 * lines form one 42574-byte chunk, and that chunk is written 126103
 * times (42574 * 126103 == 5368709122), so only a single chunk ever has
 * to be held in memory.
 */
static void writefile(git_oid *oid)
{
	/* plain automatic variable: no state needs to survive between calls */
	git_odb_stream *stream;
	git_buf buf = GIT_BUF_INIT;
	size_t i;

	/* build one chunk of 3041 lines */
	for (i = 0; i < 3041; i++)
		cl_git_pass(git_buf_puts(&buf, "Hello, world.\n"));

	/* the total size has to be announced when the stream is opened */
	cl_git_pass(git_odb_open_wstream(&stream, odb, 5368709122, GIT_OBJ_BLOB));

	for (i = 0; i < 126103; i++)
		cl_git_pass(git_odb_stream_write(stream, buf.ptr, buf.size));

	cl_git_pass(git_odb_stream_finalize_write(oid, stream));

	git_odb_stream_free(stream);
	git_buf_free(&buf);
}
/*
 * Build the whole large blob in memory (3041 * 126103 copies of
 * "Hello, world.\n") and write it with a single git_odb_write() call,
 * then check the resulting object id against the known value.
 *
 * Skipped unless GIT_ARCH_64 is defined and the GITTEST_INVASIVE_FS_SIZE,
 * GITTEST_INVASIVE_MEMORY and GITTEST_SLOW environment variables are set.
 */
void test_odb_largefiles__write_from_memory(void)
{
	git_oid expected, oid;
	git_buf buf = GIT_BUF_INIT;
	size_t i;

#ifndef GIT_ARCH_64
	cl_skip();
#endif

	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
	    !cl_is_env_set("GITTEST_INVASIVE_MEMORY") ||
	    !cl_is_env_set("GITTEST_SLOW"))
		cl_skip();

	for (i = 0; i < (3041*126103); i++)
		cl_git_pass(git_buf_puts(&buf, "Hello, world.\n"));

	/* fail loudly on a bad id instead of comparing against garbage */
	cl_git_pass(git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c"));
	cl_git_pass(git_odb_write(&oid, odb, buf.ptr, buf.size, GIT_OBJ_BLOB));

	/* release the multi-gigabyte buffer before the final assertion */
	git_buf_free(&buf);

	cl_assert_equal_oid(&expected, &oid);
}
/*
 * Write the large blob through the streaming interface (see writefile())
 * and check the resulting object id against the known value.
 *
 * Skipped unless the GITTEST_INVASIVE_FS_SIZE and GITTEST_SLOW
 * environment variables are set.
 */
void test_odb_largefiles__streamwrite(void)
{
	git_oid expected, oid;

	if (!cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") ||
	    !cl_is_env_set("GITTEST_SLOW"))
		cl_skip();

	/* fail loudly on a bad id instead of comparing against garbage */
	cl_git_pass(git_oid_fromstr(&expected, "3fb56989cca483b21ba7cb0a6edb229d10e1c26c"));

	writefile(&oid);

	cl_assert_equal_oid(&expected, &oid);
}
/*
 * Write the large blob, then read it back through git_odb_read(); the
 * read is expected to succeed.
 *
 * Skipped unless GIT_ARCH_64 is defined and the GITTEST_INVASIVE_FS_SIZE,
 * GITTEST_INVASIVE_MEMORY and GITTEST_SLOW environment variables are set.
 */
void test_odb_largefiles__read_into_memory(void)
{
	git_odb_object *obj;
	git_oid oid;

#ifndef GIT_ARCH_64
	cl_skip();
#endif

	/* run only when every opt-in variable is present */
	if (!(cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") &&
	      cl_is_env_set("GITTEST_INVASIVE_MEMORY") &&
	      cl_is_env_set("GITTEST_SLOW")))
		cl_skip();

	writefile(&oid);

	cl_git_pass(git_odb_read(&obj, odb, &oid));
	git_odb_object_free(obj);
}
/*
 * Counterpart of the read-into-memory test for builds where GIT_ARCH_64
 * is not defined: write the large blob and expect git_odb_read() to fail.
 *
 * Skipped when GIT_ARCH_64 is defined, or when any of the
 * GITTEST_INVASIVE_FS_SIZE, GITTEST_INVASIVE_MEMORY and GITTEST_SLOW
 * environment variables is missing.
 */
void test_odb_largefiles__read_into_memory_rejected_on_32bit(void)
{
	git_odb_object *obj = NULL;
	git_oid oid;

#ifdef GIT_ARCH_64
	cl_skip();
#endif

	/* run only when every opt-in variable is present */
	if (!(cl_is_env_set("GITTEST_INVASIVE_FS_SIZE") &&
	      cl_is_env_set("GITTEST_INVASIVE_MEMORY") &&
	      cl_is_env_set("GITTEST_SLOW")))
		cl_skip();

	writefile(&oid);

	cl_git_fail(git_odb_read(&obj, odb, &oid));

	/* obj was initialised to NULL above, so it holds a defined value here */
	git_odb_object_free(obj);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment