Commit ddefea75 by Edward Thomson

odb: support large loose objects

zlib will only inflate/deflate an `int`'s worth of data at a time.
We need to loop through large files in order to ensure that we inflate
the entire file, not just an `int`'s worth of data.  Thankfully, we
already have this loop in our `git_zstream` layer.  Handle large objects
using the `git_zstream`.
parent d1e44655
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "delta.h" #include "delta.h"
#include "filebuf.h" #include "filebuf.h"
#include "object.h" #include "object.h"
#include "zstream.h"
#include "git2/odb_backend.h" #include "git2/odb_backend.h"
#include "git2/types.h" #include "git2/types.h"
...@@ -119,53 +120,53 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj) ...@@ -119,53 +120,53 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
return used; return used;
} }
static size_t get_object_header(obj_hdr *hdr, unsigned char *data) static int parse_header(
obj_hdr *out,
size_t *out_len,
const unsigned char *_data,
size_t data_len)
{ {
char c, typename[10]; const char *data = (char *)_data;
size_t size, used = 0; size_t i, typename_len, size_idx, size_len;
int64_t size;
/* *out_len = 0;
* type name string followed by space.
*/
while ((c = data[used]) != ' ') {
typename[used++] = c;
if (used >= sizeof(typename))
return 0;
}
typename[used] = 0;
if (used == 0)
return 0;
hdr->type = git_object_string2type(typename);
used++; /* consume the space */
/* /* find the object type name */
* length follows immediately in decimal (without for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) {
* leading zeros). if (data[i] == ' ')
*/
size = data[used++] - '0';
if (size > 9)
return 0;
if (size) {
while ((c = data[used]) != '\0') {
size_t d = c - '0';
if (d > 9)
break; break;
used++;
size = size * 10 + d;
} }
if (typename_len == data_len)
goto on_error;
out->type = git_object_stringn2type(data, typename_len);
size_idx = typename_len + 1;
for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) {
if (data[i] == '\0')
break;
} }
hdr->size = size;
/* if (i == data_len)
* the length must be followed by a zero byte goto on_error;
*/
if (data[used++] != '\0')
return 0;
return used; if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
} size < 0)
goto on_error;
out->size = size;
if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
goto on_error;
return 0;
on_error:
giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
return -1;
}
/*********************************************************** /***********************************************************
* *
...@@ -269,45 +270,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen) ...@@ -269,45 +270,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
return 0; return 0;
} }
static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
{
unsigned char *buf, *head = hb;
size_t tail, alloc_size;
/*
* allocate a buffer to hold the inflated data and copy the
* initial sequence of inflated data from the tail of the
* head buffer, if any.
*/
if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) ||
(buf = git__malloc(alloc_size)) == NULL) {
inflateEnd(s);
return NULL;
}
tail = s->total_out - used;
if (used > 0 && tail > 0) {
if (tail > hdr->size)
tail = hdr->size;
memcpy(buf, head + used, tail);
}
used = tail;
/*
* inflate the remainder of the object data, if any
*/
if (hdr->size < used)
inflateEnd(s);
else {
set_stream_output(s, buf + used, hdr->size - used);
if (finish_inflate(s)) {
git__free(buf);
return NULL;
}
}
return buf;
}
/* /*
* At one point, there was a loose object format that was intended to * At one point, there was a loose object format that was intended to
* mimic the format used in pack-files. This was to allow easy copying * mimic the format used in pack-files. This was to allow easy copying
...@@ -354,43 +316,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj) ...@@ -354,43 +316,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
static int inflate_disk_obj(git_rawobj *out, git_buf *obj) static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
{ {
unsigned char head[64], *buf; git_zstream zstream = GIT_ZSTREAM_INIT;
z_stream zs; unsigned char head[64], *body = NULL;
size_t decompressed, head_len, body_len, alloc_size;
obj_hdr hdr; obj_hdr hdr;
size_t used; int error;
/* /* check for a pack-like loose object */
* check for a pack-like loose object
*/
if (!is_zlib_compressed_data((unsigned char *)obj->ptr)) if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
return inflate_packlike_loose_disk_obj(out, obj); return inflate_packlike_loose_disk_obj(out, obj);
if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
goto done;
decompressed = sizeof(head);
/* /*
* inflate the initial part of the io buffer in order * inflate the initial part of the compressed buffer in order to parse the
* to parse the object header (type and size). * header; read the largest header possible, then push back the remainder.
*/ */
if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK || if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
(used = get_object_header(&hdr, head)) == 0 || (error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
!git_object_typeisloose(hdr.type)) goto done;
{
abort_inflate(&zs); if (!git_object_typeisloose(hdr.type)) {
giterr_set(GITERR_ODB, "failed to inflate disk object"); giterr_set(GITERR_ODB, "failed to inflate disk object");
return -1; error = -1;
goto done;
} }
/* /*
* allocate a buffer and inflate the object data into it * allocate a buffer and inflate the object data into it
* (including the initial sequence in the head buffer). * (including the initial sequence in the head buffer).
*/ */
if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL) if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
return -1; (body = git__malloc(alloc_size)) == NULL) {
buf[hdr.size] = '\0'; error = -1;
goto done;
}
out->data = buf; assert(decompressed >= head_len);
body_len = decompressed - head_len;
if (body_len)
memcpy(body, head + head_len, body_len);
decompressed = hdr.size - body_len;
if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
goto done;
if (!git_zstream_done(&zstream)) {
giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
error = -1;
goto done;
}
body[hdr.size] = '\0';
out->data = body;
out->len = hdr.size; out->len = hdr.size;
out->type = hdr.type; out->type = hdr.type;
return 0; done:
if (error < 0)
git__free(body);
git_zstream_free(&zstream);
return error;
} }
...@@ -435,6 +428,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) ...@@ -435,6 +428,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
git_file fd; git_file fd;
z_stream zs; z_stream zs;
obj_hdr header_obj; obj_hdr header_obj;
size_t header_len;
unsigned char raw_buffer[16], inflated_buffer[64]; unsigned char raw_buffer[16], inflated_buffer[64];
assert(out && loc); assert(out && loc);
...@@ -460,7 +454,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc) ...@@ -460,7 +454,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
} }
if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR) if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
|| get_object_header(&header_obj, inflated_buffer) == 0 || parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
|| git_object_typeisloose(header_obj.type) == 0) || git_object_typeisloose(header_obj.type) == 0)
{ {
giterr_set(GITERR_ZLIB, "failed to read loose object header"); giterr_set(GITERR_ZLIB, "failed to read loose object header");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment