Commit 9ac79ecc by Patrick Steinhardt

blob: implement function to parse raw data

Currently, parsing objects is strictly tied to having an ODB object
available. This makes it hard to parse an object when all that is
available is its raw object and size. Furthermore, hacking around that
limitation by directly creating an ODB structure either on stack or on
heap does not really work that well due to ODB objects being reference
counted and then automatically free'd when reaching a reference count of
zero.

On some occasions parsing raw objects without touching the ODB
is actually required, though. One use case is, for example, object
verification, where we want to assure that an object is valid before
inserting it into the ODB or writing it into the git repository.

As a first step towards that, introduce a distinction between raw and ODB
objects for blobs. Creation of ODB objects stays the same by simply
using `git_blob__parse`, but a new function `git_blob__parse_raw` has
been added that creates a blob from a pair of data and size. By setting
a new flag inside of the blob, we can now distinguish whether it is a
raw or ODB object and treat it accordingly in several places.

Note that the blob data passed in is not being copied. Because of that,
callers need to make sure to keep it alive during the blob's lifetime.
This is being used to avoid unnecessarily increasing the memory
footprint when parsing largish blobs.
parent bbbe8441
......@@ -19,13 +19,19 @@
/*
 * Return the pointer to the data backing `blob`.
 *
 * NOTE(review): this is a diff hunk rendered without +/- markers. The
 * first `return` (through blob->odb_object) appears to be the line the
 * commit removes and the if/else below it appears to be its replacement
 * -- confirm against the original diff.
 *
 * In the if/else form, a blob whose `raw` flag is set yields
 * blob->data.raw.data; any other blob yields the result of
 * git_odb_object_data() on blob->data.odb.
 */
const void *git_blob_rawcontent(const git_blob *blob)
{
assert(blob);
return git_odb_object_data(blob->odb_object);
/* Raw blobs carry their data pointer directly; no ODB object is consulted. */
if (blob->raw)
return blob->data.raw.data;
else
return git_odb_object_data(blob->data.odb);
}
/*
 * Return the size of the data backing `blob` as a git_off_t.
 *
 * NOTE(review): this is a diff hunk rendered without +/- markers. The
 * first `return` (through blob->odb_object) appears to be the line the
 * commit removes and the if/else below it appears to be its replacement
 * -- confirm against the original diff.
 *
 * In the if/else form, a blob whose `raw` flag is set yields
 * blob->data.raw.size; any other blob yields git_odb_object_size() on
 * blob->data.odb, cast to git_off_t.
 */
git_off_t git_blob_rawsize(const git_blob *blob)
{
assert(blob);
return (git_off_t)git_odb_object_size(blob->odb_object);
/* Raw blobs store their size directly; no ODB object is consulted. */
if (blob->raw)
return blob->data.raw.size;
else
return (git_off_t)git_odb_object_size(blob->data.odb);
}
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
......@@ -36,17 +42,31 @@ int git_blob__getbuf(git_buf *buffer, git_blob *blob)
git_blob_rawsize(blob));
}
/*
 * Free a blob passed in as an untyped pointer.
 *
 * NOTE(review): this is a diff hunk rendered without +/- markers. The
 * first signature line (parameter `blob`) and the unconditional
 * git_odb_object_free(...->odb_object) call appear to be the removed
 * lines; the second signature (parameter `_blob`) and the remaining
 * statements appear to be the replacement -- confirm against the
 * original diff.
 */
void git_blob__free(void *blob)
void git_blob__free(void *_blob)
{
git_odb_object_free(((git_blob *)blob)->odb_object);
git_blob *blob = (git_blob *) _blob;
/*
 * Only a non-raw blob hands data.odb to git_odb_object_free(). For a raw
 * blob nothing besides the blob itself is freed here; per the commit
 * message the raw data is not copied and must be kept alive by the caller.
 */
if (!blob->raw)
git_odb_object_free(blob->data.odb);
git__free(blob);
}
/*
 * NOTE(review): this is a diff hunk rendered without +/- markers. The
 * git_blob__parse signature line directly below appears to be a removed
 * line (a git_blob__parse definition with `_blob` follows this function)
 * -- confirm against the original diff.
 */
int git_blob__parse(void *blob, git_odb_object *odb_obj)
/*
 * Set up a blob from a caller-provided data/size pair.
 *
 * _blob: the blob to fill in, passed as an untyped pointer; asserted
 *        non-NULL after the cast.
 * data:  pointer stored as-is in blob->data.raw.data. It is not copied,
 *        so (per the commit message) the caller must keep it alive for
 *        the blob's lifetime.
 * size:  stored in blob->data.raw.size.
 *
 * Sets blob->raw to 1 and always returns 0.
 */
int git_blob__parse_raw(void *_blob, const char *data, size_t size)
{
git_blob *blob = (git_blob *) _blob;
assert(blob);
blob->raw = 1;
blob->data.raw.data = data;
blob->data.raw.size = size;
return 0;
}
/*
 * Set up a blob from an ODB object.
 *
 * _blob:   the blob to fill in, passed as an untyped pointer; asserted
 *          non-NULL after the cast.
 * odb_obj: the ODB object to attach; it is passed to
 *          git_cached_obj_incref() and then stored in the blob.
 *
 * Sets blob->raw to 0 and always returns 0.
 *
 * NOTE(review): this is a diff hunk rendered without +/- markers. The
 * assignment to ->odb_object appears to be the removed line, and the
 * `raw`/`data.odb` assignments its replacement -- confirm against the
 * original diff.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
git_blob *blob = (git_blob *) _blob;
assert(blob);
/* presumably takes a reference that git_blob__free later drops via git_odb_object_free -- TODO confirm */
git_cached_obj_incref((git_cached_obj *)odb_obj);
((git_blob *)blob)->odb_object = odb_obj;
blob->raw = 0;
blob->data.odb = odb_obj;
return 0;
}
......
......@@ -16,11 +16,20 @@
/*
 * A blob whose data comes either from an ODB object or from a raw
 * data/size pair; the one-bit `raw` flag tells which union member of
 * `data` is in use.
 *
 * NOTE(review): this is a diff hunk rendered without +/- markers. The
 * `odb_object` member appears to be the removed line and the union plus
 * the `raw` flag its replacement -- confirm against the original diff.
 */
struct git_blob {
git_object object;
git_odb_object *odb_object;
union {
/* Used when `raw` is 0: the ODB object backing the blob. */
git_odb_object *odb;
/* Used when `raw` is 1: caller-provided data pointer and its size. */
struct {
const char *data;
git_off_t size;
} raw;
} data;
/* 1 selects data.raw, 0 selects data.odb. */
unsigned int raw:1;
};
/* Free a blob passed as an untyped pointer. */
void git_blob__free(void *blob);
/* Set up `blob` from the ODB object `obj`. */
int git_blob__parse(void *blob, git_odb_object *obj);
/*
 * Set up `blob` from a data/size pair. Per the commit message the data is
 * not copied, so the caller must keep it alive for the blob's lifetime.
 */
int git_blob__parse_raw(void *blob, const char *data, size_t size);
/* NOTE(review): body not fully visible in this hunk; presumably fills `buffer` with the blob's content -- confirm. */
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
extern int git_blob__create_from_paths(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment