Unverified Commit 12b53eb0 by Edward Thomson Committed by GitHub

Merge pull request #6128 from libgit2/ethomson/object_validation

Introduce `git_object_rawcontent_is_valid`
parents 6fdb1b2f 9f03ebd1
...@@ -224,6 +224,28 @@ GIT_EXTERN(int) git_object_peel( ...@@ -224,6 +224,28 @@ GIT_EXTERN(int) git_object_peel(
*/ */
GIT_EXTERN(int) git_object_dup(git_object **dest, git_object *source); GIT_EXTERN(int) git_object_dup(git_object **dest, git_object *source);
/**
* Analyzes a buffer of raw object content and determines its validity.
* Tree, commit, and tag objects will be parsed and ensured that they
* are valid, parseable content. (Blobs are always valid by definition.)
* An error message will be set with an informative message if the object
* is not valid.
*
* @warning This function is experimental and its signature may change in
* the future.
*
* @param valid Output pointer to set with validity of the object content
* @param buf The contents to validate
* @param len The length of the buffer
* @param type The type of the object in the buffer
* @return 0 on success or an error code
*/
GIT_EXTERN(int) git_object_rawcontent_is_valid(
int *valid,
const char *buf,
size_t len,
git_object_t type);
/** @} */ /** @} */
GIT_END_DECL GIT_END_DECL
......
...@@ -71,7 +71,7 @@ GIT_EXTERN(int) git_signature_default(git_signature **out, git_repository *repo) ...@@ -71,7 +71,7 @@ GIT_EXTERN(int) git_signature_default(git_signature **out, git_repository *repo)
* *
* @param out new signature * @param out new signature
* @param buf signature string * @param buf signature string
* @return 0 on success, or an error code * @return 0 on success, GIT_EINVALID if the signature is not parseable, or an error code
*/ */
GIT_EXTERN(int) git_signature_from_buffer(git_signature **out, const char *buf); GIT_EXTERN(int) git_signature_from_buffer(git_signature **out, const char *buf);
......
...@@ -395,6 +395,7 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig ...@@ -395,6 +395,7 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig
git_oid parent_id; git_oid parent_id;
size_t header_len; size_t header_len;
git_signature dummy_sig; git_signature dummy_sig;
int error;
GIT_ASSERT_ARG(commit); GIT_ASSERT_ARG(commit);
GIT_ASSERT_ARG(data); GIT_ASSERT_ARG(data);
...@@ -431,14 +432,14 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig ...@@ -431,14 +432,14 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig
commit->author = git__malloc(sizeof(git_signature)); commit->author = git__malloc(sizeof(git_signature));
GIT_ERROR_CHECK_ALLOC(commit->author); GIT_ERROR_CHECK_ALLOC(commit->author);
if (git_signature__parse(commit->author, &buffer, buffer_end, "author ", '\n') < 0) if ((error = git_signature__parse(commit->author, &buffer, buffer_end, "author ", '\n')) < 0)
return -1; return error;
} }
/* Some tools create multiple author fields, ignore the extra ones */ /* Some tools create multiple author fields, ignore the extra ones */
while (!git__prefixncmp(buffer, buffer_end - buffer, "author ")) { while (!git__prefixncmp(buffer, buffer_end - buffer, "author ")) {
if (git_signature__parse(&dummy_sig, &buffer, buffer_end, "author ", '\n') < 0) if ((error = git_signature__parse(&dummy_sig, &buffer, buffer_end, "author ", '\n')) < 0)
return -1; return error;
git__free(dummy_sig.name); git__free(dummy_sig.name);
git__free(dummy_sig.email); git__free(dummy_sig.email);
...@@ -448,8 +449,8 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig ...@@ -448,8 +449,8 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig
commit->committer = git__malloc(sizeof(git_signature)); commit->committer = git__malloc(sizeof(git_signature));
GIT_ERROR_CHECK_ALLOC(commit->committer); GIT_ERROR_CHECK_ALLOC(commit->committer);
if (git_signature__parse(commit->committer, &buffer, buffer_end, "committer ", '\n') < 0) if ((error = git_signature__parse(commit->committer, &buffer, buffer_end, "committer ", '\n')) < 0)
return -1; return error;
if (flags & GIT_COMMIT_PARSE_QUICK) if (flags & GIT_COMMIT_PARSE_QUICK)
return 0; return 0;
...@@ -493,7 +494,7 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig ...@@ -493,7 +494,7 @@ static int commit_parse(git_commit *commit, const char *data, size_t size, unsig
bad_buffer: bad_buffer:
git_error_set(GIT_ERROR_OBJECT, "failed to parse bad commit object"); git_error_set(GIT_ERROR_OBJECT, "failed to parse bad commit object");
return -1; return GIT_EINVALID;
} }
int git_commit__parse_raw(void *commit, const char *data, size_t size) int git_commit__parse_raw(void *commit, const char *data, size_t size)
...@@ -971,8 +972,10 @@ int git_commit_create_with_signature( ...@@ -971,8 +972,10 @@ int git_commit_create_with_signature(
/* The first step is to verify that all the tree and parents exist */ /* The first step is to verify that all the tree and parents exist */
parsed = git__calloc(1, sizeof(git_commit)); parsed = git__calloc(1, sizeof(git_commit));
GIT_ERROR_CHECK_ALLOC(parsed); GIT_ERROR_CHECK_ALLOC(parsed);
if ((error = commit_parse(parsed, commit_content, strlen(commit_content), 0)) < 0) if (commit_parse(parsed, commit_content, strlen(commit_content), 0) < 0) {
error = -1;
goto cleanup; goto cleanup;
}
if ((error = validate_tree_and_parents(&parents, repo, &parsed->tree_id, commit_parent_from_commit, parsed, NULL, true)) < 0) if ((error = validate_tree_and_parents(&parents, repo, &parsed->tree_id, commit_parent_from_commit, parsed, NULL, true)) < 0)
goto cleanup; goto cleanup;
......
...@@ -124,16 +124,15 @@ static int commit_quick_parse( ...@@ -124,16 +124,15 @@ static int commit_quick_parse(
{ {
git_oid *parent_oid; git_oid *parent_oid;
git_commit *commit; git_commit *commit;
int error;
size_t i; size_t i;
commit = git__calloc(1, sizeof(*commit)); commit = git__calloc(1, sizeof(*commit));
GIT_ERROR_CHECK_ALLOC(commit); GIT_ERROR_CHECK_ALLOC(commit);
commit->object.repo = walk->repo; commit->object.repo = walk->repo;
if ((error = git_commit__parse_ext(commit, obj, GIT_COMMIT_PARSE_QUICK)) < 0) { if (git_commit__parse_ext(commit, obj, GIT_COMMIT_PARSE_QUICK) < 0) {
git__free(commit); git__free(commit);
return error; return -1;
} }
if (!git__is_uint16(git_array_size(commit->parent_ids))) { if (!git__is_uint16(git_array_size(commit->parent_ids))) {
......
...@@ -340,7 +340,7 @@ static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) ...@@ -340,7 +340,7 @@ static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
{ {
git_object *object; git_object *object;
git_oid *expected; git_oid *expected;
int error; int error = 0;
if (obj->type != GIT_OBJECT_BLOB && if (obj->type != GIT_OBJECT_BLOB &&
obj->type != GIT_OBJECT_TREE && obj->type != GIT_OBJECT_TREE &&
...@@ -348,8 +348,14 @@ static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) ...@@ -348,8 +348,14 @@ static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
obj->type != GIT_OBJECT_TAG) obj->type != GIT_OBJECT_TAG)
return 0; return 0;
if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) if (git_object__from_raw(&object, obj->data, obj->len, obj->type) < 0) {
/*
* parse_raw returns EINVALID on invalid data; downgrade
* that to a normal -1 error code.
*/
error = -1;
goto out; goto out;
}
if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) { if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) {
git_oidmap_delete(idx->expected_oids, &object->cached.oid); git_oidmap_delete(idx->expected_oids, &object->cached.oid);
......
...@@ -144,12 +144,17 @@ int git_object__from_odb_object( ...@@ -144,12 +144,17 @@ int git_object__from_odb_object(
def = &git_objects_table[odb_obj->cached.type]; def = &git_objects_table[odb_obj->cached.type];
GIT_ASSERT(def->free && def->parse); GIT_ASSERT(def->free && def->parse);
if ((error = def->parse(object, odb_obj)) < 0) if ((error = def->parse(object, odb_obj)) < 0) {
/*
* parse returns EINVALID on invalid data; downgrade
* that to a normal -1 error code.
*/
def->free(object); def->free(object);
else return -1;
*object_out = git_cache_store_parsed(&repo->objects, object); }
return error; *object_out = git_cache_store_parsed(&repo->objects, object);
return 0;
} }
void git_object__free(void *obj) void git_object__free(void *obj)
...@@ -562,3 +567,35 @@ bool git_object__is_valid( ...@@ -562,3 +567,35 @@ bool git_object__is_valid(
return true; return true;
} }
int git_object_rawcontent_is_valid(
int *valid,
const char *buf,
size_t len,
git_object_t type)
{
git_object *obj = NULL;
int error;
GIT_ASSERT_ARG(valid);
GIT_ASSERT_ARG(buf);
/* Blobs are always valid; don't bother parsing. */
if (type == GIT_OBJECT_BLOB) {
*valid = 1;
return 0;
}
error = git_object__from_raw(&obj, buf, len, type);
git_object_free(obj);
if (error == 0) {
*valid = 1;
return 0;
} else if (error == GIT_EINVALID) {
*valid = 0;
return 0;
}
return error;
}
...@@ -23,6 +23,12 @@ void git_signature_free(git_signature *sig) ...@@ -23,6 +23,12 @@ void git_signature_free(git_signature *sig)
git__free(sig); git__free(sig);
} }
static int signature_parse_error(const char *msg)
{
git_error_set(GIT_ERROR_INVALID, "failed to parse signature - %s", msg);
return GIT_EINVALID;
}
static int signature_error(const char *msg) static int signature_error(const char *msg)
{ {
git_error_set(GIT_ERROR_INVALID, "failed to parse signature - %s", msg); git_error_set(GIT_ERROR_INVALID, "failed to parse signature - %s", msg);
...@@ -206,13 +212,13 @@ int git_signature__parse(git_signature *sig, const char **buffer_out, ...@@ -206,13 +212,13 @@ int git_signature__parse(git_signature *sig, const char **buffer_out,
if (ender && if (ender &&
(buffer_end = memchr(buffer, ender, buffer_end - buffer)) == NULL) (buffer_end = memchr(buffer, ender, buffer_end - buffer)) == NULL)
return signature_error("no newline given"); return signature_parse_error("no newline given");
if (header) { if (header) {
const size_t header_len = strlen(header); const size_t header_len = strlen(header);
if (buffer + header_len >= buffer_end || memcmp(buffer, header, header_len) != 0) if (buffer + header_len >= buffer_end || memcmp(buffer, header, header_len) != 0)
return signature_error("expected prefix doesn't match actual"); return signature_parse_error("expected prefix doesn't match actual");
buffer += header_len; buffer += header_len;
} }
...@@ -221,7 +227,7 @@ int git_signature__parse(git_signature *sig, const char **buffer_out, ...@@ -221,7 +227,7 @@ int git_signature__parse(git_signature *sig, const char **buffer_out,
email_end = git__memrchr(buffer, '>', buffer_end - buffer); email_end = git__memrchr(buffer, '>', buffer_end - buffer);
if (!email_start || !email_end || email_end <= email_start) if (!email_start || !email_end || email_end <= email_start)
return signature_error("malformed e-mail"); return signature_parse_error("malformed e-mail");
email_start += 1; email_start += 1;
sig->name = extract_trimmed(buffer, email_start - buffer - 1); sig->name = extract_trimmed(buffer, email_start - buffer - 1);
...@@ -237,7 +243,7 @@ int git_signature__parse(git_signature *sig, const char **buffer_out, ...@@ -237,7 +243,7 @@ int git_signature__parse(git_signature *sig, const char **buffer_out,
git__free(sig->name); git__free(sig->name);
git__free(sig->email); git__free(sig->email);
sig->name = sig->email = NULL; sig->name = sig->email = NULL;
return signature_error("invalid Unix timestamp"); return signature_parse_error("invalid Unix timestamp");
} }
/* do we have a timezone? */ /* do we have a timezone? */
......
...@@ -62,7 +62,7 @@ const char *git_tag_message(const git_tag *t) ...@@ -62,7 +62,7 @@ const char *git_tag_message(const git_tag *t)
static int tag_error(const char *str) static int tag_error(const char *str)
{ {
git_error_set(GIT_ERROR_TAG, "failed to parse tag: %s", str); git_error_set(GIT_ERROR_TAG, "failed to parse tag: %s", str);
return -1; return GIT_EINVALID;
} }
static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end) static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end)
...@@ -73,6 +73,7 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end) ...@@ -73,6 +73,7 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end)
size_t text_len, alloc_len; size_t text_len, alloc_len;
const char *search; const char *search;
unsigned int i; unsigned int i;
int error;
if (git_oid__parse(&tag->target, &buffer, buffer_end, "object ") < 0) if (git_oid__parse(&tag->target, &buffer, buffer_end, "object ") < 0)
return tag_error("object field invalid"); return tag_error("object field invalid");
...@@ -130,8 +131,8 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end) ...@@ -130,8 +131,8 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end)
tag->tagger = git__malloc(sizeof(git_signature)); tag->tagger = git__malloc(sizeof(git_signature));
GIT_ERROR_CHECK_ALLOC(tag->tagger); GIT_ERROR_CHECK_ALLOC(tag->tagger);
if (git_signature__parse(tag->tagger, &buffer, buffer_end, "tagger ", '\n') < 0) if ((error = git_signature__parse(tag->tagger, &buffer, buffer_end, "tagger ", '\n')) < 0)
return -1; return error;
} }
tag->message = NULL; tag->message = NULL;
......
...@@ -351,15 +351,26 @@ size_t git_treebuilder_entrycount(git_treebuilder *bld) ...@@ -351,15 +351,26 @@ size_t git_treebuilder_entrycount(git_treebuilder *bld)
return git_strmap_size(bld->map); return git_strmap_size(bld->map);
} }
static int tree_error(const char *str, const char *path) GIT_INLINE(void) set_error(const char *str, const char *path)
{ {
if (path) if (path)
git_error_set(GIT_ERROR_TREE, "%s - %s", str, path); git_error_set(GIT_ERROR_TREE, "%s - %s", str, path);
else else
git_error_set(GIT_ERROR_TREE, "%s", str); git_error_set(GIT_ERROR_TREE, "%s", str);
}
static int tree_error(const char *str, const char *path)
{
set_error(str, path);
return -1; return -1;
} }
static int tree_parse_error(const char *str, const char *path)
{
set_error(str, path);
return GIT_EINVALID;
}
static int parse_mode(uint16_t *mode_out, const char *buffer, size_t buffer_len, const char **buffer_out) static int parse_mode(uint16_t *mode_out, const char *buffer, size_t buffer_len, const char **buffer_out)
{ {
int32_t mode; int32_t mode;
...@@ -399,19 +410,19 @@ int git_tree__parse_raw(void *_tree, const char *data, size_t size) ...@@ -399,19 +410,19 @@ int git_tree__parse_raw(void *_tree, const char *data, size_t size)
uint16_t attr; uint16_t attr;
if (parse_mode(&attr, buffer, buffer_end - buffer, &buffer) < 0 || !buffer) if (parse_mode(&attr, buffer, buffer_end - buffer, &buffer) < 0 || !buffer)
return tree_error("failed to parse tree: can't parse filemode", NULL); return tree_parse_error("failed to parse tree: can't parse filemode", NULL);
if (buffer >= buffer_end || (*buffer++) != ' ') if (buffer >= buffer_end || (*buffer++) != ' ')
return tree_error("failed to parse tree: missing space after filemode", NULL); return tree_parse_error("failed to parse tree: missing space after filemode", NULL);
if ((nul = memchr(buffer, 0, buffer_end - buffer)) == NULL) if ((nul = memchr(buffer, 0, buffer_end - buffer)) == NULL)
return tree_error("failed to parse tree: object is corrupted", NULL); return tree_parse_error("failed to parse tree: object is corrupted", NULL);
if ((filename_len = nul - buffer) == 0 || filename_len > UINT16_MAX) if ((filename_len = nul - buffer) == 0 || filename_len > UINT16_MAX)
return tree_error("failed to parse tree: can't parse filename", NULL); return tree_parse_error("failed to parse tree: can't parse filename", NULL);
if ((buffer_end - (nul + 1)) < GIT_OID_RAWSZ) if ((buffer_end - (nul + 1)) < GIT_OID_RAWSZ)
return tree_error("failed to parse tree: can't parse OID", NULL); return tree_parse_error("failed to parse tree: can't parse OID", NULL);
/* Allocate the entry */ /* Allocate the entry */
{ {
...@@ -434,16 +445,15 @@ int git_tree__parse_raw(void *_tree, const char *data, size_t size) ...@@ -434,16 +445,15 @@ int git_tree__parse_raw(void *_tree, const char *data, size_t size)
int git_tree__parse(void *_tree, git_odb_object *odb_obj) int git_tree__parse(void *_tree, git_odb_object *odb_obj)
{ {
git_tree *tree = _tree; git_tree *tree = _tree;
const char *data = git_odb_object_data(odb_obj);
size_t size = git_odb_object_size(odb_obj);
int error;
if ((git_tree__parse_raw(tree, if ((error = git_tree__parse_raw(tree, data, size)) < 0 ||
git_odb_object_data(odb_obj), (error = git_odb_object_dup(&tree->odb_obj, odb_obj)) < 0)
git_odb_object_size(odb_obj))) < 0) return error;
return -1;
if (git_odb_object_dup(&tree->odb_obj, odb_obj) < 0)
return -1;
return 0; return error;
} }
static size_t find_next_dir(const char *dirname, git_index *index, size_t start) static size_t find_next_dir(const char *dirname, git_index *index, size_t start)
......
#include "clar_libgit2.h"
#define VALID_COMMIT "tree bdd24e358576f1baa275df98cdcaf3ac9a3f4233\n" \
"parent d6d956f1d66210bfcd0484166befab33b5987a39\n" \
"author Edward Thomson <ethomson@edwardthomson.com> 1638286404 -0500\n" \
"committer Edward Thomson <ethomson@edwardthomson.com> 1638324642 -0500\n" \
"\n" \
"commit go here.\n"
#define VALID_TREE "100644 HEADER\0\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42"
#define INVALID_COMMIT "tree bdd24e358576f1baa275df98cdcaf3ac9a3f4233\n" \
"parent d6d956f1d66210bfcd0484166befab33b5987a39\n" \
"committer Edward Thomson <ethomson@edwardthomson.com> 1638324642 -0500\n" \
"\n" \
"commit go here.\n"
#define INVALID_TREE "100644 HEADER \x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42"
void test_object_validate__valid(void)
{
int valid;
cl_git_pass(git_object_rawcontent_is_valid(&valid, "", 0, GIT_OBJECT_BLOB));
cl_assert_equal_i(1, valid);
cl_git_pass(git_object_rawcontent_is_valid(&valid, "foobar", 0, GIT_OBJECT_BLOB));
cl_assert_equal_i(1, valid);
cl_git_pass(git_object_rawcontent_is_valid(&valid, VALID_COMMIT, CONST_STRLEN(VALID_COMMIT), GIT_OBJECT_COMMIT));
cl_assert_equal_i(1, valid);
cl_git_pass(git_object_rawcontent_is_valid(&valid, VALID_TREE, CONST_STRLEN(VALID_TREE), GIT_OBJECT_TREE));
cl_assert_equal_i(1, valid);
}
void test_object_validate__invalid(void)
{
int valid;
cl_git_pass(git_object_rawcontent_is_valid(&valid, "", 0, GIT_OBJECT_COMMIT));
cl_assert_equal_i(0, valid);
cl_git_pass(git_object_rawcontent_is_valid(&valid, "foobar", 0, GIT_OBJECT_COMMIT));
cl_assert_equal_i(0, valid);
cl_git_pass(git_object_rawcontent_is_valid(&valid, INVALID_COMMIT, CONST_STRLEN(INVALID_COMMIT), GIT_OBJECT_COMMIT));
cl_assert_equal_i(0, valid);
cl_git_pass(git_object_rawcontent_is_valid(&valid, INVALID_TREE, CONST_STRLEN(INVALID_TREE), GIT_OBJECT_TREE));
cl_assert_equal_i(0, valid);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment