Commit 28a0741f by Patrick Steinhardt

odb: verify object hashes

The upstream git.git project verifies objects when looking them up from
disk. This avoids scenarios where objects have somehow become corrupt on
disk, e.g. due to hardware failures or bit flips. While our mantra is
usually to follow upstream behavior, we do not do so in this case, as we
never check hashes of objects we have just read from disk.

To fix this, we create a new error class `GIT_EMISMATCH` which denotes
that we have looked up an object with a hashsum mismatch. `odb_read_1`
will then, after having read the object from its backend, hash the
object and compare the resulting hash to the expected hash. If hashes do
not match, it will return an error.

This obviously introduces another computation of checksums and could
potentially impact performance. Note though that we usually perform I/O
operations directly before doing this computation, and as such the
actual overhead should be drowned out by I/O. Running our test suite
seems to confirm this guess. On a Linux system with best-of-five
timings, we had 21.592s with the check enabled and 21.590s with the
check disabled. Note though that our test suite mostly contains very
small blobs only. It is expected that repositories with bigger blobs may
notice an increased hit by this check.

In addition to a new test, we also had to change the
odb::backend::nonrefreshing test suite, which now triggers a hashsum
mismatch when looking up the commit "deadbeef...". This is expected, as
the fake backend allocated inside of the test will return an empty
object for the OID "deadbeef...", which will obviously not hash back to
"deadbeef..." again. We can simply adjust the hash to equal the hash of
the empty object here to fix this test.
parent d59dabe5
......@@ -54,6 +54,7 @@ typedef enum {
GIT_PASSTHROUGH = -30, /**< Internal only */
GIT_ITEROVER = -31, /**< Signals end of iteration with iterator */
GIT_RETRY = -32, /**< Internal only */
GIT_EMISMATCH = -33, /**< Hashsum mismatch in object */
} git_error_code;
/**
......
......@@ -998,7 +998,9 @@ static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
size_t i;
git_rawobj raw;
git_odb_object *object;
git_oid hashed;
bool found = false;
int error;
if (!only_refreshed && odb_read_hardcoded(&raw, id) == 0)
found = true;
......@@ -1011,7 +1013,7 @@ static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
continue;
if (b->read != NULL) {
int error = b->read(&raw.data, &raw.len, &raw.type, b, id);
error = b->read(&raw.data, &raw.len, &raw.type, b, id);
if (error == GIT_PASSTHROUGH || error == GIT_ENOTFOUND)
continue;
......@@ -1025,12 +1027,24 @@ static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
if (!found)
return GIT_ENOTFOUND;
if ((error = git_odb_hash(&hashed, raw.data, raw.len, raw.type)) < 0)
goto out;
if (!git_oid_equal(id, &hashed)) {
error = git_odb__error_mismatch(id, &hashed);
goto out;
}
giterr_clear();
if ((object = odb_object__alloc(id, &raw)) == NULL)
return -1;
goto out;
*out = git_cache_store_raw(odb_cache(db), object);
return 0;
out:
if (error)
git__free(raw.data);
return error;
}
int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id)
......@@ -1411,6 +1425,19 @@ int git_odb_refresh(struct git_odb *db)
return 0;
}
/*
 * Record an ODB error reporting that the expected object id and the
 * actual object id differ. Always returns GIT_EMISMATCH.
 */
int git_odb__error_mismatch(const git_oid *expected, const git_oid *actual)
{
	char want_hex[GIT_OID_HEXSZ + 1];
	char got_hex[GIT_OID_HEXSZ + 1];

	/* Format both ids into local buffers so they can go into the message */
	git_oid_tostr(want_hex, sizeof(want_hex), expected);
	git_oid_tostr(got_hex, sizeof(got_hex), actual);

	giterr_set(GITERR_ODB,
		"object hash mismatch - expected %s but got %s",
		want_hex, got_hex);

	return GIT_EMISMATCH;
}
int git_odb__error_notfound(
const char *message, const git_oid *oid, size_t oid_len)
{
......
......@@ -96,6 +96,12 @@ int git_odb__hashfd_filtered(
*/
int git_odb__hashlink(git_oid *out, const char *path);
/**
 * Generate a GIT_EMISMATCH error for the ODB.
 *
 * Sets an ODB error message of the form
 * "object hash mismatch - expected <expected> but got <actual>".
 *
 * @param expected the object id that was expected
 * @param actual the object id that was actually obtained
 * @return GIT_EMISMATCH
 */
int git_odb__error_mismatch(
const git_oid *expected, const git_oid *actual);
/*
* Generate a GIT_ENOTFOUND error for the ODB.
*/
......
......@@ -92,3 +92,25 @@ void test_object_lookup__lookup_corrupt_object_returns_error(void)
git_buf_free(&contents);
}
/*
 * A loose object stored under a file name that does not match the hash of
 * its contents must be rejected with GIT_EMISMATCH on lookup.
 */
void test_object_lookup__lookup_object_with_wrong_hash_returns_error(void)
{
	const char *src_rel = "objects/8e/73b769e97678d684b809b163bebdae2911720f";
	const char *dst_rel = "objects/8e/73b769e97678d684b809b163bebdae2911720e";
	const char *bogus_id = "8e73b769e97678d684b809b163bebdae2911720e";
	git_buf src = GIT_BUF_INIT;
	git_buf dst = GIT_BUF_INIT;
	git_oid wanted;
	git_object *found;

	cl_git_pass(git_oid_fromstr(&wanted, bogus_id));

	/*
	 * Duplicate the existing loose object under a path whose id differs
	 * from the source path in the last hex digit only.
	 */
	cl_git_pass(git_buf_joinpath(&src, git_repository_path(g_repo), src_rel));
	cl_git_pass(git_buf_joinpath(&dst, git_repository_path(g_repo), dst_rel));
	cl_git_pass(git_futils_cp(src.ptr, dst.ptr, 0644));

	/* The lookup by the bogus id has to report a hashsum mismatch */
	cl_git_fail_with(GIT_EMISMATCH,
		git_object_lookup(&found, g_repo, &wanted, GIT_OBJ_COMMIT));

	git_buf_free(&src);
	git_buf_free(&dst);
}
......@@ -18,6 +18,7 @@ static fake_backend *_fake;
static git_oid _oid;
/* Placeholder object id; the fake backend's (empty) object does not hash to it */
#define HASH "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
/* Object id of the empty object, for tests whose lookup must pass hash verification */
#define EMPTY_HASH "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
static int fake_backend__exists(git_odb_backend *backend, const git_oid *oid)
{
......@@ -225,7 +226,7 @@ void test_odb_backend_nonrefreshing__read_is_invoked_once_on_success(void)
{
git_object *obj;
setup_repository_and_backend(GIT_OK, HASH);
setup_repository_and_backend(GIT_OK, EMPTY_HASH);
cl_git_pass(git_object_lookup(&obj, _repo, &_oid, GIT_OBJ_ANY));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment