Commit bd670abd by Edward Thomson

Merge pull request #3226 from libgit2/cmn/racy-diff-again

racy-git, the missing link
parents 8351abc7 bb4896f2
......@@ -69,6 +69,9 @@ support for HTTPS connections insead of OpenSSL.
and `git_diff_buffers` now accept a new binary callback of type
`git_diff_binary_cb` that includes the binary diff information.
* The race condition mitigations described in `racy-git.txt` have been
implemented.
### API additions
* The `git_merge_options` gained a `file_flags` member.
......
......@@ -816,11 +816,11 @@ static int maybe_modified(
} else if (git_oid_iszero(&nitem->id) && new_is_workdir) {
bool use_ctime = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_CTIME) != 0);
bool use_nanos = ((diff->diffcaps & GIT_DIFFCAPS_TRUST_NANOSECS) != 0);
git_index *index;
git_iterator_index(&index, info->new_iter);
status = GIT_DELTA_UNMODIFIED;
/* TODO: add check against index file st_mtime to avoid racy-git */
if (S_ISGITLINK(nmode)) {
if ((error = maybe_modified_submodule(&status, &noid, diff, info)) < 0)
return error;
......@@ -839,7 +839,8 @@ static int maybe_modified(
!diff_time_eq(&oitem->ctime, &nitem->ctime, use_nanos)) ||
oitem->ino != nitem->ino ||
oitem->uid != nitem->uid ||
oitem->gid != nitem->gid)
oitem->gid != nitem->gid ||
(index && nitem->mtime.seconds >= index->stamp.mtime))
{
status = GIT_DELTA_MODIFIED;
modified_uncertain = true;
......
......@@ -688,20 +688,59 @@ int git_index__changed_relative_to(
return !!git_oid_cmp(&index->checksum, checksum);
}
static bool is_racy_timestamp(git_time_t stamp, git_index_entry *entry)
{
/* Git special-cases submodules in the check */
if (S_ISGITLINK(entry->mode))
return false;
/* If we never read the index, we can't have this race either */
if (stamp == 0)
return false;
/* If the timestamp is the same or newer than the index, it's racy */
return ((int32_t) stamp) <= entry->mtime.seconds;
}
/*
* Force the next diff to take a look at those entries which have the
* same timestamp as the current index.
*/
static void truncate_racily_clean(git_index *index)
static int truncate_racily_clean(git_index *index)
{
size_t i;
int error;
git_index_entry *entry;
git_time_t ts = index->stamp.mtime;
git_diff_options diff_opts = GIT_DIFF_OPTIONS_INIT;
git_diff *diff;
/* Nothing to do if there's no repo to talk about */
if (!INDEX_OWNER(index))
return 0;
/* If there's no workdir, we can't know where to even check */
if (!git_repository_workdir(INDEX_OWNER(index)))
return 0;
diff_opts.flags |= GIT_DIFF_INCLUDE_TYPECHANGE | GIT_DIFF_IGNORE_SUBMODULES | GIT_DIFF_DISABLE_PATHSPEC_MATCH;
git_vector_foreach(&index->entries, i, entry) {
if (entry->mtime.seconds == ts || ts == 0)
if (!is_racy_timestamp(ts, entry))
continue;
diff_opts.pathspec.count = 1;
diff_opts.pathspec.strings = (char **) &entry->path;
if ((error = git_diff_index_to_workdir(&diff, INDEX_OWNER(index), index, &diff_opts)) < 0)
return error;
if (git_diff_num_deltas(diff) > 0)
entry->file_size = 0;
git_diff_free(diff);
}
return 0;
}
int git_index_write(git_index *index)
......
......@@ -1762,6 +1762,18 @@ int git_iterator_current_workdir_path(git_buf **path, git_iterator *iter)
return 0;
}
int git_iterator_index(git_index **out, git_iterator *iter)
{
workdir_iterator *wi = (workdir_iterator *)iter;
if (iter->type != GIT_ITERATOR_TYPE_WORKDIR)
*out = NULL;
*out = wi->index;
return 0;
}
int git_iterator_advance_over_with_status(
const git_index_entry **entryptr,
git_iterator_status_t *status,
......
......@@ -11,6 +11,7 @@
#include "git2/index.h"
#include "vector.h"
#include "buffer.h"
#include "ignore.h"
typedef struct git_iterator git_iterator;
......@@ -286,4 +287,11 @@ typedef enum {
extern int git_iterator_advance_over_with_status(
const git_index_entry **entry, git_iterator_status_t *status, git_iterator *iter);
/**
* Retrieve the index stored in the iterator.
*
* Only implemented for the workdir iterator
*/
extern int git_iterator_index(git_index **out, git_iterator *iter);
#endif
#include "clar_libgit2.h"
#include "buffer.h"
static git_repository *g_repo;
void test_diff_racy__initialize(void)
{
cl_git_pass(git_repository_init(&g_repo, "diff_racy", false));
}
void test_diff_racy__cleanup(void)
{
cl_git_sandbox_cleanup();
}
void test_diff_racy__diff(void)
{
git_index *index;
git_diff *diff;
git_buf path = GIT_BUF_INIT;
cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
cl_git_mkfile(path.ptr, "A");
/* Put 'A' into the index */
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_add_bypath(index, "A"));
cl_git_pass(git_index_write(index));
cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
cl_assert_equal_i(0, git_diff_num_deltas(diff));
/* Change its contents quickly, so we get the same timestamp */
cl_git_mkfile(path.ptr, "B");
cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
cl_assert_equal_i(1, git_diff_num_deltas(diff));
}
......@@ -1623,6 +1623,8 @@ void test_diff_workdir__can_update_index(void)
/* now if we do it again, we should see fewer OID calculations */
/* tick again as the index updating from the previous diff might have reset the timestamp */
tick_index(index);
basic_diff_status(&diff, &opts);
cl_git_pass(git_diff_get_perfdata(&perf, diff));
......
#include "clar_libgit2.h"
#include "../checkout/checkout_helpers.h"
#include "buffer.h"
#include "index.h"
static git_repository *g_repo;
void test_index_racy__initialize(void)
{
cl_git_pass(git_repository_init(&g_repo, "diff_racy", false));
}
void test_index_racy__cleanup(void)
{
git_repository_free(g_repo);
g_repo = NULL;
cl_fixture_cleanup("diff_racy");
}
void test_index_racy__diff(void)
{
git_index *index;
git_diff *diff;
git_buf path = GIT_BUF_INIT;
cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
cl_git_mkfile(path.ptr, "A");
/* Put 'A' into the index */
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_add_bypath(index, "A"));
cl_git_pass(git_index_write(index));
cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
cl_assert_equal_i(0, git_diff_num_deltas(diff));
git_diff_free(diff);
/* Change its contents quickly, so we get the same timestamp */
cl_git_mkfile(path.ptr, "B");
cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
cl_assert_equal_i(1, git_diff_num_deltas(diff));
git_index_free(index);
git_diff_free(diff);
git_buf_free(&path);
}
void test_index_racy__write_index_just_after_file(void)
{
git_index *index;
git_diff *diff;
git_buf path = GIT_BUF_INIT;
struct timeval times[2];
/* Make sure we do have a timestamp */
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_write(index));
cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
cl_git_mkfile(path.ptr, "A");
/* Force the file's timestamp to be a second after we wrote the index */
times[0].tv_sec = index->stamp.mtime + 1;
times[0].tv_usec = 0;
times[1].tv_sec = index->stamp.mtime + 1;
times[1].tv_usec = 0;
cl_git_pass(p_utimes(path.ptr, times));
/*
* Put 'A' into the index, the size field will be filled,
* because the index' on-disk timestamp does not match the
* file's timestamp.
*/
cl_git_pass(git_index_add_bypath(index, "A"));
cl_git_pass(git_index_write(index));
cl_git_mkfile(path.ptr, "B");
/*
* Pretend this index' modification happend a second after the
* file update, and rewrite the file in that same second.
*/
times[0].tv_sec = index->stamp.mtime + 2;
times[0].tv_usec = 0;
times[1].tv_sec = index->stamp.mtime + 2;
times[0].tv_usec = 0;
cl_git_pass(p_utimes(git_index_path(index), times));
cl_git_pass(p_utimes(path.ptr, times));
cl_git_pass(git_index_read(index, true));
cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
cl_assert_equal_i(1, git_diff_num_deltas(diff));
git_buf_free(&path);
git_diff_free(diff);
git_index_free(index);
}
void test_index_racy__empty_file_after_smudge(void)
{
git_index *index;
git_diff *diff;
git_buf path = GIT_BUF_INIT;
int i, found_race = 0;
const git_index_entry *entry;
/* Make sure we do have a timestamp */
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_write(index));
cl_git_pass(git_buf_joinpath(&path, git_repository_workdir(g_repo), "A"));
/* Make sure writing the file, adding and rewriting happen in the same second */
for (i = 0; i < 10; i++) {
struct stat st;
cl_git_mkfile(path.ptr, "A");
cl_git_pass(git_index_add_bypath(index, "A"));
cl_git_mkfile(path.ptr, "B");
cl_git_pass(git_index_write(index));
cl_git_mkfile(path.ptr, "");
cl_git_pass(p_stat(path.ptr, &st));
cl_assert(entry = git_index_get_bypath(index, "A", 0));
if (entry->mtime.seconds == (int32_t) st.st_mtime) {
found_race = 1;
break;
}
}
if (!found_race)
cl_fail("failed to find race after 10 attempts");
cl_assert_equal_i(0, entry->file_size);
cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
cl_assert_equal_i(1, git_diff_num_deltas(diff));
}
......@@ -133,12 +133,25 @@ static void hack_index(char *files[])
struct stat statbuf;
git_buf path = GIT_BUF_INIT;
git_index_entry *entry;
struct timeval times[2];
time_t now;
size_t i;
/* Update the index to suggest that checkout placed these files on
* disk, keeping the object id but updating the cache, which will
* emulate a Git implementation's different filter.
*
* We set the file's timestamp to before now to pretend that
* it was an old checkout so we don't trigger the racy
* protections would would check the content.
*/
now = time(NULL);
times[0].tv_sec = now - 5;
times[0].tv_usec = 0;
times[1].tv_sec = now - 5;
times[1].tv_usec = 0;
for (i = 0, filename = files[i]; filename; filename = files[++i]) {
git_buf_clear(&path);
......@@ -146,6 +159,7 @@ static void hack_index(char *files[])
git_index_get_bypath(repo_index, filename, 0));
cl_git_pass(git_buf_printf(&path, "%s/%s", TEST_REPO_PATH, filename));
cl_git_pass(p_utimes(path.ptr, times));
cl_git_pass(p_stat(path.ptr, &statbuf));
entry->ctime.seconds = (git_time_t)statbuf.st_ctime;
......@@ -245,7 +259,6 @@ static int merge_differently_filtered_files(char *files[])
write_files(files);
hack_index(files);
repo_index->stamp.mtime = time(NULL) + 1;
cl_git_pass(git_index_write(repo_index));
error = merge_branch();
......
......@@ -985,6 +985,8 @@ void test_status_worktree__update_stat_cache_0(void)
opts.flags &= ~GIT_STATUS_OPT_UPDATE_INDEX;
/* tick again as the index updating from the previous diff might have reset the timestamp */
tick_index(index);
cl_git_pass(git_status_list_new(&status, repo, &opts));
check_status0(status);
cl_git_pass(git_status_list_get_perfdata(&perf, status));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment