Commit 322c15ee by lhchavez

Make the pack and mwindow implementations data-race-free

This change fixes a packfile heap corruption that can happen when
interacting with multiple packfiles concurrently across multiple
threads. This is exacerbated by setting a lower mwindow open file limit.

This change:

* Renames most of the internal methods in pack.c to clearly indicate
  that they expect to be called with a certain lock held, making
  reasoning about the state of locks a bit easier.
* Splits the `git_pack_file` lock in two: the one in `git_pack_file`
  only protects the `index_map`. The protection for `git_mwindow_file`
  now lives in that struct itself.
* Explicitly checks for freshness of the `git_pack_file` in
  `git_packfile_unpack_header`: this allows the mwindow implementation
  to close files whenever there is enough cache pressure, and
  `git_packfile_unpack_header` will reopen the packfile if needed.
* After a call to `p_munmap()`, the `data` and `len` fields are poisoned
  with `NULL` and `0` respectively, so that use-after-frees are more
  evident and crash rather than risking heap corruption.
* Adds a test case to prevent this from regressing in the future.

Fixes: #5591
parent 4ae41f9c
......@@ -3,6 +3,19 @@
# consistent lock hierarchy that is easy to understand.
deadlock:attr_cache_lock
# git_mwindow_file_register has the possibility of evicting some files from the
# global cache. In order to avoid races and closing files that are currently
# being accessed, before evicting any file it will attempt to acquire that
# file's lock. Finally, git_mwindow_file_register is typically called with a
# file lock held, because the caller will use the fd in the mwf immediately
# after registering it. This causes ThreadSanitizer to observe different orders
# of acquisition of the mutex (which implies a possibility of a deadlock),
# _but_ since the files are added to the cache after other files have been
# evicted, there cannot be a case where mwf A is trying to be registered while
# evicting mwf B concurrently and vice versa: at most one of them can be present
# in the cache.
deadlock:git_mwindow_file_register
# When invoking the time/timezone functions from git_signature_now(), they
# access libc methods that need to be instrumented to correctly analyze the
# data races.
......
......@@ -24,8 +24,6 @@
#include "zstream.h"
#include "object.h"
extern git_mutex git__mwindow_mutex;
size_t git_indexer__max_objects = UINT32_MAX;
#define UINT31_MAX (0x7FFFFFFF)
......@@ -679,7 +677,7 @@ static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
return GIT_EBUFS;
if (!idx->have_stream) {
error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
error = git_packfile_unpack_header(&entry_size, &type, idx->pack, &w, &idx->off);
if (error == GIT_EBUFS) {
idx->off = entry_start;
return error;
......@@ -970,7 +968,7 @@ static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
continue;
curpos = delta->delta_off;
error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos);
if (error < 0)
return error;
......@@ -1333,13 +1331,7 @@ void git_indexer_free(git_indexer *idx)
git_vector_free_deep(&idx->deltas);
if (!git_mutex_lock(&git__mwindow_mutex)) {
if (!idx->pack_committed)
git_packfile_close(idx->pack, true);
git_packfile_free(idx->pack);
git_mutex_unlock(&git__mwindow_mutex);
}
git_packfile_free(idx->pack, !idx->pack_committed);
iter = 0;
while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)
......
......@@ -29,10 +29,10 @@ size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE;
size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT;
size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT;
/* Mutex to control access */
/* Mutex to control access to `git_mwindow__mem_ctl` and `git__pack_cache`. */
git_mutex git__mwindow_mutex;
/* Whenever you want to read or modify this, grab git__mwindow_mutex */
/* Whenever you want to read or modify this, grab `git__mwindow_mutex` */
git_mwindow_ctl git_mwindow__mem_ctl;
/* Global list of mwindow files, to open packs once across repos */
......@@ -95,10 +95,9 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path)
error = git_strmap_set(git__pack_cache, pack->pack_name, pack);
git_mutex_unlock(&git__mwindow_mutex);
if (error < 0) {
git_packfile_free(pack);
return -1;
git_packfile_free(pack, false);
return error;
}
*out = pack;
......@@ -108,6 +107,7 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path)
int git_mwindow_put_pack(struct git_pack_file *pack)
{
int count, error;
struct git_pack_file *pack_to_delete = NULL;
if ((error = git_mutex_lock(&git__mwindow_mutex)) < 0)
return error;
......@@ -121,34 +121,19 @@ int git_mwindow_put_pack(struct git_pack_file *pack)
count = git_atomic_dec(&pack->refcount);
if (count == 0) {
git_strmap_delete(git__pack_cache, pack->pack_name);
git_packfile_free(pack);
pack_to_delete = pack;
}
git_mutex_unlock(&git__mwindow_mutex);
return 0;
}
git_packfile_free(pack_to_delete, false);
int git_mwindow_free_all(git_mwindow_file *mwf)
{
int error;
if (git_mutex_lock(&git__mwindow_mutex)) {
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
return -1;
}
error = git_mwindow_free_all_locked(mwf);
git_mutex_unlock(&git__mwindow_mutex);
return error;
return 0;
}
/*
* Free all the windows in a sequence, typically because we're done
* with the file
* with the file. Needs to hold the git__mwindow_mutex.
*/
int git_mwindow_free_all_locked(git_mwindow_file *mwf)
static int git_mwindow_free_all_locked(git_mwindow_file *mwf)
{
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
size_t i;
......@@ -184,6 +169,22 @@ int git_mwindow_free_all_locked(git_mwindow_file *mwf)
return 0;
}
/*
 * Locking variant of git_mwindow_free_all_locked(): takes
 * `git__mwindow_mutex`, runs the locked helper on `mwf`, and releases the
 * mutex again.
 *
 * Returns -1 (with a GIT_ERROR_THREAD error set) when the mutex cannot be
 * acquired; otherwise returns whatever the locked helper returned.
 */
int git_mwindow_free_all(git_mwindow_file *mwf)
{
	int result;

	if (git_mutex_lock(&git__mwindow_mutex) != 0) {
		git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
		return -1;
	}

	result = git_mwindow_free_all_locked(mwf);

	git_mutex_unlock(&git__mwindow_mutex);

	return result;
}
/*
* Check if a window 'win' contains the address 'offset'
*/
......@@ -256,9 +257,9 @@ static bool git_mwindow_scan_recently_used(
/*
* Close the least recently used window (that is currently not being used) out
* of all the files. Called under lock from new_window.
* of all the files. Called under lock from new_window_locked.
*/
static int git_mwindow_close_lru_window(void)
static int git_mwindow_close_lru_window_locked(void)
{
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
git_mwindow_file *cur;
......@@ -292,13 +293,13 @@ static int git_mwindow_close_lru_window(void)
}
/*
* Close the file that does not have any open windows AND whose
* Finds the file that does not have any open windows AND whose
* most-recently-used window is the least-recently used one across all
* currently open files.
*
* Called under lock from new_window.
* Called under lock from new_window_locked.
*/
static int git_mwindow_close_lru_file(void)
static int git_mwindow_find_lru_file_locked(git_mwindow_file **out)
{
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
git_mwindow_file *lru_file = NULL, *current_file = NULL;
......@@ -320,15 +321,12 @@ static int git_mwindow_close_lru_file(void)
return -1;
}
git_mwindow_free_all_locked(lru_file);
p_close(lru_file->fd);
lru_file->fd = -1;
*out = lru_file;
return 0;
}
/* This gets called under lock from git_mwindow_open */
static git_mwindow *new_window(
static git_mwindow *new_window_locked(
git_file fd,
off64_t size,
off64_t offset)
......@@ -338,12 +336,11 @@ static git_mwindow *new_window(
off64_t len;
git_mwindow *w;
w = git__malloc(sizeof(*w));
w = git__calloc(1, sizeof(*w));
if (w == NULL)
return NULL;
memset(w, 0x0, sizeof(*w));
w->offset = (offset / walign) * walign;
len = size - w->offset;
......@@ -353,7 +350,7 @@ static git_mwindow *new_window(
ctl->mapped += (size_t)len;
while (git_mwindow__mapped_limit < ctl->mapped &&
git_mwindow_close_lru_window() == 0) /* nop */;
git_mwindow_close_lru_window_locked() == 0) /* nop */;
/*
* We treat `mapped_limit` as a soft limit. If we can't find a
......@@ -367,7 +364,7 @@ static git_mwindow *new_window(
* we're below our soft limits, so free up what we can and try again.
*/
while (git_mwindow_close_lru_window() == 0)
while (git_mwindow_close_lru_window_locked() == 0)
/* nop */;
if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) {
......@@ -423,7 +420,7 @@ unsigned char *git_mwindow_open(
* one.
*/
if (!w) {
w = new_window(mwf->fd, mwf->size, offset);
w = new_window_locked(mwf->fd, mwf->size, offset);
if (w == NULL) {
git_mutex_unlock(&git__mwindow_mutex);
return NULL;
......@@ -451,8 +448,11 @@ unsigned char *git_mwindow_open(
int git_mwindow_file_register(git_mwindow_file *mwf)
{
git_vector closed_files = GIT_VECTOR_INIT;
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
int ret;
int error;
size_t i;
git_mwindow_file *closed_file = NULL;
if (git_mutex_lock(&git__mwindow_mutex)) {
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
......@@ -460,20 +460,48 @@ int git_mwindow_file_register(git_mwindow_file *mwf)
}
if (ctl->windowfiles.length == 0 &&
git_vector_init(&ctl->windowfiles, 8, NULL) < 0) {
(error = git_vector_init(&ctl->windowfiles, 8, NULL)) < 0) {
git_mutex_unlock(&git__mwindow_mutex);
return -1;
goto cleanup;
}
if (git_mwindow__file_limit) {
git_mwindow_file *lru_file;
while (git_mwindow__file_limit <= ctl->windowfiles.length &&
git_mwindow_close_lru_file() == 0) /* nop */;
git_mwindow_find_lru_file_locked(&lru_file) == 0) {
if ((error = git_vector_insert(&closed_files, lru_file)) < 0) {
/*
* Exceeding the file limit seems preferable to being open to
* data races that can end up corrupting the heap.
*/
break;
}
git_mwindow_free_all_locked(lru_file);
}
}
ret = git_vector_insert(&ctl->windowfiles, mwf);
error = git_vector_insert(&ctl->windowfiles, mwf);
git_mutex_unlock(&git__mwindow_mutex);
if (error < 0)
goto cleanup;
return ret;
/*
* Once we have released the global windowfiles lock, we can close each
* individual file. Before doing so, acquire that file's lock to avoid
* closing a file that is currently being used.
*/
git_vector_foreach(&closed_files, i, closed_file) {
error = git_mutex_lock(&closed_file->lock);
if (error < 0)
continue;
p_close(closed_file->fd);
closed_file->fd = -1;
git_mutex_unlock(&closed_file->lock);
}
cleanup:
git_vector_free(&closed_files);
return error;
}
void git_mwindow_file_deregister(git_mwindow_file *mwf)
......
......@@ -13,8 +13,6 @@
#include "map.h"
#include "vector.h"
extern git_mutex git__mwindow_mutex;
typedef struct git_mwindow {
struct git_mwindow *next;
git_map window_map;
......@@ -24,6 +22,7 @@ typedef struct git_mwindow {
} git_mwindow;
typedef struct git_mwindow_file {
git_mutex lock; /* protects updates to fd */
git_mwindow *windows;
int fd;
off64_t size;
......@@ -41,7 +40,6 @@ typedef struct git_mwindow_ctl {
int git_mwindow_contains(git_mwindow *win, off64_t offset);
int git_mwindow_free_all(git_mwindow_file *mwf); /* locks */
int git_mwindow_free_all_locked(git_mwindow_file *mwf); /* run under lock */
unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, off64_t offset, size_t extra, unsigned int *left);
int git_mwindow_file_register(git_mwindow_file *mwf);
void git_mwindow_file_deregister(git_mwindow_file *mwf);
......
......@@ -85,7 +85,7 @@ typedef struct {
struct git_pack_file {
git_mwindow_file mwf;
git_map index_map;
git_mutex lock; /* protect updates to mwf and index_map */
git_mutex lock; /* protect updates to index_map */
git_atomic refcount;
uint32_t num_objects;
......@@ -140,7 +140,7 @@ int git_packfile__name(char **out, const char *path);
int git_packfile_unpack_header(
size_t *size_p,
git_object_t *type_p,
git_mwindow_file *mwf,
struct git_pack_file *p,
git_mwindow **w_curs,
off64_t *curpos);
......@@ -164,8 +164,7 @@ int get_delta_base(
git_object_t type,
off64_t delta_obj_offset);
void git_packfile_close(struct git_pack_file *p, bool unlink_packfile);
void git_packfile_free(struct git_pack_file *p);
void git_packfile_free(struct git_pack_file *p, bool unlink_packfile);
int git_packfile_alloc(struct git_pack_file **pack_out, const char *path);
int git_pack_entry_find(
......
......@@ -68,6 +68,8 @@ int p_munmap(git_map *map)
{
GIT_ASSERT_ARG(map);
munmap(map->data, map->len);
map->data = NULL;
map->len = 0;
return 0;
}
......
......@@ -8,6 +8,7 @@
static size_t expected_open_mwindow_files = 0;
static size_t original_mwindow_file_limit = 0;
extern git_mutex git__mwindow_mutex;
extern git_mwindow_ctl git_mwindow__mem_ctl;
void test_pack_filelimit__initialize_tiny(void)
......
#include "clar_libgit2.h"
#include "pool.h"
#include <git2.h>
#include "git2/sys/commit.h"
#include "git2/sys/mempack.h"
static size_t original_mwindow_file_limit = 0;
/*
 * Test setup: remembers the current mwindow file limit in
 * `original_mwindow_file_limit`, then lowers the limit to a single open
 * file for the duration of the test.
 */
void test_pack_threadsafety__initialize(void)
{
	/* Kept as a size_t object: the value is passed through varargs. */
	const size_t single_file_limit = 1;

	cl_git_pass(git_libgit2_opts(
		GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit));
	cl_git_pass(git_libgit2_opts(
		GIT_OPT_SET_MWINDOW_FILE_LIMIT, single_file_limit));
}
void test_pack_threadsafety__cleanup(void)
{
cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, original_mwindow_file_limit));
}
/*
 * Thread entry point. `arg` is the repository path as a C string.
 *
 * Opens the repository at that path, builds a status list for it (passing
 * NULL options), then frees the status list and the repository.
 * Always returns NULL.
 */
static void *get_status(void *arg)
{
	const char *path = arg;
	git_repository *repository;
	git_status_list *statuses;

	cl_git_pass(git_repository_open(&repository, path));
	cl_git_pass(git_status_list_new(&statuses, repository, NULL));

	git_status_list_free(statuses);
	git_repository_free(repository);

	return NULL;
}
/*
 * Runs `get_status` against the same repository from several threads at
 * once and waits for all of them to finish.
 *
 * The repository used is the one found at cl_fixture("../.."). The test is
 * skipped when that repository cannot be opened, when it is shallow, or
 * when the library is built without GIT_THREADS.
 */
void test_pack_threadsafety__open_repo_in_multiple_threads(void)
{
#ifdef GIT_THREADS
	const char *repo_path = cl_fixture("../..");
	git_repository *repo;
	git_thread threads[8];
	size_t i;
	int is_shallow;

	/* If we can't open the libgit2 repo or if it isn't a full repo
	 * with proper history, just skip this test */
	if (git_repository_open(&repo, repo_path) < 0)
		cl_skip();

	/*
	 * Release the probe handle before deciding whether to skip, so the
	 * skip path (assumed not to return here) does not leak `repo`.
	 */
	is_shallow = git_repository_is_shallow(repo);
	git_repository_free(repo);
	if (is_shallow)
		cl_skip();

	/*
	 * Fail the test as soon as a thread cannot be created, rather than
	 * later joining a handle that was never initialized.
	 */
	for (i = 0; i < ARRAY_SIZE(threads); i++)
		cl_git_pass(git_thread_create(&threads[i], get_status, (void *)repo_path));

	for (i = 0; i < ARRAY_SIZE(threads); i++)
		cl_git_pass(git_thread_join(&threads[i], NULL));
#else
	cl_skip();
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment