Commit 322c15ee by lhchavez

Make the pack and mwindow implementations data-race-free

This change fixes a packfile heap corruption that can happen when
interacting with multiple packfiles concurrently across multiple
threads. This is exacerbated by setting a lower mwindow open file limit.

This change:

* Renames most of the internal methods in pack.c to clearly indicate
  that they expect to be called with a certain lock held, making
  reasoning about the state of locks a bit easier.
* Splits the `git_pack_file` lock in two: the one in `git_pack_file`
  only protects the `index_map`. The protection to `git_mwindow_file` is
  now in that struct.
* Explicitly checks for freshness of the `git_pack_file` in
  `git_packfile_unpack_header`: this allows the mwindow implementation
  to close files whenever there is enough cache pressure, and
  `git_packfile_unpack_header` will reopen the packfile if needed.
* After a call to `p_munmap()`, the `data` and `len` fields are poisoned
  with `NULL` to make use-after-frees more evident and crash rather than
  being open to the possibility of heap corruption.
* Adds a test case to prevent this from regressing in the future.

Fixes: #5591
parent 4ae41f9c
...@@ -3,6 +3,19 @@ ...@@ -3,6 +3,19 @@
# consistent lock hierarchy that is easy to understand. # consistent lock hierarchy that is easy to understand.
deadlock:attr_cache_lock deadlock:attr_cache_lock
# git_mwindow_file_register has the possibility of evicting some files from the
# global cache. In order to avoid races and closing files that are currently
# being accessed, before evicting any file it will attempt to acquire that
# file's lock. Finally, git_mwindow_file_register is typically called with a
# file lock held, because the caller will use the fd in the mwf immediately
# after registering it. This causes ThreadSanitizer to observe different orders
# of acquisition of the mutex (which implies a possibility of a deadlock),
# _but_ since the files are added to the cache after other files have been
# evicted, there cannot be a case where mwf A is trying to be registered while
# evicting mwf B concurrently and viceversa: at most one of them can be present
# in the cache.
deadlock:git_mwindow_file_register
# When invoking the time/timezone functions from git_signature_now(), they # When invoking the time/timezone functions from git_signature_now(), they
# access libc methods that need to be instrumented to correctly analyze the # access libc methods that need to be instrumented to correctly analyze the
# data races. # data races.
......
...@@ -24,8 +24,6 @@ ...@@ -24,8 +24,6 @@
#include "zstream.h" #include "zstream.h"
#include "object.h" #include "object.h"
extern git_mutex git__mwindow_mutex;
size_t git_indexer__max_objects = UINT32_MAX; size_t git_indexer__max_objects = UINT32_MAX;
#define UINT31_MAX (0x7FFFFFFF) #define UINT31_MAX (0x7FFFFFFF)
...@@ -679,7 +677,7 @@ static int read_stream_object(git_indexer *idx, git_indexer_progress *stats) ...@@ -679,7 +677,7 @@ static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
return GIT_EBUFS; return GIT_EBUFS;
if (!idx->have_stream) { if (!idx->have_stream) {
error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off); error = git_packfile_unpack_header(&entry_size, &type, idx->pack, &w, &idx->off);
if (error == GIT_EBUFS) { if (error == GIT_EBUFS) {
idx->off = entry_start; idx->off = entry_start;
return error; return error;
...@@ -970,7 +968,7 @@ static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats) ...@@ -970,7 +968,7 @@ static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
continue; continue;
curpos = delta->delta_off; curpos = delta->delta_off;
error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos); error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos);
if (error < 0) if (error < 0)
return error; return error;
...@@ -1333,13 +1331,7 @@ void git_indexer_free(git_indexer *idx) ...@@ -1333,13 +1331,7 @@ void git_indexer_free(git_indexer *idx)
git_vector_free_deep(&idx->deltas); git_vector_free_deep(&idx->deltas);
if (!git_mutex_lock(&git__mwindow_mutex)) { git_packfile_free(idx->pack, !idx->pack_committed);
if (!idx->pack_committed)
git_packfile_close(idx->pack, true);
git_packfile_free(idx->pack);
git_mutex_unlock(&git__mwindow_mutex);
}
iter = 0; iter = 0;
while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0) while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)
......
...@@ -29,10 +29,10 @@ size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE; ...@@ -29,10 +29,10 @@ size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE;
size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT; size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT;
size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT; size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT;
/* Mutex to control access */ /* Mutex to control access to `git_mwindow__mem_ctl` and `git__pack_cache`. */
git_mutex git__mwindow_mutex; git_mutex git__mwindow_mutex;
/* Whenever you want to read or modify this, grab git__mwindow_mutex */ /* Whenever you want to read or modify this, grab `git__mwindow_mutex` */
git_mwindow_ctl git_mwindow__mem_ctl; git_mwindow_ctl git_mwindow__mem_ctl;
/* Global list of mwindow files, to open packs once across repos */ /* Global list of mwindow files, to open packs once across repos */
...@@ -95,10 +95,9 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path) ...@@ -95,10 +95,9 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path)
error = git_strmap_set(git__pack_cache, pack->pack_name, pack); error = git_strmap_set(git__pack_cache, pack->pack_name, pack);
git_mutex_unlock(&git__mwindow_mutex); git_mutex_unlock(&git__mwindow_mutex);
if (error < 0) { if (error < 0) {
git_packfile_free(pack); git_packfile_free(pack, false);
return -1; return error;
} }
*out = pack; *out = pack;
...@@ -108,6 +107,7 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path) ...@@ -108,6 +107,7 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path)
int git_mwindow_put_pack(struct git_pack_file *pack) int git_mwindow_put_pack(struct git_pack_file *pack)
{ {
int count, error; int count, error;
struct git_pack_file *pack_to_delete = NULL;
if ((error = git_mutex_lock(&git__mwindow_mutex)) < 0) if ((error = git_mutex_lock(&git__mwindow_mutex)) < 0)
return error; return error;
...@@ -121,34 +121,19 @@ int git_mwindow_put_pack(struct git_pack_file *pack) ...@@ -121,34 +121,19 @@ int git_mwindow_put_pack(struct git_pack_file *pack)
count = git_atomic_dec(&pack->refcount); count = git_atomic_dec(&pack->refcount);
if (count == 0) { if (count == 0) {
git_strmap_delete(git__pack_cache, pack->pack_name); git_strmap_delete(git__pack_cache, pack->pack_name);
git_packfile_free(pack); pack_to_delete = pack;
} }
git_mutex_unlock(&git__mwindow_mutex); git_mutex_unlock(&git__mwindow_mutex);
return 0; git_packfile_free(pack_to_delete, false);
}
int git_mwindow_free_all(git_mwindow_file *mwf) return 0;
{
int error;
if (git_mutex_lock(&git__mwindow_mutex)) {
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
return -1;
}
error = git_mwindow_free_all_locked(mwf);
git_mutex_unlock(&git__mwindow_mutex);
return error;
} }
/* /*
* Free all the windows in a sequence, typically because we're done * Free all the windows in a sequence, typically because we're done
* with the file * with the file. Needs to hold the git__mwindow_mutex.
*/ */
int git_mwindow_free_all_locked(git_mwindow_file *mwf) static int git_mwindow_free_all_locked(git_mwindow_file *mwf)
{ {
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
size_t i; size_t i;
...@@ -184,6 +169,22 @@ int git_mwindow_free_all_locked(git_mwindow_file *mwf) ...@@ -184,6 +169,22 @@ int git_mwindow_free_all_locked(git_mwindow_file *mwf)
return 0; return 0;
} }
int git_mwindow_free_all(git_mwindow_file *mwf)
{
int error;
if (git_mutex_lock(&git__mwindow_mutex)) {
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
return -1;
}
error = git_mwindow_free_all_locked(mwf);
git_mutex_unlock(&git__mwindow_mutex);
return error;
}
/* /*
* Check if a window 'win' contains the address 'offset' * Check if a window 'win' contains the address 'offset'
*/ */
...@@ -256,9 +257,9 @@ static bool git_mwindow_scan_recently_used( ...@@ -256,9 +257,9 @@ static bool git_mwindow_scan_recently_used(
/* /*
* Close the least recently used window (that is currently not being used) out * Close the least recently used window (that is currently not being used) out
* of all the files. Called under lock from new_window. * of all the files. Called under lock from new_window_locked.
*/ */
static int git_mwindow_close_lru_window(void) static int git_mwindow_close_lru_window_locked(void)
{ {
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
git_mwindow_file *cur; git_mwindow_file *cur;
...@@ -292,13 +293,13 @@ static int git_mwindow_close_lru_window(void) ...@@ -292,13 +293,13 @@ static int git_mwindow_close_lru_window(void)
} }
/* /*
* Close the file that does not have any open windows AND whose * Finds the file that does not have any open windows AND whose
* most-recently-used window is the least-recently used one across all * most-recently-used window is the least-recently used one across all
* currently open files. * currently open files.
* *
* Called under lock from new_window. * Called under lock from new_window_locked.
*/ */
static int git_mwindow_close_lru_file(void) static int git_mwindow_find_lru_file_locked(git_mwindow_file **out)
{ {
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
git_mwindow_file *lru_file = NULL, *current_file = NULL; git_mwindow_file *lru_file = NULL, *current_file = NULL;
...@@ -320,15 +321,12 @@ static int git_mwindow_close_lru_file(void) ...@@ -320,15 +321,12 @@ static int git_mwindow_close_lru_file(void)
return -1; return -1;
} }
git_mwindow_free_all_locked(lru_file); *out = lru_file;
p_close(lru_file->fd);
lru_file->fd = -1;
return 0; return 0;
} }
/* This gets called under lock from git_mwindow_open */ /* This gets called under lock from git_mwindow_open */
static git_mwindow *new_window( static git_mwindow *new_window_locked(
git_file fd, git_file fd,
off64_t size, off64_t size,
off64_t offset) off64_t offset)
...@@ -338,12 +336,11 @@ static git_mwindow *new_window( ...@@ -338,12 +336,11 @@ static git_mwindow *new_window(
off64_t len; off64_t len;
git_mwindow *w; git_mwindow *w;
w = git__malloc(sizeof(*w)); w = git__calloc(1, sizeof(*w));
if (w == NULL) if (w == NULL)
return NULL; return NULL;
memset(w, 0x0, sizeof(*w));
w->offset = (offset / walign) * walign; w->offset = (offset / walign) * walign;
len = size - w->offset; len = size - w->offset;
...@@ -353,7 +350,7 @@ static git_mwindow *new_window( ...@@ -353,7 +350,7 @@ static git_mwindow *new_window(
ctl->mapped += (size_t)len; ctl->mapped += (size_t)len;
while (git_mwindow__mapped_limit < ctl->mapped && while (git_mwindow__mapped_limit < ctl->mapped &&
git_mwindow_close_lru_window() == 0) /* nop */; git_mwindow_close_lru_window_locked() == 0) /* nop */;
/* /*
* We treat `mapped_limit` as a soft limit. If we can't find a * We treat `mapped_limit` as a soft limit. If we can't find a
...@@ -367,7 +364,7 @@ static git_mwindow *new_window( ...@@ -367,7 +364,7 @@ static git_mwindow *new_window(
* we're below our soft limits, so free up what we can and try again. * we're below our soft limits, so free up what we can and try again.
*/ */
while (git_mwindow_close_lru_window() == 0) while (git_mwindow_close_lru_window_locked() == 0)
/* nop */; /* nop */;
if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) { if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) {
...@@ -423,7 +420,7 @@ unsigned char *git_mwindow_open( ...@@ -423,7 +420,7 @@ unsigned char *git_mwindow_open(
* one. * one.
*/ */
if (!w) { if (!w) {
w = new_window(mwf->fd, mwf->size, offset); w = new_window_locked(mwf->fd, mwf->size, offset);
if (w == NULL) { if (w == NULL) {
git_mutex_unlock(&git__mwindow_mutex); git_mutex_unlock(&git__mwindow_mutex);
return NULL; return NULL;
...@@ -451,8 +448,11 @@ unsigned char *git_mwindow_open( ...@@ -451,8 +448,11 @@ unsigned char *git_mwindow_open(
int git_mwindow_file_register(git_mwindow_file *mwf) int git_mwindow_file_register(git_mwindow_file *mwf)
{ {
git_vector closed_files = GIT_VECTOR_INIT;
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
int ret; int error;
size_t i;
git_mwindow_file *closed_file = NULL;
if (git_mutex_lock(&git__mwindow_mutex)) { if (git_mutex_lock(&git__mwindow_mutex)) {
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex"); git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
...@@ -460,20 +460,48 @@ int git_mwindow_file_register(git_mwindow_file *mwf) ...@@ -460,20 +460,48 @@ int git_mwindow_file_register(git_mwindow_file *mwf)
} }
if (ctl->windowfiles.length == 0 && if (ctl->windowfiles.length == 0 &&
git_vector_init(&ctl->windowfiles, 8, NULL) < 0) { (error = git_vector_init(&ctl->windowfiles, 8, NULL)) < 0) {
git_mutex_unlock(&git__mwindow_mutex); git_mutex_unlock(&git__mwindow_mutex);
return -1; goto cleanup;
} }
if (git_mwindow__file_limit) { if (git_mwindow__file_limit) {
git_mwindow_file *lru_file;
while (git_mwindow__file_limit <= ctl->windowfiles.length && while (git_mwindow__file_limit <= ctl->windowfiles.length &&
git_mwindow_close_lru_file() == 0) /* nop */; git_mwindow_find_lru_file_locked(&lru_file) == 0) {
if ((error = git_vector_insert(&closed_files, lru_file)) < 0) {
/*
* Exceeding the file limit seems preferrable to being open to
* data races that can end up corrupting the heap.
*/
break;
}
git_mwindow_free_all_locked(lru_file);
}
} }
ret = git_vector_insert(&ctl->windowfiles, mwf); error = git_vector_insert(&ctl->windowfiles, mwf);
git_mutex_unlock(&git__mwindow_mutex); git_mutex_unlock(&git__mwindow_mutex);
if (error < 0)
goto cleanup;
return ret; /*
* Once we have released the global windowfiles lock, we can close each
* individual file. Before doing so, acquire that file's lock to avoid
* closing a file that is currently being used.
*/
git_vector_foreach(&closed_files, i, closed_file) {
error = git_mutex_lock(&closed_file->lock);
if (error < 0)
continue;
p_close(closed_file->fd);
closed_file->fd = -1;
git_mutex_unlock(&closed_file->lock);
}
cleanup:
git_vector_free(&closed_files);
return error;
} }
void git_mwindow_file_deregister(git_mwindow_file *mwf) void git_mwindow_file_deregister(git_mwindow_file *mwf)
......
...@@ -13,8 +13,6 @@ ...@@ -13,8 +13,6 @@
#include "map.h" #include "map.h"
#include "vector.h" #include "vector.h"
extern git_mutex git__mwindow_mutex;
typedef struct git_mwindow { typedef struct git_mwindow {
struct git_mwindow *next; struct git_mwindow *next;
git_map window_map; git_map window_map;
...@@ -24,6 +22,7 @@ typedef struct git_mwindow { ...@@ -24,6 +22,7 @@ typedef struct git_mwindow {
} git_mwindow; } git_mwindow;
typedef struct git_mwindow_file { typedef struct git_mwindow_file {
git_mutex lock; /* protects updates to fd */
git_mwindow *windows; git_mwindow *windows;
int fd; int fd;
off64_t size; off64_t size;
...@@ -41,7 +40,6 @@ typedef struct git_mwindow_ctl { ...@@ -41,7 +40,6 @@ typedef struct git_mwindow_ctl {
int git_mwindow_contains(git_mwindow *win, off64_t offset); int git_mwindow_contains(git_mwindow *win, off64_t offset);
int git_mwindow_free_all(git_mwindow_file *mwf); /* locks */ int git_mwindow_free_all(git_mwindow_file *mwf); /* locks */
int git_mwindow_free_all_locked(git_mwindow_file *mwf); /* run under lock */
unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, off64_t offset, size_t extra, unsigned int *left); unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, off64_t offset, size_t extra, unsigned int *left);
int git_mwindow_file_register(git_mwindow_file *mwf); int git_mwindow_file_register(git_mwindow_file *mwf);
void git_mwindow_file_deregister(git_mwindow_file *mwf); void git_mwindow_file_deregister(git_mwindow_file *mwf);
......
...@@ -85,7 +85,7 @@ typedef struct { ...@@ -85,7 +85,7 @@ typedef struct {
struct git_pack_file { struct git_pack_file {
git_mwindow_file mwf; git_mwindow_file mwf;
git_map index_map; git_map index_map;
git_mutex lock; /* protect updates to mwf and index_map */ git_mutex lock; /* protect updates to index_map */
git_atomic refcount; git_atomic refcount;
uint32_t num_objects; uint32_t num_objects;
...@@ -140,7 +140,7 @@ int git_packfile__name(char **out, const char *path); ...@@ -140,7 +140,7 @@ int git_packfile__name(char **out, const char *path);
int git_packfile_unpack_header( int git_packfile_unpack_header(
size_t *size_p, size_t *size_p,
git_object_t *type_p, git_object_t *type_p,
git_mwindow_file *mwf, struct git_pack_file *p,
git_mwindow **w_curs, git_mwindow **w_curs,
off64_t *curpos); off64_t *curpos);
...@@ -164,8 +164,7 @@ int get_delta_base( ...@@ -164,8 +164,7 @@ int get_delta_base(
git_object_t type, git_object_t type,
off64_t delta_obj_offset); off64_t delta_obj_offset);
void git_packfile_close(struct git_pack_file *p, bool unlink_packfile); void git_packfile_free(struct git_pack_file *p, bool unlink_packfile);
void git_packfile_free(struct git_pack_file *p);
int git_packfile_alloc(struct git_pack_file **pack_out, const char *path); int git_packfile_alloc(struct git_pack_file **pack_out, const char *path);
int git_pack_entry_find( int git_pack_entry_find(
......
...@@ -68,6 +68,8 @@ int p_munmap(git_map *map) ...@@ -68,6 +68,8 @@ int p_munmap(git_map *map)
{ {
GIT_ASSERT_ARG(map); GIT_ASSERT_ARG(map);
munmap(map->data, map->len); munmap(map->data, map->len);
map->data = NULL;
map->len = 0;
return 0; return 0;
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
static size_t expected_open_mwindow_files = 0; static size_t expected_open_mwindow_files = 0;
static size_t original_mwindow_file_limit = 0; static size_t original_mwindow_file_limit = 0;
extern git_mutex git__mwindow_mutex;
extern git_mwindow_ctl git_mwindow__mem_ctl; extern git_mwindow_ctl git_mwindow__mem_ctl;
void test_pack_filelimit__initialize_tiny(void) void test_pack_filelimit__initialize_tiny(void)
......
#include "clar_libgit2.h"
#include "pool.h"
#include <git2.h>
#include "git2/sys/commit.h"
#include "git2/sys/mempack.h"
static size_t original_mwindow_file_limit = 0;
void test_pack_threadsafety__initialize(void)
{
size_t open_mwindow_files = 1;
cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit));
cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, open_mwindow_files));
}
void test_pack_threadsafety__cleanup(void)
{
cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, original_mwindow_file_limit));
}
static void *get_status(void *arg)
{
const char *repo_path = (const char *)arg;
git_repository *repo;
git_status_list *status;
cl_git_pass(git_repository_open(&repo, repo_path));
cl_git_pass(git_status_list_new(&status, repo, NULL));
git_status_list_free(status);
git_repository_free(repo);
return NULL;
}
void test_pack_threadsafety__open_repo_in_multiple_threads(void)
{
#ifdef GIT_THREADS
const char *repo_path = cl_fixture("../..");
git_repository *repo;
git_thread threads[8];
size_t i;
/* If we can't open the libgit2 repo or if it isn't a full repo
* with proper history, just skip this test */
if (git_repository_open(&repo, repo_path) < 0)
cl_skip();
if (git_repository_is_shallow(repo))
cl_skip();
git_repository_free(repo);
for (i = 0; i < ARRAY_SIZE(threads); i++)
git_thread_create(&threads[i], get_status, (void *)repo_path);
for (i = 0; i < ARRAY_SIZE(threads); i++)
git_thread_join(&threads[i], NULL);
#else
cl_skip();
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment