Commit 9679df57 by lhchavez

mwindow: set limit on number of open files

There are some cases in which repositories accrue a large number of
packfiles. The existing mwindow limit applies only to the total size of
mmap'd files, not to their number. This leads to a situation in which
having lots of small packfiles could exhaust the allowed number of open
files, particularly on macOS, where the default ulimit is very low
(256).

This change adds a new configuration parameter
(GIT_OPT_SET_MWINDOW_FILE_LIMIT) that sets the maximum number of open
packfiles, with a default of 128. This is low enough so that even macOS
users should not hit it during normal use.

Based on PR #5386, originally written by @josharian.

Fixes: #2758
parent d43d490c

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

......@@ -205,7 +205,9 @@ typedef enum {
GIT_OPT_GET_PACK_MAX_OBJECTS,
GIT_OPT_SET_PACK_MAX_OBJECTS,
GIT_OPT_DISABLE_PACK_KEEP_FILE_CHECKS,
GIT_OPT_ENABLE_HTTP_EXPECT_CONTINUE
GIT_OPT_ENABLE_HTTP_EXPECT_CONTINUE,
GIT_OPT_GET_MWINDOW_FILE_LIMIT,
GIT_OPT_SET_MWINDOW_FILE_LIMIT
} git_libgit2_opt_t;
/**
......@@ -227,8 +229,18 @@ typedef enum {
*
* * opts(GIT_OPT_SET_MWINDOW_MAPPED_LIMIT, size_t):
*
* >Set the maximum amount of memory that can be mapped at any time
* by the library
* > Set the maximum amount of memory that can be mapped at any time
* > by the library
*
* * opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, size_t *):
*
* > Get the maximum number of files that will be mapped at any time by the
* > library
*
* * opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, size_t):
*
* > Set the maximum number of files that can be mapped at any time
* > by the library
*
* * opts(GIT_OPT_GET_SEARCH_PATH, int level, git_buf *buf)
*
......
......@@ -22,8 +22,12 @@
/*
 * Default value for the mapped-bytes limit: 8192 MiB when pointers are at
 * least 8 bytes wide, 256 MiB otherwise.
 */
#define DEFAULT_MAPPED_LIMIT \
((1024 * 1024) * (sizeof(void*) >= 8 ? 8192ULL : 256UL))
/*
 * Default value for the limit on registered window files.
 * The default ulimit -n on macOS is just 256, so the default stays below it.
 */
#define DEFAULT_FILE_LIMIT 128
/* Size setting for windows, initialised from DEFAULT_WINDOW_SIZE. */
size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE;
/* Limit compared against the running total of mapped bytes in new_window. */
size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT;
/*
 * Limit compared against the number of registered window files in
 * git_mwindow_file_register; exposed to callers through
 * GIT_OPT_GET_MWINDOW_FILE_LIMIT / GIT_OPT_SET_MWINDOW_FILE_LIMIT.
 */
size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT;
/* Whenever you want to read or modify this, grab git__mwindow_mutex */
static git_mwindow_ctl mem_ctl;
......@@ -201,23 +205,17 @@ static void git_mwindow_scan_lru(
}
/*
* Close the least recently used window. You should check to see if
* the file descriptors need closing from time to time. Called under
* lock from new_window.
* Close the least recently used window. Called under lock from new_window.
*/
static int git_mwindow_close_lru(git_mwindow_file *mwf)
static int git_mwindow_close_lru_window(void)
{
git_mwindow_ctl *ctl = &mem_ctl;
git_mwindow_file *cur;
size_t i;
git_mwindow *lru_w = NULL, *lru_l = NULL, **list = &mwf->windows;
/* FIXME: Does this give us any advantage? */
if(mwf->windows)
git_mwindow_scan_lru(mwf, &lru_w, &lru_l);
git_mwindow *lru_w = NULL, *lru_l = NULL, **list = NULL;
for (i = 0; i < ctl->windowfiles.length; ++i) {
git_vector_foreach(&ctl->windowfiles, i, cur) {
git_mwindow *last = lru_w;
git_mwindow_file *cur = git_vector_get(&ctl->windowfiles, i);
git_mwindow_scan_lru(cur, &lru_w, &lru_l);
if (lru_w != last)
list = &cur->windows;
......@@ -242,9 +240,38 @@ static int git_mwindow_close_lru(git_mwindow_file *mwf)
return 0;
}
/*
 * Pick the registered file that owns the least recently used window, hand
 * it to git_mwindow_free_all_locked() and close its file descriptor.
 *
 * Operates on the shared mem_ctl state, so the caller must already hold the
 * mwindow lock; it is invoked from git_mwindow_file_register while the
 * number of registered files is at or above git_mwindow__file_limit.
 *
 * Returns 0 when a file was closed, or -1 with the error message set when
 * the scan did not yield any window to evict.
 */
static int git_mwindow_close_lru_file(void)
{
	git_mwindow_ctl *ctl = &mem_ctl;
	git_mwindow_file *victim = NULL, *candidate;
	/* second output of the scan; its value is not needed here */
	git_mwindow *lru_window = NULL, *unused_link = NULL;
	size_t idx;

	git_vector_foreach(&ctl->windowfiles, idx, candidate) {
		git_mwindow *previous_best = lru_window;

		git_mwindow_scan_lru(candidate, &lru_window, &unused_link);

		/* the scan replaced the best window, so this file now owns it */
		if (previous_best != lru_window)
			victim = candidate;
	}

	if (victim == NULL) {
		git_error_set(GIT_ERROR_OS, "failed to close memory window file; couldn't find LRU");
		return -1;
	}

	git_mwindow_free_all_locked(victim);
	p_close(victim->fd);
	/* mark the descriptor as closed */
	victim->fd = -1;

	return 0;
}
/* This gets called under lock from git_mwindow_open */
static git_mwindow *new_window(
git_mwindow_file *mwf,
git_file fd,
off64_t size,
off64_t offset)
......@@ -269,7 +296,7 @@ static git_mwindow *new_window(
ctl->mapped += (size_t)len;
while (git_mwindow__mapped_limit < ctl->mapped &&
git_mwindow_close_lru(mwf) == 0) /* nop */;
git_mwindow_close_lru_window() == 0) /* nop */;
/*
* We treat `mapped_limit` as a soft limit. If we can't find a
......@@ -283,7 +310,7 @@ static git_mwindow *new_window(
* we're below our soft limits, so free up what we can and try again.
*/
while (git_mwindow_close_lru(mwf) == 0)
while (git_mwindow_close_lru_window() == 0)
/* nop */;
if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) {
......@@ -339,7 +366,7 @@ unsigned char *git_mwindow_open(
* one.
*/
if (!w) {
w = new_window(mwf, mwf->fd, mwf->size, offset);
w = new_window(mwf->fd, mwf->size, offset);
if (w == NULL) {
git_mutex_unlock(&git__mwindow_mutex);
return NULL;
......@@ -381,6 +408,9 @@ int git_mwindow_file_register(git_mwindow_file *mwf)
return -1;
}
while (git_mwindow__file_limit <= ctl->windowfiles.length &&
git_mwindow_close_lru_file() == 0) /* nop */;
ret = git_vector_insert(&ctl->windowfiles, mwf);
git_mutex_unlock(&git__mwindow_mutex);
......
......@@ -59,6 +59,7 @@ int git_libgit2_features(void)
/* Declarations for tuneable settings */
extern size_t git_mwindow__window_size;
extern size_t git_mwindow__mapped_limit;
extern size_t git_mwindow__file_limit;
extern size_t git_indexer__max_objects;
extern bool git_disable_pack_keep_file_checks;
......@@ -124,6 +125,14 @@ int git_libgit2_opts(int key, ...)
*(va_arg(ap, size_t *)) = git_mwindow__mapped_limit;
break;
case GIT_OPT_SET_MWINDOW_FILE_LIMIT:
git_mwindow__file_limit = va_arg(ap, size_t);
break;
case GIT_OPT_GET_MWINDOW_FILE_LIMIT:
*(va_arg(ap, size_t *)) = git_mwindow__file_limit;
break;
case GIT_OPT_GET_SEARCH_PATH:
if ((error = config_level_to_sysdir(va_arg(ap, int))) >= 0) {
git_buf *out = va_arg(ap, git_buf *);
......
#include "clar_libgit2.h"
#include <git2.h>
void test_pack_filelimit__open_repo_with_1025_packfiles(void)
{
git_repository *repo;
git_revwalk *walk;
git_oid id;
int i;
/*
* This repository contains 1025 packfiles, each with one commit, one tree,
* and two blobs. The first blob (README.md) has the same content in all
* commits, but the second one (file.txt) has a different content in each
* commit.
*/
cl_git_pass(git_repository_open(&repo, cl_fixture("1025.git")));
cl_git_pass(git_revwalk_new(&walk, repo));
cl_git_pass(git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL));
cl_git_pass(git_revwalk_push_ref(walk, "refs/heads/master"));
/*
* Walking the tree requires opening each of the 1025 packfiles. This should
* work in all platforms, including those where the default limit of open
* file descriptors is small (e.g. 256 in macOS).
*/
i = 0;
while (git_revwalk_next(&id, walk) == 0)
++i;
cl_assert_equal_i(1025, i);
git_revwalk_free(walk);
git_repository_free(repo);
}
ref: refs/heads/master
[core]
bare = true
repositoryformatversion = 0
filemode = true
Unnamed repository; edit this file 'description' to name the repository.
#!/bin/sh
#
# Place appropriately named executable hook scripts into this directory
# to intercept various actions that git takes. See `git help hooks` for
# more information.
# File patterns to ignore; see `git help ignore` for more information.
# Lines that start with '#' are comments.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment