Commit 0b7cdc02 by Russell Belfer

Add sorted cache data type

This adds a convenient new data type for caching the contents of a
file in memory when each item in that file corresponds to a name
and you need both to look up items by name and to iterate
over them in some sorted order.  The new data type has locks in
place to manage usage in a threaded environment.
parent 0f0f5655
#include "sortedcache.h"
GIT__USE_STRMAP;
/* Allocate and initialise a new sorted cache.
 *
 * The cache structure and a copy of `path` share one zero-filled
 * allocation; a NULL `path` is stored as an empty string.  On success the
 * new cache, holding one reference, is stored in `*out` and 0 is returned.
 * If any initialisation step fails, whatever was set up is released and -1
 * is returned (an allocation failure is reported via GITERR_CHECK_ALLOC).
 */
int git_sortedcache_new(
	git_sortedcache **out,
	size_t item_path_offset,
	git_sortedcache_free_item_fn free_item,
	void *free_item_payload,
	git_vector_cmp item_cmp,
	const char *path)
{
	size_t path_len = (path != NULL) ? strlen(path) : 0;
	git_sortedcache *cache =
		git__calloc(sizeof(git_sortedcache) + path_len + 1, 1);

	GITERR_CHECK_ALLOC(cache);

	if (git_pool_init(&cache->pool, 1, 0) < 0)
		goto on_error;

	if (git_vector_init(&cache->items, 4, item_cmp) < 0)
		goto on_error;

	cache->map = git_strmap_alloc();
	if (cache->map == NULL)
		goto on_error;

	if (git_mutex_init(&cache->lock) != 0) {
		giterr_set(GITERR_OS, "Failed to initialize mutex");
		goto on_error;
	}

	cache->item_path_offset = item_path_offset;
	cache->free_item = free_item;
	cache->free_item_payload = free_item_payload;
	GIT_REFCOUNT_INC(cache);

	/* the trailing NUL is already present because the block was calloc'd */
	if (path_len > 0)
		memcpy(cache->path, path, path_len);

	*out = cache;
	return 0;

on_error:
	/* the struct was zero-filled, so members never reached are still zero */
	if (cache->map != NULL)
		git_strmap_free(cache->map);
	git_vector_free(&cache->items);
	git_pool_clear(&cache->pool);
	git__free(cache);
	return -1;
}
/* Take an additional reference on the cache by incrementing its refcount.
 * The matching release is git_sortedcache_free(), which decrements it.
 * No NULL check is performed on `sc`.
 */
void git_sortedcache_incref(git_sortedcache *sc)
{
GIT_REFCOUNT_INC(sc);
}
/* Remove every item from the cache without taking the lock.
 *
 * The name map is emptied first, then the optional free_item callback is
 * run once per stored item, and finally the vector and the pool backing
 * the items are cleared.
 */
static void sortedcache_clear(git_sortedcache *sc)
{
	size_t idx;
	void *entry;

	git_strmap_clear(sc->map);

	if (sc->free_item != NULL) {
		git_vector_foreach(&sc->items, idx, entry)
			sc->free_item(sc->free_item_payload, entry);
	}

	git_vector_clear(&sc->items);
	git_pool_clear(&sc->pool);
}
/* Destroy the cache: run the per-item callbacks, release the vector, the
 * map and the mutex, then free the cache structure itself.  Passed to
 * GIT_REFCOUNT_DEC by git_sortedcache_free() as the release function.
 */
static void sortedcache_free(git_sortedcache *sc)
{
/* take the lock before tearing anything down */
if (git_mutex_lock(&sc->lock) < 0) {
giterr_set(GITERR_OS, "Unable to acquire mutex lock for free");
/* NOTE(review): returning here leaves the whole cache allocated */
return;
}
/* clears the map, runs free_item on every item and clears the pool */
sortedcache_clear(sc);
git_vector_free(&sc->items);
git_strmap_free(sc->map);
/* the mutex is unlocked before it is destroyed */
git_mutex_unlock(&sc->lock);
git_mutex_free(&sc->lock);
git__free(sc);
}
/* Release one reference to the cache; a NULL pointer is ignored.
 * sortedcache_free is handed to GIT_REFCOUNT_DEC as the destructor.
 */
void git_sortedcache_free(git_sortedcache *sc)
{
	if (sc != NULL) {
		GIT_REFCOUNT_DEC(sc, sortedcache_free);
	}
}
/* Default item copier used by git_sortedcache_copy when the caller passes
 * no copy_item callback.  `payload` is the source cache; only the bytes
 * that precede the key string are copied, because the key has already
 * been written into the target item by upsert.  Always returns 0.
 */
static int sortedcache_copy_item(void *payload, void *tgt_item, void *src_item)
{
	const git_sortedcache *cache = payload;
	size_t fixed_len = cache->item_path_offset;

	memcpy(tgt_item, src_item, fixed_len);

	return 0;
}
/* copy a sorted cache
 *
 * Creates a new cache with the same item layout, callbacks, comparison
 * function and path as `src`, then inserts every item of `src` into it.
 * Both caches are locked for the duration of the copy.
 *
 * `copy_item` fills in a freshly upserted target item from a source item;
 * when it is NULL the bytes preceding the key are memcpy'd (the `payload`
 * argument is then ignored).
 *
 * Returns 0 and stores the copy in `*out` on success.  Returns -1 on
 * failure, in which case the partial copy is freed and `*out` is left
 * untouched.
 */
int git_sortedcache_copy(
	git_sortedcache **out,
	git_sortedcache *src,
	int (*copy_item)(void *payload, void *tgt_item, void *src_item),
	void *payload)
{
	int error = 0;
	git_sortedcache *tgt;
	size_t i;
	void *src_item, *tgt_item;

	/* fall back to a plain memcpy of the fixed-size part of each item */
	if (!copy_item) {
		copy_item = sortedcache_copy_item;
		payload = src;
	}

	if (git_sortedcache_new(
			&tgt, src->item_path_offset,
			src->free_item, src->free_item_payload,
			src->items._cmp, src->path) < 0)
		return -1;

	if (git_sortedcache_lock(src) < 0) {
		git_sortedcache_free(tgt);
		return -1;
	}

	if (git_sortedcache_lock(tgt) < 0) {
		/* tgt was never locked, so it can be freed directly */
		git_sortedcache_unlock(src);
		git_sortedcache_free(tgt);
		return -1;
	}

	git_vector_foreach(&src->items, i, src_item) {
		const char *key = ((char *)src_item) + src->item_path_offset;

		if (git_sortedcache_upsert(&tgt_item, tgt, key) < 0 ||
			copy_item(payload, tgt_item, src_item) < 0) {
			error = -1;
			break;
		}
	}

	/* Release both locks on every path.  Freeing tgt while its mutex is
	 * still held would make sortedcache_free() try to re-acquire a lock
	 * this thread already owns.
	 */
	git_sortedcache_unlock(tgt);
	git_sortedcache_unlock(src);

	if (error < 0) {
		git_sortedcache_free(tgt);
		return -1;
	}

	*out = tgt;
	return 0;
}
/* release all items in sorted cache
 *
 * With `lock` set the mutex is held for the duration of the clear; if it
 * cannot be acquired an error is recorded and nothing is cleared.  With
 * `lock` unset the caller is expected to hold the lock already.
 */
void git_sortedcache_clear(git_sortedcache *sc, bool lock)
{
	if (!lock) {
		sortedcache_clear(sc);
		return;
	}

	if (git_mutex_lock(&sc->lock) < 0) {
		giterr_set(GITERR_OS, "Unable to acquire mutex lock for clear");
		return;
	}

	sortedcache_clear(sc);
	git_mutex_unlock(&sc->lock);
}
/* check file stamp to see if reload is required
 *
 * Returns true whenever the filestamp check yields a non-zero result,
 * so a negative (error) result is also reported as out of date.
 */
bool git_sortedcache_out_of_date(git_sortedcache *sc)
{
	int check = git_futils_filestamp_check(&sc->stamp, sc->path);

	return check != 0;
}
/* lock sortedcache while making modifications
 *
 * Returns 0 once the mutex is held; on failure records a GITERR_OS error
 * and returns -1.
 */
int git_sortedcache_lock(git_sortedcache *sc)
{
	int rc = git_mutex_lock(&sc->lock);

	if (rc >= 0)
		return 0;

	giterr_set(GITERR_OS, "Unable to acquire mutex lock");
	return -1;
}
/* unlock sorted cache when done with modifications
 *
 * The item vector is sorted while the lock is still held, then the mutex
 * is released.  Always returns 0.
 */
int git_sortedcache_unlock(git_sortedcache *sc)
{
/* sort before releasing so positional access sees items in order */
git_vector_sort(&sc->items);
git_mutex_unlock(&sc->lock);
return 0;
}
/* lock the cache and, if the file has changed, load its contents into buf
 *
 * Return values:
 *  <0  error; the lock has been released
 *   0  the filestamp check reported no change; the lock has been released
 *      and buf is untouched
 *   1  the file changed and was read successfully; the cache is left
 *      LOCKED and the caller must call git_sortedcache_unlock()
 *
 * buf may be NULL, in which case the file is opened and closed but its
 * contents are not read.
 */
int git_sortedcache_lockandload(git_sortedcache *sc, git_buf *buf)
{
int error, fd;
if ((error = git_sortedcache_lock(sc)) < 0)
return error;
/* zero (unchanged) and negative (error) results both leave via unlock */
if ((error = git_futils_filestamp_check(&sc->stamp, sc->path)) <= 0)
goto unlock;
/* the size recorded in the stamp is used as the read length below */
if (!git__is_sizet(sc->stamp.size)) {
giterr_set(GITERR_INVALID, "Unable to load file larger than size_t");
error = -1;
goto unlock;
}
if ((fd = git_futils_open_ro(sc->path)) < 0) {
error = fd;
goto unlock;
}
/* with a NULL buf, error keeps the positive filestamp result */
if (buf)
error = git_futils_readbuffer_fd(buf, fd, (size_t)sc->stamp.size);
(void)p_close(fd);
if (error < 0)
goto unlock;
return 1; /* return 1 -> file needs reload and was successfully loaded */
unlock:
git_sortedcache_unlock(sc);
return error;
}
/* find and/or insert item, returning pointer to item data
 *
 * If `key` is already in the map, the existing item is returned.
 * Otherwise a new item of item_path_offset + strlen(key) + 1 bytes is
 * taken from the pool, the key is copied to its tail, and the item is
 * added to both the map and the vector.
 *
 * `out` may be NULL.  Returns 0 on success.  On a failed map or vector
 * insert returns <0 and sets `*out` to NULL.
 * NOTE(review): GITERR_CHECK_ALLOC presumably returns before `done` on
 * allocation failure, leaving `*out` untouched - confirm.
 *
 * This function takes no lock itself.
 */
int git_sortedcache_upsert(
void **out, git_sortedcache *sc, const char *key)
{
int error = 0;
khiter_t pos;
void *item;
size_t keylen;
char *item_key;
/* fast path: the key is already present, hand back the stored item */
pos = git_strmap_lookup_index(sc->map, key);
if (git_strmap_valid_index(sc->map, pos)) {
item = git_strmap_value_at(sc->map, pos);
goto done;
}
keylen = strlen(key);
/* NOTE(review): assumes git_pool_mallocz returns zero-filled memory,
 * which would supply the NUL after the copied key - confirm
 */
item = git_pool_mallocz(&sc->pool, sc->item_path_offset + keylen + 1);
GITERR_CHECK_ALLOC(item);
/* one strange thing is that even if the vector or hash table insert
 * fail, there is no way to free the pool item so we just abandon it
 */
item_key = ((char *)item) + sc->item_path_offset;
memcpy(item_key, key, keylen);
/* the map key is the copy stored inside the item, not the caller's string */
pos = kh_put(str, sc->map, item_key, &error);
/* NOTE(review): `error` now holds kh_put's status; presumably negative
 * means failure and 0 means the key was already present - confirm
 * against khash
 */
if (error < 0)
goto done;
if (!error)
kh_key(sc->map, pos) = item_key;
kh_val(sc->map, pos) = item;
/* the kh_put status is overwritten by the vector insert result */
error = git_vector_insert(&sc->items, item);
/* keep map and vector consistent: undo the map insert on failure */
if (error < 0)
git_strmap_delete_at(sc->map, pos);
done:
if (out)
*out = !error ? item : NULL;
return error;
}
/* lookup item by key
 *
 * Returns the item stored under `key`, or NULL when the map has no such
 * key.  No lock is taken.
 */
void *git_sortedcache_lookup(const git_sortedcache *sc, const char *key)
{
	khiter_t idx = git_strmap_lookup_index(sc->map, key);

	if (!git_strmap_valid_index(sc->map, idx))
		return NULL;

	return git_strmap_value_at(sc->map, idx);
}
/* find out how many items are in the cache
 *
 * Reports the length of the item vector; no lock is taken.
 */
size_t git_sortedcache_entrycount(const git_sortedcache *sc)
{
return git_vector_length(&sc->items);
}
/* lookup item by index
 *
 * `pos` indexes the item vector, which is sorted each time the cache is
 * unlocked.  The result is whatever git_vector_get returns; the tests
 * expect NULL for an index past the last item.  No lock is taken.
 */
void *git_sortedcache_entry(const git_sortedcache *sc, size_t pos)
{
return git_vector_get(&sc->items, pos);
}
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_sorted_cache_h__
#define INCLUDE_sorted_cache_h__
#include "util.h"
#include "fileops.h"
#include "vector.h"
#include "thread-utils.h"
#include "pool.h"
#include "strmap.h"
/*
* The purpose of this data structure is to cache the parsed contents of a
* file where each item in the file can be identified by a key string and
* you want to both look them up by name and traverse them in sorted
* order. Each item is assumed to itself end in a GIT_FLEX_ARRAY.
*/
/* Optional per-item callback, run once for every stored item when the
 * cache is cleared or freed.  `payload` is the free_item_payload given to
 * git_sortedcache_new; `item` is the item about to be discarded.
 */
typedef void (*git_sortedcache_free_item_fn)(void *payload, void *item);
/* A reference-counted, mutex-protected cache of keyed items. */
typedef struct {
git_refcount rc; /* bumped by incref, dropped by git_sortedcache_free */
git_mutex lock; /* taken by git_sortedcache_lock and friends */
size_t item_path_offset; /* byte offset of the key string inside each item */
git_sortedcache_free_item_fn free_item; /* may be NULL */
void *free_item_payload; /* first argument passed to free_item */
git_pool pool; /* storage that items are allocated from */
git_vector items; /* item pointers; sorted on every unlock */
git_strmap *map; /* key string -> item pointer */
git_futils_filestamp stamp; /* used to detect changes to the file at path */
char path[GIT_FLEX_ARRAY]; /* copy of the path given at creation ("" if none) */
} git_sortedcache;
/* create a new sortedcache
 *
 * even though every sortedcache stores items with a GIT_FLEX_ARRAY at
 * the end containing their key string, you have to provide the item_cmp
 * sorting function because the sorting function doesn't get a payload
 * and therefore can't know the offset to the item key string. :-(
 *
 * - free_item may be NULL if items need no per-item cleanup
 * - path may be NULL for a cache with no backing file; otherwise the
 *   string is copied into the cache
 * - returns 0 on success, <0 on failure
 */
int git_sortedcache_new(
git_sortedcache **out,
size_t item_path_offset, /* use offsetof() macro */
git_sortedcache_free_item_fn free_item,
void *free_item_payload,
git_vector_cmp item_cmp,
const char *path);
/* copy a sorted cache
 *
 * - copy_item can be NULL to memcpy the bytes of each item that precede
 *   its key string
 * - locks src while copying
 * - returns 0 and sets *out on success, -1 on failure
 */
int git_sortedcache_copy(
git_sortedcache **out,
git_sortedcache *src,
int (*copy_item)(void *payload, void *tgt_item, void *src_item),
void *payload);
/* release one reference; NULL is ignored (the release function runs the
 * free_item callbacks before the cache itself is freed)
 */
void git_sortedcache_free(git_sortedcache *sc);
/* increment reference count */
void git_sortedcache_incref(git_sortedcache *sc);
/* release all items in sorted cache - lock during clear if lock is true */
void git_sortedcache_clear(git_sortedcache *sc, bool lock);
/* check file stamp to see if reload is required */
bool git_sortedcache_out_of_date(git_sortedcache *sc);
/* lock sortedcache while making modifications; returns 0 or -1 */
int git_sortedcache_lock(git_sortedcache *sc);
/* unlock sorted cache when done with modifications; the items are
 * re-sorted before the lock is released
 */
int git_sortedcache_unlock(git_sortedcache *sc);
/* if the file has changed, lock cache and load file contents into buf;
 * @return 0 if up-to-date, 1 if out-of-date, <0 on error
 *
 * the cache is left locked only when 1 is returned; the caller must then
 * call git_sortedcache_unlock.  buf may be NULL.
 */
int git_sortedcache_lockandload(git_sortedcache *sc, git_buf *buf);
/* find and/or insert item, returning pointer to item data - lock first */
int git_sortedcache_upsert(
void **out, git_sortedcache *sc, const char *key);
/* lookup item by key; returns NULL if the key is not present */
void *git_sortedcache_lookup(const git_sortedcache *sc, const char *key);
/* find out how many items are in the cache */
size_t git_sortedcache_entrycount(const git_sortedcache *sc);
/* lookup item by index */
void *git_sortedcache_entry(const git_sortedcache *sc, size_t pos);
#endif
...@@ -55,6 +55,11 @@ GIT_INLINE(void *) git_vector_get(const git_vector *v, size_t position) ...@@ -55,6 +55,11 @@ GIT_INLINE(void *) git_vector_get(const git_vector *v, size_t position)
#define GIT_VECTOR_GET(V,I) ((I) < (V)->length ? (V)->contents[(I)] : NULL) #define GIT_VECTOR_GET(V,I) ((I) < (V)->length ? (V)->contents[(I)] : NULL)
/* Return the vector's `length` field, i.e. the number of stored elements. */
GIT_INLINE(size_t) git_vector_length(const git_vector *v)
{
return v->length;
}
GIT_INLINE(void *) git_vector_last(const git_vector *v) GIT_INLINE(void *) git_vector_last(const git_vector *v)
{ {
return (v->length > 0) ? git_vector_get(v, v->length - 1) : NULL; return (v->length > 0) ? git_vector_get(v, v->length - 1) : NULL;
......
#include "clar_libgit2.h"
#include "sortedcache.h"
/* Comparison callback for caches whose items are bare NUL-terminated
 * strings: orders two items exactly as strcmp orders them.
 */
static int name_only_cmp(const void *a, const void *b)
{
	const char *lhs = a;
	const char *rhs = b;

	return strcmp(lhs, rhs);
}
/* Exercise a cache whose items are bare key strings (item_path_offset 0,
 * no free callback, no backing file): insert, look up by name, read back
 * by position in sorted order, then clear.
 */
void test_core_sortedcache__name_only(void)
{
git_sortedcache *sc;
void *item;
cl_git_pass(git_sortedcache_new(
&sc, 0, NULL, NULL, name_only_cmp, NULL));
/* insert in non-sorted order while holding the lock */
cl_git_pass(git_sortedcache_lock(sc));
cl_git_pass(git_sortedcache_upsert(&item, sc, "aaa"));
cl_git_pass(git_sortedcache_upsert(&item, sc, "bbb"));
cl_git_pass(git_sortedcache_upsert(&item, sc, "zzz"));
cl_git_pass(git_sortedcache_upsert(&item, sc, "mmm"));
cl_git_pass(git_sortedcache_upsert(&item, sc, "iii"));
cl_git_pass(git_sortedcache_unlock(sc));
cl_assert_equal_sz(5, git_sortedcache_entrycount(sc));
/* lookup by name; with offset 0 the item pointer is the key string */
cl_assert((item = git_sortedcache_lookup(sc, "aaa")) != NULL);
cl_assert_equal_s("aaa", item);
cl_assert((item = git_sortedcache_lookup(sc, "mmm")) != NULL);
cl_assert_equal_s("mmm", item);
cl_assert((item = git_sortedcache_lookup(sc, "zzz")) != NULL);
cl_assert_equal_s("zzz", item);
cl_assert(git_sortedcache_lookup(sc, "qqq") == NULL);
/* positional access must come back in sorted order */
cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
cl_assert_equal_s("aaa", item);
cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
cl_assert_equal_s("bbb", item);
cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
cl_assert_equal_s("iii", item);
cl_assert((item = git_sortedcache_entry(sc, 3)) != NULL);
cl_assert_equal_s("mmm", item);
cl_assert((item = git_sortedcache_entry(sc, 4)) != NULL);
cl_assert_equal_s("zzz", item);
cl_assert(git_sortedcache_entry(sc, 5) == NULL);
/* clearing must empty both the positional and the by-name view */
git_sortedcache_clear(sc, true);
cl_assert_equal_sz(0, git_sortedcache_entrycount(sc));
cl_assert(git_sortedcache_entry(sc, 0) == NULL);
cl_assert(git_sortedcache_lookup(sc, "aaa") == NULL);
cl_assert(git_sortedcache_entry(sc, 0) == NULL);
git_sortedcache_free(sc);
}
/* Item type used by the struct-based tests: two fixed-size fields followed
 * by the key string.
 */
typedef struct {
int value;
char smaller_value;
char path[GIT_FLEX_ARRAY]; /* key; its offset is passed to git_sortedcache_new */
} sortedcache_test_struct;
/* Comparison callback for sortedcache_test_struct items: orders two items
 * by strcmp of their `path` keys.
 */
static int sortedcache_test_struct_cmp(const void *a_, const void *b_)
{
	const char *lhs = ((const sortedcache_test_struct *)a_)->path;
	const char *rhs = ((const sortedcache_test_struct *)b_)->path;

	return strcmp(lhs, rhs);
}
/* free_item callback used by the tests: `payload` points at an int that
 * counts how many items were released, and the released item's
 * smaller_value is reset to 0.
 */
static void sortedcache_test_struct_free(void *payload, void *item_)
{
	int *freed = payload;
	sortedcache_test_struct *entry = item_;

	*freed += 1;
	entry->smaller_value = 0;
}
/* Exercise a cache of sortedcache_test_struct items with a free callback
 * and no backing file: fill it, verify lookups and sorted order, clear it
 * (expecting one callback per item), refill it, and free it.
 */
void test_core_sortedcache__in_memory(void)
{
git_sortedcache *sc;
sortedcache_test_struct *item;
int free_count = 0;
cl_git_pass(git_sortedcache_new(
&sc, offsetof(sortedcache_test_struct, path),
sortedcache_test_struct_free, &free_count,
sortedcache_test_struct_cmp, NULL));
/* first fill: five items inserted in non-sorted order */
cl_git_pass(git_sortedcache_lock(sc));
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "aaa"));
item->value = 10;
item->smaller_value = 1;
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "bbb"));
item->value = 20;
item->smaller_value = 2;
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "zzz"));
item->value = 30;
item->smaller_value = 26;
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "mmm"));
item->value = 40;
item->smaller_value = 14;
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "iii"));
item->value = 50;
item->smaller_value = 9;
cl_git_pass(git_sortedcache_unlock(sc));
cl_assert_equal_sz(5, git_sortedcache_entrycount(sc));
/* lookup by name returns the item with the fields set above */
cl_assert((item = git_sortedcache_lookup(sc, "aaa")) != NULL);
cl_assert_equal_s("aaa", item->path);
cl_assert_equal_i(10, item->value);
cl_assert((item = git_sortedcache_lookup(sc, "mmm")) != NULL);
cl_assert_equal_s("mmm", item->path);
cl_assert_equal_i(40, item->value);
cl_assert((item = git_sortedcache_lookup(sc, "zzz")) != NULL);
cl_assert_equal_s("zzz", item->path);
cl_assert_equal_i(30, item->value);
cl_assert(git_sortedcache_lookup(sc, "abc") == NULL);
/* positional access must come back sorted by path */
cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
cl_assert_equal_s("aaa", item->path);
cl_assert_equal_i(10, item->value);
cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
cl_assert_equal_s("bbb", item->path);
cl_assert_equal_i(20, item->value);
cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
cl_assert_equal_s("iii", item->path);
cl_assert_equal_i(50, item->value);
cl_assert((item = git_sortedcache_entry(sc, 3)) != NULL);
cl_assert_equal_s("mmm", item->path);
cl_assert_equal_i(40, item->value);
cl_assert((item = git_sortedcache_entry(sc, 4)) != NULL);
cl_assert_equal_s("zzz", item->path);
cl_assert_equal_i(30, item->value);
cl_assert(git_sortedcache_entry(sc, 5) == NULL);
/* clearing must invoke the free callback once per item */
cl_assert_equal_i(0, free_count);
git_sortedcache_clear(sc, true);
cl_assert_equal_i(5, free_count);
cl_assert_equal_sz(0, git_sortedcache_entrycount(sc));
cl_assert(git_sortedcache_entry(sc, 0) == NULL);
cl_assert(git_sortedcache_lookup(sc, "aaa") == NULL);
cl_assert(git_sortedcache_entry(sc, 0) == NULL);
/* second fill: the cleared cache must be reusable */
free_count = 0;
cl_git_pass(git_sortedcache_lock(sc));
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "testing"));
item->value = 10;
item->smaller_value = 3;
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "again"));
item->value = 20;
item->smaller_value = 1;
cl_git_pass(git_sortedcache_upsert((void **)&item, sc, "final"));
item->value = 30;
item->smaller_value = 2;
cl_git_pass(git_sortedcache_unlock(sc));
cl_assert_equal_sz(3, git_sortedcache_entrycount(sc));
cl_assert((item = git_sortedcache_lookup(sc, "testing")) != NULL);
cl_assert_equal_s("testing", item->path);
cl_assert_equal_i(10, item->value);
cl_assert((item = git_sortedcache_lookup(sc, "again")) != NULL);
cl_assert_equal_s("again", item->path);
cl_assert_equal_i(20, item->value);
cl_assert((item = git_sortedcache_lookup(sc, "final")) != NULL);
cl_assert_equal_s("final", item->path);
cl_assert_equal_i(30, item->value);
/* keys from the first fill must be gone */
cl_assert(git_sortedcache_lookup(sc, "zzz") == NULL);
cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
cl_assert_equal_s("again", item->path);
cl_assert_equal_i(20, item->value);
cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
cl_assert_equal_s("final", item->path);
cl_assert_equal_i(30, item->value);
cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
cl_assert_equal_s("testing", item->path);
cl_assert_equal_i(10, item->value);
cl_assert(git_sortedcache_entry(sc, 3) == NULL);
/* freeing the cache must release the three remaining items */
git_sortedcache_free(sc);
cl_assert_equal_i(3, free_count);
}
/* Reload `sc` from its backing file.
 *
 * Expects the file to have changed (lockandload must return > 0, which
 * leaves the cache locked), drops the current items, then parses each
 * "<number> <name>" line into an item keyed by <name>, with `value` set to
 * <number> and `smaller_value` set to the zero-based line index.  The
 * cache is unlocked again before returning.
 */
static void sortedcache_test_reload(git_sortedcache *sc)
{
	int count = 0;
	git_buf buf = GIT_BUF_INIT;
	char *scan, *after;
	sortedcache_test_struct *item;

	cl_assert(git_sortedcache_lockandload(sc, &buf) > 0);

	git_sortedcache_clear(sc, false); /* clear once we already have lock */

	scan = buf.ptr;
	while (*scan) {
		int at_eof;
		int val = (int)strtol(scan, &after, 0);

		/* strtol must have consumed at least one character */
		cl_assert(after > scan);

		for (scan = after; git__isspace(*scan); ++scan) /* find start */;
		for (after = scan; *after && *after != '\n'; ++after) /* find eol */;

		/* remember whether the line ended at the buffer's terminator
		 * rather than at a newline, so we never step past the NUL
		 */
		at_eof = (*after == '\0');
		*after = '\0';

		cl_git_pass(git_sortedcache_upsert((void **)&item, sc, scan));

		item->value = val;
		item->smaller_value = (char)(count++);

		scan = at_eof ? after : after + 1;
	}

	cl_git_pass(git_sortedcache_unlock(sc));

	git_buf_free(&buf);
}
/* Exercise a cache backed by a file of "<value> <name>" lines: load it,
 * confirm that an unchanged file is not reloaded, then rewrite the file
 * and confirm that a reload replaces the old items.
 */
void test_core_sortedcache__on_disk(void)
{
git_sortedcache *sc;
sortedcache_test_struct *item;
int free_count = 0;
cl_git_mkfile("cacheitems.txt", "10 abc\n20 bcd\n30 cde\n");
cl_git_pass(git_sortedcache_new(
&sc, offsetof(sortedcache_test_struct, path),
sortedcache_test_struct_free, &free_count,
sortedcache_test_struct_cmp, "cacheitems.txt"));
/* should need to reload the first time */
sortedcache_test_reload(sc);
/* test what we loaded */
cl_assert_equal_sz(3, git_sortedcache_entrycount(sc));
cl_assert((item = git_sortedcache_lookup(sc, "abc")) != NULL);
cl_assert_equal_s("abc", item->path);
cl_assert_equal_i(10, item->value);
cl_assert((item = git_sortedcache_lookup(sc, "cde")) != NULL);
cl_assert_equal_s("cde", item->path);
cl_assert_equal_i(30, item->value);
cl_assert(git_sortedcache_lookup(sc, "aaa") == NULL);
cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
cl_assert_equal_s("abc", item->path);
cl_assert_equal_i(10, item->value);
cl_assert((item = git_sortedcache_entry(sc, 1)) != NULL);
cl_assert_equal_s("bcd", item->path);
cl_assert_equal_i(20, item->value);
cl_assert(git_sortedcache_entry(sc, 3) == NULL);
/* should not need to reload this time */
cl_assert_equal_i(0, git_sortedcache_lockandload(sc, NULL));
/* rewrite ondisk file and reload */
/* no items have been released so far: the first reload cleared an empty cache */
cl_assert_equal_i(0, free_count);
cl_git_rewritefile(
"cacheitems.txt", "100 abc\n200 zzz\n500 aaa\n10 final\n");
sortedcache_test_reload(sc);
/* the three items from the first load were released by the reload */
cl_assert_equal_i(3, free_count);
/* test what we loaded */
cl_assert_equal_sz(4, git_sortedcache_entrycount(sc));
cl_assert((item = git_sortedcache_lookup(sc, "abc")) != NULL);
cl_assert_equal_s("abc", item->path);
cl_assert_equal_i(100, item->value);
cl_assert((item = git_sortedcache_lookup(sc, "final")) != NULL);
cl_assert_equal_s("final", item->path);
cl_assert_equal_i(10, item->value);
cl_assert(git_sortedcache_lookup(sc, "cde") == NULL);
cl_assert((item = git_sortedcache_entry(sc, 0)) != NULL);
cl_assert_equal_s("aaa", item->path);
cl_assert_equal_i(500, item->value);
cl_assert((item = git_sortedcache_entry(sc, 2)) != NULL);
cl_assert_equal_s("final", item->path);
cl_assert_equal_i(10, item->value);
cl_assert((item = git_sortedcache_entry(sc, 3)) != NULL);
cl_assert_equal_s("zzz", item->path);
cl_assert_equal_i(200, item->value);
/* freeing releases the four current items: 3 + 4 = 7 callbacks in total */
git_sortedcache_free(sc);
cl_assert_equal_i(7, free_count);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment