Commit c16c8b9a by Russell Belfer

Adding stash to hashtable implementation

Adding a small stash of nodes with key conflicts has been
demonstrated to greatly increase the efficiency of a cuckoo
hashtable.  See:

  http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf

for more details.
parent 25f258e7
...@@ -18,28 +18,37 @@ static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, ...@@ -18,28 +18,37 @@ static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key,
static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other); static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other);
static int node_insert(git_hashtable *self, git_hashtable_node *new_node); static int node_insert(git_hashtable *self, git_hashtable_node *new_node);
static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size); static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size);
static void reinsert_stash(git_hashtable *self);
static int resize_to(git_hashtable *self, size_t new_size) static int resize_to(git_hashtable *self, size_t new_size)
{ {
git_hashtable_node *old_nodes = self->nodes; git_hashtable_node *old_nodes = self->nodes;
size_t old_size = self->size; size_t old_size = self->size;
git_hashtable_node old_stash[GIT_HASHTABLE_STASH_SIZE];
size_t old_stash_count = self->stash_count;
self->is_resizing = 1; self->is_resizing = 1;
if (old_stash_count > 0)
memcpy(old_stash, self->stash,
old_stash_count * sizeof(git_hashtable_node));
do { do {
self->size = new_size; self->size = new_size;
self->size_mask = new_size - 1; self->size_mask = new_size - 1;
self->key_count = 0; self->key_count = 0;
self->stash_count = 0;
self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size); self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size);
GITERR_CHECK_ALLOC(self->nodes); GITERR_CHECK_ALLOC(self->nodes);
if (insert_nodes(self, old_nodes, old_size) == 0) if (insert_nodes(self, old_nodes, old_size) == 0 &&
insert_nodes(self, old_stash, old_stash_count) == 0)
self->is_resizing = 0; self->is_resizing = 0;
else { else {
new_size *= 2; new_size *= 2;
git__free(self->nodes); git__free(self->nodes);
} }
} while(self->is_resizing); } while (self->is_resizing);
git__free(old_nodes); git__free(old_nodes);
return 0; return 0;
...@@ -47,26 +56,28 @@ static int resize_to(git_hashtable *self, size_t new_size) ...@@ -47,26 +56,28 @@ static int resize_to(git_hashtable *self, size_t new_size)
/*
 * Resize the node array to `new_size` slots, zero-filling any slots that
 * were added, and update `size` / `size_mask` to match.
 *
 * `size_mask` is set to `new_size - 1`, so `new_size` is expected to be a
 * power of two (the visible caller passes the result of
 * git__size_t_powerof2()).
 *
 * Returns 0 on success.  On allocation failure GITERR_CHECK_ALLOC leaves the
 * function early (presumably with an error code -- confirm against its
 * definition) and the table keeps its previous node array.
 */
static int set_size(git_hashtable *self, size_t new_size)
{
	git_hashtable_node *resized =
		git__realloc(self->nodes, new_size * sizeof(git_hashtable_node));

	/* check the temporary rather than self->nodes, so a failed
	 * reallocation cannot overwrite (and leak) the array the table
	 * still owns -- assumes git__realloc follows realloc() semantics */
	GITERR_CHECK_ALLOC(resized);
	self->nodes = resized;

	if (new_size > self->size)
		memset(&self->nodes[self->size], 0x0,
			(new_size - self->size) * sizeof(git_hashtable_node));

	self->size = new_size;
	self->size_mask = new_size - 1;

	return 0;
}
/*
 * Return the node slot that `key` maps to under hash function number
 * `hash_id`.  The hash value is reduced to a slot index with the table's
 * size mask.
 */
GIT_INLINE(git_hashtable_node *)node_with_hash(
	git_hashtable *self, const void *key, int hash_id)
{
	size_t slot;

	slot = self->hash(key, hash_id);
	slot &= self->size_mask;

	return git_hashtable_node_at(self->nodes, slot);
}
static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other) GIT_INLINE(void) node_swap_with(
git_hashtable_node *self, git_hashtable_node *other)
{ {
git_hashtable_node tmp = *self; git_hashtable_node tmp = *self;
*self = *other; *self = *other;
...@@ -76,19 +87,26 @@ static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other) ...@@ -76,19 +87,26 @@ static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
static int node_insert(git_hashtable *self, git_hashtable_node *new_node) static int node_insert(git_hashtable *self, git_hashtable_node *new_node)
{ {
int iteration, hash_id; int iteration, hash_id;
git_hashtable_node *node;
for (iteration = 0; iteration < MAX_LOOPS; iteration++) { for (iteration = 0; iteration < MAX_LOOPS; iteration++) {
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
git_hashtable_node *node;
node = node_with_hash(self, new_node->key, hash_id); node = node_with_hash(self, new_node->key, hash_id);
node_swap_with(new_node, node); node_swap_with(new_node, node);
if (new_node->key == 0x0){ if (new_node->key == 0x0) {
self->key_count++; self->key_count++;
return 0; return 0;
} }
} }
} }
/* Insert into stash if there is space */
if (self->stash_count < GIT_HASHTABLE_STASH_SIZE) {
node_swap_with(new_node, &self->stash[self->stash_count++]);
self->key_count++;
return 0;
}
/* Failed to insert node. Hashtable is currently resizing */ /* Failed to insert node. Hashtable is currently resizing */
assert(!self->is_resizing); assert(!self->is_resizing);
...@@ -105,14 +123,29 @@ static int insert_nodes( ...@@ -105,14 +123,29 @@ static int insert_nodes(
for (i = 0; i < old_size; ++i) { for (i = 0; i < old_size; ++i) {
git_hashtable_node *node = git_hashtable_node_at(old_nodes, i); git_hashtable_node *node = git_hashtable_node_at(old_nodes, i);
if (node->key && if (node->key && node_insert(self, node) < 0)
git_hashtable_insert(self, node->key, node->value) < 0)
return -1; return -1;
} }
return 0; return 0;
} }
/*
 * Empty the stash and try to place every stashed node again.
 *
 * Used after a removal (see reset_stash): a slot that was just freed may
 * allow a previously conflicting node to move into the main table.  Nodes
 * that still cannot be placed end up back in the stash.
 */
static void reinsert_stash(git_hashtable *self)
{
	int stash_count;
	struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE];

	if (self->stash_count <= 0)
		return;

	/* work from a private copy so node_insert() is free to refill
	 * self->stash while we iterate */
	memcpy(stash, self->stash, self->stash_count * sizeof(git_hashtable_node));
	stash_count = self->stash_count;
	self->stash_count = 0;

	/* node_insert() increments key_count for every node it places,
	 * whether in the table or in the stash.  These nodes were already
	 * counted when they were first inserted, so take them out of the
	 * count here to avoid counting them twice. */
	self->key_count -= (size_t)stash_count;

	/* the node_insert() calls *cannot* fail because the stash is empty */
	insert_nodes(self, stash, stash_count);
}
git_hashtable *git_hashtable_alloc( git_hashtable *git_hashtable_alloc(
size_t min_size, size_t min_size,
git_hash_ptr hash, git_hash_ptr hash,
...@@ -127,21 +160,11 @@ git_hashtable *git_hashtable_alloc( ...@@ -127,21 +160,11 @@ git_hashtable *git_hashtable_alloc(
memset(table, 0x0, sizeof(git_hashtable)); memset(table, 0x0, sizeof(git_hashtable));
if (min_size < 8)
min_size = 8;
/* round up size to closest power of 2 */
min_size--;
min_size |= min_size >> 1;
min_size |= min_size >> 2;
min_size |= min_size >> 4;
min_size |= min_size >> 8;
min_size |= min_size >> 16;
table->hash = hash; table->hash = hash;
table->key_equal = key_eq; table->key_equal = key_eq;
set_size(table, min_size + 1); min_size = git__size_t_powerof2(min_size < 8 ? 8 : min_size);
set_size(table, min_size);
return table; return table;
} }
...@@ -151,6 +174,8 @@ void git_hashtable_clear(git_hashtable *self) ...@@ -151,6 +174,8 @@ void git_hashtable_clear(git_hashtable *self)
assert(self); assert(self);
memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size); memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size);
self->stash_count = 0;
self->key_count = 0; self->key_count = 0;
} }
...@@ -200,50 +225,86 @@ int git_hashtable_insert2( ...@@ -200,50 +225,86 @@ int git_hashtable_insert2(
} }
} }
void *git_hashtable_lookup(git_hashtable *self, const void *key) static git_hashtable_node *find_node(git_hashtable *self, const void *key)
{ {
int hash_id; int hash_id, count = 0;
git_hashtable_node *node; git_hashtable_node *node;
assert(self && self->nodes);
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) { for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
node = node_with_hash(self, key, hash_id); node = node_with_hash(self, key, hash_id);
if (node->key && self->key_equal(key, node->key) == 0) if (node->key) {
return node->value; ++count;
if (self->key_equal(key, node->key) == 0)
return node;
}
}
/* check stash if not found but all slots were filled */
if (count == GIT_HASHTABLE_HASHES) {
for (count = 0; count < self->stash_count; ++count)
if (self->key_equal(key, self->stash[count].key) == 0)
return &self->stash[count];
} }
return NULL; return NULL;
} }
/*
 * Housekeeping after `node` has been cleared by a removal.
 *
 * If `node` is one of the live stash entries, close the gap it left so the
 * remaining entries stay contiguous.  In every case, give the stashed nodes
 * another chance to move into the main table.
 */
static void reset_stash(git_hashtable *self, git_hashtable_node *node)
{
	/* if node was in stash, then compact stash */
	ssize_t offset = node - self->stash;

	if (offset >= 0 && offset < self->stash_count) {
		/* only the entries *after* `node` shift down by one; that is
		 * (stash_count - offset - 1) nodes.  Moving one more would
		 * read past the last live entry, and past the end of the
		 * array when the stash is full. */
		if (offset < self->stash_count - 1)
			memmove(node, node + 1, (self->stash_count - offset - 1) *
				sizeof(git_hashtable_node));
		self->stash_count--;
	}

	reinsert_stash(self);
}
/*
 * Look up `key` in the table.
 *
 * Returns the value stored for the key, or NULL when no node matches.
 */
void *git_hashtable_lookup(git_hashtable *self, const void *key)
{
	git_hashtable_node *match;

	assert(self && key);

	match = find_node(self, key);
	if (match == NULL)
		return NULL;

	return match->value;
}
/*
 * Remove `key` from the table.
 *
 * On success the value that was stored for the key is written to
 * `*old_value`, the node is cleared, the stash is tidied up, and 0 is
 * returned.  Returns GIT_ENOTFOUND when the key is not present.
 */
int git_hashtable_remove2(
	git_hashtable *self, const void *key, void **old_value)
{
	git_hashtable_node *found;

	assert(self && self->nodes);

	found = find_node(self, key);
	if (found == NULL)
		return GIT_ENOTFOUND;

	/* hand the value back before wiping the node */
	*old_value = found->value;

	found->key = NULL;
	found->value = NULL;
	self->key_count--;

	reset_stash(self, found);

	return 0;
}
/*
 * Merge the entries of `other` into `self`.
 *
 * `self` is first resized to the power of two computed from the combined
 * slot counts; then the nodes from `other`'s table and from `other`'s stash
 * are inserted.  Returns 0 on success, -1 on failure.
 *
 * NOTE(review): insert_nodes() goes through node_insert(), which swaps each
 * source node into place, so `other`'s node storage is modified by this
 * call while its counters are not -- confirm callers discard `other`
 * afterwards.
 */
int git_hashtable_merge(git_hashtable *self, git_hashtable *other)
{
	size_t new_size = git__size_t_powerof2(self->size + other->size);

	if (resize_to(self, new_size) < 0)
		return -1;

	/* insert_nodes() walks array slots and skips the empty ones, so it
	 * needs the slot count (size).  Passing key_count would stop early
	 * and drop every key stored in a higher slot. */
	if (insert_nodes(self, other->nodes, other->size) < 0)
		return -1;

	return insert_nodes(self, other->stash, other->stash_count);
}
......
...@@ -22,6 +22,8 @@ struct git_hashtable_node { ...@@ -22,6 +22,8 @@ struct git_hashtable_node {
void *value; void *value;
}; };
#define GIT_HASHTABLE_STASH_SIZE 3
struct git_hashtable { struct git_hashtable {
struct git_hashtable_node *nodes; struct git_hashtable_node *nodes;
...@@ -29,6 +31,9 @@ struct git_hashtable { ...@@ -29,6 +31,9 @@ struct git_hashtable {
size_t size; size_t size;
size_t key_count; size_t key_count;
struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE];
int stash_count;
int is_resizing; int is_resizing;
git_hash_ptr hash; git_hash_ptr hash;
...@@ -38,9 +43,11 @@ struct git_hashtable { ...@@ -38,9 +43,11 @@ struct git_hashtable {
typedef struct git_hashtable_node git_hashtable_node; typedef struct git_hashtable_node git_hashtable_node;
typedef struct git_hashtable git_hashtable; typedef struct git_hashtable git_hashtable;
git_hashtable *git_hashtable_alloc(size_t min_size, git_hashtable *git_hashtable_alloc(
size_t min_size,
git_hash_ptr hash, git_hash_ptr hash,
git_hash_keyeq_ptr key_eq); git_hash_keyeq_ptr key_eq);
void *git_hashtable_lookup(git_hashtable *h, const void *key); void *git_hashtable_lookup(git_hashtable *h, const void *key);
int git_hashtable_remove2(git_hashtable *table, const void *key, void **old_value); int git_hashtable_remove2(git_hashtable *table, const void *key, void **old_value);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment