Commit 1ad48c8a by Edward Thomson Committed by Edward Thomson

khash: update to klib f28c067

parent 190b76a6
...@@ -46,6 +46,19 @@ int main() { ...@@ -46,6 +46,19 @@ int main() {
*/ */
/* /*
2013-05-02 (0.2.8):
* Use quadratic probing. When the capacity is power of 2, stepping function
i*(i+1)/2 guarantees to traverse each bucket. It is better than double
hashing on cache performance and is more robust than linear probing.
In theory, double hashing should be more robust than quadratic probing.
However, my implementation is probably not for large hash tables, because
the second hash function is closely tied to the first hash function,
which reduce the effectiveness of double hashing.
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
2011-12-29 (0.2.7): 2011-12-29 (0.2.7):
* Minor code clean up; no actual effect. * Minor code clean up; no actual effect.
...@@ -110,13 +123,13 @@ int main() { ...@@ -110,13 +123,13 @@ int main() {
Generic hash table library. Generic hash table library.
*/ */
#define AC_VERSION_KHASH_H "0.2.6" #define AC_VERSION_KHASH_H "0.2.8"
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
/* compipler specific configuration */ /* compiler specific configuration */
#if UINT_MAX == 0xffffffffu #if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t; typedef unsigned int khint32_t;
...@@ -130,11 +143,13 @@ typedef unsigned long khint64_t; ...@@ -130,11 +143,13 @@ typedef unsigned long khint64_t;
typedef unsigned long long khint64_t; typedef unsigned long long khint64_t;
#endif #endif
#ifndef kh_inline
#ifdef _MSC_VER #ifdef _MSC_VER
#define kh_inline __inline #define kh_inline __inline
#else #else
#define kh_inline inline #define kh_inline inline
#endif #endif
#endif /* kh_inline */
typedef khint32_t khint_t; typedef khint32_t khint_t;
typedef khint_t khiter_t; typedef khint_t khiter_t;
...@@ -147,12 +162,6 @@ typedef khint_t khiter_t; ...@@ -147,12 +162,6 @@ typedef khint_t khiter_t;
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
#ifdef KHASH_LINEAR
#define __ac_inc(k, m) 1
#else
#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
#endif
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
#ifndef kroundup32 #ifndef kroundup32
...@@ -175,7 +184,7 @@ typedef khint_t khiter_t; ...@@ -175,7 +184,7 @@ typedef khint_t khiter_t;
static const double __ac_HASH_UPPER = 0.77; static const double __ac_HASH_UPPER = 0.77;
#define __KHASH_TYPE(name, khkey_t, khval_t) \ #define __KHASH_TYPE(name, khkey_t, khval_t) \
typedef struct { \ typedef struct kh_##name##_s { \
khint_t n_buckets, size, n_occupied, upper_bound; \ khint_t n_buckets, size, n_occupied, upper_bound; \
khint32_t *flags; \ khint32_t *flags; \
khkey_t *keys; \ khkey_t *keys; \
...@@ -213,19 +222,19 @@ static const double __ac_HASH_UPPER = 0.77; ...@@ -213,19 +222,19 @@ static const double __ac_HASH_UPPER = 0.77;
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
{ \ { \
if (h->n_buckets) { \ if (h->n_buckets) { \
khint_t inc, k, i, last, mask; \ khint_t k, i, last, mask, step = 0; \
mask = h->n_buckets - 1; \ mask = h->n_buckets - 1; \
k = __hash_func(key); i = k & mask; \ k = __hash_func(key); i = k & mask; \
inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
i = (i + inc) & mask; \ i = (i + (++step)) & mask; \
if (i == last) return h->n_buckets; \ if (i == last) return h->n_buckets; \
} \ } \
return __ac_iseither(h->flags, i)? h->n_buckets : i; \ return __ac_iseither(h->flags, i)? h->n_buckets : i; \
} else return 0; \ } else return 0; \
} \ } \
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
{ /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
khint32_t *new_flags = 0; \ khint32_t *new_flags = 0; \
khint_t j = 1; \ khint_t j = 1; \
{ \ { \
...@@ -238,11 +247,11 @@ static const double __ac_HASH_UPPER = 0.77; ...@@ -238,11 +247,11 @@ static const double __ac_HASH_UPPER = 0.77;
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (h->n_buckets < new_n_buckets) { /* expand */ \ if (h->n_buckets < new_n_buckets) { /* expand */ \
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
if (!new_keys) return -1; \ if (!new_keys) { kfree(new_flags); return -1; } \
h->keys = new_keys; \ h->keys = new_keys; \
if (kh_is_map) { \ if (kh_is_map) { \
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
if (!new_vals) return -1; \ if (!new_vals) { kfree(new_flags); return -1; } \
h->vals = new_vals; \ h->vals = new_vals; \
} \ } \
} /* otherwise shrink */ \ } /* otherwise shrink */ \
...@@ -258,11 +267,10 @@ static const double __ac_HASH_UPPER = 0.77; ...@@ -258,11 +267,10 @@ static const double __ac_HASH_UPPER = 0.77;
if (kh_is_map) val = h->vals[j]; \ if (kh_is_map) val = h->vals[j]; \
__ac_set_isdel_true(h->flags, j); \ __ac_set_isdel_true(h->flags, j); \
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
khint_t inc, k, i; \ khint_t k, i, step = 0; \
k = __hash_func(key); \ k = __hash_func(key); \
i = k & new_mask; \ i = k & new_mask; \
inc = __ac_inc(k, new_mask); \ while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
__ac_set_isempty_false(new_flags, i); \ __ac_set_isempty_false(new_flags, i); \
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
...@@ -301,14 +309,14 @@ static const double __ac_HASH_UPPER = 0.77; ...@@ -301,14 +309,14 @@ static const double __ac_HASH_UPPER = 0.77;
} \ } \
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
{ \ { \
khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
else { \ else { \
inc = __ac_inc(k, mask); last = i; \ last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
if (__ac_isdel(h->flags, i)) site = i; \ if (__ac_isdel(h->flags, i)) site = i; \
i = (i + inc) & mask; \ i = (i + (++step)) & mask; \
if (i == last) { x = site; break; } \ if (i == last) { x = site; break; } \
} \ } \
if (x == h->n_buckets) { \ if (x == h->n_buckets) { \
...@@ -449,7 +457,8 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) ...@@ -449,7 +457,8 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key)
@param name Name of the hash table [symbol] @param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*] @param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys] @param k Key [type of keys]
@param r Extra return code: 0 if the key is present in the hash table; @param r Extra return code: -1 if the operation failed;
0 if the key is present in the hash table;
1 if the bucket is empty (never used); 2 if the element in 1 if the bucket is empty (never used); 2 if the element in
the bucket has been deleted [int*] the bucket has been deleted [int*]
@return Iterator to the inserted element [khint_t] @return Iterator to the inserted element [khint_t]
...@@ -461,7 +470,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) ...@@ -461,7 +470,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key)
@param name Name of the hash table [symbol] @param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*] @param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys] @param k Key [type of keys]
@return Iterator to the found element, or kh_end(h) is the element is absent [khint_t] @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
*/ */
#define kh_get(name, h, k) kh_get_##name(h, k) #define kh_get(name, h, k) kh_get_##name(h, k)
...@@ -607,4 +616,4 @@ typedef const char *kh_cstr_t; ...@@ -607,4 +616,4 @@ typedef const char *kh_cstr_t;
#define KHASH_MAP_INIT_STR(name, khval_t) \ #define KHASH_MAP_INIT_STR(name, khval_t) \
KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
#endif /* __AC_KHASH_H */ #endif /* __AC_KHASH_H */
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment