Commit e0646b38 by Vicent Marti

Add generic hash function to util.c

It's MurmurHash3 slightly edited to make it
cross-platform. Fast and neat.

Use this for hashing strings on hash tables instead
of a full SHA1 hash. It's very fast and well distributed.

Obviously not crypto-secure.

Signed-off-by: Vicent Marti <tanoku@gmail.com>
parent 51035184
...@@ -141,3 +141,99 @@ void git__hexdump(const char *buffer, size_t len) ...@@ -141,3 +141,99 @@ void git__hexdump(const char *buffer, size_t len)
printf("\n"); printf("\n");
} }
#ifdef GIT_LEGACY_HASH
uint32_t git__hash(const void *key, int len, unsigned int seed)
{
const uint32_t m = 0x5bd1e995;
const int r = 24;
uint32_t h = seed ^ len;
const unsigned char *data = (const unsigned char *)key;
while(len >= 4) {
uint32_t k = *(uint32_t *)data;
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
#else
/*
Cross-platform version of Murmurhash3
http://code.google.com/p/smhasher/wiki/MurmurHash3
by Austin Appleby (aappleby@gmail.com)
This code is on the public domain.
*/
uint32_t git__hash(const void *key, int len, uint32_t seed)
{
#define MURMUR_BLOCK() {\
k1 *= c1; \
k1 = git__rotl(k1,11);\
k1 *= c2;\
h1 ^= k1;\
h1 = h1*3 + 0x52dce729;\
c1 = c1*5 + 0x7b7d159c;\
c2 = c2*5 + 0x6bce6396;\
}
const uint8_t *data = (const uint8_t*)key;
const int nblocks = len / 4;
const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);
const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);
uint32_t h1 = 0x971e137b ^ seed;
uint32_t k1;
uint32_t c1 = 0x95543787;
uint32_t c2 = 0x2ad7eb25;
int i;
for (i = -nblocks; i; i++) {
k1 = blocks[i];
MURMUR_BLOCK();
}
k1 = 0;
switch(len & 3) {
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
MURMUR_BLOCK();
}
h1 ^= len;
h1 ^= h1 >> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >> 16;
return h1;
}
#endif
...@@ -23,6 +23,8 @@ extern int git__dirname(char *dir, size_t n, char *path); ...@@ -23,6 +23,8 @@ extern int git__dirname(char *dir, size_t n, char *path);
extern int git__basename(char *base, size_t n, char *path); extern int git__basename(char *base, size_t n, char *path);
extern void git__hexdump(const char *buffer, size_t n); extern void git__hexdump(const char *buffer, size_t n);
extern uint32_t git__hash(const void *key, int len, uint32_t seed);
/** @return true if p fits into the range of a size_t */ /** @return true if p fits into the range of a size_t */
GIT_INLINE(int) git__is_sizet(off_t p) GIT_INLINE(int) git__is_sizet(off_t p)
...@@ -31,6 +33,13 @@ GIT_INLINE(int) git__is_sizet(off_t p) ...@@ -31,6 +33,13 @@ GIT_INLINE(int) git__is_sizet(off_t p)
return p == (off_t)r; return p == (off_t)r;
} }
/* 32-bit cross-platform rotl */
#ifdef _MSC_VER /* use built-in method in MSVC */
# define git__rotl(v, s) (uint32_t)_rotl(v, s)
#else /* use bitops in GCC; with o2 this gets optimized to a rotl instruction */
# define git__rotl(v, s) (uint32_t)(((uint32_t)(v) << (s)) | ((uint32_t)(v) >> (32 - (s))))
#endif
/* /*
* Realloc the buffer pointed at by variable 'x' so that it can hold * Realloc the buffer pointed at by variable 'x' so that it can hold
* at least 'nr' entries; the number of entries currently allocated * at least 'nr' entries; the number of entries currently allocated
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment