oid.c 11 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8 9
#include "oid.h"

10
#include "git2/oid.h"
11
#include "repository.h"
12
#include "threadstate.h"
13
#include <string.h>
14
#include <limits.h>
15

16
const git_oid git_oid__empty_blob_sha1 =
17
	GIT_OID_INIT(GIT_OID_SHA1,
Edward Thomson committed
18
	  { 0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b,
19
	    0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91 });
20
const git_oid git_oid__empty_tree_sha1 =
21
	GIT_OID_INIT(GIT_OID_SHA1,
Edward Thomson committed
22
	  { 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,
23
	    0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 });
24

25 26
static int oid_error_invalid(const char *msg)
{
27
	git_error_set(GIT_ERROR_INVALID, "unable to parse OID - %s", msg);
28 29 30
	return -1;
}

31
int git_oid__fromstrn(
Edward Thomson committed
32 33 34 35
	git_oid *out,
	const char *str,
	size_t length,
	git_oid_t type)
36
{
Edward Thomson committed
37
	size_t size, p;
38
	int v;
39

Edward Thomson committed
40 41
	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(str);
42

Edward Thomson committed
43 44 45
	if (!(size = git_oid_size(type)))
		return oid_error_invalid("unknown type");

46 47
	if (!length)
		return oid_error_invalid("too short");
48

Edward Thomson committed
49
	if (length > git_oid_hexsize(type))
50
		return oid_error_invalid("too long");
51

52
#ifdef GIT_EXPERIMENTAL_SHA256
Edward Thomson committed
53
	out->type = type;
54
#endif
Edward Thomson committed
55
	memset(out->id, 0, size);
56

57 58
	for (p = 0; p < length; p++) {
		v = git__fromhex(str[p]);
59
		if (v < 0)
60
			return oid_error_invalid("contains invalid characters");
61

62
		out->id[p / 2] |= (unsigned char)(v << (p % 2 ? 0 : 4));
63 64
	}

65
	return 0;
66
}
67

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
int git_oid__fromstrp(git_oid *out, const char *str, git_oid_t type)
{
	return git_oid__fromstrn(out, str, strlen(str), type);
}

int git_oid__fromstr(git_oid *out, const char *str, git_oid_t type)
{
	return git_oid__fromstrn(out, str, git_oid_hexsize(type), type);
}

#ifdef GIT_EXPERIMENTAL_SHA256
int git_oid_fromstrn(
	git_oid *out,
	const char *str,
	size_t length,
	git_oid_t type)
{
	return git_oid__fromstrn(out, str, length, type);
}

Edward Thomson committed
88
int git_oid_fromstrp(git_oid *out, const char *str, git_oid_t type)
89
{
Edward Thomson committed
90
	return git_oid_fromstrn(out, str, strlen(str), type);
91 92
}

Edward Thomson committed
93
int git_oid_fromstr(git_oid *out, const char *str, git_oid_t type)
94
{
Edward Thomson committed
95
	return git_oid_fromstrn(out, str, git_oid_hexsize(type), type);
96
}
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
#else
int git_oid_fromstrn(
	git_oid *out,
	const char *str,
	size_t length)
{
	return git_oid__fromstrn(out, str, length, GIT_OID_SHA1);
}

int git_oid_fromstrp(git_oid *out, const char *str)
{
	return git_oid__fromstrn(out, str, strlen(str), GIT_OID_SHA1);
}

int git_oid_fromstr(git_oid *out, const char *str)
{
	return git_oid__fromstrn(out, str, GIT_OID_SHA1_HEXSIZE, GIT_OID_SHA1);
}
#endif
116

117
int git_oid_nfmt(char *str, size_t n, const git_oid *oid)
118
{
119
	size_t hex_size;
120 121 122

	if (!oid) {
		memset(str, 0, n);
123
		return 0;
124
	}
Edward Thomson committed
125

126
	if (!(hex_size = git_oid_hexsize(git_oid_type(oid))))
Edward Thomson committed
127 128 129 130 131
		return oid_error_invalid("unknown type");

	if (n > hex_size) {
		memset(&str[hex_size], 0, n - hex_size);
		n = hex_size;
132 133
	}

134
	git_oid_fmt_substr(str, oid, 0, n);
135
	return 0;
136 137
}

138
int git_oid_fmt(char *str, const git_oid *oid)
139
{
140
	return git_oid_nfmt(str, git_oid_hexsize(git_oid_type(oid)), oid);
141 142
}

143
int git_oid_pathfmt(char *str, const git_oid *oid)
144
{
145
	size_t hex_size;
Edward Thomson committed
146

147
	if (!(hex_size = git_oid_hexsize(git_oid_type(oid))))
Edward Thomson committed
148
		return oid_error_invalid("unknown type");
149

150 151 152
	git_oid_fmt_substr(str, oid, 0, 2);
	str[2] = '/';
	git_oid_fmt_substr(&str[3], oid, 2, (hex_size - 2));
153
	return 0;
154 155
}

156 157
char *git_oid_tostr_s(const git_oid *oid)
{
158
	git_threadstate *threadstate = git_threadstate_get();
159 160 161 162 163 164
	char *str;

	if (!threadstate)
		return NULL;

	str = threadstate->oid_fmt;
165
	git_oid_nfmt(str, git_oid_hexsize(git_oid_type(oid)) + 1, oid);
166 167 168
	return str;
}

169 170
char *git_oid_allocfmt(const git_oid *oid)
{
171
	size_t hex_size = git_oid_hexsize(git_oid_type(oid));
Edward Thomson committed
172 173 174
	char *str = git__malloc(hex_size + 1);

	if (!hex_size || !str)
175
		return NULL;
Edward Thomson committed
176 177 178 179 180 181

	if (git_oid_nfmt(str, hex_size + 1, oid) < 0) {
		git__free(str);
		return NULL;
	}

182 183
	return str;
}
184

185
char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
186
{
Edward Thomson committed
187 188
	size_t hex_size;

189
	if (!out || n == 0)
190 191
		return "";

192
	hex_size = oid ? git_oid_hexsize(git_oid_type(oid)) : 0;
Edward Thomson committed
193 194 195

	if (n > hex_size + 1)
		n = hex_size + 1;
196

197 198
	git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
	out[n - 1] = '\0';
199 200 201 202

	return out;
}

203
int git_oid__fromraw(git_oid *out, const unsigned char *raw, git_oid_t type)
204
{
Edward Thomson committed
205 206 207 208 209
	size_t size;

	if (!(size = git_oid_size(type)))
		return oid_error_invalid("unknown type");

210
#ifdef GIT_EXPERIMENTAL_SHA256
Edward Thomson committed
211
	out->type = type;
212
#endif
Edward Thomson committed
213
	memcpy(out->id, raw, size);
214
	return 0;
215 216
}

217 218 219 220 221 222 223 224 225 226 227 228
#ifdef GIT_EXPERIMENTAL_SHA256
int git_oid_fromraw(git_oid *out, const unsigned char *raw, git_oid_t type)
{
	return git_oid__fromraw(out, raw, type);
}
#else
int git_oid_fromraw(git_oid *out, const unsigned char *raw)
{
	return git_oid__fromraw(out, raw, GIT_OID_SHA1);
}
#endif

229
int git_oid_cpy(git_oid *out, const git_oid *src)
230
{
Edward Thomson committed
231 232
	size_t size;

233
	if (!(size = git_oid_size(git_oid_type(src))))
Edward Thomson committed
234 235
		return oid_error_invalid("unknown type");

236
#ifdef GIT_EXPERIMENTAL_SHA256
Edward Thomson committed
237
	out->type = src->type;
238 239
#endif

Edward Thomson committed
240
	return git_oid_raw_cpy(out->id, src->id, size);
241 242
}

243
int git_oid_cmp(const git_oid *a, const git_oid *b)
244
{
245
	return git_oid__cmp(a, b);
246 247
}

248 249 250 251 252
int git_oid_equal(const git_oid *a, const git_oid *b)
{
	return (git_oid__cmp(a, b) == 0);
}

253
int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, size_t len)
254
{
255
#ifdef GIT_EXPERIMENTAL_SHA256
Edward Thomson committed
256 257
	if (oid_a->type != oid_b->type)
		return oid_a->type - oid_b->type;
258
#endif
Edward Thomson committed
259

260
	return git_oid_raw_ncmp(oid_a->id, oid_b->id, len);
261 262
}

263
int git_oid_strcmp(const git_oid *oid_a, const char *str)
264
{
265
	const unsigned char *a;
266
	unsigned char strval;
267
	long size = (long)git_oid_size(git_oid_type(oid_a));
268 269
	int hexval;

Edward Thomson committed
270
	for (a = oid_a->id; *str && (a - oid_a->id) < size; ++a) {
271 272
		if ((hexval = git__fromhex(*str++)) < 0)
			return -1;
Linquize committed
273
		strval = (unsigned char)(hexval << 4);
274 275 276 277 278 279 280 281
		if (*str) {
			if ((hexval = git__fromhex(*str++)) < 0)
				return -1;
			strval |= hexval;
		}
		if (*a != strval)
			return (*a - strval);
	}
282

283 284
	return 0;
}
285

286 287 288
int git_oid_streq(const git_oid *oid_a, const char *str)
{
	return git_oid_strcmp(oid_a, str) == 0 ? 0 : -1;
289 290
}

291
int git_oid_is_zero(const git_oid *oid_a)
292 293
{
	const unsigned char *a = oid_a->id;
294
	size_t size = git_oid_size(git_oid_type(oid_a)), i;
Edward Thomson committed
295

296
#ifdef GIT_EXPERIMENTAL_SHA256
Edward Thomson committed
297 298 299 300
	if (!oid_a->type)
		return 1;
	else if (!size)
		return 0;
301
#endif
Edward Thomson committed
302 303

	for (i = 0; i < size; ++i, ++a)
304 305 306 307 308
		if (*a != 0)
			return 0;
	return 1;
}

309
#ifndef GIT_DEPRECATE_HARD
310 311 312 313
int git_oid_iszero(const git_oid *oid_a)
{
	return git_oid_is_zero(oid_a);
}
314
#endif
315

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
typedef short node_index;

typedef union {
	const char *tail;
	node_index children[16];
} trie_node;

struct git_oid_shorten {
	trie_node *nodes;
	size_t node_count, size;
	int min_length, full;
};

static int resize_trie(git_oid_shorten *self, size_t new_size)
{
331
	self->nodes = git__reallocarray(self->nodes, new_size, sizeof(trie_node));
332
	GIT_ERROR_CHECK_ALLOC(self->nodes);
333 334 335 336 337 338

	if (new_size > self->size) {
		memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
	}

	self->size = new_size;
339
	return 0;
340 341 342 343 344 345 346 347
}

static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
{
	trie_node *node, *leaf;
	node_index idx_leaf;

	if (os->node_count >= os->size) {
348
		if (resize_trie(os, os->size * 2) < 0)
349 350 351 352 353
			return NULL;
	}

	idx_leaf = (node_index)os->node_count++;

354
	if (os->node_count == SHRT_MAX) {
355
		os->full = 1;
356 357
        return NULL;
    }
358 359 360 361 362 363 364 365 366 367 368 369 370 371

	node = &os->nodes[idx];
	node->children[push_at] = -idx_leaf;

	leaf = &os->nodes[idx_leaf];
	leaf->tail = oid;

	return node;
}

git_oid_shorten *git_oid_shorten_new(size_t min_length)
{
	git_oid_shorten *os;

Edward Thomson committed
372
	GIT_ASSERT_ARG_WITH_RETVAL((size_t)((int)min_length) == min_length, NULL);
373

374
	os = git__calloc(1, sizeof(git_oid_shorten));
375 376 377
	if (os == NULL)
		return NULL;

378
	if (resize_trie(os, 16) < 0) {
379
		git__free(os);
380 381 382 383
		return NULL;
	}

	os->node_count = 1;
384
	os->min_length = (int)min_length;
385 386 387 388 389 390

	return os;
}

void git_oid_shorten_free(git_oid_shorten *os)
{
391 392 393
	if (os == NULL)
		return;

394 395
	git__free(os->nodes);
	git__free(os);
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
}


/*
 * What wizardry is this?
 *
 * This is just a memory-optimized trie: basically a very fancy
 * 16-ary tree, which is used to store the prefixes of the OID
 * strings.
 *
 * Read more: http://en.wikipedia.org/wiki/Trie
 *
 * Magic that happens in this method:
 *
 *	- Each node in the trie is an union, so it can work both as
 *	a normal node, or as a leaf.
 *
 *	- Each normal node points to 16 children (one for each possible
 *	character in the oid). This is *not* stored in an array of
415
 *	pointers, because in a 64-bit arch this would be sucking
416
 *	16*sizeof(void*) = 128 bytes of memory per node, which is
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
 *	insane. What we do is store Node Indexes, and use these indexes
 *	to look up each node in the om->index array. These indexes are
 *	signed shorts, so this limits the amount of unique OIDs that
 *	fit in the structure to about 20000 (assuming a more or less uniform
 *	distribution).
 *
 *	- All the nodes in om->index array are stored contiguously in
 *	memory, and each of them is 32 bytes, so we fit 2x nodes per
 *	cache line. Convenient for speed.
 *
 *	- To differentiate the leafs from the normal nodes, we store all
 *	the indexes towards a leaf as a negative index (indexes to normal
 *	nodes are positives). When we find that one of the children for
 *	a node has a negative value, that means it's going to be a leaf.
 *	This reduces the amount of indexes we have by two, but also reduces
 *	the size of each node by 1-4 bytes (the amount we would need to
 *	add a `is_leaf` field): this is good because it allows the nodes
 *	to fit cleanly in cache lines.
 *
 *	- Once we reach an empty children, instead of continuing to insert
 *	new nodes for each remaining character of the OID, we store a pointer
 *	to the tail in the leaf; if the leaf is reached again, we turn it
 *	into a normal node and use the tail to create a new leaf.
 *
 *	This is a pretty good balance between performance and memory usage.
 */
int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
{
445 446
	int i;
	bool is_leaf;
447 448
	node_index idx;

449
	if (os->full) {
450
		git_error_set(GIT_ERROR_INVALID, "unable to shorten OID - OID set full");
451
		return -1;
452
	}
453

454 455 456
	if (text_oid == NULL)
		return os->min_length;

457
	idx = 0;
458
	is_leaf = false;
459

460
	for (i = 0; i < GIT_OID_SHA1_HEXSIZE; ++i) {
nulltoken committed
461
		int c = git__fromhex(text_oid[i]);
462 463
		trie_node *node;

464
		if (c == -1) {
465
			git_error_set(GIT_ERROR_INVALID, "unable to shorten OID - invalid hex value");
466 467
			return -1;
		}
468 469 470 471 472 473 474 475 476

		node = &os->nodes[idx];

		if (is_leaf) {
			const char *tail;

			tail = node->tail;
			node->tail = NULL;

nulltoken committed
477
			node = push_leaf(os, idx, git__fromhex(tail[0]), &tail[1]);
478 479
			if (node == NULL) {
				if (os->full)
480
					git_error_set(GIT_ERROR_INVALID, "unable to shorten OID - OID set full");
481 482
				return -1;
			}
483 484 485
		}

		if (node->children[c] == 0) {
486 487
			if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL) {
				if (os->full)
488
					git_error_set(GIT_ERROR_INVALID, "unable to shorten OID - OID set full");
489
				return -1;
490
			}
491 492 493 494
			break;
		}

		idx = node->children[c];
495
		is_leaf = false;
496 497 498

		if (idx < 0) {
			node->children[c] = idx = -idx;
499
			is_leaf = true;
500 501 502 503 504 505 506 507 508
		}
	}

	if (++i > os->min_length)
		os->min_length = i;

	return os->min_length;
}