pack.c 32.7 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8
#include "common.h"
9 10
#include "odb.h"
#include "pack.h"
11
#include "delta.h"
12
#include "sha1_lookup.h"
13 14
#include "mwindow.h"
#include "fileops.h"
15
#include "oid.h"
16

17
#include <zlib.h>
18

19 20
GIT__USE_OFFMAP
GIT__USE_OIDMAP
21

22
static int packfile_open(struct git_pack_file *p);
23
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
24
static int packfile_unpack_compressed(
25 26 27
		git_rawobj *obj,
		struct git_pack_file *p,
		git_mwindow **w_curs,
28
		git_off_t *curpos,
29 30 31 32 33
		size_t size,
		git_otype type);

/* Can find the offset of an object given
 * a prefix of an identifier.
34
 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
35 36 37 38 39
 * is ambiguous within the pack.
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
 */
static int pack_entry_find_offset(
40
		git_off_t *offset_out,
41 42 43
		git_oid *found_oid,
		struct git_pack_file *p,
		const git_oid *short_oid,
44
		size_t len);
45

46 47
static int packfile_error(const char *message)
{
48
	giterr_set(GITERR_ODB, "invalid pack file - %s", message);
49 50 51
	return -1;
}

52 53 54
/********************
 * Delta base cache
 ********************/
55

56
/*
 * Allocate a cache entry wrapping `source`.
 *
 * The raw object is copied shallowly: the new entry points at the same
 * data buffer as `source`. The entry starts with its reference count
 * incremented once. Returns NULL if the allocation fails.
 */
static git_pack_cache_entry *new_cache_object(git_rawobj *source)
{
	git_pack_cache_entry *entry;

	entry = git__calloc(1, sizeof(git_pack_cache_entry));
	if (entry == NULL)
		return NULL;

	/* account for the reference handed back to the caller */
	git_atomic_inc(&entry->refcount);
	memcpy(&entry->raw, source, sizeof(*source));

	return entry;
}

static void free_cache_object(void *o)
{
	git_pack_cache_entry *e = (git_pack_cache_entry *)o;

	if (e != NULL) {
73
		assert(e->refcount.val == 0);
74 75 76 77 78
		git__free(e->raw.data);
		git__free(e);
	}
}

79 80 81 82 83 84 85 86 87 88 89
/*
 * Free every entry stored in the cache and then the map itself.
 * Does nothing when the cache has no map.
 */
static void cache_free(git_pack_cache *cache)
{
	khiter_t pos;

	if (!cache->entries)
		return;

	for (pos = kh_begin(cache->entries); pos != kh_end(cache->entries); pos++) {
		/* skip buckets that hold no live entry */
		if (!kh_exist(cache->entries, pos))
			continue;

		free_cache_object(kh_value(cache->entries, pos));
	}

	git_offmap_free(cache->entries);
	cache->entries = NULL;
}

/*
 * Prepare a delta-base cache: allocate its offset map, set the memory
 * limit and initialize its lock.
 *
 * Returns 0 on success, -1 on failure (with the error message set and
 * cache->entries left as NULL when the mutex could not be created).
 */
static int cache_init(git_pack_cache *cache)
{
	cache->entries = git_offmap_alloc();
	GITERR_CHECK_ALLOC(cache->entries);

	cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;

	if (git_mutex_init(&cache->lock)) {
		giterr_set(GITERR_OS, "failed to initialize pack cache mutex");

		/*
		 * Tear the map down with its own destructor, the same one
		 * cache_free() uses, rather than a plain git__free().
		 */
		git_offmap_free(cache->entries);
		cache->entries = NULL;

		return -1;
	}

	return 0;
}

113
/*
 * Look up the cached object stored under `offset`.
 *
 * On a hit the entry's reference count is incremented and its
 * last-usage stamp refreshed, all under the cache lock. Returns NULL
 * on a miss or when the lock cannot be taken.
 */
static git_pack_cache_entry *cache_get(git_pack_cache *cache, git_off_t offset)
{
	git_pack_cache_entry *found = NULL;
	khiter_t pos;

	if (git_mutex_lock(&cache->lock) < 0)
		return NULL;

	pos = kh_get(off, cache->entries, offset);
	if (pos != kh_end(cache->entries)) {
		found = kh_value(cache->entries, pos);
		git_atomic_inc(&found->refcount);
		found->last_usage = cache->use_ctr++;
	}

	git_mutex_unlock(&cache->lock);

	return found;
}

132 133 134
/*
 * Run with the cache lock held.
 *
 * Walks the whole map and evicts every entry whose reference count is
 * zero, subtracting its size from the cache's memory accounting.
 */
static void free_lowest_entry(git_pack_cache *cache)
{
	khiter_t pos;

	for (pos = kh_begin(cache->entries); pos != kh_end(cache->entries); pos++) {
		git_pack_cache_entry *candidate;

		if (!kh_exist(cache->entries, pos))
			continue;

		candidate = kh_value(cache->entries, pos);

		/* entries still referenced by someone must stay */
		if (candidate == NULL || candidate->refcount.val != 0)
			continue;

		cache->memory_used -= candidate->raw.len;
		kh_del(off, cache->entries, pos);
		free_cache_object(candidate);
	}
}

152 153 154 155 156
/*
 * Try to store `base` in the cache under `offset`.
 *
 * Returns 0 only when the object was inserted; in that case
 * *cached_out points at the new entry, which now refers to base->data.
 * Any non-zero return means the object was NOT cached (too large,
 * allocation or lock failure, or another entry already exists for this
 * offset) and the caller keeps responsibility for base->data.
 */
static int cache_add(
		git_pack_cache_entry **cached_out,
		git_pack_cache *cache,
		git_rawobj *base,
		git_off_t offset)
{
	git_pack_cache_entry *entry;
	int error, exists = 0;
	khiter_t k;

	if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
		return -1;

	entry = new_cache_object(base);
	if (!entry) {
		/*
		 * Report the failure: returning 0 here without setting
		 * *cached_out would make the caller believe the cache took
		 * the buffer, and the base data would never be freed.
		 */
		return -1;
	}

	if (git_mutex_lock(&cache->lock) < 0) {
		giterr_set(GITERR_OS, "failed to lock cache");
		/* only the wrapper is ours; the data still belongs to the caller */
		git__free(entry);
		return -1;
	}

	/* Add it to the cache if nobody else has */
	exists = kh_get(off, cache->entries, offset) != kh_end(cache->entries);
	if (!exists) {
		/* make room by evicting unreferenced entries */
		while (cache->memory_used + base->len > cache->memory_limit)
			free_lowest_entry(cache);

		k = kh_put(off, cache->entries, offset, &error);
		assert(error != 0);
		kh_value(cache->entries, k) = entry;
		cache->memory_used += entry->raw.len;

		*cached_out = entry;
	}
	git_mutex_unlock(&cache->lock);

	/* Somebody beat us to adding it into the cache */
	if (exists) {
		git__free(entry);
		return -1;
	}

	return 0;
}

196 197 198 199 200 201 202 203
/***********************************************************
 *
 * PACK INDEX METHODS
 *
 ***********************************************************/

static void pack_index_free(struct git_pack_file *p)
{
204 205 206 207
	if (p->oids) {
		git__free(p->oids);
		p->oids = NULL;
	}
208 209 210 211 212 213
	if (p->index_map.data) {
		git_futils_mmap_free(&p->index_map);
		p->index_map.data = NULL;
	}
}

Vicent Marti committed
214
static int pack_index_check(const char *path, struct git_pack_file *p)
215 216 217 218 219 220 221
{
	struct git_pack_idx_header *hdr;
	uint32_t version, nr, i, *index;
	void *idx_map;
	size_t idx_size;
	struct stat st;
	int error;
222 223
	/* TODO: properly open the file without access time using O_NOATIME */
	git_file fd = git_futils_open_ro(path);
224
	if (fd < 0)
225
		return fd;
226

227 228
	if (p_fstat(fd, &st) < 0) {
		p_close(fd);
229
		giterr_set(GITERR_OS, "unable to stat pack index '%s'", path);
230 231 232 233
		return -1;
	}

	if (!S_ISREG(st.st_mode) ||
234 235 236
		!git__is_sizet(st.st_size) ||
		(idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
	{
237
		p_close(fd);
238
		giterr_set(GITERR_ODB, "invalid pack index '%s'", path);
239
		return -1;
240 241 242
	}

	error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
243

244 245
	p_close(fd);

246 247
	if (error < 0)
		return error;
248 249 250 251 252 253 254 255

	hdr = idx_map = p->index_map.data;

	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
		version = ntohl(hdr->idx_version);

		if (version < 2 || version > 2) {
			git_futils_mmap_free(&p->index_map);
256
			return packfile_error("unsupported index version");
257 258 259 260 261 262 263 264 265
		}

	} else
		version = 1;

	nr = 0;
	index = idx_map;

	if (version > 1)
Vicent Marti committed
266
		index += 2; /* skip index header */
267 268 269 270 271

	for (i = 0; i < 256; i++) {
		uint32_t n = ntohl(index[i]);
		if (n < nr) {
			git_futils_mmap_free(&p->index_map);
272
			return packfile_error("index is non-monotonic");
273 274 275 276 277 278 279
		}
		nr = n;
	}

	if (version == 1) {
		/*
		 * Total size:
Vicent Marti committed
280 281 282 283
		 * - 256 index entries 4 bytes each
		 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
		 * - 20-byte SHA1 of the packfile
		 * - 20-byte SHA1 file checksum
284 285 286
		 */
		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
			git_futils_mmap_free(&p->index_map);
287
			return packfile_error("index is corrupted");
288 289 290 291
		}
	} else if (version == 2) {
		/*
		 * Minimum size:
Vicent Marti committed
292 293 294 295 296 297 298
		 * - 8 bytes of header
		 * - 256 index entries 4 bytes each
		 * - 20-byte sha1 entry * nr
		 * - 4-byte crc entry * nr
		 * - 4-byte offset entry * nr
		 * - 20-byte SHA1 of the packfile
		 * - 20-byte SHA1 file checksum
299 300 301 302 303 304 305 306 307 308 309 310
		 * And after the 4-byte offset table might be a
		 * variable sized table containing 8-byte entries
		 * for offsets larger than 2^31.
		 */
		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
		unsigned long max_size = min_size;

		if (nr)
			max_size += (nr - 1)*8;

		if (idx_size < min_size || idx_size > max_size) {
			git_futils_mmap_free(&p->index_map);
311
			return packfile_error("wrong index size");
312 313 314 315
		}
	}

	p->num_objects = nr;
316
	p->index_version = version;
317
	return 0;
318 319 320 321
}

static int pack_index_open(struct git_pack_file *p)
{
322
	int error = 0;
323 324
	size_t name_len;
	git_buf idx_name = GIT_BUF_INIT;
325

326
	if (p->index_version > -1)
Russell Belfer committed
327
		return 0;
328

329 330
	name_len = strlen(p->pack_name);
	assert(name_len > strlen(".pack")); /* checked by git_pack_file alloc */
331

332 333 334 335 336
	git_buf_grow(&idx_name, name_len);
	git_buf_put(&idx_name, p->pack_name, name_len - strlen(".pack"));
	git_buf_puts(&idx_name, ".idx");
	if (git_buf_oom(&idx_name)) {
		giterr_set_oom();
Russell Belfer committed
337
		return -1;
338
	}
339

340
	if ((error = git_mutex_lock(&p->lock)) < 0) {
341
		git_buf_free(&idx_name);
Russell Belfer committed
342
		return error;
343
	}
Russell Belfer committed
344

345
	if (p->index_version == -1)
346
		error = pack_index_check(idx_name.ptr, p);
347

348
	git_buf_free(&idx_name);
349

350 351
	git_mutex_unlock(&p->lock);

352
	return error;
353 354 355 356
}

static unsigned char *pack_window_open(
		struct git_pack_file *p,
357
		git_mwindow **w_cursor,
358
		git_off_t offset,
359 360
		unsigned int *left)
{
361
	if (p->mwf.fd == -1 && packfile_open(p) < 0)
362 363 364 365 366 367
		return NULL;

	/* Since packfiles end in a hash of their content and it's
	 * pointless to ask for an offset into the middle of that
	 * hash, and the pack_window_contains function above wouldn't match
	 * don't allow an offset too close to the end of the file.
368 369 370
	 *
	 * Don't allow a negative offset, as that means we've wrapped
	 * around.
371 372 373
	 */
	if (offset > (p->mwf.size - 20))
		return NULL;
374 375
	if (offset < 0)
		return NULL;
376 377 378 379

	return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
 }

380 381 382 383 384 385 386 387
/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size",
 *    then three bits of "type",
 *    with the high bit being "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
388
size_t git_packfile__object_header(unsigned char *hdr, size_t size, git_otype type)
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
{
	unsigned char *hdr_base;
	unsigned char c;

	assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA);

	/* TODO: add support for chunked objects; see git.git 6c0d19b1 */

	c = (unsigned char)((type << 4) | (size & 15));
	size >>= 4;
	hdr_base = hdr;

	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
	}
	*hdr++ = c;

408
	return (hdr - hdr_base);
409 410 411
}


412 413
/*
 * Decode an object header from `buf` (at most `len` bytes available).
 *
 * On success stores the object type, its size and the number of header
 * bytes consumed, and returns 0. Returns GIT_EBUFS when the buffer ends
 * before the header does, and -1 (with *usedp set to 0) when the
 * encoded size needs more bits than a long can hold.
 */
static int packfile_unpack_header1(
		unsigned long *usedp,
		size_t *sizep,
		git_otype *type,
		const unsigned char *buf,
		unsigned long len)
{
	unsigned long consumed = 0;
	unsigned long byte, total;
	unsigned shift;

	byte = buf[consumed++];
	*type = (byte >> 4) & 7;
	total = byte & 15;

	/* the high bit of each byte announces another size byte */
	for (shift = 4; byte & 0x80; shift += 7) {
		if (len <= consumed) {
			giterr_set(GITERR_ODB, "buffer too small");
			return GIT_EBUFS;
		}

		if (bitsizeof(long) <= shift) {
			*usedp = 0;
			giterr_set(GITERR_ODB, "packfile corrupted");
			return -1;
		}

		byte = buf[consumed++];
		total += (byte & 0x7f) << shift;
	}

	*sizep = (size_t)total;
	*usedp = consumed;
	return 0;
}

int git_packfile_unpack_header(
		size_t *size_p,
		git_otype *type_p,
		git_mwindow_file *mwf,
		git_mwindow **w_curs,
454
		git_off_t *curpos)
455 456 457 458
{
	unsigned char *base;
	unsigned int left;
	unsigned long used;
459
	int ret;
460 461

	/* pack_window_open() assures us we have [base, base + 20) available
Vicent Marti committed
462 463
	 * as a range that we can look at at. (Its actually the hash
	 * size that is assured.) With our object header encoding
464 465 466
	 * the maximum deflated object size is 2^137, which is just
	 * insane, so we know won't exceed what we have been given.
	 */
467
/*	base = pack_window_open(p, w_curs, *curpos, &left); */
468 469
	base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
	if (base == NULL)
470
		return GIT_EBUFS;
471

472
	ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
473
	git_mwindow_close(w_curs);
474
	if (ret == GIT_EBUFS)
475 476
		return ret;
	else if (ret < 0)
477
		return packfile_error("header length is zero");
478 479

	*curpos += used;
480
	return 0;
481 482
}

483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
/*
 * Determine the final size and type of the object stored at `offset`
 * without unpacking it.
 *
 * For a delta object the resulting size is read from the delta's own
 * header, and the type is found by following the chain of bases until
 * a non-delta object is reached.
 *
 * Returns 0 on success or a negative error code. A base lookup that
 * yields 0 or a negative value is treated as an error, in the same way
 * pack_dependency_chain() treats it, instead of being used as an
 * offset.
 */
int git_packfile_resolve_header(
		size_t *size_p,
		git_otype *type_p,
		struct git_pack_file *p,
		git_off_t offset)
{
	git_mwindow *w_curs = NULL;
	git_off_t curpos = offset;
	size_t size;
	git_otype type;
	git_off_t base_offset;
	int error;

	error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
	if (error < 0)
		return error;

	if (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
		size_t base_size;
		git_packfile_stream stream;

		base_offset = get_delta_base(p, &w_curs, &curpos, type, offset);
		git_mwindow_close(&w_curs);

		if (base_offset == 0)
			return packfile_error("delta offset is zero");
		if (base_offset < 0) /* must actually be an error code */
			return (int)base_offset;

		/* curpos now sits right after the delta-base reference */
		if ((error = git_packfile_stream_open(&stream, p, curpos)) < 0)
			return error;
		error = git_delta_read_header_fromstream(&base_size, size_p, &stream);
		git_packfile_stream_free(&stream);
		if (error < 0)
			return error;
	} else {
		*size_p = size;
		base_offset = 0;
	}

	/* walk down the delta chain until we hit the real object type */
	while (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
		curpos = base_offset;
		error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
		if (error < 0)
			return error;
		if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
			break;

		base_offset = get_delta_base(p, &w_curs, &curpos, type, base_offset);
		git_mwindow_close(&w_curs);

		if (base_offset == 0)
			return packfile_error("delta offset is zero");
		if (base_offset < 0) /* must actually be an error code */
			return (int)base_offset;
	}
	*type_p = type;

	return error;
}

532 533
#define SMALL_STACK_SIZE 64

534 535 536 537 538 539
/**
 * Generate the chain of dependencies which we need to get to the
 * object at `off`. `chain` is used a stack, popping gives the right
 * order to apply deltas on. If an object is found in the pack's base
 * cache, we stop calculating there.
 */
540 541 542 543
static int pack_dependency_chain(git_dependency_chain *chain_out,
				 git_pack_cache_entry **cached_out, git_off_t *cached_off,
				 struct pack_chain_elem *small_stack, size_t *stack_sz,
				 struct git_pack_file *p, git_off_t obj_offset)
544 545 546 547
{
	git_dependency_chain chain = GIT_ARRAY_INIT;
	git_mwindow *w_curs = NULL;
	git_off_t curpos = obj_offset, base_offset;
548 549
	int error = 0, use_heap = 0;
	size_t size, elem_pos;
550 551
	git_otype type;

552
	elem_pos = 0;
553 554 555 556 557 558 559 560 561 562 563
	while (true) {
		struct pack_chain_elem *elem;
		git_pack_cache_entry *cached = NULL;

		/* if we have a base cached, we can stop here instead */
		if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
			*cached_out = cached;
			*cached_off = obj_offset;
			break;
		}

564 565 566 567 568 569 570 571 572
		/* if we run out of space on the small stack, use the array */
		if (elem_pos == SMALL_STACK_SIZE) {
			git_array_init_to_size(chain, elem_pos);
			GITERR_CHECK_ARRAY(chain);
			memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
			chain.size = elem_pos;
			use_heap = 1;
		}

573
		curpos = obj_offset;
574 575 576 577 578 579 580 581
		if (!use_heap) {
			elem = &small_stack[elem_pos];
		} else {
			elem = git_array_alloc(chain);
			if (!elem) {
				error = -1;
				goto on_error;
			}
582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
		}

		elem->base_key = obj_offset;

		error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);

		if (error < 0)
			goto on_error;

		elem->offset = curpos;
		elem->size = size;
		elem->type = type;
		elem->base_key = obj_offset;

		if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
			break;

		base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
		git_mwindow_close(&w_curs);

		if (base_offset == 0) {
			error = packfile_error("delta offset is zero");
			goto on_error;
		}
		if (base_offset < 0) { /* must actually be an error code */
			error = (int)base_offset;
			goto on_error;
		}

		/* we need to pass the pos *after* the delta-base bit */
		elem->offset = curpos;

		/* go through the loop again, but with the new object */
		obj_offset = base_offset;
616
		elem_pos++;
617 618
	}

619 620
	
	*stack_sz = elem_pos + 1;
621 622 623 624 625 626 627 628
	*chain_out = chain;
	return error;

on_error:
	git_array_clear(chain);
	return error;
}

629
int git_packfile_unpack(
630 631 632
	git_rawobj *obj,
	struct git_pack_file *p,
	git_off_t *obj_offset)
633 634
{
	git_mwindow *w_curs = NULL;
635
	git_off_t curpos = *obj_offset;
636 637
	int error, free_base = 0;
	git_dependency_chain chain = GIT_ARRAY_INIT;
638
	struct pack_chain_elem *elem = NULL, *stack;
639
	git_pack_cache_entry *cached = NULL;
640
	struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
641
	size_t stack_size = 0, elem_pos, alloclen;
642
	git_otype base_type;
643 644 645 646 647

	/*
	 * TODO: optionally check the CRC on the packfile
	 */

648
	error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
649 650 651
	if (error < 0)
		return error;

652 653 654 655
	obj->data = NULL;
	obj->len = 0;
	obj->type = GIT_OBJ_BAD;

656 657 658 659
	/* let's point to the right stack */
	stack = chain.ptr ? chain.ptr : small_stack;

	elem_pos = stack_size;
660
	if (cached) {
661
		memcpy(obj, &cached->raw, sizeof(git_rawobj));
662
		base_type = obj->type;
663
		elem_pos--;	/* stack_size includes the base, which isn't actually there */
664
	} else {
665
		elem = &stack[--elem_pos];
666
		base_type = elem->type;
667
	}
668

669 670 671 672 673
	switch (base_type) {
	case GIT_OBJ_COMMIT:
	case GIT_OBJ_TREE:
	case GIT_OBJ_BLOB:
	case GIT_OBJ_TAG:
674
		if (!cached) {
675 676 677
			curpos = elem->offset;
			error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
			git_mwindow_close(&w_curs);
678
			base_type = elem->type;
679 680 681 682 683 684 685 686 687 688 689 690 691
		}
		if (error < 0)
			goto cleanup;
		break;
	case GIT_OBJ_OFS_DELTA:
	case GIT_OBJ_REF_DELTA:
		error = packfile_error("dependency chain ends in a delta");
		goto cleanup;
	default:
		error = packfile_error("invalid packfile type in header");
		goto cleanup;
	}

692
	/*
693
	 * Finding the object we want a cached base element is
694 695 696 697
	 * problematic, as we need to make sure we don't accidentally
	 * give the caller the cached object, which it would then feel
	 * free to free, so we need to copy the data.
	 */
698
	if (cached && stack_size == 1) {
699
		void *data = obj->data;
700

701 702
		GITERR_CHECK_ALLOC_ADD(&alloclen, obj->len, 1);
		obj->data = git__malloc(alloclen);
703
		GITERR_CHECK_ALLOC(obj->data);
704

705 706 707 708 709
		memcpy(obj->data, data, obj->len + 1);
		git_atomic_dec(&cached->refcount);
		goto cleanup;
	}

710
	/* we now apply each consecutive delta until we run out */
711
	while (elem_pos > 0 && !error) {
712 713
		git_rawobj base, delta;

714 715 716 717 718
		/*
		 * We can now try to add the base to the cache, as
		 * long as it's not already the cached one.
		 */
		if (!cached)
719
			free_base = !!cache_add(&cached, &p->bases, obj, elem->base_key);
720

721
		elem = &stack[elem_pos - 1];
722 723 724 725 726 727 728 729 730 731 732 733 734
		curpos = elem->offset;
		error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
		git_mwindow_close(&w_curs);

		if (error < 0)
			break;

		/* the current object becomes the new base, on which we apply the delta */
		base = *obj;
		obj->data = NULL;
		obj->len = 0;
		obj->type = GIT_OBJ_BAD;

735
		error = git_delta_apply(&obj->data, &obj->len, base.data, base.len, delta.data, delta.len);
736
		obj->type = base_type;
737

738 739 740 741 742 743
		/*
		 * We usually don't want to free the base at this
		 * point, as we put it into the cache in the previous
		 * iteration. free_base lets us know that we got the
		 * base object directly from the packfile, so we can free it.
		 */
744
		git__free(delta.data);
745 746 747 748 749 750 751 752 753
		if (free_base) {
			free_base = 0;
			git__free(base.data);
		}

		if (cached) {
			git_atomic_dec(&cached->refcount);
			cached = NULL;
		}
754 755 756

		if (error < 0)
			break;
757

758
		elem_pos--;
759 760
	}

761
cleanup:
762
	if (error < 0) {
763
		git__free(obj->data);
764 765 766
		if (cached)
			git_atomic_dec(&cached->refcount);
	}
767

768
	if (elem)
769
		*obj_offset = curpos;
770

771
	git_array_clear(chain);
772
	return error;
773 774
}

Russell Belfer committed
775 776 777 778 779 780 781 782 783 784 785 786
/*
 * Allocation callback handed to zlib: routes zlib's allocations
 * through git__calloc(). The opaque pointer is not used.
 */
static void *use_git_alloc(void *opaq, unsigned int count, unsigned int size)
{
	void *mem;

	GIT_UNUSED(opaq);

	mem = git__calloc(count, size);
	return mem;
}

/*
 * Deallocation callback handed to zlib: releases memory through
 * git__free(). The opaque pointer is not used.
 */
static void use_git_free(void *opaq, void *ptr)
{
	GIT_UNUSED(opaq);

	git__free(ptr);
}
}

787 788 789 790 791 792 793 794 795 796 797 798 799
/*
 * Initialize `obj` for streaming the compressed data that starts at
 * `curpos` in pack `p`. The stream structure is zeroed and an inflate
 * stream using the git allocator callbacks is set up.
 *
 * Returns 0 on success, -1 if zlib could not be initialized.
 */
int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos)
{
	memset(obj, 0, sizeof(git_packfile_stream));

	obj->p = p;
	obj->curpos = curpos;

	obj->zstream.zalloc = use_git_alloc;
	obj->zstream.zfree = use_git_free;
	obj->zstream.next_in = Z_NULL;
	obj->zstream.next_out = Z_NULL;

	if (inflateInit(&obj->zstream) != Z_OK) {
		giterr_set(GITERR_ZLIB, "failed to init packfile stream");
		return -1;
	}

	return 0;
}

/*
 * Inflate up to `len` bytes from the stream into `buffer`.
 *
 * Returns the number of bytes produced, 0 once the stream has been
 * fully consumed, GIT_EBUFS when no window could be opened or nothing
 * could be produced before the end of the stream, and -1 on a zlib
 * error.
 */
ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
{
	unsigned char *window;
	size_t produced;
	int zerr;

	if (obj->done)
		return 0;

	window = pack_window_open(obj->p, &obj->mw, obj->curpos, &obj->zstream.avail_in);
	if (window == NULL)
		return GIT_EBUFS;

	obj->zstream.next_in = window;
	obj->zstream.next_out = buffer;
	obj->zstream.avail_out = (unsigned int)len;

	zerr = inflate(&obj->zstream, Z_SYNC_FLUSH);
	git_mwindow_close(&obj->mw);

	/* advance by however much input zlib consumed */
	obj->curpos += obj->zstream.next_in - window;
	produced = len - obj->zstream.avail_out;

	if (zerr == Z_STREAM_END) {
		obj->done = 1;
		return produced;
	}

	if (zerr != Z_OK) {
		giterr_set(GITERR_ZLIB, "error reading from the zlib stream");
		return -1;
	}

	/* If we didn't write anything out but we're not done, we need more data */
	if (!produced)
		return GIT_EBUFS;

	return produced;
}

/* Release the zlib state held by a packfile stream. */
void git_packfile_stream_free(git_packfile_stream *obj)
{
	(void)inflateEnd(&obj->zstream);
}

852
static int packfile_unpack_compressed(
853 854 855 856 857 858
	git_rawobj *obj,
	struct git_pack_file *p,
	git_mwindow **w_curs,
	git_off_t *curpos,
	size_t size,
	git_otype type)
859
{
860
	size_t buf_size;
861 862 863 864
	int st;
	z_stream stream;
	unsigned char *buffer, *in;

865 866
	GITERR_CHECK_ALLOC_ADD(&buf_size, size, 1);
	buffer = git__calloc(1, buf_size);
867
	GITERR_CHECK_ALLOC(buffer);
868 869 870

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buffer;
871
	stream.avail_out = (uInt)buf_size;
Russell Belfer committed
872 873
	stream.zalloc = use_git_alloc;
	stream.zfree = use_git_free;
874 875 876

	st = inflateInit(&stream);
	if (st != Z_OK) {
877
		git__free(buffer);
878
		giterr_set(GITERR_ZLIB, "failed to init zlib stream on unpack");
879

880
		return -1;
881 882 883
	}

	do {
884
		in = pack_window_open(p, w_curs, *curpos, &stream.avail_in);
885 886
		stream.next_in = in;
		st = inflate(&stream, Z_FINISH);
887
		git_mwindow_close(w_curs);
888 889 890 891

		if (!stream.avail_out)
			break; /* the payload is larger than it should be */

892 893 894
		if (st == Z_BUF_ERROR && in == NULL) {
			inflateEnd(&stream);
			git__free(buffer);
895
			return GIT_EBUFS;
896 897
		}

898
		*curpos += stream.next_in - in;
899 900 901 902 903
	} while (st == Z_OK || st == Z_BUF_ERROR);

	inflateEnd(&stream);

	if ((st != Z_STREAM_END) || stream.total_out != size) {
904
		git__free(buffer);
905
		giterr_set(GITERR_ZLIB, "error inflating zlib stream");
906
		return -1;
907 908 909 910 911
	}

	obj->type = type;
	obj->len = size;
	obj->data = buffer;
912
	return 0;
913 914
}

915 916 917 918
/*
 * curpos is where the data starts, delta_obj_offset is the where the
 * header starts
 */
919 920 921 922 923 924
git_off_t get_delta_base(
	struct git_pack_file *p,
	git_mwindow **w_curs,
	git_off_t *curpos,
	git_otype type,
	git_off_t delta_obj_offset)
925
{
926 927
	unsigned int left = 0;
	unsigned char *base_info;
928
	git_off_t base_offset;
929 930
	git_oid unused;

931 932 933
	base_info = pack_window_open(p, w_curs, *curpos, &left);
	/* Assumption: the only reason this would fail is because the file is too small */
	if (base_info == NULL)
934
		return GIT_EBUFS;
935 936
	/* pack_window_open() assured us we have [base_info, base_info + 20)
	 * as a range that we can look at without walking off the
Vicent Marti committed
937 938
	 * end of the mapped window. Its actually the hash size
	 * that is assured. An OFS_DELTA longer than the hash size
939 940 941 942 943 944 945
	 * is stupid, as then a REF_DELTA would be smaller to store.
	 */
	if (type == GIT_OBJ_OFS_DELTA) {
		unsigned used = 0;
		unsigned char c = base_info[used++];
		base_offset = c & 127;
		while (c & 128) {
946
			if (left <= used)
947
				return GIT_EBUFS;
948 949
			base_offset += 1;
			if (!base_offset || MSB(base_offset, 7))
Vicent Marti committed
950
				return 0; /* overflow */
951 952 953 954 955
			c = base_info[used++];
			base_offset = (base_offset << 7) + (c & 127);
		}
		base_offset = delta_obj_offset - base_offset;
		if (base_offset <= 0 || base_offset >= delta_obj_offset)
Vicent Marti committed
956
			return 0; /* out of bound */
957 958
		*curpos += used;
	} else if (type == GIT_OBJ_REF_DELTA) {
959 960
		/* If we have the cooperative cache, search in it first */
		if (p->has_cache) {
961 962
			khiter_t k;
			git_oid oid;
963

964 965 966
			git_oid_fromraw(&oid, base_info);
			k = kh_get(oid, p->idx_cache, &oid);
			if (k != kh_end(p->idx_cache)) {
967
				*curpos += 20;
968
				return ((struct git_pack_entry *)kh_value(p->idx_cache, k))->offset;
969 970 971 972 973 974
			} else {
				/* If we're building an index, don't try to find the pack
				 * entry; we just haven't seen it yet.  We'll make
				 * progress again in the next loop.
				 */
				return GIT_PASSTHROUGH;
975 976
			}
		}
977

978
		/* The base entry _must_ be in the same pack */
979 980
		if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
			return packfile_error("base entry delta is not in the same pack");
981 982 983 984 985 986
		*curpos += 20;
	} else
		return 0;

	return base_offset;
}
987 988 989 990 991 992 993

/***********************************************************
 *
 * PACKFILE METHODS
 *
 ***********************************************************/

994 995 996 997 998 999 1000 1001 1002 1003 1004 1005
/*
 * Close the packfile's descriptor (freeing its windows first) if it is
 * open, and optionally remove the packfile from disk.
 */
void git_packfile_close(struct git_pack_file *p, bool unlink_packfile)
{
	if (p->mwf.fd >= 0) {
		git_mwindow_free_all_locked(&p->mwf);
		p_close(p->mwf.fd);
		p->mwf.fd = -1;
	}

	if (!unlink_packfile)
		return;

	p_unlink(p->pack_name);
}

1006
void git_packfile_free(struct git_pack_file *p)
1007
{
1008 1009 1010
	if (!p)
		return;

1011
	cache_free(&p->bases);
1012

1013
	git_packfile_close(p, false);
1014 1015 1016

	pack_index_free(p);

1017
	git__free(p->bad_object_sha1);
1018 1019

	git_mutex_free(&p->lock);
1020
	git_mutex_free(&p->bases.lock);
1021
	git__free(p);
1022 1023 1024 1025 1026 1027 1028 1029 1030
}

static int packfile_open(struct git_pack_file *p)
{
	struct stat st;
	struct git_pack_header hdr;
	git_oid sha1;
	unsigned char *idx_sha1;

1031
	if (p->index_version == -1 && pack_index_open(p) < 0)
1032
		return git_odb__error_notfound("failed to open packfile", NULL, 0);
1033

1034 1035 1036 1037 1038 1039 1040 1041 1042
	/* if mwf opened by another thread, return now */
	if (git_mutex_lock(&p->lock) < 0)
		return packfile_error("failed to get lock for open");

	if (p->mwf.fd >= 0) {
		git_mutex_unlock(&p->lock);
		return 0;
	}

1043
	/* TODO: open with noatime */
1044
	p->mwf.fd = git_futils_open_ro(p->pack_name);
1045 1046
	if (p->mwf.fd < 0)
		goto cleanup;
1047

1048 1049 1050
	if (p_fstat(p->mwf.fd, &st) < 0 ||
		git_mwindow_file_register(&p->mwf) < 0)
		goto cleanup;
1051 1052 1053 1054 1055

	/* If we created the struct before we had the pack we lack size. */
	if (!p->mwf.size) {
		if (!S_ISREG(st.st_mode))
			goto cleanup;
1056
		p->mwf.size = (git_off_t)st.st_size;
1057 1058 1059 1060 1061 1062 1063 1064 1065
	} else if (p->mwf.size != st.st_size)
		goto cleanup;

#if 0
	/* We leave these file descriptors open with sliding mmap;
	 * there is no point keeping them open across exec(), though.
	 */
	fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
	if (fd_flag < 0)
1066
		goto cleanup;
1067 1068 1069

	fd_flag |= FD_CLOEXEC;
	if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1070
		goto cleanup;
1071 1072 1073
#endif

	/* Verify we recognize this pack file format. */
1074 1075 1076
	if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
		hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
		!pack_version_ok(hdr.hdr_version))
1077 1078 1079
		goto cleanup;

	/* Verify the pack matches its index. */
1080 1081 1082
	if (p->num_objects != ntohl(hdr.hdr_entries) ||
		p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
		p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
1083 1084 1085 1086
		goto cleanup;

	idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;

1087 1088 1089 1090 1091
	if (git_oid__cmp(&sha1, (git_oid *)idx_sha1) != 0)
		goto cleanup;

	git_mutex_unlock(&p->lock);
	return 0;
1092 1093

cleanup:
1094
	giterr_set(GITERR_OS, "invalid packfile '%s'", p->pack_name);
1095

1096 1097
	if (p->mwf.fd >= 0)
		p_close(p->mwf.fd);
1098
	p->mwf.fd = -1;
1099 1100 1101

	git_mutex_unlock(&p->lock);

1102
	return -1;
1103 1104
}

1105 1106 1107 1108 1109 1110 1111 1112
/*
 * Build the packfile name for `path` by replacing its last four
 * characters (the length of ".idx") with ".pack".
 *
 * On success *out receives a newly allocated string and 0 is returned.
 * Paths shorter than ".idx" are rejected with a not-found error; -1 is
 * returned if the name cannot be formatted.
 */
int git_packfile__name(char **out, const char *path)
{
	git_buf name = GIT_BUF_INIT;
	size_t path_len = strlen(path);
	size_t stem_len;

	if (path_len < strlen(".idx"))
		return git_odb__error_notfound("invalid packfile path", NULL, 0);

	stem_len = path_len - strlen(".idx");

	if (git_buf_printf(&name, "%.*s.pack", (int)stem_len, path) < 0)
		return -1;

	*out = git_buf_detach(&name);
	return 0;
}

1122
int git_packfile_alloc(struct git_pack_file **pack_out, const char *path)
1123 1124 1125
{
	struct stat st;
	struct git_pack_file *p;
1126
	size_t path_len = path ? strlen(path) : 0, alloc_len;
1127 1128

	*pack_out = NULL;
1129

1130
	if (path_len < strlen(".idx"))
1131
		return git_odb__error_notfound("invalid packfile path", NULL, 0);
1132

1133 1134
	GITERR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*p), path_len);
	GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
1135

1136
	p = git__calloc(1, alloc_len);
1137
	GITERR_CHECK_ALLOC(p);
1138

1139 1140
	memcpy(p->pack_name, path, path_len + 1);

1141 1142 1143 1144
	/*
	 * Make sure a corresponding .pack file exists and that
	 * the index looks sane.
	 */
1145 1146 1147 1148 1149 1150
	if (git__suffixcmp(path, ".idx") == 0) {
		size_t root_len = path_len - strlen(".idx");

		memcpy(p->pack_name + root_len, ".keep", sizeof(".keep"));
		if (git_path_exists(p->pack_name) == true)
			p->pack_keep = 1;
1151

1152 1153
		memcpy(p->pack_name + root_len, ".pack", sizeof(".pack"));
	}
1154

1155
	if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
1156
		git__free(p);
1157
		return git_odb__error_notfound("packfile not found", NULL, 0);
1158 1159 1160 1161 1162
	}

	/* ok, it looks sane as far as we can check without
	 * actually mapping the pack file.
	 */
1163
	p->mwf.fd = -1;
1164
	p->mwf.size = st.st_size;
1165 1166
	p->pack_local = 1;
	p->mtime = (git_time_t)st.st_mtime;
1167
	p->index_version = -1;
1168

Russell Belfer committed
1169
	if (git_mutex_init(&p->lock)) {
1170
		giterr_set(GITERR_OS, "failed to initialize packfile mutex");
Russell Belfer committed
1171 1172 1173
		git__free(p);
		return -1;
	}
1174

1175 1176 1177 1178 1179
	if (cache_init(&p->bases) < 0) {
		git__free(p);
		return -1;
	}

1180
	*pack_out = p;
1181 1182

	return 0;
1183 1184 1185 1186 1187 1188 1189 1190
}

/***********************************************************
 *
 * PACKFILE ENTRY SEARCH INTERNALS
 *
 ***********************************************************/

1191
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
1192 1193
{
	const unsigned char *index = p->index_map.data;
1194
	const unsigned char *end = index + p->index_map.len;
1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
	index += 4 * 256;
	if (p->index_version == 1) {
		return ntohl(*((uint32_t *)(index + 24 * n)));
	} else {
		uint32_t off;
		index += 8 + p->num_objects * (20 + 4);
		off = ntohl(*((uint32_t *)(index + 4 * n)));
		if (!(off & 0x80000000))
			return off;
		index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1205 1206 1207 1208 1209

		/* Make sure we're not being sent out of bounds */
		if (index >= end - 8)
			return -1;

1210
		return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
Vicent Marti committed
1211
					ntohl(*((uint32_t *)(index + 4)));
1212 1213 1214
	}
}

1215 1216 1217 1218
/*
 * Comparison callback that orders two buffers by their first four
 * bytes only, with the same sign convention as memcmp().
 */
static int git__memcmp4(const void *a, const void *b)
{
	const size_t width = 4;

	return memcmp(a, b, width);
}

1219
int git_pack_foreach_entry(
1220
	struct git_pack_file *p,
1221
	git_odb_foreach_cb cb,
1222
	void *data)
1223 1224 1225
{
	const unsigned char *index = p->index_map.data, *current;
	uint32_t i;
1226
	int error = 0;
1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242

	if (index == NULL) {
		if ((error = pack_index_open(p)) < 0)
			return error;

		assert(p->index_map.data);

		index = p->index_map.data;
	}

	if (p->index_version > 1) {
		index += 8;
	}

	index += 4 * 256;

1243 1244
	if (p->oids == NULL) {
		git_vector offsets, oids;
1245

1246 1247 1248 1249 1250
		if ((error = git_vector_init(&oids, p->num_objects, NULL)))
			return error;

		if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
			return error;
1251

1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
		if (p->index_version > 1) {
			const unsigned char *off = index + 24 * p->num_objects;
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&off[4 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
		} else {
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&index[24 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&oids, (void*)&current[4]);
		}
1266

1267
		git_vector_free(&offsets);
1268
		p->oids = (git_oid **)git_vector_detach(NULL, NULL, &oids);
1269 1270
	}

1271
	for (i = 0; i < p->num_objects; i++)
1272 1273
		if ((error = cb(p->oids[i], data)) != 0)
			return giterr_set_after_callback(error);
1274

1275
	return error;
1276 1277
}

1278
static int pack_entry_find_offset(
1279 1280 1281 1282
	git_off_t *offset_out,
	git_oid *found_oid,
	struct git_pack_file *p,
	const git_oid *short_oid,
1283
	size_t len)
1284
{
1285 1286
	const uint32_t *level1_ofs;
	const unsigned char *index;
1287 1288
	unsigned hi, lo, stride;
	int pos, found = 0;
1289
	git_off_t offset;
1290 1291 1292 1293
	const unsigned char *current = 0;

	*offset_out = 0;

1294
	if (p->index_version == -1) {
1295
		int error;
1296

1297 1298 1299 1300
		if ((error = pack_index_open(p)) < 0)
			return error;
		assert(p->index_map.data);
	}
1301

1302 1303 1304
	index = p->index_map.data;
	level1_ofs = p->index_map.data;

1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
	if (p->index_version > 1) {
		level1_ofs += 2;
		index += 8;
	}

	index += 4 * 256;
	hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));

	if (p->index_version > 1) {
		stride = 20;
	} else {
		stride = 24;
		index += 4;
	}

#ifdef INDEX_DEBUG_LOOKUP
	printf("%02x%02x%02x... lo %u hi %u nr %d\n",
		short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
#endif

1326
#ifdef GIT_USE_LOOKUP
Vicent Marti committed
1327
	pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id);
1328 1329 1330
#else
	pos = sha1_position(index, stride, lo, hi, short_oid->id);
#endif
1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342

	if (pos >= 0) {
		/* An object matching exactly the oid was found */
		found = 1;
		current = index + pos * stride;
	} else {
		/* No object was found */
		/* pos refers to the object with the "closest" oid to short_oid */
		pos = - 1 - pos;
		if (pos < (int)p->num_objects) {
			current = index + pos * stride;

Russell Belfer committed
1343
			if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
1344 1345 1346 1347
				found = 1;
		}
	}

1348
	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
1349 1350 1351 1352 1353 1354 1355 1356
		/* Check for ambiguousity */
		const unsigned char *next = current + stride;

		if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
			found = 2;
		}
	}

1357
	if (!found)
1358
		return git_odb__error_notfound("failed to find offset for pack entry", short_oid, len);
1359 1360
	if (found > 1)
		return git_odb__error_ambiguous("found multiple offsets for pack entry");
1361

1362 1363 1364 1365 1366 1367
	if ((offset = nth_packed_object_offset(p, pos)) < 0) {
		giterr_set(GITERR_ODB, "packfile index is corrupt");
		return -1;
	}

	*offset_out = offset;
1368
	git_oid_fromraw(found_oid, current);
1369 1370

#ifdef INDEX_DEBUG_LOOKUP
1371
	{
1372 1373 1374 1375 1376
		unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
		git_oid_fmt(hex_sha1, found_oid);
		hex_sha1[GIT_OID_HEXSZ] = '\0';
		printf("found lo=%d %s\n", lo, hex_sha1);
	}
1377
#endif
1378

1379
	return 0;
1380 1381 1382 1383 1384 1385
}

int git_pack_entry_find(
		struct git_pack_entry *e,
		struct git_pack_file *p,
		const git_oid *short_oid,
1386
		size_t len)
1387
{
1388
	git_off_t offset;
1389 1390 1391 1392 1393 1394 1395 1396
	git_oid found_oid;
	int error;

	assert(p);

	if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
		unsigned i;
		for (i = 0; i < p->num_bad_objects; i++)
1397
			if (git_oid__cmp(short_oid, &p->bad_object_sha1[i]) == 0)
1398
				return packfile_error("bad object found in packfile");
1399 1400 1401
	}

	error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
1402 1403
	if (error < 0)
		return error;
1404 1405 1406 1407

	/* we found a unique entry in the index;
	 * make sure the packfile backing the index
	 * still exists on disk */
1408 1409
	if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
		return error;
1410 1411 1412 1413 1414

	e->offset = offset;
	e->p = p;

	git_oid_cpy(&e->sha1, &found_oid);
1415
	return 0;
1416
}