pack.c 31.9 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8
#include "common.h"
9 10 11
#include "odb.h"
#include "pack.h"
#include "delta-apply.h"
12
#include "sha1_lookup.h"
13 14
#include "mwindow.h"
#include "fileops.h"
15
#include "oid.h"
16

17
#include <zlib.h>
18

19
static int packfile_open(struct git_pack_file *p);
20
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
21 22 23 24
int packfile_unpack_compressed(
		git_rawobj *obj,
		struct git_pack_file *p,
		git_mwindow **w_curs,
25
		git_off_t *curpos,
26 27 28 29 30
		size_t size,
		git_otype type);

/* Can find the offset of an object given
 * a prefix of an identifier.
31
 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
32 33 34 35 36
 * is ambiguous within the pack.
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
 */
static int pack_entry_find_offset(
37
		git_off_t *offset_out,
38 39 40
		git_oid *found_oid,
		struct git_pack_file *p,
		const git_oid *short_oid,
41
		size_t len);
42

43 44 45 46 47 48
static int packfile_error(const char *message)
{
	giterr_set(GITERR_ODB, "Invalid pack file - %s", message);
	return -1;
}

49 50 51
/********************
 * Delta base cache
 ********************/
52

53
static git_pack_cache_entry *new_cache_object(git_rawobj *source)
54
{
55
	git_pack_cache_entry *e = git__calloc(1, sizeof(git_pack_cache_entry));
56 57 58
	if (!e)
		return NULL;

59
	git_atomic_inc(&e->refcount);
60 61 62 63 64 65 66 67 68 69
	memcpy(&e->raw, source, sizeof(git_rawobj));

	return e;
}

static void free_cache_object(void *o)
{
	git_pack_cache_entry *e = (git_pack_cache_entry *)o;

	if (e != NULL) {
70
		assert(e->refcount.val == 0);
71 72 73 74 75
		git__free(e->raw.data);
		git__free(e);
	}
}

76 77 78 79 80 81 82 83 84 85 86
static void cache_free(git_pack_cache *cache)
{
	khiter_t k;

	if (cache->entries) {
		for (k = kh_begin(cache->entries); k != kh_end(cache->entries); k++) {
			if (kh_exist(cache->entries, k))
				free_cache_object(kh_value(cache->entries, k));
		}

		git_offmap_free(cache->entries);
87
		cache->entries = NULL;
88 89 90 91 92 93 94
	}
}

static int cache_init(git_pack_cache *cache)
{
	cache->entries = git_offmap_alloc();
	GITERR_CHECK_ALLOC(cache->entries);
95

96
	cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;
Russell Belfer committed
97 98 99 100 101 102 103 104 105

	if (git_mutex_init(&cache->lock)) {
		giterr_set(GITERR_OS, "Failed to initialize pack cache mutex");

		git__free(cache->entries);
		cache->entries = NULL;

		return -1;
	}
106 107 108 109

	return 0;
}

110
static git_pack_cache_entry *cache_get(git_pack_cache *cache, git_off_t offset)
111 112 113 114
{
	khiter_t k;
	git_pack_cache_entry *entry = NULL;

115 116 117
	if (git_mutex_lock(&cache->lock) < 0)
		return NULL;

118 119 120 121
	k = kh_get(off, cache->entries, offset);
	if (k != kh_end(cache->entries)) { /* found it */
		entry = kh_value(cache->entries, k);
		git_atomic_inc(&entry->refcount);
122
		entry->last_usage = cache->use_ctr++;
123 124 125 126 127 128
	}
	git_mutex_unlock(&cache->lock);

	return entry;
}

129 130 131
/* Run with the cache lock held */
static void free_lowest_entry(git_pack_cache *cache)
{
132 133 134
	git_pack_cache_entry *entry;
	khiter_t k;

135 136 137 138 139
	for (k = kh_begin(cache->entries); k != kh_end(cache->entries); k++) {
		if (!kh_exist(cache->entries, k))
			continue;

		entry = kh_value(cache->entries, k);
140

141 142 143 144
		if (entry && entry->refcount.val == 0) {
			cache->memory_used -= entry->raw.len;
			kh_del(off, cache->entries, k);
			free_cache_object(entry);
145 146
		}
	}
147 148
}

149 150 151 152 153
static int cache_add(
		git_pack_cache_entry **cached_out,
		git_pack_cache *cache,
		git_rawobj *base,
		git_off_t offset)
154 155 156 157 158
{
	git_pack_cache_entry *entry;
	int error, exists = 0;
	khiter_t k;

159 160 161
	if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
		return -1;

162 163
	entry = new_cache_object(base);
	if (entry) {
164 165
		if (git_mutex_lock(&cache->lock) < 0) {
			giterr_set(GITERR_OS, "failed to lock cache");
Jacques Germishuys committed
166
			git__free(entry);
167 168
			return -1;
		}
169 170 171
		/* Add it to the cache if nobody else has */
		exists = kh_get(off, cache->entries, offset) != kh_end(cache->entries);
		if (!exists) {
172 173 174
			while (cache->memory_used + base->len > cache->memory_limit)
				free_lowest_entry(cache);

175 176 177
			k = kh_put(off, cache->entries, offset, &error);
			assert(error != 0);
			kh_value(cache->entries, k) = entry;
178
			cache->memory_used += entry->raw.len;
179 180

			*cached_out = entry;
181 182 183 184 185 186 187 188 189 190 191 192
		}
		git_mutex_unlock(&cache->lock);
		/* Somebody beat us to adding it into the cache */
		if (exists) {
			git__free(entry);
			return -1;
		}
	}

	return 0;
}

193 194 195 196 197 198 199 200
/***********************************************************
 *
 * PACK INDEX METHODS
 *
 ***********************************************************/

static void pack_index_free(struct git_pack_file *p)
{
201 202 203 204
	if (p->oids) {
		git__free(p->oids);
		p->oids = NULL;
	}
205 206 207 208 209 210
	if (p->index_map.data) {
		git_futils_mmap_free(&p->index_map);
		p->index_map.data = NULL;
	}
}

Vicent Marti committed
211
static int pack_index_check(const char *path, struct git_pack_file *p)
212 213 214 215 216 217 218
{
	struct git_pack_idx_header *hdr;
	uint32_t version, nr, i, *index;
	void *idx_map;
	size_t idx_size;
	struct stat st;
	int error;
219 220
	/* TODO: properly open the file without access time using O_NOATIME */
	git_file fd = git_futils_open_ro(path);
221
	if (fd < 0)
222
		return fd;
223

224 225 226 227 228 229 230
	if (p_fstat(fd, &st) < 0) {
		p_close(fd);
		giterr_set(GITERR_OS, "Unable to stat pack index '%s'", path);
		return -1;
	}

	if (!S_ISREG(st.st_mode) ||
231 232 233
		!git__is_sizet(st.st_size) ||
		(idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
	{
234
		p_close(fd);
235
		giterr_set(GITERR_ODB, "Invalid pack index '%s'", path);
236
		return -1;
237 238 239
	}

	error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
240

241 242
	p_close(fd);

243 244
	if (error < 0)
		return error;
245 246 247 248 249 250 251 252

	hdr = idx_map = p->index_map.data;

	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
		version = ntohl(hdr->idx_version);

		if (version < 2 || version > 2) {
			git_futils_mmap_free(&p->index_map);
253
			return packfile_error("unsupported index version");
254 255 256 257 258 259 260 261 262
		}

	} else
		version = 1;

	nr = 0;
	index = idx_map;

	if (version > 1)
Vicent Marti committed
263
		index += 2; /* skip index header */
264 265 266 267 268

	for (i = 0; i < 256; i++) {
		uint32_t n = ntohl(index[i]);
		if (n < nr) {
			git_futils_mmap_free(&p->index_map);
269
			return packfile_error("index is non-monotonic");
270 271 272 273 274 275 276
		}
		nr = n;
	}

	if (version == 1) {
		/*
		 * Total size:
Vicent Marti committed
277 278 279 280
		 * - 256 index entries 4 bytes each
		 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
		 * - 20-byte SHA1 of the packfile
		 * - 20-byte SHA1 file checksum
281 282 283
		 */
		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
			git_futils_mmap_free(&p->index_map);
284
			return packfile_error("index is corrupted");
285 286 287 288
		}
	} else if (version == 2) {
		/*
		 * Minimum size:
Vicent Marti committed
289 290 291 292 293 294 295
		 * - 8 bytes of header
		 * - 256 index entries 4 bytes each
		 * - 20-byte sha1 entry * nr
		 * - 4-byte crc entry * nr
		 * - 4-byte offset entry * nr
		 * - 20-byte SHA1 of the packfile
		 * - 20-byte SHA1 file checksum
296 297 298 299 300 301 302 303 304 305 306 307
		 * And after the 4-byte offset table might be a
		 * variable sized table containing 8-byte entries
		 * for offsets larger than 2^31.
		 */
		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
		unsigned long max_size = min_size;

		if (nr)
			max_size += (nr - 1)*8;

		if (idx_size < min_size || idx_size > max_size) {
			git_futils_mmap_free(&p->index_map);
308
			return packfile_error("wrong index size");
309 310 311 312
		}
	}

	p->num_objects = nr;
313
	p->index_version = version;
314
	return 0;
315 316 317 318 319
}

static int pack_index_open(struct git_pack_file *p)
{
	char *idx_name;
320 321 322
	int error = 0;
	size_t name_len, base_len;

323
	if (p->index_version > -1)
Russell Belfer committed
324
		return 0;
325

326 327
	name_len = strlen(p->pack_name);
	assert(name_len > strlen(".pack")); /* checked by git_pack_file alloc */
328

Russell Belfer committed
329 330
	if ((idx_name = git__malloc(name_len)) == NULL)
		return -1;
331

332 333 334
	base_len = name_len - strlen(".pack");
	memcpy(idx_name, p->pack_name, base_len);
	memcpy(idx_name + base_len, ".idx", sizeof(".idx"));
335

336 337
	if ((error = git_mutex_lock(&p->lock)) < 0) {
		git__free(idx_name);
Russell Belfer committed
338
		return error;
339
	}
Russell Belfer committed
340

341
	if (p->index_version == -1)
Russell Belfer committed
342
		error = pack_index_check(idx_name, p);
343

344
	git__free(idx_name);
345

346 347
	git_mutex_unlock(&p->lock);

348
	return error;
349 350 351 352
}

static unsigned char *pack_window_open(
		struct git_pack_file *p,
353
		git_mwindow **w_cursor,
354
		git_off_t offset,
355 356
		unsigned int *left)
{
357
	if (p->mwf.fd == -1 && packfile_open(p) < 0)
358 359 360 361 362 363 364 365 366 367 368 369 370
		return NULL;

	/* Since packfiles end in a hash of their content and it's
	 * pointless to ask for an offset into the middle of that
	 * hash, and the pack_window_contains function above wouldn't match
	 * don't allow an offset too close to the end of the file.
	 */
	if (offset > (p->mwf.size - 20))
		return NULL;

	return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
 }

371 372 373 374 375 376 377 378
/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size",
 *    then three bits of "type",
 *    with the high bit being "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
379
size_t git_packfile__object_header(unsigned char *hdr, size_t size, git_otype type)
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
{
	unsigned char *hdr_base;
	unsigned char c;

	assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA);

	/* TODO: add support for chunked objects; see git.git 6c0d19b1 */

	c = (unsigned char)((type << 4) | (size & 15));
	size >>= 4;
	hdr_base = hdr;

	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
	}
	*hdr++ = c;

399
	return (hdr - hdr_base);
400 401 402
}


403 404
static int packfile_unpack_header1(
		unsigned long *usedp,
405 406 407 408 409 410 411 412
		size_t *sizep,
		git_otype *type,
		const unsigned char *buf,
		unsigned long len)
{
	unsigned shift;
	unsigned long size, c;
	unsigned long used = 0;
413

414 415 416 417 418
	c = buf[used++];
	*type = (c >> 4) & 7;
	size = c & 15;
	shift = 4;
	while (c & 0x80) {
419 420
		if (len <= used) {
			giterr_set(GITERR_ODB, "buffer too small");
421
			return GIT_EBUFS;
422
		}
423 424 425

		if (bitsizeof(long) <= shift) {
			*usedp = 0;
426
			giterr_set(GITERR_ODB, "packfile corrupted");
427 428
			return -1;
		}
429 430 431 432 433 434 435

		c = buf[used++];
		size += (c & 0x7f) << shift;
		shift += 7;
	}

	*sizep = (size_t)size;
436 437
	*usedp = used;
	return 0;
438 439 440 441 442 443 444
}

int git_packfile_unpack_header(
		size_t *size_p,
		git_otype *type_p,
		git_mwindow_file *mwf,
		git_mwindow **w_curs,
445
		git_off_t *curpos)
446 447 448 449
{
	unsigned char *base;
	unsigned int left;
	unsigned long used;
450
	int ret;
451 452

	/* pack_window_open() assures us we have [base, base + 20) available
Vicent Marti committed
453 454
	 * as a range that we can look at at. (Its actually the hash
	 * size that is assured.) With our object header encoding
455 456 457
	 * the maximum deflated object size is 2^137, which is just
	 * insane, so we know won't exceed what we have been given.
	 */
458
/*	base = pack_window_open(p, w_curs, *curpos, &left); */
459 460
	base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
	if (base == NULL)
461
		return GIT_EBUFS;
462

463
	ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
464
	git_mwindow_close(w_curs);
465
	if (ret == GIT_EBUFS)
466 467
		return ret;
	else if (ret < 0)
468
		return packfile_error("header length is zero");
469 470

	*curpos += used;
471
	return 0;
472 473
}

474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
int git_packfile_resolve_header(
		size_t *size_p,
		git_otype *type_p,
		struct git_pack_file *p,
		git_off_t offset)
{
	git_mwindow *w_curs = NULL;
	git_off_t curpos = offset;
	size_t size;
	git_otype type;
	git_off_t base_offset;
	int error;

	error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
	git_mwindow_close(&w_curs);
	if (error < 0)
		return error;

	if (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
		size_t base_size;
		git_rawobj delta;
		base_offset = get_delta_base(p, &w_curs, &curpos, type, offset);
		git_mwindow_close(&w_curs);
		error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, size, type);
		git_mwindow_close(&w_curs);
		if (error < 0)
			return error;
		error = git__delta_read_header(delta.data, delta.len, &base_size, size_p);
		git__free(delta.data);
		if (error < 0)
			return error;
	} else
		*size_p = size;

	while (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
		curpos = base_offset;
		error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
		git_mwindow_close(&w_curs);
		if (error < 0)
			return error;
		if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
			break;
		base_offset = get_delta_base(p, &w_curs, &curpos, type, base_offset);
		git_mwindow_close(&w_curs);
	}
	*type_p = type;

	return error;
}

524 525
#define SMALL_STACK_SIZE 64

526 527 528 529 530 531
/**
 * Generate the chain of dependencies which we need to get to the
 * object at `off`. `chain` is used a stack, popping gives the right
 * order to apply deltas on. If an object is found in the pack's base
 * cache, we stop calculating there.
 */
532 533 534 535
static int pack_dependency_chain(git_dependency_chain *chain_out,
				 git_pack_cache_entry **cached_out, git_off_t *cached_off,
				 struct pack_chain_elem *small_stack, size_t *stack_sz,
				 struct git_pack_file *p, git_off_t obj_offset)
536 537 538 539
{
	git_dependency_chain chain = GIT_ARRAY_INIT;
	git_mwindow *w_curs = NULL;
	git_off_t curpos = obj_offset, base_offset;
540 541
	int error = 0, use_heap = 0;
	size_t size, elem_pos;
542 543
	git_otype type;

544
	elem_pos = 0;
545 546 547 548 549 550 551 552 553 554 555
	while (true) {
		struct pack_chain_elem *elem;
		git_pack_cache_entry *cached = NULL;

		/* if we have a base cached, we can stop here instead */
		if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
			*cached_out = cached;
			*cached_off = obj_offset;
			break;
		}

556 557 558 559 560 561 562 563 564
		/* if we run out of space on the small stack, use the array */
		if (elem_pos == SMALL_STACK_SIZE) {
			git_array_init_to_size(chain, elem_pos);
			GITERR_CHECK_ARRAY(chain);
			memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
			chain.size = elem_pos;
			use_heap = 1;
		}

565
		curpos = obj_offset;
566 567 568 569 570 571 572 573
		if (!use_heap) {
			elem = &small_stack[elem_pos];
		} else {
			elem = git_array_alloc(chain);
			if (!elem) {
				error = -1;
				goto on_error;
			}
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
		}

		elem->base_key = obj_offset;

		error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
		git_mwindow_close(&w_curs);

		if (error < 0)
			goto on_error;

		elem->offset = curpos;
		elem->size = size;
		elem->type = type;
		elem->base_key = obj_offset;

		if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
			break;

		base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
		git_mwindow_close(&w_curs);

		if (base_offset == 0) {
			error = packfile_error("delta offset is zero");
			goto on_error;
		}
		if (base_offset < 0) { /* must actually be an error code */
			error = (int)base_offset;
			goto on_error;
		}

		/* we need to pass the pos *after* the delta-base bit */
		elem->offset = curpos;

		/* go through the loop again, but with the new object */
		obj_offset = base_offset;
609
		elem_pos++;
610 611
	}

612 613
	
	*stack_sz = elem_pos + 1;
614 615 616 617 618 619 620 621
	*chain_out = chain;
	return error;

on_error:
	git_array_clear(chain);
	return error;
}

622
int git_packfile_unpack(
623 624 625
	git_rawobj *obj,
	struct git_pack_file *p,
	git_off_t *obj_offset)
626 627
{
	git_mwindow *w_curs = NULL;
628
	git_off_t curpos = *obj_offset;
629 630
	int error, free_base = 0;
	git_dependency_chain chain = GIT_ARRAY_INIT;
631
	struct pack_chain_elem *elem = NULL, *stack;
632
	git_pack_cache_entry *cached = NULL;
633
	struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
634
	size_t stack_size = 0, elem_pos, alloclen;
635
	git_otype base_type;
636 637 638 639 640

	/*
	 * TODO: optionally check the CRC on the packfile
	 */

641
	error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
642 643 644
	if (error < 0)
		return error;

645 646 647 648
	obj->data = NULL;
	obj->len = 0;
	obj->type = GIT_OBJ_BAD;

649 650 651 652
	/* let's point to the right stack */
	stack = chain.ptr ? chain.ptr : small_stack;

	elem_pos = stack_size;
653
	if (cached) {
654
		memcpy(obj, &cached->raw, sizeof(git_rawobj));
655
		base_type = obj->type;
656
		elem_pos--;	/* stack_size includes the base, which isn't actually there */
657
	} else {
658
		elem = &stack[--elem_pos];
659
		base_type = elem->type;
660
	}
661

662 663 664 665 666
	switch (base_type) {
	case GIT_OBJ_COMMIT:
	case GIT_OBJ_TREE:
	case GIT_OBJ_BLOB:
	case GIT_OBJ_TAG:
667
		if (!cached) {
668 669 670
			curpos = elem->offset;
			error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
			git_mwindow_close(&w_curs);
671
			base_type = elem->type;
672 673 674 675 676 677 678 679 680 681 682 683 684
		}
		if (error < 0)
			goto cleanup;
		break;
	case GIT_OBJ_OFS_DELTA:
	case GIT_OBJ_REF_DELTA:
		error = packfile_error("dependency chain ends in a delta");
		goto cleanup;
	default:
		error = packfile_error("invalid packfile type in header");
		goto cleanup;
	}

685
	/*
686
	 * Finding the object we want a cached base element is
687 688 689 690
	 * problematic, as we need to make sure we don't accidentally
	 * give the caller the cached object, which it would then feel
	 * free to free, so we need to copy the data.
	 */
691
	if (cached && stack_size == 1) {
692
		void *data = obj->data;
693

694 695
		GITERR_CHECK_ALLOC_ADD(&alloclen, obj->len, 1);
		obj->data = git__malloc(alloclen);
696
		GITERR_CHECK_ALLOC(obj->data);
697

698 699 700 701 702
		memcpy(obj->data, data, obj->len + 1);
		git_atomic_dec(&cached->refcount);
		goto cleanup;
	}

703
	/* we now apply each consecutive delta until we run out */
704
	while (elem_pos > 0 && !error) {
705 706
		git_rawobj base, delta;

707 708 709 710 711
		/*
		 * We can now try to add the base to the cache, as
		 * long as it's not already the cached one.
		 */
		if (!cached)
712
			free_base = !!cache_add(&cached, &p->bases, obj, elem->base_key);
713

714
		elem = &stack[elem_pos - 1];
715 716 717 718 719 720 721 722 723 724 725 726 727 728
		curpos = elem->offset;
		error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
		git_mwindow_close(&w_curs);

		if (error < 0)
			break;

		/* the current object becomes the new base, on which we apply the delta */
		base = *obj;
		obj->data = NULL;
		obj->len = 0;
		obj->type = GIT_OBJ_BAD;

		error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
729 730 731 732 733 734 735
		obj->type = base_type;
		/*
		 * We usually don't want to free the base at this
		 * point, as we put it into the cache in the previous
		 * iteration. free_base lets us know that we got the
		 * base object directly from the packfile, so we can free it.
		 */
736
		git__free(delta.data);
737 738 739 740 741 742 743 744 745
		if (free_base) {
			free_base = 0;
			git__free(base.data);
		}

		if (cached) {
			git_atomic_dec(&cached->refcount);
			cached = NULL;
		}
746 747 748

		if (error < 0)
			break;
749

750
		elem_pos--;
751 752
	}

753
cleanup:
754 755 756
	if (error < 0)
		git__free(obj->data);

757
	if (elem)
758
		*obj_offset = curpos;
759

760
	git_array_clear(chain);
761
	return error;
762 763
}

Russell Belfer committed
764 765 766 767 768 769 770 771 772 773 774 775
static void *use_git_alloc(void *opaq, unsigned int count, unsigned int size)
{
	GIT_UNUSED(opaq);
	return git__calloc(count, size);
}

static void use_git_free(void *opaq, void *ptr)
{
	GIT_UNUSED(opaq);
	git__free(ptr);
}

776 777 778 779 780 781 782 783 784 785 786 787 788 789
int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos)
{
	int st;

	memset(obj, 0, sizeof(git_packfile_stream));
	obj->curpos = curpos;
	obj->p = p;
	obj->zstream.zalloc = use_git_alloc;
	obj->zstream.zfree = use_git_free;
	obj->zstream.next_in = Z_NULL;
	obj->zstream.next_out = Z_NULL;
	st = inflateInit(&obj->zstream);
	if (st != Z_OK) {
		git__free(obj);
790
		giterr_set(GITERR_ZLIB, "failed to init packfile stream");
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
		return -1;
	}

	return 0;
}

ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
{
	unsigned char *in;
	size_t written;
	int st;

	if (obj->done)
		return 0;

	in = pack_window_open(obj->p, &obj->mw, obj->curpos, &obj->zstream.avail_in);
	if (in == NULL)
		return GIT_EBUFS;

	obj->zstream.next_out = buffer;
811
	obj->zstream.avail_out = (unsigned int)len;
812 813 814 815 816 817 818 819 820
	obj->zstream.next_in = in;

	st = inflate(&obj->zstream, Z_SYNC_FLUSH);
	git_mwindow_close(&obj->mw);

	obj->curpos += obj->zstream.next_in - in;
	written = len - obj->zstream.avail_out;

	if (st != Z_OK && st != Z_STREAM_END) {
821
		giterr_set(GITERR_ZLIB, "error reading from the zlib stream");
822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
		return -1;
	}

	if (st == Z_STREAM_END)
		obj->done = 1;


	/* If we didn't write anything out but we're not done, we need more data */
	if (!written && st != Z_STREAM_END)
		return GIT_EBUFS;

	return written;

}

void git_packfile_stream_free(git_packfile_stream *obj)
{
	inflateEnd(&obj->zstream);
}

842
int packfile_unpack_compressed(
843 844 845 846 847 848
	git_rawobj *obj,
	struct git_pack_file *p,
	git_mwindow **w_curs,
	git_off_t *curpos,
	size_t size,
	git_otype type)
849
{
850
	size_t buf_size;
851 852 853 854
	int st;
	z_stream stream;
	unsigned char *buffer, *in;

855 856
	GITERR_CHECK_ALLOC_ADD(&buf_size, size, 1);
	buffer = git__calloc(1, buf_size);
857
	GITERR_CHECK_ALLOC(buffer);
858 859 860

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buffer;
861
	stream.avail_out = (uInt)buf_size;
Russell Belfer committed
862 863
	stream.zalloc = use_git_alloc;
	stream.zfree = use_git_free;
864 865 866

	st = inflateInit(&stream);
	if (st != Z_OK) {
867
		git__free(buffer);
868
		giterr_set(GITERR_ZLIB, "failed to init zlib stream on unpack");
869

870
		return -1;
871 872 873
	}

	do {
874
		in = pack_window_open(p, w_curs, *curpos, &stream.avail_in);
875 876
		stream.next_in = in;
		st = inflate(&stream, Z_FINISH);
877
		git_mwindow_close(w_curs);
878 879 880 881

		if (!stream.avail_out)
			break; /* the payload is larger than it should be */

882 883 884
		if (st == Z_BUF_ERROR && in == NULL) {
			inflateEnd(&stream);
			git__free(buffer);
885
			return GIT_EBUFS;
886 887
		}

888
		*curpos += stream.next_in - in;
889 890 891 892 893
	} while (st == Z_OK || st == Z_BUF_ERROR);

	inflateEnd(&stream);

	if ((st != Z_STREAM_END) || stream.total_out != size) {
894
		git__free(buffer);
895
		giterr_set(GITERR_ZLIB, "error inflating zlib stream");
896
		return -1;
897 898 899 900 901
	}

	obj->type = type;
	obj->len = size;
	obj->data = buffer;
902
	return 0;
903 904
}

905 906 907 908
/*
 * curpos is where the data starts, delta_obj_offset is the where the
 * header starts
 */
909 910 911 912 913 914
git_off_t get_delta_base(
	struct git_pack_file *p,
	git_mwindow **w_curs,
	git_off_t *curpos,
	git_otype type,
	git_off_t delta_obj_offset)
915
{
916 917
	unsigned int left = 0;
	unsigned char *base_info;
918
	git_off_t base_offset;
919 920
	git_oid unused;

921 922 923
	base_info = pack_window_open(p, w_curs, *curpos, &left);
	/* Assumption: the only reason this would fail is because the file is too small */
	if (base_info == NULL)
924
		return GIT_EBUFS;
925 926
	/* pack_window_open() assured us we have [base_info, base_info + 20)
	 * as a range that we can look at without walking off the
Vicent Marti committed
927 928
	 * end of the mapped window. Its actually the hash size
	 * that is assured. An OFS_DELTA longer than the hash size
929 930 931 932 933 934 935
	 * is stupid, as then a REF_DELTA would be smaller to store.
	 */
	if (type == GIT_OBJ_OFS_DELTA) {
		unsigned used = 0;
		unsigned char c = base_info[used++];
		base_offset = c & 127;
		while (c & 128) {
936
			if (left <= used)
937
				return GIT_EBUFS;
938 939
			base_offset += 1;
			if (!base_offset || MSB(base_offset, 7))
Vicent Marti committed
940
				return 0; /* overflow */
941 942 943 944 945
			c = base_info[used++];
			base_offset = (base_offset << 7) + (c & 127);
		}
		base_offset = delta_obj_offset - base_offset;
		if (base_offset <= 0 || base_offset >= delta_obj_offset)
Vicent Marti committed
946
			return 0; /* out of bound */
947 948
		*curpos += used;
	} else if (type == GIT_OBJ_REF_DELTA) {
949 950
		/* If we have the cooperative cache, search in it first */
		if (p->has_cache) {
951 952
			khiter_t k;
			git_oid oid;
953

954 955 956
			git_oid_fromraw(&oid, base_info);
			k = kh_get(oid, p->idx_cache, &oid);
			if (k != kh_end(p->idx_cache)) {
957
				*curpos += 20;
958
				return ((struct git_pack_entry *)kh_value(p->idx_cache, k))->offset;
959 960
			}
		}
961
		/* The base entry _must_ be in the same pack */
962 963
		if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
			return packfile_error("base entry delta is not in the same pack");
964 965 966 967 968 969
		*curpos += 20;
	} else
		return 0;

	return base_offset;
}
970 971 972 973 974 975 976

/***********************************************************
 *
 * PACKFILE METHODS
 *
 ***********************************************************/

977
void git_packfile_free(struct git_pack_file *p)
978
{
979 980 981
	if (!p)
		return;

982
	cache_free(&p->bases);
983

984 985
	if (p->mwf.fd >= 0) {
		git_mwindow_free_all_locked(&p->mwf);
986
		p_close(p->mwf.fd);
987
	}
988 989 990

	pack_index_free(p);

991
	git__free(p->bad_object_sha1);
992 993

	git_mutex_free(&p->lock);
994
	git_mutex_free(&p->bases.lock);
995
	git__free(p);
996 997 998 999 1000 1001 1002 1003 1004
}

static int packfile_open(struct git_pack_file *p)
{
	struct stat st;
	struct git_pack_header hdr;
	git_oid sha1;
	unsigned char *idx_sha1;

1005
	if (p->index_version == -1 && pack_index_open(p) < 0)
Russell Belfer committed
1006
		return git_odb__error_notfound("failed to open packfile", NULL);
1007

1008 1009 1010 1011 1012 1013 1014 1015 1016
	/* if mwf opened by another thread, return now */
	if (git_mutex_lock(&p->lock) < 0)
		return packfile_error("failed to get lock for open");

	if (p->mwf.fd >= 0) {
		git_mutex_unlock(&p->lock);
		return 0;
	}

1017
	/* TODO: open with noatime */
1018
	p->mwf.fd = git_futils_open_ro(p->pack_name);
1019 1020
	if (p->mwf.fd < 0)
		goto cleanup;
1021

1022 1023 1024
	if (p_fstat(p->mwf.fd, &st) < 0 ||
		git_mwindow_file_register(&p->mwf) < 0)
		goto cleanup;
1025 1026 1027 1028 1029

	/* If we created the struct before we had the pack we lack size. */
	if (!p->mwf.size) {
		if (!S_ISREG(st.st_mode))
			goto cleanup;
1030
		p->mwf.size = (git_off_t)st.st_size;
1031 1032 1033 1034 1035 1036 1037 1038 1039
	} else if (p->mwf.size != st.st_size)
		goto cleanup;

#if 0
	/* We leave these file descriptors open with sliding mmap;
	 * there is no point keeping them open across exec(), though.
	 */
	fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
	if (fd_flag < 0)
1040
		goto cleanup;
1041 1042 1043

	fd_flag |= FD_CLOEXEC;
	if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1044
		goto cleanup;
1045 1046 1047
#endif

	/* Verify we recognize this pack file format. */
1048 1049 1050
	if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
		hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
		!pack_version_ok(hdr.hdr_version))
1051 1052 1053
		goto cleanup;

	/* Verify the pack matches its index. */
1054 1055 1056
	if (p->num_objects != ntohl(hdr.hdr_entries) ||
		p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
		p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
1057 1058 1059 1060
		goto cleanup;

	idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;

1061 1062 1063 1064 1065
	if (git_oid__cmp(&sha1, (git_oid *)idx_sha1) != 0)
		goto cleanup;

	git_mutex_unlock(&p->lock);
	return 0;
1066 1067

cleanup:
1068
	giterr_set(GITERR_OS, "Invalid packfile '%s'", p->pack_name);
1069

1070 1071
	if (p->mwf.fd >= 0)
		p_close(p->mwf.fd);
1072
	p->mwf.fd = -1;
1073 1074 1075

	git_mutex_unlock(&p->lock);

1076
	return -1;
1077 1078
}

1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
int git_packfile__name(char **out, const char *path)
{
	size_t path_len;
	git_buf buf = GIT_BUF_INIT;

	path_len = strlen(path);

	if (path_len < strlen(".idx"))
		return git_odb__error_notfound("invalid packfile path", NULL);

	if (git_buf_printf(&buf, "%.*s.pack", (int)(path_len - strlen(".idx")), path) < 0)
		return -1;

	*out = git_buf_detach(&buf);
	return 0;
}

1096
int git_packfile_alloc(struct git_pack_file **pack_out, const char *path)
1097 1098 1099
{
	struct stat st;
	struct git_pack_file *p;
1100
	size_t path_len = path ? strlen(path) : 0, alloc_len;
1101 1102

	*pack_out = NULL;
1103

1104
	if (path_len < strlen(".idx"))
1105 1106
		return git_odb__error_notfound("invalid packfile path", NULL);

1107 1108
	GITERR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*p), path_len);
	GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
1109

1110
	p = git__calloc(1, alloc_len);
1111
	GITERR_CHECK_ALLOC(p);
1112

1113 1114
	memcpy(p->pack_name, path, path_len + 1);

1115 1116 1117 1118
	/*
	 * Make sure a corresponding .pack file exists and that
	 * the index looks sane.
	 */
1119 1120 1121 1122 1123 1124
	if (git__suffixcmp(path, ".idx") == 0) {
		size_t root_len = path_len - strlen(".idx");

		memcpy(p->pack_name + root_len, ".keep", sizeof(".keep"));
		if (git_path_exists(p->pack_name) == true)
			p->pack_keep = 1;
1125

1126 1127
		memcpy(p->pack_name + root_len, ".pack", sizeof(".pack"));
	}
1128

1129
	if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
1130
		git__free(p);
Russell Belfer committed
1131
		return git_odb__error_notfound("packfile not found", NULL);
1132 1133 1134 1135 1136
	}

	/* ok, it looks sane as far as we can check without
	 * actually mapping the pack file.
	 */
1137
	p->mwf.fd = -1;
1138
	p->mwf.size = st.st_size;
1139 1140
	p->pack_local = 1;
	p->mtime = (git_time_t)st.st_mtime;
1141
	p->index_version = -1;
1142

Russell Belfer committed
1143 1144 1145 1146 1147
	if (git_mutex_init(&p->lock)) {
		giterr_set(GITERR_OS, "Failed to initialize packfile mutex");
		git__free(p);
		return -1;
	}
1148

1149 1150 1151 1152 1153
	if (cache_init(&p->bases) < 0) {
		git__free(p);
		return -1;
	}

1154
	*pack_out = p;
1155 1156

	return 0;
1157 1158 1159 1160 1161 1162 1163 1164
}

/***********************************************************
 *
 * PACKFILE ENTRY SEARCH INTERNALS
 *
 ***********************************************************/

1165
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
{
	const unsigned char *index = p->index_map.data;
	index += 4 * 256;
	if (p->index_version == 1) {
		return ntohl(*((uint32_t *)(index + 24 * n)));
	} else {
		uint32_t off;
		index += 8 + p->num_objects * (20 + 4);
		off = ntohl(*((uint32_t *)(index + 4 * n)));
		if (!(off & 0x80000000))
			return off;
		index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
		return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
Vicent Marti committed
1179
					ntohl(*((uint32_t *)(index + 4)));
1180 1181 1182
	}
}

1183 1184 1185 1186
static int git__memcmp4(const void *a, const void *b) {
	return memcmp(a, b, 4);
}

1187
int git_pack_foreach_entry(
1188
	struct git_pack_file *p,
1189
	git_odb_foreach_cb cb,
1190
	void *data)
1191 1192 1193
{
	const unsigned char *index = p->index_map.data, *current;
	uint32_t i;
1194
	int error = 0;
1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210

	if (index == NULL) {
		if ((error = pack_index_open(p)) < 0)
			return error;

		assert(p->index_map.data);

		index = p->index_map.data;
	}

	if (p->index_version > 1) {
		index += 8;
	}

	index += 4 * 256;

1211 1212
	if (p->oids == NULL) {
		git_vector offsets, oids;
1213

1214 1215 1216 1217 1218
		if ((error = git_vector_init(&oids, p->num_objects, NULL)))
			return error;

		if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
			return error;
1219

1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233
		if (p->index_version > 1) {
			const unsigned char *off = index + 24 * p->num_objects;
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&off[4 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
		} else {
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&index[24 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&oids, (void*)&current[4]);
		}
1234

1235
		git_vector_free(&offsets);
1236
		p->oids = (git_oid **)git_vector_detach(NULL, NULL, &oids);
1237 1238
	}

1239
	for (i = 0; i < p->num_objects; i++)
1240 1241
		if ((error = cb(p->oids[i], data)) != 0)
			return giterr_set_after_callback(error);
1242

1243
	return error;
1244 1245
}

1246
static int pack_entry_find_offset(
1247 1248 1249 1250
	git_off_t *offset_out,
	git_oid *found_oid,
	struct git_pack_file *p,
	const git_oid *short_oid,
1251
	size_t len)
1252
{
1253 1254
	const uint32_t *level1_ofs = p->index_map.data;
	const unsigned char *index = p->index_map.data;
1255 1256 1257 1258 1259 1260
	unsigned hi, lo, stride;
	int pos, found = 0;
	const unsigned char *current = 0;

	*offset_out = 0;

1261
	if (p->index_version == -1) {
1262
		int error;
1263

1264 1265 1266
		if ((error = pack_index_open(p)) < 0)
			return error;
		assert(p->index_map.data);
1267

1268 1269 1270
		index = p->index_map.data;
		level1_ofs = p->index_map.data;
	}
1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292

	if (p->index_version > 1) {
		level1_ofs += 2;
		index += 8;
	}

	index += 4 * 256;
	hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));

	if (p->index_version > 1) {
		stride = 20;
	} else {
		stride = 24;
		index += 4;
	}

#ifdef INDEX_DEBUG_LOOKUP
	printf("%02x%02x%02x... lo %u hi %u nr %d\n",
		short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
#endif

1293
#ifdef GIT_USE_LOOKUP
Vicent Marti committed
1294
	pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id);
1295 1296 1297
#else
	pos = sha1_position(index, stride, lo, hi, short_oid->id);
#endif
1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309

	if (pos >= 0) {
		/* An object matching exactly the oid was found */
		found = 1;
		current = index + pos * stride;
	} else {
		/* No object was found */
		/* pos refers to the object with the "closest" oid to short_oid */
		pos = - 1 - pos;
		if (pos < (int)p->num_objects) {
			current = index + pos * stride;

Russell Belfer committed
1310
			if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
1311 1312 1313 1314
				found = 1;
		}
	}

1315
	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
1316 1317 1318 1319 1320 1321 1322 1323
		/* Check for ambiguousity */
		const unsigned char *next = current + stride;

		if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
			found = 2;
		}
	}

1324
	if (!found)
Russell Belfer committed
1325
		return git_odb__error_notfound("failed to find offset for pack entry", short_oid);
1326 1327
	if (found > 1)
		return git_odb__error_ambiguous("found multiple offsets for pack entry");
1328

1329 1330
	*offset_out = nth_packed_object_offset(p, pos);
	git_oid_fromraw(found_oid, current);
1331 1332

#ifdef INDEX_DEBUG_LOOKUP
1333
	{
1334 1335 1336 1337 1338
		unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
		git_oid_fmt(hex_sha1, found_oid);
		hex_sha1[GIT_OID_HEXSZ] = '\0';
		printf("found lo=%d %s\n", lo, hex_sha1);
	}
1339
#endif
1340

1341
	return 0;
1342 1343 1344 1345 1346 1347
}

int git_pack_entry_find(
		struct git_pack_entry *e,
		struct git_pack_file *p,
		const git_oid *short_oid,
1348
		size_t len)
1349
{
1350
	git_off_t offset;
1351 1352 1353 1354 1355 1356 1357 1358
	git_oid found_oid;
	int error;

	assert(p);

	if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
		unsigned i;
		for (i = 0; i < p->num_bad_objects; i++)
1359
			if (git_oid__cmp(short_oid, &p->bad_object_sha1[i]) == 0)
1360
				return packfile_error("bad object found in packfile");
1361 1362 1363
	}

	error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
1364 1365
	if (error < 0)
		return error;
1366 1367 1368 1369

	/* we found a unique entry in the index;
	 * make sure the packfile backing the index
	 * still exists on disk */
1370 1371
	if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
		return error;
1372 1373 1374 1375 1376

	e->offset = offset;
	e->p = p;

	git_oid_cpy(&e->sha1, &found_oid);
1377
	return 0;
1378
}