pack.c 26.4 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8
#include "common.h"
9 10 11
#include "odb.h"
#include "pack.h"
#include "delta-apply.h"
12
#include "sha1_lookup.h"
13 14
#include "mwindow.h"
#include "fileops.h"
15 16

#include "git2/oid.h"
17
#include <zlib.h>
18

19
static int packfile_open(struct git_pack_file *p);
20
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
21 22 23 24
int packfile_unpack_compressed(
		git_rawobj *obj,
		struct git_pack_file *p,
		git_mwindow **w_curs,
25
		git_off_t *curpos,
26 27 28 29 30
		size_t size,
		git_otype type);

/* Can find the offset of an object given
 * a prefix of an identifier.
31
 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
32 33 34 35 36
 * is ambiguous within the pack.
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
 */
static int pack_entry_find_offset(
37
		git_off_t *offset_out,
38 39 40
		git_oid *found_oid,
		struct git_pack_file *p,
		const git_oid *short_oid,
41
		size_t len);
42

43 44 45 46 47 48
static int packfile_error(const char *message)
{
	giterr_set(GITERR_ODB, "Invalid pack file - %s", message);
	return -1;
}

49 50 51
/********************
 * Delta base cache
 ********************/
52

53
static git_pack_cache_entry *new_cache_object(git_rawobj *source)
54
{
55
	git_pack_cache_entry *e = git__calloc(1, sizeof(git_pack_cache_entry));
56 57 58 59 60 61 62 63 64 65 66 67 68
	if (!e)
		return NULL;

	memcpy(&e->raw, source, sizeof(git_rawobj));

	return e;
}

static void free_cache_object(void *o)
{
	git_pack_cache_entry *e = (git_pack_cache_entry *)o;

	if (e != NULL) {
69
		assert(e->refcount.val == 0);
70 71 72 73 74
		git__free(e->raw.data);
		git__free(e);
	}
}

75 76 77 78 79 80 81 82 83 84 85
static void cache_free(git_pack_cache *cache)
{
	khiter_t k;

	if (cache->entries) {
		for (k = kh_begin(cache->entries); k != kh_end(cache->entries); k++) {
			if (kh_exist(cache->entries, k))
				free_cache_object(kh_value(cache->entries, k));
		}

		git_offmap_free(cache->entries);
86
		git_mutex_free(&cache->lock);
87 88 89 90 91 92 93 94 95 96 97 98 99 100
	}
}

static int cache_init(git_pack_cache *cache)
{
	memset(cache, 0, sizeof(git_pack_cache));
	cache->entries = git_offmap_alloc();
	GITERR_CHECK_ALLOC(cache->entries);
	cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;
	git_mutex_init(&cache->lock);

	return 0;
}

101
static git_pack_cache_entry *cache_get(git_pack_cache *cache, git_off_t offset)
102 103 104 105
{
	khiter_t k;
	git_pack_cache_entry *entry = NULL;

106 107 108
	if (git_mutex_lock(&cache->lock) < 0)
		return NULL;

109 110 111 112
	k = kh_get(off, cache->entries, offset);
	if (k != kh_end(cache->entries)) { /* found it */
		entry = kh_value(cache->entries, k);
		git_atomic_inc(&entry->refcount);
113
		entry->last_usage = cache->use_ctr++;
114 115 116 117 118 119
	}
	git_mutex_unlock(&cache->lock);

	return entry;
}

120 121 122
/* Run with the cache lock held */
static void free_lowest_entry(git_pack_cache *cache)
{
123 124 125
	git_pack_cache_entry *entry;
	khiter_t k;

126 127 128 129 130
	for (k = kh_begin(cache->entries); k != kh_end(cache->entries); k++) {
		if (!kh_exist(cache->entries, k))
			continue;

		entry = kh_value(cache->entries, k);
131

132 133 134 135
		if (entry && entry->refcount.val == 0) {
			cache->memory_used -= entry->raw.len;
			kh_del(off, cache->entries, k);
			free_cache_object(entry);
136 137
		}
	}
138 139
}

140 141 142 143 144 145
static int cache_add(git_pack_cache *cache, git_rawobj *base, git_off_t offset)
{
	git_pack_cache_entry *entry;
	int error, exists = 0;
	khiter_t k;

146 147 148
	if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
		return -1;

149 150
	entry = new_cache_object(base);
	if (entry) {
151 152 153 154
		if (git_mutex_lock(&cache->lock) < 0) {
			giterr_set(GITERR_OS, "failed to lock cache");
			return -1;
		}
155 156 157
		/* Add it to the cache if nobody else has */
		exists = kh_get(off, cache->entries, offset) != kh_end(cache->entries);
		if (!exists) {
158 159 160
			while (cache->memory_used + base->len > cache->memory_limit)
				free_lowest_entry(cache);

161 162 163
			k = kh_put(off, cache->entries, offset, &error);
			assert(error != 0);
			kh_value(cache->entries, k) = entry;
164
			cache->memory_used += entry->raw.len;
165 166 167 168 169 170 171 172 173 174 175 176
		}
		git_mutex_unlock(&cache->lock);
		/* Somebody beat us to adding it into the cache */
		if (exists) {
			git__free(entry);
			return -1;
		}
	}

	return 0;
}

177 178 179 180 181 182 183 184
/***********************************************************
 *
 * PACK INDEX METHODS
 *
 ***********************************************************/

static void pack_index_free(struct git_pack_file *p)
{
185 186 187 188
	if (p->oids) {
		git__free(p->oids);
		p->oids = NULL;
	}
189 190 191 192 193 194
	if (p->index_map.data) {
		git_futils_mmap_free(&p->index_map);
		p->index_map.data = NULL;
	}
}

Vicent Marti committed
195
static int pack_index_check(const char *path, struct git_pack_file *p)
196 197 198 199 200 201 202
{
	struct git_pack_idx_header *hdr;
	uint32_t version, nr, i, *index;
	void *idx_map;
	size_t idx_size;
	struct stat st;
	int error;
203 204
	/* TODO: properly open the file without access time using O_NOATIME */
	git_file fd = git_futils_open_ro(path);
205
	if (fd < 0)
206
		return fd;
207

208 209 210 211 212
	if (p_fstat(fd, &st) < 0 ||
		!S_ISREG(st.st_mode) ||
		!git__is_sizet(st.st_size) ||
		(idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
	{
213
		p_close(fd);
214 215
		giterr_set(GITERR_OS, "Failed to check pack index.");
		return -1;
216 217 218
	}

	error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
219

220 221
	p_close(fd);

222 223
	if (error < 0)
		return error;
224 225 226 227 228 229 230 231

	hdr = idx_map = p->index_map.data;

	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
		version = ntohl(hdr->idx_version);

		if (version < 2 || version > 2) {
			git_futils_mmap_free(&p->index_map);
232
			return packfile_error("unsupported index version");
233 234 235 236 237 238 239 240 241
		}

	} else
		version = 1;

	nr = 0;
	index = idx_map;

	if (version > 1)
Vicent Marti committed
242
		index += 2; /* skip index header */
243 244 245 246 247

	for (i = 0; i < 256; i++) {
		uint32_t n = ntohl(index[i]);
		if (n < nr) {
			git_futils_mmap_free(&p->index_map);
248
			return packfile_error("index is non-monotonic");
249 250 251 252 253 254 255
		}
		nr = n;
	}

	if (version == 1) {
		/*
		 * Total size:
Vicent Marti committed
256 257 258 259
		 * - 256 index entries 4 bytes each
		 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
		 * - 20-byte SHA1 of the packfile
		 * - 20-byte SHA1 file checksum
260 261 262
		 */
		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
			git_futils_mmap_free(&p->index_map);
263
			return packfile_error("index is corrupted");
264 265 266 267
		}
	} else if (version == 2) {
		/*
		 * Minimum size:
Vicent Marti committed
268 269 270 271 272 273 274
		 * - 8 bytes of header
		 * - 256 index entries 4 bytes each
		 * - 20-byte sha1 entry * nr
		 * - 4-byte crc entry * nr
		 * - 4-byte offset entry * nr
		 * - 20-byte SHA1 of the packfile
		 * - 20-byte SHA1 file checksum
275 276 277 278 279 280 281 282 283 284 285 286
		 * And after the 4-byte offset table might be a
		 * variable sized table containing 8-byte entries
		 * for offsets larger than 2^31.
		 */
		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
		unsigned long max_size = min_size;

		if (nr)
			max_size += (nr - 1)*8;

		if (idx_size < min_size || idx_size > max_size) {
			git_futils_mmap_free(&p->index_map);
287
			return packfile_error("wrong index size");
288 289 290 291 292
		}
	}

	p->index_version = version;
	p->num_objects = nr;
293
	return 0;
294 295 296 297 298 299
}

static int pack_index_open(struct git_pack_file *p)
{
	char *idx_name;
	int error;
300
	size_t name_len, offset;
301 302

	if (p->index_map.data)
303
		return 0;
304 305

	idx_name = git__strdup(p->pack_name);
306 307
	GITERR_CHECK_ALLOC(idx_name);

308 309 310 311 312
	name_len = strlen(idx_name);
	offset = name_len - strlen(".pack");
	assert(offset < name_len); /* make sure no underflow */

	strncpy(idx_name + offset, ".idx", name_len - offset);
313 314

	error = pack_index_check(idx_name, p);
315
	git__free(idx_name);
316

317
	return error;
318 319 320 321
}

static unsigned char *pack_window_open(
		struct git_pack_file *p,
322
		git_mwindow **w_cursor,
323
		git_off_t offset,
324 325
		unsigned int *left)
{
326
	if (p->mwf.fd == -1 && packfile_open(p) < 0)
327 328 329 330 331 332 333 334 335 336 337 338 339
		return NULL;

	/* Since packfiles end in a hash of their content and it's
	 * pointless to ask for an offset into the middle of that
	 * hash, and the pack_window_contains function above wouldn't match
	 * don't allow an offset too close to the end of the file.
	 */
	if (offset > (p->mwf.size - 20))
		return NULL;

	return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
 }

340 341
static int packfile_unpack_header1(
		unsigned long *usedp,
342 343 344 345 346 347 348 349
		size_t *sizep,
		git_otype *type,
		const unsigned char *buf,
		unsigned long len)
{
	unsigned shift;
	unsigned long size, c;
	unsigned long used = 0;
350

351 352 353 354 355
	c = buf[used++];
	*type = (c >> 4) & 7;
	size = c & 15;
	shift = 4;
	while (c & 0x80) {
356
		if (len <= used)
357
			return GIT_EBUFS;
358 359 360 361 362

		if (bitsizeof(long) <= shift) {
			*usedp = 0;
			return -1;
		}
363 364 365 366 367 368 369

		c = buf[used++];
		size += (c & 0x7f) << shift;
		shift += 7;
	}

	*sizep = (size_t)size;
370 371
	*usedp = used;
	return 0;
372 373 374 375 376 377 378
}

int git_packfile_unpack_header(
		size_t *size_p,
		git_otype *type_p,
		git_mwindow_file *mwf,
		git_mwindow **w_curs,
379
		git_off_t *curpos)
380 381 382 383
{
	unsigned char *base;
	unsigned int left;
	unsigned long used;
384
	int ret;
385 386

	/* pack_window_open() assures us we have [base, base + 20) available
Vicent Marti committed
387 388
	 * as a range that we can look at at. (Its actually the hash
	 * size that is assured.) With our object header encoding
389 390 391 392 393 394
	 * the maximum deflated object size is 2^137, which is just
	 * insane, so we know won't exceed what we have been given.
	 */
//	base = pack_window_open(p, w_curs, *curpos, &left);
	base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
	if (base == NULL)
395
		return GIT_EBUFS;
396 397

		ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
398
	git_mwindow_close(w_curs);
399
	if (ret == GIT_EBUFS)
400 401
		return ret;
	else if (ret < 0)
402
		return packfile_error("header length is zero");
403 404

	*curpos += used;
405
	return 0;
406 407
}

408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
int git_packfile_resolve_header(
		size_t *size_p,
		git_otype *type_p,
		struct git_pack_file *p,
		git_off_t offset)
{
	git_mwindow *w_curs = NULL;
	git_off_t curpos = offset;
	size_t size;
	git_otype type;
	git_off_t base_offset;
	int error;

	error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
	git_mwindow_close(&w_curs);
	if (error < 0)
		return error;

	if (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
		size_t base_size;
		git_rawobj delta;
		base_offset = get_delta_base(p, &w_curs, &curpos, type, offset);
		git_mwindow_close(&w_curs);
		error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, size, type);
		git_mwindow_close(&w_curs);
		if (error < 0)
			return error;
		error = git__delta_read_header(delta.data, delta.len, &base_size, size_p);
		git__free(delta.data);
		if (error < 0)
			return error;
	} else
		*size_p = size;

	while (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
		curpos = base_offset;
		error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
		git_mwindow_close(&w_curs);
		if (error < 0)
			return error;
		if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
			break;
		base_offset = get_delta_base(p, &w_curs, &curpos, type, base_offset);
		git_mwindow_close(&w_curs);
	}
	*type_p = type;

	return error;
}

458
static int packfile_unpack_delta(
459
		git_rawobj *obj,
460
		struct git_pack_file *p,
461
		git_mwindow **w_curs,
462
		git_off_t *curpos,
463 464
		size_t delta_size,
		git_otype delta_type,
465
		git_off_t obj_offset)
466
{
467
	git_off_t base_offset, base_key;
468
	git_rawobj base, delta;
469
	git_pack_cache_entry *cached = NULL;
470
	int error, found_base = 0;
471

472
	base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
473
	git_mwindow_close(w_curs);
474
	if (base_offset == 0)
475 476 477
		return packfile_error("delta offset is zero");
	if (base_offset < 0) /* must actually be an error code */
		return (int)base_offset;
478

479 480
	if (!p->bases.entries && (cache_init(&p->bases) < 0))
		return -1;
481

482
	base_key = base_offset; /* git_packfile_unpack modifies base_offset */
483
	if ((cached = cache_get(&p->bases, base_offset)) != NULL) {
484
		memcpy(&base, &cached->raw, sizeof(git_rawobj));
485
		found_base = 1;
486 487 488
	}

	if (!cached) { /* have to inflate it */
489 490 491 492 493 494 495 496 497 498 499
		error = git_packfile_unpack(&base, p, &base_offset);

		/*
		 * TODO: git.git tries to load the base from other packfiles
		 * or loose objects.
		 *
		 * We'll need to do this in order to support thin packs.
		 */
		if (error < 0)
			return error;
	}
500 501

	error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
502
	git_mwindow_close(w_curs);
503

504
	if (error < 0) {
505 506
		if (!found_base)
			git__free(base.data);
507
		return error;
508 509 510
	}

	obj->type = base.type;
511
	error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
512 513 514
	if (error < 0)
		goto on_error;

515
	if (found_base)
516
		git_atomic_dec(&cached->refcount);
517 518
	else if (cache_add(&p->bases, &base, base_key) < 0)
		git__free(base.data);
519

520
on_error:
521
	git__free(delta.data);
522 523 524 525

	return error; /* error set by git__delta_apply */
}

526
int git_packfile_unpack(
527 528 529
	git_rawobj *obj,
	struct git_pack_file *p,
	git_off_t *obj_offset)
530 531
{
	git_mwindow *w_curs = NULL;
532
	git_off_t curpos = *obj_offset;
533 534 535 536 537 538 539 540 541 542 543 544 545 546
	int error;

	size_t size = 0;
	git_otype type;

	/*
	 * TODO: optionally check the CRC on the packfile
	 */

	obj->data = NULL;
	obj->len = 0;
	obj->type = GIT_OBJ_BAD;

	error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
547 548
	git_mwindow_close(&w_curs);

549 550
	if (error < 0)
		return error;
551 552 553 554 555

	switch (type) {
	case GIT_OBJ_OFS_DELTA:
	case GIT_OBJ_REF_DELTA:
		error = packfile_unpack_delta(
556 557
				obj, p, &w_curs, &curpos,
				size, type, *obj_offset);
558 559 560 561 562 563 564
		break;

	case GIT_OBJ_COMMIT:
	case GIT_OBJ_TREE:
	case GIT_OBJ_BLOB:
	case GIT_OBJ_TAG:
		error = packfile_unpack_compressed(
565
				obj, p, &w_curs, &curpos,
566 567 568 569
				size, type);
		break;

	default:
570
		error = packfile_error("invalid packfile type in header");;
571 572 573
		break;
	}

574
	*obj_offset = curpos;
575
	return error;
576 577
}

Russell Belfer committed
578 579 580 581 582 583 584 585 586 587 588 589
static void *use_git_alloc(void *opaq, unsigned int count, unsigned int size)
{
	GIT_UNUSED(opaq);
	return git__calloc(count, size);
}

static void use_git_free(void *opaq, void *ptr)
{
	GIT_UNUSED(opaq);
	git__free(ptr);
}

590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos)
{
	int st;

	memset(obj, 0, sizeof(git_packfile_stream));
	obj->curpos = curpos;
	obj->p = p;
	obj->zstream.zalloc = use_git_alloc;
	obj->zstream.zfree = use_git_free;
	obj->zstream.next_in = Z_NULL;
	obj->zstream.next_out = Z_NULL;
	st = inflateInit(&obj->zstream);
	if (st != Z_OK) {
		git__free(obj);
		giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
		return -1;
	}

	return 0;
}

ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
{
	unsigned char *in;
	size_t written;
	int st;

	if (obj->done)
		return 0;

	in = pack_window_open(obj->p, &obj->mw, obj->curpos, &obj->zstream.avail_in);
	if (in == NULL)
		return GIT_EBUFS;

	obj->zstream.next_out = buffer;
625
	obj->zstream.avail_out = (unsigned int)len;
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655
	obj->zstream.next_in = in;

	st = inflate(&obj->zstream, Z_SYNC_FLUSH);
	git_mwindow_close(&obj->mw);

	obj->curpos += obj->zstream.next_in - in;
	written = len - obj->zstream.avail_out;

	if (st != Z_OK && st != Z_STREAM_END) {
		giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
		return -1;
	}

	if (st == Z_STREAM_END)
		obj->done = 1;


	/* If we didn't write anything out but we're not done, we need more data */
	if (!written && st != Z_STREAM_END)
		return GIT_EBUFS;

	return written;

}

void git_packfile_stream_free(git_packfile_stream *obj)
{
	inflateEnd(&obj->zstream);
}

656
int packfile_unpack_compressed(
657 658 659 660 661 662
	git_rawobj *obj,
	struct git_pack_file *p,
	git_mwindow **w_curs,
	git_off_t *curpos,
	size_t size,
	git_otype type)
663 664 665 666 667
{
	int st;
	z_stream stream;
	unsigned char *buffer, *in;

668 669
	buffer = git__calloc(1, size + 1);
	GITERR_CHECK_ALLOC(buffer);
670 671 672

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buffer;
673
	stream.avail_out = (uInt)size + 1;
Russell Belfer committed
674 675
	stream.zalloc = use_git_alloc;
	stream.zfree = use_git_free;
676 677 678

	st = inflateInit(&stream);
	if (st != Z_OK) {
679
		git__free(buffer);
680
		giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
681

682
		return -1;
683 684 685
	}

	do {
686
		in = pack_window_open(p, w_curs, *curpos, &stream.avail_in);
687 688
		stream.next_in = in;
		st = inflate(&stream, Z_FINISH);
689
		git_mwindow_close(w_curs);
690 691 692 693

		if (!stream.avail_out)
			break; /* the payload is larger than it should be */

694 695 696
		if (st == Z_BUF_ERROR && in == NULL) {
			inflateEnd(&stream);
			git__free(buffer);
697
			return GIT_EBUFS;
698 699
		}

700
		*curpos += stream.next_in - in;
701 702 703 704 705
	} while (st == Z_OK || st == Z_BUF_ERROR);

	inflateEnd(&stream);

	if ((st != Z_STREAM_END) || stream.total_out != size) {
706
		git__free(buffer);
707 708
		giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
		return -1;
709 710 711 712 713
	}

	obj->type = type;
	obj->len = size;
	obj->data = buffer;
714
	return 0;
715 716
}

717 718 719 720
/*
 * curpos is where the data starts, delta_obj_offset is the where the
 * header starts
 */
721 722 723 724 725 726
git_off_t get_delta_base(
	struct git_pack_file *p,
	git_mwindow **w_curs,
	git_off_t *curpos,
	git_otype type,
	git_off_t delta_obj_offset)
727
{
728 729
	unsigned int left = 0;
	unsigned char *base_info;
730
	git_off_t base_offset;
731 732
	git_oid unused;

733 734 735
	base_info = pack_window_open(p, w_curs, *curpos, &left);
	/* Assumption: the only reason this would fail is because the file is too small */
	if (base_info == NULL)
736
		return GIT_EBUFS;
737 738
	/* pack_window_open() assured us we have [base_info, base_info + 20)
	 * as a range that we can look at without walking off the
Vicent Marti committed
739 740
	 * end of the mapped window. Its actually the hash size
	 * that is assured. An OFS_DELTA longer than the hash size
741 742 743 744 745 746 747
	 * is stupid, as then a REF_DELTA would be smaller to store.
	 */
	if (type == GIT_OBJ_OFS_DELTA) {
		unsigned used = 0;
		unsigned char c = base_info[used++];
		base_offset = c & 127;
		while (c & 128) {
748
			if (left <= used)
749
				return GIT_EBUFS;
750 751
			base_offset += 1;
			if (!base_offset || MSB(base_offset, 7))
Vicent Marti committed
752
				return 0; /* overflow */
753 754 755 756 757
			c = base_info[used++];
			base_offset = (base_offset << 7) + (c & 127);
		}
		base_offset = delta_obj_offset - base_offset;
		if (base_offset <= 0 || base_offset >= delta_obj_offset)
Vicent Marti committed
758
			return 0; /* out of bound */
759 760
		*curpos += used;
	} else if (type == GIT_OBJ_REF_DELTA) {
761 762
		/* If we have the cooperative cache, search in it first */
		if (p->has_cache) {
763 764
			khiter_t k;
			git_oid oid;
765

766 767 768
			git_oid_fromraw(&oid, base_info);
			k = kh_get(oid, p->idx_cache, &oid);
			if (k != kh_end(p->idx_cache)) {
769
				*curpos += 20;
770
				return ((struct git_pack_entry *)kh_value(p->idx_cache, k))->offset;
771 772
			}
		}
773
		/* The base entry _must_ be in the same pack */
774 775
		if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
			return packfile_error("base entry delta is not in the same pack");
776 777 778 779 780 781
		*curpos += 20;
	} else
		return 0;

	return base_offset;
}
782 783 784 785 786 787 788

/***********************************************************
 *
 * PACKFILE METHODS
 *
 ***********************************************************/

789
static struct git_pack_file *packfile_alloc(size_t extra)
790
{
791 792 793
	struct git_pack_file *p = git__calloc(1, sizeof(*p) + extra);
	if (p != NULL)
		p->mwf.fd = -1;
794 795 796 797
	return p;
}


798
void git_packfile_free(struct git_pack_file *p)
799 800 801
{
	assert(p);

802
	cache_free(&p->bases);
803

804
	git_mwindow_free_all(&p->mwf);
805
	git_mwindow_file_deregister(&p->mwf);
806 807 808 809 810 811

	if (p->mwf.fd != -1)
		p_close(p->mwf.fd);

	pack_index_free(p);

812 813
	git__free(p->bad_object_sha1);
	git__free(p);
814 815 816 817 818 819 820 821 822
}

static int packfile_open(struct git_pack_file *p)
{
	struct stat st;
	struct git_pack_header hdr;
	git_oid sha1;
	unsigned char *idx_sha1;

823 824
	assert(p->index_map.data);

825
	if (!p->index_map.data && pack_index_open(p) < 0)
Russell Belfer committed
826
		return git_odb__error_notfound("failed to open packfile", NULL);
827 828

	/* TODO: open with noatime */
829
	p->mwf.fd = git_futils_open_ro(p->pack_name);
830 831 832 833
	if (p->mwf.fd < 0) {
		p->mwf.fd = -1;
		return -1;
	}
834

835 836 837
	if (p_fstat(p->mwf.fd, &st) < 0 ||
		git_mwindow_file_register(&p->mwf) < 0)
		goto cleanup;
838 839 840 841 842

	/* If we created the struct before we had the pack we lack size. */
	if (!p->mwf.size) {
		if (!S_ISREG(st.st_mode))
			goto cleanup;
843
		p->mwf.size = (git_off_t)st.st_size;
844 845 846 847 848 849 850 851 852
	} else if (p->mwf.size != st.st_size)
		goto cleanup;

#if 0
	/* We leave these file descriptors open with sliding mmap;
	 * there is no point keeping them open across exec(), though.
	 */
	fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
	if (fd_flag < 0)
853
		goto cleanup;
854 855 856

	fd_flag |= FD_CLOEXEC;
	if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
857
		goto cleanup;
858 859 860
#endif

	/* Verify we recognize this pack file format. */
861 862 863
	if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
		hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
		!pack_version_ok(hdr.hdr_version))
864 865 866
		goto cleanup;

	/* Verify the pack matches its index. */
867 868 869
	if (p->num_objects != ntohl(hdr.hdr_entries) ||
		p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
		p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
870 871 872 873
		goto cleanup;

	idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;

874 875
	if (git_oid_cmp(&sha1, (git_oid *)idx_sha1) == 0)
		return 0;
876 877

cleanup:
878
	giterr_set(GITERR_OS, "Invalid packfile '%s'", p->pack_name);
879 880
	p_close(p->mwf.fd);
	p->mwf.fd = -1;
881
	return -1;
882 883 884 885 886 887 888 889 890 891 892
}

int git_packfile_check(struct git_pack_file **pack_out, const char *path)
{
	struct stat st;
	struct git_pack_file *p;
	size_t path_len;

	*pack_out = NULL;
	path_len = strlen(path);
	p = packfile_alloc(path_len + 2);
893
	GITERR_CHECK_ALLOC(p);
894 895 896 897 898

	/*
	 * Make sure a corresponding .pack file exists and that
	 * the index looks sane.
	 */
899
	path_len -= strlen(".idx");
900
	if (path_len < 1) {
901
		git__free(p);
Russell Belfer committed
902
		return git_odb__error_notfound("invalid packfile path", NULL);
903 904 905 906 907
	}

	memcpy(p->pack_name, path, path_len);

	strcpy(p->pack_name + path_len, ".keep");
908
	if (git_path_exists(p->pack_name) == true)
909 910 911
		p->pack_keep = 1;

	strcpy(p->pack_name + path_len, ".pack");
912
	if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
913
		git__free(p);
Russell Belfer committed
914
		return git_odb__error_notfound("packfile not found", NULL);
915 916 917 918 919
	}

	/* ok, it looks sane as far as we can check without
	 * actually mapping the pack file.
	 */
920
	p->mwf.size = st.st_size;
921 922 923 924 925
	p->pack_local = 1;
	p->mtime = (git_time_t)st.st_mtime;

	/* see if we can parse the sha1 oid in the packfile name */
	if (path_len < 40 ||
926
		git_oid_fromstr(&p->sha1, path + path_len - GIT_OID_HEXSZ) < 0)
927 928 929
		memset(&p->sha1, 0x0, GIT_OID_RAWSZ);

	*pack_out = p;
930 931

	return 0;
932 933 934 935 936 937 938 939
}

/***********************************************************
 *
 * PACKFILE ENTRY SEARCH INTERNALS
 *
 ***********************************************************/

940
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
941 942 943 944 945 946 947 948 949 950 951 952 953
{
	const unsigned char *index = p->index_map.data;
	index += 4 * 256;
	if (p->index_version == 1) {
		return ntohl(*((uint32_t *)(index + 24 * n)));
	} else {
		uint32_t off;
		index += 8 + p->num_objects * (20 + 4);
		off = ntohl(*((uint32_t *)(index + 4 * n)));
		if (!(off & 0x80000000))
			return off;
		index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
		return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
Vicent Marti committed
954
					ntohl(*((uint32_t *)(index + 4)));
955 956 957
	}
}

958 959 960 961
static int git__memcmp4(const void *a, const void *b) {
	return memcmp(a, b, 4);
}

962
int git_pack_foreach_entry(
963
	struct git_pack_file *p,
964
	git_odb_foreach_cb cb,
965
	void *data)
966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
{
	const unsigned char *index = p->index_map.data, *current;
	uint32_t i;

	if (index == NULL) {
		int error;

		if ((error = pack_index_open(p)) < 0)
			return error;

		assert(p->index_map.data);

		index = p->index_map.data;
	}

	if (p->index_version > 1) {
		index += 8;
	}

	index += 4 * 256;

987 988 989
	if (p->oids == NULL) {
		git_vector offsets, oids;
		int error;
990

991 992 993 994 995
		if ((error = git_vector_init(&oids, p->num_objects, NULL)))
			return error;

		if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
			return error;
996

997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
		if (p->index_version > 1) {
			const unsigned char *off = index + 24 * p->num_objects;
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&off[4 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
		} else {
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&index[24 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&oids, (void*)&current[4]);
		}
		git_vector_free(&offsets);
		p->oids = (git_oid **)oids.contents;
1013 1014
	}

1015 1016 1017 1018
	for (i = 0; i < p->num_objects; i++)
		if (cb(p->oids[i], data))
			return GIT_EUSER;

1019 1020 1021
	return 0;
}

1022
static int pack_entry_find_offset(
1023 1024 1025 1026
	git_off_t *offset_out,
	git_oid *found_oid,
	struct git_pack_file *p,
	const git_oid *short_oid,
1027
	size_t len)
1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039
{
	const uint32_t *level1_ofs = p->index_map.data;
	const unsigned char *index = p->index_map.data;
	unsigned hi, lo, stride;
	int pos, found = 0;
	const unsigned char *current = 0;

	*offset_out = 0;

	if (index == NULL) {
		int error;

1040 1041
		if ((error = pack_index_open(p)) < 0)
			return error;
1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070

		assert(p->index_map.data);

		index = p->index_map.data;
		level1_ofs = p->index_map.data;
	}

	if (p->index_version > 1) {
		level1_ofs += 2;
		index += 8;
	}

	index += 4 * 256;
	hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));

	if (p->index_version > 1) {
		stride = 20;
	} else {
		stride = 24;
		index += 4;
	}

#ifdef INDEX_DEBUG_LOOKUP
	printf("%02x%02x%02x... lo %u hi %u nr %d\n",
		short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
#endif

	/* Use git.git lookup code */
Vicent Marti committed
1071
	pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id);
1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083

	if (pos >= 0) {
		/* An object matching exactly the oid was found */
		found = 1;
		current = index + pos * stride;
	} else {
		/* No object was found */
		/* pos refers to the object with the "closest" oid to short_oid */
		pos = - 1 - pos;
		if (pos < (int)p->num_objects) {
			current = index + pos * stride;

Russell Belfer committed
1084
			if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
1085 1086 1087 1088
				found = 1;
		}
	}

1089
	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
1090 1091 1092 1093 1094 1095 1096 1097
		/* Check for ambiguousity */
		const unsigned char *next = current + stride;

		if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
			found = 2;
		}
	}

1098
	if (!found)
Russell Belfer committed
1099
		return git_odb__error_notfound("failed to find offset for pack entry", short_oid);
1100 1101 1102 1103
	if (found > 1)
		return git_odb__error_ambiguous("found multiple offsets for pack entry");
	*offset_out = nth_packed_object_offset(p, pos);
	git_oid_fromraw(found_oid, current);
1104 1105

#ifdef INDEX_DEBUG_LOOKUP
1106
	{
1107 1108 1109 1110 1111
		unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
		git_oid_fmt(hex_sha1, found_oid);
		hex_sha1[GIT_OID_HEXSZ] = '\0';
		printf("found lo=%d %s\n", lo, hex_sha1);
	}
1112 1113
#endif
	return 0;
1114 1115 1116 1117 1118 1119
}

int git_pack_entry_find(
		struct git_pack_entry *e,
		struct git_pack_file *p,
		const git_oid *short_oid,
1120
		size_t len)
1121
{
1122
	git_off_t offset;
1123 1124 1125 1126 1127 1128 1129 1130 1131
	git_oid found_oid;
	int error;

	assert(p);

	if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
		unsigned i;
		for (i = 0; i < p->num_bad_objects; i++)
			if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0)
1132
				return packfile_error("bad object found in packfile");
1133 1134 1135
	}

	error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
1136 1137
	if (error < 0)
		return error;
1138 1139 1140 1141

	/* we found a unique entry in the index;
	 * make sure the packfile backing the index
	 * still exists on disk */
1142 1143
	if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
		return error;
1144 1145 1146 1147 1148

	e->offset = offset;
	e->p = p;

	git_oid_cpy(&e->sha1, &found_oid);
1149
	return 0;
1150
}