indexer.c 25.2 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

Carlos Martín Nieto committed
8
#include "git2/indexer.h"
9
#include "git2/object.h"
Carlos Martín Nieto committed
10

11 12
#include "common.h"
#include "pack.h"
Carlos Martín Nieto committed
13
#include "mwindow.h"
14
#include "posix.h"
15 16
#include "pack.h"
#include "filebuf.h"
17
#include "oid.h"
18
#include "oidmap.h"
19
#include "zstream.h"
20

21
/*
 * Enables the oid-keyed hash map used below for the pack's idx_cache
 * (see the kh_put/kh_get calls with the `oid` map name).
 * NOTE(review): presumably defined in oidmap.h -- confirm.
 */
GIT__USE_OIDMAP
22

23 24
/* Mutex defined elsewhere; git_indexer_free() holds it while releasing the packfile. */
extern git_mutex git__mwindow_mutex;

25
/*
 * Largest value that fits in 31 bits. Pack offsets above this are
 * recorded through the 64-bit `offset_long` field instead of `offset`.
 */
#define UINT31_MAX (0x7FFFFFFF)
26

27
struct entry {
28
	git_oid oid;
29 30 31 32 33
	uint32_t crc;
	uint32_t offset;
	uint64_t offset_long;
};

34
struct git_indexer {
35
	unsigned int parsed_header :1,
36
		opened_pack :1,
37 38
		have_stream :1,
		have_delta :1;
39
	struct git_pack_header hdr;
40
	struct git_pack_file *pack;
41
	unsigned int mode;
42
	git_off_t off;
43 44
	git_off_t entry_start;
	git_packfile_stream stream;
45 46 47 48
	size_t nr_objects;
	git_vector objects;
	git_vector deltas;
	unsigned int fanout[256];
49
	git_hash_ctx hash_ctx;
50
	git_oid hash;
51
	git_transfer_progress_cb progress_cb;
52
	void *progress_payload;
53
	char objbuf[8*1024];
54

55 56 57
	/* Needed to look up objects which we want to inject to fix a thin pack */
	git_odb *odb;

58 59
	/* Fields for calculating the packfile trailer (hash of everything before it) */
	char inbuf[GIT_OID_RAWSZ];
60
	size_t inbuf_len;
61
	git_hash_ctx trailer;
62 63 64
};

struct delta_info {
65
	git_off_t delta_off;
66 67
};

68
const git_oid *git_indexer_hash(const git_indexer *idx)
69 70 71 72
{
	return &idx->hash;
}

73
static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
74 75
{
	int error;
76
	git_map map;
77

78
	if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
79
		return error;
80

81 82 83 84
	memcpy(hdr, map.data, sizeof(*hdr));
	p_munmap(&map);

	/* Verify we recognize this pack file format. */
85
	if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
86
		giterr_set(GITERR_INDEXER, "Wrong pack signature");
87 88
		return -1;
	}
89

90
	if (!pack_version_ok(hdr->hdr_version)) {
91
		giterr_set(GITERR_INDEXER, "Wrong pack version");
92 93
		return -1;
	}
Carlos Martín Nieto committed
94

95
	return 0;
96 97
}

98
static int objects_cmp(const void *a, const void *b)
99 100 101 102
{
	const struct entry *entrya = a;
	const struct entry *entryb = b;

103
	return git_oid__cmp(&entrya->oid, &entryb->oid);
104 105
}

106 107
int git_indexer_new(
		git_indexer **out,
108
		const char *prefix,
109
		unsigned int mode,
110
		git_odb *odb,
111
		git_transfer_progress_cb progress_cb,
112
		void *progress_payload)
113
{
114
	git_indexer *idx;
115
	git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
116
	static const char suff[] = "/pack";
117
	int error, fd = -1;
118

119
	idx = git__calloc(1, sizeof(git_indexer));
120
	GITERR_CHECK_ALLOC(idx);
121
	idx->odb = odb;
122 123
	idx->progress_cb = progress_cb;
	idx->progress_payload = progress_payload;
124
	idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
125
	git_hash_ctx_init(&idx->hash_ctx);
126
	git_hash_ctx_init(&idx->trailer);
127 128 129 130 131

	error = git_buf_joinpath(&path, prefix, suff);
	if (error < 0)
		goto cleanup;

132
	fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
133
	git_buf_free(&path);
134 135 136 137 138 139
	if (fd < 0)
		goto cleanup;

	error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
	git_buf_free(&tmp_path);

140 141 142
	if (error < 0)
		goto cleanup;

143 144 145 146
	idx->pack->mwf.fd = fd;
	if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
		goto cleanup;

147 148 149 150
	*out = idx;
	return 0;

cleanup:
151 152 153
	if (fd != -1)
		p_close(fd);

154
	git_buf_free(&path);
155
	git_buf_free(&tmp_path);
156 157 158 159 160
	git__free(idx);
	return -1;
}

/* Try to store the delta so we can try to resolve it later */
161
static int store_delta(git_indexer *idx)
162
{
163 164
	struct delta_info *delta;

165 166
	delta = git__calloc(1, sizeof(struct delta_info));
	GITERR_CHECK_ALLOC(delta);
167
	delta->delta_off = idx->entry_start;
168

169
	if (git_vector_insert(&idx->deltas, delta) < 0)
170 171 172 173 174
		return -1;

	return 0;
}

175 176 177 178 179
/* Feed the object header for an object of `len` bytes and the given type into `ctx`. */
static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
{
	char hdr[64];
	size_t used = git_odb__format_object_header(hdr, sizeof(hdr), (size_t)len, type);

	git_hash_update(ctx, hdr, used);
}

184
/*
 * Read from `stream` until it yields no more data, feeding everything
 * into the indexer's object hash.
 *
 * Returns 0 once the stream reports 0 bytes, or the negative value
 * returned by git_packfile_stream_read().
 */
static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
{
	ssize_t nread;

	assert(idx && stream);

	for (;;) {
		nread = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
		if (nread < 0)
			return (int)nread;

		git_hash_update(&idx->hash_ctx, idx->objbuf, nread);

		if (nread == 0)
			return 0;
	}
}

203
/* In order to create the packfile stream, we need to skip over the delta base description */
204
static int advance_delta_offset(git_indexer *idx, git_otype type)
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
{
	git_mwindow *w = NULL;

	assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);

	if (type == GIT_OBJ_REF_DELTA) {
		idx->off += GIT_OID_RAWSZ;
	} else {
		git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
		git_mwindow_close(&w);
		if (base_off < 0)
			return (int)base_off;
	}

	return 0;
}

/* Read from the stream and discard any output */
223
static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
224 225 226 227 228 229
{
	ssize_t read;

	assert(stream);

	do {
230
		read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
231 232 233 234 235 236 237 238
	} while (read > 0);

	if (read < 0)
		return (int)read;

	return 0;
}

239 240 241 242 243 244 245 246 247
/*
 * Compute the CRC32 of `size` bytes of the file starting at `start`.
 *
 * crc_out - receives the checksum in network byte order
 * mwf     - windowed file to read from
 * start   - offset of the first byte
 * size    - number of bytes to checksum
 *
 * Returns 0 on success and -1 if a window could not be opened.
 */
static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
{
	void *ptr;
	uint32_t crc;
	unsigned int left, len;
	git_mwindow *w = NULL;

	crc = crc32(0L, Z_NULL, 0);
	while (size) {
		ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
		if (ptr == NULL)
			return -1;

		/*
		 * Compare in the wide type: truncating `size` to 32 bits first
		 * could yield a length of 0 and keep us here forever.
		 */
		len = ((git_off_t)left < size) ? left : (unsigned int)size;
		crc = crc32(crc, ptr, len);
		size -= len;
		start += len;
		git_mwindow_close(&w);
	}

	*crc_out = htonl(crc);
	return 0;
}

263
/*
 * Record the non-delta object that was just read: finish its hash,
 * add it to the pack's idx_cache and to the list of objects, and
 * update the fanout table.
 *
 * Returns 0 on success and -1 on failure (out of memory, duplicate
 * object, or failure to checksum the entry).
 */
static int store_object(git_indexer *idx)
{
	int i, error;
	khiter_t k;
	git_oid oid;
	struct entry *entry;
	git_off_t entry_size;
	struct git_pack_entry *pentry;
	git_off_t entry_start = idx->entry_start;

	entry = git__calloc(1, sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL) {
		/* do not leak `entry` when the second allocation fails */
		giterr_set_oom();
		goto on_error;
	}

	git_hash_final(&oid, &idx->hash_ctx);
	entry_size = idx->off - entry_start;
	if (entry_start > UINT31_MAX) {
		/* too big for the 31-bit field, flag it and keep the full value */
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	git_oid_cpy(&pentry->sha1, &oid);
	pentry->offset = entry_start;

	k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
	if (error == -1) {
		git__free(pentry);
		giterr_set_oom();
		goto on_error;
	}

	if (error == 0) {
		giterr_set(GITERR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
		git__free(pentry);
		goto on_error;
	}

	/* from here on the cache owns `pentry` */
	kh_value(idx->pack->idx_cache, k) = pentry;

	git_oid_cpy(&entry->oid, &oid);

	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		goto on_error;

	for (i = oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;

on_error:
	git__free(entry);

	return -1;
}

328 329 330 331 332 333 334
/* Tell whether the pack's idx_cache already knows about the object `id`. */
GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
{
	khiter_t pos = kh_get(oid, idx->pack->idx_cache, id);

	return pos != kh_end(idx->pack->idx_cache);
}

335
/*
 * Register an already-hashed object: store its offset, put `pentry`
 * into the pack's idx_cache, append `entry` to the list of objects and
 * update the fanout table.
 *
 * Returns 0 on success and -1 on failure. Nothing is freed here on
 * failure; the caller still holds both pointers.
 */
static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
{
	khiter_t pos;
	int put_result, bucket;

	if (entry_start <= UINT31_MAX) {
		entry->offset = (uint32_t)entry_start;
	} else {
		/* does not fit in 31 bits: flag it and keep the full value */
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	}

	pentry->offset = entry_start;

	pos = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &put_result);
	if (put_result <= 0) {
		giterr_set(GITERR_INDEXER, "cannot insert object into pack");
		return -1;
	}

	kh_value(idx->pack->idx_cache, pos) = pentry;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		return -1;

	/* Every fanout slot from the first id byte upwards counts this object */
	bucket = entry->oid.id[0];
	while (bucket < 256) {
		idx->fanout[bucket]++;
		++bucket;
	}

	return 0;
}

368
/*
 * Hash the resolved object `obj`, checksum its entry in the pack
 * (from `entry_start` up to idx->off) and register it via save_entry().
 *
 * Returns 0 on success, in which case obj->data still belongs to the
 * caller. On any failure -1 is returned and obj->data has been freed.
 */
static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start)
{
	git_oid oid;
	size_t entry_size;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;

	entry = git__calloc(1, sizeof(*entry));
	if (entry == NULL) {
		giterr_set_oom();
		goto on_error;
	}

	if (git_odb__hashobj(&oid, obj) < 0) {
		giterr_set(GITERR_INDEXER, "Failed to hash object");
		goto on_error;
	}

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL) {
		/* go through the common cleanup so `entry` is not leaked */
		giterr_set_oom();
		goto on_error;
	}

	git_oid_cpy(&pentry->sha1, &oid);
	git_oid_cpy(&entry->oid, &oid);

	/* crc_object() seeds the checksum itself and writes the final value */
	entry_size = (size_t)(idx->off - entry_start);
	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	if (save_entry(idx, entry, pentry, entry_start) < 0) {
		/*
		 * save_entry() never added `entry` to the list when it fails.
		 * NOTE(review): `pentry` may already be stored in the idx_cache
		 * at this point, so it is deliberately not freed here.
		 */
		git__free(entry);
		git__free(obj->data);
		return -1;
	}

	return 0;

on_error:
	git__free(pentry);
	git__free(entry);
	git__free(obj->data);
	return -1;
}
402

403
/*
 * Invoke the user's progress callback, if there is one, and pass its
 * result through giterr_set_after_callback_function(). Returns 0 when
 * no callback has been set.
 */
static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats)
{
	if (!idx->progress_cb)
		return 0;

	return giterr_set_after_callback_function(
		idx->progress_cb(stats, idx->progress_payload),
		"indexer progress");
}

412
/* Hash everything but the last 20B of input */
413
static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
414
{
415
	size_t to_expell, to_keep;
416 417 418 419 420

	if (size == 0)
		return;

	/* Easy case, dump the buffer and the data minus the last 20 bytes */
421
	if (size >= GIT_OID_RAWSZ) {
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
		git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
		git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);

		data += size - GIT_OID_RAWSZ;
		memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
		idx->inbuf_len = GIT_OID_RAWSZ;
		return;
	}

	/* We can just append */
	if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
		memcpy(idx->inbuf + idx->inbuf_len, data, size);
		idx->inbuf_len += size;
		return;
	}

	/* We need to partially drain the buffer and then append */
439 440
	to_keep   = GIT_OID_RAWSZ - size;
	to_expell = idx->inbuf_len - to_keep;
441 442 443 444 445 446 447 448

	git_hash_update(&idx->trailer, idx->inbuf, to_expell);

	memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
	memcpy(idx->inbuf + to_keep, data, size);
	idx->inbuf_len += size - to_expell;
}

449 450 451
/*
 * Write `size` bytes from `data` into the pack's file at `offset` by
 * mapping the affected region and copying into it.
 *
 * Returns 0 on success or the error from git__page_size() / p_mmap().
 */
static int write_at(git_indexer *idx, const void *data, git_off_t offset, size_t size)
{
	git_file fd = idx->pack->mwf.fd;
	git_map map;
	size_t page_size, in_page;
	git_off_t map_start;
	int error;

	assert(data && size);

	error = git__page_size(&page_size);
	if (error < 0)
		return error;

	/* the mapping has to begin on a page boundary */
	in_page = offset % page_size;
	map_start = offset - in_page;

	error = p_mmap(&map, in_page + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, map_start);
	if (error < 0)
		return error;

	memcpy((unsigned char *)map.data + in_page, data, size);
	p_munmap(&map);

	return 0;
}

/*
 * Append `size` bytes to the end of the pack's file. The file is first
 * extended by writing a single byte at its new last position, and the
 * data is then copied in with write_at(). The recorded size of the
 * pack (mwf.size) is left for the caller to update.
 *
 * Returns 0 on success (or when `size` is 0), -1 if the file cannot be
 * extended, or the error from write_at().
 */
static int append_to_pack(git_indexer *idx, const void *data, size_t size)
{
	git_off_t current_size = idx->pack->mwf.size;
	int fd = idx->pack->mwf.fd;

	if (size == 0)
		return 0;

	if (p_lseek(fd, current_size + size - 1, SEEK_SET) >= 0 &&
	    p_write(idx->pack->mwf.fd, data, 1) >= 0)
		return write_at(idx, data, idx->pack->mwf.size, size);

	giterr_set(GITERR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
	return -1;
}

495
/*
 * Add `size` bytes of pack data to the indexer and process as many
 * complete entries as the data received so far allows.
 *
 * idx   - the indexer
 * data  - the bytes to append to the pack
 * size  - number of bytes in `data`
 * stats - progress counters, updated as objects are received/indexed
 *
 * Returns 0 on success (including "need more data") and a negative
 * value on failure. A non-zero result from the progress callback is
 * returned as well.
 */
int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
{
	int error = -1;
	size_t processed;
	struct git_pack_header *hdr;
	git_mwindow_file *mwf;

	/* validate the arguments before reaching through `idx` */
	assert(idx && data && stats);

	hdr = &idx->hdr;
	mwf = &idx->pack->mwf;

	processed = stats->indexed_objects;

	if ((error = append_to_pack(idx, data, size)) < 0)
		return error;

	/* hash_partially() takes a size_t; narrowing to int would mangle big sizes */
	hash_partially(idx, data, size);

	/* Make sure we set the new size of the pack */
	idx->pack->mwf.size += size;

	if (!idx->parsed_header) {
		unsigned int total_objects;

		/* wait until the whole header has arrived */
		if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
			return 0;

		if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
			return error;

		idx->parsed_header = 1;
		idx->nr_objects = ntohl(hdr->hdr_entries);
		idx->off = sizeof(struct git_pack_header);

		/* for now, limit to 2^32 objects */
		assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
		if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
			total_objects = (unsigned int)idx->nr_objects;
		else
			total_objects = UINT_MAX;

		idx->pack->idx_cache = git_oidmap_alloc();
		GITERR_CHECK_ALLOC(idx->pack->idx_cache);

		idx->pack->has_cache = 1;
		if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
			return -1;

		if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
			return -1;

		stats->received_objects = 0;
		stats->local_objects = 0;
		stats->total_deltas = 0;
		stats->indexed_deltas = 0;
		processed = stats->indexed_objects = 0;
		stats->total_objects = total_objects;

		if ((error = do_progress_callback(idx, stats)) != 0)
			return error;
	}

	/* Now that we have data in the pack, let's try to parse it */

	/* As the file grows any windows we try to use will be out of date */
	git_mwindow_free_all(mwf);

	while (processed < idx->nr_objects) {
		git_packfile_stream *stream = &idx->stream;
		git_off_t entry_start = idx->off;
		size_t entry_size;
		git_otype type;
		git_mwindow *w = NULL;

		/* never read into the last 20 bytes, which may be the trailer */
		if (idx->pack->mwf.size <= idx->off + 20)
			return 0;

		if (!idx->have_stream) {
			error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
			if (error == GIT_EBUFS) {
				/* header incomplete: rewind and wait for more data */
				idx->off = entry_start;
				return 0;
			}
			if (error < 0)
				goto on_error;

			git_mwindow_close(&w);
			idx->entry_start = entry_start;
			git_hash_init(&idx->hash_ctx);

			if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
				error = advance_delta_offset(idx, type);
				if (error == GIT_EBUFS) {
					idx->off = entry_start;
					return 0;
				}
				if (error < 0)
					goto on_error;

				idx->have_delta = 1;
			} else {
				idx->have_delta = 0;
				hash_header(&idx->hash_ctx, entry_size, type);
			}

			idx->have_stream = 1;

			error = git_packfile_stream_open(stream, idx->pack, idx->off);
			if (error < 0)
				goto on_error;
		}

		/* deltas are only skipped over for now; full objects get hashed */
		if (idx->have_delta) {
			error = read_object_stream(idx, stream);
		} else {
			error = hash_object_stream(idx, stream);
		}

		idx->off = stream->curpos;
		if (error == GIT_EBUFS)
			return 0;

		/* We want to free the stream resources no matter what here */
		idx->have_stream = 0;
		git_packfile_stream_free(stream);

		if (error < 0)
			goto on_error;

		if (idx->have_delta) {
			error = store_delta(idx);
		} else {
			error = store_object(idx);
		}

		if (error < 0)
			goto on_error;

		if (!idx->have_delta) {
			stats->indexed_objects = (unsigned int)++processed;
		}
		stats->received_objects++;

		if ((error = do_progress_callback(idx, stats)) != 0)
			goto on_error;
	}

	return 0;

on_error:
	git_mwindow_free_all(mwf);
	return error;
}
646

647
/*
 * Replace the last component of `path` with "pack-<hex hash><suffix>",
 * where the hash is idx->hash.
 *
 * Returns 0 on success and -1 if the buffer could not be grown.
 */
static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
{
	static const char prefix[] = "pack-";
	size_t dir_len;

	/* find the end of the directory part (just after the last '/') */
	for (dir_len = (size_t)path->size; dir_len > 0; dir_len--) {
		if (path->ptr[dir_len - 1] == '/')
			break;
	}

	if (git_buf_grow(path, dir_len + 1 + (sizeof(prefix) - 1) +
					 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
		return -1;

	git_buf_truncate(path, dir_len);
	git_buf_puts(path, prefix);

	/* the hex id is written straight into the space reserved above */
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	if (git_buf_oom(path))
		return -1;

	return 0;
}

669 670 671 672
/**
 * Rewind the packfile by the trailer, as we might need to fix the
 * packfile by injecting objects at the tail and must overwrite it.
 */
673
static void seek_back_trailer(git_indexer *idx)
674 675 676 677 678
{
	idx->pack->mwf.size -= GIT_OID_RAWSZ;
	git_mwindow_free_all(&idx->pack->mwf);
}

679
/*
 * Append the object `id`, read from the indexer's ODB, to the end of
 * the pack (overwriting the old trailer), followed by a placeholder
 * trailer, and register it as an object of the pack.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int inject_object(git_indexer *idx, git_oid *id)
{
	git_odb_object *obj;
	struct entry *entry = NULL;
	struct git_pack_entry *pentry = NULL;
	git_oid foo = {{0}};
	unsigned char hdr[64];
	git_buf buf = GIT_BUF_INIT;
	git_off_t entry_start;
	const void *data;
	size_t len, hdr_len;
	int error;

	seek_back_trailer(idx);
	entry_start = idx->pack->mwf.size;

	if (git_odb_read(&obj, idx->odb, id) < 0) {
		giterr_set(GITERR_INDEXER, "missing delta bases");
		return -1;
	}

	data = git_odb_object_data(obj);
	len = git_odb_object_size(obj);

	entry = git__calloc(1, sizeof(*entry));
	if (entry == NULL) {
		/* leave through cleanup so the ODB object is released */
		giterr_set_oom();
		error = -1;
		goto cleanup;
	}

	entry->crc = crc32(0L, Z_NULL, 0);

	/* Write out the object header */
	hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
	if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
		goto cleanup;

	idx->pack->mwf.size += hdr_len;
	entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);

	if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
		goto cleanup;

	/* And then the compressed object */
	if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
		goto cleanup;

	idx->pack->mwf.size += buf.size;
	entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
	git_buf_free(&buf);

	/* Write a fake trailer so the pack functions play ball */

	if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
		goto cleanup;

	idx->pack->mwf.size += GIT_OID_RAWSZ;

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL) {
		giterr_set_oom();
		error = -1;
		goto cleanup;
	}

	git_oid_cpy(&pentry->sha1, id);
	git_oid_cpy(&entry->oid, id);
	idx->off = entry_start + hdr_len + len;

	error = save_entry(idx, entry, pentry, entry_start);

cleanup:
	if (error) {
		git__free(entry);
		git__free(pentry);
	}

	/* a no-op on the success path, where the buffer was already released */
	git_buf_free(&buf);
	git_odb_object_free(obj);
	return error;
}

753
/*
 * Try to make progress on a thin pack: find the first pending
 * REF_DELTA, read the id of its base and, unless the pack already has
 * that object, inject it from the ODB.
 *
 * Returns 0 on success (stats->local_objects is bumped when an object
 * was injected) and a negative value on failure.
 */
static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats)
{
	int error, found_ref_delta = 0;
	unsigned int i;
	struct delta_info *delta;
	size_t size;
	git_otype type;
	git_mwindow *w = NULL;
	git_off_t curpos = 0;
	unsigned char *base_info;
	unsigned int left = 0;
	git_oid base;

	assert(git_vector_length(&idx->deltas) > 0);

	if (idx->odb == NULL) {
		giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
		return -1;
	}

	/* Loop until we find the first REF delta */
	git_vector_foreach(&idx->deltas, i, delta) {
		if (!delta)
			continue;

		curpos = delta->delta_off;
		error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
		if (error < 0) {
			/* do not leave the window cursor open on the way out */
			git_mwindow_close(&w);
			return error;
		}

		if (type == GIT_OBJ_REF_DELTA) {
			found_ref_delta = 1;
			break;
		}
	}

	if (!found_ref_delta) {
		git_mwindow_close(&w);
		giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object");
		return -1;
	}

	/* curpos now points to the base information, which is an OID */
	base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
	if (base_info == NULL) {
		git_mwindow_close(&w);
		giterr_set(GITERR_INDEXER, "failed to map delta information");
		return -1;
	}

	git_oid_fromraw(&base, base_info);
	git_mwindow_close(&w);

	/* nothing to inject if the base is already part of the pack */
	if (has_entry(idx, &base))
		return 0;

	if (inject_object(idx, &base) < 0)
		return -1;

	stats->local_objects++;

	return 0;
}

815
/*
 * Resolve the pending deltas. The list is walked over and over; each
 * delta that can be unpacked and saved is removed from it. When a full
 * pass resolves nothing, fix_thin_pack() is asked to inject a missing
 * base before the next pass.
 *
 * Returns 0 when no pending delta is left, a negative value from the
 * progress callback, or -1 if the thin pack could not be fixed.
 */
static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
{
	unsigned int pos;
	struct delta_info *pending;
	int made_progress = 0, saw_pending = 0, cb_result;

	while (idx->deltas.length > 0) {
		made_progress = 0;
		saw_pending = 0;

		git_vector_foreach(&idx->deltas, pos, pending) {
			git_rawobj obj = {NULL};

			/* slots of already resolved deltas are set to NULL */
			if (!pending)
				continue;

			saw_pending = 1;
			idx->off = pending->delta_off;

			/* the base may not be available yet; try again on a later pass */
			if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
				continue;

			if (hash_and_save(idx, &obj, pending->delta_off) < 0)
				continue;

			git__free(obj.data);
			stats->indexed_objects++;
			stats->indexed_deltas++;
			made_progress = 1;

			cb_result = do_progress_callback(idx, stats);
			if (cb_result < 0)
				return cb_result;

			/* remove from the list */
			git_vector_set(NULL, &idx->deltas, pos, NULL);
			git__free(pending);
		}

		/* if none were actually set, we're done */
		if (!saw_pending)
			break;

		if (made_progress)
			continue;

		if (fix_thin_pack(idx, stats) < 0)
			return -1;
	}

	return 0;
}

862
/*
 * After objects have been injected into the pack: rewrite the header
 * with the new object count and recompute the trailer hash over the
 * whole file.
 *
 * Returns 0 on success and -1 on failure.
 */
static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats)
{
	const size_t window_hint = 1024*1024;
	git_mwindow_file *mwf = &idx->pack->mwf;
	git_mwindow *win = NULL;
	git_off_t pos;
	unsigned int avail;
	void *bytes;

	git_hash_init(&idx->trailer);

	/* Update the header to include the number of local objects we injected */
	idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
	if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
		return -1;

	/*
	 * Same technique as when the data came in: read through to the
	 * end of the file and rely on hash_partially() to keep the
	 * existing trailer out of the calculation.
	 */
	git_mwindow_free_all(mwf);
	idx->inbuf_len = 0;

	for (pos = 0; pos < mwf->size; pos += avail) {
		bytes = git_mwindow_open(mwf, &win, pos, window_hint, &avail);
		if (bytes == NULL)
			return -1;

		hash_partially(idx, bytes, avail);
		git_mwindow_close(&win);
	}

	return 0;
}

903
/*
 * Finish indexing: verify the pack's trailer, resolve the pending
 * deltas (fixing a thin pack if needed), write the index file next to
 * the pack and rename both files to their final "pack-<hash>" names.
 *
 * idx   - the indexer
 * stats - progress counters, updated while deltas are resolved
 *
 * Returns 0 on success. On failure a negative value is returned: the
 * result of resolve_deltas() if that is what failed, -1 otherwise.
 */
int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
{
	git_mwindow *w = NULL;
	unsigned int i, long_offsets = 0, left;
	int error = -1;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	git_oid trailer_hash, file_hash;
	git_hash_ctx ctx;
	git_filebuf index_file = {0};
	void *packfile_trailer;

	if (!idx->parsed_header) {
		giterr_set(GITERR_INDEXER, "incomplete pack header");
		return -1;
	}

	if (git_hash_ctx_init(&ctx) < 0)
		return -1;

	/*
	 * From here on every failure leaves through on_error, so that the
	 * hash context (and whatever else has been set up) is released.
	 */

	/* Test for this before resolve_deltas(), as it plays with idx->off */
	if (idx->off + 20 < idx->pack->mwf.size) {
		giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
		goto on_error;
	}

	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_trailer == NULL) {
		git_mwindow_close(&w);
		goto on_error;
	}

	/* Compare the packfile trailer as it was sent to us and what we calculated */
	git_oid_fromraw(&file_hash, packfile_trailer);
	git_mwindow_close(&w);

	git_hash_final(&trailer_hash, &idx->trailer);
	if (git_oid_cmp(&file_hash, &trailer_hash)) {
		giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
		goto on_error;
	}

	/* Freeze the number of deltas */
	stats->total_deltas = stats->total_objects - stats->indexed_objects;

	if ((error = resolve_deltas(idx, stats)) < 0)
		goto on_error;

	/* every later failure is reported as a plain -1 */
	error = -1;

	if (stats->indexed_objects != stats->total_objects) {
		giterr_set(GITERR_INDEXER, "early EOF");
		goto on_error;
	}

	if (stats->local_objects > 0) {
		/* objects were injected: fix up the header and the trailer */
		if (update_header_and_rehash(idx, stats) < 0)
			goto on_error;

		git_hash_final(&trailer_hash, &idx->trailer);
		if (write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ) < 0)
			goto on_error;
	}

	git_vector_sort(&idx->objects);

	/* the index is first written next to the temporary pack: "...pack" -> "...idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_shorten(&filename, strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename))
		goto on_error;

	if (git_filebuf_open(&index_file, filename.ptr,
		GIT_FILEBUF_HASH_CONTENTS, idx->mode) < 0)
		goto on_error;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	git_filebuf_write(&index_file, &hdr, sizeof(hdr));

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		git_filebuf_write(&index_file, &n, sizeof(n));
	}

	/* Write out the object names (SHA-1 hashes) */
	git_vector_foreach(&idx->objects, i, entry) {
		git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
		git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
	}
	git_hash_final(&idx->hash, &ctx);

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* big offsets are replaced by an index into the long offsets table */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		git_filebuf_write(&index_file, &n, sizeof(uint32_t));
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
	}

	/* Write out the packfile trailer to the index */
	if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
		goto on_error;

	/* Write out the hash of the idx */
	if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
		goto on_error;

	git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));

	/* Figure out what the final name should be */
	if (index_path(&filename, idx, ".idx") < 0)
		goto on_error;

	/* Commit file */
	if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
		goto on_error;

	git_mwindow_free_all(&idx->pack->mwf);
	/* We need to close the descriptor here so Windows doesn't choke on commit_at */
	if (p_close(idx->pack->mwf.fd) < 0) {
		giterr_set(GITERR_OS, "failed to close packfile");
		goto on_error;
	}

	idx->pack->mwf.fd = -1;

	if (index_path(&filename, idx, ".pack") < 0)
		goto on_error;

	/* And don't forget to rename the packfile to its new place. */
	if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0) {
		giterr_set(GITERR_OS, "failed to rename packfile '%s'", idx->pack->pack_name);
		goto on_error;
	}

	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);
	return 0;

on_error:
	git_mwindow_free_all(&idx->pack->mwf);
	git_filebuf_cleanup(&index_file);
	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);
	return error;
}

1070
/*
 * Release an indexer together with everything it owns: the object and
 * delta lists, the entries of the pack's idx_cache, the packfile and
 * the hash contexts. Passing NULL is allowed and does nothing.
 */
void git_indexer_free(git_indexer *idx)
{
	if (!idx)
		return;

	git_vector_free_deep(&idx->objects);

	if (idx->pack != NULL && idx->pack->idx_cache != NULL) {
		struct git_pack_entry *cached;

		/* the cache holds pointers to entries allocated by the indexer */
		kh_foreach_value(
			idx->pack->idx_cache, cached, { git__free(cached); });

		git_oidmap_free(idx->pack->idx_cache);
	}

	git_vector_free_deep(&idx->deltas);

	/* the packfile is only released if the mwindow mutex could be taken */
	if (git_mutex_lock(&git__mwindow_mutex) == 0) {
		git_packfile_free(idx->pack);
		git_mutex_unlock(&git__mwindow_mutex);
	}

	git_hash_ctx_cleanup(&idx->trailer);
	git_hash_ctx_cleanup(&idx->hash_ctx);
	git__free(idx);
}