indexer.c 24.1 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8 9
#include <zlib.h>

Carlos Martín Nieto committed
10
#include "git2/indexer.h"
11
#include "git2/object.h"
Carlos Martín Nieto committed
12

13 14
#include "common.h"
#include "pack.h"
Carlos Martín Nieto committed
15
#include "mwindow.h"
16
#include "posix.h"
17 18
#include "pack.h"
#include "filebuf.h"
19
#include "oid.h"
20
#include "oidmap.h"
21
#include "compress.h"
22 23

#define UINT31_MAX (0x7FFFFFFF)
24

25
struct entry {
26
	git_oid oid;
27 28 29 30 31
	uint32_t crc;
	uint32_t offset;
	uint64_t offset_long;
};

32 33
struct git_indexer_stream {
	unsigned int parsed_header :1,
34
		opened_pack :1,
35 36
		have_stream :1,
		have_delta :1;
37
	struct git_pack_header hdr;
38 39 40
	struct git_pack_file *pack;
	git_filebuf pack_file;
	git_off_t off;
41 42
	git_off_t entry_start;
	git_packfile_stream stream;
43 44 45 46
	size_t nr_objects;
	git_vector objects;
	git_vector deltas;
	unsigned int fanout[256];
47
	git_hash_ctx hash_ctx;
48
	git_oid hash;
49
	git_transfer_progress_callback progress_cb;
50
	void *progress_payload;
51
	char objbuf[8*1024];
52

53 54 55
	/* Needed to look up objects which we want to inject to fix a thin pack */
	git_odb *odb;

56 57
	/* Fields for calculating the packfile trailer (hash of everything before it) */
	char inbuf[GIT_OID_RAWSZ];
58
	size_t inbuf_len;
59
	git_hash_ctx trailer;
60 61 62
};

struct delta_info {
63
	git_off_t delta_off;
64 65
};

Ben Straub committed
66
const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx)
67 68 69 70
{
	return &idx->hash;
}

71 72 73 74
static int open_pack(struct git_pack_file **out, const char *filename)
{
	struct git_pack_file *pack;

75
	if (git_packfile_alloc(&pack, filename) < 0)
76
		return -1;
77

78
	if ((pack->mwf.fd = p_open(pack->pack_name, O_RDONLY)) < 0) {
79
		giterr_set(GITERR_OS, "Failed to open packfile.");
80 81
		git_packfile_free(pack);
		return -1;
82 83 84 85 86 87 88
	}

	*out = pack;
	return 0;
}

static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
89 90 91 92
{
	int error;

	/* Verify we recognize this pack file format. */
93
	if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) {
94 95 96
		giterr_set(GITERR_OS, "Failed to read in pack header");
		return error;
	}
97

98
	if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
99
		giterr_set(GITERR_INDEXER, "Wrong pack signature");
100 101
		return -1;
	}
102

103
	if (!pack_version_ok(hdr->hdr_version)) {
104
		giterr_set(GITERR_INDEXER, "Wrong pack version");
105 106
		return -1;
	}
Carlos Martín Nieto committed
107

108
	return 0;
109 110
}

111
static int objects_cmp(const void *a, const void *b)
112 113 114 115
{
	const struct entry *entrya = a;
	const struct entry *entryb = b;

116
	return git_oid__cmp(&entrya->oid, &entryb->oid);
117 118
}

119 120 121
int git_indexer_stream_new(
		git_indexer_stream **out,
		const char *prefix,
122
		git_odb *odb,
123
		git_transfer_progress_callback progress_cb,
124
		void *progress_payload)
125 126 127
{
	git_indexer_stream *idx;
	git_buf path = GIT_BUF_INIT;
128
	static const char suff[] = "/pack";
129
	int error;
130

131 132
	idx = git__calloc(1, sizeof(git_indexer_stream));
	GITERR_CHECK_ALLOC(idx);
133
	idx->odb = odb;
134 135
	idx->progress_cb = progress_cb;
	idx->progress_payload = progress_payload;
136
	git_hash_ctx_init(&idx->trailer);
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158

	error = git_buf_joinpath(&path, prefix, suff);
	if (error < 0)
		goto cleanup;

	error = git_filebuf_open(&idx->pack_file, path.ptr,
				 GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER);
	git_buf_free(&path);
	if (error < 0)
		goto cleanup;

	*out = idx;
	return 0;

cleanup:
	git_buf_free(&path);
	git_filebuf_cleanup(&idx->pack_file);
	git__free(idx);
	return -1;
}

/* Try to store the delta so we can try to resolve it later */
159
static int store_delta(git_indexer_stream *idx)
160
{
161 162
	struct delta_info *delta;

163 164
	delta = git__calloc(1, sizeof(struct delta_info));
	GITERR_CHECK_ALLOC(delta);
165
	delta->delta_off = idx->entry_start;
166

167
	if (git_vector_insert(&idx->deltas, delta) < 0)
168 169 170 171 172
		return -1;

	return 0;
}

173 174 175 176 177
/*
 * Feed the object header for an object of the given length and type
 * into `ctx`, ahead of the object's contents.
 */
static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
{
	char hdr[64];
	size_t written = git_odb__format_object_header(hdr, sizeof(hdr), (size_t)len, type);

	git_hash_update(ctx, hdr, written);
}

182
/*
 * Read the stream until it reports no more data, feeding everything
 * that comes out of it into the indexer's per-object hash context.
 *
 * Returns 0 once the stream is exhausted, or the negative value
 * reported by the stream read.
 */
static int hash_object_stream(git_indexer_stream *idx, git_packfile_stream *stream)
{
	ssize_t nread;

	assert(idx && stream);

	for (;;) {
		nread = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
		if (nread < 0)
			return (int)nread;

		git_hash_update(&idx->hash_ctx, idx->objbuf, nread);

		if (nread == 0)
			return 0;
	}
}

201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
/*
 * In order to create the packfile stream, we need to skip over the
 * delta base description. For a REF delta that is a raw object id; for
 * an OFS delta the base lookup advances idx->off for us.
 *
 * Returns 0 on success or the negative value from the base lookup.
 */
static int advance_delta_offset(git_indexer_stream *idx, git_otype type)
{
	git_mwindow *window = NULL;
	git_off_t base;

	assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);

	if (type == GIT_OBJ_REF_DELTA) {
		idx->off += GIT_OID_RAWSZ;
		return 0;
	}

	base = get_delta_base(idx->pack, &window, &idx->off, type, idx->entry_start);
	git_mwindow_close(&window);

	return (base < 0) ? (int)base : 0;
}

/* Read from the stream and discard any output */
221
static int read_object_stream(git_indexer_stream *idx, git_packfile_stream *stream)
222 223 224 225 226 227
{
	ssize_t read;

	assert(stream);

	do {
228
		read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
229 230 231 232 233 234 235 236
	} while (read > 0);

	if (read < 0)
		return (int)read;

	return 0;
}

237 238 239 240 241 242 243 244 245
/*
 * Compute the CRC32 of `size` bytes of the pack starting at `start`.
 *
 * The data is mapped one window at a time. The result is stored in
 * `crc_out` after conversion with htonl(). Returns 0 on success, -1 if
 * a window could not be opened.
 */
static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
{
	void *ptr;
	uint32_t crc;
	unsigned int left, len;
	git_mwindow *w = NULL;

	crc = crc32(0L, Z_NULL, 0);
	while (size > 0) {
		ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
		if (ptr == NULL)
			return -1;

		/*
		 * Compare in the wide type: truncating `size` to unsigned int
		 * first would yield a zero length (and no progress) whenever
		 * `size` is a multiple of 2^32.
		 */
		len = ((git_off_t)left < size) ? left : (unsigned int)size;
		crc = crc32(crc, ptr, len);
		size -= len;
		start += len;
		git_mwindow_close(&w);
	}

	*crc_out = htonl(crc);
	return 0;
}

261
/*
 * Record the non-delta object that has just been read from the pack.
 *
 * Finalizes the per-object hash to obtain the object id, registers a
 * pack entry for it in the pack's id cache, computes the CRC of the
 * packed bytes, appends an index entry to idx->objects and bumps the
 * fanout table.
 *
 * Returns 0 on success and -1 on failure.
 */
static int store_object(git_indexer_stream *idx)
{
	int i, error;
	khiter_t k;
	git_oid oid;
	struct entry *entry;
	git_off_t entry_size;
	struct git_pack_entry *pentry;
	git_hash_ctx *ctx = &idx->hash_ctx;
	git_off_t entry_start = idx->entry_start;

	entry = git__calloc(1, sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL)
		goto on_error; /* don't leak `entry` */

	git_hash_final(&oid, ctx);
	entry_size = idx->off - entry_start;
	if (entry_start > UINT31_MAX) {
		/* Too large for the 31-bit table; flag it for the long offset table */
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	git_oid_cpy(&pentry->sha1, &oid);
	pentry->offset = entry_start;

	k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
	if (!error) {
		/* A zero result is treated as failure; presumably the id is already present */
		git__free(pentry);
		goto on_error;
	}

	/* From here on the cache refers to pentry, so it must not be freed here */
	kh_value(idx->pack->idx_cache, k) = pentry;

	git_oid_cpy(&entry->oid, &oid);

	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		goto on_error;

	/* Every fanout slot from the first id byte upwards counts this object */
	for (i = oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;

on_error:
	git__free(entry);

	return -1;
}

319
/*
 * Register an already-hashed object with the indexer.
 *
 * Fills in the offset fields of `entry` and `pentry`, puts `pentry` in
 * the pack's id cache, appends `entry` to idx->objects and updates the
 * fanout table. Returns 0 on success and -1 on failure; nothing is
 * freed here on failure.
 */
static int save_entry(git_indexer_stream *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
{
	int bucket, put_result;
	khiter_t pos;

	if (entry_start <= UINT31_MAX) {
		entry->offset = (uint32_t)entry_start;
	} else {
		/* Flag the entry so the long offset table is used for it */
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	}

	pentry->offset = entry_start;

	pos = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &put_result);
	/* A zero result is treated as failure; presumably the id is already present */
	if (put_result == 0)
		return -1;

	kh_value(idx->pack->idx_cache, pos) = pentry;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		return -1;

	/* Every fanout slot from the first id byte upwards counts this object */
	bucket = entry->oid.id[0];
	while (bucket < 256)
		idx->fanout[bucket++]++;

	return 0;
}

/*
 * Hash a fully unpacked object, compute the CRC of its packed form
 * (the bytes between `entry_start` and idx->off) and register it via
 * save_entry().
 *
 * Returns 0 on success and -1 on failure. When the failure happens in
 * this function (rather than inside save_entry()), `obj->data` is
 * freed along with everything allocated here.
 */
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
	git_oid oid;
	size_t entry_size;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;

	entry = git__calloc(1, sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	if (git_odb__hashobj(&oid, obj) < 0) {
		giterr_set(GITERR_INDEXER, "Failed to hash object");
		goto on_error;
	}

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL)
		goto on_error; /* release `entry` like every other failure here */

	git_oid_cpy(&pentry->sha1, &oid);
	git_oid_cpy(&entry->oid, &oid);
	entry->crc = crc32(0L, Z_NULL, 0);

	entry_size = (size_t)(idx->off - entry_start);
	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	return save_entry(idx, entry, pentry, entry_start);

on_error:
	/* pentry has not been handed to the cache yet, so it is still ours */
	git__free(pentry);
	git__free(entry);
	git__free(obj->data);
	return -1;
}
382

383
/*
 * Invoke the user's progress callback, if one was given.
 * Returns the callback's return value, or 0 when there is no callback.
 */
static int do_progress_callback(git_indexer_stream *idx, git_transfer_progress *stats)
{
	return idx->progress_cb ? idx->progress_cb(stats, idx->progress_payload) : 0;
}

389
/* Hash everything but the last 20B of input */
390
static void hash_partially(git_indexer_stream *idx, const uint8_t *data, size_t size)
391
{
392
	size_t to_expell, to_keep;
393 394 395 396 397

	if (size == 0)
		return;

	/* Easy case, dump the buffer and the data minus the last 20 bytes */
398
	if (size >= GIT_OID_RAWSZ) {
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
		git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
		git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);

		data += size - GIT_OID_RAWSZ;
		memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
		idx->inbuf_len = GIT_OID_RAWSZ;
		return;
	}

	/* We can just append */
	if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
		memcpy(idx->inbuf + idx->inbuf_len, data, size);
		idx->inbuf_len += size;
		return;
	}

	/* We need to partially drain the buffer and then append */
416 417
	to_keep   = GIT_OID_RAWSZ - size;
	to_expell = idx->inbuf_len - to_keep;
418 419 420 421 422 423 424 425

	git_hash_update(&idx->trailer, idx->inbuf, to_expell);

	memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
	memcpy(idx->inbuf + to_keep, data, size);
	idx->inbuf_len += size - to_expell;
}

426
/*
 * Feed `size` bytes of pack data to the indexer.
 *
 * The data is appended to the pack file and folded into the trailer
 * hash; then as many complete entries as are available are parsed.
 * Non-delta objects are hashed and stored right away, deltas are
 * remembered for git_indexer_stream_finalize().
 *
 * Returns 0 on success, including when more data is needed before
 * parsing can continue. Returns GIT_EUSER if the progress callback
 * returns non-zero, and another negative value on error.
 */
int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats)
{
	int error = -1;
	size_t processed;
	struct git_pack_header *hdr;
	git_mwindow_file *mwf;

	assert(idx && data && stats);

	hdr = &idx->hdr;
	processed = stats->indexed_objects;

	if (git_filebuf_write(&idx->pack_file, data, size) < 0)
		return -1;

	/* Pass the length through untruncated */
	hash_partially(idx, data, size);

	/* Make sure we set the new size of the pack */
	if (idx->opened_pack) {
		idx->pack->mwf.size += size;
	} else {
		if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0)
			return -1;
		idx->opened_pack = 1;
		if (git_mwindow_file_register(&idx->pack->mwf) < 0)
			return -1;
	}

	/* Only take the address once the pack is known to exist */
	mwf = &idx->pack->mwf;

	if (!idx->parsed_header) {
		unsigned int total_objects;

		/* Not enough data for the header yet; wait for more */
		if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
			return 0;

		if (parse_header(&idx->hdr, idx->pack) < 0)
			return -1;

		idx->parsed_header = 1;
		idx->nr_objects = ntohl(hdr->hdr_entries);
		idx->off = sizeof(struct git_pack_header);

		/* for now, limit to 2^32 objects */
		assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
		if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
			total_objects = (unsigned int)idx->nr_objects;
		else
			total_objects = UINT_MAX;

		idx->pack->idx_cache = git_oidmap_alloc();
		GITERR_CHECK_ALLOC(idx->pack->idx_cache);

		idx->pack->has_cache = 1;
		if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
			return -1;

		if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
			return -1;

		stats->received_objects = 0;
		stats->local_objects = 0;
		stats->total_deltas = 0;
		stats->indexed_deltas = 0;
		processed = stats->indexed_objects = 0;
		stats->total_objects = total_objects;
		do_progress_callback(idx, stats);
	}

	/* Now that we have data in the pack, let's try to parse it */

	/* As the file grows any windows we try to use will be out of date */
	git_mwindow_free_all(mwf);
	while (processed < idx->nr_objects) {
		git_packfile_stream *stream = &idx->stream;
		git_off_t entry_start = idx->off;
		size_t entry_size;
		git_otype type;
		git_mwindow *w = NULL;

		/* Never parse into what may turn out to be the trailer */
		if (idx->pack->mwf.size <= idx->off + 20)
			return 0;

		if (!idx->have_stream) {
			error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
			if (error == GIT_EBUFS) {
				/* Header incomplete; retry from the entry start later */
				idx->off = entry_start;
				return 0;
			}
			if (error < 0)
				return -1;

			git_mwindow_close(&w);
			idx->entry_start = entry_start;
			git_hash_ctx_init(&idx->hash_ctx);

			if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
				error = advance_delta_offset(idx, type);
				if (error == GIT_EBUFS) {
					idx->off = entry_start;
					return 0;
				}
				if (error < 0)
					return -1;

				idx->have_delta = 1;
			} else {
				idx->have_delta = 0;
				hash_header(&idx->hash_ctx, entry_size, type);
			}

			/*
			 * Capture the result: `error` still holds 0 from the
			 * calls above, so jumping to on_error without it would
			 * report this failure as success.
			 */
			error = git_packfile_stream_open(stream, idx->pack, idx->off);
			if (error < 0) {
				/* No stream exists; rewind to the entry start */
				idx->off = entry_start;
				goto on_error;
			}
			idx->have_stream = 1;
		}

		if (idx->have_delta) {
			error = read_object_stream(idx, stream);
		} else {
			error = hash_object_stream(idx, stream);
		}

		idx->off = stream->curpos;
		/* Out of data mid-entry: keep the stream open for the next call */
		if (error == GIT_EBUFS)
			return 0;

		/* We want to free the stream resources no matter what here */
		idx->have_stream = 0;
		git_packfile_stream_free(stream);

		if (error < 0)
			goto on_error;

		if (idx->have_delta) {
			error = store_delta(idx);
		} else {
			error = store_object(idx);
		}

		if (error < 0)
			goto on_error;

		/* Deltas only count as indexed once they have been resolved */
		if (!idx->have_delta) {
			stats->indexed_objects = (unsigned int)++processed;
		}
		stats->received_objects++;

		if (do_progress_callback(idx, stats) != 0) {
			giterr_clear();
			error = GIT_EUSER;
			goto on_error;
		}
	}

	return 0;

on_error:
	git_mwindow_free_all(mwf);
	return error;
}
585

586 587 588 589
/*
 * Rewrite `path` in place: keep its directory part (up to and including
 * the last '/') and replace the file name with "pack-<hex hash><suffix>",
 * where the hash is idx->hash.
 *
 * Returns 0 on success, -1 if the buffer could not be grown.
 */
static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix)
{
	const char prefix[] = "pack-";
	size_t dir_len, needed;

	/* Walk back from the end until just past the last '/' */
	for (dir_len = (size_t)path->size; dir_len > 0; dir_len--) {
		if (path->ptr[dir_len - 1] == '/')
			break;
	}

	needed = dir_len + 1 + strlen(prefix) + GIT_OID_HEXSZ + strlen(suffix) + 1;
	if (git_buf_grow(path, needed) < 0)
		return -1;

	git_buf_truncate(path, dir_len);
	git_buf_puts(path, prefix);

	/* The id is formatted straight into the buffer, so account for it by hand */
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	if (git_buf_oom(path))
		return -1;

	return 0;
}

608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
/**
 * Rewind the packfile by the trailer, as we might need to fix the
 * packfile by injecting objects at the tail and must overwrite it.
 *
 * Returns the new file offset, or -1 if seeking failed (in which case
 * the recorded pack size is left alone).
 */
static git_off_t seek_back_trailer(git_indexer_stream *idx)
{
	git_off_t pos = p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_CUR);

	if (pos < 0)
		return -1;

	/* The trailer no longer counts as pack data; existing windows are stale */
	idx->pack->mwf.size -= GIT_OID_RAWSZ;
	git_mwindow_free_all(&idx->pack->mwf);

	return pos;
}

/*
 * Append the object `id`, read from idx->odb, to the end of the pack.
 *
 * The existing trailer is overwritten by the object's pack header and
 * its compressed contents, followed by a placeholder trailer. The
 * object is then registered with the indexer like any other entry.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int inject_object(git_indexer_stream *idx, git_oid *id)
{
	git_odb_object *obj;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;
	git_oid foo = {{0}};
	unsigned char hdr[64];
	git_buf buf = GIT_BUF_INIT;
	git_off_t entry_start;
	const void *data;
	size_t len, hdr_len;
	int error = -1;

	entry = git__calloc(1, sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	/* Look the object up first so a missing base leaves the pack untouched */
	if (git_odb_read(&obj, idx->odb, id) < 0) {
		git__free(entry);
		return -1;
	}

	if ((entry_start = seek_back_trailer(idx)) < 0) {
		giterr_set(GITERR_OS, "failed to seek back over the pack trailer");
		error = -1;
		goto cleanup;
	}

	data = git_odb_object_data(obj);
	len = git_odb_object_size(obj);

	entry->crc = crc32(0L, Z_NULL, 0);

	/* Write out the object header */
	hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
	if ((error = git_filebuf_write(&idx->pack_file, hdr, hdr_len)) < 0)
		goto cleanup;
	idx->pack->mwf.size += hdr_len;
	entry->crc = crc32(entry->crc, hdr, hdr_len);

	if ((error = git__compress(&buf, data, len)) < 0)
		goto cleanup;

	/* And then the compressed object */
	if ((error = git_filebuf_write(&idx->pack_file, buf.ptr, buf.size)) < 0)
		goto cleanup;
	idx->pack->mwf.size += buf.size;
	entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, buf.size));

	/* Write a fake trailer so the pack functions play ball */
	if ((error = git_filebuf_write(&idx->pack_file, &foo, GIT_OID_RAWSZ)) < 0)
		goto cleanup;

	idx->pack->mwf.size += GIT_OID_RAWSZ;

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL) {
		error = -1;
		goto cleanup;
	}

	git_oid_cpy(&pentry->sha1, id);
	git_oid_cpy(&entry->oid, id);
	/* NOTE(review): this uses the uncompressed length, not buf.size; kept as it was */
	idx->off = entry_start + hdr_len + len;

	/*
	 * NOTE(review): if save_entry() fails after putting pentry in the
	 * id cache, the cache keeps a pointer that is freed below. The
	 * pre-existing free of pentry is preserved; confirm whether that
	 * path needs handling inside save_entry().
	 */
	error = save_entry(idx, entry, pentry, entry_start);

cleanup:
	if (error < 0) {
		/* Neither structure is referenced by idx->objects on failure */
		git__free(entry);
		git__free(pentry);
	}
	git_buf_free(&buf);
	git_odb_object_free(obj);
	return error;
}

/*
 * Try to complete a thin pack by injecting one missing base object.
 *
 * Looks for the first pending delta that is a REF delta, reads the id
 * of its base from the pack and injects that object from idx->odb.
 * On success stats->local_objects is incremented.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int fix_thin_pack(git_indexer_stream *idx, git_transfer_progress *stats)
{
	int error, have_ref_delta = 0;
	unsigned int pos;
	struct delta_info *info;
	size_t obj_size;
	git_otype obj_type;
	git_mwindow *window = NULL;
	git_off_t cursor;
	unsigned char *raw_base;
	unsigned int avail = 0;
	git_oid base_id;

	assert(git_vector_length(&idx->deltas) > 0);

	if (idx->odb == NULL) {
		giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
		return -1;
	}

	/* Loop until we find the first REF delta */
	git_vector_foreach(&idx->deltas, pos, info) {
		cursor = info->delta_off;
		error = git_packfile_unpack_header(&obj_size, &obj_type, &idx->pack->mwf, &window, &cursor);
		git_mwindow_close(&window);
		if (error < 0)
			return error;

		have_ref_delta = (obj_type == GIT_OBJ_REF_DELTA);
		if (have_ref_delta)
			break;
	}

	if (!have_ref_delta) {
		giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object");
		return -1;
	}

	/* cursor now points to the base information, which is an OID */
	raw_base = git_mwindow_open(&idx->pack->mwf, &window, cursor, GIT_OID_RAWSZ, &avail);
	if (raw_base == NULL) {
		giterr_set(GITERR_INDEXER, "failed to map delta information");
		return -1;
	}

	/* Copy the id out before letting go of the window it lives in */
	git_oid_fromraw(&base_id, raw_base);
	git_mwindow_close(&window);

	if (inject_object(idx, &base_id) < 0)
		return -1;

	stats->local_objects++;

	return 0;
}

744
/*
 * Resolve every pending delta.
 *
 * The list of deltas is walked repeatedly; each delta that can be
 * unpacked and saved is removed from it. When a full pass resolves
 * nothing, fix_thin_pack() is asked to inject a missing base before
 * trying again.
 *
 * Returns 0 once no deltas are left, -1 if no further progress can be
 * made.
 */
static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats)
{
	unsigned int pos;
	struct delta_info *info;
	int made_progress;

	while (idx->deltas.length > 0) {
		made_progress = 0;

		git_vector_foreach(&idx->deltas, pos, info) {
			git_rawobj raw;

			idx->off = info->delta_off;

			/* Not resolvable (yet): leave it in the list for a later pass */
			if (git_packfile_unpack(&raw, idx->pack, &idx->off) < 0 ||
			    hash_and_save(idx, &raw, info->delta_off) < 0)
				continue;

			git__free(raw.data);
			stats->indexed_objects++;
			stats->indexed_deltas++;
			made_progress = 1;
			do_progress_callback(idx, stats);

			/*
			 * Remove this delta from the list and step the
			 * position back so the element that moved into
			 * this slot is not skipped.
			 */
			git_vector_remove(&idx->deltas, pos);
			git__free(info);
			pos--;
		}

		if (made_progress)
			continue;

		if (fix_thin_pack(idx, stats) < 0) {
			giterr_set(GITERR_INDEXER, "missing delta bases");
			return -1;
		}
	}

	return 0;
}

/*
 * Rewrite the pack header so its entry count includes the objects that
 * were injected locally, then recompute the trailer hash over the
 * whole pack file.
 *
 * Returns 0 on success and -1 on failure.
 */
static int update_header_and_rehash(git_indexer_stream *idx, git_transfer_progress *stats)
{
	const size_t chunk_size = 1024*1024;
	git_mwindow_file *mwf = &idx->pack->mwf;
	git_hash_ctx *trailer_ctx = &idx->trailer;
	git_mwindow *window = NULL;
	unsigned int avail;
	git_off_t done;
	void *map;

	git_hash_ctx_init(trailer_ctx);
	git_mwindow_free_all(mwf);

	/* Update the header to include the number of local objects we injected */
	idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
	if (p_lseek(idx->pack_file.fd, 0, SEEK_SET) < 0) {
		giterr_set(GITERR_OS, "failed to seek to the beginning of the pack");
		return -1;
	}

	if (p_write(idx->pack_file.fd, &idx->hdr, sizeof(struct git_pack_header)) < 0) {
		giterr_set(GITERR_OS, "failed to update the pack header");
		return -1;
	}

	/*
	 * We now use the same technique as before to determine the
	 * hash. We keep reading up to the end and let
	 * hash_partially() keep the existing trailer out of the
	 * calculation.
	 */
	idx->inbuf_len = 0;
	for (done = 0; done < mwf->size; done += avail) {
		map = git_mwindow_open(mwf, &window, done, chunk_size, &avail);
		if (map == NULL)
			return -1;

		hash_partially(idx, map, avail);
		git_mwindow_close(&window);
	}

	return 0;
}

836
/*
 * Finish indexing: verify the pack trailer, resolve the pending deltas
 * (fixing a thin pack if necessary), write the index file and move
 * both the index and the pack to their final, hash-derived names.
 *
 * Returns 0 on success and -1 on failure. Everything acquired in this
 * function is released on every path.
 */
int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *stats)
{
	git_mwindow *w = NULL;
	unsigned int i, long_offsets = 0, left;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	git_oid trailer_hash, file_hash;
	git_hash_ctx ctx;
	git_filebuf index_file = {0};
	void *packfile_trailer;

	if (git_hash_ctx_init(&ctx) < 0)
		return -1;

	/* Test for this before resolve_deltas(), as it plays with idx->off */
	if (idx->off < idx->pack->mwf.size - 20) {
		giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
		goto on_error;
	}

	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_trailer == NULL) {
		git_mwindow_close(&w);
		goto on_error;
	}

	/* Compare the packfile trailer as it was sent to us and what we calculated */
	git_oid_fromraw(&file_hash, packfile_trailer);
	git_mwindow_close(&w);

	git_hash_final(&trailer_hash, &idx->trailer);
	if (git_oid_cmp(&file_hash, &trailer_hash)) {
		giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
		goto on_error;
	}

	/* Freeze the number of deltas */
	stats->total_deltas = stats->total_objects - stats->indexed_objects;

	if (resolve_deltas(idx, stats) < 0)
		goto on_error;

	if (stats->indexed_objects != stats->total_objects) {
		giterr_set(GITERR_INDEXER, "early EOF");
		goto on_error;
	}

	/* Objects were injected: the header and the trailer must be redone */
	if (stats->local_objects > 0) {
		if (update_header_and_rehash(idx, stats) < 0)
			goto on_error;

		git_hash_final(&trailer_hash, &idx->trailer);
		if (p_lseek(idx->pack_file.fd, -GIT_OID_RAWSZ, SEEK_END) < 0)
			goto on_error;

		if (p_write(idx->pack_file.fd, &trailer_hash, GIT_OID_RAWSZ) < 0) {
			giterr_set(GITERR_OS, "failed to update pack trailer");
			goto on_error;
		}
	}

	git_vector_sort(&idx->objects);

	/* The index lives next to the pack: swap the "pack" ending for "idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_shorten(&filename, strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename))
		goto on_error;

	if (git_filebuf_open(&index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0)
		goto on_error;

	/*
	 * NOTE(review): the results of the intermediate writes below are
	 * not checked; presumably the filebuf reports a failed write at
	 * commit time -- confirm.
	 */

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	git_filebuf_write(&index_file, &hdr, sizeof(hdr));

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		git_filebuf_write(&index_file, &n, sizeof(n));
	}

	/* Write out the object names (SHA-1 hashes) */
	git_vector_foreach(&idx->objects, i, entry) {
		git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
		git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
	}
	/* The hash over the sorted ids is what names the resulting files */
	git_hash_final(&idx->hash, &ctx);

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* Large offsets are stored as a flagged position in the long offset table */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		git_filebuf_write(&index_file, &n, sizeof(uint32_t));
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
	}

	/* Write out the packfile trailer to the index */
	if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
		goto on_error;

	/* Write out the hash of the idx */
	if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
		goto on_error;

	git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));

	/* Figure out what the final name should be */
	if (index_path_stream(&filename, idx, ".idx") < 0)
		goto on_error;

	/* Commit file */
	if (git_filebuf_commit_at(&index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
		goto on_error;

	git_mwindow_free_all(&idx->pack->mwf);
	/* We need to close the descriptor here so Windows doesn't choke on commit_at */
	p_close(idx->pack->mwf.fd);
	idx->pack->mwf.fd = -1;

	if (index_path_stream(&filename, idx, ".pack") < 0)
		goto on_error;
	/* And don't forget to rename the packfile to its new place. */
	if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
		goto on_error;

	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);
	return 0;

on_error:
	git_mwindow_free_all(&idx->pack->mwf);
	git_filebuf_cleanup(&index_file);
	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);
	return -1;
}

997 998
/*
 * Release an indexer and everything it owns. Passing NULL is a no-op.
 *
 * This is safe to call at any point in the indexer's life, including
 * before any data was added (no pack yet) and before the pack header
 * was parsed (no id cache yet).
 */
void git_indexer_stream_free(git_indexer_stream *idx)
{
	khiter_t k;
	unsigned int i;
	struct entry *e;
	struct delta_info *delta;

	if (idx == NULL)
		return;

	git_vector_foreach(&idx->objects, i, e)
		git__free(e);
	git_vector_free(&idx->objects);

	/* The cache is only allocated once the pack header has been parsed */
	if (idx->pack && idx->pack->idx_cache) {
		for (k = kh_begin(idx->pack->idx_cache); k != kh_end(idx->pack->idx_cache); k++) {
			if (kh_exist(idx->pack->idx_cache, k))
				git__free(kh_value(idx->pack->idx_cache, k));
		}

		git_oidmap_free(idx->pack->idx_cache);
	}

	git_vector_foreach(&idx->deltas, i, delta)
		git__free(delta);
	git_vector_free(&idx->deltas);

	/* The pack only exists once data has been added */
	if (idx->pack)
		git_packfile_free(idx->pack);

	/* Initialized in git_indexer_stream_new(), so release it here */
	git_hash_ctx_cleanup(&idx->trailer);
	git_filebuf_cleanup(&idx->pack_file);
	git__free(idx);
}