/*
 * Copyright (C) 2009-2012 the libgit2 contributors
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

8 9
#include <zlib.h>

Carlos Martín Nieto committed
10
#include "git2/indexer.h"
11
#include "git2/object.h"
12
#include "git2/oid.h"
Carlos Martín Nieto committed
13

14 15
#include "common.h"
#include "pack.h"
Carlos Martín Nieto committed
16
#include "mwindow.h"
17
#include "posix.h"
18 19 20 21
#include "pack.h"
#include "filebuf.h"

#define UINT31_MAX (0x7FFFFFFF)
22

23
struct entry {
24
	git_oid oid;
25 26 27 28 29
	uint32_t crc;
	uint32_t offset;
	uint64_t offset_long;
};

30
struct git_indexer {
31
	struct git_pack_file *pack;
32 33 34 35 36
	size_t nr_objects;
	git_vector objects;
	git_filebuf file;
	unsigned int fanout[256];
	git_oid hash;
37
};
Carlos Martín Nieto committed
38

39 40 41 42 43 44 45 46 47 48 49 50
struct git_indexer_stream {
	unsigned int parsed_header :1,
		opened_pack;
	struct git_pack_file *pack;
	git_filebuf pack_file;
	git_filebuf index_file;
	git_off_t off;
	size_t nr_objects;
	git_vector objects;
	git_vector deltas;
	unsigned int fanout[256];
	git_oid hash;
51
	git_transfer_progress_callback progress_cb;
52
	void *progress_payload;
53 54 55
};

struct delta_info {
56
	git_off_t delta_off;
57 58
};

59 60 61 62 63
/* Return a pointer to the hash stored in the indexer. */
const git_oid *git_indexer_hash(git_indexer *idx)
{
	const git_oid *stored = &idx->hash;

	return stored;
}

64 65 66 67 68
/* Return a pointer to the hash stored in the streaming indexer. */
const git_oid *git_indexer_stream_hash(git_indexer_stream *idx)
{
	const git_oid *stored = &idx->hash;

	return stored;
}

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
static int open_pack(struct git_pack_file **out, const char *filename)
{
	size_t namelen;
	struct git_pack_file *pack;
	struct stat st;
	int fd;

	namelen = strlen(filename);
	pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1);
	GITERR_CHECK_ALLOC(pack);

	memcpy(pack->pack_name, filename, namelen + 1);

	if (p_stat(filename, &st) < 0) {
		giterr_set(GITERR_OS, "Failed to stat packfile.");
		goto cleanup;
	}

	if ((fd = p_open(pack->pack_name, O_RDONLY)) < 0) {
		giterr_set(GITERR_OS, "Failed to open packfile.");
		goto cleanup;
	}

	pack->mwf.fd = fd;
	pack->mwf.size = (git_off_t)st.st_size;

	*out = pack;
	return 0;

cleanup:
	git__free(pack);
	return -1;
}

static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
104 105 106 107
{
	int error;

	/* Verify we recognize this pack file format. */
108
	if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) {
109 110 111
		giterr_set(GITERR_OS, "Failed to read in pack header");
		return error;
	}
112

113
	if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
114
		giterr_set(GITERR_INDEXER, "Wrong pack signature");
115 116
		return -1;
	}
117

118
	if (!pack_version_ok(hdr->hdr_version)) {
119
		giterr_set(GITERR_INDEXER, "Wrong pack version");
120 121
		return -1;
	}
Carlos Martín Nieto committed
122

123
	return 0;
124 125
}

126
static int objects_cmp(const void *a, const void *b)
127 128 129 130 131 132 133
{
	const struct entry *entrya = a;
	const struct entry *entryb = b;

	return git_oid_cmp(&entrya->oid, &entryb->oid);
}

134 135 136 137 138 139 140 141
static int cache_cmp(const void *a, const void *b)
{
	const struct git_pack_entry *ea = a;
	const struct git_pack_entry *eb = b;

	return git_oid_cmp(&ea->sha1, &eb->sha1);
}

142 143 144
int git_indexer_stream_new(
		git_indexer_stream **out,
		const char *prefix,
145
		git_transfer_progress_callback progress_cb,
146
		void *progress_payload)
147 148 149
{
	git_indexer_stream *idx;
	git_buf path = GIT_BUF_INIT;
150
	static const char suff[] = "/pack";
151
	int error;
152

153 154
	idx = git__calloc(1, sizeof(git_indexer_stream));
	GITERR_CHECK_ALLOC(idx);
155 156
	idx->progress_cb = progress_cb;
	idx->progress_payload = progress_payload;
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178

	error = git_buf_joinpath(&path, prefix, suff);
	if (error < 0)
		goto cleanup;

	error = git_filebuf_open(&idx->pack_file, path.ptr,
				 GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER);
	git_buf_free(&path);
	if (error < 0)
		goto cleanup;

	*out = idx;
	return 0;

cleanup:
	git_buf_free(&path);
	git_filebuf_cleanup(&idx->pack_file);
	git__free(idx);
	return -1;
}

/* Try to store the delta so we can try to resolve it later */
179
static int store_delta(git_indexer_stream *idx, git_off_t entry_start, size_t entry_size, git_otype type)
180
{
181 182
	git_mwindow *w = NULL;
	struct delta_info *delta;
183
	git_rawobj obj;
184
	int error;
185

186
	assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
187 188 189 190

	if (type == GIT_OBJ_REF_DELTA) {
		idx->off += GIT_OID_RAWSZ;
	} else {
191 192 193
		git_off_t base_off;

		base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start);
194
		git_mwindow_close(&w);
195 196
		if (base_off < 0)
			return (int)base_off;
197
	}
198

199
	error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type);
200
	if (error == GIT_EBUFS) {
201
		idx->off = entry_start;
202
		return GIT_EBUFS;
203
	} else if (error < 0){
204
		return -1;
205 206
	}

207 208 209 210 211 212
	delta = git__calloc(1, sizeof(struct delta_info));
	GITERR_CHECK_ALLOC(delta);
	delta->delta_off = entry_start;

	git__free(obj.data);

213
	if (git_vector_insert(&idx->deltas, delta) < 0)
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
		return -1;

	return 0;
}

/*
 * Hash an unpacked object and record it for the index.
 *
 * Builds a `struct entry` (oid, CRC32 of the packed bytes between
 * `entry_start` and idx->off, offset) for idx->objects and a
 * `git_pack_entry` for the pack's cache, and updates the fanout table.
 *
 * Returns 0 on success. On failure returns -1 after releasing whatever
 * has not been handed over to a vector, and frees obj->data (the pointer
 * is reset to NULL so it cannot be freed twice).
 */
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
	int i;
	git_oid oid;
	void *packed;
	size_t entry_size;
	unsigned int left;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;
	git_mwindow *w = NULL;
	git_mwindow_file *mwf = &idx->pack->mwf;

	entry = git__calloc(1, sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	/* offsets that need more than 31 bits go into the long offset field */
	if (entry_start > UINT31_MAX) {
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	/* FIXME: Parse the object instead of hashing it */
	if (git_odb__hashobj(&oid, obj) < 0) {
		giterr_set(GITERR_INDEXER, "Failed to hash object");
		goto on_error;
	}

	pentry = git__malloc(sizeof(*pentry));
	if (pentry == NULL)
		goto on_error;

	git_oid_cpy(&pentry->sha1, &oid);
	pentry->offset = entry_start;

	git_oid_cpy(&entry->oid, &oid);
	entry->crc = crc32(0L, Z_NULL, 0);

	/* checksum the packed representation before handing anything over */
	entry_size = (size_t)(idx->off - entry_start);
	packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
	if (packed == NULL)
		goto on_error;

	entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
	git_mwindow_close(&w);

	if (git_vector_insert(&idx->pack->cache, pentry) < 0)
		goto on_error;
	/* the cache now references pentry; it must not be freed here */
	pentry = NULL;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		goto on_error;

	for (i = oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;

on_error:
	git__free(entry);
	git__free(pentry);
	git__free(obj->data);
	obj->data = NULL;
	return -1;
}
282

283
/* Invoke the progress callback with `stats`, if one was registered. */
static void do_progress_callback(git_indexer_stream *idx, git_transfer_progress *stats)
{
	if (idx->progress_cb)
		idx->progress_cb(stats, idx->progress_payload);
}

289
/*
 * Feed `size` bytes of pack data to the streaming indexer.
 *
 * The data is appended to the temporary pack file; the pack is opened
 * for reading on the first call. Once enough data is available the pack
 * header is parsed, and then as many complete entries as possible are
 * processed: deltas are recorded for later resolution, other objects are
 * hashed and saved. `stats` is updated and the progress callback is
 * invoked along the way.
 *
 * Returns 0 when the data was consumed (including when more data is
 * needed to continue) and a negative value on error.
 */
int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats)
{
	int error;
	struct git_pack_header hdr;
	size_t processed;
	git_mwindow_file *mwf;

	assert(idx && data && stats);

	processed = stats->indexed_objects;

	if (git_filebuf_write(&idx->pack_file, data, size) < 0)
		return -1;

	/* Make sure we set the new size of the pack */
	if (idx->opened_pack) {
		idx->pack->mwf.size += size;
	} else {
		if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0)
			return -1;
		idx->opened_pack = 1;
		if (git_mwindow_file_register(&idx->pack->mwf) < 0)
			return -1;
	}

	/* idx->pack is only guaranteed to exist from this point on */
	mwf = &idx->pack->mwf;

	if (!idx->parsed_header) {
		if ((unsigned)idx->pack->mwf.size < sizeof(hdr))
			return 0;

		if (parse_header(&hdr, idx->pack) < 0)
			return -1;

		idx->parsed_header = 1;
		idx->nr_objects = ntohl(hdr.hdr_entries);
		idx->off = sizeof(struct git_pack_header);

		/* for now, limit to 2^32 objects */
		assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));

		if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0)
			return -1;

		idx->pack->has_cache = 1;
		if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0)
			return -1;

		if (git_vector_init(&idx->deltas, (unsigned int)(idx->nr_objects / 2), NULL) < 0)
			return -1;

		stats->received_objects = 0;
		stats->indexed_objects = 0;
		stats->total_objects = (unsigned int)idx->nr_objects;
		do_progress_callback(idx, stats);
	}

	/* Now that we have data in the pack, let's try to parse it */

	/* As the file grows any windows we try to use will be out of date */
	git_mwindow_free_all(mwf);
	while (processed < idx->nr_objects) {
		git_rawobj obj;
		git_off_t entry_start = idx->off;
		size_t entry_size;
		git_otype type;
		git_mwindow *w = NULL;

		/* wait for more data unless more than 20 bytes follow the offset */
		if (idx->pack->mwf.size <= idx->off + 20)
			return 0;

		error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
		if (error == GIT_EBUFS) {
			idx->off = entry_start;
			return 0;
		}
		if (error < 0)
			return -1;

		git_mwindow_close(&w);

		if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
			error = store_delta(idx, entry_start, entry_size, type);
			if (error == GIT_EBUFS) {
				idx->off = entry_start;
				return 0;
			}
			if (error < 0)
				return error;

			/* deltas count as received; they are indexed when finalizing */
			stats->received_objects++;
			do_progress_callback(idx, stats);
			continue;
		}

		/* rewind so the whole entry, header included, is unpacked */
		idx->off = entry_start;
		error = git_packfile_unpack(&obj, idx->pack, &idx->off);
		if (error == GIT_EBUFS) {
			idx->off = entry_start;
			return 0;
		}
		if (error < 0)
			return -1;

		if (hash_and_save(idx, &obj, entry_start) < 0)
			goto on_error;

		git__free(obj.data);

		stats->indexed_objects = (unsigned int)++processed;
		stats->received_objects++;
		do_progress_callback(idx, stats);
	}

	return 0;

on_error:
	git_mwindow_free_all(mwf);
	return -1;
}
409

410 411 412 413
/*
 * Rewrite `path` in place so that everything after its last '/' becomes
 * "pack-<hex of idx->hash><suffix>".
 *
 * Returns 0 on success, -1 if the buffer could not be grown.
 */
static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix)
{
	const char prefix[] = "pack-";
	size_t dir_len;
	size_t needed;

	/* walk back to just after the last '/' (0 if there is none) */
	for (dir_len = (size_t)path->size; dir_len > 0; dir_len--) {
		if (path->ptr[dir_len - 1] == '/')
			break;
	}

	needed = dir_len + 1 + strlen(prefix) + GIT_OID_HEXSZ + strlen(suffix) + 1;
	if (git_buf_grow(path, needed) < 0)
		return -1;

	git_buf_truncate(path, dir_len);
	git_buf_puts(path, prefix);

	/* the hex id is written straight into the buffer, so bump the size by hand */
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	if (git_buf_oom(path))
		return -1;

	return 0;
}

432
/*
 * Unpack every delta recorded in idx->deltas and hash and save the
 * resulting object, updating `stats` and reporting progress after each.
 *
 * Returns 0 on success, -1 as soon as one delta fails.
 */
static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats)
{
	unsigned int pos;
	struct delta_info *info;

	git_vector_foreach(&idx->deltas, pos, info) {
		git_rawobj raw;
		git_off_t start = info->delta_off;

		idx->off = start;
		if (git_packfile_unpack(&raw, idx->pack, &idx->off) < 0 ||
		    hash_and_save(idx, &raw, start) < 0)
			return -1;

		git__free(raw.data);
		stats->indexed_objects++;
		do_progress_callback(idx, stats);
	}

	return 0;
}

455
/*
 * Finish a streamed pack: resolve the stored deltas, write the index
 * file and move both the index and the pack to their final
 * "pack-<hash>" names next to the temporary pack.
 *
 * Returns 0 on success, -1 on error. The hash context and the temporary
 * filename buffer are released on every path, and the pack descriptor is
 * closed at most once.
 */
int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *stats)
{
	git_mwindow *w = NULL;
	unsigned int i, long_offsets = 0, left;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	void *packfile_hash;
	git_oid file_hash;
	git_hash_ctx ctx;
	int pack_closed = 0;

	if (git_hash_ctx_init(&ctx) < 0)
		return -1;

	/* Test for this before resolve_deltas(), as it plays with idx->off */
	if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) {
		giterr_set(GITERR_INDEXER, "Indexing error: junk at the end of the pack");
		goto free_locals;
	}

	if (idx->deltas.length > 0)
		if (resolve_deltas(idx, stats) < 0)
			goto free_locals;

	if (stats->indexed_objects != stats->total_objects) {
		giterr_set(GITERR_INDEXER, "Indexing error: early EOF");
		goto free_locals;
	}

	git_vector_sort(&idx->objects);

	/* the index is first written next to the pack, "pack" swapped for "idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_truncate(&filename, filename.size - strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename))
		goto free_locals;

	if (git_filebuf_open(&idx->index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0)
		goto on_error;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	if (git_filebuf_write(&idx->index_file, &hdr, sizeof(hdr)) < 0)
		goto on_error;

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		if (git_filebuf_write(&idx->index_file, &n, sizeof(n)) < 0)
			goto on_error;
	}

	/* Write out the object names (SHA-1 hashes) */
	git_vector_foreach(&idx->objects, i, entry) {
		if (git_filebuf_write(&idx->index_file, &entry->oid, sizeof(git_oid)) < 0 ||
		    git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ) < 0)
			goto on_error;
	}
	if (git_hash_final(&idx->hash, &ctx) < 0)
		goto on_error;

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		if (git_filebuf_write(&idx->index_file, &entry->crc, sizeof(uint32_t)) < 0)
			goto on_error;
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* large offsets are replaced by an index into the long offset table */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		if (git_filebuf_write(&idx->index_file, &n, sizeof(uint32_t)) < 0)
			goto on_error;
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		if (git_filebuf_write(&idx->index_file, &split, sizeof(uint32_t) * 2) < 0)
			goto on_error;
	}

	/* Write out the packfile trailer */
	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_hash == NULL) {
		git_mwindow_close(&w);
		goto on_error;
	}

	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
	git_mwindow_close(&w);

	if (git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid)) < 0)
		goto on_error;

	/* Write out the hash of the index contents written so far */
	if (git_filebuf_hash(&file_hash, &idx->index_file) < 0)
		goto on_error;

	if (git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid)) < 0)
		goto on_error;

	/* Figure out what the final name should be */
	if (index_path_stream(&filename, idx, ".idx") < 0)
		goto on_error;

	/* Commit file */
	if (git_filebuf_commit_at(&idx->index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
		goto on_error;

	git_mwindow_free_all(&idx->pack->mwf);
	p_close(idx->pack->mwf.fd);
	pack_closed = 1;

	if (index_path_stream(&filename, idx, ".pack") < 0)
		goto on_error;
	/* And don't forget to rename the packfile to its new place. */
	if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
		goto free_locals;

	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);
	return 0;

on_error:
	/* the descriptor may already have been closed above; never close it twice */
	if (!pack_closed) {
		git_mwindow_free_all(&idx->pack->mwf);
		p_close(idx->pack->mwf.fd);
	}
	git_filebuf_cleanup(&idx->index_file);
free_locals:
	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);
	return -1;
}

590 591 592 593 594 595 596 597 598 599 600 601 602
/*
 * Release a streaming indexer: every entry stored in its vectors, the
 * vectors themselves, the pack structure and the indexer. NULL is
 * accepted and ignored.
 */
void git_indexer_stream_free(git_indexer_stream *idx)
{
	unsigned int pos;
	struct entry *obj_entry;
	struct git_pack_entry *cache_entry;
	struct delta_info *pending;

	if (!idx)
		return;

	git_vector_foreach(&idx->objects, pos, obj_entry) {
		git__free(obj_entry);
	}
	git_vector_free(&idx->objects);

	/* the pack only exists once data has been added */
	if (idx->pack) {
		git_vector_foreach(&idx->pack->cache, pos, cache_entry) {
			git__free(cache_entry);
		}
		git_vector_free(&idx->pack->cache);
	}

	git_vector_foreach(&idx->deltas, pos, pending) {
		git__free(pending);
	}
	git_vector_free(&idx->deltas);

	git__free(idx->pack);
	git__free(idx);
}

615 616 617 618 619 620 621 622 623 624
/*
 * Create an indexer for the existing packfile `packname`.
 *
 * Opens the pack, validates its header and prepares the vectors that
 * will hold the pack cache and the index entries.
 *
 * Returns 0 and stores the indexer in `*out`, or -1 on failure.
 */
int git_indexer_new(git_indexer **out, const char *packname)
{
	git_indexer *idx;
	struct git_pack_header hdr;
	int error;

	assert(out && packname);

	idx = git__calloc(1, sizeof(git_indexer));
	GITERR_CHECK_ALLOC(idx);

	/*
	 * Without a pack there is nothing else to release, and the code
	 * below would dereference a NULL idx->pack.
	 */
	if (open_pack(&idx->pack, packname) < 0) {
		git__free(idx);
		return -1;
	}

	if ((error = parse_header(&hdr, idx->pack)) < 0)
		goto cleanup;

	idx->nr_objects = ntohl(hdr.hdr_entries);

	/* for now, limit to 2^32 objects */
	assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));

	error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
	if (error < 0)
		goto cleanup;

	idx->pack->has_cache = 1;
	error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
	if (error < 0)
		goto cleanup;

	*out = idx;

	return 0;

cleanup:
	git_indexer_free(idx);

	return -1;
}

655
/*
 * Rewrite `path` in place so that everything after its last '/' becomes
 * "pack-<hex of idx->hash>.idx".
 *
 * Returns 0 on success, -1 if the buffer could not be grown.
 */
static int index_path(git_buf *path, git_indexer *idx)
{
	const char prefix[] = "pack-", suffix[] = ".idx";
	size_t dir_len;
	size_t needed;

	/* walk back to just after the last '/' (0 if there is none) */
	for (dir_len = (size_t)path->size; dir_len > 0; dir_len--) {
		if (path->ptr[dir_len - 1] == '/')
			break;
	}

	needed = dir_len + 1 + strlen(prefix) + GIT_OID_HEXSZ + strlen(suffix) + 1;
	if (git_buf_grow(path, needed) < 0)
		return -1;

	git_buf_truncate(path, dir_len);
	git_buf_puts(path, prefix);

	/* the hex id is written straight into the buffer, so bump the size by hand */
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	if (git_buf_oom(path))
		return -1;

	return 0;
}

677
/*
 * Write the index for the pack that was processed by git_indexer_run().
 *
 * The index is written next to the pack and then committed under its
 * final "pack-<hash>.idx" name. The pack's windows are released and the
 * pack is deregistered from the mwindow code before returning.
 *
 * Returns 0 on success or a negative value on error.
 */
int git_indexer_write(git_indexer *idx)
{
	git_mwindow *w = NULL;
	int error;
	unsigned int i, long_offsets = 0, left;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	void *packfile_hash;
	git_oid file_hash;
	git_hash_ctx ctx;

	if (git_hash_ctx_init(&ctx) < 0)
		return -1;

	git_vector_sort(&idx->objects);

	/* the index is first written next to the pack, "pack" swapped for "idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_truncate(&filename, filename.size - strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename)) {
		/* nothing else has been set up yet; only drop the locals */
		git_buf_free(&filename);
		git_hash_ctx_cleanup(&ctx);
		return -1;
	}

	error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
	if (error < 0)
		goto cleanup;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
	if (error < 0)
		goto cleanup;

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		error = git_filebuf_write(&idx->file, &n, sizeof(n));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the object names (SHA-1 hashes) */
	git_vector_foreach(&idx->objects, i, entry) {
		if ((error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid))) < 0 ||
			(error = git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ)) < 0)
			goto cleanup;
	}

	if ((error = git_hash_final(&idx->hash, &ctx)) < 0)
		goto cleanup;

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* large offsets are replaced by an index into the long offset table */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
		if (error < 0)
			goto cleanup;
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
		if (error < 0)
			goto cleanup;
	}

	/* Write out the packfile trailer */
	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_hash == NULL) {
		git_mwindow_close(&w);
		error = -1;
		goto cleanup;
	}

	/* copy the trailer while the window is still held, then release it */
	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
	git_mwindow_close(&w);

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < 0)
		goto cleanup;

	/* Write out the index sha */
	error = git_filebuf_hash(&file_hash, &idx->file);
	if (error < 0)
		goto cleanup;

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < 0)
		goto cleanup;

	/* Figure out what the final name should be */
	error = index_path(&filename, idx);
	if (error < 0)
		goto cleanup;

	/* Commit file */
	error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);

cleanup:
	git_mwindow_free_all(&idx->pack->mwf);
	git_mwindow_file_deregister(&idx->pack->mwf);
	if (error < 0)
		git_filebuf_cleanup(&idx->file);
	git_buf_free(&filename);
	git_hash_ctx_cleanup(&ctx);

	return error;
}

810
/*
 * Walk every object of the pack, hash it and collect the data needed to
 * write the index: an entry (oid, CRC32, offset) in idx->objects, a pack
 * cache entry and the fanout counters. `stats` is updated as objects are
 * processed.
 *
 * Returns 0 on success or a negative value on error. Whatever has not
 * been handed over to a vector is released on failure.
 */
int git_indexer_run(git_indexer *idx, git_transfer_progress *stats)
{
	git_mwindow_file *mwf;
	git_off_t off = sizeof(struct git_pack_header);
	int error;
	struct entry *entry = NULL;
	struct git_pack_entry *pentry = NULL;
	git_rawobj obj;
	int have_obj = 0;
	unsigned int left, processed;

	assert(idx && stats);

	mwf = &idx->pack->mwf;
	error = git_mwindow_file_register(mwf);
	if (error < 0)
		return error;

	stats->total_objects = (unsigned int)idx->nr_objects;
	stats->indexed_objects = processed = 0;

	while (processed < idx->nr_objects) {
		git_oid oid;
		git_mwindow *w = NULL;
		int i;
		git_off_t entry_start = off;
		void *packed;
		size_t entry_size;

		entry = git__calloc(1, sizeof(*entry));
		if (entry == NULL) {
			error = -1;
			goto cleanup;
		}

		/* offsets that need more than 31 bits go into the long offset field */
		if (off > UINT31_MAX) {
			entry->offset = UINT32_MAX;
			entry->offset_long = off;
		} else {
			entry->offset = (uint32_t)off;
		}

		error = git_packfile_unpack(&obj, idx->pack, &off);
		if (error < 0)
			goto cleanup;
		have_obj = 1;

		/* FIXME: Parse the object instead of hashing it */
		error = git_odb__hashobj(&oid, &obj);
		if (error < 0) {
			giterr_set(GITERR_INDEXER, "Failed to hash object");
			goto cleanup;
		}

		pentry = git__malloc(sizeof(*pentry));
		if (pentry == NULL) {
			error = -1;
			goto cleanup;
		}

		git_oid_cpy(&pentry->sha1, &oid);
		pentry->offset = entry_start;
		error = git_vector_insert(&idx->pack->cache, pentry);
		if (error < 0)
			goto cleanup;
		/* the cache now references pentry; it must not be freed here */
		pentry = NULL;

		git_oid_cpy(&entry->oid, &oid);
		entry->crc = crc32(0L, Z_NULL, 0);

		entry_size = (size_t)(off - entry_start);
		packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
		if (packed == NULL) {
			error = -1;
			goto cleanup;
		}
		entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
		git_mwindow_close(&w);

		/* Add the object to the list */
		error = git_vector_insert(&idx->objects, entry);
		if (error < 0)
			goto cleanup;
		/* idx->objects now references entry */
		entry = NULL;

		for (i = oid.id[0]; i < 256; ++i) {
			idx->fanout[i]++;
		}

		git__free(obj.data);
		have_obj = 0;

		stats->indexed_objects = ++processed;
	}

cleanup:
	/* on success these are NULL / unset and the calls are no-ops */
	git__free(entry);
	git__free(pentry);
	if (have_obj)
		git__free(obj.data);

	git_mwindow_free_all(mwf);

	return error;
}

906
/*
 * Release an indexer: the entries it collected, its pack (descriptor,
 * mwindow registration and cache entries) and the indexer itself.
 * NULL is accepted and ignored, and so is an indexer without a pack.
 */
void git_indexer_free(git_indexer *idx)
{
	unsigned int i;
	struct entry *e;
	struct git_pack_entry *pe;

	if (idx == NULL)
		return;

	git_vector_foreach(&idx->objects, i, e)
		git__free(e);
	git_vector_free(&idx->objects);

	/* only touch the pack if one is attached */
	if (idx->pack != NULL) {
		p_close(idx->pack->mwf.fd);
		git_mwindow_file_deregister(&idx->pack->mwf);

		git_vector_foreach(&idx->pack->cache, i, pe)
			git__free(pe);
		git_vector_free(&idx->pack->cache);
	}

	git__free(idx->pack);
	git__free(idx);
}
926