odb_pack.c 16.6 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7 8
 */

#include "common.h"
9
#include <zlib.h>
10
#include "git2/repository.h"
11
#include "git2/oid.h"
12 13 14 15
#include "fileops.h"
#include "hash.h"
#include "odb.h"
#include "delta-apply.h"
16
#include "sha1_lookup.h"
17
#include "mwindow.h"
18
#include "pack.h"
19

20
#include "git2/odb_backend.h"
21

Vicent Marti committed
22 23 24
struct pack_backend {
	git_odb_backend parent;
	git_vector packs;
25
	struct git_pack_file *last_found;
26
	char *pack_folder;
Vicent Marti committed
27 28
};

29 30 31 32 33
struct pack_writepack {
	struct git_odb_writepack parent;
	git_indexer_stream *indexer_stream;
};

Vicent Marti committed
34 35 36
/**
 * The wonderful tale of a Packed Object lookup query
 * ===================================================
Vicent Marti committed
37 38 39
 *	A riveting and epic story of epicness and ASCII
 *			art, presented by yours truly,
 *				Sir Vicent of Marti
Vicent Marti committed
40 41 42 43 44 45 46 47 48 49
 *
 *
 *	Chapter 1: Once upon a time...
 *	Initialization of the Pack Backend
 *	--------------------------------------------------
 *
 *	# git_odb_backend_pack
 *	| Creates the pack backend structure, initializes the
 *	| callback pointers to our default read() and exist() methods,
 *	| and tries to preload all the known packfiles in the ODB.
Vicent Marti committed
50
 * |
Vicent Marti committed
51
 *	|-# packfile_load_all
Vicent Marti committed
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
 *	 | Tries to find the `pack` folder, if it exists. ODBs without
 *	 | a pack folder are ignored altogether. If there's a `pack` folder
 *	 | we run a `dirent` callback through every file in the pack folder
 *	 | to find our packfiles. The packfiles are then sorted according
 *	 | to a sorting callback.
 * 	 |
 *	 |-# packfile_load__cb
 *	 | | This callback is called from `dirent` with every single file
 *	 | | inside the pack folder. We find the packs by actually locating
 *	 | | their index (ends in ".idx"). From that index, we verify that
 *	 | | the corresponding packfile exists and is valid, and if so, we
 *	| | add it to the pack list.
 *	 | |
 *	 | |-# packfile_check
 *	 |		Make sure that there's a packfile to back this index, and store
 *	 |		some very basic information regarding the packfile itself,
 *	 |		such as the full path, the size, and the modification time.
 *	 |		We don't actually open the packfile to check for internal consistency.
 *	|
 *	|-# packfile_sort__cb
 *		Sort all the preloaded packs according to some specific criteria:
 *		we prioritize the "newer" packs because it's more likely they
 *		contain the objects we are looking for, and we prioritize local
 *		packs over remote ones.
Vicent Marti committed
76 77 78 79 80 81 82
 *
 *
 *
 *	Chapter 2: To be, or not to be...
 *	A standard packed `exist` query for an OID
 *	--------------------------------------------------
 *
Vicent Marti committed
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
 * # pack_backend__exists
 * | Check if the given SHA1 oid exists in any of the packs
 * | that have been loaded for our ODB.
 * |
 * |-# pack_entry_find
 *	| Iterate through all the packs that have been preloaded
 *	| (starting by the pack where the latest object was found)
 *	| to try to find the OID in one of them.
 *	|
 *	|-# pack_entry_find1
 *		| Check the index of an individual pack to see if the SHA1
 *		| OID can be found. If we can find the offset to that SHA1
 *		| inside of the index, that means the object is contained
 *		| inside of the packfile and we can stop searching.
 *		| Before returning, we verify that the packfile behing the
 *		| index we are searching still exists on disk.
 *		|
 *		|-# pack_entry_find_offset
 *		| | Mmap the actual index file to disk if it hasn't been opened
 *		| | yet, and run a binary search through it to find the OID.
 *		| | See <http://book.git-scm.com/7_the_packfile.html> for specifics
 *		| | on the Packfile Index format and how do we find entries in it.
 *		| |
 *		| |-# pack_index_open
 *		|	| Guess the name of the index based on the full path to the
 *		|	| packfile, open it and verify its contents. Only if the index
 *		|	| has not been opened already.
 *		|	|
 *		|	|-# pack_index_check
 *		|		Mmap the index file and do a quick run through the header
 *		|		to guess the index version (right now we support v1 and v2),
 *		|		and to verify that the size of the index makes sense.
 *		|
 *		|-# packfile_open
 *			See `packfile_open` in Chapter 3
Vicent Marti committed
118 119 120 121 122 123 124 125 126
 *
 *
 *
 *	Chapter 3: The neverending story...
 *	A standard packed `lookup` query for an OID
 *	--------------------------------------------------
 *	TODO
 *
 */
127 128 129 130


/***********************************************************
 *
Vicent Marti committed
131
 * FORWARD DECLARATIONS
132 133 134
 *
 ***********************************************************/

135
static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
136
static int pack_window_contains(git_mwindow *win, off_t offset);
137

Vicent Marti committed
138
static int packfile_sort__cb(const void *a_, const void *b_);
139

140
static int packfile_load__cb(void *_data, git_buf *path);
141

142
static int pack_entry_find(struct git_pack_entry *e,
143
	struct pack_backend *backend, const git_oid *oid);
144

145 146
/* Can find the offset of an object given
 * a prefix of an identifier.
147
 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
148 149
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
150
 */
151 152 153 154
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
155
	size_t len);
156

Vicent Marti committed
157 158 159 160 161 162 163


/***********************************************************
 *
 * PACK WINDOW MANAGEMENT
 *
 ***********************************************************/
164

165
GIT_INLINE(void) pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p)
166
{
167
	GIT_UNUSED(backend);
168
	git_mwindow_free_all(&p->mwf);
169 170
}

171
GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
172
{
Vicent Marti committed
173 174 175
	/* We must promise at least 20 bytes (one hash) after the
	 * offset is available from this window, otherwise the offset
	 * is not actually in this window and a different window (which
Vicent Marti committed
176
	 * has that one hash excess) must be used. This is to support
Vicent Marti committed
177 178
	 * the object header and delta base parsing routines below.
	 */
179
	return git_mwindow_contains(win, offset + 20);
180 181
}

Vicent Marti committed
182
static int packfile_sort__cb(const void *a_, const void *b_)
183
{
184 185
	const struct git_pack_file *a = a_;
	const struct git_pack_file *b = b_;
Vicent Marti committed
186
	int st;
187

Vicent Marti committed
188 189
	/*
	 * Local packs tend to contain objects specific to our
Vicent Marti committed
190
	 * variant of the project than remote ones. In addition,
Vicent Marti committed
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
	 * remote ones could be on a network mounted filesystem.
	 * Favor local ones for these reasons.
	 */
	st = a->pack_local - b->pack_local;
	if (st)
		return -st;

	/*
	 * Younger packs tend to contain more recent objects,
	 * and more recent objects tend to get accessed more
	 * often.
	 */
	if (a->mtime < b->mtime)
		return 1;
	else if (a->mtime == b->mtime)
		return 0;

	return -1;
209 210
}

Vicent Marti committed
211

212

213
static int packfile_load__cb(void *_data, git_buf *path)
214
{
215
	struct pack_backend *backend = (struct pack_backend *)_data;
216
	struct git_pack_file *pack;
Vicent Marti committed
217
	int error;
218
	unsigned int i;
219

220
	if (git__suffixcmp(path->ptr, ".idx") != 0)
221
		return 0; /* not an index */
222

223
	for (i = 0; i < backend->packs.length; ++i) {
224
		struct git_pack_file *p = git_vector_get(&backend->packs, i);
nulltoken committed
225
		if (memcmp(p->pack_name, git_buf_cstr(path), git_buf_len(path) - strlen(".idx")) == 0)
226
			return 0;
227
	}
228

229
	error = git_packfile_check(&pack, path->ptr);
230
	if (error == GIT_ENOTFOUND)
231
		/* ignore missing .pack file as git does */
232
		return 0;
233 234
	else if (error < 0)
		return error;
Vicent Marti committed
235

236
	return git_vector_insert(&backend->packs, pack);
237 238
}

239 240 241 242 243
static int pack_entry_find_inner(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *oid,
	struct git_pack_file *last_found)
Vicent Marti committed
244
{
245
	unsigned int i;
246

247 248
	if (last_found &&
		git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0)
249
		return 0;
250

Vicent Marti committed
251
	for (i = 0; i < backend->packs.length; ++i) {
252
		struct git_pack_file *p;
253

Vicent Marti committed
254
		p = git_vector_get(&backend->packs, i);
255
		if (p == last_found)
Vicent Marti committed
256
			continue;
257

258
		if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
Vicent Marti committed
259
			backend->last_found = p;
260
			return 0;
261
		}
Vicent Marti committed
262 263
	}

264 265 266 267 268
	return -1;
}

static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
{
269
	struct git_pack_file *last_found = backend->last_found;
270 271 272 273 274

	if (backend->last_found &&
		git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
		return 0;

275
	if (!pack_entry_find_inner(e, backend, oid, last_found))
276 277
		return 0;

Russell Belfer committed
278
	return git_odb__error_notfound("failed to find pack entry", oid);
Vicent Marti committed
279 280
}

281 282 283 284 285 286
static unsigned pack_entry_find_prefix_inner(
		struct git_pack_entry *e,
		struct pack_backend *backend,
		const git_oid *short_oid,
		size_t len,
		struct git_pack_file *last_found)
287 288
{
	int error;
289
	unsigned int i;
290
	unsigned found = 0;
291

292 293
	if (last_found) {
		error = git_pack_entry_find(e, last_found, short_oid, len);
294 295 296
		if (error == GIT_EAMBIGUOUS)
			return error;
		if (!error)
297 298 299 300
			found = 1;
	}

	for (i = 0; i < backend->packs.length; ++i) {
301
		struct git_pack_file *p;
302 303

		p = git_vector_get(&backend->packs, i);
304
		if (p == last_found)
305 306
			continue;

307
		error = git_pack_entry_find(e, p, short_oid, len);
308 309 310 311
		if (error == GIT_EAMBIGUOUS)
			return error;
		if (!error) {
			if (++found > 1)
312 313 314 315 316
				break;
			backend->last_found = p;
		}
	}

317 318 319 320 321 322 323 324 325
	return found;
}

static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
326
	struct git_pack_file *last_found = backend->last_found;
Vicent Marti committed
327
	unsigned int found = pack_entry_find_prefix_inner(e, backend, short_oid, len, last_found);
328

329
	if (!found)
Russell Belfer committed
330
		return git_odb__error_notfound("no matching pack entry for prefix", short_oid);
331 332 333 334
	else if (found > 1)
		return git_odb__error_ambiguous("found multiple pack entries");
	else
		return 0;
335 336
}

Vicent Marti committed
337

338 339 340 341 342 343 344
/***********************************************************
 *
 * PACKED BACKEND PUBLIC API
 *
 * Implement the git_odb_backend API calls
 *
 ***********************************************************/
Vicent Marti committed
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
static int pack_backend__refresh(git_odb_backend *_backend)
{
	struct pack_backend *backend = (struct pack_backend *)_backend;

	int error;
	struct stat st;
	git_buf path = GIT_BUF_INIT;

	if (backend->pack_folder == NULL)
		return 0;

	if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
		return git_odb__error_notfound("failed to refresh packfiles", NULL);

	git_buf_sets(&path, backend->pack_folder);

	/* reload all packs */
	error = git_path_direach(&path, packfile_load__cb, (void *)backend);

	git_buf_free(&path);

	if (error < 0)
		return error;

	git_vector_sort(&backend->packs);
	return 0;
}

373

374
static int pack_backend__read_header(size_t *len_p, git_otype *type_p, struct git_odb_backend *backend, const git_oid *oid)
375
{
376 377
	struct git_pack_entry e;
	int error;
378

379
	assert(len_p && type_p && backend && oid);
380

381 382
	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
		return error;
383

384
	return git_packfile_resolve_header(len_p, type_p, e.p, e.offset);
385 386
}

387
static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
388
{
389
	struct git_pack_entry e;
Vicent Marti committed
390
	git_rawobj raw;
Vicent Marti committed
391
	int error;
392

393 394 395
	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
		(error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
		return error;
Vicent Marti committed
396 397 398 399 400

	*buffer_p = raw.data;
	*len_p = raw.len;
	*type_p = raw.type;

401
	return 0;
402 403
}

404
static int pack_backend__read_prefix(
Vicent Marti committed
405 406 407 408 409 410
	git_oid *out_oid,
	void **buffer_p,
	size_t *len_p,
	git_otype *type_p,
	git_odb_backend *backend,
	const git_oid *short_oid,
411
	size_t len)
412
{
413 414
	int error = 0;

415
	if (len < GIT_OID_MINPREFIXLEN)
416
		error = git_odb__error_ambiguous("prefix length too short");
417

418
	else if (len >= GIT_OID_HEXSZ) {
419
		/* We can fall back to regular read method */
420 421
		error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
		if (!error)
422 423
			git_oid_cpy(out_oid, short_oid);
	} else {
424
		struct git_pack_entry e;
425 426
		git_rawobj raw;

427 428 429 430 431 432 433 434 435
		if ((error = pack_entry_find_prefix(
				&e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
			(error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
		{
			*buffer_p = raw.data;
			*len_p = raw.len;
			*type_p = raw.type;
			git_oid_cpy(out_oid, &e.sha1);
		}
436
	}
437

438
	return error;
439 440
}

441
static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
442
{
443
	struct git_pack_entry e;
444
	return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
445 446
}

447
static int pack_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
448
{
449
	int error;
450 451 452 453 454 455 456 457
	struct git_pack_file *p;
	struct pack_backend *backend;
	unsigned int i;

	assert(_backend && cb);
	backend = (struct pack_backend *)_backend;

	/* Make sure we know about the packfiles */
Vicent Marti committed
458
	if ((error = pack_backend__refresh(_backend)) < 0)
459
		return error;
460 461

	git_vector_foreach(&backend->packs, i, p) {
462
		if ((error = git_pack_foreach_entry(p, cb, data)) < 0)
463
			return error;
464
	}
465

466 467 468
	return 0;
}

469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
static int pack_backend__writepack_add(struct git_odb_writepack *_writepack, const void *data, size_t size, git_transfer_progress *stats)
{
	struct pack_writepack *writepack = (struct pack_writepack *)_writepack;

	assert(writepack);

	return git_indexer_stream_add(writepack->indexer_stream, data, size, stats);
}

static int pack_backend__writepack_commit(struct git_odb_writepack *_writepack, git_transfer_progress *stats)
{
	struct pack_writepack *writepack = (struct pack_writepack *)_writepack;

	assert(writepack);

	return git_indexer_stream_finalize(writepack->indexer_stream, stats);
}

static void pack_backend__writepack_free(struct git_odb_writepack *_writepack)
{
	struct pack_writepack *writepack = (struct pack_writepack *)_writepack;

	assert(writepack);

	git_indexer_stream_free(writepack->indexer_stream);
	git__free(writepack);
}

static int pack_backend__writepack(struct git_odb_writepack **out,
	git_odb_backend *_backend,
	git_transfer_progress_callback progress_cb,
	void *progress_payload)
{
	struct pack_backend *backend;
	struct pack_writepack *writepack;

	assert(out && _backend);

	*out = NULL;

	backend = (struct pack_backend *)_backend;

	writepack = git__calloc(1, sizeof(struct pack_writepack));
	GITERR_CHECK_ALLOC(writepack);

	if (git_indexer_stream_new(&writepack->indexer_stream,
		backend->pack_folder, progress_cb, progress_payload) < 0) {
		git__free(writepack);
		return -1;
	}

	writepack->parent.backend = _backend;
	writepack->parent.add = pack_backend__writepack_add;
	writepack->parent.commit = pack_backend__writepack_commit;
	writepack->parent.free = pack_backend__writepack_free;

	*out = (git_odb_writepack *)writepack;

	return 0;
}

530
static void pack_backend__free(git_odb_backend *_backend)
531
{
Vicent Marti committed
532
	struct pack_backend *backend;
533
	unsigned int i;
534 535 536

	assert(_backend);

Vicent Marti committed
537
	backend = (struct pack_backend *)_backend;
538

Vicent Marti committed
539
	for (i = 0; i < backend->packs.length; ++i) {
540
		struct git_pack_file *p = git_vector_get(&backend->packs, i);
541
		git_packfile_free(p);
Vicent Marti committed
542
	}
543

Vicent Marti committed
544
	git_vector_free(&backend->packs);
545 546
	git__free(backend->pack_folder);
	git__free(backend);
547 548
}

549 550 551 552 553 554 555 556 557 558
int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx)
{
	struct pack_backend *backend = NULL;
	struct git_pack_file *packfile = NULL;

	if (git_packfile_check(&packfile, idx) < 0)
		return -1;

	backend = git__calloc(1, sizeof(struct pack_backend));
	GITERR_CHECK_ALLOC(backend);
559
	backend->parent.version = GIT_ODB_BACKEND_VERSION;
560 561 562 563 564 565 566 567 568

	if (git_vector_init(&backend->packs, 1, NULL) < 0)
		goto on_error;

	if (git_vector_insert(&backend->packs, packfile) < 0)
		goto on_error;

	backend->parent.read = &pack_backend__read;
	backend->parent.read_prefix = &pack_backend__read_prefix;
569
	backend->parent.read_header = &pack_backend__read_header;
570
	backend->parent.exists = &pack_backend__exists;
Vicent Marti committed
571
	backend->parent.refresh = &pack_backend__refresh;
572 573 574 575 576 577 578 579 580 581 582 583 584 585
	backend->parent.foreach = &pack_backend__foreach;
	backend->parent.free = &pack_backend__free;

	*backend_out = (git_odb_backend *)backend;

	return 0;

on_error:
	git_vector_free(&backend->packs);
	git__free(backend);
	git__free(packfile);
	return -1;
}

586 587
int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
{
588 589
	struct pack_backend *backend = NULL;
	git_buf path = GIT_BUF_INIT;
590

Vicent Marti committed
591
	backend = git__calloc(1, sizeof(struct pack_backend));
592
	GITERR_CHECK_ALLOC(backend);
593
	backend->parent.version = GIT_ODB_BACKEND_VERSION;
594

595 596 597 598 599 600
	if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < 0 ||
		git_buf_joinpath(&path, objects_dir, "pack") < 0)
	{
		git__free(backend);
		return -1;
	}
601

602
	if (git_path_isdir(git_buf_cstr(&path)) == true) {
Vicent Marti committed
603 604
		int error;

605
		backend->pack_folder = git_buf_detach(&path);
Vicent Marti committed
606 607 608
		error = pack_backend__refresh((git_odb_backend *)backend);
		if (error < 0)
			return error;
Vicent Marti committed
609
	}
610 611

	backend->parent.read = &pack_backend__read;
Vicent Marti committed
612
	backend->parent.read_prefix = &pack_backend__read_prefix;
613
	backend->parent.read_header = &pack_backend__read_header;
614
	backend->parent.exists = &pack_backend__exists;
Vicent Marti committed
615
	backend->parent.refresh = &pack_backend__refresh;
616
	backend->parent.foreach = &pack_backend__foreach;
617
	backend->parent.writepack = &pack_backend__writepack;
618 619 620
	backend->parent.free = &pack_backend__free;

	*backend_out = (git_odb_backend *)backend;
621 622 623

	git_buf_free(&path);

624
	return 0;
625
}