odb_pack.c 12.6 KB
Newer Older
1
/*
schu committed
2
 * Copyright (C) 2009-2012 the libgit2 contributors
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7 8
 */

#include "common.h"
9
#include <zlib.h>
10
#include "git2/repository.h"
11
#include "git2/oid.h"
12 13 14 15
#include "fileops.h"
#include "hash.h"
#include "odb.h"
#include "delta-apply.h"
16
#include "sha1_lookup.h"
17
#include "mwindow.h"
18
#include "pack.h"
19

20
#include "git2/odb_backend.h"
21

Vicent Marti committed
22 23 24
struct pack_backend {
	git_odb_backend parent;
	git_vector packs;
25
	struct git_pack_file *last_found;
26
	char *pack_folder;
27
	time_t pack_folder_mtime;
Vicent Marti committed
28 29 30 31 32
};

/**
 * The wonderful tale of a Packed Object lookup query
 * ===================================================
Vicent Marti committed
33 34 35
 *	A riveting and epic story of epicness and ASCII
 *			art, presented by yours truly,
 *				Sir Vicent of Marti
Vicent Marti committed
36 37 38 39 40 41 42 43 44 45
 *
 *
 *	Chapter 1: Once upon a time...
 *	Initialization of the Pack Backend
 *	--------------------------------------------------
 *
 *	# git_odb_backend_pack
 *	| Creates the pack backend structure, initializes the
 *	| callback pointers to our default read() and exist() methods,
 *	| and tries to preload all the known packfiles in the ODB.
Vicent Marti committed
46
 * |
Vicent Marti committed
47
 *	|-# packfile_load_all
Vicent Marti committed
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 *	 | Tries to find the `pack` folder, if it exists. ODBs without
 *	 | a pack folder are ignored altogether. If there's a `pack` folder
 *	 | we run a `dirent` callback through every file in the pack folder
 *	 | to find our packfiles. The packfiles are then sorted according
 *	 | to a sorting callback.
 * 	 |
 *	 |-# packfile_load__cb
 *	 | | This callback is called from `dirent` with every single file
 *	 | | inside the pack folder. We find the packs by actually locating
 *	 | | their index (ends in ".idx"). From that index, we verify that
 *	 | | the corresponding packfile exists and is valid, and if so, we
 *	| | add it to the pack list.
 *	 | |
 *	 | |-# packfile_check
 *	 |		Make sure that there's a packfile to back this index, and store
 *	 |		some very basic information regarding the packfile itself,
 *	 |		such as the full path, the size, and the modification time.
 *	 |		We don't actually open the packfile to check for internal consistency.
 *	|
 *	|-# packfile_sort__cb
 *		Sort all the preloaded packs according to some specific criteria:
 *		we prioritize the "newer" packs because it's more likely they
 *		contain the objects we are looking for, and we prioritize local
 *		packs over remote ones.
Vicent Marti committed
72 73 74 75 76 77 78
 *
 *
 *
 *	Chapter 2: To be, or not to be...
 *	A standard packed `exist` query for an OID
 *	--------------------------------------------------
 *
Vicent Marti committed
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
 * # pack_backend__exists
 * | Check if the given SHA1 oid exists in any of the packs
 * | that have been loaded for our ODB.
 * |
 * |-# pack_entry_find
 *	| Iterate through all the packs that have been preloaded
 *	| (starting by the pack where the latest object was found)
 *	| to try to find the OID in one of them.
 *	|
 *	|-# pack_entry_find1
 *		| Check the index of an individual pack to see if the SHA1
 *		| OID can be found. If we can find the offset to that SHA1
 *		| inside of the index, that means the object is contained
 *		| inside of the packfile and we can stop searching.
 *		| Before returning, we verify that the packfile behing the
 *		| index we are searching still exists on disk.
 *		|
 *		|-# pack_entry_find_offset
 *		| | Mmap the actual index file to disk if it hasn't been opened
 *		| | yet, and run a binary search through it to find the OID.
 *		| | See <http://book.git-scm.com/7_the_packfile.html> for specifics
 *		| | on the Packfile Index format and how do we find entries in it.
 *		| |
 *		| |-# pack_index_open
 *		|	| Guess the name of the index based on the full path to the
 *		|	| packfile, open it and verify its contents. Only if the index
 *		|	| has not been opened already.
 *		|	|
 *		|	|-# pack_index_check
 *		|		Mmap the index file and do a quick run through the header
 *		|		to guess the index version (right now we support v1 and v2),
 *		|		and to verify that the size of the index makes sense.
 *		|
 *		|-# packfile_open
 *			See `packfile_open` in Chapter 3
Vicent Marti committed
114 115 116 117 118 119 120 121 122
 *
 *
 *
 *	Chapter 3: The neverending story...
 *	A standard packed `lookup` query for an OID
 *	--------------------------------------------------
 *	TODO
 *
 */
123 124 125 126


/***********************************************************
 *
Vicent Marti committed
127
 * FORWARD DECLARATIONS
128 129 130
 *
 ***********************************************************/

131
static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
132
static int pack_window_contains(git_mwindow *win, off_t offset);
133

Vicent Marti committed
134
static int packfile_sort__cb(const void *a_, const void *b_);
135

136
static int packfile_load__cb(void *_data, git_buf *path);
137
static int packfile_refresh_all(struct pack_backend *backend);
138

139
static int pack_entry_find(struct git_pack_entry *e,
140
	struct pack_backend *backend, const git_oid *oid);
141

142 143
/* Can find the offset of an object given
 * a prefix of an identifier.
144
 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
145 146
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
147
 */
148 149 150 151 152
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	unsigned int len);
153

Vicent Marti committed
154 155 156 157 158 159 160


/***********************************************************
 *
 * PACK WINDOW MANAGEMENT
 *
 ***********************************************************/
161

162
GIT_INLINE(void) pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p)
163
{
164
	GIT_UNUSED(backend);
165
	git_mwindow_free_all(&p->mwf);
166 167
}

168
GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
169
{
Vicent Marti committed
170 171 172
	/* We must promise at least 20 bytes (one hash) after the
	 * offset is available from this window, otherwise the offset
	 * is not actually in this window and a different window (which
Vicent Marti committed
173
	 * has that one hash excess) must be used. This is to support
Vicent Marti committed
174 175
	 * the object header and delta base parsing routines below.
	 */
176
	return git_mwindow_contains(win, offset + 20);
177 178
}

Vicent Marti committed
179
static int packfile_sort__cb(const void *a_, const void *b_)
180
{
181 182
	const struct git_pack_file *a = a_;
	const struct git_pack_file *b = b_;
Vicent Marti committed
183
	int st;
184

Vicent Marti committed
185 186
	/*
	 * Local packs tend to contain objects specific to our
Vicent Marti committed
187
	 * variant of the project than remote ones. In addition,
Vicent Marti committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
	 * remote ones could be on a network mounted filesystem.
	 * Favor local ones for these reasons.
	 */
	st = a->pack_local - b->pack_local;
	if (st)
		return -st;

	/*
	 * Younger packs tend to contain more recent objects,
	 * and more recent objects tend to get accessed more
	 * often.
	 */
	if (a->mtime < b->mtime)
		return 1;
	else if (a->mtime == b->mtime)
		return 0;

	return -1;
206 207
}

Vicent Marti committed
208

209

210
static int packfile_load__cb(void *_data, git_buf *path)
211
{
212
	struct pack_backend *backend = (struct pack_backend *)_data;
213
	struct git_pack_file *pack;
Vicent Marti committed
214
	int error;
215
	unsigned int i;
216

217
	if (git__suffixcmp(path->ptr, ".idx") != 0)
218
		return 0; /* not an index */
219

220
	for (i = 0; i < backend->packs.length; ++i) {
221
		struct git_pack_file *p = git_vector_get(&backend->packs, i);
nulltoken committed
222
		if (memcmp(p->pack_name, git_buf_cstr(path), git_buf_len(path) - strlen(".idx")) == 0)
223
			return 0;
224
	}
225

226
	error = git_packfile_check(&pack, path->ptr);
227
	if (error == GIT_ENOTFOUND)
228
		/* ignore missing .pack file as git does */
229
		return 0;
230 231
	else if (error < 0)
		return error;
Vicent Marti committed
232

233
	return git_vector_insert(&backend->packs, pack);
234 235
}

236
static int packfile_refresh_all(struct pack_backend *backend)
237
{
Vicent Marti committed
238
	int error;
239
	struct stat st;
240

241
	if (backend->pack_folder == NULL)
242
		return 0;
243

Vicent Marti committed
244
	if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
Russell Belfer committed
245
		return git_odb__error_notfound("failed to refresh packfiles", NULL);
246

247
	if (st.st_mtime != backend->pack_folder_mtime) {
248 249
		git_buf path = GIT_BUF_INIT;
		git_buf_sets(&path, backend->pack_folder);
250 251

		/* reload all packs */
252
		error = git_path_direach(&path, packfile_load__cb, (void *)backend);
253 254

		git_buf_free(&path);
255 256 257

		if (error < 0)
			return error;
258 259

		git_vector_sort(&backend->packs);
260
		backend->pack_folder_mtime = st.st_mtime;
261
	}
262

263
	return 0;
264 265
}

266
static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
Vicent Marti committed
267
{
268
	int error;
269
	unsigned int i;
270

271 272
	if ((error = packfile_refresh_all(backend)) < 0)
		return error;
273

Vicent Marti committed
274
	if (backend->last_found &&
275 276
		git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
		return 0;
277

Vicent Marti committed
278
	for (i = 0; i < backend->packs.length; ++i) {
279
		struct git_pack_file *p;
280

Vicent Marti committed
281 282 283
		p = git_vector_get(&backend->packs, i);
		if (p == backend->last_found)
			continue;
284

285
		if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
Vicent Marti committed
286
			backend->last_found = p;
287
			return 0;
288
		}
Vicent Marti committed
289 290
	}

Russell Belfer committed
291
	return git_odb__error_notfound("failed to find pack entry", oid);
Vicent Marti committed
292 293
}

Vicent Marti committed
294
static int pack_entry_find_prefix(
295
	struct git_pack_entry *e,
Vicent Marti committed
296 297 298
	struct pack_backend *backend,
	const git_oid *short_oid,
	unsigned int len)
299 300
{
	int error;
301
	unsigned int i;
302
	unsigned found = 0;
303

304 305
	if ((error = packfile_refresh_all(backend)) < 0)
		return error;
306 307

	if (backend->last_found) {
308
		error = git_pack_entry_find(e, backend->last_found, short_oid, len);
309 310 311
		if (error == GIT_EAMBIGUOUS)
			return error;
		if (!error)
312 313 314 315
			found = 1;
	}

	for (i = 0; i < backend->packs.length; ++i) {
316
		struct git_pack_file *p;
317 318 319 320 321

		p = git_vector_get(&backend->packs, i);
		if (p == backend->last_found)
			continue;

322
		error = git_pack_entry_find(e, p, short_oid, len);
323 324 325 326
		if (error == GIT_EAMBIGUOUS)
			return error;
		if (!error) {
			if (++found > 1)
327 328 329 330 331
				break;
			backend->last_found = p;
		}
	}

332
	if (!found)
Russell Belfer committed
333
		return git_odb__error_notfound("no matching pack entry for prefix", short_oid);
334 335 336 337
	else if (found > 1)
		return git_odb__error_ambiguous("found multiple pack entries");
	else
		return 0;
338 339
}

Vicent Marti committed
340

341 342 343 344 345 346 347 348
/***********************************************************
 *
 * PACKED BACKEND PUBLIC API
 *
 * Implement the git_odb_backend API calls
 *
 ***********************************************************/

Vicent Marti committed
349
/*
350 351 352 353 354 355
int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
{
	pack_location location;

	assert(obj && backend && oid);

Vicent Marti committed
356
	if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
357 358 359 360
		return GIT_ENOTFOUND;

	return read_header_packed(obj, &location);
}
Vicent Marti committed
361
*/
362

363
static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
364
{
365
	struct git_pack_entry e;
Vicent Marti committed
366
	git_rawobj raw;
Vicent Marti committed
367
	int error;
368

369 370 371
	if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
		(error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
		return error;
Vicent Marti committed
372 373 374 375 376

	*buffer_p = raw.data;
	*len_p = raw.len;
	*type_p = raw.type;

377
	return 0;
378 379
}

380
static int pack_backend__read_prefix(
Vicent Marti committed
381 382 383 384 385 386 387
	git_oid *out_oid,
	void **buffer_p,
	size_t *len_p,
	git_otype *type_p,
	git_odb_backend *backend,
	const git_oid *short_oid,
	unsigned int len)
388
{
389 390
	int error = 0;

391
	if (len < GIT_OID_MINPREFIXLEN)
392
		error = git_odb__error_ambiguous("prefix length too short");
393

394
	else if (len >= GIT_OID_HEXSZ) {
395
		/* We can fall back to regular read method */
396 397
		error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
		if (!error)
398 399
			git_oid_cpy(out_oid, short_oid);
	} else {
400
		struct git_pack_entry e;
401 402
		git_rawobj raw;

403 404 405 406 407 408 409 410 411
		if ((error = pack_entry_find_prefix(
				&e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
			(error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
		{
			*buffer_p = raw.data;
			*len_p = raw.len;
			*type_p = raw.type;
			git_oid_cpy(out_oid, &e.sha1);
		}
412
	}
413

414
	return error;
415 416
}

417
static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
418
{
419
	struct git_pack_entry e;
420
	return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
421 422
}

423
static void pack_backend__free(git_odb_backend *_backend)
424
{
Vicent Marti committed
425
	struct pack_backend *backend;
426
	unsigned int i;
427 428 429

	assert(_backend);

Vicent Marti committed
430
	backend = (struct pack_backend *)_backend;
431

Vicent Marti committed
432
	for (i = 0; i < backend->packs.length; ++i) {
433 434
		struct git_pack_file *p = git_vector_get(&backend->packs, i);
		packfile_free(p);
Vicent Marti committed
435
	}
436

Vicent Marti committed
437
	git_vector_free(&backend->packs);
438 439
	git__free(backend->pack_folder);
	git__free(backend);
440 441 442 443
}

int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
{
444 445
	struct pack_backend *backend = NULL;
	git_buf path = GIT_BUF_INIT;
446

Vicent Marti committed
447
	backend = git__calloc(1, sizeof(struct pack_backend));
448
	GITERR_CHECK_ALLOC(backend);
449

450 451 452 453 454 455
	if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < 0 ||
		git_buf_joinpath(&path, objects_dir, "pack") < 0)
	{
		git__free(backend);
		return -1;
	}
456

457
	if (git_path_isdir(git_buf_cstr(&path)) == true) {
458 459
		backend->pack_folder = git_buf_detach(&path);
		backend->pack_folder_mtime = 0;
Vicent Marti committed
460
	}
461 462

	backend->parent.read = &pack_backend__read;
Vicent Marti committed
463
	backend->parent.read_prefix = &pack_backend__read_prefix;
Vicent Marti committed
464
	backend->parent.read_header = NULL;
465 466 467 468
	backend->parent.exists = &pack_backend__exists;
	backend->parent.free = &pack_backend__free;

	*backend_out = (git_odb_backend *)backend;
469 470 471

	git_buf_free(&path);

472
	return 0;
473
}