odb.c 31.3 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8
#include "common.h"
9
#include <zlib.h>
10
#include "git2/object.h"
11
#include "git2/sys/odb_backend.h"
12
#include "fileops.h"
13
#include "hash.h"
14
#include "odb.h"
15
#include "delta.h"
16
#include "filter.h"
17
#include "repository.h"
18

19
#include "git2/odb_backend.h"
20
#include "git2/oid.h"
21
#include "git2/oidarray.h"
22

23 24
/* Path, relative to an objects directory, of the file that lists alternates */
#define GIT_ALTERNATES_FILE "info/alternates"

25 26 27 28 29 30
/*
 * Priorities given to the default on-disk backends. Backends are kept
 * sorted by descending priority, so the packed backend is consulted
 * before the loose one: we work under the assumption that most objects
 * for long-running operations will be packed.
 */
#define GIT_LOOSE_PRIORITY 1
#define GIT_PACKED_PRIORITY 2
31

32 33
/* Alternates nested deeper than this are silently ignored while loading */
#define GIT_ALTERNATES_MAX_DEPTH 5

34 35 36 37
/* Bookkeeping record for one backend stored in an ODB's backend vector. */
typedef struct {
	git_odb_backend *backend; /* the backend implementation */
	int priority;             /* sort key; larger values sort first */
	bool is_alternate;        /* registered as an alternate (never written to) */
	ino_t disk_inode;         /* inode of the objects directory, 0 when unknown */
} backend_internal;

42 43 44 45 46 47 48 49 50
/*
 * Select the object cache for `odb`: the owning repository's cache when
 * the ODB has an owner, otherwise the ODB's own private cache.
 */
static git_cache *odb_cache(git_odb *odb)
{
	git_repository *owner = odb->rc.owner;

	if (owner == NULL)
		return &odb->own_cache;

	return &owner->objects;
}
51

52
static int odb_otype_fast(git_otype *type_p, git_odb *db, const git_oid *id);
53 54
static int load_alternates(git_odb *odb, const char *objects_dir, int alternate_depth);

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
/*
 * Return the type of an object that is answered without consulting any
 * backend (currently only the empty tree id), or GIT_OBJ_BAD when `id`
 * is not such an object.
 */
static git_otype odb_hardcoded_type(const git_oid *id)
{
	static git_oid empty_tree = {{
		0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,
		0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04
	}};

	return (git_oid_cmp(id, &empty_tree) == 0) ? GIT_OBJ_TREE : GIT_OBJ_BAD;
}

/*
 * Populate `raw` for an id that is served without touching any backend.
 *
 * Returns 0 when `id` is a hardcoded object and `raw` now describes a
 * zero-length payload. Returns -1 when `id` is not hardcoded, or when the
 * payload buffer could not be allocated; `raw` is left untouched then.
 */
static int odb_read_hardcoded(git_rawobj *raw, const git_oid *id)
{
	void *data;
	git_otype type = odb_hardcoded_type(id);

	if (type == GIT_OBJ_BAD)
		return -1;

	/* one zeroed byte, so the empty payload is still a valid pointer */
	data = git__calloc(1, sizeof(uint8_t));
	if (data == NULL)
		return -1;

	raw->type = type;
	raw->len = 0;
	raw->data = data;
	return 0;
}

78
/*
 * Format the object header "<type> <length>" into `hdr`, whose capacity
 * is `n` bytes.
 *
 * Returns the header length including its terminating NUL; callers in
 * this file feed that many bytes to the hash.
 */
int git_odb__format_object_header(char *hdr, size_t n, git_off_t obj_len, git_otype obj_type)
{
	const char *type_str = git_object_type2string(obj_type);
	int len = p_snprintf(hdr, n, "%s %lld", type_str, (long long)obj_len);

	/*
	 * Assuming p_snprintf follows snprintf's return convention, a result
	 * equal to `n` already means the text was truncated. The header and
	 * its NUL must both fit, so the length has to stay below `n`.
	 */
	assert(len > 0 && len < (int)n);

	return len + 1;
}

86
/*
 * Compute the id of `obj` by hashing its formatted header followed by
 * its payload.
 *
 * Returns 0 on success, or -1 when the type is not a loose object type
 * or the object claims a non-zero length without carrying any data.
 */
int git_odb__hashobj(git_oid *id, git_rawobj *obj)
{
	char hdr[64];
	git_buf_vec parts[2];

	assert(id && obj);

	if (!git_object_typeisloose(obj->type) ||
	    (!obj->data && obj->len != 0))
		return -1;

	/* first the header (with its NUL), then the raw object bytes */
	parts[0].data = hdr;
	parts[0].len = git_odb__format_object_header(hdr, sizeof(hdr), obj->len, obj->type);
	parts[1].data = obj->data;
	parts[1].len = obj->len;

	git_hash_vec(id, parts, 2);

	return 0;
}

112

113
/*
 * Wrap `source` in a newly allocated odb object identified by `oid`.
 * The object adopts `source->data` as its buffer; nothing is copied.
 * Returns NULL when the allocation fails.
 */
static git_odb_object *odb_object__alloc(const git_oid *oid, git_rawobj *source)
{
	git_odb_object *obj = git__calloc(1, sizeof(*obj));

	if (obj == NULL)
		return NULL;

	git_oid_cpy(&obj->cached.oid, oid);
	obj->cached.type = source->type;
	obj->cached.size = source->len;
	obj->buffer = source->data;

	return obj;
}

127
void git_odb_object__free(void *object)
128
{
Vicent Marti committed
129
	if (object != NULL) {
130
		git__free(((git_odb_object *)object)->buffer);
131
		git__free(object);
Vicent Marti committed
132 133
	}
}
134

135 136 137 138 139 140 141
/* Return a pointer to the id stored inside `object`. */
const git_oid *git_odb_object_id(git_odb_object *object)
{
	const git_oid *id = &object->cached.oid;

	return id;
}

/* Return the object's data buffer; it remains owned by `object`. */
const void *git_odb_object_data(git_odb_object *object)
{
	const void *data = object->buffer;

	return data;
}

/* Return the size recorded for the object's data. */
size_t git_odb_object_size(git_odb_object *object)
{
	size_t size = object->cached.size;

	return size;
}

/* Return the object type recorded for `object`. */
git_otype git_odb_object_type(git_odb_object *object)
{
	git_otype type = object->cached.type;

	return type;
}

155 156 157 158 159 160 161
/*
 * "Duplicate" an odb object by taking one more reference on it: `*dest`
 * ends up pointing at the very same object as `source`. Always returns 0.
 */
int git_odb_object_dup(git_odb_object **dest, git_odb_object *source)
{
	git_odb_object *shared = source;

	git_cached_obj_incref(shared);
	*dest = shared;

	return 0;
}

162
/* Drop one reference on `object`; passing NULL is a no-op. */
void git_odb_object_free(git_odb_object *object)
{
	if (object != NULL) {
		git_cached_obj_decref(object);
	}
}
169

170
/*
 * Hash the contents of `fd` as an object of the given `type`, where
 * `size` is the number of bytes the file is expected to contain (it is
 * also what goes into the object header).
 *
 * Returns 0 with the id in `out`, or a negative value when the type is
 * not a loose object type, hashing fails, a read fails, or the file ends
 * before `size` bytes were consumed.
 */
int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
{
	char hdr[64], chunk[FILEIO_BUFSIZE];
	git_hash_ctx ctx;
	ssize_t nread = 0;
	int hdr_len;
	int error = 0;

	if (!git_object_typeisloose(type)) {
		giterr_set(GITERR_INVALID, "invalid object type for hash");
		return -1;
	}

	if ((error = git_hash_ctx_init(&ctx)) < 0)
		return -1;

	hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), size, type);

	if ((error = git_hash_update(&ctx, hdr, hdr_len)) < 0)
		goto cleanup;

	/* consume the file chunk by chunk until `size` bytes are accounted for */
	for (; size > 0; size -= nread) {
		nread = p_read(fd, chunk, sizeof(chunk));
		if (nread <= 0)
			break;

		if ((error = git_hash_update(&ctx, chunk, nread)) < 0)
			goto cleanup;
	}

	/*
	 * A negative read result is an outright failure. Bytes still being
	 * owed means the file shrank after its size was determined, which
	 * is treated as a read failure as well.
	 */
	if (nread < 0 || size > 0) {
		giterr_set(GITERR_OS, "error reading file for hashing");
		error = -1;
	} else {
		error = git_hash_final(out, &ctx);
	}

cleanup:
	git_hash_ctx_cleanup(&ctx);
	return error;
}

215
int git_odb__hashfd_filtered(
216
	git_oid *out, git_file fd, size_t size, git_otype type, git_filter_list *fl)
217 218 219 220
{
	int error;
	git_buf raw = GIT_BUF_INIT;

221
	if (!fl)
222 223 224 225 226 227
		return git_odb__hashfd(out, fd, size, type);

	/* size of data is used in header, so we have to read the whole file
	 * into memory to apply filters before beginning to calculate the hash
	 */

228
	if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) {
229
		git_buf post = GIT_BUF_INIT;
230

231
		error = git_filter_list_apply_to_data(&post, fl, &raw);
232

233
		git_buf_free(&raw);
234

235 236 237
		if (!error)
			error = git_odb_hash(out, post.ptr, post.size, type);

238
		git_buf_free(&post);
239
	}
240 241 242 243

	return error;
}

244 245 246
/*
 * Hash `path` as a blob without following a final symlink: for a symlink
 * the link target text is hashed, for anything else the file contents.
 *
 * Returns the result of the hashing call, or -1 when the path cannot be
 * stat'ed, its size does not fit in an int, or reading it fails.
 */
int git_odb__hashlink(git_oid *out, const char *path)
{
	struct stat st;
	char *target;
	size_t alloc_size;
	int size, nread, fd, result;

	if (git_path_lstat(path, &st) < 0)
		return -1;

	if (!git__is_int(st.st_size) || (int)st.st_size < 0) {
		giterr_set(GITERR_FILESYSTEM, "file size overflow for 32-bit systems");
		return -1;
	}

	size = (int)st.st_size;

	if (!S_ISLNK(st.st_mode)) {
		/* regular path: hash what the file contains */
		if ((fd = git_futils_open_ro(path)) < 0)
			return -1;

		result = git_odb__hashfd(out, fd, size, GIT_OBJ_BLOB);
		p_close(fd);
		return result;
	}

	/* symlink: room for the target plus a terminating NUL */
	GITERR_CHECK_ALLOC_ADD(&alloc_size, size, 1);
	target = git__malloc(alloc_size);
	GITERR_CHECK_ALLOC(target);

	nread = p_readlink(path, target, size);
	target[size] = '\0';

	if (nread != size) {
		giterr_set(GITERR_OS, "failed to read symlink data for '%s'", path);
		git__free(target);
		return -1;
	}

	result = git_odb_hash(out, target, size, GIT_OBJ_BLOB);
	git__free(target);

	return result;
}

290 291 292
/*
 * Hash the file at `path` as an object of the given `type`.
 *
 * Returns the error from opening the file if that fails, -1 when the
 * file size is negative or does not fit in a size_t, and otherwise the
 * result of hashing the open descriptor.
 */
int git_odb_hashfile(git_oid *out, const char *path, git_otype type)
{
	git_off_t len;
	int error;
	int fd = git_futils_open_ro(path);

	if (fd < 0)
		return fd;

	len = git_futils_filesize(fd);

	if (len < 0 || !git__is_sizet(len)) {
		giterr_set(GITERR_OS, "file size overflow for 32-bit systems");
		error = -1;
	} else {
		error = git_odb__hashfd(out, fd, (size_t)len, type);
	}

	p_close(fd);
	return error;
}

Vicent Marti committed
308 309 310
/*
 * Compute the id that `len` bytes at `data` would have as an object of
 * the given `type`. Nothing is written anywhere; the buffer is only
 * wrapped in a temporary raw object and handed to git_odb__hashobj().
 */
int git_odb_hash(git_oid *id, const void *data, size_t len, git_otype type)
{
	git_rawobj wrapper;

	assert(id);

	wrapper.type = type;
	wrapper.len = len;
	wrapper.data = (void *)data;

	return git_odb__hashobj(id, &wrapper);
}

321 322 323 324 325 326 327 328 329 330 331
/**
 * FAKE WSTREAM
 */

/*
 * Write stream that collects the whole object in a memory buffer and
 * hands it to the backend's `write` callback when finalized.
 */
typedef struct {
	git_odb_stream stream; /* first member: the struct is cast to/from git_odb_stream */
	char *buffer;          /* storage for the object contents */
	size_t size, written;  /* buffer capacity, and bytes stored so far */
	git_otype type;        /* object type passed on to the backend */
} fake_wstream;

332
/*
 * Finalize a fake stream: pass the buffered object, in one piece, to the
 * `write` callback of the backend the stream belongs to.
 */
static int fake_wstream__fwrite(git_odb_stream *_stream, const git_oid *oid)
{
	fake_wstream *fake = (fake_wstream *)_stream;
	git_odb_backend *backend = _stream->backend;

	return backend->write(backend, oid, fake->buffer, fake->size, fake->type);
}

/*
 * Append `len` bytes from `data` to the stream's in-memory buffer.
 *
 * Returns 0 on success, or -1 (storing nothing) when the data would not
 * fit into the space that remains in the buffer.
 */
static int fake_wstream__write(git_odb_stream *_stream, const char *data, size_t len)
{
	fake_wstream *stream = (fake_wstream *)_stream;

	/*
	 * Compare against the remaining space instead of `written + len`:
	 * that sum can wrap around for a huge `len` and slip past the check,
	 * letting memcpy run beyond the end of the buffer.
	 */
	if (stream->written > stream->size ||
	    len > stream->size - stream->written)
		return -1;

	memcpy(stream->buffer + stream->written, data, len);
	stream->written += len;
	return 0;
}

/* Dispose of a fake stream: first its data buffer, then the stream itself. */
static void fake_wstream__free(git_odb_stream *_stream)
{
	fake_wstream *fake = (fake_wstream *)_stream;
	char *buffer = fake->buffer;

	git__free(buffer);
	git__free(fake);
}

358
/*
 * Create an in-memory write stream for `backend` that buffers an object
 * of `size` bytes and of the given `type`.
 *
 * On success stores the new stream in `*stream_p` and returns 0. Returns
 * -1 when `size` does not fit in an ssize_t or an allocation fails.
 */
static int init_fake_wstream(git_odb_stream **stream_p, git_odb_backend *backend, git_off_t size, git_otype type)
{
	fake_wstream *fake;

	if (!git__is_ssizet(size)) {
		giterr_set(GITERR_ODB, "object size too large to keep in memory");
		return -1;
	}

	fake = git__calloc(1, sizeof(*fake));
	GITERR_CHECK_ALLOC(fake);

	fake->size = size;
	fake->type = type;

	fake->buffer = git__malloc(size);
	if (fake->buffer == NULL) {
		git__free(fake);
		return -1;
	}

	/* wire up the stream callbacks; reading is not supported */
	fake->stream.backend = backend;
	fake->stream.mode = GIT_STREAM_WRONLY;
	fake->stream.read = NULL;
	fake->stream.write = &fake_wstream__write;
	fake->stream.finalize_write = &fake_wstream__fwrite;
	fake->stream.free = &fake_wstream__free;

	*stream_p = (git_odb_stream *)fake;
	return 0;
}
388

389 390 391 392 393 394 395
/***********************************************************
 *
 * OBJECT DATABASE PUBLIC API
 *
 * Public calls for the ODB functionality
 *
 ***********************************************************/
396

397
static int backend_sort_cmp(const void *a, const void *b)
398
{
399 400
	const backend_internal *backend_a = (const backend_internal *)(a);
	const backend_internal *backend_b = (const backend_internal *)(b);
401

402 403 404 405 406 407 408 409
	if (backend_b->priority == backend_a->priority) {
		if (backend_a->is_alternate)
			return -1;
		if (backend_b->is_alternate)
			return 1;
		return 0;
	}
	return (backend_b->priority - backend_a->priority);
410 411
}

412
/*
 * Allocate an empty object database with no backends attached.
 *
 * On success stores the new ODB in `*out`, takes the initial reference
 * on it and returns 0. Returns -1 if allocation or initialisation fails;
 * everything set up so far is released again in that case.
 */
int git_odb_new(git_odb **out)
{
	git_odb *db = git__calloc(1, sizeof(*db));
	GITERR_CHECK_ALLOC(db);

	if (git_cache_init(&db->own_cache) < 0) {
		git__free(db);
		return -1;
	}

	if (git_vector_init(&db->backends, 4, backend_sort_cmp) < 0) {
		/* the cache is already initialised; tear it down before bailing */
		git_cache_free(&db->own_cache);
		git__free(db);
		return -1;
	}

	*out = db;
	GIT_REFCOUNT_INC(db);
	return 0;
}

428 429 430
/*
 * Register `backend` with `odb`, recording its priority, whether it is
 * an alternate, and the inode of the directory it was loaded from.
 *
 * The backend vector is re-sorted after the insertion and the backend is
 * marked as belonging to `odb`. Returns 0 on success and -1 on a version
 * mismatch or allocation failure; the backend is not adopted on failure.
 */
static int add_backend_internal(
	git_odb *odb, git_odb_backend *backend,
	int priority, bool is_alternate, ino_t disk_inode)
{
	backend_internal *entry;

	assert(odb && backend);

	GITERR_CHECK_VERSION(backend, GIT_ODB_BACKEND_VERSION, "git_odb_backend");

	/* a backend may be owned by at most one ODB */
	assert(!backend->odb || backend->odb == odb);

	entry = git__malloc(sizeof(*entry));
	GITERR_CHECK_ALLOC(entry);

	entry->backend = backend;
	entry->priority = priority;
	entry->is_alternate = is_alternate;
	entry->disk_inode = disk_inode;

	if (git_vector_insert(&odb->backends, entry) < 0) {
		git__free(entry);
		return -1;
	}

	git_vector_sort(&odb->backends);
	backend->odb = odb;

	return 0;
}

459 460
/* Register `backend` as a regular (non-alternate) backend of `odb`. */
int git_odb_add_backend(git_odb *odb, git_odb_backend *backend, int priority)
{
	const bool is_alternate = false;

	/* no on-disk directory is associated with it, hence inode 0 */
	return add_backend_internal(odb, backend, priority, is_alternate, 0);
}

/* Register `backend` as an alternate backend of `odb`. */
int git_odb_add_alternate(git_odb *odb, git_odb_backend *backend, int priority)
{
	const bool is_alternate = true;

	/* no on-disk directory is associated with it, hence inode 0 */
	return add_backend_internal(odb, backend, priority, is_alternate, 0);
}

469 470 471 472 473 474
/* Return how many backends (alternates included) are registered in `odb`. */
size_t git_odb_num_backends(git_odb *odb)
{
	size_t count;

	assert(odb);

	count = odb->backends.length;
	return count;
}

475 476 477
/*
 * Record an ODB error saying that `action` is not supported by any of
 * the loaded backends. Always returns -1.
 */
static int git_odb__error_unsupported_in_backend(const char *action)
{
	const int failure = -1;

	giterr_set(GITERR_ODB,
		"cannot %s - unsupported in the loaded odb backends", action);

	return failure;
}


483 484 485 486
/*
 * Look up the backend stored at index `pos` of the backend vector.
 *
 * On success stores it in `*out` and returns 0; otherwise sets an error
 * message and returns GIT_ENOTFOUND.
 */
int git_odb_get_backend(git_odb_backend **out, git_odb *odb, size_t pos)
{
	backend_internal *entry;

	assert(out && odb);

	entry = git_vector_get(&odb->backends, pos);

	if (entry == NULL || entry->backend == NULL) {
		giterr_set(GITERR_ODB, "no ODB backend loaded at index %" PRIuZ, pos);
		return GIT_ENOTFOUND;
	}

	*out = entry->backend;
	return 0;
}

499
int git_odb__add_default_backends(
500 501
	git_odb *db, const char *objects_dir,
	bool as_alternates, int alternate_depth)
502
{
503 504
	size_t i;
	struct stat st;
505
	ino_t inode;
506 507
	git_odb_backend *loose, *packed;

508 509
	/* TODO: inodes are not really relevant on Win32, so we need to find
	 * a cross-platform workaround for this */
510 511 512 513 514 515
#ifdef GIT_WIN32
	GIT_UNUSED(i);
	GIT_UNUSED(st);

	inode = 0;
#else
516
	if (p_stat(objects_dir, &st) < 0) {
517 518 519
		if (as_alternates)
			return 0;

520
		giterr_set(GITERR_ODB, "failed to load object database in '%s'", objects_dir);
521 522 523
		return -1;
	}

524 525
	inode = st.st_ino;

526 527
	for (i = 0; i < db->backends.length; ++i) {
		backend_internal *backend = git_vector_get(&db->backends, i);
528
		if (backend->disk_inode == inode)
529 530
			return 0;
	}
531
#endif
532

533
	/* add the loose object backend */
534
	if (git_odb_backend_loose(&loose, objects_dir, -1, db->do_fsync, 0, 0) < 0 ||
535
		add_backend_internal(db, loose, GIT_LOOSE_PRIORITY, as_alternates, inode) < 0)
536
		return -1;
537 538

	/* add the packed file backend */
539
	if (git_odb_backend_pack(&packed, objects_dir) < 0 ||
540
		add_backend_internal(db, packed, GIT_PACKED_PRIORITY, as_alternates, inode) < 0)
541
		return -1;
542

543
	return load_alternates(db, objects_dir, alternate_depth);
544 545
}

546
/*
 * Read the alternates file below `objects_dir` and attach the default
 * backends of every directory listed in it, as alternates.
 *
 * A missing alternates file is not an error. Nesting deeper than
 * GIT_ALTERNATES_MAX_DEPTH is ignored (Git reports an error there).
 * Returns 0 on success or the first negative result encountered.
 */
static int load_alternates(git_odb *odb, const char *objects_dir, int alternate_depth)
{
	git_buf path = GIT_BUF_INIT;
	git_buf contents = GIT_BUF_INIT;
	char *cursor;
	const char *line;
	int error = 0;

	if (alternate_depth > GIT_ALTERNATES_MAX_DEPTH)
		return 0;

	if (git_buf_joinpath(&path, objects_dir, GIT_ALTERNATES_FILE) < 0)
		return -1;

	if (git_path_exists(path.ptr) == false) {
		git_buf_free(&path);
		return 0;
	}

	if (git_futils_readbuffer(&contents, path.ptr) < 0) {
		git_buf_free(&path);
		return -1;
	}

	cursor = (char *)contents.ptr;

	/* one alternate per line; blank lines and '#' comments are skipped */
	for (line = git__strtok(&cursor, "\r\n");
	     line != NULL;
	     line = git__strtok(&cursor, "\r\n")) {
		if (*line == '\0' || *line == '#')
			continue;

		/*
		 * An entry starting with '.' is taken relative to the current
		 * objects directory, but only for the top-level repository.
		 */
		if (*line == '.' && alternate_depth == 0) {
			error = git_buf_joinpath(&path, objects_dir, line);
			if (error < 0)
				break;

			line = git_buf_cstr(&path);
		}

		error = git_odb__add_default_backends(odb, line, true, alternate_depth + 1);
		if (error < 0)
			break;
	}

	git_buf_free(&path);
	git_buf_free(&contents);

	return error;
}
Ramsay Jones committed
598

599 600
/*
 * Attach the default backends of the objects directory at `path` to
 * `odb` as alternates, starting at alternate depth 0.
 */
int git_odb_add_disk_alternate(git_odb *odb, const char *path)
{
	const bool as_alternates = true;

	return git_odb__add_default_backends(odb, path, as_alternates, 0);
}

604
/*
 * Create an object database backed by the default backends for
 * `objects_dir`.
 *
 * On success stores the database in `*out` and returns 0. On failure
 * `*out` is NULL, any partially built database is released, and -1 is
 * returned.
 */
int git_odb_open(git_odb **out, const char *objects_dir)
{
	git_odb *db;

	assert(out && objects_dir);

	*out = NULL;

	if (git_odb_new(&db) < 0)
		return -1;

	if (git_odb__add_default_backends(db, objects_dir, 0, 0) >= 0) {
		*out = db;
		return 0;
	}

	git_odb_free(db);
	return -1;
}
Ramsay Jones committed
623

624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641
/*
 * Apply capability settings to `odb`. Only GIT_ODB_CAP_FROM_OWNER is
 * acted upon: the fsync setting is then taken from the owning
 * repository's configuration. Any other `caps` value is a no-op.
 *
 * Returns 0, or -1 when the ODB has no owning repository to consult.
 */
int git_odb__set_caps(git_odb *odb, int caps)
{
	git_repository *repo;
	int val;

	if (caps != GIT_ODB_CAP_FROM_OWNER)
		return 0;

	repo = odb->rc.owner;

	if (repo == NULL) {
		giterr_set(GITERR_ODB, "cannot access repository to set odb caps");
		return -1;
	}

	/* the setting is only updated when the config lookup succeeds */
	if (git_repository__cvar(&val, repo, GIT_CVAR_FSYNCOBJECTFILES) == 0)
		odb->do_fsync = !!val;

	return 0;
}

642
/*
 * Destroy `db`: free every registered backend and its bookkeeping
 * entry, release the backend vector and the private cache, then wipe
 * and free the database structure itself.
 */
static void odb_free(git_odb *db)
{
	size_t pos;

	for (pos = 0; pos < db->backends.length; ++pos) {
		backend_internal *entry = git_vector_get(&db->backends, pos);
		git_odb_backend *owned = entry->backend;

		owned->free(owned);
		git__free(entry);
	}

	git_vector_free(&db->backends);
	git_cache_free(&db->own_cache);

	git__memzero(db, sizeof(*db));
	git__free(db);
}

662 663 664 665 666 667 668 669
/*
 * Drop a reference on `db` via GIT_REFCOUNT_DEC, with odb_free() as the
 * destructor. NULL is ignored.
 */
void git_odb_free(git_odb *db)
{
	if (db != NULL) {
		GIT_REFCOUNT_DEC(db, odb_free);
	}
}

670 671 672 673
/*
 * Ask the backends, in order, whether they contain `id`, stopping at the
 * first one that does. With `only_refreshed` set, backends without a
 * `refresh` callback are skipped. Returns 1 if found, 0 otherwise.
 */
static int odb_exists_1(
	git_odb *db,
	const git_oid *id,
	bool only_refreshed)
{
	size_t pos;

	for (pos = 0; pos < db->backends.length; ++pos) {
		backend_internal *entry = git_vector_get(&db->backends, pos);
		git_odb_backend *b = entry->backend;

		if (only_refreshed && !b->refresh)
			continue;

		if (b->exists != NULL && (bool)b->exists(b, id))
			return 1;
	}

	return 0;
}

692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
/*
 * Try to freshen `id` in the backends, in order, stopping at the first
 * success. A backend without a `freshen` callback is only asked whether
 * the object exists. With `only_refreshed` set, backends without a
 * `refresh` callback are skipped. Returns 1 on success, 0 otherwise.
 */
static int odb_freshen_1(
	git_odb *db,
	const git_oid *id,
	bool only_refreshed)
{
	size_t pos;

	for (pos = 0; pos < db->backends.length; ++pos) {
		backend_internal *entry = git_vector_get(&db->backends, pos);
		git_odb_backend *b = entry->backend;
		bool hit = false;

		if (only_refreshed && !b->refresh)
			continue;

		/* `freshen` reports success as 0, `exists` as non-zero */
		if (b->freshen != NULL)
			hit = !b->freshen(b, id);
		else if (b->exists != NULL)
			hit = b->exists(b, id);

		if (hit)
			return 1;
	}

	return 0;
}

716
/*
 * Freshen `id` in the database. If no backend has it, the database is
 * refreshed once and the refreshable backends are tried again.
 * Returns 1 when the object was found and freshened, 0 otherwise.
 */
int git_odb__freshen(git_odb *db, const git_oid *id)
{
	assert(db && id);

	if (odb_freshen_1(db, id, false))
		return 1;

	/* a failed refresh leaves nothing more to try: not found */
	if (git_odb_refresh(db) != 0)
		return 0;

	return odb_freshen_1(db, id, true);
}

730
/*
 * Report whether `id` is present in the database: first via the object
 * cache, then the backends, and finally the refreshable backends after
 * one refresh. Returns 1 when found, 0 otherwise.
 */
int git_odb_exists(git_odb *db, const git_oid *id)
{
	git_odb_object *cached;

	assert(db && id);

	cached = git_cache_get_raw(odb_cache(db), id);
	if (cached != NULL) {
		/* only existence matters; hand the cached object back */
		git_odb_object_free(cached);
		return 1;
	}

	if (odb_exists_1(db, id, false))
		return 1;

	/* a failed refresh leaves nothing more to try: not found */
	if (git_odb_refresh(db) != 0)
		return 0;

	return odb_exists_1(db, id, true);
}

/*
 * Resolve the `len`-character prefix `key` to a full id by asking every
 * backend that implements `exists_prefix`. With `only_refreshed` set,
 * backends without a `refresh` callback are skipped.
 *
 * Returns 0 and (if `out` is non-NULL) the full id when all matching
 * backends agree on one id, GIT_ENOTFOUND when nothing matched, the
 * ambiguity error when backends report different ids, or the first
 * other error a backend returns.
 */
static int odb_exists_prefix_1(git_oid *out, git_odb *db,
	const git_oid *key, size_t len, bool only_refreshed)
{
	size_t pos;
	int error = GIT_ENOTFOUND, num_found = 0;
	git_oid last_found = {{0}}, candidate;

	for (pos = 0; pos < db->backends.length; ++pos) {
		backend_internal *entry = git_vector_get(&db->backends, pos);
		git_odb_backend *b = entry->backend;

		if ((only_refreshed && !b->refresh) || !b->exists_prefix)
			continue;

		error = b->exists_prefix(&candidate, b, key, len);

		if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH)
			continue;
		if (error)
			return error;

		if (!num_found) {
			/* first match: remember it for later comparisons */
			git_oid_cpy(&last_found, &candidate);
			num_found++;
		} else if (git_oid__cmp(&last_found, &candidate)) {
			/* a different id for the same prefix is an ambiguity */
			return git_odb__error_ambiguous("multiple matches for prefix");
		}
	}

	if (!num_found)
		return GIT_ENOTFOUND;

	if (out)
		git_oid_cpy(out, &last_found);

	return 0;
}

793 794 795 796 797 798 799 800 801 802 803
/*
 * Check whether exactly one object matches the first `len` hex digits of
 * `short_id`, storing its full id in `out` when `out` is non-NULL.
 *
 * A prefix shorter than GIT_OID_MINPREFIXLEN is rejected as ambiguous; a
 * length of GIT_OID_HEXSZ or more becomes a plain existence check.
 * Otherwise the backends are searched, once more after a refresh if the
 * first pass found nothing.
 */
int git_odb_exists_prefix(
	git_oid *out, git_odb *db, const git_oid *short_id, size_t len)
{
	git_oid key = {{0}};
	int error;

	assert(db && short_id);

	if (len < GIT_OID_MINPREFIXLEN)
		return git_odb__error_ambiguous("prefix length too short");

	if (len >= GIT_OID_HEXSZ) {
		if (!git_odb_exists(db, short_id))
			return git_odb__error_notfound(
				"no match for id prefix", short_id, len);

		if (out)
			git_oid_cpy(out, short_id);
		return 0;
	}

	git_oid__cpy_prefix(&key, short_id, len);

	error = odb_exists_prefix_1(out, db, &key, len, false);

	if (error == GIT_ENOTFOUND && git_odb_refresh(db) == 0)
		error = odb_exists_prefix_1(out, db, &key, len, true);

	if (error != GIT_ENOTFOUND)
		return error;

	return git_odb__error_notfound("no match for id prefix", &key, len);
}

828
int git_odb_expand_ids(
829
	git_odb *db,
830 831
	git_odb_expand_id *ids,
	size_t count)
832
{
833
	size_t i;
834

835
	assert(db && ids);
836

837 838
	for (i = 0; i < count; i++) {
		git_odb_expand_id *query = &ids[i];
839
		int error = GIT_EAMBIGUOUS;
840

841 842 843 844 845 846 847 848 849 850 851 852
		if (!query->type)
			query->type = GIT_OBJ_ANY;

		/* if we have a short OID, expand it first */
		if (query->length >= GIT_OID_MINPREFIXLEN && query->length < GIT_OID_HEXSZ) {
			git_oid actual_id;

			error = odb_exists_prefix_1(&actual_id, db, &query->id, query->length, false);
			if (!error) {
				git_oid_cpy(&query->id, &actual_id);
				query->length = GIT_OID_HEXSZ;
			}
853 854
		}

855
		/*
856 857
		 * now we ought to have a 40-char OID, either because we've expanded it
		 * or because the user passed a full OID. Ensure its type is right.
858
		 */
859 860
		if (query->length >= GIT_OID_HEXSZ) {
			git_otype actual_type;
861

862 863 864 865 866 867 868 869
			error = odb_otype_fast(&actual_type, db, &query->id);
			if (!error) {
				if (query->type != GIT_OBJ_ANY && query->type != actual_type)
					error = GIT_ENOTFOUND;
				else
					query->type = actual_type;
			}
		}
870

871
		switch (error) {
872
		/* no errors, so we've successfully expanded the OID */
873
		case 0:
874
			continue;
875 876 877 878

		/* the object is missing or ambiguous */
		case GIT_ENOTFOUND:
		case GIT_EAMBIGUOUS:
879 880 881
			memset(&query->id, 0, sizeof(git_oid));
			query->length = 0;
			query->type = 0;
882 883 884 885 886
			break;

		/* something went very wrong with the ODB; bail hard */
		default:
			return error;
887 888 889
		}
	}

890 891
	giterr_clear();
	return 0;
892 893
}

Vicent Marti committed
894
/*
 * Fetch the size and type of object `id` into `*len_p` and `*type_p`.
 * If the lookup had to produce a full object, it is released again
 * before returning. Returns the result of the underlying lookup.
 */
int git_odb_read_header(size_t *len_p, git_otype *type_p, git_odb *db, const git_oid *id)
{
	git_odb_object *loaded;
	int error = git_odb__read_header_or_object(&loaded, len_p, type_p, db, id);

	if (loaded != NULL)
		git_odb_object_free(loaded);

	return error;
}

907 908 909 910 911 912
/*
 * Read the header (size and type) of `id` from the backends, in order.
 *
 * Hardcoded objects are answered directly, except on the
 * `only_refreshed` pass, which also skips backends without a `refresh`
 * callback. The first backend result other than "not found" or
 * "passthrough" is returned as is. If nothing answered, the result is
 * GIT_PASSTHROUGH when at least one backend could not read headers, and
 * GIT_ENOTFOUND otherwise.
 */
static int odb_read_header_1(
	size_t *len_p, git_otype *type_p, git_odb *db,
	const git_oid *id, bool only_refreshed)
{
	size_t pos;
	git_otype hardcoded;
	bool passthrough = false;
	int error;

	if (!only_refreshed && (hardcoded = odb_hardcoded_type(id)) != GIT_OBJ_BAD) {
		*type_p = hardcoded;
		*len_p = 0;
		return 0;
	}

	for (pos = 0; pos < db->backends.length; ++pos) {
		backend_internal *entry = git_vector_get(&db->backends, pos);
		git_odb_backend *b = entry->backend;

		if (only_refreshed && !b->refresh)
			continue;

		/* a backend without header support counts as a passthrough */
		if (!b->read_header) {
			passthrough = true;
			continue;
		}

		error = b->read_header(len_p, type_p, b, id);

		if (error == GIT_PASSTHROUGH)
			passthrough = true;
		else if (error != GIT_ENOTFOUND)
			return error;
	}

	return passthrough ? GIT_PASSTHROUGH : GIT_ENOTFOUND;
}

950 951 952 953
/*
 * Obtain the size and type of `id`, reading the full object only when
 * there is no cheaper way.
 *
 * `*out` receives an object when one had to be fetched (from the cache
 * or through a full read) and NULL when the header alone was enough;
 * the caller releases a non-NULL `*out`. Returns 0 on success, a
 * not-found error when the object does not exist, or another error
 * reported by a backend.
 */
int git_odb__read_header_or_object(
	git_odb_object **out, size_t *len_p, git_otype *type_p,
	git_odb *db, const git_oid *id)
{
	int error = GIT_ENOTFOUND;
	git_odb_object *object;

	assert(db && id && out && len_p && type_p);

	object = git_cache_get_raw(odb_cache(db), id);
	if (object != NULL) {
		*len_p = object->cached.size;
		*type_p = object->cached.type;
		*out = object;
		return 0;
	}

	*out = NULL;

	error = odb_read_header_1(len_p, type_p, db, id, false);

	if (error == GIT_ENOTFOUND && git_odb_refresh(db) == 0)
		error = odb_read_header_1(len_p, type_p, db, id, true);

	if (error == GIT_ENOTFOUND)
		return git_odb__error_notfound("cannot read header for", id, GIT_OID_HEXSZ);

	/* either the header was found or a backend reported a real error */
	if (error != GIT_PASSTHROUGH)
		return error;

	/*
	 * the header could not be read through any backend's header
	 * support, so fall back to reading the whole object
	 */
	error = git_odb_read(&object, db, id);
	if (error == 0) {
		*len_p = object->cached.size;
		*type_p = object->cached.type;
		*out = object;
	}

	return error;
}

995 996
/*
 * Read object `id` from the hardcoded objects or from the first backend
 * that has it, wrap it in an odb object and store that in the cache.
 *
 * With `only_refreshed` set, the hardcoded objects and all backends
 * without a `refresh` callback are skipped.
 *
 * Returns 0 with the cached object in `*out`, GIT_ENOTFOUND when no
 * backend has the object, a backend's error, or -1 when the object
 * wrapper cannot be allocated (the payload is freed in that case).
 */
static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
		bool only_refreshed)
{
	size_t i;
	git_rawobj raw;
	git_odb_object *object;
	bool found = false;

	if (!only_refreshed && odb_read_hardcoded(&raw, id) == 0)
		found = true;

	for (i = 0; i < db->backends.length && !found; ++i) {
		backend_internal *internal = git_vector_get(&db->backends, i);
		git_odb_backend *b = internal->backend;

		if (only_refreshed && !b->refresh)
			continue;

		if (b->read != NULL) {
			int error = b->read(&raw.data, &raw.len, &raw.type, b, id);
			if (error == GIT_PASSTHROUGH || error == GIT_ENOTFOUND)
				continue;

			if (error < 0)
				return error;

			found = true;
		}
	}

	if (!found)
		return GIT_ENOTFOUND;

	giterr_clear();
	if ((object = odb_object__alloc(id, &raw)) == NULL) {
		/* no object adopted the payload, so it must be released here */
		git__free(raw.data);
		return -1;
	}

	*out = git_cache_store_raw(odb_cache(db), object);
	return 0;
}

1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051
/*
 * Read object `id` from the database into `*out`.
 *
 * The object cache is consulted first, then the backends, and finally
 * the refreshable backends once more after a refresh. Returns 0 on
 * success, a not-found error when the object does not exist, or another
 * error reported while reading.
 */
int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id)
{
	int error;

	assert(out && db && id);

	if ((*out = git_cache_get_raw(odb_cache(db), id)) != NULL)
		return 0;

	error = odb_read_1(out, db, id, false);

	if (error == GIT_ENOTFOUND && git_odb_refresh(db) == 0)
		error = odb_read_1(out, db, id, true);

	if (error != GIT_ENOTFOUND)
		return error;

	return git_odb__error_notfound("no match for id", id, GIT_OID_HEXSZ);
}

1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079
/*
 * Determine the type of object `id` as cheaply as possible: from the
 * object cache, else from a backend's header support, else by reading
 * the whole object.
 *
 * Returns 0 with the type in `*type_p`, or the error from the lookup.
 * No object reference is retained once the function returns.
 */
static int odb_otype_fast(git_otype *type_p, git_odb *db, const git_oid *id)
{
	git_odb_object *object;
	size_t _unused;
	int error;

	if ((object = git_cache_get_raw(odb_cache(db), id)) != NULL) {
		*type_p = object->cached.type;
		/*
		 * release the object obtained from the cache lookup, as
		 * git_odb_exists() does, so the reference does not leak
		 */
		git_odb_object_free(object);
		return 0;
	}

	error = odb_read_header_1(&_unused, type_p, db, id, false);

	if (error == GIT_PASSTHROUGH) {
		/* headers could not be read directly; load the full object */
		error = odb_read_1(&object, db, id, false);
		if (!error) {
			*type_p = object->cached.type;
			git_odb_object_free(object);
		}
	}

	return error;
}

1080 1081
/*
 * Read the object whose id starts with the `len`-character prefix `key`
 * by asking every backend that implements `read_prefix`. With
 * `only_refreshed` set, backends without a `refresh` callback are
 * skipped.
 *
 * Several backends may return the object as long as they agree on its
 * full id; only the most recently returned payload is kept. Returns 0
 * with the cached object in `*out`, GIT_ENOTFOUND when nothing matched,
 * the ambiguity error when backends disagree, a backend's error, or -1
 * when the object wrapper cannot be allocated. No payload is leaked on
 * any of the failure paths.
 */
static int read_prefix_1(git_odb_object **out, git_odb *db,
		const git_oid *key, size_t len, bool only_refreshed)
{
	size_t i;
	int error = GIT_ENOTFOUND;
	git_oid found_full_oid = {{0}};
	git_rawobj raw;
	void *data = NULL;
	bool found = false;
	git_odb_object *object;

	for (i = 0; i < db->backends.length; ++i) {
		backend_internal *internal = git_vector_get(&db->backends, i);
		git_odb_backend *b = internal->backend;

		if (only_refreshed && !b->refresh)
			continue;

		if (b->read_prefix != NULL) {
			git_oid full_oid;
			error = b->read_prefix(&full_oid, &raw.data, &raw.len, &raw.type, b, key, len);
			if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH)
				continue;

			if (error) {
				/* drop the payload kept from an earlier backend */
				git__free(data);
				return error;
			}

			/* keep only the newest payload */
			git__free(data);
			data = raw.data;

			if (found && git_oid__cmp(&full_oid, &found_full_oid)) {
				git__free(raw.data);
				return git_odb__error_ambiguous("multiple matches for prefix");
			}

			found_full_oid = full_oid;
			found = true;
		}
	}

	if (!found)
		return GIT_ENOTFOUND;

	if ((object = odb_object__alloc(&found_full_oid, &raw)) == NULL) {
		/* no object adopted the payload, so it must be released here */
		git__free(raw.data);
		return -1;
	}

	*out = git_cache_store_raw(odb_cache(db), object);
	return 0;
}

1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149
/*
 * Read the object whose id starts with the first `len` hex digits of
 * `short_id` into `*out`.
 *
 * A prefix shorter than GIT_OID_MINPREFIXLEN is rejected as ambiguous
 * and `len` is capped at GIT_OID_HEXSZ. A full-length id is looked up in
 * the object cache first. The backends are then searched, once more
 * after a refresh if the first pass found nothing.
 */
int git_odb_read_prefix(
	git_odb_object **out, git_odb *db, const git_oid *short_id, size_t len)
{
	git_oid key = {{0}};
	int error;

	assert(out && db);

	if (len < GIT_OID_MINPREFIXLEN)
		return git_odb__error_ambiguous("prefix length too short");

	if (len >= GIT_OID_HEXSZ) {
		len = GIT_OID_HEXSZ;

		/* a complete id may already be sitting in the cache */
		*out = git_cache_get_raw(odb_cache(db), short_id);
		if (*out != NULL)
			return 0;
	}

	git_oid__cpy_prefix(&key, short_id, len);

	error = read_prefix_1(out, db, &key, len, false);

	if (error == GIT_ENOTFOUND && git_odb_refresh(db) == 0)
		error = read_prefix_1(out, db, &key, len, true);

	if (error != GIT_ENOTFOUND)
		return error;

	return git_odb__error_notfound("no match for prefix", &key, len);
}

Ben Straub committed
1163
/*
 * Invoke `cb` (with `payload`) for the objects of every backend by
 * delegating to each backend's `foreach` callback, in backend order.
 *
 * Backends that do not provide a `foreach` callback are skipped, in line
 * with how the other optional backend callbacks are treated in this
 * file. Iteration stops at the first negative result, which is
 * returned; otherwise 0 is returned.
 */
int git_odb_foreach(git_odb *db, git_odb_foreach_cb cb, void *payload)
{
	unsigned int i;
	backend_internal *internal;

	git_vector_foreach(&db->backends, i, internal) {
		git_odb_backend *b = internal->backend;
		int error;

		/* calling through a NULL callback would crash */
		if (b->foreach == NULL)
			continue;

		error = b->foreach(b, cb, payload);
		if (error < 0)
			return error;
	}

	return 0;
}

1178 1179
/*
 * Write the object `data` (`len` bytes, of type `type`) into `db` and store
 * its id in `oid`.
 *
 * The id is computed first; if git_odb__freshen() reports a non-zero result
 * for it, nothing is written and 0 is returned. Otherwise each non-alternate
 * backend that provides a `write` callback is tried until one returns a
 * non-negative value. If none of them did (and the last result was not
 * GIT_PASSTHROUGH), the object is pushed through a write stream instead.
 *
 * Returns 0 on success or an error code.
 */
int git_odb_write(
	git_oid *oid, git_odb *db, const void *data, size_t len, git_otype type)
{
	size_t i;
	int error = GIT_ERROR;
	git_odb_stream *stream;

	assert(oid && db);

	git_odb_hash(oid, data, len, type);
	if (git_odb__freshen(db, oid))
		return 0;

	for (i = 0; i < db->backends.length && error < 0; ++i) {
		backend_internal *internal = git_vector_get(&db->backends, i);
		git_odb_backend *b = internal->backend;

		/* we don't write in alternates! */
		if (internal->is_alternate)
			continue;

		if (b->write != NULL)
			error = b->write(b, oid, data, len, type);
	}

	if (!error || error == GIT_PASSTHROUGH)
		return 0;

	/* if no backends were able to write the object directly, we try a
	 * streaming write to the backends; just write the whole object into the
	 * stream in one push
	 */
	if ((error = git_odb_open_wstream(&stream, db, len, type)) != 0)
		return error;

	/* only finalize when the data actually made it into the stream, so
	 * that a failed write is reported instead of being masked by the
	 * result of finalize_write()
	 */
	if ((error = stream->write(stream, data, len)) >= 0)
		error = stream->finalize_write(stream, oid);

	git_odb_stream_free(stream);

	return error;
}

1220
/*
 * Format the object header for an object of `size` bytes and type `type`
 * and feed the formatted bytes into the hash context `ctx`.
 */
static void hash_header(git_hash_ctx *ctx, git_off_t size, git_otype type)
{
	char hdr[64];
	int hdr_len = git_odb__format_object_header(hdr, sizeof(hdr), size, type);

	git_hash_update(ctx, hdr, hdr_len);
}

1229
int git_odb_open_wstream(
1230
	git_odb_stream **stream, git_odb *db, git_off_t size, git_otype type)
1231
{
1232
	size_t i, writes = 0;
1233
	int error = GIT_ERROR;
1234
	git_hash_ctx *ctx = NULL;
1235

Vicent Marti committed
1236
	assert(stream && db);
1237

1238
	for (i = 0; i < db->backends.length && error < 0; ++i) {
1239 1240 1241 1242 1243 1244
		backend_internal *internal = git_vector_get(&db->backends, i);
		git_odb_backend *b = internal->backend;

		/* we don't write in alternates! */
		if (internal->is_alternate)
			continue;
1245

1246 1247
		if (b->writestream != NULL) {
			++writes;
Vicent Marti committed
1248
			error = b->writestream(stream, b, size, type);
1249 1250
		} else if (b->write != NULL) {
			++writes;
1251
			error = init_fake_wstream(stream, b, size, type);
1252
		}
Vicent Marti committed
1253 1254
	}

1255 1256 1257 1258 1259 1260 1261 1262
	if (error < 0) {
		if (error == GIT_PASSTHROUGH)
			error = 0;
		else if (!writes)
			error = git_odb__error_unsupported_in_backend("write object");

		goto done;
	}
Vicent Marti committed
1263

1264 1265 1266
	ctx = git__malloc(sizeof(git_hash_ctx));
	GITERR_CHECK_ALLOC(ctx);

1267 1268
	if ((error = git_hash_ctx_init(ctx)) < 0)
		goto done;
1269 1270 1271 1272

	hash_header(ctx, size, type);
	(*stream)->hash_ctx = ctx;

1273 1274 1275
	(*stream)->declared_size = size;
	(*stream)->received_bytes = 0;

1276
done:
1277
	return error;
Vicent Marti committed
1278 1279
}

1280 1281 1282 1283 1284
static int git_odb_stream__invalid_length(
	const git_odb_stream *stream,
	const char *action)
{
	giterr_set(GITERR_ODB,
1285
		"cannot %s - "
1286 1287 1288 1289 1290 1291 1292
		"Invalid length. %"PRIuZ" was expected. The "
		"total size of the received chunks amounts to %"PRIuZ".",
		action, stream->declared_size, stream->received_bytes);		

	return -1;
}

1293 1294
/*
 * Append `len` bytes from `buffer` to a write stream.
 *
 * The bytes are fed into the stream's hash context and counted before the
 * size check, so an overflowing chunk is still hashed and counted but is
 * not passed on to the backend: the call then fails with an invalid-length
 * error. Otherwise the result of the stream's own `write` is returned.
 */
int git_odb_stream_write(git_odb_stream *stream, const char *buffer, size_t len)
{
	git_hash_update(stream->hash_ctx, buffer, len);
	stream->received_bytes += len;

	if (stream->received_bytes <= stream->declared_size)
		return stream->write(stream, buffer, len);

	return git_odb_stream__invalid_length(stream,
		"stream_write()");
}

/*
 * Finish a write stream and store the resulting object id in `out`.
 *
 * Fails with an invalid-length error unless exactly the declared number of
 * bytes has been received. When git_odb__freshen() returns non-zero for the
 * computed id, the backend's `finalize_write` is skipped and 0 is returned.
 */
int git_odb_stream_finalize_write(git_oid *out, git_odb_stream *stream)
{
	int size_matches = (stream->received_bytes == stream->declared_size);

	if (!size_matches)
		return git_odb_stream__invalid_length(stream,
			"stream_finalize_write()");

	git_hash_final(out, stream->hash_ctx);

	return git_odb__freshen(stream->backend->odb, out) ?
		0 : stream->finalize_write(stream, out);
}

/*
 * Read from a stream by delegating to its `read` callback with `buffer`
 * and `len`; the callback's return value is passed through unchanged.
 */
int git_odb_stream_read(git_odb_stream *stream, char *buffer, size_t len)
{
	int result = stream->read(stream, buffer, len);

	return result;
}

/*
 * Release a stream: tear down and free its hash context, if it has one,
 * then hand the stream to its own `free` callback. A NULL `stream` is
 * ignored.
 */
void git_odb_stream_free(git_odb_stream *stream)
{
	if (stream == NULL)
		return;

	/* Only streams set up by git_odb_open_wstream() get a hash context
	 * assigned in this file (read streams never do), so do not run the
	 * hash cleanup on a context that is not there.
	 */
	if (stream->hash_ctx != NULL) {
		git_hash_ctx_cleanup(stream->hash_ctx);
		git__free(stream->hash_ctx);
	}

	stream->free(stream);
}

1335
/*
 * Open a stream for reading the object `oid`.
 *
 * Backends providing `readstream` are tried in order until one returns a
 * non-negative value. A final GIT_PASSTHROUGH result is turned into 0; if
 * no backend offered `readstream` at all, an "unsupported in backend"
 * error is reported.
 */
int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid)
{
	size_t pos = 0, attempts = 0;
	int error = GIT_ERROR;

	assert(stream && db);

	while (pos < db->backends.length && error < 0) {
		backend_internal *entry = git_vector_get(&db->backends, pos++);
		git_odb_backend *backend = entry->backend;

		if (backend->readstream == NULL)
			continue;

		++attempts;
		error = backend->readstream(stream, backend, oid);
	}

	if (error == GIT_PASSTHROUGH)
		error = 0;
	else if (error < 0 && !attempts)
		error = git_odb__error_unsupported_in_backend("read object streamed");

	return error;
}

1360
/*
 * Obtain a pack writer for `db`.
 *
 * Non-alternate backends providing `writepack` are tried in order until one
 * returns a non-negative value. A final GIT_PASSTHROUGH result is turned
 * into 0; if no backend offered `writepack` at all, an "unsupported in
 * backend" error is reported.
 */
int git_odb_write_pack(struct git_odb_writepack **out, git_odb *db, git_transfer_progress_cb progress_cb, void *progress_payload)
{
	size_t pos = 0, attempts = 0;
	int error = GIT_ERROR;

	assert(out && db);

	while (pos < db->backends.length && error < 0) {
		backend_internal *entry = git_vector_get(&db->backends, pos++);
		git_odb_backend *backend = entry->backend;

		/* we don't write in alternates! */
		if (entry->is_alternate || backend->writepack == NULL)
			continue;

		++attempts;
		error = backend->writepack(out, backend, db, progress_cb, progress_payload);
	}

	if (error == GIT_PASSTHROUGH)
		error = 0;
	else if (error < 0 && !attempts)
		error = git_odb__error_unsupported_in_backend("write pack");

	return error;
}

Vicent Marti committed
1389
/*
 * Allocate `len` bytes with git__malloc(). The `backend` argument is
 * accepted but not used.
 */
void *git_odb_backend_malloc(git_odb_backend *backend, size_t len)
{
	void *mem;

	GIT_UNUSED(backend);

	mem = git__malloc(len);
	return mem;
}

Vicent Marti committed
1395 1396
int git_odb_refresh(struct git_odb *db)
{
1397
	size_t i;
Vicent Marti committed
1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413
	assert(db);

	for (i = 0; i < db->backends.length; ++i) {
		backend_internal *internal = git_vector_get(&db->backends, i);
		git_odb_backend *b = internal->backend;

		if (b->refresh != NULL) {
			int error = b->refresh(b);
			if (error < 0)
				return error;
		}
	}

	return 0;
}

1414 1415
/*
 * Record an "object not found" ODB error and return GIT_ENOTFOUND.
 *
 * When `oid` is given, its first `oid_len` hex digits are appended to the
 * message in parentheses; with a NULL `oid` only `message` is reported.
 */
int git_odb__error_notfound(
	const char *message, const git_oid *oid, size_t oid_len)
{
	char oid_str[GIT_OID_HEXSZ + 1];

	if (oid == NULL) {
		giterr_set(GITERR_ODB, "object not found - %s", message);
		return GIT_ENOTFOUND;
	}

	git_oid_tostr(oid_str, oid_len+1, oid);
	giterr_set(GITERR_ODB, "object not found - %s (%.*s)",
		message, (int) oid_len, oid_str);

	return GIT_ENOTFOUND;
}

int git_odb__error_ambiguous(const char *message)
{
1430
	giterr_set(GITERR_ODB, "ambiguous SHA1 prefix - %s", message);
1431
	return GIT_EAMBIGUOUS;
1432 1433
}

1434
/*
 * Initialise `*backend` from the GIT_ODB_BACKEND_INIT template for the
 * requested structure `version`. Returns 0 once the template has been
 * applied.
 */
int git_odb_init_backend(git_odb_backend *backend, unsigned int version)
{
	/* NOTE(review): the macro presumably validates `version` and may
	 * return early on a mismatch - confirm against its definition.
	 */
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
		backend, version, git_odb_backend, GIT_ODB_BACKEND_INIT);

	return 0;
}