/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "common.h"
9

10
#include <zlib.h>
11
#include "git2/object.h"
12
#include "git2/sys/odb_backend.h"
13
#include "futils.h"
14 15
#include "hash.h"
#include "odb.h"
16
#include "delta.h"
Vicent Marti committed
17
#include "filebuf.h"
18
#include "object.h"
19
#include "zstream.h"
20

21
#include "git2/odb_backend.h"
Vicent Marti committed
22
#include "git2/types.h"
23

24
/* maximum possible header length */
25
#define MAX_HEADER_LEN 64
26

Vicent Marti committed
27
typedef struct { /* object header data */
28
	git_object_t type; /* object type */
Vicent Marti committed
29
	size_t	size; /* object size */
30 31
} obj_hdr;

Vicent Marti committed
32 33 34 35 36
/* Stream used to write a new loose object through a file buffer. */
typedef struct {
	/* kept first: the stream callbacks cast git_odb_stream * to this type */
	git_odb_stream stream;
	/* file buffer the object header and data are written into */
	git_filebuf fbuf;
} loose_writestream;

37 38 39
typedef struct {
	git_odb_stream stream;
	git_map map;
40
	char start[MAX_HEADER_LEN];
41
	size_t start_len;
42
	size_t start_read;
43 44 45
	git_zstream zstream;
} loose_readstream;

46 47 48 49 50
typedef struct loose_backend {
	git_odb_backend parent;

	int object_zlib_level; /** loose object zlib compression level. */
	int fsync_object_files; /** loose object file fsync flag. */
51 52
	mode_t object_file_mode;
	mode_t object_dir_mode;
53 54 55

	size_t objects_dirlen;
	char objects_dir[GIT_FLEX_ARRAY];
56 57
} loose_backend;

58 59 60 61 62 63
/* State structure for exploring directories,
 * in order to locate objects matching a short oid.
 */
typedef struct {
	/* length of the path prefix that precedes each entry name */
	size_t dir_len;
	/* hex-formatted oid prefix that entries are compared against */
	unsigned char short_oid[GIT_OID_HEXSZ];
	/* number of hex characters of `short_oid` that are significant */
	size_t short_oid_len;
	/* how many matching objects have been seen so far */
	int found;
	/* hex-formatted oid of the first matching object */
	unsigned char res_oid[GIT_OID_HEXSZ];
} loose_locate_object_state;


72 73
/***********************************************************
 *
 * MISCELLANEOUS HELPER FUNCTIONS
 *
 ***********************************************************/

78 79
/*
 * Write the path of the loose object `id` into `name`: the backend's
 * objects directory followed by the oid in "aa/aaa..." form.
 *
 * Returns 0 on success, or a negative value when the required length
 * overflows or the buffer cannot be grown.
 */
static int object_file_name(
	git_buf *name, const loose_backend *be, const git_oid *id)
{
	size_t needed;

	/* objects dir + hex oid + room for two '/' and the trailing '\0' */
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, be->objects_dirlen, GIT_OID_HEXSZ);
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, needed, 3);

	if (git_buf_grow(name, needed) < 0)
		return -1;

	git_buf_set(name, be->objects_dir, be->objects_dirlen);
	git_path_to_dir(name);

	/*
	 * The oid is formatted straight into the buffer's spare room, so
	 * the size and terminator are updated by hand afterwards.
	 */
	git_oid_pathfmt(&name->ptr[name->size], id);
	name->size += GIT_OID_HEXSZ + 1;
	name->ptr[name->size] = '\0';

	return 0;
}

100 101
static int object_mkdir(const git_buf *name, const loose_backend *be)
{
102
	return git_futils_mkdir_relative(
103
		name->ptr + be->objects_dirlen, be->objects_dir, be->object_dir_mode,
104
		GIT_MKDIR_PATH | GIT_MKDIR_SKIP_LAST | GIT_MKDIR_VERIFY_DIR, NULL);
105
}
106

107 108
/*
 * Parse the binary, pack-style header found at the start of `data`.
 *
 * The first byte holds the object type in bits 4-6 and the low four
 * bits of the size; while the high bit of a byte is set, another byte
 * follows that contributes seven more size bits.
 *
 * On success `out` receives the type and size, `*out_len` (when
 * `out_len` is non-NULL) the number of header bytes consumed, and 0 is
 * returned. On a truncated or over-long header an error is set and -1
 * is returned.
 */
static int parse_header_packlike(
	obj_hdr *out, size_t *out_len, const unsigned char *data, size_t len)
{
	unsigned long c;
	size_t shift, size, used = 0;

	if (len == 0)
		goto on_error;

	c = data[used++];
	out->type = (c >> 4) & 7;

	size = c & 15;
	shift = 4;
	while (c & 0x80) {
		/* the continuation bit promised another byte */
		if (len <= used)
			goto on_error;

		/* the next group would not fit in a size_t at all */
		if (sizeof(size_t) * 8 <= shift)
			goto on_error;

		c = data[used++];

		/*
		 * Widen before shifting: the guard above bounds `shift` by
		 * the width of size_t, which may exceed the width of
		 * unsigned long.
		 */
		size += (size_t)(c & 0x7f) << shift;
		shift += 7;
	}

	out->size = size;

	if (out_len)
		*out_len = used;

	return 0;

on_error:
	git_error_set(GIT_ERROR_OBJECT, "failed to parse loose object: invalid header");
	return -1;
}

145 146 147 148 149
/*
 * Parse a textual loose object header of the form
 * "<type> <decimal size>\0" from the first `data_len` bytes of `_data`.
 *
 * On success `out` receives the type and size, `*out_len` the header
 * length including the terminating NUL, and 0 is returned. Otherwise
 * an error is set and -1 is returned.
 */
static int parse_header(
	obj_hdr *out,
	size_t *out_len,
	const unsigned char *_data,
	size_t data_len)
{
	const char *data = (const char *)_data;
	size_t type_len = 0, num_start, pos;
	int64_t size;

	*out_len = 0;

	/* the type name runs up to the first space */
	while (type_len < data_len && data[type_len] != ' ')
		type_len++;

	if (type_len == data_len)
		goto on_error;

	out->type = git_object_stringn2type(data, type_len);

	/* the size runs from just past the space up to the NUL */
	num_start = type_len + 1;
	pos = num_start;
	while (pos < data_len && data[pos] != '\0')
		pos++;

	if (pos == data_len)
		goto on_error;

	if (git__strntol64(&size, &data[num_start], pos - num_start, NULL, 10) < 0 ||
		size < 0)
		goto on_error;

	if ((uint64_t)size > SIZE_MAX) {
		git_error_set(GIT_ERROR_OBJECT, "object is larger than available memory");
		return -1;
	}

	out->size = (size_t)size;

	/* header length counts the NUL terminator as well */
	if (GIT_ADD_SIZET_OVERFLOW(out_len, pos, 1))
		goto on_error;

	return 0;

on_error:
	git_error_set(GIT_ERROR_OBJECT, "failed to parse loose object: invalid header");
	return -1;
}
197

198
/*
 * Report whether `data` begins with something that looks like a zlib
 * stream header.
 *
 * Returns 1 when the first byte has its low nibble equal to 8 and its
 * top bit clear, and the first two bytes read as a big-endian 16-bit
 * value are a multiple of 31; returns 0 otherwise, including when
 * fewer than two bytes are available.
 */
static int is_zlib_compressed_data(unsigned char *data, size_t data_len)
{
	unsigned int first_two;

	if (data_len < 2)
		return 0;

	if ((data[0] & 0x8F) != 0x08)
		return 0;

	first_two = ((unsigned int)data[0] << 8) | data[1];

	return (first_two % 31) == 0;
}

209 210 211 212 213 214 215 216
/***********************************************************
 *
 * ODB OBJECT READING & WRITING
 *
 * Backend for the public API; read headers and full objects
 * from the ODB. Write raw data to the ODB.
 *
 ***********************************************************/
217 218 219 220 221 222 223 224


/*
 * At one point, there was a loose object format that was intended to
 * mimic the format used in pack-files. This was to allow easy copying
 * of loose object data into packs. This format is no longer used, but
 * we must still read it.
 */
225
static int read_loose_packlike(git_rawobj *out, git_buf *obj)
{
	git_buf inflated = GIT_BUF_INIT;
	const unsigned char *cursor = (unsigned char *)obj->ptr;
	size_t remaining = obj->size, hdr_len, wanted;
	obj_hdr hdr;
	int error;

	/*
	 * The header is stored uncompressed: a binary encoding of the
	 * object type and size.
	 */
	error = parse_header_packlike(&hdr, &hdr_len, cursor, remaining);
	if (error < 0)
		goto done;

	if (!git_object_typeisloose(hdr.type) || hdr_len > remaining) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate loose object");
		error = -1;
		goto done;
	}

	/* everything after the header is the deflated body */
	cursor += hdr_len;
	remaining -= hdr_len;

	/* size the output buffer for the body plus one extra byte */
	if (GIT_ADD_SIZET_OVERFLOW(&wanted, hdr.size, 1) ||
		git_buf_init(&inflated, wanted) < 0) {
		error = -1;
		goto done;
	}

	error = git_zstream_inflatebuf(&inflated, cursor, remaining);
	if (error < 0)
		goto done;

	/* hand the inflated buffer over to the caller */
	out->len = hdr.size;
	out->type = hdr.type;
	out->data = git_buf_detach(&inflated);

done:
	git_buf_dispose(&inflated);
	return error;
}

273
/*
 * Read a zlib-compressed loose object held in `obj`.
 *
 * The first MAX_HEADER_LEN inflated bytes are parsed as the textual
 * header; whatever follows the header in that first chunk is copied to
 * the front of a newly allocated body buffer and the rest of the
 * stream is inflated after it.
 *
 * On success `out` receives the body (with an extra '\0' appended past
 * `out->len`), its length and type, and 0 is returned; the caller owns
 * `out->data`. On failure a negative value is returned and the body
 * buffer is freed.
 */
static int read_loose_standard(git_rawobj *out, git_buf *obj)
{
	git_zstream zstream = GIT_ZSTREAM_INIT;
	unsigned char head[MAX_HEADER_LEN], *body = NULL;
	size_t decompressed, head_len, body_len, alloc_size;
	obj_hdr hdr;
	int error;

	if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
		(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
		goto done;

	decompressed = sizeof(head);

	/*
	 * inflate the initial part of the compressed buffer in order to
	 * parse the header; read the largest header possible, then push the
	 * remainder into the body buffer.
	 */
	if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
		(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
		goto done;

	if (!git_object_typeisloose(hdr.type)) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate disk object");
		error = -1;
		goto done;
	}

	/*
	 * allocate a buffer and inflate the object data into it
	 * (including the initial sequence in the head buffer).
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
		(body = git__calloc(1, alloc_size)) == NULL) {
		error = -1;
		goto done;
	}

	GIT_ASSERT(decompressed >= head_len);
	body_len = decompressed - head_len;

	/*
	 * The header may declare fewer bytes than were already inflated
	 * past it. Reject that here: the copy below would otherwise write
	 * beyond the `hdr.size + 1` byte allocation, and the subtraction
	 * after it would wrap around.
	 */
	if (body_len > hdr.size) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate disk object: data exceeds size in header");
		error = -1;
		goto done;
	}

	if (body_len)
		memcpy(body, head + head_len, body_len);

	decompressed = hdr.size - body_len;
	if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
		goto done;

	if (!git_zstream_done(&zstream)) {
		git_error_set(GIT_ERROR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
		error = -1;
		goto done;
	}

	body[hdr.size] = '\0';

	out->data = body;
	out->len = hdr.size;
	out->type = hdr.type;

done:
	if (error < 0)
		git__free(body);

	git_zstream_free(&zstream);
	return error;
}

342
/*
 * Read the loose object stored at path `loc` into `out`.
 *
 * The file is loaded whole; its first bytes decide whether it is
 * parsed as a standard zlib-compressed object or as the legacy
 * pack-like format. `out` is reset before reading.
 */
static int read_loose(git_rawobj *out, git_buf *loc)
{
	git_buf contents = GIT_BUF_INIT;
	int error;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(loc);

	if (git_buf_oom(loc))
		return -1;

	out->data = NULL;
	out->len = 0;
	out->type = GIT_OBJECT_INVALID;

	error = git_futils_readbuffer(&contents, loc->ptr);

	if (error >= 0) {
		if (is_zlib_compressed_data((unsigned char *)contents.ptr, contents.size))
			error = read_loose_standard(out, &contents);
		else
			error = read_loose_packlike(out, &contents);
	}

	git_buf_dispose(&contents);
	return error;
}

370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
/*
 * Fill in `out->len` and `out->type` from a pack-like header at the
 * start of `data`. Returns the result of parsing the header.
 */
static int read_header_loose_packlike(
	git_rawobj *out, const unsigned char *data, size_t len)
{
	obj_hdr parsed;
	size_t consumed;
	int error = parse_header_packlike(&parsed, &consumed, data, len);

	if (error < 0)
		return error;

	out->type = parsed.type;
	out->len = parsed.size;

	return error;
}

/*
 * Fill in `out->len` and `out->type` from a zlib-compressed object:
 * inflate one chunk of at most MAX_HEADER_LEN bytes from `data` and
 * parse the textual header out of it.
 */
static int read_header_loose_standard(
	git_rawobj *out, const unsigned char *data, size_t len)
{
	git_zstream zs = GIT_ZSTREAM_INIT;
	unsigned char inflated[MAX_HEADER_LEN] = {0};
	obj_hdr hdr = {0};
	size_t header_len, inflated_len = sizeof(inflated);
	int error;

	error = git_zstream_init(&zs, GIT_ZSTREAM_INFLATE);
	if (error < 0)
		goto done;

	error = git_zstream_set_input(&zs, data, len);
	if (error < 0)
		goto done;

	error = git_zstream_get_output_chunk(inflated, &inflated_len, &zs);
	if (error < 0)
		goto done;

	error = parse_header(&hdr, &header_len, inflated, inflated_len);
	if (error < 0)
		goto done;

	out->len = hdr.size;
	out->type = hdr.type;

done:
	git_zstream_free(&zs);
	return error;
}

/*
 * Read only the header of the loose object stored at path `loc`.
 *
 * A single read of up to 1024 bytes from the start of the file is
 * examined. On success `out->len` and `out->type` are set and
 * `out->data` is NULL.
 */
static int read_header_loose(git_rawobj *out, git_buf *loc)
{
	unsigned char chunk[1024];
	ssize_t chunk_len;
	int fd, error;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(loc);

	if (git_buf_oom(loc))
		return -1;

	out->data = NULL;

	fd = git_futils_open_ro(loc->ptr);
	if (fd < 0)
		return fd;

	chunk_len = p_read(fd, chunk, sizeof(chunk));
	if (chunk_len < 0) {
		error = (int)chunk_len;
		goto done;
	}

	if (is_zlib_compressed_data(chunk, (size_t)chunk_len))
		error = read_header_loose_standard(out, chunk, (size_t)chunk_len);
	else
		error = read_header_loose_packlike(out, chunk, (size_t)chunk_len);

	/* a header that parsed but names a non-loose type is still an error */
	if (!error && !git_object_typeisloose(out->type)) {
		git_error_set(GIT_ERROR_ZLIB, "failed to read loose object header");
		error = -1;
	}

done:
	p_close(fd);
	return error;
}

448 449 450 451
/*
 * Compute the path of the loose object `oid` into `object_location`
 * and check that it exists.
 *
 * Returns 0 when the path exists, GIT_ENOTFOUND when it does not, or
 * the error from building the path.
 */
static int locate_object(
	git_buf *object_location,
	loose_backend *backend,
	const git_oid *oid)
{
	int error = object_file_name(object_location, backend, oid);

	if (error)
		return error;

	return git_path_exists(object_location->ptr) ? 0 : GIT_ENOTFOUND;
}

461
/* Explore an entry of a directory and see if it matches a short oid */
462
static int fn_locate_object_short_oid(void *state, git_buf *pathbuf)
{
	loose_locate_object_state *match = (loose_locate_object_state *)state;
	const char *entry;

	/* an object file name is the oid minus its two leading hex digits */
	if (git_buf_len(pathbuf) - match->dir_len != GIT_OID_HEXSZ - 2)
		return 0;

	entry = pathbuf->ptr + match->dir_len;

	/*
	 * The directory already accounts for the first two hex digits, so
	 * only the rest of the prefix is compared against the entry name.
	 */
	if (!git_path_isdir(pathbuf->ptr) &&
		memcmp(match->short_oid + 2, entry, match->short_oid_len - 2) == 0) {

		/* remember the full oid of the first match only */
		if (match->found == 0) {
			match->res_oid[0] = match->short_oid[0];
			match->res_oid[1] = match->short_oid[1];
			memcpy(match->res_oid + 2, entry, GIT_OID_HEXSZ - 2);
		}

		match->found++;
	}

	return (match->found > 1) ? GIT_EAMBIGUOUS : 0;
}

/* Locate an object matching a given short oid */
493 494 495 496 497
static int locate_object_short_oid(
	git_buf *object_location,
	git_oid *res_oid,
	loose_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	loose_locate_object_state state;
	char *root = backend->objects_dir;
	size_t root_len = strlen(root), needed;
	int error;

	/* reserve room for OBJ_DIR/xx/<rest of the hex oid> up front */
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, root_len, GIT_OID_HEXSZ);
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, needed, 3);

	if (git_buf_grow(object_location, needed) < 0)
		return -1;

	git_buf_set(object_location, root, root_len);
	git_path_to_dir(object_location);

	/* remember where the directory part ends so it can be restored */
	root_len = git_buf_len(object_location);

	/* hex form of the raw short oid */
	git_oid_fmt((char *)state.short_oid, short_oid);

	/*
	 * Append the first two hex digits plus one more byte, then
	 * overwrite that extra byte with '/' to name the fan-out directory.
	 */
	if (git_buf_put(object_location, (char *)state.short_oid, 3) < 0)
		return -1;

	object_location->ptr[object_location->size - 1] = '/';

	if (git_path_isdir(object_location->ptr) == false)
		return git_odb__error_notfound(
			"no matching loose object for prefix", short_oid, len);

	state.found = 0;
	state.short_oid_len = len;
	state.dir_len = git_buf_len(object_location);

	/* walk the fan-out directory; the callback counts the matches */
	error = git_path_direach(
		object_location, 0, fn_locate_object_short_oid, &state);

	if (error < 0 && error != GIT_EAMBIGUOUS)
		return error;

	if (state.found == 0)
		return git_odb__error_notfound(
			"no matching loose object for prefix", short_oid, len);

	if (state.found > 1)
		return git_odb__error_ambiguous("multiple matches in loose objects");

	/* turn the hex oid recorded by the callback back into a raw oid */
	error = git_oid_fromstr(res_oid, (char *)state.res_oid);
	if (error)
		return error;

	/* rebuild the location so that it names the object that was found */
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, root_len, GIT_OID_HEXSZ);
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, needed, 2);

	git_buf_truncate(object_location, root_len);

	if (git_buf_grow(object_location, needed) < 0)
		return -1;

	git_oid_pathfmt(object_location->ptr + root_len, res_oid);

	object_location->size += GIT_OID_HEXSZ + 1;
	object_location->ptr[object_location->size] = '\0';

	return 0;
}

568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583








/***********************************************************
 *
 * LOOSE BACKEND PUBLIC API
 *
 * Implement the git_odb_backend API calls
 *
 ***********************************************************/

584
/*
 * Backend `read_header` callback: report the size and type of the
 * loose object `oid` through `len_p` and `type_p`. The outputs are only
 * written on success.
 */
static int loose_backend__read_header(size_t *len_p, git_object_t *type_p, git_odb_backend *backend, const git_oid *oid)
{
	git_buf path = GIT_BUF_INIT;
	git_rawobj header;
	int error;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(oid);

	header.len = 0;
	header.type = GIT_OBJECT_INVALID;

	if (locate_object(&path, (loose_backend *)backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
	} else {
		error = read_header_loose(&header, &path);

		if (error == 0) {
			*len_p = header.len;
			*type_p = header.type;
		}
	}

	git_buf_dispose(&path);

	return error;
}
608

609
/*
 * Backend `read` callback: load the loose object `oid` and return its
 * data, length and type through the output pointers. The outputs are
 * only written on success.
 */
static int loose_backend__read(void **buffer_p, size_t *len_p, git_object_t *type_p, git_odb_backend *backend, const git_oid *oid)
{
	git_buf path = GIT_BUF_INIT;
	git_rawobj object;
	int error = 0;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(oid);

	if (locate_object(&path, (loose_backend *)backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
	} else {
		error = read_loose(&object, &path);

		if (error == 0) {
			*buffer_p = object.data;
			*len_p = object.len;
			*type_p = object.type;
		}
	}

	git_buf_dispose(&path);

	return error;
}

632
static int loose_backend__read_prefix(
Vicent Marti committed
633 634 635
	git_oid *out_oid,
	void **buffer_p,
	size_t *len_p,
636
	git_object_t *type_p,
Vicent Marti committed
637 638
	git_odb_backend *backend,
	const git_oid *short_oid,
639
	size_t len)
640
{
641
	int error = 0;
642

643
	GIT_ASSERT_ARG(len >= GIT_OID_MINPREFIXLEN && len <= GIT_OID_HEXSZ);
644

645
	if (len == GIT_OID_HEXSZ) {
646
		/* We can fall back to regular read method */
647
		error = loose_backend__read(buffer_p, len_p, type_p, backend, short_oid);
648
		if (!error)
649
			git_oid_cpy(out_oid, short_oid);
650
	} else {
651
		git_buf object_path = GIT_BUF_INIT;
652 653
		git_rawobj raw;

654
		GIT_ASSERT_ARG(backend && short_oid);
655

656
		if ((error = locate_object_short_oid(&object_path, out_oid,
657 658 659
				(loose_backend *)backend, short_oid, len)) == 0 &&
			(error = read_loose(&raw, &object_path)) == 0)
		{
660 661 662
			*buffer_p = raw.data;
			*len_p = raw.len;
			*type_p = raw.type;
663 664
		}

665
		git_buf_dispose(&object_path);
666
	}
667

668
	return error;
669 670
}

671
/*
 * Backend `exists` callback: returns 1 when locating the loose object
 * `oid` succeeds, 0 when it fails for any reason.
 */
static int loose_backend__exists(git_odb_backend *backend, const git_oid *oid)
{
	git_buf path = GIT_BUF_INIT;
	int located;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(oid);

	located = locate_object(&path, (loose_backend *)backend, oid);
	git_buf_dispose(&path);

	return located == 0;
}

686 687 688 689 690 691
/*
 * Backend `exists_prefix` callback: look for a loose object matching
 * the first `len` hex digits of `short_id`, writing its full oid to
 * `out` when one is found. Returns the result of the lookup.
 */
static int loose_backend__exists_prefix(
	git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len)
{
	git_buf path = GIT_BUF_INIT;
	int result;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(short_id);
	GIT_ASSERT_ARG(len >= GIT_OID_MINPREFIXLEN);

	result = locate_object_short_oid(
		&path, out, (loose_backend *)backend, short_id, len);

	/* only the oid is wanted here; the path itself is discarded */
	git_buf_dispose(&path);

	return result;
}

705 706
struct foreach_state {
	size_t dir_len;
707
	git_odb_foreach_cb cb;
708 709 710
	void *data;
};

711
/*
 * Convert a loose object path of the form "aa/aaa..." (two hex digits,
 * a slash, then the remaining hex digits) into a raw oid.
 *
 * Returns 0 on success, or -1 when `ptr` does not have exactly that
 * shape or contains a character that is not a hex digit.
 */
GIT_INLINE(int) filename_to_oid(git_oid *oid, const char *ptr)
{
	int hi, lo;
	size_t i;

	if (strlen(ptr) != GIT_OID_HEXSZ+1)
		return -1;

	if (ptr[2] != '/')
		return -1;

	/*
	 * Validate each nibble before combining them: shifting a negative
	 * "not a hex digit" result left would be undefined behavior.
	 */
	hi = git__fromhex(ptr[0]);
	lo = git__fromhex(ptr[1]);
	if (hi < 0 || lo < 0)
		return -1;

	oid->id[0] = (unsigned char)((hi << 4) | lo);

	/* the remaining digits follow the directory separator */
	ptr += 3;
	for (i = 0; i < GIT_OID_HEXSZ - 2; i += 2) {
		hi = git__fromhex(ptr[i]);
		lo = git__fromhex(ptr[i + 1]);
		if (hi < 0 || lo < 0)
			return -1;

		oid->id[1 + i/2] = (unsigned char)((hi << 4) | lo);
	}

	return 0;
}

/*
 * Directory callback for one fan-out directory: entries whose name
 * (relative to the objects directory) parses as an oid are passed to
 * the user callback; anything else is silently skipped.
 */
static int foreach_object_dir_cb(void *_state, git_buf *path)
{
	struct foreach_state *walk = (struct foreach_state *) _state;
	git_oid oid;
	int cb_result;

	if (filename_to_oid(&oid, path->ptr + walk->dir_len) < 0)
		return 0;

	cb_result = walk->cb(&oid, walk->data);

	return git_error_set_after_callback_function(
		cb_result, "git_odb_foreach");
}

/*
 * Directory callback for the objects directory itself: descend into
 * each sub-directory and ignore stray non-directory entries.
 */
static int foreach_cb(void *_state, git_buf *path)
{
	struct foreach_state *walk = (struct foreach_state *) _state;

	if (git_path_isdir(git_buf_cstr(path)))
		return git_path_direach(path, 0, foreach_object_dir_cb, walk);

	/* not a directory: nothing to enumerate here */
	return 0;
}

762
/*
 * Backend `foreach` callback: walk the objects directory and invoke
 * `cb` with `data` for every loose object found. Returns the result of
 * the directory walk.
 */
static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
{
	loose_backend *backend = (loose_backend *) _backend;
	struct foreach_state state;
	git_buf root = GIT_BUF_INIT;
	int error;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(cb);

	git_buf_sets(&root, backend->objects_dir);
	git_path_to_dir(&root);

	if (git_buf_oom(&root))
		return -1;

	memset(&state, 0, sizeof(state));
	state.dir_len = git_buf_len(&root);
	state.cb = cb;
	state.data = data;

	error = git_path_direach(&root, 0, foreach_cb, &state);

	git_buf_dispose(&root);

	return error;
}

792
/*
 * Write stream `finalize_write` callback: commit the stream's file
 * buffer at the final path of object `oid`, creating the containing
 * directory first. Returns -1 if the path or directory cannot be
 * prepared, otherwise the result of the commit.
 */
static int loose_backend__writestream_finalize(git_odb_stream *_stream, const git_oid *oid)
{
	loose_writestream *ws = (loose_writestream *)_stream;
	loose_backend *backend = (loose_backend *)_stream->backend;
	git_buf destination = GIT_BUF_INIT;
	int error = -1;

	if (object_file_name(&destination, backend, oid) >= 0 &&
		object_mkdir(&destination, backend) >= 0)
		error = git_filebuf_commit_at(&ws->fbuf, destination.ptr);

	git_buf_dispose(&destination);

	return error;
}

811
/* Write stream `write` callback: append `len` bytes of `data` to the file buffer. */
static int loose_backend__writestream_write(git_odb_stream *_stream, const char *data, size_t len)
{
	git_filebuf *target = &((loose_writestream *)_stream)->fbuf;

	return git_filebuf_write(target, data, len);
}

817
/* Write stream `free` callback: clean up the file buffer, then free the stream. */
static void loose_backend__writestream_free(git_odb_stream *_stream)
{
	loose_writestream *ws = (loose_writestream *)_stream;

	git_filebuf_cleanup(&ws->fbuf);
	git__free(ws);
}

825 826 827 828 829
/*
 * Compute the file buffer flags used when writing an object: a
 * temporary file, deflated at the backend's compression level, with
 * fsync requested when either the backend or the global setting asks
 * for it.
 */
static int filebuf_flags(loose_backend *backend)
{
	int deflate_bits = backend->object_zlib_level << GIT_FILEBUF_DEFLATE_SHIFT;
	int flags = GIT_FILEBUF_TEMPORARY | deflate_bits;

	if (backend->fsync_object_files || git_repository__fsync_gitdir)
		flags |= GIT_FILEBUF_FSYNC;

	return flags;
}

836
/*
 * Backend `writestream` callback: create a stream for writing an
 * object of the given `length` and `type`.
 *
 * A temporary file buffer is opened in the objects directory and the
 * object header is written to it immediately. On success the new
 * stream is stored in `*stream_out` and 0 is returned; on failure
 * `*stream_out` is NULL and a negative value is returned.
 */
static int loose_backend__writestream(git_odb_stream **stream_out, git_odb_backend *_backend, git_object_size_t length, git_object_t type)
{
	loose_backend *backend;
	loose_writestream *ws = NULL;
	git_buf tmp_path = GIT_BUF_INIT;
	char hdr[MAX_HEADER_LEN];
	size_t hdrlen;
	int error;

	GIT_ASSERT_ARG(_backend);

	backend = (loose_backend *)_backend;
	*stream_out = NULL;

	error = git_odb__format_object_header(&hdrlen, hdr, sizeof(hdr), length, type);
	if (error < 0)
		return error;

	ws = git__calloc(1, sizeof(loose_writestream));
	GIT_ERROR_CHECK_ALLOC(ws);

	ws->stream.backend = _backend;
	ws->stream.mode = GIT_STREAM_WRONLY;
	ws->stream.read = NULL; /* no read callback: the stream is write-only */
	ws->stream.write = &loose_backend__writestream_write;
	ws->stream.finalize_write = &loose_backend__writestream_finalize;
	ws->stream.free = &loose_backend__writestream_free;

	/* open the temporary file and push the header through the stream */
	if (git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") < 0 ||
		git_filebuf_open(&ws->fbuf, tmp_path.ptr, filebuf_flags(backend),
			backend->object_file_mode) < 0 ||
		ws->stream.write((git_odb_stream *)ws, hdr, hdrlen) < 0)
	{
		git_filebuf_cleanup(&ws->fbuf);
		git__free(ws);
		ws = NULL;
	}

	git_buf_dispose(&tmp_path);
	*stream_out = (git_odb_stream *)ws;

	return ws ? 0 : -1;
}

879 880 881
/*
 * Read stream `read` callback: copy up to `buffer_len` bytes of object
 * data into `buffer`.
 *
 * Bytes saved in the stream's `start` area are returned first; the
 * rest comes from the inflater. The request is capped at INT_MAX so
 * the byte count fits the return type. Returns the number of bytes
 * produced, or a negative value if inflation fails.
 */
static int loose_backend__readstream_read(
	git_odb_stream *_stream,
	char *buffer,
	size_t buffer_len)
{
	loose_readstream *stream = (loose_readstream *)_stream;
	size_t pending = stream->start_len - stream->start_read;
	int produced = 0, error;

	buffer_len = min(buffer_len, INT_MAX);

	/* replay body bytes that were inflated along with the header */
	if (pending && buffer_len) {
		size_t n = min(pending, buffer_len);

		memcpy(buffer, stream->start + stream->start_read, n);

		stream->start_read += n;
		buffer += n;
		buffer_len -= n;
		produced += (int)n;
	}

	/* fill whatever room is left from the compressed stream */
	if (buffer_len) {
		size_t n = buffer_len;

		error = git_zstream_get_output(buffer, &n, &stream->zstream);
		if (error < 0)
			return error;

		produced += (int)n;
	}

	return produced;
}

/* Read stream `free` callback: release the file mapping, the inflater and the stream. */
static void loose_backend__readstream_free(git_odb_stream *_stream)
{
	loose_readstream *rs = (loose_readstream *)_stream;

	git_futils_mmap_free(&rs->map);
	git_zstream_free(&rs->zstream);
	git__free(rs);
}

926 927 928 929
/*
 * Prepare a read stream over a pack-like loose object: parse the
 * uncompressed binary header from the mapped file into `hdr`, then
 * point the inflater at the bytes that follow it.
 */
static int loose_backend__readstream_packlike(
	obj_hdr *hdr,
	loose_readstream *stream)
{
	const unsigned char *mapped = stream->map.data;
	size_t mapped_len = stream->map.len, hdr_len;
	int error;

	error = parse_header_packlike(hdr, &hdr_len, mapped, mapped_len);
	if (error < 0)
		return error;

	if (!git_object_typeisloose(hdr->type)) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate loose object");
		return -1;
	}

	return git_zstream_set_input(&stream->zstream,
		mapped + hdr_len, mapped_len - hdr_len);
}

/*
 * Prepare a read stream over a zlib-compressed loose object: inflate
 * up to MAX_HEADER_LEN bytes, parse the header into `hdr`, and keep any
 * body bytes that came along in the stream's `start` area so that the
 * read callback can return them first.
 */
static int loose_backend__readstream_standard(
	obj_hdr *hdr,
	loose_readstream *stream)
{
	unsigned char head[MAX_HEADER_LEN];
	size_t inflated = sizeof(head), hdr_len;
	int error;

	error = git_zstream_set_input(&stream->zstream,
		stream->map.data, stream->map.len);
	if (error < 0)
		return error;

	error = git_zstream_get_output(head, &inflated, &stream->zstream);
	if (error < 0)
		return error;

	error = parse_header(hdr, &hdr_len, head, inflated);
	if (error < 0)
		return error;

	if (!git_object_typeisloose(hdr->type)) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate disk object");
		return -1;
	}

	/* stash whatever was inflated beyond the header */
	if (inflated > hdr_len) {
		size_t extra = inflated - hdr_len;

		stream->start_len = extra;
		memcpy(stream->start, head + hdr_len, extra);
	}

	return 0;
}

989 990 991
static int loose_backend__readstream(
	git_odb_stream **stream_out,
	size_t *len_out,
992
	git_object_t *type_out,
993 994 995 996 997 998 999 1000 1001 1002
	git_odb_backend *_backend,
	const git_oid *oid)
{
	loose_backend *backend;
	loose_readstream *stream = NULL;
	git_hash_ctx *hash_ctx = NULL;
	git_buf object_path = GIT_BUF_INIT;
	obj_hdr hdr;
	int error = 0;

1003 1004 1005 1006 1007
	GIT_ASSERT_ARG(stream_out);
	GIT_ASSERT_ARG(len_out);
	GIT_ASSERT_ARG(type_out);
	GIT_ASSERT_ARG(_backend);
	GIT_ASSERT_ARG(oid);
1008 1009 1010 1011

	backend = (loose_backend *)_backend;
	*stream_out = NULL;
	*len_out = 0;
1012
	*type_out = GIT_OBJECT_INVALID;
1013 1014 1015 1016 1017 1018 1019 1020

	if (locate_object(&object_path, backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
		goto done;
	}

	stream = git__calloc(1, sizeof(loose_readstream));
1021
	GIT_ERROR_CHECK_ALLOC(stream);
1022 1023

	hash_ctx = git__malloc(sizeof(git_hash_ctx));
1024
	GIT_ERROR_CHECK_ALLOC(hash_ctx);
1025 1026 1027

	if ((error = git_hash_ctx_init(hash_ctx)) < 0 ||
		(error = git_futils_mmap_ro_file(&stream->map, object_path.ptr)) < 0 ||
1028
		(error = git_zstream_init(&stream->zstream, GIT_ZSTREAM_INFLATE)) < 0)
1029 1030
		goto done;

1031
	/* check for a packlike loose object */
1032
	if (!is_zlib_compressed_data(stream->map.data, stream->map.len))
1033 1034 1035
		error = loose_backend__readstream_packlike(&hdr, stream);
	else
		error = loose_backend__readstream_standard(&hdr, stream);
1036

1037
	if (error < 0)
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
		goto done;

	stream->stream.backend = _backend;
	stream->stream.hash_ctx = hash_ctx;
	stream->stream.read = &loose_backend__readstream_read;
	stream->stream.free = &loose_backend__readstream_free;

	*stream_out = (git_odb_stream *)stream;
	*len_out = hdr.size;
	*type_out = hdr.type;

done:
	if (error < 0) {
Joe Rabinoff committed
1051 1052 1053 1054 1055 1056 1057 1058 1059
		if (stream) {
			git_futils_mmap_free(&stream->map);
			git_zstream_free(&stream->zstream);
			git__free(stream);
		}
		if (hash_ctx) {
			git_hash_ctx_cleanup(hash_ctx);
			git__free(hash_ctx);
		}
1060 1061
	}

1062
	git_buf_dispose(&object_path);
1063 1064 1065
	return error;
}

1066
/*
 * Backend `write` callback: store `len` bytes of `data` as a loose
 * object of the given `type` under `oid`.
 *
 * The header and data are written to a temporary file buffer in the
 * objects directory, which is then committed at the object's final
 * path.
 */
static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_object_t type)
{
	loose_backend *backend = (loose_backend *)_backend;
	git_filebuf fbuf = GIT_FILEBUF_INIT;
	git_buf final_path = GIT_BUF_INIT;
	char header[MAX_HEADER_LEN];
	size_t header_len;
	int error;

	/* format the object header that precedes the data */
	error = git_odb__format_object_header(&header_len,
		header, sizeof(header), len, type);
	if (error < 0)
		goto cleanup;

	/* `final_path` first holds the temporary file's location */
	if (git_buf_joinpath(&final_path, backend->objects_dir, "tmp_object") < 0 ||
		git_filebuf_open(&fbuf, final_path.ptr, filebuf_flags(backend),
			backend->object_file_mode) < 0) {
		error = -1;
		goto cleanup;
	}

	git_filebuf_write(&fbuf, header, header_len);
	git_filebuf_write(&fbuf, data, len);

	/* now reuse `final_path` for the real destination and commit there */
	if (object_file_name(&final_path, backend, oid) < 0 ||
		object_mkdir(&final_path, backend) < 0 ||
		git_filebuf_commit_at(&fbuf, final_path.ptr) < 0)
		error = -1;

cleanup:
	if (error < 0)
		git_filebuf_cleanup(&fbuf);

	git_buf_dispose(&final_path);
	return error;
}

1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/*
 * Backend `freshen` callback: touch the file of the loose object
 * `oid`. Returns -1 if the path cannot be built, otherwise the result
 * of the touch.
 */
static int loose_backend__freshen(
	git_odb_backend *_backend,
	const git_oid *oid)
{
	git_buf object_path = GIT_BUF_INIT;
	int error;

	if (object_file_name(&object_path, (loose_backend *)_backend, oid) < 0)
		return -1;

	error = git_futils_touch(object_path.ptr, NULL);

	git_buf_dispose(&object_path);
	return error;
}

1122
/*
 * Backend `free` callback. The objects directory path is stored inline
 * in the same allocation as the backend, so one free releases both.
 */
static void loose_backend__free(git_odb_backend *_backend)
{
	loose_backend *backend = (loose_backend *)_backend;

	git__free(backend);
}

1127 1128 1129 1130
/*
 * Create a loose-object ODB backend rooted at `objects_dir`.
 *
 * A trailing '/' is appended to the stored directory when it is
 * missing. A negative `compression_level` selects Z_BEST_SPEED; a
 * `dir_mode` or `file_mode` of 0 selects GIT_OBJECT_DIR_MODE or
 * GIT_OBJECT_FILE_MODE respectively.
 *
 * On success the new backend is stored in `*backend_out` and 0 is
 * returned. A negative value is returned when the allocation size
 * overflows or the allocation fails.
 */
int git_odb_backend_loose(
	git_odb_backend **backend_out,
	const char *objects_dir,
	int compression_level,
	int do_fsync,
	unsigned int dir_mode,
	unsigned int file_mode)
{
	loose_backend *backend;
	size_t objects_dirlen, alloclen;

	GIT_ASSERT_ARG(backend_out);
	GIT_ASSERT_ARG(objects_dir);

	objects_dirlen = strlen(objects_dir);

	/* struct + path + room for an added '/' and the terminating '\0' */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, sizeof(loose_backend), objects_dirlen);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 2);
	backend = git__calloc(1, alloclen);
	GIT_ERROR_CHECK_ALLOC(backend);

	backend->parent.version = GIT_ODB_BACKEND_VERSION;
	backend->objects_dirlen = objects_dirlen;
	memcpy(backend->objects_dir, objects_dir, objects_dirlen);

	/*
	 * Only inspect the last character when there is one: with an empty
	 * path, indexing `objects_dirlen - 1` would read before the start
	 * of the array. An empty path is left empty.
	 */
	if (backend->objects_dirlen > 0 &&
		backend->objects_dir[backend->objects_dirlen - 1] != '/')
		backend->objects_dir[backend->objects_dirlen++] = '/';

	if (compression_level < 0)
		compression_level = Z_BEST_SPEED;

	if (dir_mode == 0)
		dir_mode = GIT_OBJECT_DIR_MODE;

	if (file_mode == 0)
		file_mode = GIT_OBJECT_FILE_MODE;

	backend->object_zlib_level = compression_level;
	backend->fsync_object_files = do_fsync;
	backend->object_dir_mode = dir_mode;
	backend->object_file_mode = file_mode;

	backend->parent.read = &loose_backend__read;
	backend->parent.write = &loose_backend__write;
	backend->parent.read_prefix = &loose_backend__read_prefix;
	backend->parent.read_header = &loose_backend__read_header;
	backend->parent.writestream = &loose_backend__writestream;
	backend->parent.readstream = &loose_backend__readstream;
	backend->parent.exists = &loose_backend__exists;
	backend->parent.exists_prefix = &loose_backend__exists_prefix;
	backend->parent.foreach = &loose_backend__foreach;
	backend->parent.freshen = &loose_backend__freshen;
	backend->parent.free = &loose_backend__free;

	*backend_out = (git_odb_backend *)backend;
	return 0;
}