odb_loose.c 27.8 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7 8
 */

#include "common.h"
9

10
#include <zlib.h>
11
#include "git2/object.h"
12
#include "git2/sys/odb_backend.h"
13 14 15
#include "fileops.h"
#include "hash.h"
#include "odb.h"
16
#include "delta.h"
Vicent Marti committed
17
#include "filebuf.h"
18
#include "object.h"
19
#include "zstream.h"
20

21
#include "git2/odb_backend.h"
Vicent Marti committed
22
#include "git2/types.h"
23

24
/* maximum possible header length */
25
#define MAX_HEADER_LEN 64
26

Vicent Marti committed
27 28 29
/* Parsed header of a loose object. */
typedef struct {
	git_otype type; /* object type read from the header */
	size_t size;    /* object size read from the header */
} obj_hdr;

Vicent Marti committed
32 33 34 35 36
/* Write stream for a loose object. */
typedef struct {
	git_odb_stream stream; /* generic stream; first member, so pointers to it can be cast back */
	git_filebuf fbuf;      /* file buffer that receives the header and the object data */
} loose_writestream;

37 38 39
typedef struct {
	git_odb_stream stream;
	git_map map;
40
	char start[MAX_HEADER_LEN];
41
	size_t start_len;
42
	size_t start_read;
43 44 45
	git_zstream zstream;
} loose_readstream;

46 47 48 49 50
typedef struct loose_backend {
	git_odb_backend parent;

	int object_zlib_level; /** loose object zlib compression level. */
	int fsync_object_files; /** loose object file fsync flag. */
51 52
	mode_t object_file_mode;
	mode_t object_dir_mode;
53 54 55

	size_t objects_dirlen;
	char objects_dir[GIT_FLEX_ARRAY];
56 57
} loose_backend;

58 59 60 61 62 63
/*
 * State shared with the directory walker while looking for the
 * object(s) that match an abbreviated (short) oid.
 */
typedef struct {
	size_t dir_len;                         /* length of the directory prefix in each visited path */
	unsigned char short_oid[GIT_OID_HEXSZ]; /* hex formatted oid to match */
	size_t short_oid_len;                   /* number of hex digits of `short_oid` to compare */
	int found;                              /* number of matching objects already found */
	unsigned char res_oid[GIT_OID_HEXSZ];   /* hex formatted oid of the first object found */
} loose_locate_object_state;


72 73
/***********************************************************
 *
Will Stamper committed
74
 * MISCELLANEOUS HELPER FUNCTIONS
75 76 77
 *
 ***********************************************************/

78 79
/*
 * Write the path of the loose object `id` into `name`: the backend's
 * objects directory followed by the "aa/aaa..." form of the oid.
 *
 * Returns 0 on success, -1 when the buffer cannot be grown.
 */
static int object_file_name(
	git_buf *name, const loose_backend *be, const git_oid *id)
{
	size_t needed;

	/* objects dir + 40 hex sha1 chars + 2 * '/' + '\0' */
	GITERR_CHECK_ALLOC_ADD(&needed, be->objects_dirlen, GIT_OID_HEXSZ);
	GITERR_CHECK_ALLOC_ADD(&needed, needed, 3);

	if (git_buf_grow(name, needed) < 0)
		return -1;

	git_buf_set(name, be->objects_dir, be->objects_dirlen);
	git_path_to_dir(name);

	/*
	 * The space was reserved above, so the "aa/aaa..." part (41 bytes)
	 * is formatted straight into the buffer and the size fixed up by hand.
	 */
	git_oid_pathfmt(name->ptr + name->size, id);
	name->size += GIT_OID_HEXSZ + 1;
	name->ptr[name->size] = '\0';

	return 0;
}

100 101
static int object_mkdir(const git_buf *name, const loose_backend *be)
{
102
	return git_futils_mkdir_relative(
103
		name->ptr + be->objects_dirlen, be->objects_dir, be->object_dir_mode,
104
		GIT_MKDIR_PATH | GIT_MKDIR_SKIP_LAST | GIT_MKDIR_VERIFY_DIR, NULL);
105
}
106

107 108
/*
 * Parse the binary ("pack-like") loose object header.
 *
 * The first byte carries the object type in bits 4-6 and the low four
 * bits of the size; while the high bit of a byte is set, another byte
 * follows and contributes seven more size bits.
 *
 * out:     receives the parsed type and size
 * out_len: if non-NULL, receives the number of header bytes consumed
 * data:    start of the object file contents
 * len:     number of bytes available in `data`
 *
 * Returns 0 on success, -1 (with an error set) on a truncated or
 * over-long header.
 */
static int parse_header_packlike(
	obj_hdr *out, size_t *out_len, const unsigned char *data, size_t len)
{
	unsigned long c;
	size_t shift, size, used = 0;

	if (len == 0)
		goto on_error;

	c = data[used++];
	out->type = (c >> 4) & 7;

	size = c & 15;
	shift = 4;
	while (c & 0x80) {
		/* continuation bit set, but no more input */
		if (len <= used)
			goto on_error;

		/* the next seven bits would not fit into a size_t at all */
		if (sizeof(size_t) * 8 <= shift)
			goto on_error;

		c = data[used++];

		/*
		 * Widen before shifting: `shift` is bounded by the width of
		 * size_t, which may be larger than that of unsigned long.
		 */
		size += (size_t)(c & 0x7f) << shift;
		shift += 7;
	}

	out->size = size;

	if (out_len)
		*out_len = used;

	return 0;

on_error:
	giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
	return -1;
}

145 146 147 148 149
/*
 * Parse the textual loose object header "<type> <size>\0".
 *
 * out:      receives the parsed type and size
 * out_len:  receives the header length including the terminating NUL
 *           (set to 0 on entry)
 * _data:    inflated bytes from the start of the object
 * data_len: number of bytes available in `_data`
 *
 * Returns 0 on success, -1 (with an error set) otherwise.
 */
static int parse_header(
	obj_hdr *out,
	size_t *out_len,
	const unsigned char *_data,
	size_t data_len)
{
	const char *data = (const char *)_data;
	const char *space, *nul;
	size_t typename_len, size_idx, size_len, nul_idx;
	int64_t size;

	*out_len = 0;

	/* the type name runs up to the first space */
	space = data_len ? memchr(data, ' ', data_len) : NULL;
	if (space == NULL)
		goto on_error;

	typename_len = (size_t)(space - data);
	out->type = git_object_stringn2type(data, typename_len);

	/* the size runs from after the space up to the first NUL */
	size_idx = typename_len + 1;
	nul = (size_idx < data_len) ?
		memchr(data + size_idx, '\0', data_len - size_idx) : NULL;
	if (nul == NULL)
		goto on_error;

	nul_idx = (size_t)(nul - data);
	size_len = nul_idx - size_idx;

	if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
		size < 0)
		goto on_error;

	if ((uint64_t)size > SIZE_MAX) {
		giterr_set(GITERR_OBJECT, "object is larger than available memory");
		return -1;
	}

	out->size = size;

	/* header length = index of the NUL + 1 */
	if (GIT_ADD_SIZET_OVERFLOW(out_len, nul_idx, 1))
		goto on_error;

	return 0;

on_error:
	giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
	return -1;
}
197

198
/*
 * Check whether `data` starts with a plausible two-byte zlib header.
 *
 * Returns 1 when the low nibble of the first byte is 8 with bit 7
 * clear and the two bytes, read as a big-endian 16-bit number, are a
 * multiple of 31; returns 0 otherwise or when fewer than two bytes
 * are available.
 */
static int is_zlib_compressed_data(unsigned char *data, size_t data_len)
{
	unsigned int first, second;

	if (data_len < 2)
		return 0;

	first = data[0];
	second = data[1];

	if ((first & 0x8F) != 0x08)
		return 0;

	return ((first << 8) + second) % 31 == 0;
}

209 210 211 212 213 214 215 216
/***********************************************************
 *
 * ODB OBJECT READING & WRITING
 *
 * Backend for the public API; read headers and full objects
 * from the ODB. Write raw data to the ODB.
 *
 ***********************************************************/
217 218 219 220 221 222 223 224


/*
 * At one point, there was a loose object format that was intended to
 * mimic the format used in pack-files. This was to allow easy copying
 * of loose object data into packs. This format is no longer used, but
 * we must still read it.
 */
225
/*
 * Read a loose object stored in the legacy pack-like format: an
 * uncompressed binary header followed by the deflated object data.
 *
 * out: receives the type, length and a newly allocated data buffer
 * obj: the complete contents of the object file
 *
 * Returns 0 on success, a negative value (with an error set) otherwise.
 */
static int read_loose_packlike(git_rawobj *out, git_buf *obj)
{
	git_buf body = GIT_BUF_INIT;
	const unsigned char *obj_data;
	obj_hdr hdr;
	size_t obj_len, head_len, alloc_size;
	int error;

	obj_data = (unsigned char *)obj->ptr;
	obj_len = obj->size;

	/*
	 * read the object header, which is an (uncompressed)
	 * binary encoding of the object type and size.
	 */
	if ((error = parse_header_packlike(&hdr, &head_len, obj_data, obj_len)) < 0)
		goto done;

	if (!git_object_typeisloose(hdr.type) || head_len > obj_len) {
		giterr_set(GITERR_ODB, "failed to inflate loose object");
		error = -1;
		goto done;
	}

	obj_data += head_len;
	obj_len -= head_len;

	/*
	 * allocate a buffer and inflate the data into it
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
		git_buf_init(&body, alloc_size) < 0) {
		error = -1;
		goto done;
	}

	if ((error = git_zstream_inflatebuf(&body, obj_data, obj_len)) < 0)
		goto done;

	/*
	 * The length reported to the caller comes from the header, so it
	 * must agree with what was actually inflated; otherwise the caller
	 * would read past the valid data or silently lose part of it.
	 */
	if (body.size != hdr.size) {
		giterr_set(GITERR_ODB, "failed to inflate loose object: size mismatch");
		error = -1;
		goto done;
	}

	out->len = hdr.size;
	out->type = hdr.type;
	out->data = git_buf_detach(&body);

done:
	git_buf_dispose(&body);
	return error;
}

273
/*
 * Read a loose object stored in the standard format: a single zlib
 * stream holding "<type> <size>\0" followed by the object data.
 *
 * out: receives the type, length and a newly allocated, NUL-terminated
 *      data buffer
 * obj: the complete contents of the object file
 *
 * Returns 0 on success, a negative value (with an error set) otherwise.
 */
static int read_loose_standard(git_rawobj *out, git_buf *obj)
{
	git_zstream zstream = GIT_ZSTREAM_INIT;
	unsigned char head[MAX_HEADER_LEN], *body = NULL;
	size_t decompressed, head_len, body_len, remaining, alloc_size;
	obj_hdr hdr;
	int error;

	if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
		(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
		goto done;

	decompressed = sizeof(head);

	/*
	 * inflate the initial part of the compressed buffer in order to
	 * parse the header; read the largest header possible, then push the
	 * remainder into the body buffer.
	 */
	if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
		(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
		goto done;

	if (!git_object_typeisloose(hdr.type)) {
		giterr_set(GITERR_ODB, "failed to inflate disk object");
		error = -1;
		goto done;
	}

	assert(decompressed >= head_len);
	body_len = decompressed - head_len;

	/*
	 * The bytes already inflated past the header are copied into a
	 * buffer sized from the header. If the header claims fewer bytes
	 * than we already hold, the copy would overrun that buffer.
	 */
	if (body_len > hdr.size) {
		giterr_set(GITERR_ODB, "failed to inflate disk object: data exceeds size in header");
		error = -1;
		goto done;
	}

	/*
	 * allocate a buffer and inflate the object data into it
	 * (including the initial sequence in the head buffer).
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
		(body = git__malloc(alloc_size)) == NULL) {
		error = -1;
		goto done;
	}

	if (body_len)
		memcpy(body, head + head_len, body_len);

	remaining = hdr.size - body_len;
	decompressed = remaining;
	if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
		goto done;

	if (!git_zstream_done(&zstream)) {
		giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
		error = -1;
		goto done;
	}

	/* the stream ended before delivering everything the header promised */
	if (decompressed != remaining) {
		giterr_set(GITERR_ODB, "failed to inflate disk object: data shorter than size in header");
		error = -1;
		goto done;
	}

	body[hdr.size] = '\0';

	out->data = body;
	out->len = hdr.size;
	out->type = hdr.type;

done:
	if (error < 0)
		git__free(body);

	git_zstream_free(&zstream);
	return error;
}

342
/*
 * Read the loose object file at `loc` into `out`, choosing the
 * standard or the legacy pack-like reader based on the first bytes.
 *
 * `out` is reset (NULL data, zero length, GIT_OBJ_BAD) before reading.
 * Returns 0 on success, a negative value otherwise.
 */
static int read_loose(git_rawobj *out, git_buf *loc)
{
	git_buf contents = GIT_BUF_INIT;
	int error;

	assert(out && loc);

	if (git_buf_oom(loc))
		return -1;

	out->data = NULL;
	out->len = 0;
	out->type = GIT_OBJ_BAD;

	error = git_futils_readbuffer(&contents, loc->ptr);

	if (error >= 0) {
		if (is_zlib_compressed_data((unsigned char *)contents.ptr, contents.size))
			error = read_loose_standard(out, &contents);
		else
			error = read_loose_packlike(out, &contents);
	}

	git_buf_dispose(&contents);
	return error;
}

369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
/*
 * Fill in the length and type of `out` from a pack-like (binary)
 * loose object header found at the start of `data`.
 *
 * Returns 0 on success, a negative value if the header is invalid.
 */
static int read_header_loose_packlike(
	git_rawobj *out, const unsigned char *data, size_t len)
{
	obj_hdr hdr;
	size_t header_len;
	int error = parse_header_packlike(&hdr, &header_len, data, len);

	if (error >= 0) {
		out->len = hdr.size;
		out->type = hdr.type;
	}

	return error;
}

/*
 * Fill in the length and type of `out` from a standard loose object:
 * inflate one chunk from the start of `data` and parse the textual
 * header out of it.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int read_header_loose_standard(
	git_rawobj *out, const unsigned char *data, size_t len)
{
	git_zstream zs = GIT_ZSTREAM_INIT;
	obj_hdr hdr;
	unsigned char inflated[MAX_HEADER_LEN];
	size_t header_len, inflated_len = sizeof(inflated);
	int error;

	error = git_zstream_init(&zs, GIT_ZSTREAM_INFLATE);
	if (error < 0)
		goto done;

	error = git_zstream_set_input(&zs, data, len);
	if (error < 0)
		goto done;

	/* a header never needs more than MAX_HEADER_LEN inflated bytes */
	error = git_zstream_get_output_chunk(inflated, &inflated_len, &zs);
	if (error < 0)
		goto done;

	error = parse_header(&hdr, &header_len, inflated, inflated_len);
	if (error < 0)
		goto done;

	out->len = hdr.size;
	out->type = hdr.type;

done:
	git_zstream_free(&zs);
	return error;
}

/*
 * Read only the header (type and length) of the loose object file at
 * `loc`. At most the first 1024 bytes of the file are read.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int read_header_loose(git_rawobj *out, git_buf *loc)
{
	unsigned char obj[1024];
	int fd, obj_len, error;

	assert(out && loc);

	if (git_buf_oom(loc))
		return -1;

	out->data = NULL;

	fd = git_futils_open_ro(loc->ptr);
	if (fd < 0)
		return fd;

	obj_len = p_read(fd, obj, sizeof(obj));
	if (obj_len < 0) {
		error = obj_len;
		goto done;
	}

	if (is_zlib_compressed_data(obj, (size_t)obj_len))
		error = read_header_loose_standard(out, obj, (size_t)obj_len);
	else
		error = read_header_loose_packlike(out, obj, (size_t)obj_len);

	/* a header that parsed but names a non-loose type is rejected */
	if (!error && !git_object_typeisloose(out->type)) {
		giterr_set(GITERR_ZLIB, "failed to read loose object header");
		error = -1;
	}

done:
	p_close(fd);
	return error;
}

441 442 443 444
/*
 * Build the path of the loose object `oid` into `object_location` and
 * check that it exists.
 *
 * Returns 0 if the path exists, GIT_ENOTFOUND if it does not, or the
 * error from building the path.
 */
static int locate_object(
	git_buf *object_location,
	loose_backend *backend,
	const git_oid *oid)
{
	int error = object_file_name(object_location, backend, oid);

	if (error)
		return error;

	return git_path_exists(object_location->ptr) ? 0 : GIT_ENOTFOUND;
}

454
/* Explore an entry of a directory and see if it matches a short oid */
455
/*
 * Directory-walk callback: check whether one directory entry matches
 * the short oid held in `state` (a loose_locate_object_state).
 *
 * The first match is recorded in `res_oid`; every match bumps `found`.
 * Returns GIT_EAMBIGUOUS once more than one match has been seen,
 * 0 otherwise.
 */
static int fn_locate_object_short_oid(void *state, git_buf *pathbuf) {
	loose_locate_object_state *sstate = (loose_locate_object_state *)state;
	const char *entry;

	/* an object file name has exactly 38 hex characters */
	if (git_buf_len(pathbuf) - sstate->dir_len != GIT_OID_HEXSZ - 2)
		return 0;

	entry = pathbuf->ptr + sstate->dir_len;

	/*
	 * We are already inside the directory named after the first two hex
	 * characters, so only the remaining characters need comparing.
	 */
	if (git_path_isdir(pathbuf->ptr) == false &&
		memcmp(sstate->short_oid + 2,
			(unsigned char *)entry,
			sstate->short_oid_len - 2) == 0) {

		if (sstate->found == 0) {
			sstate->res_oid[0] = sstate->short_oid[0];
			sstate->res_oid[1] = sstate->short_oid[1];
			memcpy(sstate->res_oid + 2, entry, GIT_OID_HEXSZ - 2);
		}

		sstate->found++;
	}

	return (sstate->found > 1) ? GIT_EAMBIGUOUS : 0;
}

/* Locate an object matching a given short oid */
486 487 488 489 490
/*
 * Find the single loose object whose oid starts with the first `len`
 * hex digits of `short_oid`.
 *
 * object_location: receives the path of the object that was found
 * res_oid:         receives the full oid of the object that was found
 *
 * Returns 0 on success, a "not found" error when nothing matches, an
 * "ambiguous" error when several objects match, or another negative
 * value on failure.
 */
static int locate_object_short_oid(
	git_buf *object_location,
	git_oid *res_oid,
	loose_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	char *root = backend->objects_dir;
	size_t root_len = strlen(root), needed;
	loose_locate_object_state state;
	int error;

	/* reserve room for OBJ_DIR/xx/xx..38x..xx */
	GITERR_CHECK_ALLOC_ADD(&needed, root_len, GIT_OID_HEXSZ);
	GITERR_CHECK_ALLOC_ADD(&needed, needed, 3);
	if (git_buf_grow(object_location, needed) < 0)
		return -1;

	git_buf_set(object_location, root, root_len);
	git_path_to_dir(object_location);

	/* remember where the directory part ends so it can be restored later */
	root_len = git_buf_len(object_location);

	/* raw oid -> hex formatted oid */
	git_oid_fmt((char *)state.short_oid, short_oid);

	/*
	 * Append the first two hex characters plus one more byte, then
	 * overwrite that extra byte with '/' to get OBJ_DIR/xx/.
	 */
	if (git_buf_put(object_location, (char *)state.short_oid, 3) < 0)
		return -1;
	object_location->ptr[object_location->size - 1] = '/';

	/* no such fan-out directory: nothing can match */
	if (git_path_isdir(object_location->ptr) == false)
		return git_odb__error_notfound("no matching loose object for prefix",
			short_oid, len);

	state.dir_len = git_buf_len(object_location);
	state.short_oid_len = len;
	state.found = 0;

	/* walk the directory looking for a unique match */
	error = git_path_direach(
		object_location, 0, fn_locate_object_short_oid, &state);
	if (error < 0 && error != GIT_EAMBIGUOUS)
		return error;

	if (state.found == 0)
		return git_odb__error_notfound("no matching loose object for prefix",
			short_oid, len);

	if (state.found > 1)
		return git_odb__error_ambiguous("multiple matches in loose objects");

	/* hex formatted oid -> raw oid */
	error = git_oid_fromstr(res_oid, (char *)state.res_oid);
	if (error)
		return error;

	/* rebuild the location from the full oid that was found */
	GITERR_CHECK_ALLOC_ADD(&needed, root_len, GIT_OID_HEXSZ);
	GITERR_CHECK_ALLOC_ADD(&needed, needed, 2);

	git_buf_truncate(object_location, root_len);
	if (git_buf_grow(object_location, needed) < 0)
		return -1;

	git_oid_pathfmt(object_location->ptr + root_len, res_oid);

	object_location->size += GIT_OID_HEXSZ + 1;
	object_location->ptr[object_location->size] = '\0';

	return 0;
}

561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576








/***********************************************************
 *
 * LOOSE BACKEND PUBLIC API
 *
 * Implement the git_odb_backend API calls
 *
 ***********************************************************/

577
/*
 * Backend `read_header` callback: report the length and type of the
 * loose object `oid` without reading its contents.
 *
 * `len_p` and `type_p` are written only on success.
 */
static int loose_backend__read_header(size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
{
	git_buf object_path = GIT_BUF_INIT;
	git_rawobj raw;
	int error;

	assert(backend && oid);

	raw.len = 0;
	raw.type = GIT_OBJ_BAD;

	/* any failure to locate the object is reported as "not found" */
	if (locate_object(&object_path, (loose_backend *)backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
		goto done;
	}

	error = read_header_loose(&raw, &object_path);
	if (error == 0) {
		*len_p = raw.len;
		*type_p = raw.type;
	}

done:
	git_buf_dispose(&object_path);
	return error;
}
600

601
/*
 * Backend `read` callback: read the whole loose object `oid`.
 *
 * `buffer_p`, `len_p` and `type_p` are written only on success.
 */
static int loose_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
{
	git_buf object_path = GIT_BUF_INIT;
	git_rawobj raw;
	int error = 0;

	assert(backend && oid);

	/* any failure to locate the object is reported as "not found" */
	if (locate_object(&object_path, (loose_backend *)backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
		goto done;
	}

	error = read_loose(&raw, &object_path);
	if (error == 0) {
		*buffer_p = raw.data;
		*len_p = raw.len;
		*type_p = raw.type;
	}

done:
	git_buf_dispose(&object_path);
	return error;
}

623
/*
 * Backend `read_prefix` callback: read the loose object whose oid
 * starts with the first `len` hex digits of `short_oid`.
 *
 * On success `out_oid` receives the full oid and `buffer_p`, `len_p`
 * and `type_p` describe the object.
 */
static int loose_backend__read_prefix(
	git_oid *out_oid,
	void **buffer_p,
	size_t *len_p,
	git_otype *type_p,
	git_odb_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	git_buf object_path = GIT_BUF_INIT;
	git_rawobj raw;
	int error;

	assert(len >= GIT_OID_MINPREFIXLEN && len <= GIT_OID_HEXSZ);

	/* a full-length "prefix" is just a regular read */
	if (len == GIT_OID_HEXSZ) {
		error = loose_backend__read(buffer_p, len_p, type_p, backend, short_oid);
		if (!error)
			git_oid_cpy(out_oid, short_oid);
		return error;
	}

	assert(backend && short_oid);

	error = locate_object_short_oid(&object_path, out_oid,
		(loose_backend *)backend, short_oid, len);

	if (error == 0)
		error = read_loose(&raw, &object_path);

	if (error == 0) {
		*buffer_p = raw.data;
		*len_p = raw.len;
		*type_p = raw.type;
	}

	git_buf_dispose(&object_path);
	return error;
}

662
/*
 * Backend `exists` callback.
 *
 * Returns 1 when the loose object `oid` could be located, 0 otherwise.
 */
static int loose_backend__exists(git_odb_backend *backend, const git_oid *oid)
{
	git_buf object_path = GIT_BUF_INIT;
	int located;

	assert(backend && oid);

	located = locate_object(&object_path, (loose_backend *)backend, oid);
	git_buf_dispose(&object_path);

	/* locate_object yields 0 on success; turn that into a boolean */
	return !located;
}

676 677 678 679 680 681
/*
 * Backend `exists_prefix` callback: resolve an abbreviated oid.
 *
 * On success (return 0) `out` holds the full oid; otherwise the error
 * from the short-oid lookup is returned unchanged.
 */
static int loose_backend__exists_prefix(
	git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len)
{
	git_buf object_path = GIT_BUF_INIT;
	int error;

	assert(backend && out && short_id && len >= GIT_OID_MINPREFIXLEN);

	/* only the oid is wanted here; the path itself is discarded */
	error = locate_object_short_oid(
		&object_path, out, (loose_backend *)backend, short_id, len);
	git_buf_dispose(&object_path);

	return error;
}

692 693
struct foreach_state {
	size_t dir_len;
694
	git_odb_foreach_cb cb;
695 696 697
	void *data;
};

698
/*
 * Convert a loose object path fragment of the form "aa/bbbb..."
 * (2 hex digits, '/', 38 hex digits) into a raw oid.
 *
 * Returns 0 on success, -1 if the string has the wrong length, lacks
 * the '/' separator or contains a character that is not a hex digit.
 */
GIT_INLINE(int) filename_to_oid(git_oid *oid, const char *ptr)
{
	int hi, lo, i;

	if (strlen(ptr) != GIT_OID_HEXSZ + 1)
		return -1;

	if (ptr[2] != '/')
		return -1;

	/*
	 * Validate each nibble before combining them: shifting the negative
	 * "invalid digit" result left would be undefined behaviour.
	 */
	hi = git__fromhex(ptr[0]);
	lo = git__fromhex(ptr[1]);
	if (hi < 0 || lo < 0)
		return -1;

	oid->id[0] = (unsigned char)((hi << 4) | lo);

	/* skip "aa/" and decode the remaining digits pairwise */
	ptr += 3;
	for (i = 0; i < GIT_OID_HEXSZ - 2; i += 2) {
		hi = git__fromhex(ptr[i]);
		lo = git__fromhex(ptr[i + 1]);
		if (hi < 0 || lo < 0)
			return -1;

		oid->id[1 + i / 2] = (unsigned char)((hi << 4) | lo);
	}

	return 0;
}

/*
 * Directory-walk callback for one entry inside a fan-out directory:
 * entries whose name is not a valid object name are skipped, all
 * others are passed to the user callback.
 */
static int foreach_object_dir_cb(void *_state, git_buf *path)
{
	struct foreach_state *state = (struct foreach_state *) _state;
	git_oid oid;
	int cb_result;

	/* not an object file name: ignore it and keep walking */
	if (filename_to_oid(&oid, path->ptr + state->dir_len) < 0)
		return 0;

	cb_result = state->cb(&oid, state->data);

	return giterr_set_after_callback_function(cb_result, "git_odb_foreach");
}

/*
 * Directory-walk callback for one entry of the objects directory:
 * descend into sub-directories and ignore everything else.
 */
static int foreach_cb(void *_state, git_buf *path)
{
	struct foreach_state *state = (struct foreach_state *) _state;

	if (git_path_isdir(git_buf_cstr(path)))
		return git_path_direach(path, 0, foreach_object_dir_cb, state);

	/* non-dir is some stray file, ignore it */
	return 0;
}

749
/*
 * Backend `foreach` callback: invoke `cb` with `data` for every loose
 * object found under the backend's objects directory.
 *
 * Returns the result of the directory walk, or -1 if the path buffer
 * could not be built.
 */
static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
{
	loose_backend *backend = (loose_backend *) _backend;
	git_buf root = GIT_BUF_INIT;
	struct foreach_state state;
	int error;

	assert(backend && cb);

	git_buf_sets(&root, backend->objects_dir);
	git_path_to_dir(&root);
	if (git_buf_oom(&root))
		return -1;

	memset(&state, 0, sizeof(state));
	state.dir_len = git_buf_len(&root);
	state.cb = cb;
	state.data = data;

	error = git_path_direach(&root, 0, foreach_cb, &state);

	git_buf_dispose(&root);

	return error;
}

778
/*
 * Stream `finalize_write` callback: commit the stream's file buffer
 * at the path of the loose object `oid`, creating the directory for
 * it first.
 *
 * Returns -1 if the path or directory could not be prepared, otherwise
 * the result of committing the file buffer.
 */
static int loose_backend__writestream_finalize(git_odb_stream *_stream, const git_oid *oid)
{
	loose_writestream *stream = (loose_writestream *)_stream;
	loose_backend *backend = (loose_backend *)_stream->backend;
	git_buf final_path = GIT_BUF_INIT;
	int error;

	if (object_file_name(&final_path, backend, oid) >= 0 &&
		object_mkdir(&final_path, backend) >= 0)
		error = git_filebuf_commit_at(&stream->fbuf, final_path.ptr);
	else
		error = -1;

	git_buf_dispose(&final_path);

	return error;
}

797
/*
 * Stream `write` callback: append `len` bytes from `data` to the
 * stream's file buffer and return the file buffer's result.
 */
static int loose_backend__writestream_write(git_odb_stream *_stream, const char *data, size_t len)
{
	loose_writestream *ws = (loose_writestream *)_stream;
	git_filebuf *file = &ws->fbuf;

	return git_filebuf_write(file, data, len);
}

803
/*
 * Stream `free` callback for write streams: clean up the file buffer,
 * then release the stream itself.
 */
static void loose_backend__writestream_free(git_odb_stream *_stream)
{
	loose_writestream *ws = (loose_writestream *)_stream;

	git_filebuf_cleanup(&ws->fbuf);
	git__free(ws);
}

811 812 813 814 815
/*
 * Compute the file buffer flags for writing an object: a temporary
 * file, the backend's zlib level encoded in the deflate bits, and
 * fsync when either the backend or the global setting asks for it.
 */
static int filebuf_flags(loose_backend *backend)
{
	int deflate_bits = backend->object_zlib_level << GIT_FILEBUF_DEFLATE_SHIFT;
	int want_fsync = backend->fsync_object_files || git_repository__fsync_gitdir;

	return GIT_FILEBUF_TEMPORARY | deflate_bits |
		(want_fsync ? GIT_FILEBUF_FSYNC : 0);
}

822
/*
 * Backend `writestream` callback: open a write stream for an object
 * of the given `length` and `type`. The object header is written to
 * the stream before it is handed back.
 *
 * `*stream_out` is set to the new stream on success and to NULL on
 * failure. Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__writestream(git_odb_stream **stream_out, git_odb_backend *_backend, git_off_t length, git_otype type)
{
	loose_backend *backend;
	loose_writestream *stream = NULL;
	char hdr[MAX_HEADER_LEN];
	git_buf tmp_path = GIT_BUF_INIT;
	size_t hdrlen;
	int error, failed;

	assert(_backend && length >= 0);

	backend = (loose_backend *)_backend;
	*stream_out = NULL;

	error = git_odb__format_object_header(&hdrlen, hdr, sizeof(hdr), length, type);
	if (error < 0)
		return error;

	stream = git__calloc(1, sizeof(loose_writestream));
	GITERR_CHECK_ALLOC(stream);

	stream->stream.backend = _backend;
	stream->stream.read = NULL; /* this stream can only be written to */
	stream->stream.write = &loose_backend__writestream_write;
	stream->stream.finalize_write = &loose_backend__writestream_finalize;
	stream->stream.free = &loose_backend__writestream_free;
	stream->stream.mode = GIT_STREAM_WRONLY;

	/* open a temporary file in the objects directory and emit the header */
	failed =
		git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") < 0 ||
		git_filebuf_open(&stream->fbuf, tmp_path.ptr, filebuf_flags(backend),
			backend->object_file_mode) < 0 ||
		loose_backend__writestream_write((git_odb_stream *)stream, hdr, hdrlen) < 0;

	if (failed) {
		git_filebuf_cleanup(&stream->fbuf);
		git__free(stream);
		stream = NULL;
	}

	git_buf_dispose(&tmp_path);
	*stream_out = (git_odb_stream *)stream;

	return stream ? 0 : -1;
}

865 866 867
/*
 * Stream `read` callback: copy up to `buffer_len` bytes of object
 * data into `buffer`.
 *
 * Bytes that were inflated together with the header are replayed
 * first; the rest is inflated on demand.
 *
 * Returns the number of bytes written to `buffer` (never more than
 * INT_MAX), or a negative value if inflating fails.
 */
static int loose_backend__readstream_read(
	git_odb_stream *_stream,
	char *buffer,
	size_t buffer_len)
{
	loose_readstream *stream = (loose_readstream *)_stream;
	size_t start_remain = stream->start_len - stream->start_read;
	int total = 0, error;

	/*
	 * The byte count is returned as an int, so cap the whole request
	 * once up front; capping only the second chunk would let the sum
	 * of both chunks overflow.
	 */
	buffer_len = min(buffer_len, INT_MAX);

	/*
	 * if we read more than just the header in the initial read, play
	 * that back for the caller.
	 */
	if (start_remain && buffer_len) {
		size_t chunk = min(start_remain, buffer_len);
		memcpy(buffer, stream->start + stream->start_read, chunk);

		buffer += chunk;
		stream->start_read += chunk;

		total += (int)chunk;
		buffer_len -= chunk;
	}

	if (buffer_len) {
		size_t chunk = buffer_len;

		if ((error = git_zstream_get_output(buffer, &chunk, &stream->zstream)) < 0)
			return error;

		/* `chunk` now holds the number of bytes actually produced */
		total += (int)chunk;
	}

	return total;
}

/*
 * Stream `free` callback for read streams: release the file mapping
 * and the inflater, then the stream itself.
 */
static void loose_backend__readstream_free(git_odb_stream *_stream)
{
	loose_readstream *rs = (loose_readstream *)_stream;

	git_futils_mmap_free(&rs->map);
	git_zstream_free(&rs->zstream);
	git__free(rs);
}

910 911 912 913
/*
 * Prepare a read stream over a legacy pack-like loose object: parse
 * the uncompressed binary header from the mapped file and point the
 * inflater at the data that follows it.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__readstream_packlike(
	obj_hdr *hdr,
	loose_readstream *stream)
{
	const unsigned char *mapped = stream->map.data;
	size_t mapped_len = stream->map.len;
	size_t head_len;
	int error;

	/*
	 * the header is an (uncompressed) binary encoding of the
	 * object type and size.
	 */
	error = parse_header_packlike(hdr, &head_len, mapped, mapped_len);
	if (error < 0)
		return error;

	if (!git_object_typeisloose(hdr->type)) {
		giterr_set(GITERR_ODB, "failed to inflate loose object");
		return -1;
	}

	/* everything after the header is the deflated object data */
	return git_zstream_set_input(&stream->zstream,
		mapped + head_len, mapped_len - head_len);
}

/*
 * Prepare a read stream over a standard loose object: feed the mapped
 * file to the inflater, inflate enough to parse the header, and keep
 * any body bytes that came along in the stream's `start` buffer.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__readstream_standard(
	obj_hdr *hdr,
	loose_readstream *stream)
{
	unsigned char head[MAX_HEADER_LEN];
	size_t inflated = sizeof(head), head_len;
	int error;

	error = git_zstream_set_input(&stream->zstream,
		stream->map.data, stream->map.len);
	if (error < 0)
		return error;

	/*
	 * inflate the initial part of the compressed buffer in order to
	 * parse the header; read the largest header possible.
	 */
	error = git_zstream_get_output(head, &inflated, &stream->zstream);
	if (error < 0)
		return error;

	error = parse_header(hdr, &head_len, head, inflated);
	if (error < 0)
		return error;

	if (!git_object_typeisloose(hdr->type)) {
		giterr_set(GITERR_ODB, "failed to inflate disk object");
		return -1;
	}

	/* stash whatever was inflated past the header for the first read */
	if (inflated > head_len) {
		size_t extra = inflated - head_len;

		stream->start_len = extra;
		memcpy(stream->start, head + head_len, extra);
	}

	return 0;
}

973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007
/*
 * Backend `readstream` callback: open a read stream over the loose
 * object `oid`.
 *
 * On success `*stream_out` receives the stream and `*len_out` /
 * `*type_out` the size and type from the object header. On failure
 * they keep the values set on entry (NULL, 0, GIT_OBJ_BAD) and
 * everything allocated here is released.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__readstream(
	git_odb_stream **stream_out,
	size_t *len_out,
	git_otype *type_out,
	git_odb_backend *_backend,
	const git_oid *oid)
{
	loose_backend *backend;
	loose_readstream *stream = NULL;
	git_hash_ctx *hash_ctx = NULL;
	git_buf object_path = GIT_BUF_INIT;
	obj_hdr hdr;
	int error = 0;

	assert(stream_out && len_out && type_out && _backend && oid);

	backend = (loose_backend *)_backend;
	*stream_out = NULL;
	*len_out = 0;
	*type_out = GIT_OBJ_BAD;

	if (locate_object(&object_path, backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
		goto done;
	}

	/*
	 * Allocation failures go through `done` as well, so the path
	 * buffer and any earlier allocation are released.
	 */
	stream = git__calloc(1, sizeof(loose_readstream));
	if (stream == NULL) {
		error = -1;
		goto done;
	}

	hash_ctx = git__malloc(sizeof(git_hash_ctx));
	if (hash_ctx == NULL) {
		error = -1;
		goto done;
	}

	if ((error = git_hash_ctx_init(hash_ctx)) < 0 ||
		(error = git_futils_mmap_ro_file(&stream->map, object_path.ptr)) < 0 ||
		(error = git_zstream_init(&stream->zstream, GIT_ZSTREAM_INFLATE)) < 0)
		goto done;

	/* check for a packlike loose object */
	if (!is_zlib_compressed_data(stream->map.data, stream->map.len))
		error = loose_backend__readstream_packlike(&hdr, stream);
	else
		error = loose_backend__readstream_standard(&hdr, stream);

	if (error < 0)
		goto done;

	stream->stream.backend = _backend;
	stream->stream.hash_ctx = hash_ctx;
	stream->stream.read = &loose_backend__readstream_read;
	stream->stream.free = &loose_backend__readstream_free;

	*stream_out = (git_odb_stream *)stream;
	*len_out = hdr.size;
	*type_out = hdr.type;

done:
	if (error < 0) {
		/*
		 * Either pointer may still be NULL here (object not found,
		 * or an allocation failed), so guard each cleanup.
		 */
		if (stream) {
			git_futils_mmap_free(&stream->map);
			git_zstream_free(&stream->zstream);
			git__free(stream);
		}

		if (hash_ctx) {
			git_hash_ctx_cleanup(hash_ctx);
			git__free(hash_ctx);
		}
	}

	git_buf_dispose(&object_path);
	return error;
}

1042
/*
 * Backend `write` callback: store `len` bytes of `data` as the loose
 * object `oid` of the given `type`.
 *
 * The header and data are written to a temporary file in the objects
 * directory, which is then committed at the object's final path.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_otype type)
{
	int error = 0;
	git_buf final_path = GIT_BUF_INIT;
	char header[MAX_HEADER_LEN];
	size_t header_len;
	git_filebuf fbuf = GIT_FILEBUF_INIT;
	loose_backend *backend;

	backend = (loose_backend *)_backend;

	/* prepare the header for the file */
	if ((error = git_odb__format_object_header(&header_len,
		header, sizeof(header), len, type)) < 0)
		goto cleanup;

	/* `final_path` temporarily holds the name of the temporary file */
	if (git_buf_joinpath(&final_path, backend->objects_dir, "tmp_object") < 0 ||
		git_filebuf_open(&fbuf, final_path.ptr, filebuf_flags(backend),
			backend->object_file_mode) < 0)
	{
		error = -1;
		goto cleanup;
	}

	/* stop at the first failed write instead of carrying on to the commit */
	if ((error = git_filebuf_write(&fbuf, header, header_len)) < 0 ||
		(error = git_filebuf_write(&fbuf, data, len)) < 0)
		goto cleanup;

	if (object_file_name(&final_path, backend, oid) < 0 ||
		object_mkdir(&final_path, backend) < 0 ||
		git_filebuf_commit_at(&fbuf, final_path.ptr) < 0)
		error = -1;

cleanup:
	if (error < 0)
		git_filebuf_cleanup(&fbuf);
	git_buf_dispose(&final_path);
	return error;
}

1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
/*
 * Backend `freshen` callback: touch the file of the loose object
 * `oid`.
 *
 * Returns -1 if the path cannot be built, otherwise the result of the
 * touch.
 */
static int loose_backend__freshen(
	git_odb_backend *_backend,
	const git_oid *oid)
{
	git_buf object_path = GIT_BUF_INIT;
	int error;

	if (object_file_name(&object_path, (loose_backend *)_backend, oid) < 0)
		return -1;

	error = git_futils_touch(object_path.ptr, NULL);
	git_buf_dispose(&object_path);

	return error;
}

1098
/*
 * Backend `free` callback: release the backend. The objects directory
 * path is stored inline in the same allocation.
 */
static void loose_backend__free(git_odb_backend *_backend)
{
	assert(_backend);

	git__free((loose_backend *)_backend);
}

1107 1108 1109 1110
/*
 * Create a loose-object backend rooted at `objects_dir`.
 *
 * backend_out:       receives the new backend
 * objects_dir:       objects directory; a trailing '/' is added if missing
 * compression_level: zlib level; a negative value selects Z_BEST_SPEED
 * do_fsync:          non-zero to fsync object files
 * dir_mode:          directory mode; 0 selects GIT_OBJECT_DIR_MODE
 * file_mode:         file mode; 0 selects GIT_OBJECT_FILE_MODE
 *
 * Returns 0 on success, a negative value if allocation fails.
 */
int git_odb_backend_loose(
	git_odb_backend **backend_out,
	const char *objects_dir,
	int compression_level,
	int do_fsync,
	unsigned int dir_mode,
	unsigned int file_mode)
{
	loose_backend *backend;
	size_t objects_dirlen, alloclen;

	assert(backend_out && objects_dir);

	objects_dirlen = strlen(objects_dir);

	/* struct + path + room for an added '/' and the terminating NUL */
	GITERR_CHECK_ALLOC_ADD(&alloclen, sizeof(loose_backend), objects_dirlen);
	GITERR_CHECK_ALLOC_ADD(&alloclen, alloclen, 2);
	backend = git__calloc(1, alloclen);
	GITERR_CHECK_ALLOC(backend);

	backend->parent.version = GIT_ODB_BACKEND_VERSION;
	backend->objects_dirlen = objects_dirlen;
	memcpy(backend->objects_dir, objects_dir, objects_dirlen);

	/*
	 * Make sure the path ends in '/'. The length is checked first so
	 * an empty path never reads the byte before the array.
	 */
	if (objects_dirlen == 0 ||
		backend->objects_dir[objects_dirlen - 1] != '/')
		backend->objects_dir[backend->objects_dirlen++] = '/';

	if (compression_level < 0)
		compression_level = Z_BEST_SPEED;

	if (dir_mode == 0)
		dir_mode = GIT_OBJECT_DIR_MODE;

	if (file_mode == 0)
		file_mode = GIT_OBJECT_FILE_MODE;

	backend->object_zlib_level = compression_level;
	backend->fsync_object_files = do_fsync;
	backend->object_dir_mode = dir_mode;
	backend->object_file_mode = file_mode;

	backend->parent.read = &loose_backend__read;
	backend->parent.write = &loose_backend__write;
	backend->parent.read_prefix = &loose_backend__read_prefix;
	backend->parent.read_header = &loose_backend__read_header;
	backend->parent.writestream = &loose_backend__writestream;
	backend->parent.readstream = &loose_backend__readstream;
	backend->parent.exists = &loose_backend__exists;
	backend->parent.exists_prefix = &loose_backend__exists_prefix;
	backend->parent.foreach = &loose_backend__foreach;
	backend->parent.freshen = &loose_backend__freshen;
	backend->parent.free = &loose_backend__free;

	*backend_out = (git_odb_backend *)backend;
	return 0;
}