blob.c 12.2 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8 9
#include "blob.h"

10 11 12
#include "git2/common.h"
#include "git2/object.h"
#include "git2/repository.h"
13
#include "git2/odb_backend.h"
14

15
#include "filebuf.h"
16
#include "filter.h"
17

18
const void *git_blob_rawcontent(const git_blob *blob)
19
{
Edward Thomson committed
20 21
	GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL);

22 23 24 25
	if (blob->raw)
		return blob->data.raw.data;
	else
		return git_odb_object_data(blob->data.odb);
26 27
}

28
git_object_size_t git_blob_rawsize(const git_blob *blob)
29
{
Edward Thomson committed
30 31
	GIT_ASSERT_ARG(blob);

32 33 34
	if (blob->raw)
		return blob->data.raw.size;
	else
35
		return (git_object_size_t)git_odb_object_size(blob->data.odb);
36 37
}

38 39
/*
 * Set `buffer` to the raw content of `blob`.
 *
 * The blob size is first passed through GIT_ERROR_CHECK_BLOBSIZE and is
 * then narrowed to size_t for git_buf_set, whose result is returned.
 */
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
	const void *content;
	git_object_size_t size;

	size = git_blob_rawsize(blob);
	GIT_ERROR_CHECK_BLOBSIZE(size);

	content = git_blob_rawcontent(blob);

	return git_buf_set(buffer, content, (size_t)size);
}

46
void git_blob__free(void *_blob)
47
{
48 49 50
	git_blob *blob = (git_blob *) _blob;
	if (!blob->raw)
		git_odb_object_free(blob->data.odb);
51
	git__free(blob);
52 53
}

54
int git_blob__parse_raw(void *_blob, const char *data, size_t size)
55
{
56
	git_blob *blob = (git_blob *) _blob;
Edward Thomson committed
57 58 59

	GIT_ASSERT_ARG(blob);

60 61 62 63 64 65 66 67 68
	blob->raw = 1;
	blob->data.raw.data = data;
	blob->data.raw.size = size;
	return 0;
}

/*
 * Initialise `_blob` as a blob backed by the ODB object `odb_obj`.
 *
 * The ODB object's reference count is incremented (via
 * git_cached_obj_incref) before the pointer is stored, and the blob is
 * marked as not raw. Returns 0 once the fields are set.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
	git_blob *blob = _blob;

	GIT_ASSERT_ARG(blob);

	git_cached_obj_incref((git_cached_obj *)odb_obj);

	blob->data.odb = odb_obj;
	blob->raw = 0;

	return 0;
}

78
int git_blob_create_from_buffer(
79
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
80
{
Vicent Marti committed
81
	int error;
82
	git_odb *odb;
Vicent Marti committed
83
	git_odb_stream *stream;
84

Edward Thomson committed
85 86
	GIT_ASSERT_ARG(id);
	GIT_ASSERT_ARG(repo);
87

88
	if ((error = git_repository_odb__weakptr(&odb, repo)) < 0 ||
89
		(error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB)) < 0)
90 91
		return error;

92
	if ((error = git_odb_stream_write(stream, buffer, len)) == 0)
93
		error = git_odb_stream_finalize_write(id, stream);
94

95
	git_odb_stream_free(stream);
96
	return error;
97 98
}

99
static int write_file_stream(
100
	git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size)
101 102
{
	int fd, error;
103
	char buffer[FILEIO_BUFSIZE];
104
	git_odb_stream *stream = NULL;
105
	ssize_t read_len = -1;
106
	git_object_size_t written = 0;
107

108
	if ((error = git_odb_open_wstream(
109
			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
110 111
		return error;

112
	if ((fd = git_futils_open_ro(path)) < 0) {
113
		git_odb_stream_free(stream);
114
		return -1;
115 116
	}

Vicent Marti committed
117
	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
118
		error = git_odb_stream_write(stream, buffer, read_len);
Vicent Marti committed
119
		written += read_len;
120 121 122 123
	}

	p_close(fd);

Vicent Marti committed
124
	if (written != file_size || read_len < 0) {
125
		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
Vicent Marti committed
126 127 128
		error = -1;
	}

129
	if (!error)
130
		error = git_odb_stream_finalize_write(id, stream);
131

132
	git_odb_stream_free(stream);
133 134 135 136
	return error;
}

static int write_file_filtered(
137
	git_oid *id,
138
	git_object_size_t *size,
139
	git_odb *odb,
140
	const char *full_path,
141 142
	git_filter_list *fl,
	git_repository* repo)
143 144
{
	int error;
145
	git_buf tgt = GIT_BUF_INIT;
146

147
	error = git_filter_list_apply_to_file(&tgt, fl, repo, full_path);
148

149
	/* Write the file to disk if it was properly filtered */
150
	if (!error) {
151
		*size = tgt.size;
152

153
		error = git_odb_write(id, odb, tgt.ptr, tgt.size, GIT_OBJECT_BLOB);
154
	}
155

156
	git_buf_dispose(&tgt);
157
	return error;
158 159
}

160
static int write_symlink(
161
	git_oid *id, git_odb *odb, const char *path, size_t link_size)
162 163 164 165 166 167
{
	char *link_data;
	ssize_t read_len;
	int error;

	link_data = git__malloc(link_size);
168
	GIT_ERROR_CHECK_ALLOC(link_data);
169 170 171

	read_len = p_readlink(path, link_data, link_size);
	if (read_len != (ssize_t)link_size) {
172
		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);
173
		git__free(link_data);
174
		return -1;
175 176
	}

177
	error = git_odb_write(id, odb, (void *)link_data, link_size, GIT_OBJECT_BLOB);
178
	git__free(link_data);
179 180 181
	return error;
}

182
int git_blob__create_from_paths(
183
	git_oid *id,
184 185 186 187 188 189
	struct stat *out_st,
	git_repository *repo,
	const char *content_path,
	const char *hint_path,
	mode_t hint_mode,
	bool try_load_filters)
190
{
191
	int error;
192
	struct stat st;
193
	git_odb *odb = NULL;
194
	git_object_size_t size;
195 196
	mode_t mode;
	git_buf path = GIT_BUF_INIT;
197

Edward Thomson committed
198
	GIT_ASSERT_ARG(hint_path || !try_load_filters);
199

200
	if (!content_path) {
201
		if (git_repository_workdir_path(&path, repo, hint_path) < 0)
202 203 204 205 206 207 208 209 210
			return -1;

		content_path = path.ptr;
	}

	if ((error = git_path_lstat(content_path, &st)) < 0 ||
		(error = git_repository_odb(&odb, repo)) < 0)
		goto done;

211
	if (S_ISDIR(st.st_mode)) {
212
		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
213 214 215 216
		error = GIT_EDIRECTORY;
		goto done;
	}

217 218
	if (out_st)
		memcpy(out_st, &st, sizeof(st));
219

220
	size = st.st_size;
221
	mode = hint_mode ? hint_mode : st.st_mode;
222

Russell Belfer committed
223
	if (S_ISLNK(mode)) {
224
		error = write_symlink(id, odb, content_path, (size_t)size);
225
	} else {
226
		git_filter_list *fl = NULL;
227

228
		if (try_load_filters)
229
			/* Load the filters for writing this file to the ODB */
230
			error = git_filter_list_load(
231
				&fl, repo, NULL, hint_path,
232
				GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
233

234 235 236
		if (error < 0)
			/* well, that didn't work */;
		else if (fl == NULL)
237 238
			/* No filters need to be applied to the document: we can stream
			 * directly from disk */
239
			error = write_file_stream(id, odb, content_path, size);
240
		else {
241
			/* We need to apply one or more filters */
242
			error = write_file_filtered(id, &size, odb, content_path, fl, repo);
243

244 245
			git_filter_list_free(fl);
		}
246

247
		/*
248 249 250 251 252
		 * TODO: eventually support streaming filtered files, for files
		 * which are bigger than a given threshold. This is not a priority
		 * because applying a filter in streaming mode changes the final
		 * size of the blob, and without knowing its final size, the blob
		 * cannot be written in stream mode to the ODB.
253
		 *
254 255 256
		 * The plan is to do streaming writes to a tempfile on disk and then
		 * opening streaming that file to the ODB, using
		 * `write_file_stream`.
257 258 259
		 *
		 * CAREFULLY DESIGNED APIS YO
		 */
Vicent Marti committed
260 261
	}

262 263
done:
	git_odb_free(odb);
264
	git_buf_dispose(&path);
265

266 267 268
	return error;
}

269
int git_blob_create_from_workdir(
270
	git_oid *id, git_repository *repo, const char *path)
271
{
272
	return git_blob__create_from_paths(id, NULL, repo, NULL, path, 0, true);
273 274
}

275
int git_blob_create_from_disk(
276
	git_oid *id, git_repository *repo, const char *path)
277 278 279
{
	int error;
	git_buf full_path = GIT_BUF_INIT;
280
	const char *workdir, *hintpath;
281 282

	if ((error = git_path_prettify(&full_path, path, NULL)) < 0) {
283
		git_buf_dispose(&full_path);
284 285 286
		return error;
	}

287 288 289 290 291 292
	hintpath = git_buf_cstr(&full_path);
	workdir  = git_repository_workdir(repo);

	if (workdir && !git__prefixcmp(hintpath, workdir))
		hintpath += strlen(workdir);

293
	error = git_blob__create_from_paths(
294
		id, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true);
295

296
	git_buf_dispose(&full_path);
297 298
	return error;
}
299

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
/*
 * Write stream returned by git_blob_create_from_stream. The functions in
 * this file cast between `git_writestream *` and `blob_writestream *`,
 * which relies on `parent` being the first member.
 */
typedef struct {
	git_writestream parent; /* write/close/free callbacks */
	git_filebuf fbuf; /* file buffer that receives the streamed data */
	git_repository *repo; /* repository the blob is created in */
	char *hintpath; /* heap copy of the caller's hint path, or NULL */
} blob_writestream;

/*
 * `close` callback of the blob write stream: cleans up the stream's file
 * buffer and always reports success. The stream itself is not freed.
 */
static int blob_writestream_close(git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;

	git_filebuf_cleanup(&ws->fbuf);

	return 0;
}

/*
 * `free` callback of the blob write stream: cleans up the file buffer,
 * then releases the hint path copy and the stream structure.
 */
static void blob_writestream_free(git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	char *hint = ws->hintpath;

	git_filebuf_cleanup(&ws->fbuf);
	git__free(hint);
	git__free(ws);
}

/*
 * `write` callback of the blob write stream: forwards `len` bytes from
 * `buffer` to the stream's file buffer and returns that call's result.
 */
static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len)
{
	git_filebuf *target = &((blob_writestream *)_stream)->fbuf;

	return git_filebuf_write(target, buffer, len);
}

331
/*
 * Create a write stream whose contents can later be committed as a blob
 * with git_blob_create_from_stream_commit.
 *
 * out      receives the new stream on success.
 * repo     repository the blob will be written to.
 * hintpath optional path; a private copy is kept in the stream and is
 *          later passed to the blob creation as the filter hint.
 *
 * Written data is collected in a temporary file buffer opened under the
 * repository's objects item path (joined with "streamed").
 *
 * Returns 0 on success or a negative error code; on failure the
 * partially constructed stream is released and `*out` is not written.
 */
int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath)
{
	int error;
	git_buf path = GIT_BUF_INIT;
	blob_writestream *stream;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(repo);

	stream = git__calloc(1, sizeof(blob_writestream));
	GIT_ERROR_CHECK_ALLOC(stream);

	if (hintpath) {
		stream->hintpath = git__strdup(hintpath);

		/*
		 * Go through `cleanup` instead of returning directly so the
		 * stream allocated above is released when the copy fails.
		 */
		if (!stream->hintpath) {
			error = -1;
			goto cleanup;
		}
	}

	stream->repo = repo;
	stream->parent.write = blob_writestream_write;
	stream->parent.close = blob_writestream_close;
	stream->parent.free  = blob_writestream_free;

	if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
		|| (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0)
		goto cleanup;

	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY,
					       0666, 2 * 1024 * 1024)) < 0)
		goto cleanup;

	*out = (git_writestream *) stream;

cleanup:
	if (error < 0)
		blob_writestream_free((git_writestream *) stream);

	git_buf_dispose(&path);
	return error;
}

371
/*
 * Turn the data written to `_stream` into a blob and store its id in
 * `out`.
 *
 * The stream's file buffer is flushed and the resulting file is handed
 * to git_blob__create_from_paths; filters are only attempted when the
 * stream carries a hint path. The stream is freed before returning,
 * whether or not the blob was created.
 */
int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	int rc;

	/*
	 * We can make this more efficient by avoiding writing to
	 * disk, but for now let's re-use the helper functions we
	 * have.
	 */
	rc = git_filebuf_flush(&ws->fbuf);

	if (rc >= 0)
		rc = git_blob__create_from_paths(out, NULL, ws->repo, ws->fbuf.path_lock,
						 ws->hintpath, 0, !!ws->hintpath);

	blob_writestream_free(_stream);
	return rc;
}

Jacques Germishuys committed
393
int git_blob_is_binary(const git_blob *blob)
394
{
395
	git_buf content = GIT_BUF_INIT;
396
	git_object_size_t size;
397

Edward Thomson committed
398
	GIT_ASSERT_ARG(blob);
399

400 401
	size = git_blob_rawsize(blob);

402
	git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
403
		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
404
	return git_buf_is_binary(&content);
405
}
406

407 408 409 410 411 412 413 414 415
/*
 * Initialise `opts` from the GIT_BLOB_FILTER_OPTIONS_INIT template for
 * the requested structure `version`. Returns 0 when execution reaches
 * the end of the function.
 * NOTE(review): the macro presumably returns early with an error for an
 * unsupported version - confirm against its definition.
 */
int git_blob_filter_options_init(
	git_blob_filter_options *opts,
	unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(opts, version,
		git_blob_filter_options, GIT_BLOB_FILTER_OPTIONS_INIT);
	return 0;
}

416
int git_blob_filter(
417
	git_buf *out,
418
	git_blob *blob,
Russell Belfer committed
419
	const char *path,
420
	git_blob_filter_options *given_opts)
421
{
422 423
	int error = 0;
	git_filter_list *fl = NULL;
424
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
425
	git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
426

Edward Thomson committed
427 428 429
	GIT_ASSERT_ARG(blob);
	GIT_ASSERT_ARG(path);
	GIT_ASSERT_ARG(out);
430

431 432 433
	GIT_ERROR_CHECK_VERSION(
		given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options");

434 435 436
	if (git_buf_sanitize(out) < 0)
		return -1;

437 438 439 440 441
	if (given_opts != NULL)
		memcpy(&opts, given_opts, sizeof(git_blob_filter_options));

	if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) != 0 &&
	    git_blob_is_binary(blob))
442 443
		return 0;

444
	if ((opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES) != 0)
445
		filter_opts.flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES;
446

447
	if ((opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_HEAD) != 0)
448
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD;
449

450 451
	if ((opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_COMMIT) != 0) {
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_COMMIT;
452 453 454 455 456 457 458

#ifndef GIT_DEPRECATE_HARD
		if (opts.commit_id)
			git_oid_cpy(&filter_opts.attr_commit_id, opts.commit_id);
		else
#endif
		git_oid_cpy(&filter_opts.attr_commit_id, &opts.attr_commit_id);
459 460 461
	}

	if (!(error = git_filter_list_load_ext(
462
			&fl, git_blob_owner(blob), blob, path,
463
			GIT_FILTER_TO_WORKTREE, &filter_opts))) {
464

465
		error = git_filter_list_apply_to_blob(out, fl, blob);
466

467
		git_filter_list_free(fl);
468 469 470 471
	}

	return error;
}
472 473 474

/* Deprecated functions */

475
#ifndef GIT_DEPRECATE_HARD
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
/* Deprecated name: forwards unchanged to git_blob_create_from_buffer. */
int git_blob_create_frombuffer(
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
{
	return git_blob_create_from_buffer(id, repo, buffer, len);
}

/* Deprecated name: forwards unchanged to git_blob_create_from_workdir. */
int git_blob_create_fromworkdir(git_oid *id, git_repository *repo, const char *relative_path)
{
	return git_blob_create_from_workdir(id, repo, relative_path);
}

/* Deprecated name: forwards unchanged to git_blob_create_from_disk. */
int git_blob_create_fromdisk(git_oid *id, git_repository *repo, const char *path)
{
	return git_blob_create_from_disk(id, repo, path);
}

/* Deprecated name: forwards unchanged to git_blob_create_from_stream. */
int git_blob_create_fromstream(
    git_writestream **out,
    git_repository *repo,
    const char *hintpath)
{
	return  git_blob_create_from_stream(out, repo, hintpath);
}

/* Deprecated name: forwards unchanged to git_blob_create_from_stream_commit. */
int git_blob_create_fromstream_commit(
	git_oid *out,
	git_writestream *stream)
{
	return git_blob_create_from_stream_commit(out, stream);
}
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521

/*
 * Deprecated entry point: filters `blob` for `path` into `out` using
 * default options, with the binary check enabled exactly when
 * `check_for_binary_data` is non-zero. Returns git_blob_filter's result.
 */
int git_blob_filtered_content(
	git_buf *out,
	git_blob *blob,
	const char *path,
	int check_for_binary_data)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;

	/* start from a cleared bit, then set it only on request */
	opts.flags &= ~GIT_BLOB_FILTER_CHECK_FOR_BINARY;

	if (check_for_binary_data)
		opts.flags |= GIT_BLOB_FILTER_CHECK_FOR_BINARY;

	return git_blob_filter(out, blob, path, &opts);
}
522
#endif