blob.c 12.4 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
Vicent Marti committed
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7
 */

8 9
#include "blob.h"

10 11 12
#include "git2/common.h"
#include "git2/object.h"
#include "git2/repository.h"
13
#include "git2/odb_backend.h"
14

15
#include "buf.h"
16
#include "filebuf.h"
17
#include "filter.h"
18

19
const void *git_blob_rawcontent(const git_blob *blob)
20
{
Edward Thomson committed
21 22
	GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL);

23 24 25 26
	if (blob->raw)
		return blob->data.raw.data;
	else
		return git_odb_object_data(blob->data.odb);
27 28
}

29
git_object_size_t git_blob_rawsize(const git_blob *blob)
30
{
Edward Thomson committed
31 32
	GIT_ASSERT_ARG(blob);

33 34 35
	if (blob->raw)
		return blob->data.raw.size;
	else
36
		return (git_object_size_t)git_odb_object_size(blob->data.odb);
37 38
}

39
/*
 * Copy the raw content of `blob` into `buffer`.
 *
 * The blob size is validated with GIT_ERROR_CHECK_BLOBSIZE before it is
 * narrowed to size_t. Returns the result of git_str_set().
 */
int git_blob__getbuf(git_str *buffer, git_blob *blob)
{
	git_object_size_t len = git_blob_rawsize(blob);
	const void *content;

	GIT_ERROR_CHECK_BLOBSIZE(len);

	content = git_blob_rawcontent(blob);

	return git_str_set(buffer, content, (size_t)len);
}

47
void git_blob__free(void *_blob)
48
{
49 50 51
	git_blob *blob = (git_blob *) _blob;
	if (!blob->raw)
		git_odb_object_free(blob->data.odb);
52
	git__free(blob);
53 54
}

55
int git_blob__parse_raw(void *_blob, const char *data, size_t size)
56
{
57
	git_blob *blob = (git_blob *) _blob;
Edward Thomson committed
58 59 60

	GIT_ASSERT_ARG(blob);

61 62 63 64 65 66 67 68 69
	blob->raw = 1;
	blob->data.raw.data = data;
	blob->data.raw.size = size;
	return 0;
}

/*
 * Initialize `_blob` as a blob backed by the ODB object `odb_obj`.
 *
 * The reference count of `odb_obj` is incremented before the blob
 * stores the pointer. Returns 0 on success.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
	git_blob *blob = _blob;

	GIT_ASSERT_ARG(blob);

	git_cached_obj_incref((git_cached_obj *)odb_obj);

	blob->data.odb = odb_obj;
	blob->raw = 0;

	return 0;
}

79
int git_blob_create_from_buffer(
80
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
81
{
Vicent Marti committed
82
	int error;
83
	git_odb *odb;
Vicent Marti committed
84
	git_odb_stream *stream;
85

Edward Thomson committed
86 87
	GIT_ASSERT_ARG(id);
	GIT_ASSERT_ARG(repo);
88

89
	if ((error = git_repository_odb__weakptr(&odb, repo)) < 0 ||
90
		(error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB)) < 0)
91 92
		return error;

93
	if ((error = git_odb_stream_write(stream, buffer, len)) == 0)
94
		error = git_odb_stream_finalize_write(id, stream);
95

96
	git_odb_stream_free(stream);
97
	return error;
98 99
}

100
static int write_file_stream(
101
	git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size)
102 103
{
	int fd, error;
104
	char buffer[GIT_BUFSIZE_FILEIO];
105
	git_odb_stream *stream = NULL;
106
	ssize_t read_len = -1;
107
	git_object_size_t written = 0;
108

109
	if ((error = git_odb_open_wstream(
110
			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
111 112
		return error;

113
	if ((fd = git_futils_open_ro(path)) < 0) {
114
		git_odb_stream_free(stream);
115
		return -1;
116 117
	}

Vicent Marti committed
118
	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
119
		error = git_odb_stream_write(stream, buffer, read_len);
Vicent Marti committed
120
		written += read_len;
121 122 123 124
	}

	p_close(fd);

Vicent Marti committed
125
	if (written != file_size || read_len < 0) {
126
		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
Vicent Marti committed
127 128 129
		error = -1;
	}

130
	if (!error)
131
		error = git_odb_stream_finalize_write(id, stream);
132

133
	git_odb_stream_free(stream);
134 135 136 137
	return error;
}

static int write_file_filtered(
138
	git_oid *id,
139
	git_object_size_t *size,
140
	git_odb *odb,
141
	const char *full_path,
142 143
	git_filter_list *fl,
	git_repository* repo)
144 145
{
	int error;
146
	git_str tgt = GIT_STR_INIT;
147

148
	error = git_filter_list__apply_to_file(&tgt, fl, repo, full_path);
149

150
	/* Write the file to disk if it was properly filtered */
151
	if (!error) {
152
		*size = tgt.size;
153

154
		error = git_odb_write(id, odb, tgt.ptr, tgt.size, GIT_OBJECT_BLOB);
155
	}
156

157
	git_str_dispose(&tgt);
158
	return error;
159 160
}

161
static int write_symlink(
162
	git_oid *id, git_odb *odb, const char *path, size_t link_size)
163 164 165 166 167 168
{
	char *link_data;
	ssize_t read_len;
	int error;

	link_data = git__malloc(link_size);
169
	GIT_ERROR_CHECK_ALLOC(link_data);
170 171 172

	read_len = p_readlink(path, link_data, link_size);
	if (read_len != (ssize_t)link_size) {
173
		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);
174
		git__free(link_data);
175
		return -1;
176 177
	}

178
	error = git_odb_write(id, odb, (void *)link_data, link_size, GIT_OBJECT_BLOB);
179
	git__free(link_data);
180 181 182
	return error;
}

183
int git_blob__create_from_paths(
184
	git_oid *id,
185 186 187 188 189 190
	struct stat *out_st,
	git_repository *repo,
	const char *content_path,
	const char *hint_path,
	mode_t hint_mode,
	bool try_load_filters)
191
{
192
	int error;
193
	struct stat st;
194
	git_odb *odb = NULL;
195
	git_object_size_t size;
196
	mode_t mode;
197
	git_str path = GIT_STR_INIT;
198

Edward Thomson committed
199
	GIT_ASSERT_ARG(hint_path || !try_load_filters);
200

201
	if (!content_path) {
202
		if (git_repository_workdir_path(&path, repo, hint_path) < 0)
203 204 205 206 207
			return -1;

		content_path = path.ptr;
	}

208
	if ((error = git_fs_path_lstat(content_path, &st)) < 0 ||
209 210 211
		(error = git_repository_odb(&odb, repo)) < 0)
		goto done;

212
	if (S_ISDIR(st.st_mode)) {
213
		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
214 215 216 217
		error = GIT_EDIRECTORY;
		goto done;
	}

218 219
	if (out_st)
		memcpy(out_st, &st, sizeof(st));
220

221
	size = st.st_size;
222
	mode = hint_mode ? hint_mode : st.st_mode;
223

Russell Belfer committed
224
	if (S_ISLNK(mode)) {
225
		error = write_symlink(id, odb, content_path, (size_t)size);
226
	} else {
227
		git_filter_list *fl = NULL;
228

229
		if (try_load_filters)
230
			/* Load the filters for writing this file to the ODB */
231
			error = git_filter_list_load(
232
				&fl, repo, NULL, hint_path,
233
				GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
234

235 236 237
		if (error < 0)
			/* well, that didn't work */;
		else if (fl == NULL)
238 239
			/* No filters need to be applied to the document: we can stream
			 * directly from disk */
240
			error = write_file_stream(id, odb, content_path, size);
241
		else {
242
			/* We need to apply one or more filters */
243
			error = write_file_filtered(id, &size, odb, content_path, fl, repo);
244

245 246
			git_filter_list_free(fl);
		}
247

248
		/*
249 250 251 252 253
		 * TODO: eventually support streaming filtered files, for files
		 * which are bigger than a given threshold. This is not a priority
		 * because applying a filter in streaming mode changes the final
		 * size of the blob, and without knowing its final size, the blob
		 * cannot be written in stream mode to the ODB.
254
		 *
255 256 257
		 * The plan is to do streaming writes to a tempfile on disk and then
		 * opening streaming that file to the ODB, using
		 * `write_file_stream`.
258 259 260
		 *
		 * CAREFULLY DESIGNED APIS YO
		 */
Vicent Marti committed
261 262
	}

263 264
done:
	git_odb_free(odb);
265
	git_str_dispose(&path);
266

267 268 269
	return error;
}

270
int git_blob_create_from_workdir(
271
	git_oid *id, git_repository *repo, const char *path)
272
{
273
	return git_blob__create_from_paths(id, NULL, repo, NULL, path, 0, true);
274 275
}

276
int git_blob_create_from_disk(
277
	git_oid *id, git_repository *repo, const char *path)
278 279
{
	int error;
280
	git_str full_path = GIT_STR_INIT;
281
	const char *workdir, *hintpath = NULL;
282

283
	if ((error = git_fs_path_prettify(&full_path, path, NULL)) < 0) {
284
		git_str_dispose(&full_path);
285 286 287
		return error;
	}

288 289
	workdir  = git_repository_workdir(repo);

290 291
	if (workdir && !git__prefixcmp(full_path.ptr, workdir))
		hintpath = full_path.ptr + strlen(workdir);
292

293
	error = git_blob__create_from_paths(
294
		id, NULL, repo, git_str_cstr(&full_path), hintpath, 0, !!hintpath);
295

296
	git_str_dispose(&full_path);
297 298
	return error;
}
299

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
/*
 * Write stream used to create a blob from streamed data.
 *
 * `parent` is the first member: the functions in this file cast between
 * `git_writestream *` and `blob_writestream *`.
 */
typedef struct {
	git_writestream parent;
	git_filebuf fbuf;        /* file buffer that receives the written data */
	git_repository *repo;    /* repository the blob is created in */
	char *hintpath;          /* duplicated hint path, or NULL; freed with the stream */
} blob_writestream;

/*
 * `close` callback of the blob write stream: cleans up the file buffer
 * and always returns 0. The stream itself is not freed here.
 */
static int blob_writestream_close(git_writestream *_stream)
{
	blob_writestream *bws = (blob_writestream *)_stream;

	git_filebuf_cleanup(&bws->fbuf);

	return 0;
}

/*
 * `free` callback of the blob write stream: cleans up the file buffer,
 * then releases the duplicated hint path and the stream structure.
 */
static void blob_writestream_free(git_writestream *_stream)
{
	blob_writestream *bws = (blob_writestream *)_stream;

	git_filebuf_cleanup(&bws->fbuf);

	git__free(bws->hintpath);
	git__free(bws);
}

/*
 * `write` callback of the blob write stream: appends `len` bytes from
 * `buffer` to the stream's file buffer and returns the result of
 * git_filebuf_write().
 */
static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len)
{
	git_filebuf *fbuf = &((blob_writestream *)_stream)->fbuf;

	return git_filebuf_write(fbuf, buffer, len);
}

331
/*
 * Create a write stream whose content will become a blob.
 *
 * - `out`: receives the new stream on success; left untouched on error.
 * - `repo`: repository the blob will be written to.
 * - `hintpath`: optional path, duplicated into the stream and later
 *   used as the filter hint when the stream is committed.
 *
 * Written data is buffered in a temporary file buffer opened under the
 * repository's objects item path. Returns 0 on success or a negative
 * error code; on failure every partially constructed resource is
 * released.
 */
int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath)
{
	int error;
	git_str path = GIT_STR_INIT;
	blob_writestream *stream;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(repo);

	stream = git__calloc(1, sizeof(blob_writestream));
	GIT_ERROR_CHECK_ALLOC(stream);

	if (hintpath) {
		stream->hintpath = git__strdup(hintpath);

		/*
		 * Do not return directly on allocation failure: `stream` is
		 * already allocated and must be released by the cleanup path.
		 */
		if (stream->hintpath == NULL) {
			error = -1;
			goto cleanup;
		}
	}

	stream->repo = repo;
	stream->parent.write = blob_writestream_write;
	stream->parent.close = blob_writestream_close;
	stream->parent.free  = blob_writestream_free;

	if ((error = git_repository__item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
		|| (error = git_str_joinpath(&path, path.ptr, "streamed")) < 0)
		goto cleanup;

	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_str_cstr(&path), GIT_FILEBUF_TEMPORARY,
					       0666, 2 * 1024 * 1024)) < 0)
		goto cleanup;

	*out = (git_writestream *) stream;

cleanup:
	if (error < 0)
		blob_writestream_free((git_writestream *) stream);

	git_str_dispose(&path);
	return error;
}

371
/*
 * Turn the data written to `_stream` into a blob, store its object id
 * in `out`, and free the stream.
 *
 * The stream is freed whether or not blob creation succeeds, so it must
 * not be used after this call.
 */
int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream)
{
	blob_writestream *stream = (blob_writestream *) _stream;
	int error;

	/*
	 * This could be made more efficient by not going through the
	 * file on disk, but for now the existing helper functions are
	 * reused: flush the buffer, then create the blob from that file.
	 */
	error = git_filebuf_flush(&stream->fbuf);

	if (error >= 0)
		error = git_blob__create_from_paths(
			out, NULL, stream->repo, stream->fbuf.path_lock,
			stream->hintpath, 0, stream->hintpath != NULL);

	blob_writestream_free(_stream);
	return error;
}

Jacques Germishuys committed
393
int git_blob_is_binary(const git_blob *blob)
394
{
395
	git_str content = GIT_STR_INIT;
396
	git_object_size_t size;
397

Edward Thomson committed
398
	GIT_ASSERT_ARG(blob);
399

400 401
	size = git_blob_rawsize(blob);

402
	git_str_attach_notowned(&content, git_blob_rawcontent(blob),
403
		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
404
	return git_str_is_binary(&content);
405
}
406

407 408 409 410 411 412 413 414 415
/*
 * Report whether the `len` bytes at `str` look binary.
 *
 * The data is attached to a git_str without being copied and passed to
 * git_str_is_binary(), whose result is returned.
 */
int git_blob_data_is_binary(const char *str, size_t len)
{
	git_str data = GIT_STR_INIT;

	git_str_attach_notowned(&data, str, len);
	return git_str_is_binary(&data);
}

416 417 418 419 420 421 422 423 424
/*
 * Initialize `opts` for the given structure `version` from the
 * GIT_BLOB_FILTER_OPTIONS_INIT template. Returns 0 once the template
 * macro has completed.
 */
int git_blob_filter_options_init(git_blob_filter_options *opts, unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
		opts,
		version,
		git_blob_filter_options,
		GIT_BLOB_FILTER_OPTIONS_INIT);

	return 0;
}

425
int git_blob_filter(
426
	git_buf *out,
427
	git_blob *blob,
Russell Belfer committed
428
	const char *path,
429
	git_blob_filter_options *given_opts)
430
{
431
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
432
	git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
433 434
	git_filter_list *fl = NULL;
	int error = 0;
435

Edward Thomson committed
436 437 438
	GIT_ASSERT_ARG(blob);
	GIT_ASSERT_ARG(path);
	GIT_ASSERT_ARG(out);
439

440 441 442 443 444 445 446 447
	GIT_ERROR_CHECK_VERSION(
		given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options");

	if (given_opts != NULL)
		memcpy(&opts, given_opts, sizeof(git_blob_filter_options));

	if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) != 0 &&
	    git_blob_is_binary(blob))
448 449
		return 0;

450
	if ((opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES) != 0)
451
		filter_opts.flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES;
452

453
	if ((opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_HEAD) != 0)
454
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD;
455

456 457
	if ((opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_COMMIT) != 0) {
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_COMMIT;
458 459 460 461 462 463 464

#ifndef GIT_DEPRECATE_HARD
		if (opts.commit_id)
			git_oid_cpy(&filter_opts.attr_commit_id, opts.commit_id);
		else
#endif
		git_oid_cpy(&filter_opts.attr_commit_id, &opts.attr_commit_id);
465 466 467
	}

	if (!(error = git_filter_list_load_ext(
468
			&fl, git_blob_owner(blob), blob, path,
469
			GIT_FILTER_TO_WORKTREE, &filter_opts))) {
470

471
		error = git_filter_list_apply_to_blob(out, fl, blob);
472

473
		git_filter_list_free(fl);
474 475 476 477
	}

	return error;
}
478 479 480

/* Deprecated functions */

481
#ifndef GIT_DEPRECATE_HARD
482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
/*
 * Deprecated spelling of git_blob_create_from_buffer(); forwards all
 * arguments unchanged and returns its result.
 */
int git_blob_create_frombuffer(
	git_oid *id,
	git_repository *repo,
	const void *buffer,
	size_t len)
{
	int error = git_blob_create_from_buffer(id, repo, buffer, len);

	return error;
}

/*
 * Deprecated spelling of git_blob_create_from_workdir(); forwards all
 * arguments unchanged and returns its result.
 */
int git_blob_create_fromworkdir(
	git_oid *id,
	git_repository *repo,
	const char *relative_path)
{
	int error = git_blob_create_from_workdir(id, repo, relative_path);

	return error;
}

/*
 * Deprecated spelling of git_blob_create_from_disk(); forwards all
 * arguments unchanged and returns its result.
 */
int git_blob_create_fromdisk(
	git_oid *id,
	git_repository *repo,
	const char *path)
{
	int error = git_blob_create_from_disk(id, repo, path);

	return error;
}

/*
 * Deprecated spelling of git_blob_create_from_stream(); forwards all
 * arguments unchanged and returns its result.
 */
int git_blob_create_fromstream(
	git_writestream **out,
	git_repository *repo,
	const char *hintpath)
{
	int error = git_blob_create_from_stream(out, repo, hintpath);

	return error;
}

/*
 * Deprecated spelling of git_blob_create_from_stream_commit(); forwards
 * all arguments unchanged and returns its result.
 */
int git_blob_create_fromstream_commit(git_oid *out, git_writestream *stream)
{
	int error = git_blob_create_from_stream_commit(out, stream);

	return error;
}
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527

/*
 * Deprecated front end to git_blob_filter().
 *
 * Starts from the default filter options and sets or clears
 * GIT_BLOB_FILTER_CHECK_FOR_BINARY according to
 * `check_for_binary_data`, then filters `blob` for `path` into `out`.
 */
int git_blob_filtered_content(
	git_buf *out,
	git_blob *blob,
	const char *path,
	int check_for_binary_data)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;

	/* Clear the flag first, then set it again only when requested */
	opts.flags &= ~GIT_BLOB_FILTER_CHECK_FOR_BINARY;

	if (check_for_binary_data)
		opts.flags |= GIT_BLOB_FILTER_CHECK_FOR_BINARY;

	return git_blob_filter(out, blob, path, &opts);
}
528
#endif