/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

8 9
#include "blob.h"

10 11 12
#include "git2/common.h"
#include "git2/object.h"
#include "git2/repository.h"
13
#include "git2/odb_backend.h"
14

15
#include "filebuf.h"
16
#include "filter.h"
17

18
const void *git_blob_rawcontent(const git_blob *blob)
19
{
Edward Thomson committed
20 21
	GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL);

22 23 24 25
	if (blob->raw)
		return blob->data.raw.data;
	else
		return git_odb_object_data(blob->data.odb);
26 27
}

28
git_object_size_t git_blob_rawsize(const git_blob *blob)
29
{
Edward Thomson committed
30 31
	GIT_ASSERT_ARG(blob);

32 33 34
	if (blob->raw)
		return blob->data.raw.size;
	else
35
		return (git_object_size_t)git_odb_object_size(blob->data.odb);
36 37
}

38 39
/*
 * Set `buffer` to the raw content of `blob` via git_buf_set().
 *
 * The blob size is first run through GIT_ERROR_CHECK_BLOBSIZE so that a
 * size which fails that check is rejected before it is narrowed to size_t.
 * Returns the result of git_buf_set().
 */
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
	const void *content;
	git_object_size_t size = git_blob_rawsize(blob);

	GIT_ERROR_CHECK_BLOBSIZE(size);

	content = git_blob_rawcontent(blob);
	return git_buf_set(buffer, content, (size_t)size);
}

46
void git_blob__free(void *_blob)
47
{
48 49 50
	git_blob *blob = (git_blob *) _blob;
	if (!blob->raw)
		git_odb_object_free(blob->data.odb);
51
	git__free(blob);
52 53
}

54
int git_blob__parse_raw(void *_blob, const char *data, size_t size)
55
{
56
	git_blob *blob = (git_blob *) _blob;
Edward Thomson committed
57 58 59

	GIT_ASSERT_ARG(blob);

60 61 62 63 64 65 66 67 68
	blob->raw = 1;
	blob->data.raw.data = data;
	blob->data.raw.size = size;
	return 0;
}

/*
 * Initialize a blob from an ODB object.
 *
 * A reference is taken on `odb_obj` (via git_cached_obj_incref) before it
 * is stored; git_blob__free() releases it again. The blob is flagged as
 * not "raw". Returns 0 once the fields are set.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
	git_blob *blob = _blob;

	GIT_ASSERT_ARG(blob);

	git_cached_obj_incref((git_cached_obj *)odb_obj);

	blob->data.odb = odb_obj;
	blob->raw = 0;

	return 0;
}

78
int git_blob_create_from_buffer(
79
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
80
{
Vicent Marti committed
81
	int error;
82
	git_odb *odb;
Vicent Marti committed
83
	git_odb_stream *stream;
84

Edward Thomson committed
85 86
	GIT_ASSERT_ARG(id);
	GIT_ASSERT_ARG(repo);
87

88
	if ((error = git_repository_odb__weakptr(&odb, repo)) < 0 ||
89
		(error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB)) < 0)
90 91
		return error;

92
	if ((error = git_odb_stream_write(stream, buffer, len)) == 0)
93
		error = git_odb_stream_finalize_write(id, stream);
94

95
	git_odb_stream_free(stream);
96
	return error;
97 98
}

99
static int write_file_stream(
100
	git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size)
101 102
{
	int fd, error;
103
	char buffer[FILEIO_BUFSIZE];
104
	git_odb_stream *stream = NULL;
105
	ssize_t read_len = -1;
106
	git_object_size_t written = 0;
107

108
	if ((error = git_odb_open_wstream(
109
			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
110 111
		return error;

112
	if ((fd = git_futils_open_ro(path)) < 0) {
113
		git_odb_stream_free(stream);
114
		return -1;
115 116
	}

Vicent Marti committed
117
	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
118
		error = git_odb_stream_write(stream, buffer, read_len);
Vicent Marti committed
119
		written += read_len;
120 121 122 123
	}

	p_close(fd);

Vicent Marti committed
124
	if (written != file_size || read_len < 0) {
125
		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
Vicent Marti committed
126 127 128
		error = -1;
	}

129
	if (!error)
130
		error = git_odb_stream_finalize_write(id, stream);
131

132
	git_odb_stream_free(stream);
133 134 135 136
	return error;
}

static int write_file_filtered(
137
	git_oid *id,
138
	git_object_size_t *size,
139
	git_odb *odb,
140
	const char *full_path,
141
	git_filter_list *fl)
142 143
{
	int error;
144
	git_buf tgt = GIT_BUF_INIT;
145

146
	error = git_filter_list_apply_to_file(&tgt, fl, NULL, full_path);
147

148
	/* Write the file to disk if it was properly filtered */
149
	if (!error) {
150
		*size = tgt.size;
151

152
		error = git_odb_write(id, odb, tgt.ptr, tgt.size, GIT_OBJECT_BLOB);
153
	}
154

155
	git_buf_dispose(&tgt);
156
	return error;
157 158
}

159
static int write_symlink(
160
	git_oid *id, git_odb *odb, const char *path, size_t link_size)
161 162 163 164 165 166
{
	char *link_data;
	ssize_t read_len;
	int error;

	link_data = git__malloc(link_size);
167
	GIT_ERROR_CHECK_ALLOC(link_data);
168 169 170

	read_len = p_readlink(path, link_data, link_size);
	if (read_len != (ssize_t)link_size) {
171
		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);
172
		git__free(link_data);
173
		return -1;
174 175
	}

176
	error = git_odb_write(id, odb, (void *)link_data, link_size, GIT_OBJECT_BLOB);
177
	git__free(link_data);
178 179 180
	return error;
}

181
int git_blob__create_from_paths(
182
	git_oid *id,
183 184 185 186 187 188
	struct stat *out_st,
	git_repository *repo,
	const char *content_path,
	const char *hint_path,
	mode_t hint_mode,
	bool try_load_filters)
189
{
190
	int error;
191
	struct stat st;
192
	git_odb *odb = NULL;
193
	git_object_size_t size;
194 195
	mode_t mode;
	git_buf path = GIT_BUF_INIT;
196

Edward Thomson committed
197
	GIT_ASSERT_ARG(hint_path || !try_load_filters);
198

199
	if (!content_path) {
200
		if (git_repository_workdir_path(&path, repo, hint_path) < 0)
201 202 203 204 205 206 207 208 209
			return -1;

		content_path = path.ptr;
	}

	if ((error = git_path_lstat(content_path, &st)) < 0 ||
		(error = git_repository_odb(&odb, repo)) < 0)
		goto done;

210
	if (S_ISDIR(st.st_mode)) {
211
		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
212 213 214 215
		error = GIT_EDIRECTORY;
		goto done;
	}

216 217
	if (out_st)
		memcpy(out_st, &st, sizeof(st));
218

219
	size = st.st_size;
220
	mode = hint_mode ? hint_mode : st.st_mode;
221

Russell Belfer committed
222
	if (S_ISLNK(mode)) {
223
		error = write_symlink(id, odb, content_path, (size_t)size);
224
	} else {
225
		git_filter_list *fl = NULL;
226

227
		if (try_load_filters)
228
			/* Load the filters for writing this file to the ODB */
229
			error = git_filter_list_load(
230
				&fl, repo, NULL, hint_path,
231
				GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
232

233 234 235
		if (error < 0)
			/* well, that didn't work */;
		else if (fl == NULL)
236 237
			/* No filters need to be applied to the document: we can stream
			 * directly from disk */
238
			error = write_file_stream(id, odb, content_path, size);
239
		else {
240
			/* We need to apply one or more filters */
241
			error = write_file_filtered(id, &size, odb, content_path, fl);
242

243 244
			git_filter_list_free(fl);
		}
245

246
		/*
247 248 249 250 251
		 * TODO: eventually support streaming filtered files, for files
		 * which are bigger than a given threshold. This is not a priority
		 * because applying a filter in streaming mode changes the final
		 * size of the blob, and without knowing its final size, the blob
		 * cannot be written in stream mode to the ODB.
252
		 *
253 254 255
		 * The plan is to do streaming writes to a tempfile on disk and then
		 * opening streaming that file to the ODB, using
		 * `write_file_stream`.
256 257 258
		 *
		 * CAREFULLY DESIGNED APIS YO
		 */
Vicent Marti committed
259 260
	}

261 262
done:
	git_odb_free(odb);
263
	git_buf_dispose(&path);
264

265 266 267
	return error;
}

268
int git_blob_create_from_workdir(
269
	git_oid *id, git_repository *repo, const char *path)
270
{
271
	return git_blob__create_from_paths(id, NULL, repo, NULL, path, 0, true);
272 273
}

274
int git_blob_create_from_disk(
275
	git_oid *id, git_repository *repo, const char *path)
276 277 278
{
	int error;
	git_buf full_path = GIT_BUF_INIT;
279
	const char *workdir, *hintpath;
280 281

	if ((error = git_path_prettify(&full_path, path, NULL)) < 0) {
282
		git_buf_dispose(&full_path);
283 284 285
		return error;
	}

286 287 288 289 290 291
	hintpath = git_buf_cstr(&full_path);
	workdir  = git_repository_workdir(repo);

	if (workdir && !git__prefixcmp(hintpath, workdir))
		hintpath += strlen(workdir);

292
	error = git_blob__create_from_paths(
293
		id, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true);
294

295
	git_buf_dispose(&full_path);
296 297
	return error;
}
298

299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
/*
 * Write stream handed out by git_blob_create_from_stream(). Data written
 * to it is collected in a file buffer and turned into a blob by
 * git_blob_create_from_stream_commit().
 */
typedef struct {
	/* Stream callbacks; kept first because the code casts between
	 * git_writestream * and blob_writestream * */
	git_writestream parent;
	/* File buffer that receives everything written to the stream */
	git_filebuf fbuf;
	/* Repository the blob will be created in */
	git_repository *repo;
	/* Copy of the caller's hint path (may be NULL); freed with the stream */
	char *hintpath;
} blob_writestream;

/*
 * `close` callback of the blob write stream: cleans up the underlying
 * file buffer. Always returns 0.
 */
static int blob_writestream_close(git_writestream *_stream)
{
	blob_writestream *blob_stream = (blob_writestream *)_stream;

	git_filebuf_cleanup(&blob_stream->fbuf);

	return 0;
}

/*
 * `free` callback of the blob write stream: cleans up the file buffer,
 * then releases the copied hint path and the stream structure itself.
 */
static void blob_writestream_free(git_writestream *_stream)
{
	blob_writestream *blob_stream = (blob_writestream *)_stream;

	git_filebuf_cleanup(&blob_stream->fbuf);

	git__free(blob_stream->hintpath);
	git__free(blob_stream);
}

/*
 * `write` callback of the blob write stream: appends `len` bytes from
 * `buffer` to the stream's file buffer and returns the result of
 * git_filebuf_write().
 */
static int blob_writestream_write(
	git_writestream *_stream, const char *buffer, size_t len)
{
	blob_writestream *blob_stream = (blob_writestream *)_stream;
	git_filebuf *target = &blob_stream->fbuf;

	return git_filebuf_write(target, buffer, len);
}

330
/*
 * Create a write stream whose contents will become a blob.
 *
 * out      - receives the new stream on success; untouched on failure
 * repo     - repository the blob will be written to
 * hintpath - optional path; a copy is stored on the stream and later used
 *            as the hint path (and to decide whether filters are loaded)
 *            when the stream is committed
 *
 * The stream writes into a temporary file buffer named "streamed" under
 * the repository's objects directory. On any failure the partially built
 * stream is freed. Returns 0 on success or a negative error code.
 */
int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath)
{
	int error;
	git_buf path = GIT_BUF_INIT;
	blob_writestream *stream;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(repo);

	stream = git__calloc(1, sizeof(blob_writestream));
	GIT_ERROR_CHECK_ALLOC(stream);

	if (hintpath) {
		stream->hintpath = git__strdup(hintpath);

		/*
		 * Do not return directly here: `stream` is already allocated
		 * and must be released through the common cleanup path.
		 */
		if (stream->hintpath == NULL) {
			error = -1;
			goto cleanup;
		}
	}

	stream->repo = repo;
	stream->parent.write = blob_writestream_write;
	stream->parent.close = blob_writestream_close;
	stream->parent.free  = blob_writestream_free;

	if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
		|| (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0)
		goto cleanup;

	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY,
					       0666, 2 * 1024 * 1024)) < 0)
		goto cleanup;

	*out = (git_writestream *) stream;

cleanup:
	if (error < 0)
		blob_writestream_free((git_writestream *) stream);

	git_buf_dispose(&path);
	return error;
}

370
/*
 * Finish a stream created by git_blob_create_from_stream() and write its
 * contents as a blob, storing the resulting object id in `out`.
 *
 * The stream is freed on every path, whether or not the blob was created.
 * Filters are loaded only if the stream was created with a hint path.
 * Returns 0 on success or a negative error code.
 */
int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream)
{
	blob_writestream *stream = (blob_writestream *)_stream;
	int error;

	/*
	 * We can make this more efficient by avoiding writing to
	 * disk, but for now let's re-use the helper functions we
	 * have.
	 */
	error = git_filebuf_flush(&stream->fbuf);

	if (error >= 0)
		error = git_blob__create_from_paths(
			out, NULL, stream->repo, stream->fbuf.path_lock,
			stream->hintpath, 0, stream->hintpath != NULL);

	blob_writestream_free(_stream);
	return error;
}

Jacques Germishuys committed
392
int git_blob_is_binary(const git_blob *blob)
393
{
394
	git_buf content = GIT_BUF_INIT;
395
	git_object_size_t size;
396

Edward Thomson committed
397
	GIT_ASSERT_ARG(blob);
398

399 400
	size = git_blob_rawsize(blob);

401
	git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
402
		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
403
	return git_buf_is_binary(&content);
404
}
405

406 407 408 409 410 411 412 413 414
/*
 * Initialize a git_blob_filter_options structure.
 *
 * `opts` is filled in through GIT_INIT_STRUCTURE_FROM_TEMPLATE, using
 * GIT_BLOB_FILTER_OPTIONS_INIT as the template. Returns 0 when control
 * reaches the end of the function.
 *
 * NOTE(review): the macro presumably validates `version` and may return
 * an error early on a mismatch -- confirm against its definition.
 */
int git_blob_filter_options_init(
	git_blob_filter_options *opts,
	unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(opts, version,
		git_blob_filter_options, GIT_BLOB_FILTER_OPTIONS_INIT);
	return 0;
}

415
int git_blob_filter(
416
	git_buf *out,
417
	git_blob *blob,
Russell Belfer committed
418
	const char *path,
419
	git_blob_filter_options *given_opts)
420
{
421 422
	int error = 0;
	git_filter_list *fl = NULL;
423
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
424
	git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
425

Edward Thomson committed
426 427 428
	GIT_ASSERT_ARG(blob);
	GIT_ASSERT_ARG(path);
	GIT_ASSERT_ARG(out);
429

430 431 432
	GIT_ERROR_CHECK_VERSION(
		given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options");

433 434 435
	if (git_buf_sanitize(out) < 0)
		return -1;

436 437 438 439 440
	if (given_opts != NULL)
		memcpy(&opts, given_opts, sizeof(git_blob_filter_options));

	if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) != 0 &&
	    git_blob_is_binary(blob))
441 442
		return 0;

443
	if ((opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES) != 0)
444
		filter_opts.flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES;
445

446
	if ((opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_HEAD) != 0)
447
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD;
448

449 450 451 452 453 454
	if ((opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_COMMIT) != 0) {
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_COMMIT;
		filter_opts.commit_id = opts.commit_id;
	}

	if (!(error = git_filter_list_load_ext(
455
			&fl, git_blob_owner(blob), blob, path,
456
			GIT_FILTER_TO_WORKTREE, &filter_opts))) {
457

458
		error = git_filter_list_apply_to_blob(out, fl, blob);
459

460
		git_filter_list_free(fl);
461 462 463 464
	}

	return error;
}
465 466 467

/* Deprecated functions */

468
#ifndef GIT_DEPRECATE_HARD
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
/* Deprecated alias: forwards unchanged to git_blob_create_from_buffer(). */
int git_blob_create_frombuffer(
	git_oid *id,
	git_repository *repo,
	const void *buffer,
	size_t len)
{
	return git_blob_create_from_buffer(id, repo, buffer, len);
}

/* Deprecated alias: forwards unchanged to git_blob_create_from_workdir(). */
int git_blob_create_fromworkdir(
	git_oid *id,
	git_repository *repo,
	const char *relative_path)
{
	return git_blob_create_from_workdir(id, repo, relative_path);
}

/* Deprecated alias: forwards unchanged to git_blob_create_from_disk(). */
int git_blob_create_fromdisk(
	git_oid *id,
	git_repository *repo,
	const char *path)
{
	return git_blob_create_from_disk(id, repo, path);
}

/* Deprecated alias: forwards unchanged to git_blob_create_from_stream(). */
int git_blob_create_fromstream(
	git_writestream **out,
	git_repository *repo,
	const char *hintpath)
{
	return git_blob_create_from_stream(out, repo, hintpath);
}

/*
 * Deprecated alias: forwards unchanged to
 * git_blob_create_from_stream_commit().
 */
int git_blob_create_fromstream_commit(git_oid *out, git_writestream *stream)
{
	return git_blob_create_from_stream_commit(out, stream);
}
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514

/*
 * Deprecated entry point for filtering a blob.
 *
 * Builds default blob filter options, switches the "check for binary"
 * flag on or off according to `check_for_binary_data`, and delegates to
 * git_blob_filter().
 */
int git_blob_filtered_content(
	git_buf *out,
	git_blob *blob,
	const char *path,
	int check_for_binary_data)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;

	if (check_for_binary_data) {
		opts.flags |= GIT_BLOB_FILTER_CHECK_FOR_BINARY;
	} else {
		opts.flags &= ~GIT_BLOB_FILTER_CHECK_FOR_BINARY;
	}

	return git_blob_filter(out, blob, path, &opts);
}
515
#endif