diff_file.c 11.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "common.h"
#include "git2/blob.h"
#include "git2/submodule.h"
#include "diff.h"
11
#include "diff_generate.h"
12 13 14 15 16 17 18 19 20 21
#include "diff_file.h"
#include "odb.h"
#include "fileops.h"
#include "filter.h"

#define DIFF_MAX_FILESIZE 0x20000000

/*
 * Flag the file as binary when it is larger than the configured limit.
 *
 * The size comparison only happens while none of the
 * DIFF_FLAGS_KNOWN_BINARY bits are set on the file and a positive
 * `opts_max_size` has been configured.
 *
 * Returns true when GIT_DIFF_FLAG_BINARY is set on the file after the
 * check, whether it was set here or before the call.
 */
static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
{
	bool undecided = (fc->file->flags & DIFF_FLAGS_KNOWN_BINARY) == 0;
	bool has_limit = fc->opts_max_size > 0;

	if (undecided && has_limit && fc->file->size > fc->opts_max_size)
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;

	return (fc->file->flags & GIT_DIFF_FLAG_BINARY) != 0;
}

/*
 * Ask the diff driver whether the mapped content is binary and record
 * the answer in the file flags.
 *
 * Does nothing when any DIFF_FLAGS_KNOWN_BINARY bit is already set.
 * A driver result of 0 sets GIT_DIFF_FLAG_NOT_BINARY, a result of 1
 * sets GIT_DIFF_FLAG_BINARY, and any other result leaves the flags
 * unchanged.
 */
static void diff_file_content_binary_by_content(git_diff_file_content *fc)
{
	int verdict;

	if (fc->file->flags & DIFF_FLAGS_KNOWN_BINARY)
		return;

	verdict = git_diff_driver_content_is_binary(
		fc->driver, fc->map.data, fc->map.len);

	if (verdict == 0)
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	else if (verdict == 1)
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	/* any other result: leave the binary state undecided */
}

43 44
/*
 * Shared tail of the two init entry points: applies the diff options,
 * resolves the diff driver and settles as much of the binary/not-binary
 * state as can be known at this point.
 *
 * `opts` may be NULL, in which case GIT_DIFF_NORMAL flags are used and
 * `opts_max_size` is left as the caller set it.
 *
 * Returns 0 on success, -1 if the diff driver lookup fails.
 */
static int diff_file_content_init_common(
	git_diff_file_content *fc, const git_diff_options *opts)
{
	if (opts != NULL) {
		fc->opts_flags = opts->flags;

		/* a max_size of zero selects the built-in default limit */
		if (opts->max_size >= 0)
			fc->opts_max_size = (opts->max_size != 0) ?
				opts->max_size : DIFF_MAX_FILESIZE;
	} else {
		fc->opts_flags = GIT_DIFF_NORMAL;
	}

	if (fc->src == GIT_ITERATOR_TYPE_EMPTY)
		fc->src = GIT_ITERATOR_TYPE_TREE;

	if (fc->driver == NULL) {
		if (git_diff_driver_lookup(
				&fc->driver, fc->repo, fc->file->path) < 0)
			return -1;
	}

	/* the driver gets a chance to adjust the option flags */
	git_diff_driver_update_options(&fc->opts_flags, fc->driver);

	if ((git_off_t)((size_t)fc->file->size) != fc->file->size) {
		/* size does not survive a round trip through size_t, so the
		 * file cannot be mapped: treat it as binary
		 */
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) {
		/* caller forces a text diff */
		fc->file->flags &= ~GIT_DIFF_FLAG_BINARY;
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) {
		/* caller forces a binary diff */
		fc->file->flags &= ~GIT_DIFF_FLAG_NOT_BINARY;
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	}

	(void)diff_file_content_binary_by_size(fc);

	/* a side without data counts as loaded with empty content */
	if (fc->flags & GIT_DIFF_FLAG__NO_DATA) {
		fc->flags |= GIT_DIFF_FLAG__LOADED;
		fc->map.data = "";
		fc->map.len  = 0;
	}

	if (fc->flags & GIT_DIFF_FLAG__LOADED)
		diff_file_content_binary_by_content(fc);

	return 0;
}

90
int git_diff_file_content__init_from_diff(
91
	git_diff_file_content *fc,
92
	git_diff *diff,
93
	git_diff_delta *delta,
94 95 96 97 98 99
	bool use_old)
{
	bool has_data = true;

	memset(fc, 0, sizeof(*fc));
	fc->repo = diff->repo;
100
	fc->file = use_old ? &delta->old_file : &delta->new_file;
101 102
	fc->src  = use_old ? diff->old_src : diff->new_src;

103
	if (git_diff_driver_lookup(&fc->driver, fc->repo, fc->file->path) < 0)
104 105 106 107 108 109 110 111 112
		return -1;

	switch (delta->status) {
	case GIT_DELTA_ADDED:
		has_data = !use_old; break;
	case GIT_DELTA_DELETED:
		has_data = use_old; break;
	case GIT_DELTA_UNTRACKED:
		has_data = !use_old &&
Russell Belfer committed
113
			(diff->opts.flags & GIT_DIFF_SHOW_UNTRACKED_CONTENT) != 0;
114
		break;
115
	case GIT_DELTA_UNREADABLE:
116 117 118 119 120 121 122 123 124 125
	case GIT_DELTA_MODIFIED:
	case GIT_DELTA_COPIED:
	case GIT_DELTA_RENAMED:
		break;
	default:
		has_data = false;
		break;
	}

	if (!has_data)
126
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
127

128
	return diff_file_content_init_common(fc, &diff->opts);
129 130
}

131
int git_diff_file_content__init_from_src(
132 133 134
	git_diff_file_content *fc,
	git_repository *repo,
	const git_diff_options *opts,
135
	const git_diff_file_content_src *src,
136
	git_diff_file *as_file)
137 138 139
{
	memset(fc, 0, sizeof(*fc));
	fc->repo = repo;
140
	fc->file = as_file;
141
	fc->blob = src->blob;
142

143
	if (!src->blob && !src->buf) {
144
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
145
	} else {
146
		fc->flags |= GIT_DIFF_FLAG__LOADED;
147
		fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
148
		fc->file->mode = GIT_FILEMODE_BLOB;
149

150 151 152
		if (src->blob) {
			fc->file->size = git_blob_rawsize(src->blob);
			git_oid_cpy(&fc->file->id, git_blob_id(src->blob));
153
			fc->file->id_abbrev = GIT_OID_HEXSZ;
154

155 156 157 158 159
			fc->map.len  = (size_t)fc->file->size;
			fc->map.data = (char *)git_blob_rawcontent(src->blob);
		} else {
			fc->file->size = src->buflen;
			git_odb_hash(&fc->file->id, src->buf, src->buflen, GIT_OBJ_BLOB);
160
			fc->file->id_abbrev = GIT_OID_HEXSZ;
161

162 163 164
			fc->map.len  = src->buflen;
			fc->map.data = (char *)src->buf;
		}
165 166
	}

167
	return diff_file_content_init_common(fc, opts);
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
}

/*
 * Produce the textual stand-in for a commit (submodule) entry:
 * "Subproject commit <oid><status>\n", stored in fc->map and marked
 * with GIT_DIFF_FLAG__FREE_DATA.
 *
 * With `check_status` set, the submodule is looked up first so that
 *  - a missing id can be filled in from the submodule's workdir id or,
 *    failing that, its head id, and
 *  - "-dirty" is appended when the submodule status reports a dirty
 *    working directory.
 * A lookup failing with GIT_EEXISTS is not treated as an error: the
 * error is cleared and 0 is returned without producing any content.
 *
 * Returns 0 on success, a negative error code from the submodule calls,
 * or -1 if formatting the text fails.
 */
static int diff_file_content_commit_to_str(
	git_diff_file_content *fc, bool check_status)
{
	char hex[GIT_OID_HEXSZ+1];
	git_buf text = GIT_BUF_INIT;
	const char *suffix = "";

	if (check_status) {
		git_submodule *sm = NULL;
		unsigned int sm_status = 0;
		const git_oid *head = NULL;
		int error;

		error = git_submodule_lookup(&sm, fc->repo, fc->file->path);
		if (error < 0) {
			if (error != GIT_EEXISTS)
				return error;

			/* GIT_EEXISTS means a "submodule" that has not been git added */
			giterr_clear();
			return 0;
		}

		error = git_submodule_status(
			&sm_status, fc->repo, fc->file->path,
			GIT_SUBMODULE_IGNORE_UNSPECIFIED);
		if (error < 0) {
			git_submodule_free(sm);
			return error;
		}

		/* fill in the id if the file record does not have one yet */
		if ((fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
			head = git_submodule_wd_id(sm);
			if (head == NULL)
				head = git_submodule_head_id(sm);

			if (head != NULL) {
				git_oid_cpy(&fc->file->id, head);
				fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
			}
		}

		if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
			suffix = "-dirty";

		git_submodule_free(sm);
	}

	git_oid_tostr(hex, sizeof(hex), &fc->file->id);

	if (git_buf_printf(&text, "Subproject commit %s%s\n", hex, suffix) < 0)
		return -1;

	/* hand the formatted text over to fc; unload will free it */
	fc->map.len  = git_buf_len(&text);
	fc->map.data = git_buf_detach(&text);
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}

223 224 225
/*
 * Load the content of a non-workdir file from the object database.
 *
 * Nothing is loaded (and 0 is returned) when the file id is all zeros
 * or when the file is considered binary by size and
 * GIT_DIFF_SHOW_BINARY is not set in `opts`. Commit entries are
 * rendered through diff_file_content_commit_to_str() instead.
 *
 * On success the blob is stored in fc->blob, flagged with
 * GIT_DIFF_FLAG__FREE_BLOB, and fc->map points at its raw content.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_blob(
	git_diff_file_content *fc,
	git_diff_options *opts)
{
	int error = 0;
	git_odb_object *odb_obj = NULL;

	if (git_oid_iszero(&fc->file->id))
		return 0;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, false);

	/* if we don't know size, try to peek at object header first */
	if (!fc->file->size) {
		if ((error = git_diff_file__resolve_zero_size(
				fc->file, &odb_obj, fc->repo)) < 0)
			return error;
	}

	if ((opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc)) {
		/* resolving the size may have handed us the object itself;
		 * it is not going to be turned into a blob, so release it
		 * here instead of leaking it
		 */
		if (odb_obj != NULL)
			git_odb_object_free(odb_obj);
		return 0;
	}

	if (odb_obj != NULL) {
		/* reuse the object we already read instead of looking it up again */
		error = git_object__from_odb_object(
			(git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJ_BLOB);
		git_odb_object_free(odb_obj);
	} else {
		error = git_blob_lookup(
			(git_blob **)&fc->blob, fc->repo, &fc->file->id);
	}

	if (!error) {
		fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;
		fc->map.data = (void *)git_blob_rawcontent(fc->blob);
		fc->map.len  = (size_t)git_blob_rawsize(fc->blob);
	}

	return error;
}

265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
/*
 * Load a symlink entry by reading the file at `path` as a regular file
 * and using its contents as the link content. Used when the repository
 * configuration reports that symlinks are not supported.
 *
 * On success the buffer is handed over to fc->map and flagged with
 * GIT_DIFF_FLAG__FREE_DATA.
 *
 * Returns the result of git_futils_readbuffer().
 */
static int diff_file_content_load_workdir_symlink_fake(
	git_diff_file_content *fc, git_buf *path)
{
	git_buf contents = GIT_BUF_INIT;
	int error = git_futils_readbuffer(&contents, path->ptr);

	if (error >= 0) {
		fc->map.len = git_buf_len(&contents);
		fc->map.data = git_buf_detach(&contents);
		fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

		git_buf_free(&contents);
	}

	return error;
}

282 283 284 285
/*
 * Load the target of a working-directory symlink as the file content.
 *
 * When the repository configuration says symlinks are unsupported, the
 * entry is read as a regular file instead. Otherwise a zeroed buffer is
 * allocated and filled by p_readlink(); on success the buffer is owned
 * by fc (GIT_DIFF_FLAG__FREE_DATA) and fc->map.len is the number of
 * bytes read.
 *
 * On failure nothing is left attached to fc: the buffer is released
 * here because unload does nothing for content that was never marked
 * as loaded.
 *
 * Returns 0 on success, -1 on error.
 */
static int diff_file_content_load_workdir_symlink(
	git_diff_file_content *fc, git_buf *path)
{
	ssize_t alloc_len, read_len;
	int symlink_supported;

	if (git_repository__cvar(
		&symlink_supported, fc->repo, GIT_CVAR_SYMLINKS) < 0)
		return -1;

	if (!symlink_supported)
		return diff_file_content_load_workdir_symlink_fake(fc, path);

	/* link path on disk could be UTF-16, so prepare a buffer that is
	 * big enough to handle some UTF-8 data expansion
	 */
	alloc_len = (ssize_t)(fc->file->size * 2) + 1;

	fc->map.data = git__calloc(alloc_len, sizeof(char));
	GITERR_CHECK_ALLOC(fc->map.data);

	read_len = p_readlink(git_buf_cstr(path), fc->map.data, alloc_len);
	if (read_len < 0) {
		/* report first so the OS error is still the readlink failure */
		giterr_set(GITERR_OS, "failed to read symlink '%s'", fc->file->path);

		git__free(fc->map.data);
		fc->map.data = "";
		fc->map.len  = 0;
		return -1;
	}

	/* only claim ownership once there is real data to free later */
	fc->map.len = read_len;
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
	return 0;
}

static int diff_file_content_load_workdir_file(
316 317 318
	git_diff_file_content *fc,
	git_buf *path,
	git_diff_options *diff_opts)
319 320
{
	int error = 0;
321
	git_filter_list *fl = NULL;
322
	git_file fd = git_futils_open_ro(git_buf_cstr(path));
323
	git_buf raw = GIT_BUF_INIT;
324 325 326 327

	if (fd < 0)
		return fd;

328 329
	if (!fc->file->size &&
		!(fc->file->size = git_futils_filesize(fd)))
330 331
		goto cleanup;

332 333
	if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc))
334 335
		goto cleanup;

336
	if ((error = git_filter_list_load(
337
			&fl, fc->repo, NULL, fc->file->path,
338
			GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
339 340
		goto cleanup;

341 342
	/* if there are no filters, try to mmap the file */
	if (fl == NULL) {
343
		if (!(error = git_futils_mmap_ro(
344
				&fc->map, fd, 0, (size_t)fc->file->size))) {
345
			fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
346 347 348
			goto cleanup;
		}

349 350
		/* if mmap failed, fall through to try readbuffer below */
		giterr_clear();
351 352
	}

353
	if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size))) {
354
		git_buf out = GIT_BUF_INIT;
355

356
		error = git_filter_list_apply_to_data(&out, fl, &raw);
357

358 359
		if (out.ptr != raw.ptr)
			git_buf_free(&raw);
360

361 362 363 364 365 366
		if (!error) {
			fc->map.len  = out.size;
			fc->map.data = out.ptr;
			fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
		}
	}
367

368
cleanup:
369
	git_filter_list_free(fl);
370 371 372 373 374
	p_close(fd);

	return error;
}

375 376 377
/*
 * Load the content of a working-directory entry.
 *
 * Commit entries are rendered as text (with a submodule status check),
 * tree entries load nothing, symlinks and regular files are read from
 * the path formed by joining the repository workdir and the file path.
 *
 * Once data has been loaded and the file record has no valid id yet,
 * the id is computed by hashing the loaded content. The record is only
 * marked as having a valid id when that hash succeeds.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error = 0;
	git_buf path = GIT_BUF_INIT;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, true);

	if (fc->file->mode == GIT_FILEMODE_TREE)
		return 0;

	if (git_buf_joinpath(
			&path, git_repository_workdir(fc->repo), fc->file->path) < 0)
		return -1;

	if (S_ISLNK(fc->file->mode))
		error = diff_file_content_load_workdir_symlink(fc, &path);
	else
		error = diff_file_content_load_workdir_file(fc, &path, diff_opts);

	/* once data is loaded, update OID if we didn't have it previously */
	if (!error && (fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
		error = git_odb_hash(
			&fc->file->id, fc->map.data, fc->map.len, GIT_OBJ_BLOB);

		/* do not advertise an id that was never computed */
		if (!error)
			fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
	}

	git_buf_free(&path);
	return error;
}

408 409 410
/*
 * Make sure the content of `fc` is loaded.
 *
 * Returns 0 immediately when the content is already loaded, or when
 * the file is flagged binary and GIT_DIFF_SHOW_BINARY is not set in
 * `diff_opts`. Otherwise the content is read from the working
 * directory or from a blob depending on fc->src, the LOADED flag is
 * set, and the binary state is re-evaluated from the loaded content.
 *
 * Returns 0 on success or the error of the failed load.
 */
int git_diff_file_content__load(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error;

	if (fc->flags & GIT_DIFF_FLAG__LOADED)
		return 0;

	/* binary files are not loaded unless binary output was requested */
	if (fc->file->flags & GIT_DIFF_FLAG_BINARY) {
		if (!(diff_opts->flags & GIT_DIFF_SHOW_BINARY))
			return 0;
	}

	error = (fc->src == GIT_ITERATOR_TYPE_WORKDIR) ?
		diff_file_content_load_workdir(fc, diff_opts) :
		diff_file_content_load_blob(fc, diff_opts);

	if (error != 0)
		return error;

	fc->flags |= GIT_DIFF_FLAG__LOADED;
	diff_file_content_binary_by_content(fc);

	return 0;
}

435
/*
 * Release whatever a previous load attached to `fc`.
 *
 * Does nothing unless GIT_DIFF_FLAG__LOADED is set. Heap data
 * (FREE_DATA) is freed, mapped data (UNMAP_DATA) is unmapped, and in
 * either case the map is reset to an empty string of length 0. A blob
 * owned by fc (FREE_BLOB) is freed and the pointer cleared. Each
 * ownership flag handled here is cleared, and finally the LOADED flag
 * itself.
 */
void git_diff_file_content__unload(git_diff_file_content *fc)
{
	if (!(fc->flags & GIT_DIFF_FLAG__LOADED))
		return;

	if (fc->flags & (GIT_DIFF_FLAG__FREE_DATA | GIT_DIFF_FLAG__UNMAP_DATA)) {
		/* heap ownership takes precedence over a mapping */
		if (fc->flags & GIT_DIFF_FLAG__FREE_DATA) {
			git__free(fc->map.data);
			fc->flags &= ~GIT_DIFF_FLAG__FREE_DATA;
		} else {
			git_futils_mmap_free(&fc->map);
			fc->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
		}

		fc->map.data = "";
		fc->map.len  = 0;
	}

	if (fc->flags & GIT_DIFF_FLAG__FREE_BLOB) {
		git_blob_free((git_blob *)fc->blob);
		fc->flags &= ~GIT_DIFF_FLAG__FREE_BLOB;
		fc->blob = NULL;
	}

	fc->flags &= ~GIT_DIFF_FLAG__LOADED;
}

462
/*
 * Tear down `fc`. At present this only unloads any loaded content;
 * there is no other state to release.
 */
void git_diff_file_content__clear(git_diff_file_content *fc)
{
	/* unloading is currently the whole of the cleanup */
	git_diff_file_content__unload(fc);
}