diff_file.c 11.9 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
7 8 9

#include "diff_file.h"

10 11 12
#include "git2/blob.h"
#include "git2/submodule.h"
#include "diff.h"
13
#include "diff_generate.h"
14
#include "odb.h"
15
#include "futils.h"
16 17 18 19 20 21 22
#include "filter.h"

/*
 * Size limit (0x20000000 bytes = 512 MiB) applied by
 * diff_file_content_init_common() when the caller's diff options
 * specify a max_size of zero.
 */
#define DIFF_MAX_FILESIZE 0x20000000

/*
 * Mark the file as binary when it is larger than the configured limit.
 *
 * The size test is only applied while none of the DIFF_FLAGS_KNOWN_BINARY
 * bits are set on the file and a positive `opts_max_size` is present.
 *
 * Returns true if GIT_DIFF_FLAG_BINARY is set on the file on exit,
 * regardless of whether this call was the one that set it.
 */
static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
{
	git_diff_file *file = fc->file;

	if ((file->flags & DIFF_FLAGS_KNOWN_BINARY) == 0) {
		bool has_limit = fc->opts_max_size > 0;

		if (has_limit && file->size > fc->opts_max_size)
			file->flags |= GIT_DIFF_FLAG_BINARY;
	}

	return (file->flags & GIT_DIFF_FLAG_BINARY) != 0;
}

/*
 * Ask the diff driver to classify the mapped content, unless one of the
 * DIFF_FLAGS_KNOWN_BINARY bits is already set on the file.
 *
 * A driver result of 0 sets GIT_DIFF_FLAG_NOT_BINARY, a result of 1 sets
 * GIT_DIFF_FLAG_BINARY; any other result leaves the flags untouched.
 */
static void diff_file_content_binary_by_content(git_diff_file_content *fc)
{
	int verdict;

	if ((fc->file->flags & DIFF_FLAGS_KNOWN_BINARY) == 0) {
		verdict = git_diff_driver_content_is_binary(
			fc->driver, fc->map.data, fc->map.len);

		if (verdict == 0)
			fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
		else if (verdict == 1)
			fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	}
}

44 45
/*
 * Shared tail of the two public initializers.
 *
 * Copies the relevant diff options into `fc`, resolves a diff driver if
 * the caller has not already attached one, and makes a first pass at the
 * binary / not-binary classification of the file.
 *
 * `opts` may be NULL, in which case GIT_DIFF_NORMAL flags are used and
 * `opts_max_size` is left as the caller initialized it.
 *
 * Returns 0 on success, -1 if the driver lookup fails.
 */
static int diff_file_content_init_common(
	git_diff_file_content *fc, const git_diff_options *opts)
{
	if (opts != NULL)
		fc->opts_flags = opts->flags;
	else
		fc->opts_flags = GIT_DIFF_NORMAL;

	/* a max_size of zero selects the built-in default limit */
	if (opts != NULL && opts->max_size >= 0) {
		if (opts->max_size)
			fc->opts_max_size = opts->max_size;
		else
			fc->opts_max_size = DIFF_MAX_FILESIZE;
	}

	if (fc->src == GIT_ITERATOR_EMPTY)
		fc->src = GIT_ITERATOR_TREE;

	if (fc->driver == NULL) {
		if (git_diff_driver_lookup(
				&fc->driver, fc->repo, NULL, fc->file->path) < 0)
			return -1;
	}

	/* let the driver adjust the option flags */
	git_diff_driver_update_options(&fc->opts_flags, fc->driver);

	if ((size_t)fc->file->size != fc->file->size) {
		/* size does not survive a round trip through size_t, so the
		 * file could not conceivably be mapped: treat it as binary */
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	} else if ((fc->opts_flags & GIT_DIFF_FORCE_TEXT) != 0) {
		/* user is forcing a text diff of the file */
		fc->file->flags &= ~GIT_DIFF_FLAG_BINARY;
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	} else if ((fc->opts_flags & GIT_DIFF_FORCE_BINARY) != 0) {
		/* user is forcing a binary diff of the file */
		fc->file->flags &= ~GIT_DIFF_FLAG_NOT_BINARY;
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	}

	(void)diff_file_content_binary_by_size(fc);

	/* a side with no data counts as loaded, with empty content */
	if ((fc->flags & GIT_DIFF_FLAG__NO_DATA) != 0) {
		fc->flags |= GIT_DIFF_FLAG__LOADED;
		fc->map.data = "";
		fc->map.len  = 0;
	}

	/* content already available: classify it right away */
	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0)
		diff_file_content_binary_by_content(fc);

	return 0;
}

92
int git_diff_file_content__init_from_diff(
93
	git_diff_file_content *fc,
94
	git_diff *diff,
95
	git_diff_delta *delta,
96 97 98 99 100 101
	bool use_old)
{
	bool has_data = true;

	memset(fc, 0, sizeof(*fc));
	fc->repo = diff->repo;
102
	fc->file = use_old ? &delta->old_file : &delta->new_file;
103 104
	fc->src  = use_old ? diff->old_src : diff->new_src;

105 106
	if (git_diff_driver_lookup(&fc->driver, fc->repo,
		    &diff->attrsession, fc->file->path) < 0)
107 108 109 110 111 112 113 114 115
		return -1;

	switch (delta->status) {
	case GIT_DELTA_ADDED:
		has_data = !use_old; break;
	case GIT_DELTA_DELETED:
		has_data = use_old; break;
	case GIT_DELTA_UNTRACKED:
		has_data = !use_old &&
Russell Belfer committed
116
			(diff->opts.flags & GIT_DIFF_SHOW_UNTRACKED_CONTENT) != 0;
117
		break;
118
	case GIT_DELTA_UNREADABLE:
119 120 121 122 123 124 125 126 127 128
	case GIT_DELTA_MODIFIED:
	case GIT_DELTA_COPIED:
	case GIT_DELTA_RENAMED:
		break;
	default:
		has_data = false;
		break;
	}

	if (!has_data)
129
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
130

131
	return diff_file_content_init_common(fc, &diff->opts);
132 133
}

134
int git_diff_file_content__init_from_src(
135 136 137
	git_diff_file_content *fc,
	git_repository *repo,
	const git_diff_options *opts,
138
	const git_diff_file_content_src *src,
139
	git_diff_file *as_file)
140 141 142
{
	memset(fc, 0, sizeof(*fc));
	fc->repo = repo;
143
	fc->file = as_file;
144

145
	if (!src->blob && !src->buf) {
146
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
147
	} else {
148
		fc->flags |= GIT_DIFF_FLAG__LOADED;
149
		fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
150
		fc->file->mode = GIT_FILEMODE_BLOB;
151

152
		if (src->blob) {
153
			git_blob_dup((git_blob **)&fc->blob, (git_blob *) src->blob);
154 155
			fc->file->size = git_blob_rawsize(src->blob);
			git_oid_cpy(&fc->file->id, git_blob_id(src->blob));
156
			fc->file->id_abbrev = GIT_OID_HEXSZ;
157

158 159
			fc->map.len  = (size_t)fc->file->size;
			fc->map.data = (char *)git_blob_rawcontent(src->blob);
160 161

			fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;
162
		} else {
163 164 165
			int error;
			if ((error = git_odb_hash(&fc->file->id, src->buf, src->buflen, GIT_OBJECT_BLOB)) < 0)
				return error;
166
			fc->file->size = src->buflen;
167
			fc->file->id_abbrev = GIT_OID_HEXSZ;
168

169 170 171
			fc->map.len  = src->buflen;
			fc->map.data = (char *)src->buf;
		}
172 173
	}

174
	return diff_file_content_init_common(fc, opts);
175 176 177 178 179 180
}

/*
 * Build the textual content for a GIT_FILEMODE_COMMIT entry:
 * "Subproject commit <oid><suffix>\n".
 *
 * When `check_status` is true the submodule is looked up so that a
 * missing id can be filled in from the submodule and a "-dirty" suffix
 * can be appended when its working directory is dirty.  A lookup failing
 * with GIT_EEXISTS is not treated as an error: the error is cleared and
 * 0 is returned without producing any content.
 *
 * On success the generated string is stored in `fc->map` and
 * GIT_DIFF_FLAG__FREE_DATA is set.  Returns 0 on success or a negative
 * error code.
 */
static int diff_file_content_commit_to_str(
	git_diff_file_content *fc, bool check_status)
{
	char oid[GIT_OID_HEXSZ+1];
	git_str content = GIT_STR_INIT;
	const char *suffix = "";

	if (check_status) {
		git_submodule *sm = NULL;
		unsigned int sm_status = 0;
		int error;

		error = git_submodule_lookup(&sm, fc->repo, fc->file->path);
		if (error < 0) {
			/* GIT_EEXISTS means a "submodule" that has not been git added */
			if (error != GIT_EEXISTS)
				return error;

			git_error_clear();
			return 0;
		}

		error = git_submodule_status(
			&sm_status, fc->repo, fc->file->path,
			GIT_SUBMODULE_IGNORE_UNSPECIFIED);
		if (error < 0) {
			git_submodule_free(sm);
			return error;
		}

		/* fill in the id if we did not have one; prefer the workdir id
		 * and fall back to the head id */
		if ((fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
			const git_oid *sm_head = git_submodule_wd_id(sm);

			if (sm_head == NULL)
				sm_head = git_submodule_head_id(sm);

			if (sm_head != NULL) {
				git_oid_cpy(&fc->file->id, sm_head);
				fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
			}
		}

		if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
			suffix = "-dirty";

		git_submodule_free(sm);
	}

	git_oid_tostr(oid, sizeof(oid), &fc->file->id);

	if (git_str_printf(&content, "Subproject commit %s%s\n", oid, suffix) < 0)
		return -1;

	/* take the length before detaching the buffer from `content` */
	fc->map.len  = git_str_len(&content);
	fc->map.data = git_str_detach(&content);
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}

230 231 232
/*
 * Load the content of `fc` from the object database.
 *
 * Nothing is loaded (and 0 is returned) when the file id is zero.
 * Entries with GIT_FILEMODE_COMMIT are rendered through
 * diff_file_content_commit_to_str() instead of being read as blobs.
 * Unless GIT_DIFF_SHOW_BINARY is set in `opts`, files that are binary by
 * size are also left unloaded.
 *
 * On success the blob is kept in `fc->blob` (released later through
 * GIT_DIFF_FLAG__FREE_BLOB) and `fc->map` points at its raw content.
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_blob(
	git_diff_file_content *fc,
	git_diff_options *opts)
{
	int error = 0;
	git_odb_object *odb_obj = NULL;

	if (git_oid_is_zero(&fc->file->id))
		return 0;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, false);

	/* if we don't know size, try to peek at object header first; this
	 * may hand back the object itself in odb_obj */
	if (!fc->file->size) {
		if ((error = git_diff_file__resolve_zero_size(
				fc->file, &odb_obj, fc->repo)) < 0)
			return error;
	}

	if ((opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc)) {
		/* we are not going to use the object, so don't leak it */
		if (odb_obj != NULL)
			git_odb_object_free(odb_obj);
		return 0;
	}

	if (odb_obj != NULL) {
		error = git_object__from_odb_object(
			(git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJECT_BLOB);
		git_odb_object_free(odb_obj);
	} else {
		error = git_blob_lookup(
			(git_blob **)&fc->blob, fc->repo, &fc->file->id);
	}

	if (!error) {
		fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;
		fc->map.data = (void *)git_blob_rawcontent(fc->blob);
		fc->map.len  = (size_t)git_blob_rawsize(fc->blob);
	}

	return error;
}

272
static int diff_file_content_load_workdir_symlink_fake(
273
	git_diff_file_content *fc, git_str *path)
274
{
275
	git_str target = GIT_STR_INIT;
276 277 278 279 280
	int error;

	if ((error = git_futils_readbuffer(&target, path->ptr)) < 0)
		return error;

281 282
	fc->map.len = git_str_len(&target);
	fc->map.data = git_str_detach(&target);
283 284
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

285
	git_str_dispose(&target);
286 287 288
	return error;
}

289
static int diff_file_content_load_workdir_symlink(
290
	git_diff_file_content *fc, git_str *path)
291 292
{
	ssize_t alloc_len, read_len;
293 294
	int symlink_supported, error;

295 296
	if ((error = git_repository__configmap_lookup(
		&symlink_supported, fc->repo, GIT_CONFIGMAP_SYMLINKS)) < 0)
297 298 299 300
		return -1;

	if (!symlink_supported)
		return diff_file_content_load_workdir_symlink_fake(fc, path);
301 302 303 304

	/* link path on disk could be UTF-16, so prepare a buffer that is
	 * big enough to handle some UTF-8 data expansion
	 */
305
	alloc_len = (ssize_t)(fc->file->size * 2) + 1;
306 307

	fc->map.data = git__calloc(alloc_len, sizeof(char));
308
	GIT_ERROR_CHECK_ALLOC(fc->map.data);
309

310
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
311

312
	read_len = p_readlink(git_str_cstr(path), fc->map.data, alloc_len);
313
	if (read_len < 0) {
314
		git_error_set(GIT_ERROR_OS, "failed to read symlink '%s'", fc->file->path);
315 316 317 318 319 320 321 322
		return -1;
	}

	fc->map.len = read_len;
	return 0;
}

static int diff_file_content_load_workdir_file(
323
	git_diff_file_content *fc,
324
	git_str *path,
325
	git_diff_options *diff_opts)
326 327
{
	int error = 0;
328
	git_filter_list *fl = NULL;
329 330
	git_file fd = git_futils_open_ro(git_str_cstr(path));
	git_str raw = GIT_STR_INIT;
331
	git_object_size_t new_file_size = 0;
332 333 334 335

	if (fd < 0)
		return fd;

336
	error = git_futils_filesize(&new_file_size, fd);
337

338
	if (error < 0)
339 340
		goto cleanup;

341 342 343 344
	if (!(fc->file->flags & GIT_DIFF_FLAG_VALID_SIZE)) {
		fc->file->size = new_file_size;
		fc->file->flags |= GIT_DIFF_FLAG_VALID_SIZE;
	} else if (fc->file->size != new_file_size) {
345
		git_error_set(GIT_ERROR_FILESYSTEM, "file changed before we could read it");
346 347 348 349
		error = -1;
		goto cleanup;
	}

350 351
	if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc))
352 353
		goto cleanup;

354
	if ((error = git_filter_list_load(
355
			&fl, fc->repo, NULL, fc->file->path,
356
			GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
357 358
		goto cleanup;

359 360
	/* if there are no filters, try to mmap the file */
	if (fl == NULL) {
361
		if (!(error = git_futils_mmap_ro(
362
				&fc->map, fd, 0, (size_t)fc->file->size))) {
363
			fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
364 365 366
			goto cleanup;
		}

367
		/* if mmap failed, fall through to try readbuffer below */
368
		git_error_clear();
369 370
	}

371
	if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size))) {
372
		git_str out = GIT_STR_INIT;
373

374
		error = git_filter_list__convert_buf(&out, fl, &raw);
375

376 377 378 379 380 381
		if (!error) {
			fc->map.len  = out.size;
			fc->map.data = out.ptr;
			fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
		}
	}
382

383
cleanup:
384
	git_filter_list_free(fl);
385 386 387 388 389
	p_close(fd);

	return error;
}

390 391 392
/*
 * Load the content of `fc` from the working directory.
 *
 * GIT_FILEMODE_COMMIT entries are rendered via
 * diff_file_content_commit_to_str() with status checking enabled;
 * GIT_FILEMODE_TREE entries load nothing.  Everything else is read
 * either as a symlink or as a regular file, depending on the file mode.
 *
 * After a successful load, the file id is computed from the loaded data
 * if it was not already valid.  GIT_DIFF_FLAG_VALID_ID is only set when
 * that hash actually succeeded.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error = 0;
	git_str path = GIT_STR_INIT;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, true);

	if (fc->file->mode == GIT_FILEMODE_TREE)
		return 0;

	if (git_repository_workdir_path(&path, fc->repo, fc->file->path) < 0) {
		/* release anything the failed call may have left in `path` */
		git_str_dispose(&path);
		return -1;
	}

	if (S_ISLNK(fc->file->mode))
		error = diff_file_content_load_workdir_symlink(fc, &path);
	else
		error = diff_file_content_load_workdir_file(fc, &path, diff_opts);

	/* once data is loaded, update OID if we didn't have it previously */
	if (!error && (fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
		error = git_odb_hash(
			&fc->file->id, fc->map.data, fc->map.len, GIT_OBJECT_BLOB);

		/* never advertise an id that failed to compute */
		if (!error)
			fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
	}

	git_str_dispose(&path);
	return error;
}

422 423 424
/*
 * Make sure the content for `fc` is loaded.
 *
 * Does nothing when the content is already loaded, or when the file is
 * flagged binary and GIT_DIFF_SHOW_BINARY is not set in `diff_opts`.
 * Otherwise the data is read from the working directory or from the
 * object database, depending on `fc->src`, and the loaded content is
 * then given a binary / not-binary classification.
 *
 * Returns 0 on success (including the "nothing to do" cases) or the
 * error reported by the loader.
 */
int git_diff_file_content__load(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error;

	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0)
		return 0;

	/* binary files are only loaded when binary output was requested */
	if ((fc->file->flags & GIT_DIFF_FLAG_BINARY) != 0) {
		if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0)
			return 0;
	}

	error = (fc->src == GIT_ITERATOR_WORKDIR) ?
		diff_file_content_load_workdir(fc, diff_opts) :
		diff_file_content_load_blob(fc, diff_opts);

	if (error != 0)
		return error;

	fc->flags |= GIT_DIFF_FLAG__LOADED;

	diff_file_content_binary_by_content(fc);

	return 0;
}

449
/*
 * Release whatever was acquired while loading `fc`.
 *
 * Has no effect unless GIT_DIFF_FLAG__LOADED is set.  Heap data is freed
 * or a mapping is unmapped (whichever flag is set, heap data taking
 * precedence), an owned blob is freed, and the corresponding flags are
 * cleared along with GIT_DIFF_FLAG__LOADED.
 */
void git_diff_file_content__unload(git_diff_file_content *fc)
{
	if (!(fc->flags & GIT_DIFF_FLAG__LOADED))
		return;

	if ((fc->flags & (GIT_DIFF_FLAG__FREE_DATA | GIT_DIFF_FLAG__UNMAP_DATA)) != 0) {
		if ((fc->flags & GIT_DIFF_FLAG__FREE_DATA) != 0) {
			git__free(fc->map.data);
			fc->flags &= ~GIT_DIFF_FLAG__FREE_DATA;
		} else {
			git_futils_mmap_free(&fc->map);
			fc->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
		}

		/* leave the map pointing at valid, empty content */
		fc->map.data = "";
		fc->map.len  = 0;
	}

	if ((fc->flags & GIT_DIFF_FLAG__FREE_BLOB) != 0) {
		git_blob_free((git_blob *)fc->blob);
		fc->blob = NULL;
		fc->flags &= ~GIT_DIFF_FLAG__FREE_BLOB;
	}

	fc->flags &= ~GIT_DIFF_FLAG__LOADED;
}

476
/*
 * Tear down `fc`.  Unloading the content is currently the only cleanup
 * that is required.
 */
void git_diff_file_content__clear(git_diff_file_content *fc)
{
	git_diff_file_content__unload(fc);
}