/*
 * Copyright (C) 2012 the libgit2 contributors
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "common.h"
#include "git2/diff.h"
9
#include "diff.h"
10
#include "fileops.h"
11

12
/*
 * Release a delta record.
 *
 * A path string is only freed when the matching file side carries
 * GIT_DIFF_FILE_FREE_PATH, i.e. when that side owns its string.  Passing
 * NULL is allowed and does nothing.
 */
static void diff_delta__free(git_diff_delta *delta)
{
	if (delta != NULL) {
		if ((delta->new_file.flags & GIT_DIFF_FILE_FREE_PATH) != 0) {
			git__free((char *)delta->new_file.path);
			delta->new_file.path = NULL;
		}

		if ((delta->old_file.flags & GIT_DIFF_FILE_FREE_PATH) != 0) {
			git__free((char *)delta->old_file.path);
			delta->old_file.path = NULL;
		}

		git__free(delta);
	}
}

30
static git_diff_delta *diff_delta__alloc(
31
	git_diff_list *diff,
32
	git_delta_t status,
33
	const char *path)
34
{
35 36
	git_diff_delta *delta = git__calloc(1, sizeof(git_diff_delta));
	if (!delta)
37
		return NULL;
38

39 40
	delta->old_file.path = git__strdup(path);
	if (delta->old_file.path == NULL) {
41
		git__free(delta);
42
		return NULL;
43
	}
44 45
	delta->old_file.flags |= GIT_DIFF_FILE_FREE_PATH;
	delta->new_file.path = delta->old_file.path;
46

47 48
	if (diff->opts.flags & GIT_DIFF_REVERSE) {
		switch (status) {
49 50
		case GIT_DELTA_ADDED:   status = GIT_DELTA_DELETED; break;
		case GIT_DELTA_DELETED: status = GIT_DELTA_ADDED; break;
51 52 53
		default: break; /* leave other status values alone */
		}
	}
54 55
	delta->status = status;

56 57 58 59 60 61 62 63 64 65 66
	return delta;
}

static git_diff_delta *diff_delta__dup(const git_diff_delta *d)
{
	git_diff_delta *delta = git__malloc(sizeof(git_diff_delta));
	if (!delta)
		return NULL;

	memcpy(delta, d, sizeof(git_diff_delta));

67 68
	delta->old_file.path = git__strdup(d->old_file.path);
	if (delta->old_file.path == NULL) {
69 70
		git__free(delta);
		return NULL;
71
	}
72
	delta->old_file.flags |= GIT_DIFF_FILE_FREE_PATH;
73

74 75 76 77
	if (d->new_file.path != d->old_file.path) {
		delta->new_file.path = git__strdup(d->new_file.path);
		if (delta->new_file.path == NULL) {
			git__free(delta->old_file.path);
78 79 80
			git__free(delta);
			return NULL;
		}
81
		delta->new_file.flags |= GIT_DIFF_FILE_FREE_PATH;
82
	} else {
83 84
		delta->new_file.path = delta->old_file.path;
		delta->new_file.flags &= ~GIT_DIFF_FILE_FREE_PATH;
85
	}
86

87
	return delta;
88 89
}

90 91
static git_diff_delta *diff_delta__merge_like_cgit(
	const git_diff_delta *a, const git_diff_delta *b)
92
{
93 94 95
	git_diff_delta *dup = diff_delta__dup(a);
	if (!dup)
		return NULL;
96

97
	if (git_oid_cmp(&dup->new_file.oid, &b->new_file.oid) == 0)
98
		return dup;
99

100
	git_oid_cpy(&dup->new_file.oid, &b->new_file.oid);
101

102 103 104 105 106
	dup->new_file.mode = b->new_file.mode;
	dup->new_file.size = b->new_file.size;
	dup->new_file.flags =
		(dup->new_file.flags & GIT_DIFF_FILE_FREE_PATH) |
		(b->new_file.flags & ~GIT_DIFF_FILE_FREE_PATH);
107

108 109 110 111 112 113
	/* Emulate C git for merging two diffs (a la 'git diff <sha>').
	 *
	 * When C git does a diff between the work dir and a tree, it actually
	 * diffs with the index but uses the workdir contents.  This emulates
	 * those choices so we can emulate the type of diff.
	 */
114
	if (git_oid_cmp(&dup->old_file.oid, &dup->new_file.oid) == 0) {
115
		if (dup->status == GIT_DELTA_DELETED)
116
			/* preserve pending delete info */;
117 118
		else if (b->status == GIT_DELTA_UNTRACKED ||
				 b->status == GIT_DELTA_IGNORED)
119 120
			dup->status = b->status;
		else
121
			dup->status = GIT_DELTA_UNMODIFIED;
122
	}
123 124
	else if (dup->status == GIT_DELTA_UNMODIFIED ||
			 b->status == GIT_DELTA_DELETED)
125
		dup->status = b->status;
126

127
	return dup;
128 129
}

130 131
static int diff_delta__from_one(
	git_diff_list *diff,
132
	git_delta_t   status,
133
	const git_index_entry *entry)
134 135
{
	int error;
136 137 138
	git_diff_delta *delta = diff_delta__alloc(diff, status, entry->path);
	if (!delta)
		return git__rethrow(GIT_ENOMEM, "Could not allocate diff record");
139

140
	/* This fn is just for single-sided diffs */
141
	assert(status != GIT_DELTA_MODIFIED);
142

143
	if (delta->status == GIT_DELTA_DELETED) {
144 145 146
		delta->old_file.mode = entry->mode;
		delta->old_file.size = entry->file_size;
		git_oid_cpy(&delta->old_file.oid, &entry->oid);
147
	} else /* ADDED, IGNORED, UNTRACKED */ {
148 149 150
		delta->new_file.mode = entry->mode;
		delta->new_file.size = entry->file_size;
		git_oid_cpy(&delta->new_file.oid, &entry->oid);
151
	}
152

153 154
	delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
	delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
155

156 157
	if ((error = git_vector_insert(&diff->deltas, delta)) < GIT_SUCCESS)
		diff_delta__free(delta);
158 159 160 161

	return error;
}

162 163
static int diff_delta__from_two(
	git_diff_list *diff,
164
	git_delta_t   status,
165 166
	const git_index_entry *old_entry,
	const git_index_entry *new_entry,
167
	git_oid *new_oid)
168 169
{
	int error;
170
	git_diff_delta *delta;
171

172
	if ((diff->opts.flags & GIT_DIFF_REVERSE) != 0) {
173 174 175
		const git_index_entry *temp = old_entry;
		old_entry = new_entry;
		new_entry = temp;
176
	}
177

178
	delta = diff_delta__alloc(diff, status, old_entry->path);
179 180 181
	if (!delta)
		return git__rethrow(GIT_ENOMEM, "Could not allocate diff record");

182 183 184
	delta->old_file.mode = old_entry->mode;
	git_oid_cpy(&delta->old_file.oid, &old_entry->oid);
	delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
185

186 187 188 189
	delta->new_file.mode = new_entry->mode;
	git_oid_cpy(&delta->new_file.oid, new_oid ? new_oid : &new_entry->oid);
	if (new_oid || !git_oid_iszero(&new_entry->oid))
		delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
190 191 192

	if ((error = git_vector_insert(&diff->deltas, delta)) < GIT_SUCCESS)
		diff_delta__free(delta);
193

194 195 196
	return error;
}

197 198
/* Path prefixes used for the old / new side when the caller's options
 * leave old_prefix / new_prefix unset.
 */
#define DIFF_OLD_PREFIX_DEFAULT "a/"
#define DIFF_NEW_PREFIX_DEFAULT "b/"
199

200
/*
 * Duplicate `prefix`, making sure a non-empty result ends in '/'.
 *
 * One spare byte is always allocated so the separator can be appended
 * in place.  An empty prefix is copied as an empty string.  Returns NULL
 * if the allocation fails; otherwise the caller owns the string.
 */
static char *diff_strdup_prefix(const char *prefix)
{
	size_t n = strlen(prefix);
	char *copy = git__malloc(n + 2);

	if (copy == NULL)
		return NULL;

	/* copies the terminating NUL as well */
	memcpy(copy, prefix, n + 1);

	if (n > 0 && copy[n - 1] != '/') {
		copy[n] = '/';
		copy[n + 1] = '\0';
	}

	return copy;
}

215 216 217
static int diff_delta__cmp(const void *a, const void *b)
{
	const git_diff_delta *da = a, *db = b;
218
	int val = strcmp(da->old_file.path, db->old_file.path);
219 220 221
	return val ? val : ((int)da->status - (int)db->status);
}

222 223 224 225
/*
 * Allocate an empty diff list for `repo`.
 *
 * The delta vector is initialised with diff_delta__cmp before anything
 * else, so a list created with `opts == NULL` gets the same comparator
 * as one created with explicit options.
 *
 * When `opts` is given it is copied, and the two prefixes are replaced
 * by owned duplicates (falling back to the "a/" / "b/" defaults) that
 * always end in '/'.  With GIT_DIFF_REVERSE the prefixes are exchanged.
 * When `opts` is NULL the prefixes stay NULL.
 *
 * Returns NULL on allocation failure; everything allocated so far is
 * released, including a prefix whose sibling failed to allocate.
 */
static git_diff_list *git_diff_list_alloc(
	git_repository *repo, const git_diff_options *opts)
{
	git_diff_list *diff = git__calloc(1, sizeof(git_diff_list));
	if (diff == NULL)
		return NULL;

	diff->repo = repo;

	if (git_vector_init(&diff->deltas, 0, diff_delta__cmp) < GIT_SUCCESS) {
		git__free(diff);
		return NULL;
	}

	if (opts == NULL)
		return diff;

	memcpy(&diff->opts, opts, sizeof(git_diff_options));

	diff->opts.old_prefix = diff_strdup_prefix(
		opts->old_prefix ? opts->old_prefix : DIFF_OLD_PREFIX_DEFAULT);
	diff->opts.new_prefix = diff_strdup_prefix(
		opts->new_prefix ? opts->new_prefix : DIFF_NEW_PREFIX_DEFAULT);

	if (!diff->opts.old_prefix || !diff->opts.new_prefix) {
		/* one of the two may have succeeded; freeing NULL is harmless */
		git__free(diff->opts.old_prefix);
		git__free(diff->opts.new_prefix);
		git_vector_free(&diff->deltas);
		git__free(diff);
		return NULL;
	}

	if (diff->opts.flags & GIT_DIFF_REVERSE) {
		char *swap = diff->opts.old_prefix;
		diff->opts.old_prefix = diff->opts.new_prefix;
		diff->opts.new_prefix = swap;
	}

	/* TODO: do something safe with the pathspec strarray; the memcpy
	 * above only copies the options struct itself.
	 */

	return diff;
}

/*
 * Destroy a diff list: every delta it holds, the delta vector, both
 * prefix strings and the list itself.  Passing NULL is a no-op.
 */
void git_diff_list_free(git_diff_list *diff)
{
	unsigned int pos;
	git_diff_delta *entry;

	if (diff == NULL)
		return;

	git_vector_foreach(&diff->deltas, pos, entry) {
		diff->deltas.contents[pos] = NULL;
		diff_delta__free(entry);
	}

	git_vector_free(&diff->deltas);

	git__free(diff->opts.old_prefix);
	git__free(diff->opts.new_prefix);

	git__free(diff);
}

282
static int oid_for_workdir_item(
283
	git_repository *repo,
284 285
	const git_index_entry *item,
	git_oid *oid)
286
{
287 288
	int error = GIT_SUCCESS;
	git_buf full_path = GIT_BUF_INIT;
289

290 291 292
	error = git_buf_joinpath(
		&full_path, git_repository_workdir(repo), item->path);
	if (error != GIT_SUCCESS)
293 294
		return error;

295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
	/* otherwise calculate OID for file */
	if (S_ISLNK(item->mode))
		error = git_odb__hashlink(oid, full_path.ptr);
	else if (!git__is_sizet(item->file_size))
		error = git__throw(GIT_ERROR, "File size overflow for 32-bit systems");
	else {
		int fd;

		if ((fd = p_open(full_path.ptr, O_RDONLY)) < 0)
			error = git__throw(
				GIT_EOSERR, "Could not open '%s'", item->path);
		else {
			error = git_odb__hashfd(
				oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB);
			p_close(fd);
		}
311 312
	}

313
	git_buf_free(&full_path);
314 315 316 317

	return error;
}

318
static int maybe_modified(
319
	git_iterator *old_iter,
320
	const git_index_entry *oitem,
321
	git_iterator *new_iter,
322 323
	const git_index_entry *nitem,
	git_diff_list *diff)
324 325
{
	int error = GIT_SUCCESS;
326
	git_oid noid, *use_noid = NULL;
327

328
	GIT_UNUSED(old_iter);
329

330 331 332
	/* support "assume unchanged" & "skip worktree" bits */
	if ((oitem->flags_extended & GIT_IDXENTRY_INTENT_TO_ADD) != 0 ||
		(oitem->flags_extended & GIT_IDXENTRY_SKIP_WORKTREE) != 0)
333 334
		return GIT_SUCCESS;

335
	if (GIT_MODE_TYPE(oitem->mode) != GIT_MODE_TYPE(nitem->mode)) {
336
		error = diff_delta__from_one(diff, GIT_DELTA_DELETED, oitem);
337
		if (error == GIT_SUCCESS)
338
			error = diff_delta__from_one(diff, GIT_DELTA_ADDED, nitem);
339 340 341
		return error;
	}

342 343 344
	if (git_oid_cmp(&oitem->oid, &nitem->oid) == 0 &&
		oitem->mode == nitem->mode)
		return GIT_SUCCESS;
345

346
	if (git_oid_iszero(&nitem->oid) && new_iter->type == GIT_ITERATOR_WORKDIR) {
347 348 349 350 351 352 353 354 355
		/* if they files look exactly alike, then we'll assume the same */
		if (oitem->file_size == nitem->file_size &&
			oitem->ctime.seconds == nitem->ctime.seconds &&
			oitem->mtime.seconds == nitem->mtime.seconds &&
			oitem->dev == nitem->dev &&
			oitem->ino == nitem->ino &&
			oitem->uid == nitem->uid &&
			oitem->gid == nitem->gid)
			return GIT_SUCCESS;
356

357 358 359 360 361
		/* TODO: check git attributes so we will not have to read the file
		 * in if it is marked binary.
		 */
		error = oid_for_workdir_item(diff->repo, nitem, &noid);
		if (error != GIT_SUCCESS)
362 363
			return error;

364 365 366
		if (git_oid_cmp(&oitem->oid, &noid) == 0 &&
			oitem->mode == nitem->mode)
			return GIT_SUCCESS;
367

368 369
		/* store calculated oid so we don't have to recalc later */
		use_noid = &noid;
370 371
	}

372
	return diff_delta__from_two(
373
		diff, GIT_DELTA_MODIFIED, oitem, nitem, use_noid);
374 375
}

376
static int diff_from_iterators(
377
	git_repository *repo,
378
	const git_diff_options *opts, /**< can be NULL for defaults */
379 380
	git_iterator *old_iter,
	git_iterator *new_iter,
381
	git_diff_list **diff_ptr)
382 383
{
	int error;
384 385 386 387 388 389
	const git_index_entry *oitem, *nitem;
	char *ignore_prefix = NULL;
	git_diff_list *diff = git_diff_list_alloc(repo, opts);
	if (!diff) {
		error = GIT_ENOMEM;
		goto cleanup;
390 391
	}

392 393
	diff->old_src = old_iter->type;
	diff->new_src = new_iter->type;
394

395 396
	if ((error = git_iterator_current(old_iter, &oitem)) < GIT_SUCCESS ||
		(error = git_iterator_current(new_iter, &nitem)) < GIT_SUCCESS)
397
		goto cleanup;
398

399 400
	/* run iterators building diffs */
	while (!error && (oitem || nitem)) {
401

402 403
		/* create DELETED records for old items not matched in new */
		if (oitem && (!nitem || strcmp(oitem->path, nitem->path) < 0)) {
404
			error = diff_delta__from_one(diff, GIT_DELTA_DELETED, oitem);
405
			if (error == GIT_SUCCESS)
406
				error = git_iterator_advance(old_iter, &oitem);
407
			continue;
408
		}
409

410 411 412 413 414
		/* create ADDED, TRACKED, or IGNORED records for new items not
		 * matched in old (and/or descend into directories as needed)
		 */
		if (nitem && (!oitem || strcmp(oitem->path, nitem->path) > 0)) {
			int is_ignored;
415
			git_delta_t delta_type = GIT_DELTA_ADDED;
416

417 418 419
			/* contained in ignored parent directory, so this can be skipped. */
			if (ignore_prefix != NULL &&
				git__prefixcmp(nitem->path, ignore_prefix) == 0)
420
			{
421
				error = git_iterator_advance(new_iter, &nitem);
422
				continue;
423 424
			}

425
			is_ignored = git_iterator_current_is_ignored(new_iter);
426 427 428 429 430

			if (S_ISDIR(nitem->mode)) {
				if (git__prefixcmp(oitem->path, nitem->path) == 0) {
					if (is_ignored)
						ignore_prefix = nitem->path;
431
					error = git_iterator_advance_into_directory(new_iter, &nitem);
432 433
					continue;
				}
434
				delta_type = GIT_DELTA_UNTRACKED;
435
			}
436
			else if (is_ignored)
437
				delta_type = GIT_DELTA_IGNORED;
438
			else if (new_iter->type == GIT_ITERATOR_WORKDIR)
439
				delta_type = GIT_DELTA_UNTRACKED;
440

441
			error = diff_delta__from_one(diff, delta_type, nitem);
442
			if (error == GIT_SUCCESS)
443
				error = git_iterator_advance(new_iter, &nitem);
444
			continue;
445 446
		}

447 448
		/* otherwise item paths match, so create MODIFIED record
		 * (or ADDED and DELETED pair if type changed)
449
		 */
450
		assert(oitem && nitem && strcmp(oitem->path, nitem->path) == 0);
451

452
		error = maybe_modified(old_iter, oitem, new_iter, nitem, diff);
453
		if (error == GIT_SUCCESS)
454
			error = git_iterator_advance(old_iter, &oitem);
455
		if (error == GIT_SUCCESS)
456
			error = git_iterator_advance(new_iter, &nitem);
457 458
	}

459
cleanup:
460 461
	git_iterator_free(old_iter);
	git_iterator_free(new_iter);
462

463 464 465
	if (error != GIT_SUCCESS) {
		git_diff_list_free(diff);
		diff = NULL;
466 467
	}

468
	*diff_ptr = diff;
469

470
	return error;
471 472
}

473 474 475 476

int git_diff_tree_to_tree(
	git_repository *repo,
	const git_diff_options *opts, /**< can be NULL for defaults */
477 478
	git_tree *old_tree,
	git_tree *new_tree,
479
	git_diff_list **diff)
480
{
481
	int error;
482
	git_iterator *a = NULL, *b = NULL;
483

484
	assert(repo && old_tree && new_tree && diff);
485

486 487
	if ((error = git_iterator_for_tree(repo, old_tree, &a)) < GIT_SUCCESS ||
		(error = git_iterator_for_tree(repo, new_tree, &b)) < GIT_SUCCESS)
488
		return error;
489

490
	return diff_from_iterators(repo, opts, a, b, diff);
491 492
}

493 494 495
int git_diff_index_to_tree(
	git_repository *repo,
	const git_diff_options *opts,
496
	git_tree *old_tree,
497
	git_diff_list **diff)
498
{
499 500
	int error;
	git_iterator *a = NULL, *b = NULL;
501

502
	assert(repo && old_tree && diff);
503

504
	if ((error = git_iterator_for_tree(repo, old_tree, &a)) < GIT_SUCCESS ||
505 506
		(error = git_iterator_for_index(repo, &b)) < GIT_SUCCESS)
		return error;
507

508
	return diff_from_iterators(repo, opts, a, b, diff);
509 510
}

511 512 513 514
/*
 * Build a diff list describing the changes from the repository index to
 * the working directory.
 *
 * `opts` may be NULL for defaults.  On success *diff receives the new
 * list.  Once both iterators exist they are handed to
 * diff_from_iterators, which frees them; if the workdir iterator cannot
 * be created, the index iterator is freed here so it is not leaked.
 */
int git_diff_workdir_to_index(
	git_repository *repo,
	const git_diff_options *opts,
	git_diff_list **diff)
{
	int error;
	git_iterator *a = NULL, *b = NULL;

	assert(repo && diff);

	if ((error = git_iterator_for_index(repo, &a)) < GIT_SUCCESS)
		return error;

	if ((error = git_iterator_for_workdir(repo, &b)) < GIT_SUCCESS) {
		git_iterator_free(a);
		return error;
	}

	return diff_from_iterators(repo, opts, a, b, diff);
}


529 530 531
int git_diff_workdir_to_tree(
	git_repository *repo,
	const git_diff_options *opts,
532
	git_tree *old_tree,
533
	git_diff_list **diff)
534
{
535 536
	int error;
	git_iterator *a = NULL, *b = NULL;
537

538
	assert(repo && old_tree && diff);
539

540
	if ((error = git_iterator_for_tree(repo, old_tree, &a)) < GIT_SUCCESS ||
541 542
		(error = git_iterator_for_workdir(repo, &b)) < GIT_SUCCESS)
		return error;
543

544
	return diff_from_iterators(repo, opts, a, b, diff);
545 546
}

547 548 549
/*
 * Merge the deltas of `from` into `onto`.
 *
 * Both delta vectors are walked in path order.  A path present on one
 * side only is copied; a path present on both is combined with
 * diff_delta__merge_like_cgit.  The merged records are collected in a
 * scratch vector which replaces onto->deltas only if every step
 * succeeded, so `onto` is left untouched on failure.  `from` is never
 * modified.
 *
 * Returns GIT_SUCCESS, GIT_ENOMEM if a record cannot be allocated, or
 * the error from the vector operations.
 */
int git_diff_merge(
	git_diff_list *onto,
	const git_diff_list *from)
{
	int error;
	unsigned int i = 0, j = 0;
	git_vector onto_new;
	git_diff_delta *delta;

	error = git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp);
	if (error < GIT_SUCCESS)
		return error;

	while (i < onto->deltas.length || j < from->deltas.length) {
		git_diff_delta       *o = git_vector_get(&onto->deltas, i);
		const git_diff_delta *f = git_vector_get_const(&from->deltas, j);
		const char *opath =
			!o ? NULL : o->old_file.path ? o->old_file.path : o->new_file.path;
		const char *fpath =
			!f ? NULL : f->old_file.path ? f->old_file.path : f->new_file.path;

		if (opath && (!fpath || strcmp(opath, fpath) < 0)) {
			delta = diff_delta__dup(o);
			i++;
		} else if (fpath && (!opath || strcmp(opath, fpath) > 0)) {
			delta = diff_delta__dup(f);
			j++;
		} else {
			delta = diff_delta__merge_like_cgit(o, f);
			i++;
			j++;
		}

		if (!delta)
			error = GIT_ENOMEM;
		else if ((error = git_vector_insert(&onto_new, delta)) != GIT_SUCCESS)
			/* the scratch vector never took ownership, so the cleanup
			 * loop below would miss this record
			 */
			diff_delta__free(delta);

		if (error != GIT_SUCCESS)
			break;
	}

	if (error == GIT_SUCCESS) {
		git_vector_swap(&onto->deltas, &onto_new);
		onto->new_src = from->new_src;
	}

	/* after a successful swap this holds onto's previous deltas,
	 * otherwise the partially built merge; either way it is discarded
	 */
	git_vector_foreach(&onto_new, i, delta)
		diff_delta__free(delta);
	git_vector_free(&onto_new);

	return error;
}