patch_parse.c 30.4 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
7 8 9

#include "patch_parse.h"

10 11
#include "git2/patch.h"
#include "patch.h"
12
#include "diff_parse.h"
13 14 15
#include "path.h"

typedef struct {
16 17 18
	git_patch base;

	git_patch_parse_ctx *ctx;
19

20 21 22 23 24 25 26 27 28 29 30 31 32 33
	/* the paths from the `diff --git` header, these will be used if this is not
	 * a rename (and rename paths are specified) or if no `+++`/`---` line specify
	 * the paths.
	 */
	char *header_old_path, *header_new_path;

	/* renamed paths are precise and are not prefixed */
	char *rename_old_path, *rename_new_path;

	/* the paths given in `---` and `+++` lines */
	char *old_path, *new_path;

	/* the prefixes from the old/new paths */
	char *old_prefix, *new_prefix;
34 35
} git_patch_parsed;

36 37 38 39 40 41 42 43 44 45 46 47
static int git_parse_err(const char *fmt, ...) GIT_FORMAT_PRINTF(1, 2);

/*
 * Record a printf-style formatted message in the GIT_ERROR_PATCH class.
 *
 * Always returns -1 so that callers can write
 * `return git_parse_err(...)` on their failure paths.
 */
static int git_parse_err(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	git_error_vset(GIT_ERROR_PATCH, fmt, args);
	va_end(args);

	return -1;
}

48
/*
 * Measure the path that starts at the current position of the line.
 *
 * When the line starts with a double quote, the path runs up to and
 * including the first double quote that is not preceded by an unescaped
 * backslash; otherwise it runs up to the first whitespace character.  If no
 * terminator is found the whole remaining line is counted.
 */
static size_t header_path_len(git_patch_parse_ctx *ctx)
{
	const char *line = ctx->parse_ctx.line;
	size_t line_len = ctx->parse_ctx.line_len;
	bool quoted = git_parse_ctx_contains_s(&ctx->parse_ctx, "\"");
	bool escaped = false;
	size_t pos = quoted ? 1 : 0; /* skip the opening quote */

	while (pos < line_len) {
		char ch = line[pos];

		if (quoted) {
			/* an unescaped quote closes the path; count it too */
			if (!escaped && ch == '"')
				return pos + 1;
		} else if (git__isspace(ch)) {
			return pos;
		}

		/* a backslash escapes the next character unless itself escaped */
		escaped = (!escaped && ch == '\\');
		pos++;
	}

	return pos;
}

68
/*
 * Copy `path_len` bytes from the current line into `path`, advance the
 * parser past them, then normalize the result: trailing whitespace is
 * trimmed, a leading double quote triggers unquoting, and repeated slashes
 * are squashed.
 *
 * An empty path -- either because `path_len` is zero or because nothing is
 * left after normalization -- is rejected with an error instead of being
 * handed back to the caller as a usable path.
 *
 * Returns 0 on success or a negative error code.  On failure the caller
 * still owns whatever `path` holds and must dispose of it.
 */
static int parse_header_path_buf(git_buf *path, git_patch_parse_ctx *ctx, size_t path_len)
{
	int error;

	if (!path_len)
		return git_parse_err("patch contains empty path at line %"PRIuZ,
			ctx->parse_ctx.line_num);

	if ((error = git_buf_put(path, ctx->parse_ctx.line, path_len)) < 0)
		goto done;

	git_parse_advance_chars(&ctx->parse_ctx, path_len);

	git_buf_rtrim(path);

	if (path->size > 0 && path->ptr[0] == '"' &&
	    (error = git_buf_unquote(path)) < 0)
		goto done;

	git_path_squash_slashes(path);

	/* e.g. a line of only whitespace, or an empty quoted string */
	if (!path->size)
		error = git_parse_err("patch contains empty path at line %"PRIuZ,
			ctx->parse_ctx.line_num);

done:
	return error;
}

91
/*
 * Parse one path from the `diff --git` header line into `*out`.
 *
 * The buffer contents are detached into `*out` whether or not parsing
 * succeeded; the parse result is returned unchanged.
 */
static int parse_header_path(char **out, git_patch_parse_ctx *ctx)
{
	git_buf buf = GIT_BUF_INIT;
	size_t path_len = header_path_len(ctx);
	int result;

	result = parse_header_path_buf(&buf, ctx, path_len);

	*out = git_buf_detach(&buf);

	return result;
}

static int parse_header_git_oldpath(
102
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
103
{
104 105 106 107 108 109 110 111 112
	git_buf old_path = GIT_BUF_INIT;
	int error;

	if ((error = parse_header_path_buf(&old_path, ctx, ctx->parse_ctx.line_len - 1)) <  0)
		goto out;

	patch->old_path = git_buf_detach(&old_path);

out:
113
	git_buf_dispose(&old_path);
114
	return error;
115 116 117
}

static int parse_header_git_newpath(
118
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
119
{
120 121 122 123 124 125 126 127 128
	git_buf new_path = GIT_BUF_INIT;
	int error;

	if ((error = parse_header_path_buf(&new_path, ctx, ctx->parse_ctx.line_len - 1)) <  0)
		goto out;

	patch->new_path = git_buf_detach(&new_path);

out:
129
	git_buf_dispose(&new_path);
130
	return error;
131 132
}

133
/*
 * Parse an octal file mode at the current position into `*mode`.
 *
 * Returns 0 on success.  On failure -1 is returned with an error message
 * set, including when the parsed number does not fit into a `uint16_t`.
 */
static int parse_header_mode(uint16_t *mode, git_patch_parse_ctx *ctx)
{
	int64_t m;

	if ((git_parse_advance_digit(&m, &ctx->parse_ctx, 8)) < 0)
		return git_parse_err("invalid file mode at line %"PRIuZ, ctx->parse_ctx.line_num);

	/*
	 * The negative check is defensive; whether the digit parser can
	 * yield a negative value is not visible from this file.
	 */
	if (m < 0 || m > UINT16_MAX)
		return git_parse_err("invalid file mode at line %"PRIuZ, ctx->parse_ctx.line_num);

	*mode = (uint16_t)m;

	return 0;
}

static int parse_header_oid(
	git_oid *oid,
150
	uint16_t *oid_len,
151
	git_patch_parse_ctx *ctx)
152 153 154
{
	size_t len;

155 156
	for (len = 0; len < ctx->parse_ctx.line_len && len < GIT_OID_HEXSZ; len++) {
		if (!git__isxdigit(ctx->parse_ctx.line[len]))
157 158 159
			break;
	}

160
	if (len < GIT_OID_MINPREFIXLEN || len > GIT_OID_HEXSZ ||
161 162 163
		git_oid_fromstrn(oid, ctx->parse_ctx.line, len) < 0)
		return git_parse_err("invalid hex formatted object id at line %"PRIuZ,
			ctx->parse_ctx.line_num);
164

165
	git_parse_advance_chars(&ctx->parse_ctx, len);
166

167
	*oid_len = (uint16_t)len;
168 169 170 171 172

	return 0;
}

static int parse_header_git_index(
173
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
174
{
175 176
	char c;

177 178
	if (parse_header_oid(&patch->base.delta->old_file.id,
			&patch->base.delta->old_file.id_abbrev, ctx) < 0 ||
179
		git_parse_advance_expected_str(&ctx->parse_ctx, "..") < 0 ||
180 181
		parse_header_oid(&patch->base.delta->new_file.id,
			&patch->base.delta->new_file.id_abbrev, ctx) < 0)
182 183
		return -1;

184
	if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ' ') {
185 186
		uint16_t mode;

187
		git_parse_advance_chars(&ctx->parse_ctx, 1);
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202

		if (parse_header_mode(&mode, ctx) < 0)
			return -1;

		if (!patch->base.delta->new_file.mode)
			patch->base.delta->new_file.mode = mode;

		if (!patch->base.delta->old_file.mode)
			patch->base.delta->old_file.mode = mode;
	}

	return 0;
}

static int parse_header_git_oldmode(
203
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
204
{
205
	return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
206 207 208
}

static int parse_header_git_newmode(
209
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
210
{
211
	return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
212 213 214 215
}

static int parse_header_git_deletedfilemode(
	git_patch_parsed *patch,
216
	git_patch_parse_ctx *ctx)
217
{
218
	git__free((char *)patch->base.delta->old_file.path);
219

220
	patch->base.delta->old_file.path = NULL;
221
	patch->base.delta->status = GIT_DELTA_DELETED;
222
	patch->base.delta->nfiles = 1;
223

224
	return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
225 226 227 228
}

static int parse_header_git_newfilemode(
	git_patch_parsed *patch,
229
	git_patch_parse_ctx *ctx)
230
{
231
	git__free((char *)patch->base.delta->new_file.path);
232

233
	patch->base.delta->new_file.path = NULL;
234
	patch->base.delta->status = GIT_DELTA_ADDED;
235
	patch->base.delta->nfiles = 1;
236

237
	return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
238 239 240 241
}

static int parse_header_rename(
	char **out,
242
	git_patch_parse_ctx *ctx)
243 244 245
{
	git_buf path = GIT_BUF_INIT;

246
	if (parse_header_path_buf(&path, ctx, header_path_len(ctx)) < 0)
247 248
		return -1;

249 250 251 252
	/* Note: the `rename from` and `rename to` lines include the literal
	 * filename.  They do *not* include the prefix.  (Who needs consistency?)
	 */
	*out = git_buf_detach(&path);
253 254 255 256
	return 0;
}

static int parse_header_renamefrom(
257
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
258
{
259
	patch->base.delta->status = GIT_DELTA_RENAMED;
260
	return parse_header_rename(&patch->rename_old_path, ctx);
261 262 263
}

static int parse_header_renameto(
264
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
265
{
266
	patch->base.delta->status = GIT_DELTA_RENAMED;
267
	return parse_header_rename(&patch->rename_new_path, ctx);
268 269
}

270 271 272 273 274 275 276 277 278 279 280 281 282 283
/* Handle `copy from `: mark a copy and store the source path. */
static int parse_header_copyfrom(
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	git_diff_delta *delta = patch->base.delta;

	delta->status = GIT_DELTA_COPIED;

	return parse_header_rename(&patch->rename_old_path, ctx);
}

/* Handle `copy to `: mark a copy and store the destination path. */
static int parse_header_copyto(
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	git_diff_delta *delta = patch->base.delta;

	delta->status = GIT_DELTA_COPIED;

	return parse_header_rename(&patch->rename_new_path, ctx);
}

284
/*
 * Parse a decimal number followed by `%` and store it in `*out`.
 *
 * Returns 0 when the value lies within 0..100, -1 otherwise.  No error
 * message is set here; callers provide their own.
 */
static int parse_header_percent(uint16_t *out, git_patch_parse_ctx *ctx)
{
	int64_t pct;

	if (git_parse_advance_digit(&pct, &ctx->parse_ctx, 10) < 0 ||
	    git_parse_advance_expected_str(&ctx->parse_ctx, "%") < 0)
		return -1;

	if (pct < 0 || pct > 100)
		return -1;

	*out = (uint16_t)pct;
	return 0;
}

static int parse_header_similarity(
302
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
303 304
{
	if (parse_header_percent(&patch->base.delta->similarity, ctx) < 0)
305 306
		return git_parse_err("invalid similarity percentage at line %"PRIuZ,
			ctx->parse_ctx.line_num);
307 308 309 310 311

	return 0;
}

static int parse_header_dissimilarity(
312
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
313 314 315 316
{
	uint16_t dissimilarity;

	if (parse_header_percent(&dissimilarity, ctx) < 0)
317 318
		return git_parse_err("invalid similarity percentage at line %"PRIuZ,
			ctx->parse_ctx.line_num);
319 320 321 322 323 324

	patch->base.delta->similarity = 100 - dissimilarity;

	return 0;
}

325 326 327
/*
 * Parse the two paths that follow `diff --git ` into
 * `patch->header_old_path` and `patch->header_new_path`.
 *
 * Returns 0 on success, -1 with an error message set when either path
 * cannot be parsed.
 */
static int parse_header_start(git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	bool at_eol;

	if (parse_header_path(&patch->header_old_path, ctx) < 0)
		return git_parse_err("corrupt old path in git diff header at line %"PRIuZ,
			ctx->parse_ctx.line_num);

	if (git_parse_advance_ws(&ctx->parse_ctx) < 0 ||
	    parse_header_path(&patch->header_new_path, ctx) < 0)
		return git_parse_err("corrupt new path in git diff header at line %"PRIuZ,
			ctx->parse_ctx.line_num);

	at_eol = git_parse_ctx_contains(&ctx->parse_ctx, "\n", 1) ||
		git_parse_ctx_contains(&ctx->parse_ctx, "\r\n", 2);

	if (at_eol)
		return 0;

	/*
	 * We cannot expect to always be able to parse the paths correctly at
	 * this point.  Unquoted names, whitespace in filenames and custom
	 * prefixes all make the line ambiguous, so leftover data is tolerated:
	 * skip to the end of the line, drop both header paths and proceed,
	 * hoping for the "---" and "+++" lines to provide the paths instead.
	 */
	git_parse_advance_chars(&ctx->parse_ctx, ctx->parse_ctx.line_len - 1);

	git__free(patch->header_old_path);
	patch->header_old_path = NULL;

	git__free(patch->header_new_path);
	patch->header_new_path = NULL;

	return 0;
}

/*
 * States of the git patch header parser.  STATE_START must stay the first
 * enumerator: the "next patch" rows of the transition table use a literal
 * 0 as their next state.
 */
typedef enum {
	STATE_START,      /* nothing parsed yet */

	STATE_DIFF,       /* after the `diff --git` line */
	STATE_FILEMODE,   /* after `deleted file mode` / `new file mode` */
	STATE_MODE,       /* after `old mode` */
	STATE_INDEX,      /* after `index` */
	STATE_PATH,       /* after `---` */

	STATE_SIMILARITY, /* after `similarity index` / `dissimilarity index` */
	STATE_RENAME,     /* after `rename from` / `rename old` */
	STATE_COPY,       /* after `copy from` */

	STATE_END,        /* the header may end here */
} parse_header_state;

372 373
typedef struct {
	const char *str;
374 375
	parse_header_state expected_state;
	parse_header_state next_state;
376
	int(*fn)(git_patch_parsed *, git_patch_parse_ctx *);
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
} parse_header_transition;

static const parse_header_transition transitions[] = {
	/* Start */
	{ "diff --git "         , STATE_START,      STATE_DIFF,       parse_header_start },

	{ "deleted file mode "  , STATE_DIFF,       STATE_FILEMODE,   parse_header_git_deletedfilemode },
	{ "new file mode "      , STATE_DIFF,       STATE_FILEMODE,   parse_header_git_newfilemode },
	{ "old mode "           , STATE_DIFF,       STATE_MODE,       parse_header_git_oldmode },
	{ "new mode "           , STATE_MODE,       STATE_END,        parse_header_git_newmode },

	{ "index "              , STATE_FILEMODE,   STATE_INDEX,      parse_header_git_index },
	{ "index "              , STATE_DIFF,       STATE_INDEX,      parse_header_git_index },
	{ "index "              , STATE_END,        STATE_INDEX,      parse_header_git_index },

	{ "--- "                , STATE_INDEX,      STATE_PATH,       parse_header_git_oldpath },
	{ "+++ "                , STATE_PATH,       STATE_END,        parse_header_git_newpath },
	{ "GIT binary patch"    , STATE_INDEX,      STATE_END,        NULL },
	{ "Binary files "       , STATE_INDEX,      STATE_END,        NULL },

	{ "similarity index "   , STATE_DIFF,       STATE_SIMILARITY, parse_header_similarity },
	{ "dissimilarity index ", STATE_DIFF,       STATE_SIMILARITY, parse_header_dissimilarity },
	{ "rename from "        , STATE_SIMILARITY, STATE_RENAME,     parse_header_renamefrom },
	{ "rename old "         , STATE_SIMILARITY, STATE_RENAME,     parse_header_renamefrom },
	{ "copy from "          , STATE_SIMILARITY, STATE_COPY,       parse_header_copyfrom },
	{ "rename to "          , STATE_RENAME,     STATE_END,        parse_header_renameto },
	{ "rename new "         , STATE_RENAME,     STATE_END,        parse_header_renameto },
	{ "copy to "            , STATE_COPY,       STATE_END,        parse_header_copyto },

	/* Next patch */
	{ "diff --git "         , STATE_END,        0,                NULL },
	{ "@@ -"                , STATE_END,        0,                NULL },
409
	{ "-- "                 , STATE_END,        0,                NULL },
410 411 412 413
};

static int parse_header_git(
	git_patch_parsed *patch,
414
	git_patch_parse_ctx *ctx)
415 416 417
{
	size_t i;
	int error = 0;
418
	parse_header_state state = STATE_START;
419 420

	/* Parse remaining header lines */
421
	for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
422 423
		bool found = false;

424
		if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n')
425 426
			break;

427 428 429
		for (i = 0; i < ARRAY_SIZE(transitions); i++) {
			const parse_header_transition *transition = &transitions[i];
			size_t op_len = strlen(transition->str);
430

431
			if (transition->expected_state != state ||
432
			    git__prefixcmp(ctx->parse_ctx.line, transition->str) != 0)
433 434
				continue;

435 436
			state = transition->next_state;

437
			/* Do not advance if this is the patch separator */
438
			if (transition->fn == NULL)
439 440
				goto done;

441
			git_parse_advance_chars(&ctx->parse_ctx, op_len);
442

443
			if ((error = transition->fn(patch, ctx)) < 0)
444 445
				goto done;

446
			git_parse_advance_ws(&ctx->parse_ctx);
447

448 449 450
			if (git_parse_advance_expected_str(&ctx->parse_ctx, "\n") < 0 ||
			    ctx->parse_ctx.line_len > 0) {
				error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
451 452 453
				goto done;
			}

454
			found = true;
455 456
			break;
		}
457

458
		if (!found) {
459 460
			error = git_parse_err("invalid patch header at line %"PRIuZ,
				ctx->parse_ctx.line_num);
461 462
			goto done;
		}
463 464
	}

465
	if (state != STATE_END) {
466
		error = git_parse_err("unexpected header line %"PRIuZ, ctx->parse_ctx.line_num);
467 468 469
		goto done;
	}

470 471 472 473
done:
	return error;
}

474
/*
 * Parse a decimal number at the current position into `*out`.
 *
 * Returns 0 on success, or -1 when no number can be parsed or the value
 * does not fit into an `int`.
 */
static int parse_int(int *out, git_patch_parse_ctx *ctx)
{
	git_off_t value;

	if (git_parse_advance_digit(&value, &ctx->parse_ctx, 10) < 0)
		return -1;

	if (!git__is_int(value))
		return -1;

	*out = (int)value;
	return 0;
}

static int parse_hunk_header(
	git_patch_hunk *hunk,
487
	git_patch_parse_ctx *ctx)
488
{
489
	const char *header_start = ctx->parse_ctx.line;
490
	char c;
491 492 493 494

	hunk->hunk.old_lines = 1;
	hunk->hunk.new_lines = 1;

495
	if (git_parse_advance_expected_str(&ctx->parse_ctx, "@@ -") < 0 ||
496 497 498
		parse_int(&hunk->hunk.old_start, ctx) < 0)
		goto fail;

499
	if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
500
		if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
501 502 503 504
			parse_int(&hunk->hunk.old_lines, ctx) < 0)
			goto fail;
	}

505
	if (git_parse_advance_expected_str(&ctx->parse_ctx, " +") < 0 ||
506 507 508
		parse_int(&hunk->hunk.new_start, ctx) < 0)
		goto fail;

509
	if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
510
		if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
511 512 513 514
			parse_int(&hunk->hunk.new_lines, ctx) < 0)
			goto fail;
	}

515
	if (git_parse_advance_expected_str(&ctx->parse_ctx, " @@") < 0)
516 517
		goto fail;

518
	git_parse_advance_line(&ctx->parse_ctx);
519 520 521 522

	if (!hunk->hunk.old_lines && !hunk->hunk.new_lines)
		goto fail;

523
	hunk->hunk.header_len = ctx->parse_ctx.line - header_start;
524
	if (hunk->hunk.header_len > (GIT_DIFF_HUNK_HEADER_SIZE - 1))
525 526
		return git_parse_err("oversized patch hunk header at line %"PRIuZ,
			ctx->parse_ctx.line_num);
527 528 529 530 531 532 533

	memcpy(hunk->hunk.header, header_start, hunk->hunk.header_len);
	hunk->hunk.header[hunk->hunk.header_len] = '\0';

	return 0;

fail:
534
	git_error_set(GIT_ERROR_PATCH, "invalid patch hunk header at line %"PRIuZ,
535
		ctx->parse_ctx.line_num);
536 537 538
	return -1;
}

539 540 541 542 543 544 545 546
/*
 * Map the origin of a line to the matching "no newline at end of file"
 * origin.  Anything that is neither an addition nor a deletion maps to the
 * context variant.
 */
static int eof_for_origin(int origin) {
	switch (origin) {
	case GIT_DIFF_LINE_ADDITION:
		return GIT_DIFF_LINE_ADD_EOFNL;
	case GIT_DIFF_LINE_DELETION:
		return GIT_DIFF_LINE_DEL_EOFNL;
	default:
		return GIT_DIFF_LINE_CONTEXT_EOFNL;
	}
}

547 548 549
static int parse_hunk_body(
	git_patch_parsed *patch,
	git_patch_hunk *hunk,
550
	git_patch_parse_ctx *ctx)
551 552 553 554 555 556
{
	git_diff_line *line;
	int error = 0;

	int oldlines = hunk->hunk.old_lines;
	int newlines = hunk->hunk.new_lines;
557
	int last_origin = 0;
558 559

	for (;
560
		ctx->parse_ctx.remain_len > 1 &&
561
		(oldlines || newlines) &&
562
		!git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -");
563
		git_parse_advance_line(&ctx->parse_ctx)) {
564

565
		char c;
566 567
		int origin;
		int prefix = 1;
568 569
		int old_lineno = hunk->hunk.old_start + (hunk->hunk.old_lines - oldlines);
		int new_lineno = hunk->hunk.new_start + (hunk->hunk.new_lines - newlines);
570

571 572 573
		if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n') {
			error = git_parse_err("invalid patch instruction at line %"PRIuZ,
				ctx->parse_ctx.line_num);
574 575 576
			goto done;
		}

577 578 579
		git_parse_peek(&c, &ctx->parse_ctx, 0);

		switch (c) {
580 581
		case '\n':
			prefix = 0;
582
			/* fall through */
583 584 585 586 587 588 589 590 591 592

		case ' ':
			origin = GIT_DIFF_LINE_CONTEXT;
			oldlines--;
			newlines--;
			break;

		case '-':
			origin = GIT_DIFF_LINE_DELETION;
			oldlines--;
593
			new_lineno = -1;
594 595 596 597 598
			break;

		case '+':
			origin = GIT_DIFF_LINE_ADDITION;
			newlines--;
599
			old_lineno = -1;
600 601
			break;

602 603 604 605 606 607
		case '\\':
			/*
			 * If there are no oldlines left, then this is probably
			 * the "\ No newline at end of file" marker. Do not
			 * verify its format, as it may be localized.
			 */
608 609 610 611 612 613 614
			if (!oldlines) {
				prefix = 0;
				origin = eof_for_origin(last_origin);
				old_lineno = -1;
				new_lineno = -1;
				break;
			}
615 616
			/* fall through */

617
		default:
618
			error = git_parse_err("invalid patch hunk at line %"PRIuZ, ctx->parse_ctx.line_num);
619 620 621 622
			goto done;
		}

		line = git_array_alloc(patch->base.lines);
623
		GIT_ERROR_CHECK_ALLOC(line);
624 625 626

		memset(line, 0x0, sizeof(git_diff_line));

627
		line->content_len = ctx->parse_ctx.line_len - prefix;
628
		line->content = git__strndup(ctx->parse_ctx.line + prefix, line->content_len);
629
		line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
630
		line->origin = origin;
631 632 633
		line->num_lines = 1;
		line->old_lineno = old_lineno;
		line->new_lineno = new_lineno;
634 635

		hunk->line_count++;
636 637

		last_origin = origin;
638 639 640
	}

	if (oldlines || newlines) {
641
		error = git_parse_err(
642 643 644 645 646
			"invalid patch hunk, expected %d old lines and %d new lines",
			hunk->hunk.old_lines, hunk->hunk.new_lines);
		goto done;
	}

647
	/*
648
	 * Handle "\ No newline at end of file". Only expect the leading
649 650 651 652
	 * backslash, though, because the rest of the string could be
	 * localized.  Because `diff` optimizes for the case where you
	 * want to apply the patch by hand.
	 */
653
	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "\\ ") &&
654 655 656 657 658
		git_array_size(patch->base.lines) > 0) {

		line = git_array_get(patch->base.lines, git_array_size(patch->base.lines) - 1);

		if (line->content_len < 1) {
659
			error = git_parse_err("last line has no trailing newline");
660 661 662
			goto done;
		}

663 664 665 666 667
		line = git_array_alloc(patch->base.lines);
		GIT_ERROR_CHECK_ALLOC(line);

		memset(line, 0x0, sizeof(git_diff_line));

668
		line->content = git__strdup(ctx->parse_ctx.line);
669 670 671 672 673 674 675 676
		line->content_len = ctx->parse_ctx.line_len;
		line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
		line->origin = eof_for_origin(last_origin);
		line->num_lines = 1;
		line->old_lineno = -1;
		line->new_lineno = -1;

		hunk->line_count++;
677

678
		git_parse_advance_line(&ctx->parse_ctx);
679 680 681 682 683 684
	}

done:
	return error;
}

685
static int parse_patch_header(
686
	git_patch_parsed *patch,
687
	git_patch_parse_ctx *ctx)
688 689 690
{
	int error = 0;

691
	for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
692
		/* This line is too short to be a patch header. */
693
		if (ctx->parse_ctx.line_len < 6)
694 695 696
			continue;

		/* This might be a hunk header without a patch header, provide a
697
		 * sensible error message. */
698 699
		if (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
			size_t line_num = ctx->parse_ctx.line_num;
700 701 702 703 704 705
			git_patch_hunk hunk;

			/* If this cannot be parsed as a hunk header, it's just leading
			* noise, continue.
			*/
			if (parse_hunk_header(&hunk, ctx) < 0) {
706
				git_error_clear();
707 708 709
				continue;
			}

710
			error = git_parse_err("invalid hunk header outside patch at line %"PRIuZ,
711 712 713 714 715
				line_num);
			goto done;
		}

		/* This buffer is too short to contain a patch. */
716
		if (ctx->parse_ctx.remain_len < ctx->parse_ctx.line_len + 6)
717 718 719
			break;

		/* A proper git patch */
720
		if (git_parse_ctx_contains_s(&ctx->parse_ctx, "diff --git ")) {
721
			error = parse_header_git(patch, ctx);
722 723 724 725 726 727 728
			goto done;
		}

		error = 0;
		continue;
	}

729
	git_error_set(GIT_ERROR_PATCH, "no patch found");
730
	error = GIT_ENOTFOUND;
731 732 733 734 735

done:
	return error;
}

736
static int parse_patch_binary_side(
737
	git_diff_binary_file *binary,
738
	git_patch_parse_ctx *ctx)
739 740 741 742 743 744
{
	git_diff_binary_t type = GIT_DIFF_BINARY_NONE;
	git_buf base85 = GIT_BUF_INIT, decoded = GIT_BUF_INIT;
	git_off_t len;
	int error = 0;

745
	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "literal ")) {
746
		type = GIT_DIFF_BINARY_LITERAL;
747 748
		git_parse_advance_chars(&ctx->parse_ctx, 8);
	} else if (git_parse_ctx_contains_s(&ctx->parse_ctx, "delta ")) {
749
		type = GIT_DIFF_BINARY_DELTA;
750
		git_parse_advance_chars(&ctx->parse_ctx, 6);
751
	} else {
752 753
		error = git_parse_err(
			"unknown binary delta type at line %"PRIuZ, ctx->parse_ctx.line_num);
754 755 756
		goto done;
	}

757 758
	if (git_parse_advance_digit(&len, &ctx->parse_ctx, 10) < 0 ||
	    git_parse_advance_nl(&ctx->parse_ctx) < 0 || len < 0) {
759
		error = git_parse_err("invalid binary size at line %"PRIuZ, ctx->parse_ctx.line_num);
760 761 762
		goto done;
	}

763
	while (ctx->parse_ctx.line_len) {
764
		char c;
765 766
		size_t encoded_len, decoded_len = 0, decoded_orig = decoded.size;

767 768
		git_parse_peek(&c, &ctx->parse_ctx, 0);

769 770 771 772 773 774 775 776
		if (c == '\n')
			break;
		else if (c >= 'A' && c <= 'Z')
			decoded_len = c - 'A' + 1;
		else if (c >= 'a' && c <= 'z')
			decoded_len = c - 'a' + (('z' - 'a') + 1) + 1;

		if (!decoded_len) {
777
			error = git_parse_err("invalid binary length at line %"PRIuZ, ctx->parse_ctx.line_num);
778 779 780
			goto done;
		}

781
		git_parse_advance_chars(&ctx->parse_ctx, 1);
782 783 784

		encoded_len = ((decoded_len / 4) + !!(decoded_len % 4)) * 5;

785 786
		if (encoded_len > ctx->parse_ctx.line_len - 1) {
			error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
787 788 789 790
			goto done;
		}

		if ((error = git_buf_decode_base85(
791
			&decoded, ctx->parse_ctx.line, encoded_len, decoded_len)) < 0)
792 793 794
			goto done;

		if (decoded.size - decoded_orig != decoded_len) {
795
			error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
796 797 798
			goto done;
		}

799
		git_parse_advance_chars(&ctx->parse_ctx, encoded_len);
800

801 802
		if (git_parse_advance_nl(&ctx->parse_ctx) < 0) {
			error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
803 804 805 806 807 808 809 810 811 812
			goto done;
		}
	}

	binary->type = type;
	binary->inflatedlen = (size_t)len;
	binary->datalen = decoded.size;
	binary->data = git_buf_detach(&decoded);

done:
813 814
	git_buf_dispose(&base85);
	git_buf_dispose(&decoded);
815 816 817
	return error;
}

818
static int parse_patch_binary(
819
	git_patch_parsed *patch,
820
	git_patch_parse_ctx *ctx)
821 822 823
{
	int error;

824 825 826
	if (git_parse_advance_expected_str(&ctx->parse_ctx, "GIT binary patch") < 0 ||
		git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
827 828

	/* parse old->new binary diff */
829
	if ((error = parse_patch_binary_side(
830 831 832
			&patch->base.binary.new_file, ctx)) < 0)
		return error;

833 834 835
	if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary separator at line %"PRIuZ,
			ctx->parse_ctx.line_num);
836 837

	/* parse new->old binary diff */
838
	if ((error = parse_patch_binary_side(
839 840 841
			&patch->base.binary.old_file, ctx)) < 0)
		return error;

842 843 844
	if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary patch separator at line %"PRIuZ,
			ctx->parse_ctx.line_num);
845

846 847 848 849 850 851 852 853 854
	patch->base.binary.contains_data = 1;
	patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
	return 0;
}

/*
 * Parse a `Binary files <old> and <new> differ` line, which marks a binary
 * change that carries no data.
 *
 * The names on the line must match the paths from the `diff --git` header,
 * except that the old name of an added file and the new name of a deleted
 * file are expected to be `/dev/null`.  The header paths can be missing when
 * the header line could not be parsed unambiguously; that is reported as an
 * error rather than comparing against a NULL string.
 *
 * On success the patch is flagged as binary without data.  Returns 0 on
 * success, -1 with an error message set on failure.
 */
static int parse_patch_binary_nodata(
	git_patch_parsed *patch,
	git_patch_parse_ctx *ctx)
{
	const char *old_name = patch->header_old_path;
	const char *new_name = patch->header_new_path;

	if (!old_name || !new_name)
		return git_parse_err("corrupt binary data without paths at line %"PRIuZ,
			ctx->parse_ctx.line_num);

	if (patch->base.delta->status == GIT_DELTA_ADDED)
		old_name = "/dev/null";
	else if (patch->base.delta->status == GIT_DELTA_DELETED)
		new_name = "/dev/null";

	if (git_parse_advance_expected_str(&ctx->parse_ctx, "Binary files ") < 0 ||
		git_parse_advance_expected_str(&ctx->parse_ctx, old_name) < 0 ||
		git_parse_advance_expected_str(&ctx->parse_ctx, " and ") < 0 ||
		git_parse_advance_expected_str(&ctx->parse_ctx, new_name) < 0 ||
		git_parse_advance_expected_str(&ctx->parse_ctx, " differ") < 0 ||
		git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);

	patch->base.binary.contains_data = 0;
	patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
	return 0;
}

868
static int parse_patch_hunks(
869
	git_patch_parsed *patch,
870
	git_patch_parse_ctx *ctx)
871 872 873 874
{
	git_patch_hunk *hunk;
	int error = 0;

875
	while (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
876
		hunk = git_array_alloc(patch->base.hunks);
877
		GIT_ERROR_CHECK_ALLOC(hunk);
878 879 880 881 882 883 884 885 886 887 888

		memset(hunk, 0, sizeof(git_patch_hunk));

		hunk->line_start = git_array_size(patch->base.lines);
		hunk->line_count = 0;

		if ((error = parse_hunk_header(hunk, ctx)) < 0 ||
			(error = parse_hunk_body(patch, hunk, ctx)) < 0)
			goto done;
	}

889 890
	patch->base.delta->flags |= GIT_DIFF_FLAG_NOT_BINARY;

891 892 893 894
done:
	return error;
}

895 896
/*
 * Dispatch on the start of the patch body: binary patch with data, binary
 * marker without data, or text hunks.
 */
static int parse_patch_body(
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "GIT binary patch"))
		return parse_patch_binary(patch, ctx);

	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "Binary files "))
		return parse_patch_binary_nodata(patch, ctx);

	return parse_patch_hunks(patch, ctx);
}

906 907 908 909 910 911 912 913 914 915
/*
 * Check a path from the `diff --git` header (`one`) against the path from
 * the `---`/`+++` line (`two`).
 *
 * When `two_null` is set, `two` has to be `/dev/null`; otherwise both names
 * must be equal.  Nothing is checked when either name is missing.
 * `old_or_new` only labels the error message.
 *
 * Returns 0 when the names are acceptable, -1 with an error message set
 * otherwise.
 */
int check_header_names(
	const char *one,
	const char *two,
	const char *old_or_new,
	bool two_null)
{
	if (one == NULL || two == NULL)
		return 0;

	if (two_null) {
		if (strcmp(two, "/dev/null") != 0)
			return git_parse_err("expected %s path of '/dev/null'", old_or_new);

		return 0;
	}

	if (strcmp(one, two) != 0)
		return git_parse_err("mismatched %s path names", old_or_new);

	return 0;
}

/*
 * Split the prefix off `path_start`, where the prefix spans as many path
 * components as `patch->ctx->opts.prefix_len` says.
 *
 * On success `*out` receives a newly allocated copy of the prefix and
 * `*out_len` its length; a prefix length of zero yields an empty prefix.
 * Returns -1 when the path has too few components, when nothing is left
 * after the prefix, or when the copy cannot be allocated.
 */
static int check_prefix(
	char **out,
	size_t *out_len,
	git_patch_parsed *patch,
	const char *path_start)
{
	const char *cursor = path_start;
	size_t wanted = patch->ctx->opts.prefix_len;
	size_t missing = wanted;

	*out = NULL;
	*out_len = 0;

	if (wanted != 0) {
		/* leading slashes do not count as part of the prefix in git apply */
		while (*cursor == '/')
			cursor++;

		/* step past one slash per requested component */
		for (; *cursor && missing; cursor++) {
			if (*cursor == '/')
				missing--;
		}

		if (missing || !*cursor)
			return git_parse_err(
				"header filename does not contain %"PRIuZ" path components",
				wanted);
	}

	*out_len = (cursor - path_start);
	*out = git__strndup(path_start, *out_len);

	return (*out == NULL) ? -1 : 0;
}

/*
 * Validate the paths collected from the header and derive the delta's old
 * and new file paths from them.
 *
 * The `---`/`+++` paths must come as a pair and agree with the
 * `diff --git` header paths (or be `/dev/null` for the missing side of an
 * added or deleted file).  The configured prefix is split off into
 * `patch->old_prefix` / `patch->new_prefix`.  Rename/copy paths take
 * precedence for the delta since they are unambiguous and unprefixed.
 *
 * Returns 0 on success, -1 with an error message set on failure.
 */
static int check_filenames(git_patch_parsed *patch)
{
	const char *prefixed_new, *prefixed_old;
	size_t old_prefixlen = 0, new_prefixlen = 0;
	bool added = (patch->base.delta->status == GIT_DELTA_ADDED);
	bool deleted = (patch->base.delta->status == GIT_DELTA_DELETED);

	if (patch->old_path && !patch->new_path)
		return git_parse_err("missing new path");

	if (!patch->old_path && patch->new_path)
		return git_parse_err("missing old path");

	/* Ensure (non-renamed) paths match */
	if (check_header_names(patch->header_old_path, patch->old_path, "old", added) < 0 ||
	    check_header_names(patch->header_new_path, patch->new_path, "new", deleted) < 0)
		return -1;

	prefixed_old = (!added && patch->old_path) ? patch->old_path : patch->header_old_path;
	prefixed_new = (!deleted && patch->new_path) ? patch->new_path : patch->header_new_path;

	if ((prefixed_old && check_prefix(&patch->old_prefix, &old_prefixlen, patch, prefixed_old) < 0) ||
	    (prefixed_new && check_prefix(&patch->new_prefix, &new_prefixlen, patch, prefixed_new) < 0))
		return -1;

	/*
	 * Prefer the rename filenames as they are unambiguous and unprefixed.
	 * The prefixed paths may be missing altogether, so do not do pointer
	 * arithmetic on them unless they are set.
	 */
	if (patch->rename_old_path)
		patch->base.delta->old_file.path = patch->rename_old_path;
	else
		patch->base.delta->old_file.path =
			prefixed_old ? prefixed_old + old_prefixlen : NULL;

	if (patch->rename_new_path)
		patch->base.delta->new_file.path = patch->rename_new_path;
	else
		patch->base.delta->new_file.path =
			prefixed_new ? prefixed_new + new_prefixlen : NULL;

	if (!patch->base.delta->old_file.path &&
	    !patch->base.delta->new_file.path)
		return git_parse_err("git diff header lacks old / new paths");

	return 0;
}

/*
 * Run the final consistency checks on a parsed patch and fill in derived
 * delta fields.
 *
 * Returns 0 on success, -1 with an error message set on failure.
 */
static int check_patch(git_patch_parsed *patch)
{
	git_diff_delta *delta = patch->base.delta;

	if (check_filenames(patch) < 0)
		return -1;

	/* an unset new mode is inherited from the old file, unless deleted */
	if (delta->old_file.path &&
	    delta->status != GIT_DELTA_DELETED &&
	    !delta->new_file.mode)
		delta->new_file.mode = delta->old_file.mode;

	/* a non-binary modification that changes neither mode nor content */
	if (delta->status == GIT_DELTA_MODIFIED &&
	    !(delta->flags & GIT_DIFF_FLAG_BINARY) &&
	    delta->new_file.mode == delta->old_file.mode &&
	    git_array_size(patch->base.hunks) == 0)
		return git_parse_err("patch with no hunks");

	/* clear the object id of the side that does not exist */
	switch (delta->status) {
	case GIT_DELTA_ADDED:
		memset(&delta->old_file.id, 0x0, sizeof(git_oid));
		delta->old_file.id_abbrev = 0;
		break;
	case GIT_DELTA_DELETED:
		memset(&delta->new_file.id, 0x0, sizeof(git_oid));
		delta->new_file.id_abbrev = 0;
		break;
	default:
		break;
	}

	return 0;
}

1037
git_patch_parse_ctx *git_patch_parse_ctx_init(
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
	const char *content,
	size_t content_len,
	const git_patch_options *opts)
{
	git_patch_parse_ctx *ctx;
	git_patch_options default_opts = GIT_PATCH_OPTIONS_INIT;

	if ((ctx = git__calloc(1, sizeof(git_patch_parse_ctx))) == NULL)
		return NULL;

1048 1049 1050
	if ((git_parse_ctx_init(&ctx->parse_ctx, content, content_len)) < 0) {
		git__free(ctx);
		return NULL;
1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066
	}

	if (opts)
		memcpy(&ctx->opts, opts, sizeof(git_patch_options));
	else
		memcpy(&ctx->opts, &default_opts, sizeof(git_patch_options));

	GIT_REFCOUNT_INC(ctx);
	return ctx;
}

/* Release a parse context and its storage; NULL is ignored. */
static void patch_parse_ctx_free(git_patch_parse_ctx *ctx)
{
	if (ctx != NULL) {
		git_parse_ctx_clear(&ctx->parse_ctx);
		git__free(ctx);
	}
}

1071
/*
 * Drop one reference to `ctx` through GIT_REFCOUNT_DEC, with
 * patch_parse_ctx_free as the function that releases the context.
 */
void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx)
{
	GIT_REFCOUNT_DEC(ctx, patch_parse_ctx_free);
}

1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089
/*
 * Look up the patch at index `idx` of a parsed diff.
 *
 * On success the patch's reference count is incremented, it is stored in
 * `*out` and 0 is returned.  Returns -1 when there is no patch at that
 * index.
 */
int git_patch_parsed_from_diff(git_patch **out, git_diff *d, size_t idx)
{
	git_diff_parsed *parsed = (git_diff_parsed *)d;
	git_patch *found;

	found = git_vector_get(&parsed->patches, idx);
	if (found == NULL)
		return -1;

	GIT_REFCOUNT_INC(found);
	*out = found;

	return 0;
}

1090 1091 1092
/*
 * Free callback for parsed patches: drops the reference on the parse
 * context and releases everything the patch owns, then the patch itself.
 * NULL is ignored.
 */
static void patch_parsed__free(git_patch *p)
{
	git_patch_parsed *patch = (git_patch_parsed *)p;
	git_diff_line *line;
	size_t i;

	if (patch == NULL)
		return;

	git_patch_parse_ctx_free(patch->ctx);

	/* binary payloads */
	git__free((char *)patch->base.binary.old_file.data);
	git__free((char *)patch->base.binary.new_file.data);

	/* hunks, and lines together with their content */
	git_array_clear(patch->base.hunks);
	git_array_foreach(patch->base.lines, i, line)
		git__free((char *) line->content);
	git_array_clear(patch->base.lines);

	git__free(patch->base.delta);

	/* path strings collected while parsing the header */
	git__free(patch->old_prefix);
	git__free(patch->new_prefix);
	git__free(patch->header_old_path);
	git__free(patch->header_new_path);
	git__free(patch->rename_old_path);
	git__free(patch->rename_new_path);
	git__free(patch->old_path);
	git__free(patch->new_path);

	git__free(patch);
}

1120
int git_patch_parse(
1121
	git_patch **out,
1122
	git_patch_parse_ctx *ctx)
1123 1124
{
	git_patch_parsed *patch;
1125
	size_t start, used;
1126 1127
	int error = 0;

1128 1129
	assert(out && ctx);

1130 1131 1132
	*out = NULL;

	patch = git__calloc(1, sizeof(git_patch_parsed));
1133
	GIT_ERROR_CHECK_ALLOC(patch);
1134

1135 1136
	patch->ctx = ctx;
	GIT_REFCOUNT_INC(patch->ctx);
1137 1138

	patch->base.free_fn = patch_parsed__free;
1139

1140
	patch->base.delta = git__calloc(1, sizeof(git_diff_delta));
1141
	GIT_ERROR_CHECK_ALLOC(patch->base.delta);
1142

1143
	patch->base.delta->status = GIT_DELTA_MODIFIED;
1144
	patch->base.delta->nfiles = 2;
1145

1146
	start = ctx->parse_ctx.remain_len;
1147

1148 1149
	if ((error = parse_patch_header(patch, ctx)) < 0 ||
		(error = parse_patch_body(patch, ctx)) < 0 ||
1150 1151 1152
		(error = check_patch(patch)) < 0)
		goto done;

1153 1154
	used = start - ctx->parse_ctx.remain_len;
	ctx->parse_ctx.remain += used;
1155

1156 1157 1158 1159
	patch->base.diff_opts.old_prefix = patch->old_prefix;
	patch->base.diff_opts.new_prefix = patch->new_prefix;
	patch->base.diff_opts.flags |= GIT_DIFF_SHOW_BINARY;

1160
	GIT_REFCOUNT_INC(&patch->base);
1161 1162 1163
	*out = &patch->base;

done:
1164 1165 1166
	if (error < 0)
		patch_parsed__free(&patch->base);

1167 1168
	return error;
}
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179

/*
 * Parse a single patch from `content`.
 *
 * A temporary parse context is created for the call and this function's
 * reference on it is dropped before returning.  Returns the result of
 * git_patch_parse, or -1 when the context cannot be allocated.
 */
int git_patch_from_buffer(
	git_patch **out,
	const char *content,
	size_t content_len,
	const git_patch_options *opts)
{
	git_patch_parse_ctx *ctx = git_patch_parse_ctx_init(content, content_len, opts);
	int result;

	GIT_ERROR_CHECK_ALLOC(ctx);

	result = git_patch_parse(out, ctx);
	git_patch_parse_ctx_free(ctx);

	return result;
}