patch_parse.c 32.3 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
7 8 9

#include "patch_parse.h"

10 11
#include "git2/patch.h"
#include "patch.h"
12
#include "diff_parse.h"
13
#include "fs_path.h"
14 15

typedef struct {
16 17 18
	git_patch base;

	git_patch_parse_ctx *ctx;
19

20 21 22 23 24 25 26 27 28 29 30 31 32 33
	/* the paths from the `diff --git` header, these will be used if this is not
	 * a rename (and rename paths are specified) or if no `+++`/`---` line specify
	 * the paths.
	 */
	char *header_old_path, *header_new_path;

	/* renamed paths are precise and are not prefixed */
	char *rename_old_path, *rename_new_path;

	/* the paths given in `---` and `+++` lines */
	char *old_path, *new_path;

	/* the prefixes from the old/new paths */
	char *old_prefix, *new_prefix;
34 35
} git_patch_parsed;

36 37 38 39 40 41 42 43 44 45 46 47
/*
 * Set a GIT_ERROR_PATCH error from a printf-style format string and its
 * arguments.  Always returns -1, so callers can write
 * `return git_parse_err(...)`.
 */
static int git_parse_err(const char *fmt, ...) GIT_FORMAT_PRINTF(1, 2);
static int git_parse_err(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	git_error_vset(GIT_ERROR_PATCH, fmt, args);
	va_end(args);

	return -1;
}

48
/*
 * Measure the path token at the start of the current line.
 *
 * When the line begins with a double quote, the token runs up to and
 * including the first double quote that is not preceded by an
 * (unescaped) backslash.  Otherwise the token ends just before the first
 * whitespace character.  If no terminator is found, the full line length
 * is returned.
 */
static size_t header_path_len(git_patch_parse_ctx *ctx)
{
	bool quoted = git_parse_ctx_contains_s(&ctx->parse_ctx, "\"");
	bool escaped = 0;
	size_t pos = quoted ? 1 : 0;

	while (pos < ctx->parse_ctx.line_len) {
		char ch = ctx->parse_ctx.line[pos];

		if (quoted) {
			/* closing quote: count it as part of the token */
			if (!escaped && ch == '"')
				return pos + 1;
		} else if (git__isspace(ch)) {
			return pos;
		}

		/* a backslash escapes the next character, unless itself escaped */
		escaped = (!escaped && ch == '\\');
		pos++;
	}

	return pos;
}

68
/*
 * Read `path_len` bytes from the current line into `path` and advance the
 * parse context by the same amount.  The buffered text then has trailing
 * whitespace trimmed, is unquoted if it starts with a double quote, and
 * is passed through git_fs_path_squash_slashes.
 *
 * Returns 0 on success, the failing helper's error code, or -1 (with a
 * patch error set) when the resulting path is empty.
 */
static int parse_header_path_buf(git_str *path, git_patch_parse_ctx *ctx, size_t path_len)
{
	int error = git_str_put(path, ctx->parse_ctx.line, path_len);

	if (error < 0)
		return error;

	git_parse_advance_chars(&ctx->parse_ctx, path_len);
	git_str_rtrim(path);

	if (path->size > 0 && path->ptr[0] == '"') {
		error = git_str_unquote(path);

		if (error < 0)
			return error;
	}

	git_fs_path_squash_slashes(path);

	if (path->size == 0)
		return git_parse_err("patch contains empty path at line %"PRIuZ,
				     ctx->parse_ctx.line_num);

	return 0;
}

92
/*
 * Parse the path token at the current position and, on success, hand
 * ownership of the resulting string to `*out`.  On failure `*out` is left
 * untouched and the error code from parse_header_path_buf is returned.
 */
static int parse_header_path(char **out, git_patch_parse_ctx *ctx)
{
	git_str path = GIT_STR_INIT;
	size_t path_len = header_path_len(ctx);
	int error = parse_header_path_buf(&path, ctx, path_len);

	if (error >= 0)
		*out = git_str_detach(&path);

	git_str_dispose(&path);
	return error;
}

static int parse_header_git_oldpath(
107
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
108
{
109
	git_str old_path = GIT_STR_INIT;
110 111
	int error;

112 113 114 115 116 117
	if (patch->old_path) {
		error = git_parse_err("patch contains duplicate old path at line %"PRIuZ,
				      ctx->parse_ctx.line_num);
		goto out;
	}

118 119 120
	if ((error = parse_header_path_buf(&old_path, ctx, ctx->parse_ctx.line_len - 1)) <  0)
		goto out;

121
	patch->old_path = git_str_detach(&old_path);
122 123

out:
124
	git_str_dispose(&old_path);
125
	return error;
126 127 128
}

static int parse_header_git_newpath(
129
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
130
{
131
	git_str new_path = GIT_STR_INIT;
132 133
	int error;

134 135 136
	if (patch->new_path) {
		error = git_parse_err("patch contains duplicate new path at line %"PRIuZ,
				      ctx->parse_ctx.line_num);
137
		goto out;
138
	}
139

140 141
	if ((error = parse_header_path_buf(&new_path, ctx, ctx->parse_ctx.line_len - 1)) <  0)
		goto out;
142
	patch->new_path = git_str_detach(&new_path);
143 144

out:
145
	git_str_dispose(&new_path);
146
	return error;
147 148
}

149
/*
 * Parse an octal file mode at the current position into `*mode`.
 *
 * Returns 0 on success.  Returns -1 with a patch error set when no
 * number can be parsed or when the value does not fit in a uint16_t.
 * (Previously the out-of-range case returned -1 without setting any
 * error message, which callers propagate unchanged.)
 */
static int parse_header_mode(uint16_t *mode, git_patch_parse_ctx *ctx)
{
	int64_t m;

	if (git_parse_advance_digit(&m, &ctx->parse_ctx, 8) < 0 ||
	    m < 0 || m > UINT16_MAX)
		return git_parse_err("invalid file mode at line %"PRIuZ, ctx->parse_ctx.line_num);

	*mode = (uint16_t)m;

	return 0;
}

static int parse_header_oid(
	git_oid *oid,
166
	uint16_t *oid_len,
167
	git_patch_parse_ctx *ctx)
168
{
169 170 171
	size_t hexsize, len;

	hexsize = git_oid_hexsize(ctx->opts.oid_type);
172

173 174 175
	for (len = 0;
	     len < ctx->parse_ctx.line_len && len < hexsize;
	     len++) {
176
		if (!git__isxdigit(ctx->parse_ctx.line[len]))
177 178 179
			break;
	}

180 181
	if (len < GIT_OID_MINPREFIXLEN || len > hexsize ||
		git_oid__fromstrn(oid, ctx->parse_ctx.line, len, ctx->opts.oid_type) < 0)
182 183
		return git_parse_err("invalid hex formatted object id at line %"PRIuZ,
			ctx->parse_ctx.line_num);
184

185
	git_parse_advance_chars(&ctx->parse_ctx, len);
186

187
	*oid_len = (uint16_t)len;
188 189 190 191 192

	return 0;
}

static int parse_header_git_index(
193
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
194
{
195 196
	char c;

197 198
	if (parse_header_oid(&patch->base.delta->old_file.id,
			&patch->base.delta->old_file.id_abbrev, ctx) < 0 ||
199
		git_parse_advance_expected_str(&ctx->parse_ctx, "..") < 0 ||
200 201
		parse_header_oid(&patch->base.delta->new_file.id,
			&patch->base.delta->new_file.id_abbrev, ctx) < 0)
202 203
		return -1;

204
	if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ' ') {
205
		uint16_t mode = 0;
206

207
		git_parse_advance_chars(&ctx->parse_ctx, 1);
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

		if (parse_header_mode(&mode, ctx) < 0)
			return -1;

		if (!patch->base.delta->new_file.mode)
			patch->base.delta->new_file.mode = mode;

		if (!patch->base.delta->old_file.mode)
			patch->base.delta->old_file.mode = mode;
	}

	return 0;
}

static int parse_header_git_oldmode(
223
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
224
{
225
	return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
226 227 228
}

static int parse_header_git_newmode(
229
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
230
{
231
	return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
232 233 234 235
}

static int parse_header_git_deletedfilemode(
	git_patch_parsed *patch,
236
	git_patch_parse_ctx *ctx)
237
{
238
	git__free((char *)patch->base.delta->new_file.path);
239

240
	patch->base.delta->new_file.path = NULL;
241
	patch->base.delta->status = GIT_DELTA_DELETED;
242
	patch->base.delta->nfiles = 1;
243

244
	return parse_header_mode(&patch->base.delta->old_file.mode, ctx);
245 246 247 248
}

static int parse_header_git_newfilemode(
	git_patch_parsed *patch,
249
	git_patch_parse_ctx *ctx)
250
{
251
	git__free((char *)patch->base.delta->old_file.path);
252

253
	patch->base.delta->old_file.path = NULL;
254
	patch->base.delta->status = GIT_DELTA_ADDED;
255
	patch->base.delta->nfiles = 1;
256

257
	return parse_header_mode(&patch->base.delta->new_file.mode, ctx);
258 259 260 261
}

static int parse_header_rename(
	char **out,
262
	git_patch_parse_ctx *ctx)
263
{
264
	git_str path = GIT_STR_INIT;
265

266
	if (parse_header_path_buf(&path, ctx, header_path_len(ctx)) < 0)
267 268
		return -1;

269 270 271
	/* Note: the `rename from` and `rename to` lines include the literal
	 * filename.  They do *not* include the prefix.  (Who needs consistency?)
	 */
272
	*out = git_str_detach(&path);
273 274 275 276
	return 0;
}

static int parse_header_renamefrom(
277
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
278
{
279
	patch->base.delta->status = GIT_DELTA_RENAMED;
280
	return parse_header_rename(&patch->rename_old_path, ctx);
281 282 283
}

static int parse_header_renameto(
284
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
285
{
286
	patch->base.delta->status = GIT_DELTA_RENAMED;
287
	return parse_header_rename(&patch->rename_new_path, ctx);
288 289
}

290 291 292 293 294 295 296 297 298 299 300 301 302 303
/* Handle a `copy from ` line: flag the delta as copied, store the source path. */
static int parse_header_copyfrom(
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	char **dest = &patch->rename_old_path;

	patch->base.delta->status = GIT_DELTA_COPIED;

	return parse_header_rename(dest, ctx);
}

/* Handle a `copy to ` line: flag the delta as copied, store the target path. */
static int parse_header_copyto(
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	char **dest = &patch->rename_new_path;

	patch->base.delta->status = GIT_DELTA_COPIED;

	return parse_header_rename(dest, ctx);
}

304
/*
 * Parse a decimal number followed by a literal `%` into `*out`.
 * Returns -1, without setting an error message, when the number or the
 * `%` is missing or the value lies outside 0..100.
 */
static int parse_header_percent(uint16_t *out, git_patch_parse_ctx *ctx)
{
	int64_t pct;

	if (git_parse_advance_digit(&pct, &ctx->parse_ctx, 10) < 0 ||
	    git_parse_advance_expected_str(&ctx->parse_ctx, "%") < 0)
		return -1;

	if (pct < 0 || pct > 100)
		return -1;

	*out = (uint16_t)pct;
	return 0;
}

static int parse_header_similarity(
322
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
323 324
{
	if (parse_header_percent(&patch->base.delta->similarity, ctx) < 0)
325 326
		return git_parse_err("invalid similarity percentage at line %"PRIuZ,
			ctx->parse_ctx.line_num);
327 328 329 330 331

	return 0;
}

static int parse_header_dissimilarity(
332
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
333 334 335 336
{
	uint16_t dissimilarity;

	if (parse_header_percent(&dissimilarity, ctx) < 0)
337 338
		return git_parse_err("invalid similarity percentage at line %"PRIuZ,
			ctx->parse_ctx.line_num);
339 340 341 342 343 344

	patch->base.delta->similarity = 100 - dissimilarity;

	return 0;
}

345 346 347
/*
 * Handle the remainder of a `diff --git ` line: read the old path, skip
 * whitespace, read the new path.
 *
 * If the line does not end right after the second path, both header
 * paths are discarded and the rest of the line is skipped (see the
 * comment in the body for why this is tolerated).
 */
static int parse_header_start(git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	bool at_eol;

	if (parse_header_path(&patch->header_old_path, ctx) < 0)
		return git_parse_err("corrupt old path in git diff header at line %"PRIuZ,
			ctx->parse_ctx.line_num);

	if (git_parse_advance_ws(&ctx->parse_ctx) < 0 ||
	    parse_header_path(&patch->header_new_path, ctx) < 0)
		return git_parse_err("corrupt new path in git diff header at line %"PRIuZ,
			ctx->parse_ctx.line_num);

	at_eol = git_parse_ctx_contains(&ctx->parse_ctx, "\n", 1) ||
		 git_parse_ctx_contains(&ctx->parse_ctx, "\r\n", 2);

	if (at_eol)
		return 0;

	/*
	 * The paths cannot always be parsed correctly at this point:
	 * unquoted names, whitespace in filenames and custom prefixes all
	 * make the header ambiguous.  That has to be allowed, so drop what
	 * was parsed and carry on, hoping that the "---" and "+++" lines
	 * will supply the paths instead.
	 */
	git_parse_advance_chars(&ctx->parse_ctx, ctx->parse_ctx.line_len - 1);

	git__free(patch->header_old_path);
	patch->header_old_path = NULL;
	git__free(patch->header_new_path);
	patch->header_new_path = NULL;

	return 0;
}

/*
 * States of the git header parser.  The enumerator order is significant:
 * STATE_START must stay first (value 0).
 */
typedef enum {
	/* initial state; only `diff --git ` is accepted */
	STATE_START,

	/* after the `diff --git ` line */
	STATE_DIFF,
	/* after `deleted file mode ` / `new file mode ` */
	STATE_FILEMODE,
	/* after `old mode `, waiting for `new mode ` */
	STATE_MODE,
	/* after an `index ` line */
	STATE_INDEX,
	/* after `--- `, waiting for `+++ ` */
	STATE_PATH,

	/* after `similarity index ` / `dissimilarity index ` */
	STATE_SIMILARITY,
	/* after the rename source line, waiting for the rename target */
	STATE_RENAME,
	/* after `copy from `, waiting for `copy to ` */
	STATE_COPY,

	/* a point at which the header may legitimately end */
	STATE_END
} parse_header_state;

392 393
typedef struct {
	const char *str;
394 395
	parse_header_state expected_state;
	parse_header_state next_state;
396
	int(*fn)(git_patch_parsed *, git_patch_parse_ctx *);
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
} parse_header_transition;

static const parse_header_transition transitions[] = {
	/* Start */
	{ "diff --git "         , STATE_START,      STATE_DIFF,       parse_header_start },

	{ "deleted file mode "  , STATE_DIFF,       STATE_FILEMODE,   parse_header_git_deletedfilemode },
	{ "new file mode "      , STATE_DIFF,       STATE_FILEMODE,   parse_header_git_newfilemode },
	{ "old mode "           , STATE_DIFF,       STATE_MODE,       parse_header_git_oldmode },
	{ "new mode "           , STATE_MODE,       STATE_END,        parse_header_git_newmode },

	{ "index "              , STATE_FILEMODE,   STATE_INDEX,      parse_header_git_index },
	{ "index "              , STATE_DIFF,       STATE_INDEX,      parse_header_git_index },
	{ "index "              , STATE_END,        STATE_INDEX,      parse_header_git_index },

412
	{ "--- "                , STATE_DIFF,       STATE_PATH,       parse_header_git_oldpath },
413
	{ "--- "                , STATE_INDEX,      STATE_PATH,       parse_header_git_oldpath },
414
	{ "--- "                , STATE_FILEMODE,   STATE_PATH,       parse_header_git_oldpath },
415 416 417 418
	{ "+++ "                , STATE_PATH,       STATE_END,        parse_header_git_newpath },
	{ "GIT binary patch"    , STATE_INDEX,      STATE_END,        NULL },
	{ "Binary files "       , STATE_INDEX,      STATE_END,        NULL },

419
	{ "similarity index "   , STATE_END,        STATE_SIMILARITY, parse_header_similarity },
420 421 422 423 424 425 426 427 428 429 430 431
	{ "similarity index "   , STATE_DIFF,       STATE_SIMILARITY, parse_header_similarity },
	{ "dissimilarity index ", STATE_DIFF,       STATE_SIMILARITY, parse_header_dissimilarity },
	{ "rename from "        , STATE_SIMILARITY, STATE_RENAME,     parse_header_renamefrom },
	{ "rename old "         , STATE_SIMILARITY, STATE_RENAME,     parse_header_renamefrom },
	{ "copy from "          , STATE_SIMILARITY, STATE_COPY,       parse_header_copyfrom },
	{ "rename to "          , STATE_RENAME,     STATE_END,        parse_header_renameto },
	{ "rename new "         , STATE_RENAME,     STATE_END,        parse_header_renameto },
	{ "copy to "            , STATE_COPY,       STATE_END,        parse_header_copyto },

	/* Next patch */
	{ "diff --git "         , STATE_END,        0,                NULL },
	{ "@@ -"                , STATE_END,        0,                NULL },
432
	{ "-- "                 , STATE_INDEX,      0,                NULL },
433
	{ "-- "                 , STATE_END,        0,                NULL },
434 435 436 437
};

static int parse_header_git(
	git_patch_parsed *patch,
438
	git_patch_parse_ctx *ctx)
439 440 441
{
	size_t i;
	int error = 0;
442
	parse_header_state state = STATE_START;
443 444

	/* Parse remaining header lines */
445
	for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
446 447
		bool found = false;

448
		if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n')
449 450
			break;

451 452 453
		for (i = 0; i < ARRAY_SIZE(transitions); i++) {
			const parse_header_transition *transition = &transitions[i];
			size_t op_len = strlen(transition->str);
454

455
			if (transition->expected_state != state ||
456
			    git__prefixcmp(ctx->parse_ctx.line, transition->str) != 0)
457 458
				continue;

459 460
			state = transition->next_state;

461
			/* Do not advance if this is the patch separator */
462
			if (transition->fn == NULL)
463 464
				goto done;

465
			git_parse_advance_chars(&ctx->parse_ctx, op_len);
466

467
			if ((error = transition->fn(patch, ctx)) < 0)
468 469
				goto done;

470
			git_parse_advance_ws(&ctx->parse_ctx);
471

472 473 474
			if (git_parse_advance_expected_str(&ctx->parse_ctx, "\n") < 0 ||
			    ctx->parse_ctx.line_len > 0) {
				error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
475 476 477
				goto done;
			}

478
			found = true;
479 480
			break;
		}
481

482
		if (!found) {
483 484
			error = git_parse_err("invalid patch header at line %"PRIuZ,
				ctx->parse_ctx.line_num);
485 486
			goto done;
		}
487 488
	}

489
	if (state != STATE_END) {
490
		error = git_parse_err("unexpected header line %"PRIuZ, ctx->parse_ctx.line_num);
491 492 493
		goto done;
	}

494 495 496 497
done:
	return error;
}

498
/*
 * Parse a decimal number at the current position into `*out`.
 * Returns -1 when no number can be parsed or git__is_int rejects the
 * value; no error message is set here.
 */
static int parse_int(int *out, git_patch_parse_ctx *ctx)
{
	int64_t value;

	if (git_parse_advance_digit(&value, &ctx->parse_ctx, 10) < 0)
		return -1;

	if (!git__is_int(value))
		return -1;

	*out = (int)value;
	return 0;
}

static int parse_hunk_header(
	git_patch_hunk *hunk,
511
	git_patch_parse_ctx *ctx)
512
{
513
	const char *header_start = ctx->parse_ctx.line;
514
	char c;
515 516 517 518

	hunk->hunk.old_lines = 1;
	hunk->hunk.new_lines = 1;

519
	if (git_parse_advance_expected_str(&ctx->parse_ctx, "@@ -") < 0 ||
520 521 522
		parse_int(&hunk->hunk.old_start, ctx) < 0)
		goto fail;

523
	if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
524
		if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
525 526 527 528
			parse_int(&hunk->hunk.old_lines, ctx) < 0)
			goto fail;
	}

529
	if (git_parse_advance_expected_str(&ctx->parse_ctx, " +") < 0 ||
530 531 532
		parse_int(&hunk->hunk.new_start, ctx) < 0)
		goto fail;

533
	if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
534
		if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
535 536 537 538
			parse_int(&hunk->hunk.new_lines, ctx) < 0)
			goto fail;
	}

539
	if (git_parse_advance_expected_str(&ctx->parse_ctx, " @@") < 0)
540 541
		goto fail;

542
	git_parse_advance_line(&ctx->parse_ctx);
543 544 545 546

	if (!hunk->hunk.old_lines && !hunk->hunk.new_lines)
		goto fail;

547
	hunk->hunk.header_len = ctx->parse_ctx.line - header_start;
548
	if (hunk->hunk.header_len > (GIT_DIFF_HUNK_HEADER_SIZE - 1))
549 550
		return git_parse_err("oversized patch hunk header at line %"PRIuZ,
			ctx->parse_ctx.line_num);
551 552 553 554 555 556 557

	memcpy(hunk->hunk.header, header_start, hunk->hunk.header_len);
	hunk->hunk.header[hunk->hunk.header_len] = '\0';

	return 0;

fail:
558
	git_error_set(GIT_ERROR_PATCH, "invalid patch hunk header at line %"PRIuZ,
559
		ctx->parse_ctx.line_num);
560 561 562
	return -1;
}

563 564 565 566 567 568 569 570
/*
 * Map a line origin to its matching "no newline at end of file" origin.
 * Anything other than an addition or a deletion maps to the context
 * variant.
 */
static int eof_for_origin(int origin) {
	int eof = GIT_DIFF_LINE_CONTEXT_EOFNL;

	if (origin == GIT_DIFF_LINE_ADDITION)
		eof = GIT_DIFF_LINE_ADD_EOFNL;
	else if (origin == GIT_DIFF_LINE_DELETION)
		eof = GIT_DIFF_LINE_DEL_EOFNL;

	return eof;
}

571 572 573
static int parse_hunk_body(
	git_patch_parsed *patch,
	git_patch_hunk *hunk,
574
	git_patch_parse_ctx *ctx)
575 576 577 578 579 580
{
	git_diff_line *line;
	int error = 0;

	int oldlines = hunk->hunk.old_lines;
	int newlines = hunk->hunk.new_lines;
581
	int last_origin = 0;
582 583

	for (;
584
		ctx->parse_ctx.remain_len > 1 &&
585
		(oldlines || newlines) &&
586
		!git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -");
587
		git_parse_advance_line(&ctx->parse_ctx)) {
588

589
		int old_lineno, new_lineno, origin, prefix = 1;
590
		char c;
591 592 593 594 595 596 597 598 599

		if (git__add_int_overflow(&old_lineno, hunk->hunk.old_start, hunk->hunk.old_lines) ||
		    git__sub_int_overflow(&old_lineno, old_lineno, oldlines) ||
		    git__add_int_overflow(&new_lineno, hunk->hunk.new_start, hunk->hunk.new_lines) ||
		    git__sub_int_overflow(&new_lineno, new_lineno, newlines)) {
			error = git_parse_err("unrepresentable line count at line %"PRIuZ,
					      ctx->parse_ctx.line_num);
			goto done;
		}
600

601 602 603
		if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n') {
			error = git_parse_err("invalid patch instruction at line %"PRIuZ,
				ctx->parse_ctx.line_num);
604 605 606
			goto done;
		}

607 608 609
		git_parse_peek(&c, &ctx->parse_ctx, 0);

		switch (c) {
610 611
		case '\n':
			prefix = 0;
612
			/* fall through */
613 614 615 616 617 618 619 620 621 622

		case ' ':
			origin = GIT_DIFF_LINE_CONTEXT;
			oldlines--;
			newlines--;
			break;

		case '-':
			origin = GIT_DIFF_LINE_DELETION;
			oldlines--;
623
			new_lineno = -1;
624 625 626 627 628
			break;

		case '+':
			origin = GIT_DIFF_LINE_ADDITION;
			newlines--;
629
			old_lineno = -1;
630 631
			break;

632 633 634 635 636 637
		case '\\':
			/*
			 * If there are no oldlines left, then this is probably
			 * the "\ No newline at end of file" marker. Do not
			 * verify its format, as it may be localized.
			 */
638 639 640 641 642 643 644
			if (!oldlines) {
				prefix = 0;
				origin = eof_for_origin(last_origin);
				old_lineno = -1;
				new_lineno = -1;
				break;
			}
645 646
			/* fall through */

647
		default:
648
			error = git_parse_err("invalid patch hunk at line %"PRIuZ, ctx->parse_ctx.line_num);
649 650 651 652
			goto done;
		}

		line = git_array_alloc(patch->base.lines);
653
		GIT_ERROR_CHECK_ALLOC(line);
654 655 656

		memset(line, 0x0, sizeof(git_diff_line));

657
		line->content_len = ctx->parse_ctx.line_len - prefix;
658
		line->content = git__strndup(ctx->parse_ctx.line + prefix, line->content_len);
659
		GIT_ERROR_CHECK_ALLOC(line->content);
660
		line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
661
		line->origin = origin;
662 663 664
		line->num_lines = 1;
		line->old_lineno = old_lineno;
		line->new_lineno = new_lineno;
665 666

		hunk->line_count++;
667 668

		last_origin = origin;
669 670 671
	}

	if (oldlines || newlines) {
672
		error = git_parse_err(
673 674 675 676 677
			"invalid patch hunk, expected %d old lines and %d new lines",
			hunk->hunk.old_lines, hunk->hunk.new_lines);
		goto done;
	}

678
	/*
679
	 * Handle "\ No newline at end of file". Only expect the leading
680 681 682 683
	 * backslash, though, because the rest of the string could be
	 * localized.  Because `diff` optimizes for the case where you
	 * want to apply the patch by hand.
	 */
684
	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "\\ ") &&
685 686 687 688 689
		git_array_size(patch->base.lines) > 0) {

		line = git_array_get(patch->base.lines, git_array_size(patch->base.lines) - 1);

		if (line->content_len < 1) {
690
			error = git_parse_err("last line has no trailing newline");
691 692 693
			goto done;
		}

694 695 696 697 698 699
		line = git_array_alloc(patch->base.lines);
		GIT_ERROR_CHECK_ALLOC(line);

		memset(line, 0x0, sizeof(git_diff_line));

		line->content_len = ctx->parse_ctx.line_len;
700 701
		line->content = git__strndup(ctx->parse_ctx.line, line->content_len);
		GIT_ERROR_CHECK_ALLOC(line->content);
702 703 704 705 706 707 708
		line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
		line->origin = eof_for_origin(last_origin);
		line->num_lines = 1;
		line->old_lineno = -1;
		line->new_lineno = -1;

		hunk->line_count++;
709

710
		git_parse_advance_line(&ctx->parse_ctx);
711 712 713 714 715 716
	}

done:
	return error;
}

717
static int parse_patch_header(
718
	git_patch_parsed *patch,
719
	git_patch_parse_ctx *ctx)
720 721 722
{
	int error = 0;

723
	for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
724
		/* This line is too short to be a patch header. */
725
		if (ctx->parse_ctx.line_len < 6)
726 727 728
			continue;

		/* This might be a hunk header without a patch header, provide a
729
		 * sensible error message. */
730 731
		if (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
			size_t line_num = ctx->parse_ctx.line_num;
732 733 734 735 736 737
			git_patch_hunk hunk;

			/* If this cannot be parsed as a hunk header, it's just leading
			* noise, continue.
			*/
			if (parse_hunk_header(&hunk, ctx) < 0) {
738
				git_error_clear();
739 740 741
				continue;
			}

742
			error = git_parse_err("invalid hunk header outside patch at line %"PRIuZ,
743 744 745 746 747
				line_num);
			goto done;
		}

		/* This buffer is too short to contain a patch. */
748
		if (ctx->parse_ctx.remain_len < ctx->parse_ctx.line_len + 6)
749 750 751
			break;

		/* A proper git patch */
752
		if (git_parse_ctx_contains_s(&ctx->parse_ctx, "diff --git ")) {
753
			error = parse_header_git(patch, ctx);
754 755 756 757 758 759 760
			goto done;
		}

		error = 0;
		continue;
	}

761
	git_error_set(GIT_ERROR_PATCH, "no patch found");
762
	error = GIT_ENOTFOUND;
763 764 765 766 767

done:
	return error;
}

768
static int parse_patch_binary_side(
769
	git_diff_binary_file *binary,
770
	git_patch_parse_ctx *ctx)
771 772
{
	git_diff_binary_t type = GIT_DIFF_BINARY_NONE;
773
	git_str base85 = GIT_STR_INIT, decoded = GIT_STR_INIT;
774
	int64_t len;
775 776
	int error = 0;

777
	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "literal ")) {
778
		type = GIT_DIFF_BINARY_LITERAL;
779 780
		git_parse_advance_chars(&ctx->parse_ctx, 8);
	} else if (git_parse_ctx_contains_s(&ctx->parse_ctx, "delta ")) {
781
		type = GIT_DIFF_BINARY_DELTA;
782
		git_parse_advance_chars(&ctx->parse_ctx, 6);
783
	} else {
784 785
		error = git_parse_err(
			"unknown binary delta type at line %"PRIuZ, ctx->parse_ctx.line_num);
786 787 788
		goto done;
	}

789 790
	if (git_parse_advance_digit(&len, &ctx->parse_ctx, 10) < 0 ||
	    git_parse_advance_nl(&ctx->parse_ctx) < 0 || len < 0) {
791
		error = git_parse_err("invalid binary size at line %"PRIuZ, ctx->parse_ctx.line_num);
792 793 794
		goto done;
	}

795
	while (ctx->parse_ctx.line_len) {
796
		char c;
797 798
		size_t encoded_len, decoded_len = 0, decoded_orig = decoded.size;

799 800
		git_parse_peek(&c, &ctx->parse_ctx, 0);

801 802 803 804 805 806 807 808
		if (c == '\n')
			break;
		else if (c >= 'A' && c <= 'Z')
			decoded_len = c - 'A' + 1;
		else if (c >= 'a' && c <= 'z')
			decoded_len = c - 'a' + (('z' - 'a') + 1) + 1;

		if (!decoded_len) {
809
			error = git_parse_err("invalid binary length at line %"PRIuZ, ctx->parse_ctx.line_num);
810 811 812
			goto done;
		}

813
		git_parse_advance_chars(&ctx->parse_ctx, 1);
814 815 816

		encoded_len = ((decoded_len / 4) + !!(decoded_len % 4)) * 5;

817
		if (!encoded_len || !ctx->parse_ctx.line_len || encoded_len > ctx->parse_ctx.line_len - 1) {
818
			error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
819 820 821
			goto done;
		}

822
		if ((error = git_str_decode_base85(
823
			&decoded, ctx->parse_ctx.line, encoded_len, decoded_len)) < 0)
824 825 826
			goto done;

		if (decoded.size - decoded_orig != decoded_len) {
827
			error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
828 829 830
			goto done;
		}

831
		git_parse_advance_chars(&ctx->parse_ctx, encoded_len);
832

833 834
		if (git_parse_advance_nl(&ctx->parse_ctx) < 0) {
			error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
835 836 837 838 839 840 841
			goto done;
		}
	}

	binary->type = type;
	binary->inflatedlen = (size_t)len;
	binary->datalen = decoded.size;
842
	binary->data = git_str_detach(&decoded);
843 844

done:
845 846
	git_str_dispose(&base85);
	git_str_dispose(&decoded);
847 848 849
	return error;
}

850
static int parse_patch_binary(
851
	git_patch_parsed *patch,
852
	git_patch_parse_ctx *ctx)
853 854 855
{
	int error;

856 857 858
	if (git_parse_advance_expected_str(&ctx->parse_ctx, "GIT binary patch") < 0 ||
		git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
859 860

	/* parse old->new binary diff */
861
	if ((error = parse_patch_binary_side(
862 863 864
			&patch->base.binary.new_file, ctx)) < 0)
		return error;

865 866 867
	if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary separator at line %"PRIuZ,
			ctx->parse_ctx.line_num);
868 869

	/* parse new->old binary diff */
870
	if ((error = parse_patch_binary_side(
871 872 873
			&patch->base.binary.old_file, ctx)) < 0)
		return error;

874 875 876
	if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
		return git_parse_err("corrupt git binary patch separator at line %"PRIuZ,
			ctx->parse_ctx.line_num);
877

878 879 880 881 882 883 884 885 886
	patch->base.binary.contains_data = 1;
	patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
	return 0;
}

/*
 * Parse a `Binary files <old> and <new> differ` line.
 *
 * The expected names are the `---`/`+++` paths when present, otherwise
 * the `diff --git` header paths.  For an added file the old name is
 * expected to be `/dev/null`; for a deleted file the new name is.  On
 * success the patch is flagged as binary without data.
 */
static int parse_patch_binary_nodata(
	git_patch_parsed *patch,
	git_patch_parse_ctx *ctx)
{
	const char *old_name = patch->old_path ? patch->old_path : patch->header_old_path;
	const char *new_name = patch->new_path ? patch->new_path : patch->header_new_path;

	if (!old_name || !new_name)
		return git_parse_err("corrupt binary data without paths at line %"PRIuZ, ctx->parse_ctx.line_num);

	if (patch->base.delta->status == GIT_DELTA_ADDED)
		old_name = "/dev/null";
	else if (patch->base.delta->status == GIT_DELTA_DELETED)
		new_name = "/dev/null";

	if (git_parse_advance_expected_str(&ctx->parse_ctx, "Binary files ") < 0)
		goto corrupt;
	if (git_parse_advance_expected_str(&ctx->parse_ctx, old_name) < 0)
		goto corrupt;
	if (git_parse_advance_expected_str(&ctx->parse_ctx, " and ") < 0)
		goto corrupt;
	if (git_parse_advance_expected_str(&ctx->parse_ctx, new_name) < 0)
		goto corrupt;
	if (git_parse_advance_expected_str(&ctx->parse_ctx, " differ") < 0)
		goto corrupt;
	if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
		goto corrupt;

	patch->base.binary.contains_data = 0;
	patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;

	return 0;

corrupt:
	return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
}

911
static int parse_patch_hunks(
912
	git_patch_parsed *patch,
913
	git_patch_parse_ctx *ctx)
914 915 916 917
{
	git_patch_hunk *hunk;
	int error = 0;

918
	while (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
919
		hunk = git_array_alloc(patch->base.hunks);
920
		GIT_ERROR_CHECK_ALLOC(hunk);
921 922 923 924 925 926 927 928 929 930 931

		memset(hunk, 0, sizeof(git_patch_hunk));

		hunk->line_start = git_array_size(patch->base.lines);
		hunk->line_count = 0;

		if ((error = parse_hunk_header(hunk, ctx)) < 0 ||
			(error = parse_hunk_body(patch, hunk, ctx)) < 0)
			goto done;
	}

932 933
	patch->base.delta->flags |= GIT_DIFF_FLAG_NOT_BINARY;

934 935 936 937
done:
	return error;
}

938 939
/*
 * Dispatch on what follows the header: a binary patch with data, a
 * "Binary files ... differ" notice, or ordinary text hunks.
 */
static int parse_patch_body(
	git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "GIT binary patch"))
		return parse_patch_binary(patch, ctx);

	if (git_parse_ctx_contains_s(&ctx->parse_ctx, "Binary files "))
		return parse_patch_binary_nodata(patch, ctx);

	return parse_patch_hunks(patch, ctx);
}

949
/*
 * Cross-check a header path (`one`) against a `---`/`+++` path (`two`).
 *
 * Nothing is checked when either is missing.  When `two_null` is set,
 * `two` must be exactly "/dev/null"; otherwise the two paths must be
 * equal.  `old_or_new` is only used in the error message.
 */
static int check_header_names(
	const char *one,
	const char *two,
	const char *old_or_new,
	bool two_null)
{
	if (one == NULL || two == NULL)
		return 0;

	if (two_null) {
		if (strcmp(two, "/dev/null") != 0)
			return git_parse_err("expected %s path of '/dev/null'", old_or_new);

		return 0;
	}

	if (strcmp(one, two) != 0)
		return git_parse_err("mismatched %s path names", old_or_new);

	return 0;
}

/*
 * Split the configured number of leading path components off
 * `path_start`.
 *
 * `*out` receives a newly allocated copy of the prefix and `*out_len`
 * its length; with a prefix length of zero the prefix is the empty
 * string.  Fails when the path has fewer components than the configured
 * prefix length, or when nothing would remain after the prefix.
 */
static int check_prefix(
	char **out,
	size_t *out_len,
	git_patch_parsed *patch,
	const char *path_start)
{
	const char *cursor = path_start;
	size_t wanted = patch->ctx->opts.prefix_len;
	size_t pending;

	*out = NULL;
	*out_len = 0;

	if (wanted > 0) {
		/* leading slashes do not count as part of the prefix in git apply */
		while (*cursor == '/')
			cursor++;

		/* step past `wanted` slash-terminated components */
		for (pending = wanted; *cursor != '\0' && pending > 0; cursor++) {
			if (*cursor == '/')
				pending--;
		}

		if (pending > 0 || *cursor == '\0')
			return git_parse_err(
				"header filename does not contain %"PRIuZ" path components",
				wanted);
	}

	*out_len = (size_t)(cursor - path_start);
	*out = git__strndup(path_start, *out_len);

	return (*out != NULL) ? 0 : -1;
}

/*
 * Validate the paths collected while parsing the header and decide which
 * ones become the delta's old and new file paths.
 *
 * Steps, in order:
 *   1. the `old_path` / `new_path` pair must be either both present or
 *      both absent;
 *   2. each of them must agree with the corresponding header path (or be
 *      "/dev/null" on the side that does not exist for an add / delete);
 *   3. the prefix is measured on the chosen old and new paths and stored
 *      in `patch->old_prefix` / `patch->new_prefix`;
 *   4. the delta paths are set, preferring the rename paths and otherwise
 *      pointing past the prefix of the chosen path.
 *
 * The delta paths point into strings owned by `patch`; nothing is copied.
 * Returns 0 on success and a negative value on any validation failure.
 */
static int check_filenames(git_patch_parsed *patch)
{
	git_diff_delta *delta = patch->base.delta;
	const char *old_src, *new_src;
	size_t old_skip = 0, new_skip = 0;
	bool is_add = (delta->status == GIT_DELTA_ADDED);
	bool is_delete = (delta->status == GIT_DELTA_DELETED);

	if (patch->old_path && !patch->new_path)
		return git_parse_err("missing new path");

	if (patch->new_path && !patch->old_path)
		return git_parse_err("missing old path");

	/* Ensure (non-renamed) paths match */
	if (check_header_names(patch->header_old_path, patch->old_path, "old", is_add) < 0)
		return -1;

	if (check_header_names(patch->header_new_path, patch->new_path, "new", is_delete) < 0)
		return -1;

	/*
	 * Use the explicit path unless that side does not exist (add / delete)
	 * or no explicit path was given; then fall back to the header path.
	 */
	old_src = (!is_add && patch->old_path) ?
		patch->old_path : patch->header_old_path;
	new_src = (!is_delete && patch->new_path) ?
		patch->new_path : patch->header_new_path;

	if (old_src &&
	    check_prefix(&patch->old_prefix, &old_skip, patch, old_src) < 0)
		return -1;

	if (new_src &&
	    check_prefix(&patch->new_prefix, &new_skip, patch, new_src) < 0)
		return -1;

	/* Prefer the rename filenames as they are unambiguous and unprefixed */
	delta->old_file.path = patch->rename_old_path ?
		patch->rename_old_path :
		(old_src ? old_src + old_skip : NULL);

	delta->new_file.path = patch->rename_new_path ?
		patch->rename_new_path :
		(new_src ? new_src + new_skip : NULL);

	if (delta->old_file.path || delta->new_file.path)
		return 0;

	return git_parse_err("git diff header lacks old / new paths");
}

/*
 * Run the final consistency checks on a parsed patch and normalize its
 * delta.
 *
 *   - the file names are validated and resolved by check_filenames();
 *   - if there is an old path, the patch is not a deletion and no new mode
 *     was recorded, the new mode is copied from the old mode;
 *   - a non-binary modification that changes neither the mode nor any
 *     content (no hunks) is rejected;
 *   - for an added file the old id is cleared, for a deleted file the new
 *     id is cleared.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int check_patch(git_patch_parsed *patch)
{
	git_diff_delta *delta = patch->base.delta;

	if (check_filenames(patch) < 0)
		return -1;

	if (!delta->new_file.mode &&
	    delta->status != GIT_DELTA_DELETED &&
	    delta->old_file.path)
		delta->new_file.mode = delta->old_file.mode;

	if (delta->status == GIT_DELTA_MODIFIED) {
		bool is_binary = (delta->flags & GIT_DIFF_FLAG_BINARY) != 0;
		bool same_mode = (delta->new_file.mode == delta->old_file.mode);

		if (!is_binary && same_mode &&
		    git_array_size(patch->base.hunks) == 0)
			return git_parse_err("patch with no hunks");
	}

	switch (delta->status) {
	case GIT_DELTA_ADDED:
		git_oid_clear(&delta->old_file.id,
			patch->base.diff_opts.oid_type);
		delta->old_file.id_abbrev = 0;
		break;
	case GIT_DELTA_DELETED:
		git_oid_clear(&delta->new_file.id,
			patch->base.diff_opts.oid_type);
		delta->new_file.id_abbrev = 0;
		break;
	default:
		break;
	}

	return 0;
}

1086
git_patch_parse_ctx *git_patch_parse_ctx_init(
1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
	const char *content,
	size_t content_len,
	const git_patch_options *opts)
{
	git_patch_parse_ctx *ctx;
	git_patch_options default_opts = GIT_PATCH_OPTIONS_INIT;

	if ((ctx = git__calloc(1, sizeof(git_patch_parse_ctx))) == NULL)
		return NULL;

1097 1098 1099
	if ((git_parse_ctx_init(&ctx->parse_ctx, content, content_len)) < 0) {
		git__free(ctx);
		return NULL;
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
	}

	if (opts)
		memcpy(&ctx->opts, opts, sizeof(git_patch_options));
	else
		memcpy(&ctx->opts, &default_opts, sizeof(git_patch_options));

	GIT_REFCOUNT_INC(ctx);
	return ctx;
}

/*
 * Destroy a patch parse context: clear the embedded parse context and
 * release the structure itself.  A NULL `ctx` is ignored.
 */
static void patch_parse_ctx_free(git_patch_parse_ctx *ctx)
{
	if (ctx != NULL) {
		git_parse_ctx_clear(&ctx->parse_ctx);
		git__free(ctx);
	}
}

1120
/*
 * Drop one reference to `ctx` via GIT_REFCOUNT_DEC, which is handed
 * patch_parse_ctx_free() as the function that destroys the context.
 */
void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx)
{
	GIT_REFCOUNT_DEC(ctx, patch_parse_ctx_free);
}

1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
/*
 * Fetch the patch stored at position `idx` of a parsed diff.
 *
 * `d` is treated as a `git_diff_parsed`.  On success `*out` receives the
 * stored patch with its reference count incremented and 0 is returned.
 * Returns -1, leaving `*out` untouched, when the vector lookup yields
 * NULL.
 */
int git_patch_parsed_from_diff(git_patch **out, git_diff *d, size_t idx)
{
	git_diff_parsed *parsed = (git_diff_parsed *)d;
	git_patch *found = git_vector_get(&parsed->patches, idx);

	if (found == NULL)
		return -1;

	GIT_REFCOUNT_INC(found);
	*out = found;

	return 0;
}

1139 1140 1141
/*
 * Release a parsed patch and everything it owns: its reference on the
 * parse context, the binary payloads, the hunk and line arrays (including
 * each line's content), the delta, every path / prefix string, and
 * finally the patch structure itself.  A NULL `p` is ignored.
 */
static void patch_parsed__free(git_patch *p)
{
	git_patch_parsed *parsed = (git_patch_parsed *)p;
	git_diff_line *entry;
	size_t idx;

	if (parsed == NULL)
		return;

	git_patch_parse_ctx_free(parsed->ctx);

	/* binary payloads */
	git__free((char *)parsed->base.binary.old_file.data);
	git__free((char *)parsed->base.binary.new_file.data);

	/* hunks, then every line's content followed by the line array */
	git_array_clear(parsed->base.hunks);

	git_array_foreach(parsed->base.lines, idx, entry)
		git__free((char *) entry->content);

	git_array_clear(parsed->base.lines);

	git__free(parsed->base.delta);

	/* prefix and path strings collected while parsing the header */
	git__free(parsed->old_prefix);
	git__free(parsed->new_prefix);
	git__free(parsed->header_old_path);
	git__free(parsed->header_new_path);
	git__free(parsed->rename_old_path);
	git__free(parsed->rename_new_path);
	git__free(parsed->old_path);
	git__free(parsed->new_path);

	git__free(parsed);
}

1169
int git_patch_parse(
1170
	git_patch **out,
1171
	git_patch_parse_ctx *ctx)
1172 1173
{
	git_patch_parsed *patch;
1174
	size_t start, used;
1175 1176
	int error = 0;

Edward Thomson committed
1177 1178
	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(ctx);
1179

1180 1181 1182
	*out = NULL;

	patch = git__calloc(1, sizeof(git_patch_parsed));
1183
	GIT_ERROR_CHECK_ALLOC(patch);
1184

1185 1186
	patch->ctx = ctx;
	GIT_REFCOUNT_INC(patch->ctx);
1187 1188

	patch->base.free_fn = patch_parsed__free;
1189

1190
	patch->base.delta = git__calloc(1, sizeof(git_diff_delta));
1191
	GIT_ERROR_CHECK_ALLOC(patch->base.delta);
1192

1193
	patch->base.delta->status = GIT_DELTA_MODIFIED;
1194
	patch->base.delta->nfiles = 2;
1195

1196 1197
	patch->base.diff_opts.oid_type = ctx->opts.oid_type;

1198
	start = ctx->parse_ctx.remain_len;
1199

1200
	if ((error = parse_patch_header(patch, ctx)) < 0 ||
1201 1202
	    (error = parse_patch_body(patch, ctx)) < 0 ||
	    (error = check_patch(patch)) < 0)
1203 1204
		goto done;

1205 1206
	used = start - ctx->parse_ctx.remain_len;
	ctx->parse_ctx.remain += used;
1207

1208 1209 1210 1211
	patch->base.diff_opts.old_prefix = patch->old_prefix;
	patch->base.diff_opts.new_prefix = patch->new_prefix;
	patch->base.diff_opts.flags |= GIT_DIFF_SHOW_BINARY;

1212
	GIT_REFCOUNT_INC(&patch->base);
1213 1214 1215
	*out = &patch->base;

done:
1216 1217 1218
	if (error < 0)
		patch_parsed__free(&patch->base);

1219 1220
	return error;
}
1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231

/*
 * Parse a single patch from an in-memory buffer.
 *
 * A temporary parse context is created over `content` (`content_len`
 * bytes) with the given `opts`, one patch is parsed from it into `*out`,
 * and the local reference on the context is dropped again.
 *
 * Returns the result of git_patch_parse(), or -1 if the parse context
 * cannot be created.
 */
int git_patch_from_buffer(
	git_patch **out,
	const char *content,
	size_t content_len,
	const git_patch_options *opts)
{
	git_patch_parse_ctx *ctx =
		git_patch_parse_ctx_init(content, content_len, opts);
	int error;

	GIT_ERROR_CHECK_ALLOC(ctx);

	error = git_patch_parse(out, ctx);

	/* drop the reference taken by git_patch_parse_ctx_init() */
	git_patch_parse_ctx_free(ctx);

	return error;
}