/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "array.h"
#include "common.h"
#include "git2/message.h"

#include <stddef.h>
#include <string.h>
#include <ctype.h>

#define COMMENT_LINE_CHAR '#'
#define TRAILER_SEPARATORS ":"

/*
 * NULL-terminated list of line prefixes that find_trailer_start() matches
 * against the start of each line.  A line beginning with one of these counts
 * as a trailer line and marks the block as containing a recognized prefix.
 */
static const char *const git_generated_prefixes[] = {
	"Signed-off-by: ",
	"(cherry picked from commit ",
	NULL
};

/*
 * Return non-zero if the line starting at str is blank, i.e. it contains
 * only whitespace up to the first newline or the end of the string.
 */
static int is_blank_line(const char *str)
{
	const char *s = str;

	/*
	 * The <ctype.h> classifiers require a value representable as
	 * unsigned char; a plain (possibly negative) char is undefined.
	 */
	while (*s && *s != '\n' && isspace((unsigned char)*s))
		s++;

	return !*s || *s == '\n';
}

/*
 * Return a pointer to the character following the first newline in str.
 * If str contains no newline, return a pointer to its NUL terminator.
 */
static const char *next_line(const char *str)
{
	const char *nl = strchr(str, '\n');

	if (nl)
		return nl + 1;

	/* no newline left: point at the NUL terminator */
	return str + strlen(str);
}

/*
 * Find the start of the last line in the first len bytes of buf.
 *
 * On return *out holds the offset of the first character of that line.
 * Returns false (with *out set to 0) when len is 0, because there is no
 * line to report; returns true otherwise.
 */
static bool last_line(size_t *out, const char *buf, size_t len)
{
	size_t i;

	*out = 0;

	if (len == 0)
		return false;
	if (len == 1)
		return true;

	/*
	 * Skip the last character, because if it is a newline it is
	 * considered part of the last line anyway.  Then walk backwards
	 * looking for the newline that ends the previous line.  The index is
	 * kept one ahead of the byte being inspected so that buf[0] is also
	 * examined without the unsigned counter wrapping below zero.
	 */
	for (i = len - 1; i > 0; i--) {
		if (buf[i - 1] == '\n') {
			*out = i;
			return true;
		}
	}

	return true;
}

/*
 * If the given line is of the form
 * "<token><optional whitespace><separator>..." or "<separator>...", sets out
 * to the location of the separator and returns true.  Otherwise, returns
 * false and leaves out untouched.  The optional whitespace is allowed there
 * primarily to allow things like "Bug #43" where <token> is "Bug" and
 * <separator> is "#".
 *
 * A <token> consists of alphanumeric characters and '-'.
 *
 * The separator-starts-line case (in which this function returns true and
 * sets out to 0) is distinguished from the non-well-formed-line case (in
 * which this function returns false) because some callers of this function
 * need such a distinction.
 */
static bool find_separator(size_t *out, const char *line, const char *separators)
{
	int whitespace_found = 0;
	const char *c;

	for (c = line; *c; c++) {
		if (strchr(separators, *c)) {
			*out = c - line;
			return true;
		}

		/* token characters are only valid before any whitespace */
		if (!whitespace_found &&
		    (isalnum((unsigned char)*c) || *c == '-'))
			continue;

		/* whitespace may follow the token but may not start the line */
		if (c != line && (*c == ' ' || *c == '\t')) {
			whitespace_found = 1;
			continue;
		}

		break;
	}

	return false;
}

/*
 * Inspect the given string and determine the true "end" of the log message, in
 * order to find where to put a new Signed-off-by: line.  Ignored are
 * trailing comment lines and blank lines.  To support "git commit -s
 * --amend" on an existing commit, we also ignore "Conflicts:".  To
 * support "git commit -v", we truncate at cut lines.
 *
 * Returns the number of bytes from the tail to ignore, to be fed as
 * the second parameter to append_signoff().
 */
117
static size_t ignore_non_trailer(const char *buf, size_t len)
118
{
119
	size_t boc = 0, bol = 0;
120 121 122 123 124 125 126 127 128 129 130
	int in_old_conflicts_block = 0;
	size_t cutoff = len;

	while (bol < cutoff) {
		const char *next_line = memchr(buf + bol, '\n', len - bol);

		if (!next_line)
			next_line = buf + len;
		else
			next_line++;

131
		if (buf[bol] == COMMENT_LINE_CHAR || buf[bol] == '\n') {
132 133 134 135
			/* is this the first of the run of comments? */
			if (!boc)
				boc = bol;
			/* otherwise, it is just continuing */
136
		} else if (git__prefixcmp(buf + bol, "Conflicts:\n") == 0) {
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
			in_old_conflicts_block = 1;
			if (!boc)
				boc = bol;
		} else if (in_old_conflicts_block && buf[bol] == '\t') {
			; /* a pathname in the conflicts block */
		} else if (boc) {
			/* the previous was not trailing comment */
			boc = 0;
			in_old_conflicts_block = 0;
		}
		bol = next_line - buf;
	}
	return boc ? len - boc : len - cutoff;
}

/*
 * Return the position of the start of the patch or the length of str if there
 * is no patch in the message.  The patch is taken to start at the first line
 * that begins with "---".
 */
static size_t find_patch_start(const char *str)
{
	const char *s;

	for (s = str; *s; s = next_line(s)) {
		if (git__prefixcmp(s, "---") == 0)
			break;
	}

	/* either the "---" line or the NUL terminator */
	return s - str;
}

/*
 * Return the position of the first trailer line or len if there are no
 * trailers.
 */
172
static size_t find_trailer_start(const char *buf, size_t len)
173 174
{
	const char *s;
175 176
	size_t end_of_title, l;
	int only_spaces = 1;
177 178 179 180 181 182 183 184 185 186 187 188
	int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0;
	/*
	 * Number of possible continuation lines encountered. This will be
	 * reset to 0 if we encounter a trailer (since those lines are to be
	 * considered continuations of that trailer), and added to
	 * non_trailer_lines if we encounter a non-trailer (since those lines
	 * are to be considered non-trailers).
	 */
	int possible_continuation_lines = 0;

	/* The first paragraph is the title and cannot be trailers */
	for (s = buf; s < buf + len; s = next_line(s)) {
189
		if (s[0] == COMMENT_LINE_CHAR)
190 191 192 193 194 195 196 197 198 199 200 201
			continue;
		if (is_blank_line(s))
			break;
	}
	end_of_title = s - buf;

	/*
	 * Get the start of the trailers by looking starting from the end for a
	 * blank line before a set of non-blank lines that (i) are all
	 * trailers, or (ii) contains at least one Git-generated trailer and
	 * consists of at least 25% trailers.
	 */
202 203
	l = len;
	while (last_line(&l, buf, l) && l >= end_of_title) {
204 205
		const char *bol = buf + l;
		const char *const *p;
206
		size_t separator_pos = 0;
207

208
		if (bol[0] == COMMENT_LINE_CHAR) {
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
			non_trailer_lines += possible_continuation_lines;
			possible_continuation_lines = 0;
			continue;
		}
		if (is_blank_line(bol)) {
			if (only_spaces)
				continue;
			non_trailer_lines += possible_continuation_lines;
			if (recognized_prefix &&
			    trailer_lines * 3 >= non_trailer_lines)
				return next_line(bol) - buf;
			else if (trailer_lines && !non_trailer_lines)
				return next_line(bol) - buf;
			return len;
		}
		only_spaces = 0;

		for (p = git_generated_prefixes; *p; p++) {
227
			if (git__prefixcmp(bol, *p) == 0) {
228 229 230 231 232 233 234
				trailer_lines++;
				possible_continuation_lines = 0;
				recognized_prefix = 1;
				goto continue_outer_loop;
			}
		}

235
		find_separator(&separator_pos, bol, TRAILER_SEPARATORS);
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
		if (separator_pos >= 1 && !isspace(bol[0])) {
			trailer_lines++;
			possible_continuation_lines = 0;
			if (recognized_prefix)
				continue;
		} else if (isspace(bol[0]))
			possible_continuation_lines++;
		else {
			non_trailer_lines++;
			non_trailer_lines += possible_continuation_lines;
			possible_continuation_lines = 0;
		}
continue_outer_loop:
		;
	}

	return len;
}

/*
 * Return the position of the end of the trailers: len minus the number of
 * tail bytes that ignore_non_trailer() reports as ignorable.
 */
static size_t find_trailer_end(const char *buf, size_t len)
{
	return len - ignore_non_trailer(buf, len);
}

/*
 * Copy the trailer block of message into a newly allocated, NUL-terminated
 * buffer.
 *
 * The block spans from find_trailer_start() to find_trailer_end(), with the
 * search limited to the part of the message before find_patch_start().
 * On success *len receives the length of the block, excluding the
 * terminator, and the caller owns the returned buffer.  Returns NULL if the
 * allocation fails; *len is left untouched in that case.
 */
static char *extract_trailer_block(const char *message, size_t* len)
{
	size_t patch_start = find_patch_start(message);
	size_t trailer_end = find_trailer_end(message, patch_start);
	size_t trailer_start = find_trailer_start(message, trailer_end);
	size_t trailer_len;
	char *buffer;

	/* defensive: never let the unsigned subtraction below wrap around */
	if (trailer_start > trailer_end)
		trailer_start = trailer_end;

	trailer_len = trailer_end - trailer_start;

	buffer = git__malloc(trailer_len + 1);
	if (buffer == NULL)
		return NULL;

	memcpy(buffer, message + trailer_start, trailer_len);
	buffer[trailer_len] = 0;

	*len = trailer_len;

	return buffer;
}

/* States of the trailer-block parser in git_message_trailers(). */
enum trailer_state {
	S_START,	/* at the start of a new trailer line */
	S_KEY,		/* scanning the characters of the key */
	S_KEY_WS,	/* whitespace between the key and the separator */
	S_SEP_WS,	/* whitespace between the separator and the value */
	S_VALUE,	/* scanning the value */
	S_VALUE_NL,	/* just past a newline inside the value */
	S_VALUE_END,	/* value complete: record the key/value pair */
	S_IGNORE	/* skipping the rest of a malformed line */
};

/*
 * State-transition helpers for the parser loop in git_message_trailers().
 * NEXT(st) switches to state st and consumes the current character;
 * GOTO(st) switches to state st and re-examines the current character.
 *
 * Both rely on variables named `state` and `ptr` being in scope and end in
 * `continue`, so they must be used directly inside the parser's loop.  They
 * expand to a bare block rather than do { } while (0), because `continue`
 * would then apply to that inner loop instead of the parser's.
 */
#define NEXT(st) { state = (st); ptr++; continue; }
#define GOTO(st) { state = (st); continue; }

295 296 297
typedef git_array_t(git_message_trailer) git_array_trailer_t;

/*
 * Parse the trailers of a commit message into trailer_arr.
 *
 * The trailer block is copied out of message and split in place into
 * NUL-terminated keys and values; every key/value pointer stored in
 * trailer_arr points into that copy, which is kept in
 * trailer_arr->_trailer_block.  Lines that do not form a valid
 * "<key><separator><value>" trailer are skipped.
 *
 * Returns 0 on success, or -1 if an allocation fails, in which case
 * trailer_arr is left empty and nothing needs to be released.
 */
int git_message_trailers(git_message_trailer_array *trailer_arr, const char *message)
{
	enum trailer_state state = S_START;
	int rc = 0;
	char *ptr;
	char *key = NULL;
	char *value = NULL;
	git_array_trailer_t arr = GIT_ARRAY_INIT;

	size_t trailer_len;
	char *trailer = extract_trailer_block(message, &trailer_len);
	if (trailer == NULL)
		return -1;

	for (ptr = trailer;;) {
		switch (state) {
			case S_START: {
				if (*ptr == 0) {
					goto ret;
				}

				key = ptr;
				GOTO(S_KEY);
			}
			case S_KEY: {
				if (*ptr == 0) {
					goto ret;
				}

				/* <ctype.h> classifiers need an unsigned char value */
				if (isalnum((unsigned char)*ptr) || *ptr == '-') {
					/* legal key character */
					NEXT(S_KEY);
				}

				if (*ptr == ' ' || *ptr == '\t') {
					/* optional whitespace before separator */
					*ptr = 0;
					NEXT(S_KEY_WS);
				}

				if (strchr(TRAILER_SEPARATORS, *ptr)) {
					/* terminate the key where the separator was */
					*ptr = 0;
					NEXT(S_SEP_WS);
				}

				/* illegal character */
				GOTO(S_IGNORE);
			}
			case S_KEY_WS: {
				if (*ptr == 0) {
					goto ret;
				}

				if (*ptr == ' ' || *ptr == '\t') {
					NEXT(S_KEY_WS);
				}

				if (strchr(TRAILER_SEPARATORS, *ptr)) {
					NEXT(S_SEP_WS);
				}

				/* illegal character */
				GOTO(S_IGNORE);
			}
			case S_SEP_WS: {
				if (*ptr == 0) {
					goto ret;
				}

				if (*ptr == ' ' || *ptr == '\t') {
					NEXT(S_SEP_WS);
				}

				/*
				 * Let S_VALUE examine the first character too:
				 * if the value is empty this is already the
				 * newline, and stepping over it would merge
				 * the following line into this value.
				 */
				value = ptr;
				GOTO(S_VALUE);
			}
			case S_VALUE: {
				if (*ptr == 0) {
					GOTO(S_VALUE_END);
				}

				if (*ptr == '\n') {
					NEXT(S_VALUE_NL);
				}

				NEXT(S_VALUE);
			}
			case S_VALUE_NL: {
				if (*ptr == ' ') {
					/* continuation; */
					NEXT(S_VALUE);
				}

				/* overwrite the newline to terminate the value */
				ptr[-1] = 0;
				GOTO(S_VALUE_END);
			}
			case S_VALUE_END: {
				git_message_trailer *t = git_array_alloc(arr);

				if (t == NULL) {
					rc = -1;
					goto ret;
				}

				t->key = key;
				t->value = value;

				key = NULL;
				value = NULL;

				GOTO(S_START);
			}
			case S_IGNORE: {
				if (*ptr == 0) {
					goto ret;
				}

				if (*ptr == '\n') {
					NEXT(S_START);
				}

				NEXT(S_IGNORE);
			}
		}
	}

ret:
	if (rc < 0) {
		/*
		 * NOTE(review): assumes arr.ptr is either NULL or still a
		 * valid allocation after git_array_alloc() fails -- confirm
		 * against array.h.
		 */
		git__free(arr.ptr);
		git__free(trailer);

		trailer_arr->_trailer_block = NULL;
		trailer_arr->trailers = NULL;
		trailer_arr->count = 0;

		return rc;
	}

	trailer_arr->_trailer_block = trailer;
	trailer_arr->trailers = arr.ptr;
	trailer_arr->count = arr.size;

	return rc;
}

/*
 * Release the memory held by a trailer array filled in by
 * git_message_trailers().  The array structure itself is not freed.
 * Passing NULL is a no-op, and the fields are reset so that freeing the
 * same array twice is harmless.
 */
void git_message_trailer_array_free(git_message_trailer_array *arr)
{
	if (arr == NULL)
		return;

	git__free(arr->_trailer_block);
	git__free(arr->trailers);

	arr->_trailer_block = NULL;
	arr->trailers = NULL;
	arr->count = 0;
}