attr_file.c 14.9 KB
Newer Older
1
#include "common.h"
2
#include "repository.h"
3
#include "filebuf.h"
4
#include "attr.h"
5 6
#include "git2/blob.h"
#include "git2/tree.h"
7 8 9
#include <ctype.h>

static int sort_by_hash_and_name(const void *a_raw, const void *b_raw);
Russell Belfer committed
10
static void git_attr_rule__clear(git_attr_rule *rule);
11 12 13 14
static bool parse_optimized_patterns(
	git_attr_fnmatch *spec,
	git_pool *pool,
	const char *pattern);
15

16 17 18 19 20
/*
 * Allocate and initialize an empty attribute file object.
 *
 * attrs_ptr - receives the new object on success; it is set to NULL on
 *             every failure path so callers never see a stale pointer.
 * from      - source tag; stored as a single character ('0' + from) at the
 *             start of the lookup key.
 * path      - optional; when given, the key becomes "<from>#<path>".
 * pool      - optional; when NULL a private pool is allocated, and the file
 *             object is marked as owning it.
 *
 * Returns 0 on success, -1 on failure.
 */
int git_attr_file__new(
	git_attr_file **attrs_ptr,
	git_attr_file_source from,
	const char *path,
	git_pool *pool)
{
	git_attr_file *attrs = NULL;

	*attrs_ptr = NULL;

	attrs = git__calloc(1, sizeof(git_attr_file));
	GITERR_CHECK_ALLOC(attrs);

	if (pool)
		attrs->pool = pool;
	else {
		attrs->pool = git__calloc(1, sizeof(git_pool));
		if (!attrs->pool)
			goto fail;

		if (git_pool_init(attrs->pool, 1, 0) < 0) {
			/* pool_is_allocated is not set yet, so git_attr_file__free
			 * would not release this allocation; do it here
			 */
			git__free(attrs->pool);
			attrs->pool = NULL;
			goto fail;
		}

		attrs->pool_is_allocated = true;
	}

	if (path) {
		size_t len = strlen(path);

		/* two bytes of "<from>#" prefix plus the NUL terminator */
		attrs->key = git_pool_malloc(attrs->pool, (uint32_t)len + 3);
		if (!attrs->key)
			goto fail; /* release attrs instead of returning directly */

		attrs->key[0] = '0' + from;
		attrs->key[1] = '#';
		memcpy(&attrs->key[2], path, len);
		attrs->key[len + 2] = '\0';
	}

	if (git_vector_init(&attrs->rules, 4, NULL) < 0)
		goto fail;

	*attrs_ptr = attrs;
	return 0;

fail:
	git_attr_file__free(attrs);
	return -1;
}

60
int git_attr_file__parse_buffer(
61
	git_repository *repo, void *parsedata, const char *buffer, git_attr_file *attrs)
62
{
63
	int error = 0;
64
	const char *scan = NULL;
65
	char *context = NULL;
66 67
	git_attr_rule *rule = NULL;

68 69
	GIT_UNUSED(parsedata);

70
	assert(buffer && attrs);
71 72 73

	scan = buffer;

74 75 76 77
	/* if subdir file path, convert context for file paths */
	if (attrs->key && git__suffixcmp(attrs->key, "/" GIT_ATTR_FILE) == 0) {
		context = attrs->key + 2;
		context[strlen(context) - strlen(GIT_ATTR_FILE)] = '\0';
78 79
	}

80
	while (!error && *scan) {
81 82
		/* allocate rule if needed */
		if (!rule && !(rule = git__calloc(1, sizeof(git_attr_rule)))) {
83
			error = -1;
84 85 86 87
			break;
		}

		/* parse the next "pattern attr attr attr" line */
88 89 90 91
		if (!(error = git_attr_fnmatch__parse(
				&rule->match, attrs->pool, context, &scan)) &&
			!(error = git_attr_assignment__parse(
				repo, attrs->pool, &rule->assigns, &scan)))
92 93 94 95 96 97 98 99 100
		{
			if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO)
				/* should generate error/warning if this is coming from any
				 * file other than .gitattributes at repo root.
				 */
				error = git_attr_cache__insert_macro(repo, rule);
			else
				error = git_vector_insert(&attrs->rules, rule);
		}
101 102

		/* if the rule wasn't a pattern, on to the next */
103
		if (error < 0) {
Russell Belfer committed
104
			git_attr_rule__clear(rule); /* reset rule contents */
105
			if (error == GIT_ENOTFOUND)
106
				error = 0;
107 108 109 110 111
		} else {
			rule = NULL; /* vector now "owns" the rule */
		}
	}

112
	git_attr_rule__free(rule);
113 114 115 116

	/* restore file path used for context */
	if (context)
		context[strlen(context)] = '.'; /* first char of GIT_ATTR_FILE */
117 118 119 120

	return error;
}

121 122 123
/*
 * Create an attribute file object keyed on `path`, read the file at that
 * path from disk and parse its contents into the object.
 *
 * On success *attrs_ptr holds the loaded object and 0 is returned.  If
 * reading or parsing fails, the object is freed, *attrs_ptr is set to NULL
 * and the error code is returned.
 */
int git_attr_file__new_and_load(
	git_attr_file **attrs_ptr,
	const char *path)
{
	git_buf content = GIT_BUF_INIT;
	int error = git_attr_file__new(attrs_ptr, 0, path, NULL);

	if (error < 0)
		return error;

	error = git_futils_readbuffer(&content, path);
	if (error == 0)
		error = git_attr_file__parse_buffer(
			NULL, NULL, git_buf_cstr(&content), *attrs_ptr);

	git_buf_free(&content);

	if (error != 0) {
		git_attr_file__free(*attrs_ptr);
		*attrs_ptr = NULL;
	}

	return error;
}

145
/* Free every rule stored in the file, then release the rule vector itself. */
void git_attr_file__clear_rules(git_attr_file *file)
{
	git_attr_rule *current;
	unsigned int idx;

	git_vector_foreach(&file->rules, idx, current) {
		git_attr_rule__free(current);
	}

	git_vector_free(&file->rules);
}

/*
 * Release an attribute file object and its rules.  The pool is cleared and
 * freed only when the object is flagged as owning it.  A NULL argument is
 * accepted and ignored.
 */
void git_attr_file__free(git_attr_file *file)
{
	if (file != NULL) {
		git_attr_file__clear_rules(file);

		if (file->pool_is_allocated) {
			git_pool_clear(file->pool);
			git__free(file->pool);
		}
		file->pool = NULL;

		git__free(file);
	}
}

172
/*
 * Hash a NUL-terminated attribute name: start from 5381 and, for each
 * character, multiply the running value by 33 (written as shift-and-add)
 * and add the character.  `name` must not be NULL.
 */
uint32_t git_attr_file__name_hash(const char *name)
{
	uint32_t hash = 5381;
	const char *p;

	assert(name);

	for (p = name; *p != '\0'; p++) {
		int ch = (int)*p;
		hash = ((hash << 5) + hash) + ch;
	}

	return hash;
}


/*
 * Look up a single attribute for `path` in `file`.
 *
 * Rules matching the path are visited in the order produced by
 * git_attr_file__foreach_matching_rule; the first one that carries an
 * assignment for `attr` supplies *value.  *value is NULL when no matching
 * rule assigns the attribute.  Always returns 0.
 */
int git_attr_file__lookup_one(
	git_attr_file *file,
	const git_attr_path *path,
	const char *attr,
	const char **value)
{
	git_attr_rule *rule;
	git_attr_name wanted;
	size_t i;

	wanted.name = attr;
	wanted.name_hash = git_attr_file__name_hash(attr);

	*value = NULL;

	git_attr_file__foreach_matching_rule(file, path, i, rule) {
		size_t pos;

		if (git_vector_bsearch(&pos, &rule->assigns, &wanted) == 0) {
			git_attr_assignment *found =
				(git_attr_assignment *)git_vector_get(&rule->assigns, pos);

			*value = found->value;
			break;
		}
	}

	return 0;
}


212
bool git_attr_fnmatch__match(
213
	git_attr_fnmatch *match,
214 215
	const git_attr_path *path)
{
216
	int fnm;
217
	int icase_flags = (match->flags & GIT_ATTR_FNMATCH_ICASE) ? FNM_CASEFOLD : 0;
218

219
	if (match->flags & GIT_ATTR_FNMATCH_DIRECTORY && !path->is_dir)
220
		return false;
221

222
	if (match->flags & GIT_ATTR_FNMATCH_FULLPATH)
223
		fnm = p_fnmatch(match->pattern, path->path, FNM_PATHNAME | icase_flags);
224
	else if (path->is_dir)
225
		fnm = p_fnmatch(match->pattern, path->basename, FNM_LEADING_DIR | icase_flags);
226
	else
227
		fnm = p_fnmatch(match->pattern, path->basename, icase_flags);
228

229
	return (fnm == FNM_NOMATCH) ? false : true;
230 231
}

232
bool git_attr_rule__match(
233 234 235
	git_attr_rule *rule,
	const git_attr_path *path)
{
236
	bool matched = git_attr_fnmatch__match(&rule->match, path);
237

238
	if (rule->match.flags & GIT_ATTR_FNMATCH_NEGATIVE)
239
		matched = !matched;
240 241 242 243

	return matched;
}

244

245 246 247
/*
 * Find the assignment for attribute `name` within a rule.
 * Returns the assignment, or NULL when the rule does not assign it.
 */
git_attr_assignment *git_attr_rule__lookup_assignment(
	git_attr_rule *rule, const char *name)
{
	git_attr_name wanted;
	size_t pos;

	wanted.name = name;
	wanted.name_hash = git_attr_file__name_hash(name);

	if (git_vector_bsearch(&pos, &rule->assigns, &wanted) == 0)
		return git_vector_get(&rule->assigns, pos);

	return NULL;
}

int git_attr_path__init(
260
	git_attr_path *info, const char *path, const char *base)
261
{
262 263
	ssize_t root;

264 265
	/* build full path as best we can */
	git_buf_init(&info->full, 0);
266

267 268 269 270
	if (git_path_join_unrooted(&info->full, path, base, &root) < 0)
		return -1;

	info->path = info->full.ptr + root;
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291

	/* remove trailing slashes */
	while (info->full.size > 0) {
		if (info->full.ptr[info->full.size - 1] != '/')
			break;
		info->full.size--;
	}
	info->full.ptr[info->full.size] = '\0';

	/* skip leading slashes in path */
	while (*info->path == '/')
		info->path++;

	/* find trailing basename component */
	info->basename = strrchr(info->path, '/');
	if (info->basename)
		info->basename++;
	if (!info->basename || !*info->basename)
		info->basename = info->path;

	info->is_dir = (int)git_path_isdir(info->full.ptr);
292

293
	return 0;
294 295
}

296 297 298 299 300 301 302
/*
 * Release the buffer held by `info` and reset the pointers that
 * git_attr_path__init aimed into it.
 */
void git_attr_path__free(git_attr_path *info)
{
	info->basename = NULL;
	info->path = NULL;
	git_buf_free(&info->full);
}

303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
/*
 * From gitattributes(5):
 *
 * Patterns have the following format:
 *
 * - A blank line matches no files, so it can serve as a separator for
 *   readability.
 *
 * - A line starting with # serves as a comment.
 *
 * - An optional prefix ! which negates the pattern; any matching file
 *   excluded by a previous pattern will become included again. If a negated
 *   pattern matches, this will override lower precedence patterns sources.
 *
 * - If the pattern ends with a slash, it is removed for the purpose of the
 *   following description, but it would only find a match with a directory. In
 *   other words, foo/ will match a directory foo and paths underneath it, but
 *   will not match a regular file or a symbolic link foo (this is consistent
 *   with the way how pathspec works in general in git).
 *
 * - If the pattern does not contain a slash /, git treats it as a shell glob
 *   pattern and checks for a match against the pathname without leading
 *   directories.
 *
 * - Otherwise, git treats the pattern as a shell glob suitable for consumption
 *   by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
 *   not match a / in the pathname. For example, "Documentation/\*.html" matches
 *   "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading
 *   slash matches the beginning of the pathname; for example, "/\*.c" matches
 *   "cat-file.c" but not "mozilla-sha1/sha1.c".
 */

/*
336
 * This will return 0 if the spec was filled out,
337 338 339
 * GIT_ENOTFOUND if the fnmatch does not require matching, or
 * another error code if there was an actual problem.
 */
340
int git_attr_fnmatch__parse(
341
	git_attr_fnmatch *spec,
342
	git_pool *pool,
343
	const char *source,
344 345
	const char **base)
{
346
	const char *pattern, *scan;
347
	int slash_count, allow_space;
348

349
	assert(spec && base && *base);
350

351 352 353
	if (parse_optimized_patterns(spec, pool, *base))
		return 0;

354 355 356
	spec->flags = (spec->flags & GIT_ATTR_FNMATCH_ALLOWSPACE);
	allow_space = (spec->flags != 0);

357 358
	pattern = *base;

359
	while (git__isspace(*pattern)) pattern++;
360
	if (!*pattern || *pattern == '#') {
361 362
		*base = git__next_line(pattern);
		return GIT_ENOTFOUND;
363 364
	}

365 366 367 368 369
	if (*pattern == '[') {
		if (strncmp(pattern, "[attr]", 6) == 0) {
			spec->flags = spec->flags | GIT_ATTR_FNMATCH_MACRO;
			pattern += 6;
		}
370
		/* else a character range like [a-e]* which is accepted */
371 372
	}

373
	if (*pattern == '!') {
374
		spec->flags = spec->flags | GIT_ATTR_FNMATCH_NEGATIVE;
375 376 377 378 379
		pattern++;
	}

	slash_count = 0;
	for (scan = pattern; *scan != '\0'; ++scan) {
380
		/* scan until (non-escaped) white space */
381 382 383 384
		if (git__isspace(*scan) && *(scan - 1) != '\\') {
			if (!allow_space || (*scan != ' ' && *scan != '\t'))
				break;
		}
385 386

		if (*scan == '/') {
387
			spec->flags = spec->flags | GIT_ATTR_FNMATCH_FULLPATH;
388
			slash_count++;
389 390
			if (pattern == scan)
				pattern++;
391
		}
392
		/* remember if we see an unescaped wildcard in pattern */
393
		else if (git__iswildcard(*scan) &&
394 395
			(scan == pattern || (*(scan - 1) != '\\')))
			spec->flags = spec->flags | GIT_ATTR_FNMATCH_HASWILD;
396 397 398
	}

	*base = scan;
399

400 401 402 403
	spec->length = scan - pattern;

	if (pattern[spec->length - 1] == '/') {
		spec->length--;
404
		spec->flags = spec->flags | GIT_ATTR_FNMATCH_DIRECTORY;
405
		if (--slash_count <= 0)
406
			spec->flags = spec->flags & ~GIT_ATTR_FNMATCH_FULLPATH;
407 408
	}

409 410 411 412 413 414 415
	if ((spec->flags & GIT_ATTR_FNMATCH_FULLPATH) != 0 &&
		source != NULL && git_path_root(pattern) < 0)
	{
		size_t sourcelen = strlen(source);
		/* given an unrooted fullpath match from a file inside a repo,
		 * prefix the pattern with the relative directory of the source file
		 */
Russell Belfer committed
416 417
		spec->pattern = git_pool_malloc(
			pool, (uint32_t)(sourcelen + spec->length + 1));
418 419 420 421 422 423 424
		if (spec->pattern) {
			memcpy(spec->pattern, source, sourcelen);
			memcpy(spec->pattern + sourcelen, pattern, spec->length);
			spec->length += sourcelen;
			spec->pattern[spec->length] = '\0';
		}
	} else {
425
		spec->pattern = git_pool_strndup(pool, pattern, spec->length);
426 427 428 429
	}

	if (!spec->pattern) {
		*base = git__next_line(pattern);
430
		return -1;
431
	} else {
432
		/* strip '\' that might have be used for internal whitespace */
433
		spec->length = git__unescape(spec->pattern);
434 435
	}

436
	return 0;
437 438
}

439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
/*
 * Fast path for a `pattern` string that is exactly "*" or ".": mark the
 * spec as matching everything and store a one-character copy of the
 * pattern.
 *
 * Returns true when the spec was filled in.  Returns false, leaving `spec`
 * untouched, when the string is anything else or when the copy could not
 * be allocated; the caller then goes through regular parsing.
 */
static bool parse_optimized_patterns(
	git_attr_fnmatch *spec,
	git_pool *pool,
	const char *pattern)
{
	char *copy;

	/* look at pattern[0] first: for an empty string pattern[1] would be
	 * one byte past the terminator
	 */
	if (pattern[0] != '*' && pattern[0] != '.')
		return false;
	if (pattern[1] != '\0')
		return false;

	copy = git_pool_strndup(pool, pattern, 1);
	if (!copy)
		return false; /* never report success with a NULL pattern */

	spec->flags = GIT_ATTR_FNMATCH_MATCH_ALL;
	spec->pattern = copy;
	spec->length = 1;

	return true;
}

455 456 457 458 459 460 461 462 463 464 465 466 467
/*
 * Vector comparison callback for attribute names and assignments: ascending
 * by name hash, with ties broken by strcmp(b->name, a->name).
 */
static int sort_by_hash_and_name(const void *a_raw, const void *b_raw)
{
	const git_attr_name *lhs = a_raw;
	const git_attr_name *rhs = b_raw;

	if (lhs->name_hash != rhs->name_hash)
		return (rhs->name_hash < lhs->name_hash) ? 1 : -1;

	return strcmp(rhs->name, lhs->name);
}

468
/*
 * Free a single assignment object.  Its name and value strings are stored
 * in the git_pool associated with the git_attr_file, so only the pointers
 * are reset here before the object itself is freed.
 */
static void git_attr_assignment__free(git_attr_assignment *assign)
{
	assign->value = NULL;
	assign->name = NULL;

	git__free(assign);
}

478 479 480 481 482 483 484 485 486 487
/*
 * Duplicate handler passed to git_vector_insert_sorted: drop one reference
 * on the assignment currently stored in the slot, store the new assignment
 * there, and return GIT_EEXISTS.
 */
static int merge_assignments(void **old_raw, void *new_raw)
{
	git_attr_assignment **slot = (git_attr_assignment **)old_raw;

	GIT_REFCOUNT_DEC(*slot, git_attr_assignment__free);
	*slot = (git_attr_assignment *)new_raw;

	return GIT_EEXISTS;
}

488 489
int git_attr_assignment__parse(
	git_repository *repo,
490
	git_pool *pool,
491 492 493
	git_vector *assigns,
	const char **base)
{
494
	int error;
495 496 497 498 499
	const char *scan = *base;
	git_attr_assignment *assign = NULL;

	assert(assigns && !assigns->length);

500 501
	assigns->_cmp = sort_by_hash_and_name;

502
	while (*scan && *scan != '\n') {
503 504 505
		const char *name_start, *value_start;

		/* skip leading blanks */
506
		while (git__isspace(*scan) && *scan != '\n') scan++;
507 508 509 510

		/* allocate assign if needed */
		if (!assign) {
			assign = git__calloc(1, sizeof(git_attr_assignment));
511
			GITERR_CHECK_ALLOC(assign);
512
			GIT_REFCOUNT_INC(assign);
513 514 515
		}

		assign->name_hash = 5381;
516
		assign->value = git_attr__true;
517 518 519

		/* look for magic name prefixes */
		if (*scan == '-') {
520
			assign->value = git_attr__false;
521 522
			scan++;
		} else if (*scan == '!') {
523
			assign->value = git_attr__unset; /* explicit unspecified state */
524 525 526 527 528 529
			scan++;
		} else if (*scan == '#') /* comment rest of line */
			break;

		/* find the name */
		name_start = scan;
530
		while (*scan && !git__isspace(*scan) && *scan != '=') {
531 532 533 534
			assign->name_hash =
				((assign->name_hash << 5) + assign->name_hash) + *scan;
			scan++;
		}
535
		if (scan == name_start) {
536 537 538
			/* must have found lone prefix (" - ") or leading = ("=foo")
			 * or end of buffer -- advance until whitespace and continue
			 */
539
			while (*scan && !git__isspace(*scan)) scan++;
540 541 542
			continue;
		}

543
		/* allocate permanent storage for name */
544
		assign->name = git_pool_strndup(pool, name_start, scan - name_start);
545
		GITERR_CHECK_ALLOC(assign->name);
546

547 548
		/* if there is an equals sign, find the value */
		if (*scan == '=') {
549
			for (value_start = ++scan; *scan && !git__isspace(*scan); ++scan);
550 551 552

			/* if we found a value, allocate permanent storage for it */
			if (scan > value_start) {
553
				assign->value = git_pool_strndup(pool, value_start, scan - value_start);
554
				GITERR_CHECK_ALLOC(assign->value);
555 556 557
			}
		}

558
		/* expand macros (if given a repo with a macro cache) */
559
		if (repo != NULL && assign->value == git_attr__true) {
560 561
			git_attr_rule *macro =
				git_attr_cache__lookup_macro(repo, assign->name);
562 563 564 565 566

			if (macro != NULL) {
				unsigned int i;
				git_attr_assignment *massign;

567 568
				git_vector_foreach(&macro->assigns, i, massign) {
					GIT_REFCOUNT_INC(massign);
569

570 571
					error = git_vector_insert_sorted(
						assigns, massign, &merge_assignments);
572 573
					if (error < 0 && error != GIT_EEXISTS)
						return error;
574 575
				}
			}
576 577 578
		}

		/* insert allocated assign into vector */
579
		error = git_vector_insert_sorted(assigns, assign, &merge_assignments);
580 581
		if (error < 0 && error != GIT_EEXISTS)
			return error;
582 583 584 585 586

		/* clear assign since it is now "owned" by the vector */
		assign = NULL;
	}

587
	if (assign != NULL)
588
		git_attr_assignment__free(assign);
589

590
	*base = git__next_line(scan);
591

592
	return (assigns->length == 0) ? GIT_ENOTFOUND : 0;
593 594
}

Russell Belfer committed
595
/*
 * Reset a rule so that it can be reused or freed.  Unless the rule's
 * pattern carries the IGNORE flag, one reference is dropped on each of its
 * assignments and the assignment vector is released.  A NULL rule is
 * accepted and ignored.
 */
static void git_attr_rule__clear(git_attr_rule *rule)
{
	if (rule == NULL)
		return;

	if ((rule->match.flags & GIT_ATTR_FNMATCH_IGNORE) == 0) {
		git_attr_assignment *assign;
		unsigned int i;

		git_vector_foreach(&rule->assigns, i, assign) {
			GIT_REFCOUNT_DEC(assign, git_attr_assignment__free);
		}
		git_vector_free(&rule->assigns);
	}

	/* the pattern string lives in a git_pool, so it is not freed here */
	rule->match.length = 0;
	rule->match.pattern = NULL;
}
Russell Belfer committed
613 614 615 616 617 618 619

/* Clear a rule's contents and free the rule object; NULL is ignored. */
void git_attr_rule__free(git_attr_rule *rule)
{
	if (rule == NULL)
		return;

	git_attr_rule__clear(rule);
	git__free(rule);
}