attr_file.c 15.1 KB
Newer Older
1
#include "common.h"
2
#include "repository.h"
3
#include "filebuf.h"
4
#include "attr.h"
5 6
#include "git2/blob.h"
#include "git2/tree.h"
7 8 9
#include <ctype.h>

static int sort_by_hash_and_name(const void *a_raw, const void *b_raw);
Russell Belfer committed
10
static void git_attr_rule__clear(git_attr_rule *rule);
11 12 13 14
static bool parse_optimized_patterns(
	git_attr_fnmatch *spec,
	git_pool *pool,
	const char *pattern);
15

16 17 18 19 20
/*
 * Allocate a new, empty attribute file object.
 *
 * attrs_ptr - receives the new object on success and is set to NULL on
 *             every failure path
 * from      - source kind; stored as the first character of the key
 *             ('0' + from)
 * path      - optional; when non-NULL the key becomes "<from>#<path>"
 * pool      - optional pool to borrow for string storage; when NULL a
 *             private pool is allocated and owned by the new object
 *
 * Returns 0 on success and -1 on failure.
 */
int git_attr_file__new(
	git_attr_file **attrs_ptr,
	git_attr_file_source from,
	const char *path,
	git_pool *pool)
{
	git_attr_file *attrs = NULL;

	/* never leave a stale pointer behind if we bail out below */
	*attrs_ptr = NULL;

	attrs = git__calloc(1, sizeof(git_attr_file));
	GITERR_CHECK_ALLOC(attrs);

	if (pool)
		attrs->pool = pool;
	else {
		attrs->pool = git__calloc(1, sizeof(git_pool));
		if (!attrs->pool)
			goto fail;

		if (git_pool_init(attrs->pool, 1, 0) < 0) {
			/* pool_is_allocated is still false, so git_attr_file__free
			 * would not release this block; do it here
			 */
			git__free(attrs->pool);
			attrs->pool = NULL;
			goto fail;
		}

		attrs->pool_is_allocated = true;
	}

	if (path) {
		size_t len = strlen(path);

		/* source char + '#' + path + NUL */
		attrs->key = git_pool_malloc(attrs->pool, (uint32_t)len + 3);
		if (!attrs->key)
			goto fail; /* a plain return here would leak attrs */

		attrs->key[0] = '0' + (char)from;
		attrs->key[1] = '#';
		memcpy(&attrs->key[2], path, len);
		attrs->key[len + 2] = '\0';
	}

	if (git_vector_init(&attrs->rules, 4, NULL) < 0)
		goto fail;

	*attrs_ptr = attrs;
	return 0;

fail:
	git_attr_file__free(attrs);
	*attrs_ptr = NULL;
	return -1;
}

60
int git_attr_file__parse_buffer(
61
	git_repository *repo, void *parsedata, const char *buffer, git_attr_file *attrs)
62
{
63
	int error = 0;
64
	const char *scan = NULL;
65
	char *context = NULL;
66 67
	git_attr_rule *rule = NULL;

68 69
	GIT_UNUSED(parsedata);

70
	assert(buffer && attrs);
71 72 73

	scan = buffer;

74 75 76 77
	/* if subdir file path, convert context for file paths */
	if (attrs->key && git__suffixcmp(attrs->key, "/" GIT_ATTR_FILE) == 0) {
		context = attrs->key + 2;
		context[strlen(context) - strlen(GIT_ATTR_FILE)] = '\0';
78 79
	}

80
	while (!error && *scan) {
81
		/* allocate rule if needed */
82 83 84 85 86 87 88
		if (!rule) {
			if (!(rule = git__calloc(1, sizeof(git_attr_rule)))) {
				error = -1;
				break;
			}
			rule->match.flags = GIT_ATTR_FNMATCH_ALLOWNEG |
				GIT_ATTR_FNMATCH_ALLOWMACRO;
89 90 91
		}

		/* parse the next "pattern attr attr attr" line */
92
		if (!(error = git_attr_fnmatch__parse(
93 94 95
				&rule->match, attrs->pool, context, &scan)) &&
			!(error = git_attr_assignment__parse(
				repo, attrs->pool, &rule->assigns, &scan)))
96 97 98 99 100 101 102 103 104
		{
			if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO)
				/* should generate error/warning if this is coming from any
				 * file other than .gitattributes at repo root.
				 */
				error = git_attr_cache__insert_macro(repo, rule);
			else
				error = git_vector_insert(&attrs->rules, rule);
		}
105 106

		/* if the rule wasn't a pattern, on to the next */
107
		if (error < 0) {
Russell Belfer committed
108
			git_attr_rule__clear(rule); /* reset rule contents */
109
			if (error == GIT_ENOTFOUND)
110
				error = 0;
111 112 113 114 115
		} else {
			rule = NULL; /* vector now "owns" the rule */
		}
	}

116
	git_attr_rule__free(rule);
117 118 119 120

	/* restore file path used for context */
	if (context)
		context[strlen(context)] = '.'; /* first char of GIT_ATTR_FILE */
121 122 123 124

	return error;
}

125 126 127
/*
 * Create an attribute file object and fill it from the file at `path`.
 *
 * The object is created with source kind 0 and a private pool, the file is
 * read into a temporary buffer, and the buffer is parsed without a
 * repository (so no macro cache is consulted).
 *
 * On success returns 0 and stores the object in *attrs_ptr.  If reading or
 * parsing fails, the object is freed, *attrs_ptr is set to NULL and the
 * error code is returned.
 */
int git_attr_file__new_and_load(
	git_attr_file **attrs_ptr,
	const char *path)
{
	int error;
	git_buf content = GIT_BUF_INIT;

	if ((error = git_attr_file__new(attrs_ptr, 0, path, NULL)) < 0)
		return error;

	if (!(error = git_futils_readbuffer(&content, path)))
		error = git_attr_file__parse_buffer(
			NULL, NULL, git_buf_cstr(&content), *attrs_ptr);

	/* the parsed rules do not reference the read buffer's lifetime here;
	 * it is released on both the success and the failure path
	 */
	git_buf_free(&content);

	if (error) {
		git_attr_file__free(*attrs_ptr);
		*attrs_ptr = NULL;
	}

	return error;
}

149
/*
 * Free every rule held in file->rules and then release the vector itself.
 * The file object and its pool are left alone.
 */
void git_attr_file__clear_rules(git_attr_file *file)
{
	unsigned int i;
	git_attr_rule *rule;

	git_vector_foreach(&file->rules, i, rule)
		git_attr_rule__free(rule);

	git_vector_free(&file->rules);
}

/*
 * Destroy an attribute file object.  Accepts NULL.
 *
 * Rules are released first.  The pool is cleared and freed only when this
 * object allocated it itself (pool_is_allocated); a borrowed pool is just
 * detached.
 */
void git_attr_file__free(git_attr_file *file)
{
	if (!file)
		return;

	git_attr_file__clear_rules(file);

	if (file->pool_is_allocated) {
		git_pool_clear(file->pool);
		git__free(file->pool);
	}
	file->pool = NULL;

	git__free(file);
}

176
/*
 * Hash an attribute name: start at 5381 and, for each character, compute
 * h = h * 33 + c.
 *
 * The character is converted through plain `char`, exactly as the inline
 * hash in git_attr_assignment__parse does, so both produce the same value
 * for the same name.  `name` must not be NULL.
 */
uint32_t git_attr_file__name_hash(const char *name)
{
	uint32_t h = 5381;
	int c;

	assert(name);

	while ((c = (int)*name++) != 0)
		h = ((h << 5) + h) + c;

	return h;
}


/*
 * Look up a single attribute for `path` in one attribute file.
 *
 * *value is set to NULL up front.  The rules that match `path` are visited
 * in the order produced by git_attr_file__foreach_matching_rule, and the
 * first one that carries an assignment for `attr` supplies *value; the
 * search stops there.
 *
 * Always returns 0; "not found" is reported as *value == NULL.
 */
int git_attr_file__lookup_one(
	git_attr_file *file,
	const git_attr_path *path,
	const char *attr,
	const char **value)
{
	size_t i;
	git_attr_name name;
	git_attr_rule *rule;

	*value = NULL;

	/* search key: assignments are ordered by (hash, name) */
	name.name = attr;
	name.name_hash = git_attr_file__name_hash(attr);

	git_attr_file__foreach_matching_rule(file, path, i, rule) {
		size_t pos;

		if (!git_vector_bsearch(&pos, &rule->assigns, &name)) {
			*value = ((git_attr_assignment *)
					  git_vector_get(&rule->assigns, pos))->value;
			break;
		}
	}

	return 0;
}


216
bool git_attr_fnmatch__match(
217
	git_attr_fnmatch *match,
218 219
	const git_attr_path *path)
{
220
	int fnm;
221
	int icase_flags = (match->flags & GIT_ATTR_FNMATCH_ICASE) ? FNM_CASEFOLD : 0;
222

223
	if (match->flags & GIT_ATTR_FNMATCH_DIRECTORY && !path->is_dir)
224
		return false;
225

226
	if (match->flags & GIT_ATTR_FNMATCH_FULLPATH)
227
		fnm = p_fnmatch(match->pattern, path->path, FNM_PATHNAME | icase_flags);
228
	else if (path->is_dir)
229
		fnm = p_fnmatch(match->pattern, path->basename, FNM_LEADING_DIR | icase_flags);
230
	else
231
		fnm = p_fnmatch(match->pattern, path->basename, icase_flags);
232

233
	return (fnm == FNM_NOMATCH) ? false : true;
234 235
}

236
bool git_attr_rule__match(
237 238 239
	git_attr_rule *rule,
	const git_attr_path *path)
{
240
	bool matched = git_attr_fnmatch__match(&rule->match, path);
241

242
	if (rule->match.flags & GIT_ATTR_FNMATCH_NEGATIVE)
243
		matched = !matched;
244 245 246 247

	return matched;
}

248

249 250 251
/*
 * Find the assignment for attribute `name` within one rule.
 *
 * Builds a (hash, name) key and binary-searches rule->assigns with it.
 * Returns the assignment, or NULL when the search reports no entry.
 */
git_attr_assignment *git_attr_rule__lookup_assignment(
	git_attr_rule *rule, const char *name)
{
	size_t pos;
	git_attr_name key;

	key.name = name;
	key.name_hash = git_attr_file__name_hash(name);

	if (git_vector_bsearch(&pos, &rule->assigns, &key))
		return NULL;

	return git_vector_get(&rule->assigns, pos);
}

int git_attr_path__init(
264
	git_attr_path *info, const char *path, const char *base)
265
{
266 267
	ssize_t root;

268 269
	/* build full path as best we can */
	git_buf_init(&info->full, 0);
270

271 272 273 274
	if (git_path_join_unrooted(&info->full, path, base, &root) < 0)
		return -1;

	info->path = info->full.ptr + root;
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295

	/* remove trailing slashes */
	while (info->full.size > 0) {
		if (info->full.ptr[info->full.size - 1] != '/')
			break;
		info->full.size--;
	}
	info->full.ptr[info->full.size] = '\0';

	/* skip leading slashes in path */
	while (*info->path == '/')
		info->path++;

	/* find trailing basename component */
	info->basename = strrchr(info->path, '/');
	if (info->basename)
		info->basename++;
	if (!info->basename || !*info->basename)
		info->basename = info->path;

	info->is_dir = (int)git_path_isdir(info->full.ptr);
296

297
	return 0;
298 299
}

300 301 302 303 304 305 306
/*
 * Release what git_attr_path__init set up: the two pointers are cleared and
 * the buffer behind info->full is freed.
 */
void git_attr_path__free(git_attr_path *info)
{
	/* drop the two pointers before releasing the buffer */
	info->basename = NULL;
	info->path = NULL;

	git_buf_free(&info->full);
}

307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
/*
 * From gitattributes(5):
 *
 * Patterns have the following format:
 *
 * - A blank line matches no files, so it can serve as a separator for
 *   readability.
 *
 * - A line starting with # serves as a comment.
 *
 * - An optional prefix ! which negates the pattern; any matching file
 *   excluded by a previous pattern will become included again. If a negated
 *   pattern matches, this will override lower precedence patterns sources.
 *
 * - If the pattern ends with a slash, it is removed for the purpose of the
 *   following description, but it would only find a match with a directory. In
 *   other words, foo/ will match a directory foo and paths underneath it, but
 *   will not match a regular file or a symbolic link foo (this is consistent
 *   with the way how pathspec works in general in git).
 *
 * - If the pattern does not contain a slash /, git treats it as a shell glob
 *   pattern and checks for a match against the pathname without leading
 *   directories.
 *
 * - Otherwise, git treats the pattern as a shell glob suitable for consumption
 *   by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
 *   not match a / in the pathname. For example, "Documentation/\*.html" matches
 *   "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading
 *   slash matches the beginning of the pathname; for example, "/\*.c" matches
 *   "cat-file.c" but not "mozilla-sha1/sha1.c".
 */

/*
340
 * This will return 0 if the spec was filled out,
341 342 343
 * GIT_ENOTFOUND if the fnmatch does not require matching, or
 * another error code there was an actual problem.
 */
344
int git_attr_fnmatch__parse(
345
	git_attr_fnmatch *spec,
346
	git_pool *pool,
347
	const char *source,
348 349
	const char **base)
{
350 351
	const char *pattern, *scan;
	int slash_count, allow_space;
352

353
	assert(spec && base && *base);
354

355 356 357 358 359 360
	if (parse_optimized_patterns(spec, pool, *base))
		return 0;

	spec->flags = (spec->flags & GIT_ATTR_FNMATCH__INCOMING);
	allow_space = ((spec->flags & GIT_ATTR_FNMATCH_ALLOWSPACE) != 0);

361 362
	pattern = *base;

363
	while (git__isspace(*pattern)) pattern++;
364
	if (!*pattern || *pattern == '#') {
365 366
		*base = git__next_line(pattern);
		return GIT_ENOTFOUND;
367 368
	}

369
	if (*pattern == '[' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWMACRO) != 0) {
370 371 372 373
		if (strncmp(pattern, "[attr]", 6) == 0) {
			spec->flags = spec->flags | GIT_ATTR_FNMATCH_MACRO;
			pattern += 6;
		}
374
		/* else a character range like [a-e]* which is accepted */
375 376
	}

377
	if (*pattern == '!' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWNEG) != 0) {
378
		spec->flags = spec->flags | GIT_ATTR_FNMATCH_NEGATIVE;
379 380 381 382 383
		pattern++;
	}

	slash_count = 0;
	for (scan = pattern; *scan != '\0'; ++scan) {
384
		/* scan until (non-escaped) white space */
385 386 387 388
		if (git__isspace(*scan) && *(scan - 1) != '\\') {
			if (!allow_space || (*scan != ' ' && *scan != '\t'))
				break;
		}
389 390

		if (*scan == '/') {
391
			spec->flags = spec->flags | GIT_ATTR_FNMATCH_FULLPATH;
392
			slash_count++;
393 394
			if (pattern == scan)
				pattern++;
395
		}
396
		/* remember if we see an unescaped wildcard in pattern */
397
		else if (git__iswildcard(*scan) &&
398 399
			(scan == pattern || (*(scan - 1) != '\\')))
			spec->flags = spec->flags | GIT_ATTR_FNMATCH_HASWILD;
400 401 402
	}

	*base = scan;
403

404 405
	if ((spec->length = scan - pattern) == 0)
		return GIT_ENOTFOUND;
406 407 408

	if (pattern[spec->length - 1] == '/') {
		spec->length--;
409
		spec->flags = spec->flags | GIT_ATTR_FNMATCH_DIRECTORY;
410
		if (--slash_count <= 0)
411
			spec->flags = spec->flags & ~GIT_ATTR_FNMATCH_FULLPATH;
412 413
	}

414 415 416 417 418 419 420
	if ((spec->flags & GIT_ATTR_FNMATCH_FULLPATH) != 0 &&
		source != NULL && git_path_root(pattern) < 0)
	{
		size_t sourcelen = strlen(source);
		/* given an unrooted fullpath match from a file inside a repo,
		 * prefix the pattern with the relative directory of the source file
		 */
Russell Belfer committed
421 422
		spec->pattern = git_pool_malloc(
			pool, (uint32_t)(sourcelen + spec->length + 1));
423 424 425 426 427 428 429
		if (spec->pattern) {
			memcpy(spec->pattern, source, sourcelen);
			memcpy(spec->pattern + sourcelen, pattern, spec->length);
			spec->length += sourcelen;
			spec->pattern[spec->length] = '\0';
		}
	} else {
430
		spec->pattern = git_pool_strndup(pool, pattern, spec->length);
431 432 433 434
	}

	if (!spec->pattern) {
		*base = git__next_line(pattern);
435
		return -1;
436
	} else {
437
		/* strip '\' that might have be used for internal whitespace */
438
		spec->length = git__unescape(spec->pattern);
439 440
	}

441
	return 0;
442 443
}

444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
/*
 * Fast path for the one-character patterns "*" and ".", which are stored
 * with the MATCH_ALL flag.
 *
 * Returns true and fills in `spec` when `pattern` is exactly one of those
 * strings.  Returns false otherwise, including when the pattern copy cannot
 * be allocated; in that case spec->flags and spec->length are untouched so
 * the caller can fall back to the general parser.
 */
static bool parse_optimized_patterns(
	git_attr_fnmatch *spec,
	git_pool *pool,
	const char *pattern)
{
	/* look at pattern[0] first: for an empty string pattern[1] would be
	 * one past the terminator
	 */
	if (pattern[0] != '*' && pattern[0] != '.')
		return false;
	if (pattern[1] != '\0')
		return false;

	spec->pattern = git_pool_strndup(pool, pattern, 1);
	if (!spec->pattern)
		return false;

	spec->flags = GIT_ATTR_FNMATCH_MATCH_ALL;
	spec->length = 1;

	return true;
}

460 461 462 463 464 465 466 467 468 469 470 471 472
/*
 * Ordering callback for attribute names: primary key is name_hash in
 * ascending order; equal hashes are resolved with strcmp(b, a), i.e. the
 * string comparison has its arguments reversed.
 */
static int sort_by_hash_and_name(const void *a_raw, const void *b_raw)
{
	const git_attr_name *lhs = a_raw;
	const git_attr_name *rhs = b_raw;

	if (lhs->name_hash != rhs->name_hash)
		return (lhs->name_hash > rhs->name_hash) ? 1 : -1;

	return strcmp(rhs->name, lhs->name);
}

473
/*
 * Release one assignment structure.
 */
static void git_attr_assignment__free(git_attr_assignment *assign)
{
	/* name and value are stored in a git_pool associated with the
	 * git_attr_file, so they do not need to be freed here
	 */
	assign->name = NULL;
	assign->value = NULL;
	git__free(assign);
}

483 484 485 486 487 488 489 490 491 492
static int merge_assignments(void **old_raw, void *new_raw)
{
	git_attr_assignment **old = (git_attr_assignment **)old_raw;
	git_attr_assignment *new = (git_attr_assignment *)new_raw;

	GIT_REFCOUNT_DEC(*old, git_attr_assignment__free);
	*old = new;
	return GIT_EEXISTS;
}

493 494
int git_attr_assignment__parse(
	git_repository *repo,
495
	git_pool *pool,
496 497 498
	git_vector *assigns,
	const char **base)
{
499
	int error;
500 501 502 503 504
	const char *scan = *base;
	git_attr_assignment *assign = NULL;

	assert(assigns && !assigns->length);

505
	git_vector_set_cmp(assigns, sort_by_hash_and_name);
506

507
	while (*scan && *scan != '\n') {
508 509 510
		const char *name_start, *value_start;

		/* skip leading blanks */
511
		while (git__isspace(*scan) && *scan != '\n') scan++;
512 513 514 515

		/* allocate assign if needed */
		if (!assign) {
			assign = git__calloc(1, sizeof(git_attr_assignment));
516
			GITERR_CHECK_ALLOC(assign);
517
			GIT_REFCOUNT_INC(assign);
518 519 520
		}

		assign->name_hash = 5381;
521
		assign->value = git_attr__true;
522 523 524

		/* look for magic name prefixes */
		if (*scan == '-') {
525
			assign->value = git_attr__false;
526 527
			scan++;
		} else if (*scan == '!') {
528
			assign->value = git_attr__unset; /* explicit unspecified state */
529 530 531 532 533 534
			scan++;
		} else if (*scan == '#') /* comment rest of line */
			break;

		/* find the name */
		name_start = scan;
535
		while (*scan && !git__isspace(*scan) && *scan != '=') {
536 537 538 539
			assign->name_hash =
				((assign->name_hash << 5) + assign->name_hash) + *scan;
			scan++;
		}
540
		if (scan == name_start) {
541 542 543
			/* must have found lone prefix (" - ") or leading = ("=foo")
			 * or end of buffer -- advance until whitespace and continue
			 */
544
			while (*scan && !git__isspace(*scan)) scan++;
545 546 547
			continue;
		}

548
		/* allocate permanent storage for name */
549
		assign->name = git_pool_strndup(pool, name_start, scan - name_start);
550
		GITERR_CHECK_ALLOC(assign->name);
551

552 553
		/* if there is an equals sign, find the value */
		if (*scan == '=') {
554
			for (value_start = ++scan; *scan && !git__isspace(*scan); ++scan);
555 556 557

			/* if we found a value, allocate permanent storage for it */
			if (scan > value_start) {
558
				assign->value = git_pool_strndup(pool, value_start, scan - value_start);
559
				GITERR_CHECK_ALLOC(assign->value);
560 561 562
			}
		}

563
		/* expand macros (if given a repo with a macro cache) */
564
		if (repo != NULL && assign->value == git_attr__true) {
565 566
			git_attr_rule *macro =
				git_attr_cache__lookup_macro(repo, assign->name);
567 568 569 570 571

			if (macro != NULL) {
				unsigned int i;
				git_attr_assignment *massign;

572 573
				git_vector_foreach(&macro->assigns, i, massign) {
					GIT_REFCOUNT_INC(massign);
574

575 576
					error = git_vector_insert_sorted(
						assigns, massign, &merge_assignments);
577 578
					if (error < 0 && error != GIT_EEXISTS)
						return error;
579 580
				}
			}
581 582 583
		}

		/* insert allocated assign into vector */
584
		error = git_vector_insert_sorted(assigns, assign, &merge_assignments);
585 586
		if (error < 0 && error != GIT_EEXISTS)
			return error;
587 588 589 590 591

		/* clear assign since it is now "owned" by the vector */
		assign = NULL;
	}

592
	if (assign != NULL)
593
		git_attr_assignment__free(assign);
594

595
	*base = git__next_line(scan);
596

597
	return (assigns->length == 0) ? GIT_ENOTFOUND : 0;
598 599
}

Russell Belfer committed
600
/*
 * Reset a rule so the structure can be reused or freed.  Accepts NULL.
 *
 * Unless the rule carries the IGNORE flag, one reference is dropped on each
 * assignment and the assigns vector is released.  The pattern pointer and
 * length are cleared in every case; match.flags is left as it is.
 */
static void git_attr_rule__clear(git_attr_rule *rule)
{
	unsigned int i;
	git_attr_assignment *assign;

	if (!rule)
		return;

	if (!(rule->match.flags & GIT_ATTR_FNMATCH_IGNORE)) {
		git_vector_foreach(&rule->assigns, i, assign)
			GIT_REFCOUNT_DEC(assign, git_attr_assignment__free);
		git_vector_free(&rule->assigns);
	}

	/* match.pattern is stored in a git_pool, so no need to free */
	rule->match.pattern = NULL;
	rule->match.length = 0;
}
Russell Belfer committed
618 619 620 621 622 623 624

/*
 * Destroy a rule: reset its contents, then release the structure itself.
 * A NULL rule is accepted and ignored.
 */
void git_attr_rule__free(git_attr_rule *rule)
{
	if (rule == NULL)
		return;

	git_attr_rule__clear(rule);
	git__free(rule);
}