diff_driver.c 12.3 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
7 8

#include "diff_driver.h"
9 10 11

#include "git2/attr.h"

12
#include "common.h"
13 14 15 16
#include "diff.h"
#include "strmap.h"
#include "map.h"
#include "buf_text.h"
17
#include "config.h"
18
#include "repository.h"
19 20 21

/*
 * Kinds of diff driver.  The values of the first three kinds are also
 * used as indexes into the `global_drivers` table.
 */
typedef enum {
	DIFF_DRIVER_AUTO = 0,
	DIFF_DRIVER_BINARY = 1,
	DIFF_DRIVER_TEXT = 2,
	DIFF_DRIVER_PATTERNLIST = 3,
} git_diff_driver_t;

27
typedef struct {
28
	p_regex_t re;
29 30 31
	int flags;
} git_diff_driver_pattern;

32
/*
 * Extra pattern flag stored alongside the regex compile flags: a line
 * matching a negated pattern is rejected as a context line.
 */
enum {
	REG_NEGATE = (1 << 15) /* get out of the way of existing flags */
};

/* data for finding function context for a given file type */
struct git_diff_driver {
	git_diff_driver_t type;
39 40
	uint32_t binary_flags;
	uint32_t other_flags;
41
	git_array_t(git_diff_driver_pattern) fn_patterns;
42
	p_regex_t  word_pattern;
43
	char name[GIT_FLEX_ARRAY];
44 45
};

46
#include "userdiff.h"
47

48 49 50 51
/* set of loaded diff drivers, keyed by driver name */
struct git_diff_driver_registry {
	git_strmap *drivers;
};

52 53
/* option bits that mean the text/binary decision has already been forced */
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)

54
static git_diff_driver global_drivers[3] = {
55 56 57
	{ DIFF_DRIVER_AUTO,   0, 0, },
	{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
	{ DIFF_DRIVER_TEXT,   GIT_DIFF_FORCE_TEXT, 0 },
58 59
};

60
git_diff_driver_registry *git_diff_driver_registry_new(void)
61
{
62 63 64 65 66
	git_diff_driver_registry *reg =
		git__calloc(1, sizeof(git_diff_driver_registry));
	if (!reg)
		return NULL;

67
	if (git_strmap_new(&reg->drivers) < 0) {
68 69 70 71 72
		git_diff_driver_registry_free(reg);
		return NULL;
	}

	return reg;
73 74 75 76
}

/*
 * Free a registry together with every driver stored in it.
 * Passing NULL is a no-op.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *drv;

	if (!reg)
		return;

	/* the registry owns its drivers: release each one before the map */
	git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv));
	git_strmap_free(reg->drivers);
	git__free(reg);
}

87 88
/*
 * Compile a newline-separated list of regular expressions and append
 * them to the driver's function-context patterns.
 *
 * A pattern that starts with '!' is stored with REG_NEGATE set (the
 * '!' itself is not part of the regex).  A pattern that fails to
 * compile is dropped and the remaining patterns are still processed,
 * so bad patterns never cause a failure.
 *
 * Returns 0 on success, or a negative value if memory for a pattern
 * slot or for the scratch buffer could not be allocated.
 */
static int diff_driver_add_patterns(
	git_diff_driver *drv, const char *regex_str, int regex_flags)
{
	int error = 0;
	const char *scan, *end;
	git_diff_driver_pattern *pat;
	git_buf buf = GIT_BUF_INIT;

	for (scan = regex_str; scan; scan = end) {
		/* get pattern to fill in */
		if ((pat = git_array_alloc(drv->fn_patterns)) == NULL) {
			error = -1;
			break;
		}

		pat->flags = regex_flags;
		if (*scan == '!') {
			pat->flags |= REG_NEGATE;
			++scan;
		}

		/* copy out one line; `end` becomes NULL after the last one */
		if ((end = strchr(scan, '\n')) != NULL) {
			error = git_buf_set(&buf, scan, end - scan);
			end++;
		} else {
			error = git_buf_sets(&buf, scan);
		}

		if (error < 0) {
			/* the slot was never compiled; do not leave it in the array */
			(void)git_array_pop(drv->fn_patterns);
			break;
		}

		if (p_regcomp(&pat->re, buf.ptr, regex_flags) != 0) {
			/*
			 * TODO: issue a warning
			 *
			 * Drop the slot right away: a regex that failed to compile
			 * must never reach p_regexec() or p_regfree().
			 */
			(void)git_array_pop(drv->fn_patterns);
		}
	}

	git_buf_dispose(&buf);

	/* bad patterns are ignored; only allocation failures are reported */
	return error;
}

static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
{
133
	return diff_driver_add_patterns(payload, entry->value, P_REG_EXTENDED);
134 135 136 137
}

static int diff_driver_funcname(const git_config_entry *entry, void *payload)
{
138
	return diff_driver_add_patterns(payload, entry->value, 0);
139 140 141 142 143 144 145 146 147 148 149 150 151 152
}

/*
 * Return the repository's diff driver registry, creating and
 * installing one on first use.
 *
 * Returns NULL, with an error message set, if no registry is
 * available afterwards.
 */
static git_diff_driver_registry *git_repository_driver_registry(
	git_repository *repo)
{
	if (!repo->diff_drivers) {
		git_diff_driver_registry *reg = git_diff_driver_registry_new();
		reg = git__compare_and_swap(&repo->diff_drivers, NULL, reg);

		if (reg != NULL) /* if we race, free losing allocation */
			git_diff_driver_registry_free(reg);
	}

	if (!repo->diff_drivers)
		git_error_set(GIT_ERROR_REPOSITORY, "unable to create diff driver registry");

	return repo->diff_drivers;
}

158 159 160 161 162
/*
 * Allocate a zero-filled driver with room for a copy of `name`.
 *
 * out         - receives the new driver
 * namelen_out - if non-NULL, receives strlen(name)
 * name        - driver name, copied into the driver
 *
 * Returns 0 on success; the allocation-check macros return early on
 * size overflow or allocation failure.
 */
static int diff_driver_alloc(
	git_diff_driver **out, size_t *namelen_out, const char *name)
{
	git_diff_driver *driver;
	size_t driverlen = sizeof(git_diff_driver),
		namelen = strlen(name),
		alloclen;

	/* struct + name + terminating NUL, with overflow checks */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, driverlen, namelen);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);

	driver = git__calloc(1, alloclen);
	GIT_ERROR_CHECK_ALLOC(driver);

	/* the terminator is already in place because the block is zeroed */
	memcpy(driver->name, name, namelen);

	*out = driver;

	if (namelen_out)
		*namelen_out = namelen;

	return 0;
}

182 183 184 185 186 187 188
/*
 * Build a driver from the built-in definition table and store it in
 * the registry.
 *
 * The name is compared case-insensitively against `builtin_defs`.  If
 * nothing matches, *out is set to NULL and 0 is returned.  On success
 * *out is the new driver, which the registry now holds.  On failure
 * the partially built driver is freed and the error is returned.
 */
static int git_diff_driver_builtin(
	git_diff_driver **out,
	git_diff_driver_registry *reg,
	const char *driver_name)
{
	git_diff_driver_definition *ddef = NULL;
	git_diff_driver *drv = NULL;
	int error = 0;
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(builtin_defs); ++idx) {
		if (!strcasecmp(driver_name, builtin_defs[idx].name)) {
			ddef = &builtin_defs[idx];
			break;
		}
	}
	if (!ddef)
		goto done;

	if ((error = diff_driver_alloc(&drv, NULL, ddef->name)) < 0)
		goto done;

	drv->type = DIFF_DRIVER_PATTERNLIST;

	if (ddef->fns &&
		(error = diff_driver_add_patterns(
			drv, ddef->fns, ddef->flags | P_REG_EXTENDED)) < 0)
		goto done;

	if (ddef->words &&
		(error = p_regcomp(
			&drv->word_pattern, ddef->words, ddef->flags | P_REG_EXTENDED)))
	{
		error = git_error_set_regex(&drv->word_pattern, error);
		goto done;
	}

	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
		goto done;

done:
	if (error && drv)
		git_diff_driver_free(drv);
	else
		*out = drv;

	return error;
}

231
static int git_diff_driver_load(
232
	git_diff_driver **out, git_repository *repo, const char *driver_name)
233
{
234
	int error = 0;
235
	git_diff_driver_registry *reg;
236 237
	git_diff_driver *drv;
	size_t namelen;
238
	git_config *cfg = NULL;
239
	git_buf name = GIT_BUF_INIT;
240
	git_config_entry *ce = NULL;
241
	bool found_driver = false;
242

243
	if ((reg = git_repository_driver_registry(repo)) == NULL)
244
		return -1;
245

246 247
	if ((drv = git_strmap_get(reg->drivers, driver_name)) != NULL) {
		*out = drv;
248
		return 0;
249 250
	}

251 252 253
	if ((error = diff_driver_alloc(&drv, &namelen, driver_name)) < 0)
		goto done;

254 255
	drv->type = DIFF_DRIVER_AUTO;

256
	/* if you can't read config for repo, just use default driver */
257
	if (git_repository_config_snapshot(&cfg, repo) < 0) {
258
		git_error_clear();
259
		goto done;
260 261 262
	}

	if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0)
263
		goto done;
264 265 266

	switch (git_config__get_bool_force(cfg, name.ptr, -1)) {
	case true:
267 268
		/* if diff.<driver>.binary is true, just return the binary driver */
		*out = &global_drivers[DIFF_DRIVER_BINARY];
269
		goto done;
270
	case false:
271 272 273
		/* if diff.<driver>.binary is false, force binary checks off */
		/* but still may have custom function context patterns, etc. */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
274
		found_driver = true;
275 276
		break;
	default:
277
		/* diff.<driver>.binary unspecified or "auto", so just continue */
278
		break;
279 280 281 282
	}

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

283
	git_buf_truncate(&name, namelen + strlen("diff.."));
284 285 286
	if ((error = git_buf_PUTS(&name, "xfuncname")) < 0)
		goto done;

287
	if ((error = git_config_get_multivar_foreach(
288 289
			cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
290
			goto done;
291
		git_error_clear(); /* no diff.<driver>.xfuncname, so just continue */
292
	}
293

294
	git_buf_truncate(&name, namelen + strlen("diff.."));
295 296 297
	if ((error = git_buf_PUTS(&name, "funcname")) < 0)
		goto done;

298
	if ((error = git_config_get_multivar_foreach(
299 300
			cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
301
			goto done;
302
		git_error_clear(); /* no diff.<driver>.funcname, so just continue */
303 304 305
	}

	/* if we found any patterns, set driver type to use correct callback */
306
	if (git_array_size(drv->fn_patterns) > 0) {
307
		drv->type = DIFF_DRIVER_PATTERNLIST;
308 309
		found_driver = true;
	}
310

311
	git_buf_truncate(&name, namelen + strlen("diff.."));
312 313 314
	if ((error = git_buf_PUTS(&name, "wordregex")) < 0)
		goto done;

315
	if ((error = git_config__lookup_entry(&ce, cfg, name.ptr, false)) < 0)
316
		goto done;
317 318
	if (!ce || !ce->value)
		/* no diff.<driver>.wordregex, so just continue */;
319
	else if (!(error = p_regcomp(&drv->word_pattern, ce->value, P_REG_EXTENDED)))
320
		found_driver = true;
321 322
	else {
		/* TODO: warn about bad regex instead of failure */
323
		error = git_error_set_regex(&drv->word_pattern, error);
324
		goto done;
325 326 327 328 329 330
	}

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

331
	/* if no driver config found at all, fall back on AUTO driver */
332
	if (!found_driver)
333
		goto done;
334 335

	/* store driver in registry */
336
	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
337
		goto done;
338

339 340
	*out = drv;

341
done:
342
	git_config_entry_free(ce);
343
	git_buf_dispose(&name);
344
	git_config_free(cfg);
345

346 347 348 349 350
	if (!*out) {
		int error2 = git_diff_driver_builtin(out, reg, driver_name);
		if (!error)
			error = error2;
	}
351 352 353 354

	if (drv && drv != *out)
		git_diff_driver_free(drv);

355
	return error;
356 357
}

358
int git_diff_driver_lookup(
359 360
	git_diff_driver **out, git_repository *repo,
	git_attr_session *attrsession, const char *path)
361
{
362
	int error = 0;
363
	const char *values[1], *attrs[] = { "diff" };
364 365

	assert(out);
366
	*out = NULL;
367 368

	if (!repo || !path || !strlen(path))
369
		/* just use the auto value */;
370 371
	else if ((error = git_attr_get_many_with_session(values, repo,
			attrsession, 0, path, 1, attrs)) < 0)
372
		/* return error below */;
373

374
	else if (GIT_ATTR_IS_UNSPECIFIED(values[0]))
375
		/* just use the auto value */;
376
	else if (GIT_ATTR_IS_FALSE(values[0]))
377
		*out = &global_drivers[DIFF_DRIVER_BINARY];
378
	else if (GIT_ATTR_IS_TRUE(values[0]))
379
		*out = &global_drivers[DIFF_DRIVER_TEXT];
380 381

	/* otherwise look for driver information in config and build driver */
382
	else if ((error = git_diff_driver_load(out, repo, values[0])) < 0) {
383 384
		if (error == GIT_ENOTFOUND) {
			error = 0;
385
			git_error_clear();
386
		}
387
	}
388

389 390 391
	if (!*out)
		*out = &global_drivers[DIFF_DRIVER_AUTO];

392
	return error;
393 394 395 396
}

/*
 * Release a driver: every compiled function-context pattern, the
 * pattern array, the word pattern, and the driver itself.
 * Passing NULL is a no-op.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	size_t i;

	if (!driver)
		return;

	for (i = 0; i < git_array_size(driver->fn_patterns); ++i)
		p_regfree(& git_array_get(driver->fn_patterns, i)->re);
	git_array_clear(driver->fn_patterns);

	p_regfree(&driver->word_pattern);

	git__free(driver);
}

411 412
/*
 * Merge a driver's flags into a set of diff option flags.
 *
 * The driver's binary_flags are applied only when the caller has set
 * neither force-text nor force-binary; other_flags are always applied.
 */
void git_diff_driver_update_options(
	uint32_t *option_flags, git_diff_driver *driver)
{
	if ((*option_flags & FORCE_DIFFABLE) == 0)
		*option_flags |= driver->binary_flags;

	*option_flags |= driver->other_flags;
}

/*
 * Decide whether content looks binary.
 *
 * Only the first min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL) bytes
 * are examined, and the driver is currently not consulted.
 *
 * Returns 1 if a NUL byte is found in that range, 0 otherwise.
 */
int git_diff_driver_content_is_binary(
	git_diff_driver *driver, const char *content, size_t content_len)
{
	git_buf search = GIT_BUF_INIT;

	GIT_UNUSED(driver);

	/* wrap the caller's bytes without copying or taking ownership */
	git_buf_attach_notowned(&search, content,
		min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL));

	/* TODO: provide encoding / binary detection callbacks that can
	 * be UTF-8 aware, etc.  For now, instead of trying to be smart,
	 * let's just use the simple NUL-byte detection that core git uses.
	 */

	/* previously was: if (git_buf_text_is_binary(&search)) */
	if (git_buf_text_contains_nul(&search))
		return 1;

	return 0;
}

442
static int diff_context_line__simple(
443
	git_diff_driver *driver, git_buf *line)
444
{
445
	char firstch = line->ptr[0];
446
	GIT_UNUSED(driver);
447
	return (git__isalpha(firstch) || firstch == '_' || firstch == '$');
448 449 450
}

static int diff_context_line__pattern_match(
451
	git_diff_driver *driver, git_buf *line)
452
{
453
	size_t i, maxi = git_array_size(driver->fn_patterns);
454
	p_regmatch_t pmatch[2];
455

456
	for (i = 0; i < maxi; ++i) {
457 458
		git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, i);

459
		if (!p_regexec(&pat->re, line->ptr, 2, pmatch, 0)) {
460 461
			if (pat->flags & REG_NEGATE)
				return false;
462 463 464 465 466

			/* use pmatch data to trim line data */
			i = (pmatch[1].rm_so >= 0) ? 1 : 0;
			git_buf_consume(line, git_buf_cstr(line) + pmatch[i].rm_so);
			git_buf_truncate(line, pmatch[i].rm_eo - pmatch[i].rm_so);
467
			git_buf_rtrim(line);
468

469
			return true;
470
		}
471 472 473 474 475
	}

	return false;
}

476 477 478 479 480 481 482
static long diff_context_find(
	const char *line,
	long line_len,
	char *out,
	long out_size,
	void *payload)
{
483
	git_diff_find_context_payload *ctxt = payload;
484

485
	if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
486
		return -1;
487
	git_buf_rtrim(&ctxt->line);
488

489
	if (!ctxt->line.size)
490 491
		return -1;

492
	if (!ctxt->match_line || !ctxt->match_line(ctxt->driver, &ctxt->line))
493
		return -1;
494

495
	if (out_size > (long)ctxt->line.size)
Russell Belfer committed
496
		out_size = (long)ctxt->line.size;
497
	memcpy(out, ctxt->line.ptr, (size_t)out_size);
498

499
	return out_size;
500
}
501

502 503 504 505 506 507 508 509 510 511 512 513 514 515
/*
 * Prepare the context-finding callback and its payload for a driver.
 *
 * With a NULL driver the callback is NULL and the payload is only
 * zeroed.  Otherwise the callback is diff_context_find and the payload
 * uses pattern matching for pattern-list drivers and the simple
 * first-character test for every other driver type.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	*findfn_out = driver ? diff_context_find : NULL;

	memset(payload_out, 0, sizeof(*payload_out));
	if (driver) {
		payload_out->driver = driver;
		payload_out->match_line = (driver->type == DIFF_DRIVER_PATTERNLIST) ?
			diff_context_line__pattern_match : diff_context_line__simple;
		git_buf_init(&payload_out->line, 0);
	}
}

518
/*
 * Release the payload's scratch line buffer and drop its driver
 * pointer.  Passing NULL is a no-op.
 */
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
	if (payload) {
		git_buf_dispose(&payload->line);
		payload->driver = NULL;
	}
}