diff_driver.c 11.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "common.h"

#include "git2/attr.h"

#include "diff.h"
#include "diff_patch.h"
#include "diff_driver.h"
#include "strmap.h"
#include "map.h"
#include "buf_text.h"
17
#include "config.h"
18
#include "repository.h"
19

20 21
GIT__USE_STRMAP;

22 23
/*
 * Kinds of diff driver.  The first three values are also used as
 * indexes into the global_drivers[] table, so they must stay 0, 1, 2.
 */
typedef enum {
	/* no forced text/binary decision and no custom patterns */
	DIFF_DRIVER_AUTO        = 0,
	/* content is always treated as binary */
	DIFF_DRIVER_BINARY      = 1,
	/* content is always treated as text */
	DIFF_DRIVER_TEXT        = 2,
	/* driver carries a list of function-context patterns */
	DIFF_DRIVER_PATTERNLIST = 3
} git_diff_driver_t;

29 30 31 32 33
/* one compiled function-context pattern together with its flags */
typedef struct {
	regex_t re; /* filled in by regcomp() in diff_driver_add_patterns */
	int flags;  /* regcomp flags, optionally OR'd with REG_NEGATE */
} git_diff_driver_pattern;

34
/* extra per-pattern flag stored next to the regcomp flags in pattern->flags */
enum {
	/* bit 15, chosen to get out of the way of existing flags */
	REG_NEGATE = (1 << 15)
};

/* data for finding function context for a given file type */
struct git_diff_driver {
	git_diff_driver_t type;
41 42
	uint32_t binary_flags;
	uint32_t other_flags;
43
	git_array_t(git_diff_driver_pattern) fn_patterns;
44
	regex_t  word_pattern;
45
	char name[GIT_FLEX_ARRAY];
46 47
};

48
#include "userdiff.h"
49

50 51 52 53
/* per-repository cache of loaded diff drivers, keyed by driver name */
struct git_diff_driver_registry {
	git_strmap *drivers; /* values are git_diff_driver pointers owned by the registry */
};

54 55
/* option bits that mean the caller has already forced a text/binary decision */
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)

56
static git_diff_driver global_drivers[3] = {
57 58 59
	{ DIFF_DRIVER_AUTO,   0, 0, },
	{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
	{ DIFF_DRIVER_TEXT,   GIT_DIFF_FORCE_TEXT, 0 },
60 61 62 63
};

git_diff_driver_registry *git_diff_driver_registry_new()
{
64 65 66 67 68
	git_diff_driver_registry *reg =
		git__calloc(1, sizeof(git_diff_driver_registry));
	if (!reg)
		return NULL;

69
	if ((reg->drivers = git_strmap_alloc()) == NULL) {
70 71 72 73 74
		git_diff_driver_registry_free(reg);
		return NULL;
	}

	return reg;
75 76 77 78
}

/*
 * Free a driver registry along with every driver stored in it.
 *
 * Accepts NULL.  Also accepts a partially constructed registry whose
 * driver map was never allocated, which is how
 * git_diff_driver_registry_new() calls this on its failure path.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *drv;

	if (!reg)
		return;

	/* only walk the map if it exists; it is NULL when allocation failed */
	if (reg->drivers) {
		git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv));
		git_strmap_free(reg->drivers);
	}

	git__free(reg);
}

89 90
/*
 * Compile each newline-separated pattern in `regex_str` and append it to
 * drv->fn_patterns.
 *
 * A pattern that starts with '!' is stored with REG_NEGATE set in its
 * flags, and the '!' itself is not part of the compiled expression.
 *
 * drv         driver whose pattern list is extended
 * regex_str   one or more patterns separated by '\n'
 * regex_flags flags passed to regcomp() for every pattern
 *
 * Returns 0 on success.  On allocation or buffer failure, or when a
 * pattern does not compile, the slot taken for the failing pattern is
 * released and a non-zero error is returned (the value produced by
 * giterr_set_regex for compile failures, presumably negative like other
 * error codes in this file).
 */
static int diff_driver_add_patterns(
	git_diff_driver *drv, const char *regex_str, int regex_flags)
{
	int error = 0;
	const char *scan, *end;
	git_diff_driver_pattern *pat = NULL;
	git_buf buf = GIT_BUF_INIT;

	for (scan = regex_str; scan; scan = end) {
		/* get pattern to fill in */
		if ((pat = git_array_alloc(drv->fn_patterns)) == NULL) {
			error = -1;
			break;
		}

		pat->flags = regex_flags;
		if (*scan == '!') {
			pat->flags |= REG_NEGATE;
			++scan;
		}

		/* copy out the current line; `end` becomes the start of the next */
		if ((end = strchr(scan, '\n')) != NULL) {
			error = git_buf_set(&buf, scan, end - scan);
			end++;
		} else {
			error = git_buf_sets(&buf, scan);
		}
		if (error < 0)
			break;

		/*
		 * regcomp() reports failure with a positive non-zero code, so
		 * the result has to be compared against zero rather than
		 * tested for being negative.
		 */
		if ((error = regcomp(&pat->re, buf.ptr, regex_flags)) != 0) {
			/* if regex fails to compile, warn? fail? */
			error = giterr_set_regex(&pat->re, error);
			/*
			 * The contents of the regex_t are undefined after a failed
			 * regcomp(), so it is not handed to regfree().
			 */
			break;
		}
	}

	if (error && pat != NULL)
		(void)git_array_pop(drv->fn_patterns); /* release last item */
	git_buf_free(&buf);

	return error;
}

static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
{
136
	return diff_driver_add_patterns(payload, entry->value, REG_EXTENDED);
137 138 139 140
}

static int diff_driver_funcname(const git_config_entry *entry, void *payload)
{
141
	return diff_driver_add_patterns(payload, entry->value, 0);
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
}

/*
 * Return the repository's diff driver registry, creating it on first use.
 *
 * Returns NULL (with an error message set) if no registry could be
 * created.
 */
static git_diff_driver_registry *git_repository_driver_registry(
	git_repository *repo)
{
	git_diff_driver_registry *fresh, *leftover;

	if (repo->diff_drivers == NULL) {
		fresh = git_diff_driver_registry_new();

		/*
		 * Install the new registry only if the slot is still NULL.
		 * NOTE(review): the swap presumably returns NULL when our
		 * registry was installed and our own pointer otherwise --
		 * confirm against git__compare_and_swap.
		 */
		leftover = git__compare_and_swap(&repo->diff_drivers, NULL, fresh);

		/* if we race, free losing allocation */
		if (leftover != NULL)
			git_diff_driver_registry_free(leftover);
	}

	if (repo->diff_drivers == NULL)
		giterr_set(GITERR_REPOSITORY, "Unable to create diff driver registry");

	return repo->diff_drivers;
}

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
/*
 * Build a driver from the built-in definition table and store it in the
 * registry.
 *
 * out         receives the new driver, or NULL when `driver_name` has no
 *             built-in definition
 * reg         registry that takes ownership of the driver
 * driver_name name to look up, compared case-insensitively
 *
 * Returns 0 on success (including "no such built-in") and a non-zero
 * error otherwise; on error the partly built driver is freed and *out is
 * left untouched.
 */
static int git_diff_driver_builtin(
	git_diff_driver **out,
	git_diff_driver_registry *reg,
	const char *driver_name)
{
	int error = 0;
	git_diff_driver_definition *ddef = NULL;
	git_diff_driver *drv = NULL;
	size_t namelen, idx;

	/* stop at the first definition whose name matches */
	for (idx = 0; ddef == NULL && idx < ARRAY_SIZE(builtin_defs); ++idx) {
		if (strcasecmp(driver_name, builtin_defs[idx].name) == 0)
			ddef = &builtin_defs[idx];
	}
	if (ddef == NULL)
		goto done;

	namelen = strlen(ddef->name);

	/* zeroed allocation, so the copied name is already NUL-terminated */
	drv = git__calloc(1, sizeof(git_diff_driver) + namelen + 1);
	GITERR_CHECK_ALLOC(drv);

	drv->type = DIFF_DRIVER_PATTERNLIST;
	memcpy(drv->name, ddef->name, namelen);

	if (ddef->fns) {
		error = diff_driver_add_patterns(
			drv, ddef->fns, ddef->flags | REG_EXTENDED);
		if (error < 0)
			goto done;
	}

	if (ddef->words) {
		error = regcomp(
			&drv->word_pattern, ddef->words, ddef->flags | REG_EXTENDED);
		if (error) {
			error = giterr_set_regex(&drv->word_pattern, error);
			goto done;
		}
	}

	/* the insert writes its status into `error`; positive is not a failure */
	git_strmap_insert(reg->drivers, drv->name, drv, error);
	if (error > 0)
		error = 0;

done:
	if (error && drv)
		git_diff_driver_free(drv);
	else
		*out = drv;

	return error;
}

214
static int git_diff_driver_load(
215
	git_diff_driver **out, git_repository *repo, const char *driver_name)
216
{
217
	int error = 0;
218
	git_diff_driver_registry *reg;
219
	git_diff_driver *drv = NULL;
220 221
	size_t namelen = strlen(driver_name);
	khiter_t pos;
222 223
	git_config *cfg;
	git_buf name = GIT_BUF_INIT;
224
	const git_config_entry *ce;
225
	bool found_driver = false;
226

227
	if ((reg = git_repository_driver_registry(repo)) == NULL)
228
		return -1;
229 230 231 232 233

	pos = git_strmap_lookup_index(reg->drivers, driver_name);
	if (git_strmap_valid_index(reg->drivers, pos)) {
		*out = git_strmap_value_at(reg->drivers, pos);
		return 0;
234 235 236 237 238
	}

	/* if you can't read config for repo, just use default driver */
	if (git_repository_config__weakptr(&cfg, repo) < 0) {
		giterr_clear();
239
		goto done;
240 241
	}

242
	drv = git__calloc(1, sizeof(git_diff_driver) + namelen + 1);
243 244
	GITERR_CHECK_ALLOC(drv);
	drv->type = DIFF_DRIVER_AUTO;
245
	memcpy(drv->name, driver_name, namelen);
246 247

	if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0)
248
		goto done;
249 250 251

	switch (git_config__get_bool_force(cfg, name.ptr, -1)) {
	case true:
252 253
		/* if diff.<driver>.binary is true, just return the binary driver */
		*out = &global_drivers[DIFF_DRIVER_BINARY];
254
		goto done;
255
	case false:
256 257 258
		/* if diff.<driver>.binary is false, force binary checks off */
		/* but still may have custom function context patterns, etc. */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
259
		found_driver = true;
260 261
		break;
	default:
262
		/* diff.<driver>.binary unspecified or "auto", so just continue */
263
		break;
264 265 266 267
	}

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

268 269
	git_buf_truncate(&name, namelen + strlen("diff.."));
	git_buf_put(&name, "xfuncname", strlen("xfuncname"));
270
	if ((error = git_config_get_multivar_foreach(
271 272
			cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
273
			goto done;
274
		giterr_clear(); /* no diff.<driver>.xfuncname, so just continue */
275
	}
276

277 278
	git_buf_truncate(&name, namelen + strlen("diff.."));
	git_buf_put(&name, "funcname", strlen("funcname"));
279
	if ((error = git_config_get_multivar_foreach(
280 281
			cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
282
			goto done;
283
		giterr_clear(); /* no diff.<driver>.funcname, so just continue */
284 285 286
	}

	/* if we found any patterns, set driver type to use correct callback */
287
	if (git_array_size(drv->fn_patterns) > 0) {
288
		drv->type = DIFF_DRIVER_PATTERNLIST;
289 290
		found_driver = true;
	}
291

292 293
	git_buf_truncate(&name, namelen + strlen("diff.."));
	git_buf_put(&name, "wordregex", strlen("wordregex"));
294
	if ((error = git_config__lookup_entry(&ce, cfg, name.ptr, false)) < 0)
295
		goto done;
296 297 298
	if (!ce || !ce->value)
		/* no diff.<driver>.wordregex, so just continue */;
	else if (!(error = regcomp(&drv->word_pattern, ce->value, REG_EXTENDED)))
299
		found_driver = true;
300 301 302 303
	else {
		/* TODO: warn about bad regex instead of failure */
		error = giterr_set_regex(&drv->word_pattern, error);
		goto done;
304 305 306 307 308 309
	}

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

310
	/* if no driver config found at all, fall back on AUTO driver */
311
	if (!found_driver)
312
		goto done;
313 314 315 316

	/* store driver in registry */
	git_strmap_insert(reg->drivers, drv->name, drv, error);
	if (error < 0)
317
		goto done;
318
	error = 0;
319

320 321
	*out = drv;

322 323 324
done:
	git_buf_free(&name);

325 326 327 328 329
	if (!*out) {
		int error2 = git_diff_driver_builtin(out, reg, driver_name);
		if (!error)
			error = error2;
	}
330 331 332 333

	if (drv && drv != *out)
		git_diff_driver_free(drv);

334
	return error;
335 336
}

337 338 339
/*
 * Pick the diff driver for `path` based on its "diff" attribute.
 *
 *   - no repo, no path, empty path, or attribute unspecified: auto driver
 *   - attribute false: binary driver
 *   - attribute true: text driver
 *   - any other value: the driver with that name is loaded; if that
 *     yields no driver, the auto driver is used
 *
 * *out always points at a driver on return, even when an error is
 * returned.  A GIT_ENOTFOUND from loading the named driver is cleared
 * and reported as success.
 */
int git_diff_driver_lookup(
	git_diff_driver **out, git_repository *repo, const char *path)
{
	int error = 0;
	const char *value;

	assert(out);
	*out = NULL;

	if (repo != NULL && path != NULL && *path != '\0' &&
		(error = git_attr_get(&value, repo, 0, path, "diff")) >= 0 &&
		!GIT_ATTR_UNSPECIFIED(value)) {

		if (GIT_ATTR_FALSE(value)) {
			*out = &global_drivers[DIFF_DRIVER_BINARY];
		} else if (GIT_ATTR_TRUE(value)) {
			*out = &global_drivers[DIFF_DRIVER_TEXT];
		} else {
			/* otherwise look for driver information in config and build driver */
			error = git_diff_driver_load(out, repo, value);

			if (error < 0 && error == GIT_ENOTFOUND) {
				error = 0;
				giterr_clear();
			}
		}
	}

	/* every other case, including errors, ends up with the auto driver */
	if (*out == NULL)
		*out = &global_drivers[DIFF_DRIVER_AUTO];

	return error;
}

/*
 * Release a driver: every compiled function-context pattern, the
 * pattern array, the word regex, and the driver allocation itself.
 * Accepts NULL.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	size_t idx, count;

	if (driver == NULL)
		return;

	count = git_array_size(driver->fn_patterns);
	for (idx = 0; idx < count; ++idx) {
		git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, idx);

		regfree(&pat->re);
	}
	git_array_clear(driver->fn_patterns);

	regfree(&driver->word_pattern);

	git__free(driver);
}

387 388
/*
 * Merge a driver's flags into a set of diff option flags.
 *
 * The driver's binary_flags are only applied when the options do not
 * already force text or binary; other_flags are always applied.
 */
void git_diff_driver_update_options(
	uint32_t *option_flags, git_diff_driver *driver)
{
	uint32_t flags = *option_flags;

	if (!(flags & FORCE_DIFFABLE))
		flags |= driver->binary_flags;
	flags |= driver->other_flags;

	*option_flags = flags;
}

/*
 * Decide whether `content` looks binary.
 *
 * Only the first 4000 bytes (or fewer, if the content is shorter) are
 * examined.  The driver argument is currently unused.
 *
 * Returns 1 if a NUL byte is found in the examined range, 0 otherwise.
 */
int git_diff_driver_content_is_binary(
	git_diff_driver *driver, const char *content, size_t content_len)
{
	size_t scan_len = min(content_len, 4000);
	/* read-only view over the caller's bytes; nothing is allocated */
	const git_buf search = { (char *)content, 0, scan_len };

	GIT_UNUSED(driver);

	/* TODO: provide encoding / binary detection callbacks that can
	 * be UTF-8 aware, etc.  For now, instead of trying to be smart,
	 * let's just use the simple NUL-byte detection that core git uses.
	 */

	/* previously was: if (git_buf_text_is_binary(&search)) */
	return git_buf_text_contains_nul(&search) ? 1 : 0;
}

415
static int diff_context_line__simple(
416
	git_diff_driver *driver, git_buf *line)
417
{
418
	char firstch = line->ptr[0];
419
	GIT_UNUSED(driver);
420
	return (git__isalpha(firstch) || firstch == '_' || firstch == '$');
421 422 423
}

static int diff_context_line__pattern_match(
424
	git_diff_driver *driver, git_buf *line)
425
{
426
	size_t i, maxi = git_array_size(driver->fn_patterns);
427
	regmatch_t pmatch[2];
428

429
	for (i = 0; i < maxi; ++i) {
430 431 432 433 434
		git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, i);

		if (!regexec(&pat->re, line->ptr, 2, pmatch, 0)) {
			if (pat->flags & REG_NEGATE)
				return false;
435 436 437 438 439

			/* use pmatch data to trim line data */
			i = (pmatch[1].rm_so >= 0) ? 1 : 0;
			git_buf_consume(line, git_buf_cstr(line) + pmatch[i].rm_so);
			git_buf_truncate(line, pmatch[i].rm_eo - pmatch[i].rm_so);
440
			git_buf_rtrim(line);
441

442
			return true;
443
		}
444 445 446 447 448
	}

	return false;
}

449 450 451 452 453 454 455
static long diff_context_find(
	const char *line,
	long line_len,
	char *out,
	long out_size,
	void *payload)
{
456
	git_diff_find_context_payload *ctxt = payload;
457

458
	if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
459
		return -1;
460
	git_buf_rtrim(&ctxt->line);
461

462
	if (!ctxt->line.size)
463 464
		return -1;

465
	if (!ctxt->match_line || !ctxt->match_line(ctxt->driver, &ctxt->line))
466
		return -1;
467

468
	if (out_size > (long)ctxt->line.size)
Russell Belfer committed
469
		out_size = (long)ctxt->line.size;
470
	memcpy(out, ctxt->line.ptr, (size_t)out_size);
471

472
	return out_size;
473
}
474

475 476 477 478 479 480 481 482 483 484 485 486 487 488
/*
 * Prepare the function-context callback and its payload for a driver.
 *
 * With a NULL driver the callback is set to NULL and the payload is just
 * zeroed.  Otherwise the callback is diff_context_find and the payload's
 * matcher is the pattern matcher for DIFF_DRIVER_PATTERNLIST drivers and
 * the simple first-character test for every other type.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	if (driver != NULL)
		*findfn_out = diff_context_find;
	else
		*findfn_out = NULL;

	memset(payload_out, 0, sizeof(*payload_out));
	if (driver == NULL)
		return;

	payload_out->driver = driver;
	if (driver->type == DIFF_DRIVER_PATTERNLIST)
		payload_out->match_line = diff_context_line__pattern_match;
	else
		payload_out->match_line = diff_context_line__simple;
	git_buf_init(&payload_out->line, 0);
}

491
/*
 * Release the line buffer held by a context payload and detach it from
 * its driver.  Accepts NULL.
 */
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
	if (payload == NULL)
		return;

	git_buf_free(&payload->line);
	payload->driver = NULL;
}