crlf.c 9.97 KB
Newer Older
1
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

8 9
#include "common.h"

10 11 12
#include "git2/attr.h"
#include "git2/blob.h"
#include "git2/index.h"
13
#include "git2/sys/filter.h"
14

15
#include "buf.h"
16
#include "futils.h"
17 18 19 20
#include "hash.h"
#include "filter.h"
#include "repository.h"

21 22 23 24 25 26 27 28
/*
 * Line-ending handling selected for a file, derived from its
 * `text` / `crlf` / `eol` attributes and the `core.autocrlf` setting.
 */
typedef enum {
	/* No attribute decided the action; core.autocrlf is consulted. */
	GIT_CRLF_UNDEFINED,
	/* Never convert (attribute is false, or core.autocrlf is false). */
	GIT_CRLF_BINARY,
	/* Attribute is true; resolved to TEXT_CRLF or TEXT_INPUT later. */
	GIT_CRLF_TEXT,
	/* Text with LF output (attribute "input", or eol=lf). */
	GIT_CRLF_TEXT_INPUT,
	/* Text with CRLF output (eol=crlf). */
	GIT_CRLF_TEXT_CRLF,
	/* Attribute is "auto": convert only content that is not binary. */
	GIT_CRLF_AUTO,
	/* Auto with LF output (eol=lf, or core.autocrlf=input). */
	GIT_CRLF_AUTO_INPUT,
	/* Auto with CRLF output (eol=crlf, or core.autocrlf=true). */
	GIT_CRLF_AUTO_CRLF
} git_crlf_t;

32
/*
 * Per-file line-ending state computed by convert_attrs() and carried
 * as the filter payload.
 */
struct crlf_attrs {
	/* action derived from .gitattributes alone (a git_crlf_t value) */
	int attr_action;
	/* effective action after also applying core.autocrlf (a git_crlf_t value) */
	int crlf_action;

	int auto_crlf; /* configmap value looked up for GIT_CONFIGMAP_AUTO_CRLF */
	int safe_crlf; /* configmap value looked up for GIT_CONFIGMAP_SAFE_CRLF */
	int core_eol;  /* configmap value looked up for GIT_CONFIGMAP_EOL */
};

/*
 * The CRLF filter object. The generic git_filter is its only member;
 * git_crlf_filter_new() allocates one of these and returns it cast to
 * a git_filter pointer.
 */
struct crlf_filter {
	git_filter f; /* base filter: version, attribute list and callbacks */
};

45
static git_crlf_t check_crlf(const char *value)
46
{
47
	if (GIT_ATTR_IS_TRUE(value))
48
		return GIT_CRLF_TEXT;
49
	else if (GIT_ATTR_IS_FALSE(value))
50
		return GIT_CRLF_BINARY;
51
	else if (GIT_ATTR_IS_UNSPECIFIED(value))
52 53 54 55
		;
	else if (strcmp(value, "input") == 0)
		return GIT_CRLF_TEXT_INPUT;
	else if (strcmp(value, "auto") == 0)
56 57
		return GIT_CRLF_AUTO;

58
	return GIT_CRLF_UNDEFINED;
59 60
}

61
static git_configmap_value check_eol(const char *value)
62
{
63
	if (GIT_ATTR_IS_UNSPECIFIED(value))
64 65
		;
	else if (strcmp(value, "lf") == 0)
66
		return GIT_EOL_LF;
67
	else if (strcmp(value, "crlf") == 0)
68 69 70 71 72
		return GIT_EOL_CRLF;

	return GIT_EOL_UNSET;
}

73
static int has_cr_in_index(const git_filter_source *src)
74
{
75 76
	git_repository *repo = git_filter_source_repo(src);
	const char *path = git_filter_source_path(src);
77 78 79 80
	git_index *index;
	const git_index_entry *entry;
	git_blob *blob;
	const void *blobcontent;
81
	git_object_size_t blobsize;
82 83
	bool found_cr;

84 85 86
	if (!path)
		return false;

87
	if (git_repository_index__weakptr(&index, repo) < 0) {
88
		git_error_clear();
89 90 91
		return false;
	}

92 93
	if (!(entry = git_index_get_bypath(index, path, 0)) &&
		!(entry = git_index_get_bypath(index, path, 1)))
94 95 96 97 98
		return false;

	if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
		return true;

99
	if (git_blob_lookup(&blob, repo, &entry->id) < 0)
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
		return false;

	blobcontent = git_blob_rawcontent(blob);
	blobsize    = git_blob_rawsize(blob);
	if (!git__is_sizet(blobsize))
		blobsize = (size_t)-1;

	found_cr = (blobcontent != NULL &&
		blobsize > 0 &&
		memchr(blobcontent, '\r', (size_t)blobsize) != NULL);

	git_blob_free(blob);
	return found_cr;
}

115
static int text_eol_is_crlf(struct crlf_attrs *ca)
116
{
117 118 119
	if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
		return 1;
	else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
120 121
		return 0;

122 123 124 125
	if (ca->core_eol == GIT_EOL_CRLF)
		return 1;
	if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF)
		return 1;
126

127 128
	return 0;
}
129

130
static git_configmap_value output_eol(struct crlf_attrs *ca)
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
{
	switch (ca->crlf_action) {
	case GIT_CRLF_BINARY:
		return GIT_EOL_UNSET;
	case GIT_CRLF_TEXT_CRLF:
		return GIT_EOL_CRLF;
	case GIT_CRLF_TEXT_INPUT:
		return GIT_EOL_LF;
	case GIT_CRLF_UNDEFINED:
	case GIT_CRLF_AUTO_CRLF:
		return GIT_EOL_CRLF;
	case GIT_CRLF_AUTO_INPUT:
		return GIT_EOL_LF;
	case GIT_CRLF_TEXT:
	case GIT_CRLF_AUTO:
		return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF;
	}

	/* TODO: warn when available */
	return ca->core_eol;
}

/*
 * Enforce core.safecrlf: check whether converting the content would
 * change line endings in a way that the output line ending would not
 * restore.
 *
 * Returns 0 when safecrlf is disabled, when the conversion is safe, or
 * when safecrlf is GIT_SAFE_CRLF_WARN (no warning is emitted yet).
 * Otherwise sets a GIT_ERROR_FILTER error message and returns -1.
 */
GIT_INLINE(int) check_safecrlf(
	struct crlf_attrs *ca,
	const git_filter_source *src,
	git_str_text_stats *stats)
{
	const char *filename = git_filter_source_path(src);
	git_configmap_value eol;

	if (!ca->safe_crlf)
		return 0;

	/* output_eol() only reads `ca`, so evaluate it once */
	eol = output_eol(ca);

	if (eol == GIT_EOL_LF) {
		/*
		 * CRLFs would not be restored by checkout:
		 * check if we'd remove CRLFs
		 */
		if (stats->crlf) {
			if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
				/* TODO: issue a warning when available */
			} else {
				if (filename && *filename)
					git_error_set(
						GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'",
						filename);
				else
					git_error_set(
						GIT_ERROR_FILTER, "CRLF would be replaced by LF");

				return -1;
			}
		}
	} else if (eol == GIT_EOL_CRLF) {
		/*
		 * CRLFs would be added by checkout:
		 * check if we have "naked" LFs
		 */
		if (stats->crlf != stats->lf) {
			if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
				/* TODO: issue a warning when available */
			} else {
				if (filename && *filename)
					git_error_set(
						GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'",
						filename);
				else
					git_error_set(
						GIT_ERROR_FILTER, "LF would be replaced by CRLF");

				return -1;
			}
		}
	}

	return 0;
}

208 209
static int crlf_apply_to_odb(
	struct crlf_attrs *ca,
210 211
	git_str *to,
	const git_str *from,
212
	const git_filter_source *src)
Ben Straub committed
213
{
214
	git_str_text_stats stats;
215 216
	bool is_binary;
	int error;
Ben Straub committed
217

218
	/* Binary attribute? Empty file? Nothing to do */
219
	if (ca->crlf_action == GIT_CRLF_BINARY || from->size == 0)
220
		return GIT_PASSTHROUGH;
Ben Straub committed
221

222
	is_binary = git_str_gather_text_stats(&stats, from, false);
223

224 225 226 227 228 229
	/* Heuristics to see if we can skip the conversion.
	 * Straight from Core Git.
	 */
	if (ca->crlf_action == GIT_CRLF_AUTO ||
		ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
		ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
Ben Straub committed
230

231 232 233 234 235 236 237 238 239
		if (is_binary)
			return GIT_PASSTHROUGH;

		/*
		 * If the file in the index has any CR in it, do not convert.
		 * This is the new safer autocrlf handling.
		 */
		if (has_cr_in_index(src))
			return GIT_PASSTHROUGH;
Ben Straub committed
240 241
	}

242 243 244 245 246 247 248 249
	if ((error = check_safecrlf(ca, src, &stats)) < 0)
		return error;

	/* If there are no CR characters to filter out, then just pass */
	if (!stats.crlf)
		return GIT_PASSTHROUGH;

	/* Actually drop the carriage returns */
250
	return git_str_crlf_to_lf(to, from);
Ben Straub committed
251 252
}

253
/*
 * Filter content on its way to the working directory: convert LF line
 * endings in `from` to CRLF, writing the result to `to`.
 *
 * Returns GIT_PASSTHROUGH when the input is empty, when the output line
 * ending is not CRLF, when there are no bare LFs to convert, or (in the
 * auto modes) when the content already contains a CR or is binary.
 * Otherwise returns the result of git_str_lf_to_crlf().
 */
static int crlf_apply_to_workdir(
	struct crlf_attrs *ca,
	git_str *to,
	const git_str *from)
{
	git_str_text_stats stats;
	bool is_binary;

	/* Empty file, or output does not use CRLF? Nothing to do. */
	if (git_str_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF)
		return GIT_PASSTHROUGH;

	is_binary = git_str_gather_text_stats(&stats, from, false);

	/* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
	if (stats.lf == 0 || stats.lf == stats.crlf)
		return GIT_PASSTHROUGH;

	if (ca->crlf_action == GIT_CRLF_AUTO ||
		ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
		ca->crlf_action == GIT_CRLF_AUTO_CRLF) {

		/* If we have any existing CR or CRLF line endings, do nothing */
		if (stats.cr > 0)
			return GIT_PASSTHROUGH;

		/* Don't filter binary files */
		if (is_binary)
			return GIT_PASSTHROUGH;
	}

	return git_str_lf_to_crlf(to, from);
}

287 288 289 290
static int convert_attrs(
	struct crlf_attrs *ca,
	const char **attr_values,
	const git_filter_source *src)
291 292
{
	int error;
293

294 295
	memset(ca, 0, sizeof(struct crlf_attrs));

296 297 298 299 300 301
	if ((error = git_repository__configmap_lookup(&ca->auto_crlf,
		 git_filter_source_repo(src), GIT_CONFIGMAP_AUTO_CRLF)) < 0 ||
		(error = git_repository__configmap_lookup(&ca->safe_crlf,
		 git_filter_source_repo(src), GIT_CONFIGMAP_SAFE_CRLF)) < 0 ||
		(error = git_repository__configmap_lookup(&ca->core_eol,
		 git_filter_source_repo(src), GIT_CONFIGMAP_EOL)) < 0)
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
		return error;

	/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
	if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
		ca->safe_crlf == GIT_SAFE_CRLF_FAIL)
		ca->safe_crlf = GIT_SAFE_CRLF_WARN;

	if (attr_values) {
		/* load the text attribute */
		ca->crlf_action = check_crlf(attr_values[2]); /* text */

		if (ca->crlf_action == GIT_CRLF_UNDEFINED)
			ca->crlf_action = check_crlf(attr_values[0]); /* crlf */

		if (ca->crlf_action != GIT_CRLF_BINARY) {
			/* load the eol attribute */
			int eol_attr = check_eol(attr_values[1]);

			if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF)
				ca->crlf_action = GIT_CRLF_AUTO_INPUT;
			else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF)
				ca->crlf_action = GIT_CRLF_AUTO_CRLF;
			else if (eol_attr == GIT_EOL_LF)
				ca->crlf_action = GIT_CRLF_TEXT_INPUT;
			else if (eol_attr == GIT_EOL_CRLF)
				ca->crlf_action = GIT_CRLF_TEXT_CRLF;
		}
329

330
		ca->attr_action = ca->crlf_action;
331
	} else {
332
		ca->crlf_action = GIT_CRLF_UNDEFINED;
333
	}
334

335 336 337 338 339 340 341 342
	if (ca->crlf_action == GIT_CRLF_TEXT)
		ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT;
	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
		ca->crlf_action = GIT_CRLF_BINARY;
	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
		ca->crlf_action = GIT_CRLF_AUTO_CRLF;
	if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
		ca->crlf_action = GIT_CRLF_AUTO_INPUT;
343

344 345
	return 0;
}
346

347 348 349 350 351 352 353
/*
 * Filter `check` callback: decide whether the CRLF filter applies to the
 * source and, if so, store the computed attributes in `*payload`.
 *
 * Returns GIT_PASSTHROUGH when the file is to be treated as binary, a
 * negative error when the configuration lookup or the allocation fails,
 * and 0 on success. On success `*payload` is a heap copy of the
 * attributes, released by crlf_cleanup().
 */
static int crlf_check(
	git_filter *self,
	void **payload, /* points to NULL ptr on entry, may be set */
	const git_filter_source *src,
	const char **attr_values)
{
	struct crlf_attrs ca;
	int error;

	GIT_UNUSED(self);

	/* do not build a payload from attributes that failed to load */
	if ((error = convert_attrs(&ca, attr_values, src)) < 0)
		return error;

	if (ca.crlf_action == GIT_CRLF_BINARY)
		return GIT_PASSTHROUGH;

	*payload = git__malloc(sizeof(ca));
	GIT_ERROR_CHECK_ALLOC(*payload);
	memcpy(*payload, &ca, sizeof(ca));

	return 0;
}

static int crlf_apply(
370 371
	git_filter *self,
	void **payload, /* may be read and/or set */
372 373
	git_str *to,
	const git_str *from,
374 375
	const git_filter_source *src)
{
376 377
	int error = 0;

378 379
	/* initialize payload in case `check` was bypassed */
	if (!*payload) {
380
		if ((error = crlf_check(self, payload, src, NULL)) < 0)
381 382
			return error;
	}
383

384
	if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
385
		error = crlf_apply_to_workdir(*payload, to, from);
386
	else
387 388 389
		error = crlf_apply_to_odb(*payload, to, from, src);

	return error;
390 391
}

392 393 394 395 396 397 398 399 400 401 402
/*
 * Filter `stream` callback: create a buffered write stream in `*out`
 * that runs crlf_apply() and forwards to `next`. The fourth argument of
 * git_filter_buffered_stream_new() is passed as NULL.
 */
static int crlf_stream(
	git_writestream **out,
	git_filter *self,
	void **payload,
	const git_filter_source *src,
	git_writestream *next)
{
	int result;

	result = git_filter_buffered_stream_new(
		out, self, crlf_apply, NULL, payload, src, next);

	return result;
}

403 404 405
/*
 * Filter `cleanup` callback: free the payload that crlf_check()
 * allocated.
 */
static void crlf_cleanup(
	git_filter *self,
	void       *payload)
{
	GIT_UNUSED(self);
	git__free(payload);
}

411
git_filter *git_crlf_filter_new(void)
412
{
413
	struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
Jacques Germishuys committed
414 415
	if (f == NULL)
		return NULL;
416

417
	f->f.version = GIT_FILTER_VERSION;
418 419
	f->f.attributes = "crlf eol text";
	f->f.initialize = NULL;
Russell Belfer committed
420
	f->f.shutdown = git_filter_free;
421
	f->f.check    = crlf_check;
422
	f->f.stream   = crlf_stream;
423
	f->f.cleanup  = crlf_cleanup;
424

425
	return (git_filter *)f;
426
}