/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "git2/attr.h"
#include "git2/blob.h"
#include "git2/index.h"
#include "git2/sys/filter.h"

#include "common.h"
#include "fileops.h"
#include "hash.h"
#include "filter.h"
#include "buf_text.h"
#include "repository.h"

/*
 * Line-ending settings resolved for one file. crlf_check() fills one of
 * these in and stores a heap copy as the filter payload, which the apply
 * helpers then read.
 */
struct crlf_attrs {
	int crlf_action; /* GIT_CRLF_* action chosen for the file */
	int eol;         /* GIT_EOL_* value taken from the `eol` attribute */
	int auto_crlf;   /* GIT_AUTO_CRLF_* value loaded via GIT_CVAR_AUTO_CRLF */
	int safe_crlf;   /* GIT_SAFE_CRLF_* value loaded via GIT_CVAR_SAFE_CRLF */
};

/*
 * The CRLF filter object. It holds nothing beyond the generic git_filter,
 * which must stay the first member: git_crlf_filter_new() returns this
 * struct cast to `git_filter *`.
 */
struct crlf_filter {
	git_filter f;
};

static int check_crlf(const char *value)
{
33
	if (GIT_ATTR_TRUE(value))
34 35
		return GIT_CRLF_TEXT;

36
	if (GIT_ATTR_FALSE(value))
37 38
		return GIT_CRLF_BINARY;

39
	if (GIT_ATTR_UNSPECIFIED(value))
40 41 42 43 44 45 46 47 48 49 50 51 52
		return GIT_CRLF_GUESS;

	if (strcmp(value, "input") == 0)
		return GIT_CRLF_INPUT;

	if (strcmp(value, "auto") == 0)
		return GIT_CRLF_AUTO;

	return GIT_CRLF_GUESS;
}

/*
 * Translate the value of the `eol` attribute into a GIT_EOL_* constant.
 *
 * "lf" maps to GIT_EOL_LF and "crlf" to GIT_EOL_CRLF. An unspecified
 * attribute, or any other value, yields GIT_EOL_UNSET.
 */
static int check_eol(const char *value)
{
	/* must be tested before the strcmp() calls below touch `value` */
	if (GIT_ATTR_UNSPECIFIED(value))
		return GIT_EOL_UNSET;

	if (strcmp(value, "lf") == 0)
		return GIT_EOL_LF;

	if (strcmp(value, "crlf") == 0)
		return GIT_EOL_CRLF;

	/* unrecognised value: treat as if the attribute were not given */
	return GIT_EOL_UNSET;
}

/*
 * Combine the attribute-derived action with the `eol` setting.
 *
 * A binary action always wins. Otherwise an explicit eol overrides the
 * action (LF -> GIT_CRLF_INPUT, CRLF -> GIT_CRLF_CRLF); with no explicit
 * eol the action is returned unchanged.
 */
static int crlf_input_action(struct crlf_attrs *ca)
{
	if (ca->crlf_action != GIT_CRLF_BINARY) {
		switch (ca->eol) {
		case GIT_EOL_LF:
			return GIT_CRLF_INPUT;
		case GIT_EOL_CRLF:
			return GIT_CRLF_CRLF;
		default:
			break;
		}
	}

	return ca->crlf_action;
}

79
static int has_cr_in_index(const git_filter_source *src)
80
{
81 82
	git_repository *repo = git_filter_source_repo(src);
	const char *path = git_filter_source_path(src);
83 84 85 86 87 88 89
	git_index *index;
	const git_index_entry *entry;
	git_blob *blob;
	const void *blobcontent;
	git_off_t blobsize;
	bool found_cr;

90 91 92
	if (!path)
		return false;

93
	if (git_repository_index__weakptr(&index, repo) < 0) {
94 95 96 97
		giterr_clear();
		return false;
	}

98 99
	if (!(entry = git_index_get_bypath(index, path, 0)) &&
		!(entry = git_index_get_bypath(index, path, 1)))
100 101 102 103 104
		return false;

	if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
		return true;

105
	if (git_blob_lookup(&blob, repo, &entry->id) < 0)
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
		return false;

	blobcontent = git_blob_rawcontent(blob);
	blobsize    = git_blob_rawsize(blob);
	if (!git__is_sizet(blobsize))
		blobsize = (size_t)-1;

	found_cr = (blobcontent != NULL &&
		blobsize > 0 &&
		memchr(blobcontent, '\r', (size_t)blobsize) != NULL);

	git_blob_free(blob);
	return found_cr;
}

static int crlf_apply_to_odb(
122
	struct crlf_attrs *ca,
123 124
	git_buf *to,
	const git_buf *from,
125
	const git_filter_source *src)
126 127
{
	/* Empty file? Nothing to do */
128
	if (!git_buf_len(from))
129 130 131 132 133
		return 0;

	/* Heuristics to see if we can skip the conversion.
	 * Straight from Core Git.
	 */
134
	if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) {
135 136
		git_buf_text_stats stats;

137
		/* Check heuristics for binary vs text - returns true if binary */
138
		if (git_buf_text_gather_stats(&stats, from, false))
139
			return GIT_PASSTHROUGH;
140

141 142 143
		/* If there are no CR characters to filter out, then just pass */
		if (!stats.cr)
			return GIT_PASSTHROUGH;
144

145 146
		/* If safecrlf is enabled, sanity-check the result. */
		if (stats.cr != stats.crlf || stats.lf != stats.crlf) {
147 148 149 150 151 152 153 154 155 156 157 158
			switch (ca->safe_crlf) {
			case GIT_SAFE_CRLF_FAIL:
				giterr_set(
					GITERR_FILTER, "LF would be replaced by CRLF in '%s'",
					git_filter_source_path(src));
				return -1;
			case GIT_SAFE_CRLF_WARN:
				/* TODO: issue warning when warning API is available */;
				break;
			default:
				break;
			}
159 160
		}

161 162 163 164 165 166
		/*
		 * We're currently not going to even try to convert stuff
		 * that has bare CR characters. Does anybody do that crazy
		 * stuff?
		 */
		if (stats.cr != stats.crlf)
Russell Belfer committed
167
			return GIT_PASSTHROUGH;
168

169
		if (ca->crlf_action == GIT_CRLF_GUESS) {
170 171 172 173
			/*
			 * If the file in the index has any CR in it, do not convert.
			 * This is the new safer autocrlf handling.
			 */
174
			if (has_cr_in_index(src))
Russell Belfer committed
175
				return GIT_PASSTHROUGH;
176 177
		}

178
		if (!stats.cr)
Russell Belfer committed
179
			return GIT_PASSTHROUGH;
180 181
	}

182
	/* Actually drop the carriage returns */
183
	return git_buf_text_crlf_to_lf(to, from);
Ben Straub committed
184 185
}

186
static const char *line_ending(struct crlf_attrs *ca)
Ben Straub committed
187
{
188
	switch (ca->crlf_action) {
Ben Straub committed
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
	case GIT_CRLF_BINARY:
	case GIT_CRLF_INPUT:
		return "\n";

	case GIT_CRLF_CRLF:
		return "\r\n";

	case GIT_CRLF_AUTO:
	case GIT_CRLF_TEXT:
	case GIT_CRLF_GUESS:
		break;

	default:
		goto line_ending_error;
	}

205
	switch (ca->eol) {
Ben Straub committed
206
	case GIT_EOL_UNSET:
207
		return GIT_EOL_NATIVE == GIT_EOL_CRLF ? "\r\n" : "\n";
Ben Straub committed
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223

	case GIT_EOL_CRLF:
		return "\r\n";

	case GIT_EOL_LF:
		return "\n";

	default:
		goto line_ending_error;
	}

line_ending_error:
	giterr_set(GITERR_INVALID, "Invalid input to line ending filter");
	return NULL;
}

224
static int crlf_apply_to_workdir(
225
	struct crlf_attrs *ca, git_buf *to, const git_buf *from)
Ben Straub committed
226 227 228 229
{
	const char *workdir_ending = NULL;

	/* Empty file? Nothing to do. */
230
	if (git_buf_len(from) == 0)
231
		return 0;
Ben Straub committed
232

233 234
	/* Don't filter binary files */
	if (git_buf_text_is_binary(from))
Russell Belfer committed
235
		return GIT_PASSTHROUGH;
236

Ben Straub committed
237
	/* Determine proper line ending */
238
	workdir_ending = line_ending(ca);
239 240
	if (!workdir_ending)
		return -1;
Ben Straub committed
241

Edward Thomson committed
242 243 244
	/* only LF->CRLF conversion is supported, do nothing on LF platforms */
	if (strcmp(workdir_ending, "\r\n") != 0)
		return GIT_PASSTHROUGH;
245

Edward Thomson committed
246
	return git_buf_text_lf_to_crlf(to, from);
Ben Straub committed
247 248
}

249 250 251
static int crlf_check(
	git_filter        *self,
	void              **payload, /* points to NULL ptr on entry, may be set */
252 253
	const git_filter_source *src,
	const char **attr_values)
254 255
{
	int error;
256 257 258
	struct crlf_attrs ca;

	GIT_UNUSED(self);
259

260 261 262 263 264 265 266 267 268 269
	if (!attr_values) {
		ca.crlf_action = GIT_CRLF_GUESS;
		ca.eol = GIT_EOL_UNSET;
	} else {
		ca.crlf_action = check_crlf(attr_values[2]); /* text */
		if (ca.crlf_action == GIT_CRLF_GUESS)
			ca.crlf_action = check_crlf(attr_values[0]); /* clrf */
		ca.eol = check_eol(attr_values[1]); /* eol */
	}
	ca.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
270

271 272
	/*
	 * Use the core Git logic to see if we should perform CRLF for this file
273
	 * based on its attributes & the value of `core.autocrlf`
274 275
	 */
	ca.crlf_action = crlf_input_action(&ca);
276

277
	if (ca.crlf_action == GIT_CRLF_BINARY)
Russell Belfer committed
278
		return GIT_PASSTHROUGH;
279

280 281 282
	if (ca.crlf_action == GIT_CRLF_GUESS ||
		(ca.crlf_action == GIT_CRLF_AUTO &&
		git_filter_source_mode(src) == GIT_FILTER_SMUDGE)) {
283

284 285 286
		error = git_repository__cvar(
			&ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF);
		if (error < 0)
287 288
			return error;

289 290
		if (ca.crlf_action == GIT_CRLF_GUESS &&
			ca.auto_crlf == GIT_AUTO_CRLF_FALSE)
Russell Belfer committed
291
			return GIT_PASSTHROUGH;
292 293 294 295

		if (ca.auto_crlf == GIT_AUTO_CRLF_INPUT &&
			git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
			return GIT_PASSTHROUGH;
296
	}
297

298 299 300 301 302
	if (git_filter_source_mode(src) == GIT_FILTER_CLEAN) {
		error = git_repository__cvar(
			&ca.safe_crlf, git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF);
		if (error < 0)
			return error;
303 304 305 306 307

		/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
		if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) &&
			ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
			ca.safe_crlf = GIT_SAFE_CRLF_WARN;
308 309
	}

310 311 312
	*payload = git__malloc(sizeof(ca));
	GITERR_CHECK_ALLOC(*payload);
	memcpy(*payload, &ca, sizeof(ca));
313

314 315 316 317
	return 0;
}

static int crlf_apply(
318 319 320 321
	git_filter    *self,
	void         **payload, /* may be read and/or set */
	git_buf       *to,
	const git_buf *from,
322 323
	const git_filter_source *src)
{
324 325 326
	/* initialize payload in case `check` was bypassed */
	if (!*payload) {
		int error = crlf_check(self, payload, src, NULL);
Russell Belfer committed
327
		if (error < 0 && error != GIT_PASSTHROUGH)
328 329
			return error;
	}
330

331
	if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
332 333 334
		return crlf_apply_to_workdir(*payload, to, from);
	else
		return crlf_apply_to_odb(*payload, to, from, src);
335 336
}

337 338 339
static void crlf_cleanup(
	git_filter *self,
	void       *payload)
340
{
341 342
	GIT_UNUSED(self);
	git__free(payload);
343 344
}

345
git_filter *git_crlf_filter_new(void)
346
{
347
	struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
348

349
	f->f.version = GIT_FILTER_VERSION;
350 351
	f->f.attributes = "crlf eol text";
	f->f.initialize = NULL;
Russell Belfer committed
352
	f->f.shutdown = git_filter_free;
353 354 355
	f->f.check    = crlf_check;
	f->f.apply    = crlf_apply;
	f->f.cleanup  = crlf_cleanup;
356

357
	return (git_filter *)f;
358
}