Commit ef8f8ec6 by Edward Thomson

crlf: update to match git's logic

Examine the recent CRLF changes to git by Torsten Bögershausen and
include similar changes to update our CRLF logic to match.

Note: Torsten Bögershausen has previously agreed to allow his changes to
be included in libgit2.
parent 59b054cb
...@@ -310,6 +310,7 @@ bool git_buf_text_gather_stats( ...@@ -310,6 +310,7 @@ bool git_buf_text_gather_stats(
} }
} }
return (stats->nul > 0 || /* Treat files with a bare CR as binary */
return (stats->cr != stats->crlf || stats->nul > 0 ||
((stats->printable >> 7) < stats->nonprintable)); ((stats->printable >> 7) < stats->nonprintable));
} }
...@@ -18,68 +18,58 @@ ...@@ -18,68 +18,58 @@
#include "buf_text.h" #include "buf_text.h"
#include "repository.h" #include "repository.h"
typedef enum {
GIT_CRLF_UNDEFINED,
GIT_CRLF_BINARY,
GIT_CRLF_TEXT,
GIT_CRLF_TEXT_INPUT,
GIT_CRLF_TEXT_CRLF,
GIT_CRLF_AUTO,
GIT_CRLF_AUTO_INPUT,
GIT_CRLF_AUTO_CRLF,
} git_crlf_t;
struct crlf_attrs { struct crlf_attrs {
int crlf_action; int attr_action; /* the .gitattributes setting */
int eol; int crlf_action; /* the core.autocrlf setting */
int auto_crlf; int auto_crlf;
int safe_crlf; int safe_crlf;
git_cvar_value core_eol;
}; };
struct crlf_filter { struct crlf_filter {
git_filter f; git_filter f;
}; };
static int check_crlf(const char *value) static git_crlf_t check_crlf(const char *value)
{ {
if (GIT_ATTR_TRUE(value)) if (GIT_ATTR_TRUE(value))
return GIT_CRLF_TEXT; return GIT_CRLF_TEXT;
else if (GIT_ATTR_FALSE(value))
if (GIT_ATTR_FALSE(value))
return GIT_CRLF_BINARY; return GIT_CRLF_BINARY;
else if (GIT_ATTR_UNSPECIFIED(value))
if (GIT_ATTR_UNSPECIFIED(value)) ;
return GIT_CRLF_GUESS; else if (strcmp(value, "input") == 0)
return GIT_CRLF_TEXT_INPUT;
if (strcmp(value, "input") == 0) else if (strcmp(value, "auto") == 0)
return GIT_CRLF_INPUT;
if (strcmp(value, "auto") == 0)
return GIT_CRLF_AUTO; return GIT_CRLF_AUTO;
return GIT_CRLF_GUESS; return GIT_CRLF_UNDEFINED;
} }
static int check_eol(const char *value) static git_cvar_value check_eol(const char *value)
{ {
if (GIT_ATTR_UNSPECIFIED(value)) if (GIT_ATTR_UNSPECIFIED(value))
return GIT_EOL_UNSET; ;
else if (strcmp(value, "lf") == 0)
if (strcmp(value, "lf") == 0)
return GIT_EOL_LF; return GIT_EOL_LF;
else if (strcmp(value, "crlf") == 0)
if (strcmp(value, "crlf") == 0)
return GIT_EOL_CRLF; return GIT_EOL_CRLF;
return GIT_EOL_UNSET; return GIT_EOL_UNSET;
} }
/*
 * Combine the attribute-derived action (ca->crlf_action) with the eol
 * attribute (ca->eol) into the action to apply.
 *
 * The checks run in priority order: binary wins outright, then eol=lf
 * forces INPUT, then AUTO is kept, then eol=crlf forces CRLF. If none
 * match, the existing crlf_action is returned unchanged.
 */
static int crlf_input_action(struct crlf_attrs *ca)
{
/* Binary is never overridden by the eol attribute */
if (ca->crlf_action == GIT_CRLF_BINARY)
return GIT_CRLF_BINARY;
/* eol=lf is tested before AUTO, so it overrides an AUTO action too */
if (ca->eol == GIT_EOL_LF)
return GIT_CRLF_INPUT;
/* AUTO is tested before eol=crlf, so it stays AUTO in that case */
if (ca->crlf_action == GIT_CRLF_AUTO)
return GIT_CRLF_AUTO;
if (ca->eol == GIT_EOL_CRLF)
return GIT_CRLF_CRLF;
/* No eol override applies: keep the attribute-derived action */
return ca->crlf_action;
}
static int has_cr_in_index(const git_filter_source *src) static int has_cr_in_index(const git_filter_source *src)
{ {
git_repository *repo = git_filter_source_repo(src); git_repository *repo = git_filter_source_repo(src);
...@@ -122,147 +112,168 @@ static int has_cr_in_index(const git_filter_source *src) ...@@ -122,147 +112,168 @@ static int has_cr_in_index(const git_filter_source *src)
return found_cr; return found_cr;
} }
static int crlf_apply_to_odb( static int text_eol_is_crlf(struct crlf_attrs *ca)
struct crlf_attrs *ca,
git_buf *to,
const git_buf *from,
const git_filter_source *src)
{ {
/* Empty file? Nothing to do */ if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
if (!git_buf_len(from)) return 1;
else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
return 0; return 0;
/* Heuristics to see if we can skip the conversion. if (ca->core_eol == GIT_EOL_CRLF)
* Straight from Core Git. return 1;
*/ if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF)
if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) { return 1;
git_buf_text_stats stats;
/* Check heuristics for binary vs text - returns true if binary */ return 0;
if (git_buf_text_gather_stats(&stats, from, false)) }
return GIT_PASSTHROUGH;
/* If there are no CR characters to filter out, then just pass */ static git_cvar_value output_eol(struct crlf_attrs *ca)
if (!stats.cr) {
return GIT_PASSTHROUGH; switch (ca->crlf_action) {
case GIT_CRLF_BINARY:
return GIT_EOL_UNSET;
case GIT_CRLF_TEXT_CRLF:
return GIT_EOL_CRLF;
case GIT_CRLF_TEXT_INPUT:
return GIT_EOL_LF;
case GIT_CRLF_UNDEFINED:
case GIT_CRLF_AUTO_CRLF:
return GIT_EOL_CRLF;
case GIT_CRLF_AUTO_INPUT:
return GIT_EOL_LF;
case GIT_CRLF_TEXT:
case GIT_CRLF_AUTO:
return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF;
}
/* TODO: warn when available */
return ca->core_eol;
}
GIT_INLINE(int) check_safecrlf(
struct crlf_attrs *ca,
const git_filter_source *src,
git_buf_text_stats *stats)
{
const char *filename = git_filter_source_path(src);
if (!ca->safe_crlf)
return 0;
if (output_eol(ca) == GIT_EOL_LF) {
/*
* CRLFs would not be restored by checkout:
* check if we'd remove CRLFs
*/
if (stats->crlf) {
if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
/* TODO: issue a warning when available */
} else {
if (filename && *filename)
giterr_set(
GITERR_FILTER, "CRLF would be replaced by LF in '%s'",
filename);
else
giterr_set(
GITERR_FILTER, "CRLF would be replaced by LF");
/* If safecrlf is enabled, sanity-check the result. */
if (stats.cr != stats.crlf || stats.lf != stats.crlf) {
switch (ca->safe_crlf) {
case GIT_SAFE_CRLF_FAIL:
giterr_set(
GITERR_FILTER, "LF would be replaced by CRLF in '%s'",
git_filter_source_path(src));
return -1; return -1;
case GIT_SAFE_CRLF_WARN:
/* TODO: issue warning when warning API is available */;
break;
default:
break;
} }
} }
} else if (output_eol(ca) == GIT_EOL_CRLF) {
/* /*
* We're currently not going to even try to convert stuff * CRLFs would be added by checkout:
* that has bare CR characters. Does anybody do that crazy * check if we have "naked" LFs
* stuff?
*/ */
if (stats.cr != stats.crlf) if (stats->crlf != stats->lf) {
return GIT_PASSTHROUGH; if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) {
/* TODO: issue a warning when available */
} else {
if (filename && *filename)
giterr_set(
GITERR_FILTER, "LF would be replaced by CRLF in '%s'",
filename);
else
giterr_set(
GITERR_FILTER, "LF would be replaced by CRLF");
if (ca->crlf_action == GIT_CRLF_GUESS) { return -1;
/* }
* If the file in the index has any CR in it, do not convert.
* This is the new safer autocrlf handling.
*/
if (has_cr_in_index(src))
return GIT_PASSTHROUGH;
} }
if (!stats.cr)
return GIT_PASSTHROUGH;
} }
/* Actually drop the carriage returns */ return 0;
return git_buf_text_crlf_to_lf(to, from);
} }
static const char *line_ending(struct crlf_attrs *ca) static int crlf_apply_to_odb(
struct crlf_attrs *ca,
git_buf *to,
const git_buf *from,
const git_filter_source *src)
{ {
switch (ca->crlf_action) { git_buf_text_stats stats;
case GIT_CRLF_BINARY: bool is_binary;
case GIT_CRLF_INPUT: int error;
return "\n";
case GIT_CRLF_CRLF: /* Binary attribute? Empty file? Nothing to do */
return "\r\n"; if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from))
return GIT_PASSTHROUGH;
case GIT_CRLF_GUESS: is_binary = git_buf_text_gather_stats(&stats, from, false);
if (ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
return "\n";
break;
case GIT_CRLF_AUTO: /* Heuristics to see if we can skip the conversion.
if (ca->eol == GIT_EOL_CRLF) * Straight from Core Git.
return "\r\n"; */
case GIT_CRLF_TEXT: if (ca->crlf_action == GIT_CRLF_AUTO ||
break; ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
default: if (is_binary)
goto line_ending_error; return GIT_PASSTHROUGH;
/*
* If the file in the index has any CR in it, do not convert.
* This is the new safer autocrlf handling.
*/
if (has_cr_in_index(src))
return GIT_PASSTHROUGH;
} }
if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE) if ((error = check_safecrlf(ca, src, &stats)) < 0)
return "\r\n"; return error;
else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
return "\n"; /* If there are no CR characters to filter out, then just pass */
else if (ca->eol == GIT_EOL_UNSET) if (!stats.crlf)
return GIT_EOL_NATIVE == GIT_EOL_CRLF ? "\r\n" : "\n"; return GIT_PASSTHROUGH;
else if (ca->eol == GIT_EOL_LF)
return "\n"; /* Actually drop the carriage returns */
else if (ca->eol == GIT_EOL_CRLF) return git_buf_text_crlf_to_lf(to, from);
return "\r\n";
line_ending_error:
giterr_set(GITERR_INVALID, "invalid input to line ending filter");
return NULL;
} }
static int crlf_apply_to_workdir( static int crlf_apply_to_workdir(
struct crlf_attrs *ca, git_buf *to, const git_buf *from) struct crlf_attrs *ca,
git_buf *to,
const git_buf *from)
{ {
git_buf_text_stats stats; git_buf_text_stats stats;
const char *workdir_ending = NULL;
bool is_binary; bool is_binary;
/* Empty file? Nothing to do. */ /* Empty file? Nothing to do. */
if (git_buf_len(from) == 0) if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF)
return 0;
/* Determine proper line ending */
workdir_ending = line_ending(ca);
if (!workdir_ending)
return -1;
/* only LF->CRLF conversion is supported, do nothing on LF platforms */
if (strcmp(workdir_ending, "\r\n") != 0)
return GIT_PASSTHROUGH; return GIT_PASSTHROUGH;
/* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
is_binary = git_buf_text_gather_stats(&stats, from, false); is_binary = git_buf_text_gather_stats(&stats, from, false);
/* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
if (stats.lf == 0 || stats.lf == stats.crlf) if (stats.lf == 0 || stats.lf == stats.crlf)
return GIT_PASSTHROUGH; return GIT_PASSTHROUGH;
if (ca->crlf_action == GIT_CRLF_AUTO || if (ca->crlf_action == GIT_CRLF_AUTO ||
ca->crlf_action == GIT_CRLF_GUESS) { ca->crlf_action == GIT_CRLF_AUTO_INPUT ||
ca->crlf_action == GIT_CRLF_AUTO_CRLF) {
/* If we have any existing CR or CRLF line endings, do nothing */ /* If we have any existing CR or CRLF line endings, do nothing */
if (stats.cr > 0 && stats.crlf > 0) if (stats.cr > 0)
return GIT_PASSTHROUGH;
/* If we have bare CR characters, do nothing */
if (stats.cr != stats.crlf)
return GIT_PASSTHROUGH; return GIT_PASSTHROUGH;
/* Don't filter binary files */ /* Don't filter binary files */
...@@ -273,69 +284,80 @@ static int crlf_apply_to_workdir( ...@@ -273,69 +284,80 @@ static int crlf_apply_to_workdir(
return git_buf_text_lf_to_crlf(to, from); return git_buf_text_lf_to_crlf(to, from);
} }
static int crlf_check( static int convert_attrs(
git_filter *self, struct crlf_attrs *ca,
void **payload, /* points to NULL ptr on entry, may be set */ const char **attr_values,
const git_filter_source *src, const git_filter_source *src)
const char **attr_values)
{ {
int error; int error;
struct crlf_attrs ca;
GIT_UNUSED(self); memset(ca, 0, sizeof(struct crlf_attrs));
if ((error = git_repository__cvar(&ca->auto_crlf,
git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF)) < 0 ||
(error = git_repository__cvar(&ca->safe_crlf,
git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF)) < 0 ||
(error = git_repository__cvar(&ca->core_eol,
git_filter_source_repo(src), GIT_CVAR_EOL)) < 0)
return error;
/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
ca->safe_crlf == GIT_SAFE_CRLF_FAIL)
ca->safe_crlf = GIT_SAFE_CRLF_WARN;
if (attr_values) {
/* load the text attribute */
ca->crlf_action = check_crlf(attr_values[2]); /* text */
if (ca->crlf_action == GIT_CRLF_UNDEFINED)
ca->crlf_action = check_crlf(attr_values[0]); /* crlf */
if (ca->crlf_action != GIT_CRLF_BINARY) {
/* load the eol attribute */
int eol_attr = check_eol(attr_values[1]);
if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF)
ca->crlf_action = GIT_CRLF_AUTO_INPUT;
else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF)
ca->crlf_action = GIT_CRLF_AUTO_CRLF;
else if (eol_attr == GIT_EOL_LF)
ca->crlf_action = GIT_CRLF_TEXT_INPUT;
else if (eol_attr == GIT_EOL_CRLF)
ca->crlf_action = GIT_CRLF_TEXT_CRLF;
}
if (!attr_values) { ca->attr_action = ca->crlf_action;
ca.crlf_action = GIT_CRLF_GUESS;
ca.eol = GIT_EOL_UNSET;
} else { } else {
ca.crlf_action = check_crlf(attr_values[2]); /* text */ ca->crlf_action = GIT_CRLF_UNDEFINED;
if (ca.crlf_action == GIT_CRLF_GUESS)
ca.crlf_action = check_crlf(attr_values[0]); /* clrf */
ca.eol = check_eol(attr_values[1]); /* eol */
} }
ca.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
ca.safe_crlf = GIT_SAFE_CRLF_DEFAULT;
/* if (ca->crlf_action == GIT_CRLF_TEXT)
* Use the core Git logic to see if we should perform CRLF for this file ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT;
* based on its attributes & the value of `core.autocrlf` if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
*/ ca->crlf_action = GIT_CRLF_BINARY;
ca.crlf_action = crlf_input_action(&ca); if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
ca->crlf_action = GIT_CRLF_AUTO_CRLF;
if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
ca->crlf_action = GIT_CRLF_AUTO_INPUT;
if (ca.crlf_action == GIT_CRLF_BINARY) return 0;
return GIT_PASSTHROUGH; }
if (ca.crlf_action == GIT_CRLF_GUESS ||
((ca.crlf_action == GIT_CRLF_AUTO ||
ca.crlf_action == GIT_CRLF_TEXT) &&
git_filter_source_mode(src) == GIT_FILTER_SMUDGE)) {
error = git_repository__cvar(
&ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF);
if (error < 0)
return error;
if (ca.crlf_action == GIT_CRLF_GUESS && static int crlf_check(
ca.auto_crlf == GIT_AUTO_CRLF_FALSE) git_filter *self,
return GIT_PASSTHROUGH; void **payload, /* points to NULL ptr on entry, may be set */
const git_filter_source *src,
const char **attr_values)
{
struct crlf_attrs ca;
if (ca.auto_crlf == GIT_AUTO_CRLF_INPUT && GIT_UNUSED(self);
ca.eol != GIT_EOL_CRLF &&
git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
return GIT_PASSTHROUGH;
}
if (git_filter_source_mode(src) == GIT_FILTER_CLEAN) { convert_attrs(&ca, attr_values, src);
error = git_repository__cvar(
&ca.safe_crlf, git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF);
if (error < 0)
return error;
/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ if (ca.crlf_action == GIT_CRLF_BINARY)
if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) && return GIT_PASSTHROUGH;
ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
ca.safe_crlf = GIT_SAFE_CRLF_WARN;
}
*payload = git__malloc(sizeof(ca)); *payload = git__malloc(sizeof(ca));
GITERR_CHECK_ALLOC(*payload); GITERR_CHECK_ALLOC(*payload);
...@@ -345,15 +367,16 @@ static int crlf_check( ...@@ -345,15 +367,16 @@ static int crlf_check(
} }
static int crlf_apply( static int crlf_apply(
git_filter *self, git_filter *self,
void **payload, /* may be read and/or set */ void **payload, /* may be read and/or set */
git_buf *to, git_buf *to,
const git_buf *from, const git_buf *from,
const git_filter_source *src) const git_filter_source *src)
{ {
/* initialize payload in case `check` was bypassed */ /* initialize payload in case `check` was bypassed */
if (!*payload) { if (!*payload) {
int error = crlf_check(self, payload, src, NULL); int error = crlf_check(self, payload, src, NULL);
if (error < 0) if (error < 0)
return error; return error;
} }
......
...@@ -15,16 +15,6 @@ ...@@ -15,16 +15,6 @@
/* Amount of file to examine for NUL byte when checking binary-ness */ /* Amount of file to examine for NUL byte when checking binary-ness */
#define GIT_FILTER_BYTES_TO_CHECK_NUL 8000 #define GIT_FILTER_BYTES_TO_CHECK_NUL 8000
/* Possible CRLF values */
typedef enum {
GIT_CRLF_GUESS = -1,
GIT_CRLF_BINARY = 0,
GIT_CRLF_TEXT,
GIT_CRLF_INPUT,
GIT_CRLF_CRLF,
GIT_CRLF_AUTO,
} git_crlf_t;
typedef struct { typedef struct {
git_attr_session *attr_session; git_attr_session *attr_session;
git_buf *temp_buf; git_buf *temp_buf;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment