Commit 93b4a50d by Edward Thomson

Merge pull request #2704

parents 2136240d b4c6a9da
...@@ -3,6 +3,9 @@ v0.22 + 1 ...@@ -3,6 +3,9 @@ v0.22 + 1
### Changes or improvements ### Changes or improvements
* Updated binary identification in CRLF filtering to avoid false positives in
UTF-8 files.
* Rename and copy detection is enabled for small files. * Rename and copy detection is enabled for small files.
### API additions ### API additions
......
...@@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf) ...@@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf)
while (scan < end) { while (scan < end) {
unsigned char c = *scan++; unsigned char c = *scan++;
if (c > 0x1F && c < 0x7F) /* Printable characters are those above SPACE (0x1F) excluding DEL,
* and including BS, ESC and FF.
*/
if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
printable++; printable++;
else if (c == '\0') else if (c == '\0')
return true; return true;
......
...@@ -106,6 +106,31 @@ void test_checkout_crlf__all_crlf_autocrlf_true(void) ...@@ -106,6 +106,31 @@ void test_checkout_crlf__all_crlf_autocrlf_true(void)
check_file_contents("./crlf/all-crlf", ALL_CRLF_TEXT_RAW); check_file_contents("./crlf/all-crlf", ALL_CRLF_TEXT_RAW);
} }
void test_checkout_crlf__detect_crlf_autocrlf_true_utf8(void)
{
git_checkout_options opts = GIT_CHECKOUT_OPTIONS_INIT;
opts.checkout_strategy = GIT_CHECKOUT_SAFE_CREATE;
cl_repo_set_bool(g_repo, "core.autocrlf", true);
git_repository_set_head(g_repo, "refs/heads/utf8", NULL, NULL);
git_checkout_head(g_repo, &opts);
if (GIT_EOL_NATIVE == GIT_EOL_LF)
{
check_file_contents("./crlf/few-utf8-chars-lf.txt", FEW_UTF8_LF_RAW);
check_file_contents("./crlf/many-utf8-chars-lf.txt", MANY_UTF8_LF_RAW);
}
else
{
check_file_contents("./crlf/few-utf8-chars-lf.txt", FEW_UTF8_CRLF_RAW);
check_file_contents("./crlf/many-utf8-chars-lf.txt", MANY_UTF8_CRLF_RAW);
}
check_file_contents("./crlf/few-utf8-chars-crlf.txt", FEW_UTF8_CRLF_RAW);
check_file_contents("./crlf/many-utf8-chars-crlf.txt", MANY_UTF8_CRLF_RAW);
}
void test_checkout_crlf__autocrlf_true_index_size_is_filtered_size(void) void test_checkout_crlf__autocrlf_true_index_size_is_filtered_size(void)
{ {
git_index *index; git_index *index;
......
...@@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void) ...@@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void)
cl_assert(!git_buf_text_contains_nul(&b)); cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data1; b.size = b.asize = data1len; b.ptr = data1; b.size = b.asize = data1len;
cl_assert(git_buf_text_is_binary(&b)); cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b)); cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data2; b.size = b.asize = data2len; b.ptr = data2; b.size = b.asize = data2len;
......
...@@ -22,4 +22,9 @@ ...@@ -22,4 +22,9 @@
#define MORE_CRLF_TEXT_AS_LF "crlf\ncrlf\nlf\ncrlf\ncrlf\n" #define MORE_CRLF_TEXT_AS_LF "crlf\ncrlf\nlf\ncrlf\ncrlf\n"
#define MORE_LF_TEXT_AS_LF "lf\nlf\ncrlf\nlf\nlf\n" #define MORE_LF_TEXT_AS_LF "lf\nlf\ncrlf\nlf\nlf\n"
#define FEW_UTF8_CRLF_RAW "\xe2\x9a\xbdThe rest is ASCII01.\r\nThe rest is ASCII02.\r\nThe rest is ASCII03.\r\nThe rest is ASCII04.\r\nThe rest is ASCII05.\r\nThe rest is ASCII06.\r\nThe rest is ASCII07.\r\nThe rest is ASCII08.\r\nThe rest is ASCII09.\r\nThe rest is ASCII10.\r\nThe rest is ASCII11.\r\nThe rest is ASCII12.\r\nThe rest is ASCII13.\r\nThe rest is ASCII14.\r\nThe rest is ASCII15.\r\nThe rest is ASCII16.\r\nThe rest is ASCII17.\r\nThe rest is ASCII18.\r\nThe rest is ASCII19.\r\nThe rest is ASCII20.\r\nThe rest is ASCII21.\r\nThe rest is ASCII22.\r\n"
#define FEW_UTF8_LF_RAW "\xe2\x9a\xbdThe rest is ASCII01.\nThe rest is ASCII02.\nThe rest is ASCII03.\nThe rest is ASCII04.\nThe rest is ASCII05.\nThe rest is ASCII06.\nThe rest is ASCII07.\nThe rest is ASCII08.\nThe rest is ASCII09.\nThe rest is ASCII10.\nThe rest is ASCII11.\nThe rest is ASCII12.\nThe rest is ASCII13.\nThe rest is ASCII14.\nThe rest is ASCII15.\nThe rest is ASCII16.\nThe rest is ASCII17.\nThe rest is ASCII18.\nThe rest is ASCII19.\nThe rest is ASCII20.\nThe rest is ASCII21.\nThe rest is ASCII22.\n"
#define MANY_UTF8_CRLF_RAW "Lets sing!\r\n\xe2\x99\xab\xe2\x99\xaa\xe2\x99\xac\xe2\x99\xa9\r\nEat food\r\n\xf0\x9f\x8d\x85\xf0\x9f\x8d\x95\r\n"
#define MANY_UTF8_LF_RAW "Lets sing!\n\xe2\x99\xab\xe2\x99\xaa\xe2\x99\xac\xe2\x99\xa9\nEat food\n\xf0\x9f\x8d\x85\xf0\x9f\x8d\x95\n"
#endif #endif
2b55b4b94f655c857635b6a9005c056aa7de3532 a234455d62297f1856c4603686150c59fcb0aafe
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment