Commit 6890e00c by Russell Belfer

Merge pull request #1801 from ethomson/utf8_bom

Skip UTF-8 BOM in binary detection
parents 5e1fb282 c0b01b75
...@@ -170,8 +170,14 @@ int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) ...@@ -170,8 +170,14 @@ int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
bool git_buf_text_is_binary(const git_buf *buf) bool git_buf_text_is_binary(const git_buf *buf)
{ {
const char *scan = buf->ptr, *end = buf->ptr + buf->size; const char *scan = buf->ptr, *end = buf->ptr + buf->size;
git_bom_t bom;
int printable = 0, nonprintable = 0; int printable = 0, nonprintable = 0;
scan += git_buf_text_detect_bom(&bom, buf, 0);
if (bom > GIT_BOM_UTF8)
return 1;
while (scan < end) { while (scan < end) {
unsigned char c = *scan++; unsigned char c = *scan++;
......
...@@ -718,6 +718,8 @@ void test_core_buffer__classify_with_utf8(void) ...@@ -718,6 +718,8 @@ void test_core_buffer__classify_with_utf8(void)
size_t data1len = 31; size_t data1len = 31;
char *data2 = "Internal NUL!!!\000\n\nI see you!\n"; char *data2 = "Internal NUL!!!\000\n\nI see you!\n";
size_t data2len = 29; size_t data2len = 29;
char *data3 = "\xef\xbb\xbfThis is UTF-8 with a BOM.\n";
size_t data3len = 20;
git_buf b; git_buf b;
b.ptr = data0; b.size = b.asize = data0len; b.ptr = data0; b.size = b.asize = data0len;
...@@ -731,6 +733,10 @@ void test_core_buffer__classify_with_utf8(void) ...@@ -731,6 +733,10 @@ void test_core_buffer__classify_with_utf8(void)
b.ptr = data2; b.size = b.asize = data2len; b.ptr = data2; b.size = b.asize = data2len;
cl_assert(git_buf_text_is_binary(&b)); cl_assert(git_buf_text_is_binary(&b));
cl_assert(git_buf_text_contains_nul(&b)); cl_assert(git_buf_text_contains_nul(&b));
b.ptr = data3; b.size = b.asize = data3len;
cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
} }
#define SIMILARITY_TEST_DATA_1 \ #define SIMILARITY_TEST_DATA_1 \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment