Commit 0d65acad by Russell Belfer

Match binary file check of core git in diff

Core git just looks for NUL bytes when deciding whether a file is
binary inside diff (although it uses a better algorithm in
checkout, when deciding if CRLF conversion should be done).
Libgit2 was using the better algorithm in both places, but that
was causing some confusion.  For now, this makes diff just look
for NUL bytes to decide if a file is binary by content.
parent d0b14cea
...@@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf) ...@@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
return ((printable >> 7) < nonprintable); return ((printable >> 7) < nonprintable);
} }
/**
 * Report whether a NUL byte occurs within the first `size` bytes of the
 * buffer.
 *
 * Implemented with memchr (ISO C) instead of strnlen: strnlen is a POSIX
 * extension that is not declared by every C library, while memchr gives
 * the same answer and is available everywhere.
 *
 * @param buf Buffer to check
 * @return true if a NUL byte is present, false otherwise
 */
bool git_buf_text_contains_nul(const git_buf *buf)
{
	return (memchr(buf->ptr, '\0', buf->size) != NULL);
}
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset) int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{ {
const char *ptr; const char *ptr;
......
...@@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs); ...@@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
extern bool git_buf_text_is_binary(const git_buf *buf); extern bool git_buf_text_is_binary(const git_buf *buf);
/** /**
* Check quickly if buffer contains a NUL byte
*
* Only the first `size` bytes of the buffer are examined.
*
* @param buf Buffer to check
* @return true if buffer contains a NUL byte within its `size` bytes
*/
extern bool git_buf_text_contains_nul(const git_buf *buf);
/**
* Check if a buffer begins with a UTF BOM * Check if a buffer begins with a UTF BOM
* *
* @param bom Set to the type of BOM detected or GIT_BOM_NONE * @param bom Set to the type of BOM detected or GIT_BOM_NONE
......
...@@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content( ...@@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
GIT_UNUSED(ctxt); GIT_UNUSED(ctxt);
if ((file->flags & KNOWN_BINARY_FLAGS) == 0) { if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
if (git_buf_text_is_binary(&search)) /* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses.
*/
/* previously was: if (git_buf_text_is_binary(&search)) */
if (git_buf_text_contains_nul(&search))
file->flags |= GIT_DIFF_FILE_BINARY; file->flags |= GIT_DIFF_FILE_BINARY;
else else
file->flags |= GIT_DIFF_FILE_NOT_BINARY; file->flags |= GIT_DIFF_FILE_NOT_BINARY;
......
...@@ -704,3 +704,26 @@ void test_core_buffer__base64(void) ...@@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
git_buf_free(&buf); git_buf_free(&buf);
} }
/*
 * Run both content classifiers over three samples: plain ASCII text, text
 * containing a multi-byte UTF-8 character, and text with an embedded NUL.
 *
 * The heuristic classifier is expected to flag the UTF-8 sample as binary,
 * while the NUL-only check must not; both must flag the embedded-NUL sample.
 */
void test_core_buffer__classify_with_utf8(void)
{
	/*
	 * Declared as arrays so that sizeof gives the real byte count.
	 * Hand-counted lengths are fragile here: the ellipsis in data1 is
	 * more than one byte in UTF-8, and data2 contains an embedded NUL,
	 * so strlen cannot be used either.
	 */
	char data0[] = "Simple text\n";
	char data1[] = "Is that UTF-8 data I see…\nYep!\n";
	char data2[] = "Internal NUL!!!\000\n\nI see you!\n";
	git_buf b;

	/* sizeof - 1 drops the terminator the compiler appends to each array */
	b.ptr = data0;
	b.size = b.asize = sizeof(data0) - 1;
	cl_assert(!git_buf_text_is_binary(&b));
	cl_assert(!git_buf_text_contains_nul(&b));

	b.ptr = data1;
	b.size = b.asize = sizeof(data1) - 1;
	cl_assert(git_buf_text_is_binary(&b));
	cl_assert(!git_buf_text_contains_nul(&b));

	b.ptr = data2;
	b.size = b.asize = sizeof(data2) - 1;
	cl_assert(git_buf_text_is_binary(&b));
	cl_assert(git_buf_text_contains_nul(&b));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment