Commit 160e4fb7 by Vicent Martí

Merge pull request #1230 from arrbee/match-core-git-diff-binary-detection

Match binary file check of core git in diff
parents 6e19edaa 0d65acad
......@@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
return ((printable >> 7) < nonprintable);
}
/*
 * Report whether any of the first `buf->size` bytes of `buf->ptr` is NUL.
 *
 * memchr() is used instead of strnlen(): strnlen() is a POSIX extension
 * rather than ISO C, so it is not declared under strict standard modes,
 * whereas memchr() is part of the C standard library. The result is the
 * same: a NUL is reported only if it lies inside the first `buf->size`
 * bytes.
 */
bool git_buf_text_contains_nul(const git_buf *buf)
{
	return (memchr(buf->ptr, '\0', buf->size) != NULL);
}
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
const char *ptr;
......
......@@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
 * Check quickly if buffer contains a NUL byte
 *
 * Only the first `buf->size` bytes of `buf->ptr` are examined; a NUL byte
 * located at or beyond `buf->size` does not count.
 *
 * @param buf Buffer to check
 * @return true if a NUL byte occurs within the first `buf->size` bytes
 *         of the buffer, false otherwise
 */
extern bool git_buf_text_contains_nul(const git_buf *buf);
/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Set to the type of BOM detected or GIT_BOM_NONE
......
......@@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
GIT_UNUSED(ctxt);
if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
if (git_buf_text_is_binary(&search))
/* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses.
*/
/* previously was: if (git_buf_text_is_binary(&search)) */
if (git_buf_text_contains_nul(&search))
file->flags |= GIT_DIFF_FILE_BINARY;
else
file->flags |= GIT_DIFF_FILE_NOT_BINARY;
......
......@@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
git_buf_free(&buf);
}
/*
 * Exercise both text classifiers on three inputs: plain ASCII text, text
 * containing a multi-byte UTF-8 character, and data with an embedded NUL.
 *
 * The fixtures are arrays rather than pointers so that their byte lengths
 * can be taken with sizeof. A hand-written length is easy to get wrong for
 * the UTF-8 sample, where the ellipsis is one character but three bytes,
 * and strlen() would stop early at the embedded NUL in the last sample.
 */
void test_core_buffer__classify_with_utf8(void)
{
	char data0[] = "Simple text\n";
	char data1[] = "Is that UTF-8 data I see…\nYep!\n";
	char data2[] = "Internal NUL!!!\000\n\nI see you!\n";
	git_buf b;

	/* Plain ASCII: neither check flags it. */
	b.ptr = data0; b.size = b.asize = sizeof(data0) - 1;
	cl_assert(!git_buf_text_is_binary(&b));
	cl_assert(!git_buf_text_contains_nul(&b));

	/* UTF-8 text: the heuristic reports binary, the NUL scan does not. */
	b.ptr = data1; b.size = b.asize = sizeof(data1) - 1;
	cl_assert(git_buf_text_is_binary(&b));
	cl_assert(!git_buf_text_contains_nul(&b));

	/* Embedded NUL: both checks flag it. */
	b.ptr = data2; b.size = b.asize = sizeof(data2) - 1;
	cl_assert(git_buf_text_is_binary(&b));
	cl_assert(git_buf_text_contains_nul(&b));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment