Commit b59c73d3 by Russell Belfer

Optimize away git_text_gather_stats in diff

GProf shows `git_text_gather_stats` as the most expensive call
in large diffs.  The function calculates a lot of information
that is not actually used and does not do so in an optimal
order.  This introduces a tuned `git_buf_is_binary` function
that executes the same algorithm in a fraction of the time.
parent 706a9974
...@@ -445,3 +445,21 @@ int git_buf_common_prefix(git_buf *buf, const git_strarray *strings) ...@@ -445,3 +445,21 @@ int git_buf_common_prefix(git_buf *buf, const git_strarray *strings)
return 0; return 0;
} }
/**
 * Heuristically decide whether a buffer holds binary data.
 *
 * Walks the first buf->size bytes of buf->ptr and classifies each byte:
 * bytes in the range 0x20..0x7e count as printable, a NUL byte marks the
 * buffer as binary immediately, and any other byte that git__isspace()
 * does not accept counts as non-printable.
 *
 * @param buf buffer to inspect; only its ptr and size fields are read
 * @return true if a NUL byte is found, or if the non-printable count
 *         exceeds the printable count divided by 128 (rounded down);
 *         false otherwise, including for an empty buffer
 */
bool git_buf_is_binary(const git_buf *buf)
{
	/*
	 * size_t index and counters: an int index would overflow before
	 * reaching the end of a buffer larger than INT_MAX bytes.
	 */
	size_t i, printable = 0, nonprintable = 0;

	for (i = 0; i < buf->size; i++) {
		/* read as unsigned so bytes >= 0x80 do not become negative */
		unsigned char c = buf->ptr[i];

		if (c > 0x1F && c < 0x7f)
			printable++;
		else if (c == '\0')
			return true; /* a single NUL is treated as conclusive */
		else if (!git__isspace(c))
			nonprintable++;
	}

	return ((printable >> 7) < nonprintable);
}
...@@ -125,4 +125,7 @@ int git_buf_cmp(const git_buf *a, const git_buf *b); ...@@ -125,4 +125,7 @@ int git_buf_cmp(const git_buf *a, const git_buf *b);
/* Fill buf with the common prefix of a array of strings */ /* Fill buf with the common prefix of a array of strings */
int git_buf_common_prefix(git_buf *buf, const git_strarray *strings); int git_buf_common_prefix(git_buf *buf, const git_strarray *strings);
/*
 * Check if buffer looks like it contains binary data.
 *
 * Returns true if the buffer contains a NUL byte, or if the number of
 * non-printable, non-whitespace bytes exceeds the number of printable
 * ASCII bytes (0x20..0x7e) divided by 128; returns false otherwise.
 */
bool git_buf_is_binary(const git_buf *buf);
#endif #endif
...@@ -174,15 +174,12 @@ static int file_is_binary_by_content( ...@@ -174,15 +174,12 @@ static int file_is_binary_by_content(
git_map *new_data) git_map *new_data)
{ {
git_buf search; git_buf search;
git_text_stats stats;
if ((delta->old_file.flags & BINARY_DIFF_FLAGS) == 0) { if ((delta->old_file.flags & BINARY_DIFF_FLAGS) == 0) {
search.ptr = old_data->data; search.ptr = old_data->data;
search.size = min(old_data->len, 4000); search.size = min(old_data->len, 4000);
git_text_gather_stats(&stats, &search); if (git_buf_is_binary(&search))
if (git_text_is_binary(&stats))
delta->old_file.flags |= GIT_DIFF_FILE_BINARY; delta->old_file.flags |= GIT_DIFF_FILE_BINARY;
else else
delta->old_file.flags |= GIT_DIFF_FILE_NOT_BINARY; delta->old_file.flags |= GIT_DIFF_FILE_NOT_BINARY;
...@@ -192,9 +189,7 @@ static int file_is_binary_by_content( ...@@ -192,9 +189,7 @@ static int file_is_binary_by_content(
search.ptr = new_data->data; search.ptr = new_data->data;
search.size = min(new_data->len, 4000); search.size = min(new_data->len, 4000);
git_text_gather_stats(&stats, &search); if (git_buf_is_binary(&search))
if (git_text_is_binary(&stats))
delta->new_file.flags |= GIT_DIFF_FILE_BINARY; delta->new_file.flags |= GIT_DIFF_FILE_BINARY;
else else
delta->new_file.flags |= GIT_DIFF_FILE_NOT_BINARY; delta->new_file.flags |= GIT_DIFF_FILE_NOT_BINARY;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment