Commit e7da560a by Vicent Martí

Merge pull request #1088 from arrbee/consolidate-text-functions

Consolidate text buffer functions
parents 69302126 7bf87ab6
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "buf_text.h"
/*
 * Append `string` to `buf`, writing `esc_with` in front of every character
 * of `string` that also occurs in `esc_chars`.
 *
 * The work is done in two passes: the first measures how many bytes the
 * escaped text needs so the buffer is grown exactly once, the second copies
 * the text and the escape prefixes into place.
 *
 * A NULL `string` appends nothing and returns 0.  Returns -1 when the
 * buffer cannot be grown, 0 otherwise.
 */
int git_buf_text_puts_escaped(
	git_buf *buf,
	const char *string,
	const char *esc_chars,
	const char *esc_with)
{
	const char *src;
	size_t prefix_len = strlen(esc_with);
	size_t needed = 0, plain, special;

	if (!string)
		return 0;

	/* pass 1: work out how many bytes the escaped form occupies */
	src = string;
	while (*src) {
		/* stretch of characters that are copied verbatim */
		plain = strcspn(src, esc_chars);
		needed += plain;
		src += plain;

		/* stretch of characters that each gain an escape prefix */
		special = strspn(src, esc_chars);
		needed += special * (prefix_len + 1);
		src += special;
	}

	/* one extra byte for the terminating NUL */
	if (git_buf_grow(buf, buf->size + needed + 1) < 0)
		return -1;

	/* pass 2: emit the escaped text at the current end of the buffer */
	src = string;
	while (*src) {
		plain = strcspn(src, esc_chars);
		memmove(buf->ptr + buf->size, src, plain);
		buf->size += plain;
		src += plain;

		for (special = strspn(src, esc_chars); special > 0; --special) {
			/* escape prefix first ... */
			memmove(buf->ptr + buf->size, esc_with, prefix_len);
			buf->size += prefix_len;

			/* ... then the character being escaped */
			buf->ptr[buf->size] = *src;
			buf->size++;
			src++;
		}
	}

	buf->ptr[buf->size] = '\0';

	return 0;
}
/*
 * Unescape the contents of `buf` in place.
 *
 * Hands buf->ptr to git__unescape() and stores that function's return value
 * as the new buf->size.
 * NOTE(review): presumably git__unescape() rewrites the string in place and
 * returns its new length -- confirm against its definition.
 */
void git_buf_text_unescape(git_buf *buf)
{
buf->size = git__unescape(buf->ptr);
}
/*
 * Set `buf` to the longest prefix shared by every entry of `strings`.
 *
 * The buffer is cleared first; a NULL or empty array leaves it cleared and
 * returns 0.  Otherwise the buffer starts out as a copy of the first entry
 * and is shortened against each following entry in turn.  Returns -1 if the
 * initial copy fails, 0 otherwise.
 */
int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
{
	size_t idx;

	git_buf_clear(buf);

	if (!strings || !strings->count)
		return 0;

	/* the first entry is the initial candidate for the prefix */
	if (git_buf_sets(buf, strings->strings[0]) < 0)
		return -1;

	/* shorten the candidate against every remaining entry */
	for (idx = 1; idx < strings->count; ++idx) {
		const char *entry = strings->strings[idx];
		const char *candidate = buf->ptr;
		size_t shared = 0;

		/* count leading bytes on which entry and candidate agree */
		while (entry[shared] != '\0' && entry[shared] == candidate[shared])
			shared++;

		git_buf_truncate(buf, shared);

		/* nothing in common any more: later entries cannot change that */
		if (buf->size == 0)
			break;
	}

	return 0;
}
/*
 * Quick heuristic: does `buf` look like binary data?
 *
 * Any NUL byte means "binary" immediately.  Otherwise bytes in the range
 * 0x20..0x7E are tallied as printable, bytes for which git__isspace() is
 * true are ignored, and everything else is tallied as non-printable.  The
 * buffer is reported as binary when the non-printable tally exceeds the
 * printable tally shifted right by 7.
 */
bool git_buf_text_is_binary(const git_buf *buf)
{
	int text_bytes = 0, other_bytes = 0;
	size_t pos;

	for (pos = 0; pos < buf->size; ++pos) {
		unsigned char byte = buf->ptr[pos];

		/* a single NUL is decisive */
		if (byte == '\0')
			return true;

		if (byte >= 0x20 && byte <= 0x7E)
			text_bytes++;
		else if (!git__isspace(byte))
			other_bytes++;
	}

	return other_bytes > (text_bytes >> 7);
}
/*
 * Look for a Unicode byte order mark in `buf`, starting at byte `offset`.
 *
 * Recognised sequences:
 *   EF BB BF      -> GIT_BOM_UTF8      (returns 3)
 *   FE FF         -> GIT_BOM_UTF16_BE  (returns 2)
 *   FF FE         -> GIT_BOM_UTF16_LE  (returns 2)
 *   FF FE 00 00   -> GIT_BOM_UTF32_LE  (returns 4)
 *   00 00 FE FF   -> GIT_BOM_UTF32_BE  (returns 4)
 *
 * `*bom` is always written: it is set to GIT_BOM_NONE up front so callers
 * that pass an uninitialised variable can safely inspect it when no BOM is
 * found (return value 0).
 *
 * @param bom    Receives the detected BOM type, or GIT_BOM_NONE
 * @param buf    Buffer to inspect
 * @param offset Position in the buffer at which a BOM is expected
 * @return Length of the BOM in bytes, or 0 if there is none
 */
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
	const char *ptr;
	size_t len;

	*bom = GIT_BOM_NONE;

	/*
	 * Need at least 2 bytes after offset to look for any BOM.  Written
	 * without `offset + 2` so that a huge offset cannot wrap around and
	 * slip past the check.
	 */
	if (buf->size < 2 || offset > buf->size - 2)
		return 0;

	ptr = buf->ptr + offset;
	len = buf->size - offset;

	/* dispatch on the first byte; `ptr` then points at the second byte */
	switch (*ptr++) {
	case 0:
		if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
			*bom = GIT_BOM_UTF32_BE;
			return 4;
		}
		break;
	case '\xEF':
		if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
			*bom = GIT_BOM_UTF8;
			return 3;
		}
		break;
	case '\xFE':
		if (*ptr == '\xFF') {
			*bom = GIT_BOM_UTF16_BE;
			return 2;
		}
		break;
	case '\xFF':
		if (*ptr != '\xFE')
			break;
		/* FF FE is UTF-16 LE unless two zero bytes follow (UTF-32 LE) */
		if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
			*bom = GIT_BOM_UTF32_LE;
			return 4;
		}
		*bom = GIT_BOM_UTF16_LE;
		return 2;
	default:
		break;
	}

	return 0;
}
/*
 * Fill `stats` with byte-class counts for `buf` and report whether the
 * buffer looks like binary data.
 *
 * `stats` is zeroed first.  A BOM at the start of the buffer is recorded in
 * stats->bom and, when `skip_bom` is true, left out of the counts.  A single
 * trailing \032 byte is left out of the counts as well.
 *
 * Returns true when at least one NUL was counted or when the non-printable
 * count exceeds the printable count shifted right by 7.
 */
bool git_buf_text_gather_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
{
	const char *cur = buf->ptr;
	const char *stop = buf->ptr + buf->size;
	int bom_len;

	memset(stats, 0, sizeof(*stats));

	/* BOM detection */
	bom_len = git_buf_text_detect_bom(&stats->bom, buf, 0);
	if (skip_bom)
		cur += bom_len;

	/* a trailing EOF character is not part of the text */
	if (buf->size > 0 && stop[-1] == '\032')
		stop--;

	/* classify every remaining byte */
	for (; cur < stop; ++cur) {
		unsigned char byte = *cur;

		switch (byte) {
		case '\0':
			stats->nul++;
			stats->nonprintable++;
			break;
		case '\n':
			stats->lf++;
			break;
		case '\r':
			stats->cr++;
			/* a CR directly followed by LF also counts as a CRLF pair */
			if (cur + 1 < stop && cur[1] == '\n')
				stats->crlf++;
			break;
		case '\b': case '\t': case '\v': case '\f': case 0x1b: /*ESC*/
			stats->printable++;
			break;
		default:
			/* 0x20..0x7E and everything above 0x9F count as printable */
			if ((byte > 0x1F && byte < 0x7F) || byte > 0x9f)
				stats->printable++;
			else
				stats->nonprintable++;
			break;
		}
	}

	if (stats->nul > 0)
		return true;

	return (stats->printable >> 7) < stats->nonprintable;
}
/*
* Copyright (C) 2009-2012 the libgit2 contributors
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_buf_text_h__
#define INCLUDE_buf_text_h__
#include "buffer.h"
/*
 * Kind of Unicode byte order mark found at the start of a piece of text.
 * The byte sequences listed are the ones git_buf_text_detect_bom() matches.
 */
typedef enum {
GIT_BOM_NONE = 0, /* no BOM */
GIT_BOM_UTF8 = 1, /* EF BB BF */
GIT_BOM_UTF16_LE = 2, /* FF FE */
GIT_BOM_UTF16_BE = 3, /* FE FF */
GIT_BOM_UTF32_LE = 4, /* FF FE 00 00 */
GIT_BOM_UTF32_BE = 5 /* 00 00 FE FF */
} git_bom_t;
/*
 * Byte-class counters for a piece of text, as filled in by
 * git_buf_text_gather_stats().
 */
typedef struct {
git_bom_t bom; /* BOM found at head of text */
unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */
unsigned int printable, nonprintable; /* These are just approximations! */
} git_buf_text_stats;
/**
 * Append string to buffer, prefixing each character from `esc_chars` with
 * the `esc_with` string.
 *
 * A NULL `string` appends nothing and returns 0.
 *
 * @param buf Buffer to append data to
 * @param string String to escape and append
 * @param esc_chars Characters to be escaped
 * @param esc_with String to insert in front of each found character
 * @return 0 on success, <0 on failure (probably allocation problem)
 */
extern int git_buf_text_puts_escaped(
git_buf *buf,
const char *string,
const char *esc_chars,
const char *esc_with);
/**
 * Append string to buffer, putting a backslash in front of every character
 * that is special in a regular expression.
 *
 * @param buf Buffer to append data to
 * @param string String to escape and append
 * @return the result of git_buf_text_puts_escaped
 */
GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
{
	const char *regex_specials = "^.[]$()|*+?{}\\";
	const char *backslash = "\\";

	return git_buf_text_puts_escaped(buf, string, regex_specials, backslash);
}
/**
 * Unescape all characters in a buffer in place
 *
 * I.e. remove backslashes.  The buffer size is updated to the value
 * reported by the underlying git__unescape() call.
 *
 * @param buf Buffer whose contents are unescaped
 */
extern void git_buf_text_unescape(git_buf *buf);
/**
 * Fill buffer with the common prefix of an array of strings
 *
 * Buffer will be set to empty if there is no common prefix, or if `strs`
 * is NULL or contains no strings.
 *
 * @param buf Buffer that receives the prefix (cleared first)
 * @param strs Strings to compare
 * @return 0 on success, -1 if the first string could not be copied into buf
 */
extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
/**
 * Check quickly if buffer looks like it contains binary data
 *
 * Any NUL byte makes the buffer count as binary; otherwise the decision is
 * based on the ratio of printable to non-printable bytes.
 *
 * @param buf Buffer to check
 * @return true if buffer looks like non-text data
 */
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
 * Check if a buffer begins with a UTF BOM
 *
 * At least two bytes must be available after `offset` for any BOM to be
 * detected.
 *
 * @param bom Set to the type of BOM detected or GIT_BOM_NONE
 * @param buf Buffer in which to check the first bytes for a BOM
 * @param offset Offset into buffer to look for BOM
 * @return Number of bytes of BOM data (2, 3 or 4), or 0 if no BOM found
 */
extern int git_buf_text_detect_bom(
git_bom_t *bom, const git_buf *buf, size_t offset);
/**
 * Gather stats for a piece of text
 *
 * Fill the `stats` structure with counts of unreadable characters, carriage
 * returns, etc, so it can be used in heuristics. This automatically skips
 * a trailing EOF (\032 character). It also looks for a BOM at the start of
 * the text, records it in `stats->bom`, and can be told to leave it out of
 * the counts.
 *
 * @param stats Structure to be filled in (zeroed before counting)
 * @param buf Text to process
 * @param skip_bom Exclude leading BOM from stats if true
 * @return true if the buffer heuristically looks like binary data (it
 *         contains a NUL, or too many non-printable bytes)
 */
extern bool git_buf_text_gather_stats(
git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
#endif
...@@ -31,15 +31,7 @@ void git_buf_init(git_buf *buf, size_t initial_size) ...@@ -31,15 +31,7 @@ void git_buf_init(git_buf *buf, size_t initial_size)
git_buf_grow(buf, initial_size); git_buf_grow(buf, initial_size);
} }
int git_buf_grow(git_buf *buf, size_t target_size) int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom)
{
int error = git_buf_try_grow(buf, target_size);
if (error != 0)
buf->ptr = git_buf__oom;
return error;
}
int git_buf_try_grow(git_buf *buf, size_t target_size)
{ {
char *new_ptr; char *new_ptr;
size_t new_size; size_t new_size;
...@@ -67,8 +59,12 @@ int git_buf_try_grow(git_buf *buf, size_t target_size) ...@@ -67,8 +59,12 @@ int git_buf_try_grow(git_buf *buf, size_t target_size)
new_size = (new_size + 7) & ~7; new_size = (new_size + 7) & ~7;
new_ptr = git__realloc(new_ptr, new_size); new_ptr = git__realloc(new_ptr, new_size);
if (!new_ptr)
if (!new_ptr) {
if (mark_oom)
buf->ptr = git_buf__oom;
return -1; return -1;
}
buf->asize = new_size; buf->asize = new_size;
buf->ptr = new_ptr; buf->ptr = new_ptr;
...@@ -141,51 +137,6 @@ int git_buf_puts(git_buf *buf, const char *string) ...@@ -141,51 +137,6 @@ int git_buf_puts(git_buf *buf, const char *string)
return git_buf_put(buf, string, strlen(string)); return git_buf_put(buf, string, strlen(string));
} }
int git_buf_puts_escaped(
git_buf *buf, const char *string, const char *esc_chars, const char *esc_with)
{
const char *scan;
size_t total = 0, esc_len = strlen(esc_with), count;
if (!string)
return 0;
for (scan = string; *scan; ) {
/* count run of non-escaped characters */
count = strcspn(scan, esc_chars);
total += count;
scan += count;
/* count run of escaped characters */
count = strspn(scan, esc_chars);
total += count * (esc_len + 1);
scan += count;
}
ENSURE_SIZE(buf, buf->size + total + 1);
for (scan = string; *scan; ) {
count = strcspn(scan, esc_chars);
memmove(buf->ptr + buf->size, scan, count);
scan += count;
buf->size += count;
for (count = strspn(scan, esc_chars); count > 0; --count) {
/* copy escape sequence */
memmove(buf->ptr + buf->size, esc_with, esc_len);
buf->size += esc_len;
/* copy character to be escaped */
buf->ptr[buf->size] = *scan;
buf->size++;
scan++;
}
}
buf->ptr[buf->size] = '\0';
return 0;
}
static const char b64str[64] = static const char b64str[64] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
...@@ -497,59 +448,6 @@ int git_buf_cmp(const git_buf *a, const git_buf *b) ...@@ -497,59 +448,6 @@ int git_buf_cmp(const git_buf *a, const git_buf *b)
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
} }
int git_buf_common_prefix(git_buf *buf, const git_strarray *strings)
{
size_t i;
const char *str, *pfx;
git_buf_clear(buf);
if (!strings || !strings->count)
return 0;
/* initialize common prefix to first string */
if (git_buf_sets(buf, strings->strings[0]) < 0)
return -1;
/* go through the rest of the strings, truncating to shared prefix */
for (i = 1; i < strings->count; ++i) {
for (str = strings->strings[i], pfx = buf->ptr;
*str && *str == *pfx; str++, pfx++)
/* scanning */;
git_buf_truncate(buf, pfx - buf->ptr);
if (!buf->size)
break;
}
return 0;
}
bool git_buf_is_binary(const git_buf *buf)
{
size_t i;
int printable = 0, nonprintable = 0;
for (i = 0; i < buf->size; i++) {
unsigned char c = buf->ptr[i];
if (c > 0x1F && c < 0x7F)
printable++;
else if (c == '\0')
return true;
else if (!git__isspace(c))
nonprintable++;
}
return ((printable >> 7) < nonprintable);
}
void git_buf_unescape(git_buf *buf)
{
buf->size = git__unescape(buf->ptr);
}
int git_buf_splice( int git_buf_splice(
git_buf *buf, git_buf *buf,
size_t where, size_t where,
......
...@@ -27,30 +27,35 @@ extern char git_buf__oom[]; ...@@ -27,30 +27,35 @@ extern char git_buf__oom[];
* For the cases where GIT_BUF_INIT cannot be used to do static * For the cases where GIT_BUF_INIT cannot be used to do static
* initialization. * initialization.
*/ */
void git_buf_init(git_buf *buf, size_t initial_size); extern void git_buf_init(git_buf *buf, size_t initial_size);
/** /**
* Grow the buffer to hold at least `target_size` bytes. * Attempt to grow the buffer to hold at least `target_size` bytes.
* *
* If the allocation fails, this will return an error and the buffer * If the allocation fails, this will return an error. If mark_oom is true,
* will be marked as invalid for future operations. The existing * this will mark the buffer as invalid for future operations; if false,
* contents of the buffer will be preserved however. * existing buffer content will be preserved, but calling code must handle
* @return 0 on success or -1 on failure * that buffer was not expanded.
*/ */
int git_buf_grow(git_buf *buf, size_t target_size); extern int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom);
/** /**
* Attempt to grow the buffer to hold at least `target_size` bytes. * Grow the buffer to hold at least `target_size` bytes.
* *
* This is just like `git_buf_grow` except that even if the allocation * If the allocation fails, this will return an error and the buffer will be
* fails, the git_buf will still be left in a valid state. * marked as invalid for future operations, invaliding contents.
*
* @return 0 on success or -1 on failure
*/ */
int git_buf_try_grow(git_buf *buf, size_t target_size); GIT_INLINE(int) git_buf_grow(git_buf *buf, size_t target_size)
{
return git_buf_try_grow(buf, target_size, true);
}
void git_buf_free(git_buf *buf); extern void git_buf_free(git_buf *buf);
void git_buf_swap(git_buf *buf_a, git_buf *buf_b); extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
char *git_buf_detach(git_buf *buf); extern char *git_buf_detach(git_buf *buf);
void git_buf_attach(git_buf *buf, char *ptr, size_t asize); extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
/** /**
* Test if there have been any reallocation failures with this git_buf. * Test if there have been any reallocation failures with this git_buf.
...@@ -92,18 +97,6 @@ int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...); ...@@ -92,18 +97,6 @@ int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...);
int git_buf_join(git_buf *buf, char separator, const char *str_a, const char *str_b); int git_buf_join(git_buf *buf, char separator, const char *str_a, const char *str_b);
/** /**
* Copy string into buf prefixing every character that is contained in the
* esc_chars string with the esc_with string.
*/
int git_buf_puts_escaped(
git_buf *buf, const char *string, const char *esc_chars, const char *esc_with);
GIT_INLINE(int) git_buf_puts_escape_regex(git_buf *buf, const char *string)
{
return git_buf_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\");
}
/**
* Join two strings as paths, inserting a slash between as needed. * Join two strings as paths, inserting a slash between as needed.
* @return 0 on success, -1 on failure * @return 0 on success, -1 on failure
*/ */
...@@ -146,15 +139,6 @@ void git_buf_rtrim(git_buf *buf); ...@@ -146,15 +139,6 @@ void git_buf_rtrim(git_buf *buf);
int git_buf_cmp(const git_buf *a, const git_buf *b); int git_buf_cmp(const git_buf *a, const git_buf *b);
/* Fill buf with the common prefix of a array of strings */
int git_buf_common_prefix(git_buf *buf, const git_strarray *strings);
/* Check if buffer looks like it contains binary data */
bool git_buf_is_binary(const git_buf *buf);
/* Unescape all characters in a buffer */
void git_buf_unescape(git_buf *buf);
/* Write data as base64 encoded in buffer */ /* Write data as base64 encoded in buffer */
int git_buf_put_base64(git_buf *buf, const char *data, size_t len); int git_buf_put_base64(git_buf *buf, const char *data, size_t len);
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "config.h" #include "config.h"
#include "git2/config.h" #include "git2/config.h"
#include "vector.h" #include "vector.h"
#include "buf_text.h"
#if GIT_WIN32 #if GIT_WIN32
# include <windows.h> # include <windows.h>
#endif #endif
...@@ -803,7 +804,7 @@ int git_config_rename_section( ...@@ -803,7 +804,7 @@ int git_config_rename_section(
int error = -1; int error = -1;
struct rename_data data; struct rename_data data;
git_buf_puts_escape_regex(&pattern, old_section_name); git_buf_text_puts_escape_regex(&pattern, old_section_name);
git_buf_puts(&pattern, "\\..+"); git_buf_puts(&pattern, "\\..+");
if (git_buf_oom(&pattern)) if (git_buf_oom(&pattern))
goto cleanup; goto cleanup;
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "fileops.h" #include "fileops.h"
#include "filebuf.h" #include "filebuf.h"
#include "buffer.h" #include "buffer.h"
#include "buf_text.h"
#include "git2/config.h" #include "git2/config.h"
#include "git2/types.h" #include "git2/types.h"
#include "strmap.h" #include "strmap.h"
...@@ -854,17 +855,14 @@ fail_parse: ...@@ -854,17 +855,14 @@ fail_parse:
static int skip_bom(diskfile_backend *cfg) static int skip_bom(diskfile_backend *cfg)
{ {
static const char utf8_bom[] = { '\xef', '\xbb', '\xbf' }; git_bom_t bom;
int bom_offset = git_buf_text_detect_bom(&bom,
&cfg->reader.buffer, cfg->reader.read_ptr - cfg->reader.buffer.ptr);
if (cfg->reader.buffer.size < sizeof(utf8_bom)) if (bom == GIT_BOM_UTF8)
return 0; cfg->reader.read_ptr += bom_offset;
if (memcmp(cfg->reader.read_ptr, utf8_bom, sizeof(utf8_bom)) == 0)
cfg->reader.read_ptr += sizeof(utf8_bom);
/* TODO: the reference implementation does pretty stupid /* TODO: reference implementation is pretty stupid with BoM */
stuff with the BoM
*/
return 0; return 0;
} }
......
...@@ -148,8 +148,11 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou ...@@ -148,8 +148,11 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
if (filter->attrs.crlf_action == GIT_CRLF_AUTO || if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
filter->attrs.crlf_action == GIT_CRLF_GUESS) { filter->attrs.crlf_action == GIT_CRLF_GUESS) {
git_text_stats stats; git_buf_text_stats stats;
git_text_gather_stats(&stats, source);
/* Check heuristics for binary vs text... */
if (git_buf_text_gather_stats(&stats, source, false))
return -1;
/* /*
* We're currently not going to even try to convert stuff * We're currently not going to even try to convert stuff
...@@ -159,12 +162,6 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou ...@@ -159,12 +162,6 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
if (stats.cr != stats.crlf) if (stats.cr != stats.crlf)
return -1; return -1;
/*
* And add some heuristics for binary vs text, of course...
*/
if (git_text_is_binary(&stats))
return -1;
#if 0 #if 0
if (crlf_action == CRLF_GUESS) { if (crlf_action == CRLF_GUESS) {
/* /*
......
...@@ -142,7 +142,7 @@ static int diff_delta_is_binary_by_content( ...@@ -142,7 +142,7 @@ static int diff_delta_is_binary_by_content(
search.ptr = map->data; search.ptr = map->data;
search.size = min(map->len, 4000); search.size = min(map->len, 4000);
if (git_buf_is_binary(&search)) if (git_buf_text_is_binary(&search))
file->flags |= GIT_DIFF_FILE_BINARY; file->flags |= GIT_DIFF_FILE_BINARY;
else else
file->flags |= GIT_DIFF_FILE_NOT_BINARY; file->flags |= GIT_DIFF_FILE_NOT_BINARY;
......
...@@ -13,75 +13,6 @@ ...@@ -13,75 +13,6 @@
#include "git2/config.h" #include "git2/config.h"
#include "blob.h" #include "blob.h"
/* Tweaked from Core Git. I wonder what we could use this for... */
void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
{
size_t i;
memset(stats, 0, sizeof(*stats));
for (i = 0; i < git_buf_len(text); i++) {
unsigned char c = text->ptr[i];
if (c == '\r') {
stats->cr++;
if (i + 1 < git_buf_len(text) && text->ptr[i + 1] == '\n')
stats->crlf++;
}
else if (c == '\n')
stats->lf++;
else if (c == 127)
/* DEL */
stats->nonprintable++;
else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) {
switch (c) {
/* BS, HT, ESC and FF */
case '\b': case '\t': case '\033': case '\014':
stats->printable++;
break;
case 0:
stats->nul++;
/* fall through */
default:
stats->nonprintable++;
}
}
else
stats->printable++;
}
/* If file ends with EOF then don't count this EOF as non-printable. */
if (git_buf_len(text) >= 1 && text->ptr[text->size - 1] == '\032')
stats->nonprintable--;
}
/*
* Fresh from Core Git
*/
int git_text_is_binary(git_text_stats *stats)
{
if (stats->nul)
return 1;
if ((stats->printable >> 7) < stats->nonprintable)
return 1;
/*
* Other heuristics? Average line length might be relevant,
* as might LF vs CR vs CRLF counts..
*
* NOTE! It might be normal to have a low ratio of CRLF to LF
* (somebody starts with a LF-only file and edits it with an editor
* that adds CRLF only to lines that are added..). But do we
* want to support CR-only? Probably not.
*/
return 0;
}
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{ {
int error; int error;
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "common.h" #include "common.h"
#include "buffer.h" #include "buffer.h"
#include "buf_text.h"
#include "git2/odb.h" #include "git2/odb.h"
#include "git2/repository.h" #include "git2/repository.h"
...@@ -31,14 +32,6 @@ typedef enum { ...@@ -31,14 +32,6 @@ typedef enum {
GIT_CRLF_AUTO, GIT_CRLF_AUTO,
} git_crlf_t; } git_crlf_t;
typedef struct {
/* NUL, CR, LF and CRLF counts */
unsigned int nul, cr, lf, crlf;
/* These are just approximations! */
unsigned int printable, nonprintable;
} git_text_stats;
/* /*
* FILTER API * FILTER API
*/ */
...@@ -99,24 +92,4 @@ extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo ...@@ -99,24 +92,4 @@ extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo
/* Add CRLF, from ODB to worktree */ /* Add CRLF, from ODB to worktree */
extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path); extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path);
/*
* PLAINTEXT API
*/
/*
* Gather stats for a piece of text
*
* Fill the `stats` structure with information on the number of
* unreadable characters, carriage returns, etc, so it can be
* used in heuristics.
*/
extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text);
/*
* Process `git_text_stats` data generated by `git_text_stat` to see
* if it qualifies as a binary file
*/
extern int git_text_is_binary(git_text_stats *stats);
#endif #endif
...@@ -511,7 +511,7 @@ static bool _check_dir_contents( ...@@ -511,7 +511,7 @@ static bool _check_dir_contents(
size_t sub_size = strlen(sub); size_t sub_size = strlen(sub);
/* leave base valid even if we could not make space for subdir */ /* leave base valid even if we could not make space for subdir */
if (git_buf_try_grow(dir, dir_size + sub_size + 2) < 0) if (git_buf_try_grow(dir, dir_size + sub_size + 2, false) < 0)
return false; return false;
/* save excursion */ /* save excursion */
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
*/ */
#include "pathspec.h" #include "pathspec.h"
#include "buf_text.h"
#include "attr_file.h" #include "attr_file.h"
/* what is the common non-wildcard prefix for all items in the pathspec */ /* what is the common non-wildcard prefix for all items in the pathspec */
...@@ -15,7 +16,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec) ...@@ -15,7 +16,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec)
const char *scan; const char *scan;
if (!pathspec || !pathspec->count || if (!pathspec || !pathspec->count ||
git_buf_common_prefix(&prefix, pathspec) < 0) git_buf_text_common_prefix(&prefix, pathspec) < 0)
return NULL; return NULL;
/* diff prefix will only be leading non-wildcards */ /* diff prefix will only be leading non-wildcards */
...@@ -31,7 +32,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec) ...@@ -31,7 +32,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec)
return NULL; return NULL;
} }
git_buf_unescape(&prefix); git_buf_text_unescape(&prefix);
return git_buf_detach(&prefix); return git_buf_detach(&prefix);
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "git2/index.h" #include "git2/index.h"
#include "git2/submodule.h" #include "git2/submodule.h"
#include "buffer.h" #include "buffer.h"
#include "buf_text.h"
#include "vector.h" #include "vector.h"
#include "posix.h" #include "posix.h"
#include "config_file.h" #include "config_file.h"
...@@ -782,7 +783,7 @@ int git_submodule_reload(git_submodule *submodule) ...@@ -782,7 +783,7 @@ int git_submodule_reload(git_submodule *submodule)
git_buf path = GIT_BUF_INIT; git_buf path = GIT_BUF_INIT;
git_buf_sets(&path, "submodule\\."); git_buf_sets(&path, "submodule\\.");
git_buf_puts_escape_regex(&path, submodule->name); git_buf_text_puts_escape_regex(&path, submodule->name);
git_buf_puts(&path, ".*"); git_buf_puts(&path, ".*");
if (git_buf_oom(&path)) if (git_buf_oom(&path))
......
#include "clar_libgit2.h" #include "clar_libgit2.h"
#include "buffer.h" #include "buffer.h"
#include "buf_text.h"
#define TESTSTR "Have you seen that? Have you seeeen that??" #define TESTSTR "Have you seen that? Have you seeeen that??"
const char *test_string = TESTSTR; const char *test_string = TESTSTR;
...@@ -576,37 +577,37 @@ void test_core_buffer__11(void) ...@@ -576,37 +577,37 @@ void test_core_buffer__11(void)
t.strings = t1; t.strings = t1;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, ""); cl_assert_equal_s(a.ptr, "");
t.strings = t2; t.strings = t2;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "some"); cl_assert_equal_s(a.ptr, "some");
t.strings = t3; t.strings = t3;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, ""); cl_assert_equal_s(a.ptr, "");
t.strings = t4; t.strings = t4;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "happ"); cl_assert_equal_s(a.ptr, "happ");
t.strings = t5; t.strings = t5;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "happ"); cl_assert_equal_s(a.ptr, "happ");
t.strings = t6; t.strings = t6;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, ""); cl_assert_equal_s(a.ptr, "");
t.strings = t7; t.strings = t7;
t.count = 3; t.count = 3;
cl_git_pass(git_buf_common_prefix(&a, &t)); cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, ""); cl_assert_equal_s(a.ptr, "");
git_buf_free(&a); git_buf_free(&a);
...@@ -641,19 +642,19 @@ void test_core_buffer__puts_escaped(void) ...@@ -641,19 +642,19 @@ void test_core_buffer__puts_escaped(void)
git_buf a = GIT_BUF_INIT; git_buf a = GIT_BUF_INIT;
git_buf_clear(&a); git_buf_clear(&a);
cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "", "")); cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "", ""));
cl_assert_equal_s("this is a test", a.ptr); cl_assert_equal_s("this is a test", a.ptr);
git_buf_clear(&a); git_buf_clear(&a);
cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "t", "\\")); cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "t", "\\"));
cl_assert_equal_s("\\this is a \\tes\\t", a.ptr); cl_assert_equal_s("\\this is a \\tes\\t", a.ptr);
git_buf_clear(&a); git_buf_clear(&a);
cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "i ", "__")); cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "i ", "__"));
cl_assert_equal_s("th__is__ __is__ a__ test", a.ptr); cl_assert_equal_s("th__is__ __is__ a__ test", a.ptr);
git_buf_clear(&a); git_buf_clear(&a);
cl_git_pass(git_buf_puts_escape_regex(&a, "^match\\s*[A-Z]+.*")); cl_git_pass(git_buf_text_puts_escape_regex(&a, "^match\\s*[A-Z]+.*"));
cl_assert_equal_s("\\^match\\\\s\\*\\[A-Z\\]\\+\\.\\*", a.ptr); cl_assert_equal_s("\\^match\\\\s\\*\\[A-Z\\]\\+\\.\\*", a.ptr);
git_buf_free(&a); git_buf_free(&a);
...@@ -663,7 +664,7 @@ static void assert_unescape(char *expected, char *to_unescape) { ...@@ -663,7 +664,7 @@ static void assert_unescape(char *expected, char *to_unescape) {
git_buf buf = GIT_BUF_INIT; git_buf buf = GIT_BUF_INIT;
cl_git_pass(git_buf_sets(&buf, to_unescape)); cl_git_pass(git_buf_sets(&buf, to_unescape));
git_buf_unescape(&buf); git_buf_text_unescape(&buf);
cl_assert_equal_s(expected, buf.ptr); cl_assert_equal_s(expected, buf.ptr);
cl_assert_equal_sz(strlen(expected), buf.size); cl_assert_equal_sz(strlen(expected), buf.size);
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include "filter.h" #include "filter.h"
static git_repository *g_repo = NULL; static git_repository *g_repo = NULL;
#define NUM_TEST_OBJECTS 6 #define NUM_TEST_OBJECTS 8
static git_oid g_oids[NUM_TEST_OBJECTS]; static git_oid g_oids[NUM_TEST_OBJECTS];
static const char *g_raw[NUM_TEST_OBJECTS] = { static const char *g_raw[NUM_TEST_OBJECTS] = {
"", "",
...@@ -12,16 +12,20 @@ static const char *g_raw[NUM_TEST_OBJECTS] = { ...@@ -12,16 +12,20 @@ static const char *g_raw[NUM_TEST_OBJECTS] = {
"foo\rbar\r", "foo\rbar\r",
"foo\r\nbar\r\n", "foo\r\nbar\r\n",
"foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r", "foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r",
"123\n\000\001\002\003\004abc\255\254\253\r\n" "123\n\000\001\002\003\004abc\255\254\253\r\n",
"\xEF\xBB\xBFThis is UTF-8\n",
"\xFE\xFF\x00T\x00h\x00i\x00s\x00!"
}; };
static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17 }; static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, 12 };
static git_text_stats g_stats[NUM_TEST_OBJECTS] = { static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = {
{ 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 2, 0, 6, 0 }, { 0, 0, 0, 2, 0, 6, 0 },
{ 0, 2, 0, 0, 6, 0 }, { 0, 0, 2, 0, 0, 6, 0 },
{ 0, 2, 2, 2, 6, 0 }, { 0, 0, 2, 2, 2, 6, 0 },
{ 0, 4, 4, 1, 31, 0 }, { 0, 0, 4, 4, 1, 31, 0 },
{ 1, 1, 2, 1, 9, 5 } { 0, 1, 1, 2, 1, 9, 5 },
{ GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 },
{ GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 },
}; };
static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
{ "", 0, 0 }, { "", 0, 0 },
...@@ -29,7 +33,9 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = { ...@@ -29,7 +33,9 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
{ "foo\rbar\r", 0, 8 }, { "foo\rbar\r", 0, 8 },
{ "foo\nbar\n", 0, 8 }, { "foo\nbar\n", 0, 8 },
{ "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 }, { "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 },
{ "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 } { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 },
{ "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 },
{ "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 }
}; };
void test_object_blob_filter__initialize(void) void test_object_blob_filter__initialize(void)
...@@ -76,12 +82,12 @@ void test_object_blob_filter__stats(void) ...@@ -76,12 +82,12 @@ void test_object_blob_filter__stats(void)
int i; int i;
git_blob *blob; git_blob *blob;
git_buf buf = GIT_BUF_INIT; git_buf buf = GIT_BUF_INIT;
git_text_stats stats; git_buf_text_stats stats;
for (i = 0; i < NUM_TEST_OBJECTS; i++) { for (i = 0; i < NUM_TEST_OBJECTS; i++) {
cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
cl_git_pass(git_blob__getbuf(&buf, blob)); cl_git_pass(git_blob__getbuf(&buf, blob));
git_text_gather_stats(&stats, &buf); git_buf_text_gather_stats(&stats, &buf, false);
cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0); cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0);
git_blob_free(blob); git_blob_free(blob);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment