Unverified Commit ff78aea6 by Edward Thomson Committed by GitHub

Merge pull request #5860 from libgit2/ethomson/buf_text

buf: remove unnecessary buf_text namespace
parents 05548e66 14f6950b
......@@ -10,7 +10,6 @@
#include "repository.h"
#include "filebuf.h"
#include "attrcache.h"
#include "buf_text.h"
#include "git2/blob.h"
#include "git2/tree.h"
#include "blob.h"
......@@ -123,7 +122,7 @@ int git_attr_file__load(
struct stat st;
bool nonexistent = false;
int bom_offset;
git_bom_t bom;
git_buf_bom_t bom;
git_oid id;
git_object_size_t blobsize;
......@@ -192,9 +191,9 @@ int git_attr_file__load(
/* advance over a UTF8 BOM */
content_str = git_buf_cstr(&content);
bom_offset = git_buf_text_detect_bom(&bom, &content);
bom_offset = git_buf_detect_bom(&bom, &content);
if (bom == GIT_BOM_UTF8)
if (bom == GIT_BUF_BOM_UTF8)
content_str += bom_offset;
/* store the key of the attr_reader; don't bother with cache
......
......@@ -14,7 +14,6 @@
#include "filebuf.h"
#include "filter.h"
#include "buf_text.h"
const void *git_blob_rawcontent(const git_blob *blob)
{
......@@ -401,7 +400,7 @@ int git_blob_is_binary(const git_blob *blob)
git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
return git_buf_text_is_binary(&content);
return git_buf_is_binary(&content);
}
int git_blob_filter_options_init(
......
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "buf_text.h"
/*
 * Append `string` to `buf`, inserting `esc_with` in front of every
 * character that appears in `esc_chars`.
 *
 * Works in two passes: the first measures how many bytes the escaped
 * output needs so the buffer is grown once, the second writes the data.
 *
 * Returns 0 on success (including when `string` is NULL, which appends
 * nothing) and -1 when the buffer cannot be grown.
 */
int git_buf_text_puts_escaped(
	git_buf *buf,
	const char *string,
	const char *esc_chars,
	const char *esc_with)
{
	const size_t prefix_len = strlen(esc_with);
	size_t needed = 0, alloclen;
	const char *cur;

	if (string == NULL)
		return 0;

	/* pass 1: measure the escaped length */
	cur = string;
	while (*cur != '\0') {
		size_t plain, special;

		/* run of characters copied verbatim */
		plain = strcspn(cur, esc_chars);
		cur += plain;

		/* run of characters that each get the escape prefix */
		special = strspn(cur, esc_chars);
		cur += special;

		needed += plain;
		needed += special * (prefix_len + 1);
	}

	/* one extra byte for the NUL terminator */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, needed, 1);
	if (git_buf_grow_by(buf, alloclen) < 0)
		return -1;

	/* pass 2: emit the escaped text */
	cur = string;
	while (*cur != '\0') {
		size_t plain, special;

		plain = strcspn(cur, esc_chars);
		memmove(buf->ptr + buf->size, cur, plain);
		buf->size += plain;
		cur += plain;

		special = strspn(cur, esc_chars);
		while (special > 0) {
			/* escape prefix first, then the character itself */
			memmove(buf->ptr + buf->size, esc_with, prefix_len);
			buf->size += prefix_len;
			buf->ptr[buf->size++] = *cur++;
			special--;
		}
	}

	buf->ptr[buf->size] = '\0';

	return 0;
}
/*
 * Unescape the buffer's string via git__unescape(), then record the value
 * that helper returns as the buffer's new size.
 */
void git_buf_text_unescape(git_buf *buf)
{
buf->size = git__unescape(buf->ptr);
}
/*
 * Write a copy of `src` into `tgt` with every "\r\n" pair collapsed to
 * "\n".  A '\r' that is not immediately followed by '\n' is preserved.
 *
 * `tgt` and `src` must be different buffers.  Returns 0 on success and
 * -1 if `tgt` cannot be grown.
 */
int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = src->ptr + src->size;
	const char *cr = memchr(in, '\r', src->size);
	size_t needed;
	char *dst;

	GIT_ASSERT(tgt != src);

	/* no carriage return anywhere: plain copy */
	if (cr == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/* the output is never longer than the input, so size it once */
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, src->size, 1);
	if (git_buf_grow(tgt, needed) < 0)
		return -1;

	dst = tgt->ptr;
	tgt->size = 0;

	while (cr != NULL) {
		size_t chunk = (size_t)(cr - in);
		bool is_crlf;

		/* copy everything that precedes this '\r' */
		if (chunk > 0) {
			memcpy(dst, in, chunk);
			dst += chunk;
		}

		/* only a '\r' that starts a "\r\n" pair is dropped */
		is_crlf = (cr + 1 != in_end && cr[1] == '\n');
		if (!is_crlf)
			*dst++ = '\r';

		in = cr + 1;
		cr = memchr(in, '\r', in_end - in);
	}

	/* whatever follows the last '\r' */
	if (in < in_end) {
		size_t tail = (size_t)(in_end - in);

		memcpy(dst, in, tail);
		dst += tail;
	}

	tgt->size = (size_t)(dst - tgt->ptr);
	tgt->ptr[tgt->size] = '\0';

	return 0;
}
/*
 * Write a copy of `src` into `tgt` with every '\n' emitted as "\r\n".
 * A '\n' that is already preceded by '\r' is not given a second '\r'.
 *
 * `tgt` and `src` must be different buffers.  Returns 0 on success and
 * -1 (or the failing helper's result) on allocation failure.
 */
int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = in + src->size;
	const char *lf = memchr(in, '\n', src->size);
	size_t alloclen;

	GIT_ASSERT(tgt != src);

	/* no line feed anywhere: plain copy */
	if (lf == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/* initial guess (input size plus 1/16th) to limit reallocs in the loop */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
	if (git_buf_grow(tgt, alloclen) < 0)
		return -1;

	tgt->size = 0;

	while (lf != NULL) {
		size_t line_len = (size_t)(lf - in);

		/* mixed line endings: leave out the existing '\r', it is re-added below */
		if (line_len > 0 && lf[-1] == '\r')
			line_len--;

		/* room for the line, "\r\n" and a terminating NUL */
		GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, line_len, 3);
		if (git_buf_grow_by(tgt, alloclen) < 0)
			return -1;

		if (line_len > 0) {
			memcpy(tgt->ptr + tgt->size, in, line_len);
			tgt->size += line_len;
		}

		tgt->ptr[tgt->size] = '\r';
		tgt->ptr[tgt->size + 1] = '\n';
		tgt->size += 2;

		in = lf + 1;
		lf = memchr(in, '\n', in_end - in);
	}

	tgt->ptr[tgt->size] = '\0';

	/* append whatever follows the final '\n' */
	return git_buf_put(tgt, in, in_end - in);
}
/*
 * Set `buf` to the longest prefix shared by all entries of `strings`.
 *
 * The buffer is cleared first; it stays empty when `strings` is NULL,
 * has no entries, or the entries share no prefix.  Returns 0 on success
 * and -1 if the first string cannot be copied into the buffer.
 */
int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
{
	size_t idx;

	git_buf_clear(buf);

	if (strings == NULL || strings->count == 0)
		return 0;

	/* start with the whole first string as the candidate prefix */
	if (git_buf_sets(buf, strings->strings[0]) < 0)
		return -1;

	/* shrink the candidate against each remaining string */
	for (idx = 1; idx < strings->count; ++idx) {
		const char *candidate = strings->strings[idx];
		size_t shared = 0;

		while (candidate[shared] != '\0' &&
		       candidate[shared] == buf->ptr[shared])
			shared++;

		git_buf_truncate(buf, shared);

		/* nothing left in common, no point in looking further */
		if (buf->size == 0)
			break;
	}

	return 0;
}
/*
 * Heuristically decide whether `buf` holds binary data.
 *
 * A buffer is reported as binary when:
 *  - it starts with a BOM other than the UTF-8 one,
 *  - it contains a NUL byte after the BOM, or
 *  - the number of non-printable bytes exceeds 1/128th of the number of
 *    printable bytes.
 *
 * Returns true if the buffer looks like binary data, false otherwise.
 */
bool git_buf_text_is_binary(const git_buf *buf)
{
	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
	git_bom_t bom;
	/*
	 * size_t rather than int: the counters are bounded only by the
	 * buffer size, and a signed int would overflow (undefined
	 * behavior) on buffers with more than INT_MAX bytes.
	 */
	size_t printable = 0, nonprintable = 0;

	/* skip over any BOM; a UTF-16 / UTF-32 BOM means "binary" right away */
	scan += git_buf_text_detect_bom(&bom, buf);

	if (bom > GIT_BOM_UTF8)
		return true;

	while (scan < end) {
		unsigned char c = (unsigned char)*scan++;

		/* Printable characters are SPACE (0x20) and above, excluding
		 * DEL (0x7F), plus the control characters BS, ESC and FF.
		 */
		if ((c > 0x1F && c != 0x7F) || c == '\b' || c == '\033' || c == '\f')
			printable++;
		else if (c == '\0')
			return true;
		else if (!git__isspace(c))
			nonprintable++;
	}

	return (printable >> 7) < nonprintable;
}
/*
 * Report whether any of the buffer's `size` bytes is a NUL byte.
 */
bool git_buf_text_contains_nul(const git_buf *buf)
{
	const void *first_nul = memchr(buf->ptr, '\0', buf->size);

	return first_nul != NULL;
}
/*
 * Look for a Unicode byte-order mark at the start of `buf`.
 *
 * `*bom` is always written: GIT_BOM_NONE when nothing is recognized,
 * otherwise the kind of BOM found.  The return value is the length of
 * the BOM in bytes (0 when none was found).
 */
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf)
{
	const unsigned char *b;
	size_t len = buf->size;

	*bom = GIT_BOM_NONE;

	/* every BOM is at least two bytes long */
	if (len < 2)
		return 0;

	b = (const unsigned char *)buf->ptr;

	/* 00 00 FE FF */
	if (b[0] == 0x00 && len >= 4 &&
	    b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) {
		*bom = GIT_BOM_UTF32_BE;
		return 4;
	}

	/* EF BB BF */
	if (b[0] == 0xEF && len >= 3 && b[1] == 0xBB && b[2] == 0xBF) {
		*bom = GIT_BOM_UTF8;
		return 3;
	}

	/* FE FF */
	if (b[0] == 0xFE && b[1] == 0xFF) {
		*bom = GIT_BOM_UTF16_BE;
		return 2;
	}

	/* FF FE starts both the UTF-16 LE and the UTF-32 LE (FF FE 00 00) BOM */
	if (b[0] == 0xFF && b[1] == 0xFE) {
		if (len >= 4 && b[2] == 0x00 && b[3] == 0x00) {
			*bom = GIT_BOM_UTF32_LE;
			return 4;
		}

		*bom = GIT_BOM_UTF16_LE;
		return 2;
	}

	return 0;
}
/*
 * Scan `buf` and fill `stats` with character-class counters.
 *
 * `stats` is zeroed first.  The BOM (if any) is always recorded in
 * `stats->bom`; its bytes are left out of the counters only when
 * `skip_bom` is true.  A single trailing EOF character (\032) is not
 * counted.
 *
 * Returns true when the text heuristically looks binary: it contains a
 * CR that is not part of a CRLF, it contains a NUL, or the non-printable
 * count exceeds 1/128th of the printable count.
 */
bool git_buf_text_gather_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
{
	const char *cur = buf->ptr;
	const char *stop = buf->ptr + buf->size;
	int bom_len;

	memset(stats, 0, sizeof(*stats));

	/* BOM detection */
	bom_len = git_buf_text_detect_bom(&stats->bom, buf);
	if (skip_bom)
		cur += bom_len;

	/* a trailing EOF character is ignored */
	if (buf->size > 0 && stop[-1] == '\032')
		stop--;

	for (; cur < stop; cur++) {
		unsigned char ch = (unsigned char)*cur;

		if (ch > 0x1F && ch != 0x7F) {
			stats->printable++;
			continue;
		}

		switch (ch) {
		case '\0':
			stats->nul++;
			stats->nonprintable++;
			break;
		case '\n':
			stats->lf++;
			break;
		case '\r':
			stats->cr++;
			/* a CR directly followed by LF also counts as a CRLF */
			if (cur + 1 < stop && cur[1] == '\n')
				stats->crlf++;
			break;
		case '\t':
		case '\f':
		case '\v':
		case '\b':
		case 0x1b: /* ESC */
			stats->printable++;
			break;
		default:
			stats->nonprintable++;
			break;
		}
	}

	/* a bare CR (one not counted as CRLF) marks the text as binary */
	if (stats->cr != stats->crlf)
		return true;

	if (stats->nul > 0)
		return true;

	return (stats->printable >> 7) < stats->nonprintable;
}
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_buf_text_h__
#define INCLUDE_buf_text_h__
#include "common.h"
#include "buffer.h"
/**
 * Kinds of byte-order mark reported by git_buf_text_detect_bom().
 *
 * The numeric order matters: git_buf_text_is_binary() treats every value
 * greater than GIT_BOM_UTF8 as a sign of binary content.
 */
typedef enum {
GIT_BOM_NONE = 0, /* no BOM recognized */
GIT_BOM_UTF8 = 1, /* EF BB BF */
GIT_BOM_UTF16_LE = 2, /* FF FE */
GIT_BOM_UTF16_BE = 3, /* FE FF */
GIT_BOM_UTF32_LE = 4, /* FF FE 00 00 */
GIT_BOM_UTF32_BE = 5 /* 00 00 FE FF */
} git_bom_t;
/* Counters filled in by git_buf_text_gather_stats(). */
typedef struct {
	/* BOM found at head of text */
	git_bom_t bom;

	/* NUL, CR, LF and CRLF counts */
	unsigned int nul;
	unsigned int cr;
	unsigned int lf;
	unsigned int crlf;

	/* These are just approximations! */
	unsigned int printable;
	unsigned int nonprintable;
} git_buf_text_stats;
/**
* Append string to buffer, prefixing each character from `esc_chars` with
* `esc_with` string.
*
* @param buf Buffer to append data to
* @param string String to escape and append
* @param esc_chars Characters to be escaped
* @param esc_with String to insert in front of each found character
* @return 0 on success, <0 on failure (probably allocation problem)
*/
extern int git_buf_text_puts_escaped(
git_buf *buf,
const char *string,
const char *esc_chars,
const char *esc_with);
/**
 * Append `string` to `buf`, putting a backslash in front of every
 * character that is special in a regular expression.
 *
 * @return the result of git_buf_text_puts_escaped()
 */
GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
{
	const char *regex_specials = "^.[]$()|*+?{}\\";
	const char *backslash = "\\";

	return git_buf_text_puts_escaped(buf, string, regex_specials, backslash);
}
/**
* Unescape all characters in a buffer in place
*
* I.e. remove backslashes
*/
extern void git_buf_text_unescape(git_buf *buf);
/**
* Replace all \r\n with \n.
*
* @return 0 on success, -1 on memory error
*/
extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src);
/**
* Replace all \n with \r\n. Does not modify existing \r\n.
*
* @return 0 on success, -1 on memory error
*/
extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src);
/**
* Fill buffer with the common prefix of an array of strings
*
* Buffer will be set to empty if there is no common prefix
*/
extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
/**
* Check quickly if buffer looks like it contains binary data
*
* @param buf Buffer to check
* @return true if buffer looks like non-text data
*/
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
* Check quickly if buffer contains a NUL byte
*
* @param buf Buffer to check
* @return true if buffer contains a NUL byte
*/
extern bool git_buf_text_contains_nul(const git_buf *buf);
/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Set to the type of BOM detected or GIT_BOM_NONE
* @param buf Buffer in which to check the first bytes for a BOM
* @return Number of bytes of BOM data (or 0 if no BOM found)
*/
extern int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf);
/**
* Gather stats for a piece of text
*
* Fill the `stats` structure with counts of unreadable characters, carriage
* returns, etc, so it can be used in heuristics. This automatically skips
* a trailing EOF (\032 character). Also it will look for a BOM at the
* start of the text and can be told to skip that as well.
*
* @param stats Structure to be filled in
* @param buf Text to process
* @param skip_bom Exclude leading BOM from stats if true
* @return Does the buffer heuristically look like binary data
*/
extern bool git_buf_text_gather_stats(
git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
#endif
......@@ -7,7 +7,6 @@
#include "buffer.h"
#include "posix.h"
#include "git2/buffer.h"
#include "buf_text.h"
#include <ctype.h>
/* Used as default value for git_buf->ptr so that people can always
......@@ -187,16 +186,6 @@ int git_buf_set(git_buf *buf, const void *data, size_t len)
return 0;
}
/* Thin wrapper that forwards to git_buf_text_is_binary(). */
int git_buf_is_binary(const git_buf *buf)
{
	const bool looks_binary = git_buf_text_is_binary(buf);

	return looks_binary ? 1 : 0;
}
/* Thin wrapper that forwards to git_buf_text_contains_nul(). */
int git_buf_contains_nul(const git_buf *buf)
{
	const bool has_nul = git_buf_text_contains_nul(buf);

	return has_nul ? 1 : 0;
}
int git_buf_sets(git_buf *buf, const char *string)
{
return git_buf_set(buf, string, string ? strlen(string) : 0);
......@@ -1058,3 +1047,312 @@ invalid:
git_error_set(GIT_ERROR_INVALID, "invalid quoted line");
return -1;
}
/*
 * Append `string` to `buf`, inserting `esc_with` in front of every
 * character that appears in `esc_chars`.
 *
 * Works in two passes: the first measures how many bytes the escaped
 * output needs so the buffer is grown once, the second writes the data.
 *
 * Returns 0 on success (including when `string` is NULL, which appends
 * nothing) and -1 when the buffer cannot be grown.
 */
int git_buf_puts_escaped(
	git_buf *buf,
	const char *string,
	const char *esc_chars,
	const char *esc_with)
{
	const size_t prefix_len = strlen(esc_with);
	size_t needed = 0, alloclen;
	const char *cur;

	if (string == NULL)
		return 0;

	/* pass 1: measure the escaped length */
	cur = string;
	while (*cur != '\0') {
		size_t plain, special;

		/* run of characters copied verbatim */
		plain = strcspn(cur, esc_chars);
		cur += plain;

		/* run of characters that each get the escape prefix */
		special = strspn(cur, esc_chars);
		cur += special;

		needed += plain;
		needed += special * (prefix_len + 1);
	}

	/* one extra byte for the NUL terminator */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, needed, 1);
	if (git_buf_grow_by(buf, alloclen) < 0)
		return -1;

	/* pass 2: emit the escaped text */
	cur = string;
	while (*cur != '\0') {
		size_t plain, special;

		plain = strcspn(cur, esc_chars);
		memmove(buf->ptr + buf->size, cur, plain);
		buf->size += plain;
		cur += plain;

		special = strspn(cur, esc_chars);
		while (special > 0) {
			/* escape prefix first, then the character itself */
			memmove(buf->ptr + buf->size, esc_with, prefix_len);
			buf->size += prefix_len;
			buf->ptr[buf->size++] = *cur++;
			special--;
		}
	}

	buf->ptr[buf->size] = '\0';

	return 0;
}
/*
 * Unescape the buffer's string via git__unescape(), then record the value
 * that helper returns as the buffer's new size.
 */
void git_buf_unescape(git_buf *buf)
{
buf->size = git__unescape(buf->ptr);
}
/*
 * Write a copy of `src` into `tgt` with every "\r\n" pair collapsed to
 * "\n".  A '\r' that is not immediately followed by '\n' is preserved.
 *
 * `tgt` and `src` must be different buffers.  Returns 0 on success and
 * -1 if `tgt` cannot be grown.
 */
int git_buf_crlf_to_lf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = src->ptr + src->size;
	const char *cr = memchr(in, '\r', src->size);
	size_t needed;
	char *dst;

	GIT_ASSERT(tgt != src);

	/* no carriage return anywhere: plain copy */
	if (cr == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/* the output is never longer than the input, so size it once */
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, src->size, 1);
	if (git_buf_grow(tgt, needed) < 0)
		return -1;

	dst = tgt->ptr;
	tgt->size = 0;

	while (cr != NULL) {
		size_t chunk = (size_t)(cr - in);
		bool is_crlf;

		/* copy everything that precedes this '\r' */
		if (chunk > 0) {
			memcpy(dst, in, chunk);
			dst += chunk;
		}

		/* only a '\r' that starts a "\r\n" pair is dropped */
		is_crlf = (cr + 1 != in_end && cr[1] == '\n');
		if (!is_crlf)
			*dst++ = '\r';

		in = cr + 1;
		cr = memchr(in, '\r', in_end - in);
	}

	/* whatever follows the last '\r' */
	if (in < in_end) {
		size_t tail = (size_t)(in_end - in);

		memcpy(dst, in, tail);
		dst += tail;
	}

	tgt->size = (size_t)(dst - tgt->ptr);
	tgt->ptr[tgt->size] = '\0';

	return 0;
}
/*
 * Write a copy of `src` into `tgt` with every '\n' emitted as "\r\n".
 * A '\n' that is already preceded by '\r' is not given a second '\r'.
 *
 * `tgt` and `src` must be different buffers.  Returns 0 on success and
 * -1 (or the failing helper's result) on allocation failure.
 */
int git_buf_lf_to_crlf(git_buf *tgt, const git_buf *src)
{
	const char *in = src->ptr;
	const char *in_end = in + src->size;
	const char *lf = memchr(in, '\n', src->size);
	size_t alloclen;

	GIT_ASSERT(tgt != src);

	/* no line feed anywhere: plain copy */
	if (lf == NULL)
		return git_buf_set(tgt, src->ptr, src->size);

	/* initial guess (input size plus 1/16th) to limit reallocs in the loop */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
	if (git_buf_grow(tgt, alloclen) < 0)
		return -1;

	tgt->size = 0;

	while (lf != NULL) {
		size_t line_len = (size_t)(lf - in);

		/* mixed line endings: leave out the existing '\r', it is re-added below */
		if (line_len > 0 && lf[-1] == '\r')
			line_len--;

		/* room for the line, "\r\n" and a terminating NUL */
		GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, line_len, 3);
		if (git_buf_grow_by(tgt, alloclen) < 0)
			return -1;

		if (line_len > 0) {
			memcpy(tgt->ptr + tgt->size, in, line_len);
			tgt->size += line_len;
		}

		tgt->ptr[tgt->size] = '\r';
		tgt->ptr[tgt->size + 1] = '\n';
		tgt->size += 2;

		in = lf + 1;
		lf = memchr(in, '\n', in_end - in);
	}

	tgt->ptr[tgt->size] = '\0';

	/* append whatever follows the final '\n' */
	return git_buf_put(tgt, in, in_end - in);
}
/*
 * Set `buf` to the longest prefix shared by all entries of `strings`.
 *
 * The buffer is cleared first; it stays empty when `strings` is NULL,
 * has no entries, or the entries share no prefix.  Returns 0 on success
 * and -1 if the first string cannot be copied into the buffer.
 */
int git_buf_common_prefix(git_buf *buf, const git_strarray *strings)
{
	size_t idx;

	git_buf_clear(buf);

	if (strings == NULL || strings->count == 0)
		return 0;

	/* start with the whole first string as the candidate prefix */
	if (git_buf_sets(buf, strings->strings[0]) < 0)
		return -1;

	/* shrink the candidate against each remaining string */
	for (idx = 1; idx < strings->count; ++idx) {
		const char *candidate = strings->strings[idx];
		size_t shared = 0;

		while (candidate[shared] != '\0' &&
		       candidate[shared] == buf->ptr[shared])
			shared++;

		git_buf_truncate(buf, shared);

		/* nothing left in common, no point in looking further */
		if (buf->size == 0)
			break;
	}

	return 0;
}
/*
 * Heuristically decide whether `buf` holds binary data.
 *
 * A buffer is reported as binary when:
 *  - it starts with a BOM other than the UTF-8 one,
 *  - it contains a NUL byte after the BOM, or
 *  - the number of non-printable bytes exceeds 1/128th of the number of
 *    printable bytes.
 *
 * Returns 1 if the buffer looks like binary data, 0 otherwise.
 */
int git_buf_is_binary(const git_buf *buf)
{
	const char *scan = buf->ptr, *end = buf->ptr + buf->size;
	git_buf_bom_t bom;
	/*
	 * size_t rather than int: the counters are bounded only by the
	 * buffer size, and a signed int would overflow (undefined
	 * behavior) on buffers with more than INT_MAX bytes.
	 */
	size_t printable = 0, nonprintable = 0;

	/* skip over any BOM; a UTF-16 / UTF-32 BOM means "binary" right away */
	scan += git_buf_detect_bom(&bom, buf);

	if (bom > GIT_BUF_BOM_UTF8)
		return 1;

	while (scan < end) {
		unsigned char c = (unsigned char)*scan++;

		/* Printable characters are SPACE (0x20) and above, excluding
		 * DEL (0x7F), plus the control characters BS, ESC and FF.
		 */
		if ((c > 0x1F && c != 0x7F) || c == '\b' || c == '\033' || c == '\f')
			printable++;
		else if (c == '\0')
			return 1;
		else if (!git__isspace(c))
			nonprintable++;
	}

	return (printable >> 7) < nonprintable;
}
/*
 * Report whether any of the buffer's `size` bytes is a NUL byte
 * (1 if so, 0 otherwise).
 */
int git_buf_contains_nul(const git_buf *buf)
{
	const void *first_nul = memchr(buf->ptr, '\0', buf->size);

	return first_nul != NULL;
}
/*
 * Look for a Unicode byte-order mark at the start of `buf`.
 *
 * `*bom` is always written: GIT_BUF_BOM_NONE when nothing is recognized,
 * otherwise the kind of BOM found.  The return value is the length of
 * the BOM in bytes (0 when none was found).
 */
int git_buf_detect_bom(git_buf_bom_t *bom, const git_buf *buf)
{
	const unsigned char *b;
	size_t len = buf->size;

	*bom = GIT_BUF_BOM_NONE;

	/* every BOM is at least two bytes long */
	if (len < 2)
		return 0;

	b = (const unsigned char *)buf->ptr;

	/* 00 00 FE FF */
	if (b[0] == 0x00 && len >= 4 &&
	    b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) {
		*bom = GIT_BUF_BOM_UTF32_BE;
		return 4;
	}

	/* EF BB BF */
	if (b[0] == 0xEF && len >= 3 && b[1] == 0xBB && b[2] == 0xBF) {
		*bom = GIT_BUF_BOM_UTF8;
		return 3;
	}

	/* FE FF */
	if (b[0] == 0xFE && b[1] == 0xFF) {
		*bom = GIT_BUF_BOM_UTF16_BE;
		return 2;
	}

	/* FF FE starts both the UTF-16 LE and the UTF-32 LE (FF FE 00 00) BOM */
	if (b[0] == 0xFF && b[1] == 0xFE) {
		if (len >= 4 && b[2] == 0x00 && b[3] == 0x00) {
			*bom = GIT_BUF_BOM_UTF32_LE;
			return 4;
		}

		*bom = GIT_BUF_BOM_UTF16_LE;
		return 2;
	}

	return 0;
}
/*
 * Scan `buf` and fill `stats` with character-class counters.
 *
 * `stats` is zeroed first.  The BOM (if any) is always recorded in
 * `stats->bom`; its bytes are left out of the counters only when
 * `skip_bom` is true.  A single trailing EOF character (\032) is not
 * counted.
 *
 * Returns true when the text heuristically looks binary: it contains a
 * CR that is not part of a CRLF, it contains a NUL, or the non-printable
 * count exceeds 1/128th of the printable count.
 */
bool git_buf_gather_text_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
{
	const char *cur = buf->ptr;
	const char *stop = buf->ptr + buf->size;
	int bom_len;

	memset(stats, 0, sizeof(*stats));

	/* BOM detection */
	bom_len = git_buf_detect_bom(&stats->bom, buf);
	if (skip_bom)
		cur += bom_len;

	/* a trailing EOF character is ignored */
	if (buf->size > 0 && stop[-1] == '\032')
		stop--;

	for (; cur < stop; cur++) {
		unsigned char ch = (unsigned char)*cur;

		if (ch > 0x1F && ch != 0x7F) {
			stats->printable++;
			continue;
		}

		switch (ch) {
		case '\0':
			stats->nul++;
			stats->nonprintable++;
			break;
		case '\n':
			stats->lf++;
			break;
		case '\r':
			stats->cr++;
			/* a CR directly followed by LF also counts as a CRLF */
			if (cur + 1 < stop && cur[1] == '\n')
				stats->crlf++;
			break;
		case '\t':
		case '\f':
		case '\v':
		case '\b':
		case 0x1b: /* ESC */
			stats->printable++;
			break;
		default:
			stats->nonprintable++;
			break;
		}
	}

	/* a bare CR (one not counted as CRLF) marks the text as binary */
	if (stats->cr != stats->crlf)
		return true;

	if (stats->nul > 0)
		return true;

	return (stats->printable >> 7) < stats->nonprintable;
}
......@@ -17,6 +17,21 @@
* } git_buf;
*/
/**
 * Kinds of byte-order mark reported by git_buf_detect_bom().
 *
 * The numeric order matters: git_buf_is_binary() treats every value
 * greater than GIT_BUF_BOM_UTF8 as a sign of binary content.
 */
typedef enum {
GIT_BUF_BOM_NONE = 0, /* no BOM recognized */
GIT_BUF_BOM_UTF8 = 1, /* EF BB BF */
GIT_BUF_BOM_UTF16_LE = 2, /* FF FE */
GIT_BUF_BOM_UTF16_BE = 3, /* FE FF */
GIT_BUF_BOM_UTF32_LE = 4, /* FF FE 00 00 */
GIT_BUF_BOM_UTF32_BE = 5 /* 00 00 FE FF */
} git_buf_bom_t;
/* Counters filled in by git_buf_gather_text_stats(). */
typedef struct {
	/* BOM found at head of text */
	git_buf_bom_t bom;

	/* NUL, CR, LF and CRLF counts */
	unsigned int nul;
	unsigned int cr;
	unsigned int lf;
	unsigned int crlf;

	/* These are just approximations! */
	unsigned int printable;
	unsigned int nonprintable;
} git_buf_text_stats;
extern char git_buf__initbuf[];
extern char git_buf__oom[];
......@@ -219,4 +234,81 @@ int git_buf_splice(
const char *data,
size_t nb_to_insert);
/**
* Append string to buffer, prefixing each character from `esc_chars` with
* `esc_with` string.
*
* @param buf Buffer to append data to
* @param string String to escape and append
* @param esc_chars Characters to be escaped
* @param esc_with String to insert in front of each found character
* @return 0 on success, <0 on failure (probably allocation problem)
*/
extern int git_buf_puts_escaped(
git_buf *buf,
const char *string,
const char *esc_chars,
const char *esc_with);
/**
 * Append `string` to `buf`, putting a backslash in front of every
 * character that is special in a regular expression.
 *
 * @return the result of git_buf_puts_escaped()
 */
GIT_INLINE(int) git_buf_puts_escape_regex(git_buf *buf, const char *string)
{
	const char *regex_specials = "^.[]$()|*+?{}\\";
	const char *backslash = "\\";

	return git_buf_puts_escaped(buf, string, regex_specials, backslash);
}
/**
* Unescape all characters in a buffer in place
*
* I.e. remove backslashes
*/
extern void git_buf_unescape(git_buf *buf);
/**
* Replace all \r\n with \n.
*
* @return 0 on success, -1 on memory error
*/
extern int git_buf_crlf_to_lf(git_buf *tgt, const git_buf *src);
/**
* Replace all \n with \r\n. Does not modify existing \r\n.
*
* @return 0 on success, -1 on memory error
*/
extern int git_buf_lf_to_crlf(git_buf *tgt, const git_buf *src);
/**
* Fill buffer with the common prefix of an array of strings
*
* Buffer will be set to empty if there is no common prefix
*/
extern int git_buf_common_prefix(git_buf *buf, const git_strarray *strs);
/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Set to the type of BOM detected or GIT_BUF_BOM_NONE
* @param buf Buffer in which to check the first bytes for a BOM
* @return Number of bytes of BOM data (or 0 if no BOM found)
*/
extern int git_buf_detect_bom(git_buf_bom_t *bom, const git_buf *buf);
/**
* Gather stats for a piece of text
*
* Fill the `stats` structure with counts of unreadable characters, carriage
* returns, etc, so it can be used in heuristics. This automatically skips
* a trailing EOF (\032 character). Also it will look for a BOM at the
* start of the text and can be told to skip that as well.
*
* @param stats Structure to be filled in
* @param buf Text to process
* @param skip_bom Exclude leading BOM from stats if true
* @return Does the buffer heuristically look like binary data
*/
extern bool git_buf_gather_text_stats(
git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
#endif
......@@ -26,7 +26,6 @@
#include "diff.h"
#include "diff_generate.h"
#include "pathspec.h"
#include "buf_text.h"
#include "diff_xdiff.h"
#include "path.h"
#include "attr.h"
......
......@@ -10,7 +10,6 @@
#include "git2/config.h"
#include "git2/sys/config.h"
#include "buf_text.h"
#include "config_backend.h"
#include "regexp.h"
#include "sysdir.h"
......@@ -1497,7 +1496,7 @@ int git_config_rename_section(
int error = 0;
struct rename_data data;
git_buf_text_puts_escape_regex(&pattern, old_section_name);
git_buf_puts_escape_regex(&pattern, old_section_name);
if ((error = git_buf_puts(&pattern, "\\..+")) < 0)
goto cleanup;
......
......@@ -7,8 +7,6 @@
#include "config_parse.h"
#include "buf_text.h"
#include <ctype.h>
const char *git_config_escapes = "ntb\"\\";
......@@ -230,10 +228,10 @@ fail_parse:
static int skip_bom(git_parse_ctx *parser)
{
git_buf buf = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
git_bom_t bom;
int bom_offset = git_buf_text_detect_bom(&bom, &buf);
git_buf_bom_t bom;
int bom_offset = git_buf_detect_bom(&bom, &buf);
if (bom == GIT_BOM_UTF8)
if (bom == GIT_BUF_BOM_UTF8)
git_parse_advance_chars(parser, bom_offset);
/* TODO: reference implementation is pretty stupid with BoM */
......
......@@ -15,7 +15,6 @@
#include "futils.h"
#include "hash.h"
#include "filter.h"
#include "buf_text.h"
#include "repository.h"
typedef enum {
......@@ -219,7 +218,7 @@ static int crlf_apply_to_odb(
if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from))
return GIT_PASSTHROUGH;
is_binary = git_buf_text_gather_stats(&stats, from, false);
is_binary = git_buf_gather_text_stats(&stats, from, false);
/* Heuristics to see if we can skip the conversion.
* Straight from Core Git.
......@@ -247,7 +246,7 @@ static int crlf_apply_to_odb(
return GIT_PASSTHROUGH;
/* Actually drop the carriage returns */
return git_buf_text_crlf_to_lf(to, from);
return git_buf_crlf_to_lf(to, from);
}
static int crlf_apply_to_workdir(
......@@ -262,7 +261,7 @@ static int crlf_apply_to_workdir(
if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF)
return GIT_PASSTHROUGH;
is_binary = git_buf_text_gather_stats(&stats, from, false);
is_binary = git_buf_gather_text_stats(&stats, from, false);
/* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
if (stats.lf == 0 || stats.lf == stats.crlf)
......@@ -281,7 +280,7 @@ static int crlf_apply_to_workdir(
return GIT_PASSTHROUGH;
}
return git_buf_text_lf_to_crlf(to, from);
return git_buf_lf_to_crlf(to, from);
}
static int convert_attrs(
......
......@@ -13,7 +13,6 @@
#include "diff.h"
#include "strmap.h"
#include "map.h"
#include "buf_text.h"
#include "config.h"
#include "regexp.h"
#include "repository.h"
......@@ -428,8 +427,8 @@ int git_diff_driver_content_is_binary(
* let's just use the simple NUL-byte detection that core git uses.
*/
/* previously was: if (git_buf_text_is_binary(&search)) */
if (git_buf_text_contains_nul(&search))
/* previously was: if (git_buf_is_binary(&search)) */
if (git_buf_contains_nul(&search))
return 1;
return 0;
......
......@@ -10,7 +10,6 @@
#include "git2/sys/filter.h"
#include "filter.h"
#include "buffer.h"
#include "buf_text.h"
static int ident_find_id(
const char **id_start, const char **id_end, const char *start, size_t len)
......@@ -105,7 +104,7 @@ static int ident_apply(
GIT_UNUSED(self); GIT_UNUSED(payload);
/* Don't filter binary files */
if (git_buf_text_is_binary(from))
if (git_buf_is_binary(from))
return GIT_PASSTHROUGH;
if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
......
......@@ -9,7 +9,6 @@
#include "git2/pathspec.h"
#include "git2/diff.h"
#include "buf_text.h"
#include "attr_file.h"
#include "iterator.h"
#include "repository.h"
......@@ -25,7 +24,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec)
const char *scan;
if (!pathspec || !pathspec->count ||
git_buf_text_common_prefix(&prefix, pathspec) < 0)
git_buf_common_prefix(&prefix, pathspec) < 0)
return NULL;
/* diff prefix will only be leading non-wildcards */
......@@ -41,7 +40,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec)
return NULL;
}
git_buf_text_unescape(&prefix);
git_buf_unescape(&prefix);
return git_buf_detach(&prefix);
}
......
......@@ -12,7 +12,6 @@
#include "git2/types.h"
#include "git2/index.h"
#include "buffer.h"
#include "buf_text.h"
#include "vector.h"
#include "posix.h"
#include "config_backend.h"
......
......@@ -3,7 +3,6 @@
#include "apply.h"
#include "repository.h"
#include "buf_text.h"
#include "../patch/patch_common.h"
......
......@@ -5,7 +5,6 @@
#include "patch.h"
#include "patch_parse.h"
#include "repository.h"
#include "buf_text.h"
#include "../patch/patch_common.h"
......
......@@ -3,7 +3,6 @@
#include "apply.h"
#include "repository.h"
#include "buf_text.h"
#include "../patch/patch_common.h"
......
#include "clar_libgit2.h"
#include "buffer.h"
#include "buf_text.h"
#include "git2/sys/hashsig.h"
#include "futils.h"
......@@ -644,37 +643,37 @@ void test_core_buffer__11(void)
t.strings = t1;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
t.strings = t2;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "some");
t.strings = t3;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
t.strings = t4;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "happ");
t.strings = t5;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "happ");
t.strings = t6;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
t.strings = t7;
t.count = 3;
cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_git_pass(git_buf_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
git_buf_dispose(&a);
......@@ -709,19 +708,19 @@ void test_core_buffer__puts_escaped(void)
git_buf a = GIT_BUF_INIT;
git_buf_clear(&a);
cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "", ""));
cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "", ""));
cl_assert_equal_s("this is a test", a.ptr);
git_buf_clear(&a);
cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "t", "\\"));
cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "t", "\\"));
cl_assert_equal_s("\\this is a \\tes\\t", a.ptr);
git_buf_clear(&a);
cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "i ", "__"));
cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "i ", "__"));
cl_assert_equal_s("th__is__ __is__ a__ test", a.ptr);
git_buf_clear(&a);
cl_git_pass(git_buf_text_puts_escape_regex(&a, "^match\\s*[A-Z]+.*"));
cl_git_pass(git_buf_puts_escape_regex(&a, "^match\\s*[A-Z]+.*"));
cl_assert_equal_s("\\^match\\\\s\\*\\[A-Z\\]\\+\\.\\*", a.ptr);
git_buf_dispose(&a);
......@@ -731,7 +730,7 @@ static void assert_unescape(char *expected, char *to_unescape) {
git_buf buf = GIT_BUF_INIT;
cl_git_pass(git_buf_sets(&buf, to_unescape));
git_buf_text_unescape(&buf);
git_buf_unescape(&buf);
cl_assert_equal_s(expected, buf.ptr);
cl_assert_equal_sz(strlen(expected), buf.size);
......@@ -864,20 +863,20 @@ void test_core_buffer__classify_with_utf8(void)
git_buf b;
b.ptr = data0; b.size = b.asize = data0len;
cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
cl_assert(!git_buf_is_binary(&b));
cl_assert(!git_buf_contains_nul(&b));
b.ptr = data1; b.size = b.asize = data1len;
cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
cl_assert(!git_buf_is_binary(&b));
cl_assert(!git_buf_contains_nul(&b));
b.ptr = data2; b.size = b.asize = data2len;
cl_assert(git_buf_text_is_binary(&b));
cl_assert(git_buf_text_contains_nul(&b));
cl_assert(git_buf_is_binary(&b));
cl_assert(git_buf_contains_nul(&b));
b.ptr = data3; b.size = b.asize = data3len;
cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
cl_assert(!git_buf_is_binary(&b));
cl_assert(!git_buf_contains_nul(&b));
}
#define SIMILARITY_TEST_DATA_1 \
......@@ -1074,80 +1073,80 @@ void test_core_buffer__lf_and_crlf_conversions(void)
git_buf_sets(&src, "lf\nlf\nlf\nlf\n");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("lf\r\nlf\r\nlf\r\nlf\r\n", tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf(src.ptr, tgt);
git_buf_sets(&src, "\nlf\nlf\nlf\nlf\nlf");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("\r\nlf\r\nlf\r\nlf\r\nlf\r\nlf", tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf(src.ptr, tgt);
/* CRLF source */
git_buf_sets(&src, "crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n", tgt);
git_buf_sets(&src, "crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n");
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf("crlf\ncrlf\ncrlf\ncrlf\n", tgt);
git_buf_sets(&src, "\r\ncrlf\r\ncrlf\r\ncrlf\r\ncrlf\r\ncrlf");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("\r\ncrlf\r\ncrlf\r\ncrlf\r\ncrlf\r\ncrlf", tgt);
git_buf_sets(&src, "\r\ncrlf\r\ncrlf\r\ncrlf\r\ncrlf\r\ncrlf");
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf("\ncrlf\ncrlf\ncrlf\ncrlf\ncrlf", tgt);
/* CRLF in LF text */
git_buf_sets(&src, "\nlf\nlf\ncrlf\r\nlf\nlf\ncrlf\r\n");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("\r\nlf\r\nlf\r\ncrlf\r\nlf\r\nlf\r\ncrlf\r\n", tgt);
git_buf_sets(&src, "\nlf\nlf\ncrlf\r\nlf\nlf\ncrlf\r\n");
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf("\nlf\nlf\ncrlf\nlf\nlf\ncrlf\n", tgt);
/* LF in CRLF text */
git_buf_sets(&src, "\ncrlf\r\ncrlf\r\nlf\ncrlf\r\ncrlf");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("\r\ncrlf\r\ncrlf\r\nlf\r\ncrlf\r\ncrlf", tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf("\ncrlf\ncrlf\nlf\ncrlf\ncrlf", tgt);
/* bare CR test */
git_buf_sets(&src, "\rcrlf\r\nlf\nlf\ncr\rcrlf\r\nlf\ncr\r");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf("\rcrlf\r\nlf\r\nlf\r\ncr\rcrlf\r\nlf\r\ncr\r", tgt);
git_buf_sets(&src, "\rcrlf\r\nlf\nlf\ncr\rcrlf\r\nlf\ncr\r");
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf("\rcrlf\nlf\nlf\ncr\rcrlf\nlf\ncr\r", tgt);
git_buf_sets(&src, "\rcr\r");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf(src.ptr, tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf("\rcr\r", tgt);
git_buf_dispose(&src);
......@@ -1156,37 +1155,37 @@ void test_core_buffer__lf_and_crlf_conversions(void)
/* blob correspondence tests */
git_buf_sets(&src, ALL_CRLF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf(ALL_CRLF_TEXT_AS_CRLF, tgt);
git_buf_sets(&src, ALL_CRLF_TEXT_RAW);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf(ALL_CRLF_TEXT_AS_LF, tgt);
git_buf_dispose(&src);
git_buf_dispose(&tgt);
git_buf_sets(&src, ALL_LF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf(ALL_LF_TEXT_AS_CRLF, tgt);
git_buf_sets(&src, ALL_LF_TEXT_RAW);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf(ALL_LF_TEXT_AS_LF, tgt);
git_buf_dispose(&src);
git_buf_dispose(&tgt);
git_buf_sets(&src, MORE_CRLF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf(MORE_CRLF_TEXT_AS_CRLF, tgt);
git_buf_sets(&src, MORE_CRLF_TEXT_RAW);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf(MORE_CRLF_TEXT_AS_LF, tgt);
git_buf_dispose(&src);
git_buf_dispose(&tgt);
git_buf_sets(&src, MORE_LF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_lf_to_crlf(&tgt, &src));
check_buf(MORE_LF_TEXT_AS_CRLF, tgt);
git_buf_sets(&src, MORE_LF_TEXT_RAW);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
cl_git_pass(git_buf_crlf_to_lf(&tgt, &src));
check_buf(MORE_LF_TEXT_AS_LF, tgt);
git_buf_dispose(&src);
git_buf_dispose(&tgt);
......
......@@ -4,7 +4,6 @@
#include "diff_helpers.h"
#include "diff.h"
#include "repository.h"
#include "buf_text.h"
static git_repository *g_repo = NULL;
......
#include "clar_libgit2.h"
#include "diff_helpers.h"
#include "buf_text.h"
static git_repository *g_repo = NULL;
......@@ -513,7 +512,7 @@ void test_diff_rename__working_directory_changes(void)
cl_git_pass(
git_futils_readbuffer(&old_content, "renames/songof7cities.txt"));
cl_git_pass(
git_buf_text_lf_to_crlf(&content, &old_content));
git_buf_lf_to_crlf(&content, &old_content));
cl_git_pass(
git_futils_writebuffer(&content, "renames/songof7cities.txt", 0, 0));
......
......@@ -2,7 +2,6 @@
#include "posix.h"
#include "blob.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
#include "custom_helpers.h"
......
#include "clar_libgit2.h"
#include "posix.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#define VERY_SECURE_ENCRYPTION(b) ((b) ^ 0xff)
......
......@@ -2,7 +2,6 @@
#include "posix.h"
#include "blob.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
......
......@@ -2,7 +2,6 @@
#include "posix.h"
#include "blob.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
#include "custom_helpers.h"
......
......@@ -2,7 +2,6 @@
#include "repository.h"
#include "git2/sys/repository.h"
#include "mailmap_testdata.h"
#include "buf_text.h"
static git_repository *g_repo;
static git_mailmap *g_mailmap;
......@@ -109,7 +108,7 @@ void test_mailmap_parsing__windows_string(void)
/* Parse with windows-style line endings */
git_buf_attach_notowned(&unixbuf, string_mailmap, strlen(string_mailmap));
cl_git_pass(git_buf_text_lf_to_crlf(&winbuf, &unixbuf));
cl_git_pass(git_buf_lf_to_crlf(&winbuf, &unixbuf));
cl_git_pass(git_mailmap_from_buffer(&g_mailmap, winbuf.ptr, winbuf.size));
git_buf_dispose(&winbuf);
......
#include "clar_libgit2.h"
#include "posix.h"
#include "blob.h"
#include "buf_text.h"
static git_repository *g_repo = NULL;
......@@ -44,9 +43,9 @@ static git_buf_text_stats g_crlf_filtered_stats[CRLF_NUM_TEST_OBJECTS] = {
{ 0, 0, 2, 2, 2, 6, 0 },
{ 0, 0, 4, 4, 1, 31, 0 },
{ 0, 1, 1, 2, 1, 9, 5 },
{ GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 },
{ GIT_BOM_UTF8, 0, 2, 2, 2, 27, 0 },
{ GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 },
{ GIT_BUF_BOM_UTF8, 0, 0, 1, 0, 16, 0 },
{ GIT_BUF_BOM_UTF8, 0, 2, 2, 2, 27, 0 },
{ GIT_BUF_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 },
};
void test_object_blob_filter__initialize(void)
......@@ -97,7 +96,7 @@ void test_object_blob_filter__stats(void)
for (i = 0; i < CRLF_NUM_TEST_OBJECTS; i++) {
cl_git_pass(git_blob_lookup(&blob, g_repo, &g_crlf_oids[i]));
cl_git_pass(git_blob__getbuf(&buf, blob));
git_buf_text_gather_stats(&stats, &buf, false);
git_buf_gather_text_stats(&stats, &buf, false);
cl_assert_equal_i(
0, memcmp(&g_crlf_filtered_stats[i], &stats, sizeof(stats)));
git_blob_free(blob);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment