/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#ifndef INCLUDE_buf_text_h__
#define INCLUDE_buf_text_h__

#include "common.h"

#include "buffer.h"

/**
 * Kind of byte order mark (BOM) recognised at the start of a text.
 *
 * Enumerators are numbered consecutively starting at zero, so the
 * numeric values are 0 (none) through 5 (UTF-32 big endian).
 */
typedef enum {
	GIT_BOM_NONE = 0,  /* 0: no BOM */
	GIT_BOM_UTF8,      /* 1: UTF-8 */
	GIT_BOM_UTF16_LE,  /* 2: UTF-16, little endian */
	GIT_BOM_UTF16_BE,  /* 3: UTF-16, big endian */
	GIT_BOM_UTF32_LE,  /* 4: UTF-32, little endian */
	GIT_BOM_UTF32_BE   /* 5: UTF-32, big endian */
} git_bom_t;

/**
 * Character statistics for a piece of text.
 */
typedef struct {
	/* BOM found at head of text */
	git_bom_t bom;

	/* NUL, CR, LF and CRLF counts */
	unsigned int nul;
	unsigned int cr;
	unsigned int lf;
	unsigned int crlf;

	/* These are just approximations! */
	unsigned int printable;
	unsigned int nonprintable;
} git_buf_text_stats;

/**
 * Append string to buffer, prefixing each character from `esc_chars` with
 * the `esc_with` string.
 *
 * @param buf Buffer to append data to
 * @param string String to escape and append
 * @param esc_chars Characters to be escaped
 * @param esc_with String to insert in front of each found character
 * @return 0 on success, <0 on failure (probably allocation problem)
 */
extern int git_buf_text_puts_escaped(
	git_buf *buf,
	const char *string,
	const char *esc_chars,
	const char *esc_with);

/**
 * Append `string` to `buf`, inserting a backslash in front of every
 * character that is special in a regular expression.
 *
 * @param buf Buffer to append data to
 * @param string String to escape and append
 * @return The result of git_buf_text_puts_escaped()
 */
GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
{
	/* Characters treated as regex-special, including the backslash itself */
	const char *regex_specials = "^.[]$()|*+?{}\\";
	/* Each special character gets a single backslash put in front of it */
	const char *escape_prefix = "\\";

	return git_buf_text_puts_escaped(buf, string, regex_specials, escape_prefix);
}

/**
 * Unescape all characters in a buffer in place
 *
 * I.e. remove backslashes
 *
 * @param buf Buffer to unescape; its contents are modified in place
 */
extern void git_buf_text_unescape(git_buf *buf);

/**
 * Replace all \r\n with \n.
 *
 * @param tgt Destination buffer (presumably receives the converted text;
 *            confirm against the implementation)
 * @param src Source buffer; not modified through this pointer
 * @return 0 on success, -1 on memory error
 */
extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src);

/**
 * Replace all \n with \r\n. Does not modify existing \r\n.
 *
 * @param tgt Destination buffer (presumably receives the converted text;
 *            confirm against the implementation)
 * @param src Source buffer; not modified through this pointer
 * @return 0 on success, -1 on memory error
 */
extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src);

/**
 * Fill buffer with the common prefix of an array of strings
 *
 * Buffer will be set to empty if there is no common prefix
 *
 * @param buf Buffer to fill with the common prefix
 * @param strs Array of strings to examine
 * @return NOTE(review): presumably 0 on success and <0 on failure, like the
 *         other functions in this header -- confirm against the implementation
 */
extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);

/**
 * Check quickly if buffer looks like it contains binary data
 *
 * This is a quick "looks like" check, not a definitive classification.
 *
 * @param buf Buffer to check
 * @return true if buffer looks like non-text data
 */
extern bool git_buf_text_is_binary(const git_buf *buf);

89 90 91 92 93 94 95 96
/**
 * Check quickly if buffer contains a NUL byte
 *
 * @param buf Buffer to check
 * @return true if buffer contains a NUL byte
 */
extern bool git_buf_text_contains_nul(const git_buf *buf);

97 98 99 100 101 102 103
/**
 * Check if a buffer begins with a UTF BOM
 *
 * @param bom Set to the type of BOM detected or GIT_BOM_NONE
 * @param buf Buffer in which to check the first bytes for a BOM
 * @return Number of bytes of BOM data (or 0 if no BOM found)
 */
104
extern int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf);
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122

/**
 * Gather stats for a piece of text
 *
 * Fill the `stats` structure with counts of unreadable characters, carriage
 * returns, etc., so it can be used in heuristics.  This automatically skips
 * a trailing EOF (\032 character).  Also it will look for a BOM at the
 * start of the text and can be told to skip that as well.
 *
 * @param stats Structure to be filled in
 * @param buf Text to process
 * @param skip_bom Exclude leading BOM from stats if true
 * @return true if the buffer heuristically looks like binary data
 */
extern bool git_buf_text_gather_stats(
	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);

#endif