Unverified Commit 1d7c15ad by Edward Thomson Committed by GitHub

Merge pull request #4310 from pks-t/pks/common-parser

Common parser interface
parents 46e1dabb 9e66590b
......@@ -17,6 +17,7 @@
#include "git2/types.h"
#include "strmap.h"
#include "array.h"
#include "config_parse.h"
#include <ctype.h>
#include <sys/types.h>
......@@ -75,20 +76,6 @@ typedef struct git_config_file_iter {
(iter) && (((tmp) = CVAR_LIST_NEXT(iter) || 1));\
(iter) = (tmp))
/* Describes one configuration file on disk that this backend reads. */
struct config_file {
git_oid checksum; /* filled by git_hash_buf() over the file contents in config_read() */
char *path; /* location of the file; passed to git_futils_readbuffer() */
git_array_t(struct config_file) includes; /* nested entries of the same type -- presumably files pulled in by include directives; TODO confirm against parse_include() */
};
/* Cursor state kept while scanning the contents of one configuration file. */
struct reader {
struct config_file *file; /* file being parsed; its path is reported in parse errors */
git_buf buffer; /* holds the file contents that are being parsed */
char *read_ptr; /* next unread character inside buffer */
int line_number; /* incremented whenever a newline or a whole line is consumed */
int eof; /* set to 1 once the terminating NUL has been reached */
};
typedef struct {
git_atomic refcount;
git_strmap *values;
......@@ -107,6 +94,8 @@ typedef struct {
git_config_level_t level;
const git_repository *repo;
git_array_t(git_config_parser) readers;
bool locked;
git_filebuf locked_buf;
git_buf locked_content;
......@@ -120,19 +109,13 @@ typedef struct {
diskfile_backend *snapshot_from;
} diskfile_readonly_backend;
static int config_read(git_strmap *values, const git_repository *repo, struct config_file *file, git_config_level_t level, int depth);
static int config_read(git_strmap *values, const git_repository *repo, git_config_file *file, git_config_level_t level, int depth);
static int config_write(diskfile_backend *cfg, const char *orig_key, const char *key, const regex_t *preg, const char *value);
static char *escape_value(const char *ptr);
int git_config_file__snapshot(git_config_backend **out, diskfile_backend *in);
static int config_snapshot(git_config_backend **out, git_config_backend *in);
/*
 * Record a GITERR_CONFIG error for a parse failure. The message names
 * the file path and current line number of `reader` together with the
 * column `col` and the human readable reason `error_str`.
 */
static void set_parse_error(struct reader *reader, int col, const char *error_str)
{
	const char *path = reader->file->path;
	int line = reader->line_number;

	giterr_set(GITERR_CONFIG,
		"failed to parse config file: %s (in %s:%d, column %d)",
		error_str, path, line, col);
}
static int config_error_readonly(void)
{
giterr_set(GITERR_CONFIG, "this backend is read-only");
......@@ -293,7 +276,6 @@ static int config_open(git_config_backend *cfg, git_config_level_t level, const
if ((res = refcounted_strmap_alloc(&b->header.values)) < 0)
return res;
/* It's fine if the file doesn't exist */
if (!git_path_exists(b->file.path))
return 0;
......@@ -307,11 +289,11 @@ static int config_open(git_config_backend *cfg, git_config_level_t level, const
static int config_is_modified(int *modified, struct config_file *file)
{
struct config_file *include;
git_config_file *include;
git_buf buf = GIT_BUF_INIT;
git_oid hash;
uint32_t i;
int error;
int error = 0;
*modified = 0;
......@@ -341,7 +323,7 @@ static int config_refresh(git_config_backend *cfg)
{
diskfile_backend *b = (diskfile_backend *)cfg;
refcounted_strmap *values = NULL, *tmp;
struct config_file *include;
git_config_file *include;
int error, modified;
uint32_t i;
......@@ -885,397 +867,6 @@ int git_config_file__snapshot(git_config_backend **out, diskfile_backend *in)
return 0;
}
/*
 * Consume and return the next character of the reader.
 *
 * A "\r\n" pair is reported as a single '\n'. Returning '\n' bumps the
 * line counter; returning NUL flags the reader as being at EOF.
 */
static int reader_getchar_raw(struct reader *reader)
{
	int ch = *reader->read_ptr;

	reader->read_ptr++;

	/* Win32 line breaks: fold "\r\n" into a plain newline */
	if (ch == '\r' && *reader->read_ptr == '\n') {
		ch = '\n';
		reader->read_ptr++;
	}

	switch (ch) {
	case '\n':
		reader->line_number++;
		break;
	case '\0':
		reader->eof = 1;
		break;
	default:
		break;
	}

	return ch;
}
#define SKIP_WHITESPACE (1 << 1)
#define SKIP_COMMENTS (1 << 2)
/*
 * Consume and return the next character, honouring `flags`:
 *  - SKIP_WHITESPACE: whitespace other than '\n' is skipped first.
 *  - SKIP_COMMENTS: if the character found is '#' or ';', everything up
 *    to the following '\n' or NUL is consumed and that terminator is
 *    returned instead.
 */
static int reader_getchar(struct reader *reader, int flags)
{
	int ch;

	assert(reader->read_ptr);

	for (;;) {
		ch = reader_getchar_raw(reader);

		/* newline and NUL are always handed back to the caller */
		if (ch == '\n' || ch == '\0')
			break;
		if (!(flags & SKIP_WHITESPACE) || !git__isspace(ch))
			break;
	}

	if ((flags & SKIP_COMMENTS) && (ch == '#' || ch == ';')) {
		for (;;) {
			ch = reader_getchar_raw(reader);
			if (ch == '\n' || ch == '\0')
				break;
		}
	}

	return ch;
}
/*
 * Return the character reader_getchar() would yield for `flags`, then
 * restore the read pointer, line number and EOF flag so that nothing
 * is actually consumed.
 */
static int reader_peek(struct reader *reader, int flags)
{
	char *saved_ptr;
	int saved_line, saved_eof;
	int ch;

	assert(reader->read_ptr);

	saved_ptr = reader->read_ptr;
	saved_line = reader->line_number;
	saved_eof = reader->eof;

	ch = reader_getchar(reader, flags);

	reader->eof = saved_eof;
	reader->line_number = saved_line;
	reader->read_ptr = saved_ptr;

	return ch;
}
/*
 * Read and consume one line, returning a newly allocated copy with
 * trailing whitespace removed (the caller frees it).
 *
 * When `skip_whitespace` is set, leading whitespace -- including empty
 * lines -- is skipped before the line starts. Returns NULL if the size
 * computation overflows or the allocation fails.
 */
static char *reader_readline(struct reader *reader, bool skip_whitespace)
{
	char *start = reader->read_ptr;
	char *eol, *copy;
	size_t len, alloc_len;

	if (skip_whitespace) {
		/* Skip empty lines and leading blanks */
		for (; git__isspace(*start); start++)
			;
	}

	eol = strchr(start, '\n');

	/* no newline at EOF: the line runs up to the terminating NUL */
	if (eol == NULL)
		eol = strchr(start, 0);

	len = eol - start;

	if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, len, 1))
		return NULL;

	copy = git__malloc(alloc_len);
	if (copy == NULL)
		return NULL;

	memcpy(copy, start, len);

	/* terminate, then keep cutting while the last character is blank */
	copy[len] = '\0';
	while (len > 0 && git__isspace(copy[len - 1]))
		copy[--len] = '\0';

	if (*eol == '\n')
		eol++;

	if (*eol == '\0')
		reader->eof = 1;

	reader->line_number++;
	reader->read_ptr = eol;

	return copy;
}
/*
 * Advance the reader past the current line without storing it.
 * Sets the EOF flag when the new position is the terminating NUL.
 */
static void reader_consume_line(struct reader *reader)
{
	char *cursor = strchr(reader->read_ptr, '\n');

	if (cursor != NULL) {
		/* step over the newline itself */
		cursor++;
	} else {
		/* No newline at EOF */
		cursor = strchr(reader->read_ptr, '\0');
	}

	if (*cursor == '\0')
		reader->eof = 1;

	reader->line_number++;
	reader->read_ptr = cursor;
}
/*
 * Return non-zero when `c` may appear in a section name: an
 * alphanumeric character or '-'.
 *
 * Callers pass values read from plain `char` buffers, which can be
 * negative for bytes >= 0x80. isalnum() is only defined for values
 * representable as unsigned char (or EOF), hence the cast.
 */
GIT_INLINE(int) config_keychar(int c)
{
	return isalnum((unsigned char)c) || c == '-';
}
/*
 * Parse the quoted part of a section header such as `[base "ext"]`.
 *
 * `line` is the complete header line and `base_name` the text that was
 * parsed before the whitespace. On success 0 is returned and
 * `*section_name` receives a newly allocated string of the form
 * "<base_name>.<ext>", where a backslash inside the quotes causes the
 * following character to be copied literally. On failure -1 is
 * returned and the temporary buffer is released.
 */
static int parse_section_header_ext(struct reader *reader, const char *line, const char *base_name, char **section_name)
{
int c, rpos;
char *first_quote, *last_quote;
git_buf buf = GIT_BUF_INIT;
size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
/*
* base_name is what came before the space. We should be at the
* first quotation mark, except for now, line isn't being kept in
* sync so we only really use it to calculate the length.
*/
first_quote = strchr(line, '"');
if (first_quote == NULL) {
set_parse_error(reader, 0, "Missing quotation marks in section header");
goto end_error;
}
last_quote = strrchr(line, '"');
quoted_len = last_quote - first_quote;
/* first and last quote are the same character: there is only one quote */
if (quoted_len == 0) {
set_parse_error(reader, 0, "Missing closing quotation mark in section header");
goto end_error;
}
/* base name + quoted text + 2 extra bytes (presumably the '.' and the NUL) */
GITERR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
if (git_buf_grow(&buf, alloc_len) < 0 ||
git_buf_printf(&buf, "%s.", base_name) < 0)
goto end_error;
/* from here on `line` is indexed relative to the opening quote */
rpos = 0;
line = first_quote;
c = line[++rpos];
/*
* At the end of each iteration, whatever is stored in c will be
* added to the string. In case of error, jump to end_error
*/
do {
switch (c) {
case 0:
set_parse_error(reader, 0, "Unexpected end-of-line in section header");
goto end_error;
case '"':
goto end_parse;
case '\\':
/* take the character after the backslash literally */
c = line[++rpos];
if (c == 0) {
set_parse_error(reader, rpos, "Unexpected end-of-line in section header");
goto end_error;
}
/* fallthrough: the escaped character is appended below */
default:
break;
}
git_buf_putc(&buf, (char)c);
c = line[++rpos];
} while (line + rpos < last_quote);
end_parse:
/* git_buf_putc() results were not checked individually; catch OOM here */
if (git_buf_oom(&buf))
goto end_error;
/* the closing quote must be immediately followed by ']' */
if (line[rpos] != '"' || line[rpos + 1] != ']') {
set_parse_error(reader, rpos, "Unexpected text after closing quotes");
git_buf_free(&buf);
return -1;
}
*section_name = git_buf_detach(&buf);
return 0;
end_error:
git_buf_free(&buf);
return -1;
}
/*
 * Read the next line from `reader` and parse it as a section header.
 *
 * For a plain header (`[section]` or `[section.sub]`) the lower-cased
 * name is returned in `*section_out`. If whitespace is found inside the
 * brackets the remainder is handed to parse_section_header_ext() to
 * deal with the quoted subsection. The string stored in `*section_out`
 * is newly allocated.
 *
 * Returns 0 on success and -1 on failure. Every failure path after the
 * line has been read releases it, including overflow of the size
 * computation and allocation failure of the name buffer.
 */
static int parse_section_header(struct reader *reader, char **section_out)
{
	char *name = NULL, *name_end;
	int name_length, c, pos;
	int result;
	char *line;
	size_t line_len;

	line = reader_readline(reader, true);
	if (line == NULL)
		return -1;

	/* find the end of the variable's name */
	name_end = strrchr(line, ']');
	if (name_end == NULL) {
		git__free(line);
		set_parse_error(reader, 0, "Missing ']' in section header");
		return -1;
	}

	/*
	 * Leave through fail_parse rather than returning directly so that
	 * `line` is not leaked when the allocation cannot be made.
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&line_len, (size_t)(name_end - line), 1) ||
	    (name = git__malloc(line_len)) == NULL)
		goto fail_parse;

	name_length = 0;
	pos = 0;

	/* Make sure we were given a section header */
	c = line[pos++];
	assert(c == '[');

	c = line[pos++];

	do {
		if (git__isspace(c)) {
			/* whitespace starts the quoted subsection part */
			name[name_length] = '\0';
			result = parse_section_header_ext(reader, line, name, section_out);
			git__free(line);
			git__free(name);
			return result;
		}

		if (!config_keychar(c) && c != '.') {
			set_parse_error(reader, pos, "Unexpected character in header");
			goto fail_parse;
		}

		name[name_length++] = (char)git__tolower(c);

	} while ((c = line[pos++]) != ']');

	if (line[pos - 1] != ']') {
		set_parse_error(reader, pos, "Unexpected end of file");
		goto fail_parse;
	}

	git__free(line);

	name[name_length] = 0;
	*section_out = name;

	return 0;

fail_parse:
	git__free(line);
	git__free(name);
	return -1;
}
/*
 * Step over a UTF-8 byte order mark located at the current read
 * position, if there is one. Other results of the detection are
 * ignored. Always returns 0.
 */
static int skip_bom(struct reader *reader)
{
	git_bom_t bom;
	int skip;

	skip = git_buf_text_detect_bom(&bom, &reader->buffer,
		reader->read_ptr - reader->buffer.ptr);

	/* TODO: reference implementation is pretty stupid with BoM */
	if (bom != GIT_BOM_UTF8)
		return 0;

	reader->read_ptr += skip;
	return 0;
}
/*
(* basic types *)
digit = "0".."9"
integer = digit { digit }
alphabet = "a".."z" + "A" .. "Z"
section_char = alphabet | "." | "-"
extension_char = (* any character except newline *)
any_char = (* any character *)
variable_char = alphabet | digit | "-"
(* actual grammar *)
config = { section }
section = header { definition }
header = "[" section [subsection | subsection_ext] "]"
subsection = "." section
subsection_ext = "\"" extension "\""
section = section_char { section_char }
extension = extension_char { extension_char }
definition = variable_name ["=" variable_value] "\n"
variable_name = variable_char { variable_char }
variable_value = string | boolean | integer
string = quoted_string | plain_string
quoted_string = "\"" plain_string "\""
plain_string = { any_char }
boolean = boolean_true | boolean_false
boolean_true = "yes" | "1" | "true" | "on"
boolean_false = "no" | "0" | "false" | "off"
*/
/*
 * Cut `line` at the first ';' or '#' that starts a comment and remove
 * trailing whitespace, modifying the string in place.
 *
 * A comment character only counts when the running quote count is even
 * and it is not preceded by an odd run of backslashes. The quote count
 * starts at `in_quotes`; a '"' is counted unless it is the first
 * character of the line or directly follows a backslash. Returns the
 * final quote count.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quotes = in_quotes, backslashes = 0;
	size_t i = 0;

	while (line[i] != '\0') {
		const char ch = line[i];

		if (ch == '"' && i > 0 && line[i - 1] != '\\')
			quotes++;

		if ((ch == ';' || ch == '#') &&
		    (quotes % 2) == 0 &&
		    (backslashes % 2) == 0) {
			line[i] = '\0';
			break;
		}

		if (ch == '\\')
			backslashes++;
		else
			backslashes = 0;

		i++;
	}

	/* skip any space at the end */
	while (i > 0 && git__isspace(line[i - 1]))
		i--;

	line[i] = '\0';

	return quotes;
}
static int included_path(git_buf *out, const char *dir, const char *path)
{
/* From the user's home */
......@@ -1285,9 +876,6 @@ static int included_path(git_buf *out, const char *dir, const char *path)
return git_path_join_unrooted(out, path, dir, NULL);
}
static const char *escapes = "ntb\"\\";
static const char *escaped = "\n\t\b\"\\";
/* Escape the values to write them to the file */
static char *escape_value(const char *ptr)
{
......@@ -1305,9 +893,9 @@ static char *escape_value(const char *ptr)
return NULL;
while (*ptr != '\0') {
if ((esc = strchr(escaped, *ptr)) != NULL) {
if ((esc = strchr(git_config_escaped, *ptr)) != NULL) {
git_buf_putc(&buf, '\\');
git_buf_putc(&buf, escapes[esc - escaped]);
git_buf_putc(&buf, git_config_escapes[esc - git_config_escaped]);
} else {
git_buf_putc(&buf, *ptr);
}
......@@ -1322,254 +910,6 @@ static char *escape_value(const char *ptr)
return git_buf_detach(&buf);
}
/* '\"' -> '"' etc */
/*
 * Produce an unescaped copy of `ptr` in `*out` (newly allocated).
 *
 * Double quotes are dropped, `\x` sequences listed in `escapes` are
 * replaced by the matching character from `escaped`, and a backslash
 * that is the last character of the input sets `*is_multi` to signal a
 * continuation line (the backslash itself is not copied). An unknown
 * escape sets a GITERR_CONFIG error. Returns 0 on success, -1 on error.
 */
static int unescape_line(
	char **out, bool *is_multi, const char *ptr, int quote_count)
{
	size_t len = strlen(ptr), alloc_len;
	char *result, *dst;
	const char *match;

	*is_multi = false;

	if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, len, 1))
		return -1;

	if ((result = git__malloc(alloc_len)) == NULL)
		return -1;

	dst = result;

	for (; *ptr != '\0'; ptr++) {
		if (*ptr == '"') {
			quote_count++;
			continue;
		}

		if (*ptr != '\\') {
			*dst++ = *ptr;
			continue;
		}

		/* backslash, check the next char */
		ptr++;

		/* a backslash at the very end announces a continuation line */
		if (*ptr == '\0') {
			*is_multi = true;
			break;
		}

		match = strchr(escapes, *ptr);
		if (match == NULL) {
			git__free(result);
			giterr_set(GITERR_CONFIG, "invalid escape at %s", ptr);
			return -1;
		}

		*dst++ = escaped[match - escapes];
	}

	*dst = '\0';
	*out = result;

	return 0;
}
/*
 * Append the continuation lines of a multi-line value to `value`.
 *
 * Lines are read one at a time; lines that contain nothing but a
 * comment are skipped, and reading goes on for as long as the line just
 * processed ends in a continuation backslash. `in_quotes` is the quote
 * count carried over from the previous line.
 *
 * This is written as a loop instead of the former tail recursion so
 * that a file with a very long run of continuation or comment lines
 * cannot exhaust the stack.
 *
 * Returns 0 on success (including a missing continuation at end of
 * file) and -1 on error. Failures of git_buf_puts() are not reported
 * here; the caller checks git_buf_oom() on `value`.
 */
static int parse_multiline_variable(struct reader *reader, git_buf *value, int in_quotes)
{
	for (;;) {
		char *line, *proc_line = NULL;
		int quote_count;
		bool multiline;

		/* Check that the next line exists */
		line = reader_readline(reader, false);
		if (line == NULL)
			return -1;

		/* We've reached the end of the file, there is no continuation.
		 * (this is not an error).
		 */
		if (line[0] == '\0') {
			git__free(line);
			return 0;
		}

		quote_count = strip_comments(line, !!in_quotes);

		/* If it was just a comment, pretend it didn't exist */
		if (line[0] == '\0') {
			git__free(line);
			in_quotes = quote_count;
			continue;
		}

		if (unescape_line(&proc_line, &multiline, line, in_quotes) < 0) {
			git__free(line);
			return -1;
		}

		/* add this line to the multiline var */
		git_buf_puts(value, proc_line);
		git__free(line);
		git__free(proc_line);

		if (!multiline)
			return 0;

		/* the line ended in a backslash: keep reading */
		in_quotes = quote_count;
	}
}
/*
 * Return true when `c` may appear in a variable name: an alphanumeric
 * character or '-'.
 *
 * Plain `char` may be signed, so bytes >= 0x80 arrive as negative
 * values. isalnum() is only defined for values representable as
 * unsigned char (or EOF), hence the cast.
 */
GIT_INLINE(bool) is_namechar(char c)
{
	return isalnum((unsigned char)c) || c == '-';
}
/*
 * Split `line` into a variable name and the start of its value.
 *
 * On success `*name` holds a newly allocated copy of the leading run of
 * name characters and `*value` points just past the '=' inside `line`,
 * or is NULL when the line has no '='. Anything other than optional
 * whitespace between the name and the '=' (or the end of the line) is a
 * parse error. Returns 0 on success and -1 on failure.
 */
static int parse_name(
	char **name, const char **value, struct reader *reader, const char *line)
{
	const char *key_end, *cursor;

	*name = NULL;
	*value = NULL;

	for (key_end = line; *key_end && is_namechar(*key_end); key_end++)
		;

	if (key_end == line) {
		set_parse_error(reader, 0, "Invalid configuration key");
		return -1;
	}

	for (cursor = key_end; *cursor && git__isspace(*cursor); cursor++)
		;

	if (*cursor == '=') {
		*value = cursor + 1;
	} else if (*cursor != '\0') {
		set_parse_error(reader, 0, "Invalid configuration key");
		return -1;
	}

	*name = git__strndup(line, key_end - line);

	return (*name == NULL) ? -1 : 0;
}
/*
 * Read the next line and parse it as `name [= value]`.
 *
 * On success 0 is returned, `*var_name` holds a newly allocated name
 * and `*var_value` holds either NULL (no '=' on the line) or a newly
 * allocated, unescaped value that includes any continuation lines.
 * Both strings are handed to the caller. Returns -1 on failure.
 */
static int parse_variable(struct reader *reader, char **var_name, char **var_value)
{
const char *value_start = NULL;
char *line;
int quote_count;
bool multiline;
line = reader_readline(reader, true);
if (line == NULL)
return -1;
/* drop a trailing comment; remember the quote count for continuation lines */
quote_count = strip_comments(line, 0);
/* If there is no value, boolean true is assumed */
*var_value = NULL;
if (parse_name(var_name, &value_start, reader, line) < 0)
goto on_error;
/*
* Now, let's try to parse the value
*/
if (value_start != NULL) {
/* skip blanks between '=' and the value */
while (git__isspace(value_start[0]))
value_start++;
if (unescape_line(var_value, &multiline, value_start, 0) < 0)
goto on_error;
if (multiline) {
/* let a git_buf take over the value so continuation lines can be appended */
git_buf multi_value = GIT_BUF_INIT;
git_buf_attach(&multi_value, *var_value, 0);
if (parse_multiline_variable(reader, &multi_value, quote_count) < 0 ||
git_buf_oom(&multi_value)) {
/* this also releases the string that was attached above */
git_buf_free(&multi_value);
goto on_error;
}
*var_value = git_buf_detach(&multi_value);
}
}
git__free(line);
return 0;
on_error:
/* *var_name is NULL here when parse_name() itself failed */
git__free(*var_name);
git__free(line);
return -1;
}
/*
 * Drive the parser over the whole buffer of `reader`, invoking the
 * given callbacks (each of which may be NULL) for section headers,
 * variables, comment/blank lines and the end of the file. `data` is
 * passed through to every callback.
 *
 * Parsing stops at the first non-zero result, which is then returned.
 * `on_eof` is only invoked when everything before it succeeded, so an
 * earlier parse or callback error can no longer be overwritten by it.
 *
 * The name and value produced for a variable are handed to
 * `on_variable`; when no such callback is given they are released here
 * instead of being leaked.
 */
static int config_parse(
	struct reader *reader,
	int (*on_section)(struct reader *reader, const char *current_section, const char *line, size_t line_len, void *data),
	int (*on_variable)(struct reader *reader, const char *current_section, char *var_name, char *var_value, const char *line, size_t line_len, void *data),
	int (*on_comment)(struct reader *reader, const char *line, size_t line_len, void *data),
	int (*on_eof)(struct reader *reader, const char *current_section, void *data),
	void *data)
{
	char *current_section = NULL, *var_name, *var_value, *line_start;
	char c;
	size_t line_len;
	int result = 0;

	skip_bom(reader);

	while (result == 0 && !reader->eof) {
		line_start = reader->read_ptr;

		c = reader_peek(reader, SKIP_WHITESPACE);

		switch (c) {
		case '\0': /* EOF when peeking, set EOF in the reader to exit the loop */
			reader->eof = 1;
			break;

		case '[': /* section header, new section begins */
			git__free(current_section);
			current_section = NULL;

			if ((result = parse_section_header(reader, &current_section)) == 0 && on_section) {
				line_len = reader->read_ptr - line_start;
				result = on_section(reader, current_section, line_start, line_len, data);
			}
			break;

		case '\n': /* comment or whitespace-only */
		case ';':
		case '#':
			reader_consume_line(reader);

			if (on_comment) {
				line_len = reader->read_ptr - line_start;
				result = on_comment(reader, line_start, line_len, data);
			}
			break;

		default: /* assume variable declaration */
			result = parse_variable(reader, &var_name, &var_value);
			if (result != 0)
				break;

			if (on_variable) {
				line_len = reader->read_ptr - line_start;
				result = on_variable(reader, current_section, var_name, var_value, line_start, line_len, data);
			} else {
				/* nobody takes the strings over, so drop them */
				git__free(var_name);
				git__free(var_value);
			}
			break;
		}
	}

	if (result == 0 && on_eof)
		result = on_eof(reader, current_section, data);

	git__free(current_section);
	return result;
}
struct parse_data {
const git_repository *repo;
const char *file_path;
......@@ -1578,7 +918,7 @@ struct parse_data {
int depth;
};
static int parse_include(struct reader *reader,
static int parse_include(git_config_parser *reader,
struct parse_data *parse_data, const char *file)
{
struct config_file *include;
......@@ -1680,7 +1020,7 @@ static const struct {
{ "gitdir/i:", conditional_match_gitdir_i }
};
static int parse_conditional_include(struct reader *reader,
static int parse_conditional_include(git_config_parser *reader,
struct parse_data *parse_data, const char *section, const char *file)
{
char *condition;
......@@ -1714,7 +1054,7 @@ static int parse_conditional_include(struct reader *reader,
}
static int read_on_variable(
struct reader *reader,
git_config_parser *reader,
const char *current_section,
char *var_name,
char *var_value,
......@@ -1769,12 +1109,13 @@ static int read_on_variable(
static int config_read(
git_strmap *values,
const git_repository *repo,
struct config_file *file,
git_config_file *file,
git_config_level_t level,
int depth)
{
struct parse_data parse_data;
struct reader reader;
git_config_parser reader;
git_buf contents = GIT_BUF_INIT;
int error;
if (depth >= MAX_INCLUDE_DEPTH) {
......@@ -1782,22 +1123,20 @@ static int config_read(
return -1;
}
git_buf_init(&reader.buffer, 0);
if ((error = git_futils_readbuffer(&reader.buffer, file->path)) < 0)
if ((error = git_futils_readbuffer(&contents, file->path)) < 0)
goto out;
if ((error = git_hash_buf(&file->checksum, reader.buffer.ptr, reader.buffer.size)) < 0)
git_parse_ctx_init(&reader.ctx, contents.ptr, contents.size);
if ((error = git_hash_buf(&file->checksum, contents.ptr, contents.size)) < 0)
goto out;
/* Initialize the reading position */
reader.file = file;
reader.line_number = 0;
reader.read_ptr = reader.buffer.ptr;
reader.eof = 0;
git_parse_ctx_init(&reader.ctx, contents.ptr, contents.size);
/* If the file is empty, there's nothing for us to do */
if (*reader.read_ptr == '\0')
if (!reader.ctx.content || *reader.ctx.content == '\0')
goto out;
parse_data.repo = repo;
......@@ -1806,10 +1145,10 @@ static int config_read(
parse_data.level = level;
parse_data.depth = depth;
error = config_parse(&reader, NULL, read_on_variable, NULL, NULL, &parse_data);
error = git_config_parse(&reader, NULL, read_on_variable, NULL, NULL, &parse_data);
out:
git_buf_free(&reader.buffer);
git_buf_free(&contents);
return error;
}
......@@ -1909,7 +1248,7 @@ static int write_value(struct write_data *write_data)
}
static int write_on_section(
struct reader *reader,
git_config_parser *reader,
const char *current_section,
const char *line,
size_t line_len,
......@@ -1945,7 +1284,7 @@ static int write_on_section(
}
static int write_on_variable(
struct reader *reader,
git_config_parser *reader,
const char *current_section,
char *var_name,
char *var_value,
......@@ -1995,7 +1334,7 @@ static int write_on_variable(
return write_value(write_data);
}
static int write_on_comment(struct reader *reader, const char *line, size_t line_len, void *data)
static int write_on_comment(git_config_parser *reader, const char *line, size_t line_len, void *data)
{
struct write_data *write_data;
......@@ -2006,7 +1345,7 @@ static int write_on_comment(struct reader *reader, const char *line, size_t line
}
static int write_on_eof(
struct reader *reader, const char *current_section, void *data)
git_config_parser *reader, const char *current_section, void *data)
{
struct write_data *write_data = (struct write_data *)data;
int result = 0;
......@@ -2044,36 +1383,30 @@ static int config_write(diskfile_backend *cfg, const char *orig_key, const char
int result;
char *orig_section, *section, *orig_name, *name, *ldot;
git_filebuf file = GIT_FILEBUF_INIT;
git_buf buf = GIT_BUF_INIT;
struct reader reader;
git_buf buf = GIT_BUF_INIT, contents = GIT_BUF_INIT;
git_config_parser reader;
struct write_data write_data;
memset(&reader, 0, sizeof(reader));
git_buf_init(&reader.buffer, 0);
reader.file = &cfg->file;
if (cfg->locked) {
result = git_buf_puts(&reader.buffer, git_buf_cstr(&cfg->locked_content));
result = git_buf_puts(&contents, git_buf_cstr(&cfg->locked_content));
} else {
/* Lock the file */
if ((result = git_filebuf_open(
&file, cfg->file.path, GIT_FILEBUF_HASH_CONTENTS, GIT_CONFIG_FILE_MODE)) < 0) {
git_buf_free(&reader.buffer);
git_buf_free(&contents);
return result;
}
/* We need to read in our own config file */
result = git_futils_readbuffer(&reader.buffer, cfg->file.path);
result = git_futils_readbuffer(&contents, cfg->file.path);
}
/* Initialise the reading position */
if (result == GIT_ENOTFOUND) {
reader.read_ptr = NULL;
reader.eof = 1;
git_buf_clear(&reader.buffer);
} else if (result == 0) {
reader.read_ptr = reader.buffer.ptr;
reader.eof = 0;
if (result == 0 || result == GIT_ENOTFOUND) {
git_parse_ctx_init(&reader.ctx, contents.ptr, contents.size);
} else {
git_filebuf_cleanup(&file);
return -1; /* OS error when reading the file */
......@@ -2100,7 +1433,12 @@ static int config_write(diskfile_backend *cfg, const char *orig_key, const char
write_data.preg = preg;
write_data.value = value;
result = config_parse(&reader, write_on_section, write_on_variable, write_on_comment, write_on_eof, &write_data);
result = git_config_parse(&reader,
write_on_section,
write_on_variable,
write_on_comment,
write_on_eof,
&write_data);
git__free(section);
git__free(orig_section);
git_buf_free(&write_data.buffered_comment);
......@@ -2122,6 +1460,7 @@ static int config_write(diskfile_backend *cfg, const char *orig_key, const char
done:
git_buf_free(&buf);
git_buf_free(&reader.buffer);
git_buf_free(&contents);
git_parse_ctx_clear(&reader.ctx);
return result;
}
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "config_parse.h"
#include "buf_text.h"
#include <ctype.h>
/*
 * Record a GITERR_CONFIG error for a parse failure. The message names
 * the path of the parser's file and the line number of its parse
 * context together with the column `col` and the reason `error_str`.
 */
static void set_parse_error(git_config_parser *reader, int col, const char *error_str)
{
	const char *path = reader->file->path;

	giterr_set(GITERR_CONFIG,
		"failed to parse config file: %s (in %s:%"PRIuZ", column %d)",
		error_str, path, reader->ctx.line_num, col);
}
/*
 * Return non-zero when `c` may appear in a section name: an
 * alphanumeric character or '-'.
 *
 * Callers pass values read from plain `char` buffers, which can be
 * negative for bytes >= 0x80. isalnum() is only defined for values
 * representable as unsigned char (or EOF), hence the cast.
 */
GIT_INLINE(int) config_keychar(int c)
{
	return isalnum((unsigned char)c) || c == '-';
}
/*
 * Cut `line` at the first ';' or '#' that starts a comment and remove
 * trailing whitespace, modifying the string in place.
 *
 * A comment character only counts when the running quote count is even
 * and it is not preceded by an odd run of backslashes. The quote count
 * starts at `in_quotes`; a '"' is counted unless it is the first
 * character of the line or directly follows a backslash. Returns the
 * final quote count.
 */
static int strip_comments(char *line, int in_quotes)
{
	int quotes = in_quotes, backslashes = 0;
	size_t i = 0;

	while (line[i] != '\0') {
		const char ch = line[i];

		if (ch == '"' && i > 0 && line[i - 1] != '\\')
			quotes++;

		if ((ch == ';' || ch == '#') &&
		    (quotes % 2) == 0 &&
		    (backslashes % 2) == 0) {
			line[i] = '\0';
			break;
		}

		if (ch == '\\')
			backslashes++;
		else
			backslashes = 0;

		i++;
	}

	/* skip any space at the end */
	while (i > 0 && git__isspace(line[i - 1]))
		i--;

	line[i] = '\0';

	return quotes;
}
/*
 * Parse the quoted part of a section header such as `[base "ext"]`.
 *
 * `line` is the complete header line and `base_name` the text that was
 * parsed before the whitespace. On success 0 is returned and
 * `*section_name` receives a newly allocated string of the form
 * "<base_name>.<ext>", where a backslash inside the quotes causes the
 * following character to be copied literally. On failure -1 is
 * returned and the temporary buffer is released.
 */
static int parse_section_header_ext(git_config_parser *reader, const char *line, const char *base_name, char **section_name)
{
int c, rpos;
char *first_quote, *last_quote;
git_buf buf = GIT_BUF_INIT;
size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
/*
* base_name is what came before the space. We should be at the
* first quotation mark, except for now, line isn't being kept in
* sync so we only really use it to calculate the length.
*/
first_quote = strchr(line, '"');
if (first_quote == NULL) {
set_parse_error(reader, 0, "Missing quotation marks in section header");
goto end_error;
}
last_quote = strrchr(line, '"');
quoted_len = last_quote - first_quote;
/* first and last quote are the same character: there is only one quote */
if (quoted_len == 0) {
set_parse_error(reader, 0, "Missing closing quotation mark in section header");
goto end_error;
}
/* base name + quoted text + 2 extra bytes (presumably the '.' and the NUL) */
GITERR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
if (git_buf_grow(&buf, alloc_len) < 0 ||
git_buf_printf(&buf, "%s.", base_name) < 0)
goto end_error;
/* from here on `line` is indexed relative to the opening quote */
rpos = 0;
line = first_quote;
c = line[++rpos];
/*
* At the end of each iteration, whatever is stored in c will be
* added to the string. In case of error, jump to end_error
*/
do {
switch (c) {
case 0:
set_parse_error(reader, 0, "Unexpected end-of-line in section header");
goto end_error;
case '"':
goto end_parse;
case '\\':
/* take the character after the backslash literally */
c = line[++rpos];
if (c == 0) {
set_parse_error(reader, rpos, "Unexpected end-of-line in section header");
goto end_error;
}
/* fallthrough: the escaped character is appended below */
default:
break;
}
git_buf_putc(&buf, (char)c);
c = line[++rpos];
} while (line + rpos < last_quote);
end_parse:
/* git_buf_putc() results were not checked individually; catch OOM here */
if (git_buf_oom(&buf))
goto end_error;
/* the closing quote must be immediately followed by ']' */
if (line[rpos] != '"' || line[rpos + 1] != ']') {
set_parse_error(reader, rpos, "Unexpected text after closing quotes");
git_buf_free(&buf);
return -1;
}
*section_name = git_buf_detach(&buf);
return 0;
end_error:
git_buf_free(&buf);
return -1;
}
/*
 * Parse the current line of the parser context as a section header.
 *
 * Leading whitespace is skipped first and the line is copied so it can
 * be NUL-terminated. For a plain header (`[section]` or
 * `[section.sub]`) the lower-cased name is returned in `*section_out`.
 * If whitespace is found inside the brackets the remainder is handed to
 * parse_section_header_ext() to deal with the quoted subsection. The
 * string stored in `*section_out` is newly allocated.
 *
 * Returns 0 on success and -1 on failure. Every failure path after the
 * line has been copied releases it, including overflow of the size
 * computation and allocation failure of the name buffer.
 */
static int parse_section_header(git_config_parser *reader, char **section_out)
{
	char *name = NULL, *name_end;
	int name_length, c, pos;
	int result;
	char *line;
	size_t line_len;

	git_parse_advance_ws(&reader->ctx);
	line = git__strndup(reader->ctx.line, reader->ctx.line_len);
	if (line == NULL)
		return -1;

	/* find the end of the variable's name */
	name_end = strrchr(line, ']');
	if (name_end == NULL) {
		git__free(line);
		set_parse_error(reader, 0, "Missing ']' in section header");
		return -1;
	}

	/*
	 * Leave through fail_parse rather than returning directly so that
	 * `line` is not leaked when the allocation cannot be made.
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&line_len, (size_t)(name_end - line), 1) ||
	    (name = git__malloc(line_len)) == NULL)
		goto fail_parse;

	name_length = 0;
	pos = 0;

	/* Make sure we were given a section header */
	c = line[pos++];
	assert(c == '[');

	c = line[pos++];

	do {
		if (git__isspace(c)) {
			/* whitespace starts the quoted subsection part */
			name[name_length] = '\0';
			result = parse_section_header_ext(reader, line, name, section_out);
			git__free(line);
			git__free(name);
			return result;
		}

		if (!config_keychar(c) && c != '.') {
			set_parse_error(reader, pos, "Unexpected character in header");
			goto fail_parse;
		}

		name[name_length++] = (char)git__tolower(c);

	} while ((c = line[pos++]) != ']');

	if (line[pos - 1] != ']') {
		set_parse_error(reader, pos, "Unexpected end of file");
		goto fail_parse;
	}

	git__free(line);

	name[name_length] = 0;
	*section_out = name;

	return 0;

fail_parse:
	git__free(line);
	git__free(name);
	return -1;
}
/*
 * Step over a UTF-8 byte order mark at the beginning of the content of
 * `parser`, if there is one. Always returns 0.
 *
 * The last argument of git_buf_text_detect_bom() is the offset inside
 * the buffer at which to look (the previous reader-based code passed
 * its read position there), not a length. Passing the content length
 * made the check start at the end of the buffer; the mark sits at the
 * very start, so the offset is 0.
 */
static int skip_bom(git_parse_ctx *parser)
{
	git_buf buf = GIT_BUF_INIT_CONST(parser->content, parser->content_len);
	git_bom_t bom;
	int bom_offset = git_buf_text_detect_bom(&bom, &buf, 0);

	if (bom == GIT_BOM_UTF8)
		git_parse_advance_chars(parser, bom_offset);

	/* TODO: reference implementation is pretty stupid with BoM */

	return 0;
}
/*
(* basic types *)
digit = "0".."9"
integer = digit { digit }
alphabet = "a".."z" + "A" .. "Z"
section_char = alphabet | "." | "-"
extension_char = (* any character except newline *)
any_char = (* any character *)
variable_char = alphabet | digit | "-"
(* actual grammar *)
config = { section }
section = header { definition }
header = "[" section [subsection | subsection_ext] "]"
subsection = "." section
subsection_ext = "\"" extension "\""
section = section_char { section_char }
extension = extension_char { extension_char }
definition = variable_name ["=" variable_value] "\n"
variable_name = variable_char { variable_char }
variable_value = string | boolean | integer
string = quoted_string | plain_string
quoted_string = "\"" plain_string "\""
plain_string = { any_char }
boolean = boolean_true | boolean_false
boolean_true = "yes" | "1" | "true" | "on"
boolean_false = "no" | "0" | "false" | "off"
*/
/* '\"' -> '"' etc */
/*
 * Produce an unescaped copy of `ptr` in `*out` (newly allocated).
 *
 * Double quotes are dropped, `\x` sequences listed in
 * git_config_escapes are replaced by the matching character from
 * git_config_escaped, and a backslash that is the last character of the
 * input sets `*is_multi` to signal a continuation line (the backslash
 * itself is not copied). An unknown escape sets a GITERR_CONFIG error.
 * Returns 0 on success, -1 on error.
 */
static int unescape_line(
	char **out, bool *is_multi, const char *ptr, int quote_count)
{
	size_t len = strlen(ptr), alloc_len;
	char *result, *dst;
	const char *match;

	*is_multi = false;

	if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, len, 1))
		return -1;

	if ((result = git__malloc(alloc_len)) == NULL)
		return -1;

	dst = result;

	for (; *ptr != '\0'; ptr++) {
		if (*ptr == '"') {
			quote_count++;
			continue;
		}

		if (*ptr != '\\') {
			*dst++ = *ptr;
			continue;
		}

		/* backslash, check the next char */
		ptr++;

		/* a backslash at the very end announces a continuation line */
		if (*ptr == '\0') {
			*is_multi = true;
			break;
		}

		match = strchr(git_config_escapes, *ptr);
		if (match == NULL) {
			git__free(result);
			giterr_set(GITERR_CONFIG, "invalid escape at %s", ptr);
			return -1;
		}

		*dst++ = git_config_escaped[match - git_config_escapes];
	}

	*dst = '\0';
	*out = result;

	return 0;
}
/*
 * Append the continuation lines of a multi-line value to `value`.
 *
 * The parse context is advanced one line at a time; lines that contain
 * nothing but a comment are skipped, and reading goes on for as long as
 * the line just processed ends in a continuation backslash. `in_quotes`
 * is the quote count carried over from the previous line.
 *
 * This is written as a loop instead of the former tail recursion so
 * that a file with a very long run of continuation or comment lines
 * cannot exhaust the stack.
 *
 * Returns 0 on success (including an empty next line) and -1 on error.
 * Failures of git_buf_puts() are not reported here; the caller checks
 * git_buf_oom() on `value`.
 */
static int parse_multiline_variable(git_config_parser *reader, git_buf *value, int in_quotes)
{
	for (;;) {
		char *line, *proc_line = NULL;
		int quote_count;
		bool multiline;

		/* Check that the next line exists */
		git_parse_advance_line(&reader->ctx);
		line = git__strndup(reader->ctx.line, reader->ctx.line_len);
		if (line == NULL)
			return -1;

		/* We've reached the end of the file, there is no continuation.
		 * (this is not an error).
		 */
		if (line[0] == '\0') {
			git__free(line);
			return 0;
		}

		quote_count = strip_comments(line, !!in_quotes);

		/* If it was just a comment, pretend it didn't exist */
		if (line[0] == '\0') {
			git__free(line);
			in_quotes = quote_count;
			continue;
		}

		if (unescape_line(&proc_line, &multiline, line, in_quotes) < 0) {
			git__free(line);
			return -1;
		}

		/* add this line to the multiline var */
		git_buf_puts(value, proc_line);
		git__free(line);
		git__free(proc_line);

		if (!multiline)
			return 0;

		/* the line ended in a backslash: keep reading */
		in_quotes = quote_count;
	}
}
/*
 * Return true when `c` may appear in a variable name: an alphanumeric
 * character or '-'.
 *
 * Plain `char` may be signed, so bytes >= 0x80 arrive as negative
 * values. isalnum() is only defined for values representable as
 * unsigned char (or EOF), hence the cast.
 */
GIT_INLINE(bool) is_namechar(char c)
{
	return isalnum((unsigned char)c) || c == '-';
}
/*
 * Split `line` into a variable name and the start of its value.
 *
 * On success `*name` holds a newly allocated copy of the leading run of
 * name characters and `*value` points just past the '=' inside `line`,
 * or is NULL when the line has no '='. Anything other than optional
 * whitespace between the name and the '=' (or the end of the line) is a
 * parse error. Returns 0 on success and -1 on failure.
 */
static int parse_name(
	char **name, const char **value, git_config_parser *reader, const char *line)
{
	const char *key_end, *cursor;

	*name = NULL;
	*value = NULL;

	for (key_end = line; *key_end && is_namechar(*key_end); key_end++)
		;

	if (key_end == line) {
		set_parse_error(reader, 0, "Invalid configuration key");
		return -1;
	}

	for (cursor = key_end; *cursor && git__isspace(*cursor); cursor++)
		;

	if (*cursor == '=') {
		*value = cursor + 1;
	} else if (*cursor != '\0') {
		set_parse_error(reader, 0, "Invalid configuration key");
		return -1;
	}

	*name = git__strndup(line, key_end - line);

	return (*name == NULL) ? -1 : 0;
}
/*
 * Parse the current line of the parser context as `name [= value]`.
 *
 * On success 0 is returned, `*var_name` holds a newly allocated name
 * and `*var_value` holds either NULL (no '=' on the line) or a newly
 * allocated, unescaped value that includes any continuation lines.
 * Both strings are handed to the caller. Returns -1 on failure.
 */
static int parse_variable(git_config_parser *reader, char **var_name, char **var_value)
{
const char *value_start = NULL;
char *line;
int quote_count;
bool multiline;
/* work on a private, NUL-terminated copy of the current line */
git_parse_advance_ws(&reader->ctx);
line = git__strndup(reader->ctx.line, reader->ctx.line_len);
if (line == NULL)
return -1;
/* drop a trailing comment; remember the quote count for continuation lines */
quote_count = strip_comments(line, 0);
/* If there is no value, boolean true is assumed */
*var_value = NULL;
if (parse_name(var_name, &value_start, reader, line) < 0)
goto on_error;
/*
* Now, let's try to parse the value
*/
if (value_start != NULL) {
/* skip blanks between '=' and the value */
while (git__isspace(value_start[0]))
value_start++;
if (unescape_line(var_value, &multiline, value_start, 0) < 0)
goto on_error;
if (multiline) {
/* let a git_buf take over the value so continuation lines can be appended */
git_buf multi_value = GIT_BUF_INIT;
git_buf_attach(&multi_value, *var_value, 0);
if (parse_multiline_variable(reader, &multi_value, quote_count) < 0 ||
git_buf_oom(&multi_value)) {
/* this also releases the string that was attached above */
git_buf_free(&multi_value);
goto on_error;
}
*var_value = git_buf_detach(&multi_value);
}
}
git__free(line);
return 0;
on_error:
/* *var_name is NULL here when parse_name() itself failed */
git__free(*var_name);
git__free(line);
return -1;
}
/*
 * Parse the content held by `parser` line by line, invoking the given
 * callbacks (each of which may be NULL) for section headers, variables
 * and comment/blank lines, and finally `on_eof`. `data` is passed
 * through to every callback.
 *
 * A negative result from parsing or from a callback aborts immediately
 * and is returned without calling `on_eof`; otherwise the result of
 * `on_eof` (or the last result when there is no `on_eof`) is returned.
 *
 * The name and value produced for a variable are handed to
 * `on_variable`; when no such callback is given they are released here
 * instead of being leaked.
 */
int git_config_parse(
	git_config_parser *parser,
	git_config_parser_section_cb on_section,
	git_config_parser_variable_cb on_variable,
	git_config_parser_comment_cb on_comment,
	git_config_parser_eof_cb on_eof,
	void *data)
{
	git_parse_ctx *ctx;
	char *current_section = NULL, *var_name, *var_value;
	int result = 0;

	ctx = &parser->ctx;

	skip_bom(ctx);

	for (; ctx->remain_len > 0; git_parse_advance_line(ctx)) {
		const char *line_start = parser->ctx.line;
		size_t line_len = parser->ctx.line_len;
		char c;

		/* first non-blank character if there is one, else the first character */
		if (git_parse_peek(&c, ctx, GIT_PARSE_PEEK_SKIP_WHITESPACE) < 0 &&
		    git_parse_peek(&c, ctx, 0) < 0)
			continue;

		switch (c) {
		case '[': /* section header, new section begins */
			git__free(current_section);
			current_section = NULL;

			if ((result = parse_section_header(parser, &current_section)) == 0 && on_section) {
				result = on_section(parser, current_section, line_start, line_len, data);
			}
			break;

		case '\n': /* comment or whitespace-only */
		case ' ':
		case '\t':
		case ';':
		case '#':
			if (on_comment) {
				result = on_comment(parser, line_start, line_len, data);
			}
			break;

		default: /* assume variable declaration */
			result = parse_variable(parser, &var_name, &var_value);
			if (result != 0)
				break;

			if (on_variable) {
				result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, data);
			} else {
				/* nobody takes the strings over, so drop them */
				git__free(var_name);
				git__free(var_value);
			}
			break;
		}

		if (result < 0)
			goto out;
	}

	if (on_eof)
		result = on_eof(parser, current_section, data);

out:
	git__free(current_section);
	return result;
}
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
#include "array.h"
#include "oid.h"
#include "parse.h"
/*
 * Escape tables: the character at index i of `git_config_escapes` is the
 * letter written after a backslash, and the character at the same index
 * of `git_config_escaped` is the byte it stands for (n -> newline,
 * t -> tab, b -> backspace, '"' and '\\' map to themselves).
 * NOTE(review): presumably consumed as parallel lookup tables by the
 * escape/unescape helpers; confirm against their implementation.
 */
static const char *git_config_escapes = "ntb\"\\";
static const char *git_config_escaped = "\n\t\b\"\\";
/*
 * Description of one configuration file and the files it pulls in.
 */
typedef struct config_file {
/* Object id associated with the file; presumably a hash of its content used for change detection (TODO confirm) */
git_oid checksum;
/* Path of the file; NOTE(review): ownership is not visible from here */
char *path;
/* Nested entries of the same type, one per included file */
git_array_t(struct config_file) includes;
} git_config_file;
/*
 * State handed to git_config_parse(): the file being parsed together
 * with the generic line-oriented parse context that tracks the current
 * position in its content.
 */
typedef struct {
/* File the content belongs to */
struct config_file *file;
/* Cursor over the content; advanced line by line during parsing */
git_parse_ctx ctx;
} git_config_parser;
/*
 * Called by git_config_parse() after a section header has been parsed
 * successfully.
 *
 * `current_section` is the name of the section that just started;
 * `line` / `line_len` describe the line as it stood when parsing of the
 * header began (not NUL-terminated); `data` is the caller's payload.
 * A negative return value aborts parsing and is returned to the caller.
 */
typedef int (*git_config_parser_section_cb)(
git_config_parser *parser,
const char *current_section,
const char *line,
size_t line_len,
void *data);
/*
 * Called by git_config_parse() after a variable declaration has been
 * parsed successfully.
 *
 * `current_section` is the most recently parsed section name, or NULL if
 * no section header has been seen yet. `var_name` is the variable's key
 * and `var_value` its value, which is NULL when the declaration has no
 * '=' (boolean true is assumed). `line` / `line_len` describe the line
 * as it stood when parsing of the variable began (not NUL-terminated).
 * A negative return value aborts parsing and is returned to the caller.
 *
 * NOTE(review): git_config_parse() does not free `var_name` or
 * `var_value` after this call, so the callback presumably takes
 * ownership of both; confirm against the callback implementations.
 */
typedef int (*git_config_parser_variable_cb)(
git_config_parser *parser,
const char *current_section,
char *var_name,
char *var_value,
const char *line,
size_t line_len,
void *data);
/*
 * Called by git_config_parse() for lines that are neither a section
 * header nor a variable: comment lines starting with ';' or '#' and
 * whitespace-only lines.
 *
 * `line` / `line_len` describe the line (not NUL-terminated); `data` is
 * the caller's payload. A negative return value aborts parsing and is
 * returned to the caller.
 */
typedef int (*git_config_parser_comment_cb)(
git_config_parser *parser,
const char *line,
size_t line_len,
void *data);
/*
 * Called once by git_config_parse() after the last line has been
 * processed without error.
 *
 * `current_section` is the most recently parsed section name, or NULL if
 * the content contained no section header. The return value becomes the
 * result of git_config_parse().
 */
typedef int (*git_config_parser_eof_cb)(
git_config_parser *parser,
const char *current_section,
void *data);
/*
 * Parse the content referenced by `parser->ctx` line by line, invoking
 * the given callbacks for section headers, variables, comment or blank
 * lines, and the end of input. Each callback may be NULL. `data` is
 * passed through to every callback unchanged.
 *
 * Returns the first negative value produced by parsing or by a callback;
 * otherwise the result of `on_eof` if given, or 0.
 */
int git_config_parse(
git_config_parser *parser,
git_config_parser_section_cb on_section,
git_config_parser_variable_cb on_variable,
git_config_parser_comment_cb on_comment,
git_config_parser_eof_cb on_eof,
void *data);
......@@ -83,7 +83,7 @@ int git_diff_from_buffer(
ctx = git_patch_parse_ctx_init(content, content_len, NULL);
GITERR_CHECK_ALLOC(ctx);
while (ctx->remain_len) {
while (ctx->parse_ctx.remain_len) {
if ((error = git_patch_parse(&patch, ctx)) < 0)
break;
......
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "parse.h"
/*
 * Initialise `ctx` to parse `content_len` bytes starting at `content`.
 *
 * The buffer is referenced, not copied, so it must outlive the context.
 * A NULL `content` or a zero `content_len` yields an empty context that
 * points at a static empty string rather than at NULL, so later pointer
 * arithmetic and memory/string helpers never receive a null pointer.
 *
 * The cursor is placed on the first line and line numbering starts at 1.
 * Always returns 0.
 */
int git_parse_ctx_init(git_parse_ctx *ctx, const char *content, size_t content_len)
{
	if (content && content_len) {
		ctx->content = content;
		ctx->content_len = content_len;
	} else {
		ctx->content = "";
		ctx->content_len = 0;
	}

	ctx->remain = ctx->content;
	ctx->remain_len = ctx->content_len;

	ctx->line = ctx->remain;
	ctx->line_len = git__linenlen(ctx->line, ctx->remain_len);
	ctx->line_num = 1;

	return 0;
}
/*
 * Reset every field of `ctx` to zero. The content buffer itself is not
 * touched or released.
 */
void git_parse_ctx_clear(git_parse_ctx *ctx)
{
	memset(ctx, 0x0, sizeof(git_parse_ctx));
}
/*
 * Move the cursor past whatever is left of the current line and onto the
 * next one, recomputing its length and bumping the line number.
 */
void git_parse_advance_line(git_parse_ctx *ctx)
{
	const size_t consumed = ctx->line_len;

	ctx->line_num += 1;
	ctx->remain_len -= consumed;
	ctx->line += consumed;
	ctx->line_len = git__linenlen(ctx->line, ctx->remain_len);
}
/*
 * Consume `char_cnt` bytes of the current line. No bounds check is made:
 * the caller must ensure that at least `char_cnt` bytes remain in the
 * line.
 */
void git_parse_advance_chars(git_parse_ctx *ctx, size_t char_cnt)
{
	ctx->line_len -= char_cnt;
	ctx->remain_len -= char_cnt;
	ctx->line = ctx->line + char_cnt;
}
/*
 * If the current line starts with the `expected_len` bytes at
 * `expected`, consume them and return 0. Otherwise leave the context
 * untouched and return -1.
 */
int git_parse_advance_expected(
	git_parse_ctx *ctx,
	const char *expected,
	size_t expected_len)
{
	if (expected_len > ctx->line_len ||
	    memcmp(ctx->line, expected, expected_len) != 0)
		return -1;

	git_parse_advance_chars(ctx, expected_len);
	return 0;
}
/*
 * Skip leading whitespace on the current line, never stepping over the
 * terminating newline. Returns 0 if at least one character was skipped
 * and -1 if nothing was consumed.
 */
int git_parse_advance_ws(git_parse_ctx *ctx)
{
	int skipped = 0;

	for (;;) {
		if (ctx->line_len == 0)
			break;
		if (ctx->line[0] == '\n' || !git__isspace(ctx->line[0]))
			break;

		ctx->line++;
		ctx->line_len--;
		ctx->remain_len--;
		skipped = 1;
	}

	return skipped ? 0 : -1;
}
/*
 * Require that exactly a single newline is left on the current line. If
 * so, move on to the next line and return 0; otherwise return -1 without
 * changing the context.
 */
int git_parse_advance_nl(git_parse_ctx *ctx)
{
	if (ctx->line_len == 1 && ctx->line[0] == '\n') {
		git_parse_advance_line(ctx);
		return 0;
	}

	return -1;
}
/*
 * Parse an integer in the given `base` at the cursor and consume the
 * characters that were read. The first character must be a decimal
 * digit, so a leading sign or whitespace is rejected. Returns 0 with the
 * value in `*out`, or -1 if no number could be parsed.
 */
int git_parse_advance_digit(int64_t *out, git_parse_ctx *ctx, int base)
{
	const char *end;

	if (ctx->line_len == 0 || !git__isdigit(ctx->line[0]))
		return -1;

	if (git__strntol64(out, ctx->line, ctx->line_len, &end, base) < 0)
		return -1;

	git_parse_advance_chars(ctx, (end - ctx->line));
	return 0;
}
/*
 * Look at the next character of the current line without consuming
 * anything. With GIT_PARSE_PEEK_SKIP_WHITESPACE set in `flags`,
 * whitespace characters are passed over first. Returns 0 and stores the
 * character in `*out`, or -1 (leaving `*out` untouched) when the line
 * has no such character.
 */
int git_parse_peek(char *out, git_parse_ctx *ctx, int flags)
{
	const int skip_ws = (flags & GIT_PARSE_PEEK_SKIP_WHITESPACE) != 0;
	size_t i;

	for (i = 0; i < ctx->line_len; i++) {
		char c = ctx->line[i];

		if (skip_ws && git__isspace(c))
			continue;

		*out = c;
		return 0;
	}

	return -1;
}
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
/*
 * Generic line-oriented parse cursor over a caller-provided buffer.
 * git_parse_ctx_init() stores the buffer pointer as given; it does not
 * copy the data.
 */
typedef struct {
/* Original content buffer */
const char *content;
size_t content_len;
/* The remaining (unparsed) buffer */
/* Note: the git_parse_advance_* helpers update `remain_len` and `line` but leave `remain` itself unchanged */
const char *remain;
size_t remain_len;
/* Current position within the current line */
const char *line;
/* Bytes left on the current line, counted from `line` */
size_t line_len;
/* Line number of the current line; starts at 1 */
size_t line_num;
} git_parse_ctx;
int git_parse_ctx_init(git_parse_ctx *ctx, const char *content, size_t content_len);
void git_parse_ctx_clear(git_parse_ctx *ctx);
/*
 * Record an error of class GITERR_PATCH via giterr_set() using the given
 * arguments, and evaluate to -1 so it can be used directly in a `return`
 * statement or an assignment.
 */
#define git_parse_err(...) \
( giterr_set(GITERR_PATCH, __VA_ARGS__), -1 )
/*
 * Convenience wrapper around git_parse_ctx_contains() for string
 * literals. The length is taken as sizeof(str) - 1, so `str` must be a
 * literal or array, not a pointer.
 */
#define git_parse_ctx_contains_s(ctx, str) \
git_parse_ctx_contains(ctx, str, sizeof(str) - 1)
/*
 * Report whether the current line, from the cursor onwards, begins with
 * the `len` bytes at `str`. Nothing is consumed.
 */
GIT_INLINE(bool) git_parse_ctx_contains(
	git_parse_ctx *ctx, const char *str, size_t len)
{
	if (len > ctx->line_len)
		return 0;

	return memcmp(ctx->line, str, len) == 0;
}
void git_parse_advance_line(git_parse_ctx *ctx);
void git_parse_advance_chars(git_parse_ctx *ctx, size_t char_cnt);
int git_parse_advance_expected(
git_parse_ctx *ctx,
const char *expected,
size_t expected_len);
/*
 * Convenience wrapper around git_parse_advance_expected() for
 * NUL-terminated strings; the length is computed with strlen(). `str`
 * appears twice in the expansion, so it must not have side effects.
 */
#define git_parse_advance_expected_str(ctx, str) \
git_parse_advance_expected(ctx, str, strlen(str))
int git_parse_advance_ws(git_parse_ctx *ctx);
int git_parse_advance_nl(git_parse_ctx *ctx);
int git_parse_advance_digit(int64_t *out, git_parse_ctx *ctx, int base);
/* Bit flags accepted by git_parse_peek(). */
enum GIT_PARSE_PEEK_FLAGS {
/* Pass over whitespace characters before reporting the next character */
GIT_PARSE_PEEK_SKIP_WHITESPACE = (1 << 0)
};
int git_parse_peek(char *out, git_parse_ctx *ctx, int flags);
......@@ -12,9 +12,6 @@
#include "diff_parse.h"
#include "path.h"
#define parse_err(...) \
( giterr_set(GITERR_PATCH, __VA_ARGS__), -1 )
typedef struct {
git_patch base;
......@@ -36,89 +33,21 @@ typedef struct {
char *old_prefix, *new_prefix;
} git_patch_parsed;
GIT_INLINE(bool) parse_ctx_contains(
git_patch_parse_ctx *ctx, const char *str, size_t len)
{
return (ctx->line_len >= len && memcmp(ctx->line, str, len) == 0);
}
#define parse_ctx_contains_s(ctx, str) \
parse_ctx_contains(ctx, str, sizeof(str) - 1)
static void parse_advance_line(git_patch_parse_ctx *ctx)
{
ctx->line += ctx->line_len;
ctx->remain_len -= ctx->line_len;
ctx->line_len = git__linenlen(ctx->line, ctx->remain_len);
ctx->line_num++;
}
static void parse_advance_chars(git_patch_parse_ctx *ctx, size_t char_cnt)
{
ctx->line += char_cnt;
ctx->remain_len -= char_cnt;
ctx->line_len -= char_cnt;
}
static int parse_advance_expected(
git_patch_parse_ctx *ctx,
const char *expected,
size_t expected_len)
{
if (ctx->line_len < expected_len)
return -1;
if (memcmp(ctx->line, expected, expected_len) != 0)
return -1;
parse_advance_chars(ctx, expected_len);
return 0;
}
#define parse_advance_expected_str(ctx, str) \
parse_advance_expected(ctx, str, strlen(str))
static int parse_advance_ws(git_patch_parse_ctx *ctx)
{
int ret = -1;
while (ctx->line_len > 0 &&
ctx->line[0] != '\n' &&
git__isspace(ctx->line[0])) {
ctx->line++;
ctx->line_len--;
ctx->remain_len--;
ret = 0;
}
return ret;
}
static int parse_advance_nl(git_patch_parse_ctx *ctx)
{
if (ctx->line_len != 1 || ctx->line[0] != '\n')
return -1;
parse_advance_line(ctx);
return 0;
}
static int header_path_len(git_patch_parse_ctx *ctx)
{
bool inquote = 0;
bool quoted = (ctx->line_len > 0 && ctx->line[0] == '"');
bool quoted = git_parse_ctx_contains_s(&ctx->parse_ctx, "\"");
size_t len;
for (len = quoted; len < ctx->line_len; len++) {
if (!quoted && git__isspace(ctx->line[len]))
for (len = quoted; len < ctx->parse_ctx.line_len; len++) {
if (!quoted && git__isspace(ctx->parse_ctx.line[len]))
break;
else if (quoted && !inquote && ctx->line[len] == '"') {
else if (quoted && !inquote && ctx->parse_ctx.line[len] == '"') {
len++;
break;
}
inquote = (!inquote && ctx->line[len] == '\\');
inquote = (!inquote && ctx->parse_ctx.line[len] == '\\');
}
return len;
......@@ -130,10 +59,10 @@ static int parse_header_path_buf(git_buf *path, git_patch_parse_ctx *ctx)
path_len = header_path_len(ctx);
if ((error = git_buf_put(path, ctx->line, path_len)) < 0)
if ((error = git_buf_put(path, ctx->parse_ctx.line, path_len)) < 0)
goto done;
parse_advance_chars(ctx, path_len);
git_parse_advance_chars(&ctx->parse_ctx, path_len);
git_buf_rtrim(path);
......@@ -173,24 +102,17 @@ static int parse_header_git_newpath(
static int parse_header_mode(uint16_t *mode, git_patch_parse_ctx *ctx)
{
const char *end;
int32_t m;
int ret;
if (ctx->line_len < 1 || !git__isdigit(ctx->line[0]))
return parse_err("invalid file mode at line %"PRIuZ, ctx->line_num);
int64_t m;
if ((ret = git__strntol32(&m, ctx->line, ctx->line_len, &end, 8)) < 0)
return ret;
if ((git_parse_advance_digit(&m, &ctx->parse_ctx, 8)) < 0)
return git_parse_err("invalid file mode at line %"PRIuZ, ctx->parse_ctx.line_num);
if (m > UINT16_MAX)
return -1;
*mode = (uint16_t)m;
parse_advance_chars(ctx, (end - ctx->line));
return ret;
return 0;
}
static int parse_header_oid(
......@@ -200,17 +122,17 @@ static int parse_header_oid(
{
size_t len;
for (len = 0; len < ctx->line_len && len < GIT_OID_HEXSZ; len++) {
if (!git__isxdigit(ctx->line[len]))
for (len = 0; len < ctx->parse_ctx.line_len && len < GIT_OID_HEXSZ; len++) {
if (!git__isxdigit(ctx->parse_ctx.line[len]))
break;
}
if (len < GIT_OID_MINPREFIXLEN || len > GIT_OID_HEXSZ ||
git_oid_fromstrn(oid, ctx->line, len) < 0)
return parse_err("invalid hex formatted object id at line %"PRIuZ,
ctx->line_num);
git_oid_fromstrn(oid, ctx->parse_ctx.line, len) < 0)
return git_parse_err("invalid hex formatted object id at line %"PRIuZ,
ctx->parse_ctx.line_num);
parse_advance_chars(ctx, len);
git_parse_advance_chars(&ctx->parse_ctx, len);
*oid_len = (uint16_t)len;
......@@ -220,17 +142,19 @@ static int parse_header_oid(
static int parse_header_git_index(
git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
char c;
if (parse_header_oid(&patch->base.delta->old_file.id,
&patch->base.delta->old_file.id_abbrev, ctx) < 0 ||
parse_advance_expected_str(ctx, "..") < 0 ||
git_parse_advance_expected_str(&ctx->parse_ctx, "..") < 0 ||
parse_header_oid(&patch->base.delta->new_file.id,
&patch->base.delta->new_file.id_abbrev, ctx) < 0)
return -1;
if (ctx->line_len > 0 && ctx->line[0] == ' ') {
if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ' ') {
uint16_t mode;
parse_advance_chars(ctx, 1);
git_parse_advance_chars(&ctx->parse_ctx, 1);
if (parse_header_mode(&mode, ctx) < 0)
return -1;
......@@ -329,19 +253,15 @@ static int parse_header_copyto(
static int parse_header_percent(uint16_t *out, git_patch_parse_ctx *ctx)
{
int32_t val;
const char *end;
int64_t val;
if (ctx->line_len < 1 || !git__isdigit(ctx->line[0]) ||
git__strntol32(&val, ctx->line, ctx->line_len, &end, 10) < 0)
if (git_parse_advance_digit(&val, &ctx->parse_ctx, 10) < 0)
return -1;
parse_advance_chars(ctx, (end - ctx->line));
if (parse_advance_expected_str(ctx, "%") < 0)
if (git_parse_advance_expected_str(&ctx->parse_ctx, "%") < 0)
return -1;
if (val > 100)
if (val < 0 || val > 100)
return -1;
*out = val;
......@@ -352,8 +272,8 @@ static int parse_header_similarity(
git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
if (parse_header_percent(&patch->base.delta->similarity, ctx) < 0)
return parse_err("invalid similarity percentage at line %"PRIuZ,
ctx->line_num);
return git_parse_err("invalid similarity percentage at line %"PRIuZ,
ctx->parse_ctx.line_num);
return 0;
}
......@@ -364,8 +284,8 @@ static int parse_header_dissimilarity(
uint16_t dissimilarity;
if (parse_header_percent(&dissimilarity, ctx) < 0)
return parse_err("invalid similarity percentage at line %"PRIuZ,
ctx->line_num);
return git_parse_err("invalid similarity percentage at line %"PRIuZ,
ctx->parse_ctx.line_num);
patch->base.delta->similarity = 100 - dissimilarity;
......@@ -375,13 +295,13 @@ static int parse_header_dissimilarity(
static int parse_header_start(git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
if (parse_header_path(&patch->header_old_path, ctx) < 0)
return parse_err("corrupt old path in git diff header at line %"PRIuZ,
ctx->line_num);
return git_parse_err("corrupt old path in git diff header at line %"PRIuZ,
ctx->parse_ctx.line_num);
if (parse_advance_ws(ctx) < 0 ||
if (git_parse_advance_ws(&ctx->parse_ctx) < 0 ||
parse_header_path(&patch->header_new_path, ctx) < 0)
return parse_err("corrupt new path in git diff header at line %"PRIuZ,
ctx->line_num);
return git_parse_err("corrupt new path in git diff header at line %"PRIuZ,
ctx->parse_ctx.line_num);
return 0;
}
......@@ -451,10 +371,10 @@ static int parse_header_git(
parse_header_state state = STATE_START;
/* Parse remaining header lines */
for (; ctx->remain_len > 0; parse_advance_line(ctx)) {
for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
bool found = false;
if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n')
if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n')
break;
for (i = 0; i < ARRAY_SIZE(transitions); i++) {
......@@ -462,7 +382,7 @@ static int parse_header_git(
size_t op_len = strlen(transition->str);
if (transition->expected_state != state ||
memcmp(ctx->line, transition->str, min(op_len, ctx->line_len)) != 0)
git__prefixcmp(ctx->parse_ctx.line, transition->str) != 0)
continue;
state = transition->next_state;
......@@ -471,16 +391,16 @@ static int parse_header_git(
if (transition->fn == NULL)
goto done;
parse_advance_chars(ctx, op_len);
git_parse_advance_chars(&ctx->parse_ctx, op_len);
if ((error = transition->fn(patch, ctx)) < 0)
goto done;
parse_advance_ws(ctx);
git_parse_advance_ws(&ctx->parse_ctx);
if (parse_advance_expected_str(ctx, "\n") < 0 ||
ctx->line_len > 0) {
error = parse_err("trailing data at line %"PRIuZ, ctx->line_num);
if (git_parse_advance_expected_str(&ctx->parse_ctx, "\n") < 0 ||
ctx->parse_ctx.line_len > 0) {
error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
......@@ -489,14 +409,14 @@ static int parse_header_git(
}
if (!found) {
error = parse_err("invalid patch header at line %"PRIuZ,
ctx->line_num);
error = git_parse_err("invalid patch header at line %"PRIuZ,
ctx->parse_ctx.line_num);
goto done;
}
}
if (state != STATE_END) {
error = parse_err("unexpected header line %"PRIuZ, ctx->line_num);
error = git_parse_err("unexpected header line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
......@@ -509,17 +429,17 @@ static int parse_number(git_off_t *out, git_patch_parse_ctx *ctx)
const char *end;
int64_t num;
if (!git__isdigit(ctx->line[0]))
if (!git__isdigit(ctx->parse_ctx.line[0]))
return -1;
if (git__strntol64(&num, ctx->line, ctx->line_len, &end, 10) < 0)
if (git__strntol64(&num, ctx->parse_ctx.line, ctx->parse_ctx.line_len, &end, 10) < 0)
return -1;
if (num < 0)
return -1;
*out = num;
parse_advance_chars(ctx, (end - ctx->line));
git_parse_advance_chars(&ctx->parse_ctx, (end - ctx->parse_ctx.line));
return 0;
}
......@@ -528,7 +448,7 @@ static int parse_int(int *out, git_patch_parse_ctx *ctx)
{
git_off_t num;
if (parse_number(&num, ctx) < 0 || !git__is_int(num))
if (git_parse_advance_digit(&num, &ctx->parse_ctx, 10) < 0 || !git__is_int(num))
return -1;
*out = (int)num;
......@@ -539,43 +459,44 @@ static int parse_hunk_header(
git_patch_hunk *hunk,
git_patch_parse_ctx *ctx)
{
const char *header_start = ctx->line;
const char *header_start = ctx->parse_ctx.line;
char c;
hunk->hunk.old_lines = 1;
hunk->hunk.new_lines = 1;
if (parse_advance_expected_str(ctx, "@@ -") < 0 ||
if (git_parse_advance_expected_str(&ctx->parse_ctx, "@@ -") < 0 ||
parse_int(&hunk->hunk.old_start, ctx) < 0)
goto fail;
if (ctx->line_len > 0 && ctx->line[0] == ',') {
if (parse_advance_expected_str(ctx, ",") < 0 ||
if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
parse_int(&hunk->hunk.old_lines, ctx) < 0)
goto fail;
}
if (parse_advance_expected_str(ctx, " +") < 0 ||
if (git_parse_advance_expected_str(&ctx->parse_ctx, " +") < 0 ||
parse_int(&hunk->hunk.new_start, ctx) < 0)
goto fail;
if (ctx->line_len > 0 && ctx->line[0] == ',') {
if (parse_advance_expected_str(ctx, ",") < 0 ||
if (git_parse_peek(&c, &ctx->parse_ctx, 0) == 0 && c == ',') {
if (git_parse_advance_expected_str(&ctx->parse_ctx, ",") < 0 ||
parse_int(&hunk->hunk.new_lines, ctx) < 0)
goto fail;
}
if (parse_advance_expected_str(ctx, " @@") < 0)
if (git_parse_advance_expected_str(&ctx->parse_ctx, " @@") < 0)
goto fail;
parse_advance_line(ctx);
git_parse_advance_line(&ctx->parse_ctx);
if (!hunk->hunk.old_lines && !hunk->hunk.new_lines)
goto fail;
hunk->hunk.header_len = ctx->line - header_start;
hunk->hunk.header_len = ctx->parse_ctx.line - header_start;
if (hunk->hunk.header_len > (GIT_DIFF_HUNK_HEADER_SIZE - 1))
return parse_err("oversized patch hunk header at line %"PRIuZ,
ctx->line_num);
return git_parse_err("oversized patch hunk header at line %"PRIuZ,
ctx->parse_ctx.line_num);
memcpy(hunk->hunk.header, header_start, hunk->hunk.header_len);
hunk->hunk.header[hunk->hunk.header_len] = '\0';
......@@ -584,7 +505,7 @@ static int parse_hunk_header(
fail:
giterr_set(GITERR_PATCH, "invalid patch hunk header at line %"PRIuZ,
ctx->line_num);
ctx->parse_ctx.line_num);
return -1;
}
......@@ -600,21 +521,24 @@ static int parse_hunk_body(
int newlines = hunk->hunk.new_lines;
for (;
ctx->remain_len > 1 &&
ctx->parse_ctx.remain_len > 1 &&
(oldlines || newlines) &&
(ctx->remain_len <= 4 || memcmp(ctx->line, "@@ -", 4) != 0);
parse_advance_line(ctx)) {
!git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -");
git_parse_advance_line(&ctx->parse_ctx)) {
char c;
int origin;
int prefix = 1;
if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n') {
error = parse_err("invalid patch instruction at line %"PRIuZ,
ctx->line_num);
if (ctx->parse_ctx.line_len == 0 || ctx->parse_ctx.line[ctx->parse_ctx.line_len - 1] != '\n') {
error = git_parse_err("invalid patch instruction at line %"PRIuZ,
ctx->parse_ctx.line_num);
goto done;
}
switch (ctx->line[0]) {
git_parse_peek(&c, &ctx->parse_ctx, 0);
switch (c) {
case '\n':
prefix = 0;
......@@ -635,7 +559,7 @@ static int parse_hunk_body(
break;
default:
error = parse_err("invalid patch hunk at line %"PRIuZ, ctx->line_num);
error = git_parse_err("invalid patch hunk at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
......@@ -644,16 +568,16 @@ static int parse_hunk_body(
memset(line, 0x0, sizeof(git_diff_line));
line->content = ctx->line + prefix;
line->content_len = ctx->line_len - prefix;
line->content_offset = ctx->content_len - ctx->remain_len;
line->content = ctx->parse_ctx.line + prefix;
line->content_len = ctx->parse_ctx.line_len - prefix;
line->content_offset = ctx->parse_ctx.content_len - ctx->parse_ctx.remain_len;
line->origin = origin;
hunk->line_count++;
}
if (oldlines || newlines) {
error = parse_err(
error = git_parse_err(
"invalid patch hunk, expected %d old lines and %d new lines",
hunk->hunk.old_lines, hunk->hunk.new_lines);
goto done;
......@@ -664,19 +588,19 @@ static int parse_hunk_body(
* localized. Because `diff` optimizes for the case where you
* want to apply the patch by hand.
*/
if (parse_ctx_contains_s(ctx, "\\ ") &&
if (git_parse_ctx_contains_s(&ctx->parse_ctx, "\\ ") &&
git_array_size(patch->base.lines) > 0) {
line = git_array_get(patch->base.lines, git_array_size(patch->base.lines) - 1);
if (line->content_len < 1) {
error = parse_err("cannot trim trailing newline of empty line");
error = git_parse_err("cannot trim trailing newline of empty line");
goto done;
}
line->content_len--;
parse_advance_line(ctx);
git_parse_advance_line(&ctx->parse_ctx);
}
done:
......@@ -689,18 +613,15 @@ static int parse_patch_header(
{
int error = 0;
for (ctx->line = ctx->remain;
ctx->remain_len > 0;
parse_advance_line(ctx)) {
for (; ctx->parse_ctx.remain_len > 0; git_parse_advance_line(&ctx->parse_ctx)) {
/* This line is too short to be a patch header. */
if (ctx->line_len < 6)
if (ctx->parse_ctx.line_len < 6)
continue;
/* This might be a hunk header without a patch header, provide a
* sensible error message. */
if (parse_ctx_contains_s(ctx, "@@ -")) {
size_t line_num = ctx->line_num;
if (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
size_t line_num = ctx->parse_ctx.line_num;
git_patch_hunk hunk;
/* If this cannot be parsed as a hunk header, it's just leading
......@@ -711,17 +632,17 @@ static int parse_patch_header(
continue;
}
error = parse_err("invalid hunk header outside patch at line %"PRIuZ,
error = git_parse_err("invalid hunk header outside patch at line %"PRIuZ,
line_num);
goto done;
}
/* This buffer is too short to contain a patch. */
if (ctx->remain_len < ctx->line_len + 6)
if (ctx->parse_ctx.remain_len < ctx->parse_ctx.line_len + 6)
break;
/* A proper git patch */
if (parse_ctx_contains_s(ctx, "diff --git ")) {
if (git_parse_ctx_contains_s(&ctx->parse_ctx, "diff --git ")) {
error = parse_header_git(patch, ctx);
goto done;
}
......@@ -746,27 +667,30 @@ static int parse_patch_binary_side(
git_off_t len;
int error = 0;
if (parse_ctx_contains_s(ctx, "literal ")) {
if (git_parse_ctx_contains_s(&ctx->parse_ctx, "literal ")) {
type = GIT_DIFF_BINARY_LITERAL;
parse_advance_chars(ctx, 8);
} else if (parse_ctx_contains_s(ctx, "delta ")) {
git_parse_advance_chars(&ctx->parse_ctx, 8);
} else if (git_parse_ctx_contains_s(&ctx->parse_ctx, "delta ")) {
type = GIT_DIFF_BINARY_DELTA;
parse_advance_chars(ctx, 6);
git_parse_advance_chars(&ctx->parse_ctx, 6);
} else {
error = parse_err(
"unknown binary delta type at line %"PRIuZ, ctx->line_num);
error = git_parse_err(
"unknown binary delta type at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
if (parse_number(&len, ctx) < 0 || parse_advance_nl(ctx) < 0 || len < 0) {
error = parse_err("invalid binary size at line %"PRIuZ, ctx->line_num);
if (git_parse_advance_digit(&len, &ctx->parse_ctx, 10) < 0 ||
git_parse_advance_nl(&ctx->parse_ctx) < 0 || len < 0) {
error = git_parse_err("invalid binary size at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
while (ctx->line_len) {
char c = ctx->line[0];
while (ctx->parse_ctx.line_len) {
char c;
size_t encoded_len, decoded_len = 0, decoded_orig = decoded.size;
git_parse_peek(&c, &ctx->parse_ctx, 0);
if (c == '\n')
break;
else if (c >= 'A' && c <= 'Z')
......@@ -775,32 +699,32 @@ static int parse_patch_binary_side(
decoded_len = c - 'a' + (('z' - 'a') + 1) + 1;
if (!decoded_len) {
error = parse_err("invalid binary length at line %"PRIuZ, ctx->line_num);
error = git_parse_err("invalid binary length at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
parse_advance_chars(ctx, 1);
git_parse_advance_chars(&ctx->parse_ctx, 1);
encoded_len = ((decoded_len / 4) + !!(decoded_len % 4)) * 5;
if (encoded_len > ctx->line_len - 1) {
error = parse_err("truncated binary data at line %"PRIuZ, ctx->line_num);
if (encoded_len > ctx->parse_ctx.line_len - 1) {
error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
if ((error = git_buf_decode_base85(
&decoded, ctx->line, encoded_len, decoded_len)) < 0)
&decoded, ctx->parse_ctx.line, encoded_len, decoded_len)) < 0)
goto done;
if (decoded.size - decoded_orig != decoded_len) {
error = parse_err("truncated binary data at line %"PRIuZ, ctx->line_num);
error = git_parse_err("truncated binary data at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
parse_advance_chars(ctx, encoded_len);
git_parse_advance_chars(&ctx->parse_ctx, encoded_len);
if (parse_advance_nl(ctx) < 0) {
error = parse_err("trailing data at line %"PRIuZ, ctx->line_num);
if (git_parse_advance_nl(&ctx->parse_ctx) < 0) {
error = git_parse_err("trailing data at line %"PRIuZ, ctx->parse_ctx.line_num);
goto done;
}
}
......@@ -822,27 +746,27 @@ static int parse_patch_binary(
{
int error;
if (parse_advance_expected_str(ctx, "GIT binary patch") < 0 ||
parse_advance_nl(ctx) < 0)
return parse_err("corrupt git binary header at line %"PRIuZ, ctx->line_num);
if (git_parse_advance_expected_str(&ctx->parse_ctx, "GIT binary patch") < 0 ||
git_parse_advance_nl(&ctx->parse_ctx) < 0)
return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
/* parse old->new binary diff */
if ((error = parse_patch_binary_side(
&patch->base.binary.new_file, ctx)) < 0)
return error;
if (parse_advance_nl(ctx) < 0)
return parse_err("corrupt git binary separator at line %"PRIuZ,
ctx->line_num);
if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
return git_parse_err("corrupt git binary separator at line %"PRIuZ,
ctx->parse_ctx.line_num);
/* parse new->old binary diff */
if ((error = parse_patch_binary_side(
&patch->base.binary.old_file, ctx)) < 0)
return error;
if (parse_advance_nl(ctx) < 0)
return parse_err("corrupt git binary patch separator at line %"PRIuZ,
ctx->line_num);
if (git_parse_advance_nl(&ctx->parse_ctx) < 0)
return git_parse_err("corrupt git binary patch separator at line %"PRIuZ,
ctx->parse_ctx.line_num);
patch->base.binary.contains_data = 1;
patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
......@@ -853,13 +777,13 @@ static int parse_patch_binary_nodata(
git_patch_parsed *patch,
git_patch_parse_ctx *ctx)
{
if (parse_advance_expected_str(ctx, "Binary files ") < 0 ||
parse_advance_expected_str(ctx, patch->header_old_path) < 0 ||
parse_advance_expected_str(ctx, " and ") < 0 ||
parse_advance_expected_str(ctx, patch->header_new_path) < 0 ||
parse_advance_expected_str(ctx, " differ") < 0 ||
parse_advance_nl(ctx) < 0)
return parse_err("corrupt git binary header at line %"PRIuZ, ctx->line_num);
if (git_parse_advance_expected_str(&ctx->parse_ctx, "Binary files ") < 0 ||
git_parse_advance_expected_str(&ctx->parse_ctx, patch->header_old_path) < 0 ||
git_parse_advance_expected_str(&ctx->parse_ctx, " and ") < 0 ||
git_parse_advance_expected_str(&ctx->parse_ctx, patch->header_new_path) < 0 ||
git_parse_advance_expected_str(&ctx->parse_ctx, " differ") < 0 ||
git_parse_advance_nl(&ctx->parse_ctx) < 0)
return git_parse_err("corrupt git binary header at line %"PRIuZ, ctx->parse_ctx.line_num);
patch->base.binary.contains_data = 0;
patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
......@@ -873,7 +797,7 @@ static int parse_patch_hunks(
git_patch_hunk *hunk;
int error = 0;
while (parse_ctx_contains_s(ctx, "@@ -")) {
while (git_parse_ctx_contains_s(&ctx->parse_ctx, "@@ -")) {
hunk = git_array_alloc(patch->base.hunks);
GITERR_CHECK_ALLOC(hunk);
......@@ -896,9 +820,9 @@ done:
static int parse_patch_body(
git_patch_parsed *patch, git_patch_parse_ctx *ctx)
{
if (parse_ctx_contains_s(ctx, "GIT binary patch"))
if (git_parse_ctx_contains_s(&ctx->parse_ctx, "GIT binary patch"))
return parse_patch_binary(patch, ctx);
else if (parse_ctx_contains_s(ctx, "Binary files "))
else if (git_parse_ctx_contains_s(&ctx->parse_ctx, "Binary files "))
return parse_patch_binary_nodata(patch, ctx);
else
return parse_patch_hunks(patch, ctx);
......@@ -914,10 +838,10 @@ int check_header_names(
return 0;
if (two_null && strcmp(two, "/dev/null") != 0)
return parse_err("expected %s path of '/dev/null'", old_or_new);
return git_parse_err("expected %s path of '/dev/null'", old_or_new);
else if (!two_null && strcmp(one, two) != 0)
return parse_err("mismatched %s path names", old_or_new);
return git_parse_err("mismatched %s path names", old_or_new);
return 0;
}
......@@ -950,7 +874,7 @@ static int check_prefix(
}
if (remain_len || !*path)
return parse_err(
return git_parse_err(
"header filename does not contain %"PRIuZ" path components",
prefix_len);
......@@ -969,10 +893,10 @@ static int check_filenames(git_patch_parsed *patch)
bool deleted = (patch->base.delta->status == GIT_DELTA_DELETED);
if (patch->old_path && !patch->new_path)
return parse_err("missing new path");
return git_parse_err("missing new path");
if (!patch->old_path && patch->new_path)
return parse_err("missing old path");
return git_parse_err("missing old path");
/* Ensure (non-renamed) paths match */
if (check_header_names(
......@@ -1005,7 +929,7 @@ static int check_filenames(git_patch_parsed *patch)
if (!patch->base.delta->old_file.path &&
!patch->base.delta->new_file.path)
return parse_err("git diff header lacks old / new paths");
return git_parse_err("git diff header lacks old / new paths");
return 0;
}
......@@ -1026,7 +950,7 @@ static int check_patch(git_patch_parsed *patch)
!(delta->flags & GIT_DIFF_FLAG_BINARY) &&
delta->new_file.mode == delta->old_file.mode &&
git_array_size(patch->base.hunks) == 0)
return parse_err("patch with no hunks");
return git_parse_err("patch with no hunks");
if (delta->status == GIT_DELTA_ADDED) {
memset(&delta->old_file.id, 0x0, sizeof(git_oid));
......@@ -1052,19 +976,11 @@ git_patch_parse_ctx *git_patch_parse_ctx_init(
if ((ctx = git__calloc(1, sizeof(git_patch_parse_ctx))) == NULL)
return NULL;
if (content_len) {
if ((ctx->content = git__malloc(content_len)) == NULL) {
if ((git_parse_ctx_init(&ctx->parse_ctx, content, content_len)) < 0) {
git__free(ctx);
return NULL;
}
memcpy((char *)ctx->content, content, content_len);
}
ctx->content_len = content_len;
ctx->remain = ctx->content;
ctx->remain_len = ctx->content_len;
if (opts)
memcpy(&ctx->opts, opts, sizeof(git_patch_options));
else
......@@ -1079,7 +995,7 @@ static void patch_parse_ctx_free(git_patch_parse_ctx *ctx)
if (!ctx)
return;
git__free((char *)ctx->content);
git_parse_ctx_clear(&ctx->parse_ctx);
git__free(ctx);
}
......@@ -1154,15 +1070,15 @@ int git_patch_parse(
patch->base.delta->status = GIT_DELTA_MODIFIED;
patch->base.delta->nfiles = 2;
start = ctx->remain_len;
start = ctx->parse_ctx.remain_len;
if ((error = parse_patch_header(patch, ctx)) < 0 ||
(error = parse_patch_body(patch, ctx)) < 0 ||
(error = check_patch(patch)) < 0)
goto done;
used = start - ctx->remain_len;
ctx->remain += used;
used = start - ctx->parse_ctx.remain_len;
ctx->parse_ctx.remain += used;
patch->base.diff_opts.old_prefix = patch->old_prefix;
patch->base.diff_opts.new_prefix = patch->new_prefix;
......
......@@ -9,24 +9,15 @@
#include "common.h"
#include "parse.h"
#include "patch.h"
typedef struct {
git_refcount rc;
/* Original content buffer */
const char *content;
size_t content_len;
git_patch_options opts;
/* The remaining (unparsed) buffer */
const char *remain;
size_t remain_len;
const char *line;
size_t line_len;
size_t line_num;
git_parse_ctx parse_ctx;
} git_patch_parse_ctx;
extern git_patch_parse_ctx *git_patch_parse_ctx_init(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment