Commit 47a899ff by Vicent Martí

filter: Beautiful refactoring

Comments soothe my soul.
parent 788430c8
......@@ -115,19 +115,18 @@ static int write_file_filtered(
if (error < GIT_SUCCESS)
return error;
error = git_filter__apply(&dest, &source, filters);
error = git_filters_apply(&dest, &source, filters);
if (error < GIT_SUCCESS) {
/* Free the source as soon as possible. This can be big in memory,
* and we don't want the ODB write to choke */
git_buf_free(&source);
git_buf_free(&dest);
return error;
}
if (error == GIT_SUCCESS) {
/* Write the file to disk if it was properly filtered */
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
}
git_buf_free(&source);
git_buf_free(&dest);
return GIT_SUCCESS;
}
......@@ -186,18 +185,25 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat
error = write_symlink(oid, odb, full_path.ptr, (size_t)size);
} else {
git_vector write_filters = GIT_VECTOR_INIT;
int filter_count;
if ((error = git_filter__load_for_file(
&write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS)
goto cleanup;
/* Load the filters for writing this file to the ODB */
filter_count = git_filters_load(&write_filters, repo, path, GIT_FILTER_TO_ODB);
if (write_filters.length == 0) {
if (filter_count < 0) {
/* Negative value means there was a critical error */
error = filter_count;
goto cleanup;
} else if (filter_count == 0) {
/* No filters need to be applied to the document: we can stream
* directly from disk */
error = write_file_stream(oid, odb, full_path.ptr, size);
} else {
/* We need to apply one or more filters */
error = write_file_filtered(oid, odb, full_path.ptr, &write_filters);
}
git_filter__free(&write_filters);
git_filters_free(&write_filters);
/*
* TODO: eventually support streaming filtered files, for files which are bigger
......
......@@ -102,18 +102,74 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
return error;
}
static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
/*
 * Copy `source` into `dest`, dropping every carriage return that is
 * immediately followed by a newline (CRLF becomes LF). Carriage returns
 * that are not followed by a newline are copied through untouched.
 *
 * @param dest Buffer that receives the converted text
 * @param source Buffer holding the original text
 * @return 0 if `dest` was filled with the converted text, -1 if there is
 *	no carriage return before the last byte of `source` (nothing to drop,
 *	`dest` is left untouched so the caller can skip this filter)
 */
static int drop_crlf(git_buf *dest, const git_buf *source)
{
	size_t psize;
	size_t i = 0;

	/* An empty buffer has nothing to drop. Bail out before computing
	 * `size - 1`, which would wrap around for a size of zero. */
	if (source->size == 0)
		return -1;

	/* Index of the last byte. It is never a candidate for dropping:
	 * a carriage return in the last position cannot be followed by
	 * a newline. */
	psize = source->size - 1;

	/* Initial scan: see if we can reach the end of the document
	 * without finding a single carriage return */
	while (i < psize && source->ptr[i] != '\r')
		i++;

	/* Clean file? Tell the library to skip this filter */
	if (i == psize)
		return -1;

	/* Everything that precedes the first carriage return is kept
	 * verbatim; the main loop below only starts at that CR. */
	if (i > 0)
		git_buf_put(dest, source->ptr, i);

	/* Main scan loop. Keep moving forward until we find a carriage
	 * return, and then copy the whole chunk to the destination
	 * buffer. */
	while (i < psize) {
		size_t org = i;

		while (i < psize && source->ptr[i] != '\r')
			i++;

		if (i > org)
			git_buf_put(dest, source->ptr + org, i - org);

		/* The scan stopped because it ran out of input, not because
		 * it found a carriage return: there is nothing to decide
		 * about, and `ptr[i + 1]` would be past the last byte. */
		if (i == psize)
			break;

		/* We found a carriage return. If the next character is a
		 * newline, the CR is dropped and the newline is copied as
		 * part of the next chunk (or as the last byte below).
		 * Otherwise the CR is a lone one and must be preserved. */
		if (source->ptr[i + 1] != '\n')
			git_buf_putc(dest, '\r');

		i++;
	}

	/* Copy the last character in the file */
	git_buf_putc(dest, source->ptr[psize]);

	return 0;
}
static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
{
struct crlf_filter *filter = (struct crlf_filter *)self;
assert(self && dest && source);
/* Empty file? Nothing to do */
if (source->size == 0)
return 0;
/* Heuristics to see if we can skip the conversion.
* Straight from Core Git.
*/
if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
filter->attrs.crlf_action == GIT_CRLF_GUESS) {
git_text_stats stats;
git_text__stat(&stats, source);
git_text_gather_stats(&stats, source);
/*
* We're currently not going to even try to convert stuff
......@@ -126,7 +182,7 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
/*
* And add some heuristics for binary vs text, of course...
*/
if (git_text__is_binary(&stats))
if (git_text_is_binary(&stats))
return -1;
#if 0
......@@ -144,50 +200,42 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
return -1;
}
/* TODO: do not copy anything if there isn't a single CR */
while (i < source->size) {
size_t org = i;
while (i < source->size && source->ptr[i] != '\r')
i++;
if (i > org)
git_buf_put(dest, source->ptr + org, i - org);
i++;
if (i >= source->size || source->ptr[i] != '\n') {
git_buf_putc(dest, '\r');
}
}
return 0;
/* Actually drop the carriage returns */
return drop_crlf(dest, source);
}
int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path)
int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path)
{
struct crlf_filter filter;
struct crlf_attrs ca;
struct crlf_filter *filter;
int error;
filter.f.apply = &crlf_apply_to_odb;
filter.f.do_free = NULL;
if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0)
/* Load gitattributes for the path */
if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
return error;
filter.attrs.crlf_action = crlf_input_action(&filter.attrs);
/*
* Use the core Git logic to see if we should perform CRLF for this file
* based on its attributes & the value of `core.auto_crlf`
*/
ca.crlf_action = crlf_input_action(&ca);
if (filter.attrs.crlf_action == GIT_CRLF_BINARY)
if (ca.crlf_action == GIT_CRLF_BINARY)
return 0;
if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
if (ca.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE)
return 0;
*filter_out = git__malloc(sizeof(struct crlf_filter));
if (*filter_out == NULL)
/* If we're good, we create a new filter object and push it
* into the filters array */
filter = git__malloc(sizeof(struct crlf_filter));
if (filter == NULL)
return GIT_ENOMEM;
memcpy(*filter_out, &filter, sizeof(struct crlf_attrs));
return 0;
filter->f.apply = &crlf_apply_to_odb;
filter->f.do_free = NULL;
memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
return git_vector_insert(filters, filter);
}
......@@ -13,7 +13,7 @@
#include "git2/config.h"
/* Fresh from Core Git. I wonder what we could use this for... */
void git_text__stat(git_text_stats *stats, const git_buf *text)
void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
{
size_t i;
......@@ -65,7 +65,7 @@ void git_text__stat(git_text_stats *stats, const git_buf *text)
/*
* Fresh from Core Git
*/
int git_text__is_binary(git_text_stats *stats)
int git_text_is_binary(git_text_stats *stats)
{
if (stats->nul)
return 1;
......@@ -84,32 +84,74 @@ int git_text__is_binary(git_text_stats *stats)
return 0;
}
int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode)
static int load_repository_settings(git_repository *repo)
{
static git_cvar_map map_eol[] = {
{GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET},
{GIT_CVAR_STRING, "lf", GIT_EOL_LF},
{GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF},
{GIT_CVAR_STRING, "native", GIT_EOL_NATIVE}
};
static git_cvar_map map_crlf[] = {
{GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE},
{GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE},
{GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT}
};
git_config *config;
int error;
git_filter *crlf_filter = NULL;
error = git_filter__load_settings(repo);
if (repo->filter_options.loaded)
return GIT_SUCCESS;
repo->filter_options.eol = GIT_EOL_DEFAULT;
repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
error = git_repository_config__weakptr(&config, repo);
if (error < GIT_SUCCESS)
return error;
if (mode == GIT_FILTER_TO_ODB) {
error = git_filter__crlf_to_odb(&crlf_filter, repo, path);
if (error < GIT_SUCCESS)
error = git_config_get_mapped(
config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol);
if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
return error;
if (crlf_filter != NULL)
git_vector_insert(filters, crlf_filter);
error = git_config_get_mapped(
config, "core.auto_crlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf);
if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
return error;
repo->filter_options.loaded = 1;
return 0;
}
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{
int error;
/* Make sure that the relevant settings from `gitconfig` have been
* cached on the repository struct to speed things up */
error = load_repository_settings(repo);
if (error < GIT_SUCCESS)
return error;
if (mode == GIT_FILTER_TO_ODB) {
/* Load the CRLF cleanup filter when writing to the ODB */
error = git_filter_add__crlf_to_odb(filters, repo, path);
if (error < GIT_SUCCESS)
return error;
} else {
return git__throw(GIT_ENOTIMPLEMENTED,
"Worktree filters are not implemented yet");
}
return 0;
return (int)filters->length;
}
void git_filter__free(git_vector *filters)
void git_filters_free(git_vector *filters)
{
size_t i;
git_filter *filter;
......@@ -124,7 +166,7 @@ void git_filter__free(git_vector *filters)
git_vector_free(filters);
}
int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
{
unsigned int src, dst, i;
git_buf *dbuffer[2];
......@@ -134,6 +176,11 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
src = 0;
if (source->size == 0) {
git_buf_clear(dest);
return GIT_SUCCESS;
}
/* Pre-grow the destination buffer to more or less the size
* we expect it to have */
if (git_buf_grow(dest, source->size) < 0)
......@@ -167,46 +214,3 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters)
return GIT_SUCCESS;
}
/*
 * Cache the end-of-line related configuration of a repository in
 * `repo->filter_options`.
 *
 * The lookup is performed only once per repository: when the `loaded`
 * flag is already set the function returns immediately. A setting that
 * is missing from the configuration (GIT_ENOTFOUND) is not an error;
 * the default assigned below is kept for it.
 *
 * @param repo Repository whose filter options must be populated
 * @return GIT_SUCCESS (0) when the options are available, or the negative
 *	error code reported while obtaining or reading the configuration
 */
int git_filter__load_settings(git_repository *repo)
{
	static git_cvar_map map_eol[] = {
		{GIT_CVAR_FALSE, NULL, GIT_EOL_UNSET},
		{GIT_CVAR_STRING, "lf", GIT_EOL_LF},
		{GIT_CVAR_STRING, "crlf", GIT_EOL_CRLF},
		{GIT_CVAR_STRING, "native", GIT_EOL_NATIVE}
	};

	static git_cvar_map map_crlf[] = {
		{GIT_CVAR_FALSE, NULL, GIT_AUTO_CRLF_FALSE},
		{GIT_CVAR_TRUE, NULL, GIT_AUTO_CRLF_TRUE},
		{GIT_CVAR_STRING, "input", GIT_AUTO_CRLF_INPUT}
	};

	git_config *config;
	int error;

	/* Settings are cached on the repository after the first call */
	if (repo->filter_options.loaded)
		return GIT_SUCCESS;

	/* Start from the defaults so a missing key leaves a sane value */
	repo->filter_options.eol = GIT_EOL_DEFAULT;
	repo->filter_options.auto_crlf = GIT_AUTO_CRLF_DEFAULT;

	error = git_repository_config__weakptr(&config, repo);
	if (error < GIT_SUCCESS)
		return error;

	error = git_config_get_mapped(
		config, "core.eol", map_eol, ARRAY_SIZE(map_eol), &repo->filter_options.eol);

	if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
		return error;

	/* Git spells this setting `core.autocrlf`. Variable names in a git
	 * configuration file cannot contain underscores, so a lookup of
	 * `core.auto_crlf` could never match what the user configured. */
	error = git_config_get_mapped(
		config, "core.autocrlf", map_crlf, ARRAY_SIZE(map_crlf), &repo->filter_options.auto_crlf);

	if (error < GIT_SUCCESS && error != GIT_ENOTFOUND)
		return error;

	repo->filter_options.loaded = 1;
	return 0;
}
......@@ -60,19 +60,81 @@ typedef struct {
unsigned int printable, nonprintable;
} git_text_stats;
extern int git_filter__load_settings(git_repository *repo);
extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode);
extern void git_filter__free(git_vector *filters);
extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters);
/*
* FILTER API
*/
/*
* For any given path in the working directory, fill the `filters`
* array with the relevant filters that need to be applied.
*
* Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the
* filters that will be used when checking out a file to the working
* directory, or `GIT_FILTER_TO_ODB` for the filters used when writing
* a file to the ODB.
*
* @param filters Vector where to store all the loaded filters
* @param repo Repository object that contains `path`
* @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT)
* @return the number of filters loaded for the file (0 if the file
* doesn't need filtering), or a negative error code
*/
extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode);
/*
* Apply one or more filters to a file.
*
* The file must have been loaded as a `git_buf` object. Both the `source`
* and `dest` buffers are owned by the caller and must be freed once
* they are no longer needed.
*
* NOTE: Because of the double-buffering scheme, the `source` buffer that contains
* the original file may be tampered with once the filtering is complete. Regardless,
* the `dest` buffer will always contain the final result of the filtering
*
* @param dest Buffer to store the result of the filtering
* @param source Buffer containing the document to filter
* @param filters A non-empty vector of filters as supplied by `git_filters_load`
* @return GIT_SUCCESS on success, an error code otherwise
*/
extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters);
/*
* Free the `filters` array generated by `git_filters_load`.
*
* Note that this frees both the array and its contents. The array will
* be clean/reusable after this call.
*
* @param filters A filters array as supplied by `git_filters_load`
*/
extern void git_filters_free(git_vector *filters);
/*
* Available filters
*/
/* Gather stats for a piece of text */
extern void git_text__stat(git_text_stats *stats, const git_buf *text);
/* Strip CRLF, from Worktree to ODB */
extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path);
/* Heuristics on a set of text stats to check whether it's binary
* text or not */
extern int git_text__is_binary(git_text_stats *stats);
/* Available filters */
extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path);
/*
* PLAINTEXT API
*/
/*
* Gather stats for a piece of text
*
* Fill the `stats` structure with information on the number of
* unreadable characters, carriage returns, etc, so it can be
* used in heuristics.
*/
extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text);
/*
* Process `git_text_stats` data generated by `git_text_gather_stats` to see
* if it qualifies as a binary file
*/
extern int git_text_is_binary(git_text_stats *stats);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment