Commit 85d54812 by Russell Belfer

Create public filter object and use it

This creates include/git2/sys/filter.h with a basic definition of a
git_filter and then converts the internal code to use it.  There
are related internal objects (git_filter_list) that we will want
to publish at some point, but this is a first step.
parent 0cf77103
...@@ -39,7 +39,10 @@ typedef enum { ...@@ -39,7 +39,10 @@ typedef enum {
* A filter that can transform file data * A filter that can transform file data
* *
* This represents a filter that can be used to transform or even replace * This represents a filter that can be used to transform or even replace
* file data. Libgit2 currently includes one built in filter: * file data. Libgit2 includes one built in filter and it is possible to
* write your own (see git2/sys/filter.h for information on that).
*
* The built in filter is:
* *
* * "crlf" which uses the complex rules with the "text", "eol", and * * "crlf" which uses the complex rules with the "text", "eol", and
* "crlf" file attributes to decide how to convert between LF and CRLF * "crlf" file attributes to decide how to convert between LF and CRLF
...@@ -47,6 +50,17 @@ typedef enum { ...@@ -47,6 +50,17 @@ typedef enum {
*/ */
typedef struct git_filter git_filter; typedef struct git_filter git_filter;
/**
 * Look up a filter by name.
 *
 * NOTE(review): the implementation is not visible alongside this
 * declaration.  Presumably this returns the filter registered under
 * `name` (e.g. GIT_FILTER_CRLF), or NULL if there is none -- confirm.
 *
 * @param name The name of the filter to look up
 * @return Pointer to the filter object (ownership not established here)
 */
GIT_EXTERN(git_filter *) git_filter_lookup(const char *name);
/** Name given to the built in CRLF filter in the default filter registry. */
#define GIT_FILTER_CRLF "crlf"
/**
 * Apply a single filter to a buffer of data.
 *
 * NOTE(review): the implementation is not visible alongside this
 * declaration; the notes below are read off the signature only and
 * should be confirmed against the implementation.
 *
 * @param out Buffer that receives the filtered data
 * @param filter The filter to apply
 * @param input Buffer holding the data to be filtered
 * @param as_path Path to treat the data as coming from (presumably used
 *                when deciding how the filter applies -- confirm)
 * @param mode Filtering direction
 * @return presumably 0 on success, <0 on error -- confirm
 */
GIT_EXTERN(int) git_filter_apply_to_buffer(
git_buffer *out,
git_filter *filter,
const git_buffer *input,
const char *as_path,
git_filter_mode_t mode);
GIT_END_DECL GIT_END_DECL
/** @} */ /** @} */
......
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
/*
 * Include guard for git2/sys/filter.h.
 *
 * The guard name must be unique to this header.  If it were shared with
 * another header (such as the config backend one), whichever of the two
 * is included second in a translation unit would expand to nothing.
 */
#ifndef INCLUDE_sys_git_filter_h__
#define INCLUDE_sys_git_filter_h__
#include "git2/filter.h"
/**
* @file git2/sys/filter.h
* @brief Git filter backend and plugin routines
* @defgroup git_backend Git custom backend APIs
* @ingroup Git
* @{
*/
GIT_BEGIN_DECL
/**
 * A filter source represents a file/blob to be processed
 *
 * A pointer to one of these is passed (as const) to the `check` and
 * `apply` callbacks so a filter can see what it is being asked to process.
 */
typedef struct git_filter_source git_filter_source;
struct git_filter_source {
git_repository *repo; /* repository that contains `path` */
const char *path; /* path of the file to be filtered */
git_oid oid; /* zero if unknown (which is likely) */
uint16_t filemode; /* zero if unknown */
};
/**
 * Callback to actually perform the data filtering
 *
 * The filter reads the data in `from` and writes its result to `to`.
 * `payload` is the address of the per-source payload slot; it holds
 * whatever the `check` callback stored there (or NULL) and may be updated.
 *
 * The filter list code in this change interprets the return value as:
 *  - GIT_ENOTFOUND: the filter did nothing; the data is passed through
 *    unchanged and this is not reported as an error
 *  - any other negative value: failure; filtering stops and the value is
 *    returned to the caller
 *  - otherwise: success; `to` is taken as the filter's output
 */
typedef int (*git_filter_apply_fn)(
git_filter *self,
void **payload, /* may be read and/or set */
git_filter_mode_t mode,
git_buffer *to,
const git_buffer *from,
const git_filter_source *src);
/**
 * Callback to decide if a given source needs this filter
 *
 * Called while a filter list is being built for `src`.  `*payload` is
 * NULL on entry; anything stored there is kept with the filter list
 * entry and later handed to the `apply` and `cleanup` callbacks.
 *
 * The filter list code in this change interprets the return value as:
 *  - GIT_ENOTFOUND: this filter is skipped for the source (not an error)
 *  - any other negative value: building the filter list fails with it
 *  - otherwise: the filter is added to the list for this source
 */
typedef int (*git_filter_check_fn)(
git_filter *self,
void **payload, /* points to NULL ptr on entry, may be set */
git_filter_mode_t mode,
const git_filter_source *src);
/**
 * Callback to clean up after filtering has been applied
 *
 * Invoked once per filter list entry when the filter list is freed, with
 * the payload value stored for that entry.  `payload` is NULL if neither
 * `check` nor `apply` stored anything.
 */
typedef void (*git_filter_cleanup_fn)(
git_filter *self,
void *payload);
/**
 * Filter structure used to register a new filter.
 *
 * To associate extra data with a filter, simply allocate extra data
 * and put the `git_filter` struct at the start of your data buffer,
 * then cast the `self` pointer to your larger structure when your
 * callback is invoked.
 *
 * `version` should be set to GIT_FILTER_VERSION
 *
 * `apply` is the callback that actually filters data.  It is invoked
 * without a NULL check, so it must always be set.
 *
 * `check` is an optional callback that checks if filtering is needed for
 * a given source.  It may be NULL, in which case no check is made and
 * the filter is added for the source.
 *
 * `cleanup` is an optional callback that is made after the filter has
 * been applied. Both the `check` and `apply` callbacks are able to
 * allocate a `payload` to keep per-source filter state, and this callback
 * is given that value and can clean up as needed.  It may be NULL.
 */
struct git_filter {
unsigned int version; /* set to GIT_FILTER_VERSION */
git_filter_apply_fn apply; /* required */
git_filter_check_fn check; /* optional, may be NULL */
git_filter_cleanup_fn cleanup; /* optional, may be NULL */
};
/** Value to store in the `version` field of a `git_filter`. */
#define GIT_FILTER_VERSION 1
/**
 * Register a filter under a given name
 *
 * One filter is currently preregistered with libgit2: the built in CRLF
 * filter, under the name GIT_FILTER_CRLF.  (No GIT_FILTER_IDENT constant
 * or "ident" filter is defined at this point.)
 *
 * NOTE(review): the implementation is not visible alongside this
 * declaration; return values and ownership of `filter` should be
 * confirmed against it.
 *
 * @param name The name to register the filter under
 * @param filter The filter definition
 * @return presumably 0 on success, <0 on error -- confirm
 */
GIT_EXTERN(int) git_filter_register(
const char *name, const git_filter *filter);
/**
 * Remove the filter with the given name
 *
 * NOTE(review): the implementation is not visible alongside this
 * declaration; behavior for unknown names should be confirmed.
 *
 * @param name The name the filter was registered under
 * @return presumably 0 on success, <0 on error -- confirm
 */
GIT_EXTERN(int) git_filter_unregister(const char *name);
/** @} */
GIT_END_DECL
#endif
...@@ -108,7 +108,7 @@ static int write_file_filtered( ...@@ -108,7 +108,7 @@ static int write_file_filtered(
git_off_t *size, git_off_t *size,
git_odb *odb, git_odb *odb,
const char *full_path, const char *full_path,
git_vector *filters) git_filter_list *fl)
{ {
int error; int error;
git_buf source = GIT_BUF_INIT; git_buf source = GIT_BUF_INIT;
...@@ -117,7 +117,7 @@ static int write_file_filtered( ...@@ -117,7 +117,7 @@ static int write_file_filtered(
if ((error = git_futils_readbuffer(&source, full_path)) < 0) if ((error = git_futils_readbuffer(&source, full_path)) < 0)
return error; return error;
error = git_filters_apply(&dest, &source, filters); error = git_filter_list_apply(&dest, &source, fl);
/* Free the source as soon as possible. This can be big in memory, /* Free the source as soon as possible. This can be big in memory,
* and we don't want to ODB write to choke */ * and we don't want to ODB write to choke */
...@@ -198,29 +198,25 @@ int git_blob__create_from_paths( ...@@ -198,29 +198,25 @@ int git_blob__create_from_paths(
if (S_ISLNK(mode)) { if (S_ISLNK(mode)) {
error = write_symlink(oid, odb, content_path, (size_t)size); error = write_symlink(oid, odb, content_path, (size_t)size);
} else { } else {
git_vector write_filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
int filter_count = 0;
if (try_load_filters) { if (try_load_filters)
/* Load the filters for writing this file to the ODB */ /* Load the filters for writing this file to the ODB */
filter_count = git_filters_load( error = git_filter_list_load(
&write_filters, repo, hint_path, GIT_FILTER_TO_ODB); &fl, repo, hint_path, GIT_FILTER_TO_ODB);
}
if (filter_count < 0) { if (error < 0)
/* Negative value means there was a critical error */ /* well, that didn't work */;
error = filter_count; else if (fl == NULL)
} else if (filter_count == 0) {
/* No filters need to be applied to the document: we can stream /* No filters need to be applied to the document: we can stream
* directly from disk */ * directly from disk */
error = write_file_stream(oid, odb, content_path, size); error = write_file_stream(oid, odb, content_path, size);
} else { else {
/* We need to apply one or more filters */ /* We need to apply one or more filters */
error = write_file_filtered( error = write_file_filtered(oid, &size, odb, content_path, fl);
oid, &size, odb, content_path, &write_filters);
}
git_filters_free(&write_filters); git_filter_list_free(fl);
}
/* /*
* TODO: eventually support streaming filtered files, for files * TODO: eventually support streaming filtered files, for files
...@@ -345,9 +341,9 @@ int git_blob_filtered_content( ...@@ -345,9 +341,9 @@ int git_blob_filtered_content(
const char *as_path, const char *as_path,
int check_for_binary_data) int check_for_binary_data)
{ {
int error = 0, num_filters = 0; int error = 0;
git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT; git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT;
git_vector filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
assert(blob && as_path && out); assert(blob && as_path && out);
...@@ -359,12 +355,12 @@ int git_blob_filtered_content( ...@@ -359,12 +355,12 @@ int git_blob_filtered_content(
if (check_for_binary_data && git_buf_text_is_binary(&filtered)) if (check_for_binary_data && git_buf_text_is_binary(&filtered))
return 0; return 0;
num_filters = git_filters_load( error = git_filter_list_load(
&filters, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE); &fl, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE);
if (num_filters < 0) if (error < 0)
return num_filters; return error;
if (num_filters > 0) { if (fl != NULL) {
if (out->ptr && out->available) { if (out->ptr && out->available) {
filtered.ptr = out->ptr; filtered.ptr = out->ptr;
filtered.size = out->size; filtered.size = out->size;
...@@ -374,9 +370,9 @@ int git_blob_filtered_content( ...@@ -374,9 +370,9 @@ int git_blob_filtered_content(
} }
if (!(error = git_blob__getbuf(&unfiltered, blob))) if (!(error = git_blob__getbuf(&unfiltered, blob)))
error = git_filters_apply(&filtered, &unfiltered, &filters); error = git_filter_list_apply(&filtered, &unfiltered, fl);
git_filters_free(&filters); git_filter_list_free(fl);
git_buf_free(&unfiltered); git_buf_free(&unfiltered);
} }
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "common.h" #include "common.h"
#include "git2/strarray.h" #include "git2/strarray.h"
#include "git2/buffer.h"
#include <stdarg.h> #include <stdarg.h>
typedef struct { typedef struct {
...@@ -174,4 +175,25 @@ int git_buf_splice( ...@@ -174,4 +175,25 @@ int git_buf_splice(
const char *data, const char *data,
size_t nb_to_insert); size_t nb_to_insert);
/*
 * Initializer for a `git_buf` built from a pointer to a public
 * `git_buffer`.  Only the pointer and the two size values are copied;
 * the data itself is not duplicated, so both structs refer to the same
 * memory afterwards.
 *
 * The initializer is positional and supplies ptr, available, size in
 * that order, so it relies on git_buf declaring its fields as
 * ptr, asize, size -- TODO confirm against the git_buf definition.
 * The `buffer` argument is evaluated three times.
 */
#define GIT_BUF_FROM_BUFFER(buffer) \
{ (buffer)->ptr, (buffer)->available, (buffer)->size }
/*
 * Overwrite the fields of `buf` with the pointer and sizes held in the
 * public `buffer`.  No data is copied and whatever `buf` previously
 * referred to is not freed here.
 */
GIT_INLINE(void) git_buf_from_buffer(git_buf *buf, const git_buffer *buffer)
{
	/* git_buffer.available is the counterpart of git_buf.asize */
	buf->asize = buffer->available;
	buf->size = buffer->size;
	buf->ptr = buffer->ptr;
}
/*
 * Initializer for a public `git_buffer` built from a pointer to an
 * internal `git_buf`.  Only the pointer and the two size values are
 * copied; the data itself is not duplicated.
 *
 * The initializer is positional and supplies ptr, size, asize in that
 * order, so it relies on git_buffer declaring its fields as
 * ptr, size, available -- TODO confirm against the git_buffer definition.
 * The `buf` argument is evaluated three times.
 */
#define GIT_BUFFER_FROM_BUF(buf) \
{ (buf)->ptr, (buf)->size, (buf)->asize }
/*
 * Overwrite the fields of the public `buffer` with the pointer and sizes
 * held in `buf`.  No data is copied and whatever `buffer` previously
 * referred to is not freed here.
 */
GIT_INLINE(void) git_buffer_from_buf(git_buffer *buffer, const git_buf *buf)
{
	/* git_buf.asize is the counterpart of git_buffer.available */
	buffer->available = buf->asize;
	buffer->size = buf->size;
	buffer->ptr = buf->ptr;
}
#endif #endif
...@@ -710,56 +710,40 @@ static int blob_content_to_file( ...@@ -710,56 +710,40 @@ static int blob_content_to_file(
mode_t entry_filemode, mode_t entry_filemode,
git_checkout_opts *opts) git_checkout_opts *opts)
{ {
int error = -1, nb_filters = 0; int error = 0;
mode_t file_mode = opts->file_mode; mode_t file_mode = opts->file_mode ? opts->file_mode : entry_filemode;
bool dont_free_filtered;
git_buf unfiltered = GIT_BUF_INIT, filtered = GIT_BUF_INIT; git_buf unfiltered = GIT_BUF_INIT, filtered = GIT_BUF_INIT;
git_vector filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
/* Create a fake git_buf from the blob raw data... */ /* Create a fake git_buf from the blob raw data... */
filtered.ptr = (void *)git_blob_rawcontent(blob); filtered.ptr = (void *)git_blob_rawcontent(blob);
filtered.size = (size_t)git_blob_rawsize(blob); filtered.size = (size_t)git_blob_rawsize(blob);
/* ... and make sure it doesn't get unexpectedly freed */
dont_free_filtered = true; if (!opts->disable_filters && !git_buf_text_is_binary(&filtered)) {
error = git_filter_list_load(
if (!opts->disable_filters && &fl, git_blob_owner(blob), path, GIT_FILTER_TO_WORKTREE);
!git_buf_text_is_binary(&filtered) && }
(nb_filters = git_filters_load(
&filters, if (fl != NULL) {
git_object_owner((git_object *)blob),
path,
GIT_FILTER_TO_WORKTREE)) > 0)
{
/* reset 'filtered' so it can be a filter target */ /* reset 'filtered' so it can be a filter target */
git_buf_init(&filtered, 0); git_buf_init(&filtered, 0);
dont_free_filtered = false;
}
if (nb_filters < 0) if (!(error = git_blob__getbuf(&unfiltered, blob))) {
return nb_filters; error = git_filter_list_apply(&filtered, &unfiltered, fl);
if (nb_filters > 0) { git_buf_free(&unfiltered);
if ((error = git_blob__getbuf(&unfiltered, blob)) < 0) }
goto cleanup;
if ((error = git_filters_apply(&filtered, &unfiltered, &filters)) < 0) git_filter_list_free(fl);
goto cleanup;
} }
/* Allow overriding of file mode */ if (!error &&
if (!file_mode) !(error = buffer_to_file(
file_mode = entry_filemode; st, &filtered, path, opts->dir_mode,
opts->file_open_flags, file_mode)))
error = buffer_to_file(
st, &filtered, path, opts->dir_mode, opts->file_open_flags, file_mode);
if (!error)
st->st_mode = entry_filemode; st->st_mode = entry_filemode;
cleanup: if (filtered.asize != 0)
git_filters_free(&filters);
git_buf_free(&unfiltered);
if (!dont_free_filtered)
git_buf_free(&filtered); git_buf_free(&filtered);
return error; return error;
......
...@@ -19,13 +19,11 @@ ...@@ -19,13 +19,11 @@
struct crlf_attrs { struct crlf_attrs {
int crlf_action; int crlf_action;
int eol; int eol;
int auto_crlf;
}; };
struct crlf_filter { struct crlf_filter {
git_filter f; git_filter f;
struct crlf_attrs attrs;
git_repository *repo;
char path[GIT_FLEX_ARRAY];
}; };
static int check_crlf(const char *value) static int check_crlf(const char *value)
...@@ -76,7 +74,8 @@ static int crlf_input_action(struct crlf_attrs *ca) ...@@ -76,7 +74,8 @@ static int crlf_input_action(struct crlf_attrs *ca)
return ca->crlf_action; return ca->crlf_action;
} }
static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path) static int crlf_load_attributes(
struct crlf_attrs *ca, git_repository *repo, const char *path)
{ {
#define NUM_CONV_ATTRS 3 #define NUM_CONV_ATTRS 3
...@@ -108,9 +107,8 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con ...@@ -108,9 +107,8 @@ static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, con
return -1; return -1;
} }
static int has_cr_in_index(git_filter *self) static int has_cr_in_index(git_repository *repo, const char *path)
{ {
struct crlf_filter *filter = (struct crlf_filter *)self;
git_index *index; git_index *index;
const git_index_entry *entry; const git_index_entry *entry;
git_blob *blob; git_blob *blob;
...@@ -118,19 +116,19 @@ static int has_cr_in_index(git_filter *self) ...@@ -118,19 +116,19 @@ static int has_cr_in_index(git_filter *self)
git_off_t blobsize; git_off_t blobsize;
bool found_cr; bool found_cr;
if (git_repository_index__weakptr(&index, filter->repo) < 0) { if (git_repository_index__weakptr(&index, repo) < 0) {
giterr_clear(); giterr_clear();
return false; return false;
} }
if (!(entry = git_index_get_bypath(index, filter->path, 0)) && if (!(entry = git_index_get_bypath(index, path, 0)) &&
!(entry = git_index_get_bypath(index, filter->path, 1))) !(entry = git_index_get_bypath(index, path, 1)))
return false; return false;
if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */ if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
return true; return true;
if (git_blob_lookup(&blob, filter->repo, &entry->oid) < 0) if (git_blob_lookup(&blob, repo, &entry->oid) < 0)
return false; return false;
blobcontent = git_blob_rawcontent(blob); blobcontent = git_blob_rawcontent(blob);
...@@ -147,26 +145,26 @@ static int has_cr_in_index(git_filter *self) ...@@ -147,26 +145,26 @@ static int has_cr_in_index(git_filter *self)
} }
static int crlf_apply_to_odb( static int crlf_apply_to_odb(
git_filter *self, git_buf *dest, const git_buf *source) struct crlf_attrs *ca,
git_buffer *to,
const git_buffer *from,
const git_filter_source *src)
{ {
struct crlf_filter *filter = (struct crlf_filter *)self; const git_buf from_buf = GIT_BUF_FROM_BUFFER(from);
git_buf to_buf = GIT_BUF_FROM_BUFFER(to);
assert(self && dest && source);
/* Empty file? Nothing to do */ /* Empty file? Nothing to do */
if (git_buf_len(source) == 0) if (!git_buf_len(&from_buf))
return 0; return 0;
/* Heuristics to see if we can skip the conversion. /* Heuristics to see if we can skip the conversion.
* Straight from Core Git. * Straight from Core Git.
*/ */
if (filter->attrs.crlf_action == GIT_CRLF_AUTO || if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) {
filter->attrs.crlf_action == GIT_CRLF_GUESS) {
git_buf_text_stats stats; git_buf_text_stats stats;
/* Check heuristics for binary vs text... */ /* Check heuristics for binary vs text... */
if (git_buf_text_gather_stats(&stats, source, false)) if (git_buf_text_gather_stats(&stats, &from_buf, false))
return -1; return -1;
/* /*
...@@ -175,28 +173,34 @@ static int crlf_apply_to_odb( ...@@ -175,28 +173,34 @@ static int crlf_apply_to_odb(
* stuff? * stuff?
*/ */
if (stats.cr != stats.crlf) if (stats.cr != stats.crlf)
return -1; return GIT_ENOTFOUND;
if (filter->attrs.crlf_action == GIT_CRLF_GUESS) { if (ca->crlf_action == GIT_CRLF_GUESS) {
/* /*
* If the file in the index has any CR in it, do not convert. * If the file in the index has any CR in it, do not convert.
* This is the new safer autocrlf handling. * This is the new safer autocrlf handling.
*/ */
if (has_cr_in_index(self)) if (has_cr_in_index(src->repo, src->path))
return -1; return GIT_ENOTFOUND;
} }
if (!stats.cr) if (!stats.cr)
return -1; return GIT_ENOTFOUND;
} }
/* Actually drop the carriage returns */ /* Actually drop the carriage returns */
return git_buf_text_crlf_to_lf(dest, source); if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0)
return -1;
/* Overwrite "to" buffer in case data was resized */
git_buffer_from_buf(to, &to_buf);
return 0;
} }
static const char *line_ending(struct crlf_filter *filter) static const char *line_ending(struct crlf_attrs *ca)
{ {
switch (filter->attrs.crlf_action) { switch (ca->crlf_action) {
case GIT_CRLF_BINARY: case GIT_CRLF_BINARY:
case GIT_CRLF_INPUT: case GIT_CRLF_INPUT:
return "\n"; return "\n";
...@@ -213,7 +217,7 @@ static const char *line_ending(struct crlf_filter *filter) ...@@ -213,7 +217,7 @@ static const char *line_ending(struct crlf_filter *filter)
goto line_ending_error; goto line_ending_error;
} }
switch (filter->attrs.eol) { switch (ca->eol) {
case GIT_EOL_UNSET: case GIT_EOL_UNSET:
return GIT_EOL_NATIVE == GIT_EOL_CRLF return GIT_EOL_NATIVE == GIT_EOL_CRLF
? "\r\n" ? "\r\n"
...@@ -235,44 +239,58 @@ line_ending_error: ...@@ -235,44 +239,58 @@ line_ending_error:
} }
static int crlf_apply_to_workdir( static int crlf_apply_to_workdir(
git_filter *self, git_buf *tgt, const git_buf *src) struct crlf_attrs *ca, git_buffer *to, const git_buffer *from)
{ {
struct crlf_filter *filter = (struct crlf_filter *)self; const git_buf from_buf = GIT_BUF_FROM_BUFFER(from);
git_buf to_buf = GIT_BUF_FROM_BUFFER(to);
const char *workdir_ending = NULL; const char *workdir_ending = NULL;
assert(self && tgt && src);
/* Empty file? Nothing to do. */ /* Empty file? Nothing to do. */
if (git_buf_len(src) == 0) if (git_buf_len(&from_buf) == 0)
return -1; return 0;
/* Determine proper line ending */ /* Determine proper line ending */
workdir_ending = line_ending(filter); workdir_ending = line_ending(ca);
if (!workdir_ending) if (!workdir_ending)
return -1; return -1;
if (!strcmp("\n", workdir_ending)) { if (!strcmp("\n", workdir_ending)) {
if (git_buf_find(src, '\r') < 0) if (ca->crlf_action == GIT_CRLF_GUESS && ca->auto_crlf)
return GIT_ENOTFOUND;
if (git_buf_find(&from_buf, '\r') < 0)
return GIT_ENOTFOUND;
if (git_buf_text_crlf_to_lf(&to_buf, &from_buf) < 0)
return -1;
} else {
/* only other supported option is lf->crlf conversion */
assert(!strcmp("\r\n", workdir_ending));
if (git_buf_text_lf_to_crlf(&to_buf, &from_buf) < 0)
return -1; return -1;
return git_buf_text_crlf_to_lf(tgt, src);
} }
/* only other supported option is lf->crlf conversion */ /* Overwrite "to" buffer in case data was resized */
assert(!strcmp("\r\n", workdir_ending)); git_buffer_from_buf(to, &to_buf);
return git_buf_text_lf_to_crlf(tgt, src);
return 0;
} }
static int find_and_add_filter( static int crlf_check(
git_vector *filters, git_repository *repo, const char *path, git_filter *self,
int (*apply)(struct git_filter *self, git_buf *tgt, const git_buf *src)) void **payload, /* points to NULL ptr on entry, may be set */
git_filter_mode_t mode,
const git_filter_source *src)
{ {
struct crlf_attrs ca;
struct crlf_filter *filter;
size_t pathlen;
int error; int error;
struct crlf_attrs ca;
GIT_UNUSED(self);
GIT_UNUSED(mode);
/* Load gitattributes for the path */ /* Load gitattributes for the path */
if ((error = crlf_load_attributes(&ca, repo, path)) < 0) if ((error = crlf_load_attributes(&ca, src->repo, src->path)) < 0)
return error; return error;
/* /*
...@@ -282,41 +300,54 @@ static int find_and_add_filter( ...@@ -282,41 +300,54 @@ static int find_and_add_filter(
ca.crlf_action = crlf_input_action(&ca); ca.crlf_action = crlf_input_action(&ca);
if (ca.crlf_action == GIT_CRLF_BINARY) if (ca.crlf_action == GIT_CRLF_BINARY)
return 0; return GIT_ENOTFOUND;
if (ca.crlf_action == GIT_CRLF_GUESS) { if (ca.crlf_action == GIT_CRLF_GUESS) {
int auto_crlf; if ((error = git_repository__cvar(
&ca.auto_crlf, src->repo, GIT_CVAR_AUTO_CRLF)) < 0)
if ((error = git_repository__cvar(&auto_crlf, repo, GIT_CVAR_AUTO_CRLF)) < 0)
return error; return error;
if (auto_crlf == GIT_AUTO_CRLF_FALSE) if (ca.auto_crlf == GIT_AUTO_CRLF_FALSE)
return 0; return GIT_ENOTFOUND;
} }
/* If we're good, we create a new filter object and push it *payload = git__malloc(sizeof(ca));
* into the filters array */ GITERR_CHECK_ALLOC(*payload);
pathlen = strlen(path); memcpy(*payload, &ca, sizeof(ca));
filter = git__malloc(sizeof(struct crlf_filter) + pathlen + 1);
GITERR_CHECK_ALLOC(filter);
filter->f.apply = apply; return 0;
filter->f.do_free = NULL; }
memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
filter->repo = repo; static int crlf_apply(
memcpy(filter->path, path, pathlen + 1); git_filter *self,
void **payload, /* may be read and/or set */
git_filter_mode_t mode,
git_buffer *to,
const git_buffer *from,
const git_filter_source *src)
{
GIT_UNUSED(self);
return git_vector_insert(filters, filter); if (mode == GIT_FILTER_SMUDGE)
return crlf_apply_to_workdir(*payload, to, from);
else
return crlf_apply_to_odb(*payload, to, from, src);
} }
int git_filter_add__crlf_to_odb( static void crlf_cleanup(
git_vector *filters, git_repository *repo, const char *path) git_filter *self,
void *payload)
{ {
return find_and_add_filter(filters, repo, path, &crlf_apply_to_odb); GIT_UNUSED(self);
git__free(payload);
} }
int git_filter_add__crlf_to_workdir( git_filter *git_crlf_filter_new(void)
git_vector *filters, git_repository *repo, const char *path)
{ {
return find_and_add_filter(filters, repo, path, &crlf_apply_to_workdir); struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
f->f.version = GIT_FILTER_VERSION;
f->f.check = crlf_check;
f->f.apply = crlf_apply;
f->f.cleanup = crlf_cleanup;
return (git_filter *)f;
} }
...@@ -568,21 +568,21 @@ int git_diff__oid_for_file( ...@@ -568,21 +568,21 @@ int git_diff__oid_for_file(
giterr_set(GITERR_OS, "File size overflow (for 32-bits) on '%s'", path); giterr_set(GITERR_OS, "File size overflow (for 32-bits) on '%s'", path);
result = -1; result = -1;
} else { } else {
git_vector filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
result = git_filters_load(&filters, repo, path, GIT_FILTER_TO_ODB); result = git_filter_list_load(&fl, repo, path, GIT_FILTER_TO_ODB);
if (result >= 0) { if (!result) {
int fd = git_futils_open_ro(full_path.ptr); int fd = git_futils_open_ro(full_path.ptr);
if (fd < 0) if (fd < 0)
result = fd; result = fd;
else { else {
result = git_odb__hashfd_filtered( result = git_odb__hashfd_filtered(
oid, fd, (size_t)size, GIT_OBJ_BLOB, &filters); oid, fd, (size_t)size, GIT_OBJ_BLOB, fl);
p_close(fd); p_close(fd);
} }
}
git_filters_free(&filters); git_filter_list_free(fl);
}
} }
cleanup: cleanup:
......
...@@ -296,7 +296,7 @@ static int diff_file_content_load_workdir_file( ...@@ -296,7 +296,7 @@ static int diff_file_content_load_workdir_file(
git_diff_file_content *fc, git_buf *path) git_diff_file_content *fc, git_buf *path)
{ {
int error = 0; int error = 0;
git_vector filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT;
git_file fd = git_futils_open_ro(git_buf_cstr(path)); git_file fd = git_futils_open_ro(git_buf_cstr(path));
...@@ -310,41 +310,35 @@ static int diff_file_content_load_workdir_file( ...@@ -310,41 +310,35 @@ static int diff_file_content_load_workdir_file(
if (diff_file_content_binary_by_size(fc)) if (diff_file_content_binary_by_size(fc))
goto cleanup; goto cleanup;
if ((error = git_filters_load( if ((error = git_filter_list_load(
&filters, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0) &fl, fc->repo, fc->file->path, GIT_FILTER_TO_ODB)) < 0)
goto cleanup; goto cleanup;
/* error >= is a filter count */
if (error == 0) { /* if there are no filters, try to mmap the file */
if (fl == NULL) {
if (!(error = git_futils_mmap_ro( if (!(error = git_futils_mmap_ro(
&fc->map, fd, 0, (size_t)fc->file->size))) &fc->map, fd, 0, (size_t)fc->file->size))) {
fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA; fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
else /* fall through to try readbuffer below */
giterr_clear();
}
if (error != 0) {
error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size);
if (error < 0)
goto cleanup; goto cleanup;
if (!filters.length)
git_buf_swap(&filtered, &raw);
else
error = git_filters_apply(&filtered, &raw, &filters);
if (!error) {
fc->map.len = git_buf_len(&filtered);
fc->map.data = git_buf_detach(&filtered);
fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
} }
git_buf_free(&raw); /* if mmap failed, fall through to try readbuffer below */
git_buf_free(&filtered); giterr_clear();
} }
if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size)) &&
!(error = git_filter_list_apply(&filtered, &raw, fl)))
{
fc->map.len = git_buf_len(&filtered);
fc->map.data = git_buf_detach(&filtered);
fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
}
git_buf_free(&raw);
git_buf_free(&filtered);
cleanup: cleanup:
git_filters_free(&filters); git_filter_list_free(fl);
p_close(fd); p_close(fd);
return error; return error;
......
...@@ -13,62 +13,155 @@ ...@@ -13,62 +13,155 @@
#include "git2/config.h" #include "git2/config.h"
#include "blob.h" #include "blob.h"
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode) typedef struct {
git_filter *filter;
void *payload;
} git_filter_entry;
struct git_filter_list {
git_array_t(git_filter_entry) filters;
git_filter_mode_t mode;
git_filter_source source;
char path[GIT_FLEX_ARRAY];
};
typedef struct {
const char *filter_name;
git_filter *filter;
} git_filter_def;
static git_array_t(git_filter_def) filter_registry = GIT_ARRAY_INIT;
static int filter_load_defaults(void)
{ {
int error; if (!git_array_size(filter_registry)) {
git_filter_def *fdef = git_array_alloc(filter_registry);
if (mode == GIT_FILTER_TO_ODB) { GITERR_CHECK_ALLOC(fdef);
/* Load the CRLF cleanup filter when writing to the ODB */
error = git_filter_add__crlf_to_odb(filters, repo, path); fdef->filter_name = GIT_FILTER_CRLF;
if (error < 0) fdef->filter = git_crlf_filter_new();
return error; GITERR_CHECK_ALLOC(fdef->filter);
} else {
error = git_filter_add__crlf_to_workdir(filters, repo, path);
if (error < 0)
return error;
} }
return (int)filters->length; return 0;
} }
void git_filters_free(git_vector *filters) static int git_filter_list_new(
git_filter_list **out, git_filter_mode_t mode, const git_filter_source *src)
{ {
size_t i; git_filter_list *fl = NULL;
git_filter *filter; size_t pathlen = src->path ? strlen(src->path) : 0;
fl = git__calloc(1, sizeof(git_filter_list) + pathlen + 1);
GITERR_CHECK_ALLOC(fl);
fl->mode = mode;
if (src->path)
memcpy(fl->path, src->path, pathlen);
fl->source.repo = src->repo;
fl->source.path = fl->path;
*out = fl;
return 0;
}
int git_filter_list_load(
git_filter_list **filters,
git_repository *repo,
const char *path,
git_filter_mode_t mode)
{
int error = 0;
git_filter_list *fl = NULL;
git_filter_source src = { 0 };
git_filter_entry *fe;
uint32_t f;
if (filter_load_defaults() < 0)
return -1;
src.repo = repo;
src.path = path;
for (f = 0; f < git_array_size(filter_registry); ++f) {
void *payload = NULL;
git_filter_def *fdef = git_array_get(filter_registry, f);
if (!fdef || !fdef->filter)
continue;
git_vector_foreach(filters, i, filter) { if (fdef->filter->check)
if (filter->do_free != NULL) error = fdef->filter->check(fdef->filter, &payload, mode, &src);
filter->do_free(filter);
else if (error == GIT_ENOTFOUND)
git__free(filter); error = 0;
else if (error < 0)
break;
else {
if (!fl && (error = git_filter_list_new(&fl, mode, &src)) < 0)
return error;
fe = git_array_alloc(fl->filters);
GITERR_CHECK_ALLOC(fe);
fe->filter = fdef->filter;
fe->payload = payload;
}
}
if (error && fl != NULL) {
git_array_clear(fl->filters);
git__free(fl);
fl = NULL;
}
*filters = fl;
return error;
}
void git_filter_list_free(git_filter_list *fl)
{
uint32_t i;
if (!fl)
return;
for (i = 0; i < git_array_size(fl->filters); ++i) {
git_filter_entry *fe = git_array_get(fl->filters, i);
if (fe->filter->cleanup)
fe->filter->cleanup(fe->filter, fe->payload);
} }
git_vector_free(filters); git_array_clear(fl->filters);
git__free(fl);
} }
int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) int git_filter_list_apply(
git_buf *dest,
git_buf *source,
git_filter_list *fl)
{ {
size_t i; int error = 0;
uint32_t i;
unsigned int src; unsigned int src;
git_buf *dbuffer[2]; git_buf *dbuffer[2];
if (!fl) {
git_buf_swap(dest, source);
return 0;
}
dbuffer[0] = source; dbuffer[0] = source;
dbuffer[1] = dest; dbuffer[1] = dest;
src = 0; src = 0;
if (git_buf_len(source) == 0) {
git_buf_clear(dest);
return 0;
}
/* Pre-grow the destination buffer to more or less the size /* Pre-grow the destination buffer to more or less the size
* we expect it to have */ * we expect it to have */
if (git_buf_grow(dest, git_buf_len(source)) < 0) if (git_buf_grow(dest, git_buf_len(source)) < 0)
return -1; return -1;
for (i = 0; i < filters->length; ++i) { for (i = 0; i < git_array_size(fl->filters); ++i) {
git_filter *filter = git_vector_get(filters, i); git_filter_entry *fe = git_array_get(fl->filters, i);
unsigned int dst = 1 - src; unsigned int dst = 1 - src;
git_buf_clear(dbuffer[dst]); git_buf_clear(dbuffer[dst]);
...@@ -79,8 +172,25 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters) ...@@ -79,8 +172,25 @@ int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters)
* of the double buffering (so that the text goes through * of the double buffering (so that the text goes through
* cleanly). * cleanly).
*/ */
if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) {
src = dst; git_buffer srcb = GIT_BUFFER_FROM_BUF(dbuffer[src]);
git_buffer dstb = GIT_BUFFER_FROM_BUF(dbuffer[dst]);
error = fe->filter->apply(
fe->filter, &fe->payload, fl->mode, &dstb, &srcb, &fl->source);
if (error == GIT_ENOTFOUND)
error = 0;
else if (error < 0) {
git_buf_clear(dest);
return error;
}
else {
git_buf_from_buffer(dbuffer[src], &srcb);
git_buf_from_buffer(dbuffer[dst], &dstb);
src = dst;
}
}
if (git_buf_oom(dbuffer[dst])) if (git_buf_oom(dbuffer[dst]))
return -1; return -1;
......
...@@ -9,14 +9,11 @@ ...@@ -9,14 +9,11 @@
#include "common.h" #include "common.h"
#include "buffer.h" #include "buffer.h"
#include "array.h"
#include "git2/odb.h" #include "git2/odb.h"
#include "git2/repository.h" #include "git2/repository.h"
#include "git2/filter.h" #include "git2/filter.h"
#include "git2/sys/filter.h"
struct git_filter {
int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source);
void (*do_free)(struct git_filter *self);
};
typedef enum { typedef enum {
GIT_CRLF_GUESS = -1, GIT_CRLF_GUESS = -1,
...@@ -27,34 +24,38 @@ typedef enum { ...@@ -27,34 +24,38 @@ typedef enum {
GIT_CRLF_AUTO, GIT_CRLF_AUTO,
} git_crlf_t; } git_crlf_t;
typedef struct git_filter_list git_filter_list;
/* /*
* FILTER API * FILTER API
*/ */
/* /*
* For any given path in the working directory, fill the `filters` * For any given path in the working directory, create a `git_filter_list`
* array with the relevant filters that need to be applied. * with the relevant filters that need to be applied.
* *
* Mode is either `GIT_FILTER_TO_WORKTREE` if you need to load the * This will return 0 (success) but set the output git_filter_list to NULL
* filters that will be used when checking out a file to the working * if no filters are requested for the given file.
* directory, or `GIT_FILTER_TO_ODB` for the filters used when writing
* a file to the ODB.
* *
* @param filters Vector where to store all the loaded filters * @param filters Output newly created git_filter_list (or NULL)
* @param repo Repository object that contains `path` * @param repo Repository object that contains `path`
* @param path Relative path of the file to be filtered * @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT) * @param mode Filtering direction (WT->ODB or ODB->WT)
* @return the number of filters loaded for the file (0 if the file * @return 0 on success (which could still return NULL if no filters are
* doesn't need filtering), or a negative error code * needed for the requested file), <0 on error
*/ */
extern int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode); extern int git_filter_list_load(
git_filter_list **filters,
git_repository *repo,
const char *path,
git_filter_mode_t mode);
/* /*
* Apply one or more filters to a file. * Apply one or more filters to a data buffer.
* *
* The file must have been loaded as a `git_buf` object. Both the `source` * The source data must have been loaded as a `git_buf` object. Both the
* and `dest` buffers are owned by the caller and must be freed once * `source` and `dest` buffers are owned by the caller and must be freed
* they are no longer needed. * once they are no longer needed.
* *
* NOTE: Because of the double-buffering scheme, the `source` buffer that * NOTE: Because of the double-buffering scheme, the `source` buffer that
* contains the original file may be tampered with once the filtering is * contains the original file may be tampered with once the filtering is
...@@ -63,29 +64,25 @@ extern int git_filters_load(git_vector *filters, git_repository *repo, const cha ...@@ -63,29 +64,25 @@ extern int git_filters_load(git_vector *filters, git_repository *repo, const cha
* *
* @param dest Buffer to store the result of the filtering * @param dest Buffer to store the result of the filtering
* @param source Buffer containing the document to filter * @param source Buffer containing the document to filter
* @param filters Vector of filters as supplied by `git_filters_load` * @param filters An already loaded git_filter_list
* @return 0 on success, an error code otherwise * @return 0 on success, an error code otherwise
*/ */
extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters); extern int git_filter_list_apply(
git_buf *dest,
git_buf *source,
git_filter_list *filters);
/* /*
* Free the `filters` array generated by `git_filters_load`. * Free the git_filter_list
* *
* Note that this frees both the array and its contents. The array will * @param filters A git_filter_list created by `git_filter_list_load`
* be clean/reusable after this call.
*
* @param filters A filters array as supplied by `git_filters_load`
*/ */
extern void git_filters_free(git_vector *filters); extern void git_filter_list_free(git_filter_list *filters);
/* /*
* Available filters * Available filters
*/ */
/* Strip CRLF, from Worktree to ODB */ extern git_filter *git_crlf_filter_new(void);
extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path);
/* Add CRLF, from ODB to worktree */
extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path);
#endif #endif
...@@ -179,13 +179,13 @@ done: ...@@ -179,13 +179,13 @@ done:
} }
int git_odb__hashfd_filtered( int git_odb__hashfd_filtered(
git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters) git_oid *out, git_file fd, size_t size, git_otype type, git_filter_list *fl)
{ {
int error; int error;
git_buf raw = GIT_BUF_INIT; git_buf raw = GIT_BUF_INIT;
git_buf filtered = GIT_BUF_INIT; git_buf filtered = GIT_BUF_INIT;
if (!filters || !filters->length) if (!fl)
return git_odb__hashfd(out, fd, size, type); return git_odb__hashfd(out, fd, size, type);
/* size of data is used in header, so we have to read the whole file /* size of data is used in header, so we have to read the whole file
...@@ -193,7 +193,7 @@ int git_odb__hashfd_filtered( ...@@ -193,7 +193,7 @@ int git_odb__hashfd_filtered(
*/ */
if (!(error = git_futils_readbuffer_fd(&raw, fd, size))) if (!(error = git_futils_readbuffer_fd(&raw, fd, size)))
error = git_filters_apply(&filtered, &raw, filters); error = git_filter_list_apply(&filtered, &raw, fl);
git_buf_free(&raw); git_buf_free(&raw);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "vector.h" #include "vector.h"
#include "cache.h" #include "cache.h"
#include "posix.h" #include "posix.h"
#include "filter.h"
#define GIT_OBJECTS_DIR "objects/" #define GIT_OBJECTS_DIR "objects/"
#define GIT_OBJECT_DIR_MODE 0777 #define GIT_OBJECT_DIR_MODE 0777
...@@ -66,7 +67,7 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type); ...@@ -66,7 +67,7 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type);
* Acts just like git_odb__hashfd with the addition of filters... * Acts just like git_odb__hashfd with the addition of filters...
*/ */
int git_odb__hashfd_filtered( int git_odb__hashfd_filtered(
git_oid *out, git_file fd, size_t len, git_otype type, git_vector *filters); git_oid *out, git_file fd, size_t len, git_otype type, git_filter_list *fl);
/* /*
* Hash a `path`, assuming it could be a POSIX symlink: if the path is a * Hash a `path`, assuming it could be a POSIX symlink: if the path is a
......
...@@ -1649,7 +1649,7 @@ int git_repository_hashfile( ...@@ -1649,7 +1649,7 @@ int git_repository_hashfile(
const char *as_path) const char *as_path)
{ {
int error; int error;
git_vector filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
git_file fd = -1; git_file fd = -1;
git_off_t len; git_off_t len;
git_buf full_path = GIT_BUF_INIT; git_buf full_path = GIT_BUF_INIT;
...@@ -1671,7 +1671,7 @@ int git_repository_hashfile( ...@@ -1671,7 +1671,7 @@ int git_repository_hashfile(
/* passing empty string for "as_path" indicates --no-filters */ /* passing empty string for "as_path" indicates --no-filters */
if (strlen(as_path) > 0) { if (strlen(as_path) > 0) {
error = git_filters_load(&filters, repo, as_path, GIT_FILTER_TO_ODB); error = git_filter_list_load(&fl, repo, as_path, GIT_FILTER_TO_ODB);
if (error < 0) if (error < 0)
return error; return error;
} else { } else {
...@@ -1698,12 +1698,12 @@ int git_repository_hashfile( ...@@ -1698,12 +1698,12 @@ int git_repository_hashfile(
goto cleanup; goto cleanup;
} }
error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, &filters); error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, fl);
cleanup: cleanup:
if (fd >= 0) if (fd >= 0)
p_close(fd); p_close(fd);
git_filters_free(&filters); git_filter_list_free(fl);
git_buf_free(&full_path); git_buf_free(&full_path);
return error; return error;
......
...@@ -101,7 +101,7 @@ void test_object_blob_filter__stats(void) ...@@ -101,7 +101,7 @@ void test_object_blob_filter__stats(void)
void test_object_blob_filter__to_odb(void) void test_object_blob_filter__to_odb(void)
{ {
git_vector filters = GIT_VECTOR_INIT; git_filter_list *fl = NULL;
git_config *cfg; git_config *cfg;
int i; int i;
git_blob *blob; git_blob *blob;
...@@ -113,21 +113,21 @@ void test_object_blob_filter__to_odb(void) ...@@ -113,21 +113,21 @@ void test_object_blob_filter__to_odb(void)
git_attr_cache_flush(g_repo); git_attr_cache_flush(g_repo);
cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n"); cl_git_append2file("empty_standard_repo/.gitattributes", "*.txt text\n");
cl_assert(git_filters_load( cl_git_pass(
&filters, g_repo, "filename.txt", GIT_FILTER_TO_ODB) > 0); git_filter_list_load(&fl, g_repo, "filename.txt", GIT_FILTER_TO_ODB));
cl_assert(filters.length == 1); cl_assert(fl != NULL);
for (i = 0; i < NUM_TEST_OBJECTS; i++) { for (i = 0; i < NUM_TEST_OBJECTS; i++) {
cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i])); cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
cl_git_pass(git_blob__getbuf(&orig, blob)); cl_git_pass(git_blob__getbuf(&orig, blob));
cl_git_pass(git_filters_apply(&out, &orig, &filters)); cl_git_pass(git_filter_list_apply(&out, &orig, fl));
cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0); cl_assert(git_buf_cmp(&out, &g_crlf_filtered[i]) == 0);
git_blob_free(blob); git_blob_free(blob);
} }
git_filters_free(&filters); git_filter_list_free(fl);
git_buf_free(&orig); git_buf_free(&orig);
git_buf_free(&out); git_buf_free(&out);
git_config_free(cfg); git_config_free(cfg);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment