Commit d15884ce by Carlos Martín Nieto

Merge pull request #2911 from ethomson/streaming_filters

Streaming filters
parents 818302c8 feb0e022
......@@ -15,6 +15,7 @@ compiler:
env:
global:
- secure: "YnhS+8n6B+uoyaYfaJ3Lei7cSJqHDPiKJCKFIF2c87YDfmCvAJke8QtE7IzjYDs7UFkTCM4ox+ph2bERUrxZbSCyEkHdjIZpKuMJfYWja/jgMqTMxdyOH9y8JLFbZsSXDIXDwqBlC6vVyl1fP90M35wuWcNTs6tctfVWVofEFbs="
- GITTEST_INVASIVE_FS_SIZE=1
matrix:
- OPTIONS="-DTHREADSAFE=ON -DCMAKE_BUILD_TYPE=Release"
- OPTIONS="-DTHREADSAFE=OFF -DBUILD_EXAMPLES=ON"
......
......@@ -3,7 +3,8 @@ branches:
only:
- master
environment:
GITTEST_INVASIVE_FILESYSTEM: 1
GITTEST_INVASIVE_FS_STRUCTURE: 1
GITTEST_INVASIVE_FS_SIZE: 1
matrix:
- GENERATOR: "Visual Studio 11"
......
......@@ -39,9 +39,9 @@ typedef enum {
* Filter option flags.
*/
typedef enum {
GIT_FILTER_OPT_DEFAULT = 0u,
GIT_FILTER_OPT_ALLOW_UNSAFE = (1u << 0),
} git_filter_opt_t;
GIT_FILTER_DEFAULT = 0u,
GIT_FILTER_ALLOW_UNSAFE = (1u << 0),
} git_filter_flag_t;
/**
* A filter that can transform file data
......@@ -83,7 +83,7 @@ typedef struct git_filter_list git_filter_list;
* @param blob The blob to which the filter will be applied (if known)
* @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT)
* @param options Combination of `git_filter_opt_t` flags
* @param flags Combination of `git_filter_flag_t` flags
* @return 0 on success (which could still return NULL if no filters are
* needed for the requested file), <0 on error
*/
......@@ -93,7 +93,7 @@ GIT_EXTERN(int) git_filter_list_load(
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
uint32_t options);
uint32_t flags);
/**
* Apply filter list to a data buffer.
......@@ -137,6 +137,22 @@ GIT_EXTERN(int) git_filter_list_apply_to_blob(
git_filter_list *filters,
git_blob *blob);
GIT_EXTERN(int) git_filter_list_stream_data(
git_filter_list *filters,
git_buf *data,
git_writestream *target);
GIT_EXTERN(int) git_filter_list_stream_file(
git_filter_list *filters,
git_repository *repo,
const char *path,
git_writestream *target);
GIT_EXTERN(int) git_filter_list_stream_blob(
git_filter_list *filters,
git_blob *blob,
git_writestream *target);
/**
* Free a git_filter_list
*
......
......@@ -123,9 +123,9 @@ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src);
GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src);
/**
* Get the combination git_filter_opt_t options to be applied
* Get the combination git_filter_flag_t options to be applied
*/
GIT_EXTERN(uint32_t) git_filter_source_options(const git_filter_source *src);
GIT_EXTERN(uint32_t) git_filter_source_flags(const git_filter_source *src);
/*
* struct git_filter
......@@ -208,6 +208,13 @@ typedef int (*git_filter_apply_fn)(
const git_buf *from,
const git_filter_source *src);
/**
 * Callback to create a streaming writer for a filter.
 *
 * The callback receives the stream that should take this filter's
 * output (`next`) and hands back, through `out`, the stream that
 * callers will write the unfiltered data into.
 *
 * @param out Receives the stream created by the filter
 * @param self The filter the stream is being created for
 * @param payload Per-filter payload pointer
 * @param src The source being filtered
 * @param next The stream the created stream is expected to write into
 * @return 0 on success, <0 on error
 */
typedef int (*git_filter_stream_fn)(
git_writestream **out,
git_filter *self,
void **payload,
const git_filter_source *src,
git_writestream *next);
/**
* Callback to clean up after filtering has been applied
*
......@@ -247,6 +254,7 @@ struct git_filter {
git_filter_shutdown_fn shutdown;
git_filter_check_fn check;
git_filter_apply_fn apply;
git_filter_stream_fn stream;
git_filter_cleanup_fn cleanup;
};
......
......@@ -410,6 +410,15 @@ typedef enum {
GIT_SUBMODULE_RECURSE_ONDEMAND = 2,
} git_submodule_recurse_t;
/**
 * A type to write in a streaming fashion, for example, for filters.
 *
 * Concrete streams place this struct at the start of their own struct
 * and fill in the three callbacks below.
 */
typedef struct git_writestream git_writestream;
struct git_writestream {
/* Consume `len` bytes starting at `buffer`; a negative return signals an error. */
int (*write)(git_writestream *stream, const char *buffer, size_t len);
/* Signal that no more data will be written; returns 0 on success. */
int (*close)(git_writestream *stream);
/* Release whatever resources the stream holds. */
void (*free)(git_writestream *stream);
};
/** @} */
GIT_END_DECL
......
......@@ -282,9 +282,8 @@ static int system_attr_file(
* a consumer. This allows them to treat this as a regular `git_buf`,
* but their call to `git_buf_free` will not attempt to free it.
*/
out->ptr = attr_session->sysdir.ptr;
out->size = attr_session->sysdir.size;
out->asize = 0;
git_buf_attach_notowned(
out, attr_session->sysdir.ptr, attr_session->sysdir.size);
return 0;
}
......
......@@ -199,7 +199,7 @@ int git_blob__create_from_paths(
/* Load the filters for writing this file to the ODB */
error = git_filter_list_load(
&fl, repo, NULL, hint_path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT);
GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0)
/* well, that didn't work */;
......@@ -329,15 +329,13 @@ cleanup:
int git_blob_is_binary(const git_blob *blob)
{
git_buf content;
git_buf content = GIT_BUF_INIT;
assert(blob);
content.ptr = blob->odb_object->buffer;
content.size =
min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL);
content.asize = 0;
git_buf_attach_notowned(&content, blob->odb_object->buffer,
min(blob->odb_object->cached.size,
GIT_FILTER_BYTES_TO_CHECK_NUL));
return git_buf_text_is_binary(&content);
}
......@@ -359,7 +357,7 @@ int git_blob_filtered_content(
if (!(error = git_filter_list_load(
&fl, git_blob_owner(blob), blob, path,
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT))) {
GIT_FILTER_TO_WORKTREE, GIT_FILTER_DEFAULT))) {
error = git_filter_list_apply_to_blob(out, fl, blob);
......
......@@ -500,6 +500,20 @@ void git_buf_attach(git_buf *buf, char *ptr, size_t asize)
}
}
/*
 * Make `buf` refer to `size` bytes at `ptr` without taking ownership.
 *
 * Any allocation currently held by `buf` is released first.  A zero
 * `size` simply resets `buf` to its initial state.
 */
void git_buf_attach_notowned(git_buf *buf, const char *ptr, size_t size)
{
	if (git_buf_is_allocated(buf))
		git_buf_free(buf);

	if (size == 0) {
		git_buf_init(buf, 0);
		return;
	}

	/* asize stays 0 so the buffer is not treated as an allocation */
	buf->size = size;
	buf->asize = 0;
	buf->ptr = (char *)ptr;
}
int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...)
{
va_list ap;
......
......@@ -74,6 +74,12 @@ extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
extern char *git_buf_detach(git_buf *buf);
extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
/* Populates a `git_buf` where the contents are not "owned" by the
* buffer, and calls to `git_buf_free` will not free the given buf.
*/
extern void git_buf_attach_notowned(
git_buf *buf, const char *ptr, size_t size);
/**
* Test if there have been any reallocation failures with this git_buf.
*
......
......@@ -17,6 +17,7 @@
#include "git2/diff.h"
#include "git2/submodule.h"
#include "git2/sys/index.h"
#include "git2/sys/filter.h"
#include "refs.h"
#include "repository.h"
......@@ -1371,39 +1372,37 @@ static int mkpath2file(
return error;
}
static int buffer_to_file(
checkout_data *data,
struct stat *st,
git_buf *buf,
const char *path,
mode_t file_mode)
{
int error;
if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
return error;
/* Write stream that sends filtered blob content to an open file descriptor. */
struct checkout_stream {
/* must be first: the stream is passed around as a git_writestream */
git_writestream base;
/* path of the file being written; used in error messages */
const char *path;
/* descriptor that receives the data */
int fd;
/* non-zero until the stream has been closed */
int open;
};
if ((error = git_futils_writebuffer(
buf, path, data->opts.file_open_flags, file_mode)) < 0)
return error;
static int checkout_stream_write(
git_writestream *s, const char *buffer, size_t len)
{
struct checkout_stream *stream = (struct checkout_stream *)s;
int ret;
if (st) {
data->perfdata.stat_calls++;
if ((ret = p_write(stream->fd, buffer, len)) < 0)
giterr_set(GITERR_OS, "Could not write to '%s'", stream->path);
if ((error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error statting '%s'", path);
return error;
}
}
return ret;
}
if (GIT_PERMS_IS_EXEC(file_mode)) {
data->perfdata.chmod_calls++;
static int checkout_stream_close(git_writestream *s)
{
struct checkout_stream *stream = (struct checkout_stream *)s;
assert(stream && stream->open);
if ((error = p_chmod(path, file_mode)) < 0)
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
}
stream->open = 0;
return p_close(stream->fd);
}
return error;
/*
 * Free callback for a checkout_stream.  Intentionally does nothing:
 * the function releases no memory and touches no descriptor.
 */
static void checkout_stream_free(git_writestream *s)
{
GIT_UNUSED(s);
}
static int blob_content_to_file(
......@@ -1411,36 +1410,83 @@ static int blob_content_to_file(
struct stat *st,
git_blob *blob,
const char *path,
const char * hint_path,
const char *hint_path,
mode_t entry_filemode)
{
int flags = data->opts.file_open_flags;
mode_t file_mode = data->opts.file_mode ?
data->opts.file_mode : entry_filemode;
git_buf out = GIT_BUF_INIT;
git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
struct checkout_stream writer;
mode_t mode;
git_filter_list *fl = NULL;
int fd;
int error = 0;
if (hint_path == NULL)
hint_path = path;
if (!data->opts.disable_filters)
error = git_filter_list__load_with_attr_session(
&fl, data->repo, &data->attr_session, blob, hint_path,
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT);
if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
return error;
if (flags <= 0)
flags = O_CREAT | O_TRUNC | O_WRONLY;
if (!(mode = file_mode))
mode = GIT_FILEMODE_BLOB;
if (!error)
error = git_filter_list_apply_to_blob(&out, fl, blob);
if ((fd = p_open(path, flags, mode)) < 0) {
giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
return fd;
}
filter_opts.attr_session = &data->attr_session;
filter_opts.temp_buf = &data->tmp;
if (!data->opts.disable_filters &&
(error = git_filter_list__load_ext(
&fl, data->repo, blob, hint_path,
GIT_FILTER_TO_WORKTREE, &filter_opts)))
return error;
/* setup the writer */
memset(&writer, 0, sizeof(struct checkout_stream));
writer.base.write = checkout_stream_write;
writer.base.close = checkout_stream_close;
writer.base.free = checkout_stream_free;
writer.path = path;
writer.fd = fd;
writer.open = 1;
error = git_filter_list_stream_blob(fl, blob, (git_writestream *)&writer);
assert(writer.open == 0);
git_filter_list_free(fl);
if (!error) {
error = buffer_to_file(data, st, &out, path, file_mode);
st->st_mode = entry_filemode;
if (error < 0)
return error;
if (GIT_PERMS_IS_EXEC(mode)) {
data->perfdata.chmod_calls++;
git_buf_free(&out);
if ((error = p_chmod(path, mode)) < 0) {
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
return error;
}
}
if (st) {
data->perfdata.stat_calls++;
if ((error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error statting '%s'", path);
return error;
}
st->st_mode = entry_filemode;
}
return 0;
}
static int blob_content_to_link(
......@@ -1958,6 +2004,7 @@ static int checkout_write_merge(
git_merge_file_result result = {0};
git_filebuf output = GIT_FILEBUF_INIT;
git_filter_list *fl = NULL;
git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
int error = 0;
if (data->opts.checkout_strategy & GIT_CHECKOUT_CONFLICT_STYLE_DIFF3)
......@@ -2007,9 +2054,12 @@ static int checkout_write_merge(
in_data.ptr = (char *)result.ptr;
in_data.size = result.len;
if ((error = git_filter_list__load_with_attr_session(
&fl, data->repo, &data->attr_session, NULL, git_buf_cstr(&path_workdir),
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT)) < 0 ||
filter_opts.attr_session = &data->attr_session;
filter_opts.temp_buf = &data->tmp;
if ((error = git_filter_list__load_ext(
&fl, data->repo, NULL, git_buf_cstr(&path_workdir),
GIT_FILTER_TO_WORKTREE, &filter_opts)) < 0 ||
(error = git_filter_list_apply_to_data(&out_data, fl, &in_data)) < 0)
goto done;
} else {
......
......@@ -302,7 +302,7 @@ static int crlf_check(
return error;
/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) &&
if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
ca.safe_crlf = GIT_SAFE_CRLF_WARN;
}
......
......@@ -600,7 +600,7 @@ int git_diff__oid_for_entry(
error = -1;
} else if (!(error = git_filter_list_load(
&fl, diff->repo, NULL, entry.path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)))
GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)))
{
int fd = git_futils_open_ro(full_path.ptr);
if (fd < 0)
......
......@@ -418,14 +418,13 @@ void git_diff_driver_update_options(
int git_diff_driver_content_is_binary(
git_diff_driver *driver, const char *content, size_t content_len)
{
git_buf search;
search.ptr = (char *)content;
search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);
search.asize = 0;
git_buf search = GIT_BUF_INIT;
GIT_UNUSED(driver);
git_buf_attach_notowned(&search, content,
min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL));
/* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses.
......
......@@ -302,7 +302,7 @@ static int diff_file_content_load_workdir_file(
if ((error = git_filter_list_load(
&fl, fc->repo, NULL, fc->file->path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)) < 0)
GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
goto cleanup;
/* if there are no filters, try to mmap the file */
......
......@@ -23,7 +23,7 @@ struct git_filter_source {
git_oid oid; /* zero if unknown (which is likely) */
uint16_t filemode; /* zero if unknown */
git_filter_mode_t mode;
uint32_t options;
uint32_t flags;
};
typedef struct {
......@@ -34,6 +34,7 @@ typedef struct {
struct git_filter_list {
git_array_t(git_filter_entry) filters;
git_filter_source source;
git_buf *temp_buf;
char path[GIT_FLEX_ARRAY];
};
......@@ -371,9 +372,9 @@ git_filter_mode_t git_filter_source_mode(const git_filter_source *src)
return src->mode;
}
uint32_t git_filter_source_options(const git_filter_source *src)
uint32_t git_filter_source_flags(const git_filter_source *src)
{
return src->options;
return src->flags;
}
static int filter_list_new(
......@@ -393,7 +394,7 @@ static int filter_list_new(
fl->source.repo = src->repo;
fl->source.path = fl->path;
fl->source.mode = src->mode;
fl->source.options = src->options;
fl->source.flags = src->flags;
*out = fl;
return 0;
......@@ -448,24 +449,23 @@ int git_filter_list_new(
git_filter_list **out,
git_repository *repo,
git_filter_mode_t mode,
uint32_t options)
uint32_t flags)
{
git_filter_source src = { 0 };
src.repo = repo;
src.path = NULL;
src.mode = mode;
src.options = options;
src.flags = flags;
return filter_list_new(out, &src);
}
int git_filter_list__load_with_attr_session(
int git_filter_list__load_ext(
git_filter_list **filters,
git_repository *repo,
git_attr_session *attr_session,
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
uint32_t options)
git_filter_options *filter_opts)
{
int error = 0;
git_filter_list *fl = NULL;
......@@ -480,7 +480,8 @@ int git_filter_list__load_with_attr_session(
src.repo = repo;
src.path = path;
src.mode = mode;
src.options = options;
src.flags = filter_opts->flags;
if (blob)
git_oid_cpy(&src.oid, git_blob_id(blob));
......@@ -493,7 +494,7 @@ int git_filter_list__load_with_attr_session(
if (fdef->nattrs > 0) {
error = filter_list_check_attributes(
&values, repo, attr_session, fdef, &src);
&values, repo, filter_opts->attr_session, fdef, &src);
if (error == GIT_ENOTFOUND) {
error = 0;
......@@ -516,9 +517,13 @@ int git_filter_list__load_with_attr_session(
else if (error < 0)
break;
else {
if (!fl && (error = filter_list_new(&fl, &src)) < 0)
if (!fl) {
if ((error = filter_list_new(&fl, &src)) < 0)
return error;
fl->temp_buf = filter_opts->temp_buf;
}
fe = git_array_alloc(fl->filters);
GITERR_CHECK_ALLOC(fe);
fe->filter = fdef->filter;
......@@ -542,10 +547,14 @@ int git_filter_list_load(
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
uint32_t options)
uint32_t flags)
{
return git_filter_list__load_with_attr_session(
filters, repo, NULL, blob, path, mode, options);
git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
filter_opts.flags = flags;
return git_filter_list__load_ext(
filters, repo, blob, path, mode, &filter_opts);
}
void git_filter_list_free(git_filter_list *fl)
......@@ -600,128 +609,347 @@ size_t git_filter_list_length(const git_filter_list *fl)
return fl ? git_array_size(fl->filters) : 0;
}
static int filter_list_out_buffer_from_raw(
git_buf *out, const void *ptr, size_t size)
/* Write stream that collects everything written to it in a git_buf. */
struct buf_stream {
/* must be first: the stream is passed around as a git_writestream */
git_writestream parent;
/* buffer that receives the written data */
git_buf *target;
/* set once the stream has been closed */
bool complete;
};
static int buf_stream_write(
git_writestream *s, const char *buffer, size_t len)
{
if (git_buf_is_allocated(out))
git_buf_free(out);
struct buf_stream *buf_stream = (struct buf_stream *)s;
assert(buf_stream);
if (!size) {
git_buf_init(out, 0);
} else {
out->ptr = (char *)ptr;
out->asize = 0;
out->size = size;
}
assert(buf_stream->complete == 0);
return git_buf_put(buf_stream->target, buffer, len);
}
/*
 * Close callback for a buf_stream: records that the stream has been
 * completed.  Closing an already-completed stream trips an assertion.
 */
static int buf_stream_close(git_writestream *s)
{
	struct buf_stream *writer = (struct buf_stream *)s;

	assert(writer);
	assert(writer->complete == 0);

	writer->complete = 1;

	return 0;
}
/*
 * Free callback for a buf_stream.  Intentionally does nothing: neither
 * the stream nor its target buffer is released here.
 */
static void buf_stream_free(git_writestream *s)
{
GIT_UNUSED(s);
}
/*
 * Prepare `writer` so that everything written to it lands in `target`.
 * The stream is zeroed, its callbacks are wired up, and `target` is
 * cleared so that it only holds data written through this stream.
 */
static void buf_stream_init(struct buf_stream *writer, git_buf *target)
{
	memset(writer, 0, sizeof(*writer));

	writer->target = target;

	writer->parent.free = buf_stream_free;
	writer->parent.close = buf_stream_close;
	writer->parent.write = buf_stream_write;

	git_buf_clear(target);
}
int git_filter_list_apply_to_data(
git_buf *tgt, git_filter_list *fl, git_buf *src)
git_buf *tgt, git_filter_list *filters, git_buf *src)
{
int error = 0;
uint32_t i;
git_buf *dbuffer[2], local = GIT_BUF_INIT;
unsigned int si = 0;
struct buf_stream writer;
int error;
git_buf_sanitize(tgt);
git_buf_sanitize(src);
if (!fl)
return filter_list_out_buffer_from_raw(tgt, src->ptr, src->size);
if (!filters) {
git_buf_attach_notowned(tgt, src->ptr, src->size);
return 0;
}
buf_stream_init(&writer, tgt);
if ((error = git_filter_list_stream_data(filters, src,
(git_writestream *)&writer)) < 0)
return error;
assert(writer.complete);
return error;
}
int git_filter_list_apply_to_file(
git_buf *out,
git_filter_list *filters,
git_repository *repo,
const char *path)
{
struct buf_stream writer;
int error;
buf_stream_init(&writer, out);
dbuffer[0] = src;
dbuffer[1] = tgt;
if ((error = git_filter_list_stream_file(
filters, repo, path, (git_writestream *)&writer)) < 0)
return error;
/* if `src` buffer is reallocable, then use it, otherwise copy it */
if (!git_buf_is_allocated(src)) {
if (git_buf_set(&local, src->ptr, src->size) < 0)
assert(writer.complete);
return error;
}
static int buf_from_blob(git_buf *out, git_blob *blob)
{
git_off_t rawsize = git_blob_rawsize(blob);
if (!git__is_sizet(rawsize)) {
giterr_set(GITERR_OS, "Blob is too large to filter");
return -1;
dbuffer[0] = &local;
}
for (i = 0; i < git_array_size(fl->filters); ++i) {
unsigned int di = 1 - si;
uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_WORKTREE) ?
i : git_array_size(fl->filters) - 1 - i;
git_filter_entry *fe = git_array_get(fl->filters, fidx);
dbuffer[di]->size = 0;
/* Apply the filter from dbuffer[src] to the other buffer;
* if the filtering is canceled by the user mid-filter,
* we skip to the next filter without changing the source
* of the double buffering (so that the text goes through
* cleanly).
*/
git_buf_attach_notowned(out, git_blob_rawcontent(blob), (size_t)rawsize);
return 0;
}
int git_filter_list_apply_to_blob(
git_buf *out,
git_filter_list *filters,
git_blob *blob)
{
struct buf_stream writer;
int error;
buf_stream_init(&writer, out);
error = fe->filter->apply(
fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source);
if ((error = git_filter_list_stream_blob(
filters, blob, (git_writestream *)&writer)) < 0)
return error;
assert(writer.complete);
return error;
}
/*
 * Write stream that adapts a filter offering only the one-shot `apply`
 * callback: writes are accumulated and the filter is run on close.
 */
struct proxy_stream {
/* must be first: the stream is passed around as a git_writestream */
git_writestream parent;
/* filter whose `apply` callback is invoked when the stream closes */
git_filter *filter;
/* source description handed to the filter */
const git_filter_source *source;
/* per-filter payload pointer handed to the filter */
void **payload;
/* accumulates everything written to the stream */
git_buf input;
/* output buffer owned by this stream, used when no external one is supplied */
git_buf temp_buf;
/* buffer the filter writes its result into (external buffer or `temp_buf`) */
git_buf *output;
/* next stream in the chain; receives the filter's result */
git_writestream *target;
};
/*
 * Write callback for a proxy_stream: appends the incoming bytes to the
 * stream's input buffer.  Nothing is forwarded downstream here.
 */
static int proxy_stream_write(
	git_writestream *s, const char *buffer, size_t len)
{
	struct proxy_stream *proxy = (struct proxy_stream *)s;
	git_buf *pending;

	assert(proxy);

	pending = &proxy->input;
	return git_buf_put(pending, buffer, len);
}
static int proxy_stream_close(git_writestream *s)
{
struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
git_buf *writebuf;
int error;
assert(proxy_stream);
error = proxy_stream->filter->apply(
proxy_stream->filter,
proxy_stream->payload,
proxy_stream->output,
&proxy_stream->input,
proxy_stream->source);
if (error == GIT_PASSTHROUGH) {
/* PASSTHROUGH means filter decided not to process the buffer */
error = 0;
} else if (!error) {
git_buf_sanitize(dbuffer[di]); /* force NUL termination */
si = di; /* swap buffers */
writebuf = &proxy_stream->input;
} else if (error == 0) {
git_buf_sanitize(proxy_stream->output);
writebuf = proxy_stream->output;
} else {
tgt->size = 0;
goto cleanup;
return error;
}
if ((error = proxy_stream->target->write(
proxy_stream->target, writebuf->ptr, writebuf->size)) == 0)
error = proxy_stream->target->close(proxy_stream->target);
return error;
}
/*
 * Free callback for a proxy_stream: releases the two buffers the stream
 * owns and then the stream itself.  The `output` pointer is not freed
 * separately.
 */
static void proxy_stream_free(git_writestream *s)
{
	struct proxy_stream *proxy = (struct proxy_stream *)s;

	assert(proxy);

	git_buf_free(&proxy->temp_buf);
	git_buf_free(&proxy->input);

	git__free(proxy);
}
/*
 * Allocate a proxy_stream wrapping `filter`.
 *
 * @param out Receives the new stream
 * @param filter Filter to run when the stream is closed
 * @param temp_buf Optional buffer for the filter's output; when NULL the
 *        stream's own `temp_buf` is used
 * @param payload Per-filter payload pointer handed to the filter
 * @param source Source description handed to the filter
 * @param target Stream that receives the filter's output
 * @return 0 on success; allocation failure is handled by
 *         GITERR_CHECK_ALLOC
 */
static int proxy_stream_init(
	git_writestream **out,
	git_filter *filter,
	git_buf *temp_buf,
	void **payload,
	const git_filter_source *source,
	git_writestream *target)
{
	struct proxy_stream *proxy = git__calloc(1, sizeof(*proxy));
	GITERR_CHECK_ALLOC(proxy);

	proxy->filter = filter;
	proxy->source = source;
	proxy->payload = payload;
	proxy->target = target;

	if (temp_buf)
		proxy->output = temp_buf;
	else
		proxy->output = &proxy->temp_buf;

	proxy->parent.free = proxy_stream_free;
	proxy->parent.close = proxy_stream_close;
	proxy->parent.write = proxy_stream_write;

	*out = (git_writestream *)proxy;
	return 0;
}
static int stream_list_init(
git_writestream **out,
git_vector *streams,
git_filter_list *filters,
git_writestream *target)
{
git_writestream *last_stream = target;
size_t i;
int error = 0;
*out = NULL;
if (!filters) {
*out = target;
return 0;
}
/* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */
if (si != 1)
git_buf_swap(dbuffer[0], dbuffer[1]);
/* Create filters last to first to get the chaining direction */
for (i = 0; i < git_array_size(filters->filters); ++i) {
size_t filter_idx = (filters->source.mode == GIT_FILTER_TO_WORKTREE) ?
git_array_size(filters->filters) - 1 - i : i;
git_filter_entry *fe = git_array_get(filters->filters, filter_idx);
git_writestream *filter_stream;
cleanup:
git_buf_free(&local); /* don't leak if we allocated locally */
assert(fe->filter->stream || fe->filter->apply);
/* If necessary, create a stream that proxies the traditional
* application.
*/
if (fe->filter->stream)
error = fe->filter->stream(&filter_stream, fe->filter,
&fe->payload, &filters->source, last_stream);
else
/* Create a stream that proxies the one-shot apply */
error = proxy_stream_init(&filter_stream, fe->filter,
filters->temp_buf, &fe->payload, &filters->source,
last_stream);
if (error < 0)
return error;
git_vector_insert(streams, filter_stream);
last_stream = filter_stream;
}
*out = last_stream;
return 0;
}
int git_filter_list_apply_to_file(
git_buf *out,
/*
 * Invoke the `free` callback of every stream stored in `streams`, then
 * release the vector itself.
 *
 * NOTE(review): this helper has external linkage while the neighbouring
 * stream_list_init is static -- confirm whether that is intentional.
 */
void stream_list_free(git_vector *streams)
{
git_writestream *stream;
size_t i;
git_vector_foreach(streams, i, stream)
stream->free(stream);
git_vector_free(streams);
}
#define STREAM_BUFSIZE 10240
/* TODO: maybe not use filter_stream as a target but create one */
int git_filter_list_stream_file(
git_filter_list *filters,
git_repository *repo,
const char *path)
const char *path,
git_writestream *target)
{
int error;
char buf[STREAM_BUFSIZE];
git_buf abspath = GIT_BUF_INIT;
const char *base = repo ? git_repository_workdir(repo) : NULL;
git_buf abspath = GIT_BUF_INIT, raw = GIT_BUF_INIT;
if (!(error = git_path_join_unrooted(&abspath, path, base, NULL)) &&
!(error = git_futils_readbuffer(&raw, abspath.ptr)))
{
error = git_filter_list_apply_to_data(out, filters, &raw);
git_vector filter_streams = GIT_VECTOR_INIT;
git_writestream *stream_start;
ssize_t readlen;
int fd, error;
if ((error = stream_list_init(
&stream_start, &filter_streams, filters, target)) < 0 ||
(error = git_path_join_unrooted(&abspath, path, base, NULL)) < 0)
goto done;
if ((fd = git_futils_open_ro(path)) < 0) {
error = fd;
goto done;
}
git_buf_free(&raw);
while ((readlen = p_read(fd, buf, STREAM_BUFSIZE)) > 0) {
if ((error = stream_start->write(stream_start, buf, readlen)) < 0)
goto done;
}
if (!readlen)
error = stream_start->close(stream_start);
else if (readlen < 0)
error = readlen;
p_close(fd);
done:
stream_list_free(&filter_streams);
git_buf_free(&abspath);
return error;
}
int git_filter_list_apply_to_blob(
git_buf *out,
int git_filter_list_stream_data(
git_filter_list *filters,
git_blob *blob)
git_buf *data,
git_writestream *target)
{
git_vector filter_streams = GIT_VECTOR_INIT;
git_writestream *stream_start;
int error = 0;
git_buf_sanitize(data);
if ((error = stream_list_init(
&stream_start, &filter_streams, filters, target)) == 0 &&
(error =
stream_start->write(stream_start, data->ptr, data->size)) == 0)
error = stream_start->close(stream_start);
stream_list_free(&filter_streams);
return error;
}
int git_filter_list_stream_blob(
git_filter_list *filters,
git_blob *blob,
git_writestream *target)
{
git_buf in = GIT_BUF_INIT;
git_off_t rawsize = git_blob_rawsize(blob);
if (!git__is_sizet(rawsize)) {
giterr_set(GITERR_OS, "Blob is too large to filter");
if (buf_from_blob(&in, blob) < 0)
return -1;
}
in.ptr = (char *)git_blob_rawcontent(blob);
in.asize = 0;
in.size = (size_t)rawsize;
if (filters)
git_oid_cpy(&filters->source.oid, git_blob_id(blob));
return git_filter_list_apply_to_data(out, filters, &in);
return git_filter_list_stream_data(filters, &in, target);
}
......@@ -24,16 +24,23 @@ typedef enum {
GIT_CRLF_AUTO,
} git_crlf_t;
typedef struct {
git_attr_session *attr_session;
git_buf *temp_buf;
uint32_t flags;
} git_filter_options;
#define GIT_FILTER_OPTIONS_INIT {0}
extern void git_filter_free(git_filter *filter);
extern int git_filter_list__load_with_attr_session(
extern int git_filter_list__load_ext(
git_filter_list **filters,
git_repository *repo,
git_attr_session *attr_session,
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
uint32_t options);
git_filter_options *filter_opts);
/*
* Available filters
......
......@@ -1849,7 +1849,7 @@ int git_repository_hashfile(
if (strlen(as_path) > 0) {
error = git_filter_list_load(
&fl, repo, NULL, as_path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT);
GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0)
return error;
} else {
......
......@@ -123,7 +123,7 @@ void test_filter_crlf__with_safecrlf_and_unsafe_allowed(void)
cl_repo_set_bool(g_repo, "core.safecrlf", true);
cl_git_pass(git_filter_list_new(
&fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE));
&fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE));
crlf = git_filter_lookup(GIT_FILTER_CRLF);
cl_assert(crlf != NULL);
......
#include "clar_libgit2.h"
#include "posix.h"
#include "blob.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
static git_repository *g_repo = NULL;
static git_filter *create_compress_filter(void);
static git_filter *compress_filter;
/*
 * Per-test setup: create the streaming "compress" filter, register it
 * under the name "compress", and open the sandbox repository.
 */
void test_filter_stream__initialize(void)
{
compress_filter = create_compress_filter();
cl_git_pass(git_filter_register("compress", compress_filter, 50));
g_repo = cl_git_sandbox_init("empty_standard_repo");
}
/*
 * Per-test teardown: discard the sandbox repository and unregister the
 * "compress" filter that the initializer registered.
 */
void test_filter_stream__cleanup(void)
{
cl_git_sandbox_cleanup();
g_repo = NULL;
git_filter_unregister("compress");
}
#define CHUNKSIZE 10240
/*
 * Stream state for the toy "compress" filter: every CHUNKSIZE input
 * bytes become one output byte when deflating, and every input byte
 * becomes CHUNKSIZE output bytes when inflating.
 */
struct compress_stream {
/* must be first: the stream is passed around as a git_writestream */
git_writestream parent;
/* downstream stream that receives the transformed data */
git_writestream *next;
/* filtering direction, which selects deflate or inflate */
git_filter_mode_t mode;
/* byte captured at the start of the chunk currently being consumed */
char current;
/* number of bytes of the current chunk consumed so far */
size_t current_chunk;
};
/*
 * Deflate: consume the input CHUNKSIZE bytes at a time and emit a
 * single byte (the first byte of the chunk) for each complete chunk.
 * A partially consumed chunk is carried over to the next call via
 * `stream->current_chunk`.
 */
static int compress_stream_write__deflated(struct compress_stream *stream, const char *buffer, size_t len)
{
	size_t offset = 0;
	size_t remaining = len;

	while (remaining > 0) {
		size_t room, take;

		/* starting a fresh chunk: remember the byte that represents it */
		if (stream->current_chunk == 0)
			stream->current = buffer[offset];

		room = CHUNKSIZE - stream->current_chunk;
		take = (room < remaining) ? room : remaining;

		offset += take;
		remaining -= take;
		stream->current_chunk += take;

		if (stream->current_chunk != CHUNKSIZE)
			continue;

		cl_git_pass(stream->next->write(stream->next, &stream->current, 1));
		stream->current_chunk = 0;
	}

	return 0;
}
/*
 * Inflate: for each input byte, write CHUNKSIZE copies of that byte to
 * the downstream stream.
 */
static int compress_stream_write__inflated(struct compress_stream *stream, const char *buffer, size_t len)
{
	char inflated[CHUNKSIZE];
	size_t pos;

	for (pos = 0; pos < len; ++pos) {
		const char byte = buffer[pos];
		size_t fill = 0;

		while (fill < CHUNKSIZE)
			inflated[fill++] = byte;

		cl_git_pass(stream->next->write(stream->next, inflated, CHUNKSIZE));
	}

	return 0;
}
/*
 * Write callback for the compress stream: deflate when filtering
 * towards the ODB, inflate otherwise.
 */
static int compress_stream_write(git_writestream *s, const char *buffer, size_t len)
{
	struct compress_stream *stream = (struct compress_stream *)s;

	if (stream->mode == GIT_FILTER_TO_ODB)
		return compress_stream_write__deflated(stream, buffer, len);

	return compress_stream_write__inflated(stream, buffer, len);
}
/*
 * Close callback for the compress stream.
 *
 * Asserts that no partially consumed chunk is pending, then closes the
 * downstream stream and returns its result, so that a downstream close
 * failure is reported to the caller instead of being masked as success.
 *
 * @return the value returned by the downstream stream's close callback
 */
static int compress_stream_close(git_writestream *s)
{
	struct compress_stream *stream = (struct compress_stream *)s;

	cl_assert_equal_i(0, stream->current_chunk);

	return stream->next->close(stream->next);
}
/* Free callback for the compress stream: releases the stream itself. */
static void compress_stream_free(git_writestream *stream)
{
git__free(stream);
}
/*
 * Stream callback of the compress filter: allocates a compress_stream
 * that writes into `next` and whose direction is taken from `src`.
 * `self` and `payload` are not used.
 */
static int compress_filter_stream_init(
	git_writestream **out,
	git_filter *self,
	void **payload,
	const git_filter_source *src,
	git_writestream *next)
{
	struct compress_stream *cs = git__calloc(1, sizeof(*cs));
	cl_assert(cs);

	GIT_UNUSED(self);
	GIT_UNUSED(payload);

	cs->mode = git_filter_source_mode(src);
	cs->next = next;

	cs->parent.free = compress_stream_free;
	cs->parent.close = compress_stream_close;
	cs->parent.write = compress_stream_write;

	*out = (git_writestream *)cs;
	return 0;
}
/* Releases the memory of the filter passed in. */
static void compress_filter_free(git_filter *f)
{
git__free(f);
}
/*
 * Allocate the toy "compress" filter.  It provides only the streaming
 * callback (no one-shot apply) and applies to paths carrying the
 * `compress` attribute.  The returned filter frees itself through its
 * shutdown callback.
 */
git_filter *create_compress_filter(void)
{
	git_filter *f = git__calloc(1, sizeof(*f));
	cl_assert(f);

	f->version = GIT_FILTER_VERSION;
	f->attributes = "+compress";

	f->shutdown = compress_filter_free;
	f->stream = compress_filter_stream_init;

	return f;
}
/*
 * Create `filename` inside the sandbox repository and fill it with
 * `numchunks` chunks of CHUNKSIZE bytes.  Every byte of chunk number
 * `n` has the value `n % 256`.
 */
static void writefile(const char *filename, size_t numchunks)
{
	git_buf path = GIT_BUF_INIT;
	char buf[CHUNKSIZE];
	size_t chunk, off;
	int fd;

	cl_git_pass(git_buf_joinpath(&path, "empty_standard_repo", filename));

	fd = p_open(path.ptr, O_RDWR|O_CREAT, 0666);
	cl_assert(fd >= 0);

	chunk = 0;
	while (chunk < numchunks) {
		for (off = 0; off < CHUNKSIZE; off++)
			buf[off] = chunk % 256;

		cl_git_pass(p_write(fd, buf, CHUNKSIZE));
		chunk++;
	}

	p_close(fd);
	git_buf_free(&path);
}
/*
 * Round-trip a file of `numchunks` chunks through the compress filter:
 * add it to the index (deflating it on the way in), verify the stored
 * blob size, then force a checkout (inflating it on the way out) and
 * verify the size of the file written to disk.
 */
static void test_stream(size_t numchunks)
{
git_index *index;
const git_index_entry *entry;
git_blob *blob;
struct stat st;
git_checkout_options checkout_opts = GIT_CHECKOUT_OPTIONS_INIT;
checkout_opts.checkout_strategy = GIT_CHECKOUT_FORCE;
/* enable the compress attribute for every path */
cl_git_mkfile(
"empty_standard_repo/.gitattributes",
"* compress\n");
/* write a file to disk */
writefile("streamed_file", numchunks);
/* place it in the index */
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_add_bypath(index, "streamed_file"));
cl_git_pass(git_index_write(index));
/* ensure it was appropriately compressed: one byte per chunk */
cl_assert(entry = git_index_get_bypath(index, "streamed_file", 0));
cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id));
cl_assert_equal_i(numchunks, git_blob_rawsize(blob));
/* check the file back out */
cl_must_pass(p_unlink("empty_standard_repo/streamed_file"));
cl_git_pass(git_checkout_index(g_repo, index, &checkout_opts));
/* ensure it was decompressed back to CHUNKSIZE bytes per chunk */
cl_must_pass(p_stat("empty_standard_repo/streamed_file", &st));
cl_assert_equal_sz((numchunks * CHUNKSIZE), st.st_size);
git_index_free(index);
git_blob_free(blob);
}
/* write a 50KB file (5 chunks of CHUNKSIZE bytes) through the "compression" stream */
void test_filter_stream__smallfile(void)
{
test_stream(5);
}
/*
 * optionally write a 500 MB file (51200 chunks of CHUNKSIZE bytes)
 * through the compression stream; skipped unless the
 * GITTEST_INVASIVE_FS_SIZE environment variable is set
 */
void test_filter_stream__bigfile(void)
{
if (!cl_getenv("GITTEST_INVASIVE_FS_SIZE"))
cl_skip();
test_stream(51200);
}
......@@ -722,7 +722,7 @@ void test_repo_init__at_filesystem_root(void)
git_buf root = GIT_BUF_INIT;
int root_len;
if (!cl_getenv("GITTEST_INVASIVE_FILESYSTEM"))
if (!cl_getenv("GITTEST_INVASIVE_FS_STRUCTURE"))
cl_skip();
root_len = git_path_root(sandbox);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment