Commit d15884ce by Carlos Martín Nieto

Merge pull request #2911 from ethomson/streaming_filters

Streaming filters
parents 818302c8 feb0e022
...@@ -15,6 +15,7 @@ compiler: ...@@ -15,6 +15,7 @@ compiler:
env: env:
global: global:
- secure: "YnhS+8n6B+uoyaYfaJ3Lei7cSJqHDPiKJCKFIF2c87YDfmCvAJke8QtE7IzjYDs7UFkTCM4ox+ph2bERUrxZbSCyEkHdjIZpKuMJfYWja/jgMqTMxdyOH9y8JLFbZsSXDIXDwqBlC6vVyl1fP90M35wuWcNTs6tctfVWVofEFbs=" - secure: "YnhS+8n6B+uoyaYfaJ3Lei7cSJqHDPiKJCKFIF2c87YDfmCvAJke8QtE7IzjYDs7UFkTCM4ox+ph2bERUrxZbSCyEkHdjIZpKuMJfYWja/jgMqTMxdyOH9y8JLFbZsSXDIXDwqBlC6vVyl1fP90M35wuWcNTs6tctfVWVofEFbs="
- GITTEST_INVASIVE_FS_SIZE=1
matrix: matrix:
- OPTIONS="-DTHREADSAFE=ON -DCMAKE_BUILD_TYPE=Release" - OPTIONS="-DTHREADSAFE=ON -DCMAKE_BUILD_TYPE=Release"
- OPTIONS="-DTHREADSAFE=OFF -DBUILD_EXAMPLES=ON" - OPTIONS="-DTHREADSAFE=OFF -DBUILD_EXAMPLES=ON"
......
...@@ -3,7 +3,8 @@ branches: ...@@ -3,7 +3,8 @@ branches:
only: only:
- master - master
environment: environment:
GITTEST_INVASIVE_FILESYSTEM: 1 GITTEST_INVASIVE_FS_STRUCTURE: 1
GITTEST_INVASIVE_FS_SIZE: 1
matrix: matrix:
- GENERATOR: "Visual Studio 11" - GENERATOR: "Visual Studio 11"
......
...@@ -39,9 +39,9 @@ typedef enum { ...@@ -39,9 +39,9 @@ typedef enum {
* Filter option flags. * Filter option flags.
*/ */
typedef enum { typedef enum {
GIT_FILTER_OPT_DEFAULT = 0u, GIT_FILTER_DEFAULT = 0u,
GIT_FILTER_OPT_ALLOW_UNSAFE = (1u << 0), GIT_FILTER_ALLOW_UNSAFE = (1u << 0),
} git_filter_opt_t; } git_filter_flag_t;
/** /**
* A filter that can transform file data * A filter that can transform file data
...@@ -83,7 +83,7 @@ typedef struct git_filter_list git_filter_list; ...@@ -83,7 +83,7 @@ typedef struct git_filter_list git_filter_list;
* @param blob The blob to which the filter will be applied (if known) * @param blob The blob to which the filter will be applied (if known)
* @param path Relative path of the file to be filtered * @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT) * @param mode Filtering direction (WT->ODB or ODB->WT)
* @param options Combination of `git_filter_opt_t` flags * @param flags Combination of `git_filter_flag_t` flags
* @return 0 on success (which could still return NULL if no filters are * @return 0 on success (which could still return NULL if no filters are
* needed for the requested file), <0 on error * needed for the requested file), <0 on error
*/ */
...@@ -93,7 +93,7 @@ GIT_EXTERN(int) git_filter_list_load( ...@@ -93,7 +93,7 @@ GIT_EXTERN(int) git_filter_list_load(
git_blob *blob, /* can be NULL */ git_blob *blob, /* can be NULL */
const char *path, const char *path,
git_filter_mode_t mode, git_filter_mode_t mode,
uint32_t options); uint32_t flags);
/** /**
* Apply filter list to a data buffer. * Apply filter list to a data buffer.
...@@ -137,6 +137,22 @@ GIT_EXTERN(int) git_filter_list_apply_to_blob( ...@@ -137,6 +137,22 @@ GIT_EXTERN(int) git_filter_list_apply_to_blob(
git_filter_list *filters, git_filter_list *filters,
git_blob *blob); git_blob *blob);
/**
 * Apply a filter list to a data buffer, sending the result to a stream.
 *
 * The contents of `data` are written into the filter chain and the chain
 * is then closed.
 *
 * @param filters the list of filters to apply; may be NULL, in which case
 *                the data is written to `target` unfiltered
 * @param data the buffer to filter
 * @param target the stream that receives the filtered output
 * @return 0 on success, an error code otherwise
 */
GIT_EXTERN(int) git_filter_list_stream_data(
git_filter_list *filters,
git_buf *data,
git_writestream *target);
/**
 * Apply a filter list to the contents of a file on disk, sending the
 * result to a stream.
 *
 * The file is read in fixed-size chunks that are written into the filter
 * chain; the chain is closed once the end of the file is reached.
 *
 * @param filters the list of filters to apply; may be NULL
 * @param repo the repository whose working directory is used when
 *             resolving `path`; may be NULL
 * @param path the path of the file to filter; a relative path is
 *             presumably taken relative to the working directory of
 *             `repo` -- TODO confirm against the implementation
 * @param target the stream that receives the filtered output
 * @return 0 on success, an error code otherwise
 */
GIT_EXTERN(int) git_filter_list_stream_file(
git_filter_list *filters,
git_repository *repo,
const char *path,
git_writestream *target);
/**
 * Apply a filter list to the raw contents of a blob, sending the result
 * to a stream.
 *
 * When `filters` is not NULL, the id of `blob` is recorded as the id of
 * the filter source before filtering starts.
 *
 * @param filters the list of filters to apply; may be NULL
 * @param blob the blob to filter
 * @param target the stream that receives the filtered output
 * @return 0 on success, an error code otherwise (-1 if the blob is too
 *         large to filter)
 */
GIT_EXTERN(int) git_filter_list_stream_blob(
git_filter_list *filters,
git_blob *blob,
git_writestream *target);
/** /**
* Free a git_filter_list * Free a git_filter_list
* *
......
...@@ -123,9 +123,9 @@ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src); ...@@ -123,9 +123,9 @@ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src);
GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src); GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src);
/** /**
* Get the combination git_filter_opt_t options to be applied * Get the combination git_filter_flag_t options to be applied
*/ */
GIT_EXTERN(uint32_t) git_filter_source_options(const git_filter_source *src); GIT_EXTERN(uint32_t) git_filter_source_flags(const git_filter_source *src);
/* /*
* struct git_filter * struct git_filter
...@@ -208,6 +208,13 @@ typedef int (*git_filter_apply_fn)( ...@@ -208,6 +208,13 @@ typedef int (*git_filter_apply_fn)(
const git_buf *from, const git_buf *from,
const git_filter_source *src); const git_filter_source *src);
/**
 * Callback to create a write stream that performs this filter's work.
 *
 * Called once per filter while the chain of streams for a filter list is
 * being assembled.  A filter that does not provide this callback must
 * provide `apply` instead.
 *
 * @param out receives the newly created stream; it is later released
 *            through its own `free` callback
 * @param self the filter the stream is created for
 * @param payload address of the payload slot kept for this filter in the
 *                filter list
 * @param src description of the source being filtered
 * @param next the stream that follows this one in the chain; presumably
 *             the new stream forwards its output to it -- TODO confirm
 * @return 0 on success, <0 on failure
 */
typedef int (*git_filter_stream_fn)(
git_writestream **out,
git_filter *self,
void **payload,
const git_filter_source *src,
git_writestream *next);
/** /**
* Callback to clean up after filtering has been applied * Callback to clean up after filtering has been applied
* *
...@@ -247,6 +254,7 @@ struct git_filter { ...@@ -247,6 +254,7 @@ struct git_filter {
git_filter_shutdown_fn shutdown; git_filter_shutdown_fn shutdown;
git_filter_check_fn check; git_filter_check_fn check;
git_filter_apply_fn apply; git_filter_apply_fn apply;
git_filter_stream_fn stream;
git_filter_cleanup_fn cleanup; git_filter_cleanup_fn cleanup;
}; };
......
...@@ -410,6 +410,15 @@ typedef enum { ...@@ -410,6 +410,15 @@ typedef enum {
GIT_SUBMODULE_RECURSE_ONDEMAND = 2, GIT_SUBMODULE_RECURSE_ONDEMAND = 2,
} git_submodule_recurse_t; } git_submodule_recurse_t;
/** A type to write in a streaming fashion, for example, for filters. */
typedef struct git_writestream git_writestream;
/*
 * Implementations embed this struct as their first member and cast the
 * `git_writestream *` they receive back to their own type.
 */
struct git_writestream {
/* Write `len` bytes from `buffer` to the stream; returns 0 on success. */
int (*write)(git_writestream *stream, const char *buffer, size_t len);
/* Signal that no more data will be written; returns 0 on success. */
int (*close)(git_writestream *stream);
/* Release the resources held by the stream. */
void (*free)(git_writestream *stream);
};
/** @} */ /** @} */
GIT_END_DECL GIT_END_DECL
......
...@@ -282,9 +282,8 @@ static int system_attr_file( ...@@ -282,9 +282,8 @@ static int system_attr_file(
* a consumer. This allows them to treat this as a regular `git_buf`, * a consumer. This allows them to treat this as a regular `git_buf`,
* but their call to `git_buf_free` will not attempt to free it. * but their call to `git_buf_free` will not attempt to free it.
*/ */
out->ptr = attr_session->sysdir.ptr; git_buf_attach_notowned(
out->size = attr_session->sysdir.size; out, attr_session->sysdir.ptr, attr_session->sysdir.size);
out->asize = 0;
return 0; return 0;
} }
......
...@@ -199,7 +199,7 @@ int git_blob__create_from_paths( ...@@ -199,7 +199,7 @@ int git_blob__create_from_paths(
/* Load the filters for writing this file to the ODB */ /* Load the filters for writing this file to the ODB */
error = git_filter_list_load( error = git_filter_list_load(
&fl, repo, NULL, hint_path, &fl, repo, NULL, hint_path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT); GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0) if (error < 0)
/* well, that didn't work */; /* well, that didn't work */;
...@@ -329,15 +329,13 @@ cleanup: ...@@ -329,15 +329,13 @@ cleanup:
int git_blob_is_binary(const git_blob *blob) int git_blob_is_binary(const git_blob *blob)
{ {
git_buf content; git_buf content = GIT_BUF_INIT;
assert(blob); assert(blob);
content.ptr = blob->odb_object->buffer; git_buf_attach_notowned(&content, blob->odb_object->buffer,
content.size = min(blob->odb_object->cached.size,
min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL); GIT_FILTER_BYTES_TO_CHECK_NUL));
content.asize = 0;
return git_buf_text_is_binary(&content); return git_buf_text_is_binary(&content);
} }
...@@ -359,7 +357,7 @@ int git_blob_filtered_content( ...@@ -359,7 +357,7 @@ int git_blob_filtered_content(
if (!(error = git_filter_list_load( if (!(error = git_filter_list_load(
&fl, git_blob_owner(blob), blob, path, &fl, git_blob_owner(blob), blob, path,
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT))) { GIT_FILTER_TO_WORKTREE, GIT_FILTER_DEFAULT))) {
error = git_filter_list_apply_to_blob(out, fl, blob); error = git_filter_list_apply_to_blob(out, fl, blob);
......
...@@ -500,6 +500,20 @@ void git_buf_attach(git_buf *buf, char *ptr, size_t asize) ...@@ -500,6 +500,20 @@ void git_buf_attach(git_buf *buf, char *ptr, size_t asize)
} }
} }
/*
 * Point `buf` at `size` bytes of memory at `ptr` that the buffer does not
 * own.  Anything `buf` had allocated is released first.  A `size` of zero
 * leaves `buf` as a freshly initialized buffer instead of borrowing `ptr`.
 */
void git_buf_attach_notowned(git_buf *buf, const char *ptr, size_t size)
{
	/* drop whatever the buffer currently owns */
	if (git_buf_is_allocated(buf))
		git_buf_free(buf);

	if (size == 0) {
		git_buf_init(buf, 0);
		return;
	}

	/* asize == 0 marks the contents as not allocated by the buffer */
	buf->asize = 0;
	buf->size = size;
	buf->ptr = (char *)ptr;
}
int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...) int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...)
{ {
va_list ap; va_list ap;
......
...@@ -74,6 +74,12 @@ extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b); ...@@ -74,6 +74,12 @@ extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
extern char *git_buf_detach(git_buf *buf); extern char *git_buf_detach(git_buf *buf);
extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize); extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
/* Populates a `git_buf` whose contents are not "owned" by the buffer:
 * later calls to `git_buf_free` will not free the memory at `ptr`.
 * Any memory previously allocated by `buf` is freed before attaching,
 * and a `size` of zero leaves `buf` initialized as an empty buffer.
 */
extern void git_buf_attach_notowned(
git_buf *buf, const char *ptr, size_t size);
/** /**
* Test if there have been any reallocation failures with this git_buf. * Test if there have been any reallocation failures with this git_buf.
* *
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "git2/diff.h" #include "git2/diff.h"
#include "git2/submodule.h" #include "git2/submodule.h"
#include "git2/sys/index.h" #include "git2/sys/index.h"
#include "git2/sys/filter.h"
#include "refs.h" #include "refs.h"
#include "repository.h" #include "repository.h"
...@@ -1371,39 +1372,37 @@ static int mkpath2file( ...@@ -1371,39 +1372,37 @@ static int mkpath2file(
return error; return error;
} }
static int buffer_to_file( struct checkout_stream {
checkout_data *data, git_writestream base;
struct stat *st, const char *path;
git_buf *buf, int fd;
const char *path, int open;
mode_t file_mode) };
{
int error;
if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
return error;
if ((error = git_futils_writebuffer( static int checkout_stream_write(
buf, path, data->opts.file_open_flags, file_mode)) < 0) git_writestream *s, const char *buffer, size_t len)
return error; {
struct checkout_stream *stream = (struct checkout_stream *)s;
int ret;
if (st) { if ((ret = p_write(stream->fd, buffer, len)) < 0)
data->perfdata.stat_calls++; giterr_set(GITERR_OS, "Could not write to '%s'", stream->path);
if ((error = p_stat(path, st)) < 0) { return ret;
giterr_set(GITERR_OS, "Error statting '%s'", path); }
return error;
}
}
if (GIT_PERMS_IS_EXEC(file_mode)) { static int checkout_stream_close(git_writestream *s)
data->perfdata.chmod_calls++; {
struct checkout_stream *stream = (struct checkout_stream *)s;
assert(stream && stream->open);
if ((error = p_chmod(path, file_mode)) < 0) stream->open = 0;
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path); return p_close(stream->fd);
} }
return error; static void checkout_stream_free(git_writestream *s)
{
GIT_UNUSED(s);
} }
static int blob_content_to_file( static int blob_content_to_file(
...@@ -1411,36 +1410,83 @@ static int blob_content_to_file( ...@@ -1411,36 +1410,83 @@ static int blob_content_to_file(
struct stat *st, struct stat *st,
git_blob *blob, git_blob *blob,
const char *path, const char *path,
const char * hint_path, const char *hint_path,
mode_t entry_filemode) mode_t entry_filemode)
{ {
int flags = data->opts.file_open_flags;
mode_t file_mode = data->opts.file_mode ? mode_t file_mode = data->opts.file_mode ?
data->opts.file_mode : entry_filemode; data->opts.file_mode : entry_filemode;
git_buf out = GIT_BUF_INIT; git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
struct checkout_stream writer;
mode_t mode;
git_filter_list *fl = NULL; git_filter_list *fl = NULL;
int fd;
int error = 0; int error = 0;
if (hint_path == NULL) if (hint_path == NULL)
hint_path = path; hint_path = path;
if (!data->opts.disable_filters) if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
error = git_filter_list__load_with_attr_session( return error;
&fl, data->repo, &data->attr_session, blob, hint_path,
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT); if (flags <= 0)
flags = O_CREAT | O_TRUNC | O_WRONLY;
if (!(mode = file_mode))
mode = GIT_FILEMODE_BLOB;
if (!error) if ((fd = p_open(path, flags, mode)) < 0) {
error = git_filter_list_apply_to_blob(&out, fl, blob); giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
return fd;
}
filter_opts.attr_session = &data->attr_session;
filter_opts.temp_buf = &data->tmp;
if (!data->opts.disable_filters &&
(error = git_filter_list__load_ext(
&fl, data->repo, blob, hint_path,
GIT_FILTER_TO_WORKTREE, &filter_opts)))
return error;
/* setup the writer */
memset(&writer, 0, sizeof(struct checkout_stream));
writer.base.write = checkout_stream_write;
writer.base.close = checkout_stream_close;
writer.base.free = checkout_stream_free;
writer.path = path;
writer.fd = fd;
writer.open = 1;
error = git_filter_list_stream_blob(fl, blob, (git_writestream *)&writer);
assert(writer.open == 0);
git_filter_list_free(fl); git_filter_list_free(fl);
if (!error) { if (error < 0)
error = buffer_to_file(data, st, &out, path, file_mode); return error;
st->st_mode = entry_filemode;
if (GIT_PERMS_IS_EXEC(mode)) {
data->perfdata.chmod_calls++;
git_buf_free(&out); if ((error = p_chmod(path, mode)) < 0) {
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
return error;
} }
}
if (st) {
data->perfdata.stat_calls++;
if ((error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error statting '%s'", path);
return error; return error;
}
st->st_mode = entry_filemode;
}
return 0;
} }
static int blob_content_to_link( static int blob_content_to_link(
...@@ -1958,6 +2004,7 @@ static int checkout_write_merge( ...@@ -1958,6 +2004,7 @@ static int checkout_write_merge(
git_merge_file_result result = {0}; git_merge_file_result result = {0};
git_filebuf output = GIT_FILEBUF_INIT; git_filebuf output = GIT_FILEBUF_INIT;
git_filter_list *fl = NULL; git_filter_list *fl = NULL;
git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
int error = 0; int error = 0;
if (data->opts.checkout_strategy & GIT_CHECKOUT_CONFLICT_STYLE_DIFF3) if (data->opts.checkout_strategy & GIT_CHECKOUT_CONFLICT_STYLE_DIFF3)
...@@ -2007,9 +2054,12 @@ static int checkout_write_merge( ...@@ -2007,9 +2054,12 @@ static int checkout_write_merge(
in_data.ptr = (char *)result.ptr; in_data.ptr = (char *)result.ptr;
in_data.size = result.len; in_data.size = result.len;
if ((error = git_filter_list__load_with_attr_session( filter_opts.attr_session = &data->attr_session;
&fl, data->repo, &data->attr_session, NULL, git_buf_cstr(&path_workdir), filter_opts.temp_buf = &data->tmp;
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT)) < 0 ||
if ((error = git_filter_list__load_ext(
&fl, data->repo, NULL, git_buf_cstr(&path_workdir),
GIT_FILTER_TO_WORKTREE, &filter_opts)) < 0 ||
(error = git_filter_list_apply_to_data(&out_data, fl, &in_data)) < 0) (error = git_filter_list_apply_to_data(&out_data, fl, &in_data)) < 0)
goto done; goto done;
} else { } else {
......
...@@ -302,7 +302,7 @@ static int crlf_check( ...@@ -302,7 +302,7 @@ static int crlf_check(
return error; return error;
/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) && if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
ca.safe_crlf == GIT_SAFE_CRLF_FAIL) ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
ca.safe_crlf = GIT_SAFE_CRLF_WARN; ca.safe_crlf = GIT_SAFE_CRLF_WARN;
} }
......
...@@ -600,7 +600,7 @@ int git_diff__oid_for_entry( ...@@ -600,7 +600,7 @@ int git_diff__oid_for_entry(
error = -1; error = -1;
} else if (!(error = git_filter_list_load( } else if (!(error = git_filter_list_load(
&fl, diff->repo, NULL, entry.path, &fl, diff->repo, NULL, entry.path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE))) GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)))
{ {
int fd = git_futils_open_ro(full_path.ptr); int fd = git_futils_open_ro(full_path.ptr);
if (fd < 0) if (fd < 0)
......
...@@ -418,14 +418,13 @@ void git_diff_driver_update_options( ...@@ -418,14 +418,13 @@ void git_diff_driver_update_options(
int git_diff_driver_content_is_binary( int git_diff_driver_content_is_binary(
git_diff_driver *driver, const char *content, size_t content_len) git_diff_driver *driver, const char *content, size_t content_len)
{ {
git_buf search; git_buf search = GIT_BUF_INIT;
search.ptr = (char *)content;
search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);
search.asize = 0;
GIT_UNUSED(driver); GIT_UNUSED(driver);
git_buf_attach_notowned(&search, content,
min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL));
/* TODO: provide encoding / binary detection callbacks that can /* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart, * be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses. * let's just use the simple NUL-byte detection that core git uses.
......
...@@ -302,7 +302,7 @@ static int diff_file_content_load_workdir_file( ...@@ -302,7 +302,7 @@ static int diff_file_content_load_workdir_file(
if ((error = git_filter_list_load( if ((error = git_filter_list_load(
&fl, fc->repo, NULL, fc->file->path, &fl, fc->repo, NULL, fc->file->path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)) < 0) GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
goto cleanup; goto cleanup;
/* if there are no filters, try to mmap the file */ /* if there are no filters, try to mmap the file */
......
...@@ -23,7 +23,7 @@ struct git_filter_source { ...@@ -23,7 +23,7 @@ struct git_filter_source {
git_oid oid; /* zero if unknown (which is likely) */ git_oid oid; /* zero if unknown (which is likely) */
uint16_t filemode; /* zero if unknown */ uint16_t filemode; /* zero if unknown */
git_filter_mode_t mode; git_filter_mode_t mode;
uint32_t options; uint32_t flags;
}; };
typedef struct { typedef struct {
...@@ -34,6 +34,7 @@ typedef struct { ...@@ -34,6 +34,7 @@ typedef struct {
struct git_filter_list { struct git_filter_list {
git_array_t(git_filter_entry) filters; git_array_t(git_filter_entry) filters;
git_filter_source source; git_filter_source source;
git_buf *temp_buf;
char path[GIT_FLEX_ARRAY]; char path[GIT_FLEX_ARRAY];
}; };
...@@ -371,9 +372,9 @@ git_filter_mode_t git_filter_source_mode(const git_filter_source *src) ...@@ -371,9 +372,9 @@ git_filter_mode_t git_filter_source_mode(const git_filter_source *src)
return src->mode; return src->mode;
} }
uint32_t git_filter_source_options(const git_filter_source *src) uint32_t git_filter_source_flags(const git_filter_source *src)
{ {
return src->options; return src->flags;
} }
static int filter_list_new( static int filter_list_new(
...@@ -393,7 +394,7 @@ static int filter_list_new( ...@@ -393,7 +394,7 @@ static int filter_list_new(
fl->source.repo = src->repo; fl->source.repo = src->repo;
fl->source.path = fl->path; fl->source.path = fl->path;
fl->source.mode = src->mode; fl->source.mode = src->mode;
fl->source.options = src->options; fl->source.flags = src->flags;
*out = fl; *out = fl;
return 0; return 0;
...@@ -448,24 +449,23 @@ int git_filter_list_new( ...@@ -448,24 +449,23 @@ int git_filter_list_new(
git_filter_list **out, git_filter_list **out,
git_repository *repo, git_repository *repo,
git_filter_mode_t mode, git_filter_mode_t mode,
uint32_t options) uint32_t flags)
{ {
git_filter_source src = { 0 }; git_filter_source src = { 0 };
src.repo = repo; src.repo = repo;
src.path = NULL; src.path = NULL;
src.mode = mode; src.mode = mode;
src.options = options; src.flags = flags;
return filter_list_new(out, &src); return filter_list_new(out, &src);
} }
int git_filter_list__load_with_attr_session( int git_filter_list__load_ext(
git_filter_list **filters, git_filter_list **filters,
git_repository *repo, git_repository *repo,
git_attr_session *attr_session,
git_blob *blob, /* can be NULL */ git_blob *blob, /* can be NULL */
const char *path, const char *path,
git_filter_mode_t mode, git_filter_mode_t mode,
uint32_t options) git_filter_options *filter_opts)
{ {
int error = 0; int error = 0;
git_filter_list *fl = NULL; git_filter_list *fl = NULL;
...@@ -480,7 +480,8 @@ int git_filter_list__load_with_attr_session( ...@@ -480,7 +480,8 @@ int git_filter_list__load_with_attr_session(
src.repo = repo; src.repo = repo;
src.path = path; src.path = path;
src.mode = mode; src.mode = mode;
src.options = options; src.flags = filter_opts->flags;
if (blob) if (blob)
git_oid_cpy(&src.oid, git_blob_id(blob)); git_oid_cpy(&src.oid, git_blob_id(blob));
...@@ -493,7 +494,7 @@ int git_filter_list__load_with_attr_session( ...@@ -493,7 +494,7 @@ int git_filter_list__load_with_attr_session(
if (fdef->nattrs > 0) { if (fdef->nattrs > 0) {
error = filter_list_check_attributes( error = filter_list_check_attributes(
&values, repo, attr_session, fdef, &src); &values, repo, filter_opts->attr_session, fdef, &src);
if (error == GIT_ENOTFOUND) { if (error == GIT_ENOTFOUND) {
error = 0; error = 0;
...@@ -516,9 +517,13 @@ int git_filter_list__load_with_attr_session( ...@@ -516,9 +517,13 @@ int git_filter_list__load_with_attr_session(
else if (error < 0) else if (error < 0)
break; break;
else { else {
if (!fl && (error = filter_list_new(&fl, &src)) < 0) if (!fl) {
if ((error = filter_list_new(&fl, &src)) < 0)
return error; return error;
fl->temp_buf = filter_opts->temp_buf;
}
fe = git_array_alloc(fl->filters); fe = git_array_alloc(fl->filters);
GITERR_CHECK_ALLOC(fe); GITERR_CHECK_ALLOC(fe);
fe->filter = fdef->filter; fe->filter = fdef->filter;
...@@ -542,10 +547,14 @@ int git_filter_list_load( ...@@ -542,10 +547,14 @@ int git_filter_list_load(
git_blob *blob, /* can be NULL */ git_blob *blob, /* can be NULL */
const char *path, const char *path,
git_filter_mode_t mode, git_filter_mode_t mode,
uint32_t options) uint32_t flags)
{ {
return git_filter_list__load_with_attr_session( git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
filters, repo, NULL, blob, path, mode, options);
filter_opts.flags = flags;
return git_filter_list__load_ext(
filters, repo, blob, path, mode, &filter_opts);
} }
void git_filter_list_free(git_filter_list *fl) void git_filter_list_free(git_filter_list *fl)
...@@ -600,128 +609,347 @@ size_t git_filter_list_length(const git_filter_list *fl) ...@@ -600,128 +609,347 @@ size_t git_filter_list_length(const git_filter_list *fl)
return fl ? git_array_size(fl->filters) : 0; return fl ? git_array_size(fl->filters) : 0;
} }
static int filter_list_out_buffer_from_raw( struct buf_stream {
git_buf *out, const void *ptr, size_t size) git_writestream parent;
git_buf *target;
bool complete;
};
static int buf_stream_write(
git_writestream *s, const char *buffer, size_t len)
{ {
if (git_buf_is_allocated(out)) struct buf_stream *buf_stream = (struct buf_stream *)s;
git_buf_free(out); assert(buf_stream);
if (!size) { assert(buf_stream->complete == 0);
git_buf_init(out, 0);
} else { return git_buf_put(buf_stream->target, buffer, len);
out->ptr = (char *)ptr; }
out->asize = 0;
out->size = size; static int buf_stream_close(git_writestream *s)
} {
struct buf_stream *buf_stream = (struct buf_stream *)s;
assert(buf_stream);
assert(buf_stream->complete == 0);
buf_stream->complete = 1;
return 0; return 0;
} }
/* `free` callback for a buf_stream: intentionally does nothing.  The
 * stream only borrows its target buffer, and the callers visible here
 * keep the buf_stream itself on the stack.
 */
static void buf_stream_free(git_writestream *s)
{
GIT_UNUSED(s);
}
/*
 * Set up `writer` as a write stream that collects everything written to
 * it in `target`.  `target` is passed through `git_buf_clear` so earlier
 * contents do not precede the streamed output.
 */
static void buf_stream_init(struct buf_stream *writer, git_buf *target)
{
	git_buf_clear(target);

	/* start from a zeroed stream, which also leaves `complete` unset */
	memset(writer, 0, sizeof(*writer));

	writer->target = target;
	writer->parent.free = buf_stream_free;
	writer->parent.close = buf_stream_close;
	writer->parent.write = buf_stream_write;
}
int git_filter_list_apply_to_data( int git_filter_list_apply_to_data(
git_buf *tgt, git_filter_list *fl, git_buf *src) git_buf *tgt, git_filter_list *filters, git_buf *src)
{ {
int error = 0; struct buf_stream writer;
uint32_t i; int error;
git_buf *dbuffer[2], local = GIT_BUF_INIT;
unsigned int si = 0;
git_buf_sanitize(tgt); git_buf_sanitize(tgt);
git_buf_sanitize(src); git_buf_sanitize(src);
if (!fl) if (!filters) {
return filter_list_out_buffer_from_raw(tgt, src->ptr, src->size); git_buf_attach_notowned(tgt, src->ptr, src->size);
return 0;
}
buf_stream_init(&writer, tgt);
if ((error = git_filter_list_stream_data(filters, src,
(git_writestream *)&writer)) < 0)
return error;
assert(writer.complete);
return error;
}
int git_filter_list_apply_to_file(
git_buf *out,
git_filter_list *filters,
git_repository *repo,
const char *path)
{
struct buf_stream writer;
int error;
buf_stream_init(&writer, out);
dbuffer[0] = src; if ((error = git_filter_list_stream_file(
dbuffer[1] = tgt; filters, repo, path, (git_writestream *)&writer)) < 0)
return error;
/* if `src` buffer is reallocable, then use it, otherwise copy it */ assert(writer.complete);
if (!git_buf_is_allocated(src)) { return error;
if (git_buf_set(&local, src->ptr, src->size) < 0) }
static int buf_from_blob(git_buf *out, git_blob *blob)
{
git_off_t rawsize = git_blob_rawsize(blob);
if (!git__is_sizet(rawsize)) {
giterr_set(GITERR_OS, "Blob is too large to filter");
return -1; return -1;
dbuffer[0] = &local;
} }
for (i = 0; i < git_array_size(fl->filters); ++i) { git_buf_attach_notowned(out, git_blob_rawcontent(blob), (size_t)rawsize);
unsigned int di = 1 - si; return 0;
uint32_t fidx = (fl->source.mode == GIT_FILTER_TO_WORKTREE) ? }
i : git_array_size(fl->filters) - 1 - i;
git_filter_entry *fe = git_array_get(fl->filters, fidx); int git_filter_list_apply_to_blob(
git_buf *out,
dbuffer[di]->size = 0; git_filter_list *filters,
git_blob *blob)
/* Apply the filter from dbuffer[src] to the other buffer; {
* if the filtering is canceled by the user mid-filter, struct buf_stream writer;
* we skip to the next filter without changing the source int error;
* of the double buffering (so that the text goes through
* cleanly). buf_stream_init(&writer, out);
*/
error = fe->filter->apply( if ((error = git_filter_list_stream_blob(
fe->filter, &fe->payload, dbuffer[di], dbuffer[si], &fl->source); filters, blob, (git_writestream *)&writer)) < 0)
return error;
assert(writer.complete);
return error;
}
/*
 * Write stream that adapts a filter offering only the one-shot `apply`
 * callback: data written to the stream is accumulated, and the filter is
 * applied to all of it when the stream is closed.
 */
struct proxy_stream {
/* stream interface; first member so the struct can be cast to it */
git_writestream parent;
/* filter whose `apply` callback is invoked on close */
git_filter *filter;
/* description of the source being filtered, handed to `apply` */
const git_filter_source *source;
/* address of the filter's payload slot, handed to `apply` */
void **payload;
/* accumulates everything written to the stream */
git_buf input;
/* output buffer owned by the stream, used when none is supplied */
git_buf temp_buf;
/* where `apply` places its result: a caller-supplied buffer or `temp_buf` */
git_buf *output;
/* next stream in the chain; receives the result when this one closes */
git_writestream *target;
};
/*
 * `write` callback of a proxy stream: the data is only buffered here;
 * nothing is passed downstream by this function.
 * Returns the result of appending to the input buffer.
 */
static int proxy_stream_write(
	git_writestream *s, const char *buffer, size_t len)
{
	struct proxy_stream *ps = (struct proxy_stream *)s;
	git_buf *pending;

	assert(ps);

	pending = &ps->input;
	return git_buf_put(pending, buffer, len);
}
static int proxy_stream_close(git_writestream *s)
{
struct proxy_stream *proxy_stream = (struct proxy_stream *)s;
git_buf *writebuf;
int error;
assert(proxy_stream);
error = proxy_stream->filter->apply(
proxy_stream->filter,
proxy_stream->payload,
proxy_stream->output,
&proxy_stream->input,
proxy_stream->source);
if (error == GIT_PASSTHROUGH) { if (error == GIT_PASSTHROUGH) {
/* PASSTHROUGH means filter decided not to process the buffer */ writebuf = &proxy_stream->input;
error = 0; } else if (error == 0) {
} else if (!error) { git_buf_sanitize(proxy_stream->output);
git_buf_sanitize(dbuffer[di]); /* force NUL termination */ writebuf = proxy_stream->output;
si = di; /* swap buffers */
} else { } else {
tgt->size = 0; return error;
goto cleanup;
} }
if ((error = proxy_stream->target->write(
proxy_stream->target, writebuf->ptr, writebuf->size)) == 0)
error = proxy_stream->target->close(proxy_stream->target);
return error;
}
/*
 * `free` callback of a proxy stream: releases the two buffers embedded in
 * the stream and then the stream itself.  A caller-supplied output buffer
 * is not touched.
 */
static void proxy_stream_free(git_writestream *s)
{
	struct proxy_stream *ps = (struct proxy_stream *)s;

	assert(ps);

	git_buf_free(&ps->temp_buf);
	git_buf_free(&ps->input);
	git__free(ps);
}
/*
 * Allocate a proxy stream that buffers its input for `filter` and hands
 * the result to `target`.
 *
 * out      - receives the new stream on success
 * filter   - filter to run through the proxy
 * temp_buf - optional buffer to hold the filter output; when NULL the
 *            stream's own embedded buffer is used
 * payload  - address of the filter's payload slot
 * source   - description of the source being filtered
 * target   - next stream in the chain
 *
 * Returns 0 on success; an allocation failure is reported through
 * GITERR_CHECK_ALLOC.
 */
static int proxy_stream_init(
	git_writestream **out,
	git_filter *filter,
	git_buf *temp_buf,
	void **payload,
	const git_filter_source *source,
	git_writestream *target)
{
	struct proxy_stream *ps = git__calloc(1, sizeof(*ps));
	GITERR_CHECK_ALLOC(ps);

	ps->filter = filter;
	ps->source = source;
	ps->payload = payload;
	ps->target = target;

	/* prefer the shared scratch buffer when the caller provides one */
	if (temp_buf)
		ps->output = temp_buf;
	else
		ps->output = &ps->temp_buf;

	ps->parent.free = proxy_stream_free;
	ps->parent.close = proxy_stream_close;
	ps->parent.write = proxy_stream_write;

	/* `parent` is the first member, so this is the stream's own address */
	*out = &ps->parent;
	return 0;
}
static int stream_list_init(
git_writestream **out,
git_vector *streams,
git_filter_list *filters,
git_writestream *target)
{
git_writestream *last_stream = target;
size_t i;
int error = 0;
*out = NULL;
if (!filters) {
*out = target;
return 0;
} }
/* Ensure that the output ends up in dbuffer[1] (i.e. the dest) */ /* Create filters last to first to get the chaining direction */
if (si != 1) for (i = 0; i < git_array_size(filters->filters); ++i) {
git_buf_swap(dbuffer[0], dbuffer[1]); size_t filter_idx = (filters->source.mode == GIT_FILTER_TO_WORKTREE) ?
git_array_size(filters->filters) - 1 - i : i;
git_filter_entry *fe = git_array_get(filters->filters, filter_idx);
git_writestream *filter_stream;
cleanup: assert(fe->filter->stream || fe->filter->apply);
git_buf_free(&local); /* don't leak if we allocated locally */
/* If necessary, create a stream that proxies the traditional
* application.
*/
if (fe->filter->stream)
error = fe->filter->stream(&filter_stream, fe->filter,
&fe->payload, &filters->source, last_stream);
else
/* Create a stream that proxies the one-shot apply */
error = proxy_stream_init(&filter_stream, fe->filter,
filters->temp_buf, &fe->payload, &filters->source,
last_stream);
if (error < 0)
return error; return error;
git_vector_insert(streams, filter_stream);
last_stream = filter_stream;
}
*out = last_stream;
return 0;
} }
int git_filter_list_apply_to_file( void stream_list_free(git_vector *streams)
git_buf *out, {
git_writestream *stream;
size_t i;
git_vector_foreach(streams, i, stream)
stream->free(stream);
git_vector_free(streams);
}
#define STREAM_BUFSIZE 10240
/* TODO: maybe not use filter_stream as a target but create one */
int git_filter_list_stream_file(
git_filter_list *filters, git_filter_list *filters,
git_repository *repo, git_repository *repo,
const char *path) const char *path,
git_writestream *target)
{ {
int error; char buf[STREAM_BUFSIZE];
git_buf abspath = GIT_BUF_INIT;
const char *base = repo ? git_repository_workdir(repo) : NULL; const char *base = repo ? git_repository_workdir(repo) : NULL;
git_buf abspath = GIT_BUF_INIT, raw = GIT_BUF_INIT; git_vector filter_streams = GIT_VECTOR_INIT;
git_writestream *stream_start;
if (!(error = git_path_join_unrooted(&abspath, path, base, NULL)) && ssize_t readlen;
!(error = git_futils_readbuffer(&raw, abspath.ptr))) int fd, error;
{
error = git_filter_list_apply_to_data(out, filters, &raw); if ((error = stream_list_init(
&stream_start, &filter_streams, filters, target)) < 0 ||
(error = git_path_join_unrooted(&abspath, path, base, NULL)) < 0)
goto done;
if ((fd = git_futils_open_ro(path)) < 0) {
error = fd;
goto done;
}
git_buf_free(&raw); while ((readlen = p_read(fd, buf, STREAM_BUFSIZE)) > 0) {
if ((error = stream_start->write(stream_start, buf, readlen)) < 0)
goto done;
} }
if (!readlen)
error = stream_start->close(stream_start);
else if (readlen < 0)
error = readlen;
p_close(fd);
done:
stream_list_free(&filter_streams);
git_buf_free(&abspath); git_buf_free(&abspath);
return error; return error;
} }
int git_filter_list_apply_to_blob( int git_filter_list_stream_data(
git_buf *out,
git_filter_list *filters, git_filter_list *filters,
git_blob *blob) git_buf *data,
git_writestream *target)
{
git_vector filter_streams = GIT_VECTOR_INIT;
git_writestream *stream_start;
int error = 0;
git_buf_sanitize(data);
if ((error = stream_list_init(
&stream_start, &filter_streams, filters, target)) == 0 &&
(error =
stream_start->write(stream_start, data->ptr, data->size)) == 0)
error = stream_start->close(stream_start);
stream_list_free(&filter_streams);
return error;
}
int git_filter_list_stream_blob(
git_filter_list *filters,
git_blob *blob,
git_writestream *target)
{ {
git_buf in = GIT_BUF_INIT; git_buf in = GIT_BUF_INIT;
git_off_t rawsize = git_blob_rawsize(blob);
if (!git__is_sizet(rawsize)) { if (buf_from_blob(&in, blob) < 0)
giterr_set(GITERR_OS, "Blob is too large to filter");
return -1; return -1;
}
in.ptr = (char *)git_blob_rawcontent(blob);
in.asize = 0;
in.size = (size_t)rawsize;
if (filters) if (filters)
git_oid_cpy(&filters->source.oid, git_blob_id(blob)); git_oid_cpy(&filters->source.oid, git_blob_id(blob));
return git_filter_list_apply_to_data(out, filters, &in); return git_filter_list_stream_data(filters, &in, target);
} }
...@@ -24,16 +24,23 @@ typedef enum { ...@@ -24,16 +24,23 @@ typedef enum {
GIT_CRLF_AUTO, GIT_CRLF_AUTO,
} git_crlf_t; } git_crlf_t;
typedef struct {
git_attr_session *attr_session;
git_buf *temp_buf;
uint32_t flags;
} git_filter_options;
#define GIT_FILTER_OPTIONS_INIT {0}
extern void git_filter_free(git_filter *filter); extern void git_filter_free(git_filter *filter);
extern int git_filter_list__load_with_attr_session( extern int git_filter_list__load_ext(
git_filter_list **filters, git_filter_list **filters,
git_repository *repo, git_repository *repo,
git_attr_session *attr_session,
git_blob *blob, /* can be NULL */ git_blob *blob, /* can be NULL */
const char *path, const char *path,
git_filter_mode_t mode, git_filter_mode_t mode,
uint32_t options); git_filter_options *filter_opts);
/* /*
* Available filters * Available filters
......
...@@ -1849,7 +1849,7 @@ int git_repository_hashfile( ...@@ -1849,7 +1849,7 @@ int git_repository_hashfile(
if (strlen(as_path) > 0) { if (strlen(as_path) > 0) {
error = git_filter_list_load( error = git_filter_list_load(
&fl, repo, NULL, as_path, &fl, repo, NULL, as_path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT); GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0) if (error < 0)
return error; return error;
} else { } else {
......
...@@ -123,7 +123,7 @@ void test_filter_crlf__with_safecrlf_and_unsafe_allowed(void) ...@@ -123,7 +123,7 @@ void test_filter_crlf__with_safecrlf_and_unsafe_allowed(void)
cl_repo_set_bool(g_repo, "core.safecrlf", true); cl_repo_set_bool(g_repo, "core.safecrlf", true);
cl_git_pass(git_filter_list_new( cl_git_pass(git_filter_list_new(
&fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)); &fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE));
crlf = git_filter_lookup(GIT_FILTER_CRLF); crlf = git_filter_lookup(GIT_FILTER_CRLF);
cl_assert(crlf != NULL); cl_assert(crlf != NULL);
......
#include "clar_libgit2.h"
#include "posix.h"
#include "blob.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
/* Sandbox repository used by the tests; set in initialize, reset to NULL in cleanup. */
static git_repository *g_repo = NULL;
/* Forward declaration: the factory is defined after the stream callbacks it wires up. */
static git_filter *create_compress_filter(void);
/* Filter instance that initialize registers under the name "compress". */
static git_filter *compress_filter;
void test_filter_stream__initialize(void)
{
compress_filter = create_compress_filter();
cl_git_pass(git_filter_register("compress", compress_filter, 50));
g_repo = cl_git_sandbox_init("empty_standard_repo");
}
/*
 * Per-test teardown: tear down the sandbox, forget the repository handle,
 * and remove the "compress" filter registration made in initialize.
 */
void test_filter_stream__cleanup(void)
{
	/* the sandbox goes first; the handle is meaningless afterwards */
	cl_git_sandbox_cleanup();
	g_repo = NULL;

	git_filter_unregister("compress");
}
/*
 * Size of one "compression" chunk: when deflating, every CHUNKSIZE input
 * bytes are emitted as a single byte; when inflating, every input byte is
 * emitted as CHUNKSIZE copies of itself.
 */
#define CHUNKSIZE 10240
/*
 * State for one instance of the toy compression stream.
 */
struct compress_stream {
	/* callbacks; kept as the first member because the code casts between
	 * git_writestream * and struct compress_stream * */
	git_writestream parent;
	/* stream that receives this stream's output */
	git_writestream *next;
	/* filtering direction; GIT_FILTER_TO_ODB selects deflating, anything
	 * else selects inflating */
	git_filter_mode_t mode;
	/* deflate only: first byte seen in the chunk currently being consumed */
	char current;
	/* deflate only: number of bytes consumed so far in the current chunk */
	size_t current_chunk;
};
/*
 * Deflate direction: collapse the incoming bytes so that each run of
 * CHUNKSIZE input bytes produces exactly one output byte (the first byte of
 * that run).  Chunk progress is kept in the stream, so a chunk may span
 * several calls.
 *
 * Always returns 0; a failing downstream write trips cl_git_pass instead.
 */
static int compress_stream_write__deflated(struct compress_stream *stream, const char *buffer, size_t len)
{
	const char *cursor = buffer;
	size_t pending = len;

	while (pending != 0) {
		size_t room, taken;

		/* remember the representative byte when a new chunk begins */
		if (!stream->current_chunk)
			stream->current = *cursor;

		/* consume as much as fits into the current chunk */
		room = CHUNKSIZE - stream->current_chunk;
		taken = (pending < room) ? pending : room;

		stream->current_chunk += taken;
		cursor += taken;
		pending -= taken;

		/* chunk still incomplete: only possible once the input ran out */
		if (stream->current_chunk != CHUNKSIZE)
			continue;

		cl_git_pass(stream->next->write(stream->next, &stream->current, 1));
		stream->current_chunk = 0;
	}

	return 0;
}
/*
 * Inflate direction: for every input byte, hand the next stream a block of
 * CHUNKSIZE copies of that byte.
 *
 * Always returns 0; a failing downstream write trips cl_git_pass instead.
 */
static int compress_stream_write__inflated(struct compress_stream *stream, const char *buffer, size_t len)
{
	char inflated[CHUNKSIZE];
	size_t pos;

	for (pos = 0; pos != len; ++pos) {
		const char fill = buffer[pos];
		size_t slot = CHUNKSIZE;

		/* fill the whole scratch block with the current byte */
		while (slot > 0)
			inflated[--slot] = fill;

		cl_git_pass(stream->next->write(stream->next, inflated, CHUNKSIZE));
	}

	return 0;
}
/*
 * git_writestream write callback: dispatch to the deflating or inflating
 * writer depending on the direction recorded when the stream was created.
 */
static int compress_stream_write(git_writestream *s, const char *buffer, size_t len)
{
	struct compress_stream *stream = (struct compress_stream *)s;

	if (stream->mode == GIT_FILTER_TO_ODB)
		return compress_stream_write__deflated(stream, buffer, len);

	return compress_stream_write__inflated(stream, buffer, len);
}
/*
 * git_writestream close callback.
 *
 * Asserts that no partially consumed chunk is pending (i.e. the deflated
 * input was a whole number of CHUNKSIZE blocks), then closes the next
 * stream in the chain.
 *
 * Returns the result of closing the next stream, so that a downstream
 * failure is reported to the caller instead of being silently dropped.
 */
static int compress_stream_close(git_writestream *s)
{
	struct compress_stream *stream = (struct compress_stream *)s;

	cl_assert_equal_i(0, stream->current_chunk);

	return stream->next->close(stream->next);
}
/*
 * git_writestream free callback: release the stream object that
 * compress_filter_stream_init allocated.  The next stream is not touched.
 */
static void compress_stream_free(git_writestream *stream)
{
	git__free(stream);
}
/*
 * Filter `stream` callback: allocate a compress_stream that forwards its
 * output to `next` and works in the direction given by `src`.
 *
 * out     - receives the newly created stream
 * self    - unused
 * payload - unused
 * src     - filter source; only its mode is consulted
 * next    - stream that will receive this stream's output
 *
 * Returns 0; allocation failure trips cl_assert.
 */
static int compress_filter_stream_init(
	git_writestream **out,
	git_filter *self,
	void **payload,
	const git_filter_source *src,
	git_writestream *next)
{
	struct compress_stream *stream;

	GIT_UNUSED(self);
	GIT_UNUSED(payload);

	/* zeroed allocation: current and current_chunk start out as 0 */
	stream = git__calloc(1, sizeof(*stream));
	cl_assert(stream);

	stream->next = next;
	stream->mode = git_filter_source_mode(src);

	stream->parent.free = compress_stream_free;
	stream->parent.close = compress_stream_close;
	stream->parent.write = compress_stream_write;

	/* parent is the first member, so this is the same address as stream */
	*out = &stream->parent;
	return 0;
}
/*
 * Filter `shutdown` callback: release the filter object allocated by
 * create_compress_filter.
 */
static void compress_filter_free(git_filter *f)
{
	git__free(f);
}
/*
 * Allocate and populate the toy "compress" filter used by these tests.
 *
 * The filter applies to paths carrying the `compress` attribute
 * ("+compress"), streams through compress_filter_stream_init and frees
 * itself from its shutdown callback.
 *
 * Declared `static` here as well, matching the forward declaration at the
 * top of the file, so the internal linkage is visible at the definition.
 *
 * Returns the new filter; allocation failure trips cl_assert.
 */
static git_filter *create_compress_filter(void)
{
	git_filter *filter = git__calloc(1, sizeof(*filter));
	cl_assert(filter);

	filter->version = GIT_FILTER_VERSION;
	filter->attributes = "+compress";
	filter->stream = compress_filter_stream_init;
	filter->shutdown = compress_filter_free;

	return filter;
}
/*
 * Create `filename` inside the "empty_standard_repo" sandbox and fill it
 * with `numchunks` blocks of CHUNKSIZE bytes.  Every byte of block i has
 * the value i % 256.
 *
 * The file is truncated on open so that its size is exactly
 * numchunks * CHUNKSIZE even if it already existed, and the result of
 * closing the descriptor is checked so a failure there is not missed.
 */
static void writefile(const char *filename, size_t numchunks)
{
	git_buf path = GIT_BUF_INIT;
	char buf[CHUNKSIZE];
	size_t i = 0, j = 0;
	int fd;

	cl_git_pass(git_buf_joinpath(&path, "empty_standard_repo", filename));

	fd = p_open(path.ptr, O_RDWR|O_CREAT|O_TRUNC, 0666);
	/* the path is no longer needed; free it before the assertion so it
	 * is not leaked when the open failed */
	git_buf_free(&path);
	cl_assert(fd >= 0);

	for (i = 0; i < numchunks; i++) {
		for (j = 0; j < CHUNKSIZE; j++)
			buf[j] = (char)(i % 256);

		cl_git_pass(p_write(fd, buf, CHUNKSIZE));
	}

	cl_must_pass(p_close(fd));
}
/*
 * Round-trip a file of `numchunks` * CHUNKSIZE bytes through the
 * "compress" filter:
 *
 *  1. mark every path with the `compress` attribute,
 *  2. write the file and add it to the index (filtered towards the ODB),
 *  3. check the stored blob holds one byte per chunk,
 *  4. delete the file, force-checkout the index (filtered towards the
 *     working directory) and check the file is back at its full size.
 */
static void test_stream(size_t numchunks)
{
	git_checkout_options checkout_opts = GIT_CHECKOUT_OPTIONS_INIT;
	const git_index_entry *entry;
	git_index *index;
	git_blob *blob;
	struct stat st;

	checkout_opts.checkout_strategy = GIT_CHECKOUT_FORCE;

	/* route every file in the sandbox through the test filter */
	cl_git_mkfile(
		"empty_standard_repo/.gitattributes",
		"* compress\n");

	/* create the input file in the working directory */
	writefile("streamed_file", numchunks);

	/* stage it and persist the index */
	cl_git_pass(git_repository_index(&index, g_repo));
	cl_git_pass(git_index_add_bypath(index, "streamed_file"));
	cl_git_pass(git_index_write(index));

	/* the blob must contain exactly one byte per chunk */
	cl_assert(entry = git_index_get_bypath(index, "streamed_file", 0));
	cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id));
	cl_assert_equal_i(numchunks, git_blob_rawsize(blob));

	/* remove the working copy and restore it from the index */
	cl_must_pass(p_unlink("empty_standard_repo/streamed_file"));
	cl_git_pass(git_checkout_index(g_repo, index, &checkout_opts));

	/* the restored file must be back at its original size */
	cl_must_pass(p_stat("empty_standard_repo/streamed_file", &st));
	cl_assert_equal_sz((numchunks * CHUNKSIZE), st.st_size);

	git_blob_free(blob);
	git_index_free(index);
}
/* write a 50KB file through the "compression" stream */
void test_filter_stream__smallfile(void)
{
test_stream(5);
}
/* optionally write a 500 MB file through the compression stream */
void test_filter_stream__bigfile(void)
{
if (!cl_getenv("GITTEST_INVASIVE_FS_SIZE"))
cl_skip();
test_stream(51200);
}
...@@ -722,7 +722,7 @@ void test_repo_init__at_filesystem_root(void) ...@@ -722,7 +722,7 @@ void test_repo_init__at_filesystem_root(void)
git_buf root = GIT_BUF_INIT; git_buf root = GIT_BUF_INIT;
int root_len; int root_len;
if (!cl_getenv("GITTEST_INVASIVE_FILESYSTEM")) if (!cl_getenv("GITTEST_INVASIVE_FS_STRUCTURE"))
cl_skip(); cl_skip();
root_len = git_path_root(sandbox); root_len = git_path_root(sandbox);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment