Commit d15884ce by Carlos Martín Nieto

Merge pull request #2911 from ethomson/streaming_filters

Streaming filters
parents 818302c8 feb0e022
......@@ -15,6 +15,7 @@ compiler:
env:
global:
- secure: "YnhS+8n6B+uoyaYfaJ3Lei7cSJqHDPiKJCKFIF2c87YDfmCvAJke8QtE7IzjYDs7UFkTCM4ox+ph2bERUrxZbSCyEkHdjIZpKuMJfYWja/jgMqTMxdyOH9y8JLFbZsSXDIXDwqBlC6vVyl1fP90M35wuWcNTs6tctfVWVofEFbs="
- GITTEST_INVASIVE_FS_SIZE=1
matrix:
- OPTIONS="-DTHREADSAFE=ON -DCMAKE_BUILD_TYPE=Release"
- OPTIONS="-DTHREADSAFE=OFF -DBUILD_EXAMPLES=ON"
......
......@@ -3,7 +3,8 @@ branches:
only:
- master
environment:
GITTEST_INVASIVE_FILESYSTEM: 1
GITTEST_INVASIVE_FS_STRUCTURE: 1
GITTEST_INVASIVE_FS_SIZE: 1
matrix:
- GENERATOR: "Visual Studio 11"
......
......@@ -39,9 +39,9 @@ typedef enum {
* Filter option flags.
*/
typedef enum {
GIT_FILTER_OPT_DEFAULT = 0u,
GIT_FILTER_OPT_ALLOW_UNSAFE = (1u << 0),
} git_filter_opt_t;
GIT_FILTER_DEFAULT = 0u,
GIT_FILTER_ALLOW_UNSAFE = (1u << 0),
} git_filter_flag_t;
/**
* A filter that can transform file data
......@@ -83,7 +83,7 @@ typedef struct git_filter_list git_filter_list;
* @param blob The blob to which the filter will be applied (if known)
* @param path Relative path of the file to be filtered
* @param mode Filtering direction (WT->ODB or ODB->WT)
* @param options Combination of `git_filter_opt_t` flags
* @param flags Combination of `git_filter_flag_t` flags
* @return 0 on success (which could still return NULL if no filters are
* needed for the requested file), <0 on error
*/
......@@ -93,7 +93,7 @@ GIT_EXTERN(int) git_filter_list_load(
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
uint32_t options);
uint32_t flags);
/**
* Apply filter list to a data buffer.
......@@ -137,6 +137,22 @@ GIT_EXTERN(int) git_filter_list_apply_to_blob(
git_filter_list *filters,
git_blob *blob);
/**
 * Apply a filter list to a data buffer, writing the result to a stream.
 *
 * NOTE(review): described from the name and signature only; the
 * implementation is not visible here -- confirm before relying on it.
 *
 * @param filters the list of filters to apply
 * @param data the buffer holding the data to filter
 * @param target the stream that receives the filtered output
 * @return presumably 0 on success, <0 on error -- TODO confirm
 */
GIT_EXTERN(int) git_filter_list_stream_data(
git_filter_list *filters,
git_buf *data,
git_writestream *target);
/**
 * Apply a filter list to the contents of a file, writing the result to a
 * stream.
 *
 * NOTE(review): described from the name and signature only; the
 * implementation is not visible here -- confirm before relying on it.
 *
 * @param filters the list of filters to apply
 * @param repo the repository in which to perform the filtering
 * @param path the path of the file to filter (presumably relative to the
 *             repository's working directory -- TODO confirm)
 * @param target the stream that receives the filtered output
 * @return presumably 0 on success, <0 on error -- TODO confirm
 */
GIT_EXTERN(int) git_filter_list_stream_file(
git_filter_list *filters,
git_repository *repo,
const char *path,
git_writestream *target);
/**
 * Apply a filter list to the contents of a blob, writing the result to a
 * stream.
 *
 * The checkout code calls this with a NULL filter list when filtering is
 * disabled, treats a negative return value as an error, and asserts that
 * `target` has been closed by the time the call returns.
 *
 * @param filters the list of filters to apply (NULL is passed by checkout
 *                when no filters were loaded)
 * @param blob the blob whose contents are filtered
 * @param target the stream that receives the filtered output
 * @return <0 on error; presumably 0 on success -- TODO confirm
 */
GIT_EXTERN(int) git_filter_list_stream_blob(
git_filter_list *filters,
git_blob *blob,
git_writestream *target);
/**
* Free a git_filter_list
*
......
......@@ -123,9 +123,9 @@ GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src);
GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src);
/**
* Get the combination git_filter_opt_t options to be applied
* Get the combination of git_filter_flag_t flags to be applied
*/
GIT_EXTERN(uint32_t) git_filter_source_options(const git_filter_source *src);
GIT_EXTERN(uint32_t) git_filter_source_flags(const git_filter_source *src);
/*
* struct git_filter
......@@ -208,6 +208,13 @@ typedef int (*git_filter_apply_fn)(
const git_buf *from,
const git_filter_source *src);
/**
 * Callback to set up streaming for a filter.
 *
 * The callback stores a writestream for this filter in `*out`. Data
 * written to that stream is expected to be transformed by the filter and
 * passed on to `next`.
 *
 * @param out receives the newly created writestream
 * @param self the filter being applied
 * @param payload presumably the same per-source payload slot that the
 *                other filter callbacks receive -- TODO confirm
 * @param src describes the data being filtered (for example its mode,
 *            available through `git_filter_source_mode`)
 * @param next the stream that should receive this filter's output
 * @return presumably 0 on success, <0 on error -- TODO confirm
 */
typedef int (*git_filter_stream_fn)(
git_writestream **out,
git_filter *self,
void **payload,
const git_filter_source *src,
git_writestream *next);
/**
* Callback to clean up after filtering has been applied
*
......@@ -247,6 +254,7 @@ struct git_filter {
git_filter_shutdown_fn shutdown;
git_filter_check_fn check;
git_filter_apply_fn apply;
git_filter_stream_fn stream;
git_filter_cleanup_fn cleanup;
};
......
......@@ -410,6 +410,15 @@ typedef enum {
GIT_SUBMODULE_RECURSE_ONDEMAND = 2,
} git_submodule_recurse_t;
/** A type to write in a streaming fashion, for example, for filters. */
typedef struct git_writestream git_writestream;
struct git_writestream {
/** Write `len` bytes starting at `buffer` to the stream. */
int (*write)(git_writestream *stream, const char *buffer, size_t len);
/** Finish writing; called once no more data will be written. */
int (*close)(git_writestream *stream);
/** Release whatever resources the stream itself holds. */
void (*free)(git_writestream *stream);
};
/** @} */
GIT_END_DECL
......
......@@ -282,9 +282,8 @@ static int system_attr_file(
* a consumer. This allows them to treat this as a regular `git_buf`,
* but their call to `git_buf_free` will not attempt to free it.
*/
out->ptr = attr_session->sysdir.ptr;
out->size = attr_session->sysdir.size;
out->asize = 0;
git_buf_attach_notowned(
out, attr_session->sysdir.ptr, attr_session->sysdir.size);
return 0;
}
......
......@@ -199,7 +199,7 @@ int git_blob__create_from_paths(
/* Load the filters for writing this file to the ODB */
error = git_filter_list_load(
&fl, repo, NULL, hint_path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT);
GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0)
/* well, that didn't work */;
......@@ -329,15 +329,13 @@ cleanup:
int git_blob_is_binary(const git_blob *blob)
{
git_buf content;
git_buf content = GIT_BUF_INIT;
assert(blob);
content.ptr = blob->odb_object->buffer;
content.size =
min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL);
content.asize = 0;
git_buf_attach_notowned(&content, blob->odb_object->buffer,
min(blob->odb_object->cached.size,
GIT_FILTER_BYTES_TO_CHECK_NUL));
return git_buf_text_is_binary(&content);
}
......@@ -359,7 +357,7 @@ int git_blob_filtered_content(
if (!(error = git_filter_list_load(
&fl, git_blob_owner(blob), blob, path,
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT))) {
GIT_FILTER_TO_WORKTREE, GIT_FILTER_DEFAULT))) {
error = git_filter_list_apply_to_blob(out, fl, blob);
......
......@@ -500,6 +500,20 @@ void git_buf_attach(git_buf *buf, char *ptr, size_t asize)
}
}
/*
 * Point `buf` at `size` bytes of memory starting at `ptr` without taking
 * ownership of that memory.
 *
 * Anything `buf` had allocated beforehand is released first.  A zero
 * `size` simply resets `buf` through `git_buf_init`.  Otherwise the
 * buffer borrows `ptr` and records an allocated size of zero.
 */
void git_buf_attach_notowned(git_buf *buf, const char *ptr, size_t size)
{
	/* drop memory the buffer owns before it starts borrowing */
	if (git_buf_is_allocated(buf))
		git_buf_free(buf);

	if (size == 0) {
		git_buf_init(buf, 0);
		return;
	}

	buf->ptr = (char *)ptr;
	buf->size = size;
	buf->asize = 0;
}
int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...)
{
va_list ap;
......
......@@ -74,6 +74,12 @@ extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
extern char *git_buf_detach(git_buf *buf);
extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
/* Populates a `git_buf` with `size` bytes at `ptr`, where the contents
 * are not "owned" by the buffer: calls to `git_buf_free` will not free
 * the given `ptr`.
 */
extern void git_buf_attach_notowned(
git_buf *buf, const char *ptr, size_t size);
/**
* Test if there have been any reallocation failures with this git_buf.
*
......
......@@ -17,6 +17,7 @@
#include "git2/diff.h"
#include "git2/submodule.h"
#include "git2/sys/index.h"
#include "git2/sys/filter.h"
#include "refs.h"
#include "repository.h"
......@@ -1371,39 +1372,37 @@ static int mkpath2file(
return error;
}
static int buffer_to_file(
checkout_data *data,
struct stat *st,
git_buf *buf,
const char *path,
mode_t file_mode)
{
int error;
if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
return error;
/* A git_writestream that writes the data it receives to an open file
 * descriptor. `base` comes first so that the `git_writestream *` handed
 * to the callbacks can be cast back to a `struct checkout_stream *`.
 */
struct checkout_stream {
git_writestream base;
/* path of the file being written; used in error messages */
const char *path;
/* descriptor that receives the data */
int fd;
/* set to 1 when the writer is set up, cleared when it is closed */
int open;
};
if ((error = git_futils_writebuffer(
buf, path, data->opts.file_open_flags, file_mode)) < 0)
return error;
static int checkout_stream_write(
git_writestream *s, const char *buffer, size_t len)
{
struct checkout_stream *stream = (struct checkout_stream *)s;
int ret;
if (st) {
data->perfdata.stat_calls++;
if ((ret = p_write(stream->fd, buffer, len)) < 0)
giterr_set(GITERR_OS, "Could not write to '%s'", stream->path);
if ((error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error statting '%s'", path);
return error;
}
}
return ret;
}
if (GIT_PERMS_IS_EXEC(file_mode)) {
data->perfdata.chmod_calls++;
static int checkout_stream_close(git_writestream *s)
{
struct checkout_stream *stream = (struct checkout_stream *)s;
assert(stream && stream->open);
if ((error = p_chmod(path, file_mode)) < 0)
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
}
stream->open = 0;
return p_close(stream->fd);
}
return error;
/* Free callback for the checkout stream. Intentionally does nothing:
 * blob_content_to_file declares its `struct checkout_stream` as a local
 * variable, so there is no allocation to release here.
 */
static void checkout_stream_free(git_writestream *s)
{
GIT_UNUSED(s);
}
static int blob_content_to_file(
......@@ -1411,36 +1410,83 @@ static int blob_content_to_file(
struct stat *st,
git_blob *blob,
const char *path,
const char * hint_path,
const char *hint_path,
mode_t entry_filemode)
{
int flags = data->opts.file_open_flags;
mode_t file_mode = data->opts.file_mode ?
data->opts.file_mode : entry_filemode;
git_buf out = GIT_BUF_INIT;
git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
struct checkout_stream writer;
mode_t mode;
git_filter_list *fl = NULL;
int fd;
int error = 0;
if (hint_path == NULL)
hint_path = path;
if (!data->opts.disable_filters)
error = git_filter_list__load_with_attr_session(
&fl, data->repo, &data->attr_session, blob, hint_path,
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT);
if ((error = mkpath2file(data, path, data->opts.dir_mode)) < 0)
return error;
if (!error)
error = git_filter_list_apply_to_blob(&out, fl, blob);
if (flags <= 0)
flags = O_CREAT | O_TRUNC | O_WRONLY;
if (!(mode = file_mode))
mode = GIT_FILEMODE_BLOB;
if ((fd = p_open(path, flags, mode)) < 0) {
giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
return fd;
}
filter_opts.attr_session = &data->attr_session;
filter_opts.temp_buf = &data->tmp;
if (!data->opts.disable_filters &&
(error = git_filter_list__load_ext(
&fl, data->repo, blob, hint_path,
GIT_FILTER_TO_WORKTREE, &filter_opts)))
return error;
/* setup the writer */
memset(&writer, 0, sizeof(struct checkout_stream));
writer.base.write = checkout_stream_write;
writer.base.close = checkout_stream_close;
writer.base.free = checkout_stream_free;
writer.path = path;
writer.fd = fd;
writer.open = 1;
error = git_filter_list_stream_blob(fl, blob, (git_writestream *)&writer);
assert(writer.open == 0);
git_filter_list_free(fl);
if (!error) {
error = buffer_to_file(data, st, &out, path, file_mode);
st->st_mode = entry_filemode;
if (error < 0)
return error;
if (GIT_PERMS_IS_EXEC(mode)) {
data->perfdata.chmod_calls++;
if ((error = p_chmod(path, mode)) < 0) {
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
return error;
}
}
if (st) {
data->perfdata.stat_calls++;
git_buf_free(&out);
if ((error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error statting '%s'", path);
return error;
}
st->st_mode = entry_filemode;
}
return error;
return 0;
}
static int blob_content_to_link(
......@@ -1958,6 +2004,7 @@ static int checkout_write_merge(
git_merge_file_result result = {0};
git_filebuf output = GIT_FILEBUF_INIT;
git_filter_list *fl = NULL;
git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
int error = 0;
if (data->opts.checkout_strategy & GIT_CHECKOUT_CONFLICT_STYLE_DIFF3)
......@@ -2007,9 +2054,12 @@ static int checkout_write_merge(
in_data.ptr = (char *)result.ptr;
in_data.size = result.len;
if ((error = git_filter_list__load_with_attr_session(
&fl, data->repo, &data->attr_session, NULL, git_buf_cstr(&path_workdir),
GIT_FILTER_TO_WORKTREE, GIT_FILTER_OPT_DEFAULT)) < 0 ||
filter_opts.attr_session = &data->attr_session;
filter_opts.temp_buf = &data->tmp;
if ((error = git_filter_list__load_ext(
&fl, data->repo, NULL, git_buf_cstr(&path_workdir),
GIT_FILTER_TO_WORKTREE, &filter_opts)) < 0 ||
(error = git_filter_list_apply_to_data(&out_data, fl, &in_data)) < 0)
goto done;
} else {
......
......@@ -302,7 +302,7 @@ static int crlf_check(
return error;
/* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) &&
if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
ca.safe_crlf = GIT_SAFE_CRLF_WARN;
}
......
......@@ -600,7 +600,7 @@ int git_diff__oid_for_entry(
error = -1;
} else if (!(error = git_filter_list_load(
&fl, diff->repo, NULL, entry.path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)))
GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)))
{
int fd = git_futils_open_ro(full_path.ptr);
if (fd < 0)
......
......@@ -418,14 +418,13 @@ void git_diff_driver_update_options(
int git_diff_driver_content_is_binary(
git_diff_driver *driver, const char *content, size_t content_len)
{
git_buf search;
search.ptr = (char *)content;
search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);
search.asize = 0;
git_buf search = GIT_BUF_INIT;
GIT_UNUSED(driver);
git_buf_attach_notowned(&search, content,
min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL));
/* TODO: provide encoding / binary detection callbacks that can
* be UTF-8 aware, etc. For now, instead of trying to be smart,
* let's just use the simple NUL-byte detection that core git uses.
......
......@@ -302,7 +302,7 @@ static int diff_file_content_load_workdir_file(
if ((error = git_filter_list_load(
&fl, fc->repo, NULL, fc->file->path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE)) < 0)
GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
goto cleanup;
/* if there are no filters, try to mmap the file */
......
......@@ -24,16 +24,23 @@ typedef enum {
GIT_CRLF_AUTO,
} git_crlf_t;
/* Options accepted by git_filter_list__load_ext(). */
typedef struct {
/* attribute session to use; checkout passes its own session here */
git_attr_session *attr_session;
/* NOTE(review): presumably a scratch buffer the filters may reuse;
 * checkout passes `&data->tmp` -- confirm against filter.c */
git_buf *temp_buf;
/* presumably a combination of `git_filter_flag_t` values -- TODO confirm */
uint32_t flags;
} git_filter_options;
/* zero-initializes every field of a git_filter_options */
#define GIT_FILTER_OPTIONS_INIT {0}
extern void git_filter_free(git_filter *filter);
extern int git_filter_list__load_with_attr_session(
extern int git_filter_list__load_ext(
git_filter_list **filters,
git_repository *repo,
git_attr_session *attr_session,
git_blob *blob, /* can be NULL */
const char *path,
git_filter_mode_t mode,
uint32_t options);
git_filter_options *filter_opts);
/*
* Available filters
......
......@@ -1849,7 +1849,7 @@ int git_repository_hashfile(
if (strlen(as_path) > 0) {
error = git_filter_list_load(
&fl, repo, NULL, as_path,
GIT_FILTER_TO_ODB, GIT_FILTER_OPT_DEFAULT);
GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
if (error < 0)
return error;
} else {
......
......@@ -123,7 +123,7 @@ void test_filter_crlf__with_safecrlf_and_unsafe_allowed(void)
cl_repo_set_bool(g_repo, "core.safecrlf", true);
cl_git_pass(git_filter_list_new(
&fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_OPT_ALLOW_UNSAFE));
&fl, g_repo, GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE));
crlf = git_filter_lookup(GIT_FILTER_CRLF);
cl_assert(crlf != NULL);
......
#include "clar_libgit2.h"
#include "posix.h"
#include "blob.h"
#include "filter.h"
#include "buf_text.h"
#include "git2/sys/filter.h"
#include "git2/sys/repository.h"
static git_repository *g_repo = NULL;
static git_filter *create_compress_filter(void);
static git_filter *compress_filter;
/* Test setup: create the test filter and register it under the name
 * "compress" (the 50 is presumably the filter priority -- TODO confirm),
 * then open the "empty_standard_repo" sandbox.
 */
void test_filter_stream__initialize(void)
{
compress_filter = create_compress_filter();
cl_git_pass(git_filter_register("compress", compress_filter, 50));
g_repo = cl_git_sandbox_init("empty_standard_repo");
}
/* Test teardown: tear down the sandbox, forget the repository pointer
 * and unregister the "compress" filter. The return value of
 * git_filter_unregister is not checked.
 */
void test_filter_stream__cleanup(void)
{
cl_git_sandbox_cleanup();
g_repo = NULL;
git_filter_unregister("compress");
}
#define CHUNKSIZE 10240
/* State of one "compress" filter stream. `parent` comes first so that
 * the `git_writestream *` handed to the callbacks can be cast back to a
 * `struct compress_stream *`.
 */
struct compress_stream {
git_writestream parent;
/* stream that receives this filter's output */
git_writestream *next;
/* filtering direction; selects deflating or inflating writes */
git_filter_mode_t mode;
/* deflate only: first byte seen for the chunk in progress */
char current;
/* deflate only: number of bytes consumed so far for the chunk in progress */
size_t current_chunk;
};
/*
 * "Deflating" write: every CHUNKSIZE bytes of input are reduced to a
 * single output byte, namely the first byte seen for that chunk.
 *
 * Input may arrive in pieces of any size; progress through the chunk in
 * flight is kept in `stream->current_chunk` between calls, and a byte is
 * written to the next stream only once a chunk is complete.
 */
static int compress_stream_write__deflated(struct compress_stream *stream, const char *buffer, size_t len)
{
	size_t consumed = 0;
	size_t remaining = len;

	while (remaining != 0) {
		size_t room, take;

		/* starting a new chunk: remember the byte that represents it */
		if (!stream->current_chunk)
			stream->current = buffer[consumed];

		/* consume as much input as still fits in the current chunk */
		room = CHUNKSIZE - stream->current_chunk;
		take = (room < remaining) ? room : remaining;

		consumed += take;
		remaining -= take;
		stream->current_chunk += take;

		/* chunk still incomplete: the input is exhausted at this point */
		if (stream->current_chunk != CHUNKSIZE)
			continue;

		cl_git_pass(stream->next->write(stream->next, &stream->current, 1));
		stream->current_chunk = 0;
	}

	return 0;
}
/*
 * "Inflating" write: every input byte is expanded into a chunk of
 * CHUNKSIZE copies of itself, which is written to the next stream.
 */
static int compress_stream_write__inflated(struct compress_stream *stream, const char *buffer, size_t len)
{
	char inflated[CHUNKSIZE];
	size_t pos;

	for (pos = 0; pos != len; ++pos) {
		const char fill = buffer[pos];
		size_t k;

		for (k = 0; k != CHUNKSIZE; ++k)
			inflated[k] = fill;

		cl_git_pass(stream->next->write(stream->next, inflated, CHUNKSIZE));
	}

	return 0;
}
/*
 * Write callback of the compress stream: data heading to the ODB is
 * deflated, data heading anywhere else is inflated.
 */
static int compress_stream_write(git_writestream *s, const char *buffer, size_t len)
{
	struct compress_stream *stream = (struct compress_stream *)s;

	if (stream->mode != GIT_FILTER_TO_ODB)
		return compress_stream_write__inflated(stream, buffer, len);

	return compress_stream_write__deflated(stream, buffer, len);
}
/*
 * Close callback of the compress stream.
 *
 * Asserts that no partially consumed chunk is left over (the input must
 * have been a whole number of chunks when deflating), then closes the
 * next stream and returns its result, so that a failure to close the
 * downstream writer is not reported as success.
 */
static int compress_stream_close(git_writestream *s)
{
	struct compress_stream *stream = (struct compress_stream *)s;

	cl_assert_equal_i(0, stream->current_chunk);

	return stream->next->close(stream->next);
}
/* Free callback of the compress stream: releases the stream structure
 * that compress_filter_stream_init allocated.
 */
static void compress_stream_free(git_writestream *stream)
{
git__free(stream);
}
/*
 * Stream callback of the compress filter.
 *
 * Allocates a zeroed compress_stream, wires up its write/close/free
 * callbacks, records the next stream and the filtering direction of
 * `src`, and hands the new stream back through `out`.  `self` and
 * `payload` are not used.  Always returns 0; an allocation failure
 * trips the test assertion instead.
 */
static int compress_filter_stream_init(
	git_writestream **out,
	git_filter *self,
	void **payload,
	const git_filter_source *src,
	git_writestream *next)
{
	struct compress_stream *stream;

	GIT_UNUSED(self);
	GIT_UNUSED(payload);

	stream = git__calloc(1, sizeof(*stream));
	cl_assert(stream);

	stream->mode = git_filter_source_mode(src);
	stream->next = next;

	stream->parent.free = compress_stream_free;
	stream->parent.close = compress_stream_close;
	stream->parent.write = compress_stream_write;

	/* `parent` is the first member, so this is the stream's own address */
	*out = &stream->parent;

	return 0;
}
/* Shutdown callback of the compress filter: releases the filter
 * structure that create_compress_filter allocated.
 */
static void compress_filter_free(git_filter *f)
{
git__free(f);
}
/*
 * Allocate the test "compress" filter.
 *
 * The filter only provides a stream callback and a shutdown callback
 * (which frees it); every other callback stays NULL from the zeroed
 * allocation.  It is keyed on the "+compress" attribute string --
 * presumably matching paths that have the `compress` attribute set,
 * as test_stream arranges through .gitattributes.
 */
git_filter *create_compress_filter(void)
{
	git_filter *filter;

	filter = git__calloc(1, sizeof(*filter));
	cl_assert(filter);

	filter->shutdown = compress_filter_free;
	filter->stream = compress_filter_stream_init;
	filter->attributes = "+compress";
	filter->version = GIT_FILTER_VERSION;

	return filter;
}
/*
 * Create `filename` inside the "empty_standard_repo" sandbox and fill it
 * with `numchunks` chunks of CHUNKSIZE bytes each; every byte of chunk
 * number `i` has the value `i % 256`.
 *
 * Any failure (building the path, opening, writing or closing the file)
 * fails the running test.
 */
static void writefile(const char *filename, size_t numchunks)
{
	git_buf path = GIT_BUF_INIT;
	char buf[CHUNKSIZE];
	size_t i, j;
	int fd;

	cl_git_pass(git_buf_joinpath(&path, "empty_standard_repo", filename));

	fd = p_open(path.ptr, O_RDWR|O_CREAT, 0666);

	/* the path is only needed to open the file; release it right away
	 * rather than holding it across the assertions below */
	git_buf_free(&path);

	cl_assert(fd >= 0);

	for (i = 0; i < numchunks; i++) {
		for (j = 0; j < CHUNKSIZE; j++) {
			buf[j] = (char)(i % 256);
		}

		cl_git_pass(p_write(fd, buf, CHUNKSIZE));
	}

	/* a failing close may mean the data never made it to the file */
	cl_must_pass(p_close(fd));
}
/*
 * Round-trip a file of `numchunks` chunks through the compress filter.
 *
 * The file is written to the working directory, added to the index
 * (which should store a blob of exactly `numchunks` bytes), removed,
 * and then checked out again (which should restore a file of
 * `numchunks * CHUNKSIZE` bytes).
 */
static void test_stream(size_t numchunks)
{
	git_checkout_options checkout_opts = GIT_CHECKOUT_OPTIONS_INIT;
	const git_index_entry *entry;
	git_index *index;
	git_blob *blob;
	struct stat st;

	checkout_opts.checkout_strategy = GIT_CHECKOUT_FORCE;

	/* turn the compress attribute on for every path */
	cl_git_mkfile(
		"empty_standard_repo/.gitattributes",
		"* compress\n");

	/* working directory -> index */
	writefile("streamed_file", numchunks);

	cl_git_pass(git_repository_index(&index, g_repo));
	cl_git_pass(git_index_add_bypath(index, "streamed_file"));
	cl_git_pass(git_index_write(index));

	/* the stored blob must hold one byte per chunk */
	cl_assert(entry = git_index_get_bypath(index, "streamed_file", 0));
	cl_git_pass(git_blob_lookup(&blob, g_repo, &entry->id));
	cl_assert_equal_i(numchunks, git_blob_rawsize(blob));

	/* index -> working directory */
	cl_must_pass(p_unlink("empty_standard_repo/streamed_file"));
	cl_git_pass(git_checkout_index(g_repo, index, &checkout_opts));

	/* the checked-out file must be back at its full size */
	cl_must_pass(p_stat("empty_standard_repo/streamed_file", &st));
	cl_assert_equal_sz((numchunks * CHUNKSIZE), st.st_size);

	git_index_free(index);
	git_blob_free(blob);
}
/* write a 50KB file through the "compression" stream */
void test_filter_stream__smallfile(void)
{
test_stream(5);
}
/* optionally write a 500 MB file (51200 chunks of CHUNKSIZE bytes)
 * through the compression stream; skipped unless the
 * GITTEST_INVASIVE_FS_SIZE environment variable is set
 */
void test_filter_stream__bigfile(void)
{
if (!cl_getenv("GITTEST_INVASIVE_FS_SIZE"))
cl_skip();
test_stream(51200);
}
......@@ -722,7 +722,7 @@ void test_repo_init__at_filesystem_root(void)
git_buf root = GIT_BUF_INIT;
int root_len;
if (!cl_getenv("GITTEST_INVASIVE_FILESYSTEM"))
if (!cl_getenv("GITTEST_INVASIVE_FS_STRUCTURE"))
cl_skip();
root_len = git_path_root(sandbox);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment