Commit 0cf77103 by Russell Belfer

Start of filter API + git_blob_filtered_content

This begins the process of exposing git_filter objects to the
public API.  This includes:

* new public type and API for `git_buffer` through which an
  allocated buffer can be passed to the user
* new API `git_blob_filtered_content`
* make the git_filter type and GIT_FILTER_TO_... constants public
parent 4581f9d8
......@@ -58,4 +58,7 @@
#include "git2/stash.h"
#include "git2/pathspec.h"
#include "git2/buffer.h"
#include "git2/filter.h"
#endif
......@@ -11,6 +11,7 @@
#include "types.h"
#include "oid.h"
#include "object.h"
#include "buffer.h"
/**
* @file git2/blob.h
......@@ -96,6 +97,37 @@ GIT_EXTERN(const void *) git_blob_rawcontent(const git_blob *blob);
GIT_EXTERN(git_off_t) git_blob_rawsize(const git_blob *blob);
/**
 * Get a buffer with the filtered content of a blob.
 *
 * This applies filters as if the blob was being checked out to the
 * working directory under the specified filename. This may apply
 * CRLF filtering or other types of changes depending on the file
 * attributes set for the blob and the content detected in it.
 *
 * The output is written into a `git_buffer` which the caller must free
 * when done (via `git_buffer_free`).
 *
 * If no filters need to be applied, then the `out` buffer will just be
 * populated with a pointer to the raw content of the blob. In that case,
 * be careful to *not* free the blob until done with the buffer. To keep
 * the data detached from the blob, call `git_buffer_resize` on the buffer
 * with a `want_size` of 0 and the buffer will be reallocated to be
 * detached from the blob.
 *
 * If `out` already holds an allocation (non-NULL `ptr` and non-zero
 * `available`) and filters do apply, that allocation is handed to the
 * filters as their output buffer rather than a new one being created.
 *
 * If `check_for_binary_data` is non-zero and the blob content is detected
 * as binary, the function returns 0 *without writing to `out`*; initialize
 * `out` with `GIT_BUFFER_INIT` so that this case can be recognized.
 *
 * @param out The git_buffer to be filled in
 * @param blob Pointer to the blob
 * @param as_path Path used for file attribute lookups, etc.
 * @param check_for_binary_data Should this test if blob content contains
 *        NUL bytes / looks like binary data before applying filters?
 * @return 0 on success or an error code
 */
GIT_EXTERN(int) git_blob_filtered_content(
git_buffer *out,
git_blob *blob,
const char *as_path,
int check_for_binary_data);
/**
* Read a file from the working folder of a repository
* and write it to the Object Database as a loose blob
*
......
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_git_buffer_h__
#define INCLUDE_git_buffer_h__
#include "common.h"
/**
* @file git2/buffer.h
* @brief Buffer export structure
*
* @ingroup Git
* @{
*/
GIT_BEGIN_DECL
/**
 * A data buffer for exporting data from libgit2
 *
 * There are a number of places where libgit2 wants to return an allocated
 * data buffer to the caller and have the caller take ownership of that
 * allocated memory. This can be awkward if the caller does not have easy
 * access to the same allocation functions that libgit2 is using. In those
 * cases, libgit2 will instead fill in a `git_buffer` and the caller can
 * use `git_buffer_free()` to release it when they are done.
 *
 * * `ptr` refers to the start of the allocated memory.
 * * `size` contains the size of the data in `ptr` that is actually used.
 * * `available` refers to the known total amount of allocated memory in
 *   cases where it is larger than the `size` actually in use.
 *
 * In a few cases, for uniformity and simplicity, an API may populate a
 * `git_buffer` with data that should *not* be freed (i.e. the lifetime of
 * the data buffer is actually tied to another libgit2 object). These
 * cases will be clearly documented in the APIs that use the `git_buffer`.
 * In those cases, the `available` field will be set to zero even though
 * the `ptr` and `size` will be valid.
 */
typedef struct git_buffer {
/** Start of the data (allocated by libgit2, or borrowed when `available` is 0) */
char *ptr;
/** Number of bytes at `ptr` that are actually in use */
size_t size;
/** Total bytes allocated at `ptr`; zero means the memory is not owned by this buffer */
size_t available;
} git_buffer;
/**
 * Use to initialize buffer structure when git_buffer is on stack.
 *
 * Yields an empty buffer: NULL `ptr`, zero `size`, zero `available`.
 */
#define GIT_BUFFER_INIT { NULL, 0, 0 }
/**
 * Free the memory referred to by the git_buffer.
 *
 * Note that this does not free the `git_buffer` itself, just the memory
 * pointed to by `buffer->ptr`. If that memory was not allocated by
 * libgit2 itself, be careful with using this function because it could
 * cause problems.
 *
 * The memory is only released when `buffer->ptr` is non-NULL and
 * `buffer->available` is greater than zero; a buffer that merely borrows
 * its data (`available == 0`) is left unfreed. In either case all fields
 * of the structure are reset to zero afterwards. Passing NULL is a no-op.
 *
 * @param buffer The buffer with allocated memory
 */
GIT_EXTERN(void) git_buffer_free(git_buffer *buffer);
/**
 * Resize the buffer allocation to make more space.
 *
 * This will update `buffer->available` with the new size (which will be
 * at least `want_size` and may be larger). This may or may not change
 * `buffer->ptr` depending on whether there is an existing allocation and
 * whether that allocation can be increased in place.
 *
 * Currently, this will never shrink the buffer, only expand it.
 *
 * When a new allocation is made, the requested size is rounded up to a
 * multiple of 8 bytes before allocating.
 *
 * @param buffer The buffer to be resized; may or may not be allocated yet.
 *        Must not be NULL (checked with an assertion).
 * @param want_size The desired available size
 * @return 0 on success, negative error code on allocation failure
 */
GIT_EXTERN(int) git_buffer_resize(git_buffer *buffer, size_t want_size);
GIT_END_DECL
/** @} */
#endif
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_git_filter_h__
#define INCLUDE_git_filter_h__
#include "common.h"
#include "types.h"
#include "oid.h"
#include "buffer.h"
/**
* @file git2/filter.h
* @brief Git filter APIs
*
* @ingroup Git
* @{
*/
GIT_BEGIN_DECL
/**
 * Direction in which a filter is applied.
 *
 * Filters are applied in one of two directions: smudging - which is
 * exporting a file from the Git object database to the working directory,
 * and cleaning - which is importing a file from the working directory to
 * the Git object database. These values control which direction of
 * change is being applied.
 *
 * Each direction has two spellings with the same numeric value.
 */
typedef enum {
	/** Object database -> working directory */
	GIT_FILTER_SMUDGE = 0,
	/** Same value as GIT_FILTER_SMUDGE */
	GIT_FILTER_TO_WORKTREE = GIT_FILTER_SMUDGE,
	/** Working directory -> object database */
	GIT_FILTER_CLEAN = 1,
	/** Same value as GIT_FILTER_CLEAN */
	GIT_FILTER_TO_ODB = GIT_FILTER_CLEAN /* no trailing comma: keeps the public header valid C89 */
} git_filter_mode_t;
/**
* A filter that can transform file data
*
* This represents a filter that can be used to transform or even replace
* file data. Libgit2 currently includes one built in filter:
*
* * "crlf" which uses the complex rules with the "text", "eol", and
* "crlf" file attributes to decide how to convert between LF and CRLF
* line endings
*/
typedef struct git_filter git_filter;
GIT_END_DECL
/** @} */
#endif
......@@ -338,3 +338,54 @@ int git_blob_is_binary(git_blob *blob)
return git_buf_text_is_binary(&content);
}
/*
 * Fill `out` with the content of `blob` as filtered for the working
 * directory under the path `as_path`.
 *
 * - If `check_for_binary_data` is non-zero and the raw content is detected
 *   as binary, returns 0 and leaves `out` untouched.
 * - If no filters are loaded for `as_path`, `out` is pointed directly at
 *   the blob's raw content with `available` set to 0 (borrowed memory).
 * - Otherwise the filters are run; an allocation already owned by `out`
 *   is reused as the destination, else a new buffer is created.
 *
 * Returns 0 on success, or the negative value reported by filter loading,
 * blob reading, or filter application. When a failure happens after the
 * filters were loaded, the destination buffer is released and `out` is
 * reset to empty, so the caller never keeps a pointer that the filter
 * code may have reallocated or freed.
 */
int git_blob_filtered_content(
	git_buffer *out,
	git_blob *blob,
	const char *as_path,
	int check_for_binary_data)
{
	int error = 0, num_filters = 0;
	git_buf filtered = GIT_BUF_INIT, unfiltered = GIT_BUF_INIT;
	git_buf stale = GIT_BUF_INIT;
	git_vector filters = GIT_VECTOR_INIT;

	assert(blob && as_path && out);

	/* Create a fake git_buf from the blob raw data... */
	filtered.ptr = (void *)git_blob_rawcontent(blob);
	filtered.size = (size_t)git_blob_rawsize(blob);
	filtered.asize = 0;

	if (check_for_binary_data && git_buf_text_is_binary(&filtered))
		return 0;

	num_filters = git_filters_load(
		&filters, git_blob_owner(blob), as_path, GIT_FILTER_TO_WORKTREE);
	if (num_filters < 0)
		return num_filters;

	if (num_filters > 0) {
		if (out->ptr && out->available) {
			/* reuse the caller's existing allocation as the destination */
			filtered.ptr = out->ptr;
			filtered.size = out->size;
			filtered.asize = out->available;
		} else {
			git_buf_init(&filtered, filtered.size + 1);
		}

		if (!(error = git_blob__getbuf(&unfiltered, blob)))
			error = git_filters_apply(&filtered, &unfiltered, &filters);

		git_filters_free(&filters);
		git_buf_free(&unfiltered);

		if (error) {
			/*
			 * `filtered` owns its memory here (either freshly created or
			 * adopted from `out`) and may have been resized while
			 * filtering; release it and clear `out` so nothing leaks and
			 * `out` does not keep a pointer to memory it no longer owns.
			 */
			git_buf_free(&filtered);
			out->ptr = NULL;
			out->size = 0;
			out->available = 0;
			return error;
		}
	} else if (out->ptr && out->available) {
		/*
		 * No filters: `out` is about to be pointed at the blob's raw
		 * data, so release the allocation it currently owns first
		 * instead of overwriting (and leaking) it.
		 */
		stale.ptr = out->ptr;
		stale.size = out->size;
		stale.asize = out->available;
		git_buf_free(&stale);
	}

	out->ptr = filtered.ptr;
	out->size = filtered.size;
	out->available = filtered.asize;

	return 0;
}
......@@ -70,10 +70,10 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
assert(tgt != src);
if (!next)
return GIT_ENOTFOUND;
return git_buf_set(tgt, src->ptr, src->size);
/* reduce reallocs while in the loop */
if (git_buf_grow(tgt, src->size) < 0)
if (git_buf_grow(tgt, src->size + 1) < 0)
return -1;
out = tgt->ptr;
tgt->size = 0;
......@@ -81,7 +81,7 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
/* Find the next \r and copy whole chunk up to there to tgt */
for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) {
if (next > scan) {
size_t copylen = next - scan;
size_t copylen = (size_t)(next - scan);
memcpy(out, scan, copylen);
out += copylen;
}
......@@ -92,9 +92,14 @@ int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
}
/* Copy remaining input into dest */
memcpy(out, scan, scan_end - scan + 1); /* +1 for NUL byte */
out += (scan_end - scan);
tgt->size = out - tgt->ptr;
if (scan < scan_end) {
size_t remaining = (size_t)(scan_end - scan);
memcpy(out, scan, remaining);
out += remaining;
}
tgt->size = (size_t)(out - tgt->ptr);
tgt->ptr[tgt->size] = '\0';
return 0;
}
......@@ -109,7 +114,7 @@ int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
assert(tgt != src);
if (!next)
return GIT_ENOTFOUND;
return git_buf_set(tgt, src->ptr, src->size);
/* attempt to reduce reallocs while in the loop */
if (git_buf_grow(tgt, src->size + (src->size >> 4) + 1) < 0)
......
......@@ -56,16 +56,16 @@ GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
extern void git_buf_text_unescape(git_buf *buf);
/**
* Replace all \r\n with \n (or do nothing if no \r\n are found)
* Replace all \r\n with \n. Does not modify \r without trailing \n.
*
* @return 0 on success, GIT_ENOTFOUND if no \r\n, -1 on memory error
* @return 0 on success, -1 on memory error
*/
extern int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src);
/**
* Replace all \n with \r\n (or do nothing if no \n are found)
* Replace all \n with \r\n. Does not modify existing \r\n.
*
* @return 0 on success, GIT_ENOTFOUND if no \n, -1 on memory error
* @return 0 on success, -1 on memory error
*/
extern int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src);
......
......@@ -6,6 +6,7 @@
*/
#include "buffer.h"
#include "posix.h"
#include "git2/buffer.h"
#include <stdarg.h>
#include <ctype.h>
......@@ -484,3 +485,56 @@ int git_buf_splice(
buf->ptr[buf->size] = '\0';
return 0;
}
/*
* Public buffers API
*/
/*
 * Release the memory held by a public git_buffer and reset the structure.
 *
 * A NULL `buffer` is ignored. The data pointer is only freed when the
 * buffer owns it, i.e. `ptr` is set and `available` is non-zero; a buffer
 * that borrows its data (`available == 0`) is simply cleared.
 */
void git_buffer_free(git_buffer *buffer)
{
	int owns_memory;

	if (buffer == NULL)
		return;

	owns_memory = (buffer->ptr != NULL) && (buffer->available != 0);
	if (owns_memory)
		git__free(buffer->ptr);

	/* leave the struct in the same state as GIT_BUFFER_INIT */
	git__memzero(buffer, sizeof(git_buffer));
}
/*
 * Make sure `buffer` owns at least `want_size` bytes of storage.
 *
 * - If the buffer already has `available >= want_size`, nothing happens.
 * - If the buffer borrows its data (`ptr` set, `available == 0`), a new
 *   allocation of at least `size` bytes is made and the borrowed data is
 *   copied into it, detaching the buffer from the original memory. A
 *   `want_size` of 0 requests exactly this detachment.
 * - Otherwise the existing allocation is grown with git__realloc.
 *
 * The allocation size is rounded up to a multiple of 8. The buffer is
 * never shrunk. Returns 0 on success; on allocation failure the
 * GITERR_CHECK_ALLOC macro returns an error and `buffer` is unchanged.
 */
int git_buffer_resize(git_buffer *buffer, size_t want_size)
{
	int non_allocated_buffer = 0;
	char *new_ptr;

	assert(buffer);

	/* check if buffer->ptr points to memory owned elsewhere */
	non_allocated_buffer = (buffer->ptr != NULL && buffer->available == 0);

	if (non_allocated_buffer && !want_size)
		want_size = buffer->size;

	/* already big enough: only ever grow, never shrink */
	if (buffer->available >= want_size)
		return 0;

	if (non_allocated_buffer) {
		/* must not realloc memory we do not own; start a fresh block */
		new_ptr = NULL;
		if (want_size < buffer->size)
			want_size = buffer->size;
	} else {
		new_ptr = buffer->ptr;
	}

	want_size = (want_size + 7) & ~(size_t)7; /* round up to multiple of 8 */

	new_ptr = git__realloc(new_ptr, want_size);
	GITERR_CHECK_ALLOC(new_ptr);

	/* carry the borrowed data over into the newly owned block */
	if (non_allocated_buffer)
		memcpy(new_ptr, buffer->ptr, buffer->size);

	buffer->ptr = new_ptr;
	buffer->available = want_size;

	return 0;
}
......@@ -235,32 +235,36 @@ line_ending_error:
}
static int crlf_apply_to_workdir(
git_filter *self, git_buf *dest, const git_buf *source)
git_filter *self, git_buf *tgt, const git_buf *src)
{
struct crlf_filter *filter = (struct crlf_filter *)self;
const char *workdir_ending = NULL;
assert(self && dest && source);
assert(self && tgt && src);
/* Empty file? Nothing to do. */
if (git_buf_len(source) == 0)
if (git_buf_len(src) == 0)
return -1;
/* Determine proper line ending */
workdir_ending = line_ending(filter);
if (!workdir_ending)
return -1;
if (!strcmp("\n", workdir_ending)) /* do nothing for \n ending */
if (!strcmp("\n", workdir_ending)) {
if (git_buf_find(src, '\r') < 0)
return -1;
return git_buf_text_crlf_to_lf(tgt, src);
}
/* for now, only lf->crlf conversion is supported here */
/* only other supported option is lf->crlf conversion */
assert(!strcmp("\r\n", workdir_ending));
return git_buf_text_lf_to_crlf(dest, source);
return git_buf_text_lf_to_crlf(tgt, src);
}
static int find_and_add_filter(
git_vector *filters, git_repository *repo, const char *path,
int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
int (*apply)(struct git_filter *self, git_buf *tgt, const git_buf *src))
{
struct crlf_attrs ca;
struct crlf_filter *filter;
......
......@@ -11,16 +11,12 @@
#include "buffer.h"
#include "git2/odb.h"
#include "git2/repository.h"
#include "git2/filter.h"
typedef struct git_filter {
struct git_filter {
int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source);
void (*do_free)(struct git_filter *self);
} git_filter;
typedef enum {
GIT_FILTER_TO_WORKTREE,
GIT_FILTER_TO_ODB
} git_filter_mode;
};
typedef enum {
GIT_CRLF_GUESS = -1,
......@@ -60,13 +56,14 @@ extern int git_filters_load(git_vector *filters, git_repository *repo, const cha
* and `dest` buffers are owned by the caller and must be freed once
* they are no longer needed.
*
* NOTE: Because of the double-buffering schema, the `source` buffer that contains
* the original file may be tampered once the filtering is complete. Regardless,
* the `dest` buffer will always contain the final result of the filtering
* NOTE: Because of the double-buffering schema, the `source` buffer that
* contains the original file may be tampered once the filtering is
* complete. Regardless, the `dest` buffer will always contain the final
* result of the filtering
*
* @param dest Buffer to store the result of the filtering
* @param source Buffer containing the document to filter
* @param filters A non-empty vector of filters as supplied by `git_filters_load`
* @param filters Vector of filters as supplied by `git_filters_load`
* @return 0 on success, an error code otherwise
*/
extern int git_filters_apply(git_buf *dest, git_buf *source, git_vector *filters);
......
#include "clar_libgit2.h"
#include "checkout_helpers.h"
#include "../filter/crlf.h"
#include "git2/checkout.h"
#include "repository.h"
#define UTF8_BOM "\xEF\xBB\xBF"
#define ALL_CRLF_TEXT_RAW "crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n"
#define ALL_LF_TEXT_RAW "lf\nlf\nlf\nlf\nlf\n"
#define MORE_CRLF_TEXT_RAW "crlf\r\ncrlf\r\nlf\ncrlf\r\ncrlf\r\n"
#define MORE_LF_TEXT_RAW "lf\nlf\ncrlf\r\nlf\nlf\n"
#define ALL_LF_TEXT_AS_CRLF "lf\r\nlf\r\nlf\r\nlf\r\nlf\r\n"
#define MORE_CRLF_TEXT_AS_CRLF "crlf\r\ncrlf\r\nlf\r\ncrlf\r\ncrlf\r\n"
#define MORE_LF_TEXT_AS_CRLF "lf\r\nlf\r\ncrlf\r\nlf\r\nlf\r\n"
static git_repository *g_repo;
void test_checkout_crlf__initialize(void)
......
......@@ -919,6 +919,8 @@ void test_core_buffer__similarity_metric_whitespace(void)
git_buf_free(&buf);
}
#include "../filter/crlf.h"
#define check_buf(expected,buf) do { \
cl_assert_equal_s(expected, buf.ptr); \
cl_assert_equal_sz(strlen(expected), buf.size); } while (0)
......@@ -934,16 +936,16 @@ void test_core_buffer__lf_and_crlf_conversions(void)
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf("lf\r\nlf\r\nlf\r\nlf\r\n", tgt);
cl_assert_equal_i(GIT_ENOTFOUND, git_buf_text_crlf_to_lf(&tgt, &src));
/* no conversion needed if all LFs already */
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf(src.ptr, tgt);
git_buf_sets(&src, "\nlf\nlf\nlf\nlf\nlf");
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf("\r\nlf\r\nlf\r\nlf\r\nlf\r\nlf", tgt);
cl_assert_equal_i(GIT_ENOTFOUND, git_buf_text_crlf_to_lf(&tgt, &src));
/* no conversion needed if all LFs already */
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf(src.ptr, tgt);
/* CRLF source */
......@@ -993,10 +995,45 @@ void test_core_buffer__lf_and_crlf_conversions(void)
check_buf("\rcrlf\nlf\nlf\ncr\rcrlf\nlf\ncr\r", tgt);
git_buf_sets(&src, "\rcr\r");
cl_assert_equal_i(GIT_ENOTFOUND, git_buf_text_lf_to_crlf(&tgt, &src));
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf(src.ptr, tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf("\rcr\r", tgt);
git_buf_free(&src);
git_buf_free(&tgt);
/* blob correspondence tests */
git_buf_sets(&src, ALL_CRLF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf(ALL_CRLF_TEXT_AS_CRLF, tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf(ALL_CRLF_TEXT_AS_LF, tgt);
git_buf_free(&src);
git_buf_free(&tgt);
git_buf_sets(&src, ALL_LF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf(ALL_LF_TEXT_AS_CRLF, tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf(ALL_LF_TEXT_AS_LF, tgt);
git_buf_free(&src);
git_buf_free(&tgt);
git_buf_sets(&src, MORE_CRLF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf(MORE_CRLF_TEXT_AS_CRLF, tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf(MORE_CRLF_TEXT_AS_LF, tgt);
git_buf_free(&src);
git_buf_free(&tgt);
git_buf_sets(&src, MORE_LF_TEXT_RAW);
cl_git_pass(git_buf_text_lf_to_crlf(&tgt, &src));
check_buf(MORE_LF_TEXT_AS_CRLF, tgt);
cl_git_pass(git_buf_text_crlf_to_lf(&tgt, &src));
check_buf(MORE_LF_TEXT_AS_LF, tgt);
git_buf_free(&src);
git_buf_free(&tgt);
}
#include "clar_libgit2.h"
#include "crlf.h"
static git_repository *g_repo = NULL;
/*
 * Per-test setup: open the "crlf" sandbox repository and write a
 * .gitattributes file that maps file extensions to text/binary/eol
 * attributes used by the filter tests.
 */
void test_filter_blob__initialize(void)
{
	static const char *attributes =
		"*.txt text\n"
		"*.bin binary\n"
		"*.crlf text eol=crlf\n"
		"*.lf text eol=lf\n";

	g_repo = cl_git_sandbox_init("crlf");

	cl_git_mkfile("crlf/.gitattributes", attributes);
}
/* Per-test teardown: dispose of the sandbox created in the initialize step. */
void test_filter_blob__cleanup(void)
{
cl_git_sandbox_cleanup();
}
/*
 * Check git_blob_filtered_content() on the all-CRLF blob under three
 * different path names, each selecting different attributes from the
 * .gitattributes written during setup. The same git_buffer is reused
 * for every call and freed once at the end.
 */
void test_filter_blob__all_crlf(void)
{
git_blob *blob;
git_buffer buf = GIT_BUFFER_INIT;
cl_git_pass(git_revparse_single(
(git_object **)&blob, g_repo, "a9a2e891")); /* all-crlf */
/* sanity check: the stored blob really is the all-CRLF fixture */
cl_assert_equal_s(ALL_CRLF_TEXT_RAW, git_blob_rawcontent(blob));
/* "*.bin binary": content must come back unmodified */
cl_git_pass(git_blob_filtered_content(&buf, blob, "file.bin", 1));
cl_assert_equal_s(ALL_CRLF_TEXT_RAW, buf.ptr);
/* "*.crlf text eol=crlf" */
cl_git_pass(git_blob_filtered_content(&buf, blob, "file.crlf", 1));
/* in this case, raw content has crlf in it already */
cl_assert_equal_s(ALL_CRLF_TEXT_AS_CRLF, buf.ptr);
/* "*.lf text eol=lf": every CRLF is expected to become LF */
cl_git_pass(git_blob_filtered_content(&buf, blob, "file.lf", 1));
cl_assert_equal_s(ALL_CRLF_TEXT_AS_LF, buf.ptr);
git_buffer_free(&buf);
git_blob_free(blob);
}
#ifndef INCLUDE_filter_crlf_h__
#define INCLUDE_filter_crlf_h__
/*
 * File content for files in the resources/crlf repository.
 *
 * Naming: <WHICH>_TEXT_RAW is the content as stored; <WHICH>_TEXT_AS_CRLF
 * and <WHICH>_TEXT_AS_LF are the same content with every line ending
 * written as "\r\n" or "\n" respectively.
 */
/* UTF-8 byte order mark */
#define UTF8_BOM "\xEF\xBB\xBF"
/* Stored (unfiltered) content */
#define ALL_CRLF_TEXT_RAW "crlf\r\ncrlf\r\ncrlf\r\ncrlf\r\n"
#define ALL_LF_TEXT_RAW "lf\nlf\nlf\nlf\nlf\n"
#define MORE_CRLF_TEXT_RAW "crlf\r\ncrlf\r\nlf\ncrlf\r\ncrlf\r\n"
#define MORE_LF_TEXT_RAW "lf\nlf\ncrlf\r\nlf\nlf\n"
/* Expected content with all line endings as CRLF */
#define ALL_CRLF_TEXT_AS_CRLF ALL_CRLF_TEXT_RAW
#define ALL_LF_TEXT_AS_CRLF "lf\r\nlf\r\nlf\r\nlf\r\nlf\r\n"
#define MORE_CRLF_TEXT_AS_CRLF "crlf\r\ncrlf\r\nlf\r\ncrlf\r\ncrlf\r\n"
#define MORE_LF_TEXT_AS_CRLF "lf\r\nlf\r\ncrlf\r\nlf\r\nlf\r\n"
/* Expected content with all line endings as LF */
#define ALL_CRLF_TEXT_AS_LF "crlf\ncrlf\ncrlf\ncrlf\n"
#define ALL_LF_TEXT_AS_LF ALL_LF_TEXT_RAW
#define MORE_CRLF_TEXT_AS_LF "crlf\ncrlf\nlf\ncrlf\ncrlf\n"
#define MORE_LF_TEXT_AS_LF "lf\nlf\ncrlf\nlf\nlf\n"
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment