Commit a16e4172 by Russell Belfer

Fix rename detection to use actual blob size

The size data in the index may not reflect the actual size of the
blob data from the ODB when content filtering comes into play.
This commit fixes rename detection to use the actual blob size when
calculating data signatures instead of the value from the index.

Because of a misunderstanding on my part, I first converted the
git_index_add_bypath API to use the post-filtered blob data size
in creating the index entry.  I backed that change out, but I
kept the overall refactoring of that routine and the new internal
git_blob__create_from_paths API because it eliminates an extra
stat() call from the code that adds a file to the index.

The existing tests actually cover this code path, at least when
running on Windows, so at this point I'm not adding new tests to
cover the changes.
parent effdbeb3
......@@ -105,6 +105,7 @@ static int write_file_stream(
static int write_file_filtered(
git_oid *oid,
git_off_t *size,
git_odb *odb,
const char *full_path,
git_vector *filters)
......@@ -123,8 +124,11 @@ static int write_file_filtered(
git_buf_free(&source);
/* Write the file to disk if it was properly filtered */
if (!error)
if (!error) {
*size = dest.size;
error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB);
}
git_buf_free(&dest);
return error;
......@@ -152,21 +156,46 @@ static int write_symlink(
return error;
}
static int blob_create_internal(git_oid *oid, git_repository *repo, const char *content_path, const char *hint_path, bool try_load_filters)
int git_blob__create_from_paths(
git_oid *oid,
struct stat *out_st,
git_repository *repo,
const char *content_path,
const char *hint_path,
mode_t hint_mode,
bool try_load_filters)
{
int error;
struct stat st;
git_odb *odb = NULL;
git_off_t size;
mode_t mode;
git_buf path = GIT_BUF_INIT;
assert(hint_path || !try_load_filters);
if ((error = git_path_lstat(content_path, &st)) < 0 || (error = git_repository_odb__weakptr(&odb, repo)) < 0)
return error;
if (!content_path) {
if (git_repository__ensure_not_bare(repo, "create blob from file") < 0)
return GIT_EBAREREPO;
if (git_buf_joinpath(
&path, git_repository_workdir(repo), hint_path) < 0)
return -1;
content_path = path.ptr;
}
if ((error = git_path_lstat(content_path, &st)) < 0 ||
(error = git_repository_odb(&odb, repo)) < 0)
goto done;
if (out_st)
memcpy(out_st, &st, sizeof(st));
size = st.st_size;
mode = hint_mode ? hint_mode : st.st_mode;
if (S_ISLNK(st.st_mode)) {
if (S_ISLNK(hint_mode)) {
error = write_symlink(oid, odb, content_path, (size_t)size);
} else {
git_vector write_filters = GIT_VECTOR_INIT;
......@@ -187,7 +216,8 @@ static int blob_create_internal(git_oid *oid, git_repository *repo, const char *
error = write_file_stream(oid, odb, content_path, size);
} else {
/* We need to apply one or more filters */
error = write_file_filtered(oid, odb, content_path, &write_filters);
error = write_file_filtered(
oid, &size, odb, content_path, &write_filters);
}
git_filters_free(&write_filters);
......@@ -207,34 +237,21 @@ static int blob_create_internal(git_oid *oid, git_repository *repo, const char *
*/
}
done:
git_odb_free(odb);
git_buf_free(&path);
return error;
}
int git_blob_create_fromworkdir(git_oid *oid, git_repository *repo, const char *path)
int git_blob_create_fromworkdir(
git_oid *oid, git_repository *repo, const char *path)
{
git_buf full_path = GIT_BUF_INIT;
const char *workdir;
int error;
if ((error = git_repository__ensure_not_bare(repo, "create blob from file")) < 0)
return error;
workdir = git_repository_workdir(repo);
if (git_buf_joinpath(&full_path, workdir, path) < 0) {
git_buf_free(&full_path);
return -1;
}
error = blob_create_internal(
oid, repo, git_buf_cstr(&full_path),
git_buf_cstr(&full_path) + strlen(workdir), true);
git_buf_free(&full_path);
return error;
return git_blob__create_from_paths(oid, NULL, repo, NULL, path, 0, true);
}
int git_blob_create_fromdisk(git_oid *oid, git_repository *repo, const char *path)
int git_blob_create_fromdisk(
git_oid *oid, git_repository *repo, const char *path)
{
int error;
git_buf full_path = GIT_BUF_INIT;
......@@ -251,8 +268,8 @@ int git_blob_create_fromdisk(git_oid *oid, git_repository *repo, const char *pat
if (workdir && !git__prefixcmp(hintpath, workdir))
hintpath += strlen(workdir);
error = blob_create_internal(
oid, repo, git_buf_cstr(&full_path), hintpath, true);
error = git_blob__create_from_paths(
oid, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true);
git_buf_free(&full_path);
return error;
......@@ -272,11 +289,8 @@ int git_blob_create_fromchunks(
git_filebuf file = GIT_FILEBUF_INIT;
git_buf path = GIT_BUF_INIT;
if (git_buf_join_n(
&path, '/', 3,
git_repository_path(repo),
GIT_OBJECTS_DIR,
"streamed") < 0)
if (git_buf_joinpath(
&path, git_repository_path(repo), GIT_OBJECTS_DIR "streamed") < 0)
goto cleanup;
content = git__malloc(BUFFER_SIZE);
......@@ -303,7 +317,8 @@ int git_blob_create_fromchunks(
if (git_filebuf_flush(&file) < 0)
goto cleanup;
error = blob_create_internal(oid, repo, file.path_lock, hintpath, hintpath != NULL);
error = git_blob__create_from_paths(
oid, NULL, repo, file.path_lock, hintpath, 0, hintpath != NULL);
cleanup:
git_buf_free(&path);
......
......@@ -21,4 +21,13 @@ void git_blob__free(void *blob);
int git_blob__parse(void *blob, git_odb_object *obj);
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
extern int git_blob__create_from_paths(
git_oid *out_oid,
struct stat *out_st,
git_repository *repo,
const char *full_path,
const char *hint_path,
mode_t hint_mode,
bool apply_filters);
#endif
......@@ -473,7 +473,13 @@ static int similarity_calc(
/* if lookup fails, just skip this item in similarity calc */
giterr_clear();
} else {
size_t sz = (size_t)(git__is_sizet(file->size) ? file->size : -1);
size_t sz;
/* index size may not be actual blob size if filtered */
if (file->size != git_blob_rawsize(info->blob))
file->size = git_blob_rawsize(info->blob);
sz = (size_t)(git__is_sizet(file->size) ? file->size : -1);
error = opts->metric->buffer_signature(
&cache[info->idx], info->file,
......
......@@ -16,6 +16,7 @@
#include "iterator.h"
#include "pathspec.h"
#include "ignore.h"
#include "blob.h"
#include "git2/odb.h"
#include "git2/oid.h"
......@@ -604,42 +605,23 @@ int git_index_entry__cmp_icase(const void *a, const void *b)
return strcasecmp(entry_a->path, entry_b->path);
}
static int index_entry_init(git_index_entry **entry_out, git_index *index, const char *rel_path)
static int index_entry_init(
git_index_entry **entry_out, git_index *index, const char *rel_path)
{
int error = 0;
git_index_entry *entry = NULL;
struct stat st;
git_oid oid;
const char *workdir;
git_buf full_path = GIT_BUF_INIT;
int error;
if (INDEX_OWNER(index) == NULL)
return create_index_error(-1,
"Could not initialize index entry. "
"Index is not backed up by an existing repository.");
workdir = git_repository_workdir(INDEX_OWNER(index));
if (!workdir)
return create_index_error(GIT_EBAREREPO,
"Could not initialize index entry. Repository is bare");
if ((error = git_buf_joinpath(&full_path, workdir, rel_path)) < 0)
return error;
if ((error = git_path_lstat(full_path.ptr, &st)) < 0) {
git_buf_free(&full_path);
return error;
}
git_buf_free(&full_path); /* done with full path */
/* There is no need to validate the rel_path here, since it will be
* immediately validated by the call to git_blob_create_fromfile.
*/
/* write the blob to disk and get the oid */
if ((error = git_blob_create_fromworkdir(&oid, INDEX_OWNER(index), rel_path)) < 0)
/* write the blob to disk and get the oid and stat info */
error = git_blob__create_from_paths(
&oid, &st, INDEX_OWNER(index), NULL, rel_path, 0, true);
if (error < 0)
return error;
entry = git__calloc(1, sizeof(git_index_entry));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment