Commit 3bc95cfe by Edward Thomson Committed by GitHub

Merge pull request #4236 from pks-t/pks/index-v4-fixes

Fix path computations for compressed index entries
parents 6a13cf1e 92d3ea4e
......@@ -54,10 +54,6 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr
unsigned int flags,
git_index_matched_path_cb cb, void *payload);
#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
#define short_entry_size(len) entry_size(struct entry_short, len)
#define long_entry_size(len) entry_size(struct entry_long, len)
#define minimal_entry_size (offsetof(struct entry_short, path))
static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ;
......@@ -2282,12 +2278,29 @@ out_err:
return 0;
}
static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flags)
{
if (varint_len) {
if (flags & GIT_IDXENTRY_EXTENDED)
return offsetof(struct entry_long, path) + path_len + 1 + varint_len;
else
return offsetof(struct entry_short, path) + path_len + 1 + varint_len;
} else {
#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
if (flags & GIT_IDXENTRY_EXTENDED)
return entry_size(struct entry_long, path_len);
else
return entry_size(struct entry_short, path_len);
#undef entry_size
}
}
static size_t read_entry(
git_index_entry **out,
git_index *index,
const void *buffer,
size_t buffer_size,
const char **last)
const char *last)
{
size_t path_length, entry_size;
const char *path_ptr;
......@@ -2344,35 +2357,34 @@ static size_t read_entry(
path_length = path_end - path_ptr;
}
if (entry.flags & GIT_IDXENTRY_EXTENDED)
entry_size = long_entry_size(path_length);
else
entry_size = short_entry_size(path_length);
if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
return 0;
entry_size = index_entry_size(path_length, 0, entry.flags);
entry.path = (char *)path_ptr;
} else {
size_t varint_len;
size_t shared = git_decode_varint((const unsigned char *)path_ptr,
&varint_len);
size_t len = strlen(path_ptr + varint_len);
size_t last_len = strlen(*last);
size_t tmp_path_len;
size_t strip_len = git_decode_varint((const unsigned char *)path_ptr,
&varint_len);
size_t last_len = strlen(last);
size_t prefix_len = last_len - strip_len;
size_t suffix_len = strlen(path_ptr + varint_len);
size_t path_len;
if (varint_len == 0)
return index_error_invalid("incorrect prefix length");
GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1);
tmp_path = git__malloc(tmp_path_len);
GITERR_CHECK_ALLOC_ADD(&path_len, prefix_len, suffix_len);
GITERR_CHECK_ALLOC_ADD(&path_len, path_len, 1);
tmp_path = git__malloc(path_len);
GITERR_CHECK_ALLOC(tmp_path);
memcpy(tmp_path, last, last_len);
memcpy(tmp_path + last_len, path_ptr + varint_len, len);
entry_size = long_entry_size(shared + len);
memcpy(tmp_path, last, prefix_len);
memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1);
entry_size = index_entry_size(suffix_len, varint_len, entry.flags);
entry.path = tmp_path;
}
if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
return 0;
if (index_entry_dup(out, index, &entry) < 0) {
git__free(tmp_path);
return 0;
......@@ -2445,7 +2457,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
unsigned int i;
struct index_header header = { 0 };
git_oid checksum_calculated, checksum_expected;
const char **last = NULL;
const char *last = NULL;
const char *empty = "";
#define seek_forward(_increase) { \
......@@ -2469,7 +2481,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
index->version = header.version;
if (index->version >= INDEX_VERSION_NUMBER_COMP)
last = &empty;
last = empty;
seek_forward(INDEX_HEADER_SIZE);
......@@ -2504,6 +2516,9 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
}
error = 0;
if (index->version >= INDEX_VERSION_NUMBER_COMP)
last = entry->path;
seek_forward(entry_size);
}
......@@ -2574,11 +2589,12 @@ static bool is_index_extended(git_index *index)
return (extended > 0);
}
static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last)
static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char *last)
{
void *mem = NULL;
struct entry_short *ondisk;
size_t path_len, disk_size;
int varint_len = 0;
char *path;
const char *path_start = entry->path;
size_t same_len = 0;
......@@ -2586,7 +2602,7 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
path_len = ((struct entry_internal *)entry)->pathlen;
if (last) {
const char *last_c = *last;
const char *last_c = last;
while (*path_start == *last_c) {
if (!*path_start || !*last_c)
......@@ -2596,13 +2612,10 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
++same_len;
}
path_len -= same_len;
*last = entry->path;
varint_len = git_encode_varint(NULL, 0, same_len);
}
if (entry->flags & GIT_IDXENTRY_EXTENDED)
disk_size = long_entry_size(path_len);
else
disk_size = short_entry_size(path_len);
disk_size = index_entry_size(path_len, varint_len, entry->flags);
if (git_filebuf_reserve(file, &mem, disk_size) < 0)
return -1;
......@@ -2642,16 +2655,34 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
ondisk_ext->flags_extended = htons(entry->flags_extended &
GIT_IDXENTRY_EXTENDED_FLAGS);
path = ondisk_ext->path;
}
else
disk_size -= offsetof(struct entry_long, path);
} else {
path = ondisk->path;
disk_size -= offsetof(struct entry_short, path);
}
if (last) {
path += git_encode_varint((unsigned char *) path,
disk_size,
path_len - same_len);
varint_len = git_encode_varint((unsigned char *) path,
disk_size, same_len);
assert(varint_len > 0);
path += varint_len;
disk_size -= varint_len;
/*
* If using path compression, we are not allowed
* to have additional trailing NULs.
*/
assert(disk_size == path_len + 1);
} else {
/*
* If no path compression is used, we do have
* NULs as padding. As such, simply assert that
* we have enough space left to write the path.
*/
assert(disk_size > path_len);
}
memcpy(path, path_start, path_len);
memcpy(path, path_start, path_len + 1);
return 0;
}
......@@ -2662,8 +2693,7 @@ static int write_entries(git_index *index, git_filebuf *file)
size_t i;
git_vector case_sorted, *entries;
git_index_entry *entry;
const char **last = NULL;
const char *empty = "";
const char *last = NULL;
/* If index->entries is sorted case-insensitively, then we need
* to re-sort it case-sensitively before writing */
......@@ -2676,11 +2706,14 @@ static int write_entries(git_index *index, git_filebuf *file)
}
if (index->version >= INDEX_VERSION_NUMBER_COMP)
last = &empty;
last = "";
git_vector_foreach(entries, i, entry)
git_vector_foreach(entries, i, entry) {
if ((error = write_disk_entry(file, entry, last)) < 0)
break;
if (index->version >= INDEX_VERSION_NUMBER_COMP)
last = entry->path;
}
if (index->ignore_case)
git_vector_free(&case_sorted);
......
......@@ -36,7 +36,7 @@ int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value)
while (value >>= 7)
varint[--pos] = 128 | (--value & 127);
if (buf) {
if (bufsize < pos)
if (bufsize < (sizeof(varint) - pos))
return -1;
memcpy(buf, varint + pos, sizeof(varint) - pos);
}
......
......@@ -29,6 +29,9 @@ void test_core_encoding__encode(void)
cl_assert(git_encode_varint(buf, 100, 65) == 1);
cl_assert(buf[0] == 'A');
cl_assert(git_encode_varint(buf, 1, 1) == 1);
cl_assert(!memcmp(buf, "\x01", 1));
cl_assert(git_encode_varint(buf, 100, 267869656) == 4);
cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4));
......
......@@ -3,39 +3,135 @@
static git_repository *g_repo = NULL;
void test_index_version__can_write_v4(void)
void test_index_version__cleanup(void)
{
cl_git_sandbox_cleanup();
g_repo = NULL;
}
void test_index_version__can_read_v4(void)
{
const char *paths[] = {
"file.tx", "file.txt", "file.txz", "foo", "zzz",
};
git_index *index;
const git_index_entry *entry;
size_t i;
g_repo = cl_git_sandbox_init("indexv4");
g_repo = cl_git_sandbox_init("filemodes");
cl_git_pass(git_repository_index(&index, g_repo));
cl_assert_equal_sz(git_index_entrycount(index), 5);
cl_assert(index->on_disk);
cl_assert(git_index_version(index) == 2);
for (i = 0; i < ARRAY_SIZE(paths); i++) {
const git_index_entry *entry =
git_index_get_bypath(index, paths[i], GIT_INDEX_STAGE_NORMAL);
cl_assert(git_index_entrycount(index) == 6);
cl_assert(entry != NULL);
}
git_index_free(index);
}
void test_index_version__can_write_v4(void)
{
const char *paths[] = {
"foo",
"foox",
"foobar",
"foobal",
"x",
"xz",
"xyzzyx"
};
git_index_entry entry;
git_index *index;
size_t i;
g_repo = cl_git_sandbox_init("empty_standard_repo");
cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_set_version(index, 4));
for (i = 0; i < ARRAY_SIZE(paths); i++) {
memset(&entry, 0, sizeof(entry));
entry.path = paths[i];
entry.mode = GIT_FILEMODE_BLOB;
cl_git_pass(git_index_add_frombuffer(index, &entry, paths[i],
strlen(paths[i]) + 1));
}
cl_assert_equal_sz(git_index_entrycount(index), ARRAY_SIZE(paths));
cl_git_pass(git_index_write(index));
git_index_free(index);
cl_git_pass(git_repository_index(&index, g_repo));
cl_assert(git_index_version(index) == 4);
entry = git_index_get_bypath(index, "exec_off", 0);
cl_assert(entry);
entry = git_index_get_bypath(index, "exec_off2on_staged", 0);
cl_assert(entry);
entry = git_index_get_bypath(index, "exec_on", 0);
cl_assert(entry);
for (i = 0; i < ARRAY_SIZE(paths); i++) {
const git_index_entry *e;
cl_assert(e = git_index_get_bypath(index, paths[i], 0));
cl_assert_equal_s(paths[i], e->path);
}
git_index_free(index);
}
void test_index_version__cleanup(void)
void test_index_version__v4_uses_path_compression(void)
{
cl_git_sandbox_cleanup();
g_repo = NULL;
git_index_entry entry;
git_index *index;
char path[250], buf[1];
struct stat st;
char i, j;
memset(path, 'a', sizeof(path));
memset(buf, 'a', sizeof(buf));
memset(&entry, 0, sizeof(entry));
entry.path = path;
entry.mode = GIT_FILEMODE_BLOB;
g_repo = cl_git_sandbox_init("indexv4");
cl_git_pass(git_repository_index(&index, g_repo));
/* write 676 paths of 250 bytes length */
for (i = 'a'; i <= 'z'; i++) {
for (j = 'a'; j < 'z'; j++) {
path[ARRAY_SIZE(path) - 3] = i;
path[ARRAY_SIZE(path) - 2] = j;
path[ARRAY_SIZE(path) - 1] = '\0';
cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf)));
}
}
cl_git_pass(git_index_write(index));
cl_git_pass(p_stat(git_index_path(index), &st));
/*
* Without path compression, the written paths would at
* least take
*
* (entries * pathlen) = len
* (676 * 250) = 169000
*
* bytes. As index v4 uses suffix-compression and our
* written paths only differ in the last two entries,
* this number will be much smaller, e.g.
*
* (1 * pathlen) + (675 * 2) = len
* 676 + 1350 = 2026
*
* bytes.
*
* Note that the above calculations do not include
* additional metadata of the index, e.g. OIDs or
* index extensions. Including those we get an index
* of approx. 200kB without compression and 40kB with
* compression. As this is a lot smaller than without
* compression, we can verify that path compression is
* used.
*/
cl_assert_(st.st_size < 75000, "path compression not enabled");
git_index_free(index);
}
[core]
repositoryformatversion = 0
filemode = true
bare = false
logallrefupdates = true
b0952dbb50bed5f01e03e31b296184cb183e54a7
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment