Commit 28c1451a by Vicent Marti

tree: Fix name lookups once and for all

Double-pass binary search. Jeez.
parent 8cf2de07
...@@ -88,9 +88,6 @@ GIT_EXTERN(unsigned int) git_tree_entrycount(git_tree *tree); ...@@ -88,9 +88,6 @@ GIT_EXTERN(unsigned int) git_tree_entrycount(git_tree *tree);
/** /**
* Lookup a tree entry by its filename * Lookup a tree entry by its filename
* *
* Note that if the entry in the tree is a folder instead of
* a standard file, the given name must be ended with a slash.
*
* @param tree a previously loaded tree. * @param tree a previously loaded tree.
* @param filename the filename of the desired entry * @param filename the filename of the desired entry
* @return the tree entry; NULL if not found * @return the tree entry; NULL if not found
...@@ -206,9 +203,6 @@ GIT_EXTERN(void) git_treebuilder_free(git_treebuilder *bld); ...@@ -206,9 +203,6 @@ GIT_EXTERN(void) git_treebuilder_free(git_treebuilder *bld);
/** /**
* Get an entry from the builder from its filename * Get an entry from the builder from its filename
* *
* Note that if the entry in the tree is a folder instead of
* a standard file, the given name must be ended with a slash.
*
* The returned entry is owned by the builder and should * The returned entry is owned by the builder and should
* not be freed manually. * not be freed manually.
* *
......
...@@ -20,53 +20,104 @@ static int valid_attributes(const int attributes) ...@@ -20,53 +20,104 @@ static int valid_attributes(const int attributes)
return attributes >= 0 && attributes <= MAX_FILEMODE; return attributes >= 0 && attributes <= MAX_FILEMODE;
} }
static int valid_entry_name(const char *filename)
{
return strlen(filename) > 0 && strchr(filename, '/') == NULL;
}
static int entry_sort_cmp(const void *a, const void *b)
{
const git_tree_entry *entry_a = (const git_tree_entry *)(a);
const git_tree_entry *entry_b = (const git_tree_entry *)(b);
return git_futils_cmp_path(
entry_a->filename, entry_a->filename_len, entry_a->attr & 040000,
entry_b->filename, entry_b->filename_len, entry_b->attr & 040000);
}
struct tree_key_search { struct tree_key_search {
const char *filename; const char *filename;
size_t filename_len; size_t filename_len;
int is_folder;
}; };
static int entry_search_cmp(const void *key, const void *array_member) static int homing_search_cmp(const void *key, const void *array_member)
{ {
const struct tree_key_search *ksearch = key; const struct tree_key_search *ksearch = key;
const git_tree_entry *entry = array_member; const git_tree_entry *entry = array_member;
int result = const size_t len1 = ksearch->filename_len;
git_futils_cmp_path( const size_t len2 = entry->filename_len;
ksearch->filename, ksearch->filename_len, ksearch->is_folder,
entry->filename, entry->filename_len, entry->attr & 040000);
return result ? result : ((int)ksearch->filename_len - (int)entry->filename_len); return memcmp(
ksearch->filename,
entry->filename,
len1 < len2 ? len1 : len2
);
} }
static int entry_sort_cmp(const void *a, const void *b) /*
* Search for an entry in a given tree.
*
* Note that this search is performed in two steps because
* of the way tree entries are sorted internally in git:
*
* Entries in a tree are not sorted alphabetically; two entries
* with the same root prefix will have different positions
* depending on whether they are folders (subtrees) or normal files.
*
* Consequently, it is not possible to find an entry on the tree
* with a binary search if you don't know whether the filename
* you're looking for is a folder or a normal file.
*
* To work around this, we first perform a homing binary search
* on the tree, using the minimal length root prefix of our filename.
* Once the comparisons for this homing search start becoming
* ambiguous because of folder vs file sorting, we look linearly
* around the area for our target file.
*/
static int tree_key_search(git_vector *entries, const char *filename)
{ {
const git_tree_entry *entry_a = (const git_tree_entry *)(a); struct tree_key_search ksearch;
const git_tree_entry *entry_b = (const git_tree_entry *)(b); const git_tree_entry *entry;
return git_futils_cmp_path( int homing, i;
entry_a->filename, entry_a->filename_len, entry_a->attr & 040000,
entry_b->filename, entry_b->filename_len, entry_b->attr & 040000);
}
static int build_ksearch(struct tree_key_search *ksearch, const char *path) ksearch.filename = filename;
{ ksearch.filename_len = strlen(filename);
size_t len = strlen(path);
int is_folder = 0; /* Initial homing search; find an entry on the tree with
* the same prefix as the filename we're looking for */
homing = git_vector_bsearch2(entries, &homing_search_cmp, &ksearch);
if (homing < 0)
return homing;
if (len && path[len - 1] == '/') { /* We found a common prefix. Look forward as long as
is_folder = 1; * there are entries that share the common prefix */
len--; for (i = homing; i < (int)entries->length; ++i) {
entry = entries->contents[i];
if (homing_search_cmp(&ksearch, entry) != 0)
break;
if (strcmp(filename, entry->filename) == 0)
return i;
} }
if (len == 0 || memchr(path, '/', len) != NULL) /* If we haven't found our filename yet, look backwards
return GIT_ERROR; * too as long as we have entries with the same prefix */
for (i = homing - 1; i >= 0; --i) {
entry = entries->contents[i];
ksearch->filename = path; if (homing_search_cmp(&ksearch, entry) != 0)
ksearch->filename_len = len; break;
ksearch->is_folder = is_folder;
return GIT_SUCCESS; if (strcmp(filename, entry->filename) == 0)
return i;
}
/* The filename doesn't exist at all */
return GIT_ENOTFOUND;
} }
void git_tree__free(git_tree *tree) void git_tree__free(git_tree *tree)
...@@ -128,14 +179,10 @@ int git_tree_entry_2object(git_object **object_out, git_repository *repo, const ...@@ -128,14 +179,10 @@ int git_tree_entry_2object(git_object **object_out, git_repository *repo, const
const git_tree_entry *git_tree_entry_byname(git_tree *tree, const char *filename) const git_tree_entry *git_tree_entry_byname(git_tree *tree, const char *filename)
{ {
int idx; int idx;
struct tree_key_search ksearch;
assert(tree && filename); assert(tree && filename);
if (build_ksearch(&ksearch, filename) < GIT_SUCCESS) idx = tree_key_search(&tree->entries, filename);
return NULL;
idx = git_vector_bsearch2(&tree->entries, entry_search_cmp, &ksearch);
if (idx == GIT_ENOTFOUND) if (idx == GIT_ENOTFOUND)
return NULL; return NULL;
...@@ -415,21 +462,18 @@ int git_treebuilder_insert(git_tree_entry **entry_out, git_treebuilder *bld, con ...@@ -415,21 +462,18 @@ int git_treebuilder_insert(git_tree_entry **entry_out, git_treebuilder *bld, con
{ {
git_tree_entry *entry; git_tree_entry *entry;
int pos; int pos;
struct tree_key_search ksearch;
assert(bld && id && filename); assert(bld && id && filename);
if (!valid_attributes(attributes)) if (!valid_attributes(attributes))
return git__throw(GIT_ERROR, "Failed to insert entry. Invalid attributes"); return git__throw(GIT_ERROR, "Failed to insert entry. Invalid attributes");
if (build_ksearch(&ksearch, filename) < GIT_SUCCESS) if (!valid_entry_name(filename))
return git__throw(GIT_ERROR, "Failed to insert entry. Invalid filename '%s'", filename); return git__throw(GIT_ERROR, "Failed to insert entry. Invalid name for a tree entry");
if ((attributes & 040000) != 0) { pos = tree_key_search(&bld->entries, filename);
ksearch.is_folder = 1;
}
if ((pos = git_vector_bsearch2(&bld->entries, entry_search_cmp, &ksearch)) != GIT_ENOTFOUND) { if (pos >= 0) {
entry = git_vector_get(&bld->entries, pos); entry = git_vector_get(&bld->entries, pos);
if (entry->removed) { if (entry->removed) {
entry->removed = 0; entry->removed = 0;
...@@ -464,15 +508,11 @@ static git_tree_entry *treebuilder_get(git_treebuilder *bld, const char *filenam ...@@ -464,15 +508,11 @@ static git_tree_entry *treebuilder_get(git_treebuilder *bld, const char *filenam
{ {
int idx; int idx;
git_tree_entry *entry; git_tree_entry *entry;
struct tree_key_search ksearch;
assert(bld && filename); assert(bld && filename);
if (build_ksearch(&ksearch, filename) < GIT_SUCCESS) idx = tree_key_search(&bld->entries, filename);
return NULL; if (idx < 0)
idx = git_vector_bsearch2(&bld->entries, entry_search_cmp, &ksearch);
if (idx == GIT_ENOTFOUND)
return NULL; return NULL;
entry = git_vector_get(&bld->entries, idx); entry = git_vector_get(&bld->entries, idx);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment