Commit f2400a9c by Patrick Steinhardt

config_entries: Avoid excessive map operations

When appending config entries, we currently always first get the
currently existing map entry and then afterwards update the map to
contain the current config value. In the common scenario where keys
aren't being overridden, this is the best we can do. But in case a key
gets set multiple times, then we'll also perform these two map
operations. In extreme cases, hashing the map keys will thus start to
dominate performance.

Let's optimize the pattern by using a separately allocated map entry.
Currently, we always put the current list entry into the map and update
it to get any overridden multivar. As these list entries are also used
to iterate config entries, we cannot update them in-place in the map and
are thus forced to always set the map to contain the new entry. But with
a separately allocated map entry, we can now create one once per config
key and insert it into the map. Whenever appending a new config value
with the same key, we can now just update the map entry in-place instead
of having to replace the map entry completely.

This reduces calls to the hashing function by half and trades the
improved runtime for one more allocation per unique config key. Given
that the refactoring arguably improves code readability by splitting
concerns of the `config_entry_list` type and not having to track it in
two different structures, this alone would already be reason enough to
take the trade.

Given a pathological case of a gitconfig with 100.000 repeated keys and
a section of length 10.000 characters, this reduces runtime by half from
approximately 14 seconds to 7 seconds as expected.
parent ae30009e
...@@ -11,9 +11,13 @@ typedef struct config_entry_list { ...@@ -11,9 +11,13 @@ typedef struct config_entry_list {
struct config_entry_list *next; struct config_entry_list *next;
struct config_entry_list *last; struct config_entry_list *last;
git_config_entry *entry; git_config_entry *entry;
bool first;
} config_entry_list; } config_entry_list;
typedef struct {
git_config_entry *entry;
bool multivar;
} config_entry_map_head;
typedef struct config_entries_iterator { typedef struct config_entries_iterator {
git_config_iterator parent; git_config_iterator parent;
git_config_entries *entries; git_config_entries *entries;
...@@ -102,14 +106,16 @@ void git_config_entries_incref(git_config_entries *entries) ...@@ -102,14 +106,16 @@ void git_config_entries_incref(git_config_entries *entries)
static void config_entries_free(git_config_entries *entries) static void config_entries_free(git_config_entries *entries)
{ {
config_entry_list *list = NULL, *next; config_entry_list *list = NULL, *next;
config_entry_map_head *head;
git_strmap_foreach_value(entries->map, head,
git__free((char *) head->entry->name); git__free(head)
);
git_strmap_free(entries->map); git_strmap_free(entries->map);
list = entries->list; list = entries->list;
while (list != NULL) { while (list != NULL) {
next = list->next; next = list->next;
if (list->first)
git__free((char *) list->entry->name);
git__free((char *) list->entry->value); git__free((char *) list->entry->value);
git__free(list->entry); git__free(list->entry);
git__free(list); git__free(list);
...@@ -127,40 +133,42 @@ void git_config_entries_free(git_config_entries *entries) ...@@ -127,40 +133,42 @@ void git_config_entries_free(git_config_entries *entries)
int git_config_entries_append(git_config_entries *entries, git_config_entry *entry) int git_config_entries_append(git_config_entries *entries, git_config_entry *entry)
{ {
config_entry_list *existing, *head; config_entry_list *list_head;
config_entry_map_head *map_head;
head = git__calloc(1, sizeof(config_entry_list));
GIT_ERROR_CHECK_ALLOC(head);
head->entry = entry;
if ((map_head = git_strmap_get(entries->map, entry->name)) != NULL) {
map_head->multivar = true;
/* /*
* This is a micro-optimization for configuration files * This is a micro-optimization for configuration files
* with a lot of same keys. As for multivars the entry's * with a lot of same keys. As for multivars the entry's
* key will be the same for all entries, we can just free * key will be the same for all entries, we can just free
* all except the first entry's name and just re-use it. * all except the first entry's name and just re-use it.
*/ */
if ((existing = git_strmap_get(entries->map, entry->name)) != NULL) {
git__free((char *) entry->name); git__free((char *) entry->name);
entry->name = existing->entry->name; entry->name = map_head->entry->name;
} else { } else {
head->first = 1; map_head = git__calloc(1, sizeof(*map_head));
if ((git_strmap_set(entries->map, entry->name, map_head)) < 0)
return -1;
} }
map_head->entry = entry;
list_head = git__calloc(1, sizeof(config_entry_list));
GIT_ERROR_CHECK_ALLOC(list_head);
list_head->entry = entry;
if (entries->list) if (entries->list)
entries->list->last->next = head; entries->list->last->next = list_head;
else else
entries->list = head; entries->list = list_head;
entries->list->last = head; entries->list->last = list_head;
if (git_strmap_set(entries->map, entry->name, head) < 0)
return -1;
return 0; return 0;
} }
int git_config_entries_get(git_config_entry **out, git_config_entries *entries, const char *key) int git_config_entries_get(git_config_entry **out, git_config_entries *entries, const char *key)
{ {
config_entry_list *entry; config_entry_map_head *entry;
if ((entry = git_strmap_get(entries->map, key)) == NULL) if ((entry = git_strmap_get(entries->map, key)) == NULL)
return GIT_ENOTFOUND; return GIT_ENOTFOUND;
*out = entry->entry; *out = entry->entry;
...@@ -169,12 +177,12 @@ int git_config_entries_get(git_config_entry **out, git_config_entries *entries, ...@@ -169,12 +177,12 @@ int git_config_entries_get(git_config_entry **out, git_config_entries *entries,
int git_config_entries_get_unique(git_config_entry **out, git_config_entries *entries, const char *key) int git_config_entries_get_unique(git_config_entry **out, git_config_entries *entries, const char *key)
{ {
config_entry_list *entry; config_entry_map_head *entry;
if ((entry = git_strmap_get(entries->map, key)) == NULL) if ((entry = git_strmap_get(entries->map, key)) == NULL)
return GIT_ENOTFOUND; return GIT_ENOTFOUND;
if (!entry->first) { if (entry->multivar) {
git_error_set(GIT_ERROR_CONFIG, "entry is not unique due to being a multivar"); git_error_set(GIT_ERROR_CONFIG, "entry is not unique due to being a multivar");
return -1; return -1;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment