Commit afeecf4f by Vicent Marti

odb: Direct writes are back

DIRECT WRITES ARE BACK AND FASTER THAN EVER. The streaming writer to the
ODB was an overkill for the smaller objects like Commit and Tags; most
of the streaming logic was taking too long.

This commit makes Commits, Tags and Trees to be built-up in memory, and
then written to disk in 2 pushes (header + data), instead of streaming
everything.

This is *always* faster, even for big files (since the git_filebuf class
still does streaming writes when the memory cache overflows). This is
also a gazillion lines of code smaller, because we don't have to
precompute the final size of the object before starting the stream (this
was kind of defeating the point of streaming, anyway).

Blobs are still written with full streaming instead of loading them in
memory, since this is still the fastest way.

A new `git_buf` class has been added. It's missing some features, but
it'll get there.
parent 2fc78e70
#include "buffer.h"
#include "posix.h"
#include <stdarg.h>
#define ENSURE_SIZE(b, d) \
if ((ssize_t)(d) >= buf->asize && git_buf_grow(b, (d)) < GIT_SUCCESS)\
return;
int git_buf_grow(git_buf *buf, size_t target_size)
{
char *new_ptr;
if (buf->asize < 0)
return GIT_ENOMEM;
if (buf->asize == 0)
buf->asize = target_size;
/* grow the buffer size by 1.5, until it's big enough
* to fit our target size */
while (buf->asize < (int)target_size)
buf->asize = (buf->asize << 1) - (buf->asize >> 1);
new_ptr = git__realloc(buf->ptr, buf->asize);
if (!new_ptr) {
buf->asize = -1;
return GIT_ENOMEM;
}
buf->ptr = new_ptr;
return GIT_SUCCESS;
}
int git_buf_oom(const git_buf *buf)
{
return (buf->asize < 0);
}
void git_buf_putc(git_buf *buf, char c)
{
ENSURE_SIZE(buf, buf->size + 1);
buf->ptr[buf->size++] = c;
}
void git_buf_put(git_buf *buf, const char *data, size_t len)
{
ENSURE_SIZE(buf, buf->size + len);
memcpy(buf->ptr + buf->size, data, len);
buf->size += len;
}
void git_buf_puts(git_buf *buf, const char *string)
{
git_buf_put(buf, string, strlen(string));
}
void git_buf_printf(git_buf *buf, const char *format, ...)
{
int len;
va_list arglist;
ENSURE_SIZE(buf, buf->size + 1);
while (1) {
va_start(arglist, format);
len = p_vsnprintf(buf->ptr + buf->size, buf->asize - buf->size, format, arglist);
va_end(arglist);
if (len < 0) {
buf->asize = -1;
return;
}
if (len + 1 <= buf->asize - buf->size) {
buf->size += len;
return;
}
ENSURE_SIZE(buf, buf->size + len + 1);
}
}
const char *git_buf_cstr(git_buf *buf)
{
if (buf->size + 1 >= buf->asize && git_buf_grow(buf, buf->size + 1) < GIT_SUCCESS)
return NULL;
buf->ptr[buf->size] = '\0';
return buf->ptr;
}
void git_buf_free(git_buf *buf)
{
free(buf->ptr);
}
#ifndef INCLUDE_buffer_h__
#define INCLUDE_buffer_h__
#include "common.h"
typedef struct {
char *ptr;
ssize_t asize, size;
} git_buf;
#define GIT_BUF_INIT {NULL, 0, 0}
int git_buf_grow(git_buf *buf, size_t target_size);
int git_buf_oom(const git_buf *buf);
void git_buf_putc(git_buf *buf, char c);
void git_buf_put(git_buf *buf, const char *data, size_t len);
void git_buf_puts(git_buf *buf, const char *string);
void git_buf_printf(git_buf *buf, const char *format, ...) GIT_FORMAT_PRINTF(2, 3);
const char *git_buf_cstr(git_buf *buf);
void git_buf_free(git_buf *buf);
#define git_buf_PUTS(buf, str) git_buf_put(buf, str, sizeof(str) - 1)
#endif
...@@ -116,53 +116,36 @@ int git_commit_create( ...@@ -116,53 +116,36 @@ int git_commit_create(
int parent_count, int parent_count,
const git_commit *parents[]) const git_commit *parents[])
{ {
size_t final_size = 0; git_buf commit = GIT_BUF_INIT;
int message_length, author_length, committer_length;
char *author_str, *committer_str;
int error, i; int error, i;
git_odb_stream *stream;
message_length = strlen(message);
author_length = git_signature__write(&author_str, "author ", author);
committer_length = git_signature__write(&committer_str, "committer ", committer);
if (author_length < 0 || committer_length < 0)
return git__throw(GIT_EINVALIDARGS, "Cannot create commit. Failed to parse signature");
final_size += GIT_OID_LINE_LENGTH("tree");
final_size += GIT_OID_LINE_LENGTH("parent") * parent_count;
final_size += author_length;
final_size += committer_length;
final_size += 1 + message_length;
if ((error = git_odb_open_wstream(&stream, repo->db, final_size, GIT_OBJ_COMMIT)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to create commit");
if (git_object_owner((const git_object *)tree) != repo) if (git_object_owner((const git_object *)tree) != repo)
return git__throw(GIT_EINVALIDARGS, "The given tree does not belong to this repository"); return git__throw(GIT_EINVALIDARGS, "The given tree does not belong to this repository");
git__write_oid(stream, "tree", git_object_id((const git_object *)tree)); git_oid__writebuf(&commit, "tree ", git_object_id((const git_object *)tree));
for (i = 0; i < parent_count; ++i) { for (i = 0; i < parent_count; ++i) {
if (git_object_owner((const git_object *)parents[i]) != repo) if (git_object_owner((const git_object *)parents[i]) != repo) {
return git__throw(GIT_EINVALIDARGS, "The given parent does not belong to this repository"); error = git__throw(GIT_EINVALIDARGS, "The given parent does not belong to this repository");
goto cleanup;
}
git__write_oid(stream, "parent", git_object_id((const git_object *)parents[i])); git_oid__writebuf(&commit, "parent ", git_object_id((const git_object *)parents[i]));
} }
stream->write(stream, author_str, author_length); git_signature__writebuf(&commit, "author ", author);
free(author_str); git_signature__writebuf(&commit, "committer ", committer);
stream->write(stream, committer_str, committer_length); git_buf_putc(&commit, '\n');
free(committer_str); git_buf_puts(&commit, message);
stream->write(stream, "\n", 1); if (git_buf_oom(&commit)) {
stream->write(stream, message, message_length); error = git__throw(GIT_ENOMEM, "Not enough memory to build the commit data");
goto cleanup;
}
error = stream->finalize_write(oid, stream); error = git_odb_write(oid, git_repository_database(repo), commit.ptr, commit.size, GIT_OBJ_COMMIT);
stream->free(stream); git_buf_free(&commit);
if (error == GIT_SUCCESS && update_ref != NULL) { if (error == GIT_SUCCESS && update_ref != NULL) {
git_reference *head; git_reference *head;
...@@ -192,6 +175,10 @@ int git_commit_create( ...@@ -192,6 +175,10 @@ int git_commit_create(
return git__rethrow(error, "Failed to create commit"); return git__rethrow(error, "Failed to create commit");
return GIT_SUCCESS; return GIT_SUCCESS;
cleanup:
git_buf_free(&commit);
return error;
} }
int commit_parse_buffer(git_commit *commit, const void *data, size_t len) int commit_parse_buffer(git_commit *commit, const void *data, size_t len)
......
...@@ -374,7 +374,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...) ...@@ -374,7 +374,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...)
if (len < 0) if (len < 0)
return git__throw(GIT_EOSERR, "Failed to format string"); return git__throw(GIT_EOSERR, "Failed to format string");
if ((size_t)len <= space_left) { if ((size_t)len + 1 <= space_left) {
file->buf_pos += len; file->buf_pos += len;
return GIT_SUCCESS; return GIT_SUCCESS;
} }
...@@ -384,7 +384,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...) ...@@ -384,7 +384,7 @@ int git_filebuf_printf(git_filebuf *file, const char *format, ...)
space_left = file->buf_size - file->buf_pos; space_left = file->buf_size - file->buf_pos;
} while ((size_t)len <= space_left); } while ((size_t)len + 1 <= space_left);
tmp_buffer = git__malloc(len + 1); tmp_buffer = git__malloc(len + 1);
if (!tmp_buffer) if (!tmp_buffer)
......
...@@ -41,7 +41,7 @@ typedef struct git_filebuf git_filebuf; ...@@ -41,7 +41,7 @@ typedef struct git_filebuf git_filebuf;
int git_filebuf_write(git_filebuf *lock, const void *buff, size_t len); int git_filebuf_write(git_filebuf *lock, const void *buff, size_t len);
int git_filebuf_reserve(git_filebuf *file, void **buff, size_t len); int git_filebuf_reserve(git_filebuf *file, void **buff, size_t len);
int git_filebuf_printf(git_filebuf *file, const char *format, ...); int git_filebuf_printf(git_filebuf *file, const char *format, ...) GIT_FORMAT_PRINTF(2, 3);
int git_filebuf_open(git_filebuf *lock, const char *path, int flags); int git_filebuf_open(git_filebuf *lock, const char *path, int flags);
int git_filebuf_commit(git_filebuf *lock); int git_filebuf_commit(git_filebuf *lock);
......
...@@ -772,6 +772,49 @@ int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend ...@@ -772,6 +772,49 @@ int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend
return GIT_SUCCESS; return GIT_SUCCESS;
} }
int loose_backend__write(git_oid *oid, git_odb_backend *_backend, const void *data, size_t len, git_otype type)
{
int error, header_len;
char final_path[GIT_PATH_MAX], header[64];
git_filebuf fbuf;
loose_backend *backend;
backend = (loose_backend *)_backend;
/* prepare the header for the file */
{
header_len = format_object_header(header, sizeof(header), len, type);
if (header_len < GIT_SUCCESS)
return GIT_EOBJCORRUPTED;
}
git_path_join(final_path, backend->objects_dir, "tmp_object");
error = git_filebuf_open(&fbuf, final_path,
GIT_FILEBUF_HASH_CONTENTS |
GIT_FILEBUF_DEFLATE_CONTENTS |
GIT_FILEBUF_TEMPORARY);
if (error < GIT_SUCCESS)
return error;
git_filebuf_write(&fbuf, header, header_len);
git_filebuf_write(&fbuf, data, len);
git_filebuf_hash(oid, &fbuf);
if ((error = object_file_name(final_path, sizeof(final_path), backend->objects_dir, oid)) < GIT_SUCCESS)
goto cleanup;
if ((error = git_futils_mkpath2file(final_path)) < GIT_SUCCESS)
goto cleanup;
return git_filebuf_commit_at(&fbuf, final_path);
cleanup:
git_filebuf_cleanup(&fbuf);
return error;
}
void loose_backend__free(git_odb_backend *_backend) void loose_backend__free(git_odb_backend *_backend)
{ {
loose_backend *backend; loose_backend *backend;
...@@ -800,6 +843,7 @@ int git_odb_backend_loose(git_odb_backend **backend_out, const char *objects_dir ...@@ -800,6 +843,7 @@ int git_odb_backend_loose(git_odb_backend **backend_out, const char *objects_dir
backend->fsync_object_files = 0; backend->fsync_object_files = 0;
backend->parent.read = &loose_backend__read; backend->parent.read = &loose_backend__read;
backend->parent.write = &loose_backend__write;
backend->parent.read_prefix = &loose_backend__read_prefix; backend->parent.read_prefix = &loose_backend__read_prefix;
backend->parent.read_header = &loose_backend__read_header; backend->parent.read_header = &loose_backend__read_header;
backend->parent.writestream = &loose_backend__stream; backend->parent.writestream = &loose_backend__stream;
......
...@@ -175,6 +175,16 @@ int git__write_oid(git_odb_stream *stream, const char *header, const git_oid *oi ...@@ -175,6 +175,16 @@ int git__write_oid(git_odb_stream *stream, const char *header, const git_oid *oi
return GIT_SUCCESS; return GIT_SUCCESS;
} }
void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
{
char hex_oid[GIT_OID_HEXSZ];
git_oid_fmt(hex_oid, oid);
git_buf_puts(buf, header);
git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
git_buf_putc(buf, '\n');
}
void git_oid_fromraw(git_oid *out, const unsigned char *raw) void git_oid_fromraw(git_oid *out, const unsigned char *raw)
{ {
memcpy(out->id, raw, sizeof(out->id)); memcpy(out->id, raw, sizeof(out->id));
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "index.h" #include "index.h"
#include "cache.h" #include "cache.h"
#include "refs.h" #include "refs.h"
#include "buffer.h"
#define DOT_GIT ".git" #define DOT_GIT ".git"
#define GIT_DIR DOT_GIT "/" #define GIT_DIR DOT_GIT "/"
...@@ -44,5 +45,6 @@ void git_object__free(void *object); ...@@ -44,5 +45,6 @@ void git_object__free(void *object);
int git__parse_oid(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header); int git__parse_oid(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header);
int git__write_oid(git_odb_stream *src, const char *header, const git_oid *oid); int git__write_oid(git_odb_stream *src, const char *header, const git_oid *oid);
void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid);
#endif #endif
...@@ -342,4 +342,22 @@ int git_signature__write(char **signature, const char *header, const git_signatu ...@@ -342,4 +342,22 @@ int git_signature__write(char **signature, const char *header, const git_signatu
return sig_buffer_len; return sig_buffer_len;
} }
void git_signature__writebuf(git_buf *buf, const char *header, const git_signature *sig)
{
int offset, hours, mins;
char sign;
offset = sig->when.offset;
sign = (sig->when.offset < 0) ? '-' : '+';
if (offset < 0)
offset = -offset;
hours = offset / 60;
mins = offset % 60;
git_buf_printf(buf, "%s%s <%s> %u %c%02d%02d\n",
header ? header : "", sig->name, sig->email,
(unsigned)sig->when.time, sign, hours, mins);
}
...@@ -8,5 +8,6 @@ ...@@ -8,5 +8,6 @@
int git_signature__parse(git_signature *sig, const char **buffer_out, const char *buffer_end, const char *header); int git_signature__parse(git_signature *sig, const char **buffer_out, const char *buffer_end, const char *header);
int git_signature__write(char **signature, const char *header, const git_signature *sig); int git_signature__write(char **signature, const char *header, const git_signature *sig);
void git_signature__writebuf(git_buf *buf, const char *header, const git_signature *sig);
#endif #endif
...@@ -189,16 +189,10 @@ int git_tag_create( ...@@ -189,16 +189,10 @@ int git_tag_create(
const char *message, const char *message,
int allow_ref_overwrite) int allow_ref_overwrite)
{ {
size_t final_size = 0;
git_odb_stream *stream;
const char *type_str;
char *tagger_str;
git_reference *new_ref = NULL; git_reference *new_ref = NULL;
char ref_name[GIT_REFNAME_MAX]; char ref_name[GIT_REFNAME_MAX];
git_buf tag = GIT_BUF_INIT;
int type_str_len, tag_name_len, tagger_str_len, message_len;
int error, should_update_ref = 0; int error, should_update_ref = 0;
if (git_object_owner(target) != repo) if (git_object_owner(target) != repo)
...@@ -226,40 +220,20 @@ int git_tag_create( ...@@ -226,40 +220,20 @@ int git_tag_create(
} }
} }
type_str = git_object_type2string(git_object_type(target)); git_oid__writebuf(&tag, "object ", git_object_id(target));
tagger_str_len = git_signature__write(&tagger_str, "tagger ", tagger); git_buf_printf(&tag, "type %s\n", git_object_type2string(git_object_type(target)));
git_buf_printf(&tag, "tag %s\n", tag_name);
type_str_len = strlen(type_str); git_signature__writebuf(&tag, "tagger ", tagger);
tag_name_len = strlen(tag_name); git_buf_putc(&tag, '\n');
message_len = strlen(message); git_buf_puts(&tag, message);
final_size += GIT_OID_LINE_LENGTH("object");
final_size += STRLEN("type ") + type_str_len + 1;
final_size += STRLEN("tag ") + tag_name_len + 1;
final_size += tagger_str_len;
final_size += 1 + message_len;
if ((error = git_odb_open_wstream(&stream, repo->db, final_size, GIT_OBJ_TAG)) < GIT_SUCCESS)
return git__rethrow(error, "Failed to create tag");
git__write_oid(stream, "object", git_object_id(target));
stream->write(stream, "type ", STRLEN("type "));
stream->write(stream, type_str, type_str_len);
stream->write(stream, "\ntag ", STRLEN("\ntag "));
stream->write(stream, tag_name, tag_name_len);
stream->write(stream, "\n", 1);
stream->write(stream, tagger_str, tagger_str_len);
free(tagger_str);
stream->write(stream, "\n", 1);
stream->write(stream, message, message_len);
if (git_buf_oom(&tag)) {
git_buf_free(&tag);
return git__throw(GIT_ENOMEM, "Not enough memory to build the tag data");
}
error = stream->finalize_write(oid, stream); error = git_odb_write(oid, git_repository_database(repo), tag.ptr, tag.size, GIT_OBJ_TAG);
stream->free(stream); git_buf_free(&tag);
if (error < GIT_SUCCESS) if (error < GIT_SUCCESS)
return git__rethrow(error, "Failed to create tag"); return git__rethrow(error, "Failed to create tag");
......
...@@ -421,44 +421,35 @@ int git_treebuilder_remove(git_treebuilder *bld, const char *filename) ...@@ -421,44 +421,35 @@ int git_treebuilder_remove(git_treebuilder *bld, const char *filename)
int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *bld) int git_treebuilder_write(git_oid *oid, git_repository *repo, git_treebuilder *bld)
{ {
unsigned int i, size = 0; unsigned int i;
char filemode[MAX_FILEMODE_BYTES + 1 + 1];
git_odb_stream *stream;
int error; int error;
git_buf tree = GIT_BUF_INIT;
assert(bld); assert(bld);
sort_entries(bld); sort_entries(bld);
/* Grow the buffer beforehand to an estimated size */
git_buf_grow(&tree, bld->entries.length * 72);
for (i = 0; i < bld->entries.length; ++i) { for (i = 0; i < bld->entries.length; ++i) {
git_tree_entry *entry = bld->entries.contents[i]; git_tree_entry *entry = bld->entries.contents[i];
if (entry->removed) if (entry->removed)
continue; continue;
snprintf(filemode, sizeof(filemode), "%o ", entry->attr); git_buf_printf(&tree, "%o ", entry->attr);
size += strlen(filemode); git_buf_put(&tree, entry->filename, entry->filename_len + 1);
size += entry->filename_len + 1; git_buf_put(&tree, (char *)entry->oid.id, GIT_OID_RAWSZ);
size += GIT_OID_RAWSZ;
} }
if ((error = git_odb_open_wstream(&stream, git_repository_database(repo), size, GIT_OBJ_TREE)) < GIT_SUCCESS) if (git_buf_oom(&tree)) {
return git__rethrow(error, "Failed to write tree. Can't open write stream"); git_buf_free(&tree);
return git__throw(GIT_ENOMEM, "Not enough memory to build the tree data");
for (i = 0; i < bld->entries.length; ++i) {
git_tree_entry *entry = bld->entries.contents[i];
if (entry->removed)
continue;
snprintf(filemode, sizeof(filemode), "%o ", entry->attr);
stream->write(stream, filemode, strlen(filemode));
stream->write(stream, entry->filename, entry->filename_len + 1);
stream->write(stream, (char *)entry->oid.id, GIT_OID_RAWSZ);
} }
error = stream->finalize_write(oid, stream); error = git_odb_write(oid, git_repository_database(repo), tree.ptr, tree.size, GIT_OBJ_TREE);
stream->free(stream); git_buf_free(&tree);
return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to write tree"); return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to write tree");
} }
......
/*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
*
* In addition to the permissions in the GNU General Public License,
* the authors give you unlimited permission to link the compiled
* version of this file into combinations with other programs,
* and to distribute those combinations without any restriction
* coming from the use of this file. (The General Public License
* restrictions do apply in other respects; for example, they cover
* modification of the file, and distribution when not linked into
* a combined executable.)
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "test_lib.h"
#include "test_helpers.h"
#include <git2.h>
#include "buffer.h"
const char *test_string = "Have you seen that? Have you seeeen that??";
BEGIN_TEST(buf0, "check that resizing works properly")
git_buf buf = GIT_BUF_INIT;
git_buf_puts(&buf, test_string);
must_be_true(git_buf_oom(&buf) == 0);
must_be_true(strcmp(git_buf_cstr(&buf), test_string) == 0);
git_buf_puts(&buf, test_string);
must_be_true(strlen(git_buf_cstr(&buf)) == strlen(test_string) * 2);
END_TEST
BEGIN_TEST(buf1, "check that printf works properly")
git_buf buf = GIT_BUF_INIT;
git_buf_printf(&buf, "%s %s %d ", "shoop", "da", 23);
must_be_true(git_buf_oom(&buf) == 0);
must_be_true(strcmp(git_buf_cstr(&buf), "shoop da 23 ") == 0);
git_buf_printf(&buf, "%s %d", "woop", 42);
must_be_true(git_buf_oom(&buf) == 0);
must_be_true(strcmp(git_buf_cstr(&buf), "shoop da 23 woop 42") == 0);
END_TEST
BEGIN_SUITE(buffers)
ADD_TEST(buf0)
ADD_TEST(buf1)
END_SUITE
...@@ -44,6 +44,7 @@ DECLARE_SUITE(repository); ...@@ -44,6 +44,7 @@ DECLARE_SUITE(repository);
DECLARE_SUITE(threads); DECLARE_SUITE(threads);
DECLARE_SUITE(config); DECLARE_SUITE(config);
DECLARE_SUITE(remotes); DECLARE_SUITE(remotes);
DECLARE_SUITE(buffers);
static libgit2_suite suite_methods[]= { static libgit2_suite suite_methods[]= {
SUITE_NAME(core), SUITE_NAME(core),
...@@ -61,6 +62,7 @@ static libgit2_suite suite_methods[]= { ...@@ -61,6 +62,7 @@ static libgit2_suite suite_methods[]= {
SUITE_NAME(threads), SUITE_NAME(threads),
SUITE_NAME(config), SUITE_NAME(config),
SUITE_NAME(remotes), SUITE_NAME(remotes),
SUITE_NAME(buffers),
}; };
#define GIT_SUITE_COUNT (ARRAY_SIZE(suite_methods)) #define GIT_SUITE_COUNT (ARRAY_SIZE(suite_methods))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment