Commit 817c2820 by Vicent Marti

Rewrite all file IO for more performance

The new `git_filebuf` structure provides atomic high-performance writes
to disk by using a write cache, and optionally a double-buffered scheme
through a worker thread (not enabled yet).

Writes can be done 3-layered, like in git.git (user code -> write cache
-> disk), or 2-layered, by writing directly on the cache. This makes
index writing considerably faster.

The `git_filebuf` structure contains all the old functionality of
`git_filelock` for atomic file writes and reads. The `git_filelock`
structure has been removed.

Additionally, the `git_filebuf` API can automatically hash (SHA1)
all the data as it is written to disk (hashing is done smartly on big
chunks to improve performance).

Signed-off-by: Vicent Marti <tanoku@gmail.com>
parent 874c3b6f
/*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
*
* In addition to the permissions in the GNU General Public License,
* the authors give you unlimited permission to link the compiled
* version of this file into combinations with other programs,
* and to distribute those combinations without any restriction
* coming from the use of this file. (The General Public License
* restrictions do apply in other respects; for example, they cover
* modification of the file, and distribution when not linked into
* a combined executable.)
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "common.h"
#include "filebuf.h"
#include "fileops.h"
/*
 * Suffix appended to a path to name its lock file. The literal carries an
 * explicit NUL so that copying GIT_FILELOCK_EXTLENGTH bytes (".lock" plus
 * the terminator) yields a terminated string.
 */
static const char *GIT_FILELOCK_EXTENSION = ".lock\0";
static const size_t GIT_FILELOCK_EXTLENGTH = 6;
/* Size in bytes of the write cache allocated for every git_filebuf. */
static const size_t WRITE_BUFFER_SIZE = (4096 * 2);
/*
 * Create the lock file named by file->path_lock and store its descriptor
 * in file->fd.
 *
 * An already existing lock file is an error unless GIT_FILEBUF_FORCE is
 * set, in which case it is unlinked first. With GIT_FILEBUF_APPEND, the
 * current contents of file->path_original (if that file exists) are
 * copied into the lock file and, when a digest is active, fed to the hash.
 *
 * Returns GIT_SUCCESS, or GIT_EOSERR on any I/O failure. On failure the
 * descriptor may already be open; the caller is expected to release it
 * through git_filebuf_cleanup().
 */
static int lock_file(git_filebuf *file, int flags)
{
	if (gitfo_exists(file->path_lock) == 0) {
		if (flags & GIT_FILEBUF_FORCE)
			gitfo_unlink(file->path_lock);
		else
			return GIT_EOSERR;
	}

	file->fd = gitfo_creat(file->path_lock, 0644);
	if (file->fd < 0)
		return GIT_EOSERR;

	/* TODO: do a flock() in the descriptor file_lock */

	if ((flags & GIT_FILEBUF_APPEND) && gitfo_exists(file->path_original) == 0) {
		git_file source;
		char buffer[2048];
		/*
		 * Signed on purpose: an error return stored in an unsigned
		 * variable would look like a huge positive length.
		 * NOTE(review): this loop assumes gitfo_read() returns the
		 * number of bytes read -- confirm against fileops.
		 */
		int read_bytes;
		int error = GIT_SUCCESS;

		source = gitfo_open(file->path_original, O_RDONLY);
		if (source < 0)
			return GIT_EOSERR;

		while ((read_bytes = gitfo_read(source, buffer, sizeof(buffer))) > 0) {
			if (gitfo_write(file->fd, buffer, (size_t)read_bytes) < GIT_SUCCESS) {
				error = GIT_EOSERR;
				break;
			}

			if (file->digest)
				git_hash_update(file->digest, buffer, (size_t)read_bytes);
		}

		if (read_bytes < 0)
			error = GIT_EOSERR;

		gitfo_close(source);

		if (error < GIT_SUCCESS)
			return error;
	}

	return GIT_SUCCESS;
}
/*
 * Release every resource held by `file`: close and unlink the lock file
 * if its descriptor is still open, free the hash context, the write
 * cache(s) and both path strings.
 *
 * Each field is reset after it is released, so calling this function a
 * second time on the same structure is a harmless no-op instead of a
 * double free / double close.
 */
void git_filebuf_cleanup(git_filebuf *file)
{
	if (file->fd >= 0) {
		gitfo_close(file->fd);
		gitfo_unlink(file->path_lock);
		file->fd = -1;
	}

	if (file->digest) {
		git_hash_free_ctx(file->digest);
		file->digest = NULL;
	}

	free(file->buffer);
	file->buffer = NULL;

#ifdef GIT_FILEBUF_THREADS
	free(file->buffer_back);
	file->buffer_back = NULL;
#endif

	free(file->path_original);
	file->path_original = NULL;

	free(file->path_lock);
	file->path_lock = NULL;
}
/*
 * Push the cached bytes to the lock file descriptor and empty the cache.
 *
 * When hashing is enabled the flushed bytes are also fed to the digest.
 * Returns GIT_SUCCESS if the cache was empty, otherwise whatever
 * gitfo_write() reported.
 */
static int flush_buffer(git_filebuf *file)
{
	int status;

	if (file->buf_pos == 0)
		return GIT_SUCCESS;

	status = gitfo_write(file->fd, file->buffer, file->buf_pos);

	if (file->digest != NULL)
		git_hash_update(file->digest, file->buffer, file->buf_pos);

	file->buf_pos = 0;
	return status;
}
/*
 * Initialise `file` for buffered, lock-protected writing to `path`.
 *
 * The structure is zeroed, the original path is duplicated, the lock path
 * (original path + ".lock") is built, the write cache is allocated and,
 * if GIT_FILEBUF_HASH_CONTENTS is set in `flags`, a hash context is
 * created. Finally the lock file itself is created via lock_file().
 *
 * Returns GIT_SUCCESS, GIT_ERROR for NULL arguments, GIT_ENOMEM when an
 * allocation fails, or the error reported by lock_file(). On any failure
 * after the NULL check the structure has already been cleaned up.
 */
int git_filebuf_open(git_filebuf *file, const char *path, int flags)
{
	size_t original_len;
	int status;

	if (path == NULL || file == NULL)
		return GIT_ERROR;

	memset(file, 0x0, sizeof(git_filebuf));
	file->fd = -1;
	file->buf_pos = 0;
	file->buf_size = WRITE_BUFFER_SIZE;

	original_len = strlen(path);

	/* every failure before the locking step is an allocation failure */
	status = GIT_ENOMEM;

	file->path_original = git__strdup(path);
	if (!file->path_original)
		goto on_error;

	file->path_lock = git__malloc(original_len + GIT_FILELOCK_EXTLENGTH);
	if (!file->path_lock)
		goto on_error;

	/* the extension copy brings the NUL terminator along */
	memcpy(file->path_lock, file->path_original, original_len);
	memcpy(file->path_lock + original_len, GIT_FILELOCK_EXTENSION, GIT_FILELOCK_EXTLENGTH);

	file->buffer = git__malloc(file->buf_size);
	if (!file->buffer)
		goto on_error;

#ifdef GIT_FILEBUF_THREADS
	file->buffer_back = git__malloc(file->buf_size);
	if (!file->buffer_back)
		goto on_error;
#endif

	if (flags & GIT_FILEBUF_HASH_CONTENTS) {
		file->digest = git_hash_new_ctx();
		if (file->digest == NULL)
			goto on_error;
	}

	status = lock_file(file, flags);
	if (status >= GIT_SUCCESS)
		return GIT_SUCCESS;

on_error:
	git_filebuf_cleanup(file);
	return status;
}
/*
 * Finish hashing and store the resulting id in `oid`.
 *
 * Pending cached bytes are flushed first so that they are included in
 * the digest. The hash context is released afterwards, so data written
 * later is no longer hashed.
 *
 * Returns GIT_ERROR when the filebuf has no active digest, the flush
 * error if flushing fails, GIT_SUCCESS otherwise.
 */
int git_filebuf_hash(git_oid *oid, git_filebuf *file)
{
	int status;

	if (!file->digest)
		return GIT_ERROR;

	status = flush_buffer(file);
	if (status < GIT_SUCCESS)
		return status;

	git_hash_final(oid, file->digest);
	git_hash_free_ctx(file->digest);
	file->digest = NULL;

	return GIT_SUCCESS;
}
/*
 * Flush the cache, close the lock file and move it over the original
 * path, then release all resources held by `file`.
 *
 * Returns the flush error, or the result of gitfo_move_file(). The
 * filebuf must not be used again after this call, whatever the result.
 */
int git_filebuf_commit(git_filebuf *file)
{
	int error;

	if ((error = flush_buffer(file)) < GIT_SUCCESS)
		goto cleanup;

	gitfo_close(file->fd);
	/*
	 * Mark the descriptor as gone: otherwise the cleanup below would
	 * close it a second time and unlink the lock path even after a
	 * successful rename.
	 */
	file->fd = -1;

	error = gitfo_move_file(file->path_lock, file->path_original);

	/* the rename failed, so the stale lock file is ours to remove */
	if (error < GIT_SUCCESS)
		gitfo_unlink(file->path_lock);

cleanup:
	git_filebuf_cleanup(file);
	return error;
}
/*
 * Append `len` bytes from `buf` at the current cache position and advance
 * the position. No bounds check is done here; the caller must make sure
 * the bytes fit.
 */
GIT_INLINE(void) add_to_cache(git_filebuf *file, void *buf, size_t len)
{
	unsigned char *dest = file->buffer + file->buf_pos;

	memcpy(dest, buf, len);
	file->buf_pos = file->buf_pos + len;
}
/*
 * Write `len` bytes from `buff` through the write cache.
 *
 * Data that fits in the free part of the cache is only copied. Otherwise
 * the cache is topped up and flushed, and whatever remains is either
 * cached (if it now fits) or, when it is at least as large as the whole
 * cache, written straight to the descriptor and hashed.
 *
 * Returns GIT_SUCCESS, or the error reported by the flush / direct write.
 */
int git_filebuf_write(git_filebuf *file, void *buff, size_t len)
{
	int error;
	unsigned char *buf = buff;

	for (;;) {
		size_t space_left = file->buf_size - file->buf_pos;

		/* cache if it's small */
		if (space_left > len) {
			add_to_cache(file, buf, len);
			return GIT_SUCCESS;
		}

		/* fill up the cache with what fits and flush it */
		if (file->buf_pos > 0) {
			add_to_cache(file, buf, space_left);

			if ((error = flush_buffer(file)) < GIT_SUCCESS)
				return error;

			len -= space_left;
			buf += space_left;
		}

		/*
		 * The cache is empty at this point. A chunk that still does
		 * not fit strictly inside it is written immediately; ">="
		 * (not ">") so that a chunk of exactly buf_size bytes cannot
		 * spin in this loop forever, and the return ends the loop
		 * once the data is on disk.
		 */
		if (len >= file->buf_size) {
			error = gitfo_write(file->fd, buf, len);

			if (file->digest)
				git_hash_update(file->digest, buf, len);

			return error;
		}
	}
}
/*
 * Hand out `len` bytes of the write cache for the caller to fill in
 * directly.
 *
 * `*buffer` receives a pointer into the cache (NULL on failure) and the
 * cache position is advanced past the reserved region. The cache is
 * flushed first when the free space is not larger than `len`.
 *
 * Returns GIT_ENOMEM when `len` exceeds the whole cache, the flush error
 * if flushing fails, GIT_SUCCESS otherwise.
 */
int git_filebuf_reserve(git_filebuf *file, void **buffer, size_t len)
{
	size_t available = file->buf_size - file->buf_pos;
	int status;

	*buffer = NULL;

	if (len > file->buf_size)
		return GIT_ENOMEM;

	if (len >= available) {
		status = flush_buffer(file);
		if (status < GIT_SUCCESS)
			return status;
	}

	*buffer = file->buffer + file->buf_pos;
	file->buf_pos += len;

	return GIT_SUCCESS;
}
#ifndef INCLUDE_filebuf_h__
#define INCLUDE_filebuf_h__
#include "fileops.h"
#include "hash.h"
/* Builds with GIT_THREADS also get the second (back) buffer in git_filebuf. */
#ifdef GIT_THREADS
# define GIT_FILEBUF_THREADS
#endif
/*
 * Flags for git_filebuf_open():
 *  - HASH_CONTENTS: create a hash context and hash the data written.
 *  - APPEND: start the lock file with a copy of the original file's
 *    contents, if the original exists.
 *  - FORCE: remove an already existing lock file instead of failing.
 */
#define GIT_FILEBUF_HASH_CONTENTS 0x1
#define GIT_FILEBUF_APPEND 0x2
#define GIT_FILEBUF_FORCE 0x4
/* State of one buffered write to a file that goes through a ".lock" file. */
struct git_filebuf {
/* heap copy of the path given to git_filebuf_open() */
char *path_original;
/* path_original with the lock extension appended */
char *path_lock;
/* hash context; NULL when contents are not being hashed */
git_hash_ctx *digest;
/* write cache of buf_size bytes */
unsigned char *buffer;
#ifdef GIT_FILEBUF_THREADS
/* second buffer of buf_size bytes, allocated on open */
unsigned char *buffer_back;
#endif
/* capacity of the cache, and number of bytes currently held in it */
size_t buf_size, buf_pos;
/* descriptor of the lock file; set to -1 on open until the lock file is created */
git_file fd;
};
typedef struct git_filebuf git_filebuf;
/* Write `len` bytes from `buff` through the write cache. */
int git_filebuf_write(git_filebuf *lock, void *buff, size_t len);
/* Reserve `len` bytes inside the write cache; `*buff` points at them (NULL on failure). */
int git_filebuf_reserve(git_filebuf *file, void **buff, size_t len);
/* Initialise `lock` and create the lock file for `path`; `flags` is a mask of GIT_FILEBUF_* values. */
int git_filebuf_open(git_filebuf *lock, const char *path, int flags);
/* Flush, move the lock file over the original path and release all resources. */
int git_filebuf_commit(git_filebuf *lock);
/* Discard the write: close and unlink the lock file and free all resources. */
void git_filebuf_cleanup(git_filebuf *lock);
/* Flush, store the final hash in `oid` and release the hash context; fails when hashing is not enabled. */
int git_filebuf_hash(git_oid *oid, git_filebuf *file);
#endif
/*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
*
* In addition to the permissions in the GNU General Public License,
* the authors give you unlimited permission to link the compiled
* version of this file into combinations with other programs,
* and to distribute those combinations without any restriction
* coming from the use of this file. (The General Public License
* restrictions do apply in other respects; for example, they cover
* modification of the file, and distribution when not linked into
* a combined executable.)
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "common.h"
#include "filelock.h"
#include "fileops.h"
/*
 * Suffix that turns a path into its lock-file path. The length covers
 * ".lock" plus the explicit NUL, so copying GIT_FILELOCK_EXTLENGTH bytes
 * also terminates the destination string.
 */
static const char *GIT_FILELOCK_EXTENSION = ".lock\0";
static const size_t GIT_FILELOCK_EXTLENGTH = 6;
/*
 * Write the lock-file path for `_lock` (its path followed by the lock
 * extension, NUL included) into the character buffer `_path`.
 *
 * Wrapped in do { } while (0) with parenthesized arguments so the macro
 * behaves as a single statement (safe in unbraced if/else) and accepts
 * arbitrary pointer expressions.
 */
#define BUILD_PATH_LOCK(_lock, _path) do { \
	memcpy((_path), (_lock)->path, (_lock)->path_length); \
	memcpy((_path) + (_lock)->path_length, GIT_FILELOCK_EXTENSION,\
		GIT_FILELOCK_EXTLENGTH);\
} while (0)
/*
 * Prepare `lock` for the file at `path`.
 *
 * The structure is zeroed and the path copied into it. Fails with
 * GIT_ERROR on NULL arguments or when the path plus the lock extension
 * would not fit in GIT_PATH_MAX bytes.
 */
int git_filelock_init(git_filelock *lock, const char *path)
{
	size_t len;

	if (path == NULL || lock == NULL)
		return GIT_ERROR;

	memset(lock, 0x0, sizeof(git_filelock));

	len = strlen(path);
	lock->path_length = len;

	if (len + GIT_FILELOCK_EXTLENGTH >= GIT_PATH_MAX)
		return GIT_ERROR;

	/* the memset above already provides the terminating NUL */
	memcpy(lock->path, path, len);

	return GIT_SUCCESS;
}
/*
 * Take the lock by creating the ".lock" file next to lock->path.
 *
 * Fails with GIT_EOSERR if the lock file already exists or cannot be
 * created. When `append` is non-zero and the original file exists, its
 * contents are copied into the lock file first.
 *
 * If the copy fails, GIT_EOSERR is returned with the lock still held
 * (is_locked set); the caller releases it with git_filelock_unlock().
 */
int git_filelock_lock(git_filelock *lock, int append)
{
	char path_lock[GIT_PATH_MAX];
	BUILD_PATH_LOCK(lock, path_lock);

	/* If file already exists, we cannot create a lock */
	if (gitfo_exists(path_lock) == 0)
		return GIT_EOSERR;

	lock->file_lock = gitfo_creat(path_lock, 0666);
	if (lock->file_lock < 0)
		return GIT_EOSERR;

	lock->is_locked = 1;

	/* TODO: do a flock() in the descriptor file_lock */

	if (append && gitfo_exists(lock->path) == 0) {
		git_file source;
		char buffer[2048];
		/*
		 * Signed on purpose: an error return stored in an unsigned
		 * variable would look like a huge positive length.
		 * NOTE(review): this loop assumes gitfo_read() returns the
		 * number of bytes read -- confirm against fileops.
		 */
		int read_bytes;
		int error = GIT_SUCCESS;

		source = gitfo_open(lock->path, O_RDONLY);
		if (source < 0)
			return GIT_EOSERR;

		while ((read_bytes = gitfo_read(source, buffer, sizeof(buffer))) > 0) {
			if (gitfo_write(lock->file_lock, buffer, (size_t)read_bytes) < GIT_SUCCESS) {
				error = GIT_EOSERR;
				break;
			}
		}

		if (read_bytes < 0)
			error = GIT_EOSERR;

		gitfo_close(source);

		if (error < GIT_SUCCESS)
			return error;
	}

	return GIT_SUCCESS;
}
/*
 * Drop the lock without committing: close the lock file descriptor and
 * remove the lock file. Does nothing when the lock is not held.
 */
void git_filelock_unlock(git_filelock *lock)
{
	char path_lock[GIT_PATH_MAX];
	BUILD_PATH_LOCK(lock, path_lock);

	if (!lock->is_locked)
		return;

	/* closing the descriptor also drops any flock() held on it */
	gitfo_close(lock->file_lock);
	gitfo_unlink(path_lock);
	lock->is_locked = 0;
}
/*
 * Commit the locked write: close the lock file and move it over the
 * original path. If the move fails the lock file is removed. Either way
 * the lock is no longer held afterwards.
 *
 * Returns GIT_ERROR when the lock is not held, otherwise the result of
 * gitfo_move_file().
 */
int git_filelock_commit(git_filelock *lock)
{
	char path_lock[GIT_PATH_MAX];
	int status;
	BUILD_PATH_LOCK(lock, path_lock);

	if (lock->file_lock < 0 || !lock->is_locked)
		return GIT_ERROR;

	/* FIXME: flush the descriptor? */
	gitfo_close(lock->file_lock);

	status = gitfo_move_file(path_lock, lock->path);
	if (status < GIT_SUCCESS)
		gitfo_unlink(path_lock);

	lock->is_locked = 0;
	return status;
}
/*
 * Write `length` bytes from `buffer` to the lock file.
 *
 * Returns GIT_ERROR when the lock is not held or has no valid
 * descriptor, otherwise the result of gitfo_write().
 */
int git_filelock_write(git_filelock *lock, const void *buffer, size_t length)
{
	int usable = lock->is_locked && lock->file_lock >= 0;

	if (!usable)
		return GIT_ERROR;

	/* gitfo_write() takes a non-const pointer, hence the cast */
	return gitfo_write(lock->file_lock, (void *)buffer, length);
}
#ifndef INCLUDE_filelock_h__
#define INCLUDE_filelock_h__
#include "fileops.h"
/* State of one lock-file protected write. */
struct git_filelock {
/* path of the file being protected (the lock file is this path + ".lock") */
char path[GIT_PATH_MAX];
/* length of `path` in bytes, not counting the terminator */
size_t path_length;
/* descriptor of the lock file */
git_file file_lock;
/* non-zero while the lock file is held */
int is_locked;
};
typedef struct git_filelock git_filelock;
/* Zero `lock` and record `path`; fails if the path plus the lock extension is too long. */
int git_filelock_init(git_filelock *lock, const char *path);
/* Create the lock file; with `append`, seed it with the original file's contents. */
int git_filelock_lock(git_filelock *lock, int append);
/* Abandon the write: close and remove the lock file if the lock is held. */
void git_filelock_unlock(git_filelock *lock);
/* Close the lock file and move it over the original path. */
int git_filelock_commit(git_filelock *lock);
/* Write `length` bytes to the lock file; fails when the lock is not held. */
int git_filelock_write(git_filelock *lock, const void *buffer, size_t length);
#endif
......@@ -102,7 +102,7 @@ static git_index_tree *read_tree_internal(const char **, const char *, git_index
static int parse_index(git_index *index, const char *buffer, size_t buffer_size);
static void sort_index(git_index *index);
static int write_index(git_index *index, git_filelock *file);
static int write_index(git_index *index, git_filebuf *file);
int index_srch(const void *key, const void *array_member)
{
......@@ -255,25 +255,22 @@ int git_index_read(git_index *index)
int git_index_write(git_index *index)
{
git_filelock file;
git_filebuf file;
struct stat indexst;
int error;
if (!index->sorted)
sort_index(index);
if ((error = git_filelock_init(&file, index->index_file_path)) < GIT_SUCCESS)
return error;
if ((error = git_filelock_lock(&file, 0)) < GIT_SUCCESS)
if ((error = git_filebuf_open(&file, index->index_file_path, GIT_FILEBUF_HASH_CONTENTS)) < GIT_SUCCESS)
return error;
if ((error = write_index(index, &file)) < GIT_SUCCESS) {
git_filelock_unlock(&file);
git_filebuf_cleanup(&file);
return error;
}
if ((error = git_filelock_commit(&file)) < GIT_SUCCESS)
if ((error = git_filebuf_commit(&file)) < GIT_SUCCESS)
return error;
if (gitfo_stat(index->index_file_path, &indexst) == 0) {
......@@ -684,22 +681,23 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
return GIT_SUCCESS;
}
static void *create_disk_entry(size_t *disk_size, git_index_entry *entry)
static int write_disk_entry(git_filebuf *file, git_index_entry *entry)
{
struct entry_short *ondisk;
size_t path_len;
size_t path_len, disk_size;
char *path;
path_len = strlen(entry->path);
if (entry->flags & GIT_IDXENTRY_EXTENDED)
*disk_size = long_entry_size(path_len);
disk_size = long_entry_size(path_len);
else
*disk_size = short_entry_size(path_len);
disk_size = short_entry_size(path_len);
ondisk = git__calloc(1, *disk_size);
if (ondisk == NULL)
return NULL;
if (git_filebuf_reserve(file, (void **)&ondisk, disk_size) < GIT_SUCCESS)
return GIT_ENOMEM;
memset(ondisk, 0x0, disk_size);
ondisk->ctime.seconds = htonl((unsigned long)entry->ctime.seconds);
ondisk->mtime.seconds = htonl((unsigned long)entry->mtime.seconds);
......@@ -727,265 +725,51 @@ static void *create_disk_entry(size_t *disk_size, git_index_entry *entry)
memcpy(path, entry->path, path_len);
return ondisk;
}
#if defined(GIT_THREADS) && defined(GIT_INDEX_THREADED)
#define THREAD_QUEUE_SIZE 64
/* One serialized index entry handed to the worker threads. */
typedef struct {
/* heap buffer with the on-disk form of the entry; freed by the last worker to release it */
void *data;
/* size of `data` in bytes */
size_t size;
/* number of queues that still have to process this entry */
git_refcnt refcount;
} index_thread_entry;
/* Bounded queue of entries consumed by one worker thread. */
typedef struct {
/* opaque argument passed as first parameter to process_entry */
void *extra_data;
/* callback run by the worker on every dequeued entry */
void (*process_entry)(void *extra_data, index_thread_entry *entry);
/* ring of queued entries, indexed modulo THREAD_QUEUE_SIZE */
index_thread_entry *buffer[THREAD_QUEUE_SIZE];
/* entries currently queued, and running read/write counters */
int count, read_pos, write_pos;
/* protects the counters and the ring */
git_lck mutex;
/* signalled after an enqueue / after a dequeue respectively */
git_cnd entry_available, space_available;
} index_thread_queue;
/*
 * Append `entry` to `queue`, blocking while the queue is full, and wake
 * up the consumer. A NULL entry is a valid item; the worker treats it as
 * the request to terminate.
 */
void index_thread_enqueue(index_thread_queue *queue, index_thread_entry *entry)
{
	gitlck_lock(&queue->mutex);

	/*
	 * Re-check the predicate after every wakeup: a condition wait may
	 * return without the queue actually having free space.
	 * NOTE(review): assumes gitcnd_wait() has pthread_cond_wait()
	 * semantics -- confirm.
	 */
	while (queue->count == THREAD_QUEUE_SIZE)
		gitcnd_wait(&queue->space_available, &queue->mutex);

	queue->buffer[queue->write_pos++ % THREAD_QUEUE_SIZE] = entry;
	queue->count++;

	gitcnd_signal(&queue->entry_available);
	gitlck_unlock(&queue->mutex);
}
/* Queue callback: feed the entry's bytes to the hash context passed as `digest`. */
void thread_hash_entry(void *digest, index_thread_entry *entry)
{
	git_hash_ctx *ctx = (git_hash_ctx *)digest;

	git_hash_update(ctx, entry->data, entry->size);
}
/*
 * Queue callback: write the entry's bytes to the file lock passed as
 * `file`. The result of the write is not reported.
 */
void thread_write_entry(void *file, index_thread_entry *entry)
{
	git_filelock *lock = (git_filelock *)file;

	git_filelock_write(lock, entry->data, entry->size);
}
/*
 * Worker thread body. `attr` is the index_thread_queue to consume.
 *
 * Entries are dequeued one at a time and passed to the queue's
 * process_entry callback. After processing, the entry's reference count
 * is dropped; when gitrc_dec() reports non-zero the entry and its data
 * are freed. A NULL entry ends the loop and the thread.
 */
void *index_thread(void *attr)
{
	index_thread_queue *queue = (index_thread_queue *)attr;

	for (;;) {
		index_thread_entry *entry;

		gitlck_lock(&queue->mutex);

		/*
		 * Re-check the predicate after every wakeup: a condition
		 * wait may return while the queue is still empty.
		 * NOTE(review): assumes gitcnd_wait() has
		 * pthread_cond_wait() semantics -- confirm.
		 */
		while (queue->count == 0)
			gitcnd_wait(&queue->entry_available, &queue->mutex);

		entry = queue->buffer[queue->read_pos++ % THREAD_QUEUE_SIZE];
		queue->count--;

		gitcnd_signal(&queue->space_available);
		gitlck_unlock(&queue->mutex);

		if (entry == NULL)
			break;

		queue->process_entry(queue->extra_data, entry);

		if (gitrc_dec(&entry->refcount)) {
			gitrc_free(&entry->refcount);
			free(entry->data);
			free(entry);
		}
	}

	git_thread_exit(NULL);

	/* not expected to be reached; gives the function a defined result */
	return NULL;
}
/*
 * Threaded variant: serialize every index entry and hand each one to two
 * worker threads, one writing it through `file` and one feeding it to
 * `digest`.
 *
 * Returns GIT_SUCCESS once both workers have been joined, GIT_ENOMEM on
 * allocation failure, or GIT_EOSERR when a thread cannot be created.
 */
static int write_entries(git_index *index, git_filelock *file, git_hash_ctx *digest)
{
git_thread write_thread, hash_thread;
index_thread_queue *write_queue, *hash_queue;
int error = GIT_SUCCESS;
unsigned int i;
write_queue = git__malloc(sizeof(index_thread_queue));
hash_queue = git__malloc(sizeof(index_thread_queue));
/* NOTE(review): if only one of the two allocations fails, the other is not freed here. */
if (write_queue == NULL || hash_queue == NULL)
return GIT_ENOMEM;
/*
* Init the writer thread.
* This thread takes care of all the blocking I/O: reads
* the produced index entries and writes them back to disk
* via the filelock API.
*/
{
write_queue->extra_data = (void *)file;
write_queue->process_entry = thread_write_entry;
write_queue->count = 0;
write_queue->read_pos = 0;
write_queue->write_pos = 0;
gitlck_init(&write_queue->mutex);
gitcnd_init(&write_queue->space_available, NULL);
gitcnd_init(&write_queue->entry_available, NULL);
if (git_thread_create(&write_thread, NULL, index_thread, (void *)write_queue) < 0) {
error = GIT_EOSERR;
goto thread_error;
}
}
/*
* Init the hasher thread.
* This thread takes care of doing an incremental
* SHA1 hash on all the written data; the final value
* of this hash must be appended at the end of the
* written index file.
*/
{
hash_queue->extra_data = (void *)digest;
hash_queue->process_entry = thread_hash_entry;
hash_queue->count = 0;
hash_queue->read_pos = 0;
hash_queue->write_pos = 0;
gitlck_init(&hash_queue->mutex);
gitcnd_init(&hash_queue->space_available, NULL);
gitcnd_init(&hash_queue->entry_available, NULL);
if (git_thread_create(&hash_thread, NULL, index_thread, (void *)hash_queue) < 0) {
error = GIT_EOSERR;
goto thread_error;
}
}
/*
* Do the processing.
* This is the main thread. Takes care of preparing all
* the entries that will be written to disk
*/
for (i = 0; i < index->entries.length; ++i) {
git_index_entry *entry;
index_thread_entry *thread_entry;
entry = git_vector_get(&index->entries, i);
thread_entry = git__malloc(sizeof(index_thread_entry));
if (thread_entry == NULL) {
error = GIT_ENOMEM;
goto thread_error;
}
thread_entry->data = create_disk_entry(&thread_entry->size, entry);
/* NOTE(review): thread_entry itself is not freed on this failure path. */
if (thread_entry->data == NULL) {
error = GIT_ENOMEM;
goto thread_error;
}
/* queue in both queues */
/* the count of 2 matches the two workers; each one drops a reference after processing */
gitrc_init(&thread_entry->refcount, 2);
index_thread_enqueue(write_queue, thread_entry);
index_thread_enqueue(hash_queue, thread_entry);
}
/* kill the two threads by queuing a NULL item */
{
index_thread_enqueue(write_queue, NULL);
index_thread_enqueue(hash_queue, NULL);
}
/* wait for them to terminate */
git_thread_join(write_thread, NULL);
git_thread_join(hash_thread, NULL);
free(write_queue);
free(hash_queue);
return GIT_SUCCESS;
/*
* NOTE(review): this path is also reached when creating the writer thread
* failed, in which case hash_thread (and write_thread) have not been
* initialised before being passed to git_thread_kill().
*/
thread_error:
git_thread_kill(write_thread);
git_thread_kill(hash_thread);
free(write_queue);
free(hash_queue);
return error;
}
#else
static int write_entries(git_index *index, git_filelock *file, git_hash_ctx *digest)
static int write_entries(git_index *index, git_filebuf *file)
{
unsigned int i;
for (i = 0; i < index->entries.length; ++i) {
git_index_entry *entry;
void *disk_entry;
size_t disk_size;
entry = git_vector_get(&index->entries, i);
disk_entry = create_disk_entry(&disk_size, entry);
if (disk_entry == NULL)
if (write_disk_entry(file, entry) < GIT_SUCCESS)
return GIT_ENOMEM;
if (git_filelock_write(file, disk_entry, disk_size) < GIT_SUCCESS)
return GIT_EOSERR;
git_hash_update(digest, disk_entry, disk_size);
free(disk_entry);
}
return GIT_SUCCESS;
}
#endif
static int write_index(git_index *index, git_filelock *file)
static int write_index(git_index *index, git_filebuf *file)
{
int error = GIT_SUCCESS;
git_hash_ctx *digest;
git_oid hash_final;
struct index_header header;
int is_extended = 1;
assert(index && file && file->is_locked);
if ((digest = git_hash_new_ctx()) == NULL)
return GIT_ENOMEM;
assert(index && file);
header.signature = htonl(INDEX_HEADER_SIG);
header.version = htonl(is_extended ? INDEX_VERSION_NUMBER : INDEX_VERSION_NUMBER_EXT);
header.entry_count = htonl(index->entries.length);
git_filelock_write(file, &header, sizeof(struct index_header));
git_hash_update(digest, &header, sizeof(struct index_header));
git_filebuf_write(file, &header, sizeof(struct index_header));
error = write_entries(index, file, digest);
error = write_entries(index, file);
if (error < GIT_SUCCESS)
goto cleanup;
return error;
/* TODO: write extensions (tree cache) */
git_hash_final(&hash_final, digest);
git_filelock_write(file, hash_final.id, GIT_OID_RAWSZ);
/* get out the hash for all the contents we've appended to the file */
git_filebuf_hash(&hash_final, file);
/* write it at the end of the file */
git_filebuf_write(file, hash_final.id, GIT_OID_RAWSZ);
cleanup:
git_hash_free_ctx(digest);
return error;
}
......@@ -2,7 +2,7 @@
#define INCLUDE_index_h__
#include "fileops.h"
#include "filelock.h"
#include "filebuf.h"
#include "vector.h"
#include "git2/odb.h"
#include "git2/index.h"
......
......@@ -511,7 +511,7 @@ int git_reference_resolve(git_reference **resolved_ref, git_reference *ref)
int git_reference_write(git_reference *ref)
{
git_filelock lock;
git_filebuf file;
char ref_path[GIT_PATH_MAX];
int error, contents_size;
char *ref_contents = NULL;
......@@ -528,10 +528,7 @@ int git_reference_write(git_reference *ref)
git__joinpath(ref_path, ref->owner->path_repository, ref->name);
if ((error = git_filelock_init(&lock, ref_path)) < GIT_SUCCESS)
goto error_cleanup;
if ((error = git_filelock_lock(&lock, 0)) < GIT_SUCCESS)
if ((error = git_filebuf_open(&file, ref_path, 0)) < GIT_SUCCESS)
goto error_cleanup;
if (ref->type == GIT_REF_OID) {
......@@ -560,20 +557,21 @@ int git_reference_write(git_reference *ref)
ref_contents[contents_size - 1] = '\n';
}
if ((error = git_filelock_write(&lock, ref_contents, contents_size)) < GIT_SUCCESS)
if ((error = git_filebuf_write(&file, ref_contents, contents_size)) < GIT_SUCCESS)
goto error_cleanup;
if ((error = git_filelock_commit(&lock)) < GIT_SUCCESS)
goto error_cleanup;
free(ref_contents);
error = git_filebuf_commit(&file);
if (error == GIT_SUCCESS)
ref->modified = 0;
free(ref_contents);
return GIT_SUCCESS;
return error;
error_cleanup:
free(ref_contents);
git_filelock_unlock(&lock);
git_filebuf_cleanup(&file);
return error;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment