/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "indexer.h"

#include "git2/indexer.h"
#include "git2/object.h"

#include "commit.h"
#include "tree.h"
#include "tag.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
#include "pack.h"
#include "filebuf.h"
#include "oid.h"
#include "oidarray.h"
#include "oidmap.h"
#include "zstream.h"
#include "object.h"

/*
 * Mutex defined elsewhere; git_indexer_free() holds it while the
 * packfile is closed and freed.
 */
extern git_mutex git__mwindow_mutex;

/*
 * Upper bound on the object count a pack header may announce;
 * git_indexer_append() rejects packs that claim more than this.
 */
size_t git_indexer__max_objects = UINT32_MAX;

/*
 * Largest pack offset that is stored directly in an entry's 32-bit
 * `offset` field; larger offsets are recorded in `offset_long`.
 */
#define UINT31_MAX (0x7FFFFFFF)

/* Per-object record collected while indexing and written to the .idx file. */
struct entry {
	/* Object id. */
	git_oid oid;
	/* CRC32 of the packed entry, already converted with htonl(). */
	uint32_t crc;
	/* Pack offset, or UINT32_MAX when the real offset is in offset_long. */
	uint32_t offset;
	/* Full offset; only set when the offset exceeds UINT31_MAX. */
	uint64_t offset_long;
};

/* State of a streaming pack indexer. */
struct git_indexer {
	/*
	 * parsed_header:  the pack header has been read and validated
	 * pack_committed: git_indexer_commit() completed successfully
	 * have_stream:    `stream` is open for a partially-read entry
	 * have_delta:     the entry being read is a REF/OFS delta
	 * do_fsync:       fsync the pack, index and parent directory on commit
	 * do_verify:      run connectivity checks on each object
	 */
	unsigned int parsed_header :1,
		pack_committed :1,
		have_stream :1,
		have_delta :1,
		do_fsync :1,
		do_verify :1;
	/* Copy of the pack header as read from the file. */
	struct git_pack_header hdr;
	/* The (temporary) packfile being written and indexed. */
	struct git_pack_file *pack;
	/* File mode used for the temporary pack and the index file. */
	unsigned int mode;
	/* Current parse position within the pack. */
	off64_t off;
	/* Offset at which the entry currently being read starts. */
	off64_t entry_start;
	/* Type of the entry currently being read. */
	git_object_t entry_type;
	/* Inflated content of the current entry; only filled when do_verify is set. */
	git_buf entry_data;
	/* Inflate stream for the current entry. */
	git_packfile_stream stream;
	/* Number of objects announced by the pack header. */
	size_t nr_objects;
	/* Vector of `struct entry *`, sorted by OID before the index is written. */
	git_vector objects;
	/* Vector of `struct delta_info *` still waiting to be resolved. */
	git_vector deltas;
	/* Fan-out table: fanout[b] counts objects whose first OID byte is <= b. */
	unsigned int fanout[256];
	/* Hash context used to compute the id of the current non-delta object. */
	git_hash_ctx hash_ctx;
	/* Pack trailer hash, set by git_indexer_commit(); returned by git_indexer_hash(). */
	git_oid hash;
	/* Optional progress callback and its payload. */
	git_indexer_progress_cb progress_cb;
	void *progress_payload;
	/* Scratch buffer for reading from `stream`. */
	char objbuf[8*1024];

	/* OIDs referenced from pack objects. Used for verification. */
	git_oidmap *expected_oids;

	/* Needed to look up objects which we want to inject to fix a thin pack */
	git_odb *odb;

	/* Fields for calculating the packfile trailer (hash of everything before it) */
	/* inbuf holds back the most recent GIT_OID_RAWSZ bytes so they stay out of the hash. */
	char inbuf[GIT_OID_RAWSZ];
	size_t inbuf_len;
	git_hash_ctx trailer;
};

/* A delta entry whose resolution is deferred until the whole pack is read. */
struct delta_info {
	/* Pack offset at which the delta entry starts. */
	off64_t delta_off;
};

/*
 * Return a pointer to the hash stored on the indexer. The value is
 * filled in by git_indexer_commit() from the pack trailer hash.
 */
const git_oid *git_indexer_hash(const git_indexer *idx)
{
	const git_oid *pack_hash = &idx->hash;

	return pack_hash;
}

static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
{
	int error;
	git_map map;

	if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
		return error;

	memcpy(hdr, map.data, sizeof(*hdr));
	p_munmap(&map);

	/* Verify we recognize this pack file format. */
	if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
		git_error_set(GIT_ERROR_INDEXER, "wrong pack signature");
		return -1;
	}

	if (!pack_version_ok(hdr->hdr_version)) {
		git_error_set(GIT_ERROR_INDEXER, "wrong pack version");
		return -1;
	}

	return 0;
}

static int objects_cmp(const void *a, const void *b)
{
	const struct entry *entrya = a;
	const struct entry *entryb = b;

	return git_oid__cmp(&entrya->oid, &entryb->oid);
}

/*
 * Initialize `opts` from the GIT_INDEXER_OPTIONS_INIT template for the
 * requested structure `version`. Returns 0 when the macro falls through.
 * NOTE(review): GIT_INIT_STRUCTURE_FROM_TEMPLATE is defined elsewhere and
 * presumably returns early on an unsupported version -- confirm.
 */
int git_indexer_options_init(git_indexer_options *opts, unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
		opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
	return 0;
}

#ifndef GIT_DEPRECATE_HARD
/*
 * Older spelling of git_indexer_options_init(); only compiled when
 * GIT_DEPRECATE_HARD is not defined. Forwards its arguments unchanged.
 */
int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
{
	return git_indexer_options_init(opts, version);
}
#endif

/*
 * Create a new indexer that writes incoming pack data to a temporary
 * file under `prefix`.
 *
 * out     - receives the new indexer on success
 * prefix  - directory in which the temporary pack file is created
 * mode    - file mode for created files; 0 selects GIT_PACK_FILE_MODE
 * odb     - object database used to look up missing bases and known
 *           objects; may be NULL
 * in_opts - optional options (progress callback, verify flag)
 *
 * Returns 0 on success, -1 on failure. On failure everything acquired
 * here is released again, except as noted in the cleanup section.
 */
int git_indexer_new(
		git_indexer **out,
		const char *prefix,
		unsigned int mode,
		git_odb *odb,
		git_indexer_options *in_opts)
{
	git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
	git_indexer *idx;
	git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
	static const char suff[] = "/pack";
	int error, fd = -1;

	if (in_opts)
		memcpy(&opts, in_opts, sizeof(opts));

	idx = git__calloc(1, sizeof(git_indexer));
	GIT_ERROR_CHECK_ALLOC(idx);
	idx->odb = odb;
	idx->progress_cb = opts.progress_cb;
	idx->progress_payload = opts.progress_cb_payload;
	idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
	git_buf_init(&idx->entry_data, 0);

	if ((error = git_hash_ctx_init(&idx->hash_ctx)) < 0 ||
	    (error = git_hash_ctx_init(&idx->trailer)) < 0 ||
	    (error = git_oidmap_new(&idx->expected_oids)) < 0)
		goto cleanup;

	idx->do_verify = opts.verify;

	if (git_repository__fsync_gitdir)
		idx->do_fsync = 1;

	error = git_buf_joinpath(&path, prefix, suff);
	if (error < 0)
		goto cleanup;

	fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
	git_buf_dispose(&path);
	if (fd < 0)
		goto cleanup;

	/*
	 * Keep tmp_path alive until we know the packfile was allocated, so
	 * that the cleanup path can still unlink the temporary file if the
	 * allocation fails.
	 */
	error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
	if (error < 0)
		goto cleanup;

	git_buf_dispose(&tmp_path);

	idx->pack->mwf.fd = fd;
	if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
		goto cleanup;

	*out = idx;
	return 0;

cleanup:
	if (fd != -1)
		p_close(fd);

	if (git_buf_len(&tmp_path) > 0)
		p_unlink(git_buf_cstr(&tmp_path));

	if (idx->pack != NULL)
		p_unlink(idx->pack->pack_name);

	/*
	 * Release what was set up on the indexer itself. The struct was
	 * zero-allocated, so members that were never initialized are still
	 * zero/NULL here.
	 * NOTE(review): idx->pack itself is not released on this path;
	 * freeing it safely depends on packfile/mwindow internals not
	 * visible here.
	 */
	git_hash_ctx_cleanup(&idx->trailer);
	git_hash_ctx_cleanup(&idx->hash_ctx);
	git_oidmap_free(idx->expected_oids);
	git_buf_dispose(&idx->entry_data);

	git_buf_dispose(&path);
	git_buf_dispose(&tmp_path);
	git__free(idx);
	return -1;
}

/* Enable (non-zero) or disable (zero) fsync behaviour for this indexer. */
void git_indexer__set_fsync(git_indexer *idx, int do_fsync)
{
	idx->do_fsync = do_fsync ? 1 : 0;
}

/*
 * Remember the delta entry that starts at idx->entry_start so it can be
 * resolved later. Returns 0 on success, -1 on allocation or insertion
 * failure; nothing is leaked on failure.
 */
static int store_delta(git_indexer *idx)
{
	struct delta_info *delta;

	delta = git__calloc(1, sizeof(struct delta_info));
	GIT_ERROR_CHECK_ALLOC(delta);
	delta->delta_off = idx->entry_start;

	if (git_vector_insert(&idx->deltas, delta) < 0) {
		/* The vector did not take ownership; release our record. */
		git__free(delta);
		return -1;
	}

	return 0;
}

/*
 * Format the object header for an object of the given `type` and
 * length `len` and feed it into `ctx`. Returns the formatting error if
 * that fails, otherwise the result of git_hash_update().
 */
static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type)
{
	char header[64];
	size_t header_len;
	int error;

	error = git_odb__format_object_header(&header_len,
		header, sizeof(header), (size_t)len, type);
	if (error < 0)
		return error;

	return git_hash_update(ctx, header, header_len);
}

/*
 * Read the current entry from `stream` and feed it into idx->hash_ctx.
 * When verification is enabled the data is also accumulated in
 * idx->entry_data.
 *
 * Returns 0 once the stream reports end of data, or a negative value:
 * the error from git_packfile_stream_read(), or the error from
 * buffering/hashing the data.
 */
static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
{
	ssize_t read;
	int error;

	assert(idx && stream);

	do {
		if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
			break;

		if (idx->do_verify &&
		    (error = git_buf_put(&idx->entry_data, idx->objbuf, read)) < 0)
			return error;

		if ((error = git_hash_update(&idx->hash_ctx, idx->objbuf, read)) < 0)
			return error;
	} while (read > 0);

	if (read < 0)
		return (int)read;

	return 0;
}

/*
 * Move idx->off past the delta base description so that it points at
 * the start of the compressed delta data. `type` must be one of the two
 * delta types. Returns 0 on success or the negative error reported by
 * get_delta_base().
 */
static int advance_delta_offset(git_indexer *idx, git_object_t type)
{
	git_mwindow *window = NULL;
	off64_t base_off;
	int error;

	assert(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA);

	/* A REF delta names its base with a raw OID of fixed size. */
	if (type == GIT_OBJECT_REF_DELTA) {
		idx->off += GIT_OID_RAWSZ;
		return 0;
	}

	/* OFS delta: get_delta_base() is handed idx->off to update. */
	error = get_delta_base(&base_off, idx->pack, &window, &idx->off, type, idx->entry_start);
	git_mwindow_close(&window);

	return (error < 0) ? error : 0;
}

/*
 * Drain `stream` into the scratch buffer, discarding the data. Returns
 * 0 when the stream reports end of data, or the negative error from
 * git_packfile_stream_read().
 */
static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
{
	ssize_t nread;

	assert(stream);

	for (;;) {
		nread = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
		if (nread <= 0)
			break;
	}

	return (nread < 0) ? (int)nread : 0;
}

/*
 * Compute the CRC32 of `size` bytes of the pack starting at `start`.
 * The result is stored in `*crc_out` in network byte order.
 * Returns 0 on success, -1 if a window could not be opened.
 */
static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size)
{
	void *ptr;
	uint32_t crc;
	unsigned int left, len;
	git_mwindow *w = NULL;

	crc = crc32(0L, Z_NULL, 0);
	while (size > 0) {
		ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
		if (ptr == NULL)
			return -1;

		/*
		 * Compare in 64 bits: truncating `size` to unsigned int first
		 * would yield 0 for sizes that are a multiple of 2^32 and
		 * stall the loop.
		 */
		len = (size < (off64_t)left) ? (unsigned int)size : left;
		crc = crc32(crc, ptr, len);
		size -= len;
		start += len;
		git_mwindow_close(&w);
	}

	*crc_out = htonl(crc);
	return 0;
}

/*
 * Record `oid` as an object the pack is expected to deliver, unless it
 * is already known. Returns 0 on success (including "nothing to do"),
 * or a negative value on allocation/insertion failure.
 */
static int add_expected_oid(git_indexer *idx, const git_oid *oid)
{
	git_oid *dup;
	int error;

	/*
	 * If we know about that object because it is stored in our ODB or
	 * because we have already processed it as part of our pack file, we do
	 * not have to expect it. The same holds if it is already expected.
	 */
	if ((idx->odb && git_odb_exists(idx->odb, oid)) ||
	    git_oidmap_exists(idx->pack->idx_cache, oid) ||
	    git_oidmap_exists(idx->expected_oids, oid))
		return 0;

	dup = git__malloc(sizeof(*oid));
	GIT_ERROR_CHECK_ALLOC(dup);
	git_oid_cpy(dup, oid);

	/* The copy serves as both key and value; free it if the map rejects it. */
	if ((error = git_oidmap_set(idx->expected_oids, dup, dup)) < 0)
		git__free(dup);

	return error;
}

/*
 * Connectivity bookkeeping for one fully-read object.
 *
 * The object is parsed, removed from the set of expected OIDs if it was
 * listed there, and -- unless the ODB already contains it -- every OID
 * it references (tree entries, commit parents and tree, tag target) is
 * added to the expected set.
 *
 * Types other than blob, tree, commit and tag are ignored.
 *
 * Returns 0 on success, or a negative value if the object cannot be
 * parsed or an expected OID cannot be recorded.
 */
static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
{
	git_object *object = NULL;
	git_oid *expected;
	int error = 0;

	if (obj->type != GIT_OBJECT_BLOB &&
	    obj->type != GIT_OBJECT_TREE &&
	    obj->type != GIT_OBJECT_COMMIT &&
	    obj->type != GIT_OBJECT_TAG)
		return 0;

	if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0)
		goto out;

	if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) {
		git_oidmap_delete(idx->expected_oids, &object->cached.oid);
		git__free(expected);
	}

	/*
	 * Check whether this is a known object. If so, we can just continue as
	 * we assume that the ODB has a complete graph.
	 */
	if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid))
		goto out;

	switch (obj->type) {
		case GIT_OBJECT_TREE:
		{
			git_tree *tree = (git_tree *) object;
			git_tree_entry *entry;
			size_t i;

			git_array_foreach(tree->entries, i, entry) {
				if ((error = add_expected_oid(idx, entry->oid)) < 0)
					goto out;
			}

			break;
		}
		case GIT_OBJECT_COMMIT:
		{
			git_commit *commit = (git_commit *) object;
			git_oid *parent_oid;
			size_t i;

			git_array_foreach(commit->parent_ids, i, parent_oid) {
				if ((error = add_expected_oid(idx, parent_oid)) < 0)
					goto out;
			}

			if ((error = add_expected_oid(idx, &commit->tree_id)) < 0)
				goto out;

			break;
		}
		case GIT_OBJECT_TAG:
		{
			git_tag *tag = (git_tag *) object;

			if ((error = add_expected_oid(idx, &tag->target)) < 0)
				goto out;

			break;
		}
		case GIT_OBJECT_BLOB:
		default:
			break;
	}

out:
	/* `object` is NULL when parsing failed. */
	git_object_free(object);

	return error;
}

/*
 * Finish the non-delta object that was just read: finalize its hash,
 * optionally run the connectivity check, register it in the pack's
 * idx_cache, compute its CRC, append it to idx->objects and update the
 * fan-out table.
 *
 * Returns 0 on success, -1 on failure. On failure, any record that has
 * not been handed over to idx_cache is released.
 */
static int store_object(git_indexer *idx)
{
	int i;
	git_oid oid;
	struct entry *entry;
	off64_t entry_size;
	struct git_pack_entry *pentry = NULL;
	off64_t entry_start = idx->entry_start;

	entry = git__calloc(1, sizeof(*entry));
	GIT_ERROR_CHECK_ALLOC(entry);

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL)
		goto on_error;

	if (git_hash_final(&oid, &idx->hash_ctx))
		goto on_error;

	entry_size = idx->off - entry_start;
	if (entry_start > UINT31_MAX) {
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	if (idx->do_verify) {
		git_rawobj rawobj = {
		    idx->entry_data.ptr,
		    idx->entry_data.size,
		    idx->entry_type
		};

		if (check_object_connectivity(idx, &rawobj) < 0)
			goto on_error;
	}

	git_oid_cpy(&pentry->sha1, &oid);
	pentry->offset = entry_start;

	if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) {
		git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
		goto on_error;
	}

	if (git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) {
		git_error_set_oom();
		goto on_error;
	}

	/* idx_cache owns pentry from here on; it must not be freed below. */
	pentry = NULL;

	git_oid_cpy(&entry->oid, &oid);

	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		goto on_error;

	for (i = oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;

on_error:
	git__free(pentry);
	git__free(entry);

	return -1;
}

/* Tell whether an object with the given id is already in the pack's idx_cache. */
GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
{
	return git_oidmap_exists(idx->pack->idx_cache, id) != 0;
}

/*
 * Register an already-hashed object: record its offset in both `entry`
 * and `pentry`, insert `pentry` into the pack's idx_cache, append
 * `entry` to idx->objects and update the fan-out table.
 *
 * Returns 0 on success, -1 if the object is already cached, cannot be
 * inserted into the cache, or cannot be appended to the object list.
 */
static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start)
{
	int bucket;

	pentry->offset = entry_start;

	/* Offsets that fit are stored inline; larger ones use the long slot. */
	if (entry_start <= UINT31_MAX) {
		entry->offset = (uint32_t)entry_start;
	} else {
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	}

	if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) ||
	    git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack");
		return -1;
	}

	/* Append the record to the list of indexed objects. */
	if (git_vector_insert(&idx->objects, entry) < 0)
		return -1;

	/* Every fan-out bucket at or above the first OID byte gains one. */
	bucket = entry->oid.id[0];
	while (bucket < 256) {
		idx->fanout[bucket]++;
		++bucket;
	}

	return 0;
}

/*
 * Hash a resolved object, compute the CRC of its packed representation
 * (from `entry_start` up to idx->off) and register it via save_entry().
 *
 * Returns 0 on success. On any failure returns -1 and frees
 * `obj->data`, so the caller must not free it again.
 */
static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start)
{
	git_oid oid;
	size_t entry_size;
	struct entry *entry = NULL;
	struct git_pack_entry *pentry = NULL;

	entry = git__calloc(1, sizeof(*entry));
	if (entry == NULL)
		goto on_error;

	if (git_odb__hashobj(&oid, obj) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "failed to hash object");
		goto on_error;
	}

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL)
		goto on_error;

	git_oid_cpy(&pentry->sha1, &oid);
	git_oid_cpy(&entry->oid, &oid);
	entry->crc = crc32(0L, Z_NULL, 0);

	entry_size = (size_t)(idx->off - entry_start);
	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	if (save_entry(idx, entry, pentry, entry_start) < 0) {
		/*
		 * save_entry() may have failed after placing pentry in
		 * idx_cache; in that case the cache owns it and we must not
		 * free it here.
		 */
		if (git_oidmap_get(idx->pack->idx_cache, &pentry->sha1) == pentry)
			pentry = NULL;
		goto on_error;
	}

	return 0;

on_error:
	git__free(pentry);
	git__free(entry);
	git__free(obj->data);
	obj->data = NULL;
	return -1;
}

/*
 * Invoke the user's progress callback, if one is set, and pass its
 * return value through git_error_set_after_callback_function().
 * Returns 0 when no callback is registered.
 */
static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats)
{
	if (!idx->progress_cb)
		return 0;

	return git_error_set_after_callback_function(
		idx->progress_cb(stats, idx->progress_payload),
		"indexer progress");
}

/*
 * Feed incoming pack data into the trailer hash while always holding
 * back the most recent GIT_OID_RAWSZ bytes in idx->inbuf, so that the
 * pack trailer itself never becomes part of the hash.
 */
static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
{
	size_t keep, flush;

	if (!size)
		return;

	if (size >= GIT_OID_RAWSZ) {
		/*
		 * The new data alone fills the hold-back buffer: hash what was
		 * held back plus everything except the tail of `data`, then
		 * keep that tail.
		 */
		const uint8_t *tail = data + (size - GIT_OID_RAWSZ);

		git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
		git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);

		memcpy(idx->inbuf, tail, GIT_OID_RAWSZ);
		idx->inbuf_len = GIT_OID_RAWSZ;
	} else if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
		/* Everything still fits in the hold-back buffer. */
		memcpy(idx->inbuf + idx->inbuf_len, data, size);
		idx->inbuf_len += size;
	} else {
		/* Hash the oldest held-back bytes to make room, then append. */
		keep = GIT_OID_RAWSZ - size;
		flush = idx->inbuf_len - keep;

		git_hash_update(&idx->trailer, idx->inbuf, flush);

		memmove(idx->inbuf, idx->inbuf + flush, keep);
		memcpy(idx->inbuf + keep, data, size);
		idx->inbuf_len += size - flush;
	}
}

/*
 * Write `size` bytes from `data` into the pack file at `offset` through
 * a shared writable mapping. Returns 0 on success or the negative error
 * from git__mmap_alignment() / p_mmap().
 */
static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
{
	git_map map;
	size_t alignment, delta;
	off64_t aligned_start;
	int error;

	assert(data && size);

	if ((error = git__mmap_alignment(&alignment)) < 0)
		return error;

	/* Mappings must start on an alignment boundary; map from there. */
	delta = offset % alignment;
	aligned_start = offset - delta;

	error = p_mmap(&map, delta + size, GIT_PROT_WRITE, GIT_MAP_SHARED,
		idx->pack->mwf.fd, aligned_start);
	if (error < 0)
		return error;

	memcpy((unsigned char *)map.data + delta, data, size);
	p_munmap(&map);

	return 0;
}

/*
 * Append `size` bytes from `data` at the current end of the pack
 * (idx->pack->mwf.size). The caller is responsible for bumping
 * mwf.size afterwards. Returns 0 on success (or when size is 0),
 * a negative value otherwise.
 */
static int append_to_pack(git_indexer *idx, const void *data, size_t size)
{
	git_mwindow_file *mwf = &idx->pack->mwf;
	off64_t grown_size, last_page_start;
	size_t alignment, into_page;
	int error;

	if (size == 0)
		return 0;

	if ((error = git__mmap_alignment(&alignment)) < 0)
		return error;

	/*
	 * mmap writes cannot report errors, so force the file system to
	 * allocate the space up front (or fail now) by writing one byte at
	 * the last position of the page that will hold the new end of the
	 * file. Rounding up to the page end means real file I/O is only
	 * needed when a new page is added.
	 */
	grown_size = mwf->size + size;
	into_page = grown_size % alignment;
	last_page_start = grown_size - into_page;

	if (p_lseek(mwf->fd, last_page_start + alignment - 1, SEEK_SET) < 0 ||
	    p_write(mwf->fd, data, 1) < 0) {
		git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
		return -1;
	}

	return write_at(idx, data, mwf->size, size);
}

/*
 * Try to read one pack entry starting at idx->off.
 *
 * The function is resumable: when not enough data is available yet it
 * returns GIT_EBUFS and can be called again after more data has been
 * appended. While an entry is partially read, idx->have_stream stays
 * set and the open stream is reused on the next call.
 *
 * Returns 0 when an entry was fully processed (stored as an object or
 * queued as a delta), GIT_EBUFS when more data is needed, or another
 * non-zero value on error or when the progress callback asks to stop.
 */
static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
{
	git_packfile_stream *stream = &idx->stream;
	off64_t entry_start = idx->off;
	size_t entry_size;
	git_object_t type;
	git_mwindow *w = NULL;
	int error;

	/* Do not start parsing unless there is data beyond a 20-byte trailer. */
	if (idx->pack->mwf.size <= idx->off + 20)
		return GIT_EBUFS;

	if (!idx->have_stream) {
		error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
		if (error == GIT_EBUFS) {
			/* Header incomplete: rewind so the next call retries it. */
			idx->off = entry_start;
			return error;
		}
		if (error < 0)
			return error;

		git_mwindow_close(&w);
		idx->entry_start = entry_start;
		git_hash_init(&idx->hash_ctx);
		git_buf_clear(&idx->entry_data);

		if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) {
			error = advance_delta_offset(idx, type);
			if (error == GIT_EBUFS) {
				/* Base description incomplete: rewind to the entry start. */
				idx->off = entry_start;
				return error;
			}
			if (error < 0)
				return error;

			idx->have_delta = 1;
		} else {
			idx->have_delta = 0;

			/* Non-delta objects are hashed as we go, starting with the header. */
			error = hash_header(&idx->hash_ctx, entry_size, type);
			if (error < 0)
				return error;
		}

		idx->have_stream = 1;
		idx->entry_type = type;

		error = git_packfile_stream_open(stream, idx->pack, idx->off);
		if (error < 0)
			return error;
	}

	/* Deltas are only skipped over here; other objects are hashed. */
	if (idx->have_delta) {
		error = read_object_stream(idx, stream);
	} else {
		error = hash_object_stream(idx, stream);
	}

	/* Remember how far the stream got, even if it needs more data. */
	idx->off = stream->curpos;
	if (error == GIT_EBUFS)
		return error;

	/* We want to free the stream resources no matter what here */
	idx->have_stream = 0;
	git_packfile_stream_dispose(stream);

	if (error < 0)
		return error;

	if (idx->have_delta) {
		error = store_delta(idx);
	} else {
		error = store_object(idx);
	}

	if (error < 0)
		return error;

	/* Deltas count as received but are only indexed once resolved. */
	if (!idx->have_delta) {
		stats->indexed_objects++;
	}
	stats->received_objects++;

	if ((error = do_progress_callback(idx, stats)) != 0)
		return error;

	return 0;
}

/*
 * Append `size` bytes of pack data to the indexer and process as many
 * complete entries as the data received so far allows.
 *
 * idx   - the indexer
 * data  - pointer to the new pack data
 * size  - number of bytes at `data`
 * stats - progress counters, reset once the pack header has been parsed
 *         and updated as entries are processed
 *
 * Returns 0 on success (including "need more data"), or a non-zero
 * error / callback result.
 */
int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats)
{
	int error = -1;
	struct git_pack_header *hdr;
	git_mwindow_file *mwf;

	assert(idx && data && stats);

	hdr = &idx->hdr;
	mwf = &idx->pack->mwf;

	if ((error = append_to_pack(idx, data, size)) < 0)
		return error;

	/* Pass the size through unchanged; narrowing it to int could corrupt it. */
	hash_partially(idx, data, size);

	/* Make sure we set the new size of the pack */
	idx->pack->mwf.size += size;

	if (!idx->parsed_header) {
		unsigned int total_objects;

		/* Wait until the complete header has arrived. */
		if (idx->pack->mwf.size < (off64_t)sizeof(struct git_pack_header))
			return 0;

		if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
			return error;

		idx->parsed_header = 1;
		idx->nr_objects = ntohl(hdr->hdr_entries);
		idx->off = sizeof(struct git_pack_header);

		if (idx->nr_objects <= git_indexer__max_objects) {
			total_objects = (unsigned int)idx->nr_objects;
		} else {
			git_error_set(GIT_ERROR_INDEXER, "too many objects");
			return -1;
		}

		if (git_oidmap_new(&idx->pack->idx_cache) < 0)
			return -1;

		idx->pack->has_cache = 1;
		if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
			return -1;

		if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
			return -1;

		stats->received_objects = 0;
		stats->local_objects = 0;
		stats->total_deltas = 0;
		stats->indexed_deltas = 0;
		stats->indexed_objects = 0;
		stats->total_objects = total_objects;

		if ((error = do_progress_callback(idx, stats)) != 0)
			return error;
	}

	/* Now that we have data in the pack, let's try to parse it */

	/* As the file grows any windows we try to use will be out of date */
	git_mwindow_free_all(mwf);

	while (stats->indexed_objects < idx->nr_objects) {
		if ((error = read_stream_object(idx, stats)) != 0) {
			if (error == GIT_EBUFS)
				break;
			else
				goto on_error;
		}
	}

	return 0;

on_error:
	git_mwindow_free_all(mwf);
	return error;
}

/*
 * Replace the last path component of `path` with
 * "pack-<hex of idx->hash><suffix>". Returns 0 on success, -1 if the
 * buffer could not be grown.
 */
static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
{
	const char prefix[] = "pack-";
	size_t dir_len;

	/* Walk back to just after the last '/' (or to the start). */
	for (dir_len = (size_t)path->size; dir_len > 0; dir_len--) {
		if (path->ptr[dir_len - 1] == '/')
			break;
	}

	if (git_buf_grow(path, dir_len + 1 + strlen(prefix) +
					 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
		return -1;

	git_buf_truncate(path, dir_len);
	git_buf_puts(path, prefix);

	/* The hex digits are formatted straight into the reserved space. */
	git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
	path->size += GIT_OID_HEXSZ;

	git_buf_puts(path, suffix);

	return git_buf_oom(path) ? -1 : 0;
}

/**
 * Shrink the recorded pack size by the length of the trailer and drop
 * all open windows. This is done before injecting objects at the tail
 * of the pack, which overwrites the old trailer.
 */
static void seek_back_trailer(git_indexer *idx)
{
	git_mwindow_file *mwf = &idx->pack->mwf;

	mwf->size -= GIT_OID_RAWSZ;
	git_mwindow_free_all(mwf);
}

/*
 * Append the object `id`, read from idx->odb, to the end of the pack
 * (overwriting the old trailer and writing a placeholder trailer after
 * it) and register it as an indexed object.
 *
 * Returns 0 on success, a negative value on failure. All temporary
 * resources are released on every path.
 */
static int inject_object(git_indexer *idx, git_oid *id)
{
	git_odb_object *obj = NULL;
	struct entry *entry = NULL;
	struct git_pack_entry *pentry = NULL;
	git_oid foo = {{0}};
	unsigned char hdr[64];
	git_buf buf = GIT_BUF_INIT;
	off64_t entry_start;
	const void *data;
	size_t len, hdr_len;
	int error;

	seek_back_trailer(idx);
	entry_start = idx->pack->mwf.size;

	if (git_odb_read(&obj, idx->odb, id) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "missing delta bases");
		return -1;
	}

	data = git_odb_object_data(obj);
	len = git_odb_object_size(obj);

	entry = git__calloc(1, sizeof(*entry));
	if (entry == NULL) {
		error = -1;
		goto cleanup;
	}

	entry->crc = crc32(0L, Z_NULL, 0);

	/* Write out the object header */
	hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
	if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
		goto cleanup;

	idx->pack->mwf.size += hdr_len;
	entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);

	if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
		goto cleanup;

	/* And then the compressed object */
	if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
		goto cleanup;

	idx->pack->mwf.size += buf.size;
	entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
	git_buf_dispose(&buf);

	/* Write a fake trailer so the pack functions play ball */

	if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
		goto cleanup;

	idx->pack->mwf.size += GIT_OID_RAWSZ;

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	if (pentry == NULL) {
		error = -1;
		goto cleanup;
	}

	git_oid_cpy(&pentry->sha1, id);
	git_oid_cpy(&entry->oid, id);
	idx->off = entry_start + hdr_len + len;

	error = save_entry(idx, entry, pentry, entry_start);

cleanup:
	if (error) {
		/*
		 * save_entry() can fail after it has stored pentry in
		 * idx_cache; only free it when the cache does not hold it.
		 */
		if (pentry &&
		    git_oidmap_get(idx->pack->idx_cache, &pentry->sha1) != pentry)
			git__free(pentry);
		git__free(entry);
	}

	/* Safe to call again if the buffer was already disposed above. */
	git_buf_dispose(&buf);
	git_odb_object_free(obj);
	return error;
}

/*
 * Called when no delta could be resolved in a pass: find the first
 * unresolved REF delta, read the OID of its base and, if that base is
 * not in the pack yet, inject it from the ODB.
 *
 * Returns 0 on success (including when the base is already present),
 * a negative value on failure.
 */
static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
{
	int error, found_ref_delta = 0;
	unsigned int i;
	struct delta_info *delta;
	size_t size;
	git_object_t type;
	git_mwindow *w = NULL;
	off64_t curpos = 0;
	unsigned char *base_info;
	unsigned int left = 0;
	git_oid base;

	assert(git_vector_length(&idx->deltas) > 0);

	if (idx->odb == NULL) {
		git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB");
		return -1;
	}

	/* Loop until we find the first REF delta */
	git_vector_foreach(&idx->deltas, i, delta) {
		if (!delta)
			continue;

		curpos = delta->delta_off;
		error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
		if (error < 0)
			return error;

		if (type == GIT_OBJECT_REF_DELTA) {
			found_ref_delta = 1;
			break;
		}
	}

	if (!found_ref_delta) {
		/* Release the window cursor left open by the header reads. */
		git_mwindow_close(&w);
		git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object");
		return -1;
	}

	/* curpos now points to the base information, which is an OID */
	base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
	if (base_info == NULL) {
		git_error_set(GIT_ERROR_INDEXER, "failed to map delta information");
		return -1;
	}

	git_oid_fromraw(&base, base_info);
	git_mwindow_close(&w);

	if (has_entry(idx, &base))
		return 0;

	if (inject_object(idx, &base) < 0)
		return -1;

	stats->local_objects++;

	return 0;
}

/*
 * Resolve all queued deltas. Each pass tries to unpack every remaining
 * delta; those whose base is not available yet are retried in a later
 * pass. When a pass makes no progress, fix_thin_pack() is asked to
 * inject a missing base from the ODB.
 *
 * Returns 0 on success, a negative value on failure or when the
 * progress callback returns a negative value.
 */
static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats)
{
	unsigned int i;
	int error;
	struct delta_info *delta;
	int progressed = 0, non_null = 0, progress_cb_result;

	while (idx->deltas.length > 0) {
		progressed = 0;
		non_null = 0;
		git_vector_foreach(&idx->deltas, i, delta) {
			git_rawobj obj = {0};

			/* Slots of already-resolved deltas are set to NULL. */
			if (!delta)
				continue;

			non_null = 1;
			idx->off = delta->delta_off;
			if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) {
				if (error == GIT_PASSTHROUGH) {
					/* We have not seen the base object, we'll try again later. */
					continue;
				}
				return -1;
			}

			if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) {
				/* TODO: error? continue? */
				/* Either way the unpacked data must not be leaked. */
				git__free(obj.data);
				continue;
			}

			/* hash_and_save() releases obj.data itself when it fails. */
			if (hash_and_save(idx, &obj, delta->delta_off) < 0)
				continue;

			git__free(obj.data);
			stats->indexed_objects++;
			stats->indexed_deltas++;
			progressed = 1;
			if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
				return progress_cb_result;

			/* remove from the list */
			git_vector_set(NULL, &idx->deltas, i, NULL);
			git__free(delta);
		}

		/* if none were actually set, we're done */
		if (!non_null)
			break;

		if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
			return -1;
		}
	}

	return 0;
}

/*
 * After objects have been injected into a thin pack: rewrite the pack
 * header with the new object count and recompute the trailer hash over
 * the whole file. The result is left in idx->trailer for the caller to
 * finalize. Returns 0 on success, -1 on failure.
 */
static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats)
{
	git_mwindow_file *mwf = &idx->pack->mwf;
	git_mwindow *window = NULL;
	const size_t chunk = 1024*1024;
	unsigned int avail;
	off64_t pos;
	void *mapped;

	git_hash_init(&idx->trailer);

	/* Update the header to include the number of local objects we injected */
	idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
	if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
		return -1;

	/*
	 * Re-hash the file with the same technique used while receiving
	 * it: read up to the end and let hash_partially() hold back the
	 * existing trailer so it stays out of the calculation.
	 */
	git_mwindow_free_all(mwf);
	idx->inbuf_len = 0;

	for (pos = 0; pos < mwf->size; pos += avail) {
		mapped = git_mwindow_open(mwf, &window, pos, chunk, &avail);
		if (mapped == NULL)
			return -1;

		hash_partially(idx, mapped, avail);
		git_mwindow_close(&window);
	}

	return 0;
}

/*
 * Finish indexing: verify the pack trailer, resolve all deltas (fixing
 * a thin pack if necessary), write the .idx file and move both the
 * index and the pack to their final "pack-<hash>" names.
 *
 * idx   - the indexer; the full pack must have been appended already
 * stats - progress counters, updated while deltas are resolved
 *
 * Returns 0 on success, a negative value on failure.
 */
int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
{
	git_mwindow *w = NULL;
	unsigned int i, long_offsets = 0, left;
	int error;
	struct git_pack_idx_header hdr;
	git_buf filename = GIT_BUF_INIT;
	struct entry *entry;
	git_oid trailer_hash, file_hash;
	git_filebuf index_file = {0};
	void *packfile_trailer;

	if (!idx->parsed_header) {
		git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
		return -1;
	}

	/* Test for this before resolve_deltas(), as it plays with idx->off */
	if (idx->off + 20 < idx->pack->mwf.size) {
		git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
		return -1;
	}
	if (idx->off + 20 > idx->pack->mwf.size) {
		git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
		return -1;
	}

	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_trailer == NULL) {
		git_mwindow_close(&w);
		goto on_error;
	}

	/* Compare the packfile trailer as it was sent to us and what we calculated */
	git_oid_fromraw(&file_hash, packfile_trailer);
	git_mwindow_close(&w);

	git_hash_final(&trailer_hash, &idx->trailer);
	if (git_oid_cmp(&file_hash, &trailer_hash)) {
		git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
		return -1;
	}

	/* Freeze the number of deltas */
	stats->total_deltas = stats->total_objects - stats->indexed_objects;

	if ((error = resolve_deltas(idx, stats)) < 0)
		return error;

	if (stats->indexed_objects != stats->total_objects) {
		git_error_set(GIT_ERROR_INDEXER, "early EOF");
		return -1;
	}

	/* Objects were injected: fix the header count and rewrite the trailer. */
	if (stats->local_objects > 0) {
		if (update_header_and_rehash(idx, stats) < 0)
			return -1;

		git_hash_final(&trailer_hash, &idx->trailer);
		if (write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ) < 0)
			return -1;
	}

	/*
	 * Is the resulting graph fully connected or are we still
	 * missing some objects? In the second case, we can
	 * bail out due to an incomplete and thus corrupt
	 * packfile.
	 */
	if (git_oidmap_size(idx->expected_oids) > 0) {
		git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects",
			git_oidmap_size(idx->expected_oids));
		return -1;
	}

	git_vector_sort(&idx->objects);

	/* Use the trailer hash as the pack file name to ensure
	 * files with different contents have different names */
	git_oid_cpy(&idx->hash, &trailer_hash);

	/* The temporary index lives next to the temporary pack: "...pack" -> "...idx" */
	git_buf_sets(&filename, idx->pack->pack_name);
	git_buf_shorten(&filename, strlen("pack"));
	git_buf_puts(&filename, "idx");
	if (git_buf_oom(&filename))
		goto on_error;

	if (git_filebuf_open(&index_file, filename.ptr,
		GIT_FILEBUF_HASH_CONTENTS |
		(idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
		idx->mode) < 0)
		goto on_error;

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	if (git_filebuf_write(&index_file, &hdr, sizeof(hdr)) < 0)
		goto on_error;

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		if (git_filebuf_write(&index_file, &n, sizeof(n)) < 0)
			goto on_error;
	}

	/* Write out the object names (SHA-1 hashes) */
	git_vector_foreach(&idx->objects, i, entry) {
		if (git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid)) < 0)
			goto on_error;
	}

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		if (git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t)) < 0)
			goto on_error;
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/* Long offsets are referenced by their position in the long table. */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		if (git_filebuf_write(&index_file, &n, sizeof(uint32_t)) < 0)
			goto on_error;
	}

	/* Write out the long offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		if (git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2) < 0)
			goto on_error;
	}

	/* Write out the packfile trailer to the index */
	if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
		goto on_error;

	/* Write out the hash of the idx */
	if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
		goto on_error;

	if (git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid)) < 0)
		goto on_error;

	/* Figure out what the final name should be */
	if (index_path(&filename, idx, ".idx") < 0)
		goto on_error;

	/* Commit file */
	if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
		goto on_error;

	git_mwindow_free_all(&idx->pack->mwf);

	/* Truncate file to undo rounding up to next page_size in append_to_pack */
	if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
		goto on_error;
	}

	if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to fsync packfile");
		goto on_error;
	}

	/* We need to close the descriptor here so Windows doesn't choke on commit_at */
	if (p_close(idx->pack->mwf.fd) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to close packfile");
		goto on_error;
	}

	idx->pack->mwf.fd = -1;

	if (index_path(&filename, idx, ".pack") < 0)
		goto on_error;

	/* And don't forget to rename the packfile to its new place. */
	if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0)
		goto on_error;

	/* And fsync the parent directory if we're asked to. */
	if (idx->do_fsync &&
		git_futils_fsync_parent(git_buf_cstr(&filename)) < 0)
		goto on_error;

	idx->pack_committed = 1;

	git_buf_dispose(&filename);
	return 0;

on_error:
	git_mwindow_free_all(&idx->pack->mwf);
	git_filebuf_cleanup(&index_file);
	git_buf_dispose(&filename);
	return -1;
}

/*
 * Release an indexer and everything it owns. Accepts NULL. If the pack
 * was never committed, the packfile is closed with the unlink flag set
 * before being freed.
 */
void git_indexer_free(git_indexer *idx)
{
	const git_oid *oid_key;
	git_oid *oid_value;
	size_t pos = 0;

	if (!idx)
		return;

	if (idx->have_stream)
		git_packfile_stream_dispose(&idx->stream);

	git_vector_free_deep(&idx->objects);

	/* The cache owns its git_pack_entry values; free them with it. */
	if (idx->pack->idx_cache) {
		struct git_pack_entry *cached;
		git_oidmap_foreach_value(idx->pack->idx_cache, cached, {
			git__free(cached);
		});

		git_oidmap_free(idx->pack->idx_cache);
	}

	git_vector_free_deep(&idx->deltas);

	/* Closing and freeing the packfile happens under the mwindow mutex. */
	if (!git_mutex_lock(&git__mwindow_mutex)) {
		if (!idx->pack_committed)
			git_packfile_close(idx->pack, true);

		git_packfile_free(idx->pack);
		git_mutex_unlock(&git__mwindow_mutex);
	}

	/* Expected OIDs are heap copies used as both key and value. */
	while (git_oidmap_iterate((void **) &oid_value, idx->expected_oids, &pos, &oid_key) == 0)
		git__free(oid_value);
	git_oidmap_free(idx->expected_oids);

	git_buf_dispose(&idx->entry_data);
	git_hash_ctx_cleanup(&idx->hash_ctx);
	git_hash_ctx_cleanup(&idx->trailer);
	git__free(idx);
}