Commit b7c44096 by Carlos Martín Nieto

Implement the indexer

Only v2 index files are supported.

Signed-off-by: Carlos Martín Nieto <carlos@cmartin.tk>
parent bcf21c55
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define _INCLUDE_git_indexer_h__ #define _INCLUDE_git_indexer_h__
#include "git2/common.h" #include "git2/common.h"
#include "git2/oid.h"
typedef struct git_indexer_stats { typedef struct git_indexer_stats {
unsigned int total; unsigned int total;
...@@ -12,7 +13,8 @@ typedef struct git_indexer_stats { ...@@ -12,7 +13,8 @@ typedef struct git_indexer_stats {
typedef struct git_indexer git_indexer; typedef struct git_indexer git_indexer;
GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname); GIT_EXTERN(int) git_indexer_new(git_indexer **out, const char *packname);
GIT_EXTERN(int) git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *data); GIT_EXTERN(int) git_indexer_run(git_indexer *idx, git_indexer_stats *stats);
GIT_EXTERN(const git_oid *) git_indexer_result(git_indexer *idx);
GIT_EXTERN(void) git_indexer_free(git_indexer *idx); GIT_EXTERN(void) git_indexer_free(git_indexer *idx);
......
...@@ -26,14 +26,20 @@ ...@@ -26,14 +26,20 @@
#include "git2/indexer.h" #include "git2/indexer.h"
#include "git2/object.h" #include "git2/object.h"
#include "git2/zlib.h" #include "git2/zlib.h"
#include "git2/oid.h"
#include "common.h" #include "common.h"
#include "pack.h" #include "pack.h"
#include "mwindow.h" #include "mwindow.h"
#include "posix.h" #include "posix.h"
#include "pack.h"
#include "filebuf.h"
#include "sha1.h"
#define UINT31_MAX (0x7FFFFFFF)
struct entry { struct entry {
unsigned char sha[GIT_OID_RAWSZ]; git_oid oid;
uint32_t crc; uint32_t crc;
uint32_t offset; uint32_t offset;
uint64_t offset_long; uint64_t offset_long;
...@@ -42,11 +48,19 @@ struct entry { ...@@ -42,11 +48,19 @@ struct entry {
typedef struct git_indexer { typedef struct git_indexer {
struct git_pack_file *pack; struct git_pack_file *pack;
struct stat st; struct stat st;
git_indexer_stats stats;
struct git_pack_header hdr; struct git_pack_header hdr;
struct entry *objects; size_t nr_objects;
git_vector objects;
git_filebuf file;
unsigned int fanout[256];
git_oid hash;
} git_indexer; } git_indexer;
/*
 * Return the hash stored in the indexer.
 *
 * The value is filled in by write_index(), which stores there the SHA-1
 * computed over the sorted object ids; the same hash is used to build the
 * final "pack-<hash>.idx" file name. The returned pointer refers to a
 * member of `idx` itself, so it is not a separate allocation.
 *
 * NOTE(review): the public header declares git_indexer_result() while this
 * definition is named git_indexer_hash() -- confirm which name is intended.
 */
const git_oid *git_indexer_hash(git_indexer *idx)
{
	const git_oid *hash = &idx->hash;

	return hash;
}
static int parse_header(git_indexer *idx) static int parse_header(git_indexer *idx)
{ {
int error; int error;
...@@ -55,7 +69,7 @@ static int parse_header(git_indexer *idx) ...@@ -55,7 +69,7 @@ static int parse_header(git_indexer *idx)
if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS) if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS)
return git__rethrow(error, "Failed to read in pack header"); return git__rethrow(error, "Failed to read in pack header");
if (idx->hdr.hdr_signature != htonl(PACK_SIGNATURE)) if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE))
return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature");
if (!pack_version_ok(idx->hdr.hdr_version)) if (!pack_version_ok(idx->hdr.hdr_version))
...@@ -65,12 +79,23 @@ static int parse_header(git_indexer *idx) ...@@ -65,12 +79,23 @@ static int parse_header(git_indexer *idx)
return GIT_SUCCESS; return GIT_SUCCESS;
} }
int objects_cmp(const void *a, const void *b)
{
const struct entry *entrya = a;
const struct entry *entryb = b;
return git_oid_cmp(&entrya->oid, &entryb->oid);
}
int git_indexer_new(git_indexer **out, const char *packname) int git_indexer_new(git_indexer **out, const char *packname)
{ {
git_indexer *idx; git_indexer *idx;
unsigned int namelen; unsigned int namelen;
int ret, error; int ret, error;
if (git_path_root(packname) < 0)
return git__throw(GIT_EINVALIDPATH, "Path is not absolute");
idx = git__malloc(sizeof(git_indexer)); idx = git__malloc(sizeof(git_indexer));
if (idx == NULL) if (idx == NULL)
return GIT_ENOMEM; return GIT_ENOMEM;
...@@ -83,7 +108,7 @@ int git_indexer_new(git_indexer **out, const char *packname) ...@@ -83,7 +108,7 @@ int git_indexer_new(git_indexer **out, const char *packname)
goto cleanup; goto cleanup;
memset(idx->pack, 0x0, sizeof(struct git_pack_file)); memset(idx->pack, 0x0, sizeof(struct git_pack_file));
memcpy(idx->pack->pack_name, packname, namelen); memcpy(idx->pack->pack_name, packname, namelen + 1);
ret = p_stat(packname, &idx->st); ret = p_stat(packname, &idx->st);
if (ret < 0) { if (ret < 0) {
...@@ -102,6 +127,7 @@ int git_indexer_new(git_indexer **out, const char *packname) ...@@ -102,6 +127,7 @@ int git_indexer_new(git_indexer **out, const char *packname)
} }
idx->pack->mwf.fd = ret; idx->pack->mwf.fd = ret;
idx->pack->mwf.size = idx->st.st_size;
error = parse_header(idx); error = parse_header(idx);
if (error < GIT_SUCCESS) { if (error < GIT_SUCCESS) {
...@@ -109,61 +135,187 @@ int git_indexer_new(git_indexer **out, const char *packname) ...@@ -109,61 +135,187 @@ int git_indexer_new(git_indexer **out, const char *packname)
goto cleanup; goto cleanup;
} }
idx->objects = git__calloc(sizeof(struct entry), idx->hdr.hdr_entries); idx->nr_objects = ntohl(idx->hdr.hdr_entries);
if (idx->objects == NULL) {
error = GIT_ENOMEM; error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
if (error < GIT_SUCCESS) {
goto cleanup; goto cleanup;
} }
idx->stats.total = idx->hdr.hdr_entries;
*out = idx; *out = idx;
return GIT_SUCCESS; return GIT_SUCCESS;
cleanup: cleanup:
free(idx->pack); git_indexer_free(idx);
free(idx);
return error; return error;
} }
/* static void index_path(char *path, git_indexer *idx)
* Create the index. Every time something interesting happens {
* (something has been parse or resolved), the callback gets called char *ptr;
* with some stats so it can tell the user how hard we're working const char prefix[] = "pack-", suffix[] = ".idx\0";
*/
int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void *), void *cb_data) ptr = strrchr(path, '/') + 1;
memcpy(ptr, prefix, STRLEN(prefix));
ptr += STRLEN(prefix);
git_oid_fmt(ptr, &idx->hash);
ptr += GIT_OID_HEXSZ;
memcpy(ptr, suffix, STRLEN(suffix));
}
/*
 * Write the version 2 pack index for the entries collected in idx->objects.
 *
 * The entries are sorted first, then the following sections are written in
 * order: header (signature and version 2), the 256-entry fanout table, the
 * object ids, the CRC32 values, the 32-bit offsets, the 64-bit offsets for
 * entries whose 32-bit offset is the UINT32_MAX marker, the trailing hash
 * copied from the end of the packfile and finally the hash of the index
 * contents as reported by git_filebuf_hash().
 *
 * Side effects: idx->hash is set to the SHA-1 of the sorted object ids, and
 * on success the file is committed under the name produced by index_path().
 *
 * Returns the result of the last operation: GIT_SUCCESS when everything was
 * written and committed, a negative error code otherwise. On any failure
 * after the file buffer has been opened, the buffer is cleaned up.
 */
static int write_index(git_indexer *idx)
{
	git_mwindow *w = NULL;
	int error, namelen;
	unsigned int i, left;
	/* Running index into the 64-bit offset table; must start at zero. */
	unsigned int long_offsets = 0;
	struct git_pack_idx_header hdr;
	char filename[GIT_PATH_MAX];
	struct entry *entry;
	void *packfile_hash;
	git_oid file_hash;
	SHA_CTX ctx;

	git_vector_sort(&idx->objects);

	/*
	 * Derive a temporary "<name>.idx" path from the "<name>.pack" path.
	 * sizeof("idx") includes the terminating NUL, so the result is always
	 * terminated regardless of how STRLEN treats embedded NULs.
	 */
	namelen = strlen(idx->pack->pack_name);
	memcpy(filename, idx->pack->pack_name, namelen);
	memcpy(filename + namelen - STRLEN("pack"), "idx", sizeof("idx"));

	/*
	 * The buffer was never successfully opened on this path, so bail out
	 * without cleaning it up.
	 * NOTE(review): assumes git_filebuf_open() releases any partial state
	 * itself when it fails -- confirm.
	 */
	error = git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS);
	if (error < GIT_SUCCESS)
		return git__rethrow(error, "Failed to open index file for writing");

	/* Write out the header */
	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
	hdr.idx_version = htonl(2);
	error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
	if (error < GIT_SUCCESS)
		goto cleanup;

	/* Write out the fanout table */
	for (i = 0; i < 256; ++i) {
		uint32_t n = htonl(idx->fanout[i]);
		error = git_filebuf_write(&idx->file, &n, sizeof(n));
		if (error < GIT_SUCCESS)
			goto cleanup;
	}

	/* Write out the object names (SHA-1 hashes) */
	SHA1_Init(&ctx);
	git_vector_foreach(&idx->objects, i, entry) {
		error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
		SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
		if (error < GIT_SUCCESS)
			goto cleanup;
	}
	SHA1_Final(idx->hash.id, &ctx);

	/* Write out the CRC32 values */
	git_vector_foreach(&idx->objects, i, entry) {
		error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
		if (error < GIT_SUCCESS)
			goto cleanup;
	}

	/* Write out the offsets */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t n;

		/*
		 * UINT32_MAX marks an entry whose real offset lives in
		 * offset_long; store the high bit plus its position in the
		 * long-offset table instead.
		 */
		if (entry->offset == UINT32_MAX)
			n = htonl(0x80000000 | long_offsets++);
		else
			n = htonl(entry->offset);

		error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
		if (error < GIT_SUCCESS)
			goto cleanup;
	}

	/* Write out the long offsets, most significant word first */
	git_vector_foreach(&idx->objects, i, entry) {
		uint32_t split[2];

		if (entry->offset != UINT32_MAX)
			continue;

		split[0] = htonl(entry->offset_long >> 32);
		split[1] = htonl(entry->offset_long & 0xffffffff);

		error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
		if (error < GIT_SUCCESS)
			goto cleanup;
	}

	/* Write out the packfile trailer: the last GIT_OID_RAWSZ bytes of the pack */
	packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
	if (packfile_hash == NULL) {
		error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
		goto cleanup;
	}

	memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);

	git_mwindow_close(&w);

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < GIT_SUCCESS)
		goto cleanup;

	/* Write out the index sha */
	error = git_filebuf_hash(&file_hash, &idx->file);
	if (error < GIT_SUCCESS)
		goto cleanup;

	error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
	if (error < GIT_SUCCESS)
		goto cleanup;

	/* Figure out what the final name should be */
	index_path(filename, idx);

	/* Commit file */
	error = git_filebuf_commit_at(&idx->file, filename);

cleanup:
	if (error < GIT_SUCCESS)
		git_filebuf_cleanup(&idx->file);

	return error;
}
int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
{
git_mwindow_file *mwf;
off_t off = sizeof(struct git_pack_header);
int error; int error;
unsigned int fanout[256] = {0}; struct entry *entry;
unsigned int left, processed;
/* FIXME: Write the keep file */ assert(idx && stats);
mwf = &idx->pack->mwf;
error = git_mwindow_file_register(mwf); error = git_mwindow_file_register(mwf);
if (error < GIT_SUCCESS) if (error < GIT_SUCCESS)
return git__rethrow(error, "Failed to register mwindow file"); return git__rethrow(error, "Failed to register mwindow file");
/* Notify before the first one */ stats->total = idx->nr_objects;
if (cb) stats->processed = processed = 0;
cb(&idx->stats, cb_data);
while (idx->stats.processed < idx->stats.total) { while (processed < idx->nr_objects) {
git_rawobj obj; git_rawobj obj;
git_oid oid; git_oid oid;
struct entry entry; git_mwindow *w = NULL;
char hdr[512] = {0}; /* FIXME: How long should this be? */ char hdr[512] = {0}; /* FIXME: How long should this be? */
int i, hdr_len; int i, hdr_len;
off_t entry_start = off;
void *packed;
size_t entry_size;
memset(&entry, 0x0, sizeof(entry)); /* Necessary? */ entry = git__malloc(sizeof(struct entry));
memset(entry, 0x0, sizeof(struct entry));
if (off > UINT31_MAX) { if (off > UINT31_MAX) {
entry.offset = ~0ULL; entry->offset = UINT32_MAX;
entry.offset_long = off; entry->offset_long = off;
} else { } else {
entry.offset = off; entry->offset = off;
} }
error = git_packfile_unpack(&obj, idx->pack, &off); error = git_packfile_unpack(&obj, idx->pack, &off);
...@@ -178,30 +330,40 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void ...@@ -178,30 +330,40 @@ int git_indexer_run(git_indexer *idx, int (*cb)(const git_indexer_stats *, void
goto cleanup; goto cleanup;
} }
memcpy(&entry.sha, oid.id, GIT_OID_RAWSZ); git_oid_cpy(&entry->oid, &oid);
/* entry.crc = crc32(obj.data) */ entry->crc = crc32(0L, Z_NULL, 0);
entry_size = off - entry_start;
packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
if (packed == NULL) {
error = git__rethrow(error, "Failed to open window to read packed data");
goto cleanup;
}
entry->crc = htonl(crc32(entry->crc, packed, entry_size));
git_mwindow_close(&w);
/* Add the object to the list */ /* Add the object to the list */
//memcpy(&idx->objects[idx->stats.processed], &entry, sizeof(entry)); error = git_vector_insert(&idx->objects, entry);
idx->objects[idx->stats.processed] = entry; if (error < GIT_SUCCESS) {
error = git__rethrow(error, "Failed to add entry to list");
goto cleanup;
}
for (i = oid.id[0]; i < 256; ++i) { for (i = oid.id[0]; i < 256; ++i) {
fanout[i]++; idx->fanout[i]++;
} }
free(obj.data); free(obj.data);
idx->stats.processed++; stats->processed = ++processed;
if (cb)
cb(&idx->stats, cb_data);
} }
/* /*
* All's gone well, so let's write the index file. * All's gone well, so let's write the index file.
*/ */
error = write_index(idx);
/* Delete keep file */
cleanup: cleanup:
git_mwindow_free_all(mwf); git_mwindow_free_all(mwf);
...@@ -211,8 +373,13 @@ cleanup: ...@@ -211,8 +373,13 @@ cleanup:
void git_indexer_free(git_indexer *idx) void git_indexer_free(git_indexer *idx)
{ {
unsigned int i;
struct entry *e;
p_close(idx->pack->mwf.fd); p_close(idx->pack->mwf.fd);
free(idx->objects); git_vector_foreach(&idx->objects, i, e)
free(e);
git_vector_free(&idx->objects);
free(idx->pack); free(idx->pack);
free(idx); free(idx);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment