Commit f6ed5e2d by Philip Kelley

Revert changes from git/git diff-delta.c by dak@gnu.org, proski@gnu.org

parent 05e9fc58
/* /*
* diff-delta.c: generate a delta between two buffers * Copyright (C) 2009-2012 the libgit2 contributors
* *
* This code was greatly inspired by parts of LibXDiff from Davide Libenzi * This file is part of libgit2, distributed under the GNU GPL v2 with
* http://www.xmailserver.org/xdiff-lib.html * a Linking Exception. For full terms see the included COPYING file.
*
* Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
*
* Modified for libgit2 by Michael Schubert <schu@schu.io>, (C) 2012
*
* This code is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/ */
#include "delta.h" #include "delta.h"
...@@ -116,11 +108,7 @@ static const unsigned int U[256] = { ...@@ -116,11 +108,7 @@ static const unsigned int U[256] = {
struct index_entry { struct index_entry {
const unsigned char *ptr; const unsigned char *ptr;
unsigned int val; unsigned int val;
}; struct index_entry *next;
struct unpacked_index_entry {
struct index_entry entry;
struct unpacked_index_entry *next;
}; };
struct git_delta_index { struct git_delta_index {
...@@ -137,8 +125,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize) ...@@ -137,8 +125,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
unsigned int i, hsize, hmask, entries, prev_val, *hash_count; unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
const unsigned char *data, *buffer = buf; const unsigned char *data, *buffer = buf;
struct git_delta_index *index; struct git_delta_index *index;
struct unpacked_index_entry *entry, **hash; struct index_entry *entry, **hash;
struct index_entry *packed_entry, **packed_hash;
void *mem; void *mem;
unsigned long memsize; unsigned long memsize;
...@@ -146,7 +133,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize) ...@@ -146,7 +133,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
return NULL; return NULL;
/* Determine index hash size. Note that indexing skips the /* Determine index hash size. Note that indexing skips the
first byte to allow for optimizing the Rabin's polynomial first byte to allow for optimizing the rabin polynomial
initialization in create_delta(). */ initialization in create_delta(). */
entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW; entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
if (bufsize >= 0xffffffffUL) { if (bufsize >= 0xffffffffUL) {
...@@ -162,21 +149,28 @@ git_delta_create_index(const void *buf, unsigned long bufsize) ...@@ -162,21 +149,28 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
hmask = hsize - 1; hmask = hsize - 1;
/* allocate lookup index */ /* allocate lookup index */
memsize = sizeof(*hash) * hsize + memsize = sizeof(*index) +
sizeof(*hash) * hsize +
sizeof(*entry) * entries; sizeof(*entry) * entries;
mem = git__malloc(memsize); mem = git__malloc(memsize);
if (!mem) if (!mem)
return NULL; return NULL;
index = mem;
mem = index->hash;
hash = mem; hash = mem;
mem = hash + hsize; mem = hash + hsize;
entry = mem; entry = mem;
index->memsize = memsize;
index->src_buf = buf;
index->src_size = bufsize;
index->hash_mask = hmask;
memset(hash, 0, hsize * sizeof(*hash)); memset(hash, 0, hsize * sizeof(*hash));
/* allocate an array to count hash entries */ /* allocate an array to count hash entries */
hash_count = calloc(hsize, sizeof(*hash_count)); hash_count = calloc(hsize, sizeof(*hash_count));
if (!hash_count) { if (!hash_count) {
git__free(hash); git__free(index);
return NULL; return NULL;
} }
...@@ -190,13 +184,12 @@ git_delta_create_index(const void *buf, unsigned long bufsize) ...@@ -190,13 +184,12 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
if (val == prev_val) { if (val == prev_val) {
/* keep the lowest of consecutive identical blocks */ /* keep the lowest of consecutive identical blocks */
entry[-1].entry.ptr = data + RABIN_WINDOW; entry[-1].ptr = data + RABIN_WINDOW;
--entries;
} else { } else {
prev_val = val; prev_val = val;
i = val & hmask; i = val & hmask;
entry->entry.ptr = data + RABIN_WINDOW; entry->ptr = data + RABIN_WINDOW;
entry->entry.val = val; entry->val = val;
entry->next = hash[i]; entry->next = hash[i];
hash[i] = entry++; hash[i] = entry++;
hash_count[i]++; hash_count[i]++;
...@@ -205,7 +198,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize) ...@@ -205,7 +198,7 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
/* /*
* Determine a limit on the number of entries in the same hash * Determine a limit on the number of entries in the same hash
* bucket. This guards us against pathological data sets causing * bucket. This guard us against patological data sets causing
* really bad hash distribution with most entries in the same hash * really bad hash distribution with most entries in the same hash
* bucket that would bring us to O(m*n) computing costs (m and n * bucket that would bring us to O(m*n) computing costs (m and n
* corresponding to reference and target buffer sizes). * corresponding to reference and target buffer sizes).
...@@ -216,84 +209,21 @@ git_delta_create_index(const void *buf, unsigned long bufsize) ...@@ -216,84 +209,21 @@ git_delta_create_index(const void *buf, unsigned long bufsize)
* the reference buffer. * the reference buffer.
*/ */
for (i = 0; i < hsize; i++) { for (i = 0; i < hsize; i++) {
int acc; if (hash_count[i] < HASH_LIMIT)
if (hash_count[i] <= HASH_LIMIT)
continue; continue;
/* We leave exactly HASH_LIMIT entries in the bucket */
entries -= hash_count[i] - HASH_LIMIT;
entry = hash[i]; entry = hash[i];
acc = 0;
/*
* Assume that this loop is gone through exactly
* HASH_LIMIT times and is entered and left with
* acc==0. So the first statement in the loop
* contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
* to the accumulator, and the inner loop consequently
* is run (hash_count[i]-HASH_LIMIT) times, removing
* one element from the list each time. Since acc
* balances out to 0 at the final run, the inner loop
* body can't be left with entry==NULL. So we indeed
* encounter entry==NULL in the outer loop only.
*/
do { do {
acc += hash_count[i] - HASH_LIMIT; struct index_entry *keep = entry;
if (acc > 0) { int skip = hash_count[i] / HASH_LIMIT / 2;
struct unpacked_index_entry *keep = entry; do {
do { entry = entry->next;
entry = entry->next; } while(--skip && entry);
acc -= HASH_LIMIT; keep->next = entry;
} while (acc > 0);
keep->next = entry->next;
}
entry = entry->next;
} while (entry); } while (entry);
} }
git__free(hash_count); git__free(hash_count);
/*
* Now create the packed index in array form
* rather than linked lists.
*/
memsize = sizeof(*index)
+ sizeof(*packed_hash) * (hsize+1)
+ sizeof(*packed_entry) * entries;
mem = git__malloc(memsize);
if (!mem) {
git__free(hash);
return NULL;
}
index = mem;
index->memsize = memsize;
index->src_buf = buf;
index->src_size = bufsize;
index->hash_mask = hmask;
mem = index->hash;
packed_hash = mem;
mem = packed_hash + (hsize+1);
packed_entry = mem;
for (i = 0; i < hsize; i++) {
/*
* Coalesce all entries belonging to one linked list
* into consecutive array entries.
*/
packed_hash[i] = packed_entry;
for (entry = hash[i]; entry; entry = entry->next)
*packed_entry++ = entry->entry;
}
/* Sentinel value to indicate the length of the last hash bucket */
packed_hash[hsize] = packed_entry;
assert(packed_entry - (struct index_entry *)mem == entries);
git__free(hash);
return index; return index;
} }
...@@ -312,7 +242,7 @@ unsigned long git_delta_sizeof_index(struct git_delta_index *index) ...@@ -312,7 +242,7 @@ unsigned long git_delta_sizeof_index(struct git_delta_index *index)
/* /*
* The maximum size for any opcode sequence, including the initial header * The maximum size for any opcode sequence, including the initial header
* plus Rabin window plus biggest copy. * plus rabin window plus biggest copy.
*/ */
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
...@@ -377,7 +307,7 @@ git_delta_create( ...@@ -377,7 +307,7 @@ git_delta_create(
val ^= U[data[-RABIN_WINDOW]]; val ^= U[data[-RABIN_WINDOW]];
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
i = val & index->hash_mask; i = val & index->hash_mask;
for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) { for (entry = index->hash[i]; entry; entry = entry->next) {
const unsigned char *ref = entry->ptr; const unsigned char *ref = entry->ptr;
const unsigned char *src = data; const unsigned char *src = data;
unsigned int ref_size = (unsigned int)(ref_top - ref); unsigned int ref_size = (unsigned int)(ref_top - ref);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment