delta.c 18.1 KB
Newer Older
1
/*
Edward Thomson committed
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
4 5
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
 */

#include "delta.h"

/* maximum hash entry list for the same hash bucket */
#define HASH_LIMIT 64

#define RABIN_SHIFT 23
#define RABIN_WINDOW 16

static const unsigned int T[256] = {
	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
};

static const unsigned int U[256] = {
	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
};

struct index_entry {
	const unsigned char *ptr;
	unsigned int val;
111
	struct index_entry *next;
112 113 114 115 116
};

struct git_delta_index {
	unsigned long memsize;
	const void *src_buf;
117
	size_t src_size;
118 119 120 121
	unsigned int hash_mask;
	struct index_entry *hash[GIT_FLEX_ARRAY];
};

122 123 124
static int lookup_index_alloc(
	void **out, unsigned long *out_len, size_t entries, size_t hash_count)
{
125
	size_t entries_len, hash_len, index_len;
126

127 128
	GITERR_CHECK_ALLOC_MULTIPLY(&entries_len, entries, sizeof(struct index_entry));
	GITERR_CHECK_ALLOC_MULTIPLY(&hash_len, hash_count, sizeof(struct index_entry *));
129

130 131
	GITERR_CHECK_ALLOC_ADD(&index_len, sizeof(struct git_delta_index), entries_len);
	GITERR_CHECK_ALLOC_ADD(&index_len, index_len, hash_len);
132 133

	if (!git__is_ulong(index_len)) {
134
		giterr_set(GITERR_NOMEMORY, "overly large delta");
135 136 137 138 139 140 141 142 143 144
		return -1;
	}

	*out = git__malloc(index_len);
	GITERR_CHECK_ALLOC(*out);

	*out_len = index_len;
	return 0;
}

145 146
int git_delta_index_init(
	git_delta_index **out, const void *buf, size_t bufsize)
147 148 149 150
{
	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
	const unsigned char *data, *buffer = buf;
	struct git_delta_index *index;
151
	struct index_entry *entry, **hash;
152 153 154
	void *mem;
	unsigned long memsize;

155 156
	*out = NULL;

157
	if (!buf || !bufsize)
158
		return 0;
159 160

	/* Determine index hash size.  Note that indexing skips the
161
	   first byte to allow for optimizing the rabin polynomial
162
	   initialization in create_delta(). */
163
	entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
164 165 166 167 168 169 170 171
	if (bufsize >= 0xffffffffUL) {
		/*
		 * Current delta format can't encode offsets into
		 * reference buffer with more than 32 bits.
		 */
		entries = 0xfffffffeU / RABIN_WINDOW;
	}
	hsize = entries / 4;
172
	for (i = 4; i < 31 && (1u << i) < hsize; i++);
173 174 175
	hsize = 1 << i;
	hmask = hsize - 1;

176
	if (lookup_index_alloc(&mem, &memsize, entries, hsize) < 0)
177
		return -1;
178

179 180
	index = mem;
	mem = index->hash;
181 182 183 184
	hash = mem;
	mem = hash + hsize;
	entry = mem;

185 186 187 188
	index->memsize = memsize;
	index->src_buf = buf;
	index->src_size = bufsize;
	index->hash_mask = hmask;
189 190 191
	memset(hash, 0, hsize * sizeof(*hash));

	/* allocate an array to count hash entries */
Russell Belfer committed
192
	hash_count = git__calloc(hsize, sizeof(*hash_count));
193
	if (!hash_count) {
194
		git__free(index);
195
		return -1;
196 197 198 199 200 201 202 203 204 205 206 207
	}

	/* then populate the index */
	prev_val = ~0;
	for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
	     data >= buffer;
	     data -= RABIN_WINDOW) {
		unsigned int val = 0;
		for (i = 1; i <= RABIN_WINDOW; i++)
			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
		if (val == prev_val) {
			/* keep the lowest of consecutive identical blocks */
208
			entry[-1].ptr = data + RABIN_WINDOW;
209 210 211
		} else {
			prev_val = val;
			i = val & hmask;
212 213
			entry->ptr = data + RABIN_WINDOW;
			entry->val = val;
214 215 216 217 218 219 220 221
			entry->next = hash[i];
			hash[i] = entry++;
			hash_count[i]++;
		}
	}

	/*
	 * Determine a limit on the number of entries in the same hash
222
	 * bucket.  This guard us against patological data sets causing
223 224 225 226 227 228 229 230 231 232
	 * really bad hash distribution with most entries in the same hash
	 * bucket that would bring us to O(m*n) computing costs (m and n
	 * corresponding to reference and target buffer sizes).
	 *
	 * Make sure none of the hash buckets has more entries than
	 * we're willing to test.  Otherwise we cull the entry list
	 * uniformly to still preserve a good repartition across
	 * the reference buffer.
	 */
	for (i = 0; i < hsize; i++) {
233
		if (hash_count[i] < HASH_LIMIT)
234 235 236 237
			continue;

		entry = hash[i];
		do {
238 239 240 241 242 243
			struct index_entry *keep = entry;
			int skip = hash_count[i] / HASH_LIMIT / 2;
			do {
				entry = entry->next;
			} while(--skip && entry);
			keep->next = entry;
244 245 246 247
		} while (entry);
	}
	git__free(hash_count);

248 249
	*out = index;
	return 0;
250 251
}

252
void git_delta_index_free(git_delta_index *index)
253 254 255 256
{
	git__free(index);
}

257
size_t git_delta_index_size(git_delta_index *index)
258
{
259 260 261
	assert(index);

	return index->memsize;
262 263 264 265
}

/*
 * The maximum size for any opcode sequence, including the initial header
266
 * plus rabin window plus biggest copy.
267 268 269
 */
#define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)

270 271 272
int git_delta_create_from_index(
	void **out,
	size_t *out_len,
273 274
	const struct git_delta_index *index,
	const void *trg_buf,
275 276
	size_t trg_size,
	size_t max_size)
277
{
278
	unsigned int i, bufpos, bufsize, moff, msize, val;
279 280
	int inscnt;
	const unsigned char *ref_data, *ref_top, *data, *top;
281 282 283 284
	unsigned char *buf;

	*out = NULL;
	*out_len = 0;
285 286

	if (!trg_buf || !trg_size)
287
		return 0;
288

289 290 291 292 293 294
	bufpos = 0;
	bufsize = 8192;
	if (max_size && bufsize >= max_size)
		bufsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
	buf = git__malloc(bufsize);
	GITERR_CHECK_ALLOC(buf);
295 296 297 298

	/* store reference buffer size */
	i = index->src_size;
	while (i >= 0x80) {
299
		buf[bufpos++] = i | 0x80;
300 301
		i >>= 7;
	}
302
	buf[bufpos++] = i;
303 304 305 306

	/* store target buffer size */
	i = trg_size;
	while (i >= 0x80) {
307
		buf[bufpos++] = i | 0x80;
308 309
		i >>= 7;
	}
310
	buf[bufpos++] = i;
311 312 313 314 315 316

	ref_data = index->src_buf;
	ref_top = ref_data + index->src_size;
	data = trg_buf;
	top = (const unsigned char *) trg_buf + trg_size;

317
	bufpos++;
318 319
	val = 0;
	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
320
		buf[bufpos++] = *data;
321 322 323 324 325 326 327 328 329 330 331 332
		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
	}
	inscnt = i;

	moff = 0;
	msize = 0;
	while (data < top) {
		if (msize < 4096) {
			struct index_entry *entry;
			val ^= U[data[-RABIN_WINDOW]];
			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
			i = val & index->hash_mask;
333
			for (entry = index->hash[i]; entry; entry = entry->next) {
334 335
				const unsigned char *ref = entry->ptr;
				const unsigned char *src = data;
336
				unsigned int ref_size = (unsigned int)(ref_top - ref);
337 338 339
				if (entry->val != val)
					continue;
				if (ref_size > (unsigned int)(top - src))
340
					ref_size = (unsigned int)(top - src);
341 342 343 344 345 346
				if (ref_size <= msize)
					break;
				while (ref_size-- && *src++ == *ref)
					ref++;
				if (msize < (unsigned int)(ref - entry->ptr)) {
					/* this is our best match so far */
347 348
					msize = (unsigned int)(ref - entry->ptr);
					moff = (unsigned int)(entry->ptr - ref_data);
349 350 351 352 353 354 355 356
					if (msize >= 4096) /* good enough */
						break;
				}
			}
		}

		if (msize < 4) {
			if (!inscnt)
357 358
				bufpos++;
			buf[bufpos++] = *data++;
359 360
			inscnt++;
			if (inscnt == 0x7f) {
361
				buf[bufpos - inscnt - 1] = inscnt;
362 363 364 365 366 367 368 369 370 371 372 373 374
				inscnt = 0;
			}
			msize = 0;
		} else {
			unsigned int left;
			unsigned char *op;

			if (inscnt) {
				while (moff && ref_data[moff-1] == data[-1]) {
					/* we can match one byte back */
					msize++;
					moff--;
					data--;
375
					bufpos--;
376 377
					if (--inscnt)
						continue;
378
					bufpos--;  /* remove count slot */
379 380 381
					inscnt--;  /* make it -1 */
					break;
				}
382
				buf[bufpos - inscnt - 1] = inscnt;
383 384 385 386 387 388 389
				inscnt = 0;
			}

			/* A copy op is currently limited to 64KB (pack v2) */
			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
			msize -= left;

390
			op = buf + bufpos++;
391 392 393
			i = 0x80;

			if (moff & 0x000000ff)
394
				buf[bufpos++] = moff >> 0,  i |= 0x01;
395
			if (moff & 0x0000ff00)
396
				buf[bufpos++] = moff >> 8,  i |= 0x02;
397
			if (moff & 0x00ff0000)
398
				buf[bufpos++] = moff >> 16, i |= 0x04;
399
			if (moff & 0xff000000)
400
				buf[bufpos++] = moff >> 24, i |= 0x08;
401 402

			if (msize & 0x00ff)
403
				buf[bufpos++] = msize >> 0, i |= 0x10;
404
			if (msize & 0xff00)
405
				buf[bufpos++] = msize >> 8, i |= 0x20;
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421

			*op = i;

			data += msize;
			moff += msize;
			msize = left;

			if (msize < 4096) {
				int j;
				val = 0;
				for (j = -RABIN_WINDOW; j < 0; j++)
					val = ((val << 8) | data[j])
					      ^ T[val >> RABIN_SHIFT];
			}
		}

422 423 424 425 426 427
		if (bufpos >= bufsize - MAX_OP_SIZE) {
			void *tmp = buf;
			bufsize = bufsize * 3 / 2;
			if (max_size && bufsize >= max_size)
				bufsize = max_size + MAX_OP_SIZE + 1;
			if (max_size && bufpos > max_size)
428
				break;
429 430
			buf = git__realloc(buf, bufsize);
			if (!buf) {
431
				git__free(tmp);
432
				return -1;
433 434 435 436 437
			}
		}
	}

	if (inscnt)
438
		buf[bufpos - inscnt - 1] = inscnt;
439

440 441 442 443
	if (max_size && bufpos > max_size) {
		giterr_set(GITERR_NOMEMORY, "delta would be larger than maximum size");
		git__free(buf);
		return GIT_EBUFS;
444 445
	}

446 447 448
	*out_len = bufpos;
	*out = buf;
	return 0;
449
}
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467

/*
* Delta application was heavily cribbed from BinaryDelta.java in JGit, which
* itself was heavily cribbed from <code>patch-delta.c</code> in the
* GIT project.	The original delta patching code was written by
* Nicolas Pitre <nico@cam.org>.
*/

static int hdr_sz(
	size_t *size,
	const unsigned char **delta,
	const unsigned char *end)
{
	const unsigned char *d = *delta;
	size_t r = 0;
	unsigned int c, shift = 0;

	do {
468 469
		if (d == end) {
			giterr_set(GITERR_INVALID, "truncated delta");
470
			return -1;
471 472
		}

473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546
		c = *d++;
		r |= (c & 0x7f) << shift;
		shift += 7;
	} while (c & 0x80);
	*delta = d;
	*size = r;
	return 0;
}

int git_delta_read_header(
	size_t *base_out,
	size_t *result_out,
	const unsigned char *delta,
	size_t delta_len)
{
	const unsigned char *delta_end = delta + delta_len;
	if ((hdr_sz(base_out, &delta, delta_end) < 0) ||
		(hdr_sz(result_out, &delta, delta_end) < 0))
		return -1;
	return 0;
}

#define DELTA_HEADER_BUFFER_LEN 16
int git_delta_read_header_fromstream(
	size_t *base_sz, size_t *res_sz, git_packfile_stream *stream)
{
	static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN;
	unsigned char buffer[DELTA_HEADER_BUFFER_LEN];
	const unsigned char *delta, *delta_end;
	size_t len;
	ssize_t read;

	len = read = 0;
	while (len < buffer_len) {
		read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len);

		if (read == 0)
			break;

		if (read == GIT_EBUFS)
			continue;

		len += read;
	}

	delta = buffer;
	delta_end = delta + len;
	if ((hdr_sz(base_sz, &delta, delta_end) < 0) ||
		(hdr_sz(res_sz, &delta, delta_end) < 0))
		return -1;

	return 0;
}

int git_delta_apply(
	void **out,
	size_t *out_len,
	const unsigned char *base,
	size_t base_len,
	const unsigned char *delta,
	size_t delta_len)
{
	const unsigned char *delta_end = delta + delta_len;
	size_t base_sz, res_sz, alloc_sz;
	unsigned char *res_dp;

	*out = NULL;
	*out_len = 0;

	/* Check that the base size matches the data we were given;
	* if not we would underflow while accessing data from the
	* base object, resulting in data corruption or segfault.
	*/
	if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) {
547
		giterr_set(GITERR_INVALID, "failed to apply delta: base size does not match given data");
548 549 550 551
		return -1;
	}

	if (hdr_sz(&res_sz, &delta, delta_end) < 0) {
552
		giterr_set(GITERR_INVALID, "failed to apply delta: base size does not match given data");
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
		return -1;
	}

	GITERR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1);
	res_dp = git__malloc(alloc_sz);
	GITERR_CHECK_ALLOC(res_dp);

	res_dp[res_sz] = '\0';
	*out = res_dp;
	*out_len = res_sz;

	while (delta < delta_end) {
		unsigned char cmd = *delta++;
		if (cmd & 0x80) {
			/* cmd is a copy instruction; copy from the base.
			*/
			size_t off = 0, len = 0;

			if (cmd & 0x01) off = *delta++;
			if (cmd & 0x02) off |= *delta++ << 8UL;
			if (cmd & 0x04) off |= *delta++ << 16UL;
			if (cmd & 0x08) off |= *delta++ << 24UL;

			if (cmd & 0x10) len = *delta++;
			if (cmd & 0x20) len |= *delta++ << 8UL;
			if (cmd & 0x40) len |= *delta++ << 16UL;
			if (!len)		len = 0x10000;

			if (base_len < off + len || res_sz < len)
				goto fail;
			memcpy(res_dp, base + off, len);
			res_dp += len;
			res_sz -= len;

		}
		else if (cmd) {
			/* cmd is a literal insert instruction; copy from
			* the delta stream itself.
			*/
			if (delta_end - delta < cmd || res_sz < cmd)
				goto fail;
			memcpy(res_dp, delta, cmd);
			delta += cmd;
			res_dp += cmd;
			res_sz -= cmd;

		}
		else {
			/* cmd == 0 is reserved for future encodings.
			*/
			goto fail;
		}
	}

	if (delta != delta_end || res_sz)
		goto fail;
	return 0;

fail:
	git__free(*out);

	*out = NULL;
	*out_len = 0;

617
	giterr_set(GITERR_INVALID, "failed to apply delta");
618 619
	return -1;
}