/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "common.h"
8 9
#include "path.h"
#include "posix.h"
10
#ifdef GIT_WIN32
11
#include "win32/posix.h"
12
#include "win32/w32_util.h"
13 14 15
#else
#include <dirent.h>
#endif
Vicent Marti committed
16 17 18
#include <stdio.h>
#include <ctype.h>

19 20
#define LOOKS_LIKE_DRIVE_PREFIX(S) (git__isalpha((S)[0]) && (S)[1] == ':')

21
#ifdef GIT_WIN32
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
/*
 * Report whether the first `pos` bytes of `path` have the shape "//name":
 * two leading forward slashes followed by at least one byte, with no
 * further '/' anywhere in the range [2, pos).
 */
static bool looks_like_network_computer_name(const char *path, int pos)
{
	int i;

	/* need the "//" prefix plus at least one character of name */
	if (pos < 3 || path[0] != '/' || path[1] != '/')
		return false;

	/* any slash after the prefix means more than one path segment */
	for (i = pos - 1; i >= 2; i--) {
		if (path[i] == '/')
			return false;
	}

	return true;
}
37
#endif
38

Vicent Marti committed
39 40
/*
 * Based on the Android implementation, BSD licensed.
 * http://android.git.kernel.org/
 *
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
69
/*
 * Compute the last component of `path`, ignoring trailing slashes.
 *
 * A NULL or empty path yields ".", and a path made only of slashes
 * yields "/".  When `buffer` is non-NULL the component is stored in it.
 *
 * Returns the length of the component, or -1 if storing it in `buffer`
 * fails.
 */
int git_path_basename_r(git_buf *buffer, const char *path)
{
	const char *endp, *startp;
	int len;

	if (path == NULL || *path == '\0') {
		/* Empty or NULL string gets treated as "." */
		startp = ".";
		len = 1;
	} else {
		/* Strip trailing slashes */
		endp = path + strlen(path) - 1;
		while (endp > path && *endp == '/')
			endp--;

		if (endp == path && *endp == '/') {
			/* All slashes becomes "/" */
			startp = "/";
			len = 1;
		} else {
			/* Walk back to the character after the previous slash */
			startp = endp;
			while (startp > path && startp[-1] != '/')
				startp--;

			/* Cast is safe because max path < max int */
			len = (int)(endp - startp + 1);
		}
	}

	if (buffer != NULL && git_buf_set(buffer, startp, len) < 0)
		return -1;

	return len;
}

/*
 * Based on the Android implementation, BSD licensed.
 * Check http://android.git.kernel.org/
 */
114
/*
 * Compute the directory portion of `path` (everything before the last
 * component, without the separating slashes).
 *
 * A NULL or empty path, or a path with no slash, yields "."; a path
 * whose only directory is the root yields "/".  When `buffer` is
 * non-NULL the result is stored in it.
 *
 * Returns the length of the directory portion, or -1 if storing it in
 * `buffer` fails.
 */
int git_path_dirname_r(git_buf *buffer, const char *path)
{
	const char *endp;
	int len;

	/* Empty or NULL string gets treated as "." */
	if (path == NULL || *path == '\0') {
		path = ".";
		len = 1;
		goto Exit;
	}

	/* Strip trailing slashes */
	endp = path + strlen(path) - 1;
	while (endp > path && *endp == '/')
		endp--;

	/* Find the start of the dir */
	while (endp > path && *endp != '/')
		endp--;

	/* Either the dir is "/" or there are no slashes */
	if (endp == path) {
		path = (*endp == '/') ? "/" : ".";
		len = 1;
		goto Exit;
	}

	/* Drop the run of slashes separating the dir from the last component */
	do {
		endp--;
	} while (endp > path && *endp == '/');

	/* Cast is safe because max path < max int */
	len = (int)(endp - path + 1);

#ifdef GIT_WIN32
	/* Mimic unix behavior where '/.git' returns '/': 'C:/.git' will return
		'C:/' here */
	if (len == 2 && LOOKS_LIKE_DRIVE_PREFIX(path)) {
		len = 3;
		goto Exit;
	}

	/* Similarly checks if we're dealing with a network computer name
		'//computername/.git' will return '//computername/' */
	if (looks_like_network_computer_name(path, len)) {
		len++;
		goto Exit;
	}
#endif

Exit:
	if (buffer != NULL && git_buf_set(buffer, path, len) < 0)
		return -1;

	return len;
}


char *git_path_dirname(const char *path)
{
180 181
	git_buf buf = GIT_BUF_INIT;
	char *dirname;
Vicent Marti committed
182

183 184 185
	git_path_dirname_r(&buf, path);
	dirname = git_buf_detach(&buf);
	git_buf_free(&buf); /* avoid memleak if error occurs */
Vicent Marti committed
186

187
	return dirname;
Vicent Marti committed
188 189 190 191
}

char *git_path_basename(const char *path)
{
192 193
	git_buf buf = GIT_BUF_INIT;
	char *basename;
Vicent Marti committed
194

195 196 197
	git_path_basename_r(&buf, path);
	basename = git_buf_detach(&buf);
	git_buf_free(&buf); /* avoid memleak if error occurs */
Vicent Marti committed
198

199
	return basename;
Vicent Marti committed
200 201
}

202 203 204 205 206 207 208 209 210 211 212 213 214 215
/*
 * Return the index just past the slash located by git_buf_rfind_next()
 * in `buffer`, or 0 when the buffer is NULL, empty, or no such slash is
 * found.
 */
size_t git_path_basename_offset(git_buf *buffer)
{
	ssize_t last_slash;

	if (buffer == NULL || buffer->size <= 0)
		return 0;

	last_slash = git_buf_rfind_next(buffer, '/');

	/* double-check the reported position really holds a slash */
	if (last_slash < 0 || buffer->ptr[last_slash] != '/')
		return 0;

	return (size_t)(last_slash + 1);
}
Vicent Marti committed
216 217 218 219

/*
 * Return a pointer, inside `path`, to the last directory component of a
 * path that ends with '/' (the component keeps its trailing slash).
 *
 * Returns NULL when `path` is empty or does not end with '/'.  When the
 * only slash is the trailing one, the whole of `path` is returned.
 */
const char *git_path_topdir(const char *path)
{
	size_t len;
	ssize_t i;

	assert(path);
	len = strlen(path);

	if (len == 0 || path[len - 1] != '/')
		return NULL;

	/* scan backwards from the character before the trailing slash */
	i = (ssize_t)len - 2;
	while (i >= 0 && path[i] != '/')
		i--;

	/* i is -1 when no earlier slash exists, giving the start of path */
	return &path[i + 1];
}

235 236 237 238 239
/*
 * Locate the root separator of `path`.
 *
 * Returns the offset of the '/' or '\\' that roots the path: 0 for
 * "/foo", 2 for "C:/foo", and (on Win32 builds) the offset of the
 * separator following the computer name of a "//name/..." network path.
 * Returns -1 when the path is not rooted.
 */
int git_path_root(const char *path)
{
	int offset = 0;

	/* Does the root of the path look like a windows drive ? */
	if (LOOKS_LIKE_DRIVE_PREFIX(path))
		offset += 2;

#ifdef GIT_WIN32
	/* Are we dealing with a windows network path? */
	else if ((path[0] == '/' && path[1] == '/' && path[2] != '/') ||
		(path[0] == '\\' && path[1] == '\\' && path[2] != '\\'))
	{
		offset += 2;

		/* Skip the computer name segment */
		while (path[offset] && path[offset] != '/' && path[offset] != '\\')
			offset++;
	}
#endif

	if (path[offset] == '/' || path[offset] == '\\')
		return offset;

	return -1;	/* Not a real error - signals that path is not rooted */
}

262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
/*
 * Store `path` in `path_out`, prefixing it with `base` when `base` is
 * given and `path` is not rooted (per git_path_root()).
 *
 * If `root_at` is non-NULL it receives strlen(base) when the join
 * happened; otherwise the root offset of `path`, or 0 if `path` is
 * unrooted.
 *
 * Returns the status of the underlying buffer operation.
 */
int git_path_join_unrooted(
	git_buf *path_out, const char *path, const char *base, ssize_t *root_at)
{
	int status, root_offset;

	assert(path && path_out);

	root_offset = git_path_root(path);

	if (base != NULL && root_offset < 0) {
		status = git_buf_joinpath(path_out, base, path);

		if (root_at != NULL)
			*root_at = (ssize_t)strlen(base);

		return status;
	}

	status = git_buf_sets(path_out, path);

	if (root_at != NULL)
		*root_at = (root_offset < 0) ? 0 : (ssize_t)root_offset;

	return status;
}

287
/*
 * Resolve `path` (joined onto `base` when `base` is given and `path` is
 * unrooted) with p_realpath() and store the result in `path_out`.
 *
 * Returns the status of storing the resolved path on success.  If
 * resolution fails, `path_out` is cleared and the function returns
 * GIT_ENOTFOUND for ENOENT/ENOTDIR, or -1 for any other errno.
 */
int git_path_prettify(git_buf *path_out, const char *path, const char *base)
{
	char buf[GIT_PATH_MAX];
	int error;

	assert(path && path_out);

	/* construct path if needed */
	if (base != NULL && git_path_root(path) < 0) {
		if (git_buf_joinpath(path_out, base, path) < 0)
			return -1;
		path = path_out->ptr;
	}

	if (p_realpath(path, buf) == NULL) {
		/* giterr_set resets the errno when dealing with a GITERR_OS kind
		 * of error, so classify the failure before reporting it */
		error = (errno == ENOENT || errno == ENOTDIR) ? GIT_ENOTFOUND : -1;
		giterr_set(GITERR_OS, "Failed to resolve path '%s'", path);

		git_buf_clear(path_out);

		return error;
	}

	return git_buf_sets(path_out, buf);
}

313
/*
 * Same as git_path_prettify(), then pass the result through
 * git_path_to_dir().  A negative status from the prettify step is
 * returned unchanged.
 */
int git_path_prettify_dir(git_buf *path_out, const char *path, const char *base)
{
	int error = git_path_prettify(path_out, path, base);

	if (error < 0)
		return error;

	return git_path_to_dir(path_out);
}
318

319 320 321
/*
 * Append a '/' to `path` if it is non-empty and does not already end
 * with one.
 *
 * Returns -1 if the buffer reports an out-of-memory state, 0 otherwise.
 */
int git_path_to_dir(git_buf *path)
{
	if (path->asize > 0 &&
		git_buf_len(path) > 0 &&
		path->ptr[git_buf_len(path) - 1] != '/')
		git_buf_putc(path, '/');

	return git_buf_oom(path) ? -1 : 0;
}
328 329 330 331 332 333 334 335 336 337 338

/*
 * Append a '/' to the NUL-terminated string in `path`, in place, when
 * the string is non-empty, does not already end with '/', and its
 * length is below `size`.
 *
 * NOTE: the terminator is written at index length + 1, so a string of
 * length size - 1 results in a write to path[size]; the storage behind
 * `path` must therefore hold at least size + 1 bytes.
 */
void git_path_string_to_dir(char* path, size_t size)
{
	size_t end = strlen(path);

	if (end == 0 || path[end - 1] == '/' || end >= size)
		return;

	path[end++] = '/';
	path[end] = '\0';
}

339 340
/*
 * Decode %XX escapes in `input` into `decoded_out` (which is cleared
 * first).  A '%' that is not followed by two hex digits is copied
 * through literally.
 *
 * Returns 0 on success, -1 if appending to the buffer fails.
 */
int git__percent_decode(git_buf *decoded_out, const char *input)
{
	int len, hi, lo, i;

	assert(decoded_out && input);

	len = (int)strlen(input);
	git_buf_clear(decoded_out);

	for (i = 0; i < len; i++) {
		char c = input[i];

		/* an escape needs two more characters after the '%' */
		if (c == '%' && i < len - 2) {
			hi = git__fromhex(input[i + 1]);
			lo = git__fromhex(input[i + 2]);

			/* only consume the digits when both are valid hex */
			if (hi >= 0 && lo >= 0) {
				c = (char)(hi << 4 | lo);
				i += 2;
			}
		}

		if (git_buf_putc(decoded_out, c) < 0)
			return -1;
	}

	return 0;
}

/* Record a GITERR_CONFIG error naming the rejected `uri`; always returns -1. */
static int error_invalid_local_file_uri(const char *uri)
{
	giterr_set(GITERR_CONFIG, "'%s' is not a valid local file URI", uri);
	return -1;
}
nulltoken committed
379 380 381

/*
 * Convert a "file://" URL into a local path stored in `local_path_out`.
 *
 * Accepted forms are "file:///path" and "file://localhost/path"; the
 * path part must be non-empty and must not start with an extra '/'.
 * The remaining text is percent-decoded.
 *
 * Returns 0 on success, or -1 (with an error set) for an invalid URL or
 * a decoding failure.
 */
int git_path_fromurl(git_buf *local_path_out, const char *file_url)
{
	int offset, len;

	assert(local_path_out && file_url);

	if (git__prefixcmp(file_url, "file://") != 0)
		return error_invalid_local_file_uri(file_url);

	offset = 7;	/* length of "file://" */
	len = (int)strlen(file_url);

	if (offset < len && file_url[offset] == '/')
		offset++;
	else if (offset < len && git__prefixcmp(file_url + offset, "localhost/") == 0)
		offset += 10;	/* length of "localhost/" */
	else
		return error_invalid_local_file_uri(file_url);

	if (offset >= len || file_url[offset] == '/')
		return error_invalid_local_file_uri(file_url);

#ifndef GIT_WIN32
	offset--;	/* A *nix absolute path starts with a forward slash */
#endif

	git_buf_clear(local_path_out);

	return git__percent_decode(local_path_out, file_url + offset);
}
410 411 412 413 414 415 416

/*
 * Invoke `cb` on `path` and then on each successively shorter parent
 * prefix of it (each prefix keeps its trailing '/').
 *
 * The walk temporarily NUL-terminates the buffer's own storage at each
 * prefix and restores the overwritten character afterwards.  When
 * `ceiling` is a prefix of `path`, prefixes shorter than the ceiling
 * are not visited; when `ceiling` is given but is not a prefix, only
 * the full path is visited.
 *
 * Returns 0, or the first non-zero value returned by `cb`.
 */
int git_path_walk_up(
	git_buf *path,
	const char *ceiling,
	int (*cb)(void *data, git_buf *),
	void *data)
{
	int error = 0;
	git_buf iter;
	ssize_t stop = 0, scan;
	char oldc = '\0';

	assert(path && cb);

	if (ceiling != NULL) {
		if (git__prefixcmp(path->ptr, ceiling) == 0)
			stop = (ssize_t)strlen(ceiling);
		else
			stop = git_buf_len(path);
	}
	scan = git_buf_len(path);

	/* iter shares path's storage; only its size shrinks as we walk up */
	iter.ptr = path->ptr;
	iter.size = git_buf_len(path);
	iter.asize = path->asize;

	while (scan >= stop) {
		error = cb(data, &iter);

		/* undo the truncation made for this callback */
		iter.ptr[scan] = oldc;

		if (error) {
			giterr_set_after_callback(error);
			break;
		}

		scan = git_buf_rfind_next(&iter, '/');
		if (scan >= 0) {
			/* cut just after the slash, remembering what was there */
			scan++;
			oldc = iter.ptr[scan];
			iter.size = scan;
			iter.ptr[scan] = '\0';
		}
	}

	/* restore the last truncation if the loop ended on one */
	if (scan >= 0)
		iter.ptr[scan] = oldc;

	return error;
}
459

460
/* Return true when p_access() reports that `path` exists (F_OK). */
bool git_path_exists(const char *path)
{
	assert(path);

	return p_access(path, F_OK) == 0;
}

466
/* Return true when `path` can be stat'ed and is a directory. */
bool git_path_isdir(const char *path)
{
	struct stat st;

	return p_stat(path, &st) >= 0 && S_ISDIR(st.st_mode) != 0;
}

475
/* Return true when `path` can be stat'ed and is a regular file. */
bool git_path_isfile(const char *path)
{
	struct stat st;

	assert(path);

	return p_stat(path, &st) >= 0 && S_ISREG(st.st_mode) != 0;
}

Ben Straub committed
486 487 488 489
#ifdef GIT_WIN32

bool git_path_is_empty_dir(const char *path)
{
490 491 492 493 494 495 496 497
	git_win32_path filter_w;
	bool empty = false;

	if (git_win32__findfirstfile_filter(filter_w, path)) {
		WIN32_FIND_DATAW findData;
		HANDLE hFind = FindFirstFileW(filter_w, &findData);

		/* If the find handle was created successfully, then it's a directory */
498
		if (hFind != INVALID_HANDLE_VALUE) {
499 500 501 502 503 504 505 506 507 508 509 510 511 512
			empty = true;

			do {
				/* Allow the enumeration to return . and .. and still be considered
				 * empty. In the special case of drive roots (i.e. C:\) where . and
				 * .. do not occur, we can still consider the path to be an empty
				 * directory if there's nothing there. */
				if (!git_path_is_dot_or_dotdotW(findData.cFileName)) {
					empty = false;
					break;
				}
			} while (FindNextFileW(hFind, &findData));

			FindClose(hFind);
513
		}
514
	}
515

516
	return empty;
Ben Straub committed
517 518 519 520
}

#else

521
/*
 * Directory-walk callback: returns non-zero for any entry that is not
 * "." or "..", zero otherwise.  The payload is unused.
 */
static int path_found_entry(void *payload, git_buf *path)
{
	GIT_UNUSED(payload);
	return !git_path_is_dot_or_dotdot(path->ptr);
}
Ben Straub committed
526

527 528 529 530
bool git_path_is_empty_dir(const char *path)
{
	int error;
	git_buf dir = GIT_BUF_INIT;
Ben Straub committed
531

532
	if (!git_path_isdir(path))
Ben Straub committed
533 534
		return false;

535 536 537
	if ((error = git_buf_sets(&dir, path)) != 0)
		giterr_clear();
	else
538
		error = git_path_direach(&dir, 0, path_found_entry, NULL);
Ben Straub committed
539

540 541 542
	git_buf_free(&dir);

	return !error;
Ben Straub committed
543
}
544

Ben Straub committed
545 546
#endif

547
/*
 * Record a GITERR_OS error describing why `action` failed on `path`
 * and translate `errno_value` into a library error code:
 *
 *   ENOENT, ENOTDIR       -> GIT_ENOTFOUND
 *   EINVAL, ENAMETOOLONG  -> GIT_EINVALIDSPEC
 *   EEXIST                -> GIT_EEXISTS
 *   anything else         -> -1
 */
int git_path_set_error(int errno_value, const char *path, const char *action)
{
	switch (errno_value) {
	case ENOENT:
	case ENOTDIR:
		giterr_set(GITERR_OS, "Could not find '%s' to %s", path, action);
		return GIT_ENOTFOUND;

	case EINVAL:
	case ENAMETOOLONG:
		giterr_set(GITERR_OS, "Invalid path for filesystem '%s'", path);
		return GIT_EINVALIDSPEC;

	case EEXIST:
		giterr_set(GITERR_OS, "Failed %s - '%s' already exists", action, path);
		return GIT_EEXISTS;

	default:
		giterr_set(GITERR_OS, "Could not %s '%s'", action, path);
		return -1;
	}
}

/*
 * lstat `path` into `st`.  Returns 0 on success; on failure sets an
 * error and returns the code chosen by git_path_set_error() for errno.
 */
int git_path_lstat(const char *path, struct stat *st)
{
	if (p_lstat(path, st) != 0)
		return git_path_set_error(errno, path, "stat");

	return 0;
}

578
static bool _check_dir_contents(
579 580
	git_buf *dir,
	const char *sub,
581
	bool (*predicate)(const char *))
582
{
583
	bool result;
nulltoken committed
584
	size_t dir_size = git_buf_len(dir);
585 586
	size_t sub_size = strlen(sub);

587
	/* leave base valid even if we could not make space for subdir */
588
	if (git_buf_try_grow(dir, dir_size + sub_size + 2, false, false) < 0)
589 590 591
		return false;

	/* save excursion */
592 593
	git_buf_joinpath(dir, dir->ptr, sub);

594
	result = predicate(dir->ptr);
595

596 597
	/* restore path */
	git_buf_truncate(dir, dir_size);
598
	return result;
599 600
}

601
/* Return true when `item`, joined onto `dir`, satisfies git_path_exists(). */
bool git_path_contains(git_buf *dir, const char *item)
{
	return _check_dir_contents(dir, item, &git_path_exists);
}

606
/* Return true when `subdir`, joined onto `base`, satisfies git_path_isdir(). */
bool git_path_contains_dir(git_buf *base, const char *subdir)
{
	return _check_dir_contents(base, subdir, &git_path_isdir);
}

611
/* Return true when `file`, joined onto `base`, satisfies git_path_isfile(). */
bool git_path_contains_file(git_buf *base, const char *file)
{
	return _check_dir_contents(base, file, &git_path_isfile);
}

/*
 * Build `path` (relative to `base` if unrooted) in `dir`, resolve it
 * with p_realpath() when possible, reduce it to its dirname and make
 * sure it ends with '/'.
 *
 * NOTE: the dirname step runs unconditionally; the directory check that
 * once guarded it is disabled, so the last component is always dropped.
 *
 * Returns 0 on success or a non-zero error from one of the steps.
 */
int git_path_find_dir(git_buf *dir, const char *path, const char *base)
{
	char buf[GIT_PATH_MAX];
	int error;

	error = git_path_join_unrooted(dir, path, base, NULL);
	if (error)
		return error;

	/* a path that cannot be resolved is kept as joined */
	if (p_realpath(dir->ptr, buf) != NULL) {
		error = git_buf_sets(dir, buf);
		if (error)
			return error;
	}

	if (git_path_dirname_r(dir, dir->ptr) < 0)
		return -1;

	return git_path_to_dir(dir);
}

636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
/*
 * Collapse "." and ".." segments and repeated slashes in `path`,
 * rewriting the buffer in place.
 *
 * Nothing before offset `ceiling` is touched.  When `ceiling` is 0 it
 * is derived from the path's root (git_path_root() + 1) or, failing
 * that, from a leading "scheme://" URL prefix.  With a non-zero ceiling
 * a ".." that would climb above it is an error; with no ceiling,
 * leading ".." segments are kept as the new base.
 *
 * Returns 0 on success, -1 on a NULL/out-of-memory buffer or when a
 * ".." tries to strip a root component.
 */
int git_path_resolve_relative(git_buf *path, size_t ceiling)
{
	char *base, *to, *from, *next;
	size_t len;

	if (!path || git_buf_oom(path))
		return -1;

	if (ceiling > path->size)
		ceiling = path->size;

	/* recognize drive prefixes, etc. that should not be backed over */
	if (ceiling == 0)
		ceiling = git_path_root(path->ptr) + 1;

	/* recognize URL prefixes that should not be backed over */
	if (ceiling == 0) {
		next = path->ptr;
		while (*next && git__isalpha(*next))
			++next;

		if (next[0] == ':' && next[1] == '/' && next[2] == '/')
			ceiling = (next + 3) - path->ptr;
	}

	/* `from` reads segments, `to` writes them, `base` is the floor for ".." */
	base = to = from = path->ptr + ceiling;

	while (*from) {
		/* find the end of the current segment */
		next = from;
		while (*next && *next != '/')
			++next;

		len = next - from;

		if (len == 1 && from[0] == '.') {
			/* do nothing with singleton dot */
		} else if (len == 2 && from[0] == '.' && from[1] == '.') {
			/* error out if trying to up one from a hard base */
			if (to == base && ceiling != 0) {
				giterr_set(GITERR_INVALID,
					"Cannot strip root component off url");
				return -1;
			}

			/* no more path segments to strip,
			 * use '../' as a new base path */
			if (to == base) {
				if (*next == '/')
					len++;

				if (to != from)
					memmove(to, from, len);

				to += len;
				/* this is now the base, can't back up from a
				 * relative prefix */
				base = to;
			} else {
				/* back up a path segment */
				while (to > base && to[-1] == '/')
					to--;
				while (to > base && to[-1] != '/')
					to--;
			}
		} else {
			/* ordinary segment: copy it along with its trailing slash */
			if (*next == '/' && *from != '/')
				len++;

			if (to != from)
				memmove(to, from, len);

			to += len;
		}

		from += len;

		/* skip any run of separators before the next segment */
		while (*from == '/')
			from++;
	}

	*to = '\0';

	path->size = to - path->ptr;

	return 0;
}

/*
 * Append `relpath` to `target` and collapse the resulting path with
 * git_path_resolve_relative().
 *
 * Returns 0 on success, -1 if the join fails, or the status of the
 * resolve step.
 */
int git_path_apply_relative(git_buf *target, const char *relpath)
{
	/* do not resolve a buffer whose join did not succeed */
	if (git_buf_joinpath(target, git_buf_cstr(target), relpath) < 0)
		return -1;

	return git_path_resolve_relative(target, 0);
}

722 723
/*
 * Compare two path names, treating a directory name as if it ended
 * with '/'.
 *
 * The common prefix (the shorter of `len1` and `len2`) is compared
 * with `compare`; if that is equal, the next byte of each name decides,
 * with a terminating NUL replaced by '/' for names flagged as
 * directories.
 *
 * Returns the non-zero result of `compare`, or -1 / 0 / 1 from the
 * deciding byte.
 */
int git_path_cmp(
	const char *name1, size_t len1, int isdir1,
	const char *name2, size_t len2, int isdir2,
	int (*compare)(const char *, const char *, size_t))
{
	unsigned char c1, c2;
	size_t len = (len1 < len2) ? len1 : len2;
	int cmp = compare(name1, name2, len);

	if (cmp != 0)
		return cmp;

	c1 = name1[len];
	c2 = name2[len];

	if (c1 == '\0' && isdir1)
		c1 = '/';

	if (c2 == '\0' && isdir2)
		c2 = '/';

	if (c1 < c2)
		return -1;

	return (c1 > c2) ? 1 : 0;
}

747
/*
 * Return true when any of the first `pathlen` bytes of `path` has its
 * high bit set (i.e. is outside the 7-bit ASCII range).
 */
bool git_path_has_non_ascii(const char *path, size_t pathlen)
{
	const uint8_t *bytes = (const uint8_t *)path;
	size_t i;

	for (i = 0; i < pathlen; ++i) {
		if (bytes[i] & 0x80)
			return true;
	}

	return false;
}

758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
#ifdef GIT_USE_ICONV

/*
 * Prepare `ic` for converting from GIT_PATH_NATIVE_ENCODING to
 * GIT_PATH_REPO_ENCODING: initialise its scratch buffer and open the
 * iconv descriptor.
 *
 * Always returns 0; the result of iconv_open() is stored in ic->map
 * without being checked here.
 */
int git_path_iconv_init_precompose(git_path_iconv_t *ic)
{
	iconv_t map;

	git_buf_init(&ic->buf, 0);

	map = iconv_open(GIT_PATH_REPO_ENCODING, GIT_PATH_NATIVE_ENCODING);
	ic->map = map;

	return 0;
}

/*
 * Release the resources held by `ic`: close the iconv descriptor if one
 * was opened and free the scratch buffer.  A NULL `ic` is ignored.
 */
void git_path_iconv_clear(git_path_iconv_t *ic)
{
	if (ic == NULL)
		return;

	if (ic->map != (iconv_t)-1)
		iconv_close(ic->map);

	git_buf_free(&ic->buf);
}

/*
 * Convert the path at *in / *inlen through the iconv descriptor in `ic`.
 *
 * On a successful conversion, *in and *inlen are redirected to the
 * NUL-terminated converted text held in ic->buf.  They are left
 * untouched when `ic` is NULL or has no valid descriptor, when the
 * input is pure ASCII, or when iconv rejects the input for a reason
 * other than lack of output space.
 *
 * Returns 0 in all of the above cases, -1 if the buffer cannot grow or
 * the conversion still lacks space after several retries.
 */
int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen)
{
	char *nfd = *in, *nfc;
	size_t nfdlen = *inlen, nfclen, wantlen = nfdlen, rv;
	int retry = 1;

	if (!ic || ic->map == (iconv_t)-1 ||
		!git_path_has_non_ascii(*in, *inlen))
		return 0;

	git_buf_clear(&ic->buf);

	while (1) {
		if (git_buf_grow(&ic->buf, wantlen + 1) < 0)
			return -1;

		/* keep one byte of the allocation back for the terminating NUL
		 * written after the loop */
		nfc    = ic->buf.ptr   + ic->buf.size;
		nfclen = ic->buf.asize - ic->buf.size - 1;

		rv = iconv(ic->map, &nfd, &nfdlen, &nfc, &nfclen);

		ic->buf.size = (nfc - ic->buf.ptr);

		if (rv != (size_t)-1)
			break;

		/* if we cannot convert the data (probably because iconv thinks
		 * it is not valid UTF-8 source data), then use original data
		 */
		if (errno != E2BIG)
			return 0;

		/* make space for 2x the remaining data to be converted
		 * (with per retry overhead to avoid infinite loops)
		 */
		wantlen = ic->buf.size + max(nfclen, nfdlen) * 2 + (size_t)(retry * 4);

		if (retry++ > 4)
			goto fail;
	}

	ic->buf.ptr[ic->buf.size] = '\0';

	*in    = ic->buf.ptr;
	*inlen = ic->buf.size;

	return 0;

fail:
	giterr_set(GITERR_OS, "Unable to convert unicode path data");
	return -1;
}
828

829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886
/* The same name in precomposed (NFC) and decomposed (NFD) UTF-8 form,
 * each ending in a template suffix for p_mkstemp() */
static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX";
static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX";

/* Check if the platform is decomposing unicode data for us.  We will
 * emulate core Git and prefer to use precomposed unicode data internally
 * on these platforms, composing the decomposed unicode on the fly.
 *
 * This mainly happens on the Mac where HFS+ stores filenames as
 * decomposed unicode.  Even on VFAT and SAMBA file systems, the Mac will
 * return decomposed unicode from readdir() even when the actual
 * filesystem is storing precomposed unicode.
 *
 * Returns true when a file created under `root` with the precomposed
 * name can also be found through the decomposed name; false otherwise,
 * including when the probe file cannot be created.
 */
bool git_path_does_fs_decompose_unicode(const char *root)
{
	git_buf probe = GIT_BUF_INIT;
	bool found_decomposed = false;
	char suffix[6];
	int fd;

	/* Create a file using a precomposed path and then try to find it
	 * using the decomposed name.  A successful lookup is what gets
	 * reported as "decomposing".
	 */
	if (git_buf_joinpath(&probe, root, nfc_file) < 0)
		goto done;

	fd = p_mkstemp(probe.ptr);
	if (fd < 0)
		goto done;
	p_close(fd);

	/* record trailing characters generated by mkstemp */
	memcpy(suffix, probe.ptr + probe.size - sizeof(suffix), sizeof(suffix));

	/* try to look up as NFD path carrying the same generated suffix */
	if (git_buf_joinpath(&probe, root, nfd_file) < 0)
		goto done;
	memcpy(probe.ptr + probe.size - sizeof(suffix), suffix, sizeof(suffix));

	found_decomposed = git_path_exists(probe.ptr);

	/* remove temporary file (using original precomposed path) */
	if (git_buf_joinpath(&probe, root, nfc_file) < 0)
		goto done;
	memcpy(probe.ptr + probe.size - sizeof(suffix), suffix, sizeof(suffix));

	(void)p_unlink(probe.ptr);

done:
	git_buf_free(&probe);
	return found_decomposed;
}

#else

/*
 * Build without GIT_USE_ICONV: no probing is done and the filesystem is
 * always reported as not decomposing unicode.
 */
bool git_path_does_fs_decompose_unicode(const char *root)
{
	GIT_UNUSED(root);

	return false;
}

887 888 889 890 891 892 893 894
#endif

/*
 * Storage handed to p_readdir_r() for one directory entry.  On __sun
 * and __GNU__ builds it is a byte array with FILENAME_MAX + 1 bytes of
 * room after the struct.
 * NOTE(review): presumably because struct dirent's d_name is too small
 * to hold a full name on those platforms -- confirm.
 */
#if defined(__sun) || defined(__GNU__)
typedef char path_dirent_data[sizeof(struct dirent) + FILENAME_MAX + 1];
#else
typedef struct dirent path_dirent_data;
#endif

895 896
int git_path_direach(
	git_buf *path,
897
	uint32_t flags,
898 899 900
	int (*fn)(void *, git_buf *),
	void *arg)
{
901
	int error = 0;
902 903
	ssize_t wd_len;
	DIR *dir;
904 905
	path_dirent_data de_data;
	struct dirent *de, *de_buf = (struct dirent *)&de_data;
906 907 908 909

	(void)flags;

#ifdef GIT_USE_ICONV
910
	git_path_iconv_t ic = GIT_PATH_ICONV_INIT;
911
#endif
912

913
	if (git_path_to_dir(path) < 0)
914
		return -1;
915

nulltoken committed
916
	wd_len = git_buf_len(path);
917

918 919
	if ((dir = opendir(path->ptr)) == NULL) {
		giterr_set(GITERR_OS, "Failed to open directory '%s'", path->ptr);
920 921 922
		if (errno == ENOENT)
			return GIT_ENOTFOUND;

923 924
		return -1;
	}
925

926
#ifdef GIT_USE_ICONV
927
	if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0)
928
		(void)git_path_iconv_init_precompose(&ic);
929
#endif
930 931

	while (p_readdir_r(dir, de_buf, &de) == 0 && de != NULL) {
932 933
		char *de_path = de->d_name;
		size_t de_len = strlen(de_path);
934

935
		if (git_path_is_dot_or_dotdot(de_path))
936 937
			continue;

938 939 940 941
#ifdef GIT_USE_ICONV
		if ((error = git_path_iconv(&ic, &de_path, &de_len)) < 0)
			break;
#endif
942

943
		if ((error = git_buf_put(path, de_path, de_len)) < 0)
944 945
			break;

946
		error = fn(arg, path);
947 948 949

		git_buf_truncate(path, wd_len); /* restore path */

950
		if (error != 0) {
951
			giterr_set_after_callback(error);
952
			break;
953
		}
954 955 956
	}

	closedir(dir);
957 958

#ifdef GIT_USE_ICONV
959
	git_path_iconv_clear(&ic);
960
#endif
961 962

	return error;
963
}
964 965 966 967 968

int git_path_dirload(
	const char *path,
	size_t prefix_len,
	size_t alloc_extra,
969
	unsigned int flags,
970 971 972 973 974
	git_vector *contents)
{
	int error, need_slash;
	DIR *dir;
	size_t path_len;
975 976
	path_dirent_data de_data;
	struct dirent *de, *de_buf = (struct dirent *)&de_data;
977 978 979 980

	(void)flags;

#ifdef GIT_USE_ICONV
981
	git_path_iconv_t ic = GIT_PATH_ICONV_INIT;
982
#endif
983 984

	assert(path && contents);
985 986 987

	path_len = strlen(path);

988 989 990 991
	if (!path_len || path_len < prefix_len) {
		giterr_set(GITERR_INVALID, "Invalid directory path '%s'", path);
		return -1;
	}
992 993 994 995
	if ((dir = opendir(path)) == NULL) {
		giterr_set(GITERR_OS, "Failed to open directory '%s'", path);
		return -1;
	}
996

997
#ifdef GIT_USE_ICONV
998
	if ((flags & GIT_PATH_DIR_PRECOMPOSE_UNICODE) != 0)
999
		(void)git_path_iconv_init_precompose(&ic);
1000
#endif
1001

1002 1003 1004 1005
	path += prefix_len;
	path_len -= prefix_len;
	need_slash = (path_len > 0 && path[path_len-1] != '/') ? 1 : 0;

1006
	while ((error = p_readdir_r(dir, de_buf, &de)) == 0 && de != NULL) {
1007 1008
		char *entry_path, *de_path = de->d_name;
		size_t alloc_size, de_len = strlen(de_path);
1009

1010
		if (git_path_is_dot_or_dotdot(de_path))
1011 1012
			continue;

1013
#ifdef GIT_USE_ICONV
1014
		if ((error = git_path_iconv(&ic, &de_path, &de_len)) < 0)
1015
			break;
1016
#endif
1017

1018 1019 1020 1021
		alloc_size = path_len + need_slash + de_len + 1 + alloc_extra;
		if ((entry_path = git__calloc(alloc_size, 1)) == NULL) {
			error = -1;
			break;
1022 1023
		}

1024 1025 1026 1027
		if (path_len)
			memcpy(entry_path, path, path_len);
		if (need_slash)
			entry_path[path_len] = '/';
1028
		memcpy(&entry_path[path_len + need_slash], de_path, de_len);
1029

1030 1031
		if ((error = git_vector_insert(contents, entry_path)) < 0)
			break;
1032 1033 1034
	}

	closedir(dir);
1035 1036

#ifdef GIT_USE_ICONV
1037
	git_path_iconv_clear(&ic);
1038
#endif
1039

1040 1041
	if (error != 0)
		giterr_set(GITERR_OS, "Failed to process directory entry in '%s'", path);
1042

1043
	return error;
1044 1045
}

1046 1047 1048
int git_path_with_stat_cmp(const void *a, const void *b)
{
	const git_path_with_stat *psa = a, *psb = b;
1049 1050 1051 1052 1053 1054 1055
	return strcmp(psa->path, psb->path);
}

/* Order two git_path_with_stat records by path, ignoring case (strcasecmp). */
int git_path_with_stat_cmp_icase(const void *a, const void *b)
{
	const git_path_with_stat *psa = a;
	const git_path_with_stat *psb = b;

	return strcasecmp(psa->path, psb->path);
}

int git_path_dirload_with_stat(
	const char *path,
	size_t prefix_len,
1061
	unsigned int flags,
1062 1063
	const char *start_stat,
	const char *end_stat,
1064 1065 1066 1067 1068 1069
	git_vector *contents)
{
	int error;
	unsigned int i;
	git_path_with_stat *ps;
	git_buf full = GIT_BUF_INIT;
1070 1071 1072
	int (*strncomp)(const char *a, const char *b, size_t sz);
	size_t start_len = start_stat ? strlen(start_stat) : 0;
	size_t end_len = end_stat ? strlen(end_stat) : 0, cmp_len;
1073

1074 1075
	if (git_buf_set(&full, path, prefix_len) < 0)
		return -1;
1076

1077
	error = git_path_dirload(
1078
		path, prefix_len, sizeof(git_path_with_stat) + 1, flags, contents);
1079
	if (error < 0) {
1080 1081 1082 1083
		git_buf_free(&full);
		return error;
	}

1084
	strncomp = (flags & GIT_PATH_DIR_IGNORE_CASE) != 0 ?
1085
		git__strncasecmp : git__strncmp;
1086 1087

	/* stat struct at start of git_path_with_stat, so shift path text */
1088 1089 1090 1091
	git_vector_foreach(contents, i, ps) {
		size_t path_len = strlen((char *)ps);
		memmove(ps->path, ps, path_len + 1);
		ps->path_len = path_len;
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101
	}

	git_vector_foreach(contents, i, ps) {
		/* skip if before start_stat or after end_stat */
		cmp_len = min(start_len, ps->path_len);
		if (cmp_len && strncomp(ps->path, start_stat, cmp_len) < 0)
			continue;
		cmp_len = min(end_len, ps->path_len);
		if (cmp_len && strncomp(ps->path, end_stat, cmp_len) > 0)
			continue;
1102

1103 1104
		git_buf_truncate(&full, prefix_len);

1105
		if ((error = git_buf_joinpath(&full, full.ptr, ps->path)) < 0 ||
1106 1107 1108 1109 1110 1111 1112 1113
			(error = git_path_lstat(full.ptr, &ps->st)) < 0) {
			if (error == GIT_ENOTFOUND) {
				giterr_clear();
				error = 0;
				git_vector_remove(contents, i--);
				continue;
			}

1114
			break;
1115
		}
1116

1117
		if (S_ISDIR(ps->st.st_mode)) {
1118 1119
			ps->path[ps->path_len++] = '/';
			ps->path[ps->path_len] = '\0';
1120 1121 1122
		}
	}

1123 1124 1125
	/* sort now that directory suffix is added */
	git_vector_sort(contents);

1126 1127
	git_buf_free(&full);

1128 1129
	return error;
}
1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147

/*
 * Store a local path in `local_path_out`: text starting with "file://"
 * is converted with git_path_fromurl(), anything else is copied as is.
 *
 * Returns 0 on success or the negative error from the step that failed.
 */
int git_path_from_url_or_path(git_buf *local_path_out, const char *url_or_path)
{
	int error;

	if (git__prefixcmp(url_or_path, "file://") == 0)
		/* begins with file:// so treat it as a URL */
		error = git_path_fromurl(local_path_out, url_or_path);
	else
		/* We assume url_or_path is already a path */
		error = git_buf_sets(local_path_out, url_or_path);

	return (error < 0) ? error : 0;
}