Commit 09361dfe by Edward Thomson

win32: use NT-prefixed "\\?\" paths

When turning UTF-8 paths into UCS-2 paths for Windows, always use
the \\?\-prefixed paths.  Because this bypasses the system's
path canonicalization, handle the canonicalization functions ourselves.

We must:
 1. always use a backslash as a directory separator
 2. only use a single backslash between directories
 3. not rely on the system to translate "." and ".." in paths
 4. remove trailing backslashes, except at the drive root (C:\)
parent b5ee184c
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
* a Linking Exception. For full terms see the included COPYING file. * a Linking Exception. For full terms see the included COPYING file.
*/ */
#include "path_w32.h"
#include "utf-conv.h" #include "utf-conv.h"
#include "path.h" #include "path.h"
#include "findfile.h" #include "findfile.h"
......
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include "common.h"
#include "path.h"
#include "path_w32.h"
#include "utf-conv.h"
/* The wide-character prefix that places a path in the NT namespace. */
#define PATH__NT_NAMESPACE L"\\\\?\\"
/* Number of characters in PATH__NT_NAMESPACE (NUL not counted). */
#define PATH__NT_NAMESPACE_LEN 4
/* Number of characters in a drive root such as "C:\" (letter, colon, separator). */
#define PATH__ABSOLUTE_LEN 3
/* True when the character is a directory separator, forward or backward.
 * Note: the argument is evaluated twice. */
#define path__is_dirsep(p) ((p) == '/' || (p) == '\\')
/* True when the string starts with a drive letter, a colon and a directory
 * separator.  This file applies it to both char and wchar_t strings. */
#define path__is_absolute(p) \
(git__isalpha((p)[0]) && (p)[1] == ':' && ((p)[2] == '\\' || (p)[2] == '/'))
/* True when the string starts with the NT namespace prefix, spelled either
 * entirely with backslashes or entirely with forward slashes. */
#define path__is_nt_namespace(p) \
(((p)[0] == '\\' && (p)[1] == '\\' && (p)[2] == '?' && (p)[3] == '\\') || \
((p)[0] == '/' && (p)[1] == '/' && (p)[2] == '?' && (p)[3] == '/'))
/* True when the string starts with two identical directory separators.
 * An NT namespace prefix also satisfies this test, so callers in this file
 * check path__is_nt_namespace() first. */
#define path__is_unc(p) \
(((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/'))
/**
 * Fetch the current working directory into `path`, with any leading
 * "\\?\" NT namespace prefix removed.
 *
 * @param path The buffer that receives the NUL terminated directory.
 * @param size The capacity of `path`, in wide characters.
 * @return The length of the directory in wide characters (not counting the
 *         NUL terminator), or -1 with errno set on failure.
 */
GIT_INLINE(int) path__cwd(wchar_t *path, int size)
{
	int len;

	if ((len = GetCurrentDirectoryW(size, path)) == 0) {
		errno = GetLastError() == ERROR_ACCESS_DENIED ? EACCES : ENOENT;
		return -1;
	} else if (len > size) {
		/* A result larger than the buffer means it did not fit */
		errno = ENAMETOOLONG;
		return -1;
	}

	/* The Win32 APIs may return "\\?\" once you've used it first.
	 * But it may not.  What a gloriously predictable API!
	 */
	if (wcsncmp(path, PATH__NT_NAMESPACE, PATH__NT_NAMESPACE_LEN))
		return len;

	len -= PATH__NT_NAMESPACE_LEN;

	/* Copy len + 1 characters so that the NUL terminator moves down with
	 * the string; otherwise the last four characters of the original would
	 * linger after the shortened path.
	 */
	memmove(path, path + PATH__NT_NAMESPACE_LEN, sizeof(wchar_t) * (len + 1));
	return len;
}
/* Step over the leading (server) component of `path`: returns a pointer to
 * the character that follows the first directory separator, or to the
 * terminating NUL when `path` contains no separator at all.
 */
static wchar_t *path__skip_server(wchar_t *path)
{
	wchar_t *cursor = path;

	while (*cursor != L'\0') {
		wchar_t ch = *cursor++;

		if (path__is_dirsep(ch))
			return cursor;
	}

	return cursor;
}
/* Return a pointer to the first character after the prefix of `path`.
 * The prefix is whichever of these is present: the NT namespace marker
 * (optionally followed by "UNC" plus a server name, or by a drive root),
 * a bare drive root, or the two leading separators and server name of a
 * UNC path.  A path with none of these is returned unchanged.
 */
static wchar_t *path__skip_prefix(wchar_t *path)
{
	if (path__is_nt_namespace(path)) {
		path += PATH__NT_NAMESPACE_LEN;

		/* Accept either separator after "UNC": this runs before the
		 * prefix has been unposixified, so "UNC/server" must be
		 * recognized just like "UNC\server".
		 */
		if (wcsncmp(path, L"UNC", 3) == 0 && path__is_dirsep(path[3]))
			path = path__skip_server(path + 4);
		else if (path__is_absolute(path))
			path += PATH__ABSOLUTE_LEN;
	} else if (path__is_absolute(path)) {
		path += PATH__ABSOLUTE_LEN;
	} else if (path__is_unc(path)) {
		path = path__skip_server(path + 2);
	}

	return path;
}
/*
 * Canonicalize `path` in place: forward slashes become backslashes, runs of
 * separators collapse to one, "." segments are dropped, ".." segments remove
 * the preceding segment, and trailing backslashes are stripped.  Nothing at
 * or before the prefix found by path__skip_prefix() is ever removed.
 *
 * Returns the length of the rewritten path in wchar_t's, not counting the
 * NUL terminator.
 */
int git_win32_path_canonicalize(git_win32_path path)
{
wchar_t *base, *from, *to, *next;
size_t len;
/* `base` is the end of the prefix; `to` is the write cursor and never
 * moves below `base`. */
base = to = path__skip_prefix(path);
/* Unposixify the prefix */
for (from = path; from < to; from++) {
if (*from == L'/')
*from = L'\\';
}
/* `from` (the read cursor) now equals `base`; handle one segment per pass */
while (*from) {
/* Find the end of the segment, rewriting a terminating '/' as '\' */
for (next = from; *next; ++next) {
if (*next == L'/') {
*next = L'\\';
break;
}
if (*next == L'\\')
break;
}
/* Segment length, separator not included (0 if `from` is on a separator) */
len = next - from;
if (len == 1 && from[0] == L'.')
/* do nothing with singleton dot */;
else if (len == 2 && from[0] == L'.' && from[1] == L'.') {
if (to == base) {
/* no more path segments to strip, eat the "../" */
if (*next == L'\\')
len++;
/* `to` equals `base` in this branch, so this leaves `base` as it was */
base = to;
} else {
/* back up a path segment */
while (to > base && to[-1] == L'\\') to--;
while (to > base && to[-1] != L'\\') to--;
}
} else {
/* Keep the segment, together with the separator that ends it (if any) */
if (*next == L'\\' && *from != L'\\')
len++;
/* Nothing to move while no characters have been dropped yet */
if (to != from)
memmove(to, from, sizeof(wchar_t) * len);
to += len;
}
from += len;
/* Skip the remaining separators so that runs collapse to one */
while (*from == L'\\') from++;
}
/* Strip trailing backslashes */
while (to > base && to[-1] == L'\\') to--;
*to = L'\0';
return (to - path);
}
/**
 * Fetch the current working directory in the form that follows a "\\?\"
 * prefix: a directory on a drive is returned as-is, while a UNC directory
 * "\\server\share" is rewritten as "UNC\server\share".
 *
 * The characters after the returned length are unspecified; callers must
 * use the returned length rather than rely on a NUL terminator.  On
 * success there is always room in `out` for one more directory separator
 * and a NUL after the returned length.
 *
 * @param out The buffer that receives the directory.
 * @param len The capacity of `out`, in wide characters.
 * @return The length of the directory in wide characters, or -1 with errno
 *         set on failure.
 */
int git_win32_path__cwd(wchar_t *out, size_t len)
{
	/* Honor the caller's buffer size as well as the MAX_PATH limit */
	size_t limit = (len < (size_t)MAX_PATH) ? len : (size_t)MAX_PATH;
	int cwd_len;

	if ((cwd_len = path__cwd(out, (int)len)) < 0)
		return -1;

	/* UNC paths */
	if (wcsncmp(L"\\\\", out, 2) == 0) {
		/* Our buffer must be at least 4 characters larger than the
		 * current working directory: we swallow one of the leading
		 * '\'s, but we add a 'UNC' specifier to the path (a net
		 * gain of 2), plus a trailing directory separator, plus a NUL.
		 */
		if ((size_t)cwd_len + 4 > limit) {
			errno = ENAMETOOLONG;
			return -1;
		}

		/* Shift right by two, then overwrite the first three
		 * characters (including the first moved '\') with "UNC".
		 */
		memmove(out + 2, out, sizeof(wchar_t) * cwd_len);
		out[0] = L'U';
		out[1] = L'N';
		out[2] = L'C';

		cwd_len += 2;
	}

	/* Our buffer must be at least 2 characters larger than the current
	 * working directory.  (One character for the directory separator,
	 * one for the NUL.)
	 */
	else if ((size_t)cwd_len + 2 > limit) {
		errno = ENAMETOOLONG;
		return -1;
	}

	return cwd_len;
}
/*
 * Build the wide, "\\?\"-prefixed, canonical form of the UTF-8 path `src`
 * in `out`.  Five input shapes are handled, tested in this order: drive
 * absolute ("C:\..."), already NT-namespaced, UNC, rooted without a drive
 * letter, and relative.  The last two are completed from the current
 * working directory.
 *
 * Returns the value of git_win32_path_canonicalize() on the assembled path,
 * or -1 if a conversion or the working directory lookup fails.
 */
int git_win32_path_from_utf8(git_win32_path out, const char *src)
{
	/* Where the text following the namespace prefix goes */
	wchar_t *body = out + PATH__NT_NAMESPACE_LEN;
	int cwd_len;

	/* All win32 paths are in NT-prefixed format, beginning with "\\?\". */
	memcpy(out, PATH__NT_NAMESPACE, sizeof(wchar_t) * PATH__NT_NAMESPACE_LEN);

	/* Drive-letter absolute path: append it unchanged */
	if (path__is_absolute(src)) {
		if (git__utf8_to_16(body, MAX_PATH, src) < 0)
			return -1;

		return git_win32_path_canonicalize(out);
	}

	/* Already NT-namespaced: drop the source's prefix, ours is in place */
	if (path__is_nt_namespace(src)) {
		if (git__utf8_to_16(body, MAX_PATH, src + PATH__NT_NAMESPACE_LEN) < 0)
			return -1;

		return git_win32_path_canonicalize(out);
	}

	/* UNC path: "\\server" turns into "UNC\server" */
	if (path__is_unc(src)) {
		memcpy(body, L"UNC\\", sizeof(wchar_t) * 4);

		/* Skip the two leading separators of the source */
		if (git__utf8_to_16(body + 4, MAX_PATH - 2, src + 2) < 0)
			return -1;

		return git_win32_path_canonicalize(out);
	}

	/* Rooted path without a drive: borrow "C:" from the working directory */
	if (src[0] == '\\' || src[0] == '/') {
		if (path__cwd(body, MAX_PATH) < 0)
			return -1;

		if (!path__is_absolute(body)) {
			errno = ENOENT;
			return -1;
		}

		/* Write the source right after the drive letter and colon */
		if (git__utf8_to_16(body + 2, MAX_PATH - 2, src) < 0)
			return -1;

		return git_win32_path_canonicalize(out);
	}

	/* Relative path: working directory, a separator, then the source */
	if ((cwd_len = git_win32_path__cwd(body, MAX_PATH)) < 0)
		return -1;

	body[cwd_len++] = L'\\';

	if (git__utf8_to_16(body + cwd_len, MAX_PATH - cwd_len, src) < 0)
		return -1;

	return git_win32_path_canonicalize(out);
}
/*
 * Convert the wide path `src` to UTF-8 in `dest`, removing any NT namespace
 * prefix: "\\?\C:\dir" is emitted as "C:\dir" and "\\?\UNC\server\share" as
 * "\\server\share".  git_path_mkposix() is then applied to the whole
 * result.
 *
 * Returns the length in bytes of the string left in `dest` (not counting
 * the NUL terminator), or a negative value if the conversion fails.
 */
int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
{
	char *out = dest;
	int prefix_len = 0;
	int len;

	/* Strip NT namespacing "\\?\" */
	if (path__is_nt_namespace(src)) {
		src += 4;

		/* "\\?\UNC\server\share" -> "\\server\share" */
		if (wcsncmp(src, L"UNC\\", 4) == 0) {
			src += 4;

			memcpy(dest, "\\\\", 2);
			prefix_len = 2;
			out = dest + prefix_len;
		}
	}

	/* Only the space after the bytes already written is available to the
	 * conversion; offering the full buffer size could overrun `dest`.
	 */
	if ((len = git__utf16_to_8(out, GIT_WIN_PATH_UTF8 - prefix_len, src)) < 0)
		return len;

	git_path_mkposix(dest);

	/* Count the leading "\\" that was written by hand */
	return len + prefix_len;
}
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#ifndef INCLUDE_git_path_w32_h__
#define INCLUDE_git_path_w32_h__
/*
 * Provides a large enough buffer to support Windows paths: MAX_PATH is
 * 260, corresponding to a maximum path length of 259 characters plus a
 * NUL terminator.  Prefixing with "\\?\" adds 4 characters, but if the
 * original was a UNC path, then we turn "\\server\share" into
 * "\\?\UNC\server\share".  So we replace the first two characters with
 * 8 characters, a net gain of 6, so the maximum length is MAX_PATH+6.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand inside larger expressions, e.g. when multiplied by a size.
 */
#define GIT_WIN_PATH_UTF16 (MAX_PATH+6)
/* Maximum size, in bytes, of a UTF-8 Win32 path including its NUL
* terminator. We remove the "\\?\" or "\\?\UNC\" prefixes for
* presentation, bringing us back to 259 (non-NUL) UTF-16 characters.
* Each of those is budgeted 3 bytes of UTF-8, hence 259 * 3 + 1.
* UTF-8 does have 4-byte sequences, but they are encoded in UTF-16
* using surrogate pairs, which take up the space of two characters.
* Two characters in the range U+0800 -> U+FFFF take up more space in
* UTF-8 (6 bytes) than one surrogate pair (4 bytes), so 3 bytes per
* UTF-16 character is the worst case.
*/
#define GIT_WIN_PATH_UTF8 (259 * 3 + 1)
/* Win32 path types: fixed-size buffers for a wide (wchar_t) path and for
* its UTF-8 counterpart. */
typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
/**
* Create a Win32 path (in UCS-2 format) from a UTF-8 string.
*
* The result always begins with the NT namespace prefix "\\?\" and is
* passed through git_win32_path_canonicalize(). UNC input is rewritten
* to the "\\?\UNC\server\..." form, and input that is relative or that
* lacks a drive letter is completed from the current working directory.
*
* @param dest The buffer to receive the wide string.
* @param src The UTF-8 string to convert.
* @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
*/
extern int git_win32_path_from_utf8(git_win32_path dest, const char *src);
/**
* Canonicalize a Win32 UCS-2 path so that it is suitable for delivery to the
* Win32 APIs: remove multiple directory separators, squashing to a single one,
* strip trailing directory separators, ensure directory separators are all
* canonical (always backslashes, never forward slashes) and process any
* directory entries of '.' or '..'.
*
* A leading prefix (NT namespace marker, drive root such as "C:\", or UNC
* server name) is preserved: '..' entries never remove it, and the
* separator that ends it is not stripped.
*
* This processes the buffer in place.
*
* @param path The buffer to process
* @return The new length of the buffer, in wchar_t's (not counting the NULL terminator)
*/
extern int git_win32_path_canonicalize(git_win32_path path);
/**
* Create an internal format (posix-style) UTF-8 path from a Win32 UCS-2 path.
*
* A leading "\\?\" is removed, and a leading "\\?\UNC\" is replaced by the
* two separators that begin a plain UNC path.
*
* @param dest The buffer to receive the UTF-8 string.
* @param src The wide string to convert.
* @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
*/
extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src);
#endif
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "common.h" #include "common.h"
#include "../posix.h" #include "../posix.h"
#include "path_w32.h"
#include "utf-conv.h" #include "utf-conv.h"
#include "dir.h" #include "dir.h"
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "../posix.h" #include "../posix.h"
#include "../fileops.h" #include "../fileops.h"
#include "path.h" #include "path.h"
#include "path_w32.h"
#include "utf-conv.h" #include "utf-conv.h"
#include "repository.h" #include "repository.h"
#include "reparse.h" #include "reparse.h"
...@@ -31,29 +32,13 @@ ...@@ -31,29 +32,13 @@
/* GetFinalPathNameByHandleW signature */ /* GetFinalPathNameByHandleW signature */
typedef DWORD(WINAPI *PFGetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD, DWORD); typedef DWORD(WINAPI *PFGetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD, DWORD);
/* Helper function which converts UTF-8 paths to UTF-16.
* On failure, errno is set. */
static int utf8_to_16_with_errno(git_win32_path dest, const char *src)
{
int len = git_win32_path_from_utf8(dest, src);
if (len < 0) {
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
errno = ENAMETOOLONG;
else
errno = EINVAL; /* Bad code point, presumably */
}
return len;
}
int p_mkdir(const char *path, mode_t mode) int p_mkdir(const char *path, mode_t mode)
{ {
git_win32_path buf; git_win32_path buf;
GIT_UNUSED(mode); GIT_UNUSED(mode);
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
return _wmkdir(buf); return _wmkdir(buf);
...@@ -64,7 +49,7 @@ int p_unlink(const char *path) ...@@ -64,7 +49,7 @@ int p_unlink(const char *path)
git_win32_path buf; git_win32_path buf;
int error; int error;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
error = _wunlink(buf); error = _wunlink(buf);
...@@ -271,7 +256,7 @@ static int do_lstat(const char *path, struct stat *buf, bool posixly_correct) ...@@ -271,7 +256,7 @@ static int do_lstat(const char *path, struct stat *buf, bool posixly_correct)
git_win32_path path_w; git_win32_path path_w;
int len; int len;
if ((len = utf8_to_16_with_errno(path_w, path)) < 0) if ((len = git_win32_path_from_utf8(path_w, path)) < 0)
return -1; return -1;
git_win32__path_trim_end(path_w, len); git_win32__path_trim_end(path_w, len);
...@@ -302,7 +287,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz) ...@@ -302,7 +287,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz)
* could occur in the middle of the encoding of a code point, * could occur in the middle of the encoding of a code point,
* we need to buffer the result on the stack. */ * we need to buffer the result on the stack. */
if (utf8_to_16_with_errno(path_w, path) < 0 || if (git_win32_path_from_utf8(path_w, path) < 0 ||
readlink_w(target_w, path_w) < 0 || readlink_w(target_w, path_w) < 0 ||
(len = git_win32_path_to_utf8(target, target_w)) < 0) (len = git_win32_path_to_utf8(target, target_w)) < 0)
return -1; return -1;
...@@ -326,7 +311,7 @@ int p_open(const char *path, int flags, ...) ...@@ -326,7 +311,7 @@ int p_open(const char *path, int flags, ...)
git_win32_path buf; git_win32_path buf;
mode_t mode = 0; mode_t mode = 0;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
if (flags & O_CREAT) { if (flags & O_CREAT) {
...@@ -344,7 +329,7 @@ int p_creat(const char *path, mode_t mode) ...@@ -344,7 +329,7 @@ int p_creat(const char *path, mode_t mode)
{ {
git_win32_path buf; git_win32_path buf;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | STANDARD_OPEN_FLAGS, mode); return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | STANDARD_OPEN_FLAGS, mode);
...@@ -442,7 +427,7 @@ int p_stat(const char* path, struct stat* buf) ...@@ -442,7 +427,7 @@ int p_stat(const char* path, struct stat* buf)
git_win32_path path_w; git_win32_path path_w;
int len; int len;
if ((len = utf8_to_16_with_errno(path_w, path)) < 0) if ((len = git_win32_path_from_utf8(path_w, path)) < 0)
return -1; return -1;
git_win32__path_trim_end(path_w, len); git_win32__path_trim_end(path_w, len);
...@@ -462,7 +447,7 @@ int p_chdir(const char* path) ...@@ -462,7 +447,7 @@ int p_chdir(const char* path)
{ {
git_win32_path buf; git_win32_path buf;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
return _wchdir(buf); return _wchdir(buf);
...@@ -472,7 +457,7 @@ int p_chmod(const char* path, mode_t mode) ...@@ -472,7 +457,7 @@ int p_chmod(const char* path, mode_t mode)
{ {
git_win32_path buf; git_win32_path buf;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
return _wchmod(buf, mode); return _wchmod(buf, mode);
...@@ -483,7 +468,7 @@ int p_rmdir(const char* path) ...@@ -483,7 +468,7 @@ int p_rmdir(const char* path)
git_win32_path buf; git_win32_path buf;
int error; int error;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
error = _wrmdir(buf); error = _wrmdir(buf);
...@@ -512,7 +497,7 @@ char *p_realpath(const char *orig_path, char *buffer) ...@@ -512,7 +497,7 @@ char *p_realpath(const char *orig_path, char *buffer)
{ {
git_win32_path orig_path_w, buffer_w; git_win32_path orig_path_w, buffer_w;
if (utf8_to_16_with_errno(orig_path_w, orig_path) < 0) if (git_win32_path_from_utf8(orig_path_w, orig_path) < 0)
return NULL; return NULL;
/* Note that if the path provided is a relative path, then the current directory /* Note that if the path provided is a relative path, then the current directory
...@@ -533,20 +518,17 @@ char *p_realpath(const char *orig_path, char *buffer) ...@@ -533,20 +518,17 @@ char *p_realpath(const char *orig_path, char *buffer)
return NULL; return NULL;
} }
/* Convert the path to UTF-8. */ if (!buffer && !(buffer = git__malloc(GIT_WIN_PATH_UTF8))) {
if (buffer) { errno = ENOMEM;
/* If the caller provided a buffer, then it is assumed to be GIT_WIN_PATH_UTF8 return NULL;
* characters in size. If it isn't, then we may overflow. */
if (git__utf16_to_8(buffer, GIT_WIN_PATH_UTF8, buffer_w) < 0)
return NULL;
} else {
/* If the caller did not provide a buffer, then we allocate one for the caller
* from the heap. */
if (git__utf16_to_8_alloc(&buffer, buffer_w) < 0)
return NULL;
} }
/* Convert backslashes to forward slashes */ /* Convert the path to UTF-8. If the caller provided a buffer, then it
* is assumed to be GIT_WIN_PATH_UTF8 characters in size. If it isn't,
* then we may overflow. */
if (git_win32_path_to_utf8(buffer, buffer_w) < 0)
return NULL;
git_path_mkposix(buffer); git_path_mkposix(buffer);
return buffer; return buffer;
...@@ -579,6 +561,7 @@ int p_snprintf(char *buffer, size_t count, const char *format, ...) ...@@ -579,6 +561,7 @@ int p_snprintf(char *buffer, size_t count, const char *format, ...)
return r; return r;
} }
/* TODO: wut? */
int p_mkstemp(char *tmp_path) int p_mkstemp(char *tmp_path)
{ {
#if defined(_MSC_VER) #if defined(_MSC_VER)
...@@ -596,7 +579,7 @@ int p_access(const char* path, mode_t mode) ...@@ -596,7 +579,7 @@ int p_access(const char* path, mode_t mode)
{ {
git_win32_path buf; git_win32_path buf;
if (utf8_to_16_with_errno(buf, path) < 0) if (git_win32_path_from_utf8(buf, path) < 0)
return -1; return -1;
return _waccess(buf, mode); return _waccess(buf, mode);
...@@ -610,8 +593,8 @@ int p_rename(const char *from, const char *to) ...@@ -610,8 +593,8 @@ int p_rename(const char *from, const char *to)
int rename_succeeded; int rename_succeeded;
int error; int error;
if (utf8_to_16_with_errno(wfrom, from) < 0 || if (git_win32_path_from_utf8(wfrom, from) < 0 ||
utf8_to_16_with_errno(wto, to) < 0) git_win32_path_from_utf8(wto, to) < 0)
return -1; return -1;
/* wait up to 50ms if file is locked by another thread or process */ /* wait up to 50ms if file is locked by another thread or process */
......
...@@ -26,6 +26,14 @@ GIT_INLINE(DWORD) get_wc_flags(void) ...@@ -26,6 +26,14 @@ GIT_INLINE(DWORD) get_wc_flags(void)
return flags; return flags;
} }
GIT_INLINE(void) git__set_errno(void)
{
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
errno = ENAMETOOLONG;
else
errno = EINVAL;
}
/** /**
* Converts a UTF-8 string to wide characters. * Converts a UTF-8 string to wide characters.
* *
...@@ -36,10 +44,15 @@ GIT_INLINE(DWORD) get_wc_flags(void) ...@@ -36,10 +44,15 @@ GIT_INLINE(DWORD) get_wc_flags(void)
*/ */
int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src) int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
{ {
int len;
/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
* length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */ * length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */
return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1; if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0)
git__set_errno();
return len;
} }
/** /**
...@@ -52,10 +65,15 @@ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src) ...@@ -52,10 +65,15 @@ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
*/ */
int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src) int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)
{ {
int len;
/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
* length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */ * length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */
return WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1; if ((len = WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0)
git__set_errno();
return len;
} }
/** /**
...@@ -76,17 +94,23 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src) ...@@ -76,17 +94,23 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src)
/* Length of -1 indicates NULL termination of the input string */ /* Length of -1 indicates NULL termination of the input string */
utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0); utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0);
if (!utf16_size) if (!utf16_size) {
git__set_errno();
return -1; return -1;
}
*dest = git__malloc(utf16_size * sizeof(wchar_t)); *dest = git__malloc(utf16_size * sizeof(wchar_t));
if (!*dest) if (!*dest) {
errno = ENOMEM;
return -1; return -1;
}
utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size); utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size);
if (!utf16_size) { if (!utf16_size) {
git__set_errno();
git__free(*dest); git__free(*dest);
*dest = NULL; *dest = NULL;
} }
...@@ -116,17 +140,23 @@ int git__utf16_to_8_alloc(char **dest, const wchar_t *src) ...@@ -116,17 +140,23 @@ int git__utf16_to_8_alloc(char **dest, const wchar_t *src)
/* Length of -1 indicates NULL termination of the input string */ /* Length of -1 indicates NULL termination of the input string */
utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL); utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL);
if (!utf8_size) if (!utf8_size) {
git__set_errno();
return -1; return -1;
}
*dest = git__malloc(utf8_size); *dest = git__malloc(utf8_size);
if (!*dest) if (!*dest) {
errno = ENOMEM;
return -1; return -1;
}
utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL); utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL);
if (!utf8_size) { if (!utf8_size) {
git__set_errno();
git__free(*dest); git__free(*dest);
*dest = NULL; *dest = NULL;
} }
......
...@@ -10,21 +10,6 @@ ...@@ -10,21 +10,6 @@
#include <wchar.h> #include <wchar.h>
#include "common.h" #include "common.h"
/* Equal to the Win32 MAX_PATH constant. The maximum path length is 259
* characters plus a NULL terminator. */
#define GIT_WIN_PATH_UTF16 260
/* Maximum size of a UTF-8 Win32 path. UTF-8 does have 4-byte sequences,
* but they are encoded in UTF-16 using surrogate pairs, which takes up
* the space of two characters. Two characters in the range U+0800 ->
* U+FFFF take up more space in UTF-8 (6 bytes) than one surrogate pair
* (4 bytes). */
#define GIT_WIN_PATH_UTF8 (259 * 3 + 1)
/* Win32 path types */
typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
/** /**
* Converts a UTF-8 string to wide characters. * Converts a UTF-8 string to wide characters.
* *
...@@ -67,28 +52,4 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src); ...@@ -67,28 +52,4 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src);
*/ */
int git__utf16_to_8_alloc(char **dest, const wchar_t *src); int git__utf16_to_8_alloc(char **dest, const wchar_t *src);
/**
* Converts a UTF-8 Win32 path to wide characters.
*
* @param dest The buffer to receive the wide string.
* @param src The UTF-8 string to convert.
* @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
*/
GIT_INLINE(int) git_win32_path_from_utf8(git_win32_path dest, const char *src)
{
return git__utf8_to_16(dest, GIT_WIN_PATH_UTF16, src);
}
/**
* Converts a wide Win32 path to UTF-8.
*
* @param dest The buffer to receive the UTF-8 string.
* @param src The wide string to convert.
* @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
*/
GIT_INLINE(int) git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
{
return git__utf16_to_8(dest, GIT_WIN_PATH_UTF8, src);
}
#endif #endif
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#define INCLUDE_w32_util_h__ #define INCLUDE_w32_util_h__
#include "utf-conv.h" #include "utf-conv.h"
#include "path_w32.h"
GIT_INLINE(bool) git_win32__isalpha(wchar_t c) GIT_INLINE(bool) git_win32__isalpha(wchar_t c)
{ {
......
...@@ -196,19 +196,6 @@ static void do_custom_reparse(const char *path) ...@@ -196,19 +196,6 @@ static void do_custom_reparse(const char *path)
#endif #endif
git_buf *unslashify(git_buf *buf)
{
#ifdef GIT_WIN32
size_t i;
for (i = 0; i < buf->size; i++)
if (buf->ptr[i] == '/')
buf->ptr[i] = '\\';
#endif
return buf;
}
void test_core_link__stat_regular_file(void) void test_core_link__stat_regular_file(void)
{ {
struct stat st; struct stat st;
...@@ -547,7 +534,7 @@ void test_core_link__readlink_symlink(void) ...@@ -547,7 +534,7 @@ void test_core_link__readlink_symlink(void)
buf[len] = 0; buf[len] = 0;
cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf); cl_assert_equal_s(git_buf_cstr(&target_path), buf);
git_buf_free(&target_path); git_buf_free(&target_path);
} }
...@@ -567,7 +554,7 @@ void test_core_link__readlink_dangling(void) ...@@ -567,7 +554,7 @@ void test_core_link__readlink_dangling(void)
buf[len] = 0; buf[len] = 0;
cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf); cl_assert_equal_s(git_buf_cstr(&target_path), buf);
git_buf_free(&target_path); git_buf_free(&target_path);
} }
...@@ -593,7 +580,7 @@ void test_core_link__readlink_multiple(void) ...@@ -593,7 +580,7 @@ void test_core_link__readlink_multiple(void)
buf[len] = 0; buf[len] = 0;
cl_assert_equal_s(git_buf_cstr(unslashify(&path2)), buf); cl_assert_equal_s(git_buf_cstr(&path2), buf);
git_buf_free(&path1); git_buf_free(&path1);
git_buf_free(&path2); git_buf_free(&path2);
......
#include "clar_libgit2.h"
#include "path.h"
#ifdef GIT_WIN32
#include "win32/path_w32.h"
#endif
/*
 * Shared helper: convert `utf8_in` with git_win32_path_from_utf8() and check
 * that the conversion succeeds, that the wide result equals
 * `utf16_expected`, and that the returned length equals the expected
 * string's length.  Does nothing when GIT_WIN32 is not defined.
 */
void test_utf8_to_utf16(const char *utf8_in, const wchar_t *utf16_expected)
{
#ifdef GIT_WIN32
git_win32_path path_utf16;
int path_utf16len;
cl_assert((path_utf16len = git_win32_path_from_utf8(path_utf16, utf8_in)) >= 0);
cl_assert_equal_wcs(utf16_expected, path_utf16);
/* The return value must be the length of the converted string */
cl_assert_equal_i(wcslen(utf16_expected), path_utf16len);
#else
/* Keep the parameters referenced so the build stays warning-free */
GIT_UNUSED(utf8_in);
GIT_UNUSED(utf16_expected);
#endif
}
/* Drive roots: the NT prefix is added, the drive letter's case is kept, the
 * root separator survives and a forward slash becomes a backslash.
 */
void test_path_win32__utf8_to_utf16(void)
{
#ifdef GIT_WIN32
	static const struct {
		const char *utf8;
		const wchar_t *utf16;
	} cases[] = {
		{ "C:\\", L"\\\\?\\C:\\" },
		{ "c:\\", L"\\\\?\\c:\\" },
		{ "C:/", L"\\\\?\\C:\\" },
		{ "c:/", L"\\\\?\\c:\\" }
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		test_utf8_to_utf16(cases[i].utf8, cases[i].utf16);
#endif
}
/* Trailing directory separators, single or repeated and of either kind,
 * are removed from a path that is longer than the drive root.
 */
void test_path_win32__removes_trailing_slash(void)
{
#ifdef GIT_WIN32
	test_utf8_to_utf16("C:\\Foo\\", L"\\\\?\\C:\\Foo");
	test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo");
	/* Three backslashes, the counterpart of the "C:/Foo///" case below
	 * (this line used to repeat the two-backslash case verbatim).
	 */
	test_utf8_to_utf16("C:\\Foo\\\\\\", L"\\\\?\\C:\\Foo");
	test_utf8_to_utf16("C:/Foo/", L"\\\\?\\C:\\Foo");
	test_utf8_to_utf16("C:/Foo///", L"\\\\?\\C:\\Foo");
#endif
}
/* Runs of separators inside a path collapse to a single backslash. */
void test_path_win32__squashes_multiple_slashes(void)
{
#ifdef GIT_WIN32
	static const struct {
		const char *utf8;
		const wchar_t *utf16;
	} cases[] = {
		{ "C:\\\\Foo\\Bar\\\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar" },
		{ "C://Foo/Bar///Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar" }
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		test_utf8_to_utf16(cases[i].utf8, cases[i].utf16);
#endif
}
/* UNC paths, written with either separator, gain the "\\?\UNC\" prefix. */
void test_path_win32__unc(void)
{
#ifdef GIT_WIN32
	static const struct {
		const char *utf8;
		const wchar_t *utf16;
	} cases[] = {
		{ "\\\\server\\c$\\unc\\path", L"\\\\?\\UNC\\server\\c$\\unc\\path" },
		{ "//server/git/style/unc/path", L"\\\\?\\UNC\\server\\git\\style\\unc\\path" }
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		test_utf8_to_utf16(cases[i].utf8, cases[i].utf16);
#endif
}
/*
 * Length limits: per the text embedded in the fixtures, the first two
 * inputs are 259 characters long (one drive path, one UNC path) and must
 * convert, while the last two are one character too long and must be
 * rejected.
 */
void test_path_win32__honors_max_path(void)
{
#ifdef GIT_WIN32
git_win32_path path_utf16;
/* Longest accepted inputs */
test_utf8_to_utf16("C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij",
L"\\\\?\\C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij");
test_utf8_to_utf16("\\\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij",
L"\\\\?\\UNC\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij");
/* One character over the limit: conversion has to fail */
cl_check_fail(git_win32_path_from_utf8(path_utf16, "C:\\This path is 260 chars and is sadly too long for windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"));
cl_check_fail(git_win32_path_from_utf8(path_utf16, "\\\\unc\\paths are also bound by 260 character restrictions\\including the server name portion\\bcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"));
#endif
}
/* "." segments vanish, ".." segments remove their parent, and ".." can
 * never climb above the drive root.
 */
void test_path_win32__dot_and_dotdot(void)
{
#ifdef GIT_WIN32
	static const struct {
		const char *utf8;
		const wchar_t *utf16;
	} cases[] = {
		{ "C:\\Foo\\..\\Foobar", L"\\\\?\\C:\\Foobar" },
		{ "C:\\Foo\\Bar\\..\\Foobar", L"\\\\?\\C:\\Foo\\Foobar" },
		{ "C:\\Foo\\Bar\\..\\Foobar\\..", L"\\\\?\\C:\\Foo" },
		{ "C:\\Foobar\\..", L"\\\\?\\C:\\" },
		{ "C:/Foo/Bar/../Foobar", L"\\\\?\\C:\\Foo\\Foobar" },
		{ "C:/Foo/Bar/../Foobar/../Asdf/", L"\\\\?\\C:\\Foo\\Asdf" },
		{ "C:/Foo/Bar/../Foobar/..", L"\\\\?\\C:\\Foo" },
		{ "C:/Foo/..", L"\\\\?\\C:\\" },
		{ "C:\\Foo\\Bar\\.\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar" },
		{ "C:\\.\\Foo\\.\\Bar\\.\\Foobar\\.\\", L"\\\\?\\C:\\Foo\\Bar\\Foobar" },
		{ "C:/Foo/Bar/./Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar" },
		{ "C:/Foo/../Bar/./Foobar/../", L"\\\\?\\C:\\Bar" },
		{ "C:\\Foo\\..\\..\\Bar", L"\\\\?\\C:\\Bar" }
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		test_utf8_to_utf16(cases[i].utf8, cases[i].utf16);
#endif
}
/* A rooted path without a drive letter takes its drive from the current
 * working directory.
 */
void test_path_win32__absolute_from_no_drive_letter(void)
{
#ifdef GIT_WIN32
	char cwd_backup[MAX_PATH];

	/* The expectations name drive C:, so make sure that is the current
	 * drive instead of depending on where the test suite was started.
	 */
	cl_must_pass(p_getcwd(cwd_backup, MAX_PATH));
	cl_must_pass(p_chdir("C:/"));

	test_utf8_to_utf16("\\Foo", L"\\\\?\\C:\\Foo");
	test_utf8_to_utf16("\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar");
	test_utf8_to_utf16("/Foo/Bar", L"\\\\?\\C:\\Foo\\Bar");

	cl_must_pass(p_chdir(cwd_backup));
#endif
}
/* Relative paths are resolved against the current working directory, first
 * from the root of drive C: and then from C:\Windows.  The original working
 * directory is restored afterwards.
 */
void test_path_win32__absolute_from_relative(void)
{
#ifdef GIT_WIN32
	struct expectation {
		const char *utf8;
		const wchar_t *utf16;
	};
	static const struct expectation from_root[] = {
		{ "Foo", L"\\\\?\\C:\\Foo" },
		{ "..\\..\\Foo", L"\\\\?\\C:\\Foo" },
		{ "Foo\\..", L"\\\\?\\C:\\" },
		{ "Foo\\..\\..", L"\\\\?\\C:\\" },
		{ "", L"\\\\?\\C:\\" }
	};
	static const struct expectation from_windows[] = {
		{ "Foo", L"\\\\?\\C:\\Windows\\Foo" },
		{ "Foo\\Bar", L"\\\\?\\C:\\Windows\\Foo\\Bar" },
		{ "..\\Foo", L"\\\\?\\C:\\Foo" },
		{ "Foo\\..\\Bar", L"\\\\?\\C:\\Windows\\Bar" },
		{ "", L"\\\\?\\C:\\Windows" }
	};
	char cwd_backup[MAX_PATH];
	size_t i;

	cl_must_pass(p_getcwd(cwd_backup, MAX_PATH));

	cl_must_pass(p_chdir("C:/"));
	for (i = 0; i < sizeof(from_root) / sizeof(from_root[0]); i++)
		test_utf8_to_utf16(from_root[i].utf8, from_root[i].utf16);

	cl_must_pass(p_chdir("C:/Windows"));
	for (i = 0; i < sizeof(from_windows) / sizeof(from_windows[0]); i++)
		test_utf8_to_utf16(from_windows[i].utf8, from_windows[i].utf16);

	cl_must_pass(p_chdir(cwd_backup));
#endif
}
/*
 * Shared helper: copy `in` into a git_win32_path buffer, canonicalize the
 * copy in place and check that it equals `expected`.  Does nothing when
 * GIT_WIN32 is not defined.
 */
void test_canonicalize(const wchar_t *in, const wchar_t *expected)
{
#ifdef GIT_WIN32
git_win32_path canonical;
/* Guard the copy below: the fixture must fit the buffer */
cl_assert(wcslen(in) < MAX_PATH);
wcscpy(canonical, in);
cl_must_pass(git_win32_path_canonicalize(canonical));
cl_assert_equal_wcs(expected, canonical);
#else
/* Keep the parameters referenced so the build stays warning-free */
GIT_UNUSED(in);
GIT_UNUSED(expected);
#endif
}
/* Exercises git_win32_path_canonicalize() directly on drive paths, relative
 * paths, NT-namespaced paths (drive and UNC) and plain UNC paths.
 */
void test_path_win32__canonicalize(void)
{
#ifdef GIT_WIN32
	static const struct {
		const wchar_t *in;
		const wchar_t *expected;
	} cases[] = {
		/* Drive-letter paths */
		{ L"C:\\Foo\\Bar", L"C:\\Foo\\Bar" },
		{ L"C:\\Foo\\", L"C:\\Foo" },
		{ L"C:\\Foo\\\\", L"C:\\Foo" },
		{ L"C:\\Foo\\..\\Bar", L"C:\\Bar" },
		{ L"C:\\Foo\\..\\..\\Bar", L"C:\\Bar" },
		{ L"C:\\Foo\\..\\..\\..\\..\\", L"C:\\" },
		{ L"C:/Foo/Bar", L"C:\\Foo\\Bar" },
		{ L"C:/", L"C:\\" },

		/* Paths without any prefix */
		{ L"Foo\\\\Bar\\\\Asdf\\\\", L"Foo\\Bar\\Asdf" },
		{ L"Foo\\\\Bar\\\\..\\\\Asdf\\", L"Foo\\Asdf" },
		{ L"Foo\\\\Bar\\\\.\\\\Asdf\\", L"Foo\\Bar\\Asdf" },
		{ L"Foo\\\\..\\Bar\\\\.\\\\Asdf\\", L"Bar\\Asdf" },
		{ L"\\", L"" },
		{ L"", L"" },
		{ L"Foo\\..\\..\\..\\..", L"" },
		{ L"..\\..\\..\\..", L"" },
		{ L"\\..\\..\\..\\..", L"" },

		/* NT-namespaced drive paths */
		{ L"\\\\?\\C:\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar" },
		{ L"\\\\?\\C:\\Foo\\Bar\\", L"\\\\?\\C:\\Foo\\Bar" },
		{ L"\\\\?\\C:\\\\Foo\\.\\Bar\\\\..\\", L"\\\\?\\C:\\Foo" },
		{ L"\\\\?\\C:\\\\", L"\\\\?\\C:\\" },
		{ L"//?/C:/", L"\\\\?\\C:\\" },
		{ L"//?/C:/../../Foo/", L"\\\\?\\C:\\Foo" },
		{ L"//?/C:/Foo/../../", L"\\\\?\\C:\\" },

		/* NT-namespaced UNC paths */
		{ L"\\\\?\\UNC\\server\\C$\\folder", L"\\\\?\\UNC\\server\\C$\\folder" },
		{ L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder" },
		{ L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder" },
		{ L"\\\\?\\UNC\\server\\C$\\folder\\..\\..\\..\\..\\share\\", L"\\\\?\\UNC\\server\\share" },

		/* Plain UNC paths */
		{ L"\\\\server\\share", L"\\\\server\\share" },
		{ L"\\\\server\\share\\", L"\\\\server\\share" },
		{ L"\\\\server\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar" },
		{ L"\\\\server\\\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar" },
		{ L"\\\\server\\share\\..\\foo", L"\\\\server\\foo" },
		{ L"\\\\server\\..\\..\\share\\.\\foo", L"\\\\server\\share\\foo" }
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		test_canonicalize(cases[i].in, cases[i].expected);
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment