Commit 0f4c6175 by Vicent Marti

Add bounds checking to UTF-8 conversion

parent 6813169a
...@@ -56,7 +56,7 @@ int git_futils_creat_locked(const char *path, const mode_t mode) ...@@ -56,7 +56,7 @@ int git_futils_creat_locked(const char *path, const mode_t mode)
#ifdef GIT_WIN32 #ifdef GIT_WIN32
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
fd = _wopen(buf, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode); fd = _wopen(buf, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode);
#else #else
fd = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode); fd = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode);
...@@ -381,7 +381,7 @@ static int win32_expand_path(struct win32_path *s_root, const wchar_t *templ) ...@@ -381,7 +381,7 @@ static int win32_expand_path(struct win32_path *s_root, const wchar_t *templ)
static int win32_find_file(git_buf *path, const struct win32_path *root, const char *filename) static int win32_find_file(git_buf *path, const struct win32_path *root, const char *filename)
{ {
size_t len; size_t len, alloc_len;
wchar_t *file_utf16 = NULL; wchar_t *file_utf16 = NULL;
char file_utf8[GIT_PATH_MAX]; char file_utf8[GIT_PATH_MAX];
...@@ -389,7 +389,8 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c ...@@ -389,7 +389,8 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c
return GIT_ENOTFOUND; return GIT_ENOTFOUND;
/* allocate space for wchar_t path to file */ /* allocate space for wchar_t path to file */
file_utf16 = git__calloc(root->len + len + 2, sizeof(wchar_t)); alloc_len = root->len + len + 2;
file_utf16 = git__calloc(alloc_len, sizeof(wchar_t));
GITERR_CHECK_ALLOC(file_utf16); GITERR_CHECK_ALLOC(file_utf16);
/* append root + '\\' + filename as wchar_t */ /* append root + '\\' + filename as wchar_t */
...@@ -398,7 +399,7 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c ...@@ -398,7 +399,7 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c
if (*filename == '/' || *filename == '\\') if (*filename == '/' || *filename == '\\')
filename++; filename++;
git__utf8_to_16(file_utf16 + root->len - 1, filename); git__utf8_to_16(file_utf16 + root->len - 1, alloc_len, filename);
/* check access */ /* check access */
if (_waccess(file_utf16, F_OK) < 0) { if (_waccess(file_utf16, F_OK) < 0) {
......
...@@ -432,14 +432,14 @@ bool git_path_is_empty_dir(const char *path) ...@@ -432,14 +432,14 @@ bool git_path_is_empty_dir(const char *path)
{ {
git_buf pathbuf = GIT_BUF_INIT; git_buf pathbuf = GIT_BUF_INIT;
HANDLE hFind = INVALID_HANDLE_VALUE; HANDLE hFind = INVALID_HANDLE_VALUE;
wchar_t *wbuf; wchar_t wbuf[GIT_WIN_PATH];
WIN32_FIND_DATAW ffd; WIN32_FIND_DATAW ffd;
bool retval = true; bool retval = true;
if (!git_path_isdir(path)) return false; if (!git_path_isdir(path)) return false;
git_buf_printf(&pathbuf, "%s\\*", path); git_buf_printf(&pathbuf, "%s\\*", path);
wbuf = gitwin_to_utf16(git_buf_cstr(&pathbuf)); git__utf8_to_16(wbuf, GIT_WIN_PATH, git_buf_cstr(&pathbuf));
hFind = FindFirstFileW(wbuf, &ffd); hFind = FindFirstFileW(wbuf, &ffd);
if (INVALID_HANDLE_VALUE == hFind) { if (INVALID_HANDLE_VALUE == hFind) {
...@@ -455,7 +455,6 @@ bool git_path_is_empty_dir(const char *path) ...@@ -455,7 +455,6 @@ bool git_path_is_empty_dir(const char *path)
FindClose(hFind); FindClose(hFind);
git_buf_free(&pathbuf); git_buf_free(&pathbuf);
git__free(wbuf);
return retval; return retval;
} }
......
...@@ -40,7 +40,7 @@ git__DIR *git__opendir(const char *dir) ...@@ -40,7 +40,7 @@ git__DIR *git__opendir(const char *dir)
if (!new->dir) if (!new->dir)
goto fail; goto fail;
git__utf8_to_16(filter_w, filter); git__utf8_to_16(filter_w, GIT_WIN_PATH, filter);
new->h = FindFirstFileW(filter_w, &new->f); new->h = FindFirstFileW(filter_w, &new->f);
if (new->h == INVALID_HANDLE_VALUE) { if (new->h == INVALID_HANDLE_VALUE) {
...@@ -116,7 +116,7 @@ void git__rewinddir(git__DIR *d) ...@@ -116,7 +116,7 @@ void git__rewinddir(git__DIR *d)
if (!init_filter(filter, sizeof(filter), d->dir)) if (!init_filter(filter, sizeof(filter), d->dir))
return; return;
git__utf8_to_16(filter_w, filter); git__utf8_to_16(filter_w, GIT_WIN_PATH, filter);
d->h = FindFirstFileW(filter_w, &d->f); d->h = FindFirstFileW(filter_w, &d->f);
if (d->h == INVALID_HANDLE_VALUE) if (d->h == INVALID_HANDLE_VALUE)
......
...@@ -23,7 +23,7 @@ GIT_INLINE(int) p_mkdir(const char *path, mode_t mode) ...@@ -23,7 +23,7 @@ GIT_INLINE(int) p_mkdir(const char *path, mode_t mode)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
GIT_UNUSED(mode); GIT_UNUSED(mode);
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return _wmkdir(buf); return _wmkdir(buf);
} }
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
int p_unlink(const char *path) int p_unlink(const char *path)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
_wchmod(buf, 0666); _wchmod(buf, 0666);
return _wunlink(buf); return _wunlink(buf);
} }
...@@ -58,7 +58,7 @@ static int do_lstat(const char *file_name, struct stat *buf) ...@@ -58,7 +58,7 @@ static int do_lstat(const char *file_name, struct stat *buf)
wchar_t fbuf[GIT_WIN_PATH]; wchar_t fbuf[GIT_WIN_PATH];
DWORD last_error; DWORD last_error;
git__utf8_to_16(fbuf, file_name); git__utf8_to_16(fbuf, GIT_WIN_PATH, file_name);
if (GetFileAttributesExW(fbuf, GetFileExInfoStandard, &fdata)) { if (GetFileAttributesExW(fbuf, GetFileExInfoStandard, &fdata)) {
int fMode = S_IREAD; int fMode = S_IREAD;
...@@ -157,7 +157,7 @@ int p_readlink(const char *link, char *target, size_t target_len) ...@@ -157,7 +157,7 @@ int p_readlink(const char *link, char *target, size_t target_len)
} }
} }
git__utf8_to_16(link_w, link); git__utf8_to_16(link_w, GIT_WIN_PATH, link);
hFile = CreateFileW(link_w, // file to open hFile = CreateFileW(link_w, // file to open
GENERIC_READ, // open for reading GENERIC_READ, // open for reading
...@@ -226,7 +226,7 @@ int p_open(const char *path, int flags, ...) ...@@ -226,7 +226,7 @@ int p_open(const char *path, int flags, ...)
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
mode_t mode = 0; mode_t mode = 0;
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
if (flags & O_CREAT) { if (flags & O_CREAT) {
va_list arg_list; va_list arg_list;
...@@ -242,7 +242,7 @@ int p_open(const char *path, int flags, ...) ...@@ -242,7 +242,7 @@ int p_open(const char *path, int flags, ...)
int p_creat(const char *path, mode_t mode) int p_creat(const char *path, mode_t mode)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | _O_BINARY, mode); return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | _O_BINARY, mode);
} }
...@@ -274,28 +274,28 @@ int p_stat(const char* path, struct stat* buf) ...@@ -274,28 +274,28 @@ int p_stat(const char* path, struct stat* buf)
int p_chdir(const char* path) int p_chdir(const char* path)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return _wchdir(buf); return _wchdir(buf);
} }
int p_chmod(const char* path, mode_t mode) int p_chmod(const char* path, mode_t mode)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return _wchmod(buf, mode); return _wchmod(buf, mode);
} }
int p_rmdir(const char* path) int p_rmdir(const char* path)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return _wrmdir(buf); return _wrmdir(buf);
} }
int p_hide_directory__w32(const char *path) int p_hide_directory__w32(const char *path)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return (SetFileAttributesW(buf, FILE_ATTRIBUTE_HIDDEN) != 0) ? 0 : -1; return (SetFileAttributesW(buf, FILE_ATTRIBUTE_HIDDEN) != 0) ? 0 : -1;
} }
...@@ -305,7 +305,7 @@ char *p_realpath(const char *orig_path, char *buffer) ...@@ -305,7 +305,7 @@ char *p_realpath(const char *orig_path, char *buffer)
wchar_t orig_path_w[GIT_WIN_PATH]; wchar_t orig_path_w[GIT_WIN_PATH];
wchar_t buffer_w[GIT_WIN_PATH]; wchar_t buffer_w[GIT_WIN_PATH];
git__utf8_to_16(orig_path_w, orig_path); git__utf8_to_16(orig_path_w, GIT_WIN_PATH, orig_path);
ret = GetFullPathNameW(orig_path_w, GIT_WIN_PATH, buffer_w, NULL); ret = GetFullPathNameW(orig_path_w, GIT_WIN_PATH, buffer_w, NULL);
/* According to MSDN, a return value equals to zero means a failure. */ /* According to MSDN, a return value equals to zero means a failure. */
...@@ -399,7 +399,7 @@ int p_setenv(const char* name, const char* value, int overwrite) ...@@ -399,7 +399,7 @@ int p_setenv(const char* name, const char* value, int overwrite)
int p_access(const char* path, mode_t mode) int p_access(const char* path, mode_t mode)
{ {
wchar_t buf[GIT_WIN_PATH]; wchar_t buf[GIT_WIN_PATH];
git__utf8_to_16(buf, path); git__utf8_to_16(buf, GIT_WIN_PATH, path);
return _waccess(buf, mode); return _waccess(buf, mode);
} }
...@@ -408,8 +408,8 @@ int p_rename(const char *from, const char *to) ...@@ -408,8 +408,8 @@ int p_rename(const char *from, const char *to)
wchar_t wfrom[GIT_WIN_PATH]; wchar_t wfrom[GIT_WIN_PATH];
wchar_t wto[GIT_WIN_PATH]; wchar_t wto[GIT_WIN_PATH];
git__utf8_to_16(wfrom, from); git__utf8_to_16(wfrom, GIT_WIN_PATH, from);
git__utf8_to_16(wto, to); git__utf8_to_16(wto, GIT_WIN_PATH, to);
return MoveFileExW(wfrom, wto, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED) ? 0 : -1; return MoveFileExW(wfrom, wto, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED) ? 0 : -1;
} }
......
...@@ -11,83 +11,52 @@ ...@@ -11,83 +11,52 @@
#define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0) #define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0)
#define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00) #define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00)
void git__utf8_to_16(wchar_t *dest, const char *src) #if 0
void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
{ {
wchar_t *pDest = dest; wchar_t *pDest = dest;
uint32_t ch; uint32_t ch;
const uint8_t* pSrc = (uint8_t*) src; const uint8_t* pSrc = (uint8_t*) src;
const uint8_t *pSrcLimit = pSrc + strlen(src);
assert(dest && src); assert(dest && src && length);
if ((pSrcLimit - pSrc) >= 4) { length--;
pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
/* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ while(*pSrc && length > 0) {
do {
ch = *pSrc++; ch = *pSrc++;
if(ch < 0xc0) { length--;
/*
* ASCII, or a trail byte in lead position which is treated like
* a single-byte sequence for better character boundary
* resynchronization after illegal sequences.
*/
*pDest++=(wchar_t)ch;
} else if(ch < 0xe0) { /* U+0080..U+07FF */
/* 0x3080 = (0xc0 << 6) + 0x80 */
*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
} else if(ch < 0xf0) { /* U+0800..U+FFFF */
/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
/* 0x2080 = (0x80 << 6) + 0x80 */
ch = (ch << 12) + (*pSrc++ << 6);
*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
} else /* f0..f4 */ { /* U+10000..U+10FFFF */
/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
ch = (ch << 18) + (*pSrc++ << 12);
ch += *pSrc++ << 6;
ch += *pSrc++ - 0x3c82080;
*(pDest++) = U16_LEAD(ch);
*(pDest++) = U16_TRAIL(ch);
}
} while(pSrc < pSrcLimit);
pSrcLimit += 3; /* restore original pSrcLimit */
}
while(pSrc < pSrcLimit) {
ch = *pSrc++;
if(ch < 0xc0) { if(ch < 0xc0) {
/* /*
* ASCII, or a trail byte in lead position which is treated like * ASCII, or a trail byte in lead position which is treated like
* a single-byte sequence for better character boundary * a single-byte sequence for better character boundary
* resynchronization after illegal sequences. * resynchronization after illegal sequences.
*/ */
*pDest++=(wchar_t)ch; *pDest++ = (wchar_t)ch;
continue; continue;
} else if(ch < 0xe0) { /* U+0080..U+07FF */ } else if(ch < 0xe0) { /* U+0080..U+07FF */
if(pSrc < pSrcLimit) { if (pSrc[0]) {
/* 0x3080 = (0xc0 << 6) + 0x80 */ /* 0x3080 = (0xc0 << 6) + 0x80 */
*pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080); *pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
continue; continue;
} }
} else if(ch < 0xf0) { /* U+0800..U+FFFF */ } else if(ch < 0xf0) { /* U+0800..U+FFFF */
if((pSrcLimit - pSrc) >= 2) { if (pSrc[0] && pSrc[1]) {
/* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
/* 0x2080 = (0x80 << 6) + 0x80 */ /* 0x2080 = (0x80 << 6) + 0x80 */
ch = (ch << 12) + (*pSrc++ << 6); ch = (ch << 12) + (*pSrc++ << 6);
*pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080); *pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
pSrc += 3;
continue; continue;
} }
} else /* f0..f4 */ { /* U+10000..U+10FFFF */ } else /* f0..f4 */ { /* U+10000..U+10FFFF */
if((pSrcLimit - pSrc) >= 3) { if (length >= 1 && pSrc[0] && pSrc[1] && pSrc[2]) {
/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
ch = (ch << 18) + (*pSrc++ << 12); ch = (ch << 18) + (*pSrc++ << 12);
ch += *pSrc++ << 6; ch += *pSrc++ << 6;
ch += *pSrc++ - 0x3c82080; ch += *pSrc++ - 0x3c82080;
*(pDest++) = U16_LEAD(ch); *(pDest++) = U16_LEAD(ch);
*(pDest++) = U16_TRAIL(ch); *(pDest++) = U16_TRAIL(ch);
pSrc += 4; length--; /* two bytes for this character */
continue; continue;
} }
} }
...@@ -99,6 +68,12 @@ void git__utf8_to_16(wchar_t *dest, const char *src) ...@@ -99,6 +68,12 @@ void git__utf8_to_16(wchar_t *dest, const char *src)
*pDest++ = 0x0; *pDest++ = 0x0;
} }
#endif
/*
 * Convert the NUL-terminated UTF-8 string `src` to UTF-16, writing at most
 * `length` wchar_t units (terminator included) into `dest`.
 *
 * The conversion is delegated to MultiByteToWideChar. This function returns
 * void, so a failed conversion cannot be reported to the caller; instead
 * `dest` is set to the empty string so that callers never hand an
 * unterminated buffer to a wide-character API.
 */
void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
{
	/* The API takes the destination capacity as an int; narrow explicitly. */
	int written = MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)length);

	/*
	 * A zero return means the conversion failed (for example because
	 * `dest` is too small); the buffer is then not guaranteed to be
	 * terminated, so terminate it here. With length == 0 nothing may be
	 * written to `dest` at all.
	 */
	if (written == 0 && length > 0)
		dest[0] = L'\0';
}
void git__utf16_to_8(char *out, const wchar_t *input) void git__utf16_to_8(char *out, const wchar_t *input)
{ {
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#define GIT_WIN_PATH (260 + 1) #define GIT_WIN_PATH (260 + 1)
void git__utf8_to_16(wchar_t *dest, const char *src); void git__utf8_to_16(wchar_t *dest, size_t length, const char *src);
void git__utf16_to_8(char *dest, const wchar_t *src); void git__utf16_to_8(char *dest, const wchar_t *src);
#endif #endif
......
...@@ -60,7 +60,7 @@ char *cl_getenv(const char *name) ...@@ -60,7 +60,7 @@ char *cl_getenv(const char *name)
wchar_t *value_utf16; wchar_t *value_utf16;
char *value_utf8; char *value_utf8;
git__utf8_to_16(name_utf16, name); git__utf8_to_16(name_utf16, GIT_WIN_PATH, name);
alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0); alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0);
if (alloc_len <= 0) if (alloc_len <= 0)
return NULL; return NULL;
...@@ -83,10 +83,10 @@ int cl_setenv(const char *name, const char *value) ...@@ -83,10 +83,10 @@ int cl_setenv(const char *name, const char *value)
wchar_t name_utf16[GIT_WIN_PATH]; wchar_t name_utf16[GIT_WIN_PATH];
wchar_t value_utf16[GIT_WIN_PATH]; wchar_t value_utf16[GIT_WIN_PATH];
git__utf8_to_16(name_utf16, name); git__utf8_to_16(name_utf16, GIT_WIN_PATH, name);
if (value != NULL) if (value != NULL)
git__utf8_to_16(value_utf16, value); git__utf8_to_16(value_utf16, GIT_WIN_PATH, value);
cl_assert(SetEnvironmentVariableW(name_utf16, value ? value_utf16 : NULL)); cl_assert(SetEnvironmentVariableW(name_utf16, value ? value_utf16 : NULL));
return 0; return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment