Commit 9e35d7fd by Russell Belfer

Fix bugs in UTF-8 <-> UTF-16 conversion

The function to convert UTF-16 to UTF-8 was only allocating a
buffer of wcslen(utf16str) bytes for the UTF-8 string, but that
is not sufficient if you have multibyte characters, and so when
those occurred, the conversion was failing.  This updates the
conversion functions to use the Win APIs to calculate the correct
buffer lengths.

Also fixes a comparison in the unit tests that would fail if
you did not have a particular environment variable set.
parent 23059130
...@@ -32,19 +32,16 @@ void gitwin_set_utf8(void) ...@@ -32,19 +32,16 @@ void gitwin_set_utf8(void)
wchar_t* gitwin_to_utf16(const char* str) wchar_t* gitwin_to_utf16(const char* str)
{ {
wchar_t* ret; wchar_t* ret;
size_t cb; int cb;
if (!str) if (!str)
return NULL; return NULL;
cb = strlen(str) * sizeof(wchar_t); cb = MultiByteToWideChar(_active_codepage, 0, str, -1, NULL, 0);
if (cb == 0) if (cb == 0)
return (wchar_t *)git__calloc(1, sizeof(wchar_t)); return (wchar_t *)git__calloc(1, sizeof(wchar_t));
/* Add space for null terminator */ ret = (wchar_t *)git__malloc(cb * sizeof(wchar_t));
cb += sizeof(wchar_t);
ret = (wchar_t *)git__malloc(cb);
if (!ret) if (!ret)
return NULL; return NULL;
...@@ -59,7 +56,8 @@ wchar_t* gitwin_to_utf16(const char* str) ...@@ -59,7 +56,8 @@ wchar_t* gitwin_to_utf16(const char* str)
int gitwin_append_utf16(wchar_t *buffer, const char *str, size_t len) int gitwin_append_utf16(wchar_t *buffer, const char *str, size_t len)
{ {
int result = MultiByteToWideChar(_active_codepage, 0, str, -1, buffer, (int)len); int result = MultiByteToWideChar(
_active_codepage, 0, str, -1, buffer, (int)len);
if (result == 0) if (result == 0)
giterr_set(GITERR_OS, "Could not convert string to UTF-16"); giterr_set(GITERR_OS, "Could not convert string to UTF-16");
return result; return result;
...@@ -68,23 +66,22 @@ int gitwin_append_utf16(wchar_t *buffer, const char *str, size_t len) ...@@ -68,23 +66,22 @@ int gitwin_append_utf16(wchar_t *buffer, const char *str, size_t len)
char* gitwin_from_utf16(const wchar_t* str) char* gitwin_from_utf16(const wchar_t* str)
{ {
char* ret; char* ret;
size_t cb; int cb;
if (!str) if (!str)
return NULL; return NULL;
cb = wcslen(str) * sizeof(char); cb = WideCharToMultiByte(_active_codepage, 0, str, -1, NULL, 0, NULL, NULL);
if (cb == 0) if (cb == 0)
return (char *)git__calloc(1, sizeof(char)); return (char *)git__calloc(1, sizeof(char));
/* Add space for null terminator */
cb += sizeof(char);
ret = (char*)git__malloc(cb); ret = (char*)git__malloc(cb);
if (!ret) if (!ret)
return NULL; return NULL;
if (WideCharToMultiByte(_active_codepage, 0, str, -1, ret, (int)cb, NULL, NULL) == 0) { if (WideCharToMultiByte(
_active_codepage, 0, str, -1, ret, (int)cb, NULL, NULL) == 0)
{
giterr_set(GITERR_OS, "Could not convert string to UTF-8"); giterr_set(GITERR_OS, "Could not convert string to UTF-8");
git__free(ret); git__free(ret);
ret = NULL; ret = NULL;
......
...@@ -15,12 +15,16 @@ static char *cl_getenv(const char *name) ...@@ -15,12 +15,16 @@ static char *cl_getenv(const char *name)
cl_assert(name_utf16); cl_assert(name_utf16);
alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0); alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0);
if (alloc_len < 0) if (alloc_len <= 0)
return NULL; return NULL;
cl_assert(value_utf16 = git__calloc(alloc_len, sizeof(wchar_t))); cl_assert(value_utf16 = git__calloc(alloc_len, sizeof(wchar_t)));
value_len = GetEnvironmentVariableW(name_utf16, value_utf16, alloc_len); value_len = GetEnvironmentVariableW(name_utf16, value_utf16, alloc_len);
cl_assert_equal_i(value_len, alloc_len - 1); cl_assert_equal_i(value_len, alloc_len - 1);
cl_assert(value_utf8 = gitwin_from_utf16(value_utf16)); cl_assert(value_utf8 = gitwin_from_utf16(value_utf16));
git__free(value_utf16); git__free(value_utf16);
return value_utf8; return value_utf8;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment