/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "utf-conv.h"

#include <limits.h>

10 11 12 13 14 15 16 17
GIT_INLINE(void) git__set_errno(void)
{
	if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
		errno = ENAMETOOLONG;
	else
		errno = EINVAL;
}

18 19 20 21 22 23 24 25 26 27
/**
 * Converts a UTF-8 string to wide characters.
 *
 * @param dest The buffer to receive the wide string.
 * @param dest_size The size of the buffer, in characters.
 * @param src The UTF-8 string to convert.
 * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
 */
int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
{
28 29
	int len;

30 31 32
	/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
	* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
	* length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */
33 34 35 36
	if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0)
		git__set_errno();

	return len;
37 38 39 40 41 42 43 44 45 46
}

/**
 * Converts a wide string to UTF-8.
 *
 * @param dest The buffer to receive the UTF-8 string.
 * @param dest_size The size of the buffer, in bytes.
 * @param src The wide string to convert.
 * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
 */
47
int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)
48
{
49 50
	int len;

51 52 53
	/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
	 * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
	 * length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */
54
	if ((len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0)
55 56 57
		git__set_errno();

	return len;
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
}

/**
 * Converts a UTF-8 string to wide characters, allocating the result.
 * On success the caller owns the returned buffer and must release it
 * with git__free. On failure *dest is left NULL and errno is set.
 *
 * @param dest Receives a pointer to the newly allocated wide string.
 * @param src The UTF-8 string to convert.
 * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
 */
int git__utf8_to_16_alloc(wchar_t **dest, const char *src)
{
	wchar_t *wide;
	int needed, written;

	*dest = NULL;

	/* Sizing pass: with no output buffer the call only reports how many
	 * characters are required. The input length of -1 means src is
	 * NULL-terminated, so the terminator is included in the count. */
	needed = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0);

	if (needed == 0) {
		git__set_errno();
		return -1;
	}

	wide = git__mallocarray(needed, sizeof(wchar_t));

	if (wide == NULL) {
		errno = ENOMEM;
		return -1;
	}

	/* Conversion pass into the freshly allocated buffer. */
	written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, wide, needed);

	if (written == 0) {
		/* Capture the Win32 error before releasing the buffer. */
		git__set_errno();
		git__free(wide);
		return -1;
	}

	*dest = wide;

	/* The count includes the NULL terminator; do not report it as part of
	 * the string's length. */
	return written - 1;
}

/**
 * Converts a wide string to UTF-8.
 * Memory is allocated to hold the converted string.
 * The caller is responsible for freeing the string with git__free.
 *
 * @param dest Receives a pointer to the UTF-8 string.
 * @param src The wide string to convert.
 * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
 */
int git__utf16_to_8_alloc(char **dest, const wchar_t *src)
{
	int utf8_size;

	*dest = NULL;

	/* Length of -1 indicates NULL termination of the input string */
119
	utf8_size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, src, -1, NULL, 0, NULL, NULL);
120

121 122
	if (!utf8_size) {
		git__set_errno();
123
		return -1;
124
	}
125 126 127

	*dest = git__malloc(utf8_size);

128 129
	if (!*dest) {
		errno = ENOMEM;
130
		return -1;
131
	}
132

133
	utf8_size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, src, -1, *dest, utf8_size, NULL, NULL);
134 135

	if (!utf8_size) {
136 137
		git__set_errno();

138 139 140 141 142 143 144 145
		git__free(*dest);
		*dest = NULL;
	}

	/* Subtract 1 from the result to turn 0 into -1 (an error code) and to not count the NULL
	 * terminator as part of the string's length. MultiByteToWideChar never returns int's minvalue,
	 * so underflow is not possible */
	return utf8_size - 1;
146
}