/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "common.h"
#include "utf-conv.h"

#include <limits.h>

11 12 13 14 15 16 17 18
/**
 * Maps the Win32 error reported by GetLastError() onto errno:
 * ERROR_INSUFFICIENT_BUFFER becomes ENAMETOOLONG, and every other
 * error code becomes EINVAL.
 */
GIT_INLINE(void) git__set_errno(void)
{
	errno = (GetLastError() == ERROR_INSUFFICIENT_BUFFER) ? ENAMETOOLONG : EINVAL;
}

19 20 21 22 23 24 25 26 27 28
/**
 * Converts a UTF-8 string to wide characters.
 *
 * @param dest The buffer to receive the wide string.
 * @param dest_size The size of the buffer, in characters.
 * @param src The UTF-8 string to convert.
 * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
 */
int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
{
29 30
	int len;

31 32 33
	/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
	* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
	* length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */
34 35 36 37
	if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0)
		git__set_errno();

	return len;
38 39 40 41 42 43 44 45 46 47
}

/**
 * Converts a wide string to UTF-8.
 *
 * @param dest The buffer to receive the UTF-8 string.
 * @param dest_size The size of the buffer, in bytes.
 * @param src The wide string to convert.
 * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
 */
48
int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)
49
{
50 51
	int len;

52 53 54
	/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
	 * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
	 * length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */
55
	if ((len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0)
56 57 58
		git__set_errno();

	return len;
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
}

/**
 * Converts a UTF-8 string to wide characters, placing the result in a
 * freshly allocated buffer.
 * Ownership of the buffer passes to the caller, who must release it
 * with git__free.
 *
 * @param dest Receives a pointer to the wide string; set to NULL on failure.
 * @param src The UTF-8 string to convert.
 * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
 */
int git__utf8_to_16_alloc(wchar_t **dest, const char *src)
{
	int required, written;

	*dest = NULL;

	/* Sizing pass: with no output buffer the call only reports how many
	 * wide characters are needed. A source length of -1 means the input
	 * is NULL-terminated. */
	required = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0);

	if (required == 0) {
		git__set_errno();
		return -1;
	}

	*dest = git__mallocarray(required, sizeof(wchar_t));

	if (*dest == NULL) {
		errno = ENOMEM;
		return -1;
	}

	/* Conversion pass into the buffer sized above. */
	written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, required);

	if (written == 0) {
		/* Record the error before releasing the buffer. */
		git__set_errno();

		git__free(*dest);
		*dest = NULL;
		return -1;
	}

	/* The reported count includes the NULL terminator; leave it out of
	 * the returned length. */
	return written - 1;
}

/**
 * Converts a wide string to UTF-8.
 * Memory is allocated to hold the converted string.
 * The caller is responsible for freeing the string with git__free.
 *
 * @param dest Receives a pointer to the UTF-8 string.
 * @param src The wide string to convert.
 * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
 */
int git__utf16_to_8_alloc(char **dest, const wchar_t *src)
{
	int utf8_size;

	*dest = NULL;

	/* Length of -1 indicates NULL termination of the input string */
120
	utf8_size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, src, -1, NULL, 0, NULL, NULL);
121

122 123
	if (!utf8_size) {
		git__set_errno();
124
		return -1;
125
	}
126 127 128

	*dest = git__malloc(utf8_size);

129 130
	if (!*dest) {
		errno = ENOMEM;
131
		return -1;
132
	}
133

134
	utf8_size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, src, -1, *dest, utf8_size, NULL, NULL);
135 136

	if (!utf8_size) {
137 138
		git__set_errno();

139 140 141 142 143 144 145 146
		git__free(*dest);
		*dest = NULL;
	}

	/* Subtract 1 from the result to turn 0 into -1 (an error code) and to not count the NULL
	 * terminator as part of the string's length. MultiByteToWideChar never returns int's minvalue,
	 * so underflow is not possible */
	return utf8_size - 1;
147
}