/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */

#include "common.h"
9 10
#include "utf-conv.h"

Vicent Martí committed
11 12 13
/*
 * Split a supplementary code point c (U+10000..U+10FFFF) into its UTF-16
 * surrogate pair.  0xd7c0 is 0xd800 - (0x10000 >> 10), so U16_LEAD removes
 * the 0x10000 bias and adds the lead-surrogate base in a single addition.
 * U16_TRAIL keeps the low ten bits and adds the trail-surrogate base.
 *
 * Each expansion is fully parenthesized so that it remains a single
 * operand when used next to sizeof or a postfix operator.
 */
#define U16_LEAD(c) ((wchar_t)(((c)>>10)+0xd7c0))
#define U16_TRAIL(c) ((wchar_t)(((c)&0x3ff)|0xdc00))

14 15
#if 0
/*
 * Hand-rolled conversion of the NUL-terminated UTF-8 string `src` into
 * UTF-16 code units written to `dest`.
 *
 * `length` is the capacity of `dest` in wchar_t elements, including the
 * slot for the terminating NUL; it must be non-zero.  The output is always
 * NUL-terminated.
 *
 * Conversion stops at the end of `src` or when `dest` is full.  If a
 * multi-byte sequence is cut short by the end of the string, or a
 * supplementary character no longer fits as a surrogate pair, U+FFFD is
 * written and conversion stops there.
 *
 * Continuation bytes are not validated: malformed input is decoded
 * leniently rather than rejected.
 */
void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
{
	wchar_t *out = dest;
	const uint8_t *in = (const uint8_t *)src;
	uint32_t ch;

	assert(dest && src && length);

	/* Reserve one output slot for the terminating NUL. */
	length--;

	while (*in && length > 0) {
		ch = *in++;
		length--;

		if (ch < 0xc0) {
			/*
			 * ASCII, or a trail byte in lead position which is treated like
			 * a single-byte sequence for better character boundary
			 * resynchronization after illegal sequences.
			 */
			*out++ = (wchar_t)ch;
			continue;
		} else if (ch < 0xe0) { /* U+0080..U+07FF */
			if (in[0]) {
				/* 0x3080 = (0xc0 << 6) + 0x80 */
				ch = (ch << 6) + *in++ - 0x3080;
				/* Mask to one code unit regardless of wchar_t width. */
				*out++ = (wchar_t)(ch & 0xffff);
				continue;
			}
		} else if (ch < 0xf0) { /* U+0800..U+FFFF */
			if (in[0] && in[1]) {
				/* 0x2080 = (0x80 << 6) + 0x80 */
				ch = (ch << 12) + (*in++ << 6);
				ch += *in++ - 0x2080;
				/*
				 * The lead byte's 0xe0 marker bits end up above bit 15
				 * after the shift; mask them off explicitly instead of
				 * relying on wchar_t being 16 bits wide.
				 */
				*out++ = (wchar_t)(ch & 0xffff);
				continue;
			}
		} else /* f0..f4 */ { /* U+10000..U+10FFFF */
			/* A surrogate pair needs a second free output slot. */
			if (length >= 1 && in[0] && in[1] && in[2]) {
				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
				ch = (ch << 18) + (*in++ << 12);
				ch += *in++ << 6;
				ch += *in++ - 0x3c82080;
				/* Lead surrogate: 0xd7c0 = 0xd800 - (0x10000 >> 10). */
				*out++ = (wchar_t)(((ch >> 10) + 0xd7c0) & 0xffff);
				/* Trail surrogate: low ten bits on top of 0xdc00. */
				*out++ = (wchar_t)((ch & 0x3ff) | 0xdc00);
				length--; /* second output slot used by this character */
				continue;
			}
		}

		/* truncated character at the end */
		*out++ = 0xfffd;
		break;
	}

	*out = 0x0;
}
71 72
#endif

73
int git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
74
{
75
	return MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)length);
76
}
Vicent Martí committed
77

78
int git__utf16_to_8(char *out, const wchar_t *input)
79
{
80
	return WideCharToMultiByte(CP_UTF8, 0, input, -1, out, GIT_WIN_PATH, NULL, NULL);
81
}