Commit 3b73a034 by Vicent Martí Committed by Vicent Marti

UTF-8 changes yo

parent 319ad0ba
......@@ -29,6 +29,98 @@ void gitwin_set_utf8(void)
_active_codepage = CP_UTF8;
}
/*
 * Split a supplementary code point (U+10000..U+10FFFF) into its UTF-16
 * lead and trail surrogates. The lead value is masked to 16 bits so the
 * result does not depend on how wide wchar_t is on the target platform.
 */
#define U16_LEAD(c) (wchar_t)((((c)>>10)+0xd7c0)&0xffff)
#define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00)

/*
 * Leniently convert `srcLength` bytes of UTF-8 at `src` into a
 * NUL-terminated sequence of UTF-16 code units at `dest`.
 *
 * The input is not validated: the lead byte alone selects the sequence
 * length, and a byte below 0xc0 (ASCII or a stray trail byte) is copied
 * through as a single unit so decoding resynchronizes after garbage.
 * If the input ends in the middle of a multi-byte sequence, U+FFFD is
 * emitted for it and conversion stops.
 *
 * The capacity of `dest` is NOT checked. No source byte produces more
 * than one output unit, so `srcLength + 1` wchar_t elements (including
 * the terminator) are always sufficient.
 *
 * All arguments must be non-NULL and `srcLength` must be non-zero
 * (enforced with assert).
 */
void git__utf8_to_16(wchar_t *dest, const char *src, size_t srcLength)
{
	wchar_t *pDest = dest;
	uint32_t ch;
	const uint8_t *pSrc = (const uint8_t *)src;
	const uint8_t *pSrcLimit = pSrc + srcLength;

	assert(dest && src && srcLength > 0);

	/*
	 * Fast path: while at least 4 bytes remain, any sequence can be
	 * decoded without per-byte bounds checks.
	 */
	if ((pSrcLimit - pSrc) >= 4) {
		pSrcLimit -= 3; /* temporarily reduce pSrcLimit */

		/* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
		do {
			ch = *pSrc++;
			if (ch < 0xc0) {
				/*
				 * ASCII, or a trail byte in lead position which is treated like
				 * a single-byte sequence for better character boundary
				 * resynchronization after illegal sequences.
				 */
				*pDest++ = (wchar_t)ch;
			} else if (ch < 0xe0) { /* U+0080..U+07FF */
				/* 0x3080 = (0xc0 << 6) + 0x80 */
				*pDest++ = (wchar_t)(((ch << 6) + *pSrc++ - 0x3080) & 0xffff);
			} else if (ch < 0xf0) { /* U+0800..U+FFFF */
				/*
				 * The lead byte is not masked with 0xf; its upper bits land
				 * above bit 15 after <<12 and are removed by the explicit
				 * 16-bit mask (not by relying on the width of wchar_t).
				 */
				/* 0x2080 = (0x80 << 6) + 0x80 */
				ch = (ch << 12) + (*pSrc++ << 6);
				*pDest++ = (wchar_t)((ch + *pSrc++ - 0x2080) & 0xffff);
			} else /* f0..f4 */ { /* U+10000..U+10FFFF */
				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
				ch = (ch << 18) + (*pSrc++ << 12);
				ch += *pSrc++ << 6;
				ch += *pSrc++ - 0x3c82080;
				*(pDest++) = U16_LEAD(ch);
				*(pDest++) = U16_TRAIL(ch);
			}
		} while (pSrc < pSrcLimit);

		pSrcLimit += 3; /* restore original pSrcLimit */
	}

	/*
	 * Tail: at most 3 bytes remain, so every multi-byte sequence must be
	 * checked against the end of the input before its trail bytes are read.
	 */
	while (pSrc < pSrcLimit) {
		ch = *pSrc++;
		if (ch < 0xc0) {
			/* ASCII or stray trail byte: copied through, as in the fast path */
			*pDest++ = (wchar_t)ch;
			continue;
		} else if (ch < 0xe0) { /* U+0080..U+07FF */
			if (pSrc < pSrcLimit) {
				/* 0x3080 = (0xc0 << 6) + 0x80 */
				*pDest++ = (wchar_t)(((ch << 6) + *pSrc++ - 0x3080) & 0xffff);
				continue;
			}
		} else if (ch < 0xf0) { /* U+0800..U+FFFF */
			if ((pSrcLimit - pSrc) >= 2) {
				/* 0x2080 = (0x80 << 6) + 0x80 */
				ch = (ch << 12) + (*pSrc++ << 6);
				*pDest++ = (wchar_t)((ch + *pSrc++ - 0x2080) & 0xffff);
				/* pSrc already points past the sequence; no extra advance */
				continue;
			}
		} else /* f0..f4 */ { /* U+10000..U+10FFFF */
			if ((pSrcLimit - pSrc) >= 3) {
				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
				ch = (ch << 18) + (*pSrc++ << 12);
				ch += *pSrc++ << 6;
				ch += *pSrc++ - 0x3c82080;
				*(pDest++) = U16_LEAD(ch);
				*(pDest++) = U16_TRAIL(ch);
				/* pSrc already points past the sequence; no extra advance */
				continue;
			}
		}

		/* truncated character at the end */
		*pDest++ = 0xfffd;
		break;
	}

	*pDest++ = 0x0;
}
wchar_t* gitwin_to_utf16(const char* str)
{
wchar_t* ret;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment