Commit ad6f2153 by Edward Thomson

utf8: use size_t for length of buffer

`git__utf8_charlen` now takes the buffer length as a `size_t`, since
that argument holds the full length of the buffer at the current
position.  It now returns `-1` in all cases where the UTF-8 codepoints
are invalid, since callers only care about either the valid length of a
sequence of codepoints or whether the current position is invalid UTF-8.
parent 5d5b76df
......@@ -802,23 +802,23 @@ static const int8_t utf8proc_utf8class[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};
int git__utf8_charlen(const uint8_t *str, int str_len)
int git__utf8_charlen(const uint8_t *str, size_t str_len)
{
int length, i;
size_t length, i;
length = utf8proc_utf8class[str[0]];
if (!length)
return -1;
if (str_len >= 0 && length > str_len)
return -str_len;
if (str_len > 0 && length > str_len)
return -1;
for (i = 1; i < length; i++) {
if ((str[i] & 0xC0) != 0x80)
return -i;
return -1;
}
return length;
return (int)length;
}
int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment