Commit ad6f2153 by Edward Thomson

utf8: use size_t for length of buffer

`git__utf8_charlen` now takes a `size_t` as the buffer length, since that
argument contains the full length of the buffer at the current position.
It now returns `-1` in all cases where utf8 codepoints are invalid, since
callers only care about either the valid length of a sequence of
codepoints or whether the current position is not valid utf8.
parent 5d5b76df
...@@ -802,23 +802,23 @@ static const int8_t utf8proc_utf8class[256] = { ...@@ -802,23 +802,23 @@ static const int8_t utf8proc_utf8class[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
}; };
int git__utf8_charlen(const uint8_t *str, int str_len) int git__utf8_charlen(const uint8_t *str, size_t str_len)
{ {
int length, i; size_t length, i;
length = utf8proc_utf8class[str[0]]; length = utf8proc_utf8class[str[0]];
if (!length) if (!length)
return -1; return -1;
if (str_len >= 0 && length > str_len) if (str_len > 0 && length > str_len)
return -str_len; return -1;
for (i = 1; i < length; i++) { for (i = 1; i < length; i++) {
if ((str[i] & 0xC0) != 0x80) if ((str[i] & 0xC0) != 0x80)
return -i; return -1;
} }
return length; return (int)length;
} }
int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst) int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment