Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
git2
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
git2
Commits
8e35527d
Commit
8e35527d
authored
Dec 16, 2014
by
Vicent Marti
Committed by
Edward Thomson
Dec 16, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
path: Use UTF8 iteration for HFS chars
parent
11d67b75
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
132 additions
and
81 deletions
+132
-81
src/path.c
+45
-81
src/util.c
+76
-0
src/util.h
+11
-0
No files found.
src/path.c
View file @
8e35527d
...
...
@@ -1282,93 +1282,57 @@ GIT_INLINE(bool) verify_dospath(
component
[
last
]
!=
':'
);
}
GIT_INLINE
(
bool
)
verify_dotgit_hfs
(
const
char
*
component
,
size_t
len
)
static
int32_t
next_hfs_char
(
const
char
**
in
,
size_t
*
len
)
{
const
unsigned
char
*
c
;
int
git
=
0
,
ign
=
0
;
unsigned
char
one
,
two
;
while
(
len
)
{
switch
(
*
(
c
=
(
const
unsigned
char
*
)
component
++
))
{
case
'.'
:
if
(
ign
||
git
++
!=
0
)
return
true
;
break
;
case
'g'
:
case
'G'
:
if
(
ign
||
git
++
!=
1
)
return
true
;
break
;
case
'i'
:
case
'I'
:
if
(
ign
||
git
++
!=
2
)
return
true
;
break
;
case
't'
:
case
'T'
:
if
(
ign
||
git
++
!=
3
)
return
true
;
break
;
case
0xe2
:
case
0xef
:
if
(
ign
++
!=
0
)
return
true
;
one
=
*
c
;
break
;
case
0x80
:
case
0x81
:
if
(
ign
++
!=
1
||
one
!=
0xe2
)
return
true
;
two
=
*
c
;
break
;
case
0xbb
:
if
(
ign
++
!=
1
||
one
!=
0xef
)
return
true
;
two
=
*
c
;
break
;
case
0x8c
:
case
0x8d
:
case
0x8e
:
case
0x8f
:
if
(
ign
!=
2
||
two
!=
0x80
)
return
true
;
ign
=
0
;
break
;
case
0xaa
:
case
0xab
:
case
0xac
:
case
0xad
:
case
0xae
:
if
(
ign
!=
2
||
(
two
!=
0x80
&&
two
!=
0x81
))
return
true
;
ign
=
0
;
break
;
case
0xaf
:
if
(
ign
!=
2
||
two
!=
0x81
)
return
true
;
ign
=
0
;
break
;
case
0xbf
:
if
(
ign
!=
2
||
two
!=
0xbb
)
return
true
;
ign
=
0
;
break
;
while
(
*
len
)
{
int32_t
codepoint
;
int
cp_len
=
git__utf8_iterate
((
const
uint8_t
*
)(
*
in
),
(
int
)(
*
len
),
&
codepoint
);
if
(
cp_len
<
0
)
return
-
1
;
default
:
return
true
;
(
*
in
)
+=
cp_len
;
(
*
len
)
-=
cp_len
;
/* these code points are ignored completely */
switch
(
codepoint
)
{
case
0x200c
:
/* ZERO WIDTH NON-JOINER */
case
0x200d
:
/* ZERO WIDTH JOINER */
case
0x200e
:
/* LEFT-TO-RIGHT MARK */
case
0x200f
:
/* RIGHT-TO-LEFT MARK */
case
0x202a
:
/* LEFT-TO-RIGHT EMBEDDING */
case
0x202b
:
/* RIGHT-TO-LEFT EMBEDDING */
case
0x202c
:
/* POP DIRECTIONAL FORMATTING */
case
0x202d
:
/* LEFT-TO-RIGHT OVERRIDE */
case
0x202e
:
/* RIGHT-TO-LEFT OVERRIDE */
case
0x206a
:
/* INHIBIT SYMMETRIC SWAPPING */
case
0x206b
:
/* ACTIVATE SYMMETRIC SWAPPING */
case
0x206c
:
/* INHIBIT ARABIC FORM SHAPING */
case
0x206d
:
/* ACTIVATE ARABIC FORM SHAPING */
case
0x206e
:
/* NATIONAL DIGIT SHAPES */
case
0x206f
:
/* NOMINAL DIGIT SHAPES */
case
0xfeff
:
/* ZERO WIDTH NO-BREAK SPACE */
continue
;
}
len
--
;
/* fold into lowercase -- this will only fold characters in
* the ASCII range, which is perfectly fine, because the
* git folder name can only be composed of ascii characters
*/
return
tolower
(
codepoint
);
}
return
0
;
/* NULL byte -- end of string */
}
static
bool
verify_dotgit_hfs
(
const
char
*
path
,
size_t
len
)
{
if
(
next_hfs_char
(
&
path
,
&
len
)
!=
'.'
||
next_hfs_char
(
&
path
,
&
len
)
!=
'g'
||
next_hfs_char
(
&
path
,
&
len
)
!=
'i'
||
next_hfs_char
(
&
path
,
&
len
)
!=
't'
||
next_hfs_char
(
&
path
,
&
len
)
!=
0
)
return
true
;
return
(
ign
||
git
!=
4
)
;
return
false
;
}
GIT_INLINE
(
bool
)
verify_char
(
unsigned
char
c
,
unsigned
int
flags
)
...
...
src/util.c
View file @
8e35527d
...
...
@@ -664,3 +664,79 @@ void git__insertsort_r(
if
(
freeswap
)
git__free
(
swapel
);
}
/*
 * Length, in bytes, of the UTF-8 sequence introduced by each possible
 * leading byte. A value of 0 marks a byte that cannot start a sequence:
 * the continuation bytes 0x80-0xBF and the bytes 0xF8-0xFF.
 */
static const int8_t utf8proc_utf8class[256] = {
	/* 0x00 - 0x7F: one-byte sequences */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 - 0xBF: continuation bytes, never a valid first byte */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xDF: two-byte sequences */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 - 0xEF: three-byte sequences */
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0xF0 - 0xF7: four-byte sequences; 0xF8 - 0xFF: invalid */
	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Compute the byte length of the UTF-8 sequence starting at `str`.
 *
 * @param str     first byte of the sequence
 * @param str_len number of bytes available at `str`; a negative value
 *                disables the length check
 * @return the sequence length (1-4) on success; a negative value on
 *         failure: -1 if there is nothing to read or the first byte
 *         cannot start a sequence, -str_len if the sequence needs more
 *         bytes than are available, or -i if byte `i` is not a
 *         continuation byte
 */
int git__utf8_charlen(const uint8_t *str, int str_len)
{
	int length, i;

	/*
	 * An empty range has no first byte to classify; bail out before
	 * dereferencing `str` so we never read past the caller's buffer.
	 */
	if (str_len == 0)
		return -1;

	length = utf8proc_utf8class[str[0]];
	if (!length)
		return -1;

	/* The sequence is cut short by the end of the input. */
	if (str_len >= 0 && length > str_len)
		return -str_len;

	/* Every byte after the first must look like 10xxxxxx. */
	for (i = 1; i < length; i++) {
		if ((str[i] & 0xC0) != 0x80)
			return -i;
	}

	return length;
}
/*
 * Decode the UTF-8 sequence at the start of `str` into one codepoint.
 *
 * @param str     bytes to decode
 * @param str_len forwarded to git__utf8_charlen() as the number of
 *                bytes available at `str`
 * @param dst     receives the decoded codepoint; holds -1 whenever
 *                this function fails
 * @return the number of bytes the sequence occupies, or -1 on failure
 */
int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)
{
	int32_t cp;
	int nbytes;

	/* Leave an "invalid" marker behind in case we bail out below. */
	*dst = -1;

	nbytes = git__utf8_charlen(str, str_len);
	if (nbytes < 0)
		return -1;

	if (nbytes == 1) {
		cp = str[0];
	} else if (nbytes == 2) {
		cp = ((str[0] & 0x1F) << 6) |
		      (str[1] & 0x3F);

		/* overlong encoding of a one-byte value */
		if (cp < 0x80)
			return -1;
	} else if (nbytes == 3) {
		cp = ((str[0] & 0x0F) << 12) |
		     ((str[1] & 0x3F) << 6) |
		      (str[2] & 0x3F);

		/* overlong encoding */
		if (cp < 0x800)
			return -1;
		/* 0xD800 - 0xDFFF are not accepted */
		if (cp >= 0xD800 && cp < 0xE000)
			return -1;
		/* 0xFDD0 - 0xFDEF are not accepted */
		if (cp >= 0xFDD0 && cp < 0xFDF0)
			return -1;
	} else if (nbytes == 4) {
		cp = ((str[0] & 0x07) << 18) |
		     ((str[1] & 0x3F) << 12) |
		     ((str[2] & 0x3F) << 6) |
		      (str[3] & 0x3F);

		/* overlong encoding, or a value at or above 0x110000 */
		if (cp < 0x10000 || cp >= 0x110000)
			return -1;
	} else {
		/* no decoding exists for any other length */
		return -1;
	}

	/* values whose low 16 bits are 0xFFFE or 0xFFFF are rejected */
	if ((cp & 0xFFFF) >= 0xFFFE)
		return -1;

	*dst = cp;
	return nbytes;
}
src/util.h
View file @
8e35527d
...
...
@@ -368,6 +368,17 @@ extern int git__date_rfc2822_fmt(char *out, size_t len, const git_time *date);
extern
size_t
git__unescape
(
char
*
str
);
/*
 * Iterate through a UTF-8 string, yielding one
 * codepoint at a time.
 *
 * @param str current position in the string
 * @param str_len size left in the string; -1 if the string is NUL-terminated
 * @param dst pointer where to store the current codepoint; set to -1 when
 *            the codepoint could not be read
 * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
 */
extern int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst);
/*
* Safely zero-out memory, making sure that the compiler
* doesn't optimize away the operation.
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment