Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
785e11cc
Commit
785e11cc
authored
Dec 17, 2010
by
Ian Lance Taylor
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update to current master source.
From-SVN: r167972
parent
2bc4b682
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
159 additions
and
252 deletions
+159
-252
libgo/go/regexp/regexp.go
+159
-252
No files found.
libgo/go/regexp/regexp.go
View file @
785e11cc
...
...
@@ -89,114 +89,83 @@ var (
ErrBadBackslash
=
Error
(
"illegal backslash escape"
)
)
// An instruction executed by the NFA
type
instr
interface
{
kind
()
int
// the type of this instruction: _CHAR, _ANY, etc.
next
()
instr
// the instruction to execute after this one
setNext
(
i
instr
)
index
()
int
setIndex
(
i
int
)
print
()
}
const
(
iStart
=
iota
// beginning of program
iEnd
// end of program: success
iBOT
// '^' beginning of text
iEOT
// '$' end of text
iChar
// 'a' regular character
iCharClass
// [a-z] character class
iAny
// '.' any character including newline
iNotNL
// [^\n] special case: any character but newline
iBra
// '(' parenthesized expression: 2*braNum for left, 2*braNum+1 for right
iAlt
// '|' alternation
iNop
// do nothing; makes it easy to link without patching
)
// Fields and methods common to all instructions
type
common
struct
{
_next
instr
_index
int
// An instruction executed by the NFA
type
instr
struct
{
kind
int
// the type of this instruction: iChar, iAny, etc.
index
int
// used only in debugging; could be eliminated
next
*
instr
// the instruction to execute after this one
// Special fields valid only for some items.
char
int
// iChar
braNum
int
// iBra, iEbra
cclass
*
charClass
// iCharClass
left
*
instr
// iAlt, other branch
}
func
(
i
*
instr
)
print
()
{
switch
i
.
kind
{
case
iStart
:
print
(
"start"
)
case
iEnd
:
print
(
"end"
)
case
iBOT
:
print
(
"bot"
)
case
iEOT
:
print
(
"eot"
)
case
iChar
:
print
(
"char "
,
string
(
i
.
char
))
case
iCharClass
:
i
.
cclass
.
print
()
case
iAny
:
print
(
"any"
)
case
iNotNL
:
print
(
"notnl"
)
case
iBra
:
if
i
.
braNum
&
1
==
0
{
print
(
"bra"
,
i
.
braNum
/
2
)
}
else
{
print
(
"ebra"
,
i
.
braNum
/
2
)
}
case
iAlt
:
print
(
"alt("
,
i
.
left
.
index
,
")"
)
case
iNop
:
print
(
"nop"
)
}
}
func
(
c
*
common
)
next
()
instr
{
return
c
.
_next
}
func
(
c
*
common
)
setNext
(
i
instr
)
{
c
.
_next
=
i
}
func
(
c
*
common
)
index
()
int
{
return
c
.
_index
}
func
(
c
*
common
)
setIndex
(
i
int
)
{
c
.
_index
=
i
}
// Regexp is the representation of a compiled regular expression.
// The public interface is entirely through methods.
type
Regexp
struct
{
expr
string
// the original expression
prefix
string
// initial plain text string
prefixBytes
[]
byte
// initial plain text bytes
inst
[]
instr
start
instr
// first instruction of machine
prefixStart
instr
// where to start if there is a prefix
inst
[]
*
instr
start
*
instr
// first instruction of machine
prefixStart
*
instr
// where to start if there is a prefix
nbra
int
// number of brackets in expression, for subexpressions
}
const
(
_START
=
iota
// beginning of program
_END
// end of program: success
_BOT
// '^' beginning of text
_EOT
// '$' end of text
_CHAR
// 'a' regular character
_CHARCLASS
// [a-z] character class
_ANY
// '.' any character including newline
_NOTNL
// [^\n] special case: any character but newline
_BRA
// '(' parenthesized expression
_EBRA
// ')'; end of '(' parenthesized expression
_ALT
// '|' alternation
_NOP
// do nothing; makes it easy to link without patching
)
// --- START start of program
type
_Start
struct
{
common
}
func
(
start
*
_Start
)
kind
()
int
{
return
_START
}
func
(
start
*
_Start
)
print
()
{
print
(
"start"
)
}
// --- END end of program
type
_End
struct
{
common
}
func
(
end
*
_End
)
kind
()
int
{
return
_END
}
func
(
end
*
_End
)
print
()
{
print
(
"end"
)
}
// --- BOT beginning of text
type
_Bot
struct
{
common
}
func
(
bot
*
_Bot
)
kind
()
int
{
return
_BOT
}
func
(
bot
*
_Bot
)
print
()
{
print
(
"bot"
)
}
// --- EOT end of text
type
_Eot
struct
{
common
}
func
(
eot
*
_Eot
)
kind
()
int
{
return
_EOT
}
func
(
eot
*
_Eot
)
print
()
{
print
(
"eot"
)
}
// --- CHAR a regular character
type
_Char
struct
{
common
char
int
}
func
(
char
*
_Char
)
kind
()
int
{
return
_CHAR
}
func
(
char
*
_Char
)
print
()
{
print
(
"char "
,
string
(
char
.
char
))
}
func
newChar
(
char
int
)
*
_Char
{
c
:=
new
(
_Char
)
c
.
char
=
char
return
c
}
// --- CHARCLASS [a-z]
type
_CharClass
struct
{
common
type
charClass
struct
{
negate
bool
// is character class negated? ([^a-z])
// slice of int, stored pairwise: [a-z] is (a,z); x is (x,x):
ranges
[]
int
cmin
,
cmax
int
}
func
(
cclass
*
_CharClass
)
kind
()
int
{
return
_CHARCLASS
}
func
(
cclass
*
_CharClass
)
print
()
{
func
(
cclass
*
charClass
)
print
()
{
print
(
"charclass"
)
if
cclass
.
negate
{
print
(
" (negated)"
)
...
...
@@ -212,7 +181,7 @@ func (cclass *_CharClass) print() {
}
}
func
(
cclass
*
_C
harClass
)
addRange
(
a
,
b
int
)
{
func
(
cclass
*
c
harClass
)
addRange
(
a
,
b
int
)
{
// range is a through b inclusive
cclass
.
ranges
=
append
(
cclass
.
ranges
,
a
,
b
)
if
a
<
cclass
.
cmin
{
...
...
@@ -223,7 +192,7 @@ func (cclass *_CharClass) addRange(a, b int) {
}
}
func
(
cclass
*
_C
harClass
)
matches
(
c
int
)
bool
{
func
(
cclass
*
c
harClass
)
matches
(
c
int
)
bool
{
if
c
<
cclass
.
cmin
||
c
>
cclass
.
cmax
{
return
cclass
.
negate
}
...
...
@@ -236,67 +205,17 @@ func (cclass *_CharClass) matches(c int) bool {
return
cclass
.
negate
}
func
newCharClass
()
*
_CharClass
{
c
:=
new
(
_CharClass
)
c
.
ranges
=
make
([]
int
,
0
,
4
)
c
.
cmin
=
0x10FFFF
+
1
// MaxRune + 1
c
.
cmax
=
-
1
return
c
}
// --- ANY any character
type
_Any
struct
{
common
}
func
(
any
*
_Any
)
kind
()
int
{
return
_ANY
}
func
(
any
*
_Any
)
print
()
{
print
(
"any"
)
}
// --- NOTNL any character but newline
type
_NotNl
struct
{
common
}
func
(
notnl
*
_NotNl
)
kind
()
int
{
return
_NOTNL
}
func
(
notnl
*
_NotNl
)
print
()
{
print
(
"notnl"
)
}
// --- BRA parenthesized expression
type
_Bra
struct
{
common
n
int
// subexpression number
}
func
(
bra
*
_Bra
)
kind
()
int
{
return
_BRA
}
func
(
bra
*
_Bra
)
print
()
{
print
(
"bra"
,
bra
.
n
)
}
// --- EBRA end of parenthesized expression
type
_Ebra
struct
{
common
n
int
// subexpression number
}
func
(
ebra
*
_Ebra
)
kind
()
int
{
return
_EBRA
}
func
(
ebra
*
_Ebra
)
print
()
{
print
(
"ebra "
,
ebra
.
n
)
}
// --- ALT alternation
type
_Alt
struct
{
common
left
instr
// other branch
}
func
(
alt
*
_Alt
)
kind
()
int
{
return
_ALT
}
func
(
alt
*
_Alt
)
print
()
{
print
(
"alt("
,
alt
.
left
.
index
(),
")"
)
}
// --- NOP no operation
type
_Nop
struct
{
common
func
newCharClass
()
*
instr
{
i
:=
&
instr
{
kind
:
iCharClass
}
i
.
cclass
=
new
(
charClass
)
i
.
cclass
.
ranges
=
make
([]
int
,
0
,
4
)
i
.
cclass
.
cmin
=
0x10FFFF
+
1
// MaxRune + 1
i
.
cclass
.
cmax
=
-
1
return
i
}
func
(
nop
*
_Nop
)
kind
()
int
{
return
_NOP
}
func
(
nop
*
_Nop
)
print
()
{
print
(
"nop"
)
}
func
(
re
*
Regexp
)
add
(
i
instr
)
instr
{
i
.
setIndex
(
len
(
re
.
inst
))
func
(
re
*
Regexp
)
add
(
i
*
instr
)
*
instr
{
i
.
index
=
len
(
re
.
inst
)
re
.
inst
=
append
(
re
.
inst
,
i
)
return
i
}
...
...
@@ -364,8 +283,9 @@ func escape(c int) int {
return
-
1
}
func
(
p
*
parser
)
charClass
()
instr
{
cc
:=
newCharClass
()
func
(
p
*
parser
)
charClass
()
*
instr
{
i
:=
newCharClass
()
cc
:=
i
.
cclass
if
p
.
c
()
==
'^'
{
cc
.
negate
=
true
p
.
nextc
()
...
...
@@ -380,18 +300,18 @@ func (p *parser) charClass() instr {
// Is it [^\n]?
if
cc
.
negate
&&
len
(
cc
.
ranges
)
==
2
&&
cc
.
ranges
[
0
]
==
'\n'
&&
cc
.
ranges
[
1
]
==
'\n'
{
nl
:=
new
(
_NotNl
)
nl
:=
&
instr
{
kind
:
iNotNL
}
p
.
re
.
add
(
nl
)
return
nl
}
// Special common case: "[a]" -> "a"
if
!
cc
.
negate
&&
len
(
cc
.
ranges
)
==
2
&&
cc
.
ranges
[
0
]
==
cc
.
ranges
[
1
]
{
c
:=
newChar
(
cc
.
ranges
[
0
])
c
:=
&
instr
{
kind
:
iChar
,
char
:
cc
.
ranges
[
0
]}
p
.
re
.
add
(
c
)
return
c
}
p
.
re
.
add
(
cc
)
return
cc
p
.
re
.
add
(
i
)
return
i
case
'-'
:
// do this before backslash processing
p
.
error
(
ErrBadRange
)
case
'\\'
:
...
...
@@ -428,7 +348,7 @@ func (p *parser) charClass() instr {
return
nil
}
func
(
p
*
parser
)
term
()
(
start
,
end
instr
)
{
func
(
p
*
parser
)
term
()
(
start
,
end
*
instr
)
{
switch
c
:=
p
.
c
();
c
{
case
'|'
,
endOfFile
:
return
nil
,
nil
...
...
@@ -443,15 +363,15 @@ func (p *parser) term() (start, end instr) {
p
.
error
(
ErrUnmatchedRbkt
)
case
'^'
:
p
.
nextc
()
start
=
p
.
re
.
add
(
new
(
_Bot
)
)
start
=
p
.
re
.
add
(
&
instr
{
kind
:
iBOT
}
)
return
start
,
start
case
'$'
:
p
.
nextc
()
start
=
p
.
re
.
add
(
new
(
_Eot
)
)
start
=
p
.
re
.
add
(
&
instr
{
kind
:
iEOT
}
)
return
start
,
start
case
'.'
:
p
.
nextc
()
start
=
p
.
re
.
add
(
new
(
_Any
)
)
start
=
p
.
re
.
add
(
&
instr
{
kind
:
iAny
}
)
return
start
,
start
case
'['
:
p
.
nextc
()
...
...
@@ -472,12 +392,10 @@ func (p *parser) term() (start, end instr) {
}
p
.
nlpar
--
p
.
nextc
()
bra
:=
new
(
_Bra
)
bra
:=
&
instr
{
kind
:
iBra
,
braNum
:
2
*
nbra
}
p
.
re
.
add
(
bra
)
ebra
:=
new
(
_Ebra
)
ebra
:=
&
instr
{
kind
:
iBra
,
braNum
:
2
*
nbra
+
1
}
p
.
re
.
add
(
ebra
)
bra
.
n
=
nbra
ebra
.
n
=
nbra
if
start
==
nil
{
if
end
==
nil
{
p
.
error
(
ErrInternal
)
...
...
@@ -485,9 +403,9 @@ func (p *parser) term() (start, end instr) {
}
start
=
ebra
}
else
{
end
.
setNext
(
ebra
)
end
.
next
=
ebra
}
bra
.
setNext
(
start
)
bra
.
next
=
start
return
bra
,
ebra
case
'\\'
:
c
=
p
.
nextc
()
...
...
@@ -504,14 +422,14 @@ func (p *parser) term() (start, end instr) {
fallthrough
default
:
p
.
nextc
()
start
=
newChar
(
c
)
start
=
&
instr
{
kind
:
iChar
,
char
:
c
}
p
.
re
.
add
(
start
)
return
start
,
start
}
panic
(
"unreachable"
)
}
func
(
p
*
parser
)
closure
()
(
start
,
end
instr
)
{
func
(
p
*
parser
)
closure
()
(
start
,
end
*
instr
)
{
start
,
end
=
p
.
term
()
if
start
==
nil
{
return
...
...
@@ -519,28 +437,28 @@ func (p *parser) closure() (start, end instr) {
switch
p
.
c
()
{
case
'*'
:
// (start,end)*:
alt
:=
new
(
_Alt
)
alt
:=
&
instr
{
kind
:
iAlt
}
p
.
re
.
add
(
alt
)
end
.
setNext
(
alt
)
// after end, do alt
end
.
next
=
alt
// after end, do alt
alt
.
left
=
start
// alternate brach: return to start
start
=
alt
// alt becomes new (start, end)
end
=
alt
case
'+'
:
// (start,end)+:
alt
:=
new
(
_Alt
)
alt
:=
&
instr
{
kind
:
iAlt
}
p
.
re
.
add
(
alt
)
end
.
setNext
(
alt
)
// after end, do alt
end
.
next
=
alt
// after end, do alt
alt
.
left
=
start
// alternate brach: return to start
end
=
alt
// start is unchanged; end is alt
case
'?'
:
// (start,end)?:
alt
:=
new
(
_Alt
)
alt
:=
&
instr
{
kind
:
iAlt
}
p
.
re
.
add
(
alt
)
nop
:=
new
(
_Nop
)
nop
:=
&
instr
{
kind
:
iNop
}
p
.
re
.
add
(
nop
)
alt
.
left
=
start
// alternate branch is start
alt
.
setNext
(
nop
)
// follow on to nop
end
.
setNext
(
nop
)
// after end, go to nop
alt
.
next
=
nop
// follow on to nop
end
.
next
=
nop
// after end, go to nop
start
=
alt
// start is now alt
end
=
nop
// end is nop pointed to by both branches
default
:
...
...
@@ -553,27 +471,27 @@ func (p *parser) closure() (start, end instr) {
return
}
func
(
p
*
parser
)
concatenation
()
(
start
,
end
instr
)
{
func
(
p
*
parser
)
concatenation
()
(
start
,
end
*
instr
)
{
for
{
nstart
,
nend
:=
p
.
closure
()
switch
{
case
nstart
==
nil
:
// end of this concatenation
if
start
==
nil
{
// this is the empty string
nop
:=
p
.
re
.
add
(
new
(
_Nop
)
)
nop
:=
p
.
re
.
add
(
&
instr
{
kind
:
iNop
}
)
return
nop
,
nop
}
return
case
start
==
nil
:
// this is first element of concatenation
start
,
end
=
nstart
,
nend
default
:
end
.
setNext
(
nstart
)
end
.
next
=
nstart
end
=
nend
}
}
panic
(
"unreachable"
)
}
func
(
p
*
parser
)
regexp
()
(
start
,
end
instr
)
{
func
(
p
*
parser
)
regexp
()
(
start
,
end
*
instr
)
{
start
,
end
=
p
.
concatenation
()
for
{
switch
p
.
c
()
{
...
...
@@ -582,36 +500,35 @@ func (p *parser) regexp() (start, end instr) {
case
'|'
:
p
.
nextc
()
nstart
,
nend
:=
p
.
concatenation
()
alt
:=
new
(
_Alt
)
alt
:=
&
instr
{
kind
:
iAlt
}
p
.
re
.
add
(
alt
)
alt
.
left
=
start
alt
.
setNext
(
nstart
)
nop
:=
new
(
_Nop
)
alt
.
next
=
nstart
nop
:=
&
instr
{
kind
:
iNop
}
p
.
re
.
add
(
nop
)
end
.
setNext
(
nop
)
nend
.
setNext
(
nop
)
end
.
next
=
nop
nend
.
next
=
nop
start
,
end
=
alt
,
nop
}
}
panic
(
"unreachable"
)
}
func
unNop
(
i
instr
)
instr
{
for
i
.
kind
()
==
_NOP
{
i
=
i
.
next
()
func
unNop
(
i
*
instr
)
*
instr
{
for
i
.
kind
==
iNop
{
i
=
i
.
next
}
return
i
}
func
(
re
*
Regexp
)
eliminateNops
()
{
for
_
,
inst
:=
range
re
.
inst
{
if
inst
.
kind
()
==
_END
{
if
inst
.
kind
==
iEnd
{
continue
}
inst
.
setNext
(
unNop
(
inst
.
next
()))
if
inst
.
kind
()
==
_ALT
{
alt
:=
inst
.
(
*
_Alt
)
alt
.
left
=
unNop
(
alt
.
left
)
inst
.
next
=
unNop
(
inst
.
next
)
if
inst
.
kind
==
iAlt
{
inst
.
left
=
unNop
(
inst
.
left
)
}
}
}
...
...
@@ -619,10 +536,10 @@ func (re *Regexp) eliminateNops() {
func
(
re
*
Regexp
)
dump
()
{
print
(
"prefix <"
,
re
.
prefix
,
">
\n
"
)
for
_
,
inst
:=
range
re
.
inst
{
print
(
inst
.
index
()
,
": "
)
print
(
inst
.
index
,
": "
)
inst
.
print
()
if
inst
.
kind
()
!=
_END
{
print
(
" -> "
,
inst
.
next
()
.
index
()
)
if
inst
.
kind
!=
iEnd
{
print
(
" -> "
,
inst
.
next
.
index
)
}
print
(
"
\n
"
)
}
...
...
@@ -630,12 +547,12 @@ func (re *Regexp) dump() {
func
(
re
*
Regexp
)
doParse
()
{
p
:=
newParser
(
re
)
start
:=
new
(
_Start
)
start
:=
&
instr
{
kind
:
iStart
}
re
.
add
(
start
)
s
,
e
:=
p
.
regexp
()
start
.
setNext
(
s
)
start
.
next
=
s
re
.
start
=
start
e
.
setNext
(
re
.
add
(
new
(
_End
))
)
e
.
next
=
re
.
add
(
&
instr
{
kind
:
iEnd
}
)
if
debug
{
re
.
dump
()
...
...
@@ -659,27 +576,25 @@ func (re *Regexp) doParse() {
func
(
re
*
Regexp
)
setPrefix
()
{
var
b
[]
byte
var
utf
=
make
([]
byte
,
utf8
.
UTFMax
)
var
inst
*
instr
// First instruction is start; skip that.
i
:=
re
.
inst
[
0
]
.
next
()
.
index
()
Loop
:
for
i
<
len
(
re
.
inst
)
{
inst
:=
re
.
inst
[
i
]
for
inst
=
re
.
inst
[
0
]
.
next
;
inst
.
kind
!=
iEnd
;
inst
=
inst
.
next
{
// stop if this is not a char
if
inst
.
kind
()
!=
_CHAR
{
if
inst
.
kind
!=
iChar
{
break
}
// stop if this char can be followed by a match for an empty string,
// which includes closures, ^, and $.
switch
re
.
inst
[
inst
.
next
()
.
index
()]
.
kind
()
{
case
_BOT
,
_EOT
,
_ALT
:
switch
inst
.
next
.
kind
{
case
iBOT
,
iEOT
,
iAlt
:
break
Loop
}
n
:=
utf8
.
EncodeRune
(
inst
.
(
*
_Char
)
.
char
,
utf
)
b
=
bytes
.
Add
(
b
,
utf
[
0
:
n
])
i
=
inst
.
next
()
.
index
()
n
:=
utf8
.
EncodeRune
(
inst
.
char
,
utf
)
b
=
append
(
b
,
utf
[
0
:
n
]
...
)
}
// point prefixStart instruction to first non-CHAR after prefix
re
.
prefixStart
=
re
.
inst
[
i
]
re
.
prefixStart
=
inst
re
.
prefixBytes
=
b
re
.
prefix
=
string
(
b
)
}
...
...
@@ -696,7 +611,7 @@ func Compile(str string) (regexp *Regexp, error os.Error) {
}
}()
regexp
.
expr
=
str
regexp
.
inst
=
make
([]
instr
,
0
,
10
)
regexp
.
inst
=
make
([]
*
instr
,
0
,
10
)
regexp
.
doParse
()
return
}
...
...
@@ -772,7 +687,7 @@ func (a *matchArena) noMatch() *matchVec {
}
type
state
struct
{
inst
instr
// next instruction to execute
inst
*
instr
// next instruction to execute
prefixed
bool
// this match began with a fixed prefix
match
*
matchVec
}
...
...
@@ -780,44 +695,37 @@ type state struct {
// Append new state to to-do list. Leftmost-longest wins so avoid
// adding a state that's already active. The matchVec will be inc-ref'ed
// if it is assigned to a state.
func
(
a
*
matchArena
)
addState
(
s
[]
state
,
inst
instr
,
prefixed
bool
,
match
*
matchVec
,
pos
,
end
int
)
[]
state
{
switch
inst
.
kind
()
{
case
_
BOT
:
func
(
a
*
matchArena
)
addState
(
s
[]
state
,
inst
*
instr
,
prefixed
bool
,
match
*
matchVec
,
pos
,
end
int
)
[]
state
{
switch
inst
.
kind
{
case
i
BOT
:
if
pos
==
0
{
s
=
a
.
addState
(
s
,
inst
.
next
()
,
prefixed
,
match
,
pos
,
end
)
s
=
a
.
addState
(
s
,
inst
.
next
,
prefixed
,
match
,
pos
,
end
)
}
return
s
case
_
EOT
:
case
i
EOT
:
if
pos
==
end
{
s
=
a
.
addState
(
s
,
inst
.
next
()
,
prefixed
,
match
,
pos
,
end
)
s
=
a
.
addState
(
s
,
inst
.
next
,
prefixed
,
match
,
pos
,
end
)
}
return
s
case
_BRA
:
n
:=
inst
.
(
*
_Bra
)
.
n
match
.
m
[
2
*
n
]
=
pos
s
=
a
.
addState
(
s
,
inst
.
next
(),
prefixed
,
match
,
pos
,
end
)
return
s
case
_EBRA
:
n
:=
inst
.
(
*
_Ebra
)
.
n
match
.
m
[
2
*
n
+
1
]
=
pos
s
=
a
.
addState
(
s
,
inst
.
next
(),
prefixed
,
match
,
pos
,
end
)
case
iBra
:
match
.
m
[
inst
.
braNum
]
=
pos
s
=
a
.
addState
(
s
,
inst
.
next
,
prefixed
,
match
,
pos
,
end
)
return
s
}
index
:=
inst
.
index
()
l
:=
len
(
s
)
// States are inserted in order so it's sufficient to see if we have the same
// instruction; no need to see if existing match is earlier (it is).
for
i
:=
0
;
i
<
l
;
i
++
{
if
s
[
i
]
.
inst
.
index
()
==
index
{
if
s
[
i
]
.
inst
==
inst
{
return
s
}
}
s
=
append
(
s
,
state
{
inst
,
prefixed
,
match
})
match
.
ref
++
if
inst
.
kind
()
==
_ALT
{
s
=
a
.
addState
(
s
,
inst
.
(
*
_Alt
)
.
left
,
prefixed
,
a
.
copy
(
match
),
pos
,
end
)
if
inst
.
kind
==
iAlt
{
s
=
a
.
addState
(
s
,
inst
.
left
,
prefixed
,
a
.
copy
(
match
),
pos
,
end
)
// give other branch a copy of this match vector
s
=
a
.
addState
(
s
,
inst
.
next
()
,
prefixed
,
a
.
copy
(
match
),
pos
,
end
)
s
=
a
.
addState
(
s
,
inst
.
next
,
prefixed
,
a
.
copy
(
match
),
pos
,
end
)
}
return
s
}
...
...
@@ -860,7 +768,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
s
[
out
]
=
arena
.
addState
(
s
[
out
],
re
.
prefixStart
,
true
,
match
,
pos
,
end
)
prefixed
=
false
// next iteration should start at beginning of machine.
}
else
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
re
.
start
.
next
()
,
false
,
match
,
pos
,
end
)
s
[
out
]
=
arena
.
addState
(
s
[
out
],
re
.
start
.
next
,
false
,
match
,
pos
,
end
)
}
arena
.
free
(
match
)
// if addState saved it, ref was incremented
}
...
...
@@ -886,29 +794,28 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
}
pos
+=
charwidth
for
_
,
st
:=
range
s
[
in
]
{
switch
st
.
inst
.
kind
()
{
case
_
BOT
:
case
_
EOT
:
case
_CHAR
:
if
c
==
st
.
inst
.
(
*
_Char
)
.
char
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
()
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
}
case
_CHARCLASS
:
if
st
.
inst
.
(
*
_CharClass
)
.
matches
(
c
)
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
()
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
}
case
_ANY
:
switch
st
.
inst
.
kind
{
case
i
BOT
:
case
i
EOT
:
case
iChar
:
if
c
==
st
.
inst
.
char
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
}
case
iCharClass
:
if
st
.
inst
.
cclass
.
matches
(
c
)
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
}
case
iAny
:
if
c
!=
endOfFile
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
()
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
}
case
_NOT
NL
:
case
iNot
NL
:
if
c
!=
endOfFile
&&
c
!=
'\n'
{
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
()
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
s
[
out
]
=
arena
.
addState
(
s
[
out
],
st
.
inst
.
next
,
st
.
prefixed
,
st
.
match
,
pos
,
end
)
}
case
_BRA
:
case
_EBRA
:
case
_ALT
:
case
_END
:
case
iBra
:
case
iAlt
:
case
iEnd
:
// choose leftmost longest
if
!
found
||
// first
st
.
match
.
m
[
0
]
<
final
.
match
.
m
[
0
]
||
// leftmost
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment