chore(deps): resolve dependency conflict

This commit is contained in:
Chris Lane
2020-05-11 20:12:28 -04:00
parent 1b17ab1914
commit 4eeec6c868
92 changed files with 1750 additions and 317 deletions

Binary file not shown.

View File

@ -22,4 +22,6 @@ _testmain.go
*.exe
*.test
*.prof
*.out
*.out
.DS_Store

View File

@ -57,6 +57,21 @@ The __last__ capture is embedded in each group, so `g.String()` will return the
| named ascii character class `[[:foo:]]`| yes | no |
| conditionals `((expr)yes\|no)` | no | yes |
## RE2 compatibility mode
The default behavior of `regexp2` is to match the .NET regexp engine, however the `RE2` option is provided to change the parsing to increase compatibility with RE2. Using the `RE2` option when compiling a regexp will not take away any features, but will change the following behaviors:
* add support for named ascii character classes (e.g. `[[:foo:]]`)
* add support for python-style capture groups (e.g. `(P<name>re)`)
```go
re := regexp2.MustCompile(`Your RE2-compatible pattern`, regexp2.RE2)
if isMatch, _ := re.MatchString(`Something to match`); isMatch {
//do something
}
```
This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
## Library features that I'm still working on
- Regex split

View File

@ -120,6 +120,7 @@ const (
RightToLeft = 0x0040 // "r"
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
)
func (re *Regexp) RightToLeft() bool {

View File

@ -484,6 +484,29 @@ func (c *CharSet) addRanges(ranges []singleRange) {
c.canonicalize()
}
// Merges everything but the new ranges into our own
func (c *CharSet) addNegativeRanges(ranges []singleRange) {
if c.anything {
return
}
var hi rune
// convert incoming ranges into opposites, assume they are in order
for _, r := range ranges {
if hi < r.first {
c.ranges = append(c.ranges, singleRange{hi, r.first - 1})
}
hi = r.last + 1
}
if hi < utf8.MaxRune {
c.ranges = append(c.ranges, singleRange{hi, utf8.MaxRune})
}
c.canonicalize()
}
func isValidUnicodeCat(catName string) bool {
_, ok := unicodeCategories[catName]
return ok
@ -515,6 +538,53 @@ func (c *CharSet) addRange(chMin, chMax rune) {
c.canonicalize()
}
func (c *CharSet) addNamedASCII(name string, negate bool) bool {
var rs []singleRange
switch name {
case "alnum":
rs = []singleRange{singleRange{'0', '9'}, singleRange{'A', 'Z'}, singleRange{'a', 'z'}}
case "alpha":
rs = []singleRange{singleRange{'A', 'Z'}, singleRange{'a', 'z'}}
case "ascii":
rs = []singleRange{singleRange{0, 0x7f}}
case "blank":
rs = []singleRange{singleRange{'\t', '\t'}, singleRange{' ', ' '}}
case "cntrl":
rs = []singleRange{singleRange{0, 0x1f}, singleRange{0x7f, 0x7f}}
case "digit":
c.addDigit(false, negate, "")
case "graph":
rs = []singleRange{singleRange{'!', '~'}}
case "lower":
rs = []singleRange{singleRange{'a', 'z'}}
case "print":
rs = []singleRange{singleRange{' ', '~'}}
case "punct": //[!-/:-@[-`{-~]
rs = []singleRange{singleRange{'!', '/'}, singleRange{':', '@'}, singleRange{'[', '`'}, singleRange{'{', '~'}}
case "space":
c.addSpace(true, negate)
case "upper":
rs = []singleRange{singleRange{'A', 'Z'}}
case "word":
c.addWord(true, negate)
case "xdigit":
rs = []singleRange{singleRange{'0', '9'}, singleRange{'A', 'F'}, singleRange{'a', 'f'}}
default:
return false
}
if len(rs) > 0 {
if negate {
c.addNegativeRanges(rs)
} else {
c.addRanges(rs)
}
}
return true
}
type singleRangeSorter []singleRange
func (p singleRangeSorter) Len() int { return len(p) }

View File

@ -21,6 +21,7 @@ const (
RightToLeft = 0x0040 // "r"
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 compat mode
)
func optionFromCode(ch rune) RegexOptions {
@ -310,7 +311,7 @@ func (p *parser) countCaptures() error {
switch ch {
case '\\':
if p.charsRight() > 0 {
p.moveRight(1)
p.scanBackslash(true)
}
case '#':
@ -354,6 +355,14 @@ func (p *parser) countCaptures() error {
p.noteCaptureName(p.scanCapname(), pos)
}
}
} else if p.useRE2() && p.charsRight() > 2 && (p.rightChar(0) == 'P' && p.rightChar(1) == '<') {
// RE2-compat (?P<)
p.moveRight(2)
ch = p.rightChar(0)
if IsWordChar(ch) {
p.noteCaptureName(p.scanCapname(), pos)
}
} else {
// (?...
@ -520,7 +529,7 @@ func (p *parser) scanRegex() (*regexNode, error) {
}
case '\\':
n, err := p.scanBackslash()
n, err := p.scanBackslash(false)
if err != nil {
return nil, err
}
@ -1022,6 +1031,50 @@ func (p *parser) scanGroupOpen() (*regexNode, error) {
}
}
case 'P':
if p.useRE2() {
// support for P<name> syntax
if p.charsRight() < 3 {
goto BreakRecognize
}
ch = p.moveRightGetChar()
if ch != '<' {
goto BreakRecognize
}
ch = p.moveRightGetChar()
p.moveLeft()
if IsWordChar(ch) {
capnum := -1
capname := p.scanCapname()
if p.isCaptureName(capname) {
capnum = p.captureSlotFromName(capname)
}
// check if we have bogus character after the name
if p.charsRight() > 0 && p.rightChar(0) != '>' {
return nil, p.getErr(ErrInvalidGroupName)
}
// actually make the node
if capnum != -1 && p.charsRight() > 0 && p.moveRightGetChar() == '>' {
return newRegexNodeMN(ntCapture, p.options, capnum, -1), nil
}
goto BreakRecognize
} else {
// bad group name - starts with something other than a word character and isn't a number
return nil, p.getErr(ErrInvalidGroupName)
}
}
// if we're not using RE2 compat mode then
// we just behave like normal
fallthrough
default:
p.moveLeft()
@ -1055,7 +1108,7 @@ BreakRecognize:
}
// scans backslash specials and basics
func (p *parser) scanBackslash() (*regexNode, error) {
func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
if p.charsRight() == 0 {
return nil, p.getErr(ErrIllegalEndEscape)
@ -1123,12 +1176,12 @@ func (p *parser) scanBackslash() (*regexNode, error) {
return newRegexNodeSet(ntSet, p.options, cc), nil
default:
return p.scanBasicBackslash()
return p.scanBasicBackslash(scanOnly)
}
}
// Scans \-style backreferences and character escapes
func (p *parser) scanBasicBackslash() (*regexNode, error) {
func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
if p.charsRight() == 0 {
return nil, p.getErr(ErrIllegalEndEscape)
}
@ -1184,15 +1237,19 @@ func (p *parser) scanBasicBackslash() (*regexNode, error) {
if p.charsRight() > 0 && p.moveRightGetChar() == close {
if p.isCaptureSlot(capnum) {
return newRegexNodeM(ntRef, p.options, capnum), nil
} else {
return nil, p.getErr(ErrUndefinedBackRef, capnum)
}
return nil, p.getErr(ErrUndefinedBackRef, capnum)
}
} else if !angled && ch >= '1' && ch <= '9' { // Try to parse backreference or octal: \1
capnum, err := p.scanDecimal()
if err != nil {
return nil, err
}
if scanOnly {
return nil, nil
}
if p.useOptionE() || p.isCaptureSlot(capnum) {
return newRegexNodeM(ntRef, p.options, capnum), nil
}
@ -1448,11 +1505,26 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
savePos := p.textpos()
p.moveRight(1)
p.scanCapname() // throwaway the name
negate := false
if p.charsRight() > 1 && p.rightChar(0) == '^' {
negate = true
p.moveRight(1)
}
nm := p.scanCapname() // snag the name
if !scanOnly && p.useRE2() {
// look up the name since these are valid for RE2
// add the group based on the name
if ok := cc.addNamedASCII(nm, negate); !ok {
return nil, p.getErr(ErrInvalidCharRange)
}
}
if p.charsRight() < 2 || p.moveRightGetChar() != ':' || p.moveRightGetChar() != ']' {
p.textto(savePos)
} else if p.useRE2() {
// move on
continue
}
// else lookup name (nyi)
}
}
@ -1547,7 +1619,7 @@ func (p *parser) scanDecimal() (int, error) {
// Returns true for options allowed only at the top level
func isOnlyTopOption(option RegexOptions) bool {
return option == RightToLeft || option == ECMAScript
return option == RightToLeft || option == ECMAScript || option == RE2
}
// Scans cimsx-cimsx option string, stops at the first unrecognized char.
@ -1861,6 +1933,11 @@ func (p *parser) useOptionE() bool {
return (p.options & ECMAScript) != 0
}
// true to use RE2 compatibility parsing behavior.
func (p *parser) useRE2() bool {
return (p.options & RE2) != 0
}
// True if options stack is empty.
func (p *parser) emptyOptionsStack() bool {
return len(p.optionsStack) == 0