chore(deps): update dependencies

This commit is contained in:
Christopher Allen Lane
2022-08-02 20:30:31 -04:00
parent 0737af2fec
commit 7406ebfb5e
25 changed files with 5681 additions and 42 deletions

View File

@ -37,6 +37,8 @@ var (
ecmaSpace = []rune{0x0009, 0x000e, 0x0020, 0x0021, 0x00a0, 0x00a1, 0x1680, 0x1681, 0x2000, 0x200b, 0x2028, 0x202a, 0x202f, 0x2030, 0x205f, 0x2060, 0x3000, 0x3001, 0xfeff, 0xff00}
ecmaWord = []rune{0x0030, 0x003a, 0x0041, 0x005b, 0x005f, 0x0060, 0x0061, 0x007b}
ecmaDigit = []rune{0x0030, 0x003a}
re2Space = []rune{0x0009, 0x000b, 0x000c, 0x000e, 0x0020, 0x0021}
)
var (
@ -56,6 +58,9 @@ var (
NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText)
DigitClass = getCharSetFromCategoryString(false, false, "Nd")
NotDigitClass = getCharSetFromCategoryString(false, true, "Nd")
RE2SpaceClass = getCharSetFromOldString(re2Space, false)
NotRE2SpaceClass = getCharSetFromOldString(re2Space, true)
)
var unicodeCategories = func() map[string]*unicode.RangeTable {
@ -401,13 +406,19 @@ func (c *CharSet) addChar(ch rune) {
c.addRange(ch, ch)
}
func (c *CharSet) addSpace(ecma, negate bool) {
func (c *CharSet) addSpace(ecma, re2, negate bool) {
if ecma {
if negate {
c.addRanges(NotECMASpaceClass().ranges)
} else {
c.addRanges(ECMASpaceClass().ranges)
}
} else if re2 {
if negate {
c.addRanges(NotRE2SpaceClass().ranges)
} else {
c.addRanges(RE2SpaceClass().ranges)
}
} else {
c.addCategories(category{cat: spaceCategoryText, negate: negate})
}
@ -563,7 +574,7 @@ func (c *CharSet) addNamedASCII(name string, negate bool) bool {
case "punct": //[!-/:-@[-`{-~]
rs = []singleRange{singleRange{'!', '/'}, singleRange{':', '@'}, singleRange{'[', '`'}, singleRange{'{', '~'}}
case "space":
c.addSpace(true, negate)
c.addSpace(true, false, negate)
case "upper":
rs = []singleRange{singleRange{'A', 'Z'}}
case "word":

View File

@ -22,6 +22,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 compat mode
Unicode = 0x0400 // "u"
)
func optionFromCode(ch rune) RegexOptions {
@ -43,6 +44,8 @@ func optionFromCode(ch rune) RegexOptions {
return Debug
case 'e', 'E':
return ECMAScript
case 'u', 'U':
return Unicode
default:
return 0
}
@ -104,7 +107,7 @@ const (
ErrBadClassInCharRange = "cannot include class \\%v in character range"
ErrUnterminatedBracket = "unterminated [] set"
ErrSubtractionMustBeLast = "a subtraction must be the last element in a character class"
ErrReversedCharRange = "[x-y] range in reverse order"
ErrReversedCharRange = "[%c-%c] range in reverse order"
)
func (e ErrorCode) String() string {
@ -1121,14 +1124,14 @@ func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
case 'w':
p.moveRight(1)
if p.useOptionE() {
if p.useOptionE() || p.useRE2() {
return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
}
return newRegexNodeSet(ntSet, p.options, WordClass()), nil
case 'W':
p.moveRight(1)
if p.useOptionE() {
if p.useOptionE() || p.useRE2() {
return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
}
return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil
@ -1137,6 +1140,8 @@ func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
p.moveRight(1)
if p.useOptionE() {
return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
} else if p.useRE2() {
return newRegexNodeSet(ntSet, p.options, RE2SpaceClass()), nil
}
return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil
@ -1144,19 +1149,21 @@ func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
p.moveRight(1)
if p.useOptionE() {
return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
} else if p.useRE2() {
return newRegexNodeSet(ntSet, p.options, NotRE2SpaceClass()), nil
}
return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil
case 'd':
p.moveRight(1)
if p.useOptionE() {
if p.useOptionE() || p.useRE2() {
return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
}
return newRegexNodeSet(ntSet, p.options, DigitClass()), nil
case 'D':
p.moveRight(1)
if p.useOptionE() {
if p.useOptionE() || p.useRE2() {
return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
}
return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil
@ -1186,19 +1193,24 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
return nil, p.getErr(ErrIllegalEndEscape)
}
angled := false
k := false
close := '\x00'
backpos := p.textpos()
ch := p.rightChar(0)
// allow \k<foo> instead of \<foo>, which is now deprecated
// Allow \k<foo> instead of \<foo>, which is now deprecated.
if ch == 'k' {
// According to ECMAScript specification, \k<name> is only parsed as a named group reference if
// there is at least one group name in the regexp.
// See https://www.ecma-international.org/ecma-262/#sec-isvalidregularexpressionliteral, step 7.
// Note, during the first (scanOnly) run we may not have all group names scanned, but that's ok.
if ch == 'k' && (!p.useOptionE() || len(p.capnames) > 0) {
if p.charsRight() >= 2 {
p.moveRight(1)
ch = p.moveRightGetChar()
if ch == '<' || ch == '\'' {
if ch == '<' || (!p.useOptionE() && ch == '\'') { // No support for \k'name' in ECMAScript
angled = true
if ch == '\'' {
close = '\''
@ -1213,8 +1225,9 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
}
ch = p.rightChar(0)
k = true
} else if (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
} else if !p.useOptionE() && (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
angled = true
if ch == '\'' {
close = '\''
@ -1257,14 +1270,23 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
return nil, p.getErr(ErrUndefinedBackRef, capnum)
}
} else if angled && IsWordChar(ch) {
} else if angled {
capname := p.scanCapname()
if p.charsRight() > 0 && p.moveRightGetChar() == close {
if capname != "" && p.charsRight() > 0 && p.moveRightGetChar() == close {
if scanOnly {
return nil, nil
}
if p.isCaptureName(capname) {
return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
}
return nil, p.getErr(ErrUndefinedNameRef, capname)
} else {
if k {
return nil, p.getErr(ErrMalformedNameRef)
}
}
}
@ -1276,6 +1298,10 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
return nil, err
}
if scanOnly {
return nil, nil
}
if p.useOptionI() {
ch = unicode.ToLower(ch)
}
@ -1443,7 +1469,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
if inRange {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addDigit(p.useOptionE(), ch == 'D', p.patternRaw)
cc.addDigit(p.useOptionE() || p.useRE2(), ch == 'D', p.patternRaw)
}
continue
@ -1452,7 +1478,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
if inRange {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addSpace(p.useOptionE(), ch == 'S')
cc.addSpace(p.useOptionE(), p.useRE2(), ch == 'S')
}
continue
@ -1462,7 +1488,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addWord(p.useOptionE(), ch == 'W')
cc.addWord(p.useOptionE() || p.useRE2(), ch == 'W')
}
continue
@ -1548,7 +1574,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
} else {
// a regular range, like a-z
if chPrev > ch {
return nil, p.getErr(ErrReversedCharRange)
return nil, p.getErr(ErrReversedCharRange, chPrev, ch)
}
cc.addRange(chPrev, ch)
}
@ -1672,7 +1698,13 @@ func (p *parser) scanCharEscape() (r rune, err error) {
r, err = p.scanHex(2)
}
case 'u':
r, err = p.scanHex(4)
// ECMAScript supports \u{HEX} only if `u` is also set
if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' {
p.moveRight(1)
return p.scanHexUntilBrace()
} else {
r, err = p.scanHex(4)
}
case 'a':
return '\u0007', nil
case 'b':
@ -1692,7 +1724,7 @@ func (p *parser) scanCharEscape() (r rune, err error) {
case 'c':
r, err = p.scanControl()
default:
if !p.useOptionE() && IsWordChar(ch) {
if !p.useOptionE() && !p.useRE2() && IsWordChar(ch) {
return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
}
return ch, nil
@ -1949,6 +1981,11 @@ func (p *parser) useRE2() bool {
return (p.options & RE2) != 0
}
// useOptionU reports whether the Unicode ("u") option bit is set in the
// parser's current options, i.e. whether ECMAScript's Unicode behavior
// is switched on.
func (p *parser) useOptionU() bool {
	return p.options&Unicode != 0
}
// True if options stack is empty.
func (p *parser) emptyOptionsStack() bool {
return len(p.optionsStack) == 0
@ -2044,7 +2081,8 @@ func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
}
if cch > 1 {
str := p.pattern[pos : pos+cch]
str := make([]rune, cch)
copy(str, p.pattern[pos:pos+cch])
if p.useOptionI() && !isReplacement {
// We do the ToLower character by character for consistency. With surrogate chars, doing

View File

@ -712,7 +712,7 @@ func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int {
if chTest != b.pattern[match] {
advance = b.positive[match]
if (chTest & 0xFF80) == 0 {
if chTest < 128 {
test2 = (match - startmatch) + b.negativeASCII[chTest]
} else if chTest < 0xffff && len(b.negativeUnicode) > 0 {
unicodeLookup = b.negativeUnicode[chTest>>8]