chore(deps): update dependencies

2026-01-01 20:02:07 +01:00 · 2022-08-02 20:30:31 -04:00
parent 0737af2fec
commit 7406ebfb5e
25 changed files with 5681 additions and 42 deletions
--- a/vendor/github.com/dlclark/regexp2/.travis.yml
+++ b/vendor/github.com/dlclark/regexp2/.travis.yml
@@ -1,5 +1,7 @@
 language: go
-
+arch:
+  - AMD64
+  - ppc64le
 go:
  - 1.9
-  - tip
+  - tip
--- a/vendor/github.com/dlclark/regexp2/README.md
+++ b/vendor/github.com/dlclark/regexp2/README.md
@@ -39,6 +39,24 @@ Group 0 is embedded in the Match.  Group 0 is an automatically-assigned group th

 The __last__ capture is embedded in each group, so `g.String()` will return the same thing as `g.Capture.String()` and  `g.Captures[len(g.Captures)-1].String()`.

+If you want to find multiple matches from a single input string you should use the `FindNextMatch` method.  For example, to implement a function similar to `regexp.FindAllString`:
+
+```go
+func regexp2FindAllString(re *regexp2.Regexp, s string) []string {
+	var matches []string
+	m, _ := re.FindStringMatch(s)
+	for m != nil {
+		matches = append(matches, m.String())
+		m, _ = re.FindNextMatch(m)
+	}
+	return matches
+}
+```
+
+`FindNextMatch` is optmized so that it re-uses the underlying string/rune slice.
+
+The internals of `regexp2` always operate on `[]rune` so `Index` and `Length` data in a `Match` always reference a position in `rune`s rather than `byte`s (even if the input was given as a string). This is a dramatic difference between `regexp` and `regexp2`.  It's advisable to use the provided `String()` methods to avoid having to work with indices.
+
 ## Compare `regexp` and `regexp2`
 | Category | regexp | regexp2 |
 | --- | --- | --- |
@@ -62,6 +80,8 @@ The default behavior of `regexp2` is to match the .NET regexp engine, however th
 * add support for named ascii character classes (e.g. `[[:foo:]]`)
 * add support for python-style capture groups (e.g. `(P<name>re)`)
 * change singleline behavior for `$` to only match end of string (like RE2) (see [#24](https://github.com/dlclark/regexp2/issues/24))
+* change the character classes `\d` `\s` and `\w` to match the same characters as RE2. NOTE: if you also use the `ECMAScript` option then this will change the `\s` character class to match ECMAScript instead of RE2.  ECMAScript allows more whitespace characters in `\s` than RE2 (but still fewer than the the default behavior).
+* allow character escape sequences to have defaults. For example, by default `\_` isn't a known character escape and will fail to compile, but in RE2 mode it will match the literal character `_`
 
 ```go
 re := regexp2.MustCompile(`Your RE2-compatible pattern`, regexp2.RE2)
@@ -72,6 +92,10 @@ if isMatch, _ := re.MatchString(`Something to match`); isMatch {

 This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).

+## ECMAScript compatibility mode
+In this mode the engine provides compatibility with the [regex engine](https://tc39.es/ecma262/multipage/text-processing.html#sec-regexp-regular-expression-objects) described in the ECMAScript specification.
+
+Additionally a Unicode mode is provided which allows parsing of `\u{CodePoint}` syntax that is only when both are provided.

 ## Library features that I'm still working on
 - Regex split
--- a/vendor/github.com/dlclark/regexp2/go.mod
+++ b/vendor/github.com/dlclark/regexp2/go.mod
@@ -0,0 +1,3 @@
+module github.com/dlclark/regexp2
+
+go 1.13
--- a/vendor/github.com/dlclark/regexp2/regexp.go
+++ b/vendor/github.com/dlclark/regexp2/regexp.go
@@ -121,6 +121,7 @@ const (
 	Debug                                = 0x0080 // "d"
 	ECMAScript                           = 0x0100 // "e"
 	RE2                                  = 0x0200 // RE2 (regexp package) compatibility mode
+	Unicode                              = 0x0400 // "u"
 )

 func (re *Regexp) RightToLeft() bool {
--- a/vendor/github.com/dlclark/regexp2/syntax/charclass.go
+++ b/vendor/github.com/dlclark/regexp2/syntax/charclass.go
@@ -37,6 +37,8 @@ var (
 	ecmaSpace = []rune{0x0009, 0x000e, 0x0020, 0x0021, 0x00a0, 0x00a1, 0x1680, 0x1681, 0x2000, 0x200b, 0x2028, 0x202a, 0x202f, 0x2030, 0x205f, 0x2060, 0x3000, 0x3001, 0xfeff, 0xff00}
 	ecmaWord  = []rune{0x0030, 0x003a, 0x0041, 0x005b, 0x005f, 0x0060, 0x0061, 0x007b}
 	ecmaDigit = []rune{0x0030, 0x003a}
+
+	re2Space = []rune{0x0009, 0x000b, 0x000c, 0x000e, 0x0020, 0x0021}
 )

 var (
@@ -56,6 +58,9 @@ var (
 	NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText)
 	DigitClass    = getCharSetFromCategoryString(false, false, "Nd")
 	NotDigitClass = getCharSetFromCategoryString(false, true, "Nd")
+
+	RE2SpaceClass    = getCharSetFromOldString(re2Space, false)
+	NotRE2SpaceClass = getCharSetFromOldString(re2Space, true)
 )

 var unicodeCategories = func() map[string]*unicode.RangeTable {
@@ -401,13 +406,19 @@ func (c *CharSet) addChar(ch rune) {
 	c.addRange(ch, ch)
 }

-func (c *CharSet) addSpace(ecma, negate bool) {
+func (c *CharSet) addSpace(ecma, re2, negate bool) {
 	if ecma {
 		if negate {
 			c.addRanges(NotECMASpaceClass().ranges)
 		} else {
 			c.addRanges(ECMASpaceClass().ranges)
 		}
+	} else if re2 {
+		if negate {
+			c.addRanges(NotRE2SpaceClass().ranges)
+		} else {
+			c.addRanges(RE2SpaceClass().ranges)
+		}
 	} else {
 		c.addCategories(category{cat: spaceCategoryText, negate: negate})
 	}
@@ -563,7 +574,7 @@ func (c *CharSet) addNamedASCII(name string, negate bool) bool {
 	case "punct": //[!-/:-@[-`{-~]
 		rs = []singleRange{singleRange{'!', '/'}, singleRange{':', '@'}, singleRange{'[', '`'}, singleRange{'{', '~'}}
 	case "space":
-		c.addSpace(true, negate)
+		c.addSpace(true, false, negate)
 	case "upper":
 		rs = []singleRange{singleRange{'A', 'Z'}}
 	case "word":
--- a/vendor/github.com/dlclark/regexp2/syntax/parser.go
+++ b/vendor/github.com/dlclark/regexp2/syntax/parser.go
@@ -22,6 +22,7 @@ const (
 	Debug                                = 0x0080 // "d"
 	ECMAScript                           = 0x0100 // "e"
 	RE2                                  = 0x0200 // RE2 compat mode
+	Unicode                              = 0x0400 // "u"
 )

 func optionFromCode(ch rune) RegexOptions {
@@ -43,6 +44,8 @@ func optionFromCode(ch rune) RegexOptions {
 		return Debug
 	case 'e', 'E':
 		return ECMAScript
+	case 'u', 'U':
+		return Unicode
 	default:
 		return 0
 	}
@@ -104,7 +107,7 @@ const (
 	ErrBadClassInCharRange        = "cannot include class \\%v in character range"
 	ErrUnterminatedBracket        = "unterminated [] set"
 	ErrSubtractionMustBeLast      = "a subtraction must be the last element in a character class"
-	ErrReversedCharRange          = "[x-y] range in reverse order"
+	ErrReversedCharRange          = "[%c-%c] range in reverse order"
 )

 func (e ErrorCode) String() string {
@@ -1121,14 +1124,14 @@ func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {

 	case 'w':
 		p.moveRight(1)
-		if p.useOptionE() {
+		if p.useOptionE() || p.useRE2() {
 			return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
 		}
 		return newRegexNodeSet(ntSet, p.options, WordClass()), nil

 	case 'W':
 		p.moveRight(1)
-		if p.useOptionE() {
+		if p.useOptionE() || p.useRE2() {
 			return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
 		}
 		return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil
@@ -1137,6 +1140,8 @@ func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
 		p.moveRight(1)
 		if p.useOptionE() {
 			return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
+		} else if p.useRE2() {
+			return newRegexNodeSet(ntSet, p.options, RE2SpaceClass()), nil
 		}
 		return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil

@@ -1144,19 +1149,21 @@ func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
 		p.moveRight(1)
 		if p.useOptionE() {
 			return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
+		} else if p.useRE2() {
+			return newRegexNodeSet(ntSet, p.options, NotRE2SpaceClass()), nil
 		}
 		return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil

 	case 'd':
 		p.moveRight(1)
-		if p.useOptionE() {
+		if p.useOptionE() || p.useRE2() {
 			return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
 		}
 		return newRegexNodeSet(ntSet, p.options, DigitClass()), nil

 	case 'D':
 		p.moveRight(1)
-		if p.useOptionE() {
+		if p.useOptionE() || p.useRE2() {
 			return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
 		}
 		return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil
@@ -1186,19 +1193,24 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
 		return nil, p.getErr(ErrIllegalEndEscape)
 	}
 	angled := false
+	k := false
 	close := '\x00'

 	backpos := p.textpos()
 	ch := p.rightChar(0)

-	// allow \k<foo> instead of \<foo>, which is now deprecated
+	// Allow \k<foo> instead of \<foo>, which is now deprecated.

-	if ch == 'k' {
+	// According to ECMAScript specification, \k<name> is only parsed as a named group reference if
+	// there is at least one group name in the regexp.
+	// See https://www.ecma-international.org/ecma-262/#sec-isvalidregularexpressionliteral, step 7.
+	// Note, during the first (scanOnly) run we may not have all group names scanned, but that's ok.
+	if ch == 'k' && (!p.useOptionE() || len(p.capnames) > 0) {
 		if p.charsRight() >= 2 {
 			p.moveRight(1)
 			ch = p.moveRightGetChar()

-			if ch == '<' || ch == '\'' {
+			if ch == '<' || (!p.useOptionE() && ch == '\'') { // No support for \k'name' in ECMAScript
 				angled = true
 				if ch == '\'' {
 					close = '\''
@@ -1213,8 +1225,9 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
 		}

 		ch = p.rightChar(0)
+		k = true

-	} else if (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
+	} else if !p.useOptionE() && (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
 		angled = true
 		if ch == '\'' {
 			close = '\''
@@ -1257,14 +1270,23 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
 			return nil, p.getErr(ErrUndefinedBackRef, capnum)
 		}

-	} else if angled && IsWordChar(ch) {
+	} else if angled {
 		capname := p.scanCapname()

-		if p.charsRight() > 0 && p.moveRightGetChar() == close {
+		if capname != "" && p.charsRight() > 0 && p.moveRightGetChar() == close {
+
+			if scanOnly {
+				return nil, nil
+			}
+
 			if p.isCaptureName(capname) {
 				return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
 			}
 			return nil, p.getErr(ErrUndefinedNameRef, capname)
+		} else {
+			if k {
+				return nil, p.getErr(ErrMalformedNameRef)
+			}
 		}
 	}

@@ -1276,6 +1298,10 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
 		return nil, err
 	}

+	if scanOnly {
+		return nil, nil
+	}
+
 	if p.useOptionI() {
 		ch = unicode.ToLower(ch)
 	}
@@ -1443,7 +1469,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
 					if inRange {
 						return nil, p.getErr(ErrBadClassInCharRange, ch)
 					}
-					cc.addDigit(p.useOptionE(), ch == 'D', p.patternRaw)
+					cc.addDigit(p.useOptionE() || p.useRE2(), ch == 'D', p.patternRaw)
 				}
 				continue

@@ -1452,7 +1478,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
 					if inRange {
 						return nil, p.getErr(ErrBadClassInCharRange, ch)
 					}
-					cc.addSpace(p.useOptionE(), ch == 'S')
+					cc.addSpace(p.useOptionE(), p.useRE2(), ch == 'S')
 				}
 				continue

@@ -1462,7 +1488,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
 						return nil, p.getErr(ErrBadClassInCharRange, ch)
 					}

-					cc.addWord(p.useOptionE(), ch == 'W')
+					cc.addWord(p.useOptionE() || p.useRE2(), ch == 'W')
 				}
 				continue

@@ -1548,7 +1574,7 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
 				} else {
 					// a regular range, like a-z
 					if chPrev > ch {
-						return nil, p.getErr(ErrReversedCharRange)
+						return nil, p.getErr(ErrReversedCharRange, chPrev, ch)
 					}
 					cc.addRange(chPrev, ch)
 				}
@@ -1672,7 +1698,13 @@ func (p *parser) scanCharEscape() (r rune, err error) {
 			r, err = p.scanHex(2)
 		}
 	case 'u':
-		r, err = p.scanHex(4)
+		// ECMAscript suppot \u{HEX} only if `u` is also set
+		if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' {
+			p.moveRight(1)
+			return p.scanHexUntilBrace()
+		} else {
+			r, err = p.scanHex(4)
+		}
 	case 'a':
 		return '\u0007', nil
 	case 'b':
@@ -1692,7 +1724,7 @@ func (p *parser) scanCharEscape() (r rune, err error) {
 	case 'c':
 		r, err = p.scanControl()
 	default:
-		if !p.useOptionE() && IsWordChar(ch) {
+		if !p.useOptionE() && !p.useRE2() && IsWordChar(ch) {
 			return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
 		}
 		return ch, nil
@@ -1949,6 +1981,11 @@ func (p *parser) useRE2() bool {
 	return (p.options & RE2) != 0
 }

+// True if U option enabling ECMAScript's Unicode behavior on.
+func (p *parser) useOptionU() bool {
+	return (p.options & Unicode) != 0
+}
+
 // True if options stack is empty.
 func (p *parser) emptyOptionsStack() bool {
 	return len(p.optionsStack) == 0
@@ -2044,7 +2081,8 @@ func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
 	}

 	if cch > 1 {
-		str := p.pattern[pos : pos+cch]
+		str := make([]rune, cch)
+		copy(str, p.pattern[pos:pos+cch])

 		if p.useOptionI() && !isReplacement {
 			// We do the ToLower character by character for consistency.  With surrogate chars, doing
--- a/vendor/github.com/dlclark/regexp2/syntax/prefix.go
+++ b/vendor/github.com/dlclark/regexp2/syntax/prefix.go
@@ -712,7 +712,7 @@ func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int {

 				if chTest != b.pattern[match] {
 					advance = b.positive[match]
-					if (chTest & 0xFF80) == 0 {
+					if chTest < 128 {
 						test2 = (match - startmatch) + b.negativeASCII[chTest]
 					} else if chTest < 0xffff && len(b.negativeUnicode) > 0 {
 						unicodeLookup = b.negativeUnicode[chTest>>8]