chore(deps): upgrade dependencies

Upgrade all dependencies to newest versions.
This commit is contained in:
Christopher Allen Lane
2023-12-13 08:29:02 -05:00
parent 0d9c92c8c0
commit 95a4e31b6c
769 changed files with 28936 additions and 12954 deletions

View File

@@ -92,6 +92,66 @@ if isMatch, _ := re.MatchString(`Something to match`); isMatch {
This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
## Catastrophic Backtracking and Timeouts
`regexp2` supports features that can lead to catastrophic backtracking.
`Regexp.MatchTimeout` can be set to limit the impact of such behavior; the
match will fail with an error after approximately MatchTimeout. No timeout
checks are done by default.
Timeout checking is not free. The current timeout checking implementation starts
a background worker that updates a clock value approximately once every 100
milliseconds. The matching code compares this value against the precomputed
deadline for the match. The performance impact is as follows.
1. A match with a timeout runs almost as fast as a match without a timeout.
2. If any live matches have a timeout, there will be a background CPU load
(`~0.15%` currently on a modern machine). This load will remain constant
regardless of the number of matches done including matches done in parallel.
3. If no live matches are using a timeout, the background load will remain
until the longest deadline (match timeout + the time when the match started)
is reached. E.g., if you set a timeout of one minute the load will persist
for approximately a minute even if the match finishes quickly.
See [PR #58](https://github.com/dlclark/regexp2/pull/58) for more details and
alternatives considered.
## Goroutine leak error
If your unit tests use a library that verifies all goroutines have exited (e.g. https://github.com/uber-go/goleak), you'll likely get an error if you or any of your dependencies use regexes with a MatchTimeout.
To remedy the problem you'll need to tell the unit test to wait until the background timeout goroutine has exited.
```go
func TestSomething(t *testing.T) {
// Deferred calls run last-in-first-out, so StopTimeoutClock runs first and
// goleak.VerifyNone only runs after it has returned.
defer goleak.VerifyNone(t)
defer regexp2.StopTimeoutClock()
// ... test
}
//or
func TestMain(m *testing.M) {
	// setup
	// ...

	// run
	code := m.Run()

	// tear down: stop the timeout clock goroutine before looking for leaks
	regexp2.StopTimeoutClock()

	// There is no *testing.T in TestMain, so goleak.VerifyNone cannot be
	// used here; goleak.Find reports leaked goroutines as an error instead.
	if err := goleak.Find(); err != nil {
		fmt.Fprintf(os.Stderr, "goleak: %v\n", err)
		if code == 0 {
			code = 1
		}
	}
	os.Exit(code)
}
```
This will add ~100ms runtime to each test (or TestMain). If that's too much time you can set the clock cycle rate of the timeout goroutine in an init function in a test file. `regexp2.SetTimeoutCheckPeriod` isn't thread-safe, so it must be called before starting any regexes with timeouts.
```go
func init() {
// Speed up testing by making the timeout clock tick every 1ms.
// This runs in init so the period is set before any regex with a timeout is used.
regexp2.SetTimeoutCheckPeriod(time.Millisecond)
}
```
## ECMAScript compatibility mode
In this mode the engine provides compatibility with the [regex engine](https://tc39.es/ecma262/multipage/text-processing.html#sec-regexp-regular-expression-objects) described in the ECMAScript specification.

129
vendor/github.com/dlclark/regexp2/fastclock.go generated vendored Normal file
View File

@@ -0,0 +1,129 @@
package regexp2
import (
"sync"
"sync/atomic"
"time"
)
// fasttime holds a time value (ticks since clock initialization).
// A tick is a nanosecond count shifted right by 20 bits (see durationToTicks),
// i.e. approximately one millisecond.
type fasttime int64
// fastclock provides a fast clock implementation.
//
// A background goroutine (runClock) periodically stores the current time
// into an atomic variable.
//
// A deadline can be quickly checked for expiration by comparing
// its value to the clock stored in the atomic variable.
//
// The goroutine automatically stops once clockEnd is reached.
// (clockEnd covers the largest deadline seen so far + some
// extra time). This ensures that if regexp2 with timeouts
// stops being used we will stop background work.
type fastclock struct {
// instances of atomicTime must be at the start of the struct (or at least 64-bit aligned)
// otherwise 32-bit architectures will panic
current atomicTime // Current time (approximate), in ticks since start
clockEnd atomicTime // When clock updater is supposed to stop (>= any existing deadline); stopClock writes 0 here
// current and clockEnd can be read via atomic loads.
// Reads and writes of other fields require mu to be held.
mu sync.Mutex
start time.Time // Time corresponding to fasttime(0); set on the first extendClock call
running bool // Is a clock updater running?
}
// fast is the single package-level clock instance used by reached,
// makeDeadline, extendClock, stopClock and runClock.
var fast fastclock
// reached reports whether the shared clock's cached time has arrived at
// (or gone beyond) the deadline t.
func (t fasttime) reached() bool {
	now := fast.current.read()
	return t <= now
}
// makeDeadline returns a deadline, in clock ticks, that is approximately
// time.Now().Add(d).
//
// The deadline is derived from the cached clock value (fast.current) rather
// than from time.Now(). As a side effect the background clock is started, or
// its lifetime extended, so that it keeps ticking until the returned deadline
// has passed.
func makeDeadline(d time.Duration) fasttime {
	// Increase the deadline by one clock period since the clock we are
	// reading may be just about to tick forwards. Saturate instead of
	// wrapping: a huge d plus the padding would otherwise overflow to a
	// negative duration and produce a deadline that has already passed.
	padded := d + clockPeriod
	if d > 0 && clockPeriod > 0 && padded < 0 {
		padded = time.Duration(1<<63 - 1)
	}
	ticks := durationToTicks(padded)
	end := fast.current.read() + ticks

	// Start or extend clock if necessary.
	if end > fast.clockEnd.read() {
		// If the updater goroutine has exited, fast.current is frozen at the
		// moment it stopped. A deadline based on that stale value would be
		// far in the past as soon as the restarted clock ticks, causing a
		// false timeout. Refresh the clock and recompute before restarting.
		fast.mu.Lock()
		if !fast.running && !fast.start.IsZero() {
			now := durationToTicks(time.Since(fast.start))
			fast.current.write(now)
			end = now + ticks
		}
		fast.mu.Unlock()

		extendClock(end)
	}
	return end
}
// extendClock makes sure the background clock is running and will keep
// running until at least end (plus one second of slack).
func extendClock(end fasttime) {
	fast.mu.Lock()
	defer fast.mu.Unlock()

	// The first call fixes the instant that corresponds to fasttime(0).
	if fast.start.IsZero() {
		fast.start = time.Now()
	}

	// Push the shutdown point out to cover end plus a bit of slop, but
	// never pull it earlier than it already is.
	shutdown := end + durationToTicks(time.Second)
	if fast.clockEnd.read() < shutdown {
		fast.clockEnd.write(shutdown)
	}

	// Nothing more to do when an updater goroutine already exists.
	if fast.running {
		return
	}
	fast.running = true
	go runClock()
}
// stopClock stops the background timeout clock and blocks until its
// goroutine has exited. It returns immediately when no clock goroutine is
// running.
//
// It should only be used by unit tests that need the background goroutine
// gone (e.g. goroutine leak checkers).
func stopClock() {
	fast.mu.Lock()
	running := fast.running
	if running {
		// Moving clockEnd to zero makes runClock's loop condition fail
		// once the clock has advanced past tick 0.
		fast.clockEnd.write(fasttime(0))
	}
	fast.mu.Unlock()

	// Poll until the updater reports that it has stopped, taking and
	// releasing the lock on every check so runClock can make progress.
	for running {
		time.Sleep(clockPeriod / 2)
		fast.mu.Lock()
		running = fast.running
		fast.mu.Unlock()
	}
}
// durationToTicks converts a duration into clock ticks.
func durationToTicks(d time.Duration) fasttime {
	// Dropping the low 20 bits scales nanoseconds down to roughly
	// milliseconds, which leaves headroom against overflow even when the
	// caller passes in math.MaxInt64.
	const tickShift = 20
	return fasttime(d) >> tickShift
}
// DefaultClockPeriod is the initial value of clockPeriod: 100 milliseconds.
const DefaultClockPeriod = 100 * time.Millisecond
// clockPeriod is the approximate interval between updates of fast.current
// by runClock. stopClock polls at half this interval.
var clockPeriod = DefaultClockPeriod
// runClock is the body of the background updater goroutine. It refreshes
// fast.current once per clockPeriod until the clock has moved past
// fast.clockEnd, then marks the clock as no longer running.
func runClock() {
	fast.mu.Lock()
	defer fast.mu.Unlock()

	for {
		if fast.current.read() > fast.clockEnd.read() {
			break
		}

		// Release the lock for the duration of the sleep so other
		// goroutines can extend or stop the clock in the meantime.
		fast.mu.Unlock()
		time.Sleep(clockPeriod)
		fast.mu.Lock()

		elapsed := time.Since(fast.start)
		fast.current.write(durationToTicks(elapsed))
	}

	fast.running = false
}
// atomicTime is a fasttime value that is loaded and stored atomically.
// Should change to atomic.Int64 when we can use go 1.19.
type atomicTime struct {
	v int64
}

// read atomically loads the stored time.
func (t *atomicTime) read() fasttime {
	raw := atomic.LoadInt64(&t.v)
	return fasttime(raw)
}

// write atomically replaces the stored time with v.
func (t *atomicTime) write(v fasttime) {
	raw := int64(v)
	atomic.StoreInt64(&t.v, raw)
}

View File

@@ -24,7 +24,11 @@ var DefaultMatchTimeout = time.Duration(math.MaxInt64)
// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
//timeout when trying to find matches
// A match will time out if it takes (approximately) more than
// MatchTimeout. This is a safety check in case the match
// encounters catastrophic backtracking. The default value
// (DefaultMatchTimeout) causes all time out checking to be
// suppressed.
MatchTimeout time.Duration
// read-only after Compile
@@ -92,6 +96,19 @@ func Unescape(input string) (string, error) {
return syntax.Unescape(input)
}
// SetTimeoutCheckPeriod is a debug function that sets the frequency of the timeout goroutine's sleep cycle.
// Defaults to 100ms. The only benefit of setting this lower is that the 1 background goroutine that manages
// timeouts may exit slightly sooner after all the timeouts have expired. See Github issue #63
//
// It assigns the package-level clockPeriod without any synchronization, so call it
// before starting any match that uses a timeout.
func SetTimeoutCheckPeriod(d time.Duration) {
clockPeriod = d
}
// StopTimeoutClock should only be used in unit tests to prevent the timeout clock goroutine
// from appearing like a leaking goroutine. It delegates to stopClock, which waits
// until the clock goroutine is no longer running before returning.
func StopTimeoutClock() {
stopClock()
}
// String returns the source text used to compile the regular expression.
func (re *Regexp) String() string {
return re.pattern

View File

@@ -58,10 +58,9 @@ type runner struct {
runmatch *Match // result object
ignoreTimeout bool
timeout time.Duration // timeout in milliseconds (needed for actual)
timeoutChecksToSkip int
timeoutAt time.Time
ignoreTimeout bool
timeout time.Duration // timeout in milliseconds (needed for actual)
deadline fasttime
operator syntax.InstOp
codepos int
@@ -1551,39 +1550,15 @@ func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
}
// this seems like a comment to justify randomly picking 1000 :-P
// We have determined this value in a series of experiments where x86 retail
// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
// of TimeoutCheckFrequency did not tend to increase performance; smaller values
// of TimeoutCheckFrequency tended to slow down the execution.
const timeoutCheckFrequency int = 1000
func (r *runner) startTimeoutWatch() {
if r.ignoreTimeout {
return
}
r.timeoutChecksToSkip = timeoutCheckFrequency
r.timeoutAt = time.Now().Add(r.timeout)
r.deadline = makeDeadline(r.timeout)
}
func (r *runner) checkTimeout() error {
if r.ignoreTimeout {
return nil
}
r.timeoutChecksToSkip--
if r.timeoutChecksToSkip != 0 {
return nil
}
r.timeoutChecksToSkip = timeoutCheckFrequency
return r.doCheckTimeout()
}
func (r *runner) doCheckTimeout() error {
current := time.Now()
if current.Before(r.timeoutAt) {
if r.ignoreTimeout || !r.deadline.reached() {
return nil
}

View File

@@ -1311,6 +1311,17 @@ func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
// Scans X for \p{X} or \P{X}
func (p *parser) parseProperty() (string, error) {
// RE2 and PCRE supports \pX syntax (no {} and only 1 letter unicode cats supported)
// since this is purely additive syntax it's not behind a flag
if p.charsRight() >= 1 && p.rightChar(0) != '{' {
ch := string(p.moveRightGetChar())
// check if it's a valid cat
if !isValidUnicodeCat(ch) {
return "", p.getErr(ErrUnknownSlashP, ch)
}
return ch, nil
}
if p.charsRight() < 3 {
return "", p.getErr(ErrIncompleteSlashP)
}
@@ -1427,7 +1438,7 @@ func (p *parser) scanCapname() string {
return string(p.pattern[startpos:p.textpos()])
}
//Scans contents of [] (not including []'s), and converts to a set.
// Scans contents of [] (not including []'s), and converts to a set.
func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
ch := '\x00'
chPrev := '\x00'