chore: modernize CI and update Go toolchain

- Bump Go from 1.19 to 1.26 and update all dependencies
- Rewrite CI workflow with matrix strategy (Linux, macOS, Windows)
- Update GitHub Actions to current versions (checkout@v4, setup-go@v5)
- Update CodeQL actions from v1 to v3
- Fix cross-platform bug in mock/path.go (path.Join -> filepath.Join)
- Clean up dependabot config (weekly schedule, remove stale ignore)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christopher Allen Lane
2026-02-14 20:58:51 -05:00
parent cc85a4bdb1
commit 2a19755804
657 changed files with 49050 additions and 32001 deletions

View File

@@ -1,39 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.13
package poly1305
// Generic fallbacks for the math/bits intrinsics, copied from
// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sum64 had
// variable time fallbacks until Go 1.13.
func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
sum = x + y + carry
carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
return
}
func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
diff = x - y - borrow
borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
return
}
func bitsMul64(x, y uint64) (hi, lo uint64) {
const mask32 = 1<<32 - 1
x0 := x & mask32
x1 := x >> 32
y0 := y & mask32
y1 := y >> 32
w0 := x0 * y0
t := x1*y0 + w0>>32
w1 := t & mask32
w2 := t >> 32
w1 += x0 * y1
hi = x1*y1 + w2 + w1>>32
lo = x * y
return
}

View File

@@ -1,21 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.13
package poly1305
import "math/bits"
func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
return bits.Add64(x, y, carry)
}
func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
return bits.Sub64(x, y, borrow)
}
func bitsMul64(x, y uint64) (hi, lo uint64) {
return bits.Mul64(x, y)
}

View File

@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (!amd64 && !ppc64le && !s390x) || !gc || purego
//go:build (!amd64 && !loong64 && !ppc64le && !ppc64 && !s390x) || !gc || purego
package poly1305

View File

@@ -1,108 +1,93 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT.
//go:build gc && !purego
#include "textflag.h"
#define POLY1305_ADD(msg, h0, h1, h2) \
ADDQ 0(msg), h0; \
ADCQ 8(msg), h1; \
ADCQ $1, h2; \
LEAQ 16(msg), msg
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
MOVQ r0, AX; \
MULQ h0; \
MOVQ AX, t0; \
MOVQ DX, t1; \
MOVQ r0, AX; \
MULQ h1; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ r0, t2; \
IMULQ h2, t2; \
ADDQ DX, t2; \
\
MOVQ r1, AX; \
MULQ h0; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ DX, h0; \
MOVQ r1, t3; \
IMULQ h2, t3; \
MOVQ r1, AX; \
MULQ h1; \
ADDQ AX, t2; \
ADCQ DX, t3; \
ADDQ h0, t2; \
ADCQ $0, t3; \
\
MOVQ t0, h0; \
MOVQ t1, h1; \
MOVQ t2, h2; \
ANDQ $3, h2; \
MOVQ t2, t0; \
ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
ADDQ t0, h0; \
ADCQ t3, h1; \
ADCQ $0, h2; \
SHRQ $2, t3, t2; \
SHRQ $2, t3; \
ADDQ t2, h0; \
ADCQ t3, h1; \
ADCQ $0, h2
// func update(state *[7]uint64, msg []byte)
// func update(state *macState, msg []byte)
TEXT ·update(SB), $0-32
MOVQ state+0(FP), DI
MOVQ msg_base+8(FP), SI
MOVQ msg_len+16(FP), R15
MOVQ 0(DI), R8 // h0
MOVQ 8(DI), R9 // h1
MOVQ 16(DI), R10 // h2
MOVQ 24(DI), R11 // r0
MOVQ 32(DI), R12 // r1
CMPQ R15, $16
MOVQ (DI), R8
MOVQ 8(DI), R9
MOVQ 16(DI), R10
MOVQ 24(DI), R11
MOVQ 32(DI), R12
CMPQ R15, $0x10
JB bytes_between_0_and_15
loop:
POLY1305_ADD(SI, R8, R9, R10)
ADDQ (SI), R8
ADCQ 8(SI), R9
ADCQ $0x01, R10
LEAQ 16(SI), SI
multiply:
POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
SUBQ $16, R15
CMPQ R15, $16
JAE loop
MOVQ R11, AX
MULQ R8
MOVQ AX, BX
MOVQ DX, CX
MOVQ R11, AX
MULQ R9
ADDQ AX, CX
ADCQ $0x00, DX
MOVQ R11, R13
IMULQ R10, R13
ADDQ DX, R13
MOVQ R12, AX
MULQ R8
ADDQ AX, CX
ADCQ $0x00, DX
MOVQ DX, R8
MOVQ R12, R14
IMULQ R10, R14
MOVQ R12, AX
MULQ R9
ADDQ AX, R13
ADCQ DX, R14
ADDQ R8, R13
ADCQ $0x00, R14
MOVQ BX, R8
MOVQ CX, R9
MOVQ R13, R10
ANDQ $0x03, R10
MOVQ R13, BX
ANDQ $-4, BX
ADDQ BX, R8
ADCQ R14, R9
ADCQ $0x00, R10
SHRQ $0x02, R14, R13
SHRQ $0x02, R14
ADDQ R13, R8
ADCQ R14, R9
ADCQ $0x00, R10
SUBQ $0x10, R15
CMPQ R15, $0x10
JAE loop
bytes_between_0_and_15:
TESTQ R15, R15
JZ done
MOVQ $1, BX
MOVQ $0x00000001, BX
XORQ CX, CX
XORQ R13, R13
ADDQ R15, SI
flush_buffer:
SHLQ $8, BX, CX
SHLQ $8, BX
SHLQ $0x08, BX, CX
SHLQ $0x08, BX
MOVB -1(SI), R13
XORQ R13, BX
DECQ SI
DECQ R15
JNZ flush_buffer
ADDQ BX, R8
ADCQ CX, R9
ADCQ $0, R10
MOVQ $16, R15
ADCQ $0x00, R10
MOVQ $0x00000010, R15
JMP multiply
done:
MOVQ R8, 0(DI)
MOVQ R8, (DI)
MOVQ R9, 8(DI)
MOVQ R10, 16(DI)
RET

View File

@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gc && !purego
//go:build gc && !purego && (amd64 || loong64 || ppc64 || ppc64le)
package poly1305

View File

@@ -7,7 +7,10 @@
package poly1305
import "encoding/binary"
import (
"encoding/binary"
"math/bits"
)
// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
// for a 64 bytes message is approximately
@@ -114,13 +117,13 @@ type uint128 struct {
}
func mul64(a, b uint64) uint128 {
hi, lo := bitsMul64(a, b)
hi, lo := bits.Mul64(a, b)
return uint128{lo, hi}
}
func add128(a, b uint128) uint128 {
lo, c := bitsAdd64(a.lo, b.lo, 0)
hi, c := bitsAdd64(a.hi, b.hi, c)
lo, c := bits.Add64(a.lo, b.lo, 0)
hi, c := bits.Add64(a.hi, b.hi, c)
if c != 0 {
panic("poly1305: unexpected overflow")
}
@@ -155,8 +158,8 @@ func updateGeneric(state *macState, msg []byte) {
// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
// add 1 to the most significant (2¹²⁸) limb, h2.
if len(msg) >= TagSize {
h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
h0, c = bits.Add64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
h1, c = bits.Add64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
h2 += c + 1
msg = msg[TagSize:]
@@ -165,8 +168,8 @@ func updateGeneric(state *macState, msg []byte) {
copy(buf[:], msg)
buf[len(msg)] = 1
h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
h0, c = bits.Add64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
h1, c = bits.Add64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
h2 += c
msg = nil
@@ -219,9 +222,9 @@ func updateGeneric(state *macState, msg []byte) {
m3 := h2r1
t0 := m0.lo
t1, c := bitsAdd64(m1.lo, m0.hi, 0)
t2, c := bitsAdd64(m2.lo, m1.hi, c)
t3, _ := bitsAdd64(m3.lo, m2.hi, c)
t1, c := bits.Add64(m1.lo, m0.hi, 0)
t2, c := bits.Add64(m2.lo, m1.hi, c)
t3, _ := bits.Add64(m3.lo, m2.hi, c)
// Now we have the result as 4 64-bit limbs, and we need to reduce it
// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
@@ -243,14 +246,14 @@ func updateGeneric(state *macState, msg []byte) {
// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
h0, c = bitsAdd64(h0, cc.lo, 0)
h1, c = bitsAdd64(h1, cc.hi, c)
h0, c = bits.Add64(h0, cc.lo, 0)
h1, c = bits.Add64(h1, cc.hi, c)
h2 += c
cc = shiftRightBy2(cc)
h0, c = bitsAdd64(h0, cc.lo, 0)
h1, c = bitsAdd64(h1, cc.hi, c)
h0, c = bits.Add64(h0, cc.lo, 0)
h1, c = bits.Add64(h1, cc.hi, c)
h2 += c
// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
@@ -287,9 +290,9 @@ func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
// result if the subtraction underflows, and t otherwise.
hMinusP0, b := bitsSub64(h0, p0, 0)
hMinusP1, b := bitsSub64(h1, p1, b)
_, b = bitsSub64(h2, p2, b)
hMinusP0, b := bits.Sub64(h0, p0, 0)
hMinusP1, b := bits.Sub64(h1, p1, b)
_, b = bits.Sub64(h2, p2, b)
// h = h if h < p else h - p
h0 = select64(b, h0, hMinusP0)
@@ -301,8 +304,8 @@ func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
//
// by just doing a wide addition with the 128 low bits of h and discarding
// the overflow.
h0, c := bitsAdd64(h0, s[0], 0)
h1, _ = bitsAdd64(h1, s[1], c)
h0, c := bits.Add64(h0, s[0], 0)
h1, _ = bits.Add64(h1, s[1], c)
binary.LittleEndian.PutUint64(out[0:8], h0)
binary.LittleEndian.PutUint64(out[8:16], h1)

View File

@@ -0,0 +1,123 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gc && !purego
// func update(state *macState, msg []byte)
TEXT ·update(SB), $0-32
MOVV state+0(FP), R4
MOVV msg_base+8(FP), R5
MOVV msg_len+16(FP), R6
MOVV $0x10, R7
MOVV (R4), R8 // h0
MOVV 8(R4), R9 // h1
MOVV 16(R4), R10 // h2
MOVV 24(R4), R11 // r0
MOVV 32(R4), R12 // r1
BLT R6, R7, bytes_between_0_and_15
loop:
MOVV (R5), R14 // msg[0:8]
MOVV 8(R5), R16 // msg[8:16]
ADDV R14, R8, R8 // h0 (x1 + y1 = z1', if z1' < x1 then z1' overflow)
ADDV R16, R9, R27
SGTU R14, R8, R24 // h0.carry
SGTU R9, R27, R28
ADDV R27, R24, R9 // h1
SGTU R27, R9, R24
OR R24, R28, R24 // h1.carry
ADDV $0x01, R24, R24
ADDV R10, R24, R10 // h2
ADDV $16, R5, R5 // msg = msg[16:]
multiply:
MULV R8, R11, R14 // h0r0.lo
MULHVU R8, R11, R15 // h0r0.hi
MULV R9, R11, R13 // h1r0.lo
MULHVU R9, R11, R16 // h1r0.hi
ADDV R13, R15, R15
SGTU R13, R15, R24
ADDV R24, R16, R16
MULV R10, R11, R25
ADDV R16, R25, R25
MULV R8, R12, R13 // h0r1.lo
MULHVU R8, R12, R16 // h0r1.hi
ADDV R13, R15, R15
SGTU R13, R15, R24
ADDV R24, R16, R16
MOVV R16, R8
MULV R10, R12, R26 // h2r1
MULV R9, R12, R13 // h1r1.lo
MULHVU R9, R12, R16 // h1r1.hi
ADDV R13, R25, R25
ADDV R16, R26, R27
SGTU R13, R25, R24
ADDV R27, R24, R26
ADDV R8, R25, R25
SGTU R8, R25, R24
ADDV R24, R26, R26
AND $3, R25, R10
AND $-4, R25, R17
ADDV R17, R14, R8
ADDV R26, R15, R27
SGTU R17, R8, R24
SGTU R26, R27, R28
ADDV R27, R24, R9
SGTU R27, R9, R24
OR R24, R28, R24
ADDV R24, R10, R10
SLLV $62, R26, R27
SRLV $2, R25, R28
SRLV $2, R26, R26
OR R27, R28, R25
ADDV R25, R8, R8
ADDV R26, R9, R27
SGTU R25, R8, R24
SGTU R26, R27, R28
ADDV R27, R24, R9
SGTU R27, R9, R24
OR R24, R28, R24
ADDV R24, R10, R10
SUBV $16, R6, R6
BGE R6, R7, loop
bytes_between_0_and_15:
BEQ R6, R0, done
MOVV $1, R14
XOR R15, R15
ADDV R6, R5, R5
flush_buffer:
MOVBU -1(R5), R25
SRLV $56, R14, R24
SLLV $8, R15, R28
SLLV $8, R14, R14
OR R24, R28, R15
XOR R25, R14, R14
SUBV $1, R6, R6
SUBV $1, R5, R5
BNE R6, R0, flush_buffer
ADDV R14, R8, R8
SGTU R14, R8, R24
ADDV R15, R9, R27
SGTU R15, R27, R28
ADDV R27, R24, R9
SGTU R27, R9, R24
OR R24, R28, R24
ADDV R10, R24, R10
MOVV $16, R6
JMP multiply
done:
MOVV R8, (R4)
MOVV R9, 8(R4)
MOVV R10, 16(R4)
RET

View File

@@ -1,47 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gc && !purego
package poly1305
//go:noescape
func update(state *macState, msg []byte)
// mac is a wrapper for macGeneric that redirects calls that would have gone to
// updateGeneric to update.
//
// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
// using function pointers would carry a major performance cost.
type mac struct{ macGeneric }
func (h *mac) Write(p []byte) (int, error) {
nn := len(p)
if h.offset > 0 {
n := copy(h.buffer[h.offset:], p)
if h.offset+n < TagSize {
h.offset += n
return nn, nil
}
p = p[n:]
h.offset = 0
update(&h.macState, h.buffer[:])
}
if n := len(p) - (len(p) % TagSize); n > 0 {
update(&h.macState, p[:n])
p = p[n:]
}
if len(p) > 0 {
h.offset += copy(h.buffer[h.offset:], p)
}
return nn, nil
}
func (h *mac) Sum(out *[16]byte) {
state := h.macState
if h.offset > 0 {
update(&state, h.buffer[:h.offset])
}
finalize(out, &state.h, &state.s)
}

View File

@@ -2,15 +2,25 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gc && !purego
//go:build gc && !purego && (ppc64 || ppc64le)
#include "textflag.h"
// This was ported from the amd64 implementation.
#ifdef GOARCH_ppc64le
#define LE_MOVD MOVD
#define LE_MOVWZ MOVWZ
#define LE_MOVHZ MOVHZ
#else
#define LE_MOVD MOVDBR
#define LE_MOVWZ MOVWBR
#define LE_MOVHZ MOVHBR
#endif
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
MOVD (msg), t0; \
MOVD 8(msg), t1; \
LE_MOVD (msg)( R0), t0; \
LE_MOVD (msg)(R24), t1; \
MOVD $1, t2; \
ADDC t0, h0, h0; \
ADDE t1, h1, h1; \
@@ -19,15 +29,14 @@
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
MULLD r0, h0, t0; \
MULLD r0, h1, t4; \
MULHDU r0, h0, t1; \
MULLD r0, h1, t4; \
MULHDU r0, h1, t5; \
ADDC t4, t1, t1; \
MULLD r0, h2, t2; \
ADDZE t5; \
MULHDU r1, h0, t4; \
MULLD r1, h0, h0; \
ADD t5, t2, t2; \
ADDE t5, t2, t2; \
ADDC h0, t1, t1; \
MULLD h2, r1, t3; \
ADDZE t4, h0; \
@@ -37,13 +46,11 @@
ADDE t5, t3, t3; \
ADDC h0, t2, t2; \
MOVD $-4, t4; \
MOVD t0, h0; \
MOVD t1, h1; \
ADDZE t3; \
ANDCC $3, t2, h2; \
AND t2, t4, t0; \
RLDICL $0, t2, $62, h2; \
AND t2, t4, h0; \
ADDC t0, h0, h0; \
ADDE t3, h1, h1; \
ADDE t3, t1, h1; \
SLD $62, t3, t4; \
SRD $2, t2; \
ADDZE h2; \
@@ -53,10 +60,6 @@
ADDE t3, h1, h1; \
ADDZE h2
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
// func update(state *[7]uint64, msg []byte)
TEXT ·update(SB), $0-32
MOVD state+0(FP), R3
@@ -69,12 +72,15 @@ TEXT ·update(SB), $0-32
MOVD 24(R3), R11 // r0
MOVD 32(R3), R12 // r1
MOVD $8, R24
CMP R5, $16
BLT bytes_between_0_and_15
loop:
POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
PCALIGN $16
multiply:
POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
ADD $-16, R5
@@ -96,7 +102,7 @@ flush_buffer:
// Greater than 8 -- load the rightmost remaining bytes in msg
// and put into R17 (h1)
MOVD (R4)(R21), R17
LE_MOVD (R4)(R21), R17
MOVD $16, R22
// Find the offset to those bytes
@@ -120,7 +126,7 @@ just1:
BLT less8
// Exactly 8
MOVD (R4), R16
LE_MOVD (R4), R16
CMP R17, $0
@@ -135,7 +141,7 @@ less8:
MOVD $0, R22 // shift count
CMP R5, $4
BLT less4
MOVWZ (R4), R16
LE_MOVWZ (R4), R16
ADD $4, R4
ADD $-4, R5
MOVD $32, R22
@@ -143,7 +149,7 @@ less8:
less4:
CMP R5, $2
BLT less2
MOVHZ (R4), R21
LE_MOVHZ (R4), R21
SLD R22, R21, R21
OR R16, R21, R16
ADD $16, R22