Files
cheat/vendor/github.com/pjbgf/sha1cd/sha1cdblock_amd64.s
Christopher Allen Lane 2a19755804 chore: modernize CI and update Go toolchain
- Bump Go from 1.19 to 1.26 and update all dependencies
- Rewrite CI workflow with matrix strategy (Linux, macOS, Windows)
- Update GitHub Actions to current versions (checkout@v4, setup-go@v5)
- Update CodeQL actions from v1 to v3
- Fix cross-platform bug in mock/path.go (path.Join -> filepath.Join)
- Clean up dependabot config (weekly schedule, remove stale ignore)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-14 20:58:51 -05:00

275 lines
5.4 KiB
ArmAsm

//go:build !noasm && gc && amd64 && !arm64
#include "textflag.h"
// License information for the original SHA1 amd64 implementation:
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found at:
// - https://github.com/golang/go/blob/master/LICENSE
//
// Reference implementations:
// - https://github.com/golang/go/blob/master/src/crypto/sha1/sha1block_amd64.s
// LOADCS(state, eword, slot, dst) writes one 20-byte state snapshot at byte
// offset slot*20 from dst: the four 32-bit lanes of the XMM register state,
// from lane 3 down to lane 0, followed by the low 32 bits of the
// general-purpose register eword.
#define LOADCS(state, eword, slot, dst) \
	VPEXTRD $3, state, ((slot*20)+0)(dst); \
	VPEXTRD $2, state, ((slot*20)+4)(dst); \
	VPEXTRD $1, state, ((slot*20)+8)(dst); \
	VPEXTRD $0, state, ((slot*20)+12)(dst); \
	MOVL eword, ((slot*20)+16)(dst);
// LOADM1(words, slot, dst) reverses the order of the four 32-bit lanes of the
// XMM register words (VPSHUFD $0x1B) and stores the 16-byte result at byte
// offset slot*16 from dst. The source register is left untouched; X8 is
// clobbered as scratch.
#define LOADM1(words, slot, dst) \
	VPSHUFD $0x1B, words, X8; \
	VMOVDQU X8, ((slot*16)+0)(dst);
// func blockAMD64(h []uint32, p []byte, m1 []uint32, cs [][5]uint32)
// Requires: AVX, SHA, SSE2, SSE4.1, SSSE3
//
// Runs the SHA-1 rounds over every complete 64-byte block of p using the SHA
// extensions and writes the resulting five state words back to h. Bytes of p
// beyond the last complete block are ignored; if p holds no complete block the
// function returns without touching h, m1 or cs.
//
// For the block being processed it also records:
//   - m1: 20 slots of 16 bytes (80 words), one LOADM1 per group of 4 rounds;
//   - cs: three 5-word snapshots written by LOADCS into slots 0, 1 and 2.
// The m1 and cs pointers are never advanced, so each block overwrites what the
// previous block stored there.
//
// Register roles:
//   DI  = &h[0]
//   SI  = current 64-byte block of p
//   DX  = address just past the last complete block
//   R13 = &m1[0]
//   R15 = &cs[0]
//   AX  = 16-byte aligned scratch inside the frame:
//         (AX) holds the saved E vector, 16(AX) the saved ABCD vector
//   R12 = scratch for the E word handed to LOADCS
//   X0  = ABCD working state
//   X5, X6 = E working values (alternating between round groups)
//   X1-X4  = message words
//   X7  = byte shuffle mask applied to each 16 bytes loaded from p
//   X8  = scratch clobbered by LOADM1
TEXT ·blockAMD64(SB), NOSPLIT, $80-96
	MOVQ h_base+0(FP), DI
	MOVQ p_base+24(FP), SI
	MOVQ p_len+32(FP), DX
	MOVQ m1_base+48(FP), R13
	MOVQ cs_base+72(FP), R15

	// Only whole 64-byte blocks are processed. Round the length down so that
	// the end-pointer comparison at the bottom of the loop is always reached
	// exactly and never walks past the end of p.
	SHRQ $6, DX
	SHLQ $6, DX
	CMPQ DX, $0x00
	JEQ  done
	ADDQ SI, DX

	// Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes
	LEAQ 15(SP), AX
	ANDQ $~15, AX

	// Load initial hash state: h[4] goes into lane 3 of X5 (the other lanes
	// are cleared by upper_mask), h[0..3] go into X0 with lane order reversed.
	PINSRD   $0x03, 16(DI), X5
	VMOVDQU  (DI), X0
	PAND     upper_mask<>+0(SB), X5
	PSHUFD   $0x1b, X0, X0
	VMOVDQA  shuffle_mask<>+0(SB), X7

loop:
	// Save ABCD and E working values
	VMOVDQA X5, (AX)
	VMOVDQA X0, 16(AX)

	// LOAD CS 0
	VPEXTRD $3, X5, R12
	LOADCS(X0, R12, 0, R15)

	// Rounds 0-3
	VMOVDQU   (SI), X1
	PSHUFB    X7, X1
	PADDD     X1, X5
	VMOVDQA   X0, X6
	SHA1RNDS4 $0x00, X5, X0
	LOADM1(X1, 0, R13)

	// Rounds 4-7
	VMOVDQU   16(SI), X2
	PSHUFB    X7, X2
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1RNDS4 $0x00, X6, X0
	SHA1MSG1  X2, X1
	LOADM1(X2, 1, R13)

	// Rounds 8-11
	VMOVDQU   32(SI), X3
	PSHUFB    X7, X3
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1RNDS4 $0x00, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1
	LOADM1(X3, 2, R13)

	// Rounds 12-15
	VMOVDQU   48(SI), X4
	PSHUFB    X7, X4
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x00, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2
	LOADM1(X4, 3, R13)

	// Rounds 16-19
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x00, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3
	LOADM1(X1, 4, R13)

	// Rounds 20-23
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x01, X6, X0
	SHA1MSG1  X2, X1
	PXOR      X2, X4
	LOADM1(X2, 5, R13)

	// Rounds 24-27
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x01, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1
	LOADM1(X3, 6, R13)

	// Rounds 28-31
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x01, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2
	LOADM1(X4, 7, R13)

	// Rounds 32-35
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x01, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3
	LOADM1(X1, 8, R13)

	// Rounds 36-39
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x01, X6, X0
	SHA1MSG1  X2, X1
	PXOR      X2, X4
	LOADM1(X2, 9, R13)

	// Rounds 40-43
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x02, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1
	LOADM1(X3, 10, R13)

	// Rounds 44-47
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x02, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2
	LOADM1(X4, 11, R13)

	// Rounds 48-51
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x02, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3
	LOADM1(X1, 12, R13)

	// derive pre-round 56's E out of round 51's A.
	VPEXTRD $3, X0, R12
	ROLL    $30, R12

	// Rounds 52-55
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x02, X6, X0
	SHA1MSG1  X2, X1
	PXOR      X2, X4
	LOADM1(X2, 13, R13)

	// LOAD CS 58 (gathers 56 which will be rectified in Go)
	LOADCS(X0, R12, 1, R15)

	// Rounds 56-59
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x02, X5, X0
	SHA1MSG1  X3, X2
	PXOR      X3, X1
	LOADM1(X3, 14, R13)

	// derive pre-round 64's E out of round 59's A.
	VPEXTRD $3, X0, R12
	ROLL    $30, R12

	// Rounds 60-63
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X4, X1
	SHA1RNDS4 $0x03, X6, X0
	SHA1MSG1  X4, X3
	PXOR      X4, X2
	LOADM1(X4, 15, R13)

	// LOAD CS 65 (gathers 64 which will be rectified in Go)
	LOADCS(X0, R12, 2, R15)

	// Rounds 64-67
	SHA1NEXTE X1, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X1, X2
	SHA1RNDS4 $0x03, X5, X0
	SHA1MSG1  X1, X4
	PXOR      X1, X3
	LOADM1(X1, 16, R13)

	// Rounds 68-71
	SHA1NEXTE X2, X6
	VMOVDQA   X0, X5
	SHA1MSG2  X2, X3
	SHA1RNDS4 $0x03, X6, X0
	PXOR      X2, X4
	LOADM1(X2, 17, R13)

	// Rounds 72-75
	SHA1NEXTE X3, X5
	VMOVDQA   X0, X6
	SHA1MSG2  X3, X4
	SHA1RNDS4 $0x03, X5, X0
	LOADM1(X3, 18, R13)

	// Rounds 76-79
	SHA1NEXTE X4, X6
	VMOVDQA   X0, X5
	SHA1RNDS4 $0x03, X6, X0
	LOADM1(X4, 19, R13)

	// Add saved E and ABCD
	SHA1NEXTE (AX), X5
	PADDD     16(AX), X0

	// Check if we are done, if not return to the loop
	ADDQ $0x40, SI
	CMPQ SI, DX
	JNE  loop

	// Write the hash state back to digest
	PSHUFD  $0x1b, X0, X0
	VMOVDQU X0, (DI)
	PEXTRD  $0x03, X5, 16(DI)

done:
	RET
// upper_mask: 16-byte AND mask that keeps only the highest 32-bit lane
// (bytes 12-15) and clears the three lower lanes.
DATA upper_mask<>+0(SB)/4, $0x00000000
DATA upper_mask<>+4(SB)/4, $0x00000000
DATA upper_mask<>+8(SB)/4, $0x00000000
DATA upper_mask<>+12(SB)/4, $0xffffffff
GLOBL upper_mask<>(SB), RODATA, $16
// shuffle_mask: PSHUFB control vector whose bytes are 15, 14, ..., 1, 0 in
// memory order, i.e. it reverses all 16 bytes of the register it is applied to.
DATA shuffle_mask<>+0(SB)/4, $0x0c0d0e0f
DATA shuffle_mask<>+4(SB)/4, $0x08090a0b
DATA shuffle_mask<>+8(SB)/4, $0x04050607
DATA shuffle_mask<>+12(SB)/4, $0x00010203
GLOBL shuffle_mask<>(SB), RODATA, $16