init - add project files

This commit is contained in:
2025-03-06 23:54:11 -05:00
commit e724ff1120
1363 changed files with 897467 additions and 0 deletions

1
vendor/github.com/minio/highwayhash/.gitignore generated vendored Normal file
View File

@@ -0,0 +1 @@
*.test

28
vendor/github.com/minio/highwayhash/.golangci.yml generated vendored Normal file
View File

@@ -0,0 +1,28 @@
linters-settings:
golint:
min-confidence: 0
misspell:
locale: US
linters:
disable-all: true
enable:
- typecheck
- goimports
- misspell
- govet
- revive
- ineffassign
- gosimple
- unparam
- unused
issues:
exclude-use-default: false
exclude:
- should have a package comment
- error strings should not be capitalized or end with punctuation or a newline
- should have comment # TODO(aead): Remove once all exported ident. have comments!
service:
golangci-lint-version: 1.51.2 # use the fixed version to not introduce new linters unexpectedly

202
vendor/github.com/minio/highwayhash/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

99
vendor/github.com/minio/highwayhash/README.md generated vendored Normal file
View File

@@ -0,0 +1,99 @@
[![Godoc Reference](https://godoc.org/github.com/minio/highwayhash?status.svg)](https://godoc.org/github.com/minio/highwayhash)
[![Build Status](https://travis-ci.org/minio/highwayhash.svg?branch=master)](https://travis-ci.org/minio/highwayhash)
## HighwayHash
[HighwayHash](https://github.com/google/highwayhash) is a pseudo-random-function (PRF) developed by Jyrki Alakuijala, Bill Cox and Jan Wassenberg (Google research). HighwayHash takes a 256 bit key and computes 64, 128 or 256 bit hash values of given messages.
It can be used to prevent hash-flooding attacks or authenticate short-lived messages. Additionally it can be used as a fingerprinting function. HighwayHash is not a general purpose cryptographic hash function (such as Blake2b, SHA-3 or SHA-2) and should not be used if strong collision resistance is required.
This repository contains a native Go version and optimized assembly implementations for Intel, ARM and ppc64le architectures.
### High performance
HighwayHash is a SIMD hash function that is approximately 5x faster than [SipHash](https://www.131002.net/siphash/siphash.pdf), which is itself a fast and 'cryptographically strong' pseudo-random function designed by Aumasson and Bernstein.
HighwayHash uses a new way of mixing inputs with AVX2 multiply and permute instructions. The multiplications are 32x32 bit giving 64 bits-wide results and are therefore infeasible to reverse. Additionally permuting equalizes the distribution of the resulting bytes. The algorithm outputs digests ranging from 64 bits up to 256 bits at no extra cost.
### Stable
All three output sizes of HighwayHash have been declared [stable](https://github.com/google/highwayhash/#versioning-and-stability) as of January 2018. This means that the hash results for any given input message are guaranteed not to change.
### Installation
Install: `go get -u github.com/minio/highwayhash`
### Intel Performance
Below are the single core results on an Intel Core i7 (3.1 GHz) for 256 bit outputs:
```
BenchmarkSum256_16 204.17 MB/s
BenchmarkSum256_64 1040.63 MB/s
BenchmarkSum256_1K 8653.30 MB/s
BenchmarkSum256_8K 13476.07 MB/s
BenchmarkSum256_1M 14928.71 MB/s
BenchmarkSum256_5M 14180.04 MB/s
BenchmarkSum256_10M 12458.65 MB/s
BenchmarkSum256_25M 11927.25 MB/s
```
So for moderately sized messages it tops out at about 15 GB/sec. Also for small messages (1K) the performance is already at approximately 60% of the maximum throughput.
### ARM Performance
Below are the single core results on an EC2 c7g.4xlarge (Graviton3) instance for 256 bit outputs:
```
BenchmarkSum256_16 143.66 MB/s
BenchmarkSum256_64 628.75 MB/s
BenchmarkSum256_1K 3621.71 MB/s
BenchmarkSum256_8K 5039.64 MB/s
BenchmarkSum256_1M 5279.79 MB/s
BenchmarkSum256_5M 5474.60 MB/s
BenchmarkSum256_10M 5621.73 MB/s
BenchmarkSum256_25M 5250.47 MB/s
```
### ppc64le Performance
The ppc64le accelerated version is roughly 10x faster compared to the non-optimized version:
```
benchmark old MB/s new MB/s speedup
BenchmarkWrite_8K 531.19 5566.41 10.48x
BenchmarkSum64_8K 518.86 4971.88 9.58x
BenchmarkSum256_8K 502.45 4474.20 8.90x
```
### Performance compared to other hashing techniques
On a Skylake CPU (3.0 GHz Xeon Platinum 8124M) the table below shows how HighwayHash compares to other hashing techniques for 5 MB messages (single core performance, all Golang implementations, see [benchmark](https://github.com/fwessels/HashCompare/blob/master/benchmarks_test.go)).
```
BenchmarkHighwayHash 11986.98 MB/s
BenchmarkSHA256_AVX512 3552.74 MB/s
BenchmarkBlake2b 972.38 MB/s
BenchmarkSHA1 950.64 MB/s
BenchmarkMD5 684.18 MB/s
BenchmarkSHA512 562.04 MB/s
BenchmarkSHA256 383.07 MB/s
```
*Note: the AVX512 version of SHA256 uses the [multi-buffer crypto library](https://github.com/intel/intel-ipsec-mb) technique as developed by Intel, more details can be found in [sha256-simd](https://github.com/minio/sha256-simd/).*
### Qualitative assessment
We have performed a 'qualitative' assessment of how HighwayHash compares to Blake2b in terms of the distribution of the checksums for varying numbers of messages. It shows that HighwayHash behaves similarly according to the following graph:
![Hash Comparison Overview](https://s3.amazonaws.com/s3git-assets/hash-comparison-final.png)
More information can be found in [HashCompare](https://github.com/fwessels/HashCompare).
### Requirements
All Go versions >= 1.11 are supported (Go 1.11 is the minimum because it provides the assembler support required for the different platforms).
### Contributing
Contributions are welcome; please send PRs for any enhancements.

225
vendor/github.com/minio/highwayhash/highwayhash.go generated vendored Normal file
View File

@@ -0,0 +1,225 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// Package highwayhash implements the pseudo-random-function (PRF) HighwayHash.
// HighwayHash is a fast hash function designed to defend against hash-flooding attacks
// or to authenticate short-lived messages.
//
// HighwayHash is not a general purpose cryptographic hash function and does not
// provide (strong) collision resistance.
package highwayhash
import (
"encoding/binary"
"errors"
"hash"
)
const (
// Size is the size of HighwayHash-256 checksum in bytes.
// The constructors and Sum functions in this file also require keys of
// exactly Size bytes, and digest.BlockSize reports Size as the block size.
Size = 32
// Size128 is the size of HighwayHash-128 checksum in bytes.
Size128 = 16
// Size64 is the size of HighwayHash-64 checksum in bytes.
Size64 = 8
)
// errKeySize is returned by New, New128 and New64, and is the panic value of
// Sum, Sum128 and Sum64, when the supplied key is not exactly Size bytes long.
var errKeySize = errors.New("highwayhash: invalid key size")
// New creates a hash.Hash that computes HighwayHash-256 checksums under key.
// The key must be exactly 32 bytes long; for any other length New returns a
// nil hash together with a non-nil error.
func New(key []byte) (hash.Hash, error) {
	if len(key) != Size {
		return nil, errKeySize
	}

	d := new(digest)
	d.size = Size
	copy(d.key[:], key) // the digest keeps its own copy of the key
	d.Reset()
	return d, nil
}
// New128 creates a hash.Hash that computes HighwayHash-128 checksums under
// key. The key must be exactly 32 bytes long; for any other length New128
// returns a nil hash together with a non-nil error.
func New128(key []byte) (hash.Hash, error) {
	if len(key) != Size {
		return nil, errKeySize
	}

	d := new(digest)
	d.size = Size128
	copy(d.key[:], key) // the digest keeps its own copy of the key
	d.Reset()
	return d, nil
}
// New64 creates a hash.Hash64 that computes HighwayHash-64 checksums under
// key. The key must be exactly 32 bytes long; for any other length New64
// returns a nil hash together with a non-nil error.
func New64(key []byte) (hash.Hash64, error) {
	if len(key) != Size {
		return nil, errKeySize
	}

	d := &digest64{digest: digest{size: Size64}}
	copy(d.key[:], key) // the digest keeps its own copy of the key
	d.Reset()
	return d, nil
}
// Sum returns the HighwayHash-256 checksum of data under key in one shot.
// It panics if the key is not 32 bytes long.
func Sum(data, key []byte) [Size]byte {
	if len(key) != Size {
		panic(errKeySize)
	}

	var st [16]uint64
	initialize(&st, key)

	// Feed every complete 32-byte block straight from data.
	full := len(data) &^ (Size - 1)
	if full > 0 {
		update(&st, data[:full])
	}

	// Any leftover bytes go through the padded final-block path.
	if tail := data[full:]; len(tail) > 0 {
		var last [Size]byte
		n := copy(last[:], tail)
		hashBuffer(&st, &last, n)
	}

	var sum [Size]byte
	finalize(sum[:], &st)
	return sum
}
// Sum128 returns the HighwayHash-128 checksum of data under key in one shot.
// It panics if the key is not 32 bytes long.
func Sum128(data, key []byte) [Size128]byte {
	if len(key) != Size {
		panic(errKeySize)
	}

	var st [16]uint64
	initialize(&st, key)

	// Feed every complete 32-byte block straight from data.
	full := len(data) &^ (Size - 1)
	if full > 0 {
		update(&st, data[:full])
	}

	// Any leftover bytes go through the padded final-block path.
	if tail := data[full:]; len(tail) > 0 {
		var last [Size]byte
		n := copy(last[:], tail)
		hashBuffer(&st, &last, n)
	}

	var sum [Size128]byte
	finalize(sum[:], &st)
	return sum
}
// Sum64 returns the HighwayHash-64 checksum of data under key in one shot.
// The 8 checksum bytes are interpreted as a little-endian uint64.
// It panics if the key is not 32 bytes long.
func Sum64(data, key []byte) uint64 {
	if len(key) != Size {
		panic(errKeySize)
	}

	var st [16]uint64
	initialize(&st, key)

	// Feed every complete 32-byte block straight from data.
	full := len(data) &^ (Size - 1)
	if full > 0 {
		update(&st, data[:full])
	}

	// Any leftover bytes go through the padded final-block path.
	if tail := data[full:]; len(tail) > 0 {
		var last [Size]byte
		n := copy(last[:], tail)
		hashBuffer(&st, &last, n)
	}

	var sum [Size64]byte
	finalize(sum[:], &st)
	return binary.LittleEndian.Uint64(sum[:])
}
// digest64 is a digest that additionally offers Sum64; New64 returns it as a
// hash.Hash64.
type digest64 struct {
	digest
}

// Sum64 returns the checksum of everything written so far as a little-endian
// uint64. It finalizes a copy of the state, so the digest itself is not
// modified.
func (d *digest64) Sum64() uint64 {
	snapshot := d.state
	if d.offset > 0 {
		hashBuffer(&snapshot, &d.buffer, d.offset)
	}

	var out [8]byte
	finalize(out[:], &snapshot)
	return binary.LittleEndian.Uint64(out[:])
}
// digest is the streaming hash state used for every output size.
type digest struct {
	// state is the internal hash state, laid out as v0 | v1 | mul0 | mul1.
	state [16]uint64

	// key is the 32-byte key; Reset re-initializes state from it.
	key [Size]byte
	// buffer holds input bytes that do not yet form a complete block.
	buffer [Size]byte
	// offset is the number of pending bytes currently held in buffer.
	offset int
	// size is the checksum length in bytes, as reported by Size and
	// produced by Sum.
	size int
}
// Size returns the checksum length in bytes that this digest was created for.
func (d *digest) Size() int {
	return d.size
}
// BlockSize returns the block size in bytes, which is Size (32) regardless of
// the checksum length.
func (d *digest) BlockSize() int {
	return Size
}
// Reset discards any buffered input and re-initializes the state from the
// stored key, so the digest can be reused with the same key.
func (d *digest) Reset() {
	d.offset = 0
	initialize(&d.state, d.key[:])
}
// Write absorbs p into the hash state. Complete 32-byte blocks are hashed
// immediately; trailing bytes are kept in d.buffer until more input arrives
// or a checksum is requested. It always returns len(p) and a nil error.
func (d *digest) Write(p []byte) (n int, err error) {
	n = len(p)

	// First top up a partially filled buffer left by an earlier call.
	if d.offset > 0 {
		free := Size - d.offset
		if n < free {
			// Still not a full block: just stash the bytes.
			d.offset += copy(d.buffer[d.offset:], p)
			return n, nil
		}
		copy(d.buffer[d.offset:], p[:free])
		update(&d.state, d.buffer[:])
		p = p[free:]
		d.offset = 0
	}

	// Hash all complete blocks directly from p, without copying.
	if full := len(p) &^ (Size - 1); full > 0 {
		update(&d.state, p[:full])
		p = p[full:]
	}

	// Keep the remainder for later; d.offset is zero at this point.
	if len(p) > 0 {
		d.offset = copy(d.buffer[d.offset:], p)
	}
	return n, nil
}
// Sum appends the checksum of everything written so far to b and returns the
// resulting slice. It finalizes a copy of the state, so the digest itself is
// not modified.
func (d *digest) Sum(b []byte) []byte {
	snapshot := d.state
	if d.offset > 0 {
		hashBuffer(&snapshot, &d.buffer, d.offset)
	}

	var sum [Size]byte
	out := sum[:d.size]
	finalize(out, &snapshot)
	return append(b, out...)
}
// hashBuffer absorbs the final, partial block of a message into state.
// buffer holds the trailing bytes and offset is how many of them are valid;
// every caller in this file only invokes it with offset > 0 and fewer than
// Size pending bytes. The state is first modified based on offset, then the
// valid bytes are packed into a zeroed 32-byte block that is passed to update.
func hashBuffer(state *[16]uint64, buffer *[32]byte, offset int) {
var block [Size]byte
// Add (offset<<32 + offset) to each of the first four state words.
mod32 := (uint64(offset) << 32) + uint64(offset)
for i := range state[:4] {
state[i] += mod32
}
// Rotate each 32-bit half of state[4..7] left by offset bits.
// i runs over 0..3 here, hence the i+4 indexing.
for i := range state[4:8] {
t0 := uint32(state[i+4])
t0 = (t0 << uint(offset)) | (t0 >> uint(32-offset))
t1 := uint32(state[i+4] >> 32)
t1 = (t1 << uint(offset)) | (t1 >> uint(32-offset))
state[i+4] = (uint64(t1) << 32) | uint64(t0)
}
// remain is offset rounded down to a multiple of 4; mod4 is the number of
// leftover bytes (0..3) beyond that.
mod4 := offset & 3
remain := offset - mod4
copy(block[:], buffer[:remain])
if offset >= 16 {
// The destination block[28:] has room for only four bytes, so this copies
// buffer[offset-4 : offset], the last four valid bytes, to the end of the block.
copy(block[28:], buffer[offset-4:])
} else if mod4 != 0 {
// Pack the first, middle and last leftover byte into one little-endian
// word stored at block[16:20].
last := uint32(buffer[remain])
// In Go, >> binds tighter than +, so the index is remain + (mod4 >> 1).
last += uint32(buffer[remain+mod4>>1]) << 8
last += uint32(buffer[offset-1]) << 16
binary.LittleEndian.PutUint32(block[16:], last)
}
update(state, block[:])
}

View File

@@ -0,0 +1,248 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64,!gccgo,!appengine,!nacl,!noasm
#include "textflag.h"
// consAVX2 holds 64 bytes of read-only initialization constants.
// initializeAVX2 loads bytes 0..31 into Y3 and bytes 32..63 into Y4, XORs them
// into the key-derived vectors and also stores them unchanged as the third and
// fourth 32-byte quarters of the state.
DATA ·consAVX2<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f
DATA ·consAVX2<>+0x08(SB)/8, $0xa4093822299f31d0
DATA ·consAVX2<>+0x10(SB)/8, $0x13198a2e03707344
DATA ·consAVX2<>+0x18(SB)/8, $0x243f6a8885a308d3
DATA ·consAVX2<>+0x20(SB)/8, $0x3bd39e10cb0ef593
DATA ·consAVX2<>+0x28(SB)/8, $0xc0acf169b5f18a8c
DATA ·consAVX2<>+0x30(SB)/8, $0xbe5466cf34e90c6c
DATA ·consAVX2<>+0x38(SB)/8, $0x452821e638d01377
GLOBL ·consAVX2<>(SB), (NOPTR+RODATA), $64
// zipperMergeAVX2 is the 32-byte VPSHUFB byte-shuffle mask used by the UPDATE
// macro (it is loaded into Y5 by updateAVX2 and finalizeAVX2). The same
// 16-byte pattern is stored twice, once per 128-bit half of the register.
// The literals omit their leading zero nibble (0xf01... is 0x0f01...).
DATA ·zipperMergeAVX2<>+0x00(SB)/8, $0xf010e05020c03
DATA ·zipperMergeAVX2<>+0x08(SB)/8, $0x70806090d0a040b
DATA ·zipperMergeAVX2<>+0x10(SB)/8, $0xf010e05020c03
DATA ·zipperMergeAVX2<>+0x18(SB)/8, $0x70806090d0a040b
GLOBL ·zipperMergeAVX2<>(SB), (NOPTR+RODATA), $32
// REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) treats x1:x0 and x3:x2 as two
// 128-bit values (high:low) and computes
//
//   y1:y0 = (x1:x0) ^ ((x3:x2) << 1) ^ ((x3:x2) << 2)
//
// after first clearing the two most significant bits of x3.
// x2, x3, tmp0 and tmp1 are clobbered; x0 and x1 are only read.
// It is used by the 256-bit output path of finalizeAVX2.
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \
MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \
ANDQ tmp0, x3 \
MOVQ x2, y0 \
MOVQ x3, y1 \
\
MOVQ x2, tmp0 \
MOVQ x3, tmp1 \
SHLQ $1, tmp1 \
SHRQ $63, tmp0 \
MOVQ tmp1, x3 \
ORQ tmp0, x3 \
\
SHLQ $1, x2 \
\
MOVQ y0, tmp0 \
MOVQ y1, tmp1 \
SHLQ $2, tmp1 \
SHRQ $62, tmp0 \
MOVQ tmp1, y1 \
ORQ tmp0, y1 \
\
SHLQ $2, y0 \
\
XORQ x0, y0 \
XORQ x2, y0 \
XORQ x1, y1 \
XORQ x3, y1
// UPDATE(msg) mixes one 32-byte block, held in the YMM register msg, into the
// state. Register roles, as set up by updateAVX2 and finalizeAVX2 (the state
// is laid out as v0 | v1 | mul0 | mul1):
//
//   Y1 = v0, Y2 = v1, Y3 = mul0, Y4 = mul1, Y5 = shuffle mask, Y0 = scratch
//
// msg may be Y0 itself because it is consumed by the first instruction.
// For every 64-bit lane the macro computes, in this order:
//
//   v1   += msg + mul0
//   mul0 ^= (v0 >> 32) * low32(v1)
//   v0   += mul1
//   mul1 ^= (v1 >> 32) * low32(v0)
//   v0   += VPSHUFB(v1, mask)
//   v1   += VPSHUFB(v0, mask)
//
// The two BYTE sequences are hand-encoded VPMULUDQ instructions, as noted in
// their trailing comments.
#define UPDATE(msg) \
VPADDQ msg, Y2, Y2 \
VPADDQ Y3, Y2, Y2 \
\
VPSRLQ $32, Y1, Y0 \
BYTE $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC2 \ // VPMULUDQ Y2, Y0, Y0
VPXOR Y0, Y3, Y3 \
\
VPADDQ Y4, Y1, Y1 \
\
VPSRLQ $32, Y2, Y0 \
BYTE $0xC5; BYTE $0xFD; BYTE $0xF4; BYTE $0xC1 \ // VPMULUDQ Y1, Y0, Y0
VPXOR Y0, Y4, Y4 \
\
VPSHUFB Y5, Y2, Y0 \
VPADDQ Y0, Y1, Y1 \
\
VPSHUFB Y5, Y1, Y0 \
VPADDQ Y0, Y2, Y2
// func initializeAVX2(state *[16]uint64, key []byte)
//
// initializeAVX2 fills the 128-byte state from the first 32 bytes at key.
// The key length is not checked here. The TEXT flag 4 is NOSPLIT in
// textflag.h; the frame has no locals and 32 bytes of arguments.
TEXT ·initializeAVX2(SB), 4, $0-32
MOVQ state+0(FP), AX
MOVQ key_base+8(FP), BX
MOVQ $·consAVX2<>(SB), CX
// Y1 = key; Y2 = key with the two 32-bit halves of every 64-bit lane swapped
// (shuffle immediate 177 = 0b10110001).
VMOVDQU 0(BX), Y1
VPSHUFD $177, Y1, Y2
// Y3, Y4 = the two 32-byte halves of the constant table.
VMOVDQU 0(CX), Y3
VMOVDQU 32(CX), Y4
VPXOR Y3, Y1, Y1
VPXOR Y4, Y2, Y2
// Store the four 32-byte quarters of the state:
// key^const0 | swapped key^const1 | const0 | const1.
VMOVDQU Y1, 0(AX)
VMOVDQU Y2, 32(AX)
VMOVDQU Y3, 64(AX)
VMOVDQU Y4, 96(AX)
VZEROUPPER
RET
// func updateAVX2(state *[16]uint64, msg []byte)
//
// updateAVX2 mixes msg into state, 32 bytes per loop iteration. It returns
// without touching state when msg is shorter than 32 bytes.
// NOTE(review): the loop runs while the remaining length is above zero
// (SUBQ/JA), so a length that is not a multiple of 32 would read a full
// 32-byte block past the end of msg. The Go callers shown in highwayhash.go
// only pass multiples of 32.
TEXT ·updateAVX2(SB), 4, $0-32
MOVQ state+0(FP), AX
MOVQ msg_base+8(FP), BX
MOVQ msg_len+16(FP), CX
CMPQ CX, $32
JB DONE
// Load the state quarters and the shuffle mask into the registers that the
// UPDATE macro expects.
VMOVDQU 0(AX), Y1
VMOVDQU 32(AX), Y2
VMOVDQU 64(AX), Y3
VMOVDQU 96(AX), Y4
VMOVDQU ·zipperMergeAVX2<>(SB), Y5
LOOP:
VMOVDQU 0(BX), Y0
UPDATE(Y0)
ADDQ $32, BX
SUBQ $32, CX
JA LOOP
// Write the updated state back.
VMOVDQU Y1, 0(AX)
VMOVDQU Y2, 32(AX)
VMOVDQU Y3, 64(AX)
VMOVDQU Y4, 96(AX)
VZEROUPPER
DONE:
RET
// func finalizeAVX2(out []byte, state *[16]uint64)
//
// finalizeAVX2 runs the final permute-and-update rounds and writes the
// checksum to out. The number of rounds and the output formula are selected by
// len(out): 4 rounds for 8 bytes, 6 rounds for 16 bytes, and 10 rounds for any
// other length, which takes the 256-bit path and stores 32 bytes.
// The permuted state is written back through the state pointer, so state is
// modified by this call.
TEXT ·finalizeAVX2(SB), 4, $0-32
MOVQ state+24(FP), AX
MOVQ out_base+0(FP), BX
MOVQ out_len+8(FP), CX
VMOVDQU 0(AX), Y1
VMOVDQU 32(AX), Y2
VMOVDQU 64(AX), Y3
VMOVDQU 96(AX), Y4
VMOVDQU ·zipperMergeAVX2<>(SB), Y5
// Each round feeds a permutation of v0 (Y1) back in as the message block:
// VPERM2I128 $1 swaps the two 128-bit halves and VPSHUFD $177 swaps the 32-bit
// halves of every 64-bit lane.
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
CMPQ CX, $8
JE skipUpdate // Just 4 rounds for 64-bit checksum
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
CMPQ CX, $16
JE skipUpdate // 6 rounds for 128-bit checksum
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
VPERM2I128 $1, Y1, Y1, Y0
VPSHUFD $177, Y0, Y0
UPDATE(Y0)
skipUpdate:
// Spill the vectors back to memory so that the scalar code below can address
// individual 64-bit state words as n*8(AX).
VMOVDQU Y1, 0(AX)
VMOVDQU Y2, 32(AX)
VMOVDQU Y3, 64(AX)
VMOVDQU Y4, 96(AX)
VZEROUPPER
CMPQ CX, $8
JE hash64
CMPQ CX, $16
JE hash128
// 256-bit checksum
// out[0:16] = REDUCE_MOD(s0+s8, s1+s9, s4+s12, s5+s13), where sN is state word N.
MOVQ 0*8(AX), R8
MOVQ 1*8(AX), R9
MOVQ 4*8(AX), R10
MOVQ 5*8(AX), R11
ADDQ 8*8(AX), R8
ADDQ 9*8(AX), R9
ADDQ 12*8(AX), R10
ADDQ 13*8(AX), R11
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
MOVQ R14, 0(BX)
MOVQ R15, 8(BX)
// out[16:32] = REDUCE_MOD(s2+s10, s3+s11, s6+s14, s7+s15).
MOVQ 2*8(AX), R8
MOVQ 3*8(AX), R9
MOVQ 6*8(AX), R10
MOVQ 7*8(AX), R11
ADDQ 10*8(AX), R8
ADDQ 11*8(AX), R9
ADDQ 14*8(AX), R10
ADDQ 15*8(AX), R11
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
MOVQ R14, 16(BX)
MOVQ R15, 24(BX)
RET
hash128:
// out[0:8] = s0+s6+s8+s14, out[8:16] = s1+s7+s9+s15 (64-bit wrapping adds).
MOVQ 0*8(AX), R8
MOVQ 1*8(AX), R9
ADDQ 6*8(AX), R8
ADDQ 7*8(AX), R9
ADDQ 8*8(AX), R8
ADDQ 9*8(AX), R9
ADDQ 14*8(AX), R8
ADDQ 15*8(AX), R9
MOVQ R8, 0(BX)
MOVQ R9, 8(BX)
RET
hash64:
// out[0:8] = s0+s4+s8+s12 (64-bit wrapping adds).
MOVQ 0*8(AX), DX
ADDQ 4*8(AX), DX
ADDQ 8*8(AX), DX
ADDQ 12*8(AX), DX
MOVQ DX, 0(BX)
RET

View File

@@ -0,0 +1,132 @@
//
// Copyright (c) 2024 Minio Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//+build !noasm,!appengine
#include "textflag.h"
// getVectorLength stores two results, vl at 0(FP) and pl at 8(FP).
// The SVE instructions are emitted as raw WORDs with their mnemonics in the
// trailing comments. Going by those mnemonics, vl is the result of ADDVL #1
// and pl the result of ADDPL #1, each shifted left by 3; presumably these are
// the vector and predicate register lengths in bits (confirm against the Arm
// SVE reference).
TEXT ·getVectorLength(SB), NOSPLIT, $0
WORD $0xd2800002 // mov x2, #0
WORD $0x04225022 // addvl x2, x2, #1
WORD $0xd37df042 // lsl x2, x2, #3
WORD $0xd2800003 // mov x3, #0
WORD $0x04635023 // addpl x3, x3, #1
WORD $0xd37df063 // lsl x3, x3, #3
MOVD R2, vl+0(FP)
MOVD R3, pl+8(FP)
RET
// updateArm64Sve mixes msg into state using SVE instructions, which are
// emitted as raw WORDs with their mnemonics in the trailing comments.
// Arguments: state pointer at 0(FP), msg base at 8(FP), msg length at 16(FP).
// Only complete 32-byte blocks are consumed: the length is decremented by 32
// before each block and the loop ends when it goes negative. Nothing is
// loaded or stored when msg is shorter than 32 bytes.
// Register roles: z1..z4 hold the four state quarters, z5 the TBL index
// table, z6 zeros, z0 and z7 scratch.
// NOTE(review): the state is addressed as [x0, #k, MUL VL] while the message
// pointer advances by a fixed 32 bytes, which only lines up with the
// 4 x 32-byte state when the vector length is 256 bits. Presumably the caller
// checks getVectorLength first; confirm.
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
SUBS $32, R2
BMI completeSve
WORD $0x2518e3e1 // ptrue p1.b
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
WORD $0x25b8c006 // mov z6.s, #0
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */
loopSve:
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
WORD $0x04e09420 // lsr z0.d, z1.d, #32
// sel keeps the 32-bit lanes selected by p2 and takes zero (z6) for the rest,
// so the 64-bit mul below multiplies by a 32-bit value.
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
WORD $0x04e10081 // add z1.d, z4.d, z1.d
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d
SUBS $32, R2
BPL loopSve
// Write the updated state back.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
completeSve:
RET
// updateArm64Sve2 is the SVE2 variant of updateArm64Sve. It has the same
// arguments and loop structure, but uses a single UMULLB for each
// 32x32 -> 64-bit multiply instead of the SEL + MUL pair, so it needs neither
// the zero vector nor the second predicate.
// Only complete 32-byte blocks are consumed; nothing is loaded or stored when
// msg is shorter than 32 bytes.
// NOTE(review): as with the SVE version, the [x0, #k, MUL VL] addressing only
// matches the 4 x 32-byte state when the vector length is 256 bits; confirm
// that the caller guarantees this.
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
SUBS $32, R2
BMI completeSve2
WORD $0x2518e3e1 // ptrue p1.b
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]
// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
loopSve2:
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
WORD $0x04e10081 // add z1.d, z4.d, z1.d
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d
SUBS $32, R2
BPL loopSve2
// Write the updated state back.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]
completeSve2:
RET
// zipperMergeSve is the 32-byte index table that updateArm64Sve and
// updateArm64Sve2 load into z5 for their TBL byte permutations. The second
// 16 bytes repeat the first 16 with 0x10 added to every index, so they refer
// to bytes 16..31 of the source vector.
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32

View File

@@ -0,0 +1,70 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//go:build amd64 && !gccgo && !appengine && !nacl && !noasm
// +build amd64,!gccgo,!appengine,!nacl,!noasm
package highwayhash
import "golang.org/x/sys/cpu"
// CPU feature switches consulted by initialize, update and finalize below.
// On amd64 only the SSE4.1 and AVX2 flags are taken from the CPU; the ARM
// (NEON, SVE, SVE2) and VMX flags are fixed to false in this file.
var (
useSSE4 = cpu.X86.HasSSE41
useAVX2 = cpu.X86.HasAVX2
useNEON = false
useSVE = false
useSVE2 = false
useVMX = false
)
// The six functions below have no Go bodies; they are implemented in
// assembly. The //go:noescape directive tells the compiler that their pointer
// and slice arguments do not escape.

// initializeSSE4 is the SSE4 implementation of initialize.
//go:noescape
func initializeSSE4(state *[16]uint64, key []byte)
// initializeAVX2 is the AVX2 implementation of initialize.
//go:noescape
func initializeAVX2(state *[16]uint64, key []byte)
// updateSSE4 is the SSE4 implementation of update.
//go:noescape
func updateSSE4(state *[16]uint64, msg []byte)
// updateAVX2 is the AVX2 implementation of update.
//go:noescape
func updateAVX2(state *[16]uint64, msg []byte)
// finalizeSSE4 is the SSE4 implementation of finalize.
//go:noescape
func finalizeSSE4(out []byte, state *[16]uint64)
// finalizeAVX2 is the AVX2 implementation of finalize.
//go:noescape
func finalizeAVX2(out []byte, state *[16]uint64)
// initialize sets up state from key using the fastest implementation that is
// enabled: AVX2 first, then SSE4, and the generic Go code otherwise.
func initialize(state *[16]uint64, key []byte) {
	if useAVX2 {
		initializeAVX2(state, key)
		return
	}
	if useSSE4 {
		initializeSSE4(state, key)
		return
	}
	initializeGeneric(state, key)
}
// update mixes msg into state using the fastest implementation that is
// enabled: AVX2 first, then SSE4, and the generic Go code otherwise.
func update(state *[16]uint64, msg []byte) {
	if useAVX2 {
		updateAVX2(state, msg)
		return
	}
	if useSSE4 {
		updateSSE4(state, msg)
		return
	}
	updateGeneric(state, msg)
}
// finalize writes the checksum for state into out using the fastest
// implementation that is enabled: AVX2 first, then SSE4, and the generic Go
// code otherwise.
func finalize(out []byte, state *[16]uint64) {
	if useAVX2 {
		finalizeAVX2(out, state)
		return
	}
	if useSSE4 {
		finalizeSSE4(out, state)
		return
	}
	finalizeGeneric(out, state)
}

294
vendor/github.com/minio/highwayhash/highwayhash_amd64.s generated vendored Normal file
View File

@@ -0,0 +1,294 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64 !gccgo !appengine !nacl
#include "textflag.h"
// asmConstants holds 64 bytes of read-only initialization constants.
// initializeSSE4 loads them in four 16-byte pieces (m00, m01, m10, m11), XORs
// them into the key-derived vectors and also stores them unchanged as the
// second half of the state.
DATA ·asmConstants<>+0x00(SB)/8, $0xdbe6d5d5fe4cce2f
DATA ·asmConstants<>+0x08(SB)/8, $0xa4093822299f31d0
DATA ·asmConstants<>+0x10(SB)/8, $0x13198a2e03707344
DATA ·asmConstants<>+0x18(SB)/8, $0x243f6a8885a308d3
DATA ·asmConstants<>+0x20(SB)/8, $0x3bd39e10cb0ef593
DATA ·asmConstants<>+0x28(SB)/8, $0xc0acf169b5f18a8c
DATA ·asmConstants<>+0x30(SB)/8, $0xbe5466cf34e90c6c
DATA ·asmConstants<>+0x38(SB)/8, $0x452821e638d01377
GLOBL ·asmConstants<>(SB), (NOPTR+RODATA), $64
// asmZipperMerge is a 16-byte read-only table. The literals omit their leading
// zero nibble (0xf01... is 0x0f01...).
// NOTE(review): presumably this is the PSHUFB mask that the UPDATE macro
// expects in t2; the instruction that loads it is not in the part of the file
// reviewed here, so confirm.
DATA ·asmZipperMerge<>+0x00(SB)/8, $0xf010e05020c03
DATA ·asmZipperMerge<>+0x08(SB)/8, $0x70806090d0a040b
GLOBL ·asmZipperMerge<>(SB), (NOPTR+RODATA), $16
// Register aliases. The 128-byte state is kept in eight XMM registers, in the
// order it is laid out in memory (see the loads in updateSSE4/finalizeSSE4):
// v00/v01 = state bytes 0-31, v10/v11 = bytes 32-63.
#define v00 X0
#define v01 X1
#define v10 X2
#define v11 X3
// m00/m01 = state bytes 64-95, m10/m11 = bytes 96-127.
#define m00 X4
#define m01 X5
#define m10 X6
#define m11 X7
// t0/t1 are scratch registers (also used to pass the message block to UPDATE);
// t2 holds the asmZipperMerge shuffle mask while UPDATE runs.
#define t0 X8
#define t1 X9
#define t2 X10
// REDUCE_MOD folds the 256-bit value x3:x2:x1:x0 into the 128-bit result y1:y0.
// It clears the top two bits of x3, then computes
//   y0 = x0 ^ (x2 << 1) ^ (x2 << 2)
//   y1 = x1 ^ ((x3 << 1) | (x2 >> 63)) ^ ((x3 << 2) | (x2 >> 62))
// All arguments are general-purpose registers. x2, x3, tmp0 and tmp1 are
// overwritten; x0 and x1 are only read.
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \
MOVQ $0x3FFFFFFFFFFFFFFF, tmp0 \
ANDQ tmp0, x3 \
MOVQ x2, y0 \
MOVQ x3, y1 \
\
MOVQ x2, tmp0 \
MOVQ x3, tmp1 \
SHLQ $1, tmp1 \
SHRQ $63, tmp0 \
MOVQ tmp1, x3 \
ORQ tmp0, x3 \
\
SHLQ $1, x2 \
\
MOVQ y0, tmp0 \
MOVQ y1, tmp1 \
SHLQ $2, tmp1 \
SHRQ $62, tmp0 \
MOVQ tmp1, y1 \
ORQ tmp0, y1 \
\
SHLQ $2, y0 \
\
XORQ x0, y0 \
XORQ x2, y0 \
XORQ x1, y1 \
XORQ x3, y1
// UPDATE mixes one 32-byte block (msg0 = low 16 bytes, msg1 = high 16 bytes)
// into the state held in v00..m11. Per 64-bit lane it performs, in order:
//   v1   += msg + mul0               (v10/v11 += msg + m00/m01)
//   mul0 ^= low32(v1) * high32(v0)   (m00/m01)
//   v0   += mul1                     (v00/v01 += m10/m11)
//   mul1 ^= low32(v0) * high32(v1)   (m10/m11)
//   v0   += PSHUFB(v1, t2)           (zipper merge)
//   v1   += PSHUFB(v0, t2)
// t2 must already contain asmZipperMerge. t0 and t1 are overwritten; callers
// in this file pass t0/t1 as msg0/msg1, which works because the message is
// consumed before the scratch registers are reused.
#define UPDATE(msg0, msg1) \
PADDQ msg0, v10 \
PADDQ m00, v10 \
PADDQ msg1, v11 \
PADDQ m01, v11 \
\
MOVO v00, t0 \
MOVO v01, t1 \
PSRLQ $32, t0 \
PSRLQ $32, t1 \
PMULULQ v10, t0 \
PMULULQ v11, t1 \
PXOR t0, m00 \
PXOR t1, m01 \
\
PADDQ m10, v00 \
PADDQ m11, v01 \
\
MOVO v10, t0 \
MOVO v11, t1 \
PSRLQ $32, t0 \
PSRLQ $32, t1 \
PMULULQ v00, t0 \
PMULULQ v01, t1 \
PXOR t0, m10 \
PXOR t1, m11 \
\
MOVO v10, t0 \
PSHUFB t2, t0 \
MOVO v11, t1 \
PSHUFB t2, t1 \
PADDQ t0, v00 \
PADDQ t1, v01 \
\
MOVO v00, t0 \
PSHUFB t2, t0 \
MOVO v01, t1 \
PSHUFB t2, t1 \
PADDQ t0, v10 \
PADDQ t1, v11
// func initializeSSE4(state *[16]uint64, key []byte)
//
// Builds the initial 128-byte state from the first 32 bytes of key. The key
// length is not checked here.
TEXT ·initializeSSE4(SB), NOSPLIT, $0-32
MOVQ state+0(FP), AX
MOVQ key_base+8(FP), BX
MOVQ $·asmConstants<>(SB), CX
// v00/v01 = the 32 key bytes.
MOVOU 0(BX), v00
MOVOU 16(BX), v01
// v10/v11 = key with the two 32-bit halves of every 64-bit word swapped
// (shuffle immediate 177 = 0b10110001 selects dwords 1,0,3,2).
PSHUFD $177, v00, v10
PSHUFD $177, v01, v11
// m00..m11 = the four constant vectors.
MOVOU 0(CX), m00
MOVOU 16(CX), m01
MOVOU 32(CX), m10
MOVOU 48(CX), m11
// XOR the constants into the key-derived vectors.
PXOR m00, v00
PXOR m01, v01
PXOR m10, v10
PXOR m11, v11
// Store the state: key-derived vectors first, then the raw constants.
MOVOU v00, 0(AX)
MOVOU v01, 16(AX)
MOVOU v10, 32(AX)
MOVOU v11, 48(AX)
MOVOU m00, 64(AX)
MOVOU m01, 80(AX)
MOVOU m10, 96(AX)
MOVOU m11, 112(AX)
RET
// func updateSSE4(state *[16]uint64, msg []byte)
//
// Mixes msg into state one 32-byte block at a time and writes the updated
// state back. Processing stops as soon as fewer than 32 bytes remain, so a
// trailing partial block is ignored and never read; state is left untouched
// when msg is shorter than 32 bytes.
TEXT ·updateSSE4(SB), NOSPLIT, $0-32
	MOVQ state+0(FP), AX
	MOVQ msg_base+8(FP), BX
	MOVQ msg_len+16(FP), CX

	// Nothing to do without at least one full block.
	CMPQ CX, $32
	JB   DONE

	// Load the state into v00..m11 and the shuffle mask required by UPDATE.
	MOVOU 0(AX), v00
	MOVOU 16(AX), v01
	MOVOU 32(AX), v10
	MOVOU 48(AX), v11
	MOVOU 64(AX), m00
	MOVOU 80(AX), m01
	MOVOU 96(AX), m10
	MOVOU 112(AX), m11
	MOVOU ·asmZipperMerge<>(SB), t2

LOOP:
	MOVOU 0(BX), t0
	MOVOU 16(BX), t1
	UPDATE(t0, t1)

	ADDQ $32, BX
	SUBQ $32, CX

	// Continue only while a full block remains. Testing the remainder
	// against 32 (rather than against zero) prevents a final 32-byte load
	// past the end of msg when its length is not a multiple of 32.
	CMPQ CX, $32
	JAE  LOOP

	// Write the updated state back.
	MOVOU v00, 0(AX)
	MOVOU v01, 16(AX)
	MOVOU v10, 32(AX)
	MOVOU v11, 48(AX)
	MOVOU m00, 64(AX)
	MOVOU m01, 80(AX)
	MOVOU m10, 96(AX)
	MOVOU m11, 112(AX)

DONE:
	RET
// func finalizeSSE4(out []byte, state *[16]uint64)
//
// Runs the closing permute-and-update rounds on state, writes the resulting
// state back to memory, and stores the checksum in out. The number of rounds
// and the output size are selected by out_len: 8 -> 4 rounds / 8 bytes,
// 16 -> 6 rounds / 16 bytes; any other length takes the 256-bit path with
// 10 rounds and stores 32 bytes.
TEXT ·finalizeSSE4(SB), NOSPLIT, $0-32
MOVQ state+24(FP), AX
MOVQ out_base+0(FP), BX
MOVQ out_len+8(FP), CX
// Load the state and the shuffle mask required by UPDATE.
MOVOU 0(AX), v00
MOVOU 16(AX), v01
MOVOU 32(AX), v10
MOVOU 48(AX), v11
MOVOU 64(AX), m00
MOVOU 80(AX), m01
MOVOU 96(AX), m10
MOVOU 112(AX), m11
MOVOU ·asmZipperMerge<>(SB), t2
// Each round feeds UPDATE a block derived from v0: the two 16-byte halves are
// exchanged (v01 first, then v00) and the 32-bit halves of every 64-bit word
// are swapped (PSHUFD $177).
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
CMPQ CX, $8
JE skipUpdate // Just 4 rounds for 64-bit checksum
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
CMPQ CX, $16
JE skipUpdate // 6 rounds for 128-bit checksum
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
PSHUFD $177, v01, t0
PSHUFD $177, v00, t1
UPDATE(t0, t1)
skipUpdate:
// Persist the post-round state before the registers are reused below.
MOVOU v00, 0(AX)
MOVOU v01, 16(AX)
MOVOU v10, 32(AX)
MOVOU v11, 48(AX)
MOVOU m00, 64(AX)
MOVOU m01, 80(AX)
MOVOU m10, 96(AX)
MOVOU m11, 112(AX)
CMPQ CX, $8
JE hash64
CMPQ CX, $16
JE hash128
// 256-bit checksum
// m00/m01 become v0+mul0 and m10/m11 become v1+mul1 (per 64-bit lane); each
// 16-byte half of the output is REDUCE_MOD of the matching pair of sums.
PADDQ v00, m00
PADDQ v10, m10
PADDQ v01, m01
PADDQ v11, m11
MOVQ m00, R8
PEXTRQ $1, m00, R9
MOVQ m10, R10
PEXTRQ $1, m10, R11
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
MOVQ R14, 0(BX)
MOVQ R15, 8(BX)
MOVQ m01, R8
PEXTRQ $1, m01, R9
MOVQ m11, R10
PEXTRQ $1, m11, R11
REDUCE_MOD(R8, R9, R10, R11, R12, R13, R14, R15)
MOVQ R14, 16(BX)
MOVQ R15, 24(BX)
RET
hash128:
// out[0:16] = v00 + v11 + m00 + m11 (per 64-bit lane).
PADDQ v00, v11
PADDQ m00, m11
PADDQ v11, m11
MOVOU m11, 0(BX)
RET
hash64:
// out[0:8] = low 64-bit lane of v00 + v10 + m00 + m10.
PADDQ v00, v10
PADDQ m00, m10
PADDQ v10, m10
MOVQ m10, DX
MOVQ DX, 0(BX)
RET

View File

@@ -0,0 +1,81 @@
// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//go:build !noasm && !appengine
// +build !noasm,!appengine
package highwayhash
import (
"golang.org/x/sys/cpu"
)
// Feature flags consulted by initialize, update and finalize in this file.
// useNEON and useSVE come from the ARM64 CPU feature detection in
// golang.org/x/sys/cpu; useSVE2 is hard-wired to false (see the inline note),
// and the flags naming other architectures' instruction sets are false.
var (
useSSE4 = false
useAVX2 = false
useNEON = cpu.ARM64.HasASIMD
useSVE = cpu.ARM64.HasSVE
useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
useVMX = false
)
// init turns the SVE and SVE2 code paths off unless the value reported by
// getVectorLength is 256.
func init() {
	if !useSVE {
		return
	}
	vl, _ := getVectorLength()
	if vl == 256 {
		return
	}
	// HighwayHash is designed for AVX2, so the SVE/SVE2 instructions only
	// run correctly for a vector length of 256.
	useSVE2, useSVE = false, false
}
// The functions below are declared without a body; their implementations live
// outside this Go file.

// initializeArm64 fills state from the key.
//go:noescape
func initializeArm64(state *[16]uint64, key []byte)
// updateArm64 mixes msg into state; update selects it when only useNEON is set.
//go:noescape
func updateArm64(state *[16]uint64, msg []byte)
// getVectorLength returns two lengths, vl and pl; init compares vl against 256.
// NOTE(review): presumably the SVE vector and predicate register lengths in
// bits -- confirm against the assembly implementation.
//go:noescape
func getVectorLength() (vl, pl uint64)
// updateArm64Sve is the update routine selected when useSVE is set.
//go:noescape
func updateArm64Sve(state *[16]uint64, msg []byte)
// updateArm64Sve2 is the update routine selected when useSVE2 is set.
//go:noescape
func updateArm64Sve2(state *[16]uint64, msg []byte)
// finalizeArm64 runs the final rounds on state and writes the checksum to out.
//go:noescape
func finalizeArm64(out []byte, state *[16]uint64)
// initialize sets up state from key, using the arm64 assembly routine when
// NEON is available and the portable Go code otherwise.
func initialize(state *[16]uint64, key []byte) {
	if !useNEON {
		initializeGeneric(state, key)
		return
	}
	initializeArm64(state, key)
}
// update mixes msg into state with the most capable enabled implementation,
// checked in the order SVE2, SVE, NEON, and finally the portable Go code.
func update(state *[16]uint64, msg []byte) {
	switch {
	case useSVE2:
		updateArm64Sve2(state, msg)
	case useSVE:
		updateArm64Sve(state, msg)
	case useNEON:
		updateArm64(state, msg)
	default:
		updateGeneric(state, msg)
	}
}
// finalize runs the closing rounds on state and writes the checksum to out,
// using the arm64 assembly routine when NEON is available and the portable
// Go code otherwise.
func finalize(out []byte, state *[16]uint64) {
	if !useNEON {
		finalizeGeneric(out, state)
		return
	}
	finalizeArm64(out, state)
}

324
vendor/github.com/minio/highwayhash/highwayhash_arm64.s generated vendored Normal file
View File

@@ -0,0 +1,324 @@
//
// Minio Cloud Storage, (C) 2017 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//+build !noasm,!appengine
// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
// the opcodes of their Plan9 equivalents
#include "textflag.h"
// REDUCE_MOD folds the 256-bit value x3:x2:x1:x0 into the 128-bit result y1:y0.
// It clears the top two bits of x3, then computes
//   y0 = x0 ^ (x2 << 1) ^ (x2 << 2)
//   y1 = x1 ^ ((x3 << 1) | (x2 >> 63)) ^ ((x3 << 2) | (x2 >> 62))
// All arguments are general-purpose registers. x2, x3, tmp0 and tmp1 are
// overwritten; x0 and x1 are only read.
#define REDUCE_MOD(x0, x1, x2, x3, tmp0, tmp1, y0, y1) \
MOVD $0x3FFFFFFFFFFFFFFF, tmp0 \
AND tmp0, x3 \
MOVD x2, y0 \
MOVD x3, y1 \
\
MOVD x2, tmp0 \
MOVD x3, tmp1 \
LSL $1, tmp1 \
LSR $63, tmp0 \
MOVD tmp1, x3 \
ORR tmp0, x3 \
\
LSL $1, x2 \
\
MOVD y0, tmp0 \
MOVD y1, tmp1 \
LSL $2, tmp1 \
LSR $62, tmp0 \
MOVD tmp1, y1 \
ORR tmp0, y1 \
\
LSL $2, y0 \
\
EOR x0, y0 \
EOR x2, y0 \
EOR x1, y1 \
EOR x3, y1
// UPDATE mixes one 32-byte block (MSG1 = low 16 bytes, MSG2 = high 16 bytes)
// into the state held in V0-V7 (V0/V1 = v0, V2/V3 = v1, V4/V5 = mul0,
// V6/V7 = mul1). It expects the three 16-byte tables of asmZipperMerge in
// V28 (zipper merge), V29 and V30 (operand setup for the multiplies), and it
// overwrites V10-V21 as scratch. The two UMULL/UMULL2 pairs are emitted as raw
// WORD opcodes because, as noted inline, the assembler lacks support for them.
#define UPDATE(MSG1, MSG2) \
\ // Add message
VADD MSG1.D2, V2.D2, V2.D2 \
VADD MSG2.D2, V3.D2, V3.D2 \
\
\ // v1 += mul0
VADD V4.D2, V2.D2, V2.D2 \
VADD V5.D2, V3.D2, V3.D2 \
\
\ // First pair of multiplies
VTBL V29.B16, [V0.B16, V1.B16], V10.B16 \
VTBL V30.B16, [V2.B16, V3.B16], V11.B16 \
\
\ // VUMULL V10.S2, V11.S2, V12.D2 /* assembler support missing */
\ // VUMULL2 V10.S4, V11.S4, V13.D2 /* assembler support missing */
WORD $0x2eaac16c \ // umull v12.2d, v11.2s, v10.2s
WORD $0x6eaac16d \ // umull2 v13.2d, v11.4s, v10.4s
\
\ // v0 += mul1
VADD V6.D2, V0.D2, V0.D2 \
VADD V7.D2, V1.D2, V1.D2 \
\
\ // Second pair of multiplies
VTBL V29.B16, [V2.B16, V3.B16], V15.B16 \
VTBL V30.B16, [V0.B16, V1.B16], V14.B16 \
\
\ // EOR multiplication result in
VEOR V12.B16, V4.B16, V4.B16 \
VEOR V13.B16, V5.B16, V5.B16 \
\
\ // VUMULL V14.S2, V15.S2, V16.D2 /* assembler support missing */
\ // VUMULL2 V14.S4, V15.S4, V17.D2 /* assembler support missing */
WORD $0x2eaec1f0 \ // umull v16.2d, v15.2s, v14.2s
WORD $0x6eaec1f1 \ // umull2 v17.2d, v15.4s, v14.4s
\
\ // First pair of zipper-merges
VTBL V28.B16, [V2.B16], V18.B16 \
VADD V18.D2, V0.D2, V0.D2 \
VTBL V28.B16, [V3.B16], V19.B16 \
VADD V19.D2, V1.D2, V1.D2 \
\
\ // Second pair of zipper-merges
VTBL V28.B16, [V0.B16], V20.B16 \
VADD V20.D2, V2.D2, V2.D2 \
VTBL V28.B16, [V1.B16], V21.B16 \
VADD V21.D2, V3.D2, V3.D2 \
\
\ // EOR multiplication result in
VEOR V16.B16, V6.B16, V6.B16 \
VEOR V17.B16, V7.B16, V7.B16
// func initializeArm64(state *[16]uint64, key []byte)
//
// Builds the initial 128-byte state from the first 32 bytes of key. The key
// length is not checked here.
TEXT ·initializeArm64(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD key_base+8(FP), R1
// V1/V2 = the 32 key bytes.
VLD1 (R1), [V1.S4, V2.S4]
// V3/V4 = key with the 32-bit words of every 64-bit lane exchanged.
VREV64 V1.S4, V3.S4
VREV64 V2.S4, V4.S4
// V5-V8 = the four constant vectors from asmConstants.
MOVD $·asmConstants(SB), R3
VLD1 (R3), [V5.S4, V6.S4, V7.S4, V8.S4]
VEOR V5.B16, V1.B16, V1.B16
VEOR V6.B16, V2.B16, V2.B16
VEOR V7.B16, V3.B16, V3.B16
VEOR V8.B16, V4.B16, V4.B16
// Store the key-derived vectors as state bytes 0-63 (post-incrementing R0),
// then the raw constants as bytes 64-127.
VST1.P [V1.D2, V2.D2, V3.D2, V4.D2], 64(R0)
VST1 [V5.D2, V6.D2, V7.D2, V8.D2], (R0)
RET
// func updateArm64(state *[16]uint64, msg []byte)
//
// Mixes msg into state one 32-byte block at a time and writes the updated
// state back. R2 always holds "remaining length - 32": the routine returns
// without touching state when msg is shorter than 32 bytes, and the loop stops
// once fewer than 32 bytes remain, so a trailing partial block is ignored.
TEXT ·updateArm64(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
SUBS $32, R2
BMI complete
// Definition of registers
// v0 = v0.lo
// v1 = v0.hi
// v2 = v1.lo
// v3 = v1.hi
// v4 = mul0.lo
// v5 = mul0.hi
// v6 = mul1.lo
// v7 = mul1.hi
// Load zipper merge constants table pointer
MOVD $·asmZipperMerge(SB), R3
// and load zipper merge constants into v28, v29, and v30
VLD1 (R3), [V28.B16, V29.B16, V30.B16]
VLD1.P 64(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
VLD1 (R0), [V4.D2, V5.D2, V6.D2, V7.D2]
// Undo the post-increment so R0 points at the start of state again.
SUBS $64, R0
loop:
// Main loop
VLD1.P 32(R1), [V26.S4, V27.S4]
UPDATE(V26, V27)
SUBS $32, R2
BPL loop
// Store result
VST1.P [V0.D2, V1.D2, V2.D2, V3.D2], 64(R0)
VST1 [V4.D2, V5.D2, V6.D2, V7.D2], (R0)
complete:
RET
// func finalizeArm64(out []byte, state *[16]uint64)
//
// Runs the closing permute-and-update rounds on state, writes the resulting
// state back to memory, and stores the checksum in out. The number of rounds
// and the output size are selected by out_len: 8 -> 4 rounds / 8 bytes,
// 16 -> 6 rounds / 16 bytes; any other length takes the 256-bit path with
// 10 rounds and stores 32 bytes.
TEXT ·finalizeArm64(SB), NOSPLIT, $0-32
MOVD state+24(FP), R0
MOVD out_base+0(FP), R1
MOVD out_len+8(FP), R2
// Load zipper merge constants table pointer
MOVD $·asmZipperMerge(SB), R3
// and load zipper merge constants into v28, v29, and v30
VLD1 (R3), [V28.B16, V29.B16, V30.B16]
VLD1.P 64(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
VLD1 (R0), [V4.D2, V5.D2, V6.D2, V7.D2]
// Undo the post-increment so R0 points at the start of state again.
SUB $64, R0
// Each round feeds UPDATE a block derived from v0: the two 16-byte halves are
// exchanged (V1 first, then V0) and the 32-bit words of every 64-bit lane are
// swapped (VREV64 on .S4).
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
CMP $8, R2
BEQ skipUpdate // Just 4 rounds for 64-bit checksum
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
CMP $16, R2
BEQ skipUpdate // 6 rounds for 128-bit checksum
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
VREV64 V1.S4, V26.S4
VREV64 V0.S4, V27.S4
UPDATE(V26, V27)
skipUpdate:
// Store result
VST1.P [V0.D2, V1.D2, V2.D2, V3.D2], 64(R0)
VST1 [V4.D2, V5.D2, V6.D2, V7.D2], (R0)
SUB $64, R0
// From here on the checksum is computed from the state words just stored,
// addressed as 64-bit words index*8(R0).
CMP $8, R2
BEQ hash64
CMP $16, R2
BEQ hash128
// 256-bit checksum
// First half: REDUCE_MOD(s[0]+s[8], s[1]+s[9], s[4]+s[12], s[5]+s[13]).
MOVD 0*8(R0), R8
MOVD 1*8(R0), R9
MOVD 4*8(R0), R10
MOVD 5*8(R0), R11
MOVD 8*8(R0), R4
MOVD 9*8(R0), R5
MOVD 12*8(R0), R6
MOVD 13*8(R0), R7
ADD R4, R8
ADD R5, R9
ADD R6, R10
ADD R7, R11
REDUCE_MOD(R8, R9, R10, R11, R4, R5, R6, R7)
MOVD R6, 0(R1)
MOVD R7, 8(R1)
// Second half: REDUCE_MOD(s[2]+s[10], s[3]+s[11], s[6]+s[14], s[7]+s[15]).
MOVD 2*8(R0), R8
MOVD 3*8(R0), R9
MOVD 6*8(R0), R10
MOVD 7*8(R0), R11
MOVD 10*8(R0), R4
MOVD 11*8(R0), R5
MOVD 14*8(R0), R6
MOVD 15*8(R0), R7
ADD R4, R8
ADD R5, R9
ADD R6, R10
ADD R7, R11
REDUCE_MOD(R8, R9, R10, R11, R4, R5, R6, R7)
MOVD R6, 16(R1)
MOVD R7, 24(R1)
RET
hash128:
// out[0:8] = s[0]+s[6]+s[8]+s[14], out[8:16] = s[1]+s[7]+s[9]+s[15].
MOVD 0*8(R0), R8
MOVD 1*8(R0), R9
MOVD 6*8(R0), R10
MOVD 7*8(R0), R11
ADD R10, R8
ADD R11, R9
MOVD 8*8(R0), R10
MOVD 9*8(R0), R11
ADD R10, R8
ADD R11, R9
MOVD 14*8(R0), R10
MOVD 15*8(R0), R11
ADD R10, R8
ADD R11, R9
MOVD R8, 0(R1)
MOVD R9, 8(R1)
RET
hash64:
// out[0:8] = s[0]+s[4]+s[8]+s[12].
MOVD 0*8(R0), R4
MOVD 4*8(R0), R5
MOVD 8*8(R0), R6
MOVD 12*8(R0), R7
ADD R5, R4
ADD R7, R6
ADD R6, R4
MOVD R4, (R1)
RET
// asmConstants holds the eight 64-bit initialization constants (64 bytes) that
// initializeArm64 loads as four 16-byte vectors.
DATA ·asmConstants+0x00(SB)/8, $0xdbe6d5d5fe4cce2f
DATA ·asmConstants+0x08(SB)/8, $0xa4093822299f31d0
DATA ·asmConstants+0x10(SB)/8, $0x13198a2e03707344
DATA ·asmConstants+0x18(SB)/8, $0x243f6a8885a308d3
DATA ·asmConstants+0x20(SB)/8, $0x3bd39e10cb0ef593
DATA ·asmConstants+0x28(SB)/8, $0xc0acf169b5f18a8c
DATA ·asmConstants+0x30(SB)/8, $0xbe5466cf34e90c6c
DATA ·asmConstants+0x38(SB)/8, $0x452821e638d01377
GLOBL ·asmConstants(SB), 8, $64
// Constants for TBL instructions
// asmZipperMerge is 48 bytes: three 16-byte lookup tables that updateArm64 and
// finalizeArm64 load into V28, V29 and V30 for the UPDATE macro.
DATA ·asmZipperMerge+0x0(SB)/8, $0x000f010e05020c03 // zipper merge constant
DATA ·asmZipperMerge+0x8(SB)/8, $0x070806090d0a040b
DATA ·asmZipperMerge+0x10(SB)/8, $0x0f0e0d0c07060504 // setup first register for multiply
DATA ·asmZipperMerge+0x18(SB)/8, $0x1f1e1d1c17161514
DATA ·asmZipperMerge+0x20(SB)/8, $0x0b0a090803020100 // setup second register for multiply
DATA ·asmZipperMerge+0x28(SB)/8, $0x1b1a191813121110
GLOBL ·asmZipperMerge(SB), 8, $48

View File

@@ -0,0 +1,338 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
package highwayhash
import (
"encoding/binary"
)
// Start indices of the four 4-word groups that make up the [16]uint64 hash
// state: v0 occupies words 0-3, v1 words 4-7, mul0 words 8-11 and mul1
// words 12-15.
const (
v0 = 0
v1 = 4
mul0 = 8
mul1 = 12
)
// init0 and init1 are the initialization constants. initializeGeneric copies
// init0 into the mul0 group and init1 into the mul1 group, XORs init0 with the
// key to form v0, and XORs init1 with the 32-bit-rotated key to form v1.
var (
init0 = [4]uint64{0xdbe6d5d5fe4cce2f, 0xa4093822299f31d0, 0x13198a2e03707344, 0x243f6a8885a308d3}
init1 = [4]uint64{0x3bd39e10cb0ef593, 0xc0acf169b5f18a8c, 0xbe5466cf34e90c6c, 0x452821e638d01377}
)
// initializeGeneric is the portable implementation of state setup. It reads
// the first 32 bytes of k as four little-endian 64-bit words and fills state:
// mul0 and mul1 receive init0 and init1, v0 is init0 XOR key, and v1 is init1
// XOR the key with the two 32-bit halves of every word exchanged. It panics
// if k holds fewer than 32 bytes.
func initializeGeneric(state *[16]uint64, k []byte) {
	// Decode the whole key before touching state.
	var words [4]uint64
	for i := range words {
		words[i] = binary.LittleEndian.Uint64(k[8*i:])
	}

	copy(state[mul0:], init0[:])
	copy(state[mul1:], init1[:])

	for i, w := range words {
		state[v0+i] = init0[i] ^ w
		state[v1+i] = init1[i] ^ (w>>32 | w<<32)
	}
}
// updateGeneric is the portable implementation of the update step. It consumes
// msg in 32-byte blocks for as long as at least 32 bytes remain; a trailing
// partial block is ignored. For each block and each of the four lanes i it
// performs
//
//	v1[i]   += message word i (little endian) + mul0[i]
//	mul0[i] ^= low32(v1[i]) * high32(v0[i])
//	v0[i]   += mul1[i]
//	mul1[i] ^= low32(v0[i]) * high32(v1[i])
//
// and then applies four "zipper merge" byte permutations, first adding the
// permuted v1 word pairs into v0 and then the permuted v0 word pairs into v1.
// The zipper merge is written out inline four times rather than called as a
// helper.
func updateGeneric(state *[16]uint64, msg []byte) {
for len(msg) >= 32 {
m := msg[:32]
// add message + mul0
// Interleave operations to hide multiplication
state[v1+0] += binary.LittleEndian.Uint64(m) + state[mul0+0]
state[mul0+0] ^= uint64(uint32(state[v1+0])) * (state[v0+0] >> 32)
state[v0+0] += state[mul1+0]
state[mul1+0] ^= uint64(uint32(state[v0+0])) * (state[v1+0] >> 32)
state[v1+1] += binary.LittleEndian.Uint64(m[8:]) + state[mul0+1]
state[mul0+1] ^= uint64(uint32(state[v1+1])) * (state[v0+1] >> 32)
state[v0+1] += state[mul1+1]
state[mul1+1] ^= uint64(uint32(state[v0+1])) * (state[v1+1] >> 32)
state[v1+2] += binary.LittleEndian.Uint64(m[16:]) + state[mul0+2]
state[mul0+2] ^= uint64(uint32(state[v1+2])) * (state[v0+2] >> 32)
state[v0+2] += state[mul1+2]
state[mul1+2] ^= uint64(uint32(state[v0+2])) * (state[v1+2] >> 32)
state[v1+3] += binary.LittleEndian.Uint64(m[24:]) + state[mul0+3]
state[mul0+3] ^= uint64(uint32(state[v1+3])) * (state[v0+3] >> 32)
state[v0+3] += state[mul1+3]
state[mul1+3] ^= uint64(uint32(state[v0+3])) * (state[v1+3] >> 32)
// inlined: zipperMerge(state[v1+0], state[v1+1], &state[v0+0], &state[v0+1])
{
val0 := state[v1+0]
val1 := state[v1+1]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v0+0] += res
state[v0+1] += res2
}
// inlined: zipperMerge(state[v1+2], state[v1+3], &state[v0+2], &state[v0+3])
{
val0 := state[v1+2]
val1 := state[v1+3]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v0+2] += res
state[v0+3] += res2
}
// inlined: zipperMerge(state[v0+0], state[v0+1], &state[v1+0], &state[v1+1])
{
val0 := state[v0+0]
val1 := state[v0+1]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v1+0] += res
state[v1+1] += res2
}
// inlined: zipperMerge(state[v0+2], state[v0+3], &state[v1+2], &state[v1+3])
{
val0 := state[v0+2]
val1 := state[v0+3]
res := val0 & (0xff << (2 * 8))
res2 := (val0 & (0xff << (7 * 8))) + (val1 & (0xff << (2 * 8)))
res += (val1 & (0xff << (7 * 8))) >> 8
res2 += (val0 & (0xff << (6 * 8))) >> 8
res += ((val0 & (0xff << (5 * 8))) + (val1 & (0xff << (6 * 8)))) >> 16
res2 += (val1 & (0xff << (5 * 8))) >> 16
res += ((val0 & (0xff << (3 * 8))) + (val1 & (0xff << (4 * 8)))) >> 24
res2 += ((val1 & (0xff << (3 * 8))) + (val0 & (0xff << (4 * 8)))) >> 24
res += (val0 & (0xff << (1 * 8))) << 32
res2 += (val1 & 0xff) << 48
res += val0 << 56
res2 += (val1 & (0xff << (1 * 8))) << 24
state[v1+2] += res
state[v1+3] += res2
}
// Advance to the next 32-byte block.
msg = msg[32:]
}
}
// finalizeGeneric is the portable implementation of finalization. It first
// runs a number of extra update rounds chosen by len(out) (6 for 16 bytes,
// 10 for 32 bytes, 4 otherwise), each fed with a 32-byte block built from the
// v0 words in the order 2, 3, 0, 1 with their 32-bit halves exchanged. It then
// writes the little-endian checksum to out when len(out) is 8, 16 or 32; for
// any other length out is left untouched. state is modified.
func finalizeGeneric(out []byte, state *[16]uint64) {
	var rounds int
	switch len(out) {
	case 16:
		rounds = 6
	case 32:
		rounds = 10
	default:
		rounds = 4
	}

	var block [32]byte
	for ; rounds > 0; rounds-- {
		// Lane j of the block comes from v0 word order[j].
		for lane, src := range [4]int{2, 3, 0, 1} {
			w := state[v0+src]
			binary.LittleEndian.PutUint64(block[8*lane:], w>>32|w<<32)
		}
		update(state, block[:])
	}

	switch len(out) {
	case 8:
		sum := state[v0] + state[v1] + state[mul0] + state[mul1]
		binary.LittleEndian.PutUint64(out, sum)
	case 16:
		lo := state[v0] + state[v1+2] + state[mul0] + state[mul1+2]
		binary.LittleEndian.PutUint64(out, lo)
		hi := state[v0+1] + state[v1+3] + state[mul0+1] + state[mul1+3]
		binary.LittleEndian.PutUint64(out[8:], hi)
	case 32:
		// Each 16-byte half of the digest reduces one pair of lanes.
		for half := 0; half < 2; half++ {
			i := 2 * half
			h0, h1 := reduceMod(
				state[v0+i]+state[mul0+i],
				state[v0+i+1]+state[mul0+i+1],
				state[v1+i]+state[mul1+i],
				state[v1+i+1]+state[mul1+i+1],
			)
			binary.LittleEndian.PutUint64(out[16*half:], h0)
			binary.LittleEndian.PutUint64(out[16*half+8:], h1)
		}
	}
}
// Experiments on variations left for future reference...
/*
func zipperMerge(v0, v1 uint64, d0, d1 *uint64) {
if true {
// fastest. original interleaved...
res := v0 & (0xff << (2 * 8))
res2 := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
res += (v1 & (0xff << (7 * 8))) >> 8
res2 += (v0 & (0xff << (6 * 8))) >> 8
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
res2 += (v1 & (0xff << (5 * 8))) >> 16
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
res2 += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
res += (v0 & (0xff << (1 * 8))) << 32
res2 += (v1 & 0xff) << 48
res += v0 << 56
res2 += (v1 & (0xff << (1 * 8))) << 24
*d0 += res
*d1 += res2
} else if false {
// Reading bytes and combining into uint64
var v0b [8]byte
binary.LittleEndian.PutUint64(v0b[:], v0)
var v1b [8]byte
binary.LittleEndian.PutUint64(v1b[:], v1)
var res, res2 uint64
res = uint64(v0b[0]) << (7 * 8)
res2 = uint64(v1b[0]) << (6 * 8)
res |= uint64(v0b[1]) << (5 * 8)
res2 |= uint64(v1b[1]) << (4 * 8)
res |= uint64(v0b[2]) << (2 * 8)
res2 |= uint64(v1b[2]) << (2 * 8)
res |= uint64(v0b[3])
res2 |= uint64(v0b[4]) << (1 * 8)
res |= uint64(v0b[5]) << (3 * 8)
res2 |= uint64(v0b[6]) << (5 * 8)
res |= uint64(v1b[4]) << (1 * 8)
res2 |= uint64(v0b[7]) << (7 * 8)
res |= uint64(v1b[6]) << (4 * 8)
res2 |= uint64(v1b[3])
res |= uint64(v1b[7]) << (6 * 8)
res2 |= uint64(v1b[5]) << (3 * 8)
*d0 += res
*d1 += res2
} else if false {
// bytes to bytes shuffle
var v0b [8]byte
binary.LittleEndian.PutUint64(v0b[:], v0)
var v1b [8]byte
binary.LittleEndian.PutUint64(v1b[:], v1)
var res [8]byte
//res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
res[0] = v0b[3]
res[1] = v1b[4]
// res := v0 & (0xff << (2 * 8))
res[2] = v0b[2]
//res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
res[3] = v0b[5]
res[4] = v1b[6]
//res += (v0 & (0xff << (1 * 8))) << 32
res[5] = v0b[1]
//res += (v1 & (0xff << (7 * 8))) >> 8
res[6] += v1b[7]
//res += v0 << 56
res[7] = v0b[0]
v0 = binary.LittleEndian.Uint64(res[:])
*d0 += v0
//res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
res[0] = v1b[3]
res[1] = v0b[4]
res[2] = v1b[2]
// res += (v1 & (0xff << (5 * 8))) >> 16
res[3] = v1b[5]
//res += (v1 & (0xff << (1 * 8))) << 24
res[4] = v1b[1]
// res += (v0 & (0xff << (6 * 8))) >> 8
res[5] = v0b[6]
//res := (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
res[7] = v0b[7]
//res += (v1 & 0xff) << 48
res[6] = v1b[0]
v0 = binary.LittleEndian.Uint64(res[:])
*d1 += v0
} else {
// original.
res := v0 & (0xff << (2 * 8))
res += (v1 & (0xff << (7 * 8))) >> 8
res += ((v0 & (0xff << (5 * 8))) + (v1 & (0xff << (6 * 8)))) >> 16
res += ((v0 & (0xff << (3 * 8))) + (v1 & (0xff << (4 * 8)))) >> 24
res += (v0 & (0xff << (1 * 8))) << 32
res += v0 << 56
*d0 += res
res = (v0 & (0xff << (7 * 8))) + (v1 & (0xff << (2 * 8)))
res += (v0 & (0xff << (6 * 8))) >> 8
res += (v1 & (0xff << (5 * 8))) >> 16
res += ((v1 & (0xff << (3 * 8))) + (v0 & (0xff << (4 * 8)))) >> 24
res += (v1 & 0xff) << 48
res += (v1 & (0xff << (1 * 8))) << 24
*d1 += res
}
}
*/
// reduceMod reduces v = [v0, v1, v2, v3] mod the irreducible polynomial
// x^128 + x^2 + x and returns the 128-bit result as (r0, r1), low word first.
// The top two bits of v3 are discarded; the upper 128 bits (v3:v2) are then
// shifted left by one and by two, and both shifted copies are XORed into the
// lower 128 bits (v1:v0).
func reduceMod(v0, v1, v2, v3 uint64) (r0, r1 uint64) {
	hi := v3 & 0x3FFFFFFFFFFFFFFF

	// (hi:v2) << 1
	shl1Lo := v2 << 1
	shl1Hi := hi<<1 | v2>>63

	// (hi:v2) << 2
	shl2Lo := v2 << 2
	shl2Hi := hi<<2 | v2>>62

	return v0 ^ shl1Lo ^ shl2Lo, v1 ^ shl1Hi ^ shl2Hi
}

View File

@@ -0,0 +1,36 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//go:build !noasm && !appengine
// +build !noasm,!appengine
package highwayhash
// Feature flags for this build. Only useVMX is enabled: update consults it to
// choose between updatePpc64Le and updateGeneric. The remaining flags name
// other architectures' instruction sets and are fixed to false.
var (
useSSE4 = false
useAVX2 = false
useNEON = false
useSVE = false
useSVE2 = false
useVMX = true
)
// updatePpc64Le mixes msg into state. It is declared without a body; the
// implementation lives outside this Go file.
//go:noescape
func updatePpc64Le(state *[16]uint64, msg []byte)
// initialize sets up state from key. This build always uses the portable Go
// implementation for initialization.
func initialize(state *[16]uint64, key []byte) {
initializeGeneric(state, key)
}
// update mixes msg into state, using the ppc64le assembly routine when useVMX
// is set and the portable Go code otherwise.
func update(state *[16]uint64, msg []byte) {
	if !useVMX {
		updateGeneric(state, msg)
		return
	}
	updatePpc64Le(state, msg)
}
// finalize runs the closing rounds on state and writes the checksum to out.
// This build always uses the portable Go implementation for finalization.
func finalize(out []byte, state *[16]uint64) {
finalizeGeneric(out, state)
}

View File

@@ -0,0 +1,182 @@
//
// Minio Cloud Storage, (C) 2018 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//+build !noasm,!appengine
#include "textflag.h"
// Definition of registers
// Each vector register gets two aliases: the plain name is its VS (VSX)
// register and the name with a trailing underscore is its V (VMX) register,
// paired here as VS(32+n) <-> Vn. updatePpc64Le uses the plain names with
// LXVD2X/STXVD2X/XXPERMDI/XXLXOR/XXLNAND and the underscore names with the
// V* arithmetic and permute instructions.
// State: v0, v1, mul0 and mul1, each split into a low and a high 16-byte half.
#define V0_LO VS32
#define V0_LO_ V0
#define V0_HI VS33
#define V0_HI_ V1
#define V1_LO VS34
#define V1_LO_ V2
#define V1_HI VS35
#define V1_HI_ V3
#define MUL0_LO VS36
#define MUL0_LO_ V4
#define MUL0_HI VS37
#define MUL0_HI_ V5
#define MUL1_LO VS38
#define MUL1_LO_ V6
#define MUL1_HI VS39
#define MUL1_HI_ V7
// Message
// MSG_HI has no underscore alias: it is only used as a load destination and
// XXPERMDI source.
#define MSG_LO VS40
#define MSG_LO_ V8
#define MSG_HI VS41
// Constants
// ROTATE and MASK are loaded from the asmConstants table.
#define ROTATE VS42
#define ROTATE_ V10
#define MASK VS43
#define MASK_ V11
// Temps
// TEMP4_ to TEMP7_ are only defined under their V names.
#define TEMP1 VS44
#define TEMP1_ V12
#define TEMP2 VS45
#define TEMP2_ V13
#define TEMP3 VS46
#define TEMP3_ V14
#define TEMP4_ V15
#define TEMP5_ V16
#define TEMP6_ V17
#define TEMP7_ V18
// Regular registers
// STATE, MSG_BASE and MSG_LEN hold the function arguments; P1..P7 hold the
// byte offsets 16, 32, ..., 112 used as index registers for vector loads/stores.
#define STATE R3
#define MSG_BASE R4
#define MSG_LEN R5
#define CONSTANTS R6
#define P1 R7
#define P2 R8
#define P3 R9
#define P4 R10
#define P5 R11
#define P6 R12
#define P7 R14 // avoid using R13
// func updatePpc64Le(state *[16]uint64, msg []byte)
//
// Mixes msg into state one 32-byte block at a time and writes the updated
// state back. It returns without touching state when msg is shorter than
// 32 bytes, and the loop stops once fewer than 32 bytes remain, so a trailing
// partial block is ignored. The instructions of one round are interleaved, so
// several registers are reused for intermediate values within the loop body.
TEXT ·updatePpc64Le(SB), NOFRAME|NOSPLIT, $0-32
MOVD state+0(FP), STATE
MOVD msg_base+8(FP), MSG_BASE
MOVD msg_len+16(FP), MSG_LEN // length of message
// Sanity check for length
CMPU MSG_LEN, $31
BLE complete
// Setup offsets
MOVD $16, P1
MOVD $32, P2
MOVD $48, P3
MOVD $64, P4
MOVD $80, P5
MOVD $96, P6
MOVD $112, P7
// Load state
LXVD2X (STATE)(R0), V0_LO
LXVD2X (STATE)(P1), V0_HI
LXVD2X (STATE)(P2), V1_LO
LXVD2X (STATE)(P3), V1_HI
LXVD2X (STATE)(P4), MUL0_LO
LXVD2X (STATE)(P5), MUL0_HI
LXVD2X (STATE)(P6), MUL1_LO
LXVD2X (STATE)(P7), MUL1_HI
// Exchange the two 64-bit halves of every loaded vector; the same exchange is
// applied again before the state is stored.
XXPERMDI V0_LO, V0_LO, $2, V0_LO
XXPERMDI V0_HI, V0_HI, $2, V0_HI
XXPERMDI V1_LO, V1_LO, $2, V1_LO
XXPERMDI V1_HI, V1_HI, $2, V1_HI
XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO
XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI
XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO
XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI
// Load asmConstants table pointer
MOVD $·asmConstants(SB), CONSTANTS
LXVD2X (CONSTANTS)(R0), ROTATE
LXVD2X (CONSTANTS)(P1), MASK
// NAND of MASK with itself: replaces MASK by its bitwise complement.
XXLNAND MASK, MASK, MASK
loop:
// Main highwayhash update loop
LXVD2X (MSG_BASE)(R0), MSG_LO
VADDUDM V0_LO_, MUL1_LO_, TEMP1_
VRLD V0_LO_, ROTATE_, TEMP2_
VADDUDM MUL1_HI_, V0_HI_, TEMP3_
LXVD2X (MSG_BASE)(P1), MSG_HI
ADD $32, MSG_BASE, MSG_BASE
XXPERMDI MSG_LO, MSG_LO, $2, MSG_LO
XXPERMDI MSG_HI, MSG_HI, $2, V0_LO
VADDUDM MSG_LO_, MUL0_LO_, MSG_LO_
VADDUDM V0_LO_, MUL0_HI_, V0_LO_
VADDUDM MSG_LO_, V1_LO_, V1_LO_
VSRD V0_HI_, ROTATE_, MSG_LO_
VADDUDM V0_LO_, V1_HI_, V1_HI_
VPERM V1_LO_, V1_LO_, MASK_, V0_LO_
VMULOUW V1_LO_, TEMP2_, TEMP2_
VPERM V1_HI_, V1_HI_, MASK_, TEMP7_
VADDUDM V0_LO_, TEMP1_, V0_LO_
VMULOUW V1_HI_, MSG_LO_, MSG_LO_
VADDUDM TEMP7_, TEMP3_, V0_HI_
VPERM V0_LO_, V0_LO_, MASK_, TEMP6_
VRLD V1_LO_, ROTATE_, TEMP4_
VSRD V1_HI_, ROTATE_, TEMP5_
VPERM V0_HI_, V0_HI_, MASK_, TEMP7_
XXLXOR MUL0_LO, TEMP2, MUL0_LO
VMULOUW TEMP1_, TEMP4_, TEMP1_
VMULOUW TEMP3_, TEMP5_, TEMP3_
XXLXOR MUL0_HI, MSG_LO, MUL0_HI
XXLXOR MUL1_LO, TEMP1, MUL1_LO
XXLXOR MUL1_HI, TEMP3, MUL1_HI
VADDUDM TEMP6_, V1_LO_, V1_LO_
VADDUDM TEMP7_, V1_HI_, V1_HI_
// Continue while at least one more full block remains.
SUB $32, MSG_LEN, MSG_LEN
CMPU MSG_LEN, $32
BGE loop
// Save state
XXPERMDI V0_LO, V0_LO, $2, V0_LO
XXPERMDI V0_HI, V0_HI, $2, V0_HI
XXPERMDI V1_LO, V1_LO, $2, V1_LO
XXPERMDI V1_HI, V1_HI, $2, V1_HI
XXPERMDI MUL0_LO, MUL0_LO, $2, MUL0_LO
XXPERMDI MUL0_HI, MUL0_HI, $2, MUL0_HI
XXPERMDI MUL1_LO, MUL1_LO, $2, MUL1_LO
XXPERMDI MUL1_HI, MUL1_HI, $2, MUL1_HI
STXVD2X V0_LO, (STATE)(R0)
STXVD2X V0_HI, (STATE)(P1)
STXVD2X V1_LO, (STATE)(P2)
STXVD2X V1_HI, (STATE)(P3)
STXVD2X MUL0_LO, (STATE)(P4)
STXVD2X MUL0_HI, (STATE)(P5)
STXVD2X MUL1_LO, (STATE)(P6)
STXVD2X MUL1_HI, (STATE)(P7)
complete:
RET
// Constants table
// 32 bytes used by updatePpc64Le: bytes 0-15 are loaded into ROTATE (the value
// 0x20 = 32 in each 64-bit half, used as the per-doubleword rotate and shift
// amount), bytes 16-31 are loaded into MASK (the zipper merge permutation,
// which updatePpc64Le complements before use).
DATA ·asmConstants+0x0(SB)/8, $0x0000000000000020
DATA ·asmConstants+0x8(SB)/8, $0x0000000000000020
DATA ·asmConstants+0x10(SB)/8, $0x070806090d0a040b // zipper merge constant
DATA ·asmConstants+0x18(SB)/8, $0x000f010e05020c03 // zipper merge constant
GLOBL ·asmConstants(SB), 8, $32

29
vendor/github.com/minio/highwayhash/highwayhash_ref.go generated vendored Normal file
View File

@@ -0,0 +1,29 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
//go:build noasm || (!amd64 && !arm64 && !ppc64le)
// +build noasm !amd64,!arm64,!ppc64le
package highwayhash
// Feature flags for the pure-Go build (selected by the noasm tag or when the
// target is none of amd64, arm64, ppc64le, per the build constraint of this
// file). No accelerated implementation is available, so every flag is false.
var (
useSSE4 = false
useAVX2 = false
useNEON = false
useSVE = false
useSVE2 = false
useVMX = false
)
// initialize sets up state from the key k using the portable Go implementation.
func initialize(state *[16]uint64, k []byte) {
initializeGeneric(state, k)
}
// update mixes msg into state using the portable Go implementation.
func update(state *[16]uint64, msg []byte) {
updateGeneric(state, msg)
}
// finalize runs the closing rounds on state and writes the checksum to out
// using the portable Go implementation.
func finalize(out []byte, state *[16]uint64) {
finalizeGeneric(out, state)
}