152 lines
3.5 KiB
Go
152 lines
3.5 KiB
Go
|
|
// Package md5x16 implements 16-lane parallel MD5 with AVX-512 instructions.
|
||
|
|
package md5x16
|
||
|
|
|
||
|
|
import (
|
||
|
|
"encoding/binary"
|
||
|
|
"errors"
|
||
|
|
"math"
|
||
|
|
"reflect"
|
||
|
|
"unsafe"
|
||
|
|
)
|
||
|
|
|
||
|
|
//go:generate go run asm.go -out md5x16.s -stubs stub.go
|
||
|
|
|
||
|
|
const (
	// Size is the size of an MD5 checksum in bytes.
	Size = 16

	// BlockSize is the block size of MD5 in bytes.
	BlockSize = 64

	// Lanes is the maximum number of parallel MD5 computations.
	Lanes = 16
)
|
||
|
|
|
||
|
|
// Validate checks whether the preconditions required by Sum() are met.
|
||
|
|
func Validate(data [Lanes][]byte) error {
|
||
|
|
_, err := config(data)
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
|
||
|
|
// Sum returns the MD5 checksum of up to Lanes data of the same length.
|
||
|
|
//
|
||
|
|
// Non-nil inputs must all have the same length, and occupy a memory span not
|
||
|
|
// exceeding 32 bits.
|
||
|
|
func Sum(data [Lanes][]byte) [Lanes][Size]byte {
|
||
|
|
// Determine lane configuration.
|
||
|
|
cfg, err := config(data)
|
||
|
|
if err != nil {
|
||
|
|
panic(err)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Initialize hash.
|
||
|
|
var h [4][Lanes]uint32
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
h[0][l] = 0x67452301
|
||
|
|
h[1][l] = 0xefcdab89
|
||
|
|
h[2][l] = 0x98badcfe
|
||
|
|
h[3][l] = 0x10325476
|
||
|
|
}
|
||
|
|
|
||
|
|
// Consume full blocks.
|
||
|
|
base, n := cfg.base, cfg.n
|
||
|
|
for ; n >= BlockSize; n -= BlockSize {
|
||
|
|
block(&h, base, &cfg.offsets, cfg.mask)
|
||
|
|
base += BlockSize
|
||
|
|
}
|
||
|
|
|
||
|
|
// Final block.
|
||
|
|
var last [Lanes][]byte
|
||
|
|
var buffer [Lanes * BlockSize]byte
|
||
|
|
base = dataptr(buffer[:])
|
||
|
|
var offsets [Lanes]uint32
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
last[l] = buffer[l*BlockSize : (l+1)*BlockSize]
|
||
|
|
offsets[l] = uint32(l * BlockSize)
|
||
|
|
copy(last[l], data[l][cfg.n-n:])
|
||
|
|
last[l][n] = 0x80
|
||
|
|
}
|
||
|
|
|
||
|
|
if n >= 56 {
|
||
|
|
block(&h, base, &offsets, cfg.mask)
|
||
|
|
for i := range buffer {
|
||
|
|
buffer[i] = 0
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
binary.LittleEndian.PutUint64(last[l][56:], uint64(8*cfg.n))
|
||
|
|
}
|
||
|
|
block(&h, base, &offsets, cfg.mask)
|
||
|
|
|
||
|
|
// Write into byte array.
|
||
|
|
var digest [Lanes][Size]byte
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
for i := 0; i < 4; i++ {
|
||
|
|
binary.LittleEndian.PutUint32(digest[l][4*i:], h[i][l])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return digest
|
||
|
|
}
|
||
|
|
|
||
|
|
// lanes represents the configuration of the 16 data lanes of an MD5
// computation. It is produced by config from the inputs of a single call.
type lanes struct {
	n       int           // length in bytes shared by all active (non-nil) lanes
	active  []int         // indexes of active lanes, in increasing order
	mask    uint16        // mask of active lanes: bit l is set when lane l is active
	base    uintptr       // base pointer: lowest data address among the active lanes
	offsets [Lanes]uint32 // offset of data lanes relative to base; zero for inactive lanes
}
|
||
|
|
|
||
|
|
// config determines the lane configuration for the provided data. Returns an
|
||
|
|
// error if there are no active lanes, there's a length mismatch among active
|
||
|
|
// lanes, or the data spans a memory region larger than 32-bits.
|
||
|
|
func config(data [Lanes][]byte) (*lanes, error) {
|
||
|
|
cfg := &lanes{}
|
||
|
|
|
||
|
|
// Populate active lanes, and ensure they're all the same length.
|
||
|
|
for l, d := range data {
|
||
|
|
if d != nil {
|
||
|
|
cfg.active = append(cfg.active, l)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if len(cfg.active) == 0 {
|
||
|
|
return nil, errors.New("no active lanes")
|
||
|
|
}
|
||
|
|
|
||
|
|
cfg.n = len(data[cfg.active[0]])
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
cfg.mask |= 1 << l
|
||
|
|
if len(data[l]) != cfg.n {
|
||
|
|
return nil, errors.New("length mismatch")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Compute base pointer and lane offsets.
|
||
|
|
cfg.base = ^uintptr(0)
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
ptr := dataptr(data[l])
|
||
|
|
if ptr < cfg.base {
|
||
|
|
cfg.base = ptr
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, l := range cfg.active {
|
||
|
|
ptr := dataptr(data[l])
|
||
|
|
offset := ptr - cfg.base
|
||
|
|
if offset > math.MaxUint32 {
|
||
|
|
return nil, errors.New("input data exceed 32-bit memory region")
|
||
|
|
}
|
||
|
|
cfg.offsets[l] = uint32(offset)
|
||
|
|
}
|
||
|
|
|
||
|
|
return cfg, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// dataptr returns the address held in the Data field of the slice header of
// data, which is 0 for a nil slice. The result is a plain integer: it does
// not keep the backing array alive, so the caller must retain the slice for
// as long as the address is in use.
func dataptr(data []byte) uintptr {
	return (*reflect.SliceHeader)(unsafe.Pointer(&data)).Data
}
|