all: AVX-512 (#217)

Extends avo to support most AVX-512 instruction sets.

The instruction type is extended to support suffixes. The K family of opmask
registers is added to the register package, and the operand package is updated
to support the new operand types. Move instruction deduction in `Load` and
`Store` is extended to support KMOV* and VMOV* forms.

Internal code generation packages were overhauled. Instruction database loading
required various messy changes to account for the additional complexities of the
AVX-512 instruction sets. The internal/api package was added to introduce a
separation between instruction forms in the database, and the functions avo
provides to create them. This was required since with instruction suffixes there
is no longer a one-to-one mapping between instruction constructors and opcodes.

AVX-512 bloated generated source code size substantially, initially increasing
compilation and CI test times to an unacceptable level. Two changes were made to
address this:

1.  Instruction constructors in the `x86` package moved to an optab-based
    approach. This compiles substantially faster than the verbose code
    generation we had before.

2.  The most verbose code-generated tests are moved under build tags and
    limited to a stress test mode. Stress test builds are run on
    schedule but not in regular CI.

An example of AVX-512 accelerated 16-lane MD5 is provided to demonstrate and
test the new functionality.

Updates #20 #163 #229

Co-authored-by: Vaughn Iverson <vsivsi@yahoo.com>
This commit is contained in:
Michael McLoughlin
2021-11-12 18:35:36 -08:00
parent 2867bd7e01
commit b76e849b5c
71 changed files with 257395 additions and 61474 deletions

View File

@@ -1,4 +1,5 @@
package x86
//go:generate avogen -output zoptab.go optab
//go:generate avogen -output zctors.go ctors
//go:generate avogen -output zctors_test.go ctorstest

100
x86/inst_test.go Normal file
View File

@@ -0,0 +1,100 @@
package x86
import (
"reflect"
"testing"
"github.com/mmcloughlin/avo/ir"
"github.com/mmcloughlin/avo/operand"
"github.com/mmcloughlin/avo/reg"
)
// TestCases compares the complete ir.Instruction returned by selected
// constructors against hand-written expectations.
func TestCases(t *testing.T) {
	mk := MustInstruction(t)
	mem128 := operand.Mem{Base: reg.RAX}

	tests := []struct {
		Name        string
		Instruction *ir.Instruction
		Expect      *ir.Instruction
	}{
		// Merge-masking: the destination register is read as well as
		// written, so Z3 must be listed among the inputs.
		{
			Name:        "avx512_masking_merging_input_registers",
			Instruction: mk(VPADDD(reg.Z1, reg.Z2, reg.K1, reg.Z3)),
			Expect: &ir.Instruction{
				Opcode:   "VPADDD",
				Operands: []operand.Op{reg.Z1, reg.Z2, reg.K1, reg.Z3},
				Inputs:   []operand.Op{reg.Z1, reg.Z2, reg.K1, reg.Z3},
				Outputs:  []operand.Op{reg.Z3},
				ISA:      []string{"AVX512F"},
			},
		},

		// Zeroing-masking: identical to the previous case apart from the Z
		// suffix, and here the destination Z3 must not be an input.
		{
			Name:        "avx512_masking_zeroing_input_registers",
			Instruction: mk(VPADDD_Z(reg.Z1, reg.Z2, reg.K1, reg.Z3)),
			Expect: &ir.Instruction{
				Opcode:   "VPADDD",
				Suffixes: []string{"Z"},
				Operands: []operand.Op{reg.Z1, reg.Z2, reg.K1, reg.Z3},
				Inputs:   []operand.Op{reg.Z1, reg.Z2, reg.K1}, // not Z3
				Outputs:  []operand.Op{reg.Z3},
				ISA:      []string{"AVX512F"},
			},
		},

		// Many pre-existing AVX instructions gained EVEX-encoded forms with
		// AVX-512. An earlier broken implementation kept both the VEX and
		// EVEX forms in the database, which made the computed ISA list claim
		// AVX-512 was required for instructions that predate it. The next
		// two cases confirm the correct ISA is selected.
		{
			Name:        "vex_evex_xmm_xmm_xmm",
			Instruction: mk(VFMADD132PS(reg.X1, reg.X2, reg.X3)),
			Expect: &ir.Instruction{
				Opcode:   "VFMADD132PS",
				Operands: []operand.Op{reg.X1, reg.X2, reg.X3},
				Inputs:   []operand.Op{reg.X1, reg.X2, reg.X3},
				Outputs:  []operand.Op{reg.X3},
				ISA:      []string{"FMA3"}, // not AVX512F
			},
		},
		{
			Name:        "vex_evex_m128_xmm_xmm",
			Instruction: mk(VFMADD132PS(mem128, reg.X2, reg.X3)),
			Expect: &ir.Instruction{
				Opcode:   "VFMADD132PS",
				Operands: []operand.Op{mem128, reg.X2, reg.X3},
				Inputs:   []operand.Op{mem128, reg.X2, reg.X3},
				Outputs:  []operand.Op{reg.X3},
				ISA:      []string{"FMA3"}, // not AVX512F
			},
		},
	}

	for _, tc := range tests {
		tc := tc // per-iteration copy captured by the closure (scopelint)
		t.Run(tc.Name, func(t *testing.T) {
			if reflect.DeepEqual(tc.Instruction, tc.Expect) {
				return
			}
			// Dump both values before failing so the mismatch is visible.
			t.Logf(" got = %#v", tc.Instruction)
			t.Logf("expect = %#v", tc.Expect)
			t.FailNow()
		})
	}
}
// MustInstruction returns a function that unwraps the (instruction, error)
// pair produced by an instruction constructor. A non-nil error aborts the
// test via t.Fatal; otherwise the instruction is passed through unchanged.
func MustInstruction(t *testing.T) func(*ir.Instruction, error) *ir.Instruction {
	return func(inst *ir.Instruction, err error) *ir.Instruction {
		// Attribute failures to the caller's line rather than this closure.
		t.Helper()
		if err == nil {
			return inst
		}
		t.Fatal(err)
		return inst
	}
}

130
x86/optab.go Normal file
View File

@@ -0,0 +1,130 @@
package x86
import (
"errors"
"github.com/mmcloughlin/avo/ir"
"github.com/mmcloughlin/avo/operand"
)
// build returns the instruction for the first entry in forms that matches
// the given suffixes and operands. Forms are tried in slice order. If none
// matches, a "bad operands" error is returned.
func build(forms []form, suffixes sffxs, ops []operand.Op) (*ir.Instruction, error) {
	for idx := range forms {
		// Take the address to avoid copying the form on every iteration.
		candidate := &forms[idx]
		if !candidate.match(suffixes, ops) {
			continue
		}
		return candidate.build(suffixes, ops), nil
	}
	return nil, errors.New("bad operands")
}
// form represents an instruction form: one accepted combination of opcode,
// suffixes and operand types, together with the properties needed to build
// an ir.Instruction from it.
type form struct {
	Opcode        opc      // opcode; its String() becomes the instruction's Opcode
	SuffixesClass sffxscls // class whose SuffixesSet() gives the accepted suffixes
	Features      feature  // bit flags (featureTerminal, featureBranch, ...)
	ISAs          isas     // its List() becomes the instruction's ISA list
	Arity         uint8    // number of operands the caller must supply (compared to len(ops) in match)
	Operands      oprnds   // explicit and implicit operand specifications
}
// feature is a bit-flag set describing properties of an instruction form.
// Each flag occupies its own bit so that flags can be combined with | and
// tested with &.
type feature uint8

const (
	// featureTerminal is reported as IsTerminal on instructions built from the form.
	featureTerminal feature = 1 << 0
	// featureBranch is reported as IsBranch on instructions built from the form.
	featureBranch feature = 1 << 1
	// featureConditionalBranch is reported as IsConditional on instructions
	// built from the form.
	featureConditionalBranch feature = 1 << 2
	// featureCancellingInputs is reported as CancellingInputs on instructions
	// built from the form.
	featureCancellingInputs feature = 1 << 3
)
// oprnds is a list of explicit and implicit operands of an instruction form.
// The size of the array (maxoperands) is output by the optab generator.
// form.build stops reading the list at the first entry whose Type is zero.
type oprnds [maxoperands]oprnd
// oprnd represents an explicit or implicit operand to an instruction form.
type oprnd struct {
	// Type is the operand type code. It is interpreted as an oprndtype when
	// matching caller-supplied operands and as an implreg when the operand is
	// implicit. A zero Type ends the operand list in form.build.
	Type uint8
	// Implicit reports that the operand is not supplied by the caller; the
	// register is obtained from implreg(Type).Register() instead.
	Implicit bool
	// Action says whether the operand is read (added to Inputs), written
	// (added to Outputs), both, or neither.
	Action action
}
// action is the kind of access an instruction form performs on an operand,
// encoded as a bit set of a read flag and a write flag.
type action uint8

const (
	// actionN means the operand is neither read nor written.
	actionN action = 0
	// actionR is the read flag.
	actionR action = 1 << 0
	// actionW is the write flag.
	actionW action = 1 << 1
	// actionRW means the operand is both read and written.
	actionRW action = actionR | actionW
)

// Read reports if the action includes read.
func (a action) Read() bool { return a&actionR != 0 }

// Write reports if the action includes write.
func (a action) Write() bool { return a&actionW != 0 }
// match reports whether the form accepts the given suffixes and operands.
// Three conditions must hold: the suffixes belong to the form's accepted
// set, the operand count equals the form's arity, and each operand satisfies
// the operand type recorded at the same position.
func (f *form) match(suffixes sffxs, ops []operand.Op) bool {
	// Suffixes must be in the accepted set for this form's suffix class.
	if allowed := f.SuffixesClass.SuffixesSet(); !allowed[suffixes] {
		return false
	}

	// Operand count must agree with the form's arity.
	if int(f.Arity) != len(ops) {
		return false
	}

	// Every supplied operand must match its positional type.
	for pos := range ops {
		want := oprndtype(f.Operands[pos].Type)
		if !want.Match(ops[pos]) {
			return false
		}
	}
	return true
}
// build assembles the ir.Instruction for this form from the given suffixes
// and operands. It does no validation: the caller must already have
// confirmed, via match, that the form accepts these inputs.
func (f *form) build(suffixes sffxs, ops []operand.Op) *ir.Instruction {
	// Properties taken directly from the form and the call arguments.
	inst := &ir.Instruction{
		Opcode:           f.Opcode.String(),
		Suffixes:         suffixes.Strings(),
		Operands:         ops,
		IsTerminal:       f.Features&featureTerminal != 0,
		IsBranch:         f.Features&featureBranch != 0,
		IsConditional:    f.Features&featureConditionalBranch != 0,
		CancellingInputs: f.Features&featureCancellingInputs != 0,
		ISA:              f.ISAs.List(),
	}

	// Classify each operand as input and/or output. Explicit operands are
	// consumed from the caller's list in order; implicit ones are resolved
	// to their register.
	remaining := ops
	for _, spec := range f.Operands {
		// A zero type ends the operand list.
		if spec.Type == 0 {
			break
		}

		var op operand.Op
		if spec.Implicit {
			op = implreg(spec.Type).Register()
		} else {
			op, remaining = remaining[0], remaining[1:]
		}

		if spec.Action.Read() {
			inst.Inputs = append(inst.Inputs, op)
		}
		if spec.Action.Write() {
			inst.Outputs = append(inst.Outputs, op)
		}
	}
	return inst
}

10
x86/stress_test.go Normal file
View File

@@ -0,0 +1,10 @@
// Constructor tests that rely on huge generated files, which bloat compile
// time, are limited to stress-test mode.
//go:build stress
// +build stress
package x86
//go:generate avogen -output zstress_test.go ctorsstress
//go:generate avogen -output zbench_test.go ctorsbench

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

15641
x86/zoptab.go Normal file

File diff suppressed because it is too large Load Diff