all: AVX-512 (#217)

Extends avo to support most AVX-512 instruction sets.

The instruction type is extended to support suffixes. The K family of opmask
registers is added to the register package, and the operand package is updated
to support the new operand types. Move instruction deduction in `Load` and
`Store` is extended to support KMOV* and VMOV* forms.

Internal code generation packages were overhauled. Instruction database loading
required various messy changes to account for the additional complexities of the
AVX-512 instruction sets. The internal/api package was added to introduce a
separation between instruction forms in the database, and the functions avo
provides to create them. This was required since with instruction suffixes there
is no longer a one-to-one mapping between instruction constructors and opcodes.

AVX-512 bloated generated source code size substantially, initially increasing
compilation and CI test times to an unacceptable level. Two changes were made to
address this:

1.  Instruction constructors in the `x86` package moved to an optab-based
    approach. This compiles substantially faster than the verbose code
    generation we had before.

2.  The most verbose code-generated tests are moved under build tags and
    limited to a stress test mode. Stress test builds are run on a
    schedule but not in regular CI.

An example of AVX-512 accelerated 16-lane MD5 is provided to demonstrate and
test the new functionality.

Updates #20 #163 #229

Co-authored-by: Vaughn Iverson <vsivsi@yahoo.com>
This commit is contained in:
Michael McLoughlin
2021-11-12 18:35:36 -08:00
parent 2867bd7e01
commit b76e849b5c
71 changed files with 257395 additions and 61474 deletions

View File

@@ -1,8 +1,11 @@
package gen
import (
"bytes"
"fmt"
"strings"
"github.com/mmcloughlin/avo/internal/api"
"github.com/mmcloughlin/avo/internal/prnt"
"github.com/mmcloughlin/avo/printer"
@@ -21,93 +24,173 @@ func NewCtorsTest(cfg printer.Config) Interface {
// Generate writes the constructors test file and returns c.Result(). It emits
// the generated-code warning, a "!integration" build tag, the x86 package
// clause and an import block, then one test per entry in its input.
//
// NOTE(review): this hunk is displayed without +/- markers. Lines that look
// like the old body (the "math" import, the pkg-based import paths, the
// c.instruction loop) appear interleaved with the new body (api.ImportPath,
// DeclareTestArguments, the c.function loop), which is why the braces do not
// balance here. Confirm against the real file before editing.
func (c *ctorstest) Generate(is []inst.Instruction) ([]byte, error) {
c.Printf("// %s\n\n", c.cfg.GeneratedWarning())
c.BuildTag("!integration")
c.NL()
c.Printf("package x86\n\n")
c.Printf("import (\n")
c.Printf("\t\"testing\"\n")
c.Printf("\t\"math\"\n")
c.Printf("\t\"testing\"\n")
c.NL()
c.Printf("\t\"%s/reg\"\n", pkg)
c.Printf("\t\"%s/operand\"\n", pkg)
c.Printf("\t%q\n", api.ImportPath(api.OperandPackage))
c.Printf("\t%q\n", api.ImportPath(api.RegisterPackage))
c.Printf(")\n\n")
for _, i := range is {
c.instruction(i)
DeclareTestArguments(&c.Generator)
// Instructions are expanded into constructor functions; one test is emitted
// per function.
fns := api.InstructionsFunctions(is)
for _, fn := range fns {
c.function(fn)
}
return c.Result()
}
// instruction emits a Test<Opcode>ValidForms function with one subtest per
// instruction form. Each subtest calls the constructor with every argument
// list returned by validFormArgs and fails the test on a non-nil error.
//
// NOTE(review): the function body is not closed in this view; these look like
// the removed side of the diff — confirm.
func (c *ctorstest) instruction(i inst.Instruction) {
c.Printf("func Test%sValidForms(t *testing.T) {", i.Opcode)
for _, f := range i.Forms {
// The subtest name is the form's signature joined with underscores.
name := strings.Join(f.Signature(), "_")
c.Printf("t.Run(\"form=%s\", func(t *testing.T) {\n", name)
for _, args := range validFormArgs(f) {
c.Printf("if _, err := %s(%s)", i.Opcode, strings.Join(args, ", "))
c.Printf("; err != nil { t.Fatal(err) }\n")
}
c.Printf("})\n")
// function emits one Test<Name>ValidFormsNoError test for fn. The generated
// test calls the constructor once per instruction form, using the arguments of
// that form's test signature, and fails as soon as a call returns an error.
func (c *ctorstest) function(fn *api.Function) {
	ctor := fn.Name()
	c.Printf("func Test%sValidFormsNoError(t *testing.T) {", ctor)
	for _, form := range fn.Forms {
		args := TestSignature(form).Arguments()
		c.Printf("if _, err := %s(%s); err != nil { t.Fatal(err) }\n", ctor, args)
	}
	c.Printf("}\n\n")
}
// validFormArgs gathers, for each operand of form f, the argument expressions
// registered for that operand's type in validArgs. It panics when an operand
// type has no entry.
//
// NOTE(review): the loop and function are not closed in this view; these look
// like the removed side of the diff — confirm.
func validFormArgs(f inst.Form) [][]string {
n := len(f.Operands)
args := make([][]string, n)
for i, op := range f.Operands {
valid, ok := validArgs[op.Type]
if !ok {
panic("missing operands for type " + op.Type)
}
args[i] = valid
// ctorsstress is the generator behind NewCtorsStress. cfg supplies the
// generated-code warning, and the embedded prnt.Generator provides the
// printing methods used to build the output.
type ctorsstress struct {
cfg printer.Config
prnt.Generator
}
// NewCtorsStress returns a generator that produces stress tests for the
// instruction constructors. The generator is wrapped with GoFmt.
func NewCtorsStress(cfg printer.Config) Interface {
	stress := &ctorsstress{cfg: cfg}
	return GoFmt(stress)
}
// Generate writes the constructors stress-test file and returns c.Result().
// It emits the generated-code warning, a "stress" build tag, the x86 package
// clause and the imports (reflect, testing, and the IR, operand and register
// packages), then one test per constructor function derived from is.
//
// NOTE(review): "return cross(args)" below looks like a removed diff line that
// this marker-less view has interleaved into the new body — confirm.
func (c *ctorsstress) Generate(is []inst.Instruction) ([]byte, error) {
c.Printf("// %s\n\n", c.cfg.GeneratedWarning())
c.BuildTag("stress")
c.NL()
c.Printf("package x86\n\n")
c.Printf("import (\n")
c.Printf("\t\"reflect\"\n")
c.Printf("\t\"testing\"\n")
c.NL()
c.Printf("\t%q\n", api.ImportPath(api.IRPackage))
c.Printf("\t%q\n", api.ImportPath(api.OperandPackage))
c.Printf("\t%q\n", api.ImportPath(api.RegisterPackage))
c.Printf(")\n\n")
fns := api.InstructionsFunctions(is)
for _, fn := range fns {
c.function(fn)
}
return cross(args)
return c.Result()
}
// validArgs maps an operand type name to Go source expressions that can be
// passed as an argument of that type; validFormArgs looks entries up by
// op.Type.
//
// NOTE(review): the literal is not closed in this view; these look like the
// removed side of the diff — confirm.
var validArgs = map[string][]string{
// Immediates
"1": {"operand.Imm(1)"},
"3": {"operand.Imm(3)"},
"imm2u": {"operand.Imm(1)", "operand.Imm(3)"},
"imm8": {"operand.Imm(math.MaxInt8)"},
"imm16": {"operand.Imm(math.MaxInt16)"},
"imm32": {"operand.Imm(math.MaxInt32)"},
"imm64": {"operand.Imm(math.MaxInt64)"},
// Registers
"al": {"reg.AL"},
"cl": {"reg.CL"},
"ax": {"reg.AX"},
"eax": {"reg.EAX"},
"rax": {"reg.RAX"},
"r8": {"reg.CH", "reg.BL", "reg.R13B"},
"r16": {"reg.CX", "reg.R9W"},
"r32": {"reg.R10L"},
"r64": {"reg.R11"},
"xmm0": {"reg.X0"},
"xmm": {"reg.X7"},
"ymm": {"reg.Y15"},
// Memory
"m": {"operand.Mem{Base: reg.BX, Index: reg.CX, Scale: 2}"},
"m8": {"operand.Mem{Base: reg.BL, Index: reg.CH, Scale: 1}"},
"m16": {"operand.Mem{Base: reg.BX, Index: reg.CX, Scale: 2}"},
"m32": {"operand.Mem{Base: reg.EBX, Index: reg.ECX, Scale: 4}"},
"m64": {"operand.Mem{Base: reg.RBX, Index: reg.RCX, Scale: 8}"},
"m128": {"operand.Mem{Base: reg.RBX, Index: reg.RCX, Scale: 8}"},
"m256": {"operand.Mem{Base: reg.RBX, Index: reg.RCX, Scale: 8}"},
// Vector memory
"vm32x": {"operand.Mem{Base: reg.R13, Index: reg.X4, Scale: 1}"},
"vm64x": {"operand.Mem{Base: reg.R13, Index: reg.X8, Scale: 1}"},
"vm32y": {"operand.Mem{Base: reg.R13, Index: reg.Y4, Scale: 1}"},
"vm64y": {"operand.Mem{Base: reg.R13, Index: reg.Y8, Scale: 1}"},
// Relative
"rel8": {"operand.Rel(math.MaxInt8)"},
"rel32": {"operand.Rel(math.MaxInt32)", "operand.LabelRef(\"lbl\")"},
// function emits one Test<Name>ValidFormsCorrectInstruction test for fn, with
// a subtest per instruction form. Each subtest builds the expected
// ir.Instruction literal, calls the constructor with the form's test
// arguments, and fails on an error or when reflect.DeepEqual reports a
// mismatch.
func (c *ctorsstress) function(fn *api.Function) {
	c.Printf("func Test%sValidFormsCorrectInstruction(t *testing.T) {", fn.Name())
	for _, form := range fn.Forms {
		// Subtests are labelled with the form signature joined by underscores.
		label := strings.Join(form.Signature(), "_")
		c.Printf("t.Run(\"form=%s\", func(t *testing.T) {\n", label)

		sig := TestSignature(form)
		expected := construct(fn, form, sig)
		c.Printf("expect := &%s\n", expected)
		c.Printf("got, err := %s(%s);\n", fn.Name(), sig.Arguments())
		c.Printf("if err != nil { t.Fatal(err) }\n")
		c.Printf("if !reflect.DeepEqual(got, expect) { t.Fatal(\"mismatch\") }\n")
		c.Printf("})\n")
	}
	c.Printf("}\n\n")
}
// ctorsbench is the generator behind NewCtorsBench. cfg supplies the
// generated-code warning, and the embedded prnt.Generator provides the
// printing methods used to build the output.
type ctorsbench struct {
cfg printer.Config
prnt.Generator
}
// NewCtorsBench returns a generator that produces a benchmark for the
// instruction constructors. The generator is wrapped with GoFmt.
func NewCtorsBench(cfg printer.Config) Interface {
	bench := &ctorsbench{cfg: cfg}
	return GoFmt(bench)
}
// Generate writes the constructors benchmark file and returns c.Result().
//
// The output carries a "stress" build tag and declares BenchmarkConstructors
// in package x86. Each benchmark iteration calls every constructor once per
// instruction form with that form's test arguments. The generated code then
// reports an "inst/s" metric: constructor calls per iteration times b.N,
// divided by the elapsed seconds.
func (c *ctorsbench) Generate(is []inst.Instruction) ([]byte, error) {
	// File header: warning, build tag, package clause and imports.
	c.Printf("// %s\n\n", c.cfg.GeneratedWarning())
	c.BuildTag("stress")
	c.NL()
	c.Printf("package x86\n\n")
	c.Printf("import (\n")
	c.Printf("\t\"time\"\n")
	c.Printf("\t\"testing\"\n")
	c.Printf(")\n\n")

	// Benchmark loop: one constructor call per (function, form) pair.
	c.Printf("func BenchmarkConstructors(b *testing.B) {\n")
	c.Printf("start := time.Now()\n")
	c.Printf("for i := 0; i < b.N; i++ {\n")
	calls := 0
	fns := api.InstructionsFunctions(is)
	for _, fn := range fns {
		for _, form := range fn.Forms {
			calls++
			args := TestSignature(form).Arguments()
			c.Printf("%s(%s)\n", fn.Name(), args)
		}
	}
	c.Printf("}\n")

	// calls is baked into the generated source as a constant factor.
	c.Printf("elapsed := time.Since(start)\n")
	c.Printf("\tb.ReportMetric(%d * float64(b.N) / elapsed.Seconds(), \"inst/s\")\n", calls)
	c.Printf("}\n\n")
	return c.Result()
}
// construct returns Go source text for the ir.Instruction composite literal
// that calling fn with form f is expected to produce. s supplies the parameter
// names used in the Operands, Inputs and Outputs fields.
//
// Optional fields (Suffixes, ISA, IsTerminal, IsBranch/IsConditional,
// CancellingInputs) are written only when they are non-empty or true, so they
// are left out of the literal otherwise.
func construct(fn *api.Function, f inst.Form, s api.Signature) string {
	var lit bytes.Buffer
	lit.WriteString("ir.Instruction{\n")

	// Identity of the instruction.
	fmt.Fprintf(&lit, "\tOpcode: %#v,\n", fn.Instruction.Opcode)
	if len(fn.Suffixes) != 0 {
		fmt.Fprintf(&lit, "\tSuffixes: %#v,\n", fn.Suffixes.Strings())
	}

	// Operands, split into those read and those written.
	fmt.Fprintf(&lit, "\tOperands: %s,\n", s.ParameterSlice())
	inputs := operandsWithAction(f, inst.R, s)
	fmt.Fprintf(&lit, "\tInputs: %s,\n", inputs)
	outputs := operandsWithAction(f, inst.W, s)
	fmt.Fprintf(&lit, "\tOutputs: %s,\n", outputs)

	// Instruction set extensions the form requires.
	if len(f.ISA) != 0 {
		fmt.Fprintf(&lit, "\tISA: %#v,\n", f.ISA)
	}

	// Control-flow properties.
	if fn.Instruction.IsTerminal() {
		lit.WriteString("\tIsTerminal: true,\n")
	}
	if fn.Instruction.IsBranch() {
		lit.WriteString("\tIsBranch: true,\n")
		fmt.Fprintf(&lit, "\tIsConditional: %#v,\n", fn.Instruction.IsConditionalBranch())
	}

	// Cancelling inputs flag.
	if f.CancellingInputs {
		lit.WriteString("\tCancellingInputs: true,\n")
	}

	lit.WriteString("}")
	return lit.String()
}
// operandsWithAction returns Go source for a slice literal of the operands of
// f whose action contains any part of a. Explicit operands are named via
// s.ParameterName and come first; implicit operands follow, rendered with
// api.ImplicitRegister. It returns the string "nil" when no operand matches.
func operandsWithAction(f inst.Form, a inst.Action, s api.Signature) string {
	var selected []string

	for idx, operand := range f.Operands {
		if !operand.Action.ContainsAny(a) {
			continue
		}
		selected = append(selected, s.ParameterName(idx))
	}

	for _, implicit := range f.ImplicitOperands {
		if !implicit.Action.ContainsAny(a) {
			continue
		}
		selected = append(selected, api.ImplicitRegister(implicit.Register))
	}

	if len(selected) == 0 {
		return "nil"
	}
	elems := strings.Join(selected, ", ")
	return fmt.Sprintf("[]%s{%s}", api.OperandType, elems)
}