all: AVX-512 (#217)

Extends avo to support most AVX-512 instruction sets.

The instruction type is extended to support suffixes. The K family of opmask
registers is added to the register package, and the operand package is updated
to support the new operand types. Move instruction deduction in `Load` and
`Store` is extended to support KMOV* and VMOV* forms.

Internal code generation packages were overhauled. Instruction database loading
required various messy changes to account for the additional complexities of the
AVX-512 instruction sets. The internal/api package was added to introduce a
separation between instruction forms in the database, and the functions avo
provides to create them. This was required since with instruction suffixes there
is no longer a one-to-one mapping between instruction constructors and opcodes.

AVX-512 bloated generated source code size substantially, initially increasing
compilation and CI test times to an unacceptable level. Two changes were made to
address this:

1.  Instruction constructors in the `x86` package moved to an optab-based
    approach. This compiles substantially faster than the verbose code
    generation we had before.

2.  The most verbose code-generated tests are moved under build tags and
    limited to a stress test mode. Stress test builds are run on a
    schedule but not in regular CI.

An example of AVX-512 accelerated 16-lane MD5 is provided to demonstrate and
test the new functionality.

Updates #20 #163 #229

Co-authored-by: Vaughn Iverson <vsivsi@yahoo.com>
This commit is contained in:
Michael McLoughlin
2021-11-12 18:35:36 -08:00
parent 2867bd7e01
commit b76e849b5c
71 changed files with 257395 additions and 61474 deletions

View File

@@ -1,10 +1,7 @@
package gen
import (
"bytes"
"fmt"
"strings"
"github.com/mmcloughlin/avo/internal/api"
"github.com/mmcloughlin/avo/internal/inst"
"github.com/mmcloughlin/avo/internal/prnt"
"github.com/mmcloughlin/avo/printer"
@@ -15,9 +12,9 @@ type ctors struct {
prnt.Generator
}
// NewCtors will build instruction constructors. Each constructor will check
// that the provided operands match one of the allowed instruction forms. If so
// it will return an Instruction object that can be added to an avo Function.
// NewCtors will build instruction constructors. Each constructor delegates to
// the optab-based instruction builder, providing it with a candidate list of
// forms to match against. The generator is wrapped with GoFmt before being
// returned.
func NewCtors(cfg printer.Config) Interface {
return GoFmt(&ctors{cfg: cfg})
}
@@ -26,107 +23,30 @@ func (c *ctors) Generate(is []inst.Instruction) ([]byte, error) {
c.Printf("// %s\n\n", c.cfg.GeneratedWarning())
c.Printf("package x86\n\n")
c.Printf("import (\n")
c.Printf("\t\"errors\"\n")
c.NL()
c.Printf("\tintrep \"%s/ir\"\n", pkg)
c.Printf("\t\"%s/reg\"\n", pkg)
c.Printf("\t\"%s/operand\"\n", pkg)
c.Printf("\tintrep %q\n", api.ImportPath(api.IRPackage))
c.Printf("\t%q\n", api.ImportPath(api.OperandPackage))
c.Printf(")\n\n")
for _, i := range is {
c.instruction(i)
fns := api.InstructionsFunctions(is)
table := NewTable(is)
for _, fn := range fns {
c.function(fn, table)
}
return c.Result()
}
func (c *ctors) instruction(i inst.Instruction) {
c.Comment(doc(i)...)
func (c *ctors) function(fn *api.Function, table *Table) {
c.Comment(fn.Doc()...)
s := params(i)
s := fn.Signature()
c.Printf("func %s(%s) (*intrep.Instruction, error) {\n", i.Opcode, s.ParameterList())
c.forms(i, s)
c.Printf("func %s(%s) (*intrep.Instruction, error) {\n", fn.Name(), s.ParameterList())
c.Printf(
"return build(%s.Forms(), %s, %s)\n",
table.OpcodeConst(fn.Instruction.Opcode),
table.SuffixesConst(fn.Suffixes),
s.ParameterSlice(),
)
c.Printf("}\n\n")
}
// forms emits the body of the constructor for instruction i. For a niladic
// instruction it prints a direct return of the single form. Otherwise it prints
// a switch with one case per form, where each case checks the operand count
// (variadic instructions only) and applies the checker for each operand type to
// the corresponding parameter of s. The generated code falls through to a
// "bad operands" error when no case matches.
func (c *ctors) forms(i inst.Instruction, s signature) {
	if i.IsNiladic() {
		// A niladic instruction is expected to have exactly one form. Record the
		// violation and stop here: indexing Forms[0] would panic on an empty
		// list, and the recorded error already marks the output as invalid.
		if len(i.Forms) != 1 {
			c.AddError(fmt.Errorf("%s breaks assumption that niladic instructions have one form", i.Opcode))
			return
		}
		c.Printf("return &%s, nil\n", construct(i, i.Forms[0], s))
		return
	}

	c.Printf("switch {\n")
	for _, f := range i.Forms {
		// At most one length check plus one type check per operand.
		conds := make([]string, 0, len(f.Operands)+1)

		// Forms of a variadic instruction can differ in operand count, so the
		// count must be matched before any parameter is inspected.
		if i.IsVariadic() {
			checklen := fmt.Sprintf("%s == %d", s.Length(), len(f.Operands))
			conds = append(conds, checklen)
		}

		for j, op := range f.Operands {
			checktype := fmt.Sprintf("%s(%s)", checkername(op.Type), s.ParameterName(j))
			conds = append(conds, checktype)
		}

		c.Printf("case %s:\n", strings.Join(conds, " && "))
		c.Printf("return &%s, nil\n", construct(i, f, s))
	}
	c.Printf("}\n")

	c.Printf("return nil, errors.New(\"%s: bad operands\")\n", i.Opcode)
}
// construct renders the Go source of an intrep.Instruction composite literal
// for form f of instruction i. Operand expressions are taken from signature s.
// Optional fields (ISA, IsTerminal, IsBranch/IsConditional, CancellingInputs)
// are written only when they apply.
func construct(i inst.Instruction, f inst.Form, s signature) string {
	var out bytes.Buffer

	out.WriteString("intrep.Instruction{\n")
	fmt.Fprintf(&out, "\tOpcode: %#v,\n", i.Opcode)
	fmt.Fprintf(&out, "\tOperands: %s,\n", s.ParameterSlice())

	// Operands read and written by this form.
	fmt.Fprintf(&out, "\tInputs: %s,\n", operandsWithAction(f, inst.R, s))
	fmt.Fprintf(&out, "\tOutputs: %s,\n", operandsWithAction(f, inst.W, s))

	// Instruction set extensions, when the form lists any.
	if len(f.ISA) != 0 {
		fmt.Fprintf(&out, "\tISA: %#v,\n", f.ISA)
	}

	// Control-flow properties.
	if i.IsTerminal() {
		out.WriteString("\tIsTerminal: true,\n")
	}
	if i.IsBranch() {
		out.WriteString("\tIsBranch: true,\n")
		fmt.Fprintf(&out, "\tIsConditional: %#v,\n", i.IsConditionalBranch())
	}

	// Forms flagged as having cancelling inputs.
	if f.CancellingInputs {
		out.WriteString("\tCancellingInputs: true,\n")
	}

	out.WriteString("}")
	return out.String()
}
// operandsWithAction renders a Go slice literal of operandType listing the
// operands of form f whose action contains a. Explicit operands come first,
// referenced by their parameter name in s; implicit operands follow, rendered
// through implicitRegister.
func operandsWithAction(f inst.Form, a inst.Action, s signature) string {
	exprs := make([]string, 0, len(f.Operands)+len(f.ImplicitOperands))

	for idx := range f.Operands {
		if f.Operands[idx].Action.Contains(a) {
			exprs = append(exprs, s.ParameterName(idx))
		}
	}

	for idx := range f.ImplicitOperands {
		if f.ImplicitOperands[idx].Action.Contains(a) {
			exprs = append(exprs, implicitRegister(f.ImplicitOperands[idx].Register))
		}
	}

	joined := strings.Join(exprs, ", ")
	return fmt.Sprintf("[]%s{%s}", operandType, joined)
}