all: AVX-512 (#217)
Extends avo to support most AVX-512 instruction sets.
The instruction type is extended to support suffixes. The K family of opmask
registers is added to the register package, and the operand package is updated
to support the new operand types. Move instruction deduction in `Load` and
`Store` is extended to support KMOV* and VMOV* forms.
Internal code generation packages were overhauled. Instruction database loading
required various messy changes to account for the additional complexities of the
AVX-512 instruction sets. The internal/api package was added to introduce a
separation between instruction forms in the database and the functions avo
provides to create them. This was required since with instruction suffixes there
is no longer a one-to-one mapping between instruction constructors and opcodes.
AVX-512 bloated generated source code size substantially, initially increasing
compilation and CI test times to an unacceptable level. Two changes were made to
address this:
1. Instruction constructors in the `x86` package moved to an optab-based
approach. This compiles substantially faster than the verbose code
generation we had before.
2. The most verbose code-generated tests are moved under build tags and
limited to a stress test mode. Stress test builds are run on a
schedule but not in regular CI.
An example of AVX-512 accelerated 16-lane MD5 is provided to demonstrate and
test the new functionality.
Updates #20 #163 #229
Co-authored-by: Vaughn Iverson <vsivsi@yahoo.com>
This commit is contained in:
@@ -132,17 +132,18 @@ func (p *goasm) flush() {
|
||||
// considered in this calculation.
|
||||
width := 0
|
||||
for _, i := range p.instructions {
|
||||
if len(i.Operands) > 0 && len(i.Opcode) > width {
|
||||
width = len(i.Opcode)
|
||||
opcode := i.OpcodeWithSuffixes()
|
||||
if len(i.Operands) > 0 && len(opcode) > width {
|
||||
width = len(opcode)
|
||||
}
|
||||
}
|
||||
|
||||
// Output instruction block.
|
||||
for _, i := range p.instructions {
|
||||
if len(i.Operands) > 0 {
|
||||
p.Printf("\t%-*s%s\n", width+1, i.Opcode, joinOperands(i.Operands))
|
||||
p.Printf("\t%-*s%s\n", width+1, i.OpcodeWithSuffixes(), joinOperands(i.Operands))
|
||||
} else {
|
||||
p.Printf("\t%s\n", i.Opcode)
|
||||
p.Printf("\t%s\n", i.OpcodeWithSuffixes())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -117,3 +117,21 @@ func TestAlignmentNoOperands(t *testing.T) {
|
||||
"",
|
||||
})
|
||||
}
|
||||
|
||||
func TestOpcodeSuffixes(t *testing.T) {
|
||||
ctx := build.NewContext()
|
||||
ctx.Function("suffixes")
|
||||
ctx.SignatureExpr("func()")
|
||||
ctx.VADDPD_RD_SAE_Z(reg.Z1, reg.Z2, reg.K1, reg.Z3)
|
||||
ctx.ADDQ(reg.RAX, reg.RBX)
|
||||
|
||||
AssertPrintsLines(t, ctx, printer.NewGoAsm, []string{
|
||||
"// Code generated by avo. DO NOT EDIT.",
|
||||
"",
|
||||
"// func suffixes()",
|
||||
"TEXT ·suffixes(SB), $0",
|
||||
"\tVADDPD.RD_SAE.Z Z1, Z2, K1, Z3",
|
||||
"\tADDQ AX, BX", // suffixes count towards alignment width
|
||||
"",
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user