diff --git a/internal/gen/gen.go b/internal/gen/gen.go new file mode 100644 index 0000000..acc26a5 --- /dev/null +++ b/internal/gen/gen.go @@ -0,0 +1,34 @@ +package gen + +import ( + "fmt" + "io" + + "github.com/mmcloughlin/avo/internal/inst" +) + +type Interface interface { + Generate(io.Writer, []*inst.Instruction) error +} + +type Func func(io.Writer, []*inst.Instruction) error + +func (f Func) Generate(w io.Writer, is []*inst.Instruction) error { + return f(w, is) +} + +type printer struct { + w io.Writer + err error +} + +func (p *printer) printf(format string, args ...interface{}) { + if p.err != nil { + return + } + _, p.err = fmt.Fprintf(p.w, format, args...) +} + +func (p *printer) Err() error { + return p.err +} diff --git a/internal/gen/loadertest.go b/internal/gen/loadertest.go new file mode 100644 index 0000000..db3a74a --- /dev/null +++ b/internal/gen/loadertest.go @@ -0,0 +1,127 @@ +package gen + +import ( + "io" + "strings" + + "github.com/mmcloughlin/avo/internal/inst" +) + +type LoaderTest struct{} + +func (l LoaderTest) Generate(w io.Writer, is []*inst.Instruction) error { + p := &printer{w: w} + + p.printf("TEXT loadertest(SB), 0, $0\n") + + for _, i := range is { + p.printf("\t// %s %s\n", i.Opcode, i.Summary) + for _, f := range i.Forms { + as := args(f.Operands) + p.printf("\t// %#v\n", f.Operands) + if as == nil { + p.printf("\t// skip\n") + continue + } + p.printf("\t%s\t%s\n", i.Opcode, strings.Join(as, ", ")) + } + p.printf("\n") + } + + p.printf("\tRET\n") + + return p.Err() +} + +func args(ops []inst.Operand) []string { + as := make([]string, len(ops)) + for i, op := range ops { + a := arg(op.Type) + if a == "" { + return nil + } + as[i] = a + } + return as +} + +// arg generates an argument for an operand of the given type. 
+func arg(t string) string { + m := map[string]string{ + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + "r64": "R8", + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + } + return m[t] +} diff --git a/internal/inst/types.go b/internal/inst/types.go index 98e2c93..c0b10e2 100644 --- a/internal/inst/types.go +++ b/internal/inst/types.go @@ -1,8 +1,9 @@ package inst type Instruction struct { - Opcode string - Forms []Form + Opcode string + Summary string + Forms []Form } type Form struct { @@ -22,3 +23,33 @@ const ( W Action = 0x2 RW Action = R | W ) + +func ActionFromReadWrite(r, w bool) Action { + var a Action + if r { + a |= R + } + if w { + a |= W + } + return a +} + +func (a Action) Read() bool { + return (a & R) != 0 +} + +func (a Action) Write() bool { + return (a & W) != 0 +} + +func (a Action) String() string { + s := "" + if a.Read() { + s += "r" + } + if a.Write() { + s += "w" + } + return s +} diff --git a/internal/load/ifind.sh b/internal/load/ifind.sh new file mode 100755 index 0000000..c09e308 --- /dev/null +++ b/internal/load/ifind.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +inst=$1 + +eval $(go env) + +section() { + echo '------------------------------------------' + echo $1 + echo '------------------------------------------' +} + +section 'stdlib cmd/' +grep -iR ${inst} ${GOROOT}/src/cmd/{asm,internal/obj/x86} + +section 'x/arch/x86 repo' +grep -iR ${inst} ${GOPATH}/src/golang.org/x/arch/x86/ + +section '*_amd64.s files in stdlib' +find ${GOROOT}/src -name '*_amd64.s' | xargs grep -i ${inst} \ No newline at end of file diff --git a/internal/load/load.go b/internal/load/load.go new file mode 100644 index 0000000..098e18f --- /dev/null +++ b/internal/load/load.go @@ -0,0 +1,181 @@ +package load + 
+import ( + "path/filepath" + "strconv" + "strings" + + "github.com/mmcloughlin/avo/internal/inst" + "github.com/mmcloughlin/avo/internal/opcodescsv" + "github.com/mmcloughlin/avo/internal/opcodesxml" +) + +const ( + defaultCSVName = "x86.v0.2.csv" + defaultOpcodesXMLName = "x86_64.xml" +) + +type Loader struct { + X86CSVPath string + OpcodesXMLPath string + + alias map[opcodescsv.Alias]string +} + +func NewLoaderFromDataDir(dir string) *Loader { + return &Loader{ + X86CSVPath: filepath.Join(dir, defaultCSVName), + OpcodesXMLPath: filepath.Join(dir, defaultOpcodesXMLName), + } +} + +func (l *Loader) Load() ([]*inst.Instruction, error) { + if err := l.init(); err != nil { + return nil, err + } + + // Load Opcodes XML file. + iset, err := opcodesxml.ReadFile(l.OpcodesXMLPath) + if err != nil { + return nil, err + } + + // Load opcodes XML data, grouped by Go opcode. + im := map[string]*inst.Instruction{} + for _, i := range iset.Instructions { + for _, f := range i.Forms { + if !l.include(f) { + continue + } + + opcode := l.goname(f) + + if im[opcode] == nil { + im[opcode] = &inst.Instruction{ + Opcode: opcode, + Summary: i.Summary, + } + } + + im[opcode].Forms = append(im[opcode].Forms, l.form(f)) + } + } + + // Convert to a slice to return. + is := make([]*inst.Instruction, 0, len(im)) + for _, i := range im { + is = append(is, i) + } + + return is, nil +} + +func (l *Loader) init() error { + icsv, err := opcodescsv.ReadFile(l.X86CSVPath) + if err != nil { + return err + } + + l.alias, err = opcodescsv.BuildAliasMap(icsv) + if err != nil { + return err + } + + return nil +} + +// include decides whether to include the instruction form in the avo listing. +// This discards some opcodes that are not supported in Go. +func (l Loader) include(f opcodesxml.Form) bool { + // Exclude certain ISAs simply not present in Go. 
+ for _, isa := range f.ISA { + switch isa.ID { + case "TBM", "CLZERO", "MONITORX", "FEMMS": + return false + } + } + + // x86 csv contains a number of CMOV* instructions which are actually not valid + // Go instructions. The valid Go forms should have different opcodes from GNU. + // Therefore a decent "heuristic" is CMOV* instructions that do not have + // aliases. + if strings.HasPrefix(f.GASName, "cmov") && l.lookupAlias(f) == "" { + return false + } + + // Some specific exclusions. + switch f.GASName { + case "callq": + return false + } + + return true +} + +func (l Loader) lookupAlias(f opcodesxml.Form) string { + a := opcodescsv.Alias{Opcode: f.GASName, DataSize: datasize(f)} + return l.alias[a] +} + +func (l Loader) goname(f opcodesxml.Form) string { + // Use go opcode from Opcodes XML where available. + if f.GoName != "" { + return f.GoName + } + + // Return alias if available. + if a := l.lookupAlias(f); a != "" { + return a + } + + n := strings.ToUpper(f.GASName) + + // Some need data sizes added to them. + // TODO(mbm): is there a better way of determining which ones these are? + s := datasize(f) + suffix := map[int]string{16: "W", 32: "L", 64: "Q"} + switch n { + case "RDRAND", "RDSEED": + n += suffix[s] + } + + return n +} + +func (l Loader) form(f opcodesxml.Form) inst.Form { + return inst.Form{ + Operands: operands(f.Operands), + } +} + +// operands maps Opcodes XML operands to avo format. +func operands(ops []opcodesxml.Operand) []inst.Operand { + r := make([]inst.Operand, 0, len(ops)) + for _, op := range ops { + r = append(r, operand(op)) + } + return r +} + +// operand maps an Opcodes XML operand to avo format. +func operand(op opcodesxml.Operand) inst.Operand { + return inst.Operand{ + Type: op.Type, + Action: inst.ActionFromReadWrite(op.Input, op.Output), + } +} + +// datasize (intelligently) guesses the datasize of an instruction form. 
+func datasize(f opcodesxml.Form) int { + for _, op := range f.Operands { + if !op.Output { + continue + } + for s := 8; s <= 64; s *= 2 { + if strings.HasSuffix(op.Type, strconv.Itoa(s)) { + return s + } + } + } + return 0 +} diff --git a/internal/load/load_test.go b/internal/load/load_test.go new file mode 100644 index 0000000..3cfcef8 --- /dev/null +++ b/internal/load/load_test.go @@ -0,0 +1,31 @@ +package load_test + +import ( + "bytes" + "testing" + + "github.com/mmcloughlin/avo/internal/gen" + "github.com/mmcloughlin/avo/internal/inst" + "github.com/mmcloughlin/avo/internal/load" + "github.com/mmcloughlin/avo/internal/test" +) + +func Load(t *testing.T) []*inst.Instruction { + t.Helper() + l := load.NewLoaderFromDataDir("testdata") + is, err := l.Load() + if err != nil { + t.Fatal(err) + } + return is +} + +func TestAssembles(t *testing.T) { + is := Load(t) + + g := &gen.LoaderTest{} + var buf bytes.Buffer + g.Generate(&buf, is) + + test.Assembles(t, buf.Bytes()) +} diff --git a/internal/load/testdata b/internal/load/testdata new file mode 120000 index 0000000..4909e06 --- /dev/null +++ b/internal/load/testdata @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/internal/opcodescsv/analysis.go b/internal/opcodescsv/analysis.go new file mode 100644 index 0000000..c813a46 --- /dev/null +++ b/internal/opcodescsv/analysis.go @@ -0,0 +1,38 @@ +package opcodescsv + +import ( + "strconv" + "strings" + + "golang.org/x/arch/x86/x86csv" +) + +type Alias struct { + Opcode string + DataSize int +} + +// BuildAliasMap constructs a map from AT&T/GNU/Intel to Go syntax. 
+func BuildAliasMap(is []*x86csv.Inst) (map[Alias]string, error) { + m := map[Alias]string{} + for _, i := range is { + s, err := datasize(i.DataSize) + if err != nil { + return nil, err + } + + for _, alt := range []string{i.IntelOpcode(), i.GNUOpcode()} { + if strings.ToUpper(alt) != i.GoOpcode() { + m[Alias{Opcode: strings.ToLower(alt), DataSize: s}] = i.GoOpcode() + } + } + } + return m, nil +} + +func datasize(s string) (int, error) { + if s == "" { + return 0, nil + } + return strconv.Atoi(s) +} diff --git a/internal/opcodescsv/io.go b/internal/opcodescsv/io.go new file mode 100644 index 0000000..78275c3 --- /dev/null +++ b/internal/opcodescsv/io.go @@ -0,0 +1,19 @@ +package opcodescsv + +import ( + "os" + + "golang.org/x/arch/x86/x86csv" +) + +// ReadFile reads the given x86 CSV file. +func ReadFile(filename string) ([]*x86csv.Inst, error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer f.Close() + + r := x86csv.NewReader(f) + return r.ReadAll() +} diff --git a/internal/test/utils.go b/internal/test/utils.go new file mode 100644 index 0000000..f56babe --- /dev/null +++ b/internal/test/utils.go @@ -0,0 +1,69 @@ +package test + +import ( + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "runtime" + "testing" +) + +// Assembles asserts that the given assembly code passes the go assembler. +func Assembles(t *testing.T, asm []byte) { + t.Helper() + + dir, clean := TempDir(t) + defer clean() + + asmfilename := filepath.Join(dir, "asm.s") + if err := ioutil.WriteFile(asmfilename, asm, 0600); err != nil { + t.Fatal(err) + } + + objfilename := filepath.Join(dir, "asm.o") + + goexec(t, "tool", "asm", "-e", "-o", objfilename, asmfilename) +} + +// TempDir creates a temp directory. Returns the path to the directory and a +// cleanup function. 
func TempDir(t *testing.T) (string, func()) {
	t.Helper()

	// The directory is created under the default temp location with an
	// "avo" name prefix.
	path, err := ioutil.TempDir("", "avo")
	if err != nil {
		t.Fatal(err)
	}

	// The cleanup removes the whole tree and fails the test if it cannot.
	cleanup := func() {
		if rmErr := os.RemoveAll(path); rmErr != nil {
			t.Fatal(rmErr)
		}
	}

	return path, cleanup
}

// gobin picks the "go" executable to run: the one under GOROOT/bin when that
// file can be stat'ed, otherwise the bare name "go".
func gobin() string {
	name := "go"
	if runtime.GOOS == "windows" {
		name += ".exe"
	}

	candidate := filepath.Join(runtime.GOROOT(), "bin", name)
	if _, err := os.Stat(candidate); err != nil {
		// Not found under GOROOT; fall back to the bare command name.
		return "go"
	}
	return candidate
}

// goexec invokes the "go" binary with the given arguments, logs the command
// line and its combined stdout/stderr, and fails the test if the command
// returns an error.
func goexec(t *testing.T, arg ...string) {
	t.Helper()

	cmd := exec.Command(gobin(), arg...)
	t.Logf("exec: %s", cmd.Args)

	out, err := cmd.CombinedOutput()
	t.Logf("output:\n%s\n", string(out))
	if err != nil {
		t.Fatal(err)
	}
}