start at some basic passes

This commit is contained in:
Michael McLoughlin
2018-12-02 12:28:33 -08:00
parent 0ceb1c55a4
commit 43575d8b61
14 changed files with 7209 additions and 3904 deletions

46
ast.go
View File

@@ -1,5 +1,9 @@
package avo
import (
"github.com/mmcloughlin/avo/operand"
)
// Asm is implemented by any type exposing an Asm() string method.
// NOTE(review): presumably the returned string is the assembly-syntax
// rendering of the value; confirm against the implementers.
type Asm interface {
Asm() string
}
@@ -28,7 +32,26 @@ func (l Label) node() {}
// Instruction is a single instruction in a function.
type Instruction struct {
Opcode string
Operands []Operand
Operands []operand.Op
IsTerminal bool
IsBranch bool
IsConditional bool
// CFG.
Pred []*Instruction
Succ []*Instruction
}
// TargetLabel returns the label referenced by a branch instruction. It
// returns nil when the instruction is not a branch, has no operands, or its
// first operand is not a label reference.
func (i Instruction) TargetLabel() *Label {
	// Guard the index below: a branch constructed without operands must not
	// panic here.
	if !i.IsBranch || len(i.Operands) == 0 {
		return nil
	}
	// The branch target is read from the first operand.
	if ref, ok := i.Operands[0].(operand.LabelRef); ok {
		lbl := Label(ref)
		return &lbl
	}
	return nil
}
func (i Instruction) node() {}
@@ -46,7 +69,10 @@ func NewFile() *File {
type Function struct {
name string
params []Parameter
nodes []Node
Nodes []Node
// LabelTarget maps from label name to the following instruction.
LabelTarget map[Label]*Instruction
}
func NewFunction(name string) *Function {
@@ -55,7 +81,7 @@ func NewFunction(name string) *Function {
}
}
func (f *Function) AddInstruction(i Instruction) {
func (f *Function) AddInstruction(i *Instruction) {
f.AddNode(i)
}
@@ -64,7 +90,19 @@ func (f *Function) AddLabel(l Label) {
}
func (f *Function) AddNode(n Node) {
f.nodes = append(f.nodes, n)
f.Nodes = append(f.Nodes, n)
}
// Instructions filters the function's nodes down to the instructions,
// preserving their order. The result is nil when there are none.
func (f *Function) Instructions() []*Instruction {
	var insts []*Instruction
	for _, node := range f.Nodes {
		if inst, isInst := node.(*Instruction); isInst {
			insts = append(insts, inst)
		}
	}
	return insts
}
// Name returns the function name.

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ func (b *build) Generate(is []inst.Instruction) ([]byte, error) {
b.Printf("package build\n\n")
b.Printf("import (\n")
b.Printf("\t\"%s\"\n", pkg)
b.Printf("\t\"%s/operand\"\n", pkg)
b.Printf("\t\"%s/x86\"\n", pkg)
b.Printf(")\n\n")

View File

@@ -42,7 +42,7 @@ func (c *ctors) instruction(i inst.Instruction) {
c.Printf("func %s(%s) (*avo.Instruction, error) {\n", i.Opcode, s.ParameterList())
c.checkargs(i, s)
c.Printf("\treturn &avo.Instruction{Opcode: %#v, Operands: %s}, nil\n", i.Opcode, s.ParameterSlice())
c.Printf("\treturn &%s, nil\n", construct(i, s))
c.Printf("}\n\n")
}
@@ -72,6 +72,19 @@ func (c *ctors) doc(i inst.Instruction) []string {
return lines
}
// construct returns Go source for an avo.Instruction composite literal
// describing instruction i, with its operands taken from signature s. The
// control-flow flags (IsTerminal, IsBranch, IsConditional) are only written
// when they apply, so every other field keeps its zero value.
func construct(i inst.Instruction, s signature) string {
	buf := bytes.NewBuffer(nil)
	fmt.Fprintf(buf, "avo.Instruction{\n")
	fmt.Fprintf(buf, "\tOpcode: %#v,\n", i.Opcode)
	fmt.Fprintf(buf, "\tOperands: %s,\n", s.ParameterSlice())
	// Without this flag the generated constructors never mark an instruction
	// as terminal, even though avo.Instruction carries an IsTerminal field.
	if i.IsTerminal() {
		fmt.Fprintf(buf, "\tIsTerminal: true,\n")
	}
	if i.IsBranch() {
		fmt.Fprintf(buf, "\tIsBranch: true,\n")
		fmt.Fprintf(buf, "\tIsConditional: %#v,\n", i.IsConditionalBranch())
	}
	fmt.Fprintf(buf, "}")
	return buf.String()
}
func (c *ctors) checkargs(i inst.Instruction, s signature) {
if i.IsNiladic() {
return

View File

@@ -9,6 +9,9 @@ import (
"github.com/mmcloughlin/avo/internal/inst"
)
// operandType is the Go type name used for operands in generated code.
const operandType = "operand.Op"
// signature provides access to details about the signature of an instruction function.
type signature interface {
ParameterList() string
@@ -21,11 +24,11 @@ type signature interface {
// argslist is the signature for a function with the given named parameters.
type argslist []string
func (a argslist) ParameterList() string { return strings.Join(a, ", ") + " avo.Operand" }
func (a argslist) ParameterList() string { return strings.Join(a, ", ") + " " + operandType }
func (a argslist) Arguments() string { return strings.Join(a, ", ") }
func (a argslist) ParameterName(i int) string { return a[i] }
func (a argslist) ParameterSlice() string {
return fmt.Sprintf("[]avo.Operand{%s}", strings.Join(a, ", "))
return fmt.Sprintf("[]%s{%s}", operandType, strings.Join(a, ", "))
}
func (a argslist) Length() string { return strconv.Itoa(len(a)) }
@@ -34,7 +37,7 @@ type variadic struct {
name string
}
func (v variadic) ParameterList() string { return v.name + " ...avo.Operand" }
func (v variadic) ParameterList() string { return v.name + " ..." + operandType }
func (v variadic) Arguments() string { return v.name + "..." }
func (v variadic) ParameterName(i int) string { return fmt.Sprintf("%s[%d]", v.name, i) }
func (v variadic) ParameterSlice() string { return v.name }

View File

@@ -1,6 +1,9 @@
package inst
import "sort"
import (
"sort"
"strings"
)
type Instruction struct {
Opcode string
@@ -9,6 +12,29 @@ type Instruction struct {
Forms []Form
}
// IsTerminal reports whether the instruction's opcode is "RET".
func (i Instruction) IsTerminal() bool {
	// TODO(mbm): how about the RETF* instructions
	switch i.Opcode {
	case "RET":
		return true
	default:
		return false
	}
}
// IsBranch reports whether any form of the instruction takes an operand whose
// type starts with "rel". CALL is always reported as a non-branch.
func (i Instruction) IsBranch() bool {
	// NOTE(review): CALL is excluded up front, presumably because its forms
	// also carry "rel" operands; confirm.
	if i.Opcode == "CALL" {
		return false
	}
	for _, form := range i.Forms {
		for _, operand := range form.Operands {
			if !strings.HasPrefix(operand.Type, "rel") {
				continue
			}
			return true
		}
	}
	return false
}
// IsConditionalBranch reports whether the instruction is a branch (as defined
// by IsBranch) with an opcode other than "JMP".
func (i Instruction) IsConditionalBranch() bool {
	if i.Opcode == "JMP" {
		return false
	}
	return i.IsBranch()
}
func (i Instruction) Arities() []int {
s := map[int]bool{}
for _, f := range i.Forms {

View File

@@ -4,132 +4,130 @@ import (
"math"
"github.com/mmcloughlin/avo/reg"
"github.com/mmcloughlin/avo"
)
// Is1 returns true if op is the immediate constant 1.
func Is1(op avo.Operand) bool {
func Is1(op Op) bool {
i, ok := op.(Imm)
return ok && i == 1
}
// Is3 returns true if op is the immediate constant 3.
func Is3(op avo.Operand) bool {
func Is3(op Op) bool {
i, ok := op.(Imm)
return ok && i == 3
}
// IsImm2u returns true if op is a 2-bit unsigned immediate (less than 4).
func IsImm2u(op avo.Operand) bool {
func IsImm2u(op Op) bool {
i, ok := op.(Imm)
return ok && i < 4
}
// IsImm8 returns true if op is an 8-bit immediate.
func IsImm8(op avo.Operand) bool {
func IsImm8(op Op) bool {
i, ok := op.(Imm)
return ok && i <= math.MaxUint8
}
// IsImm16 returns true if op is a 16-bit immediate.
func IsImm16(op avo.Operand) bool {
func IsImm16(op Op) bool {
i, ok := op.(Imm)
return ok && i <= math.MaxUint16
}
// IsImm32 returns true if op is a 32-bit immediate.
func IsImm32(op avo.Operand) bool {
func IsImm32(op Op) bool {
i, ok := op.(Imm)
return ok && i <= math.MaxUint32
}
// IsImm64 returns true if op is a 64-bit immediate.
func IsImm64(op avo.Operand) bool {
func IsImm64(op Op) bool {
_, ok := op.(Imm)
return ok
}
// IsAl returns true if op is the AL register.
func IsAl(op avo.Operand) bool {
func IsAl(op Op) bool {
return op == reg.AL
}
// IsCl returns true if op is the CL register.
func IsCl(op avo.Operand) bool {
func IsCl(op Op) bool {
return op == reg.CL
}
// IsAx returns true if op is the 16-bit AX register.
func IsAx(op avo.Operand) bool {
func IsAx(op Op) bool {
return op == reg.AX
}
// IsEax returns true if op is the 32-bit EAX register.
func IsEax(op avo.Operand) bool {
func IsEax(op Op) bool {
return op == reg.EAX
}
// IsRax returns true if op is the 64-bit RAX register.
func IsRax(op avo.Operand) bool {
func IsRax(op Op) bool {
return op == reg.RAX
}
// IsR8 returns true if op is an 8-bit general-purpose register.
func IsR8(op avo.Operand) bool {
func IsR8(op Op) bool {
return IsGP(op, 1)
}
// IsR16 returns true if op is a 16-bit general-purpose register.
func IsR16(op avo.Operand) bool {
func IsR16(op Op) bool {
return IsGP(op, 2)
}
// IsR32 returns true if op is a 32-bit general-purpose register.
func IsR32(op avo.Operand) bool {
func IsR32(op Op) bool {
return IsGP(op, 4)
}
// IsR64 returns true if op is a 64-bit general-purpose register.
func IsR64(op avo.Operand) bool {
func IsR64(op Op) bool {
return IsGP(op, 8)
}
// IsGP returns true if op is a general-purpose register of size n bytes.
func IsGP(op avo.Operand, n uint) bool {
func IsGP(op Op, n uint) bool {
return IsRegisterKindSize(op, reg.GP, n)
}
// IsXmm0 returns true if op is the X0 register.
func IsXmm0(op avo.Operand) bool {
func IsXmm0(op Op) bool {
return op == reg.X0
}
// IsXmm returns true if op is a 128-bit XMM register.
func IsXmm(op avo.Operand) bool {
func IsXmm(op Op) bool {
return IsRegisterKindSize(op, reg.SSEAVX, 16)
}
// IsYmm returns true if op is a 256-bit YMM register.
func IsYmm(op avo.Operand) bool {
func IsYmm(op Op) bool {
return IsRegisterKindSize(op, reg.SSEAVX, 32)
}
// IsRegisterKindSize returns true if op is a register of the given kind and size in bytes.
func IsRegisterKindSize(op avo.Operand, k reg.Kind, n uint) bool {
func IsRegisterKindSize(op Op, k reg.Kind, n uint) bool {
r, ok := op.(reg.Register)
return ok && r.Kind() == k && r.Bytes() == n
}
// IsM returns true if op is a 16-, 32- or 64-bit memory operand.
func IsM(op avo.Operand) bool {
func IsM(op Op) bool {
// TODO(mbm): confirm "m" check is defined correctly
// Intel manual: "A 16-, 32- or 64-bit operand in memory."
return IsM16(op) || IsM32(op) || IsM64(op)
}
// IsM8 returns true if op is an 8-bit memory operand.
func IsM8(op avo.Operand) bool {
func IsM8(op Op) bool {
// TODO(mbm): confirm "m8" check is defined correctly
// Intel manual: "A byte operand in memory, usually expressed as a variable or
// array name, but pointed to by the DS:(E)SI or ES:(E)DI registers. In 64-bit
@@ -138,84 +136,84 @@ func IsM8(op avo.Operand) bool {
}
// IsM16 returns true if op is a 16-bit memory operand.
func IsM16(op avo.Operand) bool {
func IsM16(op Op) bool {
return IsMSize(op, 2)
}
// IsM32 returns true if op is a 32-bit memory operand.
func IsM32(op avo.Operand) bool {
func IsM32(op Op) bool {
return IsMSize(op, 4)
}
// IsM64 returns true if op is a 64-bit memory operand.
func IsM64(op avo.Operand) bool {
func IsM64(op Op) bool {
return IsMSize(op, 8)
}
// IsMSize returns true if op is a memory operand using general-purpose address
// registers of the given size in bytes.
func IsMSize(op avo.Operand, n uint) bool {
func IsMSize(op Op, n uint) bool {
// TODO(mbm): should memory operands have a size attribute as well?
m, ok := op.(Mem)
return ok && IsGP(m.Base, n) && (m.Index == nil || IsGP(m.Index, n))
}
// IsM128 returns true if op is a 128-bit memory operand.
func IsM128(op avo.Operand) bool {
func IsM128(op Op) bool {
// TODO(mbm): should "m128" be the same as "m64"?
return IsM64(op)
}
// IsM256 returns true if op is a 256-bit memory operand.
func IsM256(op avo.Operand) bool {
func IsM256(op Op) bool {
// TODO(mbm): should "m256" be the same as "m64"?
return IsM64(op)
}
// IsVm32x returns true if op is a vector memory operand with 32-bit XMM index.
func IsVm32x(op avo.Operand) bool {
func IsVm32x(op Op) bool {
return IsVmx(op)
}
// IsVm64x returns true if op is a vector memory operand with 64-bit XMM index.
func IsVm64x(op avo.Operand) bool {
func IsVm64x(op Op) bool {
return IsVmx(op)
}
// IsVmx returns true if op is a vector memory operand with XMM index.
func IsVmx(op avo.Operand) bool {
func IsVmx(op Op) bool {
return isvm(op, IsXmm)
}
// IsVm32y returns true if op is a vector memory operand with 32-bit YMM index.
func IsVm32y(op avo.Operand) bool {
func IsVm32y(op Op) bool {
return IsVmy(op)
}
// IsVm64y returns true if op is a vector memory operand with 64-bit YMM index.
func IsVm64y(op avo.Operand) bool {
func IsVm64y(op Op) bool {
return IsVmy(op)
}
// IsVmy returns true if op is a vector memory operand with YMM index.
func IsVmy(op avo.Operand) bool {
func IsVmy(op Op) bool {
return isvm(op, IsYmm)
}
func isvm(op avo.Operand, idx func(avo.Operand) bool) bool {
func isvm(op Op, idx func(Op) bool) bool {
m, ok := op.(Mem)
return ok && IsR64(m.Base) && idx(m.Index)
}
// IsRel8 returns true if op is an 8-bit offset relative to instruction pointer.
func IsRel8(op avo.Operand) bool {
func IsRel8(op Op) bool {
r, ok := op.(Rel)
return ok && r == Rel(int8(r))
}
// IsRel32 returns true if op is an offset relative to instruction pointer, or a
// label reference.
func IsRel32(op avo.Operand) bool {
func IsRel32(op Op) bool {
// TODO(mbm): should labels be considered separately?
_, rel := op.(Rel)
_, label := op.(LabelRef)

View File

@@ -6,14 +6,13 @@ import (
"runtime"
"testing"
"github.com/mmcloughlin/avo"
"github.com/mmcloughlin/avo/reg"
)
func TestChecks(t *testing.T) {
cases := []struct {
Predicate func(avo.Operand) bool
Operand avo.Operand
Predicate func(Op) bool
Operand Op
Expect bool
}{
// Immediates

View File

@@ -6,6 +6,10 @@ import (
"github.com/mmcloughlin/avo/reg"
)
// Op is an operand. Any type with an Asm() string method satisfies it.
type Op interface {
Asm() string
}
type Mem struct {
Disp int
Base reg.Register

83
pass/cfg.go Normal file
View File

@@ -0,0 +1,83 @@
package pass
import (
"errors"
"fmt"
"github.com/mmcloughlin/avo"
)
// LabelTarget builds the map from each label in fn to the instruction that
// immediately follows it, and stores the result in fn.LabelTarget. It fails,
// leaving fn.LabelTarget untouched, if a label appears twice, if the function
// ends with a label, or if a label is followed by something other than an
// instruction.
func LabelTarget(fn *avo.Function) error {
	nodes := fn.Nodes
	last := len(nodes) - 1
	targets := map[avo.Label]*avo.Instruction{}

	for pos, node := range nodes {
		// Only labels are of interest here.
		label, isLabel := node.(avo.Label)
		if !isLabel {
			continue
		}

		// Each label may be defined once only.
		if _, seen := targets[label]; seen {
			return fmt.Errorf("duplicate label \"%s\"", label)
		}

		// There must be a node after the label...
		if pos == last {
			return errors.New("function ends with label")
		}

		// ...and that node must be an instruction.
		inst, isInst := nodes[pos+1].(*avo.Instruction)
		if !isInst {
			return errors.New("instruction should follow a label")
		}

		targets[label] = inst
	}

	fn.LabelTarget = targets
	return nil
}
// CFG constructs the control-flow graph of the function by appending to the
// Succ and Pred lists of each of its instructions.
//
// Branch targets are resolved through fn.LabelTarget, so that map needs to be
// populated (see LabelTarget) for functions that contain branches. An error is
// returned if a branch instruction has no label operand or refers to a label
// that is not in fn.LabelTarget.
func CFG(fn *avo.Function) error {
	is := fn.Instructions()
	n := len(is)

	// Populate successors.
	for i, cur := range is {
		// If it's a branch, the branch target is a successor.
		if cur.IsBranch {
			lbl := cur.TargetLabel()
			if lbl == nil {
				return errors.New("no label for branch instruction")
			}
			target, found := fn.LabelTarget[*lbl]
			if !found {
				return errors.New("unknown label")
			}
			cur.Succ = append(cur.Succ, target)
		}

		// Terminal instructions and unconditional branches never continue to
		// the following instruction.
		if cur.IsTerminal || (cur.IsBranch && !cur.IsConditional) {
			continue
		}

		// Otherwise, execution could continue to the following instruction.
		// The final instruction has nothing after it: record no successor
		// rather than a nil entry, which the predecessor loop below would
		// dereference.
		if i+1 < n {
			cur.Succ = append(cur.Succ, is[i+1])
		}
	}

	// Populate predecessors.
	for _, i := range is {
		for _, s := range i.Succ {
			s.Pred = append(s.Pred, i)
		}
	}

	return nil
}

77
pass/cfg_test.go Normal file
View File

@@ -0,0 +1,77 @@
package pass
import (
"reflect"
"testing"
"github.com/mmcloughlin/avo"
)
// TestLabelTarget checks that LabelTarget maps every label to the instruction
// that immediately follows it, ignoring instructions that are not directly
// preceded by a label.
func TestLabelTarget(t *testing.T) {
	expect := map[avo.Label]*avo.Instruction{
		"lblA": {Opcode: "A"},
		"lblB": {Opcode: "B"},
	}

	f := avo.NewFunction("happypath")
	for lbl, i := range expect {
		f.AddLabel(lbl)
		f.AddInstruction(i)
		// Unlabelled filler instruction; must not appear in the map.
		f.AddInstruction(&avo.Instruction{Opcode: "IDK"})
	}

	if err := LabelTarget(f); err != nil {
		t.Fatal(err)
	}

	if !reflect.DeepEqual(expect, f.LabelTarget) {
		t.Fatalf("incorrect LabelTarget value\ngot=%#v\nexpect=%#v\n", f.LabelTarget, expect)
	}
}
// TestLabelTargetDuplicate checks that LabelTarget rejects a function that
// defines the same label twice.
func TestLabelTargetDuplicate(t *testing.T) {
	f := avo.NewFunction("dupelabel")
	f.AddLabel(avo.Label("lblA"))
	f.AddInstruction(&avo.Instruction{Opcode: "A"})
	f.AddLabel(avo.Label("lblA"))
	f.AddInstruction(&avo.Instruction{Opcode: "A"})

	err := LabelTarget(f)

	if err == nil || err.Error() != "duplicate label \"lblA\"" {
		t.Fatalf("expected error on duplicate label; got %v", err)
	}
}
// TestLabelTargetEndsWithLabel checks that LabelTarget rejects a function
// whose final node is a label.
func TestLabelTargetEndsWithLabel(t *testing.T) {
	fn := avo.NewFunction("endswithlabel")
	fn.AddInstruction(&avo.Instruction{Opcode: "A"})
	fn.AddLabel(avo.Label("theend"))

	const want = "function ends with label"
	if err := LabelTarget(fn); err == nil || err.Error() != want {
		t.Fatalf("expected error when function ends with label; got %v", err)
	}
}
// TestLabelTargetInstructionFollowLabel checks that LabelTarget rejects a
// label that is followed by another label instead of an instruction.
func TestLabelTargetInstructionFollowLabel(t *testing.T) {
	fn := avo.NewFunction("expectinstafterlabel")
	fn.AddLabel(avo.Label("lblA"))
	fn.AddLabel(avo.Label("lblB"))
	fn.AddInstruction(&avo.Instruction{Opcode: "A"})

	const want = "instruction should follow a label"
	if err := LabelTarget(fn); err == nil || err.Error() != want {
		t.Fatalf("expected error when label is not followed by instruction; got %v", err)
	}
}
// TestCFG is a placeholder for tests of the CFG pass; it currently asserts
// nothing. The cases still to be written are listed below.
func TestCFG(t *testing.T) {
// TODO(mbm): jump backward
// TODO(mbm): jump forward
// TODO(mbm): multiple returns
// TODO(mbm): infinite loop
// TODO(mbm): very short infinite loop
}

17
pass/pass.go Normal file
View File

@@ -0,0 +1,17 @@
package pass
import "github.com/mmcloughlin/avo"
// TODO(mbm): pass types
// FunctionPass lifts a per-function pass p into a pass over a whole file. The
// returned pass applies p to each function of the file in turn and stops at
// the first error, which it returns.
func FunctionPass(p func(*avo.Function) error) func(*avo.File) error {
	filePass := func(file *avo.File) error {
		for _, function := range file.Functions {
			err := p(function)
			if err != nil {
				return err
			}
		}
		return nil
	}
	return filePass
}

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"io"
"strings"
"github.com/mmcloughlin/avo/operand"
)
// dot is the pesky unicode dot used in Go assembly.
@@ -69,7 +71,7 @@ func (p *GoPrinter) multicomment(lines []string) {
func (p *GoPrinter) function(f *Function) {
p.printf("TEXT %s%s(SB),0,$%d-%d\n", dot, f.Name(), f.FrameBytes(), f.ArgumentBytes())
for _, node := range f.nodes {
for _, node := range f.Nodes {
switch n := node.(type) {
case Instruction:
p.printf("\t%s\t%s\n", n.Opcode, joinOperands(n.Operands))
@@ -91,7 +93,7 @@ func (p *GoPrinter) printf(format string, args ...interface{}) {
}
}
func joinOperands(operands []Operand) string {
func joinOperands(operands []operand.Op) string {
asm := make([]string, len(operands))
for i, op := range operands {
asm[i] = op.Asm()

File diff suppressed because it is too large Load Diff