reg,pass: refactor allocation of aliased registers (#121)
Issue #100 demonstrated that register allocation for aliased registers is fundamentally broken. The root of the issue is that, currently, accesses to the same virtual register with different masks are treated as accesses to different registers. This PR takes a different approach:
* Liveness analysis is masked: we now properly consider which parts of a register are live.
* Register allocation produces a mapping from virtual to physical ID, and aliasing is applied later.
In addition, a new pass, ZeroExtend32BitOutputs, accounts for the fact that 32-bit writes in 64-bit mode should actually be treated as 64-bit writes (the result is zero-extended). Closes #100
This commit is contained in:
committed by
GitHub
parent
126469f13d
commit
f40d602170
66
tests/alloc/upper32/asm.go
Normal file
66
tests/alloc/upper32/asm.go
Normal file
@@ -0,0 +1,66 @@
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
. "github.com/mmcloughlin/avo/build"
|
||||
. "github.com/mmcloughlin/avo/operand"
|
||||
. "github.com/mmcloughlin/avo/reg"
|
||||
)
|
||||
|
||||
// The goal is to test for correct handling of 32-bit operands in 64-bit mode,
|
||||
// specifically that writes are zero-extended to 64 bits. This test is
|
||||
// constructed such that the register allocator would fail if this feature is
|
||||
// not accounted for. It consists of multiple copies of a 32-bit write followed
|
||||
// by a 64-bit read of the same register. Without special treatment liveness
|
||||
// analysis would consider the upper 32 bits to still be live prior to the
|
||||
// write. Therefore if we stack up enough copies of this, we could cause the
|
||||
// register allocator to fail.
|
||||
|
||||
func main() {
|
||||
const (
|
||||
r = 14 // number of registers
|
||||
m = 3 // number of iterations
|
||||
n = r * m
|
||||
)
|
||||
|
||||
TEXT("Upper32", NOSPLIT, "func() uint64")
|
||||
Doc("Upper32 computes the sum 1+2+...+" + strconv.Itoa(n) + ".")
|
||||
|
||||
Comment("Initialize sum.")
|
||||
s := GP64()
|
||||
XORQ(s, s)
|
||||
|
||||
// Allocate n 64-bit registers and populate them.
|
||||
Comment("Initialize registers.")
|
||||
x := make([]GPVirtual, n)
|
||||
for i := 0; i < n; i++ {
|
||||
x[i] = GP64()
|
||||
MOVQ(U64(0x9e77d78aacb8cbcc), x[i])
|
||||
}
|
||||
|
||||
k := 0
|
||||
for i := 0; i < m; i++ {
|
||||
Commentf("Iteration %d.", i+1)
|
||||
|
||||
// Write to the 32-bit aliases of r registers.
|
||||
for j := 0; j < r; j++ {
|
||||
MOVL(U32(k+j+1), x[k+j].As32())
|
||||
}
|
||||
|
||||
// Sum them up.
|
||||
for j := 0; j < r; j++ {
|
||||
ADDQ(x[k+j], s)
|
||||
}
|
||||
|
||||
k += r
|
||||
}
|
||||
|
||||
Comment("Store result and return.")
|
||||
Store(s, ReturnIndex(0))
|
||||
RET()
|
||||
|
||||
Generate()
|
||||
}
|
||||
2
tests/alloc/upper32/doc.go
Normal file
2
tests/alloc/upper32/doc.go
Normal file
@@ -0,0 +1,2 @@
|
||||
// Package upper32 tests liveness analysis of 32-bit operations on 64-bit registers.
|
||||
package upper32
|
||||
6
tests/alloc/upper32/stub.go
Normal file
6
tests/alloc/upper32/stub.go
Normal file
@@ -0,0 +1,6 @@
|
||||
// Code generated by command: go run asm.go -out upper32.s -stubs stub.go. DO NOT EDIT.
|
||||
|
||||
package upper32
|
||||
|
||||
// Upper32 computes the sum 1+2+...+42.
// The declaration has no Go body; the implementation is the assembly routine
// ·Upper32 in upper32.s.
|
||||
func Upper32() uint64
|
||||
146
tests/alloc/upper32/upper32.s
Normal file
146
tests/alloc/upper32/upper32.s
Normal file
@@ -0,0 +1,146 @@
|
||||
// Code generated by command: go run asm.go -out upper32.s -stubs stub.go. DO NOT EDIT.
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func Upper32() uint64
// Returns 1+2+...+0x2a via ret+0(FP). Declared with a $0 frame and 8 bytes of
// argument/result space. AX is the accumulator; CX, DX, BX, BP, SI, DI and
// R8-R15 are the 14 working registers.
|
||||
TEXT ·Upper32(SB), NOSPLIT, $0-8
|
||||
// Initialize sum.
// XORQ of AX with itself zeroes the 64-bit accumulator.
|
||||
XORQ AX, AX
|
||||
|
||||
// Initialize registers.
// 42 loads in three groups of 14; every group targets the same 14 registers.
// The constant has non-zero upper 32 bits, so the final sum is only correct
// if the MOVL writes below clear the upper half of each register
// (32-bit writes are zero-extended in 64-bit mode).
|
||||
MOVQ $0x9e77d78aacb8cbcc, CX
|
||||
MOVQ $0x9e77d78aacb8cbcc, DX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, SI
|
||||
MOVQ $0x9e77d78aacb8cbcc, DI
|
||||
MOVQ $0x9e77d78aacb8cbcc, R8
|
||||
MOVQ $0x9e77d78aacb8cbcc, R9
|
||||
MOVQ $0x9e77d78aacb8cbcc, R10
|
||||
MOVQ $0x9e77d78aacb8cbcc, R11
|
||||
MOVQ $0x9e77d78aacb8cbcc, R12
|
||||
MOVQ $0x9e77d78aacb8cbcc, R13
|
||||
MOVQ $0x9e77d78aacb8cbcc, R14
|
||||
MOVQ $0x9e77d78aacb8cbcc, R15
|
||||
MOVQ $0x9e77d78aacb8cbcc, CX
|
||||
MOVQ $0x9e77d78aacb8cbcc, DX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, SI
|
||||
MOVQ $0x9e77d78aacb8cbcc, DI
|
||||
MOVQ $0x9e77d78aacb8cbcc, R8
|
||||
MOVQ $0x9e77d78aacb8cbcc, R9
|
||||
MOVQ $0x9e77d78aacb8cbcc, R10
|
||||
MOVQ $0x9e77d78aacb8cbcc, R11
|
||||
MOVQ $0x9e77d78aacb8cbcc, R12
|
||||
MOVQ $0x9e77d78aacb8cbcc, R13
|
||||
MOVQ $0x9e77d78aacb8cbcc, R14
|
||||
MOVQ $0x9e77d78aacb8cbcc, R15
|
||||
MOVQ $0x9e77d78aacb8cbcc, CX
|
||||
MOVQ $0x9e77d78aacb8cbcc, DX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, SI
|
||||
MOVQ $0x9e77d78aacb8cbcc, DI
|
||||
MOVQ $0x9e77d78aacb8cbcc, R8
|
||||
MOVQ $0x9e77d78aacb8cbcc, R9
|
||||
MOVQ $0x9e77d78aacb8cbcc, R10
|
||||
MOVQ $0x9e77d78aacb8cbcc, R11
|
||||
MOVQ $0x9e77d78aacb8cbcc, R12
|
||||
MOVQ $0x9e77d78aacb8cbcc, R13
|
||||
MOVQ $0x9e77d78aacb8cbcc, R14
|
||||
MOVQ $0x9e77d78aacb8cbcc, R15
|
||||
|
||||
// Iteration 1.
// 32-bit writes of 0x01-0x0e, then each full 64-bit register is added to AX.
|
||||
MOVL $0x00000001, CX
|
||||
MOVL $0x00000002, DX
|
||||
MOVL $0x00000003, BX
|
||||
MOVL $0x00000004, BP
|
||||
MOVL $0x00000005, SI
|
||||
MOVL $0x00000006, DI
|
||||
MOVL $0x00000007, R8
|
||||
MOVL $0x00000008, R9
|
||||
MOVL $0x00000009, R10
|
||||
MOVL $0x0000000a, R11
|
||||
MOVL $0x0000000b, R12
|
||||
MOVL $0x0000000c, R13
|
||||
MOVL $0x0000000d, R14
|
||||
MOVL $0x0000000e, R15
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
ADDQ R9, AX
|
||||
ADDQ R10, AX
|
||||
ADDQ R11, AX
|
||||
ADDQ R12, AX
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
|
||||
// Iteration 2.
// Same pattern with the values 0x0f-0x1c.
|
||||
MOVL $0x0000000f, CX
|
||||
MOVL $0x00000010, DX
|
||||
MOVL $0x00000011, BX
|
||||
MOVL $0x00000012, BP
|
||||
MOVL $0x00000013, SI
|
||||
MOVL $0x00000014, DI
|
||||
MOVL $0x00000015, R8
|
||||
MOVL $0x00000016, R9
|
||||
MOVL $0x00000017, R10
|
||||
MOVL $0x00000018, R11
|
||||
MOVL $0x00000019, R12
|
||||
MOVL $0x0000001a, R13
|
||||
MOVL $0x0000001b, R14
|
||||
MOVL $0x0000001c, R15
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
ADDQ R9, AX
|
||||
ADDQ R10, AX
|
||||
ADDQ R11, AX
|
||||
ADDQ R12, AX
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
|
||||
// Iteration 3.
// Same pattern with the values 0x1d-0x2a.
|
||||
MOVL $0x0000001d, CX
|
||||
MOVL $0x0000001e, DX
|
||||
MOVL $0x0000001f, BX
|
||||
MOVL $0x00000020, BP
|
||||
MOVL $0x00000021, SI
|
||||
MOVL $0x00000022, DI
|
||||
MOVL $0x00000023, R8
|
||||
MOVL $0x00000024, R9
|
||||
MOVL $0x00000025, R10
|
||||
MOVL $0x00000026, R11
|
||||
MOVL $0x00000027, R12
|
||||
MOVL $0x00000028, R13
|
||||
MOVL $0x00000029, R14
|
||||
MOVL $0x0000002a, R15
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
ADDQ R9, AX
|
||||
ADDQ R10, AX
|
||||
ADDQ R11, AX
|
||||
ADDQ R12, AX
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
|
||||
// Store result and return.
// The accumulated sum in AX is written to the uint64 result slot.
|
||||
MOVQ AX, ret+0(FP)
|
||||
RET
|
||||
15
tests/alloc/upper32/upper32_test.go
Normal file
15
tests/alloc/upper32/upper32_test.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package upper32
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
//go:generate go run asm.go -out upper32.s -stubs stub.go
|
||||
|
||||
func TestUpper32(t *testing.T) {
|
||||
const n = 14 * 3
|
||||
const expect = n * (n + 1) / 2
|
||||
if got := Upper32(); got != expect {
|
||||
t.Fatalf("Upper32() = %v; expect %v", got, expect)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user