pass: de-prioritize base pointer in register allocation (#184)

Updates #156
This commit is contained in:
Michael McLoughlin
2021-04-18 19:22:09 -07:00
committed by GitHub
parent f295bde84c
commit c32f24fb1e
9 changed files with 5273 additions and 5207 deletions

View File

@@ -3,18 +3,18 @@
#include "textflag.h"
// func Hash64(data []byte) uint64
TEXT ·Hash64(SB), NOSPLIT, $8-32
TEXT ·Hash64(SB), NOSPLIT, $0-32
MOVQ data_base+0(FP), CX
MOVQ data_len+8(FP), BX
MOVQ $0xcbf29ce484222325, AX
MOVQ $0x00000100000001b3, BP
MOVQ $0x00000100000001b3, SI
loop:
CMPQ BX, $0x00
JE done
MOVBQZX (CX), DX
XORQ DX, AX
MULQ BP
MULQ SI
INCQ CX
DECQ BX
JMP loop

File diff suppressed because it is too large Load Diff

View File

@@ -3,68 +3,68 @@
#include "textflag.h"
// func Hash(state *State, key []byte) uint64
TEXT ·Hash(SB), NOSPLIT, $8-40
TEXT ·Hash(SB), NOSPLIT, $0-40
MOVQ state+0(FP), AX
MOVQ key_base+8(FP), CX
MOVQ key_len+16(FP), DX
MOVQ (AX), BX
MOVQ 8(AX), BP
MOVQ DX, SI
ADDQ $0x00000001, SI
MOVQ $0xb89b0f8e1655514f, DI
IMULQ DI, SI
XORQ SI, BX
MOVQ DX, SI
ADDQ $0x00000002, SI
MOVQ $0x8c6f736011bd5127, DI
IMULQ DI, SI
XORQ SI, BP
MOVQ 8(AX), SI
MOVQ DX, DI
ADDQ $0x00000001, DI
MOVQ $0xb89b0f8e1655514f, R8
IMULQ R8, DI
XORQ DI, BX
MOVQ DX, DI
ADDQ $0x00000002, DI
MOVQ $0x8c6f736011bd5127, R8
IMULQ R8, DI
XORQ DI, SI
CMPQ DX, $0x00000020
JGE coreLong
MOVQ DX, SI
SHRQ $0x03, SI
CMPQ SI, $0x00000000
MOVQ DX, DI
SHRQ $0x03, DI
CMPQ DI, $0x00000000
JE shortCore0
CMPQ SI, $0x00000001
CMPQ DI, $0x00000001
JE shortCore1
CMPQ SI, $0x00000002
CMPQ DI, $0x00000002
JE shortCore2
CMPQ SI, $0x00000003
CMPQ DI, $0x00000003
JE shortCore3
shortCore3:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
shortCore2:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
shortCore1:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
@@ -94,7 +94,7 @@ shortTail7:
shortTail6:
MOVBQZX 5(CX), DX
SHLQ $0x30, DX
ADDQ DX, BP
ADDQ DX, SI
shortTail5:
MOVBQZX 4(CX), DX
@@ -103,7 +103,7 @@ shortTail5:
shortTail4:
MOVLQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP shortAfter
shortTail3:
@@ -113,7 +113,7 @@ shortTail3:
shortTail2:
MOVWQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP shortAfter
shortTail1:
@@ -121,129 +121,129 @@ shortTail1:
ADDQ DX, BX
shortTail0:
RORQ $0x20, BP
XORQ $0x000000ff, BP
RORQ $0x20, SI
XORQ $0x000000ff, SI
shortAfter:
XORQ BX, BP
XORQ BX, SI
RORQ $0x21, BX
ADDQ BP, BX
ROLQ $0x11, BP
XORQ BX, BP
ADDQ SI, BX
ROLQ $0x11, SI
XORQ BX, SI
ROLQ $0x2b, BX
ADDQ BP, BX
ROLQ $0x1f, BP
SUBQ BX, BP
ADDQ SI, BX
ROLQ $0x1f, SI
SUBQ BX, SI
ROLQ $0x0d, BX
XORQ BP, BX
SUBQ BX, BP
XORQ SI, BX
SUBQ BX, SI
ROLQ $0x29, BX
ADDQ BP, BX
ROLQ $0x25, BP
XORQ BX, BP
ADDQ SI, BX
ROLQ $0x25, SI
XORQ BX, SI
RORQ $0x27, BX
ADDQ BP, BX
RORQ $0x0f, BP
ADDQ BX, BP
ADDQ SI, BX
RORQ $0x0f, SI
ADDQ BX, SI
ROLQ $0x0f, BX
XORQ BP, BX
RORQ $0x05, BP
XORQ BP, BX
XORQ SI, BX
RORQ $0x05, SI
XORQ SI, BX
MOVQ BX, ret+32(FP)
RET
coreLong:
MOVQ 16(AX), DI
MOVQ 16(AX), R8
MOVQ 24(AX), AX
MOVQ DX, SI
ADDQ $0x00000003, SI
MOVQ $0x8f29bd94edce7b39, R8
IMULQ R8, SI
XORQ SI, DI
MOVQ DX, SI
ADDQ $0x00000004, SI
MOVQ $0x9c1b8e1e9628323f, R8
IMULQ R8, SI
XORQ SI, AX
MOVQ DX, DI
ADDQ $0x00000003, DI
MOVQ $0x8f29bd94edce7b39, R9
IMULQ R9, DI
XORQ DI, R8
MOVQ DX, DI
ADDQ $0x00000004, DI
MOVQ $0x9c1b8e1e9628323f, R9
IMULQ R9, DI
XORQ DI, AX
block:
MOVQ (CX), SI
MOVQ $0x00000000802910e3, R8
IMULQ R8, SI
ADDQ SI, BX
MOVQ (CX), DI
MOVQ $0x00000000802910e3, R9
IMULQ R9, DI
ADDQ DI, BX
ROLQ $0x39, BX
XORQ AX, BX
MOVQ 8(CX), SI
MOVQ $0x00000000819b13af, R8
IMULQ R8, SI
ADDQ SI, BP
ROLQ $0x3f, BP
XORQ DI, BP
MOVQ 16(CX), SI
MOVQ $0x0000000091cb27e5, R8
IMULQ R8, SI
ADDQ SI, DI
RORQ $0x2f, DI
ADDQ BX, DI
MOVQ 24(CX), SI
MOVQ $0x00000000c1a269c1, R8
IMULQ R8, SI
ADDQ SI, AX
MOVQ 8(CX), DI
MOVQ $0x00000000819b13af, R9
IMULQ R9, DI
ADDQ DI, SI
ROLQ $0x3f, SI
XORQ R8, SI
MOVQ 16(CX), DI
MOVQ $0x0000000091cb27e5, R9
IMULQ R9, DI
ADDQ DI, R8
RORQ $0x2f, R8
ADDQ BX, R8
MOVQ 24(CX), DI
MOVQ $0x00000000c1a269c1, R9
IMULQ R9, DI
ADDQ DI, AX
RORQ $0x0b, AX
SUBQ BP, AX
SUBQ SI, AX
ADDQ $0x00000020, CX
SUBQ $0x00000020, DX
CMPQ DX, $0x00000020
JGE block
MOVQ DX, R8
MOVQ DX, SI
SHRQ $0x03, SI
CMPQ SI, $0x00000000
MOVQ DX, R9
MOVQ DX, DI
SHRQ $0x03, DI
CMPQ DI, $0x00000000
JE longCore0
CMPQ SI, $0x00000001
CMPQ DI, $0x00000001
JE longCore1
CMPQ SI, $0x00000002
CMPQ DI, $0x00000002
JE longCore2
CMPQ SI, $0x00000003
CMPQ DI, $0x00000003
JE longCore3
longCore3:
MOVQ (CX), SI
MOVQ $0x00000000802910e3, R9
IMULQ R9, SI
ADDQ SI, BX
MOVQ (CX), DI
MOVQ $0x00000000802910e3, R10
IMULQ R10, DI
ADDQ DI, BX
ROLQ $0x39, BX
XORQ AX, BX
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore2:
MOVQ (CX), SI
MOVQ $0x00000000819b13af, R9
IMULQ R9, SI
ADDQ SI, BP
ROLQ $0x3f, BP
XORQ DI, BP
MOVQ (CX), DI
MOVQ $0x00000000819b13af, R10
IMULQ R10, DI
ADDQ DI, SI
ROLQ $0x3f, SI
XORQ R8, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore1:
MOVQ (CX), SI
MOVQ $0x0000000091cb27e5, R9
IMULQ R9, SI
ADDQ SI, DI
RORQ $0x2f, DI
ADDQ BX, DI
MOVQ (CX), DI
MOVQ $0x0000000091cb27e5, R10
IMULQ R10, DI
ADDQ DI, R8
RORQ $0x2f, R8
ADDQ BX, R8
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore0:
RORQ $0x0b, AX
SUBQ BP, AX
ADDQ $0x00000001, R8
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, R8
XORQ R8, BX
SUBQ SI, AX
ADDQ $0x00000001, R9
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, R9
XORQ R9, BX
CMPQ DX, $0x00000000
JE longTail0
CMPQ DX, $0x00000001
@@ -263,22 +263,22 @@ longCore0:
longTail7:
MOVBQZX 6(CX), DX
ADDQ DX, BP
ADDQ DX, SI
longTail6:
MOVWQZX 4(CX), DX
ADDQ DX, DI
ADDQ DX, R8
MOVLQZX (CX), DX
ADDQ DX, AX
JMP longAfter
longTail5:
MOVBQZX 4(CX), DX
ADDQ DX, BP
ADDQ DX, SI
longTail4:
MOVLQZX (CX), DX
ADDQ DX, DI
ADDQ DX, R8
JMP longAfter
longTail3:
@@ -287,52 +287,52 @@ longTail3:
longTail2:
MOVWQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP longAfter
longTail1:
MOVBQZX (CX), DX
ADDQ DX, DI
ADDQ DX, R8
longTail0:
ROLQ $0x20, AX
XORQ $0x000000ff, AX
longAfter:
SUBQ DI, BP
SUBQ R8, SI
RORQ $0x13, BX
SUBQ BX, BP
RORQ $0x35, BP
XORQ BP, AX
SUBQ BX, SI
RORQ $0x35, SI
XORQ SI, AX
SUBQ AX, BX
ROLQ $0x2b, AX
ADDQ AX, BX
RORQ $0x03, BX
SUBQ BX, AX
RORQ $0x2b, DI
SUBQ AX, DI
ROLQ $0x37, DI
XORQ BX, DI
SUBQ DI, BP
RORQ $0x2b, R8
SUBQ AX, R8
ROLQ $0x37, R8
XORQ BX, R8
SUBQ R8, SI
RORQ $0x07, AX
SUBQ DI, AX
RORQ $0x1f, DI
ADDQ DI, AX
SUBQ BP, DI
SUBQ R8, AX
RORQ $0x1f, R8
ADDQ R8, AX
SUBQ SI, R8
RORQ $0x27, AX
XORQ AX, DI
XORQ AX, R8
RORQ $0x11, AX
XORQ DI, AX
ADDQ AX, BP
RORQ $0x09, BP
XORQ BP, DI
ROLQ $0x18, DI
XORQ DI, AX
XORQ R8, AX
ADDQ AX, SI
RORQ $0x09, SI
XORQ SI, R8
ROLQ $0x18, R8
XORQ R8, AX
RORQ $0x3b, AX
RORQ $0x01, BX
SUBQ BP, BX
XORQ BP, BX
XORQ AX, DI
XORQ DI, BX
SUBQ SI, BX
XORQ SI, BX
XORQ AX, R8
XORQ R8, BX
MOVQ BX, ret+32(FP)
RET

View File

@@ -74,7 +74,7 @@ func Liveness(fn *ir.Function) error {
// AllocateRegisters performs register allocation.
func AllocateRegisters(fn *ir.Function) error {
// Populate allocators (one per kind).
// Initialize one allocator per kind.
as := map[reg.Kind]*Allocator{}
for _, i := range fn.Instructions() {
for _, r := range i.Registers() {
@@ -86,7 +86,28 @@ func AllocateRegisters(fn *ir.Function) error {
}
as[k] = a
}
as[k].Add(r.ID())
}
}
// De-prioritize the base pointer register. This can be used as a general
// purpose register, but it's callee-save so needs to be saved/restored if
// it is clobbered. For this reason we prefer to avoid using it unless
// forced to by register pressure.
for k, a := range as {
f := reg.FamilyOfKind(k)
for _, r := range f.Registers() {
if (r.Info() & reg.BasePointer) != 0 {
// Negative priority penalizes this register relative to all
// others (having default zero priority).
a.SetPriority(r.ID(), -1)
}
}
}
// Populate registers to be allocated.
for _, i := range fn.Instructions() {
for _, r := range i.Registers() {
as[r.Kind()].Add(r.ID())
}
}

View File

@@ -106,6 +106,51 @@ func ConstructLiveness(t *testing.T, ctx *build.Context) *ir.Function {
return BuildFunction(t, ctx, pass.LabelTarget, pass.CFG, pass.Liveness)
}
// TestAllocateRegistersBasePointerDeprioritized checks that register
// allocation does not assign the base pointer when the function can be
// satisfied using other general-purpose registers alone.
func TestAllocateRegistersBasePointerDeprioritized(t *testing.T) {
	// Construct a function that requires n general-purpose registers all live
	// at once. Choose n to be the maximal possible number of registers without
	// touching the base pointer.
	// NOTE(review): 14 presumably corresponds to the 16 general-purpose
	// registers minus the stack pointer and base pointer; confirm against the
	// register family definition.
	const n = 14

	ctx := build.NewContext()
	ctx.Function("sum")
	ctx.SignatureExpr("func() uint64")

	// Initialize n virtual registers. Every one of them is read by the
	// additions below, so they are all live simultaneously.
	x := make([]reg.GPVirtual, n)
	for i := 0; i < n; i++ {
		x[i] = ctx.GP64()
		ctx.MOVQ(operand.U64(i), x[i])
	}

	// Accumulate everything into x[0].
	for i := 1; i < n; i++ {
		ctx.ADDQ(x[i], x[0])
	}

	ctx.Store(x[0], ctx.ReturnIndex(0))
	ctx.RET()

	// Build and compile the function up to register allocation.
	fn := BuildFunction(t, ctx, pass.LabelTarget, pass.CFG, pass.Liveness, pass.AllocateRegisters, pass.BindRegisters)

	// Collect the set of physical registers written by any instruction.
	ps := map[reg.Physical]bool{}
	for _, i := range fn.Instructions() {
		for _, r := range i.OutputRegisters() {
			ps[reg.ToPhysical(r)] = true
		}
	}

	// Verify this function uses n registers, but not the base pointer.
	// Report both the observed and the expected count so that a failure is
	// diagnosable without re-running under a debugger.
	if len(ps) != n {
		t.Fatalf("function uses %d distinct physical registers; expected %d", len(ps), n)
	}

	for p := range ps {
		if (p.Info() & reg.BasePointer) != 0 {
			t.Fatal("base pointer used")
		}
	}
}
func TestEnsureBasePointerCalleeSavedFrameless(t *testing.T) {
// Construct a function that writes to the base pointer.
ctx := build.NewContext()

View File

@@ -8,17 +8,17 @@ TEXT ·GP8(SB), NOSPLIT, $8-1
MOVB $0x02, CL
MOVB $0x03, DL
MOVB $0x04, BL
MOVB $0x05, BP
MOVB $0x06, SI
MOVB $0x07, DI
MOVB $0x08, R8
MOVB $0x09, R9
MOVB $0x0a, R10
MOVB $0x0b, R11
MOVB $0x0c, R12
MOVB $0x0d, R13
MOVB $0x0e, R14
MOVB $0x0f, R15
MOVB $0x05, SI
MOVB $0x06, DI
MOVB $0x07, R8
MOVB $0x08, R9
MOVB $0x09, R10
MOVB $0x0a, R11
MOVB $0x0b, R12
MOVB $0x0c, R13
MOVB $0x0d, R14
MOVB $0x0e, R15
MOVB $0x0f, BP
MOVB $0x10, AH
MOVB $0x11, CH
MOVB $0x12, DH
@@ -26,7 +26,6 @@ TEXT ·GP8(SB), NOSPLIT, $8-1
ADDB CL, AL
ADDB DL, AL
ADDB BL, AL
ADDB BP, AL
ADDB SI, AL
ADDB DI, AL
ADDB R8, AL
@@ -37,6 +36,7 @@ TEXT ·GP8(SB), NOSPLIT, $8-1
ADDB R13, AL
ADDB R14, AL
ADDB R15, AL
ADDB BP, AL
ADDB AH, AL
ADDB CH, AL
ADDB DH, AL

View File

@@ -8,36 +8,35 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
MOVQ $0x0002002a, CX
MOVQ $0x0003002a, DX
MOVQ $0x0004002a, BX
MOVQ $0x0005002a, BP
MOVQ $0x0006002a, SI
MOVQ $0x0007002a, DI
MOVQ $0x0008002a, R8
MOVQ $0x0009002a, R9
MOVQ $0x000a002a, R10
MOVQ $0x000b002a, R11
MOVQ $0x000c002a, R12
MOVQ $0x000d002a, R13
MOVQ $0x000e002a, R14
MOVQ $0x000f002a, R15
MOVQ $0x0005002a, SI
MOVQ $0x0006002a, DI
MOVQ $0x0007002a, R8
MOVQ $0x0008002a, R9
MOVQ $0x0009002a, R10
MOVQ $0x000a002a, R11
MOVQ $0x000b002a, R12
MOVQ $0x000c002a, R13
MOVQ $0x000d002a, R14
MOVQ $0x000e002a, R15
MOVQ $0x000f002a, BP
MOVW $0x0001, AX
MOVW $0x0002, CX
MOVW $0x0003, DX
MOVW $0x0004, BX
MOVW $0x0005, BP
MOVW $0x0006, SI
MOVW $0x0007, DI
MOVW $0x0008, R8
MOVW $0x0009, R9
MOVW $0x000a, R10
MOVW $0x000b, R11
MOVW $0x000c, R12
MOVW $0x000d, R13
MOVW $0x000e, R14
MOVW $0x000f, R15
MOVW $0x0005, SI
MOVW $0x0006, DI
MOVW $0x0007, R8
MOVW $0x0008, R9
MOVW $0x0009, R10
MOVW $0x000a, R11
MOVW $0x000b, R12
MOVW $0x000c, R13
MOVW $0x000d, R14
MOVW $0x000e, R15
MOVW $0x000f, BP
ADDW CX, AX
ADDW DX, AX
ADDW BX, AX
ADDW BP, AX
ADDW SI, AX
ADDW DI, AX
ADDW R8, AX
@@ -48,12 +47,12 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
ADDW R13, AX
ADDW R14, AX
ADDW R15, AX
ADDW BP, AX
MOVW AX, ret+0(FP)
MOVW $0x0000, AX
MOVW $0x0000, CX
MOVW $0x0000, DX
MOVW $0x0000, BX
MOVW $0x0000, BP
MOVW $0x0000, SI
MOVW $0x0000, DI
MOVW $0x0000, R8
@@ -64,10 +63,10 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
MOVW $0x0000, R13
MOVW $0x0000, R14
MOVW $0x0000, R15
MOVW $0x0000, BP
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
@@ -78,6 +77,7 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
ADDQ BP, AX
SHRQ $0x10, AX
MOVQ AX, ret1+8(FP)
RET

View File

@@ -11,7 +11,6 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
MOVQ $0x9e77d78aacb8cbcc, CX
MOVQ $0x9e77d78aacb8cbcc, DX
MOVQ $0x9e77d78aacb8cbcc, BX
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, SI
MOVQ $0x9e77d78aacb8cbcc, DI
MOVQ $0x9e77d78aacb8cbcc, R8
@@ -22,10 +21,10 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
MOVQ $0x9e77d78aacb8cbcc, R13
MOVQ $0x9e77d78aacb8cbcc, R14
MOVQ $0x9e77d78aacb8cbcc, R15
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, CX
MOVQ $0x9e77d78aacb8cbcc, DX
MOVQ $0x9e77d78aacb8cbcc, BX
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, SI
MOVQ $0x9e77d78aacb8cbcc, DI
MOVQ $0x9e77d78aacb8cbcc, R8
@@ -36,10 +35,10 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
MOVQ $0x9e77d78aacb8cbcc, R13
MOVQ $0x9e77d78aacb8cbcc, R14
MOVQ $0x9e77d78aacb8cbcc, R15
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, CX
MOVQ $0x9e77d78aacb8cbcc, DX
MOVQ $0x9e77d78aacb8cbcc, BX
MOVQ $0x9e77d78aacb8cbcc, BP
MOVQ $0x9e77d78aacb8cbcc, SI
MOVQ $0x9e77d78aacb8cbcc, DI
MOVQ $0x9e77d78aacb8cbcc, R8
@@ -50,26 +49,26 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
MOVQ $0x9e77d78aacb8cbcc, R13
MOVQ $0x9e77d78aacb8cbcc, R14
MOVQ $0x9e77d78aacb8cbcc, R15
MOVQ $0x9e77d78aacb8cbcc, BP
// Iteration 1.
MOVL $0x00000001, CX
MOVL $0x00000002, DX
MOVL $0x00000003, BX
MOVL $0x00000004, BP
MOVL $0x00000005, SI
MOVL $0x00000006, DI
MOVL $0x00000007, R8
MOVL $0x00000008, R9
MOVL $0x00000009, R10
MOVL $0x0000000a, R11
MOVL $0x0000000b, R12
MOVL $0x0000000c, R13
MOVL $0x0000000d, R14
MOVL $0x0000000e, R15
MOVL $0x00000004, SI
MOVL $0x00000005, DI
MOVL $0x00000006, R8
MOVL $0x00000007, R9
MOVL $0x00000008, R10
MOVL $0x00000009, R11
MOVL $0x0000000a, R12
MOVL $0x0000000b, R13
MOVL $0x0000000c, R14
MOVL $0x0000000d, R15
MOVL $0x0000000e, BP
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
@@ -80,26 +79,26 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
ADDQ BP, AX
// Iteration 2.
MOVL $0x0000000f, CX
MOVL $0x00000010, DX
MOVL $0x00000011, BX
MOVL $0x00000012, BP
MOVL $0x00000013, SI
MOVL $0x00000014, DI
MOVL $0x00000015, R8
MOVL $0x00000016, R9
MOVL $0x00000017, R10
MOVL $0x00000018, R11
MOVL $0x00000019, R12
MOVL $0x0000001a, R13
MOVL $0x0000001b, R14
MOVL $0x0000001c, R15
MOVL $0x00000012, SI
MOVL $0x00000013, DI
MOVL $0x00000014, R8
MOVL $0x00000015, R9
MOVL $0x00000016, R10
MOVL $0x00000017, R11
MOVL $0x00000018, R12
MOVL $0x00000019, R13
MOVL $0x0000001a, R14
MOVL $0x0000001b, R15
MOVL $0x0000001c, BP
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
@@ -110,26 +109,26 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
ADDQ BP, AX
// Iteration 3.
MOVL $0x0000001d, CX
MOVL $0x0000001e, DX
MOVL $0x0000001f, BX
MOVL $0x00000020, BP
MOVL $0x00000021, SI
MOVL $0x00000022, DI
MOVL $0x00000023, R8
MOVL $0x00000024, R9
MOVL $0x00000025, R10
MOVL $0x00000026, R11
MOVL $0x00000027, R12
MOVL $0x00000028, R13
MOVL $0x00000029, R14
MOVL $0x0000002a, R15
MOVL $0x00000020, SI
MOVL $0x00000021, DI
MOVL $0x00000022, R8
MOVL $0x00000023, R9
MOVL $0x00000024, R10
MOVL $0x00000025, R11
MOVL $0x00000026, R12
MOVL $0x00000027, R13
MOVL $0x00000028, R14
MOVL $0x00000029, R15
MOVL $0x0000002a, BP
ADDQ CX, AX
ADDQ DX, AX
ADDQ BX, AX
ADDQ BP, AX
ADDQ SI, AX
ADDQ DI, AX
ADDQ R8, AX
@@ -140,6 +139,7 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
ADDQ R13, AX
ADDQ R14, AX
ADDQ R15, AX
ADDQ BP, AX
// Store result and return.
MOVQ AX, ret+0(FP)

File diff suppressed because it is too large Load Diff