pass: de-prioritize base pointer in register allocation (#184)
Updates #156
This commit is contained in:
committed by
GitHub
parent
f295bde84c
commit
c32f24fb1e
@@ -3,18 +3,18 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// func Hash64(data []byte) uint64
|
||||
TEXT ·Hash64(SB), NOSPLIT, $8-32
|
||||
TEXT ·Hash64(SB), NOSPLIT, $0-32
|
||||
MOVQ data_base+0(FP), CX
|
||||
MOVQ data_len+8(FP), BX
|
||||
MOVQ $0xcbf29ce484222325, AX
|
||||
MOVQ $0x00000100000001b3, BP
|
||||
MOVQ $0x00000100000001b3, SI
|
||||
|
||||
loop:
|
||||
CMPQ BX, $0x00
|
||||
JE done
|
||||
MOVBQZX (CX), DX
|
||||
XORQ DX, AX
|
||||
MULQ BP
|
||||
MULQ SI
|
||||
INCQ CX
|
||||
DECQ BX
|
||||
JMP loop
|
||||
|
||||
1810
examples/sha1/sha1.s
1810
examples/sha1/sha1.s
File diff suppressed because it is too large
Load Diff
@@ -3,68 +3,68 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// func Hash(state *State, key []byte) uint64
|
||||
TEXT ·Hash(SB), NOSPLIT, $8-40
|
||||
TEXT ·Hash(SB), NOSPLIT, $0-40
|
||||
MOVQ state+0(FP), AX
|
||||
MOVQ key_base+8(FP), CX
|
||||
MOVQ key_len+16(FP), DX
|
||||
MOVQ (AX), BX
|
||||
MOVQ 8(AX), BP
|
||||
MOVQ DX, SI
|
||||
ADDQ $0x00000001, SI
|
||||
MOVQ $0xb89b0f8e1655514f, DI
|
||||
IMULQ DI, SI
|
||||
XORQ SI, BX
|
||||
MOVQ DX, SI
|
||||
ADDQ $0x00000002, SI
|
||||
MOVQ $0x8c6f736011bd5127, DI
|
||||
IMULQ DI, SI
|
||||
XORQ SI, BP
|
||||
MOVQ 8(AX), SI
|
||||
MOVQ DX, DI
|
||||
ADDQ $0x00000001, DI
|
||||
MOVQ $0xb89b0f8e1655514f, R8
|
||||
IMULQ R8, DI
|
||||
XORQ DI, BX
|
||||
MOVQ DX, DI
|
||||
ADDQ $0x00000002, DI
|
||||
MOVQ $0x8c6f736011bd5127, R8
|
||||
IMULQ R8, DI
|
||||
XORQ DI, SI
|
||||
CMPQ DX, $0x00000020
|
||||
JGE coreLong
|
||||
MOVQ DX, SI
|
||||
SHRQ $0x03, SI
|
||||
CMPQ SI, $0x00000000
|
||||
MOVQ DX, DI
|
||||
SHRQ $0x03, DI
|
||||
CMPQ DI, $0x00000000
|
||||
JE shortCore0
|
||||
CMPQ SI, $0x00000001
|
||||
CMPQ DI, $0x00000001
|
||||
JE shortCore1
|
||||
CMPQ SI, $0x00000002
|
||||
CMPQ DI, $0x00000002
|
||||
JE shortCore2
|
||||
CMPQ SI, $0x00000003
|
||||
CMPQ DI, $0x00000003
|
||||
JE shortCore3
|
||||
|
||||
shortCore3:
|
||||
MOVQ (CX), AX
|
||||
MOVQ $0x9c1b8e1e9628323f, SI
|
||||
IMULQ SI, AX
|
||||
MOVQ $0x9c1b8e1e9628323f, DI
|
||||
IMULQ DI, AX
|
||||
ADDQ AX, BX
|
||||
RORQ $0x11, BX
|
||||
XORQ BP, BX
|
||||
RORQ $0x35, BP
|
||||
ADDQ BX, BP
|
||||
XORQ SI, BX
|
||||
RORQ $0x35, SI
|
||||
ADDQ BX, SI
|
||||
ADDQ $0x00000008, CX
|
||||
SUBQ $0x00000008, DX
|
||||
|
||||
shortCore2:
|
||||
MOVQ (CX), AX
|
||||
MOVQ $0x9c1b8e1e9628323f, SI
|
||||
IMULQ SI, AX
|
||||
MOVQ $0x9c1b8e1e9628323f, DI
|
||||
IMULQ DI, AX
|
||||
ADDQ AX, BX
|
||||
RORQ $0x11, BX
|
||||
XORQ BP, BX
|
||||
RORQ $0x35, BP
|
||||
ADDQ BX, BP
|
||||
XORQ SI, BX
|
||||
RORQ $0x35, SI
|
||||
ADDQ BX, SI
|
||||
ADDQ $0x00000008, CX
|
||||
SUBQ $0x00000008, DX
|
||||
|
||||
shortCore1:
|
||||
MOVQ (CX), AX
|
||||
MOVQ $0x9c1b8e1e9628323f, SI
|
||||
IMULQ SI, AX
|
||||
MOVQ $0x9c1b8e1e9628323f, DI
|
||||
IMULQ DI, AX
|
||||
ADDQ AX, BX
|
||||
RORQ $0x11, BX
|
||||
XORQ BP, BX
|
||||
RORQ $0x35, BP
|
||||
ADDQ BX, BP
|
||||
XORQ SI, BX
|
||||
RORQ $0x35, SI
|
||||
ADDQ BX, SI
|
||||
ADDQ $0x00000008, CX
|
||||
SUBQ $0x00000008, DX
|
||||
|
||||
@@ -94,7 +94,7 @@ shortTail7:
|
||||
shortTail6:
|
||||
MOVBQZX 5(CX), DX
|
||||
SHLQ $0x30, DX
|
||||
ADDQ DX, BP
|
||||
ADDQ DX, SI
|
||||
|
||||
shortTail5:
|
||||
MOVBQZX 4(CX), DX
|
||||
@@ -103,7 +103,7 @@ shortTail5:
|
||||
|
||||
shortTail4:
|
||||
MOVLQZX (CX), DX
|
||||
ADDQ DX, BP
|
||||
ADDQ DX, SI
|
||||
JMP shortAfter
|
||||
|
||||
shortTail3:
|
||||
@@ -113,7 +113,7 @@ shortTail3:
|
||||
|
||||
shortTail2:
|
||||
MOVWQZX (CX), DX
|
||||
ADDQ DX, BP
|
||||
ADDQ DX, SI
|
||||
JMP shortAfter
|
||||
|
||||
shortTail1:
|
||||
@@ -121,129 +121,129 @@ shortTail1:
|
||||
ADDQ DX, BX
|
||||
|
||||
shortTail0:
|
||||
RORQ $0x20, BP
|
||||
XORQ $0x000000ff, BP
|
||||
RORQ $0x20, SI
|
||||
XORQ $0x000000ff, SI
|
||||
|
||||
shortAfter:
|
||||
XORQ BX, BP
|
||||
XORQ BX, SI
|
||||
RORQ $0x21, BX
|
||||
ADDQ BP, BX
|
||||
ROLQ $0x11, BP
|
||||
XORQ BX, BP
|
||||
ADDQ SI, BX
|
||||
ROLQ $0x11, SI
|
||||
XORQ BX, SI
|
||||
ROLQ $0x2b, BX
|
||||
ADDQ BP, BX
|
||||
ROLQ $0x1f, BP
|
||||
SUBQ BX, BP
|
||||
ADDQ SI, BX
|
||||
ROLQ $0x1f, SI
|
||||
SUBQ BX, SI
|
||||
ROLQ $0x0d, BX
|
||||
XORQ BP, BX
|
||||
SUBQ BX, BP
|
||||
XORQ SI, BX
|
||||
SUBQ BX, SI
|
||||
ROLQ $0x29, BX
|
||||
ADDQ BP, BX
|
||||
ROLQ $0x25, BP
|
||||
XORQ BX, BP
|
||||
ADDQ SI, BX
|
||||
ROLQ $0x25, SI
|
||||
XORQ BX, SI
|
||||
RORQ $0x27, BX
|
||||
ADDQ BP, BX
|
||||
RORQ $0x0f, BP
|
||||
ADDQ BX, BP
|
||||
ADDQ SI, BX
|
||||
RORQ $0x0f, SI
|
||||
ADDQ BX, SI
|
||||
ROLQ $0x0f, BX
|
||||
XORQ BP, BX
|
||||
RORQ $0x05, BP
|
||||
XORQ BP, BX
|
||||
XORQ SI, BX
|
||||
RORQ $0x05, SI
|
||||
XORQ SI, BX
|
||||
MOVQ BX, ret+32(FP)
|
||||
RET
|
||||
|
||||
coreLong:
|
||||
MOVQ 16(AX), DI
|
||||
MOVQ 16(AX), R8
|
||||
MOVQ 24(AX), AX
|
||||
MOVQ DX, SI
|
||||
ADDQ $0x00000003, SI
|
||||
MOVQ $0x8f29bd94edce7b39, R8
|
||||
IMULQ R8, SI
|
||||
XORQ SI, DI
|
||||
MOVQ DX, SI
|
||||
ADDQ $0x00000004, SI
|
||||
MOVQ $0x9c1b8e1e9628323f, R8
|
||||
IMULQ R8, SI
|
||||
XORQ SI, AX
|
||||
MOVQ DX, DI
|
||||
ADDQ $0x00000003, DI
|
||||
MOVQ $0x8f29bd94edce7b39, R9
|
||||
IMULQ R9, DI
|
||||
XORQ DI, R8
|
||||
MOVQ DX, DI
|
||||
ADDQ $0x00000004, DI
|
||||
MOVQ $0x9c1b8e1e9628323f, R9
|
||||
IMULQ R9, DI
|
||||
XORQ DI, AX
|
||||
|
||||
block:
|
||||
MOVQ (CX), SI
|
||||
MOVQ $0x00000000802910e3, R8
|
||||
IMULQ R8, SI
|
||||
ADDQ SI, BX
|
||||
MOVQ (CX), DI
|
||||
MOVQ $0x00000000802910e3, R9
|
||||
IMULQ R9, DI
|
||||
ADDQ DI, BX
|
||||
ROLQ $0x39, BX
|
||||
XORQ AX, BX
|
||||
MOVQ 8(CX), SI
|
||||
MOVQ $0x00000000819b13af, R8
|
||||
IMULQ R8, SI
|
||||
ADDQ SI, BP
|
||||
ROLQ $0x3f, BP
|
||||
XORQ DI, BP
|
||||
MOVQ 16(CX), SI
|
||||
MOVQ $0x0000000091cb27e5, R8
|
||||
IMULQ R8, SI
|
||||
ADDQ SI, DI
|
||||
RORQ $0x2f, DI
|
||||
ADDQ BX, DI
|
||||
MOVQ 24(CX), SI
|
||||
MOVQ $0x00000000c1a269c1, R8
|
||||
IMULQ R8, SI
|
||||
ADDQ SI, AX
|
||||
MOVQ 8(CX), DI
|
||||
MOVQ $0x00000000819b13af, R9
|
||||
IMULQ R9, DI
|
||||
ADDQ DI, SI
|
||||
ROLQ $0x3f, SI
|
||||
XORQ R8, SI
|
||||
MOVQ 16(CX), DI
|
||||
MOVQ $0x0000000091cb27e5, R9
|
||||
IMULQ R9, DI
|
||||
ADDQ DI, R8
|
||||
RORQ $0x2f, R8
|
||||
ADDQ BX, R8
|
||||
MOVQ 24(CX), DI
|
||||
MOVQ $0x00000000c1a269c1, R9
|
||||
IMULQ R9, DI
|
||||
ADDQ DI, AX
|
||||
RORQ $0x0b, AX
|
||||
SUBQ BP, AX
|
||||
SUBQ SI, AX
|
||||
ADDQ $0x00000020, CX
|
||||
SUBQ $0x00000020, DX
|
||||
CMPQ DX, $0x00000020
|
||||
JGE block
|
||||
MOVQ DX, R8
|
||||
MOVQ DX, SI
|
||||
SHRQ $0x03, SI
|
||||
CMPQ SI, $0x00000000
|
||||
MOVQ DX, R9
|
||||
MOVQ DX, DI
|
||||
SHRQ $0x03, DI
|
||||
CMPQ DI, $0x00000000
|
||||
JE longCore0
|
||||
CMPQ SI, $0x00000001
|
||||
CMPQ DI, $0x00000001
|
||||
JE longCore1
|
||||
CMPQ SI, $0x00000002
|
||||
CMPQ DI, $0x00000002
|
||||
JE longCore2
|
||||
CMPQ SI, $0x00000003
|
||||
CMPQ DI, $0x00000003
|
||||
JE longCore3
|
||||
|
||||
longCore3:
|
||||
MOVQ (CX), SI
|
||||
MOVQ $0x00000000802910e3, R9
|
||||
IMULQ R9, SI
|
||||
ADDQ SI, BX
|
||||
MOVQ (CX), DI
|
||||
MOVQ $0x00000000802910e3, R10
|
||||
IMULQ R10, DI
|
||||
ADDQ DI, BX
|
||||
ROLQ $0x39, BX
|
||||
XORQ AX, BX
|
||||
ADDQ $0x00000008, CX
|
||||
SUBQ $0x00000008, DX
|
||||
|
||||
longCore2:
|
||||
MOVQ (CX), SI
|
||||
MOVQ $0x00000000819b13af, R9
|
||||
IMULQ R9, SI
|
||||
ADDQ SI, BP
|
||||
ROLQ $0x3f, BP
|
||||
XORQ DI, BP
|
||||
MOVQ (CX), DI
|
||||
MOVQ $0x00000000819b13af, R10
|
||||
IMULQ R10, DI
|
||||
ADDQ DI, SI
|
||||
ROLQ $0x3f, SI
|
||||
XORQ R8, SI
|
||||
ADDQ $0x00000008, CX
|
||||
SUBQ $0x00000008, DX
|
||||
|
||||
longCore1:
|
||||
MOVQ (CX), SI
|
||||
MOVQ $0x0000000091cb27e5, R9
|
||||
IMULQ R9, SI
|
||||
ADDQ SI, DI
|
||||
RORQ $0x2f, DI
|
||||
ADDQ BX, DI
|
||||
MOVQ (CX), DI
|
||||
MOVQ $0x0000000091cb27e5, R10
|
||||
IMULQ R10, DI
|
||||
ADDQ DI, R8
|
||||
RORQ $0x2f, R8
|
||||
ADDQ BX, R8
|
||||
ADDQ $0x00000008, CX
|
||||
SUBQ $0x00000008, DX
|
||||
|
||||
longCore0:
|
||||
RORQ $0x0b, AX
|
||||
SUBQ BP, AX
|
||||
ADDQ $0x00000001, R8
|
||||
MOVQ $0x9c1b8e1e9628323f, SI
|
||||
IMULQ SI, R8
|
||||
XORQ R8, BX
|
||||
SUBQ SI, AX
|
||||
ADDQ $0x00000001, R9
|
||||
MOVQ $0x9c1b8e1e9628323f, DI
|
||||
IMULQ DI, R9
|
||||
XORQ R9, BX
|
||||
CMPQ DX, $0x00000000
|
||||
JE longTail0
|
||||
CMPQ DX, $0x00000001
|
||||
@@ -263,22 +263,22 @@ longCore0:
|
||||
|
||||
longTail7:
|
||||
MOVBQZX 6(CX), DX
|
||||
ADDQ DX, BP
|
||||
ADDQ DX, SI
|
||||
|
||||
longTail6:
|
||||
MOVWQZX 4(CX), DX
|
||||
ADDQ DX, DI
|
||||
ADDQ DX, R8
|
||||
MOVLQZX (CX), DX
|
||||
ADDQ DX, AX
|
||||
JMP longAfter
|
||||
|
||||
longTail5:
|
||||
MOVBQZX 4(CX), DX
|
||||
ADDQ DX, BP
|
||||
ADDQ DX, SI
|
||||
|
||||
longTail4:
|
||||
MOVLQZX (CX), DX
|
||||
ADDQ DX, DI
|
||||
ADDQ DX, R8
|
||||
JMP longAfter
|
||||
|
||||
longTail3:
|
||||
@@ -287,52 +287,52 @@ longTail3:
|
||||
|
||||
longTail2:
|
||||
MOVWQZX (CX), DX
|
||||
ADDQ DX, BP
|
||||
ADDQ DX, SI
|
||||
JMP longAfter
|
||||
|
||||
longTail1:
|
||||
MOVBQZX (CX), DX
|
||||
ADDQ DX, DI
|
||||
ADDQ DX, R8
|
||||
|
||||
longTail0:
|
||||
ROLQ $0x20, AX
|
||||
XORQ $0x000000ff, AX
|
||||
|
||||
longAfter:
|
||||
SUBQ DI, BP
|
||||
SUBQ R8, SI
|
||||
RORQ $0x13, BX
|
||||
SUBQ BX, BP
|
||||
RORQ $0x35, BP
|
||||
XORQ BP, AX
|
||||
SUBQ BX, SI
|
||||
RORQ $0x35, SI
|
||||
XORQ SI, AX
|
||||
SUBQ AX, BX
|
||||
ROLQ $0x2b, AX
|
||||
ADDQ AX, BX
|
||||
RORQ $0x03, BX
|
||||
SUBQ BX, AX
|
||||
RORQ $0x2b, DI
|
||||
SUBQ AX, DI
|
||||
ROLQ $0x37, DI
|
||||
XORQ BX, DI
|
||||
SUBQ DI, BP
|
||||
RORQ $0x2b, R8
|
||||
SUBQ AX, R8
|
||||
ROLQ $0x37, R8
|
||||
XORQ BX, R8
|
||||
SUBQ R8, SI
|
||||
RORQ $0x07, AX
|
||||
SUBQ DI, AX
|
||||
RORQ $0x1f, DI
|
||||
ADDQ DI, AX
|
||||
SUBQ BP, DI
|
||||
SUBQ R8, AX
|
||||
RORQ $0x1f, R8
|
||||
ADDQ R8, AX
|
||||
SUBQ SI, R8
|
||||
RORQ $0x27, AX
|
||||
XORQ AX, DI
|
||||
XORQ AX, R8
|
||||
RORQ $0x11, AX
|
||||
XORQ DI, AX
|
||||
ADDQ AX, BP
|
||||
RORQ $0x09, BP
|
||||
XORQ BP, DI
|
||||
ROLQ $0x18, DI
|
||||
XORQ DI, AX
|
||||
XORQ R8, AX
|
||||
ADDQ AX, SI
|
||||
RORQ $0x09, SI
|
||||
XORQ SI, R8
|
||||
ROLQ $0x18, R8
|
||||
XORQ R8, AX
|
||||
RORQ $0x3b, AX
|
||||
RORQ $0x01, BX
|
||||
SUBQ BP, BX
|
||||
XORQ BP, BX
|
||||
XORQ AX, DI
|
||||
XORQ DI, BX
|
||||
SUBQ SI, BX
|
||||
XORQ SI, BX
|
||||
XORQ AX, R8
|
||||
XORQ R8, BX
|
||||
MOVQ BX, ret+32(FP)
|
||||
RET
|
||||
|
||||
25
pass/reg.go
25
pass/reg.go
@@ -74,7 +74,7 @@ func Liveness(fn *ir.Function) error {
|
||||
|
||||
// AllocateRegisters performs register allocation.
|
||||
func AllocateRegisters(fn *ir.Function) error {
|
||||
// Populate allocators (one per kind).
|
||||
// Initialize one allocator per kind.
|
||||
as := map[reg.Kind]*Allocator{}
|
||||
for _, i := range fn.Instructions() {
|
||||
for _, r := range i.Registers() {
|
||||
@@ -86,7 +86,28 @@ func AllocateRegisters(fn *ir.Function) error {
|
||||
}
|
||||
as[k] = a
|
||||
}
|
||||
as[k].Add(r.ID())
|
||||
}
|
||||
}
|
||||
|
||||
// De-prioritize the base pointer register. This can be used as a general
|
||||
// purpose register, but it's callee-save so needs to be saved/restored if
|
||||
// it is clobbered. For this reason we prefer to avoid using it unless
|
||||
// forced to by register pressure.
|
||||
for k, a := range as {
|
||||
f := reg.FamilyOfKind(k)
|
||||
for _, r := range f.Registers() {
|
||||
if (r.Info() & reg.BasePointer) != 0 {
|
||||
// Negative priority penalizes this register relative to all
|
||||
// others (having default zero priority).
|
||||
a.SetPriority(r.ID(), -1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Populate registers to be allocated.
|
||||
for _, i := range fn.Instructions() {
|
||||
for _, r := range i.Registers() {
|
||||
as[r.Kind()].Add(r.ID())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -106,6 +106,51 @@ func ConstructLiveness(t *testing.T, ctx *build.Context) *ir.Function {
|
||||
return BuildFunction(t, ctx, pass.LabelTarget, pass.CFG, pass.Liveness)
|
||||
}
|
||||
|
||||
func TestAllocateRegistersBasePointerDeprioritized(t *testing.T) {
|
||||
// Construct a function that requires n general-purpose registers all live
|
||||
// at once. Choose n to be the maximal possible number of registers without
|
||||
// touching the base pointer.
|
||||
n := 14
|
||||
|
||||
ctx := build.NewContext()
|
||||
ctx.Function("sum")
|
||||
ctx.SignatureExpr("func() uint64")
|
||||
|
||||
x := make([]reg.GPVirtual, n)
|
||||
for i := 0; i < n; i++ {
|
||||
x[i] = ctx.GP64()
|
||||
ctx.MOVQ(operand.U64(i), x[i])
|
||||
}
|
||||
|
||||
for i := 1; i < n; i++ {
|
||||
ctx.ADDQ(x[i], x[0])
|
||||
}
|
||||
|
||||
ctx.Store(x[0], ctx.ReturnIndex(0))
|
||||
ctx.RET()
|
||||
|
||||
// Build and compile the function up to register allocation.
|
||||
fn := BuildFunction(t, ctx, pass.LabelTarget, pass.CFG, pass.Liveness, pass.AllocateRegisters, pass.BindRegisters)
|
||||
|
||||
// Verify this function uses n registers, but not the base pointer.
|
||||
ps := map[reg.Physical]bool{}
|
||||
for _, i := range fn.Instructions() {
|
||||
for _, r := range i.OutputRegisters() {
|
||||
ps[reg.ToPhysical(r)] = true
|
||||
}
|
||||
}
|
||||
|
||||
if len(ps) != n {
|
||||
t.Fatalf("expected function to require %d registers", n)
|
||||
}
|
||||
|
||||
for p := range ps {
|
||||
if (p.Info() & reg.BasePointer) != 0 {
|
||||
t.Fatal("base pointer used")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureBasePointerCalleeSavedFrameless(t *testing.T) {
|
||||
// Construct a function that writes to the base pointer.
|
||||
ctx := build.NewContext()
|
||||
|
||||
@@ -8,17 +8,17 @@ TEXT ·GP8(SB), NOSPLIT, $8-1
|
||||
MOVB $0x02, CL
|
||||
MOVB $0x03, DL
|
||||
MOVB $0x04, BL
|
||||
MOVB $0x05, BP
|
||||
MOVB $0x06, SI
|
||||
MOVB $0x07, DI
|
||||
MOVB $0x08, R8
|
||||
MOVB $0x09, R9
|
||||
MOVB $0x0a, R10
|
||||
MOVB $0x0b, R11
|
||||
MOVB $0x0c, R12
|
||||
MOVB $0x0d, R13
|
||||
MOVB $0x0e, R14
|
||||
MOVB $0x0f, R15
|
||||
MOVB $0x05, SI
|
||||
MOVB $0x06, DI
|
||||
MOVB $0x07, R8
|
||||
MOVB $0x08, R9
|
||||
MOVB $0x09, R10
|
||||
MOVB $0x0a, R11
|
||||
MOVB $0x0b, R12
|
||||
MOVB $0x0c, R13
|
||||
MOVB $0x0d, R14
|
||||
MOVB $0x0e, R15
|
||||
MOVB $0x0f, BP
|
||||
MOVB $0x10, AH
|
||||
MOVB $0x11, CH
|
||||
MOVB $0x12, DH
|
||||
@@ -26,7 +26,6 @@ TEXT ·GP8(SB), NOSPLIT, $8-1
|
||||
ADDB CL, AL
|
||||
ADDB DL, AL
|
||||
ADDB BL, AL
|
||||
ADDB BP, AL
|
||||
ADDB SI, AL
|
||||
ADDB DI, AL
|
||||
ADDB R8, AL
|
||||
@@ -37,6 +36,7 @@ TEXT ·GP8(SB), NOSPLIT, $8-1
|
||||
ADDB R13, AL
|
||||
ADDB R14, AL
|
||||
ADDB R15, AL
|
||||
ADDB BP, AL
|
||||
ADDB AH, AL
|
||||
ADDB CH, AL
|
||||
ADDB DH, AL
|
||||
|
||||
@@ -8,36 +8,35 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
|
||||
MOVQ $0x0002002a, CX
|
||||
MOVQ $0x0003002a, DX
|
||||
MOVQ $0x0004002a, BX
|
||||
MOVQ $0x0005002a, BP
|
||||
MOVQ $0x0006002a, SI
|
||||
MOVQ $0x0007002a, DI
|
||||
MOVQ $0x0008002a, R8
|
||||
MOVQ $0x0009002a, R9
|
||||
MOVQ $0x000a002a, R10
|
||||
MOVQ $0x000b002a, R11
|
||||
MOVQ $0x000c002a, R12
|
||||
MOVQ $0x000d002a, R13
|
||||
MOVQ $0x000e002a, R14
|
||||
MOVQ $0x000f002a, R15
|
||||
MOVQ $0x0005002a, SI
|
||||
MOVQ $0x0006002a, DI
|
||||
MOVQ $0x0007002a, R8
|
||||
MOVQ $0x0008002a, R9
|
||||
MOVQ $0x0009002a, R10
|
||||
MOVQ $0x000a002a, R11
|
||||
MOVQ $0x000b002a, R12
|
||||
MOVQ $0x000c002a, R13
|
||||
MOVQ $0x000d002a, R14
|
||||
MOVQ $0x000e002a, R15
|
||||
MOVQ $0x000f002a, BP
|
||||
MOVW $0x0001, AX
|
||||
MOVW $0x0002, CX
|
||||
MOVW $0x0003, DX
|
||||
MOVW $0x0004, BX
|
||||
MOVW $0x0005, BP
|
||||
MOVW $0x0006, SI
|
||||
MOVW $0x0007, DI
|
||||
MOVW $0x0008, R8
|
||||
MOVW $0x0009, R9
|
||||
MOVW $0x000a, R10
|
||||
MOVW $0x000b, R11
|
||||
MOVW $0x000c, R12
|
||||
MOVW $0x000d, R13
|
||||
MOVW $0x000e, R14
|
||||
MOVW $0x000f, R15
|
||||
MOVW $0x0005, SI
|
||||
MOVW $0x0006, DI
|
||||
MOVW $0x0007, R8
|
||||
MOVW $0x0008, R9
|
||||
MOVW $0x0009, R10
|
||||
MOVW $0x000a, R11
|
||||
MOVW $0x000b, R12
|
||||
MOVW $0x000c, R13
|
||||
MOVW $0x000d, R14
|
||||
MOVW $0x000e, R15
|
||||
MOVW $0x000f, BP
|
||||
ADDW CX, AX
|
||||
ADDW DX, AX
|
||||
ADDW BX, AX
|
||||
ADDW BP, AX
|
||||
ADDW SI, AX
|
||||
ADDW DI, AX
|
||||
ADDW R8, AX
|
||||
@@ -48,12 +47,12 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
|
||||
ADDW R13, AX
|
||||
ADDW R14, AX
|
||||
ADDW R15, AX
|
||||
ADDW BP, AX
|
||||
MOVW AX, ret+0(FP)
|
||||
MOVW $0x0000, AX
|
||||
MOVW $0x0000, CX
|
||||
MOVW $0x0000, DX
|
||||
MOVW $0x0000, BX
|
||||
MOVW $0x0000, BP
|
||||
MOVW $0x0000, SI
|
||||
MOVW $0x0000, DI
|
||||
MOVW $0x0000, R8
|
||||
@@ -64,10 +63,10 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
|
||||
MOVW $0x0000, R13
|
||||
MOVW $0x0000, R14
|
||||
MOVW $0x0000, R15
|
||||
MOVW $0x0000, BP
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
@@ -78,6 +77,7 @@ TEXT ·Masks(SB), NOSPLIT, $8-16
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
ADDQ BP, AX
|
||||
SHRQ $0x10, AX
|
||||
MOVQ AX, ret1+8(FP)
|
||||
RET
|
||||
|
||||
@@ -11,7 +11,6 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
MOVQ $0x9e77d78aacb8cbcc, CX
|
||||
MOVQ $0x9e77d78aacb8cbcc, DX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, SI
|
||||
MOVQ $0x9e77d78aacb8cbcc, DI
|
||||
MOVQ $0x9e77d78aacb8cbcc, R8
|
||||
@@ -22,10 +21,10 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
MOVQ $0x9e77d78aacb8cbcc, R13
|
||||
MOVQ $0x9e77d78aacb8cbcc, R14
|
||||
MOVQ $0x9e77d78aacb8cbcc, R15
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, CX
|
||||
MOVQ $0x9e77d78aacb8cbcc, DX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, SI
|
||||
MOVQ $0x9e77d78aacb8cbcc, DI
|
||||
MOVQ $0x9e77d78aacb8cbcc, R8
|
||||
@@ -36,10 +35,10 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
MOVQ $0x9e77d78aacb8cbcc, R13
|
||||
MOVQ $0x9e77d78aacb8cbcc, R14
|
||||
MOVQ $0x9e77d78aacb8cbcc, R15
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, CX
|
||||
MOVQ $0x9e77d78aacb8cbcc, DX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BX
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
MOVQ $0x9e77d78aacb8cbcc, SI
|
||||
MOVQ $0x9e77d78aacb8cbcc, DI
|
||||
MOVQ $0x9e77d78aacb8cbcc, R8
|
||||
@@ -50,26 +49,26 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
MOVQ $0x9e77d78aacb8cbcc, R13
|
||||
MOVQ $0x9e77d78aacb8cbcc, R14
|
||||
MOVQ $0x9e77d78aacb8cbcc, R15
|
||||
MOVQ $0x9e77d78aacb8cbcc, BP
|
||||
|
||||
// Iteration 1.
|
||||
MOVL $0x00000001, CX
|
||||
MOVL $0x00000002, DX
|
||||
MOVL $0x00000003, BX
|
||||
MOVL $0x00000004, BP
|
||||
MOVL $0x00000005, SI
|
||||
MOVL $0x00000006, DI
|
||||
MOVL $0x00000007, R8
|
||||
MOVL $0x00000008, R9
|
||||
MOVL $0x00000009, R10
|
||||
MOVL $0x0000000a, R11
|
||||
MOVL $0x0000000b, R12
|
||||
MOVL $0x0000000c, R13
|
||||
MOVL $0x0000000d, R14
|
||||
MOVL $0x0000000e, R15
|
||||
MOVL $0x00000004, SI
|
||||
MOVL $0x00000005, DI
|
||||
MOVL $0x00000006, R8
|
||||
MOVL $0x00000007, R9
|
||||
MOVL $0x00000008, R10
|
||||
MOVL $0x00000009, R11
|
||||
MOVL $0x0000000a, R12
|
||||
MOVL $0x0000000b, R13
|
||||
MOVL $0x0000000c, R14
|
||||
MOVL $0x0000000d, R15
|
||||
MOVL $0x0000000e, BP
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
@@ -80,26 +79,26 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
ADDQ BP, AX
|
||||
|
||||
// Iteration 2.
|
||||
MOVL $0x0000000f, CX
|
||||
MOVL $0x00000010, DX
|
||||
MOVL $0x00000011, BX
|
||||
MOVL $0x00000012, BP
|
||||
MOVL $0x00000013, SI
|
||||
MOVL $0x00000014, DI
|
||||
MOVL $0x00000015, R8
|
||||
MOVL $0x00000016, R9
|
||||
MOVL $0x00000017, R10
|
||||
MOVL $0x00000018, R11
|
||||
MOVL $0x00000019, R12
|
||||
MOVL $0x0000001a, R13
|
||||
MOVL $0x0000001b, R14
|
||||
MOVL $0x0000001c, R15
|
||||
MOVL $0x00000012, SI
|
||||
MOVL $0x00000013, DI
|
||||
MOVL $0x00000014, R8
|
||||
MOVL $0x00000015, R9
|
||||
MOVL $0x00000016, R10
|
||||
MOVL $0x00000017, R11
|
||||
MOVL $0x00000018, R12
|
||||
MOVL $0x00000019, R13
|
||||
MOVL $0x0000001a, R14
|
||||
MOVL $0x0000001b, R15
|
||||
MOVL $0x0000001c, BP
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
@@ -110,26 +109,26 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
ADDQ BP, AX
|
||||
|
||||
// Iteration 3.
|
||||
MOVL $0x0000001d, CX
|
||||
MOVL $0x0000001e, DX
|
||||
MOVL $0x0000001f, BX
|
||||
MOVL $0x00000020, BP
|
||||
MOVL $0x00000021, SI
|
||||
MOVL $0x00000022, DI
|
||||
MOVL $0x00000023, R8
|
||||
MOVL $0x00000024, R9
|
||||
MOVL $0x00000025, R10
|
||||
MOVL $0x00000026, R11
|
||||
MOVL $0x00000027, R12
|
||||
MOVL $0x00000028, R13
|
||||
MOVL $0x00000029, R14
|
||||
MOVL $0x0000002a, R15
|
||||
MOVL $0x00000020, SI
|
||||
MOVL $0x00000021, DI
|
||||
MOVL $0x00000022, R8
|
||||
MOVL $0x00000023, R9
|
||||
MOVL $0x00000024, R10
|
||||
MOVL $0x00000025, R11
|
||||
MOVL $0x00000026, R12
|
||||
MOVL $0x00000027, R13
|
||||
MOVL $0x00000028, R14
|
||||
MOVL $0x00000029, R15
|
||||
MOVL $0x0000002a, BP
|
||||
ADDQ CX, AX
|
||||
ADDQ DX, AX
|
||||
ADDQ BX, AX
|
||||
ADDQ BP, AX
|
||||
ADDQ SI, AX
|
||||
ADDQ DI, AX
|
||||
ADDQ R8, AX
|
||||
@@ -140,6 +139,7 @@ TEXT ·Upper32(SB), NOSPLIT, $8-8
|
||||
ADDQ R13, AX
|
||||
ADDQ R14, AX
|
||||
ADDQ R15, AX
|
||||
ADDQ BP, AX
|
||||
|
||||
// Store result and return.
|
||||
MOVQ AX, ret+0(FP)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user