pass: de-prioritize base pointer in register allocation (#184)

Updates #156
This commit is contained in:
Michael McLoughlin
2021-04-18 19:22:09 -07:00
committed by GitHub
parent f295bde84c
commit c32f24fb1e
9 changed files with 5273 additions and 5207 deletions

View File

@@ -3,18 +3,18 @@
#include "textflag.h"
// func Hash64(data []byte) uint64
TEXT ·Hash64(SB), NOSPLIT, $8-32
TEXT ·Hash64(SB), NOSPLIT, $0-32
MOVQ data_base+0(FP), CX
MOVQ data_len+8(FP), BX
MOVQ $0xcbf29ce484222325, AX
MOVQ $0x00000100000001b3, BP
MOVQ $0x00000100000001b3, SI
loop:
CMPQ BX, $0x00
JE done
MOVBQZX (CX), DX
XORQ DX, AX
MULQ BP
MULQ SI
INCQ CX
DECQ BX
JMP loop

File diff suppressed because it is too large. (Load Diff)

View File

@@ -3,68 +3,68 @@
#include "textflag.h"
// func Hash(state *State, key []byte) uint64
TEXT ·Hash(SB), NOSPLIT, $8-40
TEXT ·Hash(SB), NOSPLIT, $0-40
MOVQ state+0(FP), AX
MOVQ key_base+8(FP), CX
MOVQ key_len+16(FP), DX
MOVQ (AX), BX
MOVQ 8(AX), BP
MOVQ DX, SI
ADDQ $0x00000001, SI
MOVQ $0xb89b0f8e1655514f, DI
IMULQ DI, SI
XORQ SI, BX
MOVQ DX, SI
ADDQ $0x00000002, SI
MOVQ $0x8c6f736011bd5127, DI
IMULQ DI, SI
XORQ SI, BP
MOVQ 8(AX), SI
MOVQ DX, DI
ADDQ $0x00000001, DI
MOVQ $0xb89b0f8e1655514f, R8
IMULQ R8, DI
XORQ DI, BX
MOVQ DX, DI
ADDQ $0x00000002, DI
MOVQ $0x8c6f736011bd5127, R8
IMULQ R8, DI
XORQ DI, SI
CMPQ DX, $0x00000020
JGE coreLong
MOVQ DX, SI
SHRQ $0x03, SI
CMPQ SI, $0x00000000
MOVQ DX, DI
SHRQ $0x03, DI
CMPQ DI, $0x00000000
JE shortCore0
CMPQ SI, $0x00000001
CMPQ DI, $0x00000001
JE shortCore1
CMPQ SI, $0x00000002
CMPQ DI, $0x00000002
JE shortCore2
CMPQ SI, $0x00000003
CMPQ DI, $0x00000003
JE shortCore3
shortCore3:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
shortCore2:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
shortCore1:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
@@ -94,7 +94,7 @@ shortTail7:
shortTail6:
MOVBQZX 5(CX), DX
SHLQ $0x30, DX
ADDQ DX, BP
ADDQ DX, SI
shortTail5:
MOVBQZX 4(CX), DX
@@ -103,7 +103,7 @@ shortTail5:
shortTail4:
MOVLQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP shortAfter
shortTail3:
@@ -113,7 +113,7 @@ shortTail3:
shortTail2:
MOVWQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP shortAfter
shortTail1:
@@ -121,129 +121,129 @@ shortTail1:
ADDQ DX, BX
shortTail0:
RORQ $0x20, BP
XORQ $0x000000ff, BP
RORQ $0x20, SI
XORQ $0x000000ff, SI
shortAfter:
XORQ BX, BP
XORQ BX, SI
RORQ $0x21, BX
ADDQ BP, BX
ROLQ $0x11, BP
XORQ BX, BP
ADDQ SI, BX
ROLQ $0x11, SI
XORQ BX, SI
ROLQ $0x2b, BX
ADDQ BP, BX
ROLQ $0x1f, BP
SUBQ BX, BP
ADDQ SI, BX
ROLQ $0x1f, SI
SUBQ BX, SI
ROLQ $0x0d, BX
XORQ BP, BX
SUBQ BX, BP
XORQ SI, BX
SUBQ BX, SI
ROLQ $0x29, BX
ADDQ BP, BX
ROLQ $0x25, BP
XORQ BX, BP
ADDQ SI, BX
ROLQ $0x25, SI
XORQ BX, SI
RORQ $0x27, BX
ADDQ BP, BX
RORQ $0x0f, BP
ADDQ BX, BP
ADDQ SI, BX
RORQ $0x0f, SI
ADDQ BX, SI
ROLQ $0x0f, BX
XORQ BP, BX
RORQ $0x05, BP
XORQ BP, BX
XORQ SI, BX
RORQ $0x05, SI
XORQ SI, BX
MOVQ BX, ret+32(FP)
RET
coreLong:
MOVQ 16(AX), DI
MOVQ 16(AX), R8
MOVQ 24(AX), AX
MOVQ DX, SI
ADDQ $0x00000003, SI
MOVQ $0x8f29bd94edce7b39, R8
IMULQ R8, SI
XORQ SI, DI
MOVQ DX, SI
ADDQ $0x00000004, SI
MOVQ $0x9c1b8e1e9628323f, R8
IMULQ R8, SI
XORQ SI, AX
MOVQ DX, DI
ADDQ $0x00000003, DI
MOVQ $0x8f29bd94edce7b39, R9
IMULQ R9, DI
XORQ DI, R8
MOVQ DX, DI
ADDQ $0x00000004, DI
MOVQ $0x9c1b8e1e9628323f, R9
IMULQ R9, DI
XORQ DI, AX
block:
MOVQ (CX), SI
MOVQ $0x00000000802910e3, R8
IMULQ R8, SI
ADDQ SI, BX
MOVQ (CX), DI
MOVQ $0x00000000802910e3, R9
IMULQ R9, DI
ADDQ DI, BX
ROLQ $0x39, BX
XORQ AX, BX
MOVQ 8(CX), SI
MOVQ $0x00000000819b13af, R8
IMULQ R8, SI
ADDQ SI, BP
ROLQ $0x3f, BP
XORQ DI, BP
MOVQ 16(CX), SI
MOVQ $0x0000000091cb27e5, R8
IMULQ R8, SI
ADDQ SI, DI
RORQ $0x2f, DI
ADDQ BX, DI
MOVQ 24(CX), SI
MOVQ $0x00000000c1a269c1, R8
IMULQ R8, SI
ADDQ SI, AX
MOVQ 8(CX), DI
MOVQ $0x00000000819b13af, R9
IMULQ R9, DI
ADDQ DI, SI
ROLQ $0x3f, SI
XORQ R8, SI
MOVQ 16(CX), DI
MOVQ $0x0000000091cb27e5, R9
IMULQ R9, DI
ADDQ DI, R8
RORQ $0x2f, R8
ADDQ BX, R8
MOVQ 24(CX), DI
MOVQ $0x00000000c1a269c1, R9
IMULQ R9, DI
ADDQ DI, AX
RORQ $0x0b, AX
SUBQ BP, AX
SUBQ SI, AX
ADDQ $0x00000020, CX
SUBQ $0x00000020, DX
CMPQ DX, $0x00000020
JGE block
MOVQ DX, R8
MOVQ DX, SI
SHRQ $0x03, SI
CMPQ SI, $0x00000000
MOVQ DX, R9
MOVQ DX, DI
SHRQ $0x03, DI
CMPQ DI, $0x00000000
JE longCore0
CMPQ SI, $0x00000001
CMPQ DI, $0x00000001
JE longCore1
CMPQ SI, $0x00000002
CMPQ DI, $0x00000002
JE longCore2
CMPQ SI, $0x00000003
CMPQ DI, $0x00000003
JE longCore3
longCore3:
MOVQ (CX), SI
MOVQ $0x00000000802910e3, R9
IMULQ R9, SI
ADDQ SI, BX
MOVQ (CX), DI
MOVQ $0x00000000802910e3, R10
IMULQ R10, DI
ADDQ DI, BX
ROLQ $0x39, BX
XORQ AX, BX
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore2:
MOVQ (CX), SI
MOVQ $0x00000000819b13af, R9
IMULQ R9, SI
ADDQ SI, BP
ROLQ $0x3f, BP
XORQ DI, BP
MOVQ (CX), DI
MOVQ $0x00000000819b13af, R10
IMULQ R10, DI
ADDQ DI, SI
ROLQ $0x3f, SI
XORQ R8, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore1:
MOVQ (CX), SI
MOVQ $0x0000000091cb27e5, R9
IMULQ R9, SI
ADDQ SI, DI
RORQ $0x2f, DI
ADDQ BX, DI
MOVQ (CX), DI
MOVQ $0x0000000091cb27e5, R10
IMULQ R10, DI
ADDQ DI, R8
RORQ $0x2f, R8
ADDQ BX, R8
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore0:
RORQ $0x0b, AX
SUBQ BP, AX
ADDQ $0x00000001, R8
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, R8
XORQ R8, BX
SUBQ SI, AX
ADDQ $0x00000001, R9
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, R9
XORQ R9, BX
CMPQ DX, $0x00000000
JE longTail0
CMPQ DX, $0x00000001
@@ -263,22 +263,22 @@ longCore0:
longTail7:
MOVBQZX 6(CX), DX
ADDQ DX, BP
ADDQ DX, SI
longTail6:
MOVWQZX 4(CX), DX
ADDQ DX, DI
ADDQ DX, R8
MOVLQZX (CX), DX
ADDQ DX, AX
JMP longAfter
longTail5:
MOVBQZX 4(CX), DX
ADDQ DX, BP
ADDQ DX, SI
longTail4:
MOVLQZX (CX), DX
ADDQ DX, DI
ADDQ DX, R8
JMP longAfter
longTail3:
@@ -287,52 +287,52 @@ longTail3:
longTail2:
MOVWQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP longAfter
longTail1:
MOVBQZX (CX), DX
ADDQ DX, DI
ADDQ DX, R8
longTail0:
ROLQ $0x20, AX
XORQ $0x000000ff, AX
longAfter:
SUBQ DI, BP
SUBQ R8, SI
RORQ $0x13, BX
SUBQ BX, BP
RORQ $0x35, BP
XORQ BP, AX
SUBQ BX, SI
RORQ $0x35, SI
XORQ SI, AX
SUBQ AX, BX
ROLQ $0x2b, AX
ADDQ AX, BX
RORQ $0x03, BX
SUBQ BX, AX
RORQ $0x2b, DI
SUBQ AX, DI
ROLQ $0x37, DI
XORQ BX, DI
SUBQ DI, BP
RORQ $0x2b, R8
SUBQ AX, R8
ROLQ $0x37, R8
XORQ BX, R8
SUBQ R8, SI
RORQ $0x07, AX
SUBQ DI, AX
RORQ $0x1f, DI
ADDQ DI, AX
SUBQ BP, DI
SUBQ R8, AX
RORQ $0x1f, R8
ADDQ R8, AX
SUBQ SI, R8
RORQ $0x27, AX
XORQ AX, DI
XORQ AX, R8
RORQ $0x11, AX
XORQ DI, AX
ADDQ AX, BP
RORQ $0x09, BP
XORQ BP, DI
ROLQ $0x18, DI
XORQ DI, AX
XORQ R8, AX
ADDQ AX, SI
RORQ $0x09, SI
XORQ SI, R8
ROLQ $0x18, R8
XORQ R8, AX
RORQ $0x3b, AX
RORQ $0x01, BX
SUBQ BP, BX
XORQ BP, BX
XORQ AX, DI
XORQ DI, BX
SUBQ SI, BX
XORQ SI, BX
XORQ AX, R8
XORQ R8, BX
MOVQ BX, ret+32(FP)
RET