pass: de-prioritize base pointer in register allocation (#184)

Updates #156
This commit is contained in:
Michael McLoughlin
2021-04-18 19:22:09 -07:00
committed by GitHub
parent f295bde84c
commit c32f24fb1e
9 changed files with 5273 additions and 5207 deletions

View File

@@ -3,68 +3,68 @@
#include "textflag.h"
// func Hash(state *State, key []byte) uint64
TEXT ·Hash(SB), NOSPLIT, $8-40
TEXT ·Hash(SB), NOSPLIT, $0-40
MOVQ state+0(FP), AX
MOVQ key_base+8(FP), CX
MOVQ key_len+16(FP), DX
MOVQ (AX), BX
MOVQ 8(AX), BP
MOVQ DX, SI
ADDQ $0x00000001, SI
MOVQ $0xb89b0f8e1655514f, DI
IMULQ DI, SI
XORQ SI, BX
MOVQ DX, SI
ADDQ $0x00000002, SI
MOVQ $0x8c6f736011bd5127, DI
IMULQ DI, SI
XORQ SI, BP
MOVQ 8(AX), SI
MOVQ DX, DI
ADDQ $0x00000001, DI
MOVQ $0xb89b0f8e1655514f, R8
IMULQ R8, DI
XORQ DI, BX
MOVQ DX, DI
ADDQ $0x00000002, DI
MOVQ $0x8c6f736011bd5127, R8
IMULQ R8, DI
XORQ DI, SI
CMPQ DX, $0x00000020
JGE coreLong
MOVQ DX, SI
SHRQ $0x03, SI
CMPQ SI, $0x00000000
MOVQ DX, DI
SHRQ $0x03, DI
CMPQ DI, $0x00000000
JE shortCore0
CMPQ SI, $0x00000001
CMPQ DI, $0x00000001
JE shortCore1
CMPQ SI, $0x00000002
CMPQ DI, $0x00000002
JE shortCore2
CMPQ SI, $0x00000003
CMPQ DI, $0x00000003
JE shortCore3
shortCore3:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
shortCore2:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
shortCore1:
MOVQ (CX), AX
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, AX
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, AX
ADDQ AX, BX
RORQ $0x11, BX
XORQ BP, BX
RORQ $0x35, BP
ADDQ BX, BP
XORQ SI, BX
RORQ $0x35, SI
ADDQ BX, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
@@ -94,7 +94,7 @@ shortTail7:
shortTail6:
MOVBQZX 5(CX), DX
SHLQ $0x30, DX
ADDQ DX, BP
ADDQ DX, SI
shortTail5:
MOVBQZX 4(CX), DX
@@ -103,7 +103,7 @@ shortTail5:
shortTail4:
MOVLQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP shortAfter
shortTail3:
@@ -113,7 +113,7 @@ shortTail3:
shortTail2:
MOVWQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP shortAfter
shortTail1:
@@ -121,129 +121,129 @@ shortTail1:
ADDQ DX, BX
shortTail0:
RORQ $0x20, BP
XORQ $0x000000ff, BP
RORQ $0x20, SI
XORQ $0x000000ff, SI
shortAfter:
XORQ BX, BP
XORQ BX, SI
RORQ $0x21, BX
ADDQ BP, BX
ROLQ $0x11, BP
XORQ BX, BP
ADDQ SI, BX
ROLQ $0x11, SI
XORQ BX, SI
ROLQ $0x2b, BX
ADDQ BP, BX
ROLQ $0x1f, BP
SUBQ BX, BP
ADDQ SI, BX
ROLQ $0x1f, SI
SUBQ BX, SI
ROLQ $0x0d, BX
XORQ BP, BX
SUBQ BX, BP
XORQ SI, BX
SUBQ BX, SI
ROLQ $0x29, BX
ADDQ BP, BX
ROLQ $0x25, BP
XORQ BX, BP
ADDQ SI, BX
ROLQ $0x25, SI
XORQ BX, SI
RORQ $0x27, BX
ADDQ BP, BX
RORQ $0x0f, BP
ADDQ BX, BP
ADDQ SI, BX
RORQ $0x0f, SI
ADDQ BX, SI
ROLQ $0x0f, BX
XORQ BP, BX
RORQ $0x05, BP
XORQ BP, BX
XORQ SI, BX
RORQ $0x05, SI
XORQ SI, BX
MOVQ BX, ret+32(FP)
RET
coreLong:
MOVQ 16(AX), DI
MOVQ 16(AX), R8
MOVQ 24(AX), AX
MOVQ DX, SI
ADDQ $0x00000003, SI
MOVQ $0x8f29bd94edce7b39, R8
IMULQ R8, SI
XORQ SI, DI
MOVQ DX, SI
ADDQ $0x00000004, SI
MOVQ $0x9c1b8e1e9628323f, R8
IMULQ R8, SI
XORQ SI, AX
MOVQ DX, DI
ADDQ $0x00000003, DI
MOVQ $0x8f29bd94edce7b39, R9
IMULQ R9, DI
XORQ DI, R8
MOVQ DX, DI
ADDQ $0x00000004, DI
MOVQ $0x9c1b8e1e9628323f, R9
IMULQ R9, DI
XORQ DI, AX
block:
MOVQ (CX), SI
MOVQ $0x00000000802910e3, R8
IMULQ R8, SI
ADDQ SI, BX
MOVQ (CX), DI
MOVQ $0x00000000802910e3, R9
IMULQ R9, DI
ADDQ DI, BX
ROLQ $0x39, BX
XORQ AX, BX
MOVQ 8(CX), SI
MOVQ $0x00000000819b13af, R8
IMULQ R8, SI
ADDQ SI, BP
ROLQ $0x3f, BP
XORQ DI, BP
MOVQ 16(CX), SI
MOVQ $0x0000000091cb27e5, R8
IMULQ R8, SI
ADDQ SI, DI
RORQ $0x2f, DI
ADDQ BX, DI
MOVQ 24(CX), SI
MOVQ $0x00000000c1a269c1, R8
IMULQ R8, SI
ADDQ SI, AX
MOVQ 8(CX), DI
MOVQ $0x00000000819b13af, R9
IMULQ R9, DI
ADDQ DI, SI
ROLQ $0x3f, SI
XORQ R8, SI
MOVQ 16(CX), DI
MOVQ $0x0000000091cb27e5, R9
IMULQ R9, DI
ADDQ DI, R8
RORQ $0x2f, R8
ADDQ BX, R8
MOVQ 24(CX), DI
MOVQ $0x00000000c1a269c1, R9
IMULQ R9, DI
ADDQ DI, AX
RORQ $0x0b, AX
SUBQ BP, AX
SUBQ SI, AX
ADDQ $0x00000020, CX
SUBQ $0x00000020, DX
CMPQ DX, $0x00000020
JGE block
MOVQ DX, R8
MOVQ DX, SI
SHRQ $0x03, SI
CMPQ SI, $0x00000000
MOVQ DX, R9
MOVQ DX, DI
SHRQ $0x03, DI
CMPQ DI, $0x00000000
JE longCore0
CMPQ SI, $0x00000001
CMPQ DI, $0x00000001
JE longCore1
CMPQ SI, $0x00000002
CMPQ DI, $0x00000002
JE longCore2
CMPQ SI, $0x00000003
CMPQ DI, $0x00000003
JE longCore3
longCore3:
MOVQ (CX), SI
MOVQ $0x00000000802910e3, R9
IMULQ R9, SI
ADDQ SI, BX
MOVQ (CX), DI
MOVQ $0x00000000802910e3, R10
IMULQ R10, DI
ADDQ DI, BX
ROLQ $0x39, BX
XORQ AX, BX
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore2:
MOVQ (CX), SI
MOVQ $0x00000000819b13af, R9
IMULQ R9, SI
ADDQ SI, BP
ROLQ $0x3f, BP
XORQ DI, BP
MOVQ (CX), DI
MOVQ $0x00000000819b13af, R10
IMULQ R10, DI
ADDQ DI, SI
ROLQ $0x3f, SI
XORQ R8, SI
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore1:
MOVQ (CX), SI
MOVQ $0x0000000091cb27e5, R9
IMULQ R9, SI
ADDQ SI, DI
RORQ $0x2f, DI
ADDQ BX, DI
MOVQ (CX), DI
MOVQ $0x0000000091cb27e5, R10
IMULQ R10, DI
ADDQ DI, R8
RORQ $0x2f, R8
ADDQ BX, R8
ADDQ $0x00000008, CX
SUBQ $0x00000008, DX
longCore0:
RORQ $0x0b, AX
SUBQ BP, AX
ADDQ $0x00000001, R8
MOVQ $0x9c1b8e1e9628323f, SI
IMULQ SI, R8
XORQ R8, BX
SUBQ SI, AX
ADDQ $0x00000001, R9
MOVQ $0x9c1b8e1e9628323f, DI
IMULQ DI, R9
XORQ R9, BX
CMPQ DX, $0x00000000
JE longTail0
CMPQ DX, $0x00000001
@@ -263,22 +263,22 @@ longCore0:
longTail7:
MOVBQZX 6(CX), DX
ADDQ DX, BP
ADDQ DX, SI
longTail6:
MOVWQZX 4(CX), DX
ADDQ DX, DI
ADDQ DX, R8
MOVLQZX (CX), DX
ADDQ DX, AX
JMP longAfter
longTail5:
MOVBQZX 4(CX), DX
ADDQ DX, BP
ADDQ DX, SI
longTail4:
MOVLQZX (CX), DX
ADDQ DX, DI
ADDQ DX, R8
JMP longAfter
longTail3:
@@ -287,52 +287,52 @@ longTail3:
longTail2:
MOVWQZX (CX), DX
ADDQ DX, BP
ADDQ DX, SI
JMP longAfter
longTail1:
MOVBQZX (CX), DX
ADDQ DX, DI
ADDQ DX, R8
longTail0:
ROLQ $0x20, AX
XORQ $0x000000ff, AX
longAfter:
SUBQ DI, BP
SUBQ R8, SI
RORQ $0x13, BX
SUBQ BX, BP
RORQ $0x35, BP
XORQ BP, AX
SUBQ BX, SI
RORQ $0x35, SI
XORQ SI, AX
SUBQ AX, BX
ROLQ $0x2b, AX
ADDQ AX, BX
RORQ $0x03, BX
SUBQ BX, AX
RORQ $0x2b, DI
SUBQ AX, DI
ROLQ $0x37, DI
XORQ BX, DI
SUBQ DI, BP
RORQ $0x2b, R8
SUBQ AX, R8
ROLQ $0x37, R8
XORQ BX, R8
SUBQ R8, SI
RORQ $0x07, AX
SUBQ DI, AX
RORQ $0x1f, DI
ADDQ DI, AX
SUBQ BP, DI
SUBQ R8, AX
RORQ $0x1f, R8
ADDQ R8, AX
SUBQ SI, R8
RORQ $0x27, AX
XORQ AX, DI
XORQ AX, R8
RORQ $0x11, AX
XORQ DI, AX
ADDQ AX, BP
RORQ $0x09, BP
XORQ BP, DI
ROLQ $0x18, DI
XORQ DI, AX
XORQ R8, AX
ADDQ AX, SI
RORQ $0x09, SI
XORQ SI, R8
ROLQ $0x18, R8
XORQ R8, AX
RORQ $0x3b, AX
RORQ $0x01, BX
SUBQ BP, BX
XORQ BP, BX
XORQ AX, DI
XORQ DI, BX
SUBQ SI, BX
XORQ SI, BX
XORQ AX, R8
XORQ R8, BX
MOVQ BX, ret+32(FP)
RET