Currently `avo` uses `BP` as a standard general-purpose register. However, `BP` is used for the frame pointer and should be callee-saved. Under some circumstances the Go assembler will save and restore `BP` automatically, but not always. At the moment, `avo` can produce code that clobbers the `BP` register. Since Go 1.16, this code will also fail a new `go vet` check. This PR provides a (currently sub-optimal) fix for the issue. It introduces an `EnsureBasePointerCalleeSaved` pass, which checks whether the base pointer is written to by a function and, if so, artificially ensures that the function has a non-zero frame size. This triggers the Go assembler to automatically save and restore the `BP` register. In addition, we update the `asmdecl` tool to `asmvet`, which includes the `framepointer` vet check. Updates #156
9826 lines
243 KiB
ArmAsm
9826 lines
243 KiB
ArmAsm
// Code generated by command: go run asm.go -out allocfail.s -stubs stubs.go. DO NOT EDIT.
|
|
|
|
// +build !appengine
|
|
// +build !noasm
|
|
// +build gc
|
|
|
|
#include "textflag.h"
|
|
|
|
// func encodeBlockAsm(dst []byte, src []byte) int
|
|
// Requires: SSE2
|
|
TEXT ·encodeBlockAsm(SB), $65568-56
|
|
MOVQ $0x00000200, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm:
|
|
MOVQ (CX)(AX*1), BP
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x06, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), SI
|
|
CMPL BX, SI
|
|
JGT emit_remainder_encodeBlockAsm
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ BP, DI
|
|
MOVQ BP, R8
|
|
SHRQ $0x08, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ BX, DI
|
|
SHRQ $0x30, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x30, R8
|
|
MOVL 32(SP)(DI*1), BX
|
|
MOVL 32(SP)(R8*1), SI
|
|
MOVL AX, 32(SP)(DI*1)
|
|
LEAL 1(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, DI
|
|
SUBL 24(SP), DI
|
|
MOVL 1(CX)(DI*1), R9
|
|
MOVQ BP, R8
|
|
SHLQ $0x08, R8
|
|
CMPL R8, R9
|
|
JNE no_repeat_found_encodeBlockAsm
|
|
LEAQ 1(AX), BP
|
|
MOVL 20(SP), BX
|
|
TESTL DI, DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm:
|
|
CMPL BP, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm
|
|
MOVB -1(CX)(DI*1), DL
|
|
MOVB -1(CX)(BP*1), SI
|
|
CMPB DL, SI
|
|
JNE repeat_extend_back_end_encodeBlockAsm
|
|
LEAQ -1(BP), BP
|
|
DECL DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm
|
|
JMP repeat_extend_back_loop_encodeBlockAsm
|
|
|
|
repeat_extend_back_end_encodeBlockAsm:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, BP
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm
|
|
MOVL BP, SI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(BX*1), DI
|
|
SUBL BX, SI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ SI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm
|
|
MOVB $0xfc, (BX)
|
|
MOVL R8, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (BX)
|
|
MOVW R8, 1(BX)
|
|
MOVB R9, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R8, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R8, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm:
|
|
LEAQ (BX)(SI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail:
|
|
TESTQ SI, SI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm
|
|
CMPQ SI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
|
|
CMPQ SI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4
|
|
CMPQ SI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
|
|
CMPQ SI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16
|
|
CMPQ SI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
|
|
CMPQ SI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
|
|
CMPQ SI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128
|
|
CMPQ SI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
|
|
MOVB (DI), R8
|
|
MOVB -1(DI)(SI*1), DI
|
|
MOVB R8, (BX)
|
|
MOVB DI, -1(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4:
|
|
MOVL (DI), R8
|
|
MOVL R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
|
|
MOVW (DI), R8
|
|
MOVB 2(DI), DI
|
|
MOVW R8, (BX)
|
|
MOVB DI, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7:
|
|
MOVL (DI), R8
|
|
MOVL -4(DI)(SI*1), DI
|
|
MOVL R8, (BX)
|
|
MOVL DI, -4(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
|
|
MOVQ (DI), R8
|
|
MOVQ R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16:
|
|
MOVQ (DI), R8
|
|
MOVQ -8(DI)(SI*1), DI
|
|
MOVQ R8, (BX)
|
|
MOVQ DI, -8(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
|
|
MOVOU (DI), X0
|
|
MOVOU -16(DI)(SI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU -32(DI)(SI*1), X2
|
|
MOVOU -16(DI)(SI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(SI*1)
|
|
MOVOU X3, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU -128(DI)(SI*1), X8
|
|
MOVOU -112(DI)(SI*1), X9
|
|
MOVOU -96(DI)(SI*1), X10
|
|
MOVOU -80(DI)(SI*1), X11
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(SI*1)
|
|
MOVOU X9, -112(BX)(SI*1)
|
|
MOVOU X10, -96(BX)(SI*1)
|
|
MOVOU X11, -80(BX)(SI*1)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048:
|
|
LEAQ -256(SI), SI
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU 128(DI), X8
|
|
MOVOU 144(DI), X9
|
|
MOVOU 160(DI), X10
|
|
MOVOU 176(DI), X11
|
|
MOVOU 192(DI), X12
|
|
MOVOU 208(DI), X13
|
|
MOVOU 224(DI), X14
|
|
MOVOU 240(DI), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ SI, $0x00000100
|
|
LEAQ 256(DI), DI
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail
|
|
MOVQ R8, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ DI, DI
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP repeat_extend_forward_end_encodeBlockAsm
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(DI), DI
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm
|
|
LEAQ 1(DI), DI
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm:
|
|
ADDL DI, AX
|
|
MOVL AX, BX
|
|
SUBL BP, BX
|
|
MOVL 24(SP), BP
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVL 20(SP), DI
|
|
TESTL DI, DI
|
|
JZ repeat_as_copy_encodeBlockAsm
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_as_copy_encodeBlockAsm:
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, SI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
repeat_end_emit_encodeBlockAsm:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm
|
|
JMP search_loop_encodeBlockAsm
|
|
|
|
no_repeat_found_encodeBlockAsm:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R8
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, DI
|
|
SHLQ $0x10, DI
|
|
IMULQ R8, DI
|
|
SHRQ $0x30, DI
|
|
CMPL (CX)(BX*1), BP
|
|
SHRQ $0x08, BP
|
|
JEQ candidate_match_encodeBlockAsm
|
|
MOVL 32(SP)(DI*1), BX
|
|
CMPL (CX)(SI*1), BP
|
|
JEQ candidate2_match_encodeBlockAsm
|
|
LEAQ 2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
SHRQ $0x08, BP
|
|
CMPL (CX)(BX*1), BP
|
|
JEQ candidate3_match_encodeBlockAsm
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm
|
|
|
|
candidate3_match_encodeBlockAsm:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm
|
|
|
|
candidate2_match_encodeBlockAsm:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(DI*1)
|
|
INCL AX
|
|
MOVL SI, BX
|
|
|
|
candidate_match_encodeBlockAsm:
|
|
MOVL 20(SP), BP
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm
|
|
|
|
match_extend_back_loop_encodeBlockAsm:
|
|
CMPL AX, BP
|
|
JG match_extend_back_end_encodeBlockAsm
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), SI
|
|
CMPB DL, SI
|
|
JNE match_extend_back_end_encodeBlockAsm
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm
|
|
JMP match_extend_back_loop_encodeBlockAsm
|
|
|
|
match_extend_back_end_encodeBlockAsm:
|
|
MOVL AX, BP
|
|
SUBL 20(SP), BP
|
|
LEAQ dst_base+0(FP)(BP*1), BP
|
|
CMPQ BP, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm:
|
|
MOVL BX, BP
|
|
MOVL 20(SP), SI
|
|
CMPL SI, BP
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm
|
|
MOVL BP, DI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(SI*1), BP
|
|
SUBL SI, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVQ DI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_match_emit_encodeBlockAsm
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm
|
|
MOVB $0xfc, (SI)
|
|
MOVL R8, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
four_bytes_match_emit_encodeBlockAsm:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (SI)
|
|
MOVW R8, 1(SI)
|
|
MOVB R9, 3(SI)
|
|
ADDQ $0x04, SI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
three_bytes_match_emit_encodeBlockAsm:
|
|
MOVB $0xf4, (SI)
|
|
MOVW R8, 1(SI)
|
|
ADDQ $0x03, SI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
two_bytes_match_emit_encodeBlockAsm:
|
|
MOVB $0xf0, (SI)
|
|
MOVB R8, 1(SI)
|
|
ADDQ $0x02, SI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
one_byte_match_emit_encodeBlockAsm:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (SI)
|
|
ADDQ $0x01, SI
|
|
|
|
memmove_match_emit_encodeBlockAsm:
|
|
LEAQ (SI)(DI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
|
|
MOVB (BP), R8
|
|
MOVB -1(BP)(DI*1), BP
|
|
MOVB R8, (SI)
|
|
MOVB BP, -1(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4:
|
|
MOVL (BP), R8
|
|
MOVL R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
|
|
MOVW (BP), R8
|
|
MOVB 2(BP), BP
|
|
MOVW R8, (SI)
|
|
MOVB BP, 2(SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7:
|
|
MOVL (BP), R8
|
|
MOVL -4(BP)(DI*1), BP
|
|
MOVL R8, (SI)
|
|
MOVL BP, -4(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
|
|
MOVQ (BP), R8
|
|
MOVQ R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16:
|
|
MOVQ (BP), R8
|
|
MOVQ -8(BP)(DI*1), BP
|
|
MOVQ R8, (SI)
|
|
MOVQ BP, -8(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
|
|
MOVOU (BP), X0
|
|
MOVOU -16(BP)(DI*1), X1
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU -32(BP)(DI*1), X2
|
|
MOVOU -16(BP)(DI*1), X3
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, -32(SI)(DI*1)
|
|
MOVOU X3, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU -128(BP)(DI*1), X8
|
|
MOVOU -112(BP)(DI*1), X9
|
|
MOVOU -96(BP)(DI*1), X10
|
|
MOVOU -80(BP)(DI*1), X11
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, -128(SI)(DI*1)
|
|
MOVOU X9, -112(SI)(DI*1)
|
|
MOVOU X10, -96(SI)(DI*1)
|
|
MOVOU X11, -80(SI)(DI*1)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU 128(BP), X8
|
|
MOVOU 144(BP), X9
|
|
MOVOU 160(BP), X10
|
|
MOVOU 176(BP), X11
|
|
MOVOU 192(BP), X12
|
|
MOVOU 208(BP), X13
|
|
MOVOU 224(BP), X14
|
|
MOVOU 240(BP), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, 128(SI)
|
|
MOVOU X9, 144(SI)
|
|
MOVOU X10, 160(SI)
|
|
MOVOU X11, 176(SI)
|
|
MOVOU X12, 192(SI)
|
|
MOVOU X13, 208(SI)
|
|
MOVOU X14, 224(SI)
|
|
MOVOU X15, 240(SI)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(BP), BP
|
|
LEAQ 256(SI), SI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail
|
|
MOVQ R8, SI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm:
|
|
MOVQ SI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm:
|
|
MOVL AX, BP
|
|
MOVL AX, BP
|
|
SUBL BX, BP
|
|
MOVL BP, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), BP
|
|
SUBL AX, BP
|
|
XORQ DI, DI
|
|
CMPQ BP, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP match_nolit_end_encodeBlockAsm
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm:
|
|
LEAQ -8(BP), BP
|
|
LEAQ 8(DI), DI
|
|
CMPQ BP, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm:
|
|
TESTQ BP, BP
|
|
JZ match_nolit_end_encodeBlockAsm
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE match_nolit_end_encodeBlockAsm
|
|
LEAQ 1(DI), DI
|
|
DECQ BP
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm
|
|
|
|
match_nolit_end_encodeBlockAsm:
|
|
MOVL 24(SP), BP
|
|
ADDQ $0x04, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
ADDL DI, AX
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm
|
|
CMPL DI, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(DI), DI
|
|
ADDQ $0x05, SI
|
|
CMPL DI, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm_emit_copy:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm_emit_copy:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm_emit_copy:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm_emit_copy:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm:
|
|
TESTL DI, DI
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm:
|
|
CMPL DI, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(DI), DI
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm:
|
|
CMPL DI, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm
|
|
CMPQ SI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm:
|
|
MOVQ -2(CX)(AX*1), BP
|
|
MOVQ $0x0000cf1bbcdcbf9b, SI
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, BP
|
|
MOVQ BP, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ SI, DI
|
|
SHRQ $0x30, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ SI, R8
|
|
SHRQ $0x30, R8
|
|
MOVL 32(SP)(DI*1), SI
|
|
MOVL 32(SP)(R8*1), SI
|
|
LEAQ -2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
MOVL AX, 32(SP)(R8*1)
|
|
CMPL (CX)(R8*1), BP
|
|
JEQ match_nolit_loop_encodeBlockAsm
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm
|
|
|
|
emit_remainder_encodeBlockAsm:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm:
|
|
MOVQ DX, BP
|
|
SHRL $0x10, BP
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB BP, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), AL
|
|
MOVB DL, (CX)
|
|
MOVB AL, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), AL
|
|
MOVW DX, (CX)
|
|
MOVB AL, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), AX
|
|
MOVL DX, (CX)
|
|
MOVL AX, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), AX
|
|
MOVQ DX, (CX)
|
|
MOVQ AX, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm14B(dst []byte, src []byte) int
|
|
// Requires: SSE2
|
|
TEXT ·encodeBlockAsm14B(SB), $16416-56
|
|
MOVQ $0x00000080, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm14B:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm14B
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm14B:
|
|
MOVQ (CX)(AX*1), BP
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x05, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), SI
|
|
CMPL BX, SI
|
|
JGT emit_remainder_encodeBlockAsm14B
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ BP, DI
|
|
MOVQ BP, R8
|
|
SHRQ $0x08, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ BX, DI
|
|
SHRQ $0x32, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x32, R8
|
|
MOVL 32(SP)(DI*1), BX
|
|
MOVL 32(SP)(R8*1), SI
|
|
MOVL AX, 32(SP)(DI*1)
|
|
LEAL 1(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, DI
|
|
SUBL 24(SP), DI
|
|
MOVL 1(CX)(DI*1), R9
|
|
MOVQ BP, R8
|
|
SHLQ $0x08, R8
|
|
CMPL R8, R9
|
|
JNE no_repeat_found_encodeBlockAsm14B
|
|
LEAQ 1(AX), BP
|
|
MOVL 20(SP), BX
|
|
TESTL DI, DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm14B
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm14B:
|
|
CMPL BP, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm14B
|
|
MOVB -1(CX)(DI*1), DL
|
|
MOVB -1(CX)(BP*1), SI
|
|
CMPB DL, SI
|
|
JNE repeat_extend_back_end_encodeBlockAsm14B
|
|
LEAQ -1(BP), BP
|
|
DECL DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm14B
|
|
JMP repeat_extend_back_loop_encodeBlockAsm14B
|
|
|
|
repeat_extend_back_end_encodeBlockAsm14B:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, BP
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14B
|
|
MOVL BP, SI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(BX*1), DI
|
|
SUBL BX, SI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ SI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm14B
|
|
MOVB $0xfc, (BX)
|
|
MOVL R8, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm14B:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (BX)
|
|
MOVW R8, 1(BX)
|
|
MOVB R9, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm14B:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R8, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm14B:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R8, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm14B:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm14B:
|
|
LEAQ (BX)(SI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail:
|
|
TESTQ SI, SI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
CMPQ SI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2
|
|
CMPQ SI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4
|
|
CMPQ SI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8
|
|
CMPQ SI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16
|
|
CMPQ SI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32
|
|
CMPQ SI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64
|
|
CMPQ SI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128
|
|
CMPQ SI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2:
|
|
MOVB (DI), R8
|
|
MOVB -1(DI)(SI*1), DI
|
|
MOVB R8, (BX)
|
|
MOVB DI, -1(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4:
|
|
MOVL (DI), R8
|
|
MOVL R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3:
|
|
MOVW (DI), R8
|
|
MOVB 2(DI), DI
|
|
MOVW R8, (BX)
|
|
MOVB DI, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7:
|
|
MOVL (DI), R8
|
|
MOVL -4(DI)(SI*1), DI
|
|
MOVL R8, (BX)
|
|
MOVL DI, -4(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8:
|
|
MOVQ (DI), R8
|
|
MOVQ R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16:
|
|
MOVQ (DI), R8
|
|
MOVQ -8(DI)(SI*1), DI
|
|
MOVQ R8, (BX)
|
|
MOVQ DI, -8(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32:
|
|
MOVOU (DI), X0
|
|
MOVOU -16(DI)(SI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU -32(DI)(SI*1), X2
|
|
MOVOU -16(DI)(SI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(SI*1)
|
|
MOVOU X3, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU -128(DI)(SI*1), X8
|
|
MOVOU -112(DI)(SI*1), X9
|
|
MOVOU -96(DI)(SI*1), X10
|
|
MOVOU -80(DI)(SI*1), X11
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(SI*1)
|
|
MOVOU X9, -112(BX)(SI*1)
|
|
MOVOU X10, -96(BX)(SI*1)
|
|
MOVOU X11, -80(BX)(SI*1)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048:
|
|
LEAQ -256(SI), SI
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU 128(DI), X8
|
|
MOVOU 144(DI), X9
|
|
MOVOU 160(DI), X10
|
|
MOVOU 176(DI), X11
|
|
MOVOU 192(DI), X12
|
|
MOVOU 208(DI), X13
|
|
MOVOU 224(DI), X14
|
|
MOVOU 240(DI), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ SI, $0x00000100
|
|
LEAQ 256(DI), DI
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail
|
|
MOVQ R8, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm14B:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm14B:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ DI, DI
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP repeat_extend_forward_end_encodeBlockAsm14B
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(DI), DI
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm14B
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm14B
|
|
LEAQ 1(DI), DI
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm14B:
|
|
ADDL DI, AX
|
|
MOVL AX, BX
|
|
SUBL BP, BX
|
|
MOVL 24(SP), BP
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVL 20(SP), DI
|
|
TESTL DI, DI
|
|
JZ repeat_as_copy_encodeBlockAsm14B
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_as_copy_encodeBlockAsm14B:
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm14B
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, SI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm14B:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm14B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm14B:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm14B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
repeat_end_emit_encodeBlockAsm14B:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm14B
|
|
JMP search_loop_encodeBlockAsm14B
|
|
|
|
no_repeat_found_encodeBlockAsm14B:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R8
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, DI
|
|
SHLQ $0x10, DI
|
|
IMULQ R8, DI
|
|
SHRQ $0x32, DI
|
|
CMPL (CX)(BX*1), BP
|
|
SHRQ $0x08, BP
|
|
JEQ candidate_match_encodeBlockAsm14B
|
|
MOVL 32(SP)(DI*1), BX
|
|
CMPL (CX)(SI*1), BP
|
|
JEQ candidate2_match_encodeBlockAsm14B
|
|
LEAQ 2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
SHRQ $0x08, BP
|
|
CMPL (CX)(BX*1), BP
|
|
JEQ candidate3_match_encodeBlockAsm14B
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm14B
|
|
|
|
candidate3_match_encodeBlockAsm14B:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm14B
|
|
|
|
candidate2_match_encodeBlockAsm14B:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(DI*1)
|
|
INCL AX
|
|
MOVL SI, BX
|
|
|
|
candidate_match_encodeBlockAsm14B:
|
|
MOVL 20(SP), BP
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm14B
|
|
|
|
match_extend_back_loop_encodeBlockAsm14B:
|
|
CMPL AX, BP
|
|
JG match_extend_back_end_encodeBlockAsm14B
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), SI
|
|
CMPB DL, SI
|
|
JNE match_extend_back_end_encodeBlockAsm14B
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm14B
|
|
JMP match_extend_back_loop_encodeBlockAsm14B
|
|
|
|
match_extend_back_end_encodeBlockAsm14B:
|
|
MOVL AX, BP
|
|
SUBL 20(SP), BP
|
|
LEAQ dst_base+0(FP)(BP*1), BP
|
|
CMPQ BP, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm14B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm14B:
|
|
MOVL BX, BP
|
|
MOVL 20(SP), SI
|
|
CMPL SI, BP
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm14B
|
|
MOVL BP, DI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(SI*1), BP
|
|
SUBL SI, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVQ DI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_match_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm14B
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm14B
|
|
MOVB $0xfc, (SI)
|
|
MOVL R8, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
four_bytes_match_emit_encodeBlockAsm14B:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (SI)
|
|
MOVW R8, 1(SI)
|
|
MOVB R9, 3(SI)
|
|
ADDQ $0x04, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
three_bytes_match_emit_encodeBlockAsm14B:
|
|
MOVB $0xf4, (SI)
|
|
MOVW R8, 1(SI)
|
|
ADDQ $0x03, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
two_bytes_match_emit_encodeBlockAsm14B:
|
|
MOVB $0xf0, (SI)
|
|
MOVB R8, 1(SI)
|
|
ADDQ $0x02, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
one_byte_match_emit_encodeBlockAsm14B:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (SI)
|
|
ADDQ $0x01, SI
|
|
|
|
memmove_match_emit_encodeBlockAsm14B:
|
|
LEAQ (SI)(DI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm14B
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2:
|
|
MOVB (BP), R8
|
|
MOVB -1(BP)(DI*1), BP
|
|
MOVB R8, (SI)
|
|
MOVB BP, -1(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4:
|
|
MOVL (BP), R8
|
|
MOVL R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3:
|
|
MOVW (BP), R8
|
|
MOVB 2(BP), BP
|
|
MOVW R8, (SI)
|
|
MOVB BP, 2(SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7:
|
|
MOVL (BP), R8
|
|
MOVL -4(BP)(DI*1), BP
|
|
MOVL R8, (SI)
|
|
MOVL BP, -4(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8:
|
|
MOVQ (BP), R8
|
|
MOVQ R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16:
|
|
MOVQ (BP), R8
|
|
MOVQ -8(BP)(DI*1), BP
|
|
MOVQ R8, (SI)
|
|
MOVQ BP, -8(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32:
|
|
MOVOU (BP), X0
|
|
MOVOU -16(BP)(DI*1), X1
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU -32(BP)(DI*1), X2
|
|
MOVOU -16(BP)(DI*1), X3
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, -32(SI)(DI*1)
|
|
MOVOU X3, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU -128(BP)(DI*1), X8
|
|
MOVOU -112(BP)(DI*1), X9
|
|
MOVOU -96(BP)(DI*1), X10
|
|
MOVOU -80(BP)(DI*1), X11
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, -128(SI)(DI*1)
|
|
MOVOU X9, -112(SI)(DI*1)
|
|
MOVOU X10, -96(SI)(DI*1)
|
|
MOVOU X11, -80(SI)(DI*1)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU 128(BP), X8
|
|
MOVOU 144(BP), X9
|
|
MOVOU 160(BP), X10
|
|
MOVOU 176(BP), X11
|
|
MOVOU 192(BP), X12
|
|
MOVOU 208(BP), X13
|
|
MOVOU 224(BP), X14
|
|
MOVOU 240(BP), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, 128(SI)
|
|
MOVOU X9, 144(SI)
|
|
MOVOU X10, 160(SI)
|
|
MOVOU X11, 176(SI)
|
|
MOVOU X12, 192(SI)
|
|
MOVOU X13, 208(SI)
|
|
MOVOU X14, 224(SI)
|
|
MOVOU X15, 240(SI)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(BP), BP
|
|
LEAQ 256(SI), SI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail
|
|
MOVQ R8, SI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm14B:
|
|
MOVQ SI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm14B:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm14B:
|
|
MOVL AX, BP
|
|
MOVL AX, BP
|
|
SUBL BX, BP
|
|
MOVL BP, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), BP
|
|
SUBL AX, BP
|
|
XORQ DI, DI
|
|
CMPQ BP, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm14B
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm14B:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm14B
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP match_nolit_end_encodeBlockAsm14B
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm14B:
|
|
LEAQ -8(BP), BP
|
|
LEAQ 8(DI), DI
|
|
CMPQ BP, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm14B
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm14B:
|
|
TESTQ BP, BP
|
|
JZ match_nolit_end_encodeBlockAsm14B
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm14B:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE match_nolit_end_encodeBlockAsm14B
|
|
LEAQ 1(DI), DI
|
|
DECQ BP
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14B
|
|
|
|
match_nolit_end_encodeBlockAsm14B:
|
|
MOVL 24(SP), BP
|
|
ADDQ $0x04, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
ADDL DI, AX
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm14B
|
|
CMPL DI, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm14B
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(DI), DI
|
|
ADDQ $0x05, SI
|
|
CMPL DI, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm14B
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm14B:
|
|
TESTL DI, DI
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm14B:
|
|
CMPL DI, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm14B
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(DI), DI
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm14B:
|
|
CMPL DI, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14B
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm14B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm14B:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm14B
|
|
CMPQ SI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm14B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm14B:
|
|
MOVQ -2(CX)(AX*1), BP
|
|
MOVQ $0x0000cf1bbcdcbf9b, SI
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, BP
|
|
MOVQ BP, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ SI, DI
|
|
SHRQ $0x32, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ SI, R8
|
|
SHRQ $0x32, R8
|
|
MOVL 32(SP)(DI*1), SI
|
|
MOVL 32(SP)(R8*1), SI
|
|
LEAQ -2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
MOVL AX, 32(SP)(R8*1)
|
|
CMPL (CX)(R8*1), BP
|
|
JEQ match_nolit_loop_encodeBlockAsm14B
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm14B
|
|
|
|
emit_remainder_encodeBlockAsm14B:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm14B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm14B:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14B
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm14B
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm14B:
|
|
MOVQ DX, BP
|
|
SHRL $0x10, BP
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB BP, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm14B:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm14B:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm14B:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm14B:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), AL
|
|
MOVB DL, (CX)
|
|
MOVB AL, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), AL
|
|
MOVW DX, (CX)
|
|
MOVB AL, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), AX
|
|
MOVL DX, (CX)
|
|
MOVL AX, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), AX
|
|
MOVQ DX, (CX)
|
|
MOVQ AX, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm14B:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm14B:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm12B(dst []byte, src []byte) int
|
|
// Requires: SSE2
|
|
TEXT ·encodeBlockAsm12B(SB), $4128-56
|
|
MOVQ $0x00000020, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm12B:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm12B
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm12B:
|
|
MOVQ (CX)(AX*1), BP
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x04, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), SI
|
|
CMPL BX, SI
|
|
JGT emit_remainder_encodeBlockAsm12B
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ BP, DI
|
|
MOVQ BP, R8
|
|
SHRQ $0x08, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ BX, DI
|
|
SHRQ $0x34, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x34, R8
|
|
MOVL 32(SP)(DI*1), BX
|
|
MOVL 32(SP)(R8*1), SI
|
|
MOVL AX, 32(SP)(DI*1)
|
|
LEAL 1(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, DI
|
|
SUBL 24(SP), DI
|
|
MOVL 1(CX)(DI*1), R9
|
|
MOVQ BP, R8
|
|
SHLQ $0x08, R8
|
|
CMPL R8, R9
|
|
JNE no_repeat_found_encodeBlockAsm12B
|
|
LEAQ 1(AX), BP
|
|
MOVL 20(SP), BX
|
|
TESTL DI, DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm12B
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm12B:
|
|
CMPL BP, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm12B
|
|
MOVB -1(CX)(DI*1), DL
|
|
MOVB -1(CX)(BP*1), SI
|
|
CMPB DL, SI
|
|
JNE repeat_extend_back_end_encodeBlockAsm12B
|
|
LEAQ -1(BP), BP
|
|
DECL DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm12B
|
|
JMP repeat_extend_back_loop_encodeBlockAsm12B
|
|
|
|
repeat_extend_back_end_encodeBlockAsm12B:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, BP
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12B
|
|
MOVL BP, SI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(BX*1), DI
|
|
SUBL BX, SI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ SI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm12B
|
|
MOVB $0xfc, (BX)
|
|
MOVL R8, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm12B:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (BX)
|
|
MOVW R8, 1(BX)
|
|
MOVB R9, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm12B:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R8, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm12B:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R8, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm12B:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm12B:
|
|
LEAQ (BX)(SI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail:
|
|
TESTQ SI, SI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
CMPQ SI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2
|
|
CMPQ SI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4
|
|
CMPQ SI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
|
|
CMPQ SI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16
|
|
CMPQ SI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
|
|
CMPQ SI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
|
|
CMPQ SI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128
|
|
CMPQ SI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2:
|
|
MOVB (DI), R8
|
|
MOVB -1(DI)(SI*1), DI
|
|
MOVB R8, (BX)
|
|
MOVB DI, -1(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4:
|
|
MOVL (DI), R8
|
|
MOVL R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3:
|
|
MOVW (DI), R8
|
|
MOVB 2(DI), DI
|
|
MOVW R8, (BX)
|
|
MOVB DI, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7:
|
|
MOVL (DI), R8
|
|
MOVL -4(DI)(SI*1), DI
|
|
MOVL R8, (BX)
|
|
MOVL DI, -4(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
|
|
MOVQ (DI), R8
|
|
MOVQ R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16:
|
|
MOVQ (DI), R8
|
|
MOVQ -8(DI)(SI*1), DI
|
|
MOVQ R8, (BX)
|
|
MOVQ DI, -8(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
|
|
MOVOU (DI), X0
|
|
MOVOU -16(DI)(SI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU -32(DI)(SI*1), X2
|
|
MOVOU -16(DI)(SI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(SI*1)
|
|
MOVOU X3, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU -128(DI)(SI*1), X8
|
|
MOVOU -112(DI)(SI*1), X9
|
|
MOVOU -96(DI)(SI*1), X10
|
|
MOVOU -80(DI)(SI*1), X11
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(SI*1)
|
|
MOVOU X9, -112(BX)(SI*1)
|
|
MOVOU X10, -96(BX)(SI*1)
|
|
MOVOU X11, -80(BX)(SI*1)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048:
|
|
LEAQ -256(SI), SI
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU 128(DI), X8
|
|
MOVOU 144(DI), X9
|
|
MOVOU 160(DI), X10
|
|
MOVOU 176(DI), X11
|
|
MOVOU 192(DI), X12
|
|
MOVOU 208(DI), X13
|
|
MOVOU 224(DI), X14
|
|
MOVOU 240(DI), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ SI, $0x00000100
|
|
LEAQ 256(DI), DI
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail
|
|
MOVQ R8, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm12B:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm12B:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ DI, DI
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP repeat_extend_forward_end_encodeBlockAsm12B
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(DI), DI
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm12B
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm12B
|
|
LEAQ 1(DI), DI
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm12B:
|
|
ADDL DI, AX
|
|
MOVL AX, BX
|
|
SUBL BP, BX
|
|
MOVL 24(SP), BP
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVL 20(SP), DI
|
|
TESTL DI, DI
|
|
JZ repeat_as_copy_encodeBlockAsm12B
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_as_copy_encodeBlockAsm12B:
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, SI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm12B:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm12B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
repeat_end_emit_encodeBlockAsm12B:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm12B
|
|
JMP search_loop_encodeBlockAsm12B
|
|
|
|
no_repeat_found_encodeBlockAsm12B:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R8
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, DI
|
|
SHLQ $0x10, DI
|
|
IMULQ R8, DI
|
|
SHRQ $0x34, DI
|
|
CMPL (CX)(BX*1), BP
|
|
SHRQ $0x08, BP
|
|
JEQ candidate_match_encodeBlockAsm12B
|
|
MOVL 32(SP)(DI*1), BX
|
|
CMPL (CX)(SI*1), BP
|
|
JEQ candidate2_match_encodeBlockAsm12B
|
|
LEAQ 2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
SHRQ $0x08, BP
|
|
CMPL (CX)(BX*1), BP
|
|
JEQ candidate3_match_encodeBlockAsm12B
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm12B
|
|
|
|
candidate3_match_encodeBlockAsm12B:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm12B
|
|
|
|
candidate2_match_encodeBlockAsm12B:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(DI*1)
|
|
INCL AX
|
|
MOVL SI, BX
|
|
|
|
candidate_match_encodeBlockAsm12B:
|
|
MOVL 20(SP), BP
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm12B
|
|
|
|
match_extend_back_loop_encodeBlockAsm12B:
|
|
CMPL AX, BP
|
|
JG match_extend_back_end_encodeBlockAsm12B
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), SI
|
|
CMPB DL, SI
|
|
JNE match_extend_back_end_encodeBlockAsm12B
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm12B
|
|
JMP match_extend_back_loop_encodeBlockAsm12B
|
|
|
|
match_extend_back_end_encodeBlockAsm12B:
|
|
MOVL AX, BP
|
|
SUBL 20(SP), BP
|
|
LEAQ dst_base+0(FP)(BP*1), BP
|
|
CMPQ BP, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm12B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm12B:
|
|
MOVL BX, BP
|
|
MOVL 20(SP), SI
|
|
CMPL SI, BP
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm12B
|
|
MOVL BP, DI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(SI*1), BP
|
|
SUBL SI, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVQ DI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_match_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm12B
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm12B
|
|
MOVB $0xfc, (SI)
|
|
MOVL R8, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
four_bytes_match_emit_encodeBlockAsm12B:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (SI)
|
|
MOVW R8, 1(SI)
|
|
MOVB R9, 3(SI)
|
|
ADDQ $0x04, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
three_bytes_match_emit_encodeBlockAsm12B:
|
|
MOVB $0xf4, (SI)
|
|
MOVW R8, 1(SI)
|
|
ADDQ $0x03, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
two_bytes_match_emit_encodeBlockAsm12B:
|
|
MOVB $0xf0, (SI)
|
|
MOVB R8, 1(SI)
|
|
ADDQ $0x02, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
one_byte_match_emit_encodeBlockAsm12B:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (SI)
|
|
ADDQ $0x01, SI
|
|
|
|
memmove_match_emit_encodeBlockAsm12B:
|
|
LEAQ (SI)(DI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm12B
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
|
|
MOVB (BP), R8
|
|
MOVB -1(BP)(DI*1), BP
|
|
MOVB R8, (SI)
|
|
MOVB BP, -1(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4:
|
|
MOVL (BP), R8
|
|
MOVL R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
|
|
MOVW (BP), R8
|
|
MOVB 2(BP), BP
|
|
MOVW R8, (SI)
|
|
MOVB BP, 2(SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7:
|
|
MOVL (BP), R8
|
|
MOVL -4(BP)(DI*1), BP
|
|
MOVL R8, (SI)
|
|
MOVL BP, -4(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
|
|
MOVQ (BP), R8
|
|
MOVQ R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16:
|
|
MOVQ (BP), R8
|
|
MOVQ -8(BP)(DI*1), BP
|
|
MOVQ R8, (SI)
|
|
MOVQ BP, -8(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
|
|
MOVOU (BP), X0
|
|
MOVOU -16(BP)(DI*1), X1
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU -32(BP)(DI*1), X2
|
|
MOVOU -16(BP)(DI*1), X3
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, -32(SI)(DI*1)
|
|
MOVOU X3, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU -128(BP)(DI*1), X8
|
|
MOVOU -112(BP)(DI*1), X9
|
|
MOVOU -96(BP)(DI*1), X10
|
|
MOVOU -80(BP)(DI*1), X11
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, -128(SI)(DI*1)
|
|
MOVOU X9, -112(SI)(DI*1)
|
|
MOVOU X10, -96(SI)(DI*1)
|
|
MOVOU X11, -80(SI)(DI*1)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU 128(BP), X8
|
|
MOVOU 144(BP), X9
|
|
MOVOU 160(BP), X10
|
|
MOVOU 176(BP), X11
|
|
MOVOU 192(BP), X12
|
|
MOVOU 208(BP), X13
|
|
MOVOU 224(BP), X14
|
|
MOVOU 240(BP), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, 128(SI)
|
|
MOVOU X9, 144(SI)
|
|
MOVOU X10, 160(SI)
|
|
MOVOU X11, 176(SI)
|
|
MOVOU X12, 192(SI)
|
|
MOVOU X13, 208(SI)
|
|
MOVOU X14, 224(SI)
|
|
MOVOU X15, 240(SI)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(BP), BP
|
|
LEAQ 256(SI), SI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail
|
|
MOVQ R8, SI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm12B:
|
|
MOVQ SI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm12B:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm12B:
|
|
MOVL AX, BP
|
|
MOVL AX, BP
|
|
SUBL BX, BP
|
|
MOVL BP, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), BP
|
|
SUBL AX, BP
|
|
XORQ DI, DI
|
|
CMPQ BP, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm12B
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm12B:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm12B
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP match_nolit_end_encodeBlockAsm12B
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm12B:
|
|
LEAQ -8(BP), BP
|
|
LEAQ 8(DI), DI
|
|
CMPQ BP, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm12B
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm12B:
|
|
TESTQ BP, BP
|
|
JZ match_nolit_end_encodeBlockAsm12B
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE match_nolit_end_encodeBlockAsm12B
|
|
LEAQ 1(DI), DI
|
|
DECQ BP
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B
|
|
|
|
match_nolit_end_encodeBlockAsm12B:
|
|
MOVL 24(SP), BP
|
|
ADDQ $0x04, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
ADDL DI, AX
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm12B
|
|
CMPL DI, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm12B
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(DI), DI
|
|
ADDQ $0x05, SI
|
|
CMPL DI, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm12B
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm12B:
|
|
TESTL DI, DI
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm12B:
|
|
CMPL DI, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(DI), DI
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm12B:
|
|
CMPL DI, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm12B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm12B:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm12B
|
|
CMPQ SI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm12B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm12B:
|
|
MOVQ -2(CX)(AX*1), BP
|
|
MOVQ $0x0000cf1bbcdcbf9b, SI
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, BP
|
|
MOVQ BP, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ SI, DI
|
|
SHRQ $0x34, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ SI, R8
|
|
SHRQ $0x34, R8
|
|
MOVL 32(SP)(DI*1), SI
|
|
MOVL 32(SP)(R8*1), SI
|
|
LEAQ -2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
MOVL AX, 32(SP)(R8*1)
|
|
CMPL (CX)(R8*1), BP
|
|
JEQ match_nolit_loop_encodeBlockAsm12B
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm12B
|
|
|
|
emit_remainder_encodeBlockAsm12B:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm12B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm12B:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12B
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm12B
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm12B:
|
|
MOVQ DX, BP
|
|
SHRL $0x10, BP
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB BP, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm12B:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm12B:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm12B:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm12B:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), AL
|
|
MOVB DL, (CX)
|
|
MOVB AL, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), AL
|
|
MOVW DX, (CX)
|
|
MOVB AL, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), AX
|
|
MOVL DX, (CX)
|
|
MOVL AX, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), AX
|
|
MOVQ DX, (CX)
|
|
MOVQ AX, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm12B:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm12B:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsmAvx(dst []byte, src []byte) int
|
|
// Requires: AVX, SSE2
//
// Prologue. The frame is 65568 bytes of locals with 56 bytes of
// arguments/result (two slice headers plus one int).
// Stack slots as initialised below:
//   0(SP)   dst_base + ((src_len - 5) - (src_len >> 5))   (64-bit)
//   8(SP)   initial dst_base                               (64-bit)
//   16(SP)  src_len - 8                                    (32-bit)
//   20(SP)  0                                              (32-bit)
//   24(SP)  1                                              (32-bit)
//   32(SP)  65536-byte table, zero-filled
// 28(SP) is not written here; it is written later in the search loop.
|
|
TEXT ·encodeBlockAsmAvx(SB), $65568-56
|
|
// AX = 512 iterations; each iteration clears 128 bytes (512 * 128 = 65536).
MOVQ $0x00000200, AX
|
|
// CX = address of the table at 32(SP).
LEAQ 32(SP), CX
|
|
// X0 = 0, the value stored by the loop.
PXOR X0, X0
|
|
|
|
// Zero 128 bytes at CX with eight unaligned 16-byte stores, then advance.
zero_loop_encodeBlockAsmAvx:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsmAvx
|
|
// AX is zero when the loop exits, so this stores 0 into 20(SP).
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
// CX = src_len - 5, BX = src_len - 8.
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
// AX = src_len >> 5.
SHRQ $0x05, AX
|
|
// 32-bit subtract: CX = (src_len - 5) - (src_len >> 5).
SUBL AX, CX
|
|
// 16(SP) = src_len - 8; compared against the source position in later code.
MOVL BX, 16(SP)
|
|
// Keep the initial dst pointer in 8(SP).
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
// 0(SP) = dst_base + CX; later compared against an output-side value.
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
// AX = 1 is the starting source index; the same value seeds 24(SP).
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
// CX = src base pointer for the rest of the function.
MOVQ src_base+24(FP), CX
|
|
|
|
// Main search loop. On entry AX is the current source index and CX the
// source base pointer. Loads 8 source bytes, computes the next index to try,
// probes the table at 32(SP) with two hashes, and checks for a repeat at the
// offset held in 24(SP).
// Note that BP is used as a general-purpose scratch register throughout.
search_loop_encodeBlockAsmAvx:
|
|
// BP = 8 bytes of source at index AX.
MOVQ (CX)(AX*1), BP
|
|
// BX = AX + 4 + ((AX - 20(SP)) >> 6): the index to continue from if nothing
// matches; the step grows with the distance from 20(SP).
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x06, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
// If that index is beyond the limit in 16(SP), go to the remainder path.
MOVL 16(SP), SI
|
|
CMPL BX, SI
|
|
JGT emit_remainder_encodeBlockAsmAvx
|
|
// Remember the next index in 28(SP).
MOVL BX, 28(SP)
|
|
// BX = hash multiplier.
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
// DI and R8 become hashes of BP and BP>>8 respectively:
// ((value << 16) * multiplier) >> 48, i.e. a 16-bit result.
MOVQ BP, DI
|
|
MOVQ BP, R8
|
|
SHRQ $0x08, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ BX, DI
|
|
SHRQ $0x30, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x30, R8
|
|
// Read the two table entries (BX, SI), then overwrite them with AX and AX+1.
// NOTE(review): the table is indexed with scale 1 while entries are 4 bytes
// wide, so neighbouring hash values address overlapping entries — confirm
// this is intended.
MOVL 32(SP)(DI*1), BX
|
|
MOVL 32(SP)(R8*1), SI
|
|
MOVL AX, 32(SP)(DI*1)
|
|
LEAL 1(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
// DI = AX - 24(SP); R9 = 4 source bytes at DI+1.
MOVL AX, DI
|
|
SUBL 24(SP), DI
|
|
MOVL 1(CX)(DI*1), R9
|
|
// Compare the low 32 bits of (BP << 8) with R9.
// NOTE(review): a left shift is used here, whereas the second hash above
// uses BP >> 8 — confirm the shift direction.
MOVQ BP, R8
|
|
SHLQ $0x08, R8
|
|
CMPL R8, R9
|
|
JNE no_repeat_found_encodeBlockAsmAvx
|
|
// Repeat found: BP = AX + 1, BX = 20(SP); skip the backward extension when
// DI is already zero.
LEAQ 1(AX), BP
|
|
MOVL 20(SP), BX
|
|
TESTL DI, DI
|
|
JZ repeat_extend_back_end_encodeBlockAsmAvx
|
|
|
|
// Extend a repeat backwards one byte at a time.
// BP and DI are source indices (set just before this label), BX holds the
// value loaded from 20(SP), CX is the source base pointer.
repeat_extend_back_loop_encodeBlockAsmAvx:
|
|
// Leave the loop when BP > BX (signed 32-bit compare).
// NOTE(review): confirm the direction of this bound check.
CMPL BP, BX
|
|
JG repeat_extend_back_end_encodeBlockAsmAvx
|
|
// Compare the bytes immediately before each index; stop on a mismatch.
MOVB -1(CX)(DI*1), DL
|
|
MOVB -1(CX)(BP*1), SI
|
|
CMPB DL, SI
|
|
JNE repeat_extend_back_end_encodeBlockAsmAvx
|
|
// Step both indices back by one. LEAQ leaves flags untouched, so the JZ
// below tests the result of DECL DI.
LEAQ -1(BP), BP
|
|
DECL DI
|
|
JZ repeat_extend_back_end_encodeBlockAsmAvx
|
|
JMP repeat_extend_back_loop_encodeBlockAsmAvx
|
|
|
|
repeat_extend_back_end_encodeBlockAsmAvx:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, BP
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsmAvx
|
|
MOVL BP, SI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(BX*1), DI
|
|
SUBL BX, SI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ SI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsmAvx
|
|
MOVB $0xfc, (BX)
|
|
MOVL R8, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsmAvx:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (BX)
|
|
MOVW R8, 1(BX)
|
|
MOVB R9, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R8, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R8, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
one_byte_repeat_emit_encodeBlockAsmAvx:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsmAvx:
|
|
LEAQ (BX)(SI*1), R8
|
|
NOP
|
|
|
|
// Size dispatch for the literal copy. SI is the byte count (unsigned).
// Each branch selects a copy routine specialised for a size range:
//   0 -> done, 1..2, 3, 4, 5..7, 8, 9..16, 17..32, 33..64, 65..128,
//   129..256, and anything larger -> avxUnaligned.
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail:
|
|
TESTQ SI, SI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
CMPQ SI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2
|
|
// One compare against 4 feeds two jumps: below 4 means exactly 3 (0..2 were
// handled above); the JBE then catches exactly 4.
CMPQ SI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4
|
|
// Same pattern against 8: below -> 5..7, equal -> 8.
CMPQ SI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8
|
|
CMPQ SI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16
|
|
CMPQ SI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32
|
|
CMPQ SI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64
|
|
CMPQ SI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128
|
|
CMPQ SI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256
|
|
// More than 256 bytes: use the AVX path.
JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2:
|
|
MOVB (DI), R8
|
|
MOVB -1(DI)(SI*1), R9
|
|
MOVB R8, (BX)
|
|
MOVB R9, -1(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4:
|
|
MOVL (DI), R8
|
|
MOVL R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3:
|
|
MOVW (DI), R8
|
|
MOVB 2(DI), R9
|
|
MOVW R8, (BX)
|
|
MOVB R9, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7:
|
|
MOVL (DI), R8
|
|
MOVL -4(DI)(SI*1), R9
|
|
MOVL R8, (BX)
|
|
MOVL R9, -4(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8:
|
|
MOVQ (DI), R8
|
|
MOVQ R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16:
|
|
MOVQ (DI), R8
|
|
MOVQ -8(DI)(SI*1), R9
|
|
MOVQ R8, (BX)
|
|
MOVQ R9, -8(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32:
|
|
MOVOU (DI), X0
|
|
MOVOU -16(DI)(SI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU -32(DI)(SI*1), X2
|
|
MOVOU -16(DI)(SI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(SI*1)
|
|
MOVOU X3, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU -128(DI)(SI*1), X8
|
|
MOVOU -112(DI)(SI*1), X9
|
|
MOVOU -96(DI)(SI*1), X10
|
|
MOVOU -80(DI)(SI*1), X11
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(SI*1)
|
|
MOVOU X9, -112(BX)(SI*1)
|
|
MOVOU X10, -96(BX)(SI*1)
|
|
MOVOU X11, -80(BX)(SI*1)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048:
|
|
LEAQ -256(SI), SI
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU 128(DI), X8
|
|
MOVOU 144(DI), X9
|
|
MOVOU 160(DI), X10
|
|
MOVOU 176(DI), X11
|
|
MOVOU 192(DI), X12
|
|
MOVOU 208(DI), X13
|
|
MOVOU 224(DI), X14
|
|
MOVOU 240(DI), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ SI, $0x00000100
|
|
LEAQ 256(DI), DI
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail
|
|
|
|
// AVX copy for sizes above 256 bytes.
// On entry DI = source, BX = destination, SI = byte count.
// Strategy: save the first 32 and last 128 source bytes in registers, copy
// the middle in 128-byte chunks to a 32-byte-aligned destination, then store
// the saved head and tail with unaligned stores.
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
|
|
// R9 = end of source, R11 = original destination.
LEAQ (DI)(SI*1), R9
|
|
MOVQ BX, R11
|
|
// X5..X12 hold the final 128 source bytes (loads are interleaved with the
// pointer arithmetic below).
MOVOU -128(R9), X5
|
|
MOVOU -112(R9), X6
|
|
// R8 = 128, the chunk size.
// NOTE(review): R8 held BX+SI (computed at memmove_repeat_emit) until this
// point; that value is lost here.
MOVQ $0x00000080, R8
|
|
// Round BX up to the next 32-byte boundary: clear the low 5 bits, add 32.
// NOTE(review): the mask is written as 0xffffffe0; confirm how the assembler
// extends this immediate for a 64-bit AND, since a zero-extended mask would
// also clear the upper 32 bits of the pointer.
ANDQ $0xffffffe0, BX
|
|
ADDQ $0x20, BX
|
|
MOVOU -96(R9), X7
|
|
MOVOU -80(R9), X8
|
|
// R10 = number of bytes skipped by the alignment step.
MOVQ BX, R10
|
|
SUBQ R11, R10
|
|
MOVOU -64(R9), X9
|
|
MOVOU -48(R9), X10
|
|
// Reduce the remaining count by the skipped bytes.
SUBQ R10, SI
|
|
MOVOU -32(R9), X11
|
|
MOVOU -16(R9), X12
|
|
// Y4 = first 32 source bytes; they cover the skipped head of the destination.
VMOVDQU (DI), Y4
|
|
// Advance the source by the same amount as the destination.
ADDQ R10, DI
|
|
// Pre-subtract one chunk so the loop below can test with JA.
SUBQ R8, SI
|
|
|
|
// Copy 128 bytes per iteration: unaligned loads, aligned stores.
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (DI), Y0
|
|
VMOVDQU 32(DI), Y1
|
|
VMOVDQU 64(DI), Y2
|
|
VMOVDQU 96(DI), Y3
|
|
ADDQ R8, DI
|
|
// VMOVDQA requires a 32-byte-aligned address; this relies on the rounding of
// BX above.
VMOVDQA Y0, (BX)
|
|
VMOVDQA Y1, 32(BX)
|
|
VMOVDQA Y2, 64(BX)
|
|
VMOVDQA Y3, 96(BX)
|
|
ADDQ R8, BX
|
|
SUBQ R8, SI
|
|
// Continue while the remaining count is still above zero (unsigned).
JA emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop
|
|
// Undo the pre-subtraction, then SI = BX + remaining = end of destination.
ADDQ R8, SI
|
|
ADDQ BX, SI
|
|
// Store the saved head at the original (possibly unaligned) destination.
VMOVDQU Y4, (R11)
|
|
// Clear the upper YMM halves before returning to SSE instructions.
VZEROUPPER
|
|
// Store the saved 128-byte tail ending at SI.
MOVOU X5, -128(SI)
|
|
MOVOU X6, -112(SI)
|
|
MOVOU X7, -96(SI)
|
|
MOVOU X8, -80(SI)
|
|
MOVOU X9, -64(SI)
|
|
MOVOU X10, -48(SI)
|
|
MOVOU X11, -32(SI)
|
|
MOVOU X12, -16(SI)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
// NOTE(review): this instruction follows an unconditional JMP and has no
// label, so it is never executed; BX is therefore not set from R8 before the
// store at emit_literal_done_repeat_emit_encodeBlockAsmAvx.
MOVQ R8, BX
|
|
|
|
// Literal emission finished: publish the output pointer, then set up the
// forward extension of the repeat.
emit_literal_done_repeat_emit_encodeBlockAsmAvx:
|
|
// Write BX back into the dst_base argument slot.
MOVQ BX, dst_base+0(FP)
|
|
|
|
// Entry point used when there were no literals to emit.
emit_literal_skip_repeat_emit_encodeBlockAsmAvx:
|
|
// Advance the source index by 5.
ADDL $0x05, AX
|
|
// BX = AX - 24(SP).
// NOTE(review): this value is overwritten by the load two instructions below
// before anything reads it.
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
// BX = 16(SP) - AX: the number of bytes left to compare.
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
// DI = 0: match length counter.
XORQ DI, DI
|
|
// Fewer than 8 bytes left: go straight to the byte-wise loop.
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
// Match-length computation for the repeat extension.
// DI counts matched bytes, BX is the number of bytes left to compare, CX is
// the source base pointer. Works 8 bytes at a time, then byte by byte.
matchlen_loopback_repeat_extend:
|
|
// SI = 8 bytes at CX+DI, XORed with 8 bytes from the same address.
// NOTE(review): both operands are (CX)(DI*1), so SI is always zero here and
// the JZ below is always taken; the BSFQ path cannot be reached as written.
// A second base pointer was presumably intended — confirm.
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_repeat_extend
|
|
// Index of the lowest differing bit, divided by 8, is the number of matching
// bytes inside this 8-byte word.
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP repeat_extend_forward_end_encodeBlockAsmAvx
|
|
|
|
// All 8 bytes matched: consume them and loop while at least 8 remain.
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(DI), DI
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
// Fewer than 8 bytes left; finish immediately if none remain.
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsmAvx
|
|
|
|
// Byte-wise comparison.
matchlen_single_loopback_repeat_extend:
|
|
// NOTE(review): the byte is loaded from and compared against the same
// address (CX)(DI*1), so the JNE below is never taken as written — confirm.
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE repeat_extend_forward_end_encodeBlockAsmAvx
|
|
// LEAQ leaves flags untouched; the JNZ tests the result of DECQ BX.
LEAQ 1(DI), DI
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsmAvx:
|
|
ADDL DI, AX
|
|
MOVL AX, BX
|
|
SUBL BP, BX
|
|
MOVL 24(SP), BP
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVL 20(SP), DI
|
|
TESTL DI, DI
|
|
JZ repeat_as_copy_encodeBlockAsmAvx
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_as_copy_encodeBlockAsmAvx:
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, SI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsmAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsmAvx:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsmAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
// Common exit of all repeat/copy emitters: publish the output pointer and
// decide whether to keep searching.
repeat_end_emit_encodeBlockAsmAvx:
|
|
// Write SI back into the dst_base argument slot.
MOVQ SI, dst_base+0(FP)
|
|
// If the source index AX is beyond the limit in 16(SP), emit the remainder;
// otherwise continue with the search loop.
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsmAvx
|
|
JMP search_loop_encodeBlockAsmAvx
|
|
|
|
no_repeat_found_encodeBlockAsmAvx:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R8
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, DI
|
|
SHLQ $0x10, DI
|
|
IMULQ R8, DI
|
|
SHRQ $0x30, DI
|
|
CMPL (CX)(BX*1), BP
|
|
SHRQ $0x08, BP
|
|
JEQ candidate_match_encodeBlockAsmAvx
|
|
MOVL 32(SP)(DI*1), BX
|
|
CMPL (CX)(SI*1), BP
|
|
JEQ candidate2_match_encodeBlockAsmAvx
|
|
LEAQ 2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
SHRQ $0x08, BP
|
|
CMPL (CX)(BX*1), BP
|
|
JEQ candidate3_match_encodeBlockAsmAvx
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsmAvx
|
|
|
|
candidate3_match_encodeBlockAsmAvx:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsmAvx
|
|
|
|
candidate2_match_encodeBlockAsmAvx:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(DI*1)
|
|
INCL AX
|
|
MOVL SI, BX
|
|
|
|
candidate_match_encodeBlockAsmAvx:
|
|
MOVL 20(SP), BP
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsmAvx
|
|
|
|
match_extend_back_loop_encodeBlockAsmAvx:
|
|
CMPL AX, BP
|
|
JG match_extend_back_end_encodeBlockAsmAvx
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), SI
|
|
CMPB DL, SI
|
|
JNE match_extend_back_end_encodeBlockAsmAvx
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsmAvx
|
|
JMP match_extend_back_loop_encodeBlockAsmAvx
|
|
|
|
// After extending the match backwards: check the output bound before
// emitting, and return 0 if the check fails.
match_extend_back_end_encodeBlockAsmAvx:
|
|
// BP = AX - 20(SP).
MOVL AX, BP
|
|
SUBL 20(SP), BP
|
|
// NOTE(review): LEAQ computes an address, so this yields the address of the
// dst_base argument slot plus BP, not the pointer stored in that slot plus
// BP. The value is then compared with the output pointer saved in 0(SP) —
// confirm whether the stored pointer should be loaded first.
LEAQ dst_base+0(FP)(BP*1), BP
|
|
// Continue when BP < 0(SP) (signed compare).
CMPQ BP, (SP)
|
|
JL match_dst_size_check_encodeBlockAsmAvx
|
|
// Otherwise return 0.
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsmAvx:
|
|
MOVL BX, BP
|
|
MOVL 20(SP), SI
|
|
CMPL SI, BP
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsmAvx
|
|
MOVL BP, DI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(SI*1), BP
|
|
SUBL SI, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVQ DI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsmAvx
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsmAvx
|
|
MOVB $0xfc, (SI)
|
|
MOVL R8, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
four_bytes_match_emit_encodeBlockAsmAvx:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (SI)
|
|
MOVW R8, 1(SI)
|
|
MOVB R9, 3(SI)
|
|
ADDQ $0x04, SI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
three_bytes_match_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf4, (SI)
|
|
MOVW R8, 1(SI)
|
|
ADDQ $0x03, SI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
two_bytes_match_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf0, (SI)
|
|
MOVB R8, 1(SI)
|
|
ADDQ $0x02, SI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
one_byte_match_emit_encodeBlockAsmAvx:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (SI)
|
|
ADDQ $0x01, SI
|
|
|
|
memmove_match_emit_encodeBlockAsmAvx:
|
|
LEAQ (SI)(DI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2:
|
|
MOVB (BP), R8
|
|
MOVB -1(BP)(DI*1), R9
|
|
MOVB R8, (SI)
|
|
MOVB R9, -1(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4:
|
|
MOVL (BP), R8
|
|
MOVL R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3:
|
|
MOVW (BP), R8
|
|
MOVB 2(BP), R9
|
|
MOVW R8, (SI)
|
|
MOVB R9, 2(SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7:
|
|
MOVL (BP), R8
|
|
MOVL -4(BP)(DI*1), R9
|
|
MOVL R8, (SI)
|
|
MOVL R9, -4(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8:
|
|
MOVQ (BP), R8
|
|
MOVQ R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16:
|
|
MOVQ (BP), R8
|
|
MOVQ -8(BP)(DI*1), R9
|
|
MOVQ R8, (SI)
|
|
MOVQ R9, -8(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32:
|
|
MOVOU (BP), X0
|
|
MOVOU -16(BP)(DI*1), X1
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU -32(BP)(DI*1), X2
|
|
MOVOU -16(BP)(DI*1), X3
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, -32(SI)(DI*1)
|
|
MOVOU X3, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU -128(BP)(DI*1), X8
|
|
MOVOU -112(BP)(DI*1), X9
|
|
MOVOU -96(BP)(DI*1), X10
|
|
MOVOU -80(BP)(DI*1), X11
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, -128(SI)(DI*1)
|
|
MOVOU X9, -112(SI)(DI*1)
|
|
MOVOU X10, -96(SI)(DI*1)
|
|
MOVOU X11, -80(SI)(DI*1)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU 128(BP), X8
|
|
MOVOU 144(BP), X9
|
|
MOVOU 160(BP), X10
|
|
MOVOU 176(BP), X11
|
|
MOVOU 192(BP), X12
|
|
MOVOU 208(BP), X13
|
|
MOVOU 224(BP), X14
|
|
MOVOU 240(BP), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, 128(SI)
|
|
MOVOU X9, 144(SI)
|
|
MOVOU X10, 160(SI)
|
|
MOVOU X11, 176(SI)
|
|
MOVOU X12, 192(SI)
|
|
MOVOU X13, 208(SI)
|
|
MOVOU X14, 224(SI)
|
|
MOVOU X15, 240(SI)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(BP), BP
|
|
LEAQ 256(SI), SI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
|
|
LEAQ (BP)(DI*1), R9
|
|
MOVQ SI, R11
|
|
MOVOU -128(R9), X5
|
|
MOVOU -112(R9), X6
|
|
MOVQ $0x00000080, R8
|
|
ANDQ $0xffffffe0, SI
|
|
ADDQ $0x20, SI
|
|
MOVOU -96(R9), X7
|
|
MOVOU -80(R9), X8
|
|
MOVQ SI, R10
|
|
SUBQ R11, R10
|
|
MOVOU -64(R9), X9
|
|
MOVOU -48(R9), X10
|
|
SUBQ R10, DI
|
|
MOVOU -32(R9), X11
|
|
MOVOU -16(R9), X12
|
|
VMOVDQU (BP), Y4
|
|
ADDQ R10, BP
|
|
SUBQ R8, DI
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (BP), Y0
|
|
VMOVDQU 32(BP), Y1
|
|
VMOVDQU 64(BP), Y2
|
|
VMOVDQU 96(BP), Y3
|
|
ADDQ R8, BP
|
|
VMOVDQA Y0, (SI)
|
|
VMOVDQA Y1, 32(SI)
|
|
VMOVDQA Y2, 64(SI)
|
|
VMOVDQA Y3, 96(SI)
|
|
ADDQ R8, SI
|
|
SUBQ R8, DI
|
|
JA emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop
|
|
ADDQ R8, DI
|
|
ADDQ SI, DI
|
|
VMOVDQU Y4, (R11)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(DI)
|
|
MOVOU X6, -112(DI)
|
|
MOVOU X7, -96(DI)
|
|
MOVOU X8, -80(DI)
|
|
MOVOU X9, -64(DI)
|
|
MOVOU X10, -48(DI)
|
|
MOVOU X11, -32(DI)
|
|
MOVOU X12, -16(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
MOVQ R8, SI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsmAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsmAvx:
|
|
NOP
|
|
|
|
// Start of match emission without pending literals.
// AX is the current source index, BX the candidate index.
match_nolit_loop_encodeBlockAsmAvx:
|
|
// NOTE(review): the same move is issued twice; the second one is redundant.
MOVL AX, BP
|
|
MOVL AX, BP
|
|
// BP = AX - BX, saved in 24(SP).
SUBL BX, BP
|
|
MOVL BP, 24(SP)
|
|
// Advance both indices by 4.
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
// BP = 16(SP) - AX: the number of bytes left to compare.
MOVL 16(SP), BP
|
|
SUBL AX, BP
|
|
// DI = 0: match length counter.
XORQ DI, DI
|
|
// Fewer than 8 bytes left: use the byte-wise loop.
CMPQ BP, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsmAvx
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsmAvx:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsmAvx
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP match_nolit_end_encodeBlockAsmAvx
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsmAvx:
|
|
LEAQ -8(BP), BP
|
|
LEAQ 8(DI), DI
|
|
CMPQ BP, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsmAvx:
|
|
TESTQ BP, BP
|
|
JZ match_nolit_end_encodeBlockAsmAvx
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsmAvx:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE match_nolit_end_encodeBlockAsmAvx
|
|
LEAQ 1(DI), DI
|
|
DECQ BP
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx
|
|
|
|
match_nolit_end_encodeBlockAsmAvx:
|
|
MOVL 24(SP), BP
|
|
ADDQ $0x04, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
ADDL DI, AX
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsmAvx
|
|
CMPL DI, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(DI), DI
|
|
ADDQ $0x05, SI
|
|
CMPL DI, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsmAvx
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsmAvx:
|
|
TESTL DI, DI
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsmAvx:
|
|
CMPL DI, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(DI), DI
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsmAvx:
|
|
CMPL DI, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsmAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsmAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsmAvx
|
|
CMPQ SI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsmAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsmAvx:
|
|
MOVQ -2(CX)(AX*1), BP
|
|
MOVQ $0x0000cf1bbcdcbf9b, SI
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, BP
|
|
MOVQ BP, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ SI, DI
|
|
SHRQ $0x30, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ SI, R8
|
|
SHRQ $0x30, R8
|
|
MOVL 32(SP)(DI*1), SI
|
|
MOVL 32(SP)(R8*1), SI
|
|
LEAQ -2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
MOVL AX, 32(SP)(R8*1)
|
|
CMPL (CX)(R8*1), BP
|
|
JEQ match_nolit_loop_encodeBlockAsmAvx
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsmAvx
|
|
|
|
emit_remainder_encodeBlockAsmAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsmAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsmAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsmAvx
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsmAvx
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsmAvx:
|
|
MOVQ DX, BP
|
|
SHRL $0x10, BP
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB BP, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsmAvx:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsmAvx:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
one_byte_emit_remainder_encodeBlockAsmAvx:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsmAvx:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), BP
|
|
MOVB DL, (CX)
|
|
MOVB BP, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), BP
|
|
MOVW DX, (CX)
|
|
MOVB BP, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), BP
|
|
MOVL DX, (CX)
|
|
MOVL BP, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), BP
|
|
MOVQ DX, (CX)
|
|
MOVQ BP, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned:
|
|
LEAQ (AX)(BX*1), BP
|
|
MOVQ CX, DI
|
|
MOVOU -128(BP), X5
|
|
MOVOU -112(BP), X6
|
|
MOVQ $0x00000080, DX
|
|
ANDQ $0xffffffe0, CX
|
|
ADDQ $0x20, CX
|
|
MOVOU -96(BP), X7
|
|
MOVOU -80(BP), X8
|
|
MOVQ CX, SI
|
|
SUBQ DI, SI
|
|
MOVOU -64(BP), X9
|
|
MOVOU -48(BP), X10
|
|
SUBQ SI, BX
|
|
MOVOU -32(BP), X11
|
|
MOVOU -16(BP), X12
|
|
VMOVDQU (AX), Y4
|
|
ADDQ SI, AX
|
|
SUBQ DX, BX
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (AX), Y0
|
|
VMOVDQU 32(AX), Y1
|
|
VMOVDQU 64(AX), Y2
|
|
VMOVDQU 96(AX), Y3
|
|
ADDQ DX, AX
|
|
VMOVDQA Y0, (CX)
|
|
VMOVDQA Y1, 32(CX)
|
|
VMOVDQA Y2, 64(CX)
|
|
VMOVDQA Y3, 96(CX)
|
|
ADDQ DX, CX
|
|
SUBQ DX, BX
|
|
JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop
|
|
ADDQ DX, BX
|
|
ADDQ CX, BX
|
|
VMOVDQU Y4, (DI)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(BX)
|
|
MOVOU X6, -112(BX)
|
|
MOVOU X7, -96(BX)
|
|
MOVOU X8, -80(BX)
|
|
MOVOU X9, -64(BX)
|
|
MOVOU X10, -48(BX)
|
|
MOVOU X11, -32(BX)
|
|
MOVOU X12, -16(BX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsmAvx:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsmAvx:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm14BAvx(dst []byte, src []byte) int
|
|
// Requires: AVX, SSE2
|
|
TEXT ·encodeBlockAsm14BAvx(SB), $16416-56
|
|
MOVQ $0x00000080, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm14BAvx:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm14BAvx
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm14BAvx:
|
|
MOVQ (CX)(AX*1), BP
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x05, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), SI
|
|
CMPL BX, SI
|
|
JGT emit_remainder_encodeBlockAsm14BAvx
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ BP, DI
|
|
MOVQ BP, R8
|
|
SHRQ $0x08, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ BX, DI
|
|
SHRQ $0x32, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x32, R8
|
|
MOVL 32(SP)(DI*1), BX
|
|
MOVL 32(SP)(R8*1), SI
|
|
MOVL AX, 32(SP)(DI*1)
|
|
LEAL 1(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, DI
|
|
SUBL 24(SP), DI
|
|
MOVL 1(CX)(DI*1), R9
|
|
MOVQ BP, R8
|
|
SHLQ $0x08, R8
|
|
CMPL R8, R9
|
|
JNE no_repeat_found_encodeBlockAsm14BAvx
|
|
LEAQ 1(AX), BP
|
|
MOVL 20(SP), BX
|
|
TESTL DI, DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm14BAvx:
|
|
CMPL BP, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
MOVB -1(CX)(DI*1), DL
|
|
MOVB -1(CX)(BP*1), SI
|
|
CMPB DL, SI
|
|
JNE repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
LEAQ -1(BP), BP
|
|
DECL DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
JMP repeat_extend_back_loop_encodeBlockAsm14BAvx
|
|
|
|
repeat_extend_back_end_encodeBlockAsm14BAvx:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, BP
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx
|
|
MOVL BP, SI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(BX*1), DI
|
|
SUBL BX, SI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ SI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm14BAvx
|
|
MOVB $0xfc, (BX)
|
|
MOVL R8, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (BX)
|
|
MOVW R8, 1(BX)
|
|
MOVB R9, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R8, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R8, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm14BAvx:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm14BAvx:
|
|
LEAQ (BX)(SI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail:
|
|
TESTQ SI, SI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPQ SI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2
|
|
CMPQ SI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4
|
|
CMPQ SI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8
|
|
CMPQ SI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16
|
|
CMPQ SI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32
|
|
CMPQ SI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64
|
|
CMPQ SI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128
|
|
CMPQ SI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2:
|
|
MOVB (DI), R8
|
|
MOVB -1(DI)(SI*1), R9
|
|
MOVB R8, (BX)
|
|
MOVB R9, -1(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4:
|
|
MOVL (DI), R8
|
|
MOVL R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3:
|
|
MOVW (DI), R8
|
|
MOVB 2(DI), R9
|
|
MOVW R8, (BX)
|
|
MOVB R9, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7:
|
|
MOVL (DI), R8
|
|
MOVL -4(DI)(SI*1), R9
|
|
MOVL R8, (BX)
|
|
MOVL R9, -4(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8:
|
|
MOVQ (DI), R8
|
|
MOVQ R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16:
|
|
MOVQ (DI), R8
|
|
MOVQ -8(DI)(SI*1), R9
|
|
MOVQ R8, (BX)
|
|
MOVQ R9, -8(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32:
|
|
MOVOU (DI), X0
|
|
MOVOU -16(DI)(SI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU -32(DI)(SI*1), X2
|
|
MOVOU -16(DI)(SI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(SI*1)
|
|
MOVOU X3, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU -128(DI)(SI*1), X8
|
|
MOVOU -112(DI)(SI*1), X9
|
|
MOVOU -96(DI)(SI*1), X10
|
|
MOVOU -80(DI)(SI*1), X11
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(SI*1)
|
|
MOVOU X9, -112(BX)(SI*1)
|
|
MOVOU X10, -96(BX)(SI*1)
|
|
MOVOU X11, -80(BX)(SI*1)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048:
|
|
LEAQ -256(SI), SI
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU 128(DI), X8
|
|
MOVOU 144(DI), X9
|
|
MOVOU 160(DI), X10
|
|
MOVOU 176(DI), X11
|
|
MOVOU 192(DI), X12
|
|
MOVOU 208(DI), X13
|
|
MOVOU 224(DI), X14
|
|
MOVOU 240(DI), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ SI, $0x00000100
|
|
LEAQ 256(DI), DI
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned:
|
|
LEAQ (DI)(SI*1), R9
|
|
MOVQ BX, R11
|
|
MOVOU -128(R9), X5
|
|
MOVOU -112(R9), X6
|
|
MOVQ $0x00000080, R8
|
|
ANDQ $0xffffffe0, BX
|
|
ADDQ $0x20, BX
|
|
MOVOU -96(R9), X7
|
|
MOVOU -80(R9), X8
|
|
MOVQ BX, R10
|
|
SUBQ R11, R10
|
|
MOVOU -64(R9), X9
|
|
MOVOU -48(R9), X10
|
|
SUBQ R10, SI
|
|
MOVOU -32(R9), X11
|
|
MOVOU -16(R9), X12
|
|
VMOVDQU (DI), Y4
|
|
ADDQ R10, DI
|
|
SUBQ R8, SI
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (DI), Y0
|
|
VMOVDQU 32(DI), Y1
|
|
VMOVDQU 64(DI), Y2
|
|
VMOVDQU 96(DI), Y3
|
|
ADDQ R8, DI
|
|
VMOVDQA Y0, (BX)
|
|
VMOVDQA Y1, 32(BX)
|
|
VMOVDQA Y2, 64(BX)
|
|
VMOVDQA Y3, 96(BX)
|
|
ADDQ R8, BX
|
|
SUBQ R8, SI
|
|
JA emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop
|
|
ADDQ R8, SI
|
|
ADDQ BX, SI
|
|
VMOVDQU Y4, (R11)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(SI)
|
|
MOVOU X6, -112(SI)
|
|
MOVOU X7, -96(SI)
|
|
MOVOU X8, -80(SI)
|
|
MOVOU X9, -64(SI)
|
|
MOVOU X10, -48(SI)
|
|
MOVOU X11, -32(SI)
|
|
MOVOU X12, -16(SI)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
MOVQ R8, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ DI, DI
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP repeat_extend_forward_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(DI), DI
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm14BAvx
|
|
LEAQ 1(DI), DI
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm14BAvx:
|
|
ADDL DI, AX
|
|
MOVL AX, BX
|
|
SUBL BP, BX
|
|
MOVL 24(SP), BP
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVL 20(SP), DI
|
|
TESTL DI, DI
|
|
JZ repeat_as_copy_encodeBlockAsm14BAvx
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_as_copy_encodeBlockAsm14BAvx:
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, SI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm14BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
repeat_end_emit_encodeBlockAsm14BAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm14BAvx
|
|
JMP search_loop_encodeBlockAsm14BAvx
|
|
|
|
no_repeat_found_encodeBlockAsm14BAvx:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R8
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, DI
|
|
SHLQ $0x10, DI
|
|
IMULQ R8, DI
|
|
SHRQ $0x32, DI
|
|
CMPL (CX)(BX*1), BP
|
|
SHRQ $0x08, BP
|
|
JEQ candidate_match_encodeBlockAsm14BAvx
|
|
MOVL 32(SP)(DI*1), BX
|
|
CMPL (CX)(SI*1), BP
|
|
JEQ candidate2_match_encodeBlockAsm14BAvx
|
|
LEAQ 2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
SHRQ $0x08, BP
|
|
CMPL (CX)(BX*1), BP
|
|
JEQ candidate3_match_encodeBlockAsm14BAvx
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm14BAvx
|
|
|
|
candidate3_match_encodeBlockAsm14BAvx:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm14BAvx
|
|
|
|
candidate2_match_encodeBlockAsm14BAvx:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(DI*1)
|
|
INCL AX
|
|
MOVL SI, BX
|
|
|
|
candidate_match_encodeBlockAsm14BAvx:
|
|
MOVL 20(SP), BP
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm14BAvx
|
|
|
|
match_extend_back_loop_encodeBlockAsm14BAvx:
|
|
CMPL AX, BP
|
|
JG match_extend_back_end_encodeBlockAsm14BAvx
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), SI
|
|
CMPB DL, SI
|
|
JNE match_extend_back_end_encodeBlockAsm14BAvx
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm14BAvx
|
|
JMP match_extend_back_loop_encodeBlockAsm14BAvx
|
|
|
|
match_extend_back_end_encodeBlockAsm14BAvx:
|
|
MOVL AX, BP
|
|
SUBL 20(SP), BP
|
|
LEAQ dst_base+0(FP)(BP*1), BP
|
|
CMPQ BP, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm14BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm14BAvx:
|
|
MOVL BX, BP
|
|
MOVL 20(SP), SI
|
|
CMPL SI, BP
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm14BAvx
|
|
MOVL BP, DI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(SI*1), BP
|
|
SUBL SI, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVQ DI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm14BAvx
|
|
MOVB $0xfc, (SI)
|
|
MOVL R8, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_match_emit_encodeBlockAsm14BAvx:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (SI)
|
|
MOVW R8, 1(SI)
|
|
MOVB R9, 3(SI)
|
|
ADDQ $0x04, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
three_bytes_match_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf4, (SI)
|
|
MOVW R8, 1(SI)
|
|
ADDQ $0x03, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
two_bytes_match_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf0, (SI)
|
|
MOVB R8, 1(SI)
|
|
ADDQ $0x02, SI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
one_byte_match_emit_encodeBlockAsm14BAvx:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (SI)
|
|
ADDQ $0x01, SI
|
|
|
|
memmove_match_emit_encodeBlockAsm14BAvx:
|
|
LEAQ (SI)(DI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2:
|
|
MOVB (BP), R8
|
|
MOVB -1(BP)(DI*1), R9
|
|
MOVB R8, (SI)
|
|
MOVB R9, -1(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4:
|
|
MOVL (BP), R8
|
|
MOVL R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3:
|
|
MOVW (BP), R8
|
|
MOVB 2(BP), R9
|
|
MOVW R8, (SI)
|
|
MOVB R9, 2(SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7:
|
|
MOVL (BP), R8
|
|
MOVL -4(BP)(DI*1), R9
|
|
MOVL R8, (SI)
|
|
MOVL R9, -4(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8:
|
|
MOVQ (BP), R8
|
|
MOVQ R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16:
|
|
MOVQ (BP), R8
|
|
MOVQ -8(BP)(DI*1), R9
|
|
MOVQ R8, (SI)
|
|
MOVQ R9, -8(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32:
|
|
MOVOU (BP), X0
|
|
MOVOU -16(BP)(DI*1), X1
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU -32(BP)(DI*1), X2
|
|
MOVOU -16(BP)(DI*1), X3
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, -32(SI)(DI*1)
|
|
MOVOU X3, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU -128(BP)(DI*1), X8
|
|
MOVOU -112(BP)(DI*1), X9
|
|
MOVOU -96(BP)(DI*1), X10
|
|
MOVOU -80(BP)(DI*1), X11
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, -128(SI)(DI*1)
|
|
MOVOU X9, -112(SI)(DI*1)
|
|
MOVOU X10, -96(SI)(DI*1)
|
|
MOVOU X11, -80(SI)(DI*1)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU 128(BP), X8
|
|
MOVOU 144(BP), X9
|
|
MOVOU 160(BP), X10
|
|
MOVOU 176(BP), X11
|
|
MOVOU 192(BP), X12
|
|
MOVOU 208(BP), X13
|
|
MOVOU 224(BP), X14
|
|
MOVOU 240(BP), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, 128(SI)
|
|
MOVOU X9, 144(SI)
|
|
MOVOU X10, 160(SI)
|
|
MOVOU X11, 176(SI)
|
|
MOVOU X12, 192(SI)
|
|
MOVOU X13, 208(SI)
|
|
MOVOU X14, 224(SI)
|
|
MOVOU X15, 240(SI)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(BP), BP
|
|
LEAQ 256(SI), SI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned:
|
|
LEAQ (BP)(DI*1), R9
|
|
MOVQ SI, R11
|
|
MOVOU -128(R9), X5
|
|
MOVOU -112(R9), X6
|
|
MOVQ $0x00000080, R8
|
|
ANDQ $0xffffffe0, SI
|
|
ADDQ $0x20, SI
|
|
MOVOU -96(R9), X7
|
|
MOVOU -80(R9), X8
|
|
MOVQ SI, R10
|
|
SUBQ R11, R10
|
|
MOVOU -64(R9), X9
|
|
MOVOU -48(R9), X10
|
|
SUBQ R10, DI
|
|
MOVOU -32(R9), X11
|
|
MOVOU -16(R9), X12
|
|
VMOVDQU (BP), Y4
|
|
ADDQ R10, BP
|
|
SUBQ R8, DI
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (BP), Y0
|
|
VMOVDQU 32(BP), Y1
|
|
VMOVDQU 64(BP), Y2
|
|
VMOVDQU 96(BP), Y3
|
|
ADDQ R8, BP
|
|
VMOVDQA Y0, (SI)
|
|
VMOVDQA Y1, 32(SI)
|
|
VMOVDQA Y2, 64(SI)
|
|
VMOVDQA Y3, 96(SI)
|
|
ADDQ R8, SI
|
|
SUBQ R8, DI
|
|
JA emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop
|
|
ADDQ R8, DI
|
|
ADDQ SI, DI
|
|
VMOVDQU Y4, (R11)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(DI)
|
|
MOVOU X6, -112(DI)
|
|
MOVOU X7, -96(DI)
|
|
MOVOU X8, -80(DI)
|
|
MOVOU X9, -64(DI)
|
|
MOVOU X10, -48(DI)
|
|
MOVOU X11, -32(DI)
|
|
MOVOU X12, -16(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
MOVQ R8, SI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm14BAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm14BAvx:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm14BAvx:
|
|
MOVL AX, BP
|
|
MOVL AX, BP
|
|
SUBL BX, BP
|
|
MOVL BP, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), BP
|
|
SUBL AX, BP
|
|
XORQ DI, DI
|
|
CMPQ BP, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm14BAvx:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm14BAvx
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP match_nolit_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm14BAvx:
|
|
LEAQ -8(BP), BP
|
|
LEAQ 8(DI), DI
|
|
CMPQ BP, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm14BAvx:
|
|
TESTQ BP, BP
|
|
JZ match_nolit_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE match_nolit_end_encodeBlockAsm14BAvx
|
|
LEAQ 1(DI), DI
|
|
DECQ BP
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
match_nolit_end_encodeBlockAsm14BAvx:
|
|
MOVL 24(SP), BP
|
|
ADDQ $0x04, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
ADDL DI, AX
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm14BAvx
|
|
CMPL DI, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm14BAvx
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(DI), DI
|
|
ADDQ $0x05, SI
|
|
CMPL DI, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm14BAvx:
|
|
TESTL DI, DI
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm14BAvx:
|
|
CMPL DI, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(DI), DI
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx:
|
|
CMPL DI, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm14BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm14BAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm14BAvx
|
|
CMPQ SI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm14BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm14BAvx:
|
|
MOVQ -2(CX)(AX*1), BP
|
|
MOVQ $0x0000cf1bbcdcbf9b, SI
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, BP
|
|
MOVQ BP, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ SI, DI
|
|
SHRQ $0x32, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ SI, R8
|
|
SHRQ $0x32, R8
|
|
MOVL 32(SP)(DI*1), SI
|
|
MOVL 32(SP)(R8*1), SI
|
|
LEAQ -2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
MOVL AX, 32(SP)(R8*1)
|
|
CMPL (CX)(R8*1), BP
|
|
JEQ match_nolit_loop_encodeBlockAsm14BAvx
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm14BAvx
|
|
|
|
emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm14BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm14BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm14BAvx
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ DX, BP
|
|
SHRL $0x10, BP
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB BP, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm14BAvx:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm14BAvx:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), BP
|
|
MOVB DL, (CX)
|
|
MOVB BP, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), BP
|
|
MOVW DX, (CX)
|
|
MOVB BP, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), BP
|
|
MOVL DX, (CX)
|
|
MOVL BP, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), BP
|
|
MOVQ DX, (CX)
|
|
MOVQ BP, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned:
|
|
LEAQ (AX)(BX*1), BP
|
|
MOVQ CX, DI
|
|
MOVOU -128(BP), X5
|
|
MOVOU -112(BP), X6
|
|
MOVQ $0x00000080, DX
|
|
ANDQ $0xffffffe0, CX
|
|
ADDQ $0x20, CX
|
|
MOVOU -96(BP), X7
|
|
MOVOU -80(BP), X8
|
|
MOVQ CX, SI
|
|
SUBQ DI, SI
|
|
MOVOU -64(BP), X9
|
|
MOVOU -48(BP), X10
|
|
SUBQ SI, BX
|
|
MOVOU -32(BP), X11
|
|
MOVOU -16(BP), X12
|
|
VMOVDQU (AX), Y4
|
|
ADDQ SI, AX
|
|
SUBQ DX, BX
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (AX), Y0
|
|
VMOVDQU 32(AX), Y1
|
|
VMOVDQU 64(AX), Y2
|
|
VMOVDQU 96(AX), Y3
|
|
ADDQ DX, AX
|
|
VMOVDQA Y0, (CX)
|
|
VMOVDQA Y1, 32(CX)
|
|
VMOVDQA Y2, 64(CX)
|
|
VMOVDQA Y3, 96(CX)
|
|
ADDQ DX, CX
|
|
SUBQ DX, BX
|
|
JA emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop
|
|
ADDQ DX, BX
|
|
ADDQ CX, BX
|
|
VMOVDQU Y4, (DI)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(BX)
|
|
MOVOU X6, -112(BX)
|
|
MOVOU X7, -96(BX)
|
|
MOVOU X8, -80(BX)
|
|
MOVOU X9, -64(BX)
|
|
MOVOU X10, -48(BX)
|
|
MOVOU X11, -32(BX)
|
|
MOVOU X12, -16(BX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm12BAvx(dst []byte, src []byte) int
|
|
// Requires: AVX, SSE2
|
|
TEXT ·encodeBlockAsm12BAvx(SB), $4128-56
|
|
MOVQ $0x00000020, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm12BAvx:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm12BAvx
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm12BAvx:
|
|
MOVQ (CX)(AX*1), BP
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x04, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), SI
|
|
CMPL BX, SI
|
|
JGT emit_remainder_encodeBlockAsm12BAvx
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ BP, DI
|
|
MOVQ BP, R8
|
|
SHRQ $0x08, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ BX, DI
|
|
SHRQ $0x34, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x34, R8
|
|
MOVL 32(SP)(DI*1), BX
|
|
MOVL 32(SP)(R8*1), SI
|
|
MOVL AX, 32(SP)(DI*1)
|
|
LEAL 1(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, DI
|
|
SUBL 24(SP), DI
|
|
MOVL 1(CX)(DI*1), R9
|
|
MOVQ BP, R8
|
|
SHLQ $0x08, R8
|
|
CMPL R8, R9
|
|
JNE no_repeat_found_encodeBlockAsm12BAvx
|
|
LEAQ 1(AX), BP
|
|
MOVL 20(SP), BX
|
|
TESTL DI, DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm12BAvx:
|
|
CMPL BP, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
MOVB -1(CX)(DI*1), DL
|
|
MOVB -1(CX)(BP*1), SI
|
|
CMPB DL, SI
|
|
JNE repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
LEAQ -1(BP), BP
|
|
DECL DI
|
|
JZ repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
JMP repeat_extend_back_loop_encodeBlockAsm12BAvx
|
|
|
|
repeat_extend_back_end_encodeBlockAsm12BAvx:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, BP
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx
|
|
MOVL BP, SI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(BX*1), DI
|
|
SUBL BX, SI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ SI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm12BAvx
|
|
MOVB $0xfc, (BX)
|
|
MOVL R8, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (BX)
|
|
MOVW R8, 1(BX)
|
|
MOVB R9, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R8, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R8, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm12BAvx:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm12BAvx:
|
|
LEAQ (BX)(SI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail:
|
|
TESTQ SI, SI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPQ SI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2
|
|
CMPQ SI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4
|
|
CMPQ SI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8
|
|
CMPQ SI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16
|
|
CMPQ SI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32
|
|
CMPQ SI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64
|
|
CMPQ SI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128
|
|
CMPQ SI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
|
|
MOVB (DI), R8
|
|
MOVB -1(DI)(SI*1), R9
|
|
MOVB R8, (BX)
|
|
MOVB R9, -1(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4:
|
|
MOVL (DI), R8
|
|
MOVL R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3:
|
|
MOVW (DI), R8
|
|
MOVB 2(DI), R9
|
|
MOVW R8, (BX)
|
|
MOVB R9, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
|
|
MOVL (DI), R8
|
|
MOVL -4(DI)(SI*1), R9
|
|
MOVL R8, (BX)
|
|
MOVL R9, -4(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8:
|
|
MOVQ (DI), R8
|
|
MOVQ R8, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
|
|
MOVQ (DI), R8
|
|
MOVQ -8(DI)(SI*1), R9
|
|
MOVQ R8, (BX)
|
|
MOVQ R9, -8(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
|
|
MOVOU (DI), X0
|
|
MOVOU -16(DI)(SI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU -32(DI)(SI*1), X2
|
|
MOVOU -16(DI)(SI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(SI*1)
|
|
MOVOU X3, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU -128(DI)(SI*1), X8
|
|
MOVOU -112(DI)(SI*1), X9
|
|
MOVOU -96(DI)(SI*1), X10
|
|
MOVOU -80(DI)(SI*1), X11
|
|
MOVOU -64(DI)(SI*1), X12
|
|
MOVOU -48(DI)(SI*1), X13
|
|
MOVOU -32(DI)(SI*1), X14
|
|
MOVOU -16(DI)(SI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(SI*1)
|
|
MOVOU X9, -112(BX)(SI*1)
|
|
MOVOU X10, -96(BX)(SI*1)
|
|
MOVOU X11, -80(BX)(SI*1)
|
|
MOVOU X12, -64(BX)(SI*1)
|
|
MOVOU X13, -48(BX)(SI*1)
|
|
MOVOU X14, -32(BX)(SI*1)
|
|
MOVOU X15, -16(BX)(SI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
|
|
LEAQ -256(SI), SI
|
|
MOVOU (DI), X0
|
|
MOVOU 16(DI), X1
|
|
MOVOU 32(DI), X2
|
|
MOVOU 48(DI), X3
|
|
MOVOU 64(DI), X4
|
|
MOVOU 80(DI), X5
|
|
MOVOU 96(DI), X6
|
|
MOVOU 112(DI), X7
|
|
MOVOU 128(DI), X8
|
|
MOVOU 144(DI), X9
|
|
MOVOU 160(DI), X10
|
|
MOVOU 176(DI), X11
|
|
MOVOU 192(DI), X12
|
|
MOVOU 208(DI), X13
|
|
MOVOU 224(DI), X14
|
|
MOVOU 240(DI), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ SI, $0x00000100
|
|
LEAQ 256(DI), DI
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
|
|
LEAQ (DI)(SI*1), R9
|
|
MOVQ BX, R11
|
|
MOVOU -128(R9), X5
|
|
MOVOU -112(R9), X6
|
|
MOVQ $0x00000080, R8
|
|
ANDQ $0xffffffe0, BX
|
|
ADDQ $0x20, BX
|
|
MOVOU -96(R9), X7
|
|
MOVOU -80(R9), X8
|
|
MOVQ BX, R10
|
|
SUBQ R11, R10
|
|
MOVOU -64(R9), X9
|
|
MOVOU -48(R9), X10
|
|
SUBQ R10, SI
|
|
MOVOU -32(R9), X11
|
|
MOVOU -16(R9), X12
|
|
VMOVDQU (DI), Y4
|
|
ADDQ R10, DI
|
|
SUBQ R8, SI
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (DI), Y0
|
|
VMOVDQU 32(DI), Y1
|
|
VMOVDQU 64(DI), Y2
|
|
VMOVDQU 96(DI), Y3
|
|
ADDQ R8, DI
|
|
VMOVDQA Y0, (BX)
|
|
VMOVDQA Y1, 32(BX)
|
|
VMOVDQA Y2, 64(BX)
|
|
VMOVDQA Y3, 96(BX)
|
|
ADDQ R8, BX
|
|
SUBQ R8, SI
|
|
JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
|
|
ADDQ R8, SI
|
|
ADDQ BX, SI
|
|
VMOVDQU Y4, (R11)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(SI)
|
|
MOVOU X6, -112(SI)
|
|
MOVOU X7, -96(SI)
|
|
MOVOU X8, -80(SI)
|
|
MOVOU X9, -64(SI)
|
|
MOVOU X10, -48(SI)
|
|
MOVOU X11, -32(SI)
|
|
MOVOU X12, -16(SI)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
MOVQ R8, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ DI, DI
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP repeat_extend_forward_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(DI), DI
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm12BAvx
|
|
LEAQ 1(DI), DI
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm12BAvx:
|
|
ADDL DI, AX
|
|
MOVL AX, BX
|
|
SUBL BP, BX
|
|
MOVL 24(SP), BP
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVL 20(SP), DI
|
|
TESTL DI, DI
|
|
JZ repeat_as_copy_encodeBlockAsm12BAvx
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_as_copy_encodeBlockAsm12BAvx:
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, SI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm12BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
MOVQ BX, DI
|
|
LEAQ -4(BX), BX
|
|
CMPL DI, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL DI, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW BX, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (SI)
|
|
MOVW BX, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (SI)
|
|
MOVB BL, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
XORQ DI, DI
|
|
LEAQ 1(DI)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, BX
|
|
MOVB BL, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
repeat_end_emit_encodeBlockAsm12BAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm12BAvx
|
|
JMP search_loop_encodeBlockAsm12BAvx
|
|
|
|
no_repeat_found_encodeBlockAsm12BAvx:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R8
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, DI
|
|
SHLQ $0x10, DI
|
|
IMULQ R8, DI
|
|
SHRQ $0x34, DI
|
|
CMPL (CX)(BX*1), BP
|
|
SHRQ $0x08, BP
|
|
JEQ candidate_match_encodeBlockAsm12BAvx
|
|
MOVL 32(SP)(DI*1), BX
|
|
CMPL (CX)(SI*1), BP
|
|
JEQ candidate2_match_encodeBlockAsm12BAvx
|
|
LEAQ 2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
SHRQ $0x08, BP
|
|
CMPL (CX)(BX*1), BP
|
|
JEQ candidate3_match_encodeBlockAsm12BAvx
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm12BAvx
|
|
|
|
candidate3_match_encodeBlockAsm12BAvx:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm12BAvx
|
|
|
|
candidate2_match_encodeBlockAsm12BAvx:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(DI*1)
|
|
INCL AX
|
|
MOVL SI, BX
|
|
|
|
candidate_match_encodeBlockAsm12BAvx:
|
|
MOVL 20(SP), BP
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm12BAvx
|
|
|
|
match_extend_back_loop_encodeBlockAsm12BAvx:
|
|
CMPL AX, BP
|
|
JG match_extend_back_end_encodeBlockAsm12BAvx
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), SI
|
|
CMPB DL, SI
|
|
JNE match_extend_back_end_encodeBlockAsm12BAvx
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm12BAvx
|
|
JMP match_extend_back_loop_encodeBlockAsm12BAvx
|
|
|
|
match_extend_back_end_encodeBlockAsm12BAvx:
|
|
MOVL AX, BP
|
|
SUBL 20(SP), BP
|
|
LEAQ dst_base+0(FP)(BP*1), BP
|
|
CMPQ BP, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm12BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm12BAvx:
|
|
MOVL BX, BP
|
|
MOVL 20(SP), SI
|
|
CMPL SI, BP
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm12BAvx
|
|
MOVL BP, DI
|
|
MOVL BP, 20(SP)
|
|
LEAQ (CX)(SI*1), BP
|
|
SUBL SI, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
MOVQ DI, R8
|
|
SUBL $0x01, R8
|
|
JC emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm12BAvx
|
|
MOVB $0xfc, (SI)
|
|
MOVL R8, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_match_emit_encodeBlockAsm12BAvx:
|
|
MOVQ R8, R9
|
|
SHRL $0x10, R9
|
|
MOVB $0xf8, (SI)
|
|
MOVW R8, 1(SI)
|
|
MOVB R9, 3(SI)
|
|
ADDQ $0x04, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
three_bytes_match_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf4, (SI)
|
|
MOVW R8, 1(SI)
|
|
ADDQ $0x03, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
two_bytes_match_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf0, (SI)
|
|
MOVB R8, 1(SI)
|
|
ADDQ $0x02, SI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
one_byte_match_emit_encodeBlockAsm12BAvx:
|
|
SHLB $0x02, R8
|
|
MOVB R8, (SI)
|
|
ADDQ $0x01, SI
|
|
|
|
memmove_match_emit_encodeBlockAsm12BAvx:
|
|
LEAQ (SI)(DI*1), R8
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
|
|
MOVB (BP), R8
|
|
MOVB -1(BP)(DI*1), R9
|
|
MOVB R8, (SI)
|
|
MOVB R9, -1(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4:
|
|
MOVL (BP), R8
|
|
MOVL R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3:
|
|
MOVW (BP), R8
|
|
MOVB 2(BP), R9
|
|
MOVW R8, (SI)
|
|
MOVB R9, 2(SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
|
|
MOVL (BP), R8
|
|
MOVL -4(BP)(DI*1), R9
|
|
MOVL R8, (SI)
|
|
MOVL R9, -4(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8:
|
|
MOVQ (BP), R8
|
|
MOVQ R8, (SI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
|
|
MOVQ (BP), R8
|
|
MOVQ -8(BP)(DI*1), R9
|
|
MOVQ R8, (SI)
|
|
MOVQ R9, -8(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
|
|
MOVOU (BP), X0
|
|
MOVOU -16(BP)(DI*1), X1
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU -32(BP)(DI*1), X2
|
|
MOVOU -16(BP)(DI*1), X3
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, -32(SI)(DI*1)
|
|
MOVOU X3, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU -128(BP)(DI*1), X8
|
|
MOVOU -112(BP)(DI*1), X9
|
|
MOVOU -96(BP)(DI*1), X10
|
|
MOVOU -80(BP)(DI*1), X11
|
|
MOVOU -64(BP)(DI*1), X12
|
|
MOVOU -48(BP)(DI*1), X13
|
|
MOVOU -32(BP)(DI*1), X14
|
|
MOVOU -16(BP)(DI*1), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, -128(SI)(DI*1)
|
|
MOVOU X9, -112(SI)(DI*1)
|
|
MOVOU X10, -96(SI)(DI*1)
|
|
MOVOU X11, -80(SI)(DI*1)
|
|
MOVOU X12, -64(SI)(DI*1)
|
|
MOVOU X13, -48(SI)(DI*1)
|
|
MOVOU X14, -32(SI)(DI*1)
|
|
MOVOU X15, -16(SI)(DI*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (BP), X0
|
|
MOVOU 16(BP), X1
|
|
MOVOU 32(BP), X2
|
|
MOVOU 48(BP), X3
|
|
MOVOU 64(BP), X4
|
|
MOVOU 80(BP), X5
|
|
MOVOU 96(BP), X6
|
|
MOVOU 112(BP), X7
|
|
MOVOU 128(BP), X8
|
|
MOVOU 144(BP), X9
|
|
MOVOU 160(BP), X10
|
|
MOVOU 176(BP), X11
|
|
MOVOU 192(BP), X12
|
|
MOVOU 208(BP), X13
|
|
MOVOU 224(BP), X14
|
|
MOVOU 240(BP), X15
|
|
MOVOU X0, (SI)
|
|
MOVOU X1, 16(SI)
|
|
MOVOU X2, 32(SI)
|
|
MOVOU X3, 48(SI)
|
|
MOVOU X4, 64(SI)
|
|
MOVOU X5, 80(SI)
|
|
MOVOU X6, 96(SI)
|
|
MOVOU X7, 112(SI)
|
|
MOVOU X8, 128(SI)
|
|
MOVOU X9, 144(SI)
|
|
MOVOU X10, 160(SI)
|
|
MOVOU X11, 176(SI)
|
|
MOVOU X12, 192(SI)
|
|
MOVOU X13, 208(SI)
|
|
MOVOU X14, 224(SI)
|
|
MOVOU X15, 240(SI)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(BP), BP
|
|
LEAQ 256(SI), SI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
|
|
LEAQ (BP)(DI*1), R9
|
|
MOVQ SI, R11
|
|
MOVOU -128(R9), X5
|
|
MOVOU -112(R9), X6
|
|
MOVQ $0x00000080, R8
|
|
ANDQ $0xffffffe0, SI
|
|
ADDQ $0x20, SI
|
|
MOVOU -96(R9), X7
|
|
MOVOU -80(R9), X8
|
|
MOVQ SI, R10
|
|
SUBQ R11, R10
|
|
MOVOU -64(R9), X9
|
|
MOVOU -48(R9), X10
|
|
SUBQ R10, DI
|
|
MOVOU -32(R9), X11
|
|
MOVOU -16(R9), X12
|
|
VMOVDQU (BP), Y4
|
|
ADDQ R10, BP
|
|
SUBQ R8, DI
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (BP), Y0
|
|
VMOVDQU 32(BP), Y1
|
|
VMOVDQU 64(BP), Y2
|
|
VMOVDQU 96(BP), Y3
|
|
ADDQ R8, BP
|
|
VMOVDQA Y0, (SI)
|
|
VMOVDQA Y1, 32(SI)
|
|
VMOVDQA Y2, 64(SI)
|
|
VMOVDQA Y3, 96(SI)
|
|
ADDQ R8, SI
|
|
SUBQ R8, DI
|
|
JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
|
|
ADDQ R8, DI
|
|
ADDQ SI, DI
|
|
VMOVDQU Y4, (R11)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(DI)
|
|
MOVOU X6, -112(DI)
|
|
MOVOU X7, -96(DI)
|
|
MOVOU X8, -80(DI)
|
|
MOVOU X9, -64(DI)
|
|
MOVOU X10, -48(DI)
|
|
MOVOU X11, -32(DI)
|
|
MOVOU X12, -16(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
MOVQ R8, SI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm12BAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm12BAvx:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm12BAvx:
|
|
MOVL AX, BP
|
|
MOVL AX, BP
|
|
SUBL BX, BP
|
|
MOVL BP, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), BP
|
|
SUBL AX, BP
|
|
XORQ DI, DI
|
|
CMPQ BP, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm12BAvx:
|
|
MOVQ (CX)(DI*1), SI
|
|
XORQ (CX)(DI*1), SI
|
|
TESTQ SI, SI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx
|
|
BSFQ SI, SI
|
|
SARQ $0x03, SI
|
|
LEAQ (DI)(SI*1), DI
|
|
JMP match_nolit_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm12BAvx:
|
|
LEAQ -8(BP), BP
|
|
LEAQ 8(DI), DI
|
|
CMPQ BP, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm12BAvx:
|
|
TESTQ BP, BP
|
|
JZ match_nolit_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx:
|
|
MOVB (CX)(DI*1), SI
|
|
CMPB (CX)(DI*1), SI
|
|
JNE match_nolit_end_encodeBlockAsm12BAvx
|
|
LEAQ 1(DI), DI
|
|
DECQ BP
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
match_nolit_end_encodeBlockAsm12BAvx:
|
|
MOVL 24(SP), BP
|
|
ADDQ $0x04, DI
|
|
MOVQ dst_base+0(FP), SI
|
|
ADDL DI, AX
|
|
CMPL BP, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx
|
|
CMPL DI, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
|
|
MOVB $0xff, (SI)
|
|
MOVD BP, 1(SI)
|
|
LEAQ -64(DI), DI
|
|
ADDQ $0x05, SI
|
|
CMPL DI, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm12BAvx:
|
|
TESTL DI, DI
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVD BP, 1(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm12BAvx:
|
|
CMPL DI, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx
|
|
MOVB $0xee, (SI)
|
|
MOVW BP, 1(SI)
|
|
LEAQ -60(DI), DI
|
|
ADDQ $0x03, SI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
MOVQ DI, R8
|
|
LEAQ -4(DI), DI
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL BP, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
CMPL DI, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL DI, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL DI, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
LEAQ -16842747(DI), DI
|
|
MOVW $0x001d, (SI)
|
|
MOVW $0xfffb, 2(SI)
|
|
MOVB $0xff, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -65536(DI), DI
|
|
MOVQ DI, BP
|
|
MOVW $0x001d, (SI)
|
|
MOVW DI, 2(SI)
|
|
SARQ $0x10, BP
|
|
MOVB BP, 4(SI)
|
|
ADDQ $0x05, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -256(DI), DI
|
|
MOVW $0x0019, (SI)
|
|
MOVW DI, 2(SI)
|
|
ADDQ $0x04, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -4(DI), DI
|
|
MOVW $0x0015, (SI)
|
|
MOVB DI, 2(SI)
|
|
ADDQ $0x03, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
SHLL $0x02, DI
|
|
ORL $0x01, DI
|
|
MOVW DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SARL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx:
|
|
CMPL DI, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
|
|
CMPL BP, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(DI*4), DI
|
|
MOVB BP, 1(SI)
|
|
SHRL $0x08, BP
|
|
SHLL $0x05, BP
|
|
ORL BP, DI
|
|
MOVB DI, (SI)
|
|
ADDQ $0x02, SI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm12BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(DI*4), DI
|
|
MOVB DI, (SI)
|
|
MOVW BP, 1(SI)
|
|
ADDQ $0x03, SI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm12BAvx:
|
|
MOVQ SI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm12BAvx
|
|
CMPQ SI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm12BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm12BAvx:
|
|
MOVQ -2(CX)(AX*1), BP
|
|
MOVQ $0x0000cf1bbcdcbf9b, SI
|
|
MOVQ BP, DI
|
|
SHRQ $0x10, BP
|
|
MOVQ BP, R8
|
|
SHLQ $0x10, DI
|
|
IMULQ SI, DI
|
|
SHRQ $0x34, DI
|
|
SHLQ $0x10, R8
|
|
IMULQ SI, R8
|
|
SHRQ $0x34, R8
|
|
MOVL 32(SP)(DI*1), SI
|
|
MOVL 32(SP)(R8*1), SI
|
|
LEAQ -2(AX), SI
|
|
MOVL SI, 32(SP)(DI*1)
|
|
MOVL AX, 32(SP)(R8*1)
|
|
CMPL (CX)(R8*1), BP
|
|
JEQ match_nolit_loop_encodeBlockAsm12BAvx
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm12BAvx
|
|
|
|
emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm12BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm12BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ DX, BP
|
|
SHRL $0x10, BP
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB BP, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm12BAvx:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm12BAvx:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), BP
|
|
MOVB DL, (CX)
|
|
MOVB BP, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), BP
|
|
MOVW DX, (CX)
|
|
MOVB BP, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), BP
|
|
MOVL DX, (CX)
|
|
MOVL BP, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), BP
|
|
MOVQ DX, (CX)
|
|
MOVQ BP, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned:
|
|
LEAQ (AX)(BX*1), BP
|
|
MOVQ CX, DI
|
|
MOVOU -128(BP), X5
|
|
MOVOU -112(BP), X6
|
|
MOVQ $0x00000080, DX
|
|
ANDQ $0xffffffe0, CX
|
|
ADDQ $0x20, CX
|
|
MOVOU -96(BP), X7
|
|
MOVOU -80(BP), X8
|
|
MOVQ CX, SI
|
|
SUBQ DI, SI
|
|
MOVOU -64(BP), X9
|
|
MOVOU -48(BP), X10
|
|
SUBQ SI, BX
|
|
MOVOU -32(BP), X11
|
|
MOVOU -16(BP), X12
|
|
VMOVDQU (AX), Y4
|
|
ADDQ SI, AX
|
|
SUBQ DX, BX
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (AX), Y0
|
|
VMOVDQU 32(AX), Y1
|
|
VMOVDQU 64(AX), Y2
|
|
VMOVDQU 96(AX), Y3
|
|
ADDQ DX, AX
|
|
VMOVDQA Y0, (CX)
|
|
VMOVDQA Y1, 32(CX)
|
|
VMOVDQA Y2, 64(CX)
|
|
VMOVDQA Y3, 96(CX)
|
|
ADDQ DX, CX
|
|
SUBQ DX, BX
|
|
JA emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop
|
|
ADDQ DX, BX
|
|
ADDQ CX, BX
|
|
VMOVDQU Y4, (DI)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(BX)
|
|
MOVOU X6, -112(BX)
|
|
MOVOU X7, -96(BX)
|
|
MOVOU X8, -80(BX)
|
|
MOVOU X9, -64(BX)
|
|
MOVOU X10, -48(BX)
|
|
MOVOU X11, -32(BX)
|
|
MOVOU X12, -16(BX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a literal header describing len(lit) to dst, copies lit
// after it, and returns the total number of bytes written (header plus
// len(lit)). An empty lit writes nothing and returns 0. The length of dst is
// never read, so the caller must provide enough room.
//
// Header layout, with n = len(lit)-1:
//   n < 60      1 byte : n<<2
//   n < 1<<8    2 bytes: 0xf0, n
//   n < 1<<16   3 bytes: 0xf4, n as 16 bits
//   n < 1<<24   4 bytes: 0xf8, n as 24 bits
//   otherwise   5 bytes: 0xfc, n as 32 bits
//
// Register roles:
//   AX = write cursor in dst      CX = read cursor in lit
//   DX = bytes of lit left        BX = return value
//   BP = n, then copy scratch     SI = scratch
//
// The 8-byte frame holds no locals; a non-zero frame size makes the Go
// assembler save and restore BP, which this function overwrites.
TEXT ·emitLiteral(SB), NOSPLIT, $8-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX                     // return value starts at len(lit)
	MOVQ DX, BP
	SUBL $0x01, BP                  // n = len(lit)-1; borrow means lit is empty
	JC   emit_literal_end_standalone
	CMPL BP, $0x3c
	JLT  one_byte_standalone
	CMPL BP, $0x00000100
	JLT  two_bytes_standalone
	CMPL BP, $0x00010000
	JLT  three_bytes_standalone
	CMPL BP, $0x01000000
	JLT  four_bytes_standalone

	// Five-byte header: tag 0xfc followed by n as 32 bits.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  memmove_standalone

four_bytes_standalone:
	// Tag 0xf8, low 16 bits of n, then bits 16..23 of n.
	MOVQ BP, SI
	SHRL $0x10, SI
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB SI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  memmove_standalone

three_bytes_standalone:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  memmove_standalone

two_bytes_standalone:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  memmove_standalone

one_byte_standalone:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX), dispatching on size. Every fixed-size
	// case loads the head and the tail of the range before storing, so the two
	// (possibly overlapping) pieces together cover the whole range.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ   emit_literal_end_standalone
	CMPQ  DX, $0x02
	JBE   emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ  DX, $0x04
	JB    emit_lit_memmove_standalone_memmove_move_3
	JBE   emit_lit_memmove_standalone_memmove_move_4
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_5through7
	JE    emit_lit_memmove_standalone_memmove_move_8
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ  DX, $0x40
	JBE   emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ  DX, $0x80
	JBE   emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ  DX, $0x00000100
	JBE   emit_lit_memmove_standalone_memmove_move_129through256
	JMP   emit_lit_memmove_standalone_memmove_move_256through2048

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(DX*1), CL           // CX is dead after this load, so CL is free
	MOVB BP, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(DX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(DX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

	// More than 256 bytes: move 256 bytes per iteration, then hand whatever
	// is left (fewer than 256 bytes) back to the size dispatcher above.
emit_lit_memmove_standalone_memmove_move_256through2048:
	LEAQ  -256(DX), DX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ  DX, $0x00000100
	LEAQ  256(CX), CX               // LEAQ leaves the CMPQ flags intact for JGE
	LEAQ  256(AX), AX
	JGE   emit_lit_memmove_standalone_memmove_move_256through2048
	JMP   emit_lit_memmove_standalone_memmove_tail

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitLiteralAvx(dst []byte, lit []byte) int
// Requires: AVX, SSE2
//
// emitLiteralAvx writes a literal header describing len(lit) to dst, copies
// lit after it, and returns the total number of bytes written (header plus
// len(lit)). An empty lit writes nothing and returns 0. The length of dst is
// never read, so the caller must provide enough room.
//
// Header layout, with n = len(lit)-1:
//   n < 60      1 byte : n<<2
//   n < 1<<8    2 bytes: 0xf0, n
//   n < 1<<16   3 bytes: 0xf4, n as 16 bits
//   n < 1<<24   4 bytes: 0xf8, n as 24 bits
//   otherwise   5 bytes: 0xfc, n as 32 bits
//
// Literals of up to 256 bytes are copied with fixed-size SSE/integer moves;
// longer ones take the AVX path at the end of the function.
//
// Register roles:
//   AX = write cursor in dst      CX = read cursor in lit
//   DX = bytes of lit left        BX = return value
//   BP = n, then copy scratch     SI, DI, R8 = scratch
//
// The 8-byte frame holds no locals; a non-zero frame size makes the Go
// assembler save and restore BP, which this function overwrites.
TEXT ·emitLiteralAvx(SB), NOSPLIT, $8-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX                     // return value starts at len(lit)
	MOVQ DX, BP
	SUBL $0x01, BP                  // n = len(lit)-1; borrow means lit is empty
	JC   emit_literal_end_avx_standalone
	CMPL BP, $0x3c
	JLT  one_byte_standalone
	CMPL BP, $0x00000100
	JLT  two_bytes_standalone
	CMPL BP, $0x00010000
	JLT  three_bytes_standalone
	CMPL BP, $0x01000000
	JLT  four_bytes_standalone

	// Five-byte header: tag 0xfc followed by n as 32 bits.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  memmove_standalone

four_bytes_standalone:
	// Tag 0xf8, low 16 bits of n, then bits 16..23 of n.
	MOVQ BP, SI
	SHRL $0x10, SI
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB SI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  memmove_standalone

three_bytes_standalone:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  memmove_standalone

two_bytes_standalone:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  memmove_standalone

one_byte_standalone:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX), dispatching on size. Every fixed-size
	// case loads the head and the tail of the range before storing, so the two
	// (possibly overlapping) pieces together cover the whole range.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ   emit_literal_end_avx_standalone
	CMPQ  DX, $0x02
	JBE   emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ  DX, $0x04
	JB    emit_lit_memmove_standalone_memmove_move_3
	JBE   emit_lit_memmove_standalone_memmove_move_4
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_5through7
	JE    emit_lit_memmove_standalone_memmove_move_8
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ  DX, $0x40
	JBE   emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ  DX, $0x80
	JBE   emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ  DX, $0x00000100
	JBE   emit_lit_memmove_standalone_memmove_move_129through256
	JMP   emit_lit_memmove_standalone_memmove_avxUnaligned

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(DX*1), SI
	MOVB BP, (AX)
	MOVB SI, -1(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), SI
	MOVW BP, (AX)
	MOVB SI, 2(AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(DX*1), SI
	MOVL BP, (AX)
	MOVL SI, -4(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(DX*1), SI
	MOVQ BP, (AX)
	MOVQ SI, -8(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

	// More than 256 bytes. Strategy:
	//   1. keep the last 128 source bytes in X5..X12 and the first 32 in Y4;
	//   2. advance the destination to the next 32-byte boundary and the
	//      source by the same amount (DI);
	//   3. stream 128 bytes per iteration with unaligned loads and aligned
	//      stores;
	//   4. finish by storing the saved head at the original destination (R8)
	//      and the saved tail at the end of the destination range.
emit_lit_memmove_standalone_memmove_avxUnaligned:
	LEAQ    (CX)(DX*1), SI          // SI = one past the last source byte
	MOVQ    AX, R8                  // R8 = original destination
	MOVOU   -128(SI), X5
	MOVOU   -112(SI), X6
	MOVQ    $0x00000080, BP         // BP = 128, the per-iteration step
	// NOTE(review): the alignment mask is spelled $0xffffffe0; confirm the
	// assembler encodes it as a sign-extended imm32 (i.e. -32) so the upper
	// half of the pointer in AX is preserved.
	ANDQ    $0xffffffe0, AX
	ADDQ    $0x20, AX               // AX = next 32-byte boundary above R8
	MOVOU   -96(SI), X7
	MOVOU   -80(SI), X8
	MOVQ    AX, DI
	SUBQ    R8, DI                  // DI = bytes skipped to reach alignment
	MOVOU   -64(SI), X9
	MOVOU   -48(SI), X10
	SUBQ    DI, DX
	MOVOU   -32(SI), X11
	MOVOU   -16(SI), X12
	VMOVDQU (CX), Y4                // head of the source, stored last
	ADDQ    DI, CX
	SUBQ    BP, DX

emit_lit_memmove_standalone_memmove_gobble_128_loop:
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	ADDQ    BP, CX
	VMOVDQA Y0, (AX)
	VMOVDQA Y1, 32(AX)
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)
	ADDQ    BP, AX
	SUBQ    BP, DX
	JA      emit_lit_memmove_standalone_memmove_gobble_128_loop
	ADDQ    BP, DX                  // undo the last subtraction
	ADDQ    AX, DX                  // DX = one past the last destination byte
	VMOVDQU Y4, (R8)
	VZEROUPPER                      // leave AVX state before the SSE stores below
	MOVOU   X5, -128(DX)
	MOVOU   X6, -112(DX)
	MOVOU   X7, -96(DX)
	MOVOU   X8, -80(DX)
	MOVOU   X9, -64(DX)
	MOVOU   X10, -48(DX)
	MOVOU   X11, -32(DX)
	MOVOU   X12, -16(DX)

emit_literal_end_avx_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes one or more repeat elements covering `length` bytes to
// dst and returns the number of bytes written. The length of dst is never
// read, so the caller must provide enough room.
//
// Encodings chosen per iteration, with L = remaining length and R = L-4:
//   L <= 8                    2 bytes: 16-bit (R<<2 | 1)
//   L < 12 and offset < 2048  2 bytes: (R<<2 | 1 | (offset>>8)<<5), offset&0xff
//   R < 260                   3 bytes: 0x15, 0x00, R-4
//   R < 65792                 4 bytes: 0x19, 0x00, 16-bit (R-256)
//   R < 0x0100ffff            5 bytes: 0x1d, 0x00, 24-bit (R-65536)
//   otherwise                 5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff, after
//                             which R is reduced by 16842747 and the loop runs
//                             again on the reduced value
//
// Register roles:
//   AX = write cursor in dst      BX = bytes written (return value)
//   CX = offset, later scratch    DX = remaining length / value being encoded
//   BP = scratch
//
// The 8-byte frame holds no locals; a non-zero frame size makes the Go
// assembler save and restore BP, which this function overwrites.
TEXT ·emitRepeat(SB), NOSPLIT, $8-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

emit_repeat_again_standalone:
	MOVQ DX, BP                     // BP = L, DX = R = L-4
	LEAQ -4(DX), DX
	CMPL BP, $0x08
	JLE  repeat_two_standalone
	CMPL BP, $0x0c
	JGE  cant_repeat_two_offset_standalone
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone

	// Too long for a single element: emit a maximal five-byte repeat and
	// loop on what is left.
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone

repeat_five_standalone:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)                  // low 16 bits of the 24-bit value
	SARQ $0x10, CX
	MOVB CL, 4(AX)                  // bits 16..23
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_repeat_end

repeat_four_standalone:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_repeat_end

repeat_three_standalone:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_repeat_end

repeat_two_standalone:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_repeat_end

repeat_two_offset_standalone:
	XORQ BP, BP
	LEAQ 1(BP)(DX*4), DX            // DX = R<<2 | 1 (BP is zero)
	MOVB CL, 1(AX)                  // low 8 bits of offset
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX                     // offset bits 8.. go into the top of byte 0
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes copy (and, for long matches, repeat) elements describing a
// back-reference of `length` bytes at distance `offset` to dst and returns
// the number of bytes written. The length of dst is never read, so the caller
// must provide enough room.
//
// Paths:
//   offset >= 65536:
//     length > 64   5 bytes (0xff + 32-bit offset, covering 64 bytes), then
//                   the rest as a repeat, or as one more 5-byte copy when
//                   fewer than 4 bytes remain
//     length <= 64  5 bytes: ((length-1)<<2 | 3), 32-bit offset
//   offset < 65536:
//     length > 64   3 bytes (0xee + 16-bit offset, covering 60 bytes), then
//                   the rest as a repeat
//     length < 12 and offset < 2048
//                   2 bytes: ((length-4)<<2 | 1 | (offset>>8)<<5), offset&0xff
//     otherwise     3 bytes: ((length-1)<<2 | 2), 16-bit offset
//
// The two repeat sections encode the remainder in 2 to 5 bytes; when it is
// too large for one element a maximal 5-byte repeat is emitted and the loop
// runs again.
//
// The 32-bit offsets are stored with MOVL. The Go assembler treats MOVD as an
// alias of MOVQ, which would store 8 bytes and overwrite the 4 bytes that
// follow the 5-byte element.
//
// Register roles:
//   AX = write cursor in dst      BX = bytes written (return value)
//   CX = offset, later scratch    DX = remaining length / value being encoded
//   BP = scratch (only its low byte is meaningful after the MOVB loads)
//
// The 8-byte frame holds no locals; a non-zero frame size makes the Go
// assembler save and restore BP, which this function overwrites.
TEXT ·emitCopy(SB), NOSPLIT, $8-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone

	// Long match with a 4-byte offset: emit a 64-byte copy first.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)                  // exactly 4 offset bytes
	LEAQ -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone

	// Remainder of a 4-byte-offset copy, emitted as repeat elements.
emit_repeat_again_standalone_emit_copy:
	MOVQ DX, BP
	LEAQ -4(DX), DX
	CMPL BP, $0x08
	JLE  repeat_two_standalone_emit_copy
	CMPL BP, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ BP, BP
	LEAQ 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

four_bytes_remain_standalone:
	TESTL DX, DX
	JZ    gen_emit_copy_end
	MOVB  $0x03, BP
	LEAQ  -4(BP)(DX*4), DX          // low byte = (length-1)<<2 | 3; only DL is stored
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)                 // exactly 4 offset bytes
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone

	// Long match with a 2-byte offset: emit a 60-byte copy first.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAQ -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

	// Remainder of a 2-byte-offset copy, emitted as repeat elements.
emit_repeat_again_standalone_emit_copy_short:
	MOVQ DX, BP
	LEAQ -4(DX), DX
	CMPL BP, $0x08
	JLE  repeat_two_standalone_emit_copy_short
	CMPL BP, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy_short
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ BP, BP
	LEAQ 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone
	MOVB $0x01, BP
	LEAQ -16(BP)(DX*4), DX          // low byte = (length-4)<<2 | 1; only DL is stored
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

emit_copy_three_standalone:
	MOVB $0x02, BP
	LEAQ -4(BP)(DX*4), DX           // low byte = (length-1)<<2 | 2; only DL is stored
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes that are equal in a and b.
// Only len(a) bounds the comparison; len(b) is never read, so b must be at
// least as long as a.
//
// Register roles:
//   AX = base of a                CX = base of b
//   DX = bytes left to compare    BP = matched byte count (return value)
//   BX = scratch
//
// The 8-byte frame holds no locals; a non-zero frame size makes the Go
// assembler save and restore BP, which this function overwrites.
TEXT ·matchLen(SB), NOSPLIT, $8-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX
	XORQ BP, BP
	CMPQ DX, $0x08
	JL   matchlen_single_standalone

	// Compare eight bytes at a time while at least eight remain.
matchlen_loopback_standalone:
	MOVQ  (AX)(BP*1), BX
	XORQ  (CX)(BP*1), BX            // non-zero bits mark differing positions
	TESTQ BX, BX
	JZ    matchlen_loop_standalone
	BSFQ  BX, BX                    // index of the lowest differing bit
	SARQ  $0x03, BX                 // bit index -> byte index
	LEAQ  (BP)(BX*1), BP
	JMP   gen_match_len_end

matchlen_loop_standalone:
	LEAQ -8(DX), DX
	LEAQ 8(BP), BP
	CMPQ DX, $0x08
	JGE  matchlen_loopback_standalone

	// Fewer than eight bytes left: compare one byte at a time.
matchlen_single_standalone:
	TESTQ DX, DX
	JZ    gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(BP*1), BL
	CMPB (CX)(BP*1), BL
	JNE  gen_match_len_end
	LEAQ 1(BP), BP
	DECQ DX
	JNZ  matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ BP, ret+48(FP)
	RET