Files
2022-04-10 21:03:05 -07:00

9824 lines
243 KiB
ArmAsm

// Code generated by command: go run asm.go -out allocfail.s -stubs stubs.go. DO NOT EDIT.
//go:build !appengine && !noasm && gc
#include "textflag.h"
// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Stack slots used throughout this function, as established by the code:
//   (SP)    8 bytes      output limit pointer: dst_base + (src_len-5) - (src_len>>5)
//   8(SP)   8 bytes      copy of dst_base as seen on entry
//   16(SP)  4 bytes      src_len - 8
//   20(SP)  4 bytes      source index at which the next literal run starts (begins at 0)
//   24(SP)  4 bytes      distance used by the repeat check (begins at 1)
//   28(SP)  4 bytes      source index at which the search resumes after a miss
//   32(SP)  65536 bytes  lookup table, cleared below
// The dst_base+0(FP) argument slot is later overwritten and used as the
// running output pointer. In the main loop AX is the current source index
// and CX is the src base pointer.
TEXT ·encodeBlockAsm(SB), $65568-56
// Clear the table: 0x200 iterations of 0x80 bytes = 65536 bytes from 32(SP).
MOVQ $0x00000200, AX
LEAQ 32(SP), CX
PXOR X0, X0
zero_loop_encodeBlockAsm:
MOVOU X0, (CX)
MOVOU X0, 16(CX)
MOVOU X0, 32(CX)
MOVOU X0, 48(CX)
MOVOU X0, 64(CX)
MOVOU X0, 80(CX)
MOVOU X0, 96(CX)
MOVOU X0, 112(CX)
ADDQ $0x80, CX
DECQ AX
JNZ zero_loop_encodeBlockAsm
// AX is zero here (the loop only exits when DECQ reaches 0), so this stores 0.
MOVL AX, 20(SP)
// CX = (src_len - 5) - (src_len >> 5), BX = src_len - 8.
MOVQ src_len+32(FP), AX
LEAQ -5(AX), CX
LEAQ -8(AX), BX
SHRQ $0x05, AX
SUBL AX, CX
MOVL BX, 16(SP)
// Remember the entry value of dst_base and derive the output limit from it.
MOVQ dst_base+0(FP), AX
MOVQ AX, 8(SP)
LEAQ (AX)(CX*1), CX
MOVQ CX, (SP)
// Start scanning at source index 1 with an initial distance of 1.
MOVL $0x00000001, AX
MOVL AX, 24(SP)
MOVQ src_base+24(FP), CX
// Main search loop. On entry AX = current source index, CX = src base.
search_loop_encodeBlockAsm:
// SI = 8 source bytes at the current index.
MOVQ (CX)(AX*1), SI
// BX = AX + 4 + ((AX - 20(SP)) >> 6): the index to resume from on a miss.
// The step grows with the distance from the pending literal start.
MOVL AX, BX
SUBL 20(SP), BX
SHRL $0x06, BX
LEAQ 4(AX)(BX*1), BX
// Stop searching once that index exceeds src_len - 8.
MOVL 16(SP), DI
CMPL BX, DI
JGT emit_remainder_encodeBlockAsm
MOVL BX, 28(SP)
// Two 16-bit table indexes:
//   R8 = ((SI << 16) * 0xcf1bbcdcbf9b) >> 48
//   R9 = (((SI >> 8) << 16) * 0xcf1bbcdcbf9b) >> 48
MOVQ $0x0000cf1bbcdcbf9b, BX
MOVQ SI, R8
MOVQ SI, R9
SHRQ $0x08, R9
SHLQ $0x10, R8
IMULQ BX, R8
SHRQ $0x30, R8
SHLQ $0x10, R9
IMULQ BX, R9
SHRQ $0x30, R9
// Fetch the previous table entries (BX, DI) and record AX and AX+1 in their place.
// NOTE(review): the index is applied with scale 1 to 4-byte entries, so adjacent
// indexes overlap and index 65535 reaches 3 bytes past the 65536-byte table —
// confirm against the generator whether a *4 scale was intended.
MOVL 32(SP)(R8*1), BX
MOVL 32(SP)(R9*1), DI
MOVL AX, 32(SP)(R8*1)
LEAL 1(AX), R8
MOVL R8, 32(SP)(R9*1)
// Repeat check: R8 = AX - 24(SP); compare the 4 bytes at src[R8+1]
// with the low 32 bits of SI << 8.
MOVL AX, R8
SUBL 24(SP), R8
MOVL 1(CX)(R8*1), R10
MOVQ SI, R9
SHLQ $0x08, R9
CMPL R9, R10
JNE no_repeat_found_encodeBlockAsm
// Repeat found at AX+1. SI = match position, R8 = earlier position, BX = 20(SP).
LEAQ 1(AX), SI
MOVL 20(SP), BX
TESTL R8, R8
JZ repeat_extend_back_end_encodeBlockAsm
// Walk both positions backwards while the preceding bytes are equal.
// NOTE(review): the loop is left as soon as SI > BX (signed); with BX holding
// the literal start this exits on the first pass in the common case — confirm
// the direction of this comparison against the generator.
repeat_extend_back_loop_encodeBlockAsm:
CMPL SI, BX
JG repeat_extend_back_end_encodeBlockAsm
MOVB -1(CX)(R8*1), DL
MOVB -1(CX)(SI*1), DI
CMPB DL, DI
JNE repeat_extend_back_end_encodeBlockAsm
LEAQ -1(SI), SI
DECL R8
JZ repeat_extend_back_end_encodeBlockAsm
JMP repeat_extend_back_loop_encodeBlockAsm
// Emit the pending literal run src[20(SP) : SI] ahead of the repeat.
// Nothing is emitted when the run is empty.
repeat_extend_back_end_encodeBlockAsm:
MOVL 20(SP), BX
CMPL BX, SI
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm
// DI = run length, R8 = source pointer of the run, 20(SP) advances to SI,
// BX = output pointer (read from the dst_base argument slot).
MOVL SI, DI
MOVL SI, 20(SP)
LEAQ (CX)(BX*1), R8
SUBL BX, DI
MOVQ dst_base+0(FP), BX
// R9 = length - 1. The carry branch is taken only if the length was 0.
MOVQ DI, R9
SUBL $0x01, R9
JC emit_literal_done_repeat_emit_encodeBlockAsm
// Choose the header size from length-1:
//   < 60       one byte: (length-1) << 2
//   < 1<<8     0xf0 followed by 1 length byte
//   < 1<<16    0xf4 followed by 2 length bytes
//   < 1<<24    0xf8 followed by 3 length bytes
//   otherwise  0xfc followed by 4 length bytes
CMPL R9, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm
CMPL R9, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm
CMPL R9, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm
CMPL R9, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsm
MOVB $0xfc, (BX)
MOVL R9, 1(BX)
ADDQ $0x05, BX
JMP memmove_repeat_emit_encodeBlockAsm
four_bytes_repeat_emit_encodeBlockAsm:
// Low 16 bits via MOVW, bits 16..23 via the shifted copy in R10.
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (BX)
MOVW R9, 1(BX)
MOVB R10, 3(BX)
ADDQ $0x04, BX
JMP memmove_repeat_emit_encodeBlockAsm
three_bytes_repeat_emit_encodeBlockAsm:
MOVB $0xf4, (BX)
MOVW R9, 1(BX)
ADDQ $0x03, BX
JMP memmove_repeat_emit_encodeBlockAsm
two_bytes_repeat_emit_encodeBlockAsm:
MOVB $0xf0, (BX)
MOVB R9, 1(BX)
ADDQ $0x02, BX
JMP memmove_repeat_emit_encodeBlockAsm
one_byte_repeat_emit_encodeBlockAsm:
SHLB $0x02, R9
MOVB R9, (BX)
ADDQ $0x01, BX
// Copy the literal bytes: source R8, destination BX, length DI.
// R9 is set to BX + DI (one past the last destination byte) before the copy.
// NOTE(review): R9 is reused as scratch by several of the copy paths, and the
// only instruction that would move it back into BX ("MOVQ R9, BX" just before
// emit_literal_done) sits after an unconditional JMP with no label, so it is
// unreachable. On the short paths BX therefore still points at the start of
// the copied bytes when it is stored to dst_base — confirm against the generator.
memmove_repeat_emit_encodeBlockAsm:
LEAQ (BX)(DI*1), R9
NOP
// Dispatch on the remaining length in DI.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail:
TESTQ DI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm
CMPQ DI, $0x02
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
CMPQ DI, $0x04
JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4
CMPQ DI, $0x08
JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7
JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
CMPQ DI, $0x40
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
CMPQ DI, $0x80
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128
CMPQ DI, $0x00000100
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
// The ranged cases below load the first and the last chunk of the run and
// store both; the two stores overlap when the length is not the range maximum.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
MOVB (R8), R9
MOVB -1(R8)(DI*1), R8
MOVB R9, (BX)
MOVB R8, -1(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4:
MOVL (R8), R9
MOVL R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
MOVW (R8), R9
MOVB 2(R8), R8
MOVW R9, (BX)
MOVB R8, 2(BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7:
MOVL (R8), R9
MOVL -4(R8)(DI*1), R8
MOVL R9, (BX)
MOVL R8, -4(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (BX)
MOVQ R8, -8(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (BX)
MOVOU X1, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, -32(BX)(DI*1)
MOVOU X3, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU -128(R8)(DI*1), X8
MOVOU -112(R8)(DI*1), X9
MOVOU -96(R8)(DI*1), X10
MOVOU -80(R8)(DI*1), X11
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, -128(BX)(DI*1)
MOVOU X9, -112(BX)(DI*1)
MOVOU X10, -96(BX)(DI*1)
MOVOU X11, -80(BX)(DI*1)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm
// Bulk path: move 256 bytes per iteration, then hand what is left back to the
// size dispatch above.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048:
LEAQ -256(DI), DI
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU 128(R8), X8
MOVOU 144(R8), X9
MOVOU 160(R8), X10
MOVOU 176(R8), X11
MOVOU 192(R8), X12
MOVOU 208(R8), X13
MOVOU 224(R8), X14
MOVOU 240(R8), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, 128(BX)
MOVOU X9, 144(BX)
MOVOU X10, 160(BX)
MOVOU X11, 176(BX)
MOVOU X12, 192(BX)
MOVOU X13, 208(BX)
MOVOU X14, 224(BX)
MOVOU X15, 240(BX)
// The LEAQs advance both pointers without touching the flags set by CMPQ.
CMPQ DI, $0x00000100
LEAQ 256(R8), R8
LEAQ 256(BX), BX
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail
// Unreachable: no label here and the previous instruction is an unconditional JMP.
MOVQ R9, BX
emit_literal_done_repeat_emit_encodeBlockAsm:
// Publish the output pointer through the dst_base argument slot.
MOVQ BX, dst_base+0(FP)
// Extend the repeat forwards. AX skips ahead by 5, BX = bytes left before
// src_len - 8, R8 = number of matching bytes found so far.
emit_literal_skip_repeat_emit_encodeBlockAsm:
ADDL $0x05, AX
// NOTE(review): BX = AX - 24(SP) is computed here and then immediately
// overwritten by the load from 16(SP); the first result is never used.
MOVL AX, BX
SUBL 24(SP), BX
MOVL 16(SP), BX
SUBL AX, BX
XORQ R8, R8
CMPQ BX, $0x08
JL matchlen_single_repeat_extend
// 8 bytes at a time: XOR two words, and on a difference use the index of the
// lowest set bit / 8 as the count of equal leading bytes.
// NOTE(review): both operands are read from the same address (CX)(R8*1), so the
// XOR result is always zero and the mismatch branch is never taken — confirm
// which two source positions the generator meant to compare.
matchlen_loopback_repeat_extend:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_repeat_extend
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP repeat_extend_forward_end_encodeBlockAsm
matchlen_loop_repeat_extend:
LEAQ -8(BX), BX
LEAQ 8(R8), R8
CMPQ BX, $0x08
JGE matchlen_loopback_repeat_extend
// Fewer than 8 bytes left: compare one byte at a time.
matchlen_single_repeat_extend:
TESTQ BX, BX
JZ repeat_extend_forward_end_encodeBlockAsm
// NOTE(review): as above, the byte is compared with itself.
matchlen_single_loopback_repeat_extend:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE repeat_extend_forward_end_encodeBlockAsm
LEAQ 1(R8), R8
DECQ BX
JNZ matchlen_single_loopback_repeat_extend
// AX moves past the matched bytes. BX = AX - SI is the repeat length,
// SI = distance from 24(SP), DI = output pointer.
repeat_extend_forward_end_encodeBlockAsm:
ADDL R8, AX
MOVL AX, BX
SUBL SI, BX
MOVL 24(SP), SI
MOVQ dst_base+0(FP), DI
// When 20(SP) is zero the repeat is written as a copy instead.
MOVL 20(SP), R8
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm
// Emit a repeat of length BX with distance SI at output pointer DI.
// R8 keeps the original length; BX becomes length - 4 for the encodings below.
emit_repeat_again_match_repeat_:
MOVQ BX, R8
LEAQ -4(BX), BX
// length <= 8: two-byte form.
CMPL R8, $0x08
JLE repeat_two_match_repeat_
// length < 12 and distance < 2048: two-byte form carrying the distance.
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_
CMPL SI, $0x00000800
JLT repeat_two_offset_match_repeat_
// Otherwise pick the 3-, 4- or 5-byte form from length - 4.
cant_repeat_two_offset_match_repeat_:
CMPL BX, $0x00000104
JLT repeat_three_match_repeat_
CMPL BX, $0x00010100
JLT repeat_four_match_repeat_
CMPL BX, $0x0100ffff
JLT repeat_five_match_repeat_
// Too long for a single 5-byte form: write the bytes 1d 00 fb ff ff, subtract
// 16842747 from the remaining length and go round again.
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_repeat_
// Five bytes: 0x1d 0x00, then the low 16 bits and bits 16..23 of BX - 65536.
repeat_five_match_repeat_:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm
// Four bytes: 0x19 0x00, then 16 bits of BX - 256.
repeat_four_match_repeat_:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm
// Three bytes: 0x15 0x00, then the low byte of BX - 4.
repeat_three_match_repeat_:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm
// Two bytes: the 16-bit value (BX << 2) | 1.
repeat_two_match_repeat_:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// Two bytes with distance: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 + BX*4) | ((SI >> 8) << 5).
repeat_two_offset_match_repeat_:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// Emit the repeat as a copy: BX = length, SI = distance, DI = output pointer.
// Distances below 65536 are handled by the two-byte-offset path.
repeat_as_copy_encodeBlockAsm:
CMPL SI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm
CMPL BX, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
// Length > 64: write 0xff plus the 4-byte distance (5 bytes), account for 64
// bytes of length, and send the rest through the repeat forms below.
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(BX), BX
ADDQ $0x05, DI
CMPL BX, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm
// Same size selection as the plain repeat emitter: R8 = remaining length,
// BX = remaining length - 4.
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
// Longest form: bytes 1d 00 fb ff ff, then loop with 16842747 less to encode.
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
// Five bytes: 0x1d 0x00, then 24 bits of BX - 65536.
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm
// Four bytes: 0x19 0x00, then 16 bits of BX - 256.
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm
// Three bytes: 0x15 0x00, then the low byte of BX - 4.
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm
// Two bytes: the 16-bit value (BX << 2) | 1.
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// Two bytes with distance: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 + BX*4) | ((SI >> 8) << 5).
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// Final 5-byte copy: byte 0 = low byte of 3 - 4 + BX*4, followed by the
// 4-byte distance. Only DL is initialised, which is enough because only BL
// of the LEAQ result is stored.
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
TESTL BX, BX
JZ repeat_end_emit_encodeBlockAsm
MOVB $0x03, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm
// Copy with a distance below 65536: BX = length, SI = distance, DI = output.
two_byte_offset_repeat_as_copy_encodeBlockAsm:
CMPL BX, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
// Length > 64: write 0xee plus the 16-bit distance (3 bytes), account for 60
// bytes of length, and send the rest through the repeat forms below.
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(BX), BX
ADDQ $0x03, DI
// Same size selection as the plain repeat emitter: R8 = remaining length,
// BX = remaining length - 4.
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
// Longest form: bytes 1d 00 fb ff ff, then loop with 16842747 less to encode.
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
// Five bytes: 0x1d 0x00, then 24 bits of BX - 65536.
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm
// Four bytes: 0x19 0x00, then 16 bits of BX - 256.
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm
// Three bytes: 0x15 0x00, then the low byte of BX - 4.
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm
// Two bytes: the 16-bit value (BX << 2) | 1.
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// Two bytes with distance: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 + BX*4) | ((SI >> 8) << 5).
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// Length <= 64. A 2-byte copy is used when length < 12 and distance < 2048;
// everything else takes the 3-byte copy.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
CMPL BX, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
CMPL SI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
// 2-byte copy: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 - 16 + BX*4) | ((SI >> 8) << 5).
// Only DL is initialised; only BL of the result is stored.
MOVB $0x01, DL
LEAQ -16(DX)(BX*4), BX
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm
// 3-byte copy: byte 0 = low byte of 2 - 4 + BX*4, then the 16-bit distance.
emit_copy_three_repeat_as_copy_encodeBlockAsm:
MOVB $0x02, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
// Common exit for every repeat encoding: publish the output pointer, then
// either finish (AX > src_len - 8) or resume searching.
repeat_end_emit_encodeBlockAsm:
MOVQ DI, dst_base+0(FP)
MOVL 16(SP), BX
CMPL AX, BX
JGT emit_remainder_encodeBlockAsm
JMP search_loop_encodeBlockAsm
// No repeat at this position. On entry SI = the 8 source bytes at AX,
// BX and DI = the two previous table entries fetched by the search loop.
no_repeat_found_encodeBlockAsm:
// Third table index: R8 = (((SI >> 16) << 16) * 0xcf1bbcdcbf9b) >> 48.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ SI, R8
SHRQ $0x10, R8
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x30, R8
// Candidate 1: the 4 bytes at src[BX] against the low 32 bits of SI.
// NOTE(review): SHRQ with a non-zero count rewrites ZF, so the JEQ below tests
// whether SI >> 8 is zero rather than the CMPL outcome — confirm the intended
// instruction order against the generator.
CMPL (CX)(BX*1), SI
SHRQ $0x08, SI
JEQ candidate_match_encodeBlockAsm
// Candidate 2: the 4 bytes at src[DI] against SI (now shifted right by 8).
// BX is reloaded with the table entry for the third index first.
MOVL 32(SP)(R8*1), BX
CMPL (CX)(DI*1), SI
JEQ candidate2_match_encodeBlockAsm
// Record AX+2 under the third index, then test candidate 3 with SI shifted
// by a further 8 bits.
LEAQ 2(AX), DI
MOVL DI, 32(SP)(R8*1)
SHRQ $0x08, SI
CMPL (CX)(BX*1), SI
JEQ candidate3_match_encodeBlockAsm
// Nothing matched: resume at the index saved in 28(SP).
MOVL 28(SP), AX
JMP search_loop_encodeBlockAsm
candidate3_match_encodeBlockAsm:
ADDL $0x02, AX
JMP candidate_match_encodeBlockAsm
// Candidate 2 matched: store AX-2 under the third index, advance AX by one
// and take DI as the candidate position.
candidate2_match_encodeBlockAsm:
LEAQ -2(AX), BX
MOVL BX, 32(SP)(R8*1)
INCL AX
MOVL DI, BX
// AX = current position, BX = candidate position, SI = 20(SP).
candidate_match_encodeBlockAsm:
MOVL 20(SP), SI
TESTL BX, BX
JZ match_extend_back_end_encodeBlockAsm
// Step both positions backwards while the preceding bytes are equal.
// The loop is left when AX > SI (signed), on a mismatch, or when BX reaches 0.
// NOTE(review): as in the repeat path, leaving on AX > SI looks inverted for a
// lower bound on AX — confirm against the generator.
match_extend_back_loop_encodeBlockAsm:
CMPL AX, SI
JG match_extend_back_end_encodeBlockAsm
MOVB -1(CX)(BX*1), DL
MOVB -1(CX)(AX*1), DI
CMPB DL, DI
JNE match_extend_back_end_encodeBlockAsm
LEAL -1(AX), AX
DECL BX
JZ match_extend_back_end_encodeBlockAsm
JMP match_extend_back_loop_encodeBlockAsm
// Output-space check before the pending literals are written; the function
// returns 0 when the check fails.
// NOTE(review): the LEAQ forms the address of the dst_base argument slot plus
// SI, not the pointer stored in that slot plus SI, and that address is what
// gets compared with the limit in (SP) — confirm against the generator.
match_extend_back_end_encodeBlockAsm:
MOVL AX, SI
SUBL 20(SP), SI
LEAQ dst_base+0(FP)(SI*1), SI
CMPQ SI, (SP)
JL match_dst_size_check_encodeBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
// Emit the literal run src[20(SP) : SI], where SI is taken from BX.
// Nothing is emitted when the run is empty.
// NOTE(review): the end of the run comes from BX (the candidate position)
// rather than AX (the current position) — confirm this is intended.
match_dst_size_check_encodeBlockAsm:
MOVL BX, SI
MOVL 20(SP), DI
CMPL DI, SI
JEQ emit_literal_skip_match_emit_encodeBlockAsm
// R8 = run length, SI = source pointer of the run, DI = output pointer.
MOVL SI, R8
MOVL SI, 20(SP)
LEAQ (CX)(DI*1), SI
SUBL DI, R8
MOVQ dst_base+0(FP), DI
// R9 = length - 1. The carry branch is taken only if the length was 0.
MOVQ R8, R9
SUBL $0x01, R9
JC emit_literal_done_match_emit_encodeBlockAsm
// Header size by length-1: < 60 one byte ((length-1) << 2); otherwise
// 0xf0 / 0xf4 / 0xf8 / 0xfc followed by 1 / 2 / 3 / 4 length bytes.
CMPL R9, $0x3c
JLT one_byte_match_emit_encodeBlockAsm
CMPL R9, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm
CMPL R9, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm
CMPL R9, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsm
MOVB $0xfc, (DI)
MOVL R9, 1(DI)
ADDQ $0x05, DI
JMP memmove_match_emit_encodeBlockAsm
four_bytes_match_emit_encodeBlockAsm:
// Low 16 bits via MOVW, bits 16..23 via the shifted copy in R10.
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (DI)
MOVW R9, 1(DI)
MOVB R10, 3(DI)
ADDQ $0x04, DI
JMP memmove_match_emit_encodeBlockAsm
three_bytes_match_emit_encodeBlockAsm:
MOVB $0xf4, (DI)
MOVW R9, 1(DI)
ADDQ $0x03, DI
JMP memmove_match_emit_encodeBlockAsm
two_bytes_match_emit_encodeBlockAsm:
MOVB $0xf0, (DI)
MOVB R9, 1(DI)
ADDQ $0x02, DI
JMP memmove_match_emit_encodeBlockAsm
one_byte_match_emit_encodeBlockAsm:
SHLB $0x02, R9
MOVB R9, (DI)
ADDQ $0x01, DI
// Copy the literal bytes: source SI, destination DI, length R8.
// R9 is set to DI + R8 (one past the last destination byte) before the copy.
// NOTE(review): R9 is reused as scratch by several copy paths, and the
// "MOVQ R9, DI" just before emit_literal_done follows an unconditional JMP
// with no label, so it is unreachable. On the short paths DI still points at
// the start of the copied bytes when it is stored to dst_base — confirm
// against the generator.
memmove_match_emit_encodeBlockAsm:
LEAQ (DI)(R8*1), R9
NOP
// Dispatch on the remaining length in R8.
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail:
TESTQ R8, R8
JEQ emit_literal_done_match_emit_encodeBlockAsm
CMPQ R8, $0x02
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
CMPQ R8, $0x04
JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4
CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7
JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
CMPQ R8, $0x40
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
CMPQ R8, $0x80
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128
CMPQ R8, $0x00000100
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
// The ranged cases below load the first and the last chunk of the run and
// store both; the two stores overlap when the length is not the range maximum.
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
MOVB (SI), R9
MOVB -1(SI)(R8*1), SI
MOVB R9, (DI)
MOVB SI, -1(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4:
MOVL (SI), R9
MOVL R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
MOVW (SI), R9
MOVB 2(SI), SI
MOVW R9, (DI)
MOVB SI, 2(DI)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7:
MOVL (SI), R9
MOVL -4(SI)(R8*1), SI
MOVL R9, (DI)
MOVL SI, -4(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
MOVQ (SI), R9
MOVQ R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16:
MOVQ (SI), R9
MOVQ -8(SI)(R8*1), SI
MOVQ R9, (DI)
MOVQ SI, -8(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
MOVOU (SI), X0
MOVOU -16(SI)(R8*1), X1
MOVOU X0, (DI)
MOVOU X1, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU -32(SI)(R8*1), X2
MOVOU -16(SI)(R8*1), X3
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, -32(DI)(R8*1)
MOVOU X3, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU -128(SI)(R8*1), X8
MOVOU -112(SI)(R8*1), X9
MOVOU -96(SI)(R8*1), X10
MOVOU -80(SI)(R8*1), X11
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, -128(DI)(R8*1)
MOVOU X9, -112(DI)(R8*1)
MOVOU X10, -96(DI)(R8*1)
MOVOU X11, -80(DI)(R8*1)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm
// Bulk path: move 256 bytes per iteration, then hand what is left back to the
// size dispatch above.
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048:
LEAQ -256(R8), R8
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU 128(SI), X8
MOVOU 144(SI), X9
MOVOU 160(SI), X10
MOVOU 176(SI), X11
MOVOU 192(SI), X12
MOVOU 208(SI), X13
MOVOU 224(SI), X14
MOVOU 240(SI), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, 128(DI)
MOVOU X9, 144(DI)
MOVOU X10, 160(DI)
MOVOU X11, 176(DI)
MOVOU X12, 192(DI)
MOVOU X13, 208(DI)
MOVOU X14, 224(DI)
MOVOU X15, 240(DI)
// The LEAQs advance both pointers without touching the flags set by CMPQ.
CMPQ R8, $0x00000100
LEAQ 256(SI), SI
LEAQ 256(DI), DI
JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail
// Unreachable: no label here and the previous instruction is an unconditional JMP.
MOVQ R9, DI
emit_literal_done_match_emit_encodeBlockAsm:
// Publish the output pointer through the dst_base argument slot.
MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsm:
NOP
// A match with no pending literals: AX = current position, BX = candidate.
// The distance AX - BX is saved in 24(SP), both positions skip the first 4
// bytes, and the match is extended forwards.
match_nolit_loop_encodeBlockAsm:
// The second MOVL repeats the first; it has no additional effect.
MOVL AX, SI
MOVL AX, SI
SUBL BX, SI
MOVL SI, 24(SP)
ADDL $0x04, AX
ADDL $0x04, BX
// SI = bytes left before src_len - 8, R8 = matched byte count.
MOVL 16(SP), SI
SUBL AX, SI
XORQ R8, R8
CMPQ SI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm
// 8 bytes at a time: XOR two words, and on a difference use the index of the
// lowest set bit / 8 as the count of equal leading bytes.
// NOTE(review): both operands are read from the same address (CX)(R8*1) —
// neither AX nor BX takes part — so the XOR is always zero and the mismatch
// branch is never taken. Confirm the intended operands against the generator.
matchlen_loopback_match_nolit_encodeBlockAsm:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_match_nolit_encodeBlockAsm
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP match_nolit_end_encodeBlockAsm
matchlen_loop_match_nolit_encodeBlockAsm:
LEAQ -8(SI), SI
LEAQ 8(R8), R8
CMPQ SI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm
// Fewer than 8 bytes left: compare one byte at a time.
matchlen_single_match_nolit_encodeBlockAsm:
TESTQ SI, SI
JZ match_nolit_end_encodeBlockAsm
// NOTE(review): as above, the byte is compared with itself.
matchlen_single_loopback_match_nolit_encodeBlockAsm:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE match_nolit_end_encodeBlockAsm
LEAQ 1(R8), R8
DECQ SI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm
// Emit the match as a copy. SI = distance (from 24(SP)), R8 = total length
// (extension + 4), DI = output pointer; AX moves past the extension.
match_nolit_end_encodeBlockAsm:
MOVL 24(SP), SI
ADDQ $0x04, R8
MOVQ dst_base+0(FP), DI
ADDL R8, AX
// Distances below 65536 are handled by the two-byte-offset path.
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm
CMPL R8, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm
// Length > 64: write 0xff plus the 4-byte distance (5 bytes), account for 64
// bytes of length, and send the rest through the repeat forms below.
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(R8), R8
ADDQ $0x05, DI
CMPL R8, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm
// Repeat size selection: R9 = remaining length, R8 = remaining length - 4.
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
// Longest form: bytes 1d 00 fb ff ff, then loop with 16842747 less to encode.
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
// Five bytes: 0x1d 0x00, then 24 bits of R8 - 65536.
repeat_five_match_nolit_encodeBlockAsm_emit_copy:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Four bytes: 0x19 0x00, then 16 bits of R8 - 256.
repeat_four_match_nolit_encodeBlockAsm_emit_copy:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Three bytes: 0x15 0x00, then the low byte of R8 - 4.
repeat_three_match_nolit_encodeBlockAsm_emit_copy:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Two bytes: the 16-bit value (R8 << 2) | 1.
repeat_two_match_nolit_encodeBlockAsm_emit_copy:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Two bytes with distance: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 + R8*4) | ((SI >> 8) << 5).
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Final 5-byte copy: byte 0 = low byte of 3 - 4 + R8*4, followed by the
// 4-byte distance. Only DL is initialised, which is enough because only the
// low byte of the LEAQ result is stored.
four_bytes_remain_match_nolit_encodeBlockAsm:
TESTL R8, R8
JZ match_nolit_emitcopy_end_encodeBlockAsm
MOVB $0x03, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Copy with a distance below 65536: R8 = length, SI = distance, DI = output.
two_byte_offset_match_nolit_encodeBlockAsm:
CMPL R8, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm
// Length > 64: write 0xee plus the 16-bit distance (3 bytes), account for 60
// bytes of length, and send the rest through the repeat forms below.
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(R8), R8
ADDQ $0x03, DI
// Repeat size selection: R9 = remaining length, R8 = remaining length - 4.
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
// Longest form: bytes 1d 00 fb ff ff, then loop with 16842747 less to encode.
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
// Five bytes: 0x1d 0x00, then 24 bits of R8 - 65536.
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Four bytes: 0x19 0x00, then 16 bits of R8 - 256.
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Three bytes: 0x15 0x00, then the low byte of R8 - 4.
repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Two bytes: the 16-bit value (R8 << 2) | 1.
repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Two bytes with distance: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 + R8*4) | ((SI >> 8) << 5).
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// Length <= 64. A 2-byte copy is used when length < 12 and distance < 2048;
// everything else takes the 3-byte copy.
two_byte_offset_short_match_nolit_encodeBlockAsm:
CMPL R8, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm
// 2-byte copy: byte 1 = low 8 bits of SI,
// byte 0 = low byte of (1 - 16 + R8*4) | ((SI >> 8) << 5).
// Only DL is initialised; only the low byte of the result is stored.
MOVB $0x01, DL
LEAQ -16(DX)(R8*4), R8
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm
// 3-byte copy: byte 0 = low byte of 2 - 4 + R8*4, then the 16-bit distance.
emit_copy_three_match_nolit_encodeBlockAsm:
MOVB $0x02, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
// After a copy has been written: publish the output pointer, mark AX as the
// start of the next literal run, and decide how to continue.
match_nolit_emitcopy_end_encodeBlockAsm:
MOVQ DI, dst_base+0(FP)
MOVL AX, 20(SP)
// Finish when AX has reached src_len - 8.
CMPL AX, 16(SP)
JGE emit_remainder_encodeBlockAsm
// Return 0 when the output pointer has reached the limit stored in (SP).
CMPQ DI, (SP)
JL match_nolit_dst_ok_encodeBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
// Index the positions AX-2 and AX in the table and test for an immediate
// follow-on match.
match_nolit_dst_ok_encodeBlockAsm:
// SI = 8 source bytes at AX-2. R8 is the table index of those bytes and R9
// that of the bytes at AX (SI >> 16), both computed as in the search loop.
MOVQ -2(CX)(AX*1), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ SI, R8
SHRQ $0x10, SI
MOVQ SI, R9
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x30, R8
SHLQ $0x10, R9
IMULQ DI, R9
SHRQ $0x30, R9
// NOTE(review): both loads target DI, and DI is overwritten by the LEAQ that
// follows, so neither loaded value is ever used — confirm which register was
// meant to receive the previous table entry.
MOVL 32(SP)(R8*1), DI
MOVL 32(SP)(R9*1), DI
LEAQ -2(AX), DI
MOVL DI, 32(SP)(R8*1)
MOVL AX, 32(SP)(R9*1)
// NOTE(review): the comparison reads src at the table index R9 rather than at
// a position loaded from the table — confirm against the generator.
CMPL (CX)(R9*1), SI
JEQ match_nolit_loop_encodeBlockAsm
INCL AX
JMP search_loop_encodeBlockAsm
// Emit whatever is left of src, src[20(SP) : src_len], as one literal run.
// The function returns 0 if output pointer + remaining length reaches the
// limit stored in (SP).
emit_remainder_encodeBlockAsm:
MOVQ src_len+32(FP), AX
SUBL 20(SP), AX
MOVQ dst_base+0(FP), DX
LEAQ (DX)(AX*1), DX
CMPQ DX, (SP)
JL emit_remainder_ok_encodeBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm:
// Nothing to emit when the literal start already equals src_len.
MOVQ src_len+32(FP), AX
MOVL 20(SP), DX
CMPL DX, AX
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm
// BX = run length, AX = source pointer of the run, CX = output pointer.
// CX stops being the src base from here on.
MOVL AX, BX
MOVL AX, 20(SP)
LEAQ (CX)(DX*1), AX
SUBL DX, BX
MOVQ dst_base+0(FP), CX
// DX = length - 1. The carry branch is taken only if the length was 0.
MOVQ BX, DX
SUBL $0x01, DX
JC emit_literal_done_emit_remainder_encodeBlockAsm
// Header size by length-1: < 60 one byte ((length-1) << 2); otherwise
// 0xf0 / 0xf4 / 0xf8 / 0xfc followed by 1 / 2 / 3 / 4 length bytes.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsm
MOVB $0xfc, (CX)
MOVL DX, 1(CX)
ADDQ $0x05, CX
JMP memmove_emit_remainder_encodeBlockAsm
four_bytes_emit_remainder_encodeBlockAsm:
// Low 16 bits via MOVW, bits 16..23 via the shifted copy in SI.
MOVQ DX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP memmove_emit_remainder_encodeBlockAsm
three_bytes_emit_remainder_encodeBlockAsm:
MOVB $0xf4, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
JMP memmove_emit_remainder_encodeBlockAsm
two_bytes_emit_remainder_encodeBlockAsm:
MOVB $0xf0, (CX)
MOVB DL, 1(CX)
ADDQ $0x02, CX
JMP memmove_emit_remainder_encodeBlockAsm
one_byte_emit_remainder_encodeBlockAsm:
SHLB $0x02, DL
MOVB DL, (CX)
ADDQ $0x01, CX
// Copy the final literal bytes: source AX, destination CX, length BX.
// DX is set to CX + BX (one past the last destination byte) before the copy.
// NOTE(review): DX is reused as scratch by several copy paths, and the
// "MOVQ DX, CX" just before emit_literal_done follows an unconditional JMP
// with no label, so it is unreachable. On the short paths CX still points at
// the start of the copied bytes when it is stored to dst_base — confirm
// against the generator.
memmove_emit_remainder_encodeBlockAsm:
LEAQ (CX)(BX*1), DX
NOP
// Dispatch on the remaining length in BX.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail:
TESTQ BX, BX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm
CMPQ BX, $0x02
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
CMPQ BX, $0x04
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7
JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
CMPQ BX, $0x40
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
CMPQ BX, $0x80
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128
CMPQ BX, $0x00000100
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
// The ranged cases below load the first and the last chunk of the run and
// store both; the two stores overlap when the length is not the range maximum.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
MOVB (AX), DL
MOVB -1(AX)(BX*1), AL
MOVB DL, (CX)
MOVB AL, -1(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4:
MOVL (AX), DX
MOVL DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
MOVW (AX), DX
MOVB 2(AX), AL
MOVW DX, (CX)
MOVB AL, 2(CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7:
MOVL (AX), DX
MOVL -4(AX)(BX*1), AX
MOVL DX, (CX)
MOVL AX, -4(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
MOVQ (AX), DX
MOVQ DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16:
MOVQ (AX), DX
MOVQ -8(AX)(BX*1), AX
MOVQ DX, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU -128(AX)(BX*1), X8
MOVOU -112(AX)(BX*1), X9
MOVOU -96(AX)(BX*1), X10
MOVOU -80(AX)(BX*1), X11
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, -128(CX)(BX*1)
MOVOU X9, -112(CX)(BX*1)
MOVOU X10, -96(CX)(BX*1)
MOVOU X11, -80(CX)(BX*1)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm
// Bulk path: move 256 bytes per iteration, then hand what is left back to the
// size dispatch above.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048:
LEAQ -256(BX), BX
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 128(AX), X8
MOVOU 144(AX), X9
MOVOU 160(AX), X10
MOVOU 176(AX), X11
MOVOU 192(AX), X12
MOVOU 208(AX), X13
MOVOU 224(AX), X14
MOVOU 240(AX), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, 128(CX)
MOVOU X9, 144(CX)
MOVOU X10, 160(CX)
MOVOU X11, 176(CX)
MOVOU X12, 192(CX)
MOVOU X13, 208(CX)
MOVOU X14, 224(CX)
MOVOU X15, 240(CX)
// The LEAQs advance both pointers without touching the flags set by CMPQ.
CMPQ BX, $0x00000100
LEAQ 256(AX), AX
LEAQ 256(CX), CX
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail
// Unreachable: no label here and the previous instruction is an unconditional JMP.
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm:
// Publish the output pointer through the dst_base argument slot.
MOVQ CX, dst_base+0(FP)
// Return value: AX = 8(SP) - dst_base, i.e. the entry output pointer minus
// the current one.
// NOTE(review): that is the negated number of bytes the pointer advanced;
// confirm whether the operands were meant to be the other way round.
emit_literal_skip_emit_remainder_encodeBlockAsm:
MOVQ 8(SP), AX
SUBQ dst_base+0(FP), AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm14B(dst []byte, src []byte) int
// Requires: SSE2
//
// Block encoder variant whose hash keeps 14 bits (shift by 0x32 below).
// The labels and the emitted tag bytes suggest Snappy/S2-style literal, copy
// and repeat operations - NOTE(review): this file is generated, confirm the
// format details against the generator.
// Writes 0 to ret+48(FP) and returns whenever an output bound check fails.
//
// Frame layout ($16416 bytes of locals, 56 bytes of arguments and result):
//   0(SP)   8 bytes      output limit: dst_base + src_len - 5 - (src_len >> 5)
//   8(SP)   8 bytes      dst_base as passed in by the caller
//   16(SP)  4 bytes      src_len - 8, the limit for the search position
//   20(SP)  4 bytes      input position up to which data has been emitted (starts at 0)
//   24(SP)  4 bytes      current match offset (starts at 1)
//   28(SP)  4 bytes      position at which the search resumes after a miss
//   32(SP)  16384 bytes  hash table, zeroed on entry
//
// In the main loop AX is the current input position and CX is src_base.
// dst_base+0(FP) is overwritten and serves as the output cursor.
TEXT ·encodeBlockAsm14B(SB), $16416-56
// Zero the table at 32(SP): 0x80 iterations of 128 bytes each.
MOVQ $0x00000080, AX
LEAQ 32(SP), CX
PXOR X0, X0
zero_loop_encodeBlockAsm14B:
MOVOU X0, (CX)
MOVOU X0, 16(CX)
MOVOU X0, 32(CX)
MOVOU X0, 48(CX)
MOVOU X0, 64(CX)
MOVOU X0, 80(CX)
MOVOU X0, 96(CX)
MOVOU X0, 112(CX)
ADDQ $0x80, CX
DECQ AX
JNZ zero_loop_encodeBlockAsm14B
// AX is 0 once the loop falls through, so this initialises 20(SP) to 0.
MOVL AX, 20(SP)
// 16(SP) = src_len - 8; 8(SP) = dst_base;
// (SP) = dst_base + src_len - 5 - (src_len >> 5).
MOVQ src_len+32(FP), AX
LEAQ -5(AX), CX
LEAQ -8(AX), BX
SHRQ $0x05, AX
SUBL AX, CX
MOVL BX, 16(SP)
MOVQ dst_base+0(FP), AX
MOVQ AX, 8(SP)
LEAQ (AX)(CX*1), CX
MOVQ CX, (SP)
// Start at input position 1 with 24(SP) = 1; CX = src_base from here on.
MOVL $0x00000001, AX
MOVL AX, 24(SP)
MOVQ src_base+24(FP), CX
search_loop_encodeBlockAsm14B:
// SI = 8 input bytes at the current position AX.
MOVQ (CX)(AX*1), SI
// BX = AX + 4 + ((AX - 20(SP)) >> 5) is saved in 28(SP) as the resume
// position after a miss. When it exceeds 16(SP) the search ends and the
// remaining input is emitted.
MOVL AX, BX
SUBL 20(SP), BX
SHRL $0x05, BX
LEAQ 4(AX)(BX*1), BX
MOVL 16(SP), DI
CMPL BX, DI
JGT emit_remainder_encodeBlockAsm14B
MOVL BX, 28(SP)
// Hash the low 6 bytes of SI (into R8) and of SI>>8 (into R9): shift left
// by 16, multiply by the constant, keep the top 14 bits.
MOVQ $0x0000cf1bbcdcbf9b, BX
MOVQ SI, R8
MOVQ SI, R9
SHRQ $0x08, R9
SHLQ $0x10, R8
IMULQ BX, R8
SHRQ $0x32, R8
SHLQ $0x10, R9
IMULQ BX, R9
SHRQ $0x32, R9
// Load the previous table entries into BX and DI, then record AX and AX+1.
// The hash is applied as a byte offset (index scale 1).
MOVL 32(SP)(R8*1), BX
MOVL 32(SP)(R9*1), DI
MOVL AX, 32(SP)(R8*1)
LEAL 1(AX), R8
MOVL R8, 32(SP)(R9*1)
// Repeat check: compare the low 32 bits of SI<<8 with the 4 bytes at
// src[AX - 24(SP) + 1].
MOVL AX, R8
SUBL 24(SP), R8
MOVL 1(CX)(R8*1), R10
MOVQ SI, R9
SHLQ $0x08, R9
CMPL R9, R10
JNE no_repeat_found_encodeBlockAsm14B
// Repeat path: SI = AX + 1, R8 = AX - 24(SP), BX = 20(SP).
LEAQ 1(AX), SI
MOVL 20(SP), BX
TESTL R8, R8
JZ repeat_extend_back_end_encodeBlockAsm14B
repeat_extend_back_loop_encodeBlockAsm14B:
// Leaves the loop when SI > BX (signed); otherwise steps SI and R8 back by
// one while the bytes before them are equal and R8 has not reached 0.
CMPL SI, BX
JG repeat_extend_back_end_encodeBlockAsm14B
MOVB -1(CX)(R8*1), DL
MOVB -1(CX)(SI*1), DI
CMPB DL, DI
JNE repeat_extend_back_end_encodeBlockAsm14B
LEAQ -1(SI), SI
DECL R8
JZ repeat_extend_back_end_encodeBlockAsm14B
JMP repeat_extend_back_loop_encodeBlockAsm14B
repeat_extend_back_end_encodeBlockAsm14B:
// Emit src[20(SP):SI] as a literal unless it is empty; 20(SP) becomes SI.
// R8 = source pointer, DI = length, BX = output cursor, R9 = length - 1.
MOVL 20(SP), BX
CMPL BX, SI
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14B
MOVL SI, DI
MOVL SI, 20(SP)
LEAQ (CX)(BX*1), R8
SUBL BX, DI
MOVQ dst_base+0(FP), BX
MOVQ DI, R9
SUBL $0x01, R9
JC emit_literal_done_repeat_emit_encodeBlockAsm14B
// Literal header, selected on length-1 (R9):
//   < 60        1 byte:  (length-1) << 2
//   < 1<<8      2 bytes: 0xf0 + 1 length byte
//   < 1<<16     3 bytes: 0xf4 + 2 length bytes
//   < 1<<24     4 bytes: 0xf8 + 3 length bytes
//   otherwise   5 bytes: 0xfc + 4 length bytes
CMPL R9, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm14B
CMPL R9, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm14B
CMPL R9, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm14B
CMPL R9, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsm14B
MOVB $0xfc, (BX)
MOVL R9, 1(BX)
ADDQ $0x05, BX
JMP memmove_repeat_emit_encodeBlockAsm14B
four_bytes_repeat_emit_encodeBlockAsm14B:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (BX)
MOVW R9, 1(BX)
MOVB R10, 3(BX)
ADDQ $0x04, BX
JMP memmove_repeat_emit_encodeBlockAsm14B
three_bytes_repeat_emit_encodeBlockAsm14B:
MOVB $0xf4, (BX)
MOVW R9, 1(BX)
ADDQ $0x03, BX
JMP memmove_repeat_emit_encodeBlockAsm14B
two_bytes_repeat_emit_encodeBlockAsm14B:
MOVB $0xf0, (BX)
MOVB R9, 1(BX)
ADDQ $0x02, BX
JMP memmove_repeat_emit_encodeBlockAsm14B
one_byte_repeat_emit_encodeBlockAsm14B:
SHLB $0x02, R9
MOVB R9, (BX)
ADDQ $0x01, BX
memmove_repeat_emit_encodeBlockAsm14B:
// R9 = address one past the literal bytes in the output.
LEAQ (BX)(DI*1), R9
NOP
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail:
// Copy DI bytes from R8 to BX. Sizes up to 256 are handled by a size-class
// case that loads the head and the tail of the range (overlapping where
// needed) before storing; larger sizes run the 256-byte loop and re-enter
// here with what is left.
TESTQ DI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm14B
CMPQ DI, $0x02
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2
CMPQ DI, $0x04
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4
CMPQ DI, $0x08
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7
JE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32
CMPQ DI, $0x40
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64
CMPQ DI, $0x80
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128
CMPQ DI, $0x00000100
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2:
MOVB (R8), R9
MOVB -1(R8)(DI*1), R8
MOVB R9, (BX)
MOVB R8, -1(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4:
MOVL (R8), R9
MOVL R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3:
MOVW (R8), R9
MOVB 2(R8), R8
MOVW R9, (BX)
MOVB R8, 2(BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7:
MOVL (R8), R9
MOVL -4(R8)(DI*1), R8
MOVL R9, (BX)
MOVL R8, -4(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (BX)
MOVQ R8, -8(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (BX)
MOVOU X1, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, -32(BX)(DI*1)
MOVOU X3, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU -128(R8)(DI*1), X8
MOVOU -112(R8)(DI*1), X9
MOVOU -96(R8)(DI*1), X10
MOVOU -80(R8)(DI*1), X11
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, -128(BX)(DI*1)
MOVOU X9, -112(BX)(DI*1)
MOVOU X10, -96(BX)(DI*1)
MOVOU X11, -80(BX)(DI*1)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048:
// Copy 256 bytes per iteration while at least 256 remain. The two LEAQs
// advance the pointers without disturbing the flags set by CMPQ.
LEAQ -256(DI), DI
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU 128(R8), X8
MOVOU 144(R8), X9
MOVOU 160(R8), X10
MOVOU 176(R8), X11
MOVOU 192(R8), X12
MOVOU 208(R8), X13
MOVOU 224(R8), X14
MOVOU 240(R8), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, 128(BX)
MOVOU X9, 144(BX)
MOVOU X10, 160(BX)
MOVOU X11, 176(BX)
MOVOU X12, 192(BX)
MOVOU X13, 208(BX)
MOVOU X14, 224(BX)
MOVOU X15, 240(BX)
CMPQ DI, $0x00000100
LEAQ 256(R8), R8
LEAQ 256(BX), BX
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail
// NOTE(review): the next MOVQ follows an unconditional JMP and carries no
// label, so it looks unreachable; the size-class cases above do not advance
// BX themselves - confirm the stored cursor is what the generator intended.
MOVQ R9, BX
emit_literal_done_repeat_emit_encodeBlockAsm14B:
// Publish the output cursor.
MOVQ BX, dst_base+0(FP)
emit_literal_skip_repeat_emit_encodeBlockAsm14B:
// Advance AX by 5, then count in R8 how many further bytes match, with
// BX = 16(SP) - AX as the number of bytes left to examine.
// NOTE(review): the AX - 24(SP) value placed in BX is overwritten by the
// following load before it is read.
ADDL $0x05, AX
MOVL AX, BX
SUBL 24(SP), BX
MOVL 16(SP), BX
SUBL AX, BX
XORQ R8, R8
CMPQ BX, $0x08
JL matchlen_single_repeat_extend
matchlen_loopback_repeat_extend:
// 8 bytes per step: XOR the two sides and, on a difference, convert the
// index of the lowest set bit into a byte count added to R8.
// NOTE(review): both operands address (CX)(R8*1), so as written DI is
// always 0 at the TESTQ - confirm.
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_repeat_extend
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP repeat_extend_forward_end_encodeBlockAsm14B
matchlen_loop_repeat_extend:
LEAQ -8(BX), BX
LEAQ 8(R8), R8
CMPQ BX, $0x08
JGE matchlen_loopback_repeat_extend
matchlen_single_repeat_extend:
TESTQ BX, BX
JZ repeat_extend_forward_end_encodeBlockAsm14B
matchlen_single_loopback_repeat_extend:
// Byte-at-a-time tail. NOTE(review): again both operands are (CX)(R8*1).
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE repeat_extend_forward_end_encodeBlockAsm14B
LEAQ 1(R8), R8
DECQ BX
JNZ matchlen_single_loopback_repeat_extend
repeat_extend_forward_end_encodeBlockAsm14B:
// AX += R8; BX = AX - SI is the match length, SI = offset from 24(SP),
// DI = output cursor. When 20(SP) is 0 the match is emitted as a copy
// rather than as a repeat.
ADDL R8, AX
MOVL AX, BX
SUBL SI, BX
MOVL 24(SP), SI
MOVQ dst_base+0(FP), DI
MOVL 20(SP), R8
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm14B
emit_repeat_again_match_repeat_:
// Repeat encoding (R8 = length, BX = length - 4):
//   R8 <= 8                      2 bytes
//   R8 < 12 and offset < 2048    2 bytes carrying the offset
//   BX < 0x104                   3 bytes
//   BX < 0x10100                 4 bytes
//   BX < 0x0100ffff              5 bytes
//   otherwise write the bytes 1d 00 fb ff ff, subtract 16842747 and loop
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_match_repeat_
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_
CMPL SI, $0x00000800
JLT repeat_two_offset_match_repeat_
cant_repeat_two_offset_match_repeat_:
CMPL BX, $0x00000104
JLT repeat_three_match_repeat_
CMPL BX, $0x00010100
JLT repeat_four_match_repeat_
CMPL BX, $0x0100ffff
JLT repeat_five_match_repeat_
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_repeat_
repeat_five_match_repeat_:
// 5 bytes: word 0x001d, then the low 24 bits of BX - 65536.
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_four_match_repeat_:
// 4 bytes: word 0x0019, then the low 16 bits of BX - 256.
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_three_match_repeat_:
// 3 bytes: word 0x0015, then the low byte of BX - 4.
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_match_repeat_:
// 2 bytes: the word (BX << 2) | 1.
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_offset_match_repeat_:
// 2 bytes: tag = 1 + BX*4 | (offset >> 8) << 5, then the low offset byte.
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_as_copy_encodeBlockAsm14B:
// Copy encoding (BX = length, SI = offset). Offsets below 65536 take the
// 2-byte-offset forms further down. Otherwise lengths up to 64 use a single
// 5-byte copy; longer ones write tag 0xff with the 4-byte offset, subtract
// 64 from the length and encode what is left as a repeat (or as a final
// 5-byte copy when fewer than 4 bytes remain).
CMPL SI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm14B
CMPL BX, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(BX), BX
ADDQ $0x05, DI
CMPL BX, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy:
// Same repeat encoding ladder as above.
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
four_bytes_remain_repeat_as_copy_encodeBlockAsm14B:
// 5-byte copy: tag = 3 + (length-1)*4, then the 4-byte offset.
// DL supplies the 3; only BL is stored, so the remaining bits of DX do not
// reach the output.
TESTL BX, BX
JZ repeat_end_emit_encodeBlockAsm14B
MOVB $0x03, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14B
two_byte_offset_repeat_as_copy_encodeBlockAsm14B:
// Offset < 65536. Lengths above 64 write a 3-byte copy with tag 0xee,
// subtract 60 from the length and encode the rest as a repeat.
CMPL BX, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(BX), BX
ADDQ $0x03, DI
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
// Same repeat encoding ladder as above.
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B:
// Length < 12 and offset < 2048: 2-byte copy with
// tag = 1 + (length-4)*4 | (offset >> 8) << 5, then the low offset byte.
// Anything else takes the 3-byte form below.
CMPL BX, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
CMPL SI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
MOVB $0x01, DL
LEAQ -16(DX)(BX*4), BX
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14B
emit_copy_three_repeat_as_copy_encodeBlockAsm14B:
// 3-byte copy: tag = 2 + (length-1)*4, then the 2-byte offset.
MOVB $0x02, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
repeat_end_emit_encodeBlockAsm14B:
// Publish the cursor; keep searching unless AX is past 16(SP).
MOVQ DI, dst_base+0(FP)
MOVL 16(SP), BX
CMPL AX, BX
JGT emit_remainder_encodeBlockAsm14B
JMP search_loop_encodeBlockAsm14B
no_repeat_found_encodeBlockAsm14B:
// R8 = hash of SI >> 16, used for the table slot of position AX + 2.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ SI, R8
SHRQ $0x10, R8
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x32, R8
// First candidate (BX) is compared against the low 4 bytes of SI.
// NOTE(review): SHRQ sits between this CMPL and the JEQ, and SHRQ writes
// the flags, so the branch appears to test the shift result - confirm.
CMPL (CX)(BX*1), SI
SHRQ $0x08, SI
JEQ candidate_match_encodeBlockAsm14B
// Second candidate (DI) is compared with SI >> 8; the third one is the old
// table entry for AX + 2, compared with SI >> 16 after the slot is updated.
MOVL 32(SP)(R8*1), BX
CMPL (CX)(DI*1), SI
JEQ candidate2_match_encodeBlockAsm14B
LEAQ 2(AX), DI
MOVL DI, 32(SP)(R8*1)
SHRQ $0x08, SI
CMPL (CX)(BX*1), SI
JEQ candidate3_match_encodeBlockAsm14B
// Miss: resume at the position saved in 28(SP).
MOVL 28(SP), AX
JMP search_loop_encodeBlockAsm14B
candidate3_match_encodeBlockAsm14B:
ADDL $0x02, AX
JMP candidate_match_encodeBlockAsm14B
candidate2_match_encodeBlockAsm14B:
// Store AX - 2 in the R8 slot, advance AX by one and take DI as candidate.
LEAQ -2(AX), BX
MOVL BX, 32(SP)(R8*1)
INCL AX
MOVL DI, BX
candidate_match_encodeBlockAsm14B:
// Backward extension: AX = current position, BX = candidate, SI = 20(SP).
MOVL 20(SP), SI
TESTL BX, BX
JZ match_extend_back_end_encodeBlockAsm14B
match_extend_back_loop_encodeBlockAsm14B:
// Leaves the loop when AX > SI (signed); otherwise steps AX and BX back by
// one while the bytes before them are equal and BX has not reached 0.
CMPL AX, SI
JG match_extend_back_end_encodeBlockAsm14B
MOVB -1(CX)(BX*1), DL
MOVB -1(CX)(AX*1), DI
CMPB DL, DI
JNE match_extend_back_end_encodeBlockAsm14B
LEAL -1(AX), AX
DECL BX
JZ match_extend_back_end_encodeBlockAsm14B
JMP match_extend_back_loop_encodeBlockAsm14B
match_extend_back_end_encodeBlockAsm14B:
// Output bound check before the pending literal is written; on failure the
// function returns 0.
// NOTE(review): LEAQ takes the address of the dst_base argument slot, not
// the pointer stored in it - confirm this is the intended operand.
MOVL AX, SI
SUBL 20(SP), SI
LEAQ dst_base+0(FP)(SI*1), SI
CMPQ SI, (SP)
JL match_dst_size_check_encodeBlockAsm14B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm14B:
// Emit the pending literal. SI is loaded from BX, compared with 20(SP) and
// written back to 20(SP); afterwards SI = source pointer, R8 = length,
// DI = output cursor, R9 = length - 1.
MOVL BX, SI
MOVL 20(SP), DI
CMPL DI, SI
JEQ emit_literal_skip_match_emit_encodeBlockAsm14B
MOVL SI, R8
MOVL SI, 20(SP)
LEAQ (CX)(DI*1), SI
SUBL DI, R8
MOVQ dst_base+0(FP), DI
MOVQ R8, R9
SUBL $0x01, R9
JC emit_literal_done_match_emit_encodeBlockAsm14B
// Literal header, same layout as in the repeat path (1 to 5 bytes by size).
CMPL R9, $0x3c
JLT one_byte_match_emit_encodeBlockAsm14B
CMPL R9, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm14B
CMPL R9, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm14B
CMPL R9, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsm14B
MOVB $0xfc, (DI)
MOVL R9, 1(DI)
ADDQ $0x05, DI
JMP memmove_match_emit_encodeBlockAsm14B
four_bytes_match_emit_encodeBlockAsm14B:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (DI)
MOVW R9, 1(DI)
MOVB R10, 3(DI)
ADDQ $0x04, DI
JMP memmove_match_emit_encodeBlockAsm14B
three_bytes_match_emit_encodeBlockAsm14B:
MOVB $0xf4, (DI)
MOVW R9, 1(DI)
ADDQ $0x03, DI
JMP memmove_match_emit_encodeBlockAsm14B
two_bytes_match_emit_encodeBlockAsm14B:
MOVB $0xf0, (DI)
MOVB R9, 1(DI)
ADDQ $0x02, DI
JMP memmove_match_emit_encodeBlockAsm14B
one_byte_match_emit_encodeBlockAsm14B:
SHLB $0x02, R9
MOVB R9, (DI)
ADDQ $0x01, DI
memmove_match_emit_encodeBlockAsm14B:
// R9 = address one past the literal bytes in the output.
LEAQ (DI)(R8*1), R9
NOP
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail:
// Copy R8 bytes from SI to DI using the same size-class dispatch as the
// repeat path.
TESTQ R8, R8
JEQ emit_literal_done_match_emit_encodeBlockAsm14B
CMPQ R8, $0x02
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2
CMPQ R8, $0x04
JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4
CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7
JE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32
CMPQ R8, $0x40
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64
CMPQ R8, $0x80
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128
CMPQ R8, $0x00000100
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256
JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2:
MOVB (SI), R9
MOVB -1(SI)(R8*1), SI
MOVB R9, (DI)
MOVB SI, -1(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4:
MOVL (SI), R9
MOVL R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3:
MOVW (SI), R9
MOVB 2(SI), SI
MOVW R9, (DI)
MOVB SI, 2(DI)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7:
MOVL (SI), R9
MOVL -4(SI)(R8*1), SI
MOVL R9, (DI)
MOVL SI, -4(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8:
MOVQ (SI), R9
MOVQ R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16:
MOVQ (SI), R9
MOVQ -8(SI)(R8*1), SI
MOVQ R9, (DI)
MOVQ SI, -8(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32:
MOVOU (SI), X0
MOVOU -16(SI)(R8*1), X1
MOVOU X0, (DI)
MOVOU X1, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU -32(SI)(R8*1), X2
MOVOU -16(SI)(R8*1), X3
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, -32(DI)(R8*1)
MOVOU X3, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU -128(SI)(R8*1), X8
MOVOU -112(SI)(R8*1), X9
MOVOU -96(SI)(R8*1), X10
MOVOU -80(SI)(R8*1), X11
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, -128(DI)(R8*1)
MOVOU X9, -112(DI)(R8*1)
MOVOU X10, -96(DI)(R8*1)
MOVOU X11, -80(DI)(R8*1)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14B
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048:
// 256 bytes per iteration while at least 256 remain, then back to the tail
// dispatch for the rest.
LEAQ -256(R8), R8
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU 128(SI), X8
MOVOU 144(SI), X9
MOVOU 160(SI), X10
MOVOU 176(SI), X11
MOVOU 192(SI), X12
MOVOU 208(SI), X13
MOVOU 224(SI), X14
MOVOU 240(SI), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, 128(DI)
MOVOU X9, 144(DI)
MOVOU X10, 160(DI)
MOVOU X11, 176(DI)
MOVOU X12, 192(DI)
MOVOU X13, 208(DI)
MOVOU X14, 224(DI)
MOVOU X15, 240(DI)
CMPQ R8, $0x00000100
LEAQ 256(SI), SI
LEAQ 256(DI), DI
JGE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail
// NOTE(review): unlabelled instruction after an unconditional JMP; it looks
// unreachable - confirm.
MOVQ R9, DI
emit_literal_done_match_emit_encodeBlockAsm14B:
// Publish the output cursor.
MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsm14B:
NOP
match_nolit_loop_encodeBlockAsm14B:
// 24(SP) = AX - BX becomes the current match offset. Both positions are
// advanced by 4, then R8 counts further matching bytes with
// SI = 16(SP) - AX as the number of bytes left to examine.
MOVL AX, SI
MOVL AX, SI
SUBL BX, SI
MOVL SI, 24(SP)
ADDL $0x04, AX
ADDL $0x04, BX
MOVL 16(SP), SI
SUBL AX, SI
XORQ R8, R8
CMPQ SI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm14B
matchlen_loopback_match_nolit_encodeBlockAsm14B:
// 8 bytes per step, as in the repeat path.
// NOTE(review): both operands address (CX)(R8*1), so as written DI is
// always 0 at the TESTQ - confirm.
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_match_nolit_encodeBlockAsm14B
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP match_nolit_end_encodeBlockAsm14B
matchlen_loop_match_nolit_encodeBlockAsm14B:
LEAQ -8(SI), SI
LEAQ 8(R8), R8
CMPQ SI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm14B
matchlen_single_match_nolit_encodeBlockAsm14B:
TESTQ SI, SI
JZ match_nolit_end_encodeBlockAsm14B
matchlen_single_loopback_match_nolit_encodeBlockAsm14B:
// Byte-at-a-time tail. NOTE(review): again both operands are (CX)(R8*1).
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE match_nolit_end_encodeBlockAsm14B
LEAQ 1(R8), R8
DECQ SI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14B
match_nolit_end_encodeBlockAsm14B:
// R8 += 4 gives the match length and AX moves past the match.
// SI = offset from 24(SP), DI = output cursor. The copy encoding below has
// the same structure as repeat_as_copy, with R8 as the length register.
MOVL 24(SP), SI
ADDQ $0x04, R8
MOVQ dst_base+0(FP), DI
ADDL R8, AX
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm14B
CMPL R8, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm14B
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(R8), R8
ADDQ $0x05, DI
CMPL R8, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm14B
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy:
// Repeat encoding ladder (R9 = length, R8 = length - 4).
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
four_bytes_remain_match_nolit_encodeBlockAsm14B:
// 5-byte copy: tag = 3 + (length-1)*4, then the 4-byte offset. Only the low
// byte of R8 is stored, so the bits of DX above DL do not reach the output.
TESTL R8, R8
JZ match_nolit_emitcopy_end_encodeBlockAsm14B
MOVB $0x03, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
two_byte_offset_match_nolit_encodeBlockAsm14B:
// Offset < 65536. Lengths above 64 write a 3-byte copy with tag 0xee,
// subtract 60 from the length and encode the rest as a repeat.
CMPL R8, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm14B
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(R8), R8
ADDQ $0x03, DI
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short:
// Repeat encoding ladder (R9 = length, R8 = length - 4).
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
two_byte_offset_short_match_nolit_encodeBlockAsm14B:
// Length < 12 and offset < 2048: 2-byte copy with
// tag = 1 + (length-4)*4 | (offset >> 8) << 5, then the low offset byte.
// Anything else takes the 3-byte form below.
CMPL R8, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm14B
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm14B
MOVB $0x01, DL
LEAQ -16(DX)(R8*4), R8
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
emit_copy_three_match_nolit_encodeBlockAsm14B:
// 3-byte copy: tag = 2 + (length-1)*4, then the 2-byte offset.
MOVB $0x02, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
match_nolit_emitcopy_end_encodeBlockAsm14B:
// Publish the cursor and set 20(SP) = AX. Stop searching once AX reaches
// 16(SP); return 0 when the cursor has reached the limit stored at (SP).
MOVQ DI, dst_base+0(FP)
MOVL AX, 20(SP)
CMPL AX, 16(SP)
JGE emit_remainder_encodeBlockAsm14B
CMPQ DI, (SP)
JL match_nolit_dst_ok_encodeBlockAsm14B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm14B:
// Load 8 bytes at AX - 2, hash them (R8) and the same value shifted right
// by 16 (R9), then store AX - 2 and AX in the two table slots. On a match
// the loop continues at match_nolit_loop, otherwise the search resumes at
// AX + 1.
// NOTE(review): both table loads into DI are overwritten before use, and
// the CMPL indexes src with the hash R9 rather than with a table value -
// confirm.
MOVQ -2(CX)(AX*1), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ SI, R8
SHRQ $0x10, SI
MOVQ SI, R9
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x32, R8
SHLQ $0x10, R9
IMULQ DI, R9
SHRQ $0x32, R9
MOVL 32(SP)(R8*1), DI
MOVL 32(SP)(R9*1), DI
LEAQ -2(AX), DI
MOVL DI, 32(SP)(R8*1)
MOVL AX, 32(SP)(R9*1)
CMPL (CX)(R9*1), SI
JEQ match_nolit_loop_encodeBlockAsm14B
INCL AX
JMP search_loop_encodeBlockAsm14B
emit_remainder_encodeBlockAsm14B:
// Bound check: cursor + (src_len - 20(SP)) must stay below the limit at
// (SP), otherwise return 0.
MOVQ src_len+32(FP), AX
SUBL 20(SP), AX
MOVQ dst_base+0(FP), DX
LEAQ (DX)(AX*1), DX
CMPQ DX, (SP)
JL emit_remainder_ok_encodeBlockAsm14B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm14B:
// Emit src[20(SP):src_len] as the final literal unless it is empty.
// AX = source pointer, BX = length, CX = output cursor, DX = length - 1.
MOVQ src_len+32(FP), AX
MOVL 20(SP), DX
CMPL DX, AX
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14B
MOVL AX, BX
MOVL AX, 20(SP)
LEAQ (CX)(DX*1), AX
SUBL DX, BX
MOVQ dst_base+0(FP), CX
MOVQ BX, DX
SUBL $0x01, DX
JC emit_literal_done_emit_remainder_encodeBlockAsm14B
// Literal header, same layout as in the repeat path (1 to 5 bytes by size).
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm14B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm14B
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm14B
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsm14B
MOVB $0xfc, (CX)
MOVL DX, 1(CX)
ADDQ $0x05, CX
JMP memmove_emit_remainder_encodeBlockAsm14B
four_bytes_emit_remainder_encodeBlockAsm14B:
MOVQ DX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP memmove_emit_remainder_encodeBlockAsm14B
three_bytes_emit_remainder_encodeBlockAsm14B:
MOVB $0xf4, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
JMP memmove_emit_remainder_encodeBlockAsm14B
two_bytes_emit_remainder_encodeBlockAsm14B:
MOVB $0xf0, (CX)
MOVB DL, 1(CX)
ADDQ $0x02, CX
JMP memmove_emit_remainder_encodeBlockAsm14B
one_byte_emit_remainder_encodeBlockAsm14B:
SHLB $0x02, DL
MOVB DL, (CX)
ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm14B:
// DX = address one past the literal bytes in the output.
LEAQ (CX)(BX*1), DX
NOP
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail:
// Copy BX bytes from AX to CX using the same size-class dispatch as the
// repeat path.
TESTQ BX, BX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm14B
CMPQ BX, $0x02
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2
CMPQ BX, $0x04
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7
JE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32
CMPQ BX, $0x40
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64
CMPQ BX, $0x80
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128
CMPQ BX, $0x00000100
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2:
MOVB (AX), DL
MOVB -1(AX)(BX*1), AL
MOVB DL, (CX)
MOVB AL, -1(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4:
MOVL (AX), DX
MOVL DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3:
MOVW (AX), DX
MOVB 2(AX), AL
MOVW DX, (CX)
MOVB AL, 2(CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7:
MOVL (AX), DX
MOVL -4(AX)(BX*1), AX
MOVL DX, (CX)
MOVL AX, -4(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8:
MOVQ (AX), DX
MOVQ DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16:
MOVQ (AX), DX
MOVQ -8(AX)(BX*1), AX
MOVQ DX, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU -128(AX)(BX*1), X8
MOVOU -112(AX)(BX*1), X9
MOVOU -96(AX)(BX*1), X10
MOVOU -80(AX)(BX*1), X11
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, -128(CX)(BX*1)
MOVOU X9, -112(CX)(BX*1)
MOVOU X10, -96(CX)(BX*1)
MOVOU X11, -80(CX)(BX*1)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048:
// 256 bytes per iteration while at least 256 remain, then back to the tail
// dispatch for the rest.
LEAQ -256(BX), BX
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 128(AX), X8
MOVOU 144(AX), X9
MOVOU 160(AX), X10
MOVOU 176(AX), X11
MOVOU 192(AX), X12
MOVOU 208(AX), X13
MOVOU 224(AX), X14
MOVOU 240(AX), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, 128(CX)
MOVOU X9, 144(CX)
MOVOU X10, 160(CX)
MOVOU X11, 176(CX)
MOVOU X12, 192(CX)
MOVOU X13, 208(CX)
MOVOU X14, 224(CX)
MOVOU X15, 240(CX)
CMPQ BX, $0x00000100
LEAQ 256(AX), AX
LEAQ 256(CX), CX
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail
// NOTE(review): unlabelled instruction after an unconditional JMP; it looks
// unreachable - confirm.
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm14B:
// Publish the output cursor.
MOVQ CX, dst_base+0(FP)
emit_literal_skip_emit_remainder_encodeBlockAsm14B:
// ret = 8(SP) - dst_base+0(FP).
// NOTE(review): that is the saved start minus the cursor; confirm the
// operand order is what the caller expects.
MOVQ 8(SP), AX
SUBQ dst_base+0(FP), AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Frame: 4128 bytes of locals, 56 bytes of arguments/results.
// Stack slots initialised by the prologue below:
//   (SP)    64-bit  dst_base + (src_len - 5 - src_len>>5); compared against as a dst bound
//   8(SP)   64-bit  dst_base as passed by the caller
//   16(SP)  32-bit  src_len - 8
//   20(SP)  32-bit  0
//   24(SP)  32-bit  1
//   32(SP)  4096-byte table, zero-filled
// The running output pointer is kept in the dst_base+0(FP) argument slot.
TEXT ·encodeBlockAsm12B(SB), $4128-56
// Zero the table: 0x20 iterations of 8 x 16-byte stores = 4096 bytes from 32(SP).
MOVQ $0x00000020, AX
LEAQ 32(SP), CX
PXOR X0, X0
zero_loop_encodeBlockAsm12B:
MOVOU X0, (CX)
MOVOU X0, 16(CX)
MOVOU X0, 32(CX)
MOVOU X0, 48(CX)
MOVOU X0, 64(CX)
MOVOU X0, 80(CX)
MOVOU X0, 96(CX)
MOVOU X0, 112(CX)
ADDQ $0x80, CX
DECQ AX
JNZ zero_loop_encodeBlockAsm12B
// AX is 0 after the loop, so this stores 0 into 20(SP).
MOVL AX, 20(SP)
// CX = src_len - 5 - (src_len >> 5) (32-bit subtract), BX = src_len - 8.
MOVQ src_len+32(FP), AX
LEAQ -5(AX), CX
LEAQ -8(AX), BX
SHRQ $0x05, AX
SUBL AX, CX
MOVL BX, 16(SP)
// Remember the incoming dst pointer in 8(SP) and the dst bound in (SP).
MOVQ dst_base+0(FP), AX
MOVQ AX, 8(SP)
LEAQ (AX)(CX*1), CX
MOVQ CX, (SP)
// AX = 1 is the first search position; 24(SP) starts at 1 as well.
MOVL $0x00000001, AX
MOVL AX, 24(SP)
// CX holds src_base for the rest of the function body.
MOVQ src_base+24(FP), CX
// Main search loop. AX = current source position, CX = src_base.
search_loop_encodeBlockAsm12B:
// SI = 8 source bytes at position AX.
MOVQ (CX)(AX*1), SI
// BX = AX + 4 + ((AX - 20(SP)) >> 4): the position to continue from when
// nothing matches; the step grows with the distance from 20(SP).
MOVL AX, BX
SUBL 20(SP), BX
SHRL $0x04, BX
LEAQ 4(AX)(BX*1), BX
// Leave the loop when that position is greater than 16(SP).
MOVL 16(SP), DI
CMPL BX, DI
JGT emit_remainder_encodeBlockAsm12B
// Save the continuation position in 28(SP).
MOVL BX, 28(SP)
// Two 12-bit hashes: R8 from the low 6 bytes of SI, R9 from the low 6 bytes
// of SI>>8, each computed as ((x << 16) * 0xcf1bbcdcbf9b) >> 52.
MOVQ $0x0000cf1bbcdcbf9b, BX
MOVQ SI, R8
MOVQ SI, R9
SHRQ $0x08, R9
SHLQ $0x10, R8
IMULQ BX, R8
SHRQ $0x34, R8
SHLQ $0x10, R9
IMULQ BX, R9
SHRQ $0x34, R9
// Read the previous table values (BX, DI), then store AX and AX+1.
// NOTE(review): the hash is used as a byte offset (scale 1) with 4-byte
// accesses, so neighbouring hash values touch overlapping bytes — confirm
// against the generator.
MOVL 32(SP)(R8*1), BX
MOVL 32(SP)(R9*1), DI
MOVL AX, 32(SP)(R8*1)
LEAL 1(AX), R8
MOVL R8, 32(SP)(R9*1)
// Repeat probe: R8 = AX - 24(SP); compare the 4 bytes at src[R8+1] with the
// low 32 bits of SI<<8.
MOVL AX, R8
SUBL 24(SP), R8
MOVL 1(CX)(R8*1), R10
MOVQ SI, R9
SHLQ $0x08, R9
CMPL R9, R10
JNE no_repeat_found_encodeBlockAsm12B
// Probe matched: SI = AX + 1, BX = 20(SP). Skip the backward walk if R8 is 0.
LEAQ 1(AX), SI
MOVL 20(SP), BX
TESTL R8, R8
JZ repeat_extend_back_end_encodeBlockAsm12B
// Backward walk: step SI and R8 down by one while src[R8-1] == src[SI-1].
// The loop is left when SI > BX (signed), on a byte mismatch, or when R8
// reaches 0.
repeat_extend_back_loop_encodeBlockAsm12B:
CMPL SI, BX
JG repeat_extend_back_end_encodeBlockAsm12B
MOVB -1(CX)(R8*1), DL
MOVB -1(CX)(SI*1), DI
CMPB DL, DI
JNE repeat_extend_back_end_encodeBlockAsm12B
LEAQ -1(SI), SI
DECL R8
JZ repeat_extend_back_end_encodeBlockAsm12B
JMP repeat_extend_back_loop_encodeBlockAsm12B
// Write the tag for the bytes between 20(SP) and SI (repeat path).
repeat_extend_back_end_encodeBlockAsm12B:
// Nothing to write when 20(SP) == SI.
MOVL 20(SP), BX
CMPL BX, SI
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12B
// DI = length (SI - old 20(SP)), R8 = source address (src_base + old 20(SP)),
// BX = output pointer read from dst_base+0(FP). 20(SP) is advanced to SI.
MOVL SI, DI
MOVL SI, 20(SP)
LEAQ (CX)(BX*1), R8
SUBL BX, DI
MOVQ dst_base+0(FP), BX
// R9 = length - 1; the borrow (carry) is set only when the length was 0.
MOVQ DI, R9
SUBL $0x01, R9
JC emit_literal_done_repeat_emit_encodeBlockAsm12B
// Pick the tag width from R9: <60 one byte, <256 two, <65536 three,
// <16777216 four, otherwise five bytes.
// (presumably the Snappy/S2 literal tag layout — confirm against the generator)
CMPL R9, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm12B
CMPL R9, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm12B
CMPL R9, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm12B
CMPL R9, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsm12B
// Five bytes: 0xfc followed by R9 as 32 bits.
MOVB $0xfc, (BX)
MOVL R9, 1(BX)
ADDQ $0x05, BX
JMP memmove_repeat_emit_encodeBlockAsm12B
// Four bytes: 0xf8, low 16 bits of R9, then bits 16..23 of R9.
four_bytes_repeat_emit_encodeBlockAsm12B:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (BX)
MOVW R9, 1(BX)
MOVB R10, 3(BX)
ADDQ $0x04, BX
JMP memmove_repeat_emit_encodeBlockAsm12B
// Three bytes: 0xf4 followed by the low 16 bits of R9.
three_bytes_repeat_emit_encodeBlockAsm12B:
MOVB $0xf4, (BX)
MOVW R9, 1(BX)
ADDQ $0x03, BX
JMP memmove_repeat_emit_encodeBlockAsm12B
// Two bytes: 0xf0 followed by the low byte of R9.
two_bytes_repeat_emit_encodeBlockAsm12B:
MOVB $0xf0, (BX)
MOVB R9, 1(BX)
ADDQ $0x02, BX
JMP memmove_repeat_emit_encodeBlockAsm12B
// One byte: R9 << 2.
one_byte_repeat_emit_encodeBlockAsm12B:
SHLB $0x02, R9
MOVB R9, (BX)
ADDQ $0x01, BX
// Copy DI bytes from R8 (source) to BX (destination) for the repeat path.
// R9 = BX + DI, the address just past the copied bytes.
memmove_repeat_emit_encodeBlockAsm12B:
LEAQ (BX)(DI*1), R9
NOP
// Dispatch on the byte count in DI. Sizes up to 256 are handled by loading
// the head and the tail of the range (possibly overlapping) and storing both.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail:
TESTQ DI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
CMPQ DI, $0x02
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2
CMPQ DI, $0x04
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4
CMPQ DI, $0x08
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7
JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
CMPQ DI, $0x40
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
CMPQ DI, $0x80
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128
CMPQ DI, $0x00000100
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
// 1 or 2 bytes: first byte and last byte.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2:
MOVB (R8), R9
MOVB -1(R8)(DI*1), R8
MOVB R9, (BX)
MOVB R8, -1(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// Exactly 4 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4:
MOVL (R8), R9
MOVL R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// Exactly 3 bytes: one 16-bit word plus one byte.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3:
MOVW (R8), R9
MOVB 2(R8), R8
MOVW R9, (BX)
MOVB R8, 2(BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// 5..7 bytes: first 4 and last 4 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7:
MOVL (R8), R9
MOVL -4(R8)(DI*1), R8
MOVL R9, (BX)
MOVL R8, -4(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// Exactly 8 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// 9..16 bytes: first 8 and last 8 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R8
MOVQ R9, (BX)
MOVQ R8, -8(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// 17..32 bytes: first 16 and last 16 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (BX)
MOVOU X1, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// 33..64 bytes: first 32 and last 32 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, -32(BX)(DI*1)
MOVOU X3, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// 65..128 bytes: first 64 and last 64 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// 129..256 bytes: first 128 and last 128 bytes.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU -128(R8)(DI*1), X8
MOVOU -112(R8)(DI*1), X9
MOVOU -96(R8)(DI*1), X10
MOVOU -80(R8)(DI*1), X11
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, -128(BX)(DI*1)
MOVOU X9, -112(BX)(DI*1)
MOVOU X10, -96(BX)(DI*1)
MOVOU X11, -80(BX)(DI*1)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
// More than 256 bytes: copy 256 bytes per iteration, advancing R8 and BX and
// decrementing DI by 256 each time.
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048:
LEAQ -256(DI), DI
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU 128(R8), X8
MOVOU 144(R8), X9
MOVOU 160(R8), X10
MOVOU 176(R8), X11
MOVOU 192(R8), X12
MOVOU 208(R8), X13
MOVOU 224(R8), X14
MOVOU 240(R8), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, 128(BX)
MOVOU X9, 144(BX)
MOVOU X10, 160(BX)
MOVOU X11, 176(BX)
MOVOU X12, 192(BX)
MOVOU X13, 208(BX)
MOVOU X14, 224(BX)
MOVOU X15, 240(BX)
// The flags come from the CMPQ; the two LEAQs in between do not change them.
// Loop while at least 256 bytes remain, otherwise re-dispatch on the rest.
CMPQ DI, $0x00000100
LEAQ 256(R8), R8
LEAQ 256(BX), BX
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail
// NOTE(review): this instruction follows an unconditional JMP and carries no
// label, so it looks unreachable; the size-class paths jump straight to the
// label below with BX not advanced past the copied bytes — confirm against
// the generator.
MOVQ R9, BX
// Store the output pointer back into the dst_base+0(FP) slot.
emit_literal_done_repeat_emit_encodeBlockAsm12B:
MOVQ BX, dst_base+0(FP)
// Forward extension for the repeat path: advance AX by 5, then count bytes
// in R8, bounded by BX = 16(SP) - AX.
emit_literal_skip_repeat_emit_encodeBlockAsm12B:
ADDL $0x05, AX
// NOTE(review): the BX = AX - 24(SP) result is overwritten by the load from
// 16(SP) two instructions later without being read — confirm against the
// generator.
MOVL AX, BX
SUBL 24(SP), BX
MOVL 16(SP), BX
SUBL AX, BX
// R8 = running count, starting at 0. Fewer than 8 bytes left: byte loop only.
XORQ R8, R8
CMPQ BX, $0x08
JL matchlen_single_repeat_extend
// 8 bytes at a time: XOR two loads; a non-zero result locates the first
// differing byte via BSFQ (bit index) >> 3.
// NOTE(review): both operands read the same address (CX)(R8*1), so the XOR
// result is always zero as written — confirm against the generator.
matchlen_loopback_repeat_extend:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_repeat_extend
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP repeat_extend_forward_end_encodeBlockAsm12B
// All 8 bytes equal: consume them and continue while at least 8 remain.
matchlen_loop_repeat_extend:
LEAQ -8(BX), BX
LEAQ 8(R8), R8
CMPQ BX, $0x08
JGE matchlen_loopback_repeat_extend
// Byte-at-a-time tail for the remaining BX (< 8) bytes.
matchlen_single_repeat_extend:
TESTQ BX, BX
JZ repeat_extend_forward_end_encodeBlockAsm12B
// NOTE(review): the byte is compared with itself here as well.
matchlen_single_loopback_repeat_extend:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE repeat_extend_forward_end_encodeBlockAsm12B
LEAQ 1(R8), R8
DECQ BX
JNZ matchlen_single_loopback_repeat_extend
// Emit the repeat. On entry R8 = number of bytes counted by the forward
// extension and SI = start position found by the backward walk.
repeat_extend_forward_end_encodeBlockAsm12B:
// AX += R8; BX = AX - SI (length); SI = 24(SP) (offset); DI = output pointer.
ADDL R8, AX
MOVL AX, BX
SUBL SI, BX
MOVL 24(SP), SI
MOVQ dst_base+0(FP), DI
// When 20(SP) is 0 take the copy encoding instead of the repeat encoding.
MOVL 20(SP), R8
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm12B
// Repeat encoder. R8 = length before adjustment, BX = length - 4.
//   length <= 8                      -> 2-byte form
//   length < 12 and offset < 2048    -> 2-byte form carrying the offset
//   BX < 260                         -> 3-byte form
//   BX < 65792                       -> 4-byte form
//   BX < 0x0100ffff                  -> 5-byte form
//   otherwise emit a maximal 5-byte chunk and loop
emit_repeat_again_match_repeat_:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_match_repeat_
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_
CMPL SI, $0x00000800
JLT repeat_two_offset_match_repeat_
cant_repeat_two_offset_match_repeat_:
CMPL BX, $0x00000104
JLT repeat_three_match_repeat_
CMPL BX, $0x00010100
JLT repeat_four_match_repeat_
CMPL BX, $0x0100ffff
JLT repeat_five_match_repeat_
// Maximal chunk: subtract 16842747 from BX, write bytes 1d 00 fb ff ff.
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_repeat_
// 5-byte form: word 0x001d, then the low 24 bits of BX - 65536.
repeat_five_match_repeat_:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 4-byte form: word 0x0019, then BX - 256 as 16 bits.
repeat_four_match_repeat_:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 3-byte form: word 0x0015, then the low byte of BX - 4.
repeat_three_match_repeat_:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 2-byte form: (BX << 2) | 1 as 16 bits.
repeat_two_match_repeat_:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 2-byte form with offset: byte 0 = 1 + BX*4 + ((offset >> 8) << 5),
// byte 1 = low byte of the offset.
repeat_two_offset_match_repeat_:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// Copy encoding for the repeat path. SI = offset, BX = length, DI = output
// pointer. Offsets below 65536 are handled at two_byte_offset_*.
repeat_as_copy_encodeBlockAsm12B:
CMPL SI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B
// Length <= 64 fits in a single 5-byte element.
CMPL BX, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
// Longer: write 0xff plus the 32-bit offset (5 bytes), take 64 off the length.
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(BX), BX
ADDQ $0x05, DI
// Fewer than 4 bytes left: finish with another 5-byte element; otherwise
// fall into the repeat encoder for the rest.
CMPL BX, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
// Repeat encoder: R8 = remaining length, BX = remaining length - 4; the
// form is chosen by the thresholds tested below.
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy
// Maximal chunk: subtract 16842747 from BX, write bytes 1d 00 fb ff ff, loop.
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy
// 5-byte form: word 0x001d, then the low 24 bits of BX - 65536.
repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 4-byte form: word 0x0019, then BX - 256 as 16 bits.
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 3-byte form: word 0x0015, then the low byte of BX - 4.
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 2-byte form: (BX << 2) | 1 as 16 bits.
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 2-byte form with offset: byte 0 = 1 + BX*4 + ((offset >> 8) << 5),
// byte 1 = low byte of the offset.
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// Single 5-byte element: tag byte = low byte of (DX - 4 + BX*4) with DL = 3,
// followed by the 32-bit offset. Nothing is written when BX is 0.
// NOTE(review): only DL is set before the full DX is used in the LEAQ; the
// stored byte depends only on DL, but confirm against the generator.
four_bytes_remain_repeat_as_copy_encodeBlockAsm12B:
TESTL BX, BX
JZ repeat_end_emit_encodeBlockAsm12B
MOVB $0x03, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12B
// Copy encoding for offsets below 65536 (repeat path). SI = offset,
// BX = length, DI = output pointer.
two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
CMPL BX, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
// Length > 64: write 0xee plus the 16-bit offset (3 bytes), take 60 off the
// length and encode the rest with the repeat encoder below.
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(BX), BX
ADDQ $0x03, DI
// Repeat encoder: R8 = remaining length, BX = remaining length - 4; the
// form is chosen by the thresholds tested below.
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
// Maximal chunk: subtract 16842747 from BX, write bytes 1d 00 fb ff ff, loop.
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
// 5-byte form: word 0x001d, then the low 24 bits of BX - 65536.
repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 4-byte form: word 0x0019, then BX - 256 as 16 bits.
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 3-byte form: word 0x0015, then the low byte of BX - 4.
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 2-byte form: (BX << 2) | 1 as 16 bits.
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 2-byte form with offset: byte 0 = 1 + BX*4 + ((offset >> 8) << 5),
// byte 1 = low byte of the offset.
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// Length <= 64. A 2-byte element is used when length < 12 and offset < 2048,
// otherwise a 3-byte element.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
CMPL BX, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
CMPL SI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
// 2-byte element: byte 0 = low byte of (DX - 16 + BX*4) with DL = 1, OR-ed
// with (offset >> 8) << 5; byte 1 = low byte of the offset.
MOVB $0x01, DL
LEAQ -16(DX)(BX*4), BX
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12B
// 3-byte element: byte 0 = low byte of (DX - 4 + BX*4) with DL = 2, then the
// 16-bit offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
MOVB $0x02, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
// Store the output pointer; stop searching once AX is greater than 16(SP).
repeat_end_emit_encodeBlockAsm12B:
MOVQ DI, dst_base+0(FP)
MOVL 16(SP), BX
CMPL AX, BX
JGT emit_remainder_encodeBlockAsm12B
JMP search_loop_encodeBlockAsm12B
// No repeat: test the table candidates. On entry SI = 8 source bytes at AX,
// BX and DI = the two values read from the table in the search loop.
no_repeat_found_encodeBlockAsm12B:
// R8 = third hash, computed from SI with its low 16 bits cleared:
// (((SI >> 16) << 16) * 0xcf1bbcdcbf9b) >> 52.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ SI, R8
SHRQ $0x10, R8
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x34, R8
// First candidate: 4 bytes at src[BX] against the low 32 bits of SI.
// NOTE(review): the SHRQ sits between the CMPL and the JEQ and itself
// writes the flags, so the JEQ appears to see the shift's ZF rather than
// the comparison result — confirm against the generator.
CMPL (CX)(BX*1), SI
SHRQ $0x08, SI
JEQ candidate_match_encodeBlockAsm12B
// Second candidate: 4 bytes at src[DI] against SI (now shifted by one byte).
// BX is reloaded from the table slot of the third hash first.
MOVL 32(SP)(R8*1), BX
CMPL (CX)(DI*1), SI
JEQ candidate2_match_encodeBlockAsm12B
// Store AX + 2 for the third hash, shift SI by another byte and test the
// third candidate at src[BX].
LEAQ 2(AX), DI
MOVL DI, 32(SP)(R8*1)
SHRQ $0x08, SI
CMPL (CX)(BX*1), SI
JEQ candidate3_match_encodeBlockAsm12B
// Nothing matched: continue at the position saved in 28(SP).
MOVL 28(SP), AX
JMP search_loop_encodeBlockAsm12B
// Third candidate matched: the match starts at AX + 2.
candidate3_match_encodeBlockAsm12B:
ADDL $0x02, AX
JMP candidate_match_encodeBlockAsm12B
// Second candidate matched: store AX - 2 for the third hash, advance AX by 1
// and use DI as the candidate position.
candidate2_match_encodeBlockAsm12B:
LEAQ -2(AX), BX
MOVL BX, 32(SP)(R8*1)
INCL AX
MOVL DI, BX
// AX = match position, BX = candidate position. Skip the backward walk when
// BX is 0.
candidate_match_encodeBlockAsm12B:
MOVL 20(SP), SI
TESTL BX, BX
JZ match_extend_back_end_encodeBlockAsm12B
// Backward walk: step AX and BX down by one while src[BX-1] == src[AX-1].
// The loop is left when AX > SI (signed), on a byte mismatch, or when BX
// reaches 0.
match_extend_back_loop_encodeBlockAsm12B:
CMPL AX, SI
JG match_extend_back_end_encodeBlockAsm12B
MOVB -1(CX)(BX*1), DL
MOVB -1(CX)(AX*1), DI
CMPB DL, DI
JNE match_extend_back_end_encodeBlockAsm12B
LEAL -1(AX), AX
DECL BX
JZ match_extend_back_end_encodeBlockAsm12B
JMP match_extend_back_loop_encodeBlockAsm12B
// Output-space check, then the tag for the pending bytes (match path).
match_extend_back_end_encodeBlockAsm12B:
// SI = AX - 20(SP), then an address formed from the dst_base argument slot
// plus SI, compared against the bound in (SP). Return 0 when not below it.
// NOTE(review): the LEAQ takes the address of the dst_base+0(FP) slot, not
// the pointer value stored in it — confirm against the generator.
MOVL AX, SI
SUBL 20(SP), SI
LEAQ dst_base+0(FP)(SI*1), SI
CMPQ SI, (SP)
JL match_dst_size_check_encodeBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm12B:
// SI = BX; nothing to write when 20(SP) == SI.
MOVL BX, SI
MOVL 20(SP), DI
CMPL DI, SI
JEQ emit_literal_skip_match_emit_encodeBlockAsm12B
// R8 = length (SI - old 20(SP)), SI = source address (src_base + old 20(SP)),
// DI = output pointer read from dst_base+0(FP). 20(SP) is advanced.
MOVL SI, R8
MOVL SI, 20(SP)
LEAQ (CX)(DI*1), SI
SUBL DI, R8
MOVQ dst_base+0(FP), DI
// R9 = length - 1; the borrow (carry) is set only when the length was 0.
MOVQ R8, R9
SUBL $0x01, R9
JC emit_literal_done_match_emit_encodeBlockAsm12B
// Pick the tag width from R9: <60 one byte, <256 two, <65536 three,
// <16777216 four, otherwise five bytes.
// (presumably the Snappy/S2 literal tag layout — confirm against the generator)
CMPL R9, $0x3c
JLT one_byte_match_emit_encodeBlockAsm12B
CMPL R9, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm12B
CMPL R9, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm12B
CMPL R9, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsm12B
// Five bytes: 0xfc followed by R9 as 32 bits.
MOVB $0xfc, (DI)
MOVL R9, 1(DI)
ADDQ $0x05, DI
JMP memmove_match_emit_encodeBlockAsm12B
// Four bytes: 0xf8, low 16 bits of R9, then bits 16..23 of R9.
four_bytes_match_emit_encodeBlockAsm12B:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (DI)
MOVW R9, 1(DI)
MOVB R10, 3(DI)
ADDQ $0x04, DI
JMP memmove_match_emit_encodeBlockAsm12B
// Three bytes: 0xf4 followed by the low 16 bits of R9.
three_bytes_match_emit_encodeBlockAsm12B:
MOVB $0xf4, (DI)
MOVW R9, 1(DI)
ADDQ $0x03, DI
JMP memmove_match_emit_encodeBlockAsm12B
// Two bytes: 0xf0 followed by the low byte of R9.
two_bytes_match_emit_encodeBlockAsm12B:
MOVB $0xf0, (DI)
MOVB R9, 1(DI)
ADDQ $0x02, DI
JMP memmove_match_emit_encodeBlockAsm12B
// One byte: R9 << 2.
one_byte_match_emit_encodeBlockAsm12B:
SHLB $0x02, R9
MOVB R9, (DI)
ADDQ $0x01, DI
// Copy R8 bytes from SI (source) to DI (destination) for the match path.
// R9 = DI + R8, the address just past the copied bytes.
memmove_match_emit_encodeBlockAsm12B:
LEAQ (DI)(R8*1), R9
NOP
// Dispatch on the byte count in R8. Sizes up to 256 are handled by loading
// the head and the tail of the range (possibly overlapping) and storing both.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail:
TESTQ R8, R8
JEQ emit_literal_done_match_emit_encodeBlockAsm12B
CMPQ R8, $0x02
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
CMPQ R8, $0x04
JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4
CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7
JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
CMPQ R8, $0x40
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
CMPQ R8, $0x80
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128
CMPQ R8, $0x00000100
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
// 1 or 2 bytes: first byte and last byte.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
MOVB (SI), R9
MOVB -1(SI)(R8*1), SI
MOVB R9, (DI)
MOVB SI, -1(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// Exactly 4 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4:
MOVL (SI), R9
MOVL R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// Exactly 3 bytes: one 16-bit word plus one byte.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
MOVW (SI), R9
MOVB 2(SI), SI
MOVW R9, (DI)
MOVB SI, 2(DI)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// 5..7 bytes: first 4 and last 4 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7:
MOVL (SI), R9
MOVL -4(SI)(R8*1), SI
MOVL R9, (DI)
MOVL SI, -4(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// Exactly 8 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
MOVQ (SI), R9
MOVQ R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// 9..16 bytes: first 8 and last 8 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16:
MOVQ (SI), R9
MOVQ -8(SI)(R8*1), SI
MOVQ R9, (DI)
MOVQ SI, -8(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// 17..32 bytes: first 16 and last 16 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
MOVOU (SI), X0
MOVOU -16(SI)(R8*1), X1
MOVOU X0, (DI)
MOVOU X1, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// 33..64 bytes: first 32 and last 32 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU -32(SI)(R8*1), X2
MOVOU -16(SI)(R8*1), X3
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, -32(DI)(R8*1)
MOVOU X3, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// 65..128 bytes: first 64 and last 64 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// 129..256 bytes: first 128 and last 128 bytes.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU -128(SI)(R8*1), X8
MOVOU -112(SI)(R8*1), X9
MOVOU -96(SI)(R8*1), X10
MOVOU -80(SI)(R8*1), X11
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, -128(DI)(R8*1)
MOVOU X9, -112(DI)(R8*1)
MOVOU X10, -96(DI)(R8*1)
MOVOU X11, -80(DI)(R8*1)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12B
// More than 256 bytes: copy 256 bytes per iteration, advancing SI and DI and
// decrementing R8 by 256 each time.
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048:
LEAQ -256(R8), R8
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU 128(SI), X8
MOVOU 144(SI), X9
MOVOU 160(SI), X10
MOVOU 176(SI), X11
MOVOU 192(SI), X12
MOVOU 208(SI), X13
MOVOU 224(SI), X14
MOVOU 240(SI), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, 128(DI)
MOVOU X9, 144(DI)
MOVOU X10, 160(DI)
MOVOU X11, 176(DI)
MOVOU X12, 192(DI)
MOVOU X13, 208(DI)
MOVOU X14, 224(DI)
MOVOU X15, 240(DI)
// The flags come from the CMPQ; the two LEAQs in between do not change them.
// Loop while at least 256 bytes remain, otherwise re-dispatch on the rest.
CMPQ R8, $0x00000100
LEAQ 256(SI), SI
LEAQ 256(DI), DI
JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail
// NOTE(review): this instruction follows an unconditional JMP and carries no
// label, so it looks unreachable; the size-class paths jump straight to the
// label below with DI not advanced past the copied bytes — confirm against
// the generator.
MOVQ R9, DI
// Store the output pointer back into the dst_base+0(FP) slot.
emit_literal_done_match_emit_encodeBlockAsm12B:
MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsm12B:
NOP
// Match found with no pending bytes. AX = match position, BX = candidate
// position. Records the offset and counts further bytes in R8.
match_nolit_loop_encodeBlockAsm12B:
// 24(SP) = AX - BX (the offset). The first MOVL is duplicated by the second.
MOVL AX, SI
MOVL AX, SI
SUBL BX, SI
MOVL SI, 24(SP)
// Skip 4 bytes on both sides, then bound the count by SI = 16(SP) - AX.
ADDL $0x04, AX
ADDL $0x04, BX
MOVL 16(SP), SI
SUBL AX, SI
// R8 = running count, starting at 0. Fewer than 8 bytes left: byte loop only.
XORQ R8, R8
CMPQ SI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm12B
// 8 bytes at a time: XOR two loads; a non-zero result locates the first
// differing byte via BSFQ (bit index) >> 3.
// NOTE(review): both operands read the same address (CX)(R8*1), so the XOR
// result is always zero as written — confirm against the generator.
matchlen_loopback_match_nolit_encodeBlockAsm12B:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_match_nolit_encodeBlockAsm12B
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP match_nolit_end_encodeBlockAsm12B
// All 8 bytes equal: consume them and continue while at least 8 remain.
matchlen_loop_match_nolit_encodeBlockAsm12B:
LEAQ -8(SI), SI
LEAQ 8(R8), R8
CMPQ SI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm12B
// Byte-at-a-time tail for the remaining SI (< 8) bytes.
matchlen_single_match_nolit_encodeBlockAsm12B:
TESTQ SI, SI
JZ match_nolit_end_encodeBlockAsm12B
// NOTE(review): the byte is compared with itself here as well.
matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE match_nolit_end_encodeBlockAsm12B
LEAQ 1(R8), R8
DECQ SI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B
// Emit the copy for the match. SI = offset (24(SP)), R8 = length (counted
// bytes + 4), DI = output pointer; AX is advanced by the length.
match_nolit_end_encodeBlockAsm12B:
MOVL 24(SP), SI
ADDQ $0x04, R8
MOVQ dst_base+0(FP), DI
ADDL R8, AX
// Offsets below 65536 are handled at two_byte_offset_*.
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm12B
// Length <= 64 fits in a single 5-byte element.
CMPL R8, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm12B
// Longer: write 0xff plus the 32-bit offset (5 bytes), take 64 off the length.
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(R8), R8
ADDQ $0x05, DI
// Fewer than 4 bytes left: finish with another 5-byte element; otherwise
// fall into the repeat encoder for the rest.
CMPL R8, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm12B
// Repeat encoder: R9 = remaining length, R8 = remaining length - 4; the
// form is chosen by the thresholds tested below.
emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy
// Maximal chunk: subtract 16842747 from R8, write bytes 1d 00 fb ff ff, loop.
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy
// 5-byte form: word 0x001d, then the low 24 bits of R8 - 65536.
repeat_five_match_nolit_encodeBlockAsm12B_emit_copy:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 4-byte form: word 0x0019, then R8 - 256 as 16 bits.
repeat_four_match_nolit_encodeBlockAsm12B_emit_copy:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 3-byte form: word 0x0015, then the low byte of R8 - 4.
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 2-byte form: (R8 << 2) | 1 as 16 bits.
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 2-byte form with offset: byte 0 = 1 + R8*4 + ((offset >> 8) << 5),
// byte 1 = low byte of the offset.
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// Single 5-byte element: tag byte = low byte of (DX - 4 + R8*4) with DL = 3,
// followed by the 32-bit offset. Nothing is written when R8 is 0.
// NOTE(review): only DL is set before the full DX is used in the LEAQ; the
// stored byte depends only on DL, but confirm against the generator.
four_bytes_remain_match_nolit_encodeBlockAsm12B:
TESTL R8, R8
JZ match_nolit_emitcopy_end_encodeBlockAsm12B
MOVB $0x03, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// Copy encoding for offsets below 65536 (match path). SI = offset,
// R8 = length, DI = output pointer.
two_byte_offset_match_nolit_encodeBlockAsm12B:
CMPL R8, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B
// Length > 64: write 0xee plus the 16-bit offset (3 bytes), take 60 off the
// length and encode the rest with the repeat encoder below.
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(R8), R8
ADDQ $0x03, DI
// Repeat encoder: R9 = remaining length, R8 = remaining length - 4; the
// form is chosen by the thresholds tested below.
emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short
// Maximal chunk: subtract 16842747 from R8, write bytes 1d 00 fb ff ff, loop.
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short
// 5-byte form: word 0x001d, then the low 24 bits of R8 - 65536.
repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 4-byte form: word 0x0019, then R8 - 256 as 16 bits.
repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 3-byte form: word 0x0015, then the low byte of R8 - 4.
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 2-byte form: (R8 << 2) | 1 as 16 bits.
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 2-byte form with offset: byte 0 = 1 + R8*4 + ((offset >> 8) << 5),
// byte 1 = low byte of the offset.
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// Length <= 64. A 2-byte element is used when length < 12 and offset < 2048,
// otherwise a 3-byte element.
two_byte_offset_short_match_nolit_encodeBlockAsm12B:
CMPL R8, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
// 2-byte element: byte 0 = low byte of (DX - 16 + R8*4) with DL = 1, OR-ed
// with (offset >> 8) << 5; byte 1 = low byte of the offset.
MOVB $0x01, DL
LEAQ -16(DX)(R8*4), R8
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
// 3-byte element: byte 0 = low byte of (DX - 4 + R8*4) with DL = 2, then the
// 16-bit offset.
emit_copy_three_match_nolit_encodeBlockAsm12B:
MOVB $0x02, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
// After a copy has been written: store the output pointer, set 20(SP) to AX,
// check the bounds, update the table and decide where to continue.
match_nolit_emitcopy_end_encodeBlockAsm12B:
MOVQ DI, dst_base+0(FP)
MOVL AX, 20(SP)
// Source exhausted for searching once AX >= 16(SP).
CMPL AX, 16(SP)
JGE emit_remainder_encodeBlockAsm12B
// Return 0 when the output pointer is not below the bound in (SP).
CMPQ DI, (SP)
JL match_nolit_dst_ok_encodeBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm12B:
// SI = 8 source bytes at AX - 2. R8 = hash of those bytes, R9 = hash of the
// bytes at AX (SI >> 16); both are ((x << 16) * 0xcf1bbcdcbf9b) >> 52.
MOVQ -2(CX)(AX*1), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ SI, R8
SHRQ $0x10, SI
MOVQ SI, R9
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x34, R8
SHLQ $0x10, R9
IMULQ DI, R9
SHRQ $0x34, R9
// NOTE(review): both table loads land in DI and are overwritten by the LEAQ
// that follows without being read — confirm against the generator.
MOVL 32(SP)(R8*1), DI
MOVL 32(SP)(R9*1), DI
// Store AX - 2 and AX into the two table slots.
LEAQ -2(AX), DI
MOVL DI, 32(SP)(R8*1)
MOVL AX, 32(SP)(R9*1)
// Compare 4 source bytes with the low 32 bits of SI; on equality go straight
// to another match, otherwise advance AX by one and resume searching.
// NOTE(review): the source index used here is R9, which holds the hash value
// at this point — confirm against the generator.
CMPL (CX)(R9*1), SI
JEQ match_nolit_loop_encodeBlockAsm12B
INCL AX
JMP search_loop_encodeBlockAsm12B
// Write whatever is left between 20(SP) and src_len. CX still holds src_base
// on entry.
emit_remainder_encodeBlockAsm12B:
// DX = output pointer + (src_len - 20(SP)); return 0 when it is not below
// the bound in (SP).
MOVQ src_len+32(FP), AX
SUBL 20(SP), AX
MOVQ dst_base+0(FP), DX
LEAQ (DX)(AX*1), DX
CMPQ DX, (SP)
JL emit_remainder_ok_encodeBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm12B:
// Nothing to write when 20(SP) == src_len.
MOVQ src_len+32(FP), AX
MOVL 20(SP), DX
CMPL DX, AX
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12B
// BX = length (src_len - old 20(SP)), AX = source address (src_base + old
// 20(SP)), CX = output pointer (src_base is no longer needed in CX).
MOVL AX, BX
MOVL AX, 20(SP)
LEAQ (CX)(DX*1), AX
SUBL DX, BX
MOVQ dst_base+0(FP), CX
// DX = length - 1; the borrow (carry) is set only when the length was 0.
MOVQ BX, DX
SUBL $0x01, DX
JC emit_literal_done_emit_remainder_encodeBlockAsm12B
// Pick the tag width from DX: <60 one byte, <256 two, <65536 three,
// <16777216 four, otherwise five bytes.
// (presumably the Snappy/S2 literal tag layout — confirm against the generator)
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm12B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm12B
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm12B
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsm12B
// Five bytes: 0xfc followed by DX as 32 bits.
MOVB $0xfc, (CX)
MOVL DX, 1(CX)
ADDQ $0x05, CX
JMP memmove_emit_remainder_encodeBlockAsm12B
// Four bytes: 0xf8, low 16 bits of DX, then bits 16..23 of DX.
four_bytes_emit_remainder_encodeBlockAsm12B:
MOVQ DX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP memmove_emit_remainder_encodeBlockAsm12B
// Three bytes: 0xf4 followed by the low 16 bits of DX.
three_bytes_emit_remainder_encodeBlockAsm12B:
MOVB $0xf4, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
JMP memmove_emit_remainder_encodeBlockAsm12B
// Two bytes: 0xf0 followed by the low byte of DX.
two_bytes_emit_remainder_encodeBlockAsm12B:
MOVB $0xf0, (CX)
MOVB DL, 1(CX)
ADDQ $0x02, CX
JMP memmove_emit_remainder_encodeBlockAsm12B
// One byte: DL << 2.
one_byte_emit_remainder_encodeBlockAsm12B:
SHLB $0x02, DL
MOVB DL, (CX)
ADDQ $0x01, CX
// Copy BX bytes from AX (source) to CX (destination) for the remainder, then
// compute the return value.
// DX = CX + BX, the address just past the copied bytes.
memmove_emit_remainder_encodeBlockAsm12B:
LEAQ (CX)(BX*1), DX
NOP
// Dispatch on the byte count in BX. Sizes up to 256 are handled by loading
// the head and the tail of the range (possibly overlapping) and storing both.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail:
TESTQ BX, BX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
CMPQ BX, $0x02
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
CMPQ BX, $0x04
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7
JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
CMPQ BX, $0x40
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
CMPQ BX, $0x80
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128
CMPQ BX, $0x00000100
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
// 1 or 2 bytes: first byte and last byte.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
MOVB (AX), DL
MOVB -1(AX)(BX*1), AL
MOVB DL, (CX)
MOVB AL, -1(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// Exactly 4 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4:
MOVL (AX), DX
MOVL DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// Exactly 3 bytes: one 16-bit word plus one byte.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
MOVW (AX), DX
MOVB 2(AX), AL
MOVW DX, (CX)
MOVB AL, 2(CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// 5..7 bytes: first 4 and last 4 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7:
MOVL (AX), DX
MOVL -4(AX)(BX*1), AX
MOVL DX, (CX)
MOVL AX, -4(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// Exactly 8 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8:
MOVQ (AX), DX
MOVQ DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// 9..16 bytes: first 8 and last 8 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16:
MOVQ (AX), DX
MOVQ -8(AX)(BX*1), AX
MOVQ DX, (CX)
MOVQ AX, -8(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// 17..32 bytes: first 16 and last 16 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// 33..64 bytes: first 32 and last 32 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// 65..128 bytes: first 64 and last 64 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// 129..256 bytes: first 128 and last 128 bytes.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU -128(AX)(BX*1), X8
MOVOU -112(AX)(BX*1), X9
MOVOU -96(AX)(BX*1), X10
MOVOU -80(AX)(BX*1), X11
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, -128(CX)(BX*1)
MOVOU X9, -112(CX)(BX*1)
MOVOU X10, -96(CX)(BX*1)
MOVOU X11, -80(CX)(BX*1)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
// More than 256 bytes: copy 256 bytes per iteration, advancing AX and CX and
// decrementing BX by 256 each time.
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048:
LEAQ -256(BX), BX
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 128(AX), X8
MOVOU 144(AX), X9
MOVOU 160(AX), X10
MOVOU 176(AX), X11
MOVOU 192(AX), X12
MOVOU 208(AX), X13
MOVOU 224(AX), X14
MOVOU 240(AX), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, 128(CX)
MOVOU X9, 144(CX)
MOVOU X10, 160(CX)
MOVOU X11, 176(CX)
MOVOU X12, 192(CX)
MOVOU X13, 208(CX)
MOVOU X14, 224(CX)
MOVOU X15, 240(CX)
// The flags come from the CMPQ; the two LEAQs in between do not change them.
// Loop while at least 256 bytes remain, otherwise re-dispatch on the rest.
CMPQ BX, $0x00000100
LEAQ 256(AX), AX
LEAQ 256(CX), CX
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail
// NOTE(review): this instruction follows an unconditional JMP and carries no
// label, so it looks unreachable; the size-class paths jump straight to the
// label below with CX not advanced past the copied bytes — confirm against
// the generator.
MOVQ DX, CX
// Store the output pointer back into the dst_base+0(FP) slot.
emit_literal_done_emit_remainder_encodeBlockAsm12B:
MOVQ CX, dst_base+0(FP)
// Return value: 8(SP) (the dst pointer saved at entry) minus the current
// value of the dst_base+0(FP) slot.
// NOTE(review): as written this is "start - current", not "current - start"
// — confirm the intended sign against the generator.
emit_literal_skip_emit_remainder_encodeBlockAsm12B:
MOVQ 8(SP), AX
SUBQ dst_base+0(FP), AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsmAvx(dst []byte, src []byte) int
// Requires: AVX, SSE2
//
// Stack slots as set up by this prologue:
//   0(SP)   dst_base + (src_len - 5 - src_len>>5); later compared against the dst write position
//   8(SP)   dst_base as passed in
//   16(SP)  src_len - 8 (32-bit)
//   20(SP)  0 (32-bit)
//   24(SP)  1 (32-bit)
//   32(SP)  65536 bytes, zeroed; indexed by hash in the search loop
// When the prologue finishes, AX = 1 and CX = src_base.
TEXT ·encodeBlockAsmAvx(SB), $65568-56
// Zero the table: 0x200 iterations of 0x80 bytes each, starting at 32(SP).
MOVQ $0x00000200, AX
LEAQ 32(SP), CX
PXOR X0, X0
zero_loop_encodeBlockAsmAvx:
MOVOU X0, (CX)
MOVOU X0, 16(CX)
MOVOU X0, 32(CX)
MOVOU X0, 48(CX)
MOVOU X0, 64(CX)
MOVOU X0, 80(CX)
MOVOU X0, 96(CX)
MOVOU X0, 112(CX)
ADDQ $0x80, CX
DECQ AX
JNZ zero_loop_encodeBlockAsmAvx
// AX is 0 once the loop has finished, so this clears 20(SP).
MOVL AX, 20(SP)
// CX = src_len - 5 - (src_len >> 5) (the subtraction is 32-bit), BX = src_len - 8.
MOVQ src_len+32(FP), AX
LEAQ -5(AX), CX
LEAQ -8(AX), BX
SHRQ $0x05, AX
SUBL AX, CX
MOVL BX, 16(SP)
// Keep the incoming dst pointer in 8(SP) and the bound derived from it in 0(SP).
MOVQ dst_base+0(FP), AX
MOVQ AX, 8(SP)
LEAQ (AX)(CX*1), CX
MOVQ CX, (SP)
// AX is the source index used by the search loop; it starts at 1, and so does 24(SP).
MOVL $0x00000001, AX
MOVL AX, 24(SP)
MOVQ src_base+24(FP), CX
search_loop_encodeBlockAsmAvx:
// SI = the 8 source bytes at src[AX].
MOVQ (CX)(AX*1), SI
// BX = AX + 4 + ((AX - 20(SP)) >> 6). It is saved in 28(SP) below and becomes
// the next AX when no candidate matches at this position.
MOVL AX, BX
SUBL 20(SP), BX
SHRL $0x06, BX
LEAQ 4(AX)(BX*1), BX
// Leave the loop once that position is greater than the limit in 16(SP).
MOVL 16(SP), DI
CMPL BX, DI
JGT emit_remainder_encodeBlockAsmAvx
MOVL BX, 28(SP)
// Two hashes, R8 for the bytes at AX and R9 for the bytes at AX+1:
// shift left by 16 (dropping the top two bytes), multiply by the constant in BX,
// keep the top 16 bits of the product.
MOVQ $0x0000cf1bbcdcbf9b, BX
MOVQ SI, R8
MOVQ SI, R9
SHRQ $0x08, R9
SHLQ $0x10, R8
IMULQ BX, R8
SHRQ $0x30, R8
SHLQ $0x10, R9
IMULQ BX, R9
SHRQ $0x30, R9
// BX, DI = the values previously stored for the two hashes; then store AX and AX+1 in their place.
// NOTE(review): the hash (0..65535) is used as a byte offset with scale 1 while each
// entry is 4 bytes wide, so neighbouring entries overlap and the highest offsets reach
// past the 65536 bytes zeroed in the prologue — confirm this is intended.
MOVL 32(SP)(R8*1), BX
MOVL 32(SP)(R9*1), DI
MOVL AX, 32(SP)(R8*1)
LEAL 1(AX), R8
MOVL R8, 32(SP)(R9*1)
// R8 = AX - 24(SP). Compare the 4 source bytes at R8+1 with the low 32 bits of SI<<8.
MOVL AX, R8
SUBL 24(SP), R8
MOVL 1(CX)(R8*1), R10
MOVQ SI, R9
SHLQ $0x08, R9
CMPL R9, R10
JNE no_repeat_found_encodeBlockAsmAvx
// The comparison matched. SI = AX+1, BX = 20(SP); the backward loop is skipped when R8 is 0.
LEAQ 1(AX), SI
MOVL 20(SP), BX
TESTL R8, R8
JZ repeat_extend_back_end_encodeBlockAsmAvx
repeat_extend_back_loop_encodeBlockAsmAvx:
// Leave the loop when SI > BX (signed 32-bit compare).
// NOTE(review): SI starts at AX+1 and BX is 20(SP); check whether this condition is inverted.
CMPL SI, BX
JG repeat_extend_back_end_encodeBlockAsmAvx
// Compare the byte just before position R8 with the byte just before position SI; stop on mismatch.
MOVB -1(CX)(R8*1), DL
MOVB -1(CX)(SI*1), DI
CMPB DL, DI
JNE repeat_extend_back_end_encodeBlockAsmAvx
// Step both positions back by one; stop when R8 reaches 0.
LEAQ -1(SI), SI
DECL R8
JZ repeat_extend_back_end_encodeBlockAsmAvx
JMP repeat_extend_back_loop_encodeBlockAsmAvx
repeat_extend_back_end_encodeBlockAsmAvx:
// Nothing to copy when SI equals 20(SP).
MOVL 20(SP), BX
CMPL BX, SI
JEQ emit_literal_skip_repeat_emit_encodeBlockAsmAvx
// DI = SI - old 20(SP) (byte count for the copy below), R8 = src + old 20(SP) (copy source),
// 20(SP) = SI, BX = current dst write position.
MOVL SI, DI
MOVL SI, 20(SP)
LEAQ (CX)(BX*1), R8
SUBL BX, DI
MOVQ dst_base+0(FP), BX
// R9 = count - 1 (JC is taken if the count was 0). The header size depends on its magnitude.
MOVQ DI, R9
SUBL $0x01, R9
JC emit_literal_done_repeat_emit_encodeBlockAsmAvx
CMPL R9, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsmAvx
CMPL R9, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsmAvx
CMPL R9, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsmAvx
CMPL R9, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsmAvx
// count-1 >= 1<<24: byte 0xfc followed by count-1 as 4 bytes (5 header bytes).
MOVB $0xfc, (BX)
MOVL R9, 1(BX)
ADDQ $0x05, BX
JMP memmove_repeat_emit_encodeBlockAsmAvx
four_bytes_repeat_emit_encodeBlockAsmAvx:
// count-1 < 1<<24: byte 0xf8 followed by the low 3 bytes of count-1 (4 header bytes).
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (BX)
MOVW R9, 1(BX)
MOVB R10, 3(BX)
ADDQ $0x04, BX
JMP memmove_repeat_emit_encodeBlockAsmAvx
three_bytes_repeat_emit_encodeBlockAsmAvx:
// count-1 < 1<<16: byte 0xf4 followed by count-1 as 2 bytes (3 header bytes).
MOVB $0xf4, (BX)
MOVW R9, 1(BX)
ADDQ $0x03, BX
JMP memmove_repeat_emit_encodeBlockAsmAvx
two_bytes_repeat_emit_encodeBlockAsmAvx:
// count-1 < 256: byte 0xf0 followed by count-1 as 1 byte (2 header bytes).
MOVB $0xf0, (BX)
MOVB R9, 1(BX)
ADDQ $0x02, BX
JMP memmove_repeat_emit_encodeBlockAsmAvx
one_byte_repeat_emit_encodeBlockAsmAvx:
// count-1 < 60: a single header byte, (count-1) << 2.
SHLB $0x02, R9
MOVB R9, (BX)
ADDQ $0x01, BX
memmove_repeat_emit_encodeBlockAsmAvx:
// R9 = BX + DI, the dst address just past the bytes about to be copied.
LEAQ (BX)(DI*1), R9
NOP
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail:
TESTQ DI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx
CMPQ DI, $0x02
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2
CMPQ DI, $0x04
JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4
CMPQ DI, $0x08
JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7
JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32
CMPQ DI, $0x40
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64
CMPQ DI, $0x80
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128
CMPQ DI, $0x00000100
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256
JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2:
MOVB (R8), R9
MOVB -1(R8)(DI*1), R10
MOVB R9, (BX)
MOVB R10, -1(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4:
MOVL (R8), R9
MOVL R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3:
MOVW (R8), R9
MOVB 2(R8), R10
MOVW R9, (BX)
MOVB R10, 2(BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7:
MOVL (R8), R9
MOVL -4(R8)(DI*1), R10
MOVL R9, (BX)
MOVL R10, -4(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R10
MOVQ R9, (BX)
MOVQ R10, -8(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (BX)
MOVOU X1, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, -32(BX)(DI*1)
MOVOU X3, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU -128(R8)(DI*1), X8
MOVOU -112(R8)(DI*1), X9
MOVOU -96(R8)(DI*1), X10
MOVOU -80(R8)(DI*1), X11
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, -128(BX)(DI*1)
MOVOU X9, -112(BX)(DI*1)
MOVOU X10, -96(BX)(DI*1)
MOVOU X11, -80(BX)(DI*1)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048:
LEAQ -256(DI), DI
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU 128(R8), X8
MOVOU 144(R8), X9
MOVOU 160(R8), X10
MOVOU 176(R8), X11
MOVOU 192(R8), X12
MOVOU 208(R8), X13
MOVOU 224(R8), X14
MOVOU 240(R8), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, 128(BX)
MOVOU X9, 144(BX)
MOVOU X10, 160(BX)
MOVOU X11, 176(BX)
MOVOU X12, 192(BX)
MOVOU X13, 208(BX)
MOVOU X14, 224(BX)
MOVOU X15, 240(BX)
CMPQ DI, $0x00000100
LEAQ 256(R8), R8
LEAQ 256(BX), BX
JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048
JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
// Copy path for counts above 0x100 (the size dispatch jumps here after all smaller
// classes were ruled out). R8 = source, BX = destination, DI = byte count.
// R10 = source end, R12 = original destination.
LEAQ (R8)(DI*1), R10
MOVQ BX, R12
// X5..X12 receive the last 128 source bytes; they are stored after the loop.
MOVOU -128(R10), X5
MOVOU -112(R10), X6
// R9 = 128, the loop stride.
MOVQ $0x00000080, R9
// Move BX up to the next 32-byte boundary above the original destination.
// NOTE(review): the mask is spelled 0xffffffe0 rather than -32; confirm the assembler
// encodes it so that the upper 32 bits of the pointer are preserved.
ANDQ $0xffffffe0, BX
ADDQ $0x20, BX
MOVOU -96(R10), X7
MOVOU -80(R10), X8
// R11 = distance from the original destination to the aligned one.
MOVQ BX, R11
SUBQ R12, R11
MOVOU -64(R10), X9
MOVOU -48(R10), X10
// The remaining count no longer includes the skipped head bytes.
SUBQ R11, DI
MOVOU -32(R10), X11
MOVOU -16(R10), X12
// Y4 = first 32 source bytes; stored to the original destination (R12) after the loop.
VMOVDQU (R8), Y4
ADDQ R11, R8
// Subtract one stride up front so the loop can branch on the flags of its own SUBQ.
SUBQ R9, DI
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
// 128 bytes per iteration: unaligned loads from R8, aligned stores to BX.
VMOVDQU (R8), Y0
VMOVDQU 32(R8), Y1
VMOVDQU 64(R8), Y2
VMOVDQU 96(R8), Y3
ADDQ R9, R8
VMOVDQA Y0, (BX)
VMOVDQA Y1, 32(BX)
VMOVDQA Y2, 64(BX)
VMOVDQA Y3, 96(BX)
ADDQ R9, BX
SUBQ R9, DI
JA emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop
// Undo the up-front subtraction; DI = BX + leftover count = destination end.
ADDQ R9, DI
ADDQ BX, DI
// Store the saved head, clear the upper YMM halves before the SSE stores,
// then store the saved 128-byte tail so that it ends at DI.
VMOVDQU Y4, (R12)
VZEROUPPER
MOVOU X5, -128(DI)
MOVOU X6, -112(DI)
MOVOU X7, -96(DI)
MOVOU X8, -80(DI)
MOVOU X9, -64(DI)
MOVOU X10, -48(DI)
MOVOU X11, -32(DI)
MOVOU X12, -16(DI)
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
// NOTE(review): not reachable — it follows an unconditional JMP and carries no label —
// so BX reaches emit_literal_done with whatever value the copy path left in it.
// Confirm whether the copy paths were meant to fall through here.
MOVQ R9, BX
emit_literal_done_repeat_emit_encodeBlockAsmAvx:
// Store BX as the new dst write position.
MOVQ BX, dst_base+0(FP)
emit_literal_skip_repeat_emit_encodeBlockAsmAvx:
ADDL $0x05, AX
// NOTE(review): BX = AX - 24(SP) is computed here and overwritten by the very next load.
MOVL AX, BX
SUBL 24(SP), BX
// BX = 16(SP) - AX, the number of bytes the loops below may examine.
MOVL 16(SP), BX
SUBL AX, BX
// R8 counts matching bytes. Use the 8-byte loop while at least 8 bytes remain.
XORQ R8, R8
CMPQ BX, $0x08
JL matchlen_single_repeat_extend
matchlen_loopback_repeat_extend:
// NOTE(review): the load and the XOR address the same location, (CX)(R8*1), so DI is
// always 0 and the JZ below is always taken; the BSFQ/SARQ mismatch path cannot run.
// Presumably the two operands were meant to use different base pointers — confirm
// against the generator.
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_repeat_extend
// Index of the lowest set bit divided by 8 = number of equal low-order bytes.
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP repeat_extend_forward_end_encodeBlockAsmAvx
matchlen_loop_repeat_extend:
// All 8 bytes equal: consume them and repeat while at least 8 remain.
LEAQ -8(BX), BX
LEAQ 8(R8), R8
CMPQ BX, $0x08
JGE matchlen_loopback_repeat_extend
matchlen_single_repeat_extend:
TESTQ BX, BX
JZ repeat_extend_forward_end_encodeBlockAsmAvx
matchlen_single_loopback_repeat_extend:
// One byte at a time for the last BX bytes.
// NOTE(review): as above, the byte is compared with itself, so JNE is never taken.
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE repeat_extend_forward_end_encodeBlockAsmAvx
LEAQ 1(R8), R8
DECQ BX
JNZ matchlen_single_loopback_repeat_extend
repeat_extend_forward_end_encodeBlockAsmAvx:
ADDL R8, AX
MOVL AX, BX
SUBL SI, BX
MOVL 24(SP), SI
MOVQ dst_base+0(FP), DI
MOVL 20(SP), R8
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsmAvx
emit_repeat_again_match_repeat_:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_match_repeat_
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_
CMPL SI, $0x00000800
JLT repeat_two_offset_match_repeat_
cant_repeat_two_offset_match_repeat_:
CMPL BX, $0x00000104
JLT repeat_three_match_repeat_
CMPL BX, $0x00010100
JLT repeat_four_match_repeat_
CMPL BX, $0x0100ffff
JLT repeat_five_match_repeat_
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_repeat_
repeat_five_match_repeat_:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_four_match_repeat_:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_three_match_repeat_:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_two_match_repeat_:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_two_offset_match_repeat_:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_as_copy_encodeBlockAsmAvx:
CMPL SI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx
CMPL BX, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(BX), BX
ADDQ $0x05, DI
CMPL BX, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx:
TESTL BX, BX
JZ repeat_end_emit_encodeBlockAsmAvx
MOVB $0x03, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsmAvx
two_byte_offset_repeat_as_copy_encodeBlockAsmAvx:
CMPL BX, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(BX), BX
ADDQ $0x03, DI
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx:
CMPL BX, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
CMPL SI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
MOVB $0x01, DL
LEAQ -16(DX)(BX*4), BX
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsmAvx
emit_copy_three_repeat_as_copy_encodeBlockAsmAvx:
MOVB $0x02, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
repeat_end_emit_encodeBlockAsmAvx:
MOVQ DI, dst_base+0(FP)
MOVL 16(SP), BX
CMPL AX, BX
JGT emit_remainder_encodeBlockAsmAvx
JMP search_loop_encodeBlockAsmAvx
no_repeat_found_encodeBlockAsmAvx:
// Entered from the search loop with SI = the 8 source bytes at AX, and BX, DI = the
// table values read for the hashes of the bytes at AX and at AX+1.
// R8 = hash of the bytes starting at AX+2, built the same way as the two search-loop hashes.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ SI, R8
SHRQ $0x10, R8
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x30, R8
// Compare the 4 bytes at src[BX] with the low 32 bits of SI, then shift SI down one byte.
// NOTE(review): SHRQ with a non-zero count updates ZF, so the JEQ below observes the
// shifted value rather than the CMPL result — confirm the instruction order.
CMPL (CX)(BX*1), SI
SHRQ $0x08, SI
JEQ candidate_match_encodeBlockAsmAvx
// BX = table value for the AX+2 hash; test the second candidate (DI) against the bytes at AX+1.
MOVL 32(SP)(R8*1), BX
CMPL (CX)(DI*1), SI
JEQ candidate2_match_encodeBlockAsmAvx
// Store AX+2 in the table, shift SI to the bytes at AX+2 and test the third candidate (BX).
LEAQ 2(AX), DI
MOVL DI, 32(SP)(R8*1)
SHRQ $0x08, SI
CMPL (CX)(BX*1), SI
JEQ candidate3_match_encodeBlockAsmAvx
// No candidate matched: continue the search at the position saved in 28(SP).
MOVL 28(SP), AX
JMP search_loop_encodeBlockAsmAvx
candidate3_match_encodeBlockAsmAvx:
// Third candidate matched: AX += 2; BX already holds that candidate.
ADDL $0x02, AX
JMP candidate_match_encodeBlockAsmAvx
candidate2_match_encodeBlockAsmAvx:
// Second candidate matched: store AX-2 in the AX+2 hash slot, advance AX by one,
// and continue with DI as the candidate.
LEAQ -2(AX), BX
MOVL BX, 32(SP)(R8*1)
INCL AX
MOVL DI, BX
candidate_match_encodeBlockAsmAvx:
// AX = current source position, BX = candidate position. SI = 20(SP).
// The backward loop is skipped when the candidate position is 0.
MOVL 20(SP), SI
TESTL BX, BX
JZ match_extend_back_end_encodeBlockAsmAvx
match_extend_back_loop_encodeBlockAsmAvx:
// Leave the loop when AX > SI (signed 32-bit compare).
// NOTE(review): SI is 20(SP); check whether this condition is inverted.
CMPL AX, SI
JG match_extend_back_end_encodeBlockAsmAvx
// Compare the byte just before the candidate with the byte just before AX; stop on mismatch.
MOVB -1(CX)(BX*1), DL
MOVB -1(CX)(AX*1), DI
CMPB DL, DI
JNE match_extend_back_end_encodeBlockAsmAvx
// Step both positions back by one; stop when the candidate position reaches 0.
LEAL -1(AX), AX
DECL BX
JZ match_extend_back_end_encodeBlockAsmAvx
JMP match_extend_back_loop_encodeBlockAsmAvx
match_extend_back_end_encodeBlockAsmAvx:
// SI = AX - 20(SP).
MOVL AX, SI
SUBL 20(SP), SI
// NOTE(review): LEAQ yields the address of the dst_base argument slot plus SI, not the
// pointer stored in that slot plus SI — confirm which value the comparison with (SP) needs.
LEAQ dst_base+0(FP)(SI*1), SI
CMPQ SI, (SP)
JL match_dst_size_check_encodeBlockAsmAvx
// Bound reached: return 0.
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsmAvx:
MOVL BX, SI
MOVL 20(SP), DI
CMPL DI, SI
JEQ emit_literal_skip_match_emit_encodeBlockAsmAvx
MOVL SI, R8
MOVL SI, 20(SP)
LEAQ (CX)(DI*1), SI
SUBL DI, R8
MOVQ dst_base+0(FP), DI
MOVQ R8, R9
SUBL $0x01, R9
JC emit_literal_done_match_emit_encodeBlockAsmAvx
CMPL R9, $0x3c
JLT one_byte_match_emit_encodeBlockAsmAvx
CMPL R9, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsmAvx
CMPL R9, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsmAvx
CMPL R9, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsmAvx
MOVB $0xfc, (DI)
MOVL R9, 1(DI)
ADDQ $0x05, DI
JMP memmove_match_emit_encodeBlockAsmAvx
four_bytes_match_emit_encodeBlockAsmAvx:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (DI)
MOVW R9, 1(DI)
MOVB R10, 3(DI)
ADDQ $0x04, DI
JMP memmove_match_emit_encodeBlockAsmAvx
three_bytes_match_emit_encodeBlockAsmAvx:
MOVB $0xf4, (DI)
MOVW R9, 1(DI)
ADDQ $0x03, DI
JMP memmove_match_emit_encodeBlockAsmAvx
two_bytes_match_emit_encodeBlockAsmAvx:
MOVB $0xf0, (DI)
MOVB R9, 1(DI)
ADDQ $0x02, DI
JMP memmove_match_emit_encodeBlockAsmAvx
one_byte_match_emit_encodeBlockAsmAvx:
SHLB $0x02, R9
MOVB R9, (DI)
ADDQ $0x01, DI
memmove_match_emit_encodeBlockAsmAvx:
LEAQ (DI)(R8*1), R9
NOP
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail:
TESTQ R8, R8
JEQ emit_literal_done_match_emit_encodeBlockAsmAvx
CMPQ R8, $0x02
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2
CMPQ R8, $0x04
JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4
CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7
JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32
CMPQ R8, $0x40
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64
CMPQ R8, $0x80
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128
CMPQ R8, $0x00000100
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256
JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2:
MOVB (SI), R9
MOVB -1(SI)(R8*1), R10
MOVB R9, (DI)
MOVB R10, -1(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4:
MOVL (SI), R9
MOVL R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3:
MOVW (SI), R9
MOVB 2(SI), R10
MOVW R9, (DI)
MOVB R10, 2(DI)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7:
MOVL (SI), R9
MOVL -4(SI)(R8*1), R10
MOVL R9, (DI)
MOVL R10, -4(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8:
MOVQ (SI), R9
MOVQ R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16:
MOVQ (SI), R9
MOVQ -8(SI)(R8*1), R10
MOVQ R9, (DI)
MOVQ R10, -8(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32:
MOVOU (SI), X0
MOVOU -16(SI)(R8*1), X1
MOVOU X0, (DI)
MOVOU X1, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU -32(SI)(R8*1), X2
MOVOU -16(SI)(R8*1), X3
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, -32(DI)(R8*1)
MOVOU X3, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU -128(SI)(R8*1), X8
MOVOU -112(SI)(R8*1), X9
MOVOU -96(SI)(R8*1), X10
MOVOU -80(SI)(R8*1), X11
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, -128(DI)(R8*1)
MOVOU X9, -112(DI)(R8*1)
MOVOU X10, -96(DI)(R8*1)
MOVOU X11, -80(DI)(R8*1)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048:
LEAQ -256(R8), R8
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU 128(SI), X8
MOVOU 144(SI), X9
MOVOU 160(SI), X10
MOVOU 176(SI), X11
MOVOU 192(SI), X12
MOVOU 208(SI), X13
MOVOU 224(SI), X14
MOVOU 240(SI), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, 128(DI)
MOVOU X9, 144(DI)
MOVOU X10, 160(DI)
MOVOU X11, 176(DI)
MOVOU X12, 192(DI)
MOVOU X13, 208(DI)
MOVOU X14, 224(DI)
MOVOU X15, 240(DI)
CMPQ R8, $0x00000100
LEAQ 256(SI), SI
LEAQ 256(DI), DI
JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048
JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
LEAQ (SI)(R8*1), R10
MOVQ DI, R12
MOVOU -128(R10), X5
MOVOU -112(R10), X6
MOVQ $0x00000080, R9
ANDQ $0xffffffe0, DI
ADDQ $0x20, DI
MOVOU -96(R10), X7
MOVOU -80(R10), X8
MOVQ DI, R11
SUBQ R12, R11
MOVOU -64(R10), X9
MOVOU -48(R10), X10
SUBQ R11, R8
MOVOU -32(R10), X11
MOVOU -16(R10), X12
VMOVDQU (SI), Y4
ADDQ R11, SI
SUBQ R9, R8
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
VMOVDQU (SI), Y0
VMOVDQU 32(SI), Y1
VMOVDQU 64(SI), Y2
VMOVDQU 96(SI), Y3
ADDQ R9, SI
VMOVDQA Y0, (DI)
VMOVDQA Y1, 32(DI)
VMOVDQA Y2, 64(DI)
VMOVDQA Y3, 96(DI)
ADDQ R9, DI
SUBQ R9, R8
JA emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop
ADDQ R9, R8
ADDQ DI, R8
VMOVDQU Y4, (R12)
VZEROUPPER
MOVOU X5, -128(R8)
MOVOU X6, -112(R8)
MOVOU X7, -96(R8)
MOVOU X8, -80(R8)
MOVOU X9, -64(R8)
MOVOU X10, -48(R8)
MOVOU X11, -32(R8)
MOVOU X12, -16(R8)
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
MOVQ R9, DI
emit_literal_done_match_emit_encodeBlockAsmAvx:
// Store DI as the new dst write position.
MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsmAvx:
NOP
match_nolit_loop_encodeBlockAsmAvx:
// 24(SP) = AX - BX, the distance between the current position and the candidate.
// NOTE(review): the MOVL AX, SI is emitted twice; the second one is redundant.
MOVL AX, SI
MOVL AX, SI
SUBL BX, SI
MOVL SI, 24(SP)
// Advance both positions by 4.
ADDL $0x04, AX
ADDL $0x04, BX
// SI = 16(SP) - AX, the number of bytes the loops below may examine.
MOVL 16(SP), SI
SUBL AX, SI
// R8 counts matching bytes. Use the 8-byte loop while at least 8 bytes remain.
XORQ R8, R8
CMPQ SI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsmAvx
matchlen_loopback_match_nolit_encodeBlockAsmAvx:
// NOTE(review): the load and the XOR address the same location, (CX)(R8*1), so DI is
// always 0 and the JZ below is always taken; the BSFQ/SARQ mismatch path cannot run.
// Neither AX nor BX takes part in the addressing — confirm against the generator.
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_match_nolit_encodeBlockAsmAvx
// Index of the lowest set bit divided by 8 = number of equal low-order bytes.
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP match_nolit_end_encodeBlockAsmAvx
matchlen_loop_match_nolit_encodeBlockAsmAvx:
// All 8 bytes equal: consume them and repeat while at least 8 remain.
LEAQ -8(SI), SI
LEAQ 8(R8), R8
CMPQ SI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx
matchlen_single_match_nolit_encodeBlockAsmAvx:
TESTQ SI, SI
JZ match_nolit_end_encodeBlockAsmAvx
matchlen_single_loopback_match_nolit_encodeBlockAsmAvx:
// One byte at a time for the last SI bytes.
// NOTE(review): as above, the byte is compared with itself, so JNE is never taken.
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE match_nolit_end_encodeBlockAsmAvx
LEAQ 1(R8), R8
DECQ SI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx
match_nolit_end_encodeBlockAsmAvx:
MOVL 24(SP), SI
ADDQ $0x04, R8
MOVQ dst_base+0(FP), DI
ADDL R8, AX
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsmAvx
CMPL R8, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(R8), R8
ADDQ $0x05, DI
CMPL R8, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsmAvx
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy
repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
four_bytes_remain_match_nolit_encodeBlockAsmAvx:
TESTL R8, R8
JZ match_nolit_emitcopy_end_encodeBlockAsmAvx
MOVB $0x03, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
two_byte_offset_match_nolit_encodeBlockAsmAvx:
CMPL R8, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(R8), R8
ADDQ $0x03, DI
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short
repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
two_byte_offset_short_match_nolit_encodeBlockAsmAvx:
CMPL R8, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
MOVB $0x01, DL
LEAQ -16(DX)(R8*4), R8
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
emit_copy_three_match_nolit_encodeBlockAsmAvx:
MOVB $0x02, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
match_nolit_emitcopy_end_encodeBlockAsmAvx:
// Store the dst write position and set 20(SP) = AX.
MOVQ DI, dst_base+0(FP)
MOVL AX, 20(SP)
// AX >= 16(SP): go to the remainder handling.
CMPL AX, 16(SP)
JGE emit_remainder_encodeBlockAsmAvx
// Return 0 when the dst write position is not below the bound in (SP).
CMPQ DI, (SP)
JL match_nolit_dst_ok_encodeBlockAsmAvx
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsmAvx:
// SI = the 8 source bytes at src[AX-2]. R8 = hash of the bytes at AX-2, R9 = hash of the
// bytes at AX (same shift/multiply/shift scheme as the search loop). After the SHRQ,
// SI holds the bytes starting at AX.
MOVQ -2(CX)(AX*1), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ SI, R8
SHRQ $0x10, SI
MOVQ SI, R9
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x30, R8
SHLQ $0x10, R9
IMULQ DI, R9
SHRQ $0x30, R9
// NOTE(review): both loads write DI, and DI is overwritten by the LEAQ that follows,
// so neither loaded table value is used.
MOVL 32(SP)(R8*1), DI
MOVL 32(SP)(R9*1), DI
// Store AX-2 under the first hash and AX under the second.
LEAQ -2(AX), DI
MOVL DI, 32(SP)(R8*1)
MOVL AX, 32(SP)(R9*1)
// NOTE(review): R9 still holds the hash here, so this compares the 4 bytes at src[hash]
// with SI rather than the bytes at a position read from the table — confirm.
CMPL (CX)(R9*1), SI
JEQ match_nolit_loop_encodeBlockAsmAvx
// No immediate match: advance one byte and resume the search.
INCL AX
JMP search_loop_encodeBlockAsmAvx
emit_remainder_encodeBlockAsmAvx:
// AX = src_len - 20(SP) (32-bit subtraction); DX = current dst write position + AX.
MOVQ src_len+32(FP), AX
SUBL 20(SP), AX
MOVQ dst_base+0(FP), DX
LEAQ (DX)(AX*1), DX
// Return 0 unless DX is below the bound kept in (SP).
CMPQ DX, (SP)
JL emit_remainder_ok_encodeBlockAsmAvx
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsmAvx:
MOVQ src_len+32(FP), AX
MOVL 20(SP), DX
CMPL DX, AX
JEQ emit_literal_skip_emit_remainder_encodeBlockAsmAvx
MOVL AX, BX
MOVL AX, 20(SP)
LEAQ (CX)(DX*1), AX
SUBL DX, BX
MOVQ dst_base+0(FP), CX
MOVQ BX, DX
SUBL $0x01, DX
JC emit_literal_done_emit_remainder_encodeBlockAsmAvx
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsmAvx
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsmAvx
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsmAvx
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsmAvx
MOVB $0xfc, (CX)
MOVL DX, 1(CX)
ADDQ $0x05, CX
JMP memmove_emit_remainder_encodeBlockAsmAvx
four_bytes_emit_remainder_encodeBlockAsmAvx:
MOVQ DX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP memmove_emit_remainder_encodeBlockAsmAvx
three_bytes_emit_remainder_encodeBlockAsmAvx:
MOVB $0xf4, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
JMP memmove_emit_remainder_encodeBlockAsmAvx
two_bytes_emit_remainder_encodeBlockAsmAvx:
MOVB $0xf0, (CX)
MOVB DL, 1(CX)
ADDQ $0x02, CX
JMP memmove_emit_remainder_encodeBlockAsmAvx
one_byte_emit_remainder_encodeBlockAsmAvx:
SHLB $0x02, DL
MOVB DL, (CX)
ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsmAvx:
LEAQ (CX)(BX*1), DX
NOP
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail:
TESTQ BX, BX
JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx
CMPQ BX, $0x02
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2
CMPQ BX, $0x04
JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7
JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32
CMPQ BX, $0x40
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64
CMPQ BX, $0x80
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128
CMPQ BX, $0x00000100
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256
JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2:
MOVB (AX), DL
MOVB -1(AX)(BX*1), SI
MOVB DL, (CX)
MOVB SI, -1(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4:
MOVL (AX), DX
MOVL DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3:
MOVW (AX), DX
MOVB 2(AX), SI
MOVW DX, (CX)
MOVB SI, 2(CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7:
MOVL (AX), DX
MOVL -4(AX)(BX*1), SI
MOVL DX, (CX)
MOVL SI, -4(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8:
MOVQ (AX), DX
MOVQ DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16:
MOVQ (AX), DX
MOVQ -8(AX)(BX*1), SI
MOVQ DX, (CX)
MOVQ SI, -8(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU -128(AX)(BX*1), X8
MOVOU -112(AX)(BX*1), X9
MOVOU -96(AX)(BX*1), X10
MOVOU -80(AX)(BX*1), X11
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, -128(CX)(BX*1)
MOVOU X9, -112(CX)(BX*1)
MOVOU X10, -96(CX)(BX*1)
MOVOU X11, -80(CX)(BX*1)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048:
LEAQ -256(BX), BX
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 128(AX), X8
MOVOU 144(AX), X9
MOVOU 160(AX), X10
MOVOU 176(AX), X11
MOVOU 192(AX), X12
MOVOU 208(AX), X13
MOVOU 224(AX), X14
MOVOU 240(AX), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, 128(CX)
MOVOU X9, 144(CX)
MOVOU X10, 160(CX)
MOVOU X11, 176(CX)
MOVOU X12, 192(CX)
MOVOU X13, 208(CX)
MOVOU X14, 224(CX)
MOVOU X15, 240(CX)
CMPQ BX, $0x00000100
LEAQ 256(AX), AX
LEAQ 256(CX), CX
JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048
JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned:
LEAQ (AX)(BX*1), SI
MOVQ CX, R8
MOVOU -128(SI), X5
MOVOU -112(SI), X6
MOVQ $0x00000080, DX
ANDQ $0xffffffe0, CX
ADDQ $0x20, CX
MOVOU -96(SI), X7
MOVOU -80(SI), X8
MOVQ CX, DI
SUBQ R8, DI
MOVOU -64(SI), X9
MOVOU -48(SI), X10
SUBQ DI, BX
MOVOU -32(SI), X11
MOVOU -16(SI), X12
VMOVDQU (AX), Y4
ADDQ DI, AX
SUBQ DX, BX
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop:
VMOVDQU (AX), Y0
VMOVDQU 32(AX), Y1
VMOVDQU 64(AX), Y2
VMOVDQU 96(AX), Y3
ADDQ DX, AX
VMOVDQA Y0, (CX)
VMOVDQA Y1, 32(CX)
VMOVDQA Y2, 64(CX)
VMOVDQA Y3, 96(CX)
ADDQ DX, CX
SUBQ DX, BX
JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop
ADDQ DX, BX
ADDQ CX, BX
VMOVDQU Y4, (R8)
VZEROUPPER
MOVOU X5, -128(BX)
MOVOU X6, -112(BX)
MOVOU X7, -96(BX)
MOVOU X8, -80(BX)
MOVOU X9, -64(BX)
MOVOU X10, -48(BX)
MOVOU X11, -32(BX)
MOVOU X12, -16(BX)
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsmAvx:
MOVQ CX, dst_base+0(FP)
emit_literal_skip_emit_remainder_encodeBlockAsmAvx:
MOVQ 8(SP), AX
SUBQ dst_base+0(FP), AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm14BAvx(dst []byte, src []byte) int
// Requires: AVX, SSE2
TEXT ·encodeBlockAsm14BAvx(SB), $16416-56
MOVQ $0x00000080, AX
LEAQ 32(SP), CX
PXOR X0, X0
zero_loop_encodeBlockAsm14BAvx:
MOVOU X0, (CX)
MOVOU X0, 16(CX)
MOVOU X0, 32(CX)
MOVOU X0, 48(CX)
MOVOU X0, 64(CX)
MOVOU X0, 80(CX)
MOVOU X0, 96(CX)
MOVOU X0, 112(CX)
ADDQ $0x80, CX
DECQ AX
JNZ zero_loop_encodeBlockAsm14BAvx
MOVL AX, 20(SP)
MOVQ src_len+32(FP), AX
LEAQ -5(AX), CX
LEAQ -8(AX), BX
SHRQ $0x05, AX
SUBL AX, CX
MOVL BX, 16(SP)
MOVQ dst_base+0(FP), AX
MOVQ AX, 8(SP)
LEAQ (AX)(CX*1), CX
MOVQ CX, (SP)
MOVL $0x00000001, AX
MOVL AX, 24(SP)
MOVQ src_base+24(FP), CX
search_loop_encodeBlockAsm14BAvx:
MOVQ (CX)(AX*1), SI
MOVL AX, BX
SUBL 20(SP), BX
SHRL $0x05, BX
LEAQ 4(AX)(BX*1), BX
MOVL 16(SP), DI
CMPL BX, DI
JGT emit_remainder_encodeBlockAsm14BAvx
MOVL BX, 28(SP)
MOVQ $0x0000cf1bbcdcbf9b, BX
MOVQ SI, R8
MOVQ SI, R9
SHRQ $0x08, R9
SHLQ $0x10, R8
IMULQ BX, R8
SHRQ $0x32, R8
SHLQ $0x10, R9
IMULQ BX, R9
SHRQ $0x32, R9
MOVL 32(SP)(R8*1), BX
MOVL 32(SP)(R9*1), DI
MOVL AX, 32(SP)(R8*1)
LEAL 1(AX), R8
MOVL R8, 32(SP)(R9*1)
MOVL AX, R8
SUBL 24(SP), R8
MOVL 1(CX)(R8*1), R10
MOVQ SI, R9
SHLQ $0x08, R9
CMPL R9, R10
JNE no_repeat_found_encodeBlockAsm14BAvx
LEAQ 1(AX), SI
MOVL 20(SP), BX
TESTL R8, R8
JZ repeat_extend_back_end_encodeBlockAsm14BAvx
repeat_extend_back_loop_encodeBlockAsm14BAvx:
CMPL SI, BX
JG repeat_extend_back_end_encodeBlockAsm14BAvx
MOVB -1(CX)(R8*1), DL
MOVB -1(CX)(SI*1), DI
CMPB DL, DI
JNE repeat_extend_back_end_encodeBlockAsm14BAvx
LEAQ -1(SI), SI
DECL R8
JZ repeat_extend_back_end_encodeBlockAsm14BAvx
JMP repeat_extend_back_loop_encodeBlockAsm14BAvx
repeat_extend_back_end_encodeBlockAsm14BAvx:
MOVL 20(SP), BX
CMPL BX, SI
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx
MOVL SI, DI
MOVL SI, 20(SP)
LEAQ (CX)(BX*1), R8
SUBL BX, DI
MOVQ dst_base+0(FP), BX
MOVQ DI, R9
SUBL $0x01, R9
JC emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
CMPL R9, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm14BAvx
CMPL R9, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm14BAvx
CMPL R9, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm14BAvx
CMPL R9, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsm14BAvx
MOVB $0xfc, (BX)
MOVL R9, 1(BX)
ADDQ $0x05, BX
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
four_bytes_repeat_emit_encodeBlockAsm14BAvx:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (BX)
MOVW R9, 1(BX)
MOVB R10, 3(BX)
ADDQ $0x04, BX
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
three_bytes_repeat_emit_encodeBlockAsm14BAvx:
MOVB $0xf4, (BX)
MOVW R9, 1(BX)
ADDQ $0x03, BX
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
two_bytes_repeat_emit_encodeBlockAsm14BAvx:
MOVB $0xf0, (BX)
MOVB R9, 1(BX)
ADDQ $0x02, BX
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
one_byte_repeat_emit_encodeBlockAsm14BAvx:
SHLB $0x02, R9
MOVB R9, (BX)
ADDQ $0x01, BX
memmove_repeat_emit_encodeBlockAsm14BAvx:
LEAQ (BX)(DI*1), R9
NOP
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail:
TESTQ DI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
CMPQ DI, $0x02
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2
CMPQ DI, $0x04
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4
CMPQ DI, $0x08
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7
JE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32
CMPQ DI, $0x40
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64
CMPQ DI, $0x80
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128
CMPQ DI, $0x00000100
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2:
MOVB (R8), R9
MOVB -1(R8)(DI*1), R10
MOVB R9, (BX)
MOVB R10, -1(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4:
MOVL (R8), R9
MOVL R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3:
MOVW (R8), R9
MOVB 2(R8), R10
MOVW R9, (BX)
MOVB R10, 2(BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7:
MOVL (R8), R9
MOVL -4(R8)(DI*1), R10
MOVL R9, (BX)
MOVL R10, -4(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R10
MOVQ R9, (BX)
MOVQ R10, -8(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (BX)
MOVOU X1, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, -32(BX)(DI*1)
MOVOU X3, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU -128(R8)(DI*1), X8
MOVOU -112(R8)(DI*1), X9
MOVOU -96(R8)(DI*1), X10
MOVOU -80(R8)(DI*1), X11
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, -128(BX)(DI*1)
MOVOU X9, -112(BX)(DI*1)
MOVOU X10, -96(BX)(DI*1)
MOVOU X11, -80(BX)(DI*1)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048:
LEAQ -256(DI), DI
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU 128(R8), X8
MOVOU 144(R8), X9
MOVOU 160(R8), X10
MOVOU 176(R8), X11
MOVOU 192(R8), X12
MOVOU 208(R8), X13
MOVOU 224(R8), X14
MOVOU 240(R8), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, 128(BX)
MOVOU X9, 144(BX)
MOVOU X10, 160(BX)
MOVOU X11, 176(BX)
MOVOU X12, 192(BX)
MOVOU X13, 208(BX)
MOVOU X14, 224(BX)
MOVOU X15, 240(BX)
CMPQ DI, $0x00000100
LEAQ 256(R8), R8
LEAQ 256(BX), BX
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned:
LEAQ (R8)(DI*1), R10
MOVQ BX, R12
MOVOU -128(R10), X5
MOVOU -112(R10), X6
MOVQ $0x00000080, R9
ANDQ $0xffffffe0, BX
ADDQ $0x20, BX
MOVOU -96(R10), X7
MOVOU -80(R10), X8
MOVQ BX, R11
SUBQ R12, R11
MOVOU -64(R10), X9
MOVOU -48(R10), X10
SUBQ R11, DI
MOVOU -32(R10), X11
MOVOU -16(R10), X12
VMOVDQU (R8), Y4
ADDQ R11, R8
SUBQ R9, DI
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
VMOVDQU (R8), Y0
VMOVDQU 32(R8), Y1
VMOVDQU 64(R8), Y2
VMOVDQU 96(R8), Y3
ADDQ R9, R8
VMOVDQA Y0, (BX)
VMOVDQA Y1, 32(BX)
VMOVDQA Y2, 64(BX)
VMOVDQA Y3, 96(BX)
ADDQ R9, BX
SUBQ R9, DI
JA emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop
ADDQ R9, DI
ADDQ BX, DI
VMOVDQU Y4, (R12)
VZEROUPPER
MOVOU X5, -128(DI)
MOVOU X6, -112(DI)
MOVOU X7, -96(DI)
MOVOU X8, -80(DI)
MOVOU X9, -64(DI)
MOVOU X10, -48(DI)
MOVOU X11, -32(DI)
MOVOU X12, -16(DI)
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
MOVQ R9, BX
emit_literal_done_repeat_emit_encodeBlockAsm14BAvx:
MOVQ BX, dst_base+0(FP)
emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx:
ADDL $0x05, AX
MOVL AX, BX
SUBL 24(SP), BX
MOVL 16(SP), BX
SUBL AX, BX
XORQ R8, R8
CMPQ BX, $0x08
JL matchlen_single_repeat_extend
matchlen_loopback_repeat_extend:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_repeat_extend
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP repeat_extend_forward_end_encodeBlockAsm14BAvx
matchlen_loop_repeat_extend:
LEAQ -8(BX), BX
LEAQ 8(R8), R8
CMPQ BX, $0x08
JGE matchlen_loopback_repeat_extend
matchlen_single_repeat_extend:
TESTQ BX, BX
JZ repeat_extend_forward_end_encodeBlockAsm14BAvx
matchlen_single_loopback_repeat_extend:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE repeat_extend_forward_end_encodeBlockAsm14BAvx
LEAQ 1(R8), R8
DECQ BX
JNZ matchlen_single_loopback_repeat_extend
repeat_extend_forward_end_encodeBlockAsm14BAvx:
ADDL R8, AX
MOVL AX, BX
SUBL SI, BX
MOVL 24(SP), SI
MOVQ dst_base+0(FP), DI
MOVL 20(SP), R8
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm14BAvx
emit_repeat_again_match_repeat_:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_match_repeat_
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_
CMPL SI, $0x00000800
JLT repeat_two_offset_match_repeat_
cant_repeat_two_offset_match_repeat_:
CMPL BX, $0x00000104
JLT repeat_three_match_repeat_
CMPL BX, $0x00010100
JLT repeat_four_match_repeat_
CMPL BX, $0x0100ffff
JLT repeat_five_match_repeat_
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_repeat_
repeat_five_match_repeat_:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_four_match_repeat_:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_three_match_repeat_:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_two_match_repeat_:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_two_offset_match_repeat_:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_as_copy_encodeBlockAsm14BAvx:
CMPL SI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx
CMPL BX, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(BX), BX
ADDQ $0x05, DI
CMPL BX, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx
emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx:
TESTL BX, BX
JZ repeat_end_emit_encodeBlockAsm14BAvx
MOVB $0x03, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx:
CMPL BX, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(BX), BX
ADDQ $0x03, DI
emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx:
CMPL BX, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx
CMPL SI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx
MOVB $0x01, DL
LEAQ -16(DX)(BX*4), BX
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm14BAvx
emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx:
MOVB $0x02, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
repeat_end_emit_encodeBlockAsm14BAvx:
MOVQ DI, dst_base+0(FP)
MOVL 16(SP), BX
CMPL AX, BX
JGT emit_remainder_encodeBlockAsm14BAvx
JMP search_loop_encodeBlockAsm14BAvx
no_repeat_found_encodeBlockAsm14BAvx:
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ SI, R8
SHRQ $0x10, R8
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x32, R8
CMPL (CX)(BX*1), SI
SHRQ $0x08, SI
JEQ candidate_match_encodeBlockAsm14BAvx
MOVL 32(SP)(R8*1), BX
CMPL (CX)(DI*1), SI
JEQ candidate2_match_encodeBlockAsm14BAvx
LEAQ 2(AX), DI
MOVL DI, 32(SP)(R8*1)
SHRQ $0x08, SI
CMPL (CX)(BX*1), SI
JEQ candidate3_match_encodeBlockAsm14BAvx
MOVL 28(SP), AX
JMP search_loop_encodeBlockAsm14BAvx
candidate3_match_encodeBlockAsm14BAvx:
ADDL $0x02, AX
JMP candidate_match_encodeBlockAsm14BAvx
candidate2_match_encodeBlockAsm14BAvx:
LEAQ -2(AX), BX
MOVL BX, 32(SP)(R8*1)
INCL AX
MOVL DI, BX
candidate_match_encodeBlockAsm14BAvx:
MOVL 20(SP), SI
TESTL BX, BX
JZ match_extend_back_end_encodeBlockAsm14BAvx
match_extend_back_loop_encodeBlockAsm14BAvx:
CMPL AX, SI
JG match_extend_back_end_encodeBlockAsm14BAvx
MOVB -1(CX)(BX*1), DL
MOVB -1(CX)(AX*1), DI
CMPB DL, DI
JNE match_extend_back_end_encodeBlockAsm14BAvx
LEAL -1(AX), AX
DECL BX
JZ match_extend_back_end_encodeBlockAsm14BAvx
JMP match_extend_back_loop_encodeBlockAsm14BAvx
match_extend_back_end_encodeBlockAsm14BAvx:
MOVL AX, SI
SUBL 20(SP), SI
LEAQ dst_base+0(FP)(SI*1), SI
CMPQ SI, (SP)
JL match_dst_size_check_encodeBlockAsm14BAvx
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm14BAvx:
MOVL BX, SI
MOVL 20(SP), DI
CMPL DI, SI
JEQ emit_literal_skip_match_emit_encodeBlockAsm14BAvx
MOVL SI, R8
MOVL SI, 20(SP)
LEAQ (CX)(DI*1), SI
SUBL DI, R8
MOVQ dst_base+0(FP), DI
MOVQ R8, R9
SUBL $0x01, R9
JC emit_literal_done_match_emit_encodeBlockAsm14BAvx
CMPL R9, $0x3c
JLT one_byte_match_emit_encodeBlockAsm14BAvx
CMPL R9, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm14BAvx
CMPL R9, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm14BAvx
CMPL R9, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsm14BAvx
MOVB $0xfc, (DI)
MOVL R9, 1(DI)
ADDQ $0x05, DI
JMP memmove_match_emit_encodeBlockAsm14BAvx
four_bytes_match_emit_encodeBlockAsm14BAvx:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (DI)
MOVW R9, 1(DI)
MOVB R10, 3(DI)
ADDQ $0x04, DI
JMP memmove_match_emit_encodeBlockAsm14BAvx
three_bytes_match_emit_encodeBlockAsm14BAvx:
MOVB $0xf4, (DI)
MOVW R9, 1(DI)
ADDQ $0x03, DI
JMP memmove_match_emit_encodeBlockAsm14BAvx
two_bytes_match_emit_encodeBlockAsm14BAvx:
MOVB $0xf0, (DI)
MOVB R9, 1(DI)
ADDQ $0x02, DI
JMP memmove_match_emit_encodeBlockAsm14BAvx
one_byte_match_emit_encodeBlockAsm14BAvx:
SHLB $0x02, R9
MOVB R9, (DI)
ADDQ $0x01, DI
memmove_match_emit_encodeBlockAsm14BAvx:
LEAQ (DI)(R8*1), R9
NOP
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail:
TESTQ R8, R8
JEQ emit_literal_done_match_emit_encodeBlockAsm14BAvx
CMPQ R8, $0x02
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2
CMPQ R8, $0x04
JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4
CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7
JE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32
CMPQ R8, $0x40
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64
CMPQ R8, $0x80
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128
CMPQ R8, $0x00000100
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256
JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2:
MOVB (SI), R9
MOVB -1(SI)(R8*1), R10
MOVB R9, (DI)
MOVB R10, -1(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4:
MOVL (SI), R9
MOVL R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3:
MOVW (SI), R9
MOVB 2(SI), R10
MOVW R9, (DI)
MOVB R10, 2(DI)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7:
MOVL (SI), R9
MOVL -4(SI)(R8*1), R10
MOVL R9, (DI)
MOVL R10, -4(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8:
MOVQ (SI), R9
MOVQ R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16:
MOVQ (SI), R9
MOVQ -8(SI)(R8*1), R10
MOVQ R9, (DI)
MOVQ R10, -8(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32:
MOVOU (SI), X0
MOVOU -16(SI)(R8*1), X1
MOVOU X0, (DI)
MOVOU X1, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU -32(SI)(R8*1), X2
MOVOU -16(SI)(R8*1), X3
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, -32(DI)(R8*1)
MOVOU X3, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU -128(SI)(R8*1), X8
MOVOU -112(SI)(R8*1), X9
MOVOU -96(SI)(R8*1), X10
MOVOU -80(SI)(R8*1), X11
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, -128(DI)(R8*1)
MOVOU X9, -112(DI)(R8*1)
MOVOU X10, -96(DI)(R8*1)
MOVOU X11, -80(DI)(R8*1)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048:
LEAQ -256(R8), R8
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU 128(SI), X8
MOVOU 144(SI), X9
MOVOU 160(SI), X10
MOVOU 176(SI), X11
MOVOU 192(SI), X12
MOVOU 208(SI), X13
MOVOU 224(SI), X14
MOVOU 240(SI), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, 128(DI)
MOVOU X9, 144(DI)
MOVOU X10, 160(DI)
MOVOU X11, 176(DI)
MOVOU X12, 192(DI)
MOVOU X13, 208(DI)
MOVOU X14, 224(DI)
MOVOU X15, 240(DI)
CMPQ R8, $0x00000100
LEAQ 256(SI), SI
LEAQ 256(DI), DI
JGE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048
JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned:
LEAQ (SI)(R8*1), R10
MOVQ DI, R12
MOVOU -128(R10), X5
MOVOU -112(R10), X6
MOVQ $0x00000080, R9
ANDQ $0xffffffe0, DI
ADDQ $0x20, DI
MOVOU -96(R10), X7
MOVOU -80(R10), X8
MOVQ DI, R11
SUBQ R12, R11
MOVOU -64(R10), X9
MOVOU -48(R10), X10
SUBQ R11, R8
MOVOU -32(R10), X11
MOVOU -16(R10), X12
VMOVDQU (SI), Y4
ADDQ R11, SI
SUBQ R9, R8
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
VMOVDQU (SI), Y0
VMOVDQU 32(SI), Y1
VMOVDQU 64(SI), Y2
VMOVDQU 96(SI), Y3
ADDQ R9, SI
VMOVDQA Y0, (DI)
VMOVDQA Y1, 32(DI)
VMOVDQA Y2, 64(DI)
VMOVDQA Y3, 96(DI)
ADDQ R9, DI
SUBQ R9, R8
JA emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop
ADDQ R9, R8
ADDQ DI, R8
VMOVDQU Y4, (R12)
VZEROUPPER
MOVOU X5, -128(R8)
MOVOU X6, -112(R8)
MOVOU X7, -96(R8)
MOVOU X8, -80(R8)
MOVOU X9, -64(R8)
MOVOU X10, -48(R8)
MOVOU X11, -32(R8)
MOVOU X12, -16(R8)
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
MOVQ R9, DI
emit_literal_done_match_emit_encodeBlockAsm14BAvx:
MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsm14BAvx:
NOP
match_nolit_loop_encodeBlockAsm14BAvx:
MOVL AX, SI
MOVL AX, SI
SUBL BX, SI
MOVL SI, 24(SP)
ADDL $0x04, AX
ADDL $0x04, BX
MOVL 16(SP), SI
SUBL AX, SI
XORQ R8, R8
CMPQ SI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm14BAvx
matchlen_loopback_match_nolit_encodeBlockAsm14BAvx:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_match_nolit_encodeBlockAsm14BAvx
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP match_nolit_end_encodeBlockAsm14BAvx
matchlen_loop_match_nolit_encodeBlockAsm14BAvx:
LEAQ -8(SI), SI
LEAQ 8(R8), R8
CMPQ SI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm14BAvx
matchlen_single_match_nolit_encodeBlockAsm14BAvx:
TESTQ SI, SI
JZ match_nolit_end_encodeBlockAsm14BAvx
matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE match_nolit_end_encodeBlockAsm14BAvx
LEAQ 1(R8), R8
DECQ SI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx
match_nolit_end_encodeBlockAsm14BAvx:
MOVL 24(SP), SI
ADDQ $0x04, R8
MOVQ dst_base+0(FP), DI
ADDL R8, AX
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm14BAvx
CMPL R8, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm14BAvx
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(R8), R8
ADDQ $0x05, DI
CMPL R8, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm14BAvx
emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy
repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
four_bytes_remain_match_nolit_encodeBlockAsm14BAvx:
TESTL R8, R8
JZ match_nolit_emitcopy_end_encodeBlockAsm14BAvx
MOVB $0x03, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
two_byte_offset_match_nolit_encodeBlockAsm14BAvx:
CMPL R8, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(R8), R8
ADDQ $0x03, DI
emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx:
CMPL R8, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx
MOVB $0x01, DL
LEAQ -16(DX)(R8*4), R8
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
emit_copy_three_match_nolit_encodeBlockAsm14BAvx:
MOVB $0x02, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
match_nolit_emitcopy_end_encodeBlockAsm14BAvx:
MOVQ DI, dst_base+0(FP)
MOVL AX, 20(SP)
CMPL AX, 16(SP)
JGE emit_remainder_encodeBlockAsm14BAvx
CMPQ DI, (SP)
JL match_nolit_dst_ok_encodeBlockAsm14BAvx
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm14BAvx:
MOVQ -2(CX)(AX*1), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ SI, R8
SHRQ $0x10, SI
MOVQ SI, R9
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x32, R8
SHLQ $0x10, R9
IMULQ DI, R9
SHRQ $0x32, R9
MOVL 32(SP)(R8*1), DI
MOVL 32(SP)(R9*1), DI
LEAQ -2(AX), DI
MOVL DI, 32(SP)(R8*1)
MOVL AX, 32(SP)(R9*1)
CMPL (CX)(R9*1), SI
JEQ match_nolit_loop_encodeBlockAsm14BAvx
INCL AX
JMP search_loop_encodeBlockAsm14BAvx
emit_remainder_encodeBlockAsm14BAvx:
MOVQ src_len+32(FP), AX
SUBL 20(SP), AX
MOVQ dst_base+0(FP), DX
LEAQ (DX)(AX*1), DX
CMPQ DX, (SP)
JL emit_remainder_ok_encodeBlockAsm14BAvx
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm14BAvx:
MOVQ src_len+32(FP), AX
MOVL 20(SP), DX
CMPL DX, AX
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx
MOVL AX, BX
MOVL AX, 20(SP)
LEAQ (CX)(DX*1), AX
SUBL DX, BX
MOVQ dst_base+0(FP), CX
MOVQ BX, DX
SUBL $0x01, DX
JC emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm14BAvx
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm14BAvx
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm14BAvx
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsm14BAvx
MOVB $0xfc, (CX)
MOVL DX, 1(CX)
ADDQ $0x05, CX
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
four_bytes_emit_remainder_encodeBlockAsm14BAvx:
MOVQ DX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
three_bytes_emit_remainder_encodeBlockAsm14BAvx:
MOVB $0xf4, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
two_bytes_emit_remainder_encodeBlockAsm14BAvx:
MOVB $0xf0, (CX)
MOVB DL, 1(CX)
ADDQ $0x02, CX
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
one_byte_emit_remainder_encodeBlockAsm14BAvx:
SHLB $0x02, DL
MOVB DL, (CX)
ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm14BAvx:
LEAQ (CX)(BX*1), DX
NOP
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail:
TESTQ BX, BX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
CMPQ BX, $0x02
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2
CMPQ BX, $0x04
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7
JE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32
CMPQ BX, $0x40
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64
CMPQ BX, $0x80
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128
CMPQ BX, $0x00000100
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2:
MOVB (AX), DL
MOVB -1(AX)(BX*1), SI
MOVB DL, (CX)
MOVB SI, -1(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4:
MOVL (AX), DX
MOVL DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3:
MOVW (AX), DX
MOVB 2(AX), SI
MOVW DX, (CX)
MOVB SI, 2(CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7:
MOVL (AX), DX
MOVL -4(AX)(BX*1), SI
MOVL DX, (CX)
MOVL SI, -4(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8:
MOVQ (AX), DX
MOVQ DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16:
MOVQ (AX), DX
MOVQ -8(AX)(BX*1), SI
MOVQ DX, (CX)
MOVQ SI, -8(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU -128(AX)(BX*1), X8
MOVOU -112(AX)(BX*1), X9
MOVOU -96(AX)(BX*1), X10
MOVOU -80(AX)(BX*1), X11
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, -128(CX)(BX*1)
MOVOU X9, -112(CX)(BX*1)
MOVOU X10, -96(CX)(BX*1)
MOVOU X11, -80(CX)(BX*1)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048:
LEAQ -256(BX), BX
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 128(AX), X8
MOVOU 144(AX), X9
MOVOU 160(AX), X10
MOVOU 176(AX), X11
MOVOU 192(AX), X12
MOVOU 208(AX), X13
MOVOU 224(AX), X14
MOVOU 240(AX), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, 128(CX)
MOVOU X9, 144(CX)
MOVOU X10, 160(CX)
MOVOU X11, 176(CX)
MOVOU X12, 192(CX)
MOVOU X13, 208(CX)
MOVOU X14, 224(CX)
MOVOU X15, 240(CX)
CMPQ BX, $0x00000100
LEAQ 256(AX), AX
LEAQ 256(CX), CX
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned:
LEAQ (AX)(BX*1), SI
MOVQ CX, R8
MOVOU -128(SI), X5
MOVOU -112(SI), X6
MOVQ $0x00000080, DX
ANDQ $0xffffffe0, CX
ADDQ $0x20, CX
MOVOU -96(SI), X7
MOVOU -80(SI), X8
MOVQ CX, DI
SUBQ R8, DI
MOVOU -64(SI), X9
MOVOU -48(SI), X10
SUBQ DI, BX
MOVOU -32(SI), X11
MOVOU -16(SI), X12
VMOVDQU (AX), Y4
ADDQ DI, AX
SUBQ DX, BX
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
VMOVDQU (AX), Y0
VMOVDQU 32(AX), Y1
VMOVDQU 64(AX), Y2
VMOVDQU 96(AX), Y3
ADDQ DX, AX
VMOVDQA Y0, (CX)
VMOVDQA Y1, 32(CX)
VMOVDQA Y2, 64(CX)
VMOVDQA Y3, 96(CX)
ADDQ DX, CX
SUBQ DX, BX
JA emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop
ADDQ DX, BX
ADDQ CX, BX
VMOVDQU Y4, (R8)
VZEROUPPER
MOVOU X5, -128(BX)
MOVOU X6, -112(BX)
MOVOU X7, -96(BX)
MOVOU X8, -80(BX)
MOVOU X9, -64(BX)
MOVOU X10, -48(BX)
MOVOU X11, -32(BX)
MOVOU X12, -16(BX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm14BAvx:
MOVQ CX, dst_base+0(FP)
emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx:
MOVQ 8(SP), AX
SUBQ dst_base+0(FP), AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm12BAvx(dst []byte, src []byte) int
// Requires: AVX, SSE2
TEXT ·encodeBlockAsm12BAvx(SB), $4128-56
MOVQ $0x00000020, AX
LEAQ 32(SP), CX
PXOR X0, X0
zero_loop_encodeBlockAsm12BAvx:
MOVOU X0, (CX)
MOVOU X0, 16(CX)
MOVOU X0, 32(CX)
MOVOU X0, 48(CX)
MOVOU X0, 64(CX)
MOVOU X0, 80(CX)
MOVOU X0, 96(CX)
MOVOU X0, 112(CX)
ADDQ $0x80, CX
DECQ AX
JNZ zero_loop_encodeBlockAsm12BAvx
MOVL AX, 20(SP)
MOVQ src_len+32(FP), AX
LEAQ -5(AX), CX
LEAQ -8(AX), BX
SHRQ $0x05, AX
SUBL AX, CX
MOVL BX, 16(SP)
MOVQ dst_base+0(FP), AX
MOVQ AX, 8(SP)
LEAQ (AX)(CX*1), CX
MOVQ CX, (SP)
MOVL $0x00000001, AX
MOVL AX, 24(SP)
MOVQ src_base+24(FP), CX
search_loop_encodeBlockAsm12BAvx:
MOVQ (CX)(AX*1), SI
MOVL AX, BX
SUBL 20(SP), BX
SHRL $0x04, BX
LEAQ 4(AX)(BX*1), BX
MOVL 16(SP), DI
CMPL BX, DI
JGT emit_remainder_encodeBlockAsm12BAvx
MOVL BX, 28(SP)
MOVQ $0x0000cf1bbcdcbf9b, BX
MOVQ SI, R8
MOVQ SI, R9
SHRQ $0x08, R9
SHLQ $0x10, R8
IMULQ BX, R8
SHRQ $0x34, R8
SHLQ $0x10, R9
IMULQ BX, R9
SHRQ $0x34, R9
MOVL 32(SP)(R8*1), BX
MOVL 32(SP)(R9*1), DI
MOVL AX, 32(SP)(R8*1)
LEAL 1(AX), R8
MOVL R8, 32(SP)(R9*1)
MOVL AX, R8
SUBL 24(SP), R8
MOVL 1(CX)(R8*1), R10
MOVQ SI, R9
SHLQ $0x08, R9
CMPL R9, R10
JNE no_repeat_found_encodeBlockAsm12BAvx
LEAQ 1(AX), SI
MOVL 20(SP), BX
TESTL R8, R8
JZ repeat_extend_back_end_encodeBlockAsm12BAvx
repeat_extend_back_loop_encodeBlockAsm12BAvx:
CMPL SI, BX
JG repeat_extend_back_end_encodeBlockAsm12BAvx
MOVB -1(CX)(R8*1), DL
MOVB -1(CX)(SI*1), DI
CMPB DL, DI
JNE repeat_extend_back_end_encodeBlockAsm12BAvx
LEAQ -1(SI), SI
DECL R8
JZ repeat_extend_back_end_encodeBlockAsm12BAvx
JMP repeat_extend_back_loop_encodeBlockAsm12BAvx
repeat_extend_back_end_encodeBlockAsm12BAvx:
MOVL 20(SP), BX
CMPL BX, SI
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx
MOVL SI, DI
MOVL SI, 20(SP)
LEAQ (CX)(BX*1), R8
SUBL BX, DI
MOVQ dst_base+0(FP), BX
MOVQ DI, R9
SUBL $0x01, R9
JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
CMPL R9, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm12BAvx
CMPL R9, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx
CMPL R9, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx
CMPL R9, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsm12BAvx
MOVB $0xfc, (BX)
MOVL R9, 1(BX)
ADDQ $0x05, BX
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
four_bytes_repeat_emit_encodeBlockAsm12BAvx:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (BX)
MOVW R9, 1(BX)
MOVB R10, 3(BX)
ADDQ $0x04, BX
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
three_bytes_repeat_emit_encodeBlockAsm12BAvx:
MOVB $0xf4, (BX)
MOVW R9, 1(BX)
ADDQ $0x03, BX
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
two_bytes_repeat_emit_encodeBlockAsm12BAvx:
MOVB $0xf0, (BX)
MOVB R9, 1(BX)
ADDQ $0x02, BX
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
one_byte_repeat_emit_encodeBlockAsm12BAvx:
SHLB $0x02, R9
MOVB R9, (BX)
ADDQ $0x01, BX
memmove_repeat_emit_encodeBlockAsm12BAvx:
LEAQ (BX)(DI*1), R9
NOP
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail:
TESTQ DI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
CMPQ DI, $0x02
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2
CMPQ DI, $0x04
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4
CMPQ DI, $0x08
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7
JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8
CMPQ DI, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16
CMPQ DI, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32
CMPQ DI, $0x40
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64
CMPQ DI, $0x80
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128
CMPQ DI, $0x00000100
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
MOVB (R8), R9
MOVB -1(R8)(DI*1), R10
MOVB R9, (BX)
MOVB R10, -1(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4:
MOVL (R8), R9
MOVL R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3:
MOVW (R8), R9
MOVB 2(R8), R10
MOVW R9, (BX)
MOVB R10, 2(BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
MOVL (R8), R9
MOVL -4(R8)(DI*1), R10
MOVL R9, (BX)
MOVL R10, -4(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8:
MOVQ (R8), R9
MOVQ R9, (BX)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
MOVQ (R8), R9
MOVQ -8(R8)(DI*1), R10
MOVQ R9, (BX)
MOVQ R10, -8(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
MOVOU (R8), X0
MOVOU -16(R8)(DI*1), X1
MOVOU X0, (BX)
MOVOU X1, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU -32(R8)(DI*1), X2
MOVOU -16(R8)(DI*1), X3
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, -32(BX)(DI*1)
MOVOU X3, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU -128(R8)(DI*1), X8
MOVOU -112(R8)(DI*1), X9
MOVOU -96(R8)(DI*1), X10
MOVOU -80(R8)(DI*1), X11
MOVOU -64(R8)(DI*1), X12
MOVOU -48(R8)(DI*1), X13
MOVOU -32(R8)(DI*1), X14
MOVOU -16(R8)(DI*1), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, -128(BX)(DI*1)
MOVOU X9, -112(BX)(DI*1)
MOVOU X10, -96(BX)(DI*1)
MOVOU X11, -80(BX)(DI*1)
MOVOU X12, -64(BX)(DI*1)
MOVOU X13, -48(BX)(DI*1)
MOVOU X14, -32(BX)(DI*1)
MOVOU X15, -16(BX)(DI*1)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
LEAQ -256(DI), DI
MOVOU (R8), X0
MOVOU 16(R8), X1
MOVOU 32(R8), X2
MOVOU 48(R8), X3
MOVOU 64(R8), X4
MOVOU 80(R8), X5
MOVOU 96(R8), X6
MOVOU 112(R8), X7
MOVOU 128(R8), X8
MOVOU 144(R8), X9
MOVOU 160(R8), X10
MOVOU 176(R8), X11
MOVOU 192(R8), X12
MOVOU 208(R8), X13
MOVOU 224(R8), X14
MOVOU 240(R8), X15
MOVOU X0, (BX)
MOVOU X1, 16(BX)
MOVOU X2, 32(BX)
MOVOU X3, 48(BX)
MOVOU X4, 64(BX)
MOVOU X5, 80(BX)
MOVOU X6, 96(BX)
MOVOU X7, 112(BX)
MOVOU X8, 128(BX)
MOVOU X9, 144(BX)
MOVOU X10, 160(BX)
MOVOU X11, 176(BX)
MOVOU X12, 192(BX)
MOVOU X13, 208(BX)
MOVOU X14, 224(BX)
MOVOU X15, 240(BX)
CMPQ DI, $0x00000100
LEAQ 256(R8), R8
LEAQ 256(BX), BX
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
LEAQ (R8)(DI*1), R10
MOVQ BX, R12
MOVOU -128(R10), X5
MOVOU -112(R10), X6
MOVQ $0x00000080, R9
ANDQ $0xffffffe0, BX
ADDQ $0x20, BX
MOVOU -96(R10), X7
MOVOU -80(R10), X8
MOVQ BX, R11
SUBQ R12, R11
MOVOU -64(R10), X9
MOVOU -48(R10), X10
SUBQ R11, DI
MOVOU -32(R10), X11
MOVOU -16(R10), X12
VMOVDQU (R8), Y4
ADDQ R11, R8
SUBQ R9, DI
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
VMOVDQU (R8), Y0
VMOVDQU 32(R8), Y1
VMOVDQU 64(R8), Y2
VMOVDQU 96(R8), Y3
ADDQ R9, R8
VMOVDQA Y0, (BX)
VMOVDQA Y1, 32(BX)
VMOVDQA Y2, 64(BX)
VMOVDQA Y3, 96(BX)
ADDQ R9, BX
SUBQ R9, DI
JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
ADDQ R9, DI
ADDQ BX, DI
VMOVDQU Y4, (R12)
VZEROUPPER
MOVOU X5, -128(DI)
MOVOU X6, -112(DI)
MOVOU X7, -96(DI)
MOVOU X8, -80(DI)
MOVOU X9, -64(DI)
MOVOU X10, -48(DI)
MOVOU X11, -32(DI)
MOVOU X12, -16(DI)
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
MOVQ R9, BX
emit_literal_done_repeat_emit_encodeBlockAsm12BAvx:
MOVQ BX, dst_base+0(FP)
emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx:
ADDL $0x05, AX
MOVL AX, BX
SUBL 24(SP), BX
MOVL 16(SP), BX
SUBL AX, BX
XORQ R8, R8
CMPQ BX, $0x08
JL matchlen_single_repeat_extend
matchlen_loopback_repeat_extend:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_repeat_extend
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP repeat_extend_forward_end_encodeBlockAsm12BAvx
matchlen_loop_repeat_extend:
LEAQ -8(BX), BX
LEAQ 8(R8), R8
CMPQ BX, $0x08
JGE matchlen_loopback_repeat_extend
matchlen_single_repeat_extend:
TESTQ BX, BX
JZ repeat_extend_forward_end_encodeBlockAsm12BAvx
matchlen_single_loopback_repeat_extend:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE repeat_extend_forward_end_encodeBlockAsm12BAvx
LEAQ 1(R8), R8
DECQ BX
JNZ matchlen_single_loopback_repeat_extend
repeat_extend_forward_end_encodeBlockAsm12BAvx:
ADDL R8, AX
MOVL AX, BX
SUBL SI, BX
MOVL 24(SP), SI
MOVQ dst_base+0(FP), DI
MOVL 20(SP), R8
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm12BAvx
emit_repeat_again_match_repeat_:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_match_repeat_
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_
CMPL SI, $0x00000800
JLT repeat_two_offset_match_repeat_
cant_repeat_two_offset_match_repeat_:
CMPL BX, $0x00000104
JLT repeat_three_match_repeat_
CMPL BX, $0x00010100
JLT repeat_four_match_repeat_
CMPL BX, $0x0100ffff
JLT repeat_five_match_repeat_
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_repeat_
repeat_five_match_repeat_:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_four_match_repeat_:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_three_match_repeat_:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_two_match_repeat_:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_two_offset_match_repeat_:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_as_copy_encodeBlockAsm12BAvx:
CMPL SI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx
CMPL BX, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(BX), BX
ADDQ $0x05, DI
CMPL BX, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx:
TESTL BX, BX
JZ repeat_end_emit_encodeBlockAsm12BAvx
MOVB $0x03, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx:
CMPL BX, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(BX), BX
ADDQ $0x03, DI
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
MOVQ BX, R8
LEAQ -4(BX), BX
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
CMPL BX, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
CMPL BX, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
CMPL BX, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
LEAQ -16842747(BX), BX
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
LEAQ -65536(BX), BX
MOVQ BX, SI
MOVW $0x001d, (DI)
MOVW BX, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
LEAQ -256(BX), BX
MOVW $0x0019, (DI)
MOVW BX, 2(DI)
ADDQ $0x04, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
LEAQ -4(BX), BX
MOVW $0x0015, (DI)
MOVB BL, 2(DI)
ADDQ $0x03, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
SHLL $0x02, BX
ORL $0x01, BX
MOVW BX, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
XORQ R8, R8
LEAQ 1(R8)(BX*4), BX
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx:
CMPL BX, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
CMPL SI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
MOVB $0x01, DL
LEAQ -16(DX)(BX*4), BX
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, BX
MOVB BL, (DI)
ADDQ $0x02, DI
JMP repeat_end_emit_encodeBlockAsm12BAvx
emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx:
MOVB $0x02, DL
LEAQ -4(DX)(BX*4), BX
MOVB BL, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
repeat_end_emit_encodeBlockAsm12BAvx:
MOVQ DI, dst_base+0(FP)
MOVL 16(SP), BX
CMPL AX, BX
JGT emit_remainder_encodeBlockAsm12BAvx
JMP search_loop_encodeBlockAsm12BAvx
no_repeat_found_encodeBlockAsm12BAvx:
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ SI, R8
SHRQ $0x10, R8
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x34, R8
CMPL (CX)(BX*1), SI
SHRQ $0x08, SI
JEQ candidate_match_encodeBlockAsm12BAvx
MOVL 32(SP)(R8*1), BX
CMPL (CX)(DI*1), SI
JEQ candidate2_match_encodeBlockAsm12BAvx
LEAQ 2(AX), DI
MOVL DI, 32(SP)(R8*1)
SHRQ $0x08, SI
CMPL (CX)(BX*1), SI
JEQ candidate3_match_encodeBlockAsm12BAvx
MOVL 28(SP), AX
JMP search_loop_encodeBlockAsm12BAvx
candidate3_match_encodeBlockAsm12BAvx:
ADDL $0x02, AX
JMP candidate_match_encodeBlockAsm12BAvx
candidate2_match_encodeBlockAsm12BAvx:
LEAQ -2(AX), BX
MOVL BX, 32(SP)(R8*1)
INCL AX
MOVL DI, BX
candidate_match_encodeBlockAsm12BAvx:
MOVL 20(SP), SI
TESTL BX, BX
JZ match_extend_back_end_encodeBlockAsm12BAvx
match_extend_back_loop_encodeBlockAsm12BAvx:
CMPL AX, SI
JG match_extend_back_end_encodeBlockAsm12BAvx
MOVB -1(CX)(BX*1), DL
MOVB -1(CX)(AX*1), DI
CMPB DL, DI
JNE match_extend_back_end_encodeBlockAsm12BAvx
LEAL -1(AX), AX
DECL BX
JZ match_extend_back_end_encodeBlockAsm12BAvx
JMP match_extend_back_loop_encodeBlockAsm12BAvx
match_extend_back_end_encodeBlockAsm12BAvx:
MOVL AX, SI
SUBL 20(SP), SI
LEAQ dst_base+0(FP)(SI*1), SI
CMPQ SI, (SP)
JL match_dst_size_check_encodeBlockAsm12BAvx
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm12BAvx:
MOVL BX, SI
MOVL 20(SP), DI
CMPL DI, SI
JEQ emit_literal_skip_match_emit_encodeBlockAsm12BAvx
MOVL SI, R8
MOVL SI, 20(SP)
LEAQ (CX)(DI*1), SI
SUBL DI, R8
MOVQ dst_base+0(FP), DI
MOVQ R8, R9
SUBL $0x01, R9
JC emit_literal_done_match_emit_encodeBlockAsm12BAvx
CMPL R9, $0x3c
JLT one_byte_match_emit_encodeBlockAsm12BAvx
CMPL R9, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm12BAvx
CMPL R9, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm12BAvx
CMPL R9, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsm12BAvx
MOVB $0xfc, (DI)
MOVL R9, 1(DI)
ADDQ $0x05, DI
JMP memmove_match_emit_encodeBlockAsm12BAvx
four_bytes_match_emit_encodeBlockAsm12BAvx:
MOVQ R9, R10
SHRL $0x10, R10
MOVB $0xf8, (DI)
MOVW R9, 1(DI)
MOVB R10, 3(DI)
ADDQ $0x04, DI
JMP memmove_match_emit_encodeBlockAsm12BAvx
three_bytes_match_emit_encodeBlockAsm12BAvx:
MOVB $0xf4, (DI)
MOVW R9, 1(DI)
ADDQ $0x03, DI
JMP memmove_match_emit_encodeBlockAsm12BAvx
two_bytes_match_emit_encodeBlockAsm12BAvx:
MOVB $0xf0, (DI)
MOVB R9, 1(DI)
ADDQ $0x02, DI
JMP memmove_match_emit_encodeBlockAsm12BAvx
one_byte_match_emit_encodeBlockAsm12BAvx:
SHLB $0x02, R9
MOVB R9, (DI)
ADDQ $0x01, DI
memmove_match_emit_encodeBlockAsm12BAvx:
LEAQ (DI)(R8*1), R9
NOP
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail:
TESTQ R8, R8
JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx
CMPQ R8, $0x02
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2
CMPQ R8, $0x04
JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4
CMPQ R8, $0x08
JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7
JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16
CMPQ R8, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32
CMPQ R8, $0x40
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64
CMPQ R8, $0x80
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128
CMPQ R8, $0x00000100
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256
JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
MOVB (SI), R9
MOVB -1(SI)(R8*1), R10
MOVB R9, (DI)
MOVB R10, -1(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4:
MOVL (SI), R9
MOVL R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3:
MOVW (SI), R9
MOVB 2(SI), R10
MOVW R9, (DI)
MOVB R10, 2(DI)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
MOVL (SI), R9
MOVL -4(SI)(R8*1), R10
MOVL R9, (DI)
MOVL R10, -4(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8:
MOVQ (SI), R9
MOVQ R9, (DI)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
MOVQ (SI), R9
MOVQ -8(SI)(R8*1), R10
MOVQ R9, (DI)
MOVQ R10, -8(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
MOVOU (SI), X0
MOVOU -16(SI)(R8*1), X1
MOVOU X0, (DI)
MOVOU X1, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU -32(SI)(R8*1), X2
MOVOU -16(SI)(R8*1), X3
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, -32(DI)(R8*1)
MOVOU X3, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU -128(SI)(R8*1), X8
MOVOU -112(SI)(R8*1), X9
MOVOU -96(SI)(R8*1), X10
MOVOU -80(SI)(R8*1), X11
MOVOU -64(SI)(R8*1), X12
MOVOU -48(SI)(R8*1), X13
MOVOU -32(SI)(R8*1), X14
MOVOU -16(SI)(R8*1), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, -128(DI)(R8*1)
MOVOU X9, -112(DI)(R8*1)
MOVOU X10, -96(DI)(R8*1)
MOVOU X11, -80(DI)(R8*1)
MOVOU X12, -64(DI)(R8*1)
MOVOU X13, -48(DI)(R8*1)
MOVOU X14, -32(DI)(R8*1)
MOVOU X15, -16(DI)(R8*1)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
LEAQ -256(R8), R8
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU 48(SI), X3
MOVOU 64(SI), X4
MOVOU 80(SI), X5
MOVOU 96(SI), X6
MOVOU 112(SI), X7
MOVOU 128(SI), X8
MOVOU 144(SI), X9
MOVOU 160(SI), X10
MOVOU 176(SI), X11
MOVOU 192(SI), X12
MOVOU 208(SI), X13
MOVOU 224(SI), X14
MOVOU 240(SI), X15
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
MOVOU X4, 64(DI)
MOVOU X5, 80(DI)
MOVOU X6, 96(DI)
MOVOU X7, 112(DI)
MOVOU X8, 128(DI)
MOVOU X9, 144(DI)
MOVOU X10, 160(DI)
MOVOU X11, 176(DI)
MOVOU X12, 192(DI)
MOVOU X13, 208(DI)
MOVOU X14, 224(DI)
MOVOU X15, 240(DI)
CMPQ R8, $0x00000100
LEAQ 256(SI), SI
LEAQ 256(DI), DI
JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
LEAQ (SI)(R8*1), R10
MOVQ DI, R12
MOVOU -128(R10), X5
MOVOU -112(R10), X6
MOVQ $0x00000080, R9
ANDQ $0xffffffe0, DI
ADDQ $0x20, DI
MOVOU -96(R10), X7
MOVOU -80(R10), X8
MOVQ DI, R11
SUBQ R12, R11
MOVOU -64(R10), X9
MOVOU -48(R10), X10
SUBQ R11, R8
MOVOU -32(R10), X11
MOVOU -16(R10), X12
VMOVDQU (SI), Y4
ADDQ R11, SI
SUBQ R9, R8
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
VMOVDQU (SI), Y0
VMOVDQU 32(SI), Y1
VMOVDQU 64(SI), Y2
VMOVDQU 96(SI), Y3
ADDQ R9, SI
VMOVDQA Y0, (DI)
VMOVDQA Y1, 32(DI)
VMOVDQA Y2, 64(DI)
VMOVDQA Y3, 96(DI)
ADDQ R9, DI
SUBQ R9, R8
JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
ADDQ R9, R8
ADDQ DI, R8
VMOVDQU Y4, (R12)
VZEROUPPER
MOVOU X5, -128(R8)
MOVOU X6, -112(R8)
MOVOU X7, -96(R8)
MOVOU X8, -80(R8)
MOVOU X9, -64(R8)
MOVOU X10, -48(R8)
MOVOU X11, -32(R8)
MOVOU X12, -16(R8)
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
MOVQ R9, DI
emit_literal_done_match_emit_encodeBlockAsm12BAvx:
MOVQ DI, dst_base+0(FP)
emit_literal_skip_match_emit_encodeBlockAsm12BAvx:
NOP
match_nolit_loop_encodeBlockAsm12BAvx:
MOVL AX, SI
MOVL AX, SI
SUBL BX, SI
MOVL SI, 24(SP)
ADDL $0x04, AX
ADDL $0x04, BX
MOVL 16(SP), SI
SUBL AX, SI
XORQ R8, R8
CMPQ SI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm12BAvx
matchlen_loopback_match_nolit_encodeBlockAsm12BAvx:
MOVQ (CX)(R8*1), DI
XORQ (CX)(R8*1), DI
TESTQ DI, DI
JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx
BSFQ DI, DI
SARQ $0x03, DI
LEAQ (R8)(DI*1), R8
JMP match_nolit_end_encodeBlockAsm12BAvx
matchlen_loop_match_nolit_encodeBlockAsm12BAvx:
LEAQ -8(SI), SI
LEAQ 8(R8), R8
CMPQ SI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx
matchlen_single_match_nolit_encodeBlockAsm12BAvx:
TESTQ SI, SI
JZ match_nolit_end_encodeBlockAsm12BAvx
matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx:
MOVB (CX)(R8*1), DI
CMPB (CX)(R8*1), DI
JNE match_nolit_end_encodeBlockAsm12BAvx
LEAQ 1(R8), R8
DECQ SI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx
match_nolit_end_encodeBlockAsm12BAvx:
MOVL 24(SP), SI
ADDQ $0x04, R8
MOVQ dst_base+0(FP), DI
ADDL R8, AX
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx
CMPL R8, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
MOVB $0xff, (DI)
MOVD SI, 1(DI)
LEAQ -64(R8), R8
ADDQ $0x05, DI
CMPL R8, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy
repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
four_bytes_remain_match_nolit_encodeBlockAsm12BAvx:
TESTL R8, R8
JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx
MOVB $0x03, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVD SI, 1(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
two_byte_offset_match_nolit_encodeBlockAsm12BAvx:
CMPL R8, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx
MOVB $0xee, (DI)
MOVW SI, 1(DI)
LEAQ -60(R8), R8
ADDQ $0x03, DI
emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
MOVQ R8, R9
LEAQ -4(R8), R8
CMPL R9, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
CMPL R9, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
CMPL R8, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
CMPL R8, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
CMPL R8, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
LEAQ -16842747(R8), R8
MOVW $0x001d, (DI)
MOVW $0xfffb, 2(DI)
MOVB $0xff, 4(DI)
ADDQ $0x05, DI
JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
LEAQ -65536(R8), R8
MOVQ R8, SI
MOVW $0x001d, (DI)
MOVW R8, 2(DI)
SARQ $0x10, SI
MOVB SI, 4(DI)
ADDQ $0x05, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
LEAQ -256(R8), R8
MOVW $0x0019, (DI)
MOVW R8, 2(DI)
ADDQ $0x04, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
LEAQ -4(R8), R8
MOVW $0x0015, (DI)
MOVB R8, 2(DI)
ADDQ $0x03, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
SHLL $0x02, R8
ORL $0x01, R8
MOVW R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
XORQ R9, R9
LEAQ 1(R9)(R8*4), R8
MOVB SI, 1(DI)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx:
CMPL R8, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
MOVB $0x01, DL
LEAQ -16(DX)(R8*4), R8
MOVB SI, 1(DI)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R8
MOVB R8, (DI)
ADDQ $0x02, DI
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
emit_copy_three_match_nolit_encodeBlockAsm12BAvx:
MOVB $0x02, DL
LEAQ -4(DX)(R8*4), R8
MOVB R8, (DI)
MOVW SI, 1(DI)
ADDQ $0x03, DI
match_nolit_emitcopy_end_encodeBlockAsm12BAvx:
MOVQ DI, dst_base+0(FP)
MOVL AX, 20(SP)
CMPL AX, 16(SP)
JGE emit_remainder_encodeBlockAsm12BAvx
CMPQ DI, (SP)
JL match_nolit_dst_ok_encodeBlockAsm12BAvx
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm12BAvx:
MOVQ -2(CX)(AX*1), SI
MOVQ $0x0000cf1bbcdcbf9b, DI
MOVQ SI, R8
SHRQ $0x10, SI
MOVQ SI, R9
SHLQ $0x10, R8
IMULQ DI, R8
SHRQ $0x34, R8
SHLQ $0x10, R9
IMULQ DI, R9
SHRQ $0x34, R9
MOVL 32(SP)(R8*1), DI
MOVL 32(SP)(R9*1), DI
LEAQ -2(AX), DI
MOVL DI, 32(SP)(R8*1)
MOVL AX, 32(SP)(R9*1)
CMPL (CX)(R9*1), SI
JEQ match_nolit_loop_encodeBlockAsm12BAvx
INCL AX
JMP search_loop_encodeBlockAsm12BAvx
emit_remainder_encodeBlockAsm12BAvx:
MOVQ src_len+32(FP), AX
SUBL 20(SP), AX
MOVQ dst_base+0(FP), DX
LEAQ (DX)(AX*1), DX
CMPQ DX, (SP)
JL emit_remainder_ok_encodeBlockAsm12BAvx
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm12BAvx:
MOVQ src_len+32(FP), AX
MOVL 20(SP), DX
CMPL DX, AX
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx
MOVL AX, BX
MOVL AX, 20(SP)
LEAQ (CX)(DX*1), AX
SUBL DX, BX
MOVQ dst_base+0(FP), CX
MOVQ BX, DX
SUBL $0x01, DX
JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm12BAvx
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx
MOVB $0xfc, (CX)
MOVL DX, 1(CX)
ADDQ $0x05, CX
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
four_bytes_emit_remainder_encodeBlockAsm12BAvx:
MOVQ DX, SI
SHRL $0x10, SI
MOVB $0xf8, (CX)
MOVW DX, 1(CX)
MOVB SI, 3(CX)
ADDQ $0x04, CX
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
three_bytes_emit_remainder_encodeBlockAsm12BAvx:
MOVB $0xf4, (CX)
MOVW DX, 1(CX)
ADDQ $0x03, CX
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
two_bytes_emit_remainder_encodeBlockAsm12BAvx:
MOVB $0xf0, (CX)
MOVB DL, 1(CX)
ADDQ $0x02, CX
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
one_byte_emit_remainder_encodeBlockAsm12BAvx:
SHLB $0x02, DL
MOVB DL, (CX)
ADDQ $0x01, CX
memmove_emit_remainder_encodeBlockAsm12BAvx:
LEAQ (CX)(BX*1), DX
NOP
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail:
TESTQ BX, BX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
CMPQ BX, $0x02
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2
CMPQ BX, $0x04
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7
JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32
CMPQ BX, $0x40
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64
CMPQ BX, $0x80
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128
CMPQ BX, $0x00000100
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2:
MOVB (AX), DL
MOVB -1(AX)(BX*1), SI
MOVB DL, (CX)
MOVB SI, -1(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4:
MOVL (AX), DX
MOVL DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3:
MOVW (AX), DX
MOVB 2(AX), SI
MOVW DX, (CX)
MOVB SI, 2(CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7:
MOVL (AX), DX
MOVL -4(AX)(BX*1), SI
MOVL DX, (CX)
MOVL SI, -4(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8:
MOVQ (AX), DX
MOVQ DX, (CX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16:
MOVQ (AX), DX
MOVQ -8(AX)(BX*1), SI
MOVQ DX, (CX)
MOVQ SI, -8(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32:
MOVOU (AX), X0
MOVOU -16(AX)(BX*1), X1
MOVOU X0, (CX)
MOVOU X1, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(BX*1), X2
MOVOU -16(AX)(BX*1), X3
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, -32(CX)(BX*1)
MOVOU X3, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU -128(AX)(BX*1), X8
MOVOU -112(AX)(BX*1), X9
MOVOU -96(AX)(BX*1), X10
MOVOU -80(AX)(BX*1), X11
MOVOU -64(AX)(BX*1), X12
MOVOU -48(AX)(BX*1), X13
MOVOU -32(AX)(BX*1), X14
MOVOU -16(AX)(BX*1), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, -128(CX)(BX*1)
MOVOU X9, -112(CX)(BX*1)
MOVOU X10, -96(CX)(BX*1)
MOVOU X11, -80(CX)(BX*1)
MOVOU X12, -64(CX)(BX*1)
MOVOU X13, -48(CX)(BX*1)
MOVOU X14, -32(CX)(BX*1)
MOVOU X15, -16(CX)(BX*1)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048:
LEAQ -256(BX), BX
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
MOVOU 128(AX), X8
MOVOU 144(AX), X9
MOVOU 160(AX), X10
MOVOU 176(AX), X11
MOVOU 192(AX), X12
MOVOU 208(AX), X13
MOVOU 224(AX), X14
MOVOU 240(AX), X15
MOVOU X0, (CX)
MOVOU X1, 16(CX)
MOVOU X2, 32(CX)
MOVOU X3, 48(CX)
MOVOU X4, 64(CX)
MOVOU X5, 80(CX)
MOVOU X6, 96(CX)
MOVOU X7, 112(CX)
MOVOU X8, 128(CX)
MOVOU X9, 144(CX)
MOVOU X10, 160(CX)
MOVOU X11, 176(CX)
MOVOU X12, 192(CX)
MOVOU X13, 208(CX)
MOVOU X14, 224(CX)
MOVOU X15, 240(CX)
CMPQ BX, $0x00000100
LEAQ 256(AX), AX
LEAQ 256(CX), CX
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned:
LEAQ (AX)(BX*1), SI
MOVQ CX, R8
MOVOU -128(SI), X5
MOVOU -112(SI), X6
MOVQ $0x00000080, DX
ANDQ $0xffffffe0, CX
ADDQ $0x20, CX
MOVOU -96(SI), X7
MOVOU -80(SI), X8
MOVQ CX, DI
SUBQ R8, DI
MOVOU -64(SI), X9
MOVOU -48(SI), X10
SUBQ DI, BX
MOVOU -32(SI), X11
MOVOU -16(SI), X12
VMOVDQU (AX), Y4
ADDQ DI, AX
SUBQ DX, BX
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
VMOVDQU (AX), Y0
VMOVDQU 32(AX), Y1
VMOVDQU 64(AX), Y2
VMOVDQU 96(AX), Y3
ADDQ DX, AX
VMOVDQA Y0, (CX)
VMOVDQA Y1, 32(CX)
VMOVDQA Y2, 64(CX)
VMOVDQA Y3, 96(CX)
ADDQ DX, CX
SUBQ DX, BX
JA emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop
ADDQ DX, BX
ADDQ CX, BX
VMOVDQU Y4, (R8)
VZEROUPPER
MOVOU X5, -128(BX)
MOVOU X6, -112(BX)
MOVOU X7, -96(BX)
MOVOU X8, -80(BX)
MOVOU X9, -64(BX)
MOVOU X10, -48(BX)
MOVOU X11, -32(BX)
MOVOU X12, -16(BX)
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
MOVQ DX, CX
emit_literal_done_emit_remainder_encodeBlockAsm12BAvx:
MOVQ CX, dst_base+0(FP)
emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx:
MOVQ 8(SP), AX
SUBQ dst_base+0(FP), AX
MOVQ AX, ret+48(FP)
RET
// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a literal tag header for len(lit) at dst, copies lit
// directly behind that header and returns (header bytes + len(lit)).
// When the length is zero nothing is written and 0 is returned.
// dst_len is never read in this routine, so no bounds check happens here;
// presumably the caller guarantees dst is large enough - TODO confirm.
//
// Register roles:
//   AX     dst write pointer (advanced past the header before the copy)
//   CX     lit read pointer (reused as scratch by the smallest copy cases)
//   DX     number of literal bytes still to copy
//   BX     return value: len(lit) plus the header size
//   SI     len(lit)-1 while the header is built, copy scratch afterwards
//   DI     scratch (three-byte length header only)
//   X0-X15 copy scratch
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
MOVQ dst_base+0(FP), AX
MOVQ lit_base+24(FP), CX
MOVQ lit_len+32(FP), DX
MOVQ DX, BX
MOVQ DX, SI
// SI = len-1 (32-bit). The borrow is set only when the low 32 bits of the
// length are zero; in that case return immediately with BX (= len).
SUBL $0x01, SI
JC emit_literal_end_standalone
// Pick the header size from len-1.
// NOTE(review): these are signed 32-bit compares (CMPL + JLT), so a value with
// bit 31 set would take the one-byte path - presumably lengths never get that
// large; confirm against callers.
CMPL SI, $0x3c
JLT one_byte_standalone
CMPL SI, $0x00000100
JLT two_bytes_standalone
CMPL SI, $0x00010000
JLT three_bytes_standalone
CMPL SI, $0x01000000
JLT four_bytes_standalone
// 5-byte header: tag 0xfc followed by len-1 as a 32-bit value.
MOVB $0xfc, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, BX
ADDQ $0x05, AX
JMP memmove_standalone
four_bytes_standalone:
// 4-byte header: tag 0xf8 followed by the low 24 bits of len-1
// (16-bit store plus one byte taken from len-1 >> 16).
MOVQ SI, DI
SHRL $0x10, DI
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB DI, 3(AX)
ADDQ $0x04, BX
ADDQ $0x04, AX
JMP memmove_standalone
three_bytes_standalone:
// 3-byte header: tag 0xf4 followed by len-1 as a 16-bit value.
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, BX
ADDQ $0x03, AX
JMP memmove_standalone
two_bytes_standalone:
// 2-byte header: tag 0xf0 followed by len-1 as a single byte.
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, BX
ADDQ $0x02, AX
JMP memmove_standalone
one_byte_standalone:
// 1-byte header: (len-1) << 2, used when len-1 < 0x3c.
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, BX
ADDQ $0x01, AX
memmove_standalone:
NOP
// Copy DX bytes from (CX) to (AX). Dispatch on the remaining size; every
// fixed-size case below performs all of its loads before its first store
// and covers the range with a head piece plus a tail piece that may overlap.
emit_lit_memmove_standalone_memmove_tail:
TESTQ DX, DX
JEQ emit_literal_end_standalone
CMPQ DX, $0x02
JBE emit_lit_memmove_standalone_memmove_move_1or2
CMPQ DX, $0x04
JB emit_lit_memmove_standalone_memmove_move_3
JBE emit_lit_memmove_standalone_memmove_move_4
CMPQ DX, $0x08
JB emit_lit_memmove_standalone_memmove_move_5through7
JE emit_lit_memmove_standalone_memmove_move_8
CMPQ DX, $0x10
JBE emit_lit_memmove_standalone_memmove_move_9through16
CMPQ DX, $0x20
JBE emit_lit_memmove_standalone_memmove_move_17through32
CMPQ DX, $0x40
JBE emit_lit_memmove_standalone_memmove_move_33through64
CMPQ DX, $0x80
JBE emit_lit_memmove_standalone_memmove_move_65through128
CMPQ DX, $0x00000100
JBE emit_lit_memmove_standalone_memmove_move_129through256
// More than 256 bytes left: use the 256-byte block loop.
JMP emit_lit_memmove_standalone_memmove_move_256through2048
emit_lit_memmove_standalone_memmove_move_1or2:
// First and last byte (the same byte when DX == 1). CL overwrites the low
// byte of the source pointer, which is not used again on this path.
MOVB (CX), SI
MOVB -1(CX)(DX*1), CL
MOVB SI, (AX)
MOVB CL, -1(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_4:
MOVL (CX), SI
MOVL SI, (AX)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_3:
// Two bytes plus one byte.
MOVW (CX), SI
MOVB 2(CX), CL
MOVW SI, (AX)
MOVB CL, 2(AX)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_5through7:
// Two 4-byte moves: first four and last four bytes (overlapping).
MOVL (CX), SI
MOVL -4(CX)(DX*1), CX
MOVL SI, (AX)
MOVL CX, -4(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_9through16:
// Two 8-byte moves: first eight and last eight bytes (overlapping).
MOVQ (CX), SI
MOVQ -8(CX)(DX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_17through32:
// Two unaligned 16-byte moves: head and tail.
MOVOU (CX), X0
MOVOU -16(CX)(DX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_33through64:
// 32 bytes from the start and 32 bytes from the end.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(DX*1), X2
MOVOU -16(CX)(DX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(DX*1)
MOVOU X3, -16(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_65through128:
// 64 bytes from the start and 64 bytes from the end.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU 32(CX), X2
MOVOU 48(CX), X3
MOVOU -64(CX)(DX*1), X12
MOVOU -48(CX)(DX*1), X13
MOVOU -32(CX)(DX*1), X14
MOVOU -16(CX)(DX*1), X15
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, 32(AX)
MOVOU X3, 48(AX)
MOVOU X12, -64(AX)(DX*1)
MOVOU X13, -48(AX)(DX*1)
MOVOU X14, -32(AX)(DX*1)
MOVOU X15, -16(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_129through256:
// 128 bytes from the start and 128 bytes from the end.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU 32(CX), X2
MOVOU 48(CX), X3
MOVOU 64(CX), X4
MOVOU 80(CX), X5
MOVOU 96(CX), X6
MOVOU 112(CX), X7
MOVOU -128(CX)(DX*1), X8
MOVOU -112(CX)(DX*1), X9
MOVOU -96(CX)(DX*1), X10
MOVOU -80(CX)(DX*1), X11
MOVOU -64(CX)(DX*1), X12
MOVOU -48(CX)(DX*1), X13
MOVOU -32(CX)(DX*1), X14
MOVOU -16(CX)(DX*1), X15
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, 32(AX)
MOVOU X3, 48(AX)
MOVOU X4, 64(AX)
MOVOU X5, 80(AX)
MOVOU X6, 96(AX)
MOVOU X7, 112(AX)
MOVOU X8, -128(AX)(DX*1)
MOVOU X9, -112(AX)(DX*1)
MOVOU X10, -96(AX)(DX*1)
MOVOU X11, -80(AX)(DX*1)
MOVOU X12, -64(AX)(DX*1)
MOVOU X13, -48(AX)(DX*1)
MOVOU X14, -32(AX)(DX*1)
MOVOU X15, -16(AX)(DX*1)
JMP emit_literal_end_standalone
emit_lit_memmove_standalone_memmove_move_256through2048:
// Block loop: DX is reduced by 256 up front, then one 256-byte block is
// copied through X0-X15.
LEAQ -256(DX), DX
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU 32(CX), X2
MOVOU 48(CX), X3
MOVOU 64(CX), X4
MOVOU 80(CX), X5
MOVOU 96(CX), X6
MOVOU 112(CX), X7
MOVOU 128(CX), X8
MOVOU 144(CX), X9
MOVOU 160(CX), X10
MOVOU 176(CX), X11
MOVOU 192(CX), X12
MOVOU 208(CX), X13
MOVOU 224(CX), X14
MOVOU 240(CX), X15
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, 32(AX)
MOVOU X3, 48(AX)
MOVOU X4, 64(AX)
MOVOU X5, 80(AX)
MOVOU X6, 96(AX)
MOVOU X7, 112(AX)
MOVOU X8, 128(AX)
MOVOU X9, 144(AX)
MOVOU X10, 160(AX)
MOVOU X11, 176(AX)
MOVOU X12, 192(AX)
MOVOU X13, 208(AX)
MOVOU X14, 224(AX)
MOVOU X15, 240(AX)
// The compare sets the flags; the two LEAQ pointer bumps leave them untouched,
// so the JGE below still tests "at least 256 bytes remain".
CMPQ DX, $0x00000100
LEAQ 256(CX), CX
LEAQ 256(AX), AX
JGE emit_lit_memmove_standalone_memmove_move_256through2048
// Fewer than 256 bytes left: let the size dispatcher finish the copy.
JMP emit_lit_memmove_standalone_memmove_tail
emit_literal_end_standalone:
// Return header bytes + literal length.
MOVQ BX, ret+48(FP)
RET
// func emitLiteralAvx(dst []byte, lit []byte) int
// Requires: AVX, SSE2
//
// emitLiteralAvx writes a literal tag header for len(lit) at dst, copies lit
// directly behind that header and returns (header bytes + len(lit)).
// When the length is zero nothing is written and 0 is returned.
// Copies of up to 256 bytes use SSE2 head/tail moves; longer copies use a
// 128-bytes-per-iteration AVX loop with 32-byte aligned stores.
// dst_len is never read in this routine, so no bounds check happens here;
// presumably the caller guarantees dst is large enough - TODO confirm.
// The AVX path copies forward and assumes dst and lit do not overlap -
// TODO confirm against callers.
//
// Register roles:
//   AX     dst write pointer (advanced past the header before the copy)
//   CX     lit read pointer
//   DX     number of literal bytes still to copy
//   BX     return value: len(lit) plus the header size
//   SI     len(lit)-1 while the header is built, scratch / loop stride afterwards
//   DI     scratch; end of lit in the AVX path
//   R8     AVX path: distance from dst to the next 32-byte boundary
//   R9     AVX path: original (unaligned) dst
//   X0-X15 / Y0-Y4 copy scratch
TEXT ·emitLiteralAvx(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX
	MOVQ DX, SI

	// SI = len-1 (32-bit). The borrow is set only when the low 32 bits of the
	// length are zero; in that case return immediately with BX (= len).
	SUBL $0x01, SI
	JC emit_literal_end_avx_standalone

	// Pick the header size from len-1.
	// NOTE(review): these are signed 32-bit compares (CMPL + JLT), so a value
	// with bit 31 set would take the one-byte path - presumably lengths never
	// get that large; confirm against callers.
	CMPL SI, $0x3c
	JLT one_byte_standalone
	CMPL SI, $0x00000100
	JLT two_bytes_standalone
	CMPL SI, $0x00010000
	JLT three_bytes_standalone
	CMPL SI, $0x01000000
	JLT four_bytes_standalone

	// 5-byte header: tag 0xfc followed by len-1 as a 32-bit value.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP memmove_standalone

four_bytes_standalone:
	// 4-byte header: tag 0xf8 followed by the low 24 bits of len-1.
	MOVQ SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP memmove_standalone

three_bytes_standalone:
	// 3-byte header: tag 0xf4 followed by len-1 as a 16-bit value.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP memmove_standalone

two_bytes_standalone:
	// 2-byte header: tag 0xf0 followed by len-1 as a single byte.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP memmove_standalone

one_byte_standalone:
	// 1-byte header: (len-1) << 2, used when len-1 < 0x3c.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX). Dispatch on the size; every fixed-size
	// case performs all of its loads before its first store and covers the
	// range with a head piece plus a tail piece that may overlap.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ emit_literal_end_avx_standalone
	CMPQ DX, $0x02
	JBE emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ DX, $0x04
	JB emit_lit_memmove_standalone_memmove_move_3
	JBE emit_lit_memmove_standalone_memmove_move_4
	CMPQ DX, $0x08
	JB emit_lit_memmove_standalone_memmove_move_5through7
	JE emit_lit_memmove_standalone_memmove_move_8
	CMPQ DX, $0x10
	JBE emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ DX, $0x20
	JBE emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ DX, $0x40
	JBE emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ DX, $0x80
	JBE emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ DX, $0x00000100
	JBE emit_lit_memmove_standalone_memmove_move_129through256

	// More than 256 bytes: AVX path.
	JMP emit_lit_memmove_standalone_memmove_avxUnaligned

emit_lit_memmove_standalone_memmove_move_1or2:
	// First and last byte (the same byte when DX == 1).
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), DI
	MOVB SI, (AX)
	MOVB DI, -1(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_3:
	// Two bytes plus one byte.
	MOVW (CX), SI
	MOVB 2(CX), DI
	MOVW SI, (AX)
	MOVB DI, 2(AX)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	// First four and last four bytes (overlapping).
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), DI
	MOVL SI, (AX)
	MOVL DI, -4(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	// First eight and last eight bytes (overlapping).
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), DI
	MOVQ SI, (AX)
	MOVQ DI, -8(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	// Two unaligned 16-byte moves: head and tail.
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	// 32 bytes from the start and 32 bytes from the end.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	// 64 bytes from the start and 64 bytes from the end.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	// 128 bytes from the start and 128 bytes from the end.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

	// Copies of more than 256 bytes:
	//   1. save the last 128 source bytes in X5-X12,
	//   2. save the first 32 source bytes in Y4,
	//   3. round dst up to the next 32-byte boundary and copy 128 bytes per
	//      iteration with aligned stores,
	//   4. store the saved head at the original dst and the saved tail so
	//      that it ends exactly at dst+len.
	// The register loads and the address arithmetic are interleaved on purpose.
emit_lit_memmove_standalone_memmove_avxUnaligned:
	LEAQ (CX)(DX*1), DI
	MOVQ AX, R9
	MOVOU -128(DI), X5
	MOVOU -112(DI), X6
	MOVQ $0x00000080, SI

	// AX = (AX &^ 31) + 32. The mask is written as $-32: the 64-bit AND takes
	// a sign-extended 32-bit immediate, so the upper half of the pointer is
	// kept intact.
	ANDQ $-32, AX
	ADDQ $0x20, AX
	MOVOU -96(DI), X7
	MOVOU -80(DI), X8

	// R8 = bytes between the original and the aligned dst (1..32); they are
	// covered by the head saved in Y4.
	MOVQ AX, R8
	SUBQ R9, R8
	MOVOU -64(DI), X9
	MOVOU -48(DI), X10
	SUBQ R8, DX
	MOVOU -32(DI), X11
	MOVOU -16(DI), X12
	VMOVDQU (CX), Y4
	ADDQ R8, CX

	// Pre-subtract one stride so the loop can end on the flags of SUBQ.
	SUBQ SI, DX

emit_lit_memmove_standalone_memmove_gobble_128_loop:
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	ADDQ SI, CX
	VMOVDQA Y0, (AX)
	VMOVDQA Y1, 32(AX)
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)
	ADDQ SI, AX
	SUBQ SI, DX
	JA emit_lit_memmove_standalone_memmove_gobble_128_loop

	// Undo the pre-subtraction; DX becomes the address one past the last
	// destination byte.
	ADDQ SI, DX
	ADDQ AX, DX
	VMOVDQU Y4, (R9)

	// Clear the upper YMM halves before going back to SSE moves.
	VZEROUPPER
	MOVOU X5, -128(DX)
	MOVOU X6, -112(DX)
	MOVOU X7, -96(DX)
	MOVOU X8, -80(DX)
	MOVOU X9, -64(DX)
	MOVOU X10, -48(DX)
	MOVOU X11, -32(DX)
	MOVOU X12, -16(DX)

emit_literal_end_avx_standalone:
	// Return header bytes + literal length.
	MOVQ BX, ret+48(FP)
	RET
// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes a repeat code for `length` bytes at dst and returns the
// number of bytes written. With L = length-4 the encodings are:
//
//   length <= 8                      2 bytes: 16-bit store of (L<<2)|1
//   length 9..11 and offset < 2048   2 bytes: ((offset>>8)<<5)|(L<<2)|1, offset&0xff
//   L < 0x104                        3 bytes: 0x15 0x00, L-4
//   L < 0x10100                      4 bytes: 0x19 0x00, 16-bit L-256
//   L < 0x100ffff                    5 bytes: 0x1d 0x00, 24-bit L-65536
//   otherwise                        5 bytes: 0x1d 0x00 0xfb 0xff 0xff, then the
//                                    loop restarts with length = L-16842747
//
// All range checks are signed 32-bit compares. dst_len is never read here,
// so no bounds check is done; presumably the caller guarantees enough room -
// TODO confirm.
//
// Register roles:
//   AX  dst write pointer
//   BX  bytes written so far (return value)
//   CX  offset (reused as scratch by the 5-byte and offset encodings)
//   DX  remaining length, then L and the value being encoded
//   SI  copy of the length before the -4 adjustment / zero base for LEAQ
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	MOVQ length+32(FP), DX
	MOVQ offset+24(FP), CX
	MOVQ dst_base+0(FP), AX
	XORQ BX, BX

next_chunk:
	// SI = length, DX = L = length-4.
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x8
	JLE  short_form
	CMPL SI, $0xc
	JGE  long_form
	CMPL CX, $0x800
	JLT  short_form_with_offset

long_form:
	CMPL DX, $0x104
	JLT  three_byte_form
	CMPL DX, $0x10100
	JLT  four_byte_form
	CMPL DX, $0x100ffff
	JLT  five_byte_form

	// Too long for one code: emit a maximal 5-byte code, reduce the length
	// and go around again.
	MOVW $0x1d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	LEAQ -16842747(DX), DX
	ADDQ $0x5, BX
	ADDQ $0x5, AX
	JMP  next_chunk

five_byte_form:
	// 0x1d 0x00 followed by the low 24 bits of L-65536.
	LEAQ -65536(DX), DX
	MOVW $0x1d, (AX)
	MOVW DX, 2(AX)
	MOVQ DX, CX
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x5, AX
	ADDQ $0x5, BX
	JMP  done

four_byte_form:
	// 0x19 0x00 followed by L-256 as a 16-bit value.
	LEAQ -256(DX), DX
	MOVW $0x19, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x4, AX
	ADDQ $0x4, BX
	JMP  done

three_byte_form:
	// 0x15 0x00 followed by L-4 as a single byte.
	LEAQ -4(DX), DX
	MOVW $0x15, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x3, AX
	ADDQ $0x3, BX
	JMP  done

short_form:
	// (L<<2)|1 written as a 16-bit value.
	SHLL $0x2, DX
	ORL  $0x1, DX
	MOVW DX, (AX)
	ADDQ $0x2, AX
	ADDQ $0x2, BX
	JMP  done

short_form_with_offset:
	// Byte 1 = low byte of the offset; byte 0 combines the bits of
	// offset>>8 (shifted left by 5) with (L<<2)|1.
	MOVB CL, 1(AX)
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	SARL $0x8, CX
	SHLL $0x5, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x2, AX
	ADDQ $0x2, BX

done:
	MOVQ BX, ret+40(FP)
	RET
// func emitCopy(dst []byte, offset int, length int) int
//
// Encodes a copy of `length` bytes from `offset` at the start of dst and
// returns the number of bytes written. dst_len is never read, so the caller
// must guarantee sufficient room.
//
// Shape of the output, as implemented below:
//   - offset >= 65536: 5-byte ops (tag byte + 32-bit little-endian offset).
//     If length > 64, a first op covering 64 bytes is written; a remainder of
//     4 or more is emitted through the inlined repeat encoder, a remainder of
//     1..3 as one more 5-byte op.
//   - offset < 65536 and length > 64: a 3-byte op covering 60 bytes, then the
//     remainder through the inlined repeat encoder.
//   - otherwise: a single 2-byte op (length < 12 and offset < 2048) or a
//     single 3-byte op.
//
// Register roles:
//   AX = write cursor into dst      BX = bytes written so far (return value)
//   CX = offset                     DX = remaining length / tag scratch
//   SI = scratch
//
// NOTE: the 32-bit offset stores use MOVL. The Go assembler treats MOVD as an
// alias of MOVQ, so a MOVD here would store eight bytes and write four bytes
// past the end of the 5-byte op. This file is generated ("DO NOT EDIT"), so
// the generator needs the same change or the fix will be lost on regeneration.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone

	// Long offset, length > 64: tag 0xff followed by the 4-byte offset,
	// accounting for 64 bytes of length.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAQ -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone

	// Inlined repeat encoder for the remainder (long-offset path).
emit_repeat_again_standalone_emit_copy:
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy

	// Remainder too large for one op: emit a 5-byte op with the fixed length
	// field 0xfffffb, consume 0x0100fffb from DX and loop.
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	// 0x1d, 0x00, 24-bit little-endian length field.
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy:
	// 0x19, 0x00, 16-bit little-endian length field.
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy:
	// 0x15, 0x00, 8-bit length field.
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy:
	// Two bytes: low 16 bits of ((length-4)<<2 | 1).
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	// Two bytes carrying the offset: byte 1 = low 8 bits of offset,
	// byte 0 = low byte of ((offset>>8)<<5 | (length-4)<<2 | 1).
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

four_bytes_remain_standalone:
	// Long offset, remaining length <= 64: nothing to do for length 0,
	// otherwise one 5-byte op with tag = low byte of ((length-1)<<2 | 3).
	// MOVB only sets the low byte of SI; that is sufficient because only DL
	// is stored.
	TESTL DX, DX
	JZ    gen_emit_copy_end
	MOVB  $0x03, SI
	LEAQ  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone

	// offset < 65536, length > 64: tag 0xee followed by the 16-bit offset,
	// accounting for 60 bytes of length; the rest goes out as repeats.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAQ -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

	// Inlined repeat encoder for the remainder (short-offset path).
emit_repeat_again_standalone_emit_copy_short:
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy_short
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

two_byte_offset_short_standalone:
	// length <= 64, offset < 65536. The 2-byte form is used only when
	// length < 12 and offset < 2048; everything else takes the 3-byte form.
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone

	// byte 1 = low 8 bits of offset,
	// byte 0 = low byte of ((offset>>8)<<5 | (length-4)<<2 | 1).
	MOVB $0x01, SI
	LEAQ -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

emit_copy_three_standalone:
	// byte 0 = low byte of ((length-1)<<2 | 2), bytes 1..2 = 16-bit offset.
	MOVB $0x02, SI
	LEAQ -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET
// func matchLen(a []byte, b []byte) int
//
// Returns the number of leading bytes that are equal in a and b, examining at
// most len(a) bytes. Only a_len is read; b's length is never consulted, so b
// must be at least as long as a.
//
// Register roles:
//   AX = a base   CX = b base   DX = bytes of a left to compare
//   SI = bytes matched so far (return value)   BX = scratch
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ a_len+8(FP), DX
	MOVQ b_base+24(FP), CX
	XORQ SI, SI
	CMPQ DX, $0x08
	JL   matchLen_tail

matchLen_words:
	// Compare eight bytes at a time; XORQ leaves ZF set when all eight match.
	MOVQ (AX)(SI*1), BX
	XORQ (CX)(SI*1), BX
	JNZ  matchLen_word_differs
	SUBQ $0x08, DX
	ADDQ $0x08, SI
	CMPQ DX, $0x08
	JGE  matchLen_words

matchLen_tail:
	// Fewer than eight bytes left: finish one byte at a time.
	TESTQ DX, DX
	JZ    matchLen_done

matchLen_bytes:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE  matchLen_done
	INCQ SI
	DECQ DX
	JNZ  matchLen_bytes
	JMP  matchLen_done

matchLen_word_differs:
	// Index of the lowest set bit / 8 = number of equal low-order bytes
	// (little-endian load, so low-order bytes come first in memory).
	BSFQ BX, BX
	SHRQ $0x03, BX
	ADDQ BX, SI

matchLen_done:
	MOVQ SI, ret+48(FP)
	RET