// Listing metadata: 9826 lines, 243 KiB. The viewer labelled this file "ArmAsm", but the code below is Go assembler source for x86-64 (SSE2).
// Code generated by command: go run asm.go -out allocfail.s -stubs stubs.go. DO NOT EDIT.
|
|
|
|
// +build !appengine
|
|
// +build !noasm
|
|
// +build gc
|
|
|
|
#include "textflag.h"
|
|
|
|
// func encodeBlockAsm(dst []byte, src []byte) int
|
|
// Requires: SSE2
|
|
TEXT ·encodeBlockAsm(SB), $65568-56
|
|
MOVQ $0x00000200, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm:
|
|
MOVQ (CX)(AX*1), SI
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x06, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), DI
|
|
CMPL BX, DI
|
|
JGT emit_remainder_encodeBlockAsm
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ SI, R8
|
|
MOVQ SI, R9
|
|
SHRQ $0x08, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x30, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ BX, R9
|
|
SHRQ $0x30, R9
|
|
MOVL 32(SP)(R8*1), BX
|
|
MOVL 32(SP)(R9*1), DI
|
|
MOVL AX, 32(SP)(R8*1)
|
|
LEAL 1(AX), R8
|
|
MOVL R8, 32(SP)(R9*1)
|
|
MOVL AX, R8
|
|
SUBL 24(SP), R8
|
|
MOVL 1(CX)(R8*1), R10
|
|
MOVQ SI, R9
|
|
SHLQ $0x08, R9
|
|
CMPL R9, R10
|
|
JNE no_repeat_found_encodeBlockAsm
|
|
LEAQ 1(AX), SI
|
|
MOVL 20(SP), BX
|
|
TESTL R8, R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm:
|
|
CMPL SI, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm
|
|
MOVB -1(CX)(R8*1), DL
|
|
MOVB -1(CX)(SI*1), DI
|
|
CMPB DL, DI
|
|
JNE repeat_extend_back_end_encodeBlockAsm
|
|
LEAQ -1(SI), SI
|
|
DECL R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm
|
|
JMP repeat_extend_back_loop_encodeBlockAsm
|
|
|
|
repeat_extend_back_end_encodeBlockAsm:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, SI
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm
|
|
MOVL SI, DI
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(BX*1), R8
|
|
SUBL BX, DI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ DI, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm
|
|
MOVB $0xfc, (BX)
|
|
MOVL R9, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (BX)
|
|
MOVW R9, 1(BX)
|
|
MOVB R10, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R9, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R9, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm:
|
|
LEAQ (BX)(DI*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
|
|
MOVB (R8), R9
|
|
MOVB -1(R8)(DI*1), R8
|
|
MOVB R9, (BX)
|
|
MOVB R8, -1(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4:
|
|
MOVL (R8), R9
|
|
MOVL R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
|
|
MOVW (R8), R9
|
|
MOVB 2(R8), R8
|
|
MOVW R9, (BX)
|
|
MOVB R8, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7:
|
|
MOVL (R8), R9
|
|
MOVL -4(R8)(DI*1), R8
|
|
MOVL R9, (BX)
|
|
MOVL R8, -4(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
|
|
MOVQ (R8), R9
|
|
MOVQ R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16:
|
|
MOVQ (R8), R9
|
|
MOVQ -8(R8)(DI*1), R8
|
|
MOVQ R9, (BX)
|
|
MOVQ R8, -8(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
|
|
MOVOU (R8), X0
|
|
MOVOU -16(R8)(DI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU -32(R8)(DI*1), X2
|
|
MOVOU -16(R8)(DI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(DI*1)
|
|
MOVOU X3, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU -128(R8)(DI*1), X8
|
|
MOVOU -112(R8)(DI*1), X9
|
|
MOVOU -96(R8)(DI*1), X10
|
|
MOVOU -80(R8)(DI*1), X11
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(DI*1)
|
|
MOVOU X9, -112(BX)(DI*1)
|
|
MOVOU X10, -96(BX)(DI*1)
|
|
MOVOU X11, -80(BX)(DI*1)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU 128(R8), X8
|
|
MOVOU 144(R8), X9
|
|
MOVOU 160(R8), X10
|
|
MOVOU 176(R8), X11
|
|
MOVOU 192(R8), X12
|
|
MOVOU 208(R8), X13
|
|
MOVOU 224(R8), X14
|
|
MOVOU 240(R8), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(R8), R8
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail
|
|
MOVQ R9, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ R8, R8
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP repeat_extend_forward_end_encodeBlockAsm
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(R8), R8
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm
|
|
LEAQ 1(R8), R8
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm:
|
|
ADDL R8, AX
|
|
MOVL AX, BX
|
|
SUBL SI, BX
|
|
MOVL 24(SP), SI
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVL 20(SP), R8
|
|
TESTL R8, R8
|
|
JZ repeat_as_copy_encodeBlockAsm
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_as_copy_encodeBlockAsm:
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, DI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
repeat_end_emit_encodeBlockAsm:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm
|
|
JMP search_loop_encodeBlockAsm
|
|
|
|
no_repeat_found_encodeBlockAsm:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R9
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, R8
|
|
SHLQ $0x10, R8
|
|
IMULQ R9, R8
|
|
SHRQ $0x30, R8
|
|
CMPL (CX)(BX*1), SI
|
|
SHRQ $0x08, SI
|
|
JEQ candidate_match_encodeBlockAsm
|
|
MOVL 32(SP)(R8*1), BX
|
|
CMPL (CX)(DI*1), SI
|
|
JEQ candidate2_match_encodeBlockAsm
|
|
LEAQ 2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
SHRQ $0x08, SI
|
|
CMPL (CX)(BX*1), SI
|
|
JEQ candidate3_match_encodeBlockAsm
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm
|
|
|
|
candidate3_match_encodeBlockAsm:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm
|
|
|
|
candidate2_match_encodeBlockAsm:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(R8*1)
|
|
INCL AX
|
|
MOVL DI, BX
|
|
|
|
candidate_match_encodeBlockAsm:
|
|
MOVL 20(SP), SI
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm
|
|
|
|
match_extend_back_loop_encodeBlockAsm:
|
|
CMPL AX, SI
|
|
JG match_extend_back_end_encodeBlockAsm
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), DI
|
|
CMPB DL, DI
|
|
JNE match_extend_back_end_encodeBlockAsm
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm
|
|
JMP match_extend_back_loop_encodeBlockAsm
|
|
|
|
match_extend_back_end_encodeBlockAsm:
|
|
MOVL AX, SI
|
|
SUBL 20(SP), SI
|
|
LEAQ dst_base+0(FP)(SI*1), SI
|
|
CMPQ SI, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm:
|
|
MOVL BX, SI
|
|
MOVL 20(SP), DI
|
|
CMPL DI, SI
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm
|
|
MOVL SI, R8
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(DI*1), SI
|
|
SUBL DI, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVQ R8, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_match_emit_encodeBlockAsm
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm
|
|
MOVB $0xfc, (DI)
|
|
MOVL R9, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
four_bytes_match_emit_encodeBlockAsm:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (DI)
|
|
MOVW R9, 1(DI)
|
|
MOVB R10, 3(DI)
|
|
ADDQ $0x04, DI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
three_bytes_match_emit_encodeBlockAsm:
|
|
MOVB $0xf4, (DI)
|
|
MOVW R9, 1(DI)
|
|
ADDQ $0x03, DI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
two_bytes_match_emit_encodeBlockAsm:
|
|
MOVB $0xf0, (DI)
|
|
MOVB R9, 1(DI)
|
|
ADDQ $0x02, DI
|
|
JMP memmove_match_emit_encodeBlockAsm
|
|
|
|
one_byte_match_emit_encodeBlockAsm:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (DI)
|
|
ADDQ $0x01, DI
|
|
|
|
memmove_match_emit_encodeBlockAsm:
|
|
LEAQ (DI)(R8*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail:
|
|
TESTQ R8, R8
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm
|
|
CMPQ R8, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
|
|
CMPQ R8, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4
|
|
CMPQ R8, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
|
|
CMPQ R8, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16
|
|
CMPQ R8, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
|
|
CMPQ R8, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
|
|
CMPQ R8, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128
|
|
CMPQ R8, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
|
|
MOVB (SI), R9
|
|
MOVB -1(SI)(R8*1), SI
|
|
MOVB R9, (DI)
|
|
MOVB SI, -1(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4:
|
|
MOVL (SI), R9
|
|
MOVL R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
|
|
MOVW (SI), R9
|
|
MOVB 2(SI), SI
|
|
MOVW R9, (DI)
|
|
MOVB SI, 2(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7:
|
|
MOVL (SI), R9
|
|
MOVL -4(SI)(R8*1), SI
|
|
MOVL R9, (DI)
|
|
MOVL SI, -4(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
|
|
MOVQ (SI), R9
|
|
MOVQ R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16:
|
|
MOVQ (SI), R9
|
|
MOVQ -8(SI)(R8*1), SI
|
|
MOVQ R9, (DI)
|
|
MOVQ SI, -8(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
|
|
MOVOU (SI), X0
|
|
MOVOU -16(SI)(R8*1), X1
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU -32(SI)(R8*1), X2
|
|
MOVOU -16(SI)(R8*1), X3
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, -32(DI)(R8*1)
|
|
MOVOU X3, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU -128(SI)(R8*1), X8
|
|
MOVOU -112(SI)(R8*1), X9
|
|
MOVOU -96(SI)(R8*1), X10
|
|
MOVOU -80(SI)(R8*1), X11
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, -128(DI)(R8*1)
|
|
MOVOU X9, -112(DI)(R8*1)
|
|
MOVOU X10, -96(DI)(R8*1)
|
|
MOVOU X11, -80(DI)(R8*1)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048:
|
|
LEAQ -256(R8), R8
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU 128(SI), X8
|
|
MOVOU 144(SI), X9
|
|
MOVOU 160(SI), X10
|
|
MOVOU 176(SI), X11
|
|
MOVOU 192(SI), X12
|
|
MOVOU 208(SI), X13
|
|
MOVOU 224(SI), X14
|
|
MOVOU 240(SI), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, 128(DI)
|
|
MOVOU X9, 144(DI)
|
|
MOVOU X10, 160(DI)
|
|
MOVOU X11, 176(DI)
|
|
MOVOU X12, 192(DI)
|
|
MOVOU X13, 208(DI)
|
|
MOVOU X14, 224(DI)
|
|
MOVOU X15, 240(DI)
|
|
CMPQ R8, $0x00000100
|
|
LEAQ 256(SI), SI
|
|
LEAQ 256(DI), DI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail
|
|
MOVQ R9, DI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm:
|
|
MOVQ DI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm:
|
|
MOVL AX, SI
|
|
MOVL AX, SI
|
|
SUBL BX, SI
|
|
MOVL SI, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), SI
|
|
SUBL AX, SI
|
|
XORQ R8, R8
|
|
CMPQ SI, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP match_nolit_end_encodeBlockAsm
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm:
|
|
LEAQ -8(SI), SI
|
|
LEAQ 8(R8), R8
|
|
CMPQ SI, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm:
|
|
TESTQ SI, SI
|
|
JZ match_nolit_end_encodeBlockAsm
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE match_nolit_end_encodeBlockAsm
|
|
LEAQ 1(R8), R8
|
|
DECQ SI
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm
|
|
|
|
match_nolit_end_encodeBlockAsm:
|
|
MOVL 24(SP), SI
|
|
ADDQ $0x04, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
ADDL R8, AX
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm
|
|
CMPL R8, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(R8), R8
|
|
ADDQ $0x05, DI
|
|
CMPL R8, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm_emit_copy:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm_emit_copy:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm_emit_copy:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm_emit_copy:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm:
|
|
TESTL R8, R8
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm:
|
|
CMPL R8, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(R8), R8
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm:
|
|
CMPL R8, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm
|
|
CMPQ DI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
// Hashes the input at source offsets AX-2 and AX, records both positions in
// the table at 32(SP), then tests whether matching can continue immediately.
match_nolit_dst_ok_encodeBlockAsm:
|
|
// Load 8 source bytes starting at offset AX-2 (CX is used as the base).
MOVQ -2(CX)(AX*1), SI
|
|
// Multiplier used by both hash computations below.
MOVQ $0x0000cf1bbcdcbf9b, DI
|
|
MOVQ SI, R8
|
|
// SI now holds the bytes starting at offset AX.
SHRQ $0x10, SI
|
|
MOVQ SI, R9
|
|
// hash = ((x << 16) * multiplier) >> 48: the left shift discards the top two
// bytes, so six input bytes contribute and the result is a 16-bit value.
SHLQ $0x10, R8
|
|
IMULQ DI, R8
|
|
SHRQ $0x30, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ DI, R9
|
|
SHRQ $0x30, R9
|
|
// NOTE(review): both loads below are dead; DI is overwritten by the LEAQ
// that follows before either loaded value is read.
MOVL 32(SP)(R8*1), DI
|
|
MOVL 32(SP)(R9*1), DI
|
|
LEAQ -2(AX), DI
|
|
// NOTE(review): the index is a 16-bit value scaled by 1 and the store is
// 4 bytes wide, so neighbouring entries overlap and the highest index writes
// up to 32+65535+3, past the 65568-byte frame declared on the TEXT line.
// Confirm the intended table size and scale in the generator.
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, 32(SP)(R9*1)
|
|
// NOTE(review): R9 still holds the hash-derived table index here, not a
// position read back from the table, yet it is used as a source offset.
CMPL (CX)(R9*1), SI
|
|
JEQ match_nolit_loop_encodeBlockAsm
|
|
// No immediate match: step one byte forward and resume searching.
INCL AX
|
|
JMP search_loop_encodeBlockAsm
|
|
|
|
// Checks that the not-yet-emitted tail of the input fits below the output
// limit before writing it; returns 0 if it does not.
emit_remainder_encodeBlockAsm:
|
|
MOVQ src_len+32(FP), AX
|
|
// AX = src_len - 20(SP): number of source bytes not yet emitted (32-bit).
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
// DX = current output cursor + remaining byte count.
LEAQ (DX)(AX*1), DX
|
|
// (SP) holds the output limit computed in the prologue.
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm
|
|
// Not enough room: return 0.
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
// Emits the header for the final run of bytes from source offset 20(SP) to
// src_len, choosing the header width from (length - 1), then hands over to the
// copy code. Register roles after the setup below:
//   AX = source pointer, BX = length, CX = output cursor, DX = length - 1.
emit_remainder_ok_encodeBlockAsm:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
// Nothing left to emit when the recorded position already equals src_len.
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm
|
|
MOVL AX, BX
|
|
// Mark the whole input as emitted.
MOVL AX, 20(SP)
|
|
// AX = CX + start offset.
// NOTE(review): assumes CX still holds src_base as loaded in the prologue.
LEAQ (CX)(DX*1), AX
|
|
// BX = src_len - start offset = number of bytes to copy.
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
// DX = length - 1; the borrow (carry) is set only when the length was 0.
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm
|
|
// Pick the header width by the magnitude of length - 1 (signed compares).
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm
|
|
// Five-byte header: 0xfc followed by the 32-bit value of length - 1.
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
// Four-byte header: 0xf8 followed by bits 0-23 of DX (length - 1), stored as
// a 16-bit word plus one byte. Advances the output cursor CX by 4.
four_bytes_emit_remainder_encodeBlockAsm:
|
|
MOVQ DX, SI
|
|
// SI = bits 16 and up of length - 1.
SHRL $0x10, SI
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB SI, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
// Three-byte header: 0xf4 followed by the low 16 bits of DX (length - 1).
// Advances the output cursor CX by 3.
three_bytes_emit_remainder_encodeBlockAsm:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
// Two-byte header: 0xf0 followed by the low byte of DX (length - 1), which is
// in the range 60..255 on this path. Advances the output cursor CX by 2.
two_bytes_emit_remainder_encodeBlockAsm:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm
|
|
|
|
// One-byte header: (length - 1) << 2, with length - 1 below 60 on this path.
// Advances the output cursor CX by 1 and falls through to the copy code.
one_byte_emit_remainder_encodeBlockAsm:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
// Entry to the inline copy of BX bytes from AX to CX. DX is set to the
// address one past the last byte that will be written.
// NOTE(review): several of the size-specific copy cases that follow use DX/DL
// as a scratch register, and the only instruction that would copy DX back to
// CX sits after an unconditional JMP, so this end pointer is never consumed.
memmove_emit_remainder_encodeBlockAsm:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
// Size dispatch for the inline copy: branches on the byte count in BX
// (unsigned compares) to a case specialised for that size range.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail:
|
|
// Zero bytes: nothing to copy.
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
|
|
// One compare serves two branches: below 4 means exactly 3 here, and the
// JBE that follows can then only be taken for exactly 4.
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4
|
|
// Same trick for 5..7 versus exactly 8.
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256
|
|
// More than 256 bytes: copy in 256-byte chunks.
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
|
|
|
|
// Copies 1 or 2 bytes by moving the first and the last byte; when BX is 1
// both accesses address the same byte. Both loads happen before either store.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
// The last byte lands in AL, overwriting the low byte of the source pointer;
// AX is not used as an address again in this block.
MOVB -1(AX)(BX*1), AL
|
|
MOVB DL, (CX)
|
|
MOVB AL, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Copies exactly 4 bytes with a single 32-bit load and store.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Copies exactly 3 bytes as a 16-bit word plus one byte. Both loads happen
// before either store.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
|
|
MOVW (AX), DX
|
|
// Third byte goes into AL (low byte of the source pointer, no longer needed).
MOVB 2(AX), AL
|
|
MOVW DX, (CX)
|
|
MOVB AL, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Copies 5..7 bytes with two 32-bit moves: the first 4 bytes and the last
// 4 bytes, which overlap in the middle. Both loads happen before either store.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
// The tail load reuses AX as its destination; the source pointer is no
// longer needed afterwards.
MOVL -4(AX)(BX*1), AX
|
|
MOVL DX, (CX)
|
|
MOVL AX, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Copies exactly 8 bytes with a single 64-bit load and store.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Copies 9..16 bytes with two 64-bit moves: the first 8 bytes and the last
// 8 bytes, which may overlap. Both loads happen before either store.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
// The tail load reuses AX as its destination; the source pointer is no
// longer needed afterwards.
MOVQ -8(AX)(BX*1), AX
|
|
MOVQ DX, (CX)
|
|
MOVQ AX, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Copies 17..32 bytes with two unaligned 16-byte moves: the first 16 bytes
// and the last 16 bytes, which may overlap. Loads precede stores.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm
|
|
|
|
// Bulk copy loop: each iteration moves 256 bytes from AX to CX through
// X0..X15 and loops while at least 256 bytes remain; whatever is left is
// handed back to the size dispatch at ..._memmove_tail.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048:
|
|
// Account for the 256 bytes this iteration copies.
LEAQ -256(BX), BX
|
|
// Load sixteen 16-byte blocks.
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
// Store the same sixteen blocks.
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
// The compare sets the flags for the JGE below; the two LEAQs in between
// advance the pointers without disturbing those flags.
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
|
|
// Fewer than 256 bytes left: re-dispatch on the remaining count.
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail
|
|
// NOTE(review): unreachable. It follows an unconditional JMP and carries no
// label, so CX is never set from DX. Confirm in the generator where the
// end-of-copy label was meant to be placed.
MOVQ DX, CX
|
|
|
|
// Stores the output cursor CX back into the dst_base argument slot.
// NOTE(review): the fixed-size copy cases that jump here do not add the
// copied byte count to CX, and the MOVQ DX, CX just above this label is
// unreachable, so CX appears to point at the start of the copied bytes rather
// than past them. Confirm against the generator.
emit_literal_done_emit_remainder_encodeBlockAsm:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
// Function exit: computes the return value from the saved and the current
// output pointers and returns.
emit_literal_skip_emit_remainder_encodeBlockAsm:
|
|
// 8(SP) holds the value dst_base had on entry (saved by the prologue).
MOVQ 8(SP), AX
|
|
// NOTE(review): this yields (initial dst) - (current dst), which is zero or
// negative if the cursor only ever advances. A byte count would need the
// operands the other way round; confirm the intended sign.
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm14B(dst []byte, src []byte) int
|
|
// Requires: SSE2
|
|
TEXT ·encodeBlockAsm14B(SB), $16416-56
|
|
MOVQ $0x00000080, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
// Clears the table area and initialises the stack slots used by the encoder.
// On entry AX = 0x80, CX = address of 32(SP) and X0 = 0 (set by the three
// instructions before this label), so the loop runs 128 times and zeroes
// 128 * 128 = 16384 bytes starting at 32(SP).
zero_loop_encodeBlockAsm14B:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm14B
|
|
// AX is 0 when the loop exits, so this stores 0 into 20(SP).
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
// CX = src_len - 5, BX = src_len - 8.
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
// CX = src_len - 5 - (src_len >> 5), the subtraction done in 32 bits.
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
// 16(SP) = src_len - 8.
MOVL BX, 16(SP)
|
|
// 8(SP) = dst_base as passed in.
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
// (SP) = dst_base + CX: the output limit checked by later code.
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
// AX starts at 1 and the same value seeds 24(SP).
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
// CX = src_base for the code that follows.
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm14B:
|
|
MOVQ (CX)(AX*1), SI
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x05, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), DI
|
|
CMPL BX, DI
|
|
JGT emit_remainder_encodeBlockAsm14B
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ SI, R8
|
|
MOVQ SI, R9
|
|
SHRQ $0x08, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x32, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ BX, R9
|
|
SHRQ $0x32, R9
|
|
MOVL 32(SP)(R8*1), BX
|
|
MOVL 32(SP)(R9*1), DI
|
|
MOVL AX, 32(SP)(R8*1)
|
|
LEAL 1(AX), R8
|
|
MOVL R8, 32(SP)(R9*1)
|
|
MOVL AX, R8
|
|
SUBL 24(SP), R8
|
|
MOVL 1(CX)(R8*1), R10
|
|
MOVQ SI, R9
|
|
SHLQ $0x08, R9
|
|
CMPL R9, R10
|
|
JNE no_repeat_found_encodeBlockAsm14B
|
|
LEAQ 1(AX), SI
|
|
MOVL 20(SP), BX
|
|
TESTL R8, R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm14B
|
|
|
|
// Walks two source offsets (SI and R8) backwards one byte at a time while the
// bytes just before them are equal, stopping when R8 reaches 0.
repeat_extend_back_loop_encodeBlockAsm14B:
|
|
// Exit when SI > BX (signed); BX was loaded from 20(SP) before the loop.
// NOTE(review): confirm the direction of this bound check in the generator.
CMPL SI, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm14B
|
|
// Compare the byte before offset R8 with the byte before offset SI.
MOVB -1(CX)(R8*1), DL
|
|
MOVB -1(CX)(SI*1), DI
|
|
CMPB DL, DI
|
|
JNE repeat_extend_back_end_encodeBlockAsm14B
|
|
// Bytes match: step both offsets back by one.
LEAQ -1(SI), SI
|
|
DECL R8
|
|
// Stop once R8 has reached offset 0.
JZ repeat_extend_back_end_encodeBlockAsm14B
|
|
JMP repeat_extend_back_loop_encodeBlockAsm14B
|
|
|
|
repeat_extend_back_end_encodeBlockAsm14B:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, SI
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14B
|
|
MOVL SI, DI
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(BX*1), R8
|
|
SUBL BX, DI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ DI, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm14B
|
|
MOVB $0xfc, (BX)
|
|
MOVL R9, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm14B:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (BX)
|
|
MOVW R9, 1(BX)
|
|
MOVB R10, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm14B:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R9, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm14B:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R9, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14B
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm14B:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm14B:
|
|
LEAQ (BX)(DI*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2:
|
|
MOVB (R8), R9
|
|
MOVB -1(R8)(DI*1), R8
|
|
MOVB R9, (BX)
|
|
MOVB R8, -1(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4:
|
|
MOVL (R8), R9
|
|
MOVL R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3:
|
|
MOVW (R8), R9
|
|
MOVB 2(R8), R8
|
|
MOVW R9, (BX)
|
|
MOVB R8, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7:
|
|
MOVL (R8), R9
|
|
MOVL -4(R8)(DI*1), R8
|
|
MOVL R9, (BX)
|
|
MOVL R8, -4(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8:
|
|
MOVQ (R8), R9
|
|
MOVQ R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16:
|
|
MOVQ (R8), R9
|
|
MOVQ -8(R8)(DI*1), R8
|
|
MOVQ R9, (BX)
|
|
MOVQ R8, -8(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32:
|
|
MOVOU (R8), X0
|
|
MOVOU -16(R8)(DI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU -32(R8)(DI*1), X2
|
|
MOVOU -16(R8)(DI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(DI*1)
|
|
MOVOU X3, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU -128(R8)(DI*1), X8
|
|
MOVOU -112(R8)(DI*1), X9
|
|
MOVOU -96(R8)(DI*1), X10
|
|
MOVOU -80(R8)(DI*1), X11
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(DI*1)
|
|
MOVOU X9, -112(BX)(DI*1)
|
|
MOVOU X10, -96(BX)(DI*1)
|
|
MOVOU X11, -80(BX)(DI*1)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU 128(R8), X8
|
|
MOVOU 144(R8), X9
|
|
MOVOU 160(R8), X10
|
|
MOVOU 176(R8), X11
|
|
MOVOU 192(R8), X12
|
|
MOVOU 208(R8), X13
|
|
MOVOU 224(R8), X14
|
|
MOVOU 240(R8), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(R8), R8
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail
|
|
MOVQ R9, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm14B:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
// Advances the source offset by 5 and sets up the forward match-length scan:
// BX = 16(SP) - AX bytes available, R8 = 0 as the running match length.
emit_literal_skip_repeat_emit_encodeBlockAsm14B:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
// NOTE(review): the result of this subtraction (AX - 24(SP)) is discarded;
// BX is overwritten by the very next instruction before it is read.
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ R8, R8
|
|
// Fewer than 8 bytes available: use the byte-at-a-time loop.
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
// 8-bytes-at-a-time step of the match-length scan: XOR two 8-byte words and,
// if they differ, add the index of the first differing byte to R8.
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(R8*1), DI
|
|
// NOTE(review): this XORs the word with the same memory operand it was just
// loaded from, so DI is always 0 and the JZ below is always taken. The two
// operands presumably should address different positions; confirm in the
// generator.
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_repeat_extend
|
|
// Lowest set bit index / 8 = number of leading equal bytes in this word.
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP repeat_extend_forward_end_encodeBlockAsm14B
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(R8), R8
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm14B
|
|
|
|
// Byte-at-a-time step of the match-length scan: runs up to BX times, adding
// one to R8 per iteration while the compared bytes are equal.
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(R8*1), DI
|
|
// NOTE(review): the byte is compared with the same memory location it was
// loaded from, so the JNE below can never be taken. Confirm which second
// position was intended.
CMPB (CX)(R8*1), DI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm14B
|
|
LEAQ 1(R8), R8
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm14B:
|
|
ADDL R8, AX
|
|
MOVL AX, BX
|
|
SUBL SI, BX
|
|
MOVL 24(SP), SI
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVL 20(SP), R8
|
|
TESTL R8, R8
|
|
JZ repeat_as_copy_encodeBlockAsm14B
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_as_copy_encodeBlockAsm14B:
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm14B
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, DI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14B
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm14B:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm14B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm14B:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14B
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm14B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
// Join point after a repeat has been written: commits the output cursor, then
// either finishes with the remainder or resumes the search loop.
repeat_end_emit_encodeBlockAsm14B:
|
|
// Write the output cursor back into the dst_base argument slot.
MOVQ DI, dst_base+0(FP)
|
|
// 16(SP) was set to src_len - 8 in the prologue.
MOVL 16(SP), BX
|
|
// Past that point (AX > BX, signed) only the remainder is left.
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm14B
|
|
JMP search_loop_encodeBlockAsm14B
|
|
|
|
no_repeat_found_encodeBlockAsm14B:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R9
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, R8
|
|
SHLQ $0x10, R8
|
|
IMULQ R9, R8
|
|
SHRQ $0x32, R8
|
|
CMPL (CX)(BX*1), SI
|
|
SHRQ $0x08, SI
|
|
JEQ candidate_match_encodeBlockAsm14B
|
|
MOVL 32(SP)(R8*1), BX
|
|
CMPL (CX)(DI*1), SI
|
|
JEQ candidate2_match_encodeBlockAsm14B
|
|
LEAQ 2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
SHRQ $0x08, SI
|
|
CMPL (CX)(BX*1), SI
|
|
JEQ candidate3_match_encodeBlockAsm14B
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm14B
|
|
|
|
candidate3_match_encodeBlockAsm14B:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm14B
|
|
|
|
candidate2_match_encodeBlockAsm14B:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(R8*1)
|
|
INCL AX
|
|
MOVL DI, BX
|
|
|
|
candidate_match_encodeBlockAsm14B:
|
|
MOVL 20(SP), SI
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm14B
|
|
|
|
// Walks two source offsets (AX and BX) backwards one byte at a time while the
// bytes just before them are equal, stopping when BX reaches 0.
match_extend_back_loop_encodeBlockAsm14B:
|
|
// Exit when AX > SI (signed); SI was loaded from 20(SP) before the loop.
// NOTE(review): confirm the direction of this bound check in the generator.
CMPL AX, SI
|
|
JG match_extend_back_end_encodeBlockAsm14B
|
|
// Compare the byte before offset BX with the byte before offset AX.
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), DI
|
|
CMPB DL, DI
|
|
JNE match_extend_back_end_encodeBlockAsm14B
|
|
// Bytes match: step both offsets back by one.
LEAL -1(AX), AX
|
|
DECL BX
|
|
// Stop once BX has reached offset 0.
JZ match_extend_back_end_encodeBlockAsm14B
|
|
JMP match_extend_back_loop_encodeBlockAsm14B
|
|
|
|
// Output-space check before emitting pending bytes: returns 0 unless the
// computed end address is below the limit stored at (SP).
match_extend_back_end_encodeBlockAsm14B:
|
|
// SI = AX - 20(SP): number of source bytes not yet emitted.
MOVL AX, SI
|
|
SUBL 20(SP), SI
|
|
// NOTE(review): LEAQ yields the ADDRESS of the dst_base argument slot plus
// SI, not the pointer value stored in that slot, so the compare below is made
// against a stack address. Loading dst_base first (MOVQ) and then adding SI
// looks like what was intended; confirm in the generator.
LEAQ dst_base+0(FP)(SI*1), SI
|
|
CMPQ SI, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm14B
|
|
// Not enough room: return 0.
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm14B:
|
|
MOVL BX, SI
|
|
MOVL 20(SP), DI
|
|
CMPL DI, SI
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm14B
|
|
MOVL SI, R8
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(DI*1), SI
|
|
SUBL DI, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVQ R8, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_match_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm14B
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm14B
|
|
MOVB $0xfc, (DI)
|
|
MOVL R9, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
four_bytes_match_emit_encodeBlockAsm14B:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (DI)
|
|
MOVW R9, 1(DI)
|
|
MOVB R10, 3(DI)
|
|
ADDQ $0x04, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
three_bytes_match_emit_encodeBlockAsm14B:
|
|
MOVB $0xf4, (DI)
|
|
MOVW R9, 1(DI)
|
|
ADDQ $0x03, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
two_bytes_match_emit_encodeBlockAsm14B:
|
|
MOVB $0xf0, (DI)
|
|
MOVB R9, 1(DI)
|
|
ADDQ $0x02, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14B
|
|
|
|
one_byte_match_emit_encodeBlockAsm14B:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (DI)
|
|
ADDQ $0x01, DI
|
|
|
|
memmove_match_emit_encodeBlockAsm14B:
|
|
LEAQ (DI)(R8*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail:
|
|
TESTQ R8, R8
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm14B
|
|
CMPQ R8, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2
|
|
CMPQ R8, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4
|
|
CMPQ R8, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8
|
|
CMPQ R8, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16
|
|
CMPQ R8, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32
|
|
CMPQ R8, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64
|
|
CMPQ R8, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128
|
|
CMPQ R8, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2:
|
|
MOVB (SI), R9
|
|
MOVB -1(SI)(R8*1), SI
|
|
MOVB R9, (DI)
|
|
MOVB SI, -1(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4:
|
|
MOVL (SI), R9
|
|
MOVL R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3:
|
|
MOVW (SI), R9
|
|
MOVB 2(SI), SI
|
|
MOVW R9, (DI)
|
|
MOVB SI, 2(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7:
|
|
MOVL (SI), R9
|
|
MOVL -4(SI)(R8*1), SI
|
|
MOVL R9, (DI)
|
|
MOVL SI, -4(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8:
|
|
MOVQ (SI), R9
|
|
MOVQ R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16:
|
|
MOVQ (SI), R9
|
|
MOVQ -8(SI)(R8*1), SI
|
|
MOVQ R9, (DI)
|
|
MOVQ SI, -8(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32:
|
|
MOVOU (SI), X0
|
|
MOVOU -16(SI)(R8*1), X1
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU -32(SI)(R8*1), X2
|
|
MOVOU -16(SI)(R8*1), X3
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, -32(DI)(R8*1)
|
|
MOVOU X3, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU -128(SI)(R8*1), X8
|
|
MOVOU -112(SI)(R8*1), X9
|
|
MOVOU -96(SI)(R8*1), X10
|
|
MOVOU -80(SI)(R8*1), X11
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, -128(DI)(R8*1)
|
|
MOVOU X9, -112(DI)(R8*1)
|
|
MOVOU X10, -96(DI)(R8*1)
|
|
MOVOU X11, -80(DI)(R8*1)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048:
|
|
LEAQ -256(R8), R8
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU 128(SI), X8
|
|
MOVOU 144(SI), X9
|
|
MOVOU 160(SI), X10
|
|
MOVOU 176(SI), X11
|
|
MOVOU 192(SI), X12
|
|
MOVOU 208(SI), X13
|
|
MOVOU 224(SI), X14
|
|
MOVOU 240(SI), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, 128(DI)
|
|
MOVOU X9, 144(DI)
|
|
MOVOU X10, 160(DI)
|
|
MOVOU X11, 176(DI)
|
|
MOVOU X12, 192(DI)
|
|
MOVOU X13, 208(DI)
|
|
MOVOU X14, 224(DI)
|
|
MOVOU X15, 240(DI)
|
|
CMPQ R8, $0x00000100
|
|
LEAQ 256(SI), SI
|
|
LEAQ 256(DI), DI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail
|
|
MOVQ R9, DI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm14B:
|
|
MOVQ DI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm14B:
|
|
NOP
|
|
|
|
// match_nolit_loop: set-up for the matchlen_* loops that follow.
// Stores AX-BX in the 24(SP) slot, advances AX and BX by 4, then leaves
// SI = 16(SP)-AX as the remaining byte budget and R8 = 0 as the running count.
match_nolit_loop_encodeBlockAsm14B:
|
|
MOVL AX, SI
|
|
// Same move emitted a second time; it has no additional effect.
MOVL AX, SI
|
|
// SI = AX - BX, saved to 24(SP) and reloaded at match_nolit_end.
SUBL BX, SI
|
|
MOVL SI, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
// SI = 16(SP) - AX (32-bit arithmetic).
MOVL 16(SP), SI
|
|
SUBL AX, SI
|
|
// R8 = 0: counter incremented by the matchlen loops.
XORQ R8, R8
|
|
// Fewer than 8 bytes of budget: skip the 8-byte loop, go byte-by-byte.
CMPQ SI, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm14B
|
|
|
|
// matchlen loops: R8 is the running count, SI the remaining byte budget.
// The first loop works on 8 bytes per iteration, the second on 1 byte.
//
// NOTE(review): in both loops the load and the XOR/CMP address the same
// location, (CX)(R8*1), so the value is compared with itself: DI is always
// zero after the XORQ and the JNE in the byte loop is never taken as written.
// A match-length routine would presumably compare two different positions;
// this file is generator output, so confirm the operands in the generator.
matchlen_loopback_match_nolit_encodeBlockAsm14B:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
// All 8 bytes equal: advance by 8 and try again.
JZ matchlen_loop_match_nolit_encodeBlockAsm14B
|
|
// Lowest set bit index / 8 = index of the first differing byte; add to R8.
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP match_nolit_end_encodeBlockAsm14B
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm14B:
|
|
// Consume 8 bytes of budget, add 8 to the count, loop while SI >= 8.
LEAQ -8(SI), SI
|
|
LEAQ 8(R8), R8
|
|
CMPQ SI, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm14B
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm14B:
|
|
// Nothing left to compare: finish with the current count.
TESTQ SI, SI
|
|
JZ match_nolit_end_encodeBlockAsm14B
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm14B:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE match_nolit_end_encodeBlockAsm14B
|
|
// Byte equal: count it and continue until the budget SI reaches zero.
LEAQ 1(R8), R8
|
|
DECQ SI
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14B
|
|
|
|
// match_nolit_end: the matchlen loops are done, R8 holds their count.
// Reloads the value saved in 24(SP) into SI, adds 4 to R8, advances AX by R8,
// loads the output cursor from dst_base+0(FP) into DI, and picks an encoding
// path based on SI and R8.
match_nolit_end_encodeBlockAsm14B:
|
|
MOVL 24(SP), SI
|
|
// Account for the 4 positions skipped before the loops (AX/BX were
// advanced by 4 at match_nolit_loop).
ADDQ $0x04, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
ADDL R8, AX
|
|
// SI < 0x10000: take the two_byte_offset path.
CMPL SI, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm14B
|
|
// R8 <= 64: emit a single 5-byte element at four_bytes_remain.
CMPL R8, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm14B
|
|
// R8 > 64: write byte 0xff followed by SI at 1(DI), take 64 off R8 and
// advance the output cursor by 5.
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(R8), R8
|
|
ADDQ $0x05, DI
|
|
// Fewer than 4 left: finish at four_bytes_remain; otherwise fall through
// to the emit_repeat_again ladder below.
CMPL R8, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm14B
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
// four_bytes_remain: emit one 5-byte element for the remaining R8 (if any):
// a first byte derived from R8, then SI stored at 1(DI).
four_bytes_remain_match_nolit_encodeBlockAsm14B:
|
|
// Nothing left to encode.
TESTL R8, R8
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
// Only DL is set and only the low byte of R8 is stored below, so the byte
// written is (3 + 4*(R8-1)) mod 256.
MOVB $0x03, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm14B:
|
|
CMPL R8, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm14B
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(R8), R8
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
// two_byte_offset_short: reached from two_byte_offset when R8 <= 64.
// Emits a 2-byte element when R8 < 12 and SI < 0x800, otherwise the 3-byte
// element at emit_copy_three.
two_byte_offset_short_match_nolit_encodeBlockAsm14B:
|
|
CMPL R8, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14B
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14B
|
|
// 2-byte form. Low byte of R8 becomes (1 + 4*(R8-4)) mod 256; only DL is
// set and only the low byte is stored, so the rest of DX does not matter.
MOVB $0x01, DL
|
|
LEAQ -16(DX)(R8*4), R8
|
|
// Second output byte = low 8 bits of SI.
MOVB SI, 1(DI)
|
|
// Bits of SI above the low 8 are moved to bit 5 and up and OR-ed into the
// first output byte.
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14B
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm14B:
|
|
// 3-byte form: first byte (2 + 4*(R8-1)) mod 256, then the low 16 bits of
// SI at 1(DI). Falls through to match_nolit_emitcopy_end.
MOVB $0x02, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
// match_nolit_emitcopy_end: common exit of the emit paths above.
// Publishes the output cursor DI to dst_base+0(FP) and records AX in 20(SP),
// then checks both limits.
// NOTE(review): 16(SP) and (SP) are presumably the source and destination
// limits written by this function's prologue, which is outside this view
// (the encodeBlockAsm12B prologue in this file sets them that way) - confirm.
match_nolit_emitcopy_end_encodeBlockAsm14B:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
// AX >= 16(SP): stop searching and emit whatever input remains.
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm14B
|
|
// DI below the value in (SP): room left, continue.
CMPQ DI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm14B
|
|
// Otherwise give up: return 0.
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
// match_nolit_dst_ok: refresh two entries of the table at 32(SP) and decide
// whether to loop straight back into match_nolit_loop.
// Two hashes are computed from the 8 bytes loaded at -2(CX)(AX*1):
//   R8 = ((v << 16) * 0xcf1bbcdcbf9b) >> 50          (v = loaded value)
//   R9 = (((v >> 16) << 16) * 0xcf1bbcdcbf9b) >> 50
// The shift by 50 leaves a 14-bit result.
match_nolit_dst_ok_encodeBlockAsm14B:
|
|
MOVQ -2(CX)(AX*1), SI
|
|
MOVQ $0x0000cf1bbcdcbf9b, DI
|
|
MOVQ SI, R8
|
|
// SI = v >> 16; kept for the compare at the end of the block.
SHRQ $0x10, SI
|
|
MOVQ SI, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ DI, R8
|
|
SHRQ $0x32, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ DI, R9
|
|
SHRQ $0x32, R9
|
|
// NOTE(review): both loads target DI and DI is overwritten by the LEAQ
// right after, so neither loaded table value is ever read.
MOVL 32(SP)(R8*1), DI
|
|
MOVL 32(SP)(R9*1), DI
|
|
// table[R8] = AX-2, table[R9] = AX (indices are used unscaled).
LEAQ -2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, 32(SP)(R9*1)
|
|
// NOTE(review): R9 still holds the hash value here, so this compares the
// 4 bytes at CX+hash with the low 32 bits of SI - confirm this is intended.
CMPL (CX)(R9*1), SI
|
|
JEQ match_nolit_loop_encodeBlockAsm14B
|
|
// No immediate match: step AX by one and resume the search loop.
INCL AX
|
|
JMP search_loop_encodeBlockAsm14B
|
|
|
|
// emit_remainder: bounds check before writing the remaining input.
// AX = src_len - 20(SP) (32-bit subtract), DX = dst_base + AX.
// Continues at emit_remainder_ok when DX is below the value stored in (SP);
// otherwise returns 0.
// NOTE(review): (SP) is presumably the destination limit set by the prologue
// outside this view - confirm.
emit_remainder_encodeBlockAsm14B:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm14B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
// emit_remainder_ok: write the length header for the bytes between 20(SP)
// and src_len, then fall into the memmove that copies them.
// On exit from this block: AX = source pointer (CX + old 20(SP)),
// BX = byte count, CX = output pointer just past the header.
// The header size is chosen from DX = count-1:
//   DX < 0x3c       -> 1 byte : DL<<2
//   DX < 0x100      -> 2 bytes: 0xf0, DL
//   DX < 0x10000    -> 3 bytes: 0xf4, 16-bit DX
//   DX < 0x1000000  -> 4 bytes: 0xf8, 16-bit DX, bits 16-23 of DX
//   otherwise       -> 5 bytes: 0xfc, 32-bit DX
emit_remainder_ok_encodeBlockAsm14B:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
// 20(SP) already equals src_len: nothing to emit.
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14B
|
|
MOVL AX, BX
|
|
// Record src_len in 20(SP).
MOVL AX, 20(SP)
|
|
// AX = CX + DX: address of the first byte to copy (CX is reloaded with
// the output pointer two instructions later).
LEAQ (CX)(DX*1), AX
|
|
// BX = src_len - old 20(SP): number of bytes to copy.
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
// DX = count-1; the borrow (carry) is set only when the count was 0.
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm14B
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm14B
|
|
// 5-byte header.
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm14B:
|
|
// 4-byte header: SI = DX >> 16 supplies the third length byte.
MOVQ DX, SI
|
|
SHRL $0x10, SI
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB SI, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm14B:
|
|
// 3-byte header.
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm14B:
|
|
// 2-byte header.
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14B
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm14B:
|
|
// 1-byte header: count-1 shifted left by 2. Falls through to the memmove.
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
// memmove_emit_remainder: copy BX bytes from AX to CX.
// DX is first set to CX+BX (the address one past the last output byte); the
// dispatch below then jumps to a copy routine specialised for the size in BX.
// The same dispatch is re-entered (at the _memmove_tail label) by the
// 256-byte loop to handle whatever is left after its last full iteration.
memmove_emit_remainder_encodeBlockAsm14B:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail:
|
|
// 0 bytes: nothing to copy.
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
// 1..2 bytes.
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2
|
|
// One compare against 4 serves two branches: below -> 3, equal -> 4.
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4
|
|
// One compare against 8 serves two branches: below -> 5..7, equal -> 8.
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8
|
|
// 9..16, 17..32, 33..64, 65..128, 129..256 bytes.
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256
|
|
// More than 256 bytes: use the 256-bytes-per-iteration loop.
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), AL
|
|
MOVB DL, (CX)
|
|
MOVB AL, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), AL
|
|
MOVW DX, (CX)
|
|
MOVB AL, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), AX
|
|
MOVL DX, (CX)
|
|
MOVL AX, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), AX
|
|
MOVQ DX, (CX)
|
|
MOVQ AX, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14B
|
|
|
|
// Bulk copy loop: moves 256 bytes per iteration from AX to CX through
// X0..X15 (sixteen unaligned 16-byte loads, then sixteen stores), advancing
// both pointers by 256 and reducing the count BX by 256 each time.
// Repeats while the remaining count is still >= 256, then hands the rest to
// the size dispatch at the _memmove_tail label.
emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048:
|
|
// BX = bytes remaining after this iteration.
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
// The compare sets the flags for the JGE below; the two LEAQ pointer
// bumps in between do not modify flags.
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail
|
|
// NOTE(review): this instruction has no label and directly follows an
// unconditional JMP, so it is never executed as written. It looks like the
// step that would move the end-of-copy address (DX) into CX before CX is
// stored to dst_base - confirm against the generator.
MOVQ DX, CX
|
|
|
|
// Return path of encodeBlockAsm14B.
// emit_literal_done stores CX as the new output cursor in dst_base+0(FP);
// emit_literal_skip (entered directly when there was nothing to emit) then
// computes the return value as 8(SP) - dst_base+0(FP).
// NOTE(review): 8(SP) is presumably the original dst_base saved by the
// prologue outside this view (the encodeBlockAsm12B prologue in this file
// saves it there); with that reading the result is start minus current
// cursor - confirm the operand order against the generator.
emit_literal_done_emit_remainder_encodeBlockAsm14B:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm14B:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm12B(dst []byte, src []byte) int
|
|
// Requires: SSE2
|
|
TEXT ·encodeBlockAsm12B(SB), $4128-56
|
|
MOVQ $0x00000020, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
// zero_loop: clear the table that starts at 32(SP), then set up the frame
// slots used by the rest of encodeBlockAsm12B.
// Entered with AX = 0x20, CX = address of 32(SP) and X0 = 0 (set just above).
// Each iteration stores 128 zero bytes, so 32 iterations clear 4096 bytes,
// which together with the 32 bytes below it matches the $4128 frame size.
// Frame slots written after the loop:
//    (SP)  = dst_base + (src_len - 5 - (src_len >> 5))
//   8(SP)  = dst_base as passed in
//  16(SP)  = src_len - 8
//  20(SP)  = 0
//  24(SP)  = 1
// On exit AX = 1 and CX = src_base.
zero_loop_encodeBlockAsm12B:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm12B
|
|
// AX is 0 here (the loop exits when DECQ reaches zero).
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
// CX = src_len - 5, BX = src_len - 8.
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
// CX -= src_len >> 5 (32-bit subtract).
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
// (SP) = dst_base + CX: compared against the output cursor later on.
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
// search_loop: one probe of the search at position AX (CX = src_base).
//  1. Load 8 source bytes at AX into SI.
//  2. Compute the next probe position BX = AX + 4 + ((AX - 20(SP)) >> 4);
//     if it exceeds 16(SP) (src_len-8, set by the prologue) go emit the
//     remainder, otherwise save it in 28(SP).
//  3. Hash the bytes at AX and at AX+1 (shift by 52 leaves 12 bits), read the
//     two old table entries into BX and DI, and store AX and AX+1 in them.
//  4. Check for a repeat: compare 4 source bytes at AX - 24(SP) + 1 with the
//     low 32 bits of SI << 8; on mismatch go to no_repeat_found.
search_loop_encodeBlockAsm12B:
|
|
MOVQ (CX)(AX*1), SI
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x04, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), DI
|
|
CMPL BX, DI
|
|
JGT emit_remainder_encodeBlockAsm12B
|
|
MOVL BX, 28(SP)
|
|
// Hash multiplier; BX is reused, the next position is already in 28(SP).
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ SI, R8
|
|
MOVQ SI, R9
|
|
// R9 = bytes starting at AX+1.
SHRQ $0x08, R9
|
|
// R8 = ((SI << 16) * multiplier) >> 52
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x34, R8
|
|
// R9 = (((SI >> 8) << 16) * multiplier) >> 52
SHLQ $0x10, R9
|
|
IMULQ BX, R9
|
|
SHRQ $0x34, R9
|
|
// Old table entries: BX for hash R8, DI for hash R9 (indices are used
// unscaled).
MOVL 32(SP)(R8*1), BX
|
|
MOVL 32(SP)(R9*1), DI
|
|
// New table entries: AX and AX+1.
MOVL AX, 32(SP)(R8*1)
|
|
LEAL 1(AX), R8
|
|
MOVL R8, 32(SP)(R9*1)
|
|
// R8 = AX - 24(SP); R10 = 4 source bytes at R8+1.
MOVL AX, R8
|
|
SUBL 24(SP), R8
|
|
MOVL 1(CX)(R8*1), R10
|
|
MOVQ SI, R9
|
|
SHLQ $0x08, R9
|
|
CMPL R9, R10
|
|
JNE no_repeat_found_encodeBlockAsm12B
|
|
// Repeat found: SI = AX+1, BX = 20(SP); skip the backward extension when
// R8 is already 0.
LEAQ 1(AX), SI
|
|
MOVL 20(SP), BX
|
|
TESTL R8, R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm12B
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm12B:
|
|
CMPL SI, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm12B
|
|
MOVB -1(CX)(R8*1), DL
|
|
MOVB -1(CX)(SI*1), DI
|
|
CMPB DL, DI
|
|
JNE repeat_extend_back_end_encodeBlockAsm12B
|
|
LEAQ -1(SI), SI
|
|
DECL R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm12B
|
|
JMP repeat_extend_back_loop_encodeBlockAsm12B
|
|
|
|
repeat_extend_back_end_encodeBlockAsm12B:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, SI
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12B
|
|
MOVL SI, DI
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(BX*1), R8
|
|
SUBL BX, DI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ DI, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm12B
|
|
MOVB $0xfc, (BX)
|
|
MOVL R9, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm12B:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (BX)
|
|
MOVW R9, 1(BX)
|
|
MOVB R10, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm12B:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R9, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm12B:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R9, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12B
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm12B:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm12B:
|
|
LEAQ (BX)(DI*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2:
|
|
MOVB (R8), R9
|
|
MOVB -1(R8)(DI*1), R8
|
|
MOVB R9, (BX)
|
|
MOVB R8, -1(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4:
|
|
MOVL (R8), R9
|
|
MOVL R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3:
|
|
MOVW (R8), R9
|
|
MOVB 2(R8), R8
|
|
MOVW R9, (BX)
|
|
MOVB R8, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7:
|
|
MOVL (R8), R9
|
|
MOVL -4(R8)(DI*1), R8
|
|
MOVL R9, (BX)
|
|
MOVL R8, -4(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
|
|
MOVQ (R8), R9
|
|
MOVQ R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16:
|
|
MOVQ (R8), R9
|
|
MOVQ -8(R8)(DI*1), R8
|
|
MOVQ R9, (BX)
|
|
MOVQ R8, -8(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
|
|
MOVOU (R8), X0
|
|
MOVOU -16(R8)(DI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU -32(R8)(DI*1), X2
|
|
MOVOU -16(R8)(DI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(DI*1)
|
|
MOVOU X3, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU -128(R8)(DI*1), X8
|
|
MOVOU -112(R8)(DI*1), X9
|
|
MOVOU -96(R8)(DI*1), X10
|
|
MOVOU -80(R8)(DI*1), X11
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(DI*1)
|
|
MOVOU X9, -112(BX)(DI*1)
|
|
MOVOU X10, -96(BX)(DI*1)
|
|
MOVOU X11, -80(BX)(DI*1)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU 128(R8), X8
|
|
MOVOU 144(R8), X9
|
|
MOVOU 160(R8), X10
|
|
MOVOU 176(R8), X11
|
|
MOVOU 192(R8), X12
|
|
MOVOU 208(R8), X13
|
|
MOVOU 224(R8), X14
|
|
MOVOU 240(R8), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(R8), R8
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail
|
|
MOVQ R9, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm12B:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
// emit_literal_skip_repeat_emit: set-up for the forward repeat extension.
// Advances AX by 5, then leaves BX = 16(SP) - AX as the byte budget and
// R8 = 0 as the running count for the matchlen_*_repeat_extend loops.
emit_literal_skip_repeat_emit_encodeBlockAsm12B:
|
|
ADDL $0x05, AX
|
|
// NOTE(review): BX = AX - 24(SP) is computed here but BX is overwritten by
// the load from 16(SP) two instructions later without being read.
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ R8, R8
|
|
// Fewer than 8 bytes of budget: go straight to the byte-by-byte loop.
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP repeat_extend_forward_end_encodeBlockAsm12B
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(R8), R8
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm12B
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm12B
|
|
LEAQ 1(R8), R8
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm12B:
|
|
ADDL R8, AX
|
|
MOVL AX, BX
|
|
SUBL SI, BX
|
|
MOVL 24(SP), SI
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVL 20(SP), R8
|
|
TESTL R8, R8
|
|
JZ repeat_as_copy_encodeBlockAsm12B
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_as_copy_encodeBlockAsm12B:
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, DI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm12B:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm12B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12B
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
repeat_end_emit_encodeBlockAsm12B:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm12B
|
|
JMP search_loop_encodeBlockAsm12B
|
|
|
|
no_repeat_found_encodeBlockAsm12B:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R9
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, R8
|
|
SHLQ $0x10, R8
|
|
IMULQ R9, R8
|
|
SHRQ $0x34, R8
|
|
CMPL (CX)(BX*1), SI
|
|
SHRQ $0x08, SI
|
|
JEQ candidate_match_encodeBlockAsm12B
|
|
MOVL 32(SP)(R8*1), BX
|
|
CMPL (CX)(DI*1), SI
|
|
JEQ candidate2_match_encodeBlockAsm12B
|
|
LEAQ 2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
SHRQ $0x08, SI
|
|
CMPL (CX)(BX*1), SI
|
|
JEQ candidate3_match_encodeBlockAsm12B
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm12B
|
|
|
|
candidate3_match_encodeBlockAsm12B:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm12B
|
|
|
|
candidate2_match_encodeBlockAsm12B:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(R8*1)
|
|
INCL AX
|
|
MOVL DI, BX
|
|
|
|
candidate_match_encodeBlockAsm12B:
|
|
MOVL 20(SP), SI
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm12B
|
|
|
|
match_extend_back_loop_encodeBlockAsm12B:
|
|
CMPL AX, SI
|
|
JG match_extend_back_end_encodeBlockAsm12B
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), DI
|
|
CMPB DL, DI
|
|
JNE match_extend_back_end_encodeBlockAsm12B
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm12B
|
|
JMP match_extend_back_loop_encodeBlockAsm12B
|
|
|
|
match_extend_back_end_encodeBlockAsm12B:
|
|
MOVL AX, SI
|
|
SUBL 20(SP), SI
|
|
LEAQ dst_base+0(FP)(SI*1), SI
|
|
CMPQ SI, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm12B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm12B:
|
|
MOVL BX, SI
|
|
MOVL 20(SP), DI
|
|
CMPL DI, SI
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm12B
|
|
MOVL SI, R8
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(DI*1), SI
|
|
SUBL DI, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVQ R8, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_match_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm12B
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm12B
|
|
MOVB $0xfc, (DI)
|
|
MOVL R9, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
four_bytes_match_emit_encodeBlockAsm12B:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (DI)
|
|
MOVW R9, 1(DI)
|
|
MOVB R10, 3(DI)
|
|
ADDQ $0x04, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
three_bytes_match_emit_encodeBlockAsm12B:
|
|
MOVB $0xf4, (DI)
|
|
MOVW R9, 1(DI)
|
|
ADDQ $0x03, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
two_bytes_match_emit_encodeBlockAsm12B:
|
|
MOVB $0xf0, (DI)
|
|
MOVB R9, 1(DI)
|
|
ADDQ $0x02, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12B
|
|
|
|
one_byte_match_emit_encodeBlockAsm12B:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (DI)
|
|
ADDQ $0x01, DI
|
|
|
|
memmove_match_emit_encodeBlockAsm12B:
|
|
LEAQ (DI)(R8*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail:
|
|
TESTQ R8, R8
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm12B
|
|
CMPQ R8, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
|
|
CMPQ R8, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4
|
|
CMPQ R8, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
|
|
CMPQ R8, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16
|
|
CMPQ R8, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
|
|
CMPQ R8, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
|
|
CMPQ R8, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128
|
|
CMPQ R8, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
|
|
MOVB (SI), R9
|
|
MOVB -1(SI)(R8*1), SI
|
|
MOVB R9, (DI)
|
|
MOVB SI, -1(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4:
|
|
MOVL (SI), R9
|
|
MOVL R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
|
|
MOVW (SI), R9
|
|
MOVB 2(SI), SI
|
|
MOVW R9, (DI)
|
|
MOVB SI, 2(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7:
|
|
MOVL (SI), R9
|
|
MOVL -4(SI)(R8*1), SI
|
|
MOVL R9, (DI)
|
|
MOVL SI, -4(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
|
|
MOVQ (SI), R9
|
|
MOVQ R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16:
|
|
MOVQ (SI), R9
|
|
MOVQ -8(SI)(R8*1), SI
|
|
MOVQ R9, (DI)
|
|
MOVQ SI, -8(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
|
|
MOVOU (SI), X0
|
|
MOVOU -16(SI)(R8*1), X1
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU -32(SI)(R8*1), X2
|
|
MOVOU -16(SI)(R8*1), X3
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, -32(DI)(R8*1)
|
|
MOVOU X3, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU -128(SI)(R8*1), X8
|
|
MOVOU -112(SI)(R8*1), X9
|
|
MOVOU -96(SI)(R8*1), X10
|
|
MOVOU -80(SI)(R8*1), X11
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, -128(DI)(R8*1)
|
|
MOVOU X9, -112(DI)(R8*1)
|
|
MOVOU X10, -96(DI)(R8*1)
|
|
MOVOU X11, -80(DI)(R8*1)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048:
|
|
LEAQ -256(R8), R8
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU 128(SI), X8
|
|
MOVOU 144(SI), X9
|
|
MOVOU 160(SI), X10
|
|
MOVOU 176(SI), X11
|
|
MOVOU 192(SI), X12
|
|
MOVOU 208(SI), X13
|
|
MOVOU 224(SI), X14
|
|
MOVOU 240(SI), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, 128(DI)
|
|
MOVOU X9, 144(DI)
|
|
MOVOU X10, 160(DI)
|
|
MOVOU X11, 176(DI)
|
|
MOVOU X12, 192(DI)
|
|
MOVOU X13, 208(DI)
|
|
MOVOU X14, 224(DI)
|
|
MOVOU X15, 240(DI)
|
|
CMPQ R8, $0x00000100
|
|
LEAQ 256(SI), SI
|
|
LEAQ 256(DI), DI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail
|
|
MOVQ R9, DI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm12B:
|
|
MOVQ DI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm12B:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm12B:
|
|
MOVL AX, SI
|
|
MOVL AX, SI
|
|
SUBL BX, SI
|
|
MOVL SI, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), SI
|
|
SUBL AX, SI
|
|
XORQ R8, R8
|
|
CMPQ SI, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm12B
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm12B:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm12B
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP match_nolit_end_encodeBlockAsm12B
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm12B:
|
|
LEAQ -8(SI), SI
|
|
LEAQ 8(R8), R8
|
|
CMPQ SI, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm12B
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm12B:
|
|
TESTQ SI, SI
|
|
JZ match_nolit_end_encodeBlockAsm12B
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE match_nolit_end_encodeBlockAsm12B
|
|
LEAQ 1(R8), R8
|
|
DECQ SI
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B
|
|
|
|
match_nolit_end_encodeBlockAsm12B:
|
|
MOVL 24(SP), SI
|
|
ADDQ $0x04, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
ADDL R8, AX
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm12B
|
|
CMPL R8, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm12B
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(R8), R8
|
|
ADDQ $0x05, DI
|
|
CMPL R8, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm12B
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm12B:
|
|
TESTL R8, R8
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm12B:
|
|
CMPL R8, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(R8), R8
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm12B:
|
|
CMPL R8, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm12B:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm12B:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm12B
|
|
CMPQ DI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm12B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm12B:
|
|
MOVQ -2(CX)(AX*1), SI
|
|
MOVQ $0x0000cf1bbcdcbf9b, DI
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, SI
|
|
MOVQ SI, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ DI, R8
|
|
SHRQ $0x34, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ DI, R9
|
|
SHRQ $0x34, R9
|
|
MOVL 32(SP)(R8*1), DI
|
|
MOVL 32(SP)(R9*1), DI
|
|
LEAQ -2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, 32(SP)(R9*1)
|
|
CMPL (CX)(R9*1), SI
|
|
JEQ match_nolit_loop_encodeBlockAsm12B
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm12B
|
|
|
|
emit_remainder_encodeBlockAsm12B:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm12B
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm12B:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12B
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm12B
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm12B
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm12B:
|
|
MOVQ DX, SI
|
|
SHRL $0x10, SI
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB SI, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm12B:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm12B:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12B
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm12B:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm12B:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), AL
|
|
MOVB DL, (CX)
|
|
MOVB AL, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), AL
|
|
MOVW DX, (CX)
|
|
MOVB AL, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), AX
|
|
MOVL DX, (CX)
|
|
MOVL AX, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), AX
|
|
MOVQ DX, (CX)
|
|
MOVQ AX, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm12B:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm12B:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsmAvx(dst []byte, src []byte) int
|
|
// Requires: AVX, SSE2
|
|
TEXT ·encodeBlockAsmAvx(SB), $65568-56
|
|
MOVQ $0x00000200, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsmAvx:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsmAvx
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsmAvx:
|
|
MOVQ (CX)(AX*1), SI
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x06, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), DI
|
|
CMPL BX, DI
|
|
JGT emit_remainder_encodeBlockAsmAvx
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ SI, R8
|
|
MOVQ SI, R9
|
|
SHRQ $0x08, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x30, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ BX, R9
|
|
SHRQ $0x30, R9
|
|
MOVL 32(SP)(R8*1), BX
|
|
MOVL 32(SP)(R9*1), DI
|
|
MOVL AX, 32(SP)(R8*1)
|
|
LEAL 1(AX), R8
|
|
MOVL R8, 32(SP)(R9*1)
|
|
MOVL AX, R8
|
|
SUBL 24(SP), R8
|
|
MOVL 1(CX)(R8*1), R10
|
|
MOVQ SI, R9
|
|
SHLQ $0x08, R9
|
|
CMPL R9, R10
|
|
JNE no_repeat_found_encodeBlockAsmAvx
|
|
LEAQ 1(AX), SI
|
|
MOVL 20(SP), BX
|
|
TESTL R8, R8
|
|
JZ repeat_extend_back_end_encodeBlockAsmAvx
|
|
|
|
repeat_extend_back_loop_encodeBlockAsmAvx:
|
|
CMPL SI, BX
|
|
JG repeat_extend_back_end_encodeBlockAsmAvx
|
|
MOVB -1(CX)(R8*1), DL
|
|
MOVB -1(CX)(SI*1), DI
|
|
CMPB DL, DI
|
|
JNE repeat_extend_back_end_encodeBlockAsmAvx
|
|
LEAQ -1(SI), SI
|
|
DECL R8
|
|
JZ repeat_extend_back_end_encodeBlockAsmAvx
|
|
JMP repeat_extend_back_loop_encodeBlockAsmAvx
|
|
|
|
repeat_extend_back_end_encodeBlockAsmAvx:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, SI
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsmAvx
|
|
MOVL SI, DI
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(BX*1), R8
|
|
SUBL BX, DI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ DI, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsmAvx
|
|
MOVB $0xfc, (BX)
|
|
MOVL R9, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsmAvx:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (BX)
|
|
MOVW R9, 1(BX)
|
|
MOVB R10, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R9, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R9, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsmAvx
|
|
|
|
one_byte_repeat_emit_encodeBlockAsmAvx:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsmAvx:
|
|
LEAQ (BX)(DI*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2:
|
|
MOVB (R8), R9
|
|
MOVB -1(R8)(DI*1), R10
|
|
MOVB R9, (BX)
|
|
MOVB R10, -1(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4:
|
|
MOVL (R8), R9
|
|
MOVL R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3:
|
|
MOVW (R8), R9
|
|
MOVB 2(R8), R10
|
|
MOVW R9, (BX)
|
|
MOVB R10, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7:
|
|
MOVL (R8), R9
|
|
MOVL -4(R8)(DI*1), R10
|
|
MOVL R9, (BX)
|
|
MOVL R10, -4(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8:
|
|
MOVQ (R8), R9
|
|
MOVQ R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16:
|
|
MOVQ (R8), R9
|
|
MOVQ -8(R8)(DI*1), R10
|
|
MOVQ R9, (BX)
|
|
MOVQ R10, -8(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32:
|
|
MOVOU (R8), X0
|
|
MOVOU -16(R8)(DI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU -32(R8)(DI*1), X2
|
|
MOVOU -16(R8)(DI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(DI*1)
|
|
MOVOU X3, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU -128(R8)(DI*1), X8
|
|
MOVOU -112(R8)(DI*1), X9
|
|
MOVOU -96(R8)(DI*1), X10
|
|
MOVOU -80(R8)(DI*1), X11
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(DI*1)
|
|
MOVOU X9, -112(BX)(DI*1)
|
|
MOVOU X10, -96(BX)(DI*1)
|
|
MOVOU X11, -80(BX)(DI*1)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU 128(R8), X8
|
|
MOVOU 144(R8), X9
|
|
MOVOU 160(R8), X10
|
|
MOVOU 176(R8), X11
|
|
MOVOU 192(R8), X12
|
|
MOVOU 208(R8), X13
|
|
MOVOU 224(R8), X14
|
|
MOVOU 240(R8), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(R8), R8
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
|
|
LEAQ (R8)(DI*1), R10
|
|
MOVQ BX, R12
|
|
MOVOU -128(R10), X5
|
|
MOVOU -112(R10), X6
|
|
MOVQ $0x00000080, R9
|
|
ANDQ $0xffffffe0, BX
|
|
ADDQ $0x20, BX
|
|
MOVOU -96(R10), X7
|
|
MOVOU -80(R10), X8
|
|
MOVQ BX, R11
|
|
SUBQ R12, R11
|
|
MOVOU -64(R10), X9
|
|
MOVOU -48(R10), X10
|
|
SUBQ R11, DI
|
|
MOVOU -32(R10), X11
|
|
MOVOU -16(R10), X12
|
|
VMOVDQU (R8), Y4
|
|
ADDQ R11, R8
|
|
SUBQ R9, DI
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (R8), Y0
|
|
VMOVDQU 32(R8), Y1
|
|
VMOVDQU 64(R8), Y2
|
|
VMOVDQU 96(R8), Y3
|
|
ADDQ R9, R8
|
|
VMOVDQA Y0, (BX)
|
|
VMOVDQA Y1, 32(BX)
|
|
VMOVDQA Y2, 64(BX)
|
|
VMOVDQA Y3, 96(BX)
|
|
ADDQ R9, BX
|
|
SUBQ R9, DI
|
|
JA emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop
|
|
ADDQ R9, DI
|
|
ADDQ BX, DI
|
|
VMOVDQU Y4, (R12)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(DI)
|
|
MOVOU X6, -112(DI)
|
|
MOVOU X7, -96(DI)
|
|
MOVOU X8, -80(DI)
|
|
MOVOU X9, -64(DI)
|
|
MOVOU X10, -48(DI)
|
|
MOVOU X11, -32(DI)
|
|
MOVOU X12, -16(DI)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx
|
|
MOVQ R9, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsmAvx:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsmAvx:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ R8, R8
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
// Match-length scan used while extending a repeat forward.
// R8 is the running byte count (cleared with XORQ just before this label) and
// BX is the number of bytes that may still be examined.
// The first loop works on 8 bytes per iteration, the second on single bytes.
// NOTE(review): the MOVQ load and the XORQ operand below both read
// (CX)(R8*1), so DI is always zero after the XOR and the BSFQ path can never
// be reached. A match-length routine presumably needs two different base
// addresses here - confirm against the generator.
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_repeat_extend
|
|
// Non-zero XOR: bit index of the lowest set bit, divided by 8, is the number
// of leading bytes that were equal; add it to the count and finish.
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP repeat_extend_forward_end_encodeBlockAsmAvx
|
|
|
|
// All 8 bytes equal: consume them and keep going while at least 8 remain.
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(R8), R8
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
// Fewer than 8 bytes left: stop immediately when none remain.
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsmAvx
|
|
|
|
// Byte-at-a-time tail.
// NOTE(review): CMPB compares the byte just loaded with the same memory
// location, so JNE is never taken and the loop always runs until BX is zero.
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE repeat_extend_forward_end_encodeBlockAsmAvx
|
|
LEAQ 1(R8), R8
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
// Forward extension finished; R8 holds the number of bytes counted by the
// match-length scan. Sets up registers for emitting the repeat:
//   AX = AX + R8
//   BX = AX - SI
//   SI = 24(SP)  (presumably the current match offset; match_nolit_loop
//                 stores s - candidate in this slot - confirm)
//   DI = value read from the dst_base argument slot, used as the write cursor
//   R8 = 20(SP)
// When 20(SP) is zero the code branches to the "repeat as copy" emitter,
// otherwise it falls through into the repeat emitter.
repeat_extend_forward_end_encodeBlockAsmAvx:
|
|
ADDL R8, AX
|
|
MOVL AX, BX
|
|
SUBL SI, BX
|
|
MOVL 24(SP), SI
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVL 20(SP), R8
|
|
TESTL R8, R8
|
|
JZ repeat_as_copy_encodeBlockAsmAvx
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_as_copy_encodeBlockAsmAvx:
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, DI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsmAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsmAvx:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsmAvx
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsmAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
repeat_end_emit_encodeBlockAsmAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsmAvx
|
|
JMP search_loop_encodeBlockAsmAvx
|
|
|
|
// No repeat at the current position: test the candidate positions in BX and
// DI, plus a third candidate read from the table at 32(SP).
//   SI = source bytes being matched (shifted right 8 bits between candidates)
//   R8 = ((SI >> 16) << 16) * 0x0000cf1bbcdcbf9b >> 48, used as table offset
// Fix: the original placed "SHRQ $0x08, SI" between the first CMPL and its
// JEQ. SHRQ with a non-zero count rewrites ZF, so the branch tested the shift
// result and not the comparison. The branch is now taken straight after the
// CMPL; SI is reloaded at candidate_match, so the taken path never sees the
// shifted value.
// NOTE(review): this file is generator output ("DO NOT EDIT"); the same
// reordering needs to be made in the generator.
no_repeat_found_encodeBlockAsmAvx:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R9
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, R8
|
|
SHLQ $0x10, R8
|
|
IMULQ R9, R8
|
|
SHRQ $0x30, R8
|
|
// First candidate: 4 source bytes at offset BX against the low 32 bits of SI.
CMPL (CX)(BX*1), SI
|
|
JEQ candidate_match_encodeBlockAsmAvx
|
|
// Advance the comparison value by one byte for the second candidate.
SHRQ $0x08, SI
|
|
MOVL 32(SP)(R8*1), BX
|
|
CMPL (CX)(DI*1), SI
|
|
JEQ candidate2_match_encodeBlockAsmAvx
|
|
// Record position AX+2 in the table, then test the third candidate (the
// value just read from that table slot into BX).
LEAQ 2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
SHRQ $0x08, SI
|
|
CMPL (CX)(BX*1), SI
|
|
JEQ candidate3_match_encodeBlockAsmAvx
|
|
// Nothing matched: continue the search at the position saved in 28(SP).
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsmAvx
|
|
|
|
candidate3_match_encodeBlockAsmAvx:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsmAvx
|
|
|
|
candidate2_match_encodeBlockAsmAvx:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(R8*1)
|
|
INCL AX
|
|
MOVL DI, BX
|
|
|
|
// A 4-byte candidate match was found: AX is the current position, BX the
// candidate position. Tries to extend the match backwards one byte at a time.
// SI is loaded from 20(SP) and used as the lower bound for AX.
// Backward extension is skipped entirely when BX is zero.
candidate_match_encodeBlockAsmAvx:
|
|
MOVL 20(SP), SI
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsmAvx
|
|
|
|
// Loop: compare the byte before each position; on equality step both back.
// Exits when the bytes differ or BX reaches zero.
// NOTE(review): "CMPL AX, SI / JG" leaves the loop when AX > SI (signed),
// i.e. whenever the position is still above the bound loaded from 20(SP).
// That looks inverted for a lower-bound guard (JLE would be expected) -
// confirm against the generator.
match_extend_back_loop_encodeBlockAsmAvx:
|
|
CMPL AX, SI
|
|
JG match_extend_back_end_encodeBlockAsmAvx
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), DI
|
|
CMPB DL, DI
|
|
JNE match_extend_back_end_encodeBlockAsmAvx
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsmAvx
|
|
JMP match_extend_back_loop_encodeBlockAsmAvx
|
|
|
|
// Output-space check before emitting the pending bytes.
// SI = AX - 20(SP), then SI is turned into an address and compared (signed)
// against the limit stored at (SP). If it is below the limit, execution
// continues at match_dst_size_check; otherwise the function stores 0 in the
// return slot and returns.
// NOTE(review): the LEAQ forms the address of the dst_base argument slot on
// the stack plus SI; it does not load the pointer stored in that slot (compare
// emit_remainder, which uses MOVQ followed by LEAQ). Presumably the dst
// pointer value was intended - confirm.
match_extend_back_end_encodeBlockAsmAvx:
|
|
MOVL AX, SI
|
|
SUBL 20(SP), SI
|
|
LEAQ dst_base+0(FP)(SI*1), SI
|
|
CMPQ SI, (SP)
|
|
JL match_dst_size_check_encodeBlockAsmAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsmAvx:
|
|
MOVL BX, SI
|
|
MOVL 20(SP), DI
|
|
CMPL DI, SI
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsmAvx
|
|
MOVL SI, R8
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(DI*1), SI
|
|
SUBL DI, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVQ R8, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsmAvx
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsmAvx
|
|
MOVB $0xfc, (DI)
|
|
MOVL R9, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
four_bytes_match_emit_encodeBlockAsmAvx:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (DI)
|
|
MOVW R9, 1(DI)
|
|
MOVB R10, 3(DI)
|
|
ADDQ $0x04, DI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
three_bytes_match_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf4, (DI)
|
|
MOVW R9, 1(DI)
|
|
ADDQ $0x03, DI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
two_bytes_match_emit_encodeBlockAsmAvx:
|
|
MOVB $0xf0, (DI)
|
|
MOVB R9, 1(DI)
|
|
ADDQ $0x02, DI
|
|
JMP memmove_match_emit_encodeBlockAsmAvx
|
|
|
|
one_byte_match_emit_encodeBlockAsmAvx:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (DI)
|
|
ADDQ $0x01, DI
|
|
|
|
// Start of the inlined copy of R8 bytes from SI to DI.
// R9 = DI + R8, the address one past the last byte that will be written.
memmove_match_emit_encodeBlockAsmAvx:
|
|
LEAQ (DI)(R8*1), R9
|
|
NOP
|
|
|
|
// Size dispatch on R8 (unsigned comparisons):
//   0         -> nothing to copy
//   1..2      -> move_1or2
//   3         -> move_3
//   4         -> move_4
//   5..7      -> move_5through7
//   8         -> move_8
//   9..16     -> move_9through16
//   17..32    -> move_17through32
//   33..64    -> move_33through64
//   65..128   -> move_65through128
//   129..256  -> move_129through256
//   > 256     -> avxUnaligned
// The JB/JBE and JB/JE pairs reuse the flags of the preceding CMPQ.
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail:
|
|
TESTQ R8, R8
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
CMPQ R8, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2
|
|
CMPQ R8, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4
|
|
CMPQ R8, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8
|
|
CMPQ R8, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16
|
|
CMPQ R8, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32
|
|
CMPQ R8, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64
|
|
CMPQ R8, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128
|
|
CMPQ R8, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2:
|
|
MOVB (SI), R9
|
|
MOVB -1(SI)(R8*1), R10
|
|
MOVB R9, (DI)
|
|
MOVB R10, -1(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4:
|
|
MOVL (SI), R9
|
|
MOVL R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3:
|
|
MOVW (SI), R9
|
|
MOVB 2(SI), R10
|
|
MOVW R9, (DI)
|
|
MOVB R10, 2(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7:
|
|
MOVL (SI), R9
|
|
MOVL -4(SI)(R8*1), R10
|
|
MOVL R9, (DI)
|
|
MOVL R10, -4(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8:
|
|
MOVQ (SI), R9
|
|
MOVQ R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16:
|
|
MOVQ (SI), R9
|
|
MOVQ -8(SI)(R8*1), R10
|
|
MOVQ R9, (DI)
|
|
MOVQ R10, -8(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32:
|
|
MOVOU (SI), X0
|
|
MOVOU -16(SI)(R8*1), X1
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU -32(SI)(R8*1), X2
|
|
MOVOU -16(SI)(R8*1), X3
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, -32(DI)(R8*1)
|
|
MOVOU X3, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU -128(SI)(R8*1), X8
|
|
MOVOU -112(SI)(R8*1), X9
|
|
MOVOU -96(SI)(R8*1), X10
|
|
MOVOU -80(SI)(R8*1), X11
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, -128(DI)(R8*1)
|
|
MOVOU X9, -112(DI)(R8*1)
|
|
MOVOU X10, -96(DI)(R8*1)
|
|
MOVOU X11, -80(DI)(R8*1)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048:
|
|
LEAQ -256(R8), R8
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU 128(SI), X8
|
|
MOVOU 144(SI), X9
|
|
MOVOU 160(SI), X10
|
|
MOVOU 176(SI), X11
|
|
MOVOU 192(SI), X12
|
|
MOVOU 208(SI), X13
|
|
MOVOU 224(SI), X14
|
|
MOVOU 240(SI), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, 128(DI)
|
|
MOVOU X9, 144(DI)
|
|
MOVOU X10, 160(DI)
|
|
MOVOU X11, 176(DI)
|
|
MOVOU X12, 192(DI)
|
|
MOVOU X13, 208(DI)
|
|
MOVOU X14, 224(DI)
|
|
MOVOU X15, 240(DI)
|
|
CMPQ R8, $0x00000100
|
|
LEAQ 256(SI), SI
|
|
LEAQ 256(DI), DI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned:
|
|
LEAQ (SI)(R8*1), R10
|
|
MOVQ DI, R12
|
|
MOVOU -128(R10), X5
|
|
MOVOU -112(R10), X6
|
|
MOVQ $0x00000080, R9
|
|
ANDQ $0xffffffe0, DI
|
|
ADDQ $0x20, DI
|
|
MOVOU -96(R10), X7
|
|
MOVOU -80(R10), X8
|
|
MOVQ DI, R11
|
|
SUBQ R12, R11
|
|
MOVOU -64(R10), X9
|
|
MOVOU -48(R10), X10
|
|
SUBQ R11, R8
|
|
MOVOU -32(R10), X11
|
|
MOVOU -16(R10), X12
|
|
VMOVDQU (SI), Y4
|
|
ADDQ R11, SI
|
|
SUBQ R9, R8
|
|
|
|
// Main AVX copy loop: 128 bytes per iteration.
// Loads four 32-byte vectors from SI with unaligned loads and stores them to
// DI with VMOVDQA (aligned stores). SI and DI advance by R9 (loaded with 0x80
// before the loop) and R8 is reduced by R9. JA repeats while the subtraction
// neither borrowed nor reached zero.
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (SI), Y0
|
|
VMOVDQU 32(SI), Y1
|
|
VMOVDQU 64(SI), Y2
|
|
VMOVDQU 96(SI), Y3
|
|
ADDQ R9, SI
|
|
VMOVDQA Y0, (DI)
|
|
VMOVDQA Y1, 32(DI)
|
|
VMOVDQA Y2, 64(DI)
|
|
VMOVDQA Y3, 96(DI)
|
|
ADDQ R9, DI
|
|
SUBQ R9, R8
|
|
JA emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop
|
|
// Undo the final subtraction and turn R8 into an address: R8 = DI + leftover.
ADDQ R9, R8
|
|
ADDQ DI, R8
|
|
// Write the 32 head bytes saved in Y4 to the original destination (R12).
VMOVDQU Y4, (R12)
|
|
VZEROUPPER
|
|
// Write the 128 tail bytes saved in X5..X12 so that they end at R8.
MOVOU X5, -128(R8)
|
|
MOVOU X6, -112(R8)
|
|
MOVOU X7, -96(R8)
|
|
MOVOU X8, -80(R8)
|
|
MOVOU X9, -64(R8)
|
|
MOVOU X10, -48(R8)
|
|
MOVOU X11, -32(R8)
|
|
MOVOU X12, -16(R8)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsmAvx
|
|
// NOTE(review): this instruction follows an unconditional JMP and carries no
// label, so it is never executed. It looks like the step that should move the
// end-of-copy address (R9 was set to DI+R8 at memmove_match_emit, although the
// AVX path later reloads R9 with 0x80) into DI before DI is stored back at
// emit_literal_done - confirm against the generator.
MOVQ R9, DI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsmAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsmAvx:
|
|
NOP
|
|
|
|
// Entry for emitting a match with no pending literals.
// AX is the current position and BX the candidate position.
//   24(SP) = AX - BX  (distance between the two positions)
//   AX, BX += 4       (skip the 4 bytes already known to match)
//   SI     = 16(SP) - AX, the number of bytes the length scan may examine
//   R8     = 0, the match-length counter
// With fewer than 8 bytes available the byte-wise scan is used directly.
// NOTE(review): "MOVL AX, SI" appears twice in a row; the second copy is
// redundant.
match_nolit_loop_encodeBlockAsmAvx:
|
|
MOVL AX, SI
|
|
MOVL AX, SI
|
|
SUBL BX, SI
|
|
MOVL SI, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), SI
|
|
SUBL AX, SI
|
|
XORQ R8, R8
|
|
CMPQ SI, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsmAvx
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsmAvx:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsmAvx
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP match_nolit_end_encodeBlockAsmAvx
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsmAvx:
|
|
LEAQ -8(SI), SI
|
|
LEAQ 8(R8), R8
|
|
CMPQ SI, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsmAvx:
|
|
TESTQ SI, SI
|
|
JZ match_nolit_end_encodeBlockAsmAvx
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsmAvx:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE match_nolit_end_encodeBlockAsmAvx
|
|
LEAQ 1(R8), R8
|
|
DECQ SI
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx
|
|
|
|
match_nolit_end_encodeBlockAsmAvx:
|
|
MOVL 24(SP), SI
|
|
ADDQ $0x04, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
ADDL R8, AX
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsmAvx
|
|
CMPL R8, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(R8), R8
|
|
ADDQ $0x05, DI
|
|
CMPL R8, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsmAvx
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsmAvx:
|
|
TESTL R8, R8
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsmAvx:
|
|
CMPL R8, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(R8), R8
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsmAvx:
|
|
CMPL R8, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsmAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsmAvx
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsmAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
// Copy/repeat has been written; DI is the output cursor after it.
// Stores DI back into the dst_base argument slot and AX into 20(SP).
// If AX >= 16(SP) the main loop is finished and the remainder is emitted.
// Otherwise the output cursor is checked against the limit at (SP): below the
// limit continues at match_nolit_dst_ok; at or above it the function stores 0
// in the return slot and returns.
match_nolit_emitcopy_end_encodeBlockAsmAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsmAvx
|
|
CMPQ DI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsmAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
// After a match: index positions AX-2 and AX in the table at 32(SP) and check
// for an immediately following match.
//   SI = 8 source bytes loaded from offset AX-2
//   R8 = hash of those bytes, R9 = hash of the bytes two positions later
//        (SI >> 16); each hash is (x << 16) * 0x0000cf1bbcdcbf9b >> 48
match_nolit_dst_ok_encodeBlockAsmAvx:
|
|
MOVQ -2(CX)(AX*1), SI
|
|
MOVQ $0x0000cf1bbcdcbf9b, DI
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, SI
|
|
MOVQ SI, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ DI, R8
|
|
SHRQ $0x30, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ DI, R9
|
|
SHRQ $0x30, R9
|
|
// NOTE(review): both table loads target DI and DI is overwritten by the LEAQ
// right after them, so neither loaded value is ever used.
MOVL 32(SP)(R8*1), DI
|
|
MOVL 32(SP)(R9*1), DI
|
|
// Store AX-2 under the first hash and AX under the second.
LEAQ -2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, 32(SP)(R9*1)
|
|
// NOTE(review): the comparison indexes the source with R9, which holds the
// hash value and not a position read from the table - presumably the
// candidate loaded above was meant to be used here; confirm.
CMPL (CX)(R9*1), SI
|
|
JEQ match_nolit_loop_encodeBlockAsmAvx
|
|
// No immediate match: advance one byte and resume the search loop.
INCL AX
|
|
JMP search_loop_encodeBlockAsmAvx
|
|
|
|
// Output-space check for the trailing bytes.
//   AX = src_len - 20(SP)               (number of bytes not yet emitted)
//   DX = value of dst_base slot + AX    (where the output would end, header
//                                        bytes not included)
// If DX is below the limit stored at (SP) the remainder is emitted; otherwise
// the function stores 0 in the return slot and returns.
emit_remainder_encodeBlockAsmAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsmAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsmAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsmAvx
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsmAvx
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsmAvx
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsmAvx:
|
|
MOVQ DX, SI
|
|
SHRL $0x10, SI
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB SI, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsmAvx:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsmAvx:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsmAvx
|
|
|
|
one_byte_emit_remainder_encodeBlockAsmAvx:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsmAvx:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), SI
|
|
MOVB DL, (CX)
|
|
MOVB SI, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), SI
|
|
MOVW DX, (CX)
|
|
MOVB SI, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), SI
|
|
MOVL DX, (CX)
|
|
MOVL SI, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), SI
|
|
MOVQ DX, (CX)
|
|
MOVQ SI, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned:
|
|
LEAQ (AX)(BX*1), SI
|
|
MOVQ CX, R8
|
|
MOVOU -128(SI), X5
|
|
MOVOU -112(SI), X6
|
|
MOVQ $0x00000080, DX
|
|
ANDQ $0xffffffe0, CX
|
|
ADDQ $0x20, CX
|
|
MOVOU -96(SI), X7
|
|
MOVOU -80(SI), X8
|
|
MOVQ CX, DI
|
|
SUBQ R8, DI
|
|
MOVOU -64(SI), X9
|
|
MOVOU -48(SI), X10
|
|
SUBQ DI, BX
|
|
MOVOU -32(SI), X11
|
|
MOVOU -16(SI), X12
|
|
VMOVDQU (AX), Y4
|
|
ADDQ DI, AX
|
|
SUBQ DX, BX
|
|
|
|
// Bulk copy loop (128 bytes per pass) for the remainder literal run, followed
// by the exit sequence of the routine.
// Register roles, as set by the setup code directly above this label:
//   AX = read pointer, CX = write pointer (low five bits cleared, then +0x20),
//   DX = 0x80 stride, BX = byte counter, R8 = write pointer before rounding,
//   Y4 = first 32 bytes read, X5-X12 = last 128 bytes read.
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (AX), Y0
|
|
VMOVDQU 32(AX), Y1
|
|
VMOVDQU 64(AX), Y2
|
|
VMOVDQU 96(AX), Y3
|
|
ADDQ DX, AX
|
|
// Aligned stores: they rely on the rounding applied to CX before the loop.
VMOVDQA Y0, (CX)
|
|
VMOVDQA Y1, 32(CX)
|
|
VMOVDQA Y2, 64(CX)
|
|
VMOVDQA Y3, 96(CX)
|
|
ADDQ DX, CX
|
|
SUBQ DX, BX
|
|
// Repeat while the subtraction neither borrowed nor reached zero.
JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop
|
|
// Undo the last subtraction and turn the counter into an address:
// BX = CX + bytes still outstanding.
ADDQ DX, BX
|
|
ADDQ CX, BX
|
|
// Head block goes to the unrounded start address saved in R8.
VMOVDQU Y4, (R8)
|
|
// Clear the upper YMM halves before the SSE stores that follow.
VZEROUPPER
|
|
// Tail: the saved final 128 bytes are written ending exactly at BX.
MOVOU X5, -128(BX)
|
|
MOVOU X6, -112(BX)
|
|
MOVOU X7, -96(BX)
|
|
MOVOU X8, -80(BX)
|
|
MOVOU X9, -64(BX)
|
|
MOVOU X10, -48(BX)
|
|
MOVOU X11, -32(BX)
|
|
MOVOU X12, -16(BX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx
|
|
// NOTE(review): unreachable. This instruction follows an unconditional JMP and
// carries no label, so nothing can branch to it.
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsmAvx:
|
|
// Stores CX into the dst_base argument slot.
// NOTE(review): on the AVX path above CX is the write pointer after the last
// 128-byte pass, while the final bytes were written up to BX. Confirm which
// pointer is meant to be recorded here.
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsmAvx:
|
|
// Return value = 8(SP) - dst_base slot.
// NOTE(review): what 8(SP) holds is set before the visible part of this
// routine. If it is the initial dst pointer (as in the prologue of
// encodeBlockAsm14BAvx), this computes initial - current; confirm the
// operand order.
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm14BAvx(dst []byte, src []byte) int
|
|
// Requires: AVX, SSE2
|
|
// Prologue. Frame contents once it has run:
//   0(SP)   8 bytes  dst_base + (src_len - 5 - (src_len >> 5)); the
//                    parenthesised difference is formed with a 32-bit SUBL
//   8(SP)   8 bytes  dst_base as passed in
//   16(SP)  4 bytes  src_len - 8
//   20(SP)  4 bytes  0
//   24(SP)  4 bytes  1
//   32(SP)  16384 bytes, zero-filled (0x80 passes of 0x80 bytes each);
//           32 + 16384 equals the declared frame size of 16416
// Registers on exit: AX = 1, CX = src_base.
TEXT ·encodeBlockAsm14BAvx(SB), $16416-56
|
|
MOVQ $0x00000080, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
// Zero 128 bytes per pass with eight unaligned 16-byte stores.
zero_loop_encodeBlockAsm14BAvx:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm14BAvx
|
|
// AX is zero here (the loop just counted it down), so this clears 20(SP).
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
// Top of the search loop. CX = src_base, AX = current offset into src.
// Loads 8 bytes at src[AX], computes the offset to resume at if nothing
// matches, hashes two positions into the table at 32(SP), and tests the
// stored distance in 24(SP) before falling back to the hash candidates.
search_loop_encodeBlockAsm14BAvx:
|
|
MOVQ (CX)(AX*1), SI
|
|
// BX = AX + 4 + ((AX - 20(SP)) >> 5); leave the loop if that exceeds 16(SP),
// otherwise remember it in 28(SP).
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x05, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), DI
|
|
CMPL BX, DI
|
|
JGT emit_remainder_encodeBlockAsm14BAvx
|
|
MOVL BX, 28(SP)
|
|
// Two hashes of the form ((x << 16) * constant) >> 50, i.e. 14-bit results:
// R8 from the loaded bytes, R9 from the loaded bytes shifted right by 8.
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ SI, R8
|
|
MOVQ SI, R9
|
|
SHRQ $0x08, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x32, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ BX, R9
|
|
SHRQ $0x32, R9
|
|
// Fetch the previous table entries (BX, DI), then record AX and AX+1.
// NOTE(review): the 14-bit hash is used as a byte offset (scale 1) for
// 4-byte accesses, so neighbouring hash values address overlapping slots and
// a hash of 16381 or more reaches past the 16416-byte frame declared on the
// TEXT line. Confirm whether a *4 scale and a larger table were intended.
MOVL 32(SP)(R8*1), BX
|
|
MOVL 32(SP)(R9*1), DI
|
|
MOVL AX, 32(SP)(R8*1)
|
|
LEAL 1(AX), R8
|
|
MOVL R8, 32(SP)(R9*1)
|
|
// R8 = AX - 24(SP); R10 = 4 bytes at src[R8 + 1].
MOVL AX, R8
|
|
SUBL 24(SP), R8
|
|
MOVL 1(CX)(R8*1), R10
|
|
// NOTE(review): SI << 8 has a zero low byte followed by src[AX..AX+2]; the
// bytes starting at src[AX+1] would be SI >> 8 for this little-endian load.
// Confirm the shift direction of this comparison.
MOVQ SI, R9
|
|
SHLQ $0x08, R9
|
|
CMPL R9, R10
|
|
JNE no_repeat_found_encodeBlockAsm14BAvx
|
|
// Equal: SI = AX + 1, BX = 20(SP); skip the backward walk when R8 is zero.
LEAQ 1(AX), SI
|
|
MOVL 20(SP), BX
|
|
TESTL R8, R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
|
|
// Backward walk: while src[R8-1] == src[SI-1], step both SI and R8 down by
// one. Stops on the first differing byte or when R8 reaches zero.
// On entry SI = AX + 1 and BX = 20(SP) (set by the code before this label).
repeat_extend_back_loop_encodeBlockAsm14BAvx:
|
|
// NOTE(review): this leaves the loop whenever SI > BX (signed). A backward
// walk would normally stop once SI has come down to its lower bound, i.e. on
// SI <= BX; as written the walk ends immediately if SI starts above BX.
// Confirm the intended condition.
CMPL SI, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
MOVB -1(CX)(R8*1), DL
|
|
MOVB -1(CX)(SI*1), DI
|
|
CMPB DL, DI
|
|
JNE repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
LEAQ -1(SI), SI
|
|
DECL R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm14BAvx
|
|
JMP repeat_extend_back_loop_encodeBlockAsm14BAvx
|
|
|
|
repeat_extend_back_end_encodeBlockAsm14BAvx:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, SI
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx
|
|
MOVL SI, DI
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(BX*1), R8
|
|
SUBL BX, DI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ DI, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm14BAvx
|
|
MOVB $0xfc, (BX)
|
|
MOVL R9, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (BX)
|
|
MOVW R9, 1(BX)
|
|
MOVB R10, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R9, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R9, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm14BAvx:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm14BAvx:
|
|
LEAQ (BX)(DI*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2:
|
|
MOVB (R8), R9
|
|
MOVB -1(R8)(DI*1), R10
|
|
MOVB R9, (BX)
|
|
MOVB R10, -1(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4:
|
|
MOVL (R8), R9
|
|
MOVL R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3:
|
|
MOVW (R8), R9
|
|
MOVB 2(R8), R10
|
|
MOVW R9, (BX)
|
|
MOVB R10, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7:
|
|
MOVL (R8), R9
|
|
MOVL -4(R8)(DI*1), R10
|
|
MOVL R9, (BX)
|
|
MOVL R10, -4(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8:
|
|
MOVQ (R8), R9
|
|
MOVQ R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16:
|
|
MOVQ (R8), R9
|
|
MOVQ -8(R8)(DI*1), R10
|
|
MOVQ R9, (BX)
|
|
MOVQ R10, -8(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32:
|
|
MOVOU (R8), X0
|
|
MOVOU -16(R8)(DI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU -32(R8)(DI*1), X2
|
|
MOVOU -16(R8)(DI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(DI*1)
|
|
MOVOU X3, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU -128(R8)(DI*1), X8
|
|
MOVOU -112(R8)(DI*1), X9
|
|
MOVOU -96(R8)(DI*1), X10
|
|
MOVOU -80(R8)(DI*1), X11
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(DI*1)
|
|
MOVOU X9, -112(BX)(DI*1)
|
|
MOVOU X10, -96(BX)(DI*1)
|
|
MOVOU X11, -80(BX)(DI*1)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU 128(R8), X8
|
|
MOVOU 144(R8), X9
|
|
MOVOU 160(R8), X10
|
|
MOVOU 176(R8), X11
|
|
MOVOU 192(R8), X12
|
|
MOVOU 208(R8), X13
|
|
MOVOU 224(R8), X14
|
|
MOVOU 240(R8), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(R8), R8
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned:
|
|
LEAQ (R8)(DI*1), R10
|
|
MOVQ BX, R12
|
|
MOVOU -128(R10), X5
|
|
MOVOU -112(R10), X6
|
|
MOVQ $0x00000080, R9
|
|
ANDQ $0xffffffe0, BX
|
|
ADDQ $0x20, BX
|
|
MOVOU -96(R10), X7
|
|
MOVOU -80(R10), X8
|
|
MOVQ BX, R11
|
|
SUBQ R12, R11
|
|
MOVOU -64(R10), X9
|
|
MOVOU -48(R10), X10
|
|
SUBQ R11, DI
|
|
MOVOU -32(R10), X11
|
|
MOVOU -16(R10), X12
|
|
VMOVDQU (R8), Y4
|
|
ADDQ R11, R8
|
|
SUBQ R9, DI
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (R8), Y0
|
|
VMOVDQU 32(R8), Y1
|
|
VMOVDQU 64(R8), Y2
|
|
VMOVDQU 96(R8), Y3
|
|
ADDQ R9, R8
|
|
VMOVDQA Y0, (BX)
|
|
VMOVDQA Y1, 32(BX)
|
|
VMOVDQA Y2, 64(BX)
|
|
VMOVDQA Y3, 96(BX)
|
|
ADDQ R9, BX
|
|
SUBQ R9, DI
|
|
JA emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop
|
|
ADDQ R9, DI
|
|
ADDQ BX, DI
|
|
VMOVDQU Y4, (R12)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(DI)
|
|
MOVOU X6, -112(DI)
|
|
MOVOU X7, -96(DI)
|
|
MOVOU X8, -80(DI)
|
|
MOVOU X9, -64(DI)
|
|
MOVOU X10, -48(DI)
|
|
MOVOU X11, -32(DI)
|
|
MOVOU X12, -16(DI)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx
|
|
MOVQ R9, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm14BAvx:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
// Forward extension after a repeat hit: advances AX by 5, derives a byte
// budget in BX and counts matching bytes into R8, eight at a time and then
// one at a time. CX = src_base throughout.
emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx:
|
|
ADDL $0x05, AX
|
|
// NOTE(review): BX = AX - 24(SP) is computed here and overwritten two
// instructions later without being read, so this value never reaches the
// comparison loops below.
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
// BX = 16(SP) - AX, formed with 32-bit arithmetic (upper half of BX zero).
// NOTE(review): the CMPQ below compares all 64 bits, so a negative 32-bit
// difference would read as a large positive budget. Confirm AX cannot exceed
// 16(SP) here.
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ R8, R8
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
// NOTE(review): the load and the XOR use the same address, (CX)(R8*1), so DI
// is always zero and the mismatch path (BSFQ/SARQ) can never be taken.
// Neither AX nor the candidate offset takes part in the addressing.
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_repeat_extend
|
|
// Index of the lowest set bit divided by 8 = number of equal leading bytes.
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP repeat_extend_forward_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_loop_repeat_extend:
|
|
// Eight bytes matched: shrink the budget, grow the count, loop while at
// least eight bytes remain.
LEAQ -8(BX), BX
|
|
LEAQ 8(R8), R8
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
// NOTE(review): same issue as the 8-byte loop: the byte is compared with
// itself, so JNE is never taken and the loop always runs BX times.
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm14BAvx
|
|
LEAQ 1(R8), R8
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
repeat_extend_forward_end_encodeBlockAsm14BAvx:
|
|
ADDL R8, AX
|
|
MOVL AX, BX
|
|
SUBL SI, BX
|
|
MOVL 24(SP), SI
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVL 20(SP), R8
|
|
TESTL R8, R8
|
|
JZ repeat_as_copy_encodeBlockAsm14BAvx
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_as_copy_encodeBlockAsm14BAvx:
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, DI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm14BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
repeat_end_emit_encodeBlockAsm14BAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm14BAvx
|
|
JMP search_loop_encodeBlockAsm14BAvx
|
|
|
|
// Reached from the search loop when the repeat test fails. On entry SI still
// holds the 8 bytes loaded at src[AX]; BX and DI hold the two table entries
// read by the search loop. Checks up to three candidates and either branches
// to the match path or resumes the search at the offset saved in 28(SP).
no_repeat_found_encodeBlockAsm14BAvx:
|
|
// R8 = 14-bit hash of SI >> 16, same formula as in the search loop.
MOVQ $0x0000cf1bbcdcbf9b, R9
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, R8
|
|
SHLQ $0x10, R8
|
|
IMULQ R9, R8
|
|
SHRQ $0x32, R8
|
|
// Candidate 1: 4 bytes at src[BX] against the low 32 bits of SI.
// NOTE(review): SHRQ sits between the CMPL and its JEQ and rewrites the
// flags, so the branch tests whether SI >> 8 is zero rather than the result
// of the comparison. The shift probably belongs after the JEQ.
CMPL (CX)(BX*1), SI
|
|
SHRQ $0x08, SI
|
|
JEQ candidate_match_encodeBlockAsm14BAvx
|
|
// Candidate 2: 4 bytes at src[DI] against SI (now shifted by one byte).
MOVL 32(SP)(R8*1), BX
|
|
CMPL (CX)(DI*1), SI
|
|
JEQ candidate2_match_encodeBlockAsm14BAvx
|
|
// Record AX + 2 in the third slot, then test the entry it replaced.
LEAQ 2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
SHRQ $0x08, SI
|
|
CMPL (CX)(BX*1), SI
|
|
JEQ candidate3_match_encodeBlockAsm14BAvx
|
|
// No candidate matched: continue at the offset stored in 28(SP).
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm14BAvx
|
|
|
|
candidate3_match_encodeBlockAsm14BAvx:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm14BAvx
|
|
|
|
candidate2_match_encodeBlockAsm14BAvx:
|
|
// NOTE(review): this stores AX - 2 into the slot where the no-match path
// above stores AX + 2. Confirm the sign.
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(R8*1)
|
|
INCL AX
|
|
// BX = second candidate; execution falls through to the label that follows.
MOVL DI, BX
|
|
|
|
// Backward extension of a hash match. AX = current offset, BX = candidate
// offset, CX = src_base. SI is loaded with 20(SP) as the bound for AX.
candidate_match_encodeBlockAsm14BAvx:
|
|
MOVL 20(SP), SI
|
|
// A candidate at offset 0 has nothing before it.
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm14BAvx
|
|
|
|
match_extend_back_loop_encodeBlockAsm14BAvx:
|
|
// NOTE(review): this exits whenever AX > SI (signed). For a walk that moves
// AX downwards the usual stop condition is AX <= SI; as written the loop body
// only runs when AX is already at or below 20(SP). Confirm the intended
// condition.
CMPL AX, SI
|
|
JG match_extend_back_end_encodeBlockAsm14BAvx
|
|
// Compare the bytes just before the candidate and the current position.
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), DI
|
|
CMPB DL, DI
|
|
JNE match_extend_back_end_encodeBlockAsm14BAvx
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm14BAvx
|
|
JMP match_extend_back_loop_encodeBlockAsm14BAvx
|
|
|
|
// Output-space check before emitting: returns 0 through ret+48(FP) unless the
// computed address is below the limit stored at (SP).
match_extend_back_end_encodeBlockAsm14BAvx:
|
|
// SI = AX - 20(SP).
MOVL AX, SI
|
|
SUBL 20(SP), SI
|
|
// NOTE(review): LEAQ yields the address of the dst_base argument slot plus
// SI, not the pointer stored in that slot plus SI. The limit at (SP) was built
// from the pointer value, so the two sides of the comparison are not in the
// same address range. A load of dst_base followed by an add looks intended.
LEAQ dst_base+0(FP)(SI*1), SI
|
|
// NOTE(review): JL is a signed comparison applied to addresses.
CMPQ SI, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm14BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm14BAvx:
|
|
MOVL BX, SI
|
|
MOVL 20(SP), DI
|
|
CMPL DI, SI
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm14BAvx
|
|
MOVL SI, R8
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(DI*1), SI
|
|
SUBL DI, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVQ R8, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm14BAvx
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm14BAvx
|
|
MOVB $0xfc, (DI)
|
|
MOVL R9, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_match_emit_encodeBlockAsm14BAvx:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (DI)
|
|
MOVW R9, 1(DI)
|
|
MOVB R10, 3(DI)
|
|
ADDQ $0x04, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
three_bytes_match_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf4, (DI)
|
|
MOVW R9, 1(DI)
|
|
ADDQ $0x03, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
two_bytes_match_emit_encodeBlockAsm14BAvx:
|
|
MOVB $0xf0, (DI)
|
|
MOVB R9, 1(DI)
|
|
ADDQ $0x02, DI
|
|
JMP memmove_match_emit_encodeBlockAsm14BAvx
|
|
|
|
one_byte_match_emit_encodeBlockAsm14BAvx:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (DI)
|
|
ADDQ $0x01, DI
|
|
|
|
memmove_match_emit_encodeBlockAsm14BAvx:
|
|
LEAQ (DI)(R8*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail:
|
|
TESTQ R8, R8
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
CMPQ R8, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2
|
|
CMPQ R8, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4
|
|
CMPQ R8, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8
|
|
CMPQ R8, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16
|
|
CMPQ R8, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32
|
|
CMPQ R8, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64
|
|
CMPQ R8, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128
|
|
CMPQ R8, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2:
|
|
MOVB (SI), R9
|
|
MOVB -1(SI)(R8*1), R10
|
|
MOVB R9, (DI)
|
|
MOVB R10, -1(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4:
|
|
MOVL (SI), R9
|
|
MOVL R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3:
|
|
MOVW (SI), R9
|
|
MOVB 2(SI), R10
|
|
MOVW R9, (DI)
|
|
MOVB R10, 2(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7:
|
|
MOVL (SI), R9
|
|
MOVL -4(SI)(R8*1), R10
|
|
MOVL R9, (DI)
|
|
MOVL R10, -4(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8:
|
|
MOVQ (SI), R9
|
|
MOVQ R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16:
|
|
MOVQ (SI), R9
|
|
MOVQ -8(SI)(R8*1), R10
|
|
MOVQ R9, (DI)
|
|
MOVQ R10, -8(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32:
|
|
MOVOU (SI), X0
|
|
MOVOU -16(SI)(R8*1), X1
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU -32(SI)(R8*1), X2
|
|
MOVOU -16(SI)(R8*1), X3
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, -32(DI)(R8*1)
|
|
MOVOU X3, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU -128(SI)(R8*1), X8
|
|
MOVOU -112(SI)(R8*1), X9
|
|
MOVOU -96(SI)(R8*1), X10
|
|
MOVOU -80(SI)(R8*1), X11
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, -128(DI)(R8*1)
|
|
MOVOU X9, -112(DI)(R8*1)
|
|
MOVOU X10, -96(DI)(R8*1)
|
|
MOVOU X11, -80(DI)(R8*1)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048:
|
|
LEAQ -256(R8), R8
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU 128(SI), X8
|
|
MOVOU 144(SI), X9
|
|
MOVOU 160(SI), X10
|
|
MOVOU 176(SI), X11
|
|
MOVOU 192(SI), X12
|
|
MOVOU 208(SI), X13
|
|
MOVOU 224(SI), X14
|
|
MOVOU 240(SI), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, 128(DI)
|
|
MOVOU X9, 144(DI)
|
|
MOVOU X10, 160(DI)
|
|
MOVOU X11, 176(DI)
|
|
MOVOU X12, 192(DI)
|
|
MOVOU X13, 208(DI)
|
|
MOVOU X14, 224(DI)
|
|
MOVOU X15, 240(DI)
|
|
CMPQ R8, $0x00000100
|
|
LEAQ 256(SI), SI
|
|
LEAQ 256(DI), DI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned:
|
|
LEAQ (SI)(R8*1), R10
|
|
MOVQ DI, R12
|
|
MOVOU -128(R10), X5
|
|
MOVOU -112(R10), X6
|
|
MOVQ $0x00000080, R9
|
|
ANDQ $0xffffffe0, DI
|
|
ADDQ $0x20, DI
|
|
MOVOU -96(R10), X7
|
|
MOVOU -80(R10), X8
|
|
MOVQ DI, R11
|
|
SUBQ R12, R11
|
|
MOVOU -64(R10), X9
|
|
MOVOU -48(R10), X10
|
|
SUBQ R11, R8
|
|
MOVOU -32(R10), X11
|
|
MOVOU -16(R10), X12
|
|
VMOVDQU (SI), Y4
|
|
ADDQ R11, SI
|
|
SUBQ R9, R8
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (SI), Y0
|
|
VMOVDQU 32(SI), Y1
|
|
VMOVDQU 64(SI), Y2
|
|
VMOVDQU 96(SI), Y3
|
|
ADDQ R9, SI
|
|
VMOVDQA Y0, (DI)
|
|
VMOVDQA Y1, 32(DI)
|
|
VMOVDQA Y2, 64(DI)
|
|
VMOVDQA Y3, 96(DI)
|
|
ADDQ R9, DI
|
|
SUBQ R9, R8
|
|
JA emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop
|
|
ADDQ R9, R8
|
|
ADDQ DI, R8
|
|
VMOVDQU Y4, (R12)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(R8)
|
|
MOVOU X6, -112(R8)
|
|
MOVOU X7, -96(R8)
|
|
MOVOU X8, -80(R8)
|
|
MOVOU X9, -64(R8)
|
|
MOVOU X10, -48(R8)
|
|
MOVOU X11, -32(R8)
|
|
MOVOU X12, -16(R8)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx
|
|
MOVQ R9, DI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm14BAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm14BAvx:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm14BAvx:
|
|
MOVL AX, SI
|
|
MOVL AX, SI
|
|
SUBL BX, SI
|
|
MOVL SI, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), SI
|
|
SUBL AX, SI
|
|
XORQ R8, R8
|
|
CMPQ SI, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm14BAvx:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm14BAvx
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP match_nolit_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm14BAvx:
|
|
LEAQ -8(SI), SI
|
|
LEAQ 8(R8), R8
|
|
CMPQ SI, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm14BAvx:
|
|
TESTQ SI, SI
|
|
JZ match_nolit_end_encodeBlockAsm14BAvx
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE match_nolit_end_encodeBlockAsm14BAvx
|
|
LEAQ 1(R8), R8
|
|
DECQ SI
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
match_nolit_end_encodeBlockAsm14BAvx:
|
|
MOVL 24(SP), SI
|
|
ADDQ $0x04, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
ADDL R8, AX
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm14BAvx
|
|
CMPL R8, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm14BAvx
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(R8), R8
|
|
ADDQ $0x05, DI
|
|
CMPL R8, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm14BAvx
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm14BAvx:
|
|
TESTL R8, R8
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm14BAvx:
|
|
CMPL R8, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(R8), R8
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx:
|
|
CMPL R8, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm14BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm14BAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm14BAvx
|
|
CMPQ DI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm14BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm14BAvx:
|
|
MOVQ -2(CX)(AX*1), SI
|
|
MOVQ $0x0000cf1bbcdcbf9b, DI
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, SI
|
|
MOVQ SI, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ DI, R8
|
|
SHRQ $0x32, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ DI, R9
|
|
SHRQ $0x32, R9
|
|
MOVL 32(SP)(R8*1), DI
|
|
MOVL 32(SP)(R9*1), DI
|
|
LEAQ -2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, 32(SP)(R9*1)
|
|
CMPL (CX)(R9*1), SI
|
|
JEQ match_nolit_loop_encodeBlockAsm14BAvx
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm14BAvx
|
|
|
|
emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm14BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm14BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm14BAvx
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ DX, SI
|
|
SHRL $0x10, SI
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB SI, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm14BAvx:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm14BAvx:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), SI
|
|
MOVB DL, (CX)
|
|
MOVB SI, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), SI
|
|
MOVW DX, (CX)
|
|
MOVB SI, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), SI
|
|
MOVL DX, (CX)
|
|
MOVL SI, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), SI
|
|
MOVQ DX, (CX)
|
|
MOVQ SI, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned:
|
|
LEAQ (AX)(BX*1), SI
|
|
MOVQ CX, R8
|
|
MOVOU -128(SI), X5
|
|
MOVOU -112(SI), X6
|
|
MOVQ $0x00000080, DX
|
|
ANDQ $0xffffffe0, CX
|
|
ADDQ $0x20, CX
|
|
MOVOU -96(SI), X7
|
|
MOVOU -80(SI), X8
|
|
MOVQ CX, DI
|
|
SUBQ R8, DI
|
|
MOVOU -64(SI), X9
|
|
MOVOU -48(SI), X10
|
|
SUBQ DI, BX
|
|
MOVOU -32(SI), X11
|
|
MOVOU -16(SI), X12
|
|
VMOVDQU (AX), Y4
|
|
ADDQ DI, AX
|
|
SUBQ DX, BX
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (AX), Y0
|
|
VMOVDQU 32(AX), Y1
|
|
VMOVDQU 64(AX), Y2
|
|
VMOVDQU 96(AX), Y3
|
|
ADDQ DX, AX
|
|
VMOVDQA Y0, (CX)
|
|
VMOVDQA Y1, 32(CX)
|
|
VMOVDQA Y2, 64(CX)
|
|
VMOVDQA Y3, 96(CX)
|
|
ADDQ DX, CX
|
|
SUBQ DX, BX
|
|
JA emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop
|
|
ADDQ DX, BX
|
|
ADDQ CX, BX
|
|
VMOVDQU Y4, (R8)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(BX)
|
|
MOVOU X6, -112(BX)
|
|
MOVOU X7, -96(BX)
|
|
MOVOU X8, -80(BX)
|
|
MOVOU X9, -64(BX)
|
|
MOVOU X10, -48(BX)
|
|
MOVOU X11, -32(BX)
|
|
MOVOU X12, -16(BX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func encodeBlockAsm12BAvx(dst []byte, src []byte) int
|
|
// Requires: AVX, SSE2
|
|
TEXT ·encodeBlockAsm12BAvx(SB), $4128-56
|
|
MOVQ $0x00000020, AX
|
|
LEAQ 32(SP), CX
|
|
PXOR X0, X0
|
|
|
|
zero_loop_encodeBlockAsm12BAvx:
|
|
MOVOU X0, (CX)
|
|
MOVOU X0, 16(CX)
|
|
MOVOU X0, 32(CX)
|
|
MOVOU X0, 48(CX)
|
|
MOVOU X0, 64(CX)
|
|
MOVOU X0, 80(CX)
|
|
MOVOU X0, 96(CX)
|
|
MOVOU X0, 112(CX)
|
|
ADDQ $0x80, CX
|
|
DECQ AX
|
|
JNZ zero_loop_encodeBlockAsm12BAvx
|
|
MOVL AX, 20(SP)
|
|
MOVQ src_len+32(FP), AX
|
|
LEAQ -5(AX), CX
|
|
LEAQ -8(AX), BX
|
|
SHRQ $0x05, AX
|
|
SUBL AX, CX
|
|
MOVL BX, 16(SP)
|
|
MOVQ dst_base+0(FP), AX
|
|
MOVQ AX, 8(SP)
|
|
LEAQ (AX)(CX*1), CX
|
|
MOVQ CX, (SP)
|
|
MOVL $0x00000001, AX
|
|
MOVL AX, 24(SP)
|
|
MOVQ src_base+24(FP), CX
|
|
|
|
search_loop_encodeBlockAsm12BAvx:
|
|
MOVQ (CX)(AX*1), SI
|
|
MOVL AX, BX
|
|
SUBL 20(SP), BX
|
|
SHRL $0x04, BX
|
|
LEAQ 4(AX)(BX*1), BX
|
|
MOVL 16(SP), DI
|
|
CMPL BX, DI
|
|
JGT emit_remainder_encodeBlockAsm12BAvx
|
|
MOVL BX, 28(SP)
|
|
MOVQ $0x0000cf1bbcdcbf9b, BX
|
|
MOVQ SI, R8
|
|
MOVQ SI, R9
|
|
SHRQ $0x08, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ BX, R8
|
|
SHRQ $0x34, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ BX, R9
|
|
SHRQ $0x34, R9
|
|
MOVL 32(SP)(R8*1), BX
|
|
MOVL 32(SP)(R9*1), DI
|
|
MOVL AX, 32(SP)(R8*1)
|
|
LEAL 1(AX), R8
|
|
MOVL R8, 32(SP)(R9*1)
|
|
MOVL AX, R8
|
|
SUBL 24(SP), R8
|
|
MOVL 1(CX)(R8*1), R10
|
|
MOVQ SI, R9
|
|
SHLQ $0x08, R9
|
|
CMPL R9, R10
|
|
JNE no_repeat_found_encodeBlockAsm12BAvx
|
|
LEAQ 1(AX), SI
|
|
MOVL 20(SP), BX
|
|
TESTL R8, R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_extend_back_loop_encodeBlockAsm12BAvx:
|
|
CMPL SI, BX
|
|
JG repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
MOVB -1(CX)(R8*1), DL
|
|
MOVB -1(CX)(SI*1), DI
|
|
CMPB DL, DI
|
|
JNE repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
LEAQ -1(SI), SI
|
|
DECL R8
|
|
JZ repeat_extend_back_end_encodeBlockAsm12BAvx
|
|
JMP repeat_extend_back_loop_encodeBlockAsm12BAvx
|
|
|
|
repeat_extend_back_end_encodeBlockAsm12BAvx:
|
|
MOVL 20(SP), BX
|
|
CMPL BX, SI
|
|
JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx
|
|
MOVL SI, DI
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(BX*1), R8
|
|
SUBL BX, DI
|
|
MOVQ dst_base+0(FP), BX
|
|
MOVQ DI, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_repeat_emit_encodeBlockAsm12BAvx
|
|
MOVB $0xfc, (BX)
|
|
MOVL R9, 1(BX)
|
|
ADDQ $0x05, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (BX)
|
|
MOVW R9, 1(BX)
|
|
MOVB R10, 3(BX)
|
|
ADDQ $0x04, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
three_bytes_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf4, (BX)
|
|
MOVW R9, 1(BX)
|
|
ADDQ $0x03, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
two_bytes_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf0, (BX)
|
|
MOVB R9, 1(BX)
|
|
ADDQ $0x02, BX
|
|
JMP memmove_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
one_byte_repeat_emit_encodeBlockAsm12BAvx:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (BX)
|
|
ADDQ $0x01, BX
|
|
|
|
memmove_repeat_emit_encodeBlockAsm12BAvx:
|
|
LEAQ (BX)(DI*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail:
|
|
TESTQ DI, DI
|
|
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
CMPQ DI, $0x02
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2
|
|
CMPQ DI, $0x04
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4
|
|
CMPQ DI, $0x08
|
|
JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8
|
|
CMPQ DI, $0x10
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16
|
|
CMPQ DI, $0x20
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32
|
|
CMPQ DI, $0x40
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64
|
|
CMPQ DI, $0x80
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128
|
|
CMPQ DI, $0x00000100
|
|
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
|
|
MOVB (R8), R9
|
|
MOVB -1(R8)(DI*1), R10
|
|
MOVB R9, (BX)
|
|
MOVB R10, -1(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4:
|
|
MOVL (R8), R9
|
|
MOVL R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3:
|
|
MOVW (R8), R9
|
|
MOVB 2(R8), R10
|
|
MOVW R9, (BX)
|
|
MOVB R10, 2(BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
|
|
MOVL (R8), R9
|
|
MOVL -4(R8)(DI*1), R10
|
|
MOVL R9, (BX)
|
|
MOVL R10, -4(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8:
|
|
MOVQ (R8), R9
|
|
MOVQ R9, (BX)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
|
|
MOVQ (R8), R9
|
|
MOVQ -8(R8)(DI*1), R10
|
|
MOVQ R9, (BX)
|
|
MOVQ R10, -8(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
|
|
MOVOU (R8), X0
|
|
MOVOU -16(R8)(DI*1), X1
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU -32(R8)(DI*1), X2
|
|
MOVOU -16(R8)(DI*1), X3
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, -32(BX)(DI*1)
|
|
MOVOU X3, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU -128(R8)(DI*1), X8
|
|
MOVOU -112(R8)(DI*1), X9
|
|
MOVOU -96(R8)(DI*1), X10
|
|
MOVOU -80(R8)(DI*1), X11
|
|
MOVOU -64(R8)(DI*1), X12
|
|
MOVOU -48(R8)(DI*1), X13
|
|
MOVOU -32(R8)(DI*1), X14
|
|
MOVOU -16(R8)(DI*1), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, -128(BX)(DI*1)
|
|
MOVOU X9, -112(BX)(DI*1)
|
|
MOVOU X10, -96(BX)(DI*1)
|
|
MOVOU X11, -80(BX)(DI*1)
|
|
MOVOU X12, -64(BX)(DI*1)
|
|
MOVOU X13, -48(BX)(DI*1)
|
|
MOVOU X14, -32(BX)(DI*1)
|
|
MOVOU X15, -16(BX)(DI*1)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
|
|
LEAQ -256(DI), DI
|
|
MOVOU (R8), X0
|
|
MOVOU 16(R8), X1
|
|
MOVOU 32(R8), X2
|
|
MOVOU 48(R8), X3
|
|
MOVOU 64(R8), X4
|
|
MOVOU 80(R8), X5
|
|
MOVOU 96(R8), X6
|
|
MOVOU 112(R8), X7
|
|
MOVOU 128(R8), X8
|
|
MOVOU 144(R8), X9
|
|
MOVOU 160(R8), X10
|
|
MOVOU 176(R8), X11
|
|
MOVOU 192(R8), X12
|
|
MOVOU 208(R8), X13
|
|
MOVOU 224(R8), X14
|
|
MOVOU 240(R8), X15
|
|
MOVOU X0, (BX)
|
|
MOVOU X1, 16(BX)
|
|
MOVOU X2, 32(BX)
|
|
MOVOU X3, 48(BX)
|
|
MOVOU X4, 64(BX)
|
|
MOVOU X5, 80(BX)
|
|
MOVOU X6, 96(BX)
|
|
MOVOU X7, 112(BX)
|
|
MOVOU X8, 128(BX)
|
|
MOVOU X9, 144(BX)
|
|
MOVOU X10, 160(BX)
|
|
MOVOU X11, 176(BX)
|
|
MOVOU X12, 192(BX)
|
|
MOVOU X13, 208(BX)
|
|
MOVOU X14, 224(BX)
|
|
MOVOU X15, 240(BX)
|
|
CMPQ DI, $0x00000100
|
|
LEAQ 256(R8), R8
|
|
LEAQ 256(BX), BX
|
|
JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
|
|
LEAQ (R8)(DI*1), R10
|
|
MOVQ BX, R12
|
|
MOVOU -128(R10), X5
|
|
MOVOU -112(R10), X6
|
|
MOVQ $0x00000080, R9
|
|
ANDQ $0xffffffe0, BX
|
|
ADDQ $0x20, BX
|
|
MOVOU -96(R10), X7
|
|
MOVOU -80(R10), X8
|
|
MOVQ BX, R11
|
|
SUBQ R12, R11
|
|
MOVOU -64(R10), X9
|
|
MOVOU -48(R10), X10
|
|
SUBQ R11, DI
|
|
MOVOU -32(R10), X11
|
|
MOVOU -16(R10), X12
|
|
VMOVDQU (R8), Y4
|
|
ADDQ R11, R8
|
|
SUBQ R9, DI
|
|
|
|
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (R8), Y0
|
|
VMOVDQU 32(R8), Y1
|
|
VMOVDQU 64(R8), Y2
|
|
VMOVDQU 96(R8), Y3
|
|
ADDQ R9, R8
|
|
VMOVDQA Y0, (BX)
|
|
VMOVDQA Y1, 32(BX)
|
|
VMOVDQA Y2, 64(BX)
|
|
VMOVDQA Y3, 96(BX)
|
|
ADDQ R9, BX
|
|
SUBQ R9, DI
|
|
JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
|
|
ADDQ R9, DI
|
|
ADDQ BX, DI
|
|
VMOVDQU Y4, (R12)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(DI)
|
|
MOVOU X6, -112(DI)
|
|
MOVOU X7, -96(DI)
|
|
MOVOU X8, -80(DI)
|
|
MOVOU X9, -64(DI)
|
|
MOVOU X10, -48(DI)
|
|
MOVOU X11, -32(DI)
|
|
MOVOU X12, -16(DI)
|
|
JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx
|
|
MOVQ R9, BX
|
|
|
|
emit_literal_done_repeat_emit_encodeBlockAsm12BAvx:
|
|
MOVQ BX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx:
|
|
ADDL $0x05, AX
|
|
MOVL AX, BX
|
|
SUBL 24(SP), BX
|
|
MOVL 16(SP), BX
|
|
SUBL AX, BX
|
|
XORQ R8, R8
|
|
CMPQ BX, $0x08
|
|
JL matchlen_single_repeat_extend
|
|
|
|
matchlen_loopback_repeat_extend:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_repeat_extend
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP repeat_extend_forward_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_loop_repeat_extend:
|
|
LEAQ -8(BX), BX
|
|
LEAQ 8(R8), R8
|
|
CMPQ BX, $0x08
|
|
JGE matchlen_loopback_repeat_extend
|
|
|
|
matchlen_single_repeat_extend:
|
|
TESTQ BX, BX
|
|
JZ repeat_extend_forward_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_single_loopback_repeat_extend:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE repeat_extend_forward_end_encodeBlockAsm12BAvx
|
|
LEAQ 1(R8), R8
|
|
DECQ BX
|
|
JNZ matchlen_single_loopback_repeat_extend
|
|
|
|
repeat_extend_forward_end_encodeBlockAsm12BAvx:
|
|
ADDL R8, AX
|
|
MOVL AX, BX
|
|
SUBL SI, BX
|
|
MOVL 24(SP), SI
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVL 20(SP), R8
|
|
TESTL R8, R8
|
|
JZ repeat_as_copy_encodeBlockAsm12BAvx
|
|
|
|
emit_repeat_again_match_repeat_:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_match_repeat_
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_match_repeat_
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_repeat_
|
|
|
|
cant_repeat_two_offset_match_repeat_:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_match_repeat_
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_match_repeat_
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_match_repeat_
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_repeat_
|
|
|
|
repeat_five_match_repeat_:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_match_repeat_:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_match_repeat_:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_match_repeat_:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_match_repeat_:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_as_copy_encodeBlockAsm12BAvx:
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx
|
|
CMPL BX, $0x40
|
|
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(BX), BX
|
|
ADDQ $0x05, DI
|
|
CMPL BX, $0x04
|
|
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
TESTL BX, BX
|
|
JZ repeat_end_emit_encodeBlockAsm12BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
CMPL BX, $0x40
|
|
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(BX), BX
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
MOVQ BX, R8
|
|
LEAQ -4(BX), BX
|
|
CMPL R8, $0x08
|
|
JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL R8, $0x0c
|
|
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
CMPL BX, $0x00000104
|
|
JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL BX, $0x00010100
|
|
JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL BX, $0x0100ffff
|
|
JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
LEAQ -16842747(BX), BX
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -65536(BX), BX
|
|
MOVQ BX, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW BX, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -256(BX), BX
|
|
MOVW $0x0019, (DI)
|
|
MOVW BX, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -4(BX), BX
|
|
MOVW $0x0015, (DI)
|
|
MOVB BL, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
SHLL $0x02, BX
|
|
ORL $0x01, BX
|
|
MOVW BX, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short:
|
|
XORQ R8, R8
|
|
LEAQ 1(R8)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
CMPL BX, $0x0c
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(BX*4), BX
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, BX
|
|
MOVB BL, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP repeat_end_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(BX*4), BX
|
|
MOVB BL, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
repeat_end_emit_encodeBlockAsm12BAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL 16(SP), BX
|
|
CMPL AX, BX
|
|
JGT emit_remainder_encodeBlockAsm12BAvx
|
|
JMP search_loop_encodeBlockAsm12BAvx
|
|
|
|
no_repeat_found_encodeBlockAsm12BAvx:
|
|
MOVQ $0x0000cf1bbcdcbf9b, R9
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, R8
|
|
SHLQ $0x10, R8
|
|
IMULQ R9, R8
|
|
SHRQ $0x34, R8
|
|
CMPL (CX)(BX*1), SI
|
|
SHRQ $0x08, SI
|
|
JEQ candidate_match_encodeBlockAsm12BAvx
|
|
MOVL 32(SP)(R8*1), BX
|
|
CMPL (CX)(DI*1), SI
|
|
JEQ candidate2_match_encodeBlockAsm12BAvx
|
|
LEAQ 2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
SHRQ $0x08, SI
|
|
CMPL (CX)(BX*1), SI
|
|
JEQ candidate3_match_encodeBlockAsm12BAvx
|
|
MOVL 28(SP), AX
|
|
JMP search_loop_encodeBlockAsm12BAvx
|
|
|
|
candidate3_match_encodeBlockAsm12BAvx:
|
|
ADDL $0x02, AX
|
|
JMP candidate_match_encodeBlockAsm12BAvx
|
|
|
|
candidate2_match_encodeBlockAsm12BAvx:
|
|
LEAQ -2(AX), BX
|
|
MOVL BX, 32(SP)(R8*1)
|
|
INCL AX
|
|
MOVL DI, BX
|
|
|
|
candidate_match_encodeBlockAsm12BAvx:
|
|
MOVL 20(SP), SI
|
|
TESTL BX, BX
|
|
JZ match_extend_back_end_encodeBlockAsm12BAvx
|
|
|
|
match_extend_back_loop_encodeBlockAsm12BAvx:
|
|
CMPL AX, SI
|
|
JG match_extend_back_end_encodeBlockAsm12BAvx
|
|
MOVB -1(CX)(BX*1), DL
|
|
MOVB -1(CX)(AX*1), DI
|
|
CMPB DL, DI
|
|
JNE match_extend_back_end_encodeBlockAsm12BAvx
|
|
LEAL -1(AX), AX
|
|
DECL BX
|
|
JZ match_extend_back_end_encodeBlockAsm12BAvx
|
|
JMP match_extend_back_loop_encodeBlockAsm12BAvx
|
|
|
|
match_extend_back_end_encodeBlockAsm12BAvx:
|
|
MOVL AX, SI
|
|
SUBL 20(SP), SI
|
|
LEAQ dst_base+0(FP)(SI*1), SI
|
|
CMPQ SI, (SP)
|
|
JL match_dst_size_check_encodeBlockAsm12BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_dst_size_check_encodeBlockAsm12BAvx:
|
|
MOVL BX, SI
|
|
MOVL 20(SP), DI
|
|
CMPL DI, SI
|
|
JEQ emit_literal_skip_match_emit_encodeBlockAsm12BAvx
|
|
MOVL SI, R8
|
|
MOVL SI, 20(SP)
|
|
LEAQ (CX)(DI*1), SI
|
|
SUBL DI, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
MOVQ R8, R9
|
|
SUBL $0x01, R9
|
|
JC emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x3c
|
|
JLT one_byte_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x00000100
|
|
JLT two_bytes_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x00010000
|
|
JLT three_bytes_match_emit_encodeBlockAsm12BAvx
|
|
CMPL R9, $0x01000000
|
|
JLT four_bytes_match_emit_encodeBlockAsm12BAvx
|
|
MOVB $0xfc, (DI)
|
|
MOVL R9, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_match_emit_encodeBlockAsm12BAvx:
|
|
MOVQ R9, R10
|
|
SHRL $0x10, R10
|
|
MOVB $0xf8, (DI)
|
|
MOVW R9, 1(DI)
|
|
MOVB R10, 3(DI)
|
|
ADDQ $0x04, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
three_bytes_match_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf4, (DI)
|
|
MOVW R9, 1(DI)
|
|
ADDQ $0x03, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
two_bytes_match_emit_encodeBlockAsm12BAvx:
|
|
MOVB $0xf0, (DI)
|
|
MOVB R9, 1(DI)
|
|
ADDQ $0x02, DI
|
|
JMP memmove_match_emit_encodeBlockAsm12BAvx
|
|
|
|
one_byte_match_emit_encodeBlockAsm12BAvx:
|
|
SHLB $0x02, R9
|
|
MOVB R9, (DI)
|
|
ADDQ $0x01, DI
|
|
|
|
memmove_match_emit_encodeBlockAsm12BAvx:
|
|
LEAQ (DI)(R8*1), R9
|
|
NOP
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail:
|
|
TESTQ R8, R8
|
|
JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
CMPQ R8, $0x02
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2
|
|
CMPQ R8, $0x04
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4
|
|
CMPQ R8, $0x08
|
|
JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8
|
|
CMPQ R8, $0x10
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16
|
|
CMPQ R8, $0x20
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32
|
|
CMPQ R8, $0x40
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64
|
|
CMPQ R8, $0x80
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128
|
|
CMPQ R8, $0x00000100
|
|
JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2:
|
|
MOVB (SI), R9
|
|
MOVB -1(SI)(R8*1), R10
|
|
MOVB R9, (DI)
|
|
MOVB R10, -1(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4:
|
|
MOVL (SI), R9
|
|
MOVL R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3:
|
|
MOVW (SI), R9
|
|
MOVB 2(SI), R10
|
|
MOVW R9, (DI)
|
|
MOVB R10, 2(DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7:
|
|
MOVL (SI), R9
|
|
MOVL -4(SI)(R8*1), R10
|
|
MOVL R9, (DI)
|
|
MOVL R10, -4(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8:
|
|
MOVQ (SI), R9
|
|
MOVQ R9, (DI)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16:
|
|
MOVQ (SI), R9
|
|
MOVQ -8(SI)(R8*1), R10
|
|
MOVQ R9, (DI)
|
|
MOVQ R10, -8(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32:
|
|
MOVOU (SI), X0
|
|
MOVOU -16(SI)(R8*1), X1
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU -32(SI)(R8*1), X2
|
|
MOVOU -16(SI)(R8*1), X3
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, -32(DI)(R8*1)
|
|
MOVOU X3, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256:
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU -128(SI)(R8*1), X8
|
|
MOVOU -112(SI)(R8*1), X9
|
|
MOVOU -96(SI)(R8*1), X10
|
|
MOVOU -80(SI)(R8*1), X11
|
|
MOVOU -64(SI)(R8*1), X12
|
|
MOVOU -48(SI)(R8*1), X13
|
|
MOVOU -32(SI)(R8*1), X14
|
|
MOVOU -16(SI)(R8*1), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, -128(DI)(R8*1)
|
|
MOVOU X9, -112(DI)(R8*1)
|
|
MOVOU X10, -96(DI)(R8*1)
|
|
MOVOU X11, -80(DI)(R8*1)
|
|
MOVOU X12, -64(DI)(R8*1)
|
|
MOVOU X13, -48(DI)(R8*1)
|
|
MOVOU X14, -32(DI)(R8*1)
|
|
MOVOU X15, -16(DI)(R8*1)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048:
|
|
LEAQ -256(R8), R8
|
|
MOVOU (SI), X0
|
|
MOVOU 16(SI), X1
|
|
MOVOU 32(SI), X2
|
|
MOVOU 48(SI), X3
|
|
MOVOU 64(SI), X4
|
|
MOVOU 80(SI), X5
|
|
MOVOU 96(SI), X6
|
|
MOVOU 112(SI), X7
|
|
MOVOU 128(SI), X8
|
|
MOVOU 144(SI), X9
|
|
MOVOU 160(SI), X10
|
|
MOVOU 176(SI), X11
|
|
MOVOU 192(SI), X12
|
|
MOVOU 208(SI), X13
|
|
MOVOU 224(SI), X14
|
|
MOVOU 240(SI), X15
|
|
MOVOU X0, (DI)
|
|
MOVOU X1, 16(DI)
|
|
MOVOU X2, 32(DI)
|
|
MOVOU X3, 48(DI)
|
|
MOVOU X4, 64(DI)
|
|
MOVOU X5, 80(DI)
|
|
MOVOU X6, 96(DI)
|
|
MOVOU X7, 112(DI)
|
|
MOVOU X8, 128(DI)
|
|
MOVOU X9, 144(DI)
|
|
MOVOU X10, 160(DI)
|
|
MOVOU X11, 176(DI)
|
|
MOVOU X12, 192(DI)
|
|
MOVOU X13, 208(DI)
|
|
MOVOU X14, 224(DI)
|
|
MOVOU X15, 240(DI)
|
|
CMPQ R8, $0x00000100
|
|
LEAQ 256(SI), SI
|
|
LEAQ 256(DI), DI
|
|
JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned:
|
|
LEAQ (SI)(R8*1), R10
|
|
MOVQ DI, R12
|
|
MOVOU -128(R10), X5
|
|
MOVOU -112(R10), X6
|
|
MOVQ $0x00000080, R9
|
|
ANDQ $0xffffffe0, DI
|
|
ADDQ $0x20, DI
|
|
MOVOU -96(R10), X7
|
|
MOVOU -80(R10), X8
|
|
MOVQ DI, R11
|
|
SUBQ R12, R11
|
|
MOVOU -64(R10), X9
|
|
MOVOU -48(R10), X10
|
|
SUBQ R11, R8
|
|
MOVOU -32(R10), X11
|
|
MOVOU -16(R10), X12
|
|
VMOVDQU (SI), Y4
|
|
ADDQ R11, SI
|
|
SUBQ R9, R8
|
|
|
|
emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (SI), Y0
|
|
VMOVDQU 32(SI), Y1
|
|
VMOVDQU 64(SI), Y2
|
|
VMOVDQU 96(SI), Y3
|
|
ADDQ R9, SI
|
|
VMOVDQA Y0, (DI)
|
|
VMOVDQA Y1, 32(DI)
|
|
VMOVDQA Y2, 64(DI)
|
|
VMOVDQA Y3, 96(DI)
|
|
ADDQ R9, DI
|
|
SUBQ R9, R8
|
|
JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop
|
|
ADDQ R9, R8
|
|
ADDQ DI, R8
|
|
VMOVDQU Y4, (R12)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(R8)
|
|
MOVOU X6, -112(R8)
|
|
MOVOU X7, -96(R8)
|
|
MOVOU X8, -80(R8)
|
|
MOVOU X9, -64(R8)
|
|
MOVOU X10, -48(R8)
|
|
MOVOU X11, -32(R8)
|
|
MOVOU X12, -16(R8)
|
|
JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx
|
|
MOVQ R9, DI
|
|
|
|
emit_literal_done_match_emit_encodeBlockAsm12BAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
|
|
emit_literal_skip_match_emit_encodeBlockAsm12BAvx:
|
|
NOP
|
|
|
|
match_nolit_loop_encodeBlockAsm12BAvx:
|
|
MOVL AX, SI
|
|
MOVL AX, SI
|
|
SUBL BX, SI
|
|
MOVL SI, 24(SP)
|
|
ADDL $0x04, AX
|
|
ADDL $0x04, BX
|
|
MOVL 16(SP), SI
|
|
SUBL AX, SI
|
|
XORQ R8, R8
|
|
CMPQ SI, $0x08
|
|
JL matchlen_single_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
matchlen_loopback_match_nolit_encodeBlockAsm12BAvx:
|
|
MOVQ (CX)(R8*1), DI
|
|
XORQ (CX)(R8*1), DI
|
|
TESTQ DI, DI
|
|
JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx
|
|
BSFQ DI, DI
|
|
SARQ $0x03, DI
|
|
LEAQ (R8)(DI*1), R8
|
|
JMP match_nolit_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_loop_match_nolit_encodeBlockAsm12BAvx:
|
|
LEAQ -8(SI), SI
|
|
LEAQ 8(R8), R8
|
|
CMPQ SI, $0x08
|
|
JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
matchlen_single_match_nolit_encodeBlockAsm12BAvx:
|
|
TESTQ SI, SI
|
|
JZ match_nolit_end_encodeBlockAsm12BAvx
|
|
|
|
matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx:
|
|
MOVB (CX)(R8*1), DI
|
|
CMPB (CX)(R8*1), DI
|
|
JNE match_nolit_end_encodeBlockAsm12BAvx
|
|
LEAQ 1(R8), R8
|
|
DECQ SI
|
|
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
match_nolit_end_encodeBlockAsm12BAvx:
|
|
MOVL 24(SP), SI
|
|
ADDQ $0x04, R8
|
|
MOVQ dst_base+0(FP), DI
|
|
ADDL R8, AX
|
|
CMPL SI, $0x00010000
|
|
JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx
|
|
CMPL R8, $0x40
|
|
JLE four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
|
|
MOVB $0xff, (DI)
|
|
MOVD SI, 1(DI)
|
|
LEAQ -64(R8), R8
|
|
ADDQ $0x05, DI
|
|
CMPL R8, $0x04
|
|
JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_remain_match_nolit_encodeBlockAsm12BAvx:
|
|
TESTL R8, R8
|
|
JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
MOVB $0x03, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVD SI, 1(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_match_nolit_encodeBlockAsm12BAvx:
|
|
CMPL R8, $0x40
|
|
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx
|
|
MOVB $0xee, (DI)
|
|
MOVW SI, 1(DI)
|
|
LEAQ -60(R8), R8
|
|
ADDQ $0x03, DI
|
|
|
|
emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
MOVQ R8, R9
|
|
LEAQ -4(R8), R8
|
|
CMPL R9, $0x08
|
|
JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL R9, $0x0c
|
|
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL SI, $0x00000800
|
|
JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
CMPL R8, $0x00000104
|
|
JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL R8, $0x00010100
|
|
JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
CMPL R8, $0x0100ffff
|
|
JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
LEAQ -16842747(R8), R8
|
|
MOVW $0x001d, (DI)
|
|
MOVW $0xfffb, 2(DI)
|
|
MOVB $0xff, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short
|
|
|
|
repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -65536(R8), R8
|
|
MOVQ R8, SI
|
|
MOVW $0x001d, (DI)
|
|
MOVW R8, 2(DI)
|
|
SARQ $0x10, SI
|
|
MOVB SI, 4(DI)
|
|
ADDQ $0x05, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -256(R8), R8
|
|
MOVW $0x0019, (DI)
|
|
MOVW R8, 2(DI)
|
|
ADDQ $0x04, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
LEAQ -4(R8), R8
|
|
MOVW $0x0015, (DI)
|
|
MOVB R8, 2(DI)
|
|
ADDQ $0x03, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
SHLL $0x02, R8
|
|
ORL $0x01, R8
|
|
MOVW R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short:
|
|
XORQ R9, R9
|
|
LEAQ 1(R9)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SARL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx:
|
|
CMPL R8, $0x0c
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
|
|
CMPL SI, $0x00000800
|
|
JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx
|
|
MOVB $0x01, DL
|
|
LEAQ -16(DX)(R8*4), R8
|
|
MOVB SI, 1(DI)
|
|
SHRL $0x08, SI
|
|
SHLL $0x05, SI
|
|
ORL SI, R8
|
|
MOVB R8, (DI)
|
|
ADDQ $0x02, DI
|
|
JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx
|
|
|
|
emit_copy_three_match_nolit_encodeBlockAsm12BAvx:
|
|
MOVB $0x02, DL
|
|
LEAQ -4(DX)(R8*4), R8
|
|
MOVB R8, (DI)
|
|
MOVW SI, 1(DI)
|
|
ADDQ $0x03, DI
|
|
|
|
match_nolit_emitcopy_end_encodeBlockAsm12BAvx:
|
|
MOVQ DI, dst_base+0(FP)
|
|
MOVL AX, 20(SP)
|
|
CMPL AX, 16(SP)
|
|
JGE emit_remainder_encodeBlockAsm12BAvx
|
|
CMPQ DI, (SP)
|
|
JL match_nolit_dst_ok_encodeBlockAsm12BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
match_nolit_dst_ok_encodeBlockAsm12BAvx:
|
|
MOVQ -2(CX)(AX*1), SI
|
|
MOVQ $0x0000cf1bbcdcbf9b, DI
|
|
MOVQ SI, R8
|
|
SHRQ $0x10, SI
|
|
MOVQ SI, R9
|
|
SHLQ $0x10, R8
|
|
IMULQ DI, R8
|
|
SHRQ $0x34, R8
|
|
SHLQ $0x10, R9
|
|
IMULQ DI, R9
|
|
SHRQ $0x34, R9
|
|
MOVL 32(SP)(R8*1), DI
|
|
MOVL 32(SP)(R9*1), DI
|
|
LEAQ -2(AX), DI
|
|
MOVL DI, 32(SP)(R8*1)
|
|
MOVL AX, 32(SP)(R9*1)
|
|
CMPL (CX)(R9*1), SI
|
|
JEQ match_nolit_loop_encodeBlockAsm12BAvx
|
|
INCL AX
|
|
JMP search_loop_encodeBlockAsm12BAvx
|
|
|
|
emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
SUBL 20(SP), AX
|
|
MOVQ dst_base+0(FP), DX
|
|
LEAQ (DX)(AX*1), DX
|
|
CMPQ DX, (SP)
|
|
JL emit_remainder_ok_encodeBlockAsm12BAvx
|
|
MOVQ $0x00000000, ret+48(FP)
|
|
RET
|
|
|
|
emit_remainder_ok_encodeBlockAsm12BAvx:
|
|
MOVQ src_len+32(FP), AX
|
|
MOVL 20(SP), DX
|
|
CMPL DX, AX
|
|
JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx
|
|
MOVL AX, BX
|
|
MOVL AX, 20(SP)
|
|
LEAQ (CX)(DX*1), AX
|
|
SUBL DX, BX
|
|
MOVQ dst_base+0(FP), CX
|
|
MOVQ BX, DX
|
|
SUBL $0x01, DX
|
|
JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x3c
|
|
JLT one_byte_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x00000100
|
|
JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x00010000
|
|
JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPL DX, $0x01000000
|
|
JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx
|
|
MOVB $0xfc, (CX)
|
|
MOVL DX, 1(CX)
|
|
ADDQ $0x05, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
four_bytes_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ DX, SI
|
|
SHRL $0x10, SI
|
|
MOVB $0xf8, (CX)
|
|
MOVW DX, 1(CX)
|
|
MOVB SI, 3(CX)
|
|
ADDQ $0x04, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
three_bytes_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVB $0xf4, (CX)
|
|
MOVW DX, 1(CX)
|
|
ADDQ $0x03, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
two_bytes_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVB $0xf0, (CX)
|
|
MOVB DL, 1(CX)
|
|
ADDQ $0x02, CX
|
|
JMP memmove_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
one_byte_emit_remainder_encodeBlockAsm12BAvx:
|
|
SHLB $0x02, DL
|
|
MOVB DL, (CX)
|
|
ADDQ $0x01, CX
|
|
|
|
memmove_emit_remainder_encodeBlockAsm12BAvx:
|
|
LEAQ (CX)(BX*1), DX
|
|
NOP
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail:
|
|
TESTQ BX, BX
|
|
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
CMPQ BX, $0x02
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2
|
|
CMPQ BX, $0x04
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4
|
|
CMPQ BX, $0x08
|
|
JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7
|
|
JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8
|
|
CMPQ BX, $0x10
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16
|
|
CMPQ BX, $0x20
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32
|
|
CMPQ BX, $0x40
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64
|
|
CMPQ BX, $0x80
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128
|
|
CMPQ BX, $0x00000100
|
|
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2:
|
|
MOVB (AX), DL
|
|
MOVB -1(AX)(BX*1), SI
|
|
MOVB DL, (CX)
|
|
MOVB SI, -1(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4:
|
|
MOVL (AX), DX
|
|
MOVL DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3:
|
|
MOVW (AX), DX
|
|
MOVB 2(AX), SI
|
|
MOVW DX, (CX)
|
|
MOVB SI, 2(CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7:
|
|
MOVL (AX), DX
|
|
MOVL -4(AX)(BX*1), SI
|
|
MOVL DX, (CX)
|
|
MOVL SI, -4(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8:
|
|
MOVQ (AX), DX
|
|
MOVQ DX, (CX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16:
|
|
MOVQ (AX), DX
|
|
MOVQ -8(AX)(BX*1), SI
|
|
MOVQ DX, (CX)
|
|
MOVQ SI, -8(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32:
|
|
MOVOU (AX), X0
|
|
MOVOU -16(AX)(BX*1), X1
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU -32(AX)(BX*1), X2
|
|
MOVOU -16(AX)(BX*1), X3
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, -32(CX)(BX*1)
|
|
MOVOU X3, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256:
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU -128(AX)(BX*1), X8
|
|
MOVOU -112(AX)(BX*1), X9
|
|
MOVOU -96(AX)(BX*1), X10
|
|
MOVOU -80(AX)(BX*1), X11
|
|
MOVOU -64(AX)(BX*1), X12
|
|
MOVOU -48(AX)(BX*1), X13
|
|
MOVOU -32(AX)(BX*1), X14
|
|
MOVOU -16(AX)(BX*1), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, -128(CX)(BX*1)
|
|
MOVOU X9, -112(CX)(BX*1)
|
|
MOVOU X10, -96(CX)(BX*1)
|
|
MOVOU X11, -80(CX)(BX*1)
|
|
MOVOU X12, -64(CX)(BX*1)
|
|
MOVOU X13, -48(CX)(BX*1)
|
|
MOVOU X14, -32(CX)(BX*1)
|
|
MOVOU X15, -16(CX)(BX*1)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048:
|
|
LEAQ -256(BX), BX
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVOU 128(AX), X8
|
|
MOVOU 144(AX), X9
|
|
MOVOU 160(AX), X10
|
|
MOVOU 176(AX), X11
|
|
MOVOU 192(AX), X12
|
|
MOVOU 208(AX), X13
|
|
MOVOU 224(AX), X14
|
|
MOVOU 240(AX), X15
|
|
MOVOU X0, (CX)
|
|
MOVOU X1, 16(CX)
|
|
MOVOU X2, 32(CX)
|
|
MOVOU X3, 48(CX)
|
|
MOVOU X4, 64(CX)
|
|
MOVOU X5, 80(CX)
|
|
MOVOU X6, 96(CX)
|
|
MOVOU X7, 112(CX)
|
|
MOVOU X8, 128(CX)
|
|
MOVOU X9, 144(CX)
|
|
MOVOU X10, 160(CX)
|
|
MOVOU X11, 176(CX)
|
|
MOVOU X12, 192(CX)
|
|
MOVOU X13, 208(CX)
|
|
MOVOU X14, 224(CX)
|
|
MOVOU X15, 240(CX)
|
|
CMPQ BX, $0x00000100
|
|
LEAQ 256(AX), AX
|
|
LEAQ 256(CX), CX
|
|
JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048
|
|
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned:
|
|
LEAQ (AX)(BX*1), SI
|
|
MOVQ CX, R8
|
|
MOVOU -128(SI), X5
|
|
MOVOU -112(SI), X6
|
|
MOVQ $0x00000080, DX
|
|
ANDQ $0xffffffe0, CX
|
|
ADDQ $0x20, CX
|
|
MOVOU -96(SI), X7
|
|
MOVOU -80(SI), X8
|
|
MOVQ CX, DI
|
|
SUBQ R8, DI
|
|
MOVOU -64(SI), X9
|
|
MOVOU -48(SI), X10
|
|
SUBQ DI, BX
|
|
MOVOU -32(SI), X11
|
|
MOVOU -16(SI), X12
|
|
VMOVDQU (AX), Y4
|
|
ADDQ DI, AX
|
|
SUBQ DX, BX
|
|
|
|
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop:
|
|
VMOVDQU (AX), Y0
|
|
VMOVDQU 32(AX), Y1
|
|
VMOVDQU 64(AX), Y2
|
|
VMOVDQU 96(AX), Y3
|
|
ADDQ DX, AX
|
|
VMOVDQA Y0, (CX)
|
|
VMOVDQA Y1, 32(CX)
|
|
VMOVDQA Y2, 64(CX)
|
|
VMOVDQA Y3, 96(CX)
|
|
ADDQ DX, CX
|
|
SUBQ DX, BX
|
|
JA emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop
|
|
ADDQ DX, BX
|
|
ADDQ CX, BX
|
|
VMOVDQU Y4, (R8)
|
|
VZEROUPPER
|
|
MOVOU X5, -128(BX)
|
|
MOVOU X6, -112(BX)
|
|
MOVOU X7, -96(BX)
|
|
MOVOU X8, -80(BX)
|
|
MOVOU X9, -64(BX)
|
|
MOVOU X10, -48(BX)
|
|
MOVOU X11, -32(BX)
|
|
MOVOU X12, -16(BX)
|
|
JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx
|
|
MOVQ DX, CX
|
|
|
|
emit_literal_done_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ CX, dst_base+0(FP)
|
|
|
|
emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx:
|
|
MOVQ 8(SP), AX
|
|
SUBQ dst_base+0(FP), AX
|
|
MOVQ AX, ret+48(FP)
|
|
RET
|
|
|
|
// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral stores a literal header describing len(lit) at dst, copies lit
// right behind it and returns header size + len(lit). If the (32-bit) length
// is zero nothing is written and the length is returned unchanged.
// dst is not bounds-checked here.
//
// Registers:
//   AX  write position in dst       CX  read position in lit
//   DX  bytes still to copy         BX  return value
//   SI  len(lit)-1 (header value)   DI  scratch
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ lit_len+32(FP), DX
	MOVQ lit_base+24(FP), CX
	MOVQ dst_base+0(FP), AX
	MOVQ DX, SI
	MOVQ DX, BX
	SUBL $0x01, SI                  // SI = len-1; a borrow means len was 0
	JC   lit_return

	// Select the shortest header able to hold len-1.
	CMPL SI, $0x3c
	JLT  lit_hdr1
	CMPL SI, $0x00000100
	JLT  lit_hdr2
	CMPL SI, $0x00010000
	JLT  lit_hdr3
	CMPL SI, $0x01000000
	JLT  lit_hdr4

	// 5-byte header: 0xfc followed by len-1 as 32 bits.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  lit_copy

lit_hdr4:
	// 4-byte header: 0xf8 followed by the low 24 bits of len-1.
	MOVQ SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  lit_copy

lit_hdr3:
	// 3-byte header: 0xf4 followed by len-1 as 16 bits.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  lit_copy

lit_hdr2:
	// 2-byte header: 0xf0 followed by len-1 as one byte.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  lit_copy

lit_hdr1:
	// 1-byte header: (len-1)<<2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

lit_copy:
	// Copy DX bytes from CX to AX, dispatching on the size class.
	TESTQ DX, DX
	JEQ   lit_return
	CMPQ  DX, $0x02
	JBE   lit_copy_1or2
	CMPQ  DX, $0x04
	JB    lit_copy_3
	JBE   lit_copy_4
	CMPQ  DX, $0x08
	JB    lit_copy_5to7
	JE    lit_copy_8
	CMPQ  DX, $0x10
	JBE   lit_copy_9to16
	CMPQ  DX, $0x20
	JBE   lit_copy_17to32
	CMPQ  DX, $0x40
	JBE   lit_copy_33to64
	CMPQ  DX, $0x80
	JBE   lit_copy_65to128
	CMPQ  DX, $0x00000100
	JBE   lit_copy_129to256
	JMP   lit_copy_256_loop

lit_copy_1or2:
	// First and last byte; they coincide when DX == 1.
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), DI
	MOVB SI, (AX)
	MOVB DI, -1(AX)(DX*1)
	JMP  lit_return

lit_copy_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP  lit_return

lit_copy_3:
	MOVW (CX), SI
	MOVB 2(CX), DI
	MOVW SI, (AX)
	MOVB DI, 2(AX)
	JMP  lit_return

lit_copy_5to7:
	// Two overlapping 4-byte moves anchored at both ends.
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), DI
	MOVL SI, (AX)
	MOVL DI, -4(AX)(DX*1)
	JMP  lit_return

lit_copy_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP  lit_return

lit_copy_9to16:
	// Two overlapping 8-byte moves anchored at both ends.
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), DI
	MOVQ SI, (AX)
	MOVQ DI, -8(AX)(DX*1)
	JMP  lit_return

lit_copy_17to32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   lit_return

lit_copy_33to64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   lit_return

lit_copy_65to128:
	// 64 bytes from the front, 64 bytes from the back; all loads precede the stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   lit_return

lit_copy_129to256:
	// 128 bytes from the front, 128 bytes from the back; all loads precede the stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   lit_return

lit_copy_256_loop:
	// Move 256 bytes per pass while at least 256 remain, then hand the
	// remainder (0..255 bytes) back to the size dispatch.
	SUBQ  $0x00000100, DX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	ADDQ  $0x00000100, CX
	ADDQ  $0x00000100, AX
	CMPQ  DX, $0x00000100
	JGE   lit_copy_256_loop
	JMP   lit_copy

lit_return:
	MOVQ BX, ret+48(FP)
	RET
|
|
|
|
// func emitLiteralAvx(dst []byte, lit []byte) int
// Requires: AVX, SSE2
//
// emitLiteralAvx stores a literal header describing len(lit) at dst, copies
// lit right behind it and returns header size + len(lit). If the (32-bit)
// length is zero nothing is written and the length is returned unchanged.
// Copies of up to 256 bytes use SSE moves; anything larger takes the AVX
// path at the bottom. dst is not bounds-checked here.
//
// Registers:
//   AX  write position in dst       CX  read position in lit
//   DX  bytes still to copy         BX  return value
//   SI  len(lit)-1 (header value)   DI, R8, R9  scratch
TEXT ·emitLiteralAvx(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX
	MOVQ DX, SI
	SUBL $0x01, SI                  // SI = len-1; a borrow means len was 0
	JC   emit_literal_end_avx_standalone

	// Select the shortest header able to hold len-1.
	CMPL SI, $0x3c
	JLT  one_byte_standalone
	CMPL SI, $0x00000100
	JLT  two_bytes_standalone
	CMPL SI, $0x00010000
	JLT  three_bytes_standalone
	CMPL SI, $0x01000000
	JLT  four_bytes_standalone

	// 5-byte header: 0xfc followed by len-1 as 32 bits.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  memmove_standalone

four_bytes_standalone:
	// 4-byte header: 0xf8 followed by the low 24 bits of len-1.
	MOVQ SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  memmove_standalone

three_bytes_standalone:
	// 3-byte header: 0xf4 followed by len-1 as 16 bits.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  memmove_standalone

two_bytes_standalone:
	// 2-byte header: 0xf0 followed by len-1 as one byte.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  memmove_standalone

one_byte_standalone:
	// 1-byte header: (len-1)<<2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	// Copy DX bytes from CX to AX, dispatching on the size class.
	TESTQ DX, DX
	JEQ   emit_literal_end_avx_standalone
	CMPQ  DX, $0x02
	JBE   emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ  DX, $0x04
	JB    emit_lit_memmove_standalone_memmove_move_3
	JBE   emit_lit_memmove_standalone_memmove_move_4
	CMPQ  DX, $0x08
	JB    emit_lit_memmove_standalone_memmove_move_5through7
	JE    emit_lit_memmove_standalone_memmove_move_8
	CMPQ  DX, $0x10
	JBE   emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ  DX, $0x20
	JBE   emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ  DX, $0x40
	JBE   emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ  DX, $0x80
	JBE   emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ  DX, $0x00000100
	JBE   emit_lit_memmove_standalone_memmove_move_129through256
	JMP   emit_lit_memmove_standalone_memmove_avxUnaligned

emit_lit_memmove_standalone_memmove_move_1or2:
	// First and last byte; they coincide when DX == 1.
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), DI
	MOVB SI, (AX)
	MOVB DI, -1(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), DI
	MOVW SI, (AX)
	MOVB DI, 2(AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	// Two overlapping 4-byte moves anchored at both ends.
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), DI
	MOVL SI, (AX)
	MOVL DI, -4(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	// Two overlapping 8-byte moves anchored at both ends.
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), DI
	MOVQ SI, (AX)
	MOVQ DI, -8(AX)(DX*1)
	JMP  emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	// 64 bytes from the front, 64 bytes from the back; all loads precede the stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	// 128 bytes from the front, 128 bytes from the back; all loads precede the stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP   emit_literal_end_avx_standalone

	// NOTE: the SSE 256-bytes-per-pass loop that used to sit here was
	// unreachable in this function (sizes above 256 always take the AVX
	// path below) and has been dropped.

emit_lit_memmove_standalone_memmove_avxUnaligned:
	// Forward copy for DX > 256:
	//   1. keep the last 128 source bytes in X5..X12,
	//   2. keep the first 32 source bytes in Y4,
	//   3. copy the middle 128 bytes per pass to a 32-byte aligned destination,
	//   4. store the saved head and tail with unaligned moves.
	LEAQ    (CX)(DX*1), DI          // DI = one past the last source byte
	MOVQ    AX, R9                  // R9 = original (unaligned) destination
	MOVOU   -128(DI), X5
	MOVOU   -112(DI), X6
	MOVQ    $0x00000080, SI         // SI = 128, the stride of the main loop
	ANDQ    $-32, AX                // round AX down to a 32-byte boundary; $-32 keeps all 64 pointer bits
	ADDQ    $0x20, AX               // ... then step to the next boundary above the start
	MOVOU   -96(DI), X7
	MOVOU   -80(DI), X8
	MOVQ    AX, R8
	SUBQ    R9, R8                  // R8 = bytes skipped to reach alignment (1..32)
	MOVOU   -64(DI), X9
	MOVOU   -48(DI), X10
	SUBQ    R8, DX                  // the head in Y4 covers the skipped bytes
	MOVOU   -32(DI), X11
	MOVOU   -16(DI), X12
	VMOVDQU (CX), Y4
	ADDQ    R8, CX
	SUBQ    SI, DX                  // the tail in X5..X12 covers the last 128 bytes

emit_lit_memmove_standalone_memmove_gobble_128_loop:
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	ADDQ    SI, CX
	VMOVDQA Y0, (AX)
	VMOVDQA Y1, 32(AX)
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)
	ADDQ    SI, AX
	SUBQ    SI, DX
	JA      emit_lit_memmove_standalone_memmove_gobble_128_loop

	ADDQ    SI, DX                  // undo the last subtraction
	ADDQ    AX, DX                  // DX = one past the last destination byte
	VMOVDQU Y4, (R9)
	VZEROUPPER                      // leave AVX state clean before the SSE stores
	MOVOU   X5, -128(DX)
	MOVOU   X6, -112(DX)
	MOVOU   X7, -96(DX)
	MOVOU   X8, -80(DX)
	MOVOU   X9, -64(DX)
	MOVOU   X10, -48(DX)
	MOVOU   X11, -32(DX)
	MOVOU   X12, -16(DX)

emit_literal_end_avx_standalone:
	MOVQ BX, ret+48(FP)
	RET
|
|
|
|
// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes one or more repeat codes for `length` bytes at dst and
// returns the number of bytes written. `offset` is only consulted for the
// short form that embeds an offset below 2048. dst is not bounds-checked.
//
// Registers:
//   AX  write position in dst    BX  bytes written so far (return value)
//   CX  offset                   DX  length-4 for the current code
//   SI  length before the -4 adjustment
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	MOVQ length+32(FP), DX
	MOVQ offset+24(FP), CX
	MOVQ dst_base+0(FP), AX
	XORL BX, BX

rep_next:
	MOVQ DX, SI
	SUBQ $0x04, DX
	CMPL SI, $0x08                  // length-4 <= 4: 2-byte code without offset
	JLE  rep_len2
	CMPL SI, $0x0c                  // length-4 < 8 and offset < 2048: 2-byte code with offset
	JGE  rep_longer
	CMPL CX, $0x00000800
	JLT  rep_len2_offset

rep_longer:
	CMPL DX, $0x00000104
	JLT  rep_len3
	CMPL DX, $0x00010100
	JLT  rep_len4
	CMPL DX, $0x0100ffff
	JLT  rep_len5

	// Too long for a single code: emit a 5-byte code with the length field
	// 0xfffffb and go round again with what is left.
	SUBQ $0x0100fffb, DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  rep_next

rep_len5:
	// 0x1d, 0x00, then (length-4-65536) as 24 bits.
	SUBQ $0x00010000, DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  rep_done

rep_len4:
	// 0x19, 0x00, then (length-4-256) as 16 bits.
	SUBQ $0x00000100, DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  rep_done

rep_len3:
	// 0x15, 0x00, then (length-4-4) as one byte.
	SUBQ $0x04, DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  rep_done

rep_len2:
	// (length-4)<<2 | 1, followed by a zero byte.
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  rep_done

rep_len2_offset:
	// byte 0 = (offset>>8)<<5 | (length-4)<<2 | 1, byte 1 = low byte of offset.
	XORL SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

rep_done:
	MOVQ BX, ret+40(FP)
	RET
|
|
|
|
// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes a copy operation for (offset, length) at dst and returns
// the number of bytes written. dst is not bounds-checked here.
//
//   offset >= 65536: 5-byte form (tag byte with low bits 3 + 32-bit offset).
//                    Lengths above 64 emit one 64-byte copy first; the rest is
//                    written as repeat codes, or as a second 5-byte copy when
//                    fewer than 4 bytes remain.
//   offset <  65536: lengths above 64 emit a 60-byte 3-byte copy followed by
//                    repeat codes; otherwise a 2-byte form (length < 12 and
//                    offset < 2048) or a 3-byte form is used.
//
// Registers:
//   AX  write position in dst    BX  bytes written so far (return value)
//   CX  offset                   DX  remaining length / tag scratch
//   SI  scratch
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone

	// 64-byte copy with a 4-byte offset. The offset must be stored with a
	// 32-bit move: a wider store would write past the 5-byte operation.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAQ -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone

emit_repeat_again_standalone_emit_copy:
	// Remaining length (>= 4) is written as repeat codes.
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

four_bytes_remain_standalone:
	// 5-byte copy for the remaining 1..64 bytes; nothing to do when 0 remain.
	TESTL DX, DX
	JZ    gen_emit_copy_end
	MOVL  $0x03, SI                 // full-width load so the LEAQ below reads a defined SI
	LEAQ  -4(SI)(DX*4), DX          // tag = (length-1)<<2 | 3
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)                 // 32-bit offset; must not spill past byte 4
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone

	// 60-byte copy with a 2-byte offset, remainder as repeat codes.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAQ -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

emit_repeat_again_standalone_emit_copy_short:
	MOVQ DX, SI
	LEAQ -4(DX), DX
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy_short
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ SI, SI
	LEAQ 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone

	// 2-byte form: byte 0 = (offset>>8)<<5 | (length-4)<<2 | 1, byte 1 = low offset byte.
	MOVL $0x01, SI                  // full-width load so the LEAQ below reads a defined SI
	LEAQ -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

emit_copy_three_standalone:
	// 3-byte form: byte 0 = (length-1)<<2 | 2, then the 16-bit offset.
	MOVL $0x02, SI                  // full-width load so the LEAQ below reads a defined SI
	LEAQ -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET
|
|
|
|
// func matchLen(a []byte, b []byte) int
//
// matchLen returns how many leading bytes of a and b are equal, looking at no
// more than len(a) bytes. Only a's length is loaded; b is read at the same
// indexes, so the caller has to supply a b that is at least as long as a.
//
// Registers:
//   AX  base of a     CX  base of b
//   DX  bytes left    SI  bytes matched so far (return value)
//   BX  scratch
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_len+8(FP), DX
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	XORL SI, SI
	CMPQ DX, $0x08
	JL   ml_bytes

ml_words:
	// Compare 8 bytes at once; a non-zero XOR marks the first difference.
	MOVQ  (AX)(SI*1), BX
	XORQ  (CX)(SI*1), BX
	TESTQ BX, BX
	JZ    ml_words_next
	BSFQ  BX, BX                    // index of the lowest differing bit
	SHRQ  $0x03, BX                 // ... converted to a byte index
	ADDQ  BX, SI
	JMP   ml_done

ml_words_next:
	SUBQ $0x08, DX
	ADDQ $0x08, SI
	CMPQ DX, $0x08
	JGE  ml_words

ml_bytes:
	// Fewer than 8 bytes left: compare them one by one.
	TESTQ DX, DX
	JZ    ml_done

ml_bytes_next:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE  ml_done
	INCQ SI
	DECQ DX
	JNZ  ml_bytes_next

ml_done:
	MOVQ SI, ret+48(FP)
	RET
|