// Code generated by command: go run asm.go -out allocfail.s -stubs stubs.go. DO NOT EDIT.

// +build !appengine
// +build !noasm
// +build gc

#include "textflag.h"

// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Scans src for repeated byte sequences and writes literal runs and
// copy/repeat codes through the pointer kept in dst_base+0(FP).
//
// Frame: 65568 bytes of locals, 56 bytes of arguments and result. The
// argument slots referenced are dst_base+0(FP), src_base+24(FP),
// src_len+32(FP) and ret+48(FP).
//
// Stack slots, as used by the code below:
//   0(SP)   limit pointer: dst_base + (src_len - 5 - src_len>>5)
//   8(SP)   dst_base as it was on entry
//   16(SP)  src_len - 8 (32-bit); the search stops once it is passed
//   20(SP)  src index where the not-yet-emitted literals start (starts at 0)
//   24(SP)  distance of the most recent match (starts at 1)
//   28(SP)  src index the search resumes from when no candidate matches
//   32(SP)  65536-byte table, zeroed on entry, addressed by hash value
//
// Registers in the search loop: AX = current src index, CX = src_base,
// BP = 8 bytes loaded from src at AX.
//
// The argument slot dst_base+0(FP) is overwritten and serves as the running
// output pointer. Three paths store 0 in ret+48(FP) and return when a pointer
// is not below the limit in 0(SP); the normal exit stores 8(SP) minus
// dst_base+0(FP).
//
// NOTE(review): the file header marks this as generator output ("DO NOT
// EDIT"), so lasting changes presumably belong in asm.go — confirm.
TEXT ·encodeBlockAsm(SB), $65568-56
	// Zero the table: 0x200 iterations of 128 bytes = 65536 bytes from 32(SP).
	MOVQ $0x00000200, AX
	LEAQ 32(SP), CX
	PXOR X0, X0

zero_loop_encodeBlockAsm:
	MOVOU X0, (CX)
	MOVOU X0, 16(CX)
	MOVOU X0, 32(CX)
	MOVOU X0, 48(CX)
	MOVOU X0, 64(CX)
	MOVOU X0, 80(CX)
	MOVOU X0, 96(CX)
	MOVOU X0, 112(CX)
	ADDQ $0x80, CX
	DECQ AX
	JNZ zero_loop_encodeBlockAsm

	// AX is zero here (the loop counter ran out), so 20(SP) starts at 0.
	MOVL AX, 20(SP)

	// Derive the limits from src_len and remember the original dst_base.
	MOVQ src_len+32(FP), AX
	LEAQ -5(AX), CX
	LEAQ -8(AX), BX
	SHRQ $0x05, AX
	SUBL AX, CX
	MOVL BX, 16(SP)
	MOVQ dst_base+0(FP), AX
	MOVQ AX, 8(SP)
	LEAQ (AX)(CX*1), CX
	MOVQ CX, (SP)

	// Start at src index 1 with a match distance of 1; CX becomes src_base.
	MOVL $0x00000001, AX
	MOVL AX, 24(SP)
	MOVQ src_base+24(FP), CX

search_loop_encodeBlockAsm:
	// BP = 8 bytes at the current position.
	MOVQ (CX)(AX*1), BP

	// BX = AX + 4 + ((AX - 20(SP)) >> 6): the resume position, which moves
	// further ahead the longer the pending literal run is.
	MOVL AX, BX
	SUBL 20(SP), BX
	SHRL $0x06, BX
	LEAQ 4(AX)(BX*1), BX
	MOVL 16(SP), SI
	CMPL BX, SI
	JGT emit_remainder_encodeBlockAsm
	MOVL BX, 28(SP)

	// Two 16-bit hashes: DI from BP, R8 from BP>>8. Each drops the top two
	// bytes (SHLQ 16), multiplies by the constant and keeps the top 16 bits.
	MOVQ $0x0000cf1bbcdcbf9b, BX
	MOVQ BP, DI
	MOVQ BP, R8
	SHRQ $0x08, R8
	SHLQ $0x10, DI
	IMULQ BX, DI
	SHRQ $0x30, DI
	SHLQ $0x10, R8
	IMULQ BX, R8
	SHRQ $0x30, R8

	// Read both table entries (BX, SI), then store AX and AX+1 in their place.
	// NOTE(review): the hashes are used as unscaled byte offsets while the
	// entries are 4 bytes wide, so neighbouring hashes overlap and offset
	// 65535 reaches past the 65568-byte frame — confirm whether a *4 scale
	// (and a larger table) is intended.
	MOVL 32(SP)(DI*1), BX
	MOVL 32(SP)(R8*1), SI
	MOVL AX, 32(SP)(DI*1)
	LEAL 1(AX), DI
	MOVL DI, 32(SP)(R8*1)

	// Repeat check: compare 4 bytes at src[AX - 24(SP) + 1] with R8.
	// NOTE(review): R8 is BP shifted LEFT by 8 here, while the second hash
	// above uses BP shifted right by 8 — confirm the shift direction.
	MOVL AX, DI
	SUBL 24(SP), DI
	MOVL 1(CX)(DI*1), R9
	MOVQ BP, R8
	SHLQ $0x08, R8
	CMPL R8, R9
	JNE no_repeat_found_encodeBlockAsm

	// Repeat found: BP = AX+1 is the start of the repeat, DI the earlier
	// position. Step both backwards while the preceding bytes are equal.
	LEAQ 1(AX), BP
	MOVL 20(SP), BX
	TESTL DI, DI
	JZ repeat_extend_back_end_encodeBlockAsm

repeat_extend_back_loop_encodeBlockAsm:
	// Leaves the loop when BP > BX (BX = 20(SP)).
	CMPL BP, BX
	JG repeat_extend_back_end_encodeBlockAsm
	MOVB -1(CX)(DI*1), DL
	MOVB -1(CX)(BP*1), SI
	CMPB DL, SI
	JNE repeat_extend_back_end_encodeBlockAsm
	LEAQ -1(BP), BP
	DECL DI
	JZ repeat_extend_back_end_encodeBlockAsm
	JMP repeat_extend_back_loop_encodeBlockAsm

repeat_extend_back_end_encodeBlockAsm:
	// Emit src[20(SP):BP] as literals unless that range is empty.
	// DI = source pointer, SI = byte count, BX = output pointer, and 20(SP)
	// is advanced to BP.
	MOVL 20(SP), BX
	CMPL BX, BP
	JEQ emit_literal_skip_repeat_emit_encodeBlockAsm
	MOVL BP, SI
	MOVL BP, 20(SP)
	LEAQ (CX)(BX*1), DI
	SUBL BX, SI
	MOVQ dst_base+0(FP), BX

	// R8 = count - 1; the borrow (count was 0) skips the emit entirely.
	MOVQ SI, R8
	SUBL $0x01, R8
	JC emit_literal_done_repeat_emit_encodeBlockAsm

	// Header size depends on R8: below 0x3c it is packed into one byte,
	// otherwise a marker byte 0xf0/0xf4/0xf8/0xfc is followed by R8 in
	// 1/2/3/4 bytes.
	CMPL R8, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm
	CMPL R8, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm
	CMPL R8, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm
	CMPL R8, $0x01000000
	JLT four_bytes_repeat_emit_encodeBlockAsm
	MOVB $0xfc, (BX)
	MOVL R8, 1(BX)
	ADDQ $0x05, BX
	JMP memmove_repeat_emit_encodeBlockAsm

four_bytes_repeat_emit_encodeBlockAsm:
	MOVQ R8, R9
	SHRL $0x10, R9
	MOVB $0xf8, (BX)
	MOVW R8, 1(BX)
	MOVB R9, 3(BX)
	ADDQ $0x04, BX
	JMP memmove_repeat_emit_encodeBlockAsm

three_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf4, (BX)
	MOVW R8, 1(BX)
	ADDQ $0x03, BX
	JMP memmove_repeat_emit_encodeBlockAsm

two_bytes_repeat_emit_encodeBlockAsm:
	MOVB $0xf0, (BX)
	MOVB R8, 1(BX)
	ADDQ $0x02, BX
	JMP memmove_repeat_emit_encodeBlockAsm

one_byte_repeat_emit_encodeBlockAsm:
	SHLB $0x02, R8
	MOVB R8, (BX)
	ADDQ $0x01, BX

memmove_repeat_emit_encodeBlockAsm:
	// R8 = output pointer after the literal bytes (BX + SI).
	LEAQ (BX)(SI*1), R8
	NOP

	// Copy SI bytes from (DI) to (BX). Sizes up to 256 are handled by one
	// straight-line case each; those cases load from both ends of the range
	// so a single code path covers every length in its bucket.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail:
	TESTQ SI, SI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm
	CMPQ SI, $0x02
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
	CMPQ SI, $0x04
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4
	CMPQ SI, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
	CMPQ SI, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16
	CMPQ SI, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
	CMPQ SI, $0x40
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
	CMPQ SI, $0x80
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128
	CMPQ SI, $0x00000100
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (DI), R8
	MOVB -1(DI)(SI*1), DI
	MOVB R8, (BX)
	MOVB DI, -1(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4:
	MOVL (DI), R8
	MOVL R8, (BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
	MOVW (DI), R8
	MOVB 2(DI), DI
	MOVW R8, (BX)
	MOVB DI, 2(BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_5through7:
	MOVL (DI), R8
	MOVL -4(DI)(SI*1), DI
	MOVL R8, (BX)
	MOVL DI, -4(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
	MOVQ (DI), R8
	MOVQ R8, (BX)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_9through16:
	MOVQ (DI), R8
	MOVQ -8(DI)(SI*1), DI
	MOVQ R8, (BX)
	MOVQ DI, -8(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(SI*1), X1
	MOVOU X0, (BX)
	MOVOU X1, -16(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(SI*1), X2
	MOVOU -16(DI)(SI*1), X3
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, -32(BX)(SI*1)
	MOVOU X3, -16(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_65through128:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU 32(DI), X2
	MOVOU 48(DI), X3
	MOVOU -64(DI)(SI*1), X12
	MOVOU -48(DI)(SI*1), X13
	MOVOU -32(DI)(SI*1), X14
	MOVOU -16(DI)(SI*1), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X12, -64(BX)(SI*1)
	MOVOU X13, -48(BX)(SI*1)
	MOVOU X14, -32(BX)(SI*1)
	MOVOU X15, -16(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_129through256:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU 32(DI), X2
	MOVOU 48(DI), X3
	MOVOU 64(DI), X4
	MOVOU 80(DI), X5
	MOVOU 96(DI), X6
	MOVOU 112(DI), X7
	MOVOU -128(DI)(SI*1), X8
	MOVOU -112(DI)(SI*1), X9
	MOVOU -96(DI)(SI*1), X10
	MOVOU -80(DI)(SI*1), X11
	MOVOU -64(DI)(SI*1), X12
	MOVOU -48(DI)(SI*1), X13
	MOVOU -32(DI)(SI*1), X14
	MOVOU -16(DI)(SI*1), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X4, 64(BX)
	MOVOU X5, 80(BX)
	MOVOU X6, 96(BX)
	MOVOU X7, 112(BX)
	MOVOU X8, -128(BX)(SI*1)
	MOVOU X9, -112(BX)(SI*1)
	MOVOU X10, -96(BX)(SI*1)
	MOVOU X11, -80(BX)(SI*1)
	MOVOU X12, -64(BX)(SI*1)
	MOVOU X13, -48(BX)(SI*1)
	MOVOU X14, -32(BX)(SI*1)
	MOVOU X15, -16(BX)(SI*1)
	JMP emit_literal_done_repeat_emit_encodeBlockAsm

	// More than 256 bytes: copy 256 per iteration, advancing DI and BX, and
	// hand whatever is left back to the size dispatch above.
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048:
	LEAQ -256(SI), SI
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU 32(DI), X2
	MOVOU 48(DI), X3
	MOVOU 64(DI), X4
	MOVOU 80(DI), X5
	MOVOU 96(DI), X6
	MOVOU 112(DI), X7
	MOVOU 128(DI), X8
	MOVOU 144(DI), X9
	MOVOU 160(DI), X10
	MOVOU 176(DI), X11
	MOVOU 192(DI), X12
	MOVOU 208(DI), X13
	MOVOU 224(DI), X14
	MOVOU 240(DI), X15
	MOVOU X0, (BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	MOVOU X4, 64(BX)
	MOVOU X5, 80(BX)
	MOVOU X6, 96(BX)
	MOVOU X7, 112(BX)
	MOVOU X8, 128(BX)
	MOVOU X9, 144(BX)
	MOVOU X10, 160(BX)
	MOVOU X11, 176(BX)
	MOVOU X12, 192(BX)
	MOVOU X13, 208(BX)
	MOVOU X14, 224(BX)
	MOVOU X15, 240(BX)
	// The two LEAQs do not touch the flags, so JGE still sees the CMPQ.
	CMPQ SI, $0x00000100
	LEAQ 256(DI), DI
	LEAQ 256(BX), BX
	JGE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_256through2048
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_tail
	// NOTE(review): the next instruction follows an unconditional JMP and has
	// no label, so no visible path executes it; the copy cases above jump
	// straight to the label below with BX still at the start of the copied
	// bytes — confirm that R8 was meant to become the output pointer.
	MOVQ R8, BX

emit_literal_done_repeat_emit_encodeBlockAsm:
	MOVQ BX, dst_base+0(FP)

emit_literal_skip_repeat_emit_encodeBlockAsm:
	// Move 5 bytes ahead and measure how much further the repeat extends.
	// BX = 16(SP) - AX is the byte budget, DI the number of bytes matched.
	// NOTE(review): the first two instructions writing BX are overwritten by
	// the load of 16(SP) before their result is read — confirm.
	ADDL $0x05, AX
	MOVL AX, BX
	SUBL 24(SP), BX
	MOVL 16(SP), BX
	SUBL AX, BX
	XORQ DI, DI
	CMPQ BX, $0x08
	JL matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	// NOTE(review): the load and the XOR read the same address
	// (CX)(DI*1), so SI is always zero and the mismatch path (BSFQ) is never
	// taken — confirm which two addresses were meant to be compared.
	MOVQ (CX)(DI*1), SI
	XORQ (CX)(DI*1), SI
	TESTQ SI, SI
	JZ matchlen_loop_repeat_extend
	// First differing byte = index of the lowest set bit divided by 8.
	BSFQ SI, SI
	SARQ $0x03, SI
	LEAQ (DI)(SI*1), DI
	JMP repeat_extend_forward_end_encodeBlockAsm

matchlen_loop_repeat_extend:
	LEAQ -8(BX), BX
	LEAQ 8(DI), DI
	CMPQ BX, $0x08
	JGE matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	TESTQ BX, BX
	JZ repeat_extend_forward_end_encodeBlockAsm

matchlen_single_loopback_repeat_extend:
	// Byte-at-a-time tail; it also compares an address with itself.
	MOVB (CX)(DI*1), SI
	CMPB (CX)(DI*1), SI
	JNE repeat_extend_forward_end_encodeBlockAsm
	LEAQ 1(DI), DI
	DECQ BX
	JNZ matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeBlockAsm:
	// AX moves past the matched bytes. BX = AX - BP is the length to encode,
	// BP = distance from 24(SP), SI = output pointer. When 20(SP) is zero
	// the code at repeat_as_copy is used.
	ADDL DI, AX
	MOVL AX, BX
	SUBL BP, BX
	MOVL 24(SP), BP
	MOVQ dst_base+0(FP), SI
	MOVL 20(SP), DI
	TESTL DI, DI
	JZ repeat_as_copy_encodeBlockAsm

	// Encode the length in BX. DI keeps the length as it was before 4 is
	// subtracted; each case stores its bytes at (SI) and advances SI by the
	// number of bytes stored.
emit_repeat_again_match_repeat_:
	MOVQ BX, DI
	LEAQ -4(BX), BX
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_repeat_

cant_repeat_two_offset_match_repeat_:
	CMPL BX, $0x00000104
	JLT repeat_three_match_repeat_
	CMPL BX, $0x00010100
	JLT repeat_four_match_repeat_
	CMPL BX, $0x0100ffff
	JLT repeat_five_match_repeat_
	// Too long for one code: store the fixed 5-byte maximum, subtract
	// 16842747 (0x0100fffb) and encode the rest.
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (SI)
	MOVW $0xfffb, 2(SI)
	MOVB $0xff, 4(SI)
	ADDQ $0x05, SI
	JMP emit_repeat_again_match_repeat_

repeat_five_match_repeat_:
	LEAQ -65536(BX), BX
	MOVQ BX, BP
	MOVW $0x001d, (SI)
	MOVW BX, 2(SI)
	SARQ $0x10, BP
	MOVB BP, 4(SI)
	ADDQ $0x05, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_match_repeat_:
	LEAQ -256(BX), BX
	MOVW $0x0019, (SI)
	MOVW BX, 2(SI)
	ADDQ $0x04, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_match_repeat_:
	LEAQ -4(BX), BX
	MOVW $0x0015, (SI)
	MOVB BL, 2(SI)
	ADDQ $0x03, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_match_repeat_:
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_match_repeat_:
	// First byte = 1 + BX*4 combined with (BP>>8)<<5; second byte = low
	// byte of BP.
	XORQ DI, DI
	LEAQ 1(DI)(BX*4), BX
	MOVB BP, 1(SI)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, BX
	MOVB BL, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_as_copy_encodeBlockAsm:
	// Encode length BX at distance BP with an explicit distance. Distances
	// below 0x10000 use the 2-byte-distance forms further down.
	CMPL BP, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm
	CMPL BX, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
	// Longer than 64: store 0xff plus the 4-byte distance for the first 64
	// bytes, then encode the remaining length.
	MOVB $0xff, (SI)
	MOVD BP, 1(SI)
	LEAQ -64(BX), BX
	ADDQ $0x05, SI
	CMPL BX, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
	MOVQ BX, DI
	LEAQ -4(BX), BX
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	CMPL BX, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BX, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BX, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (SI)
	MOVW $0xfffb, 2(SI)
	MOVB $0xff, 4(SI)
	ADDQ $0x05, SI
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAQ -65536(BX), BX
	MOVQ BX, BP
	MOVW $0x001d, (SI)
	MOVW BX, 2(SI)
	SARQ $0x10, BP
	MOVB BP, 4(SI)
	ADDQ $0x05, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAQ -256(BX), BX
	MOVW $0x0019, (SI)
	MOVW BX, 2(SI)
	ADDQ $0x04, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAQ -4(BX), BX
	MOVW $0x0015, (SI)
	MOVB BL, 2(SI)
	ADDQ $0x03, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	XORQ DI, DI
	LEAQ 1(DI)(BX*4), BX
	MOVB BP, 1(SI)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, BX
	MOVB BL, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

four_bytes_remain_repeat_as_copy_encodeBlockAsm:
	TESTL BX, BX
	JZ repeat_end_emit_encodeBlockAsm
	// NOTE(review): only DL is written before the full DX register is used
	// as the LEAQ base; just the low byte of the result is stored, but the
	// upper bits of DX are whatever was left there — confirm intent.
	MOVB $0x03, DL
	LEAQ -4(DX)(BX*4), BX
	MOVB BL, (SI)
	MOVD BP, 1(SI)
	ADDQ $0x05, SI
	JMP repeat_end_emit_encodeBlockAsm

two_byte_offset_repeat_as_copy_encodeBlockAsm:
	CMPL BX, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
	// Longer than 64: store 0xee plus the 2-byte distance, take 60 off the
	// length and encode the rest.
	MOVB $0xee, (SI)
	MOVW BP, 1(SI)
	LEAQ -60(BX), BX
	ADDQ $0x03, SI

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	MOVQ BX, DI
	LEAQ -4(BX), BX
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	CMPL BX, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BX, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BX, $0x0100ffff
	JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
	LEAQ -16842747(BX), BX
	MOVW $0x001d, (SI)
	MOVW $0xfffb, 2(SI)
	MOVB $0xff, 4(SI)
	ADDQ $0x05, SI
	JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAQ -65536(BX), BX
	MOVQ BX, BP
	MOVW $0x001d, (SI)
	MOVW BX, 2(SI)
	SARQ $0x10, BP
	MOVB BP, 4(SI)
	ADDQ $0x05, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAQ -256(BX), BX
	MOVW $0x0019, (SI)
	MOVW BX, 2(SI)
	ADDQ $0x04, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAQ -4(BX), BX
	MOVW $0x0015, (SI)
	MOVB BL, 2(SI)
	ADDQ $0x03, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, BX
	ORL $0x01, BX
	MOVW BX, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	XORQ DI, DI
	LEAQ 1(DI)(BX*4), BX
	MOVB BP, 1(SI)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, BX
	MOVB BL, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
	// Length below 12 and distance below 0x800 fit the 2-byte form;
	// everything else uses the 3-byte form. Both set only DL before using DX.
	CMPL BX, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	CMPL BP, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
	MOVB $0x01, DL
	LEAQ -16(DX)(BX*4), BX
	MOVB BP, 1(SI)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, BX
	MOVB BL, (SI)
	ADDQ $0x02, SI
	JMP repeat_end_emit_encodeBlockAsm

emit_copy_three_repeat_as_copy_encodeBlockAsm:
	MOVB $0x02, DL
	LEAQ -4(DX)(BX*4), BX
	MOVB BL, (SI)
	MOVW BP, 1(SI)
	ADDQ $0x03, SI

repeat_end_emit_encodeBlockAsm:
	// Publish the output pointer; keep searching unless AX passed 16(SP).
	MOVQ SI, dst_base+0(FP)
	MOVL 16(SP), BX
	CMPL AX, BX
	JGT emit_remainder_encodeBlockAsm
	JMP search_loop_encodeBlockAsm

no_repeat_found_encodeBlockAsm:
	// Third hash (DI) from BP>>16, then test up to three candidates:
	// BX (first table entry), SI (second table entry) and the entry for
	// the third hash.
	MOVQ $0x0000cf1bbcdcbf9b, R8
	MOVQ BP, DI
	SHRQ $0x10, DI
	SHLQ $0x10, DI
	IMULQ R8, DI
	SHRQ $0x30, DI
	// NOTE(review): SHRQ sits between the CMPL and the JEQ and rewrites the
	// flags, so the jump reflects the shifted value being zero, not the
	// comparison — confirm the intended order.
	CMPL (CX)(BX*1), BP
	SHRQ $0x08, BP
	JEQ candidate_match_encodeBlockAsm
	MOVL 32(SP)(DI*1), BX
	CMPL (CX)(SI*1), BP
	JEQ candidate2_match_encodeBlockAsm
	LEAQ 2(AX), SI
	MOVL SI, 32(SP)(DI*1)
	SHRQ $0x08, BP
	CMPL (CX)(BX*1), BP
	JEQ candidate3_match_encodeBlockAsm
	// No candidate matched: resume at the position saved in 28(SP).
	MOVL 28(SP), AX
	JMP search_loop_encodeBlockAsm

candidate3_match_encodeBlockAsm:
	ADDL $0x02, AX
	JMP candidate_match_encodeBlockAsm

candidate2_match_encodeBlockAsm:
	LEAQ -2(AX), BX
	MOVL BX, 32(SP)(DI*1)
	INCL AX
	MOVL SI, BX

candidate_match_encodeBlockAsm:
	// AX = match position, BX = candidate position. Step both backwards
	// while the preceding bytes are equal.
	MOVL 20(SP), BP
	TESTL BX, BX
	JZ match_extend_back_end_encodeBlockAsm

match_extend_back_loop_encodeBlockAsm:
	// Leaves the loop when AX > BP (BP = 20(SP)).
	CMPL AX, BP
	JG match_extend_back_end_encodeBlockAsm
	MOVB -1(CX)(BX*1), DL
	MOVB -1(CX)(AX*1), SI
	CMPB DL, SI
	JNE match_extend_back_end_encodeBlockAsm
	LEAL -1(AX), AX
	DECL BX
	JZ match_extend_back_end_encodeBlockAsm
	JMP match_extend_back_loop_encodeBlockAsm

match_extend_back_end_encodeBlockAsm:
	// Give up (result 0) when the pending literals would reach the limit.
	// NOTE(review): LEAQ yields the address of the dst_base argument slot
	// plus BP, whereas every other check loads the pointer stored in that
	// slot — confirm which is meant.
	MOVL AX, BP
	SUBL 20(SP), BP
	LEAQ dst_base+0(FP)(BP*1), BP
	CMPQ BP, (SP)
	JL match_dst_size_check_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm:
	// Emit src[20(SP):BP] as literals unless that range is empty.
	// BP = source pointer, DI = byte count, SI = output pointer.
	// NOTE(review): the end of the range is taken from BX (the candidate
	// position) rather than AX (the match position) — confirm.
	MOVL BX, BP
	MOVL 20(SP), SI
	CMPL SI, BP
	JEQ emit_literal_skip_match_emit_encodeBlockAsm
	MOVL BP, DI
	MOVL BP, 20(SP)
	LEAQ (CX)(SI*1), BP
	SUBL SI, DI
	MOVQ dst_base+0(FP), SI

	// Same header selection as the repeat path, on R8 = count - 1.
	MOVQ DI, R8
	SUBL $0x01, R8
	JC emit_literal_done_match_emit_encodeBlockAsm
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm
	CMPL R8, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm
	CMPL R8, $0x01000000
	JLT four_bytes_match_emit_encodeBlockAsm
	MOVB $0xfc, (SI)
	MOVL R8, 1(SI)
	ADDQ $0x05, SI
	JMP memmove_match_emit_encodeBlockAsm

four_bytes_match_emit_encodeBlockAsm:
	MOVQ R8, R9
	SHRL $0x10, R9
	MOVB $0xf8, (SI)
	MOVW R8, 1(SI)
	MOVB R9, 3(SI)
	ADDQ $0x04, SI
	JMP memmove_match_emit_encodeBlockAsm

three_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf4, (SI)
	MOVW R8, 1(SI)
	ADDQ $0x03, SI
	JMP memmove_match_emit_encodeBlockAsm

two_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf0, (SI)
	MOVB R8, 1(SI)
	ADDQ $0x02, SI
	JMP memmove_match_emit_encodeBlockAsm

one_byte_match_emit_encodeBlockAsm:
	SHLB $0x02, R8
	MOVB R8, (SI)
	ADDQ $0x01, SI

memmove_match_emit_encodeBlockAsm:
	// R8 = output pointer after the literal bytes (SI + DI).
	LEAQ (SI)(DI*1), R8
	NOP

	// Copy DI bytes from (BP) to (SI); same size dispatch as the repeat path.
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail:
	TESTQ DI, DI
	JEQ emit_literal_done_match_emit_encodeBlockAsm
	CMPQ DI, $0x02
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
	CMPQ DI, $0x04
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4
	CMPQ DI, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7
	JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
	CMPQ DI, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16
	CMPQ DI, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
	CMPQ DI, $0x40
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
	CMPQ DI, $0x80
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128
	CMPQ DI, $0x00000100
	JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (BP), R8
	MOVB -1(BP)(DI*1), BP
	MOVB R8, (SI)
	MOVB BP, -1(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4:
	MOVL (BP), R8
	MOVL R8, (SI)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
	MOVW (BP), R8
	MOVB 2(BP), BP
	MOVW R8, (SI)
	MOVB BP, 2(SI)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_5through7:
	MOVL (BP), R8
	MOVL -4(BP)(DI*1), BP
	MOVL R8, (SI)
	MOVL BP, -4(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
	MOVQ (BP), R8
	MOVQ R8, (SI)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_9through16:
	MOVQ (BP), R8
	MOVQ -8(BP)(DI*1), BP
	MOVQ R8, (SI)
	MOVQ BP, -8(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (BP), X0
	MOVOU -16(BP)(DI*1), X1
	MOVOU X0, (SI)
	MOVOU X1, -16(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (BP), X0
	MOVOU 16(BP), X1
	MOVOU -32(BP)(DI*1), X2
	MOVOU -16(BP)(DI*1), X3
	MOVOU X0, (SI)
	MOVOU X1, 16(SI)
	MOVOU X2, -32(SI)(DI*1)
	MOVOU X3, -16(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_65through128:
	MOVOU (BP), X0
	MOVOU 16(BP), X1
	MOVOU 32(BP), X2
	MOVOU 48(BP), X3
	MOVOU -64(BP)(DI*1), X12
	MOVOU -48(BP)(DI*1), X13
	MOVOU -32(BP)(DI*1), X14
	MOVOU -16(BP)(DI*1), X15
	MOVOU X0, (SI)
	MOVOU X1, 16(SI)
	MOVOU X2, 32(SI)
	MOVOU X3, 48(SI)
	MOVOU X12, -64(SI)(DI*1)
	MOVOU X13, -48(SI)(DI*1)
	MOVOU X14, -32(SI)(DI*1)
	MOVOU X15, -16(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_129through256:
	MOVOU (BP), X0
	MOVOU 16(BP), X1
	MOVOU 32(BP), X2
	MOVOU 48(BP), X3
	MOVOU 64(BP), X4
	MOVOU 80(BP), X5
	MOVOU 96(BP), X6
	MOVOU 112(BP), X7
	MOVOU -128(BP)(DI*1), X8
	MOVOU -112(BP)(DI*1), X9
	MOVOU -96(BP)(DI*1), X10
	MOVOU -80(BP)(DI*1), X11
	MOVOU -64(BP)(DI*1), X12
	MOVOU -48(BP)(DI*1), X13
	MOVOU -32(BP)(DI*1), X14
	MOVOU -16(BP)(DI*1), X15
	MOVOU X0, (SI)
	MOVOU X1, 16(SI)
	MOVOU X2, 32(SI)
	MOVOU X3, 48(SI)
	MOVOU X4, 64(SI)
	MOVOU X5, 80(SI)
	MOVOU X6, 96(SI)
	MOVOU X7, 112(SI)
	MOVOU X8, -128(SI)(DI*1)
	MOVOU X9, -112(SI)(DI*1)
	MOVOU X10, -96(SI)(DI*1)
	MOVOU X11, -80(SI)(DI*1)
	MOVOU X12, -64(SI)(DI*1)
	MOVOU X13, -48(SI)(DI*1)
	MOVOU X14, -32(SI)(DI*1)
	MOVOU X15, -16(SI)(DI*1)
	JMP emit_literal_done_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048:
	LEAQ -256(DI), DI
	MOVOU (BP), X0
	MOVOU 16(BP), X1
	MOVOU 32(BP), X2
	MOVOU 48(BP), X3
	MOVOU 64(BP), X4
	MOVOU 80(BP), X5
	MOVOU 96(BP), X6
	MOVOU 112(BP), X7
	MOVOU 128(BP), X8
	MOVOU 144(BP), X9
	MOVOU 160(BP), X10
	MOVOU 176(BP), X11
	MOVOU 192(BP), X12
	MOVOU 208(BP), X13
	MOVOU 224(BP), X14
	MOVOU 240(BP), X15
	MOVOU X0, (SI)
	MOVOU X1, 16(SI)
	MOVOU X2, 32(SI)
	MOVOU X3, 48(SI)
	MOVOU X4, 64(SI)
	MOVOU X5, 80(SI)
	MOVOU X6, 96(SI)
	MOVOU X7, 112(SI)
	MOVOU X8, 128(SI)
	MOVOU X9, 144(SI)
	MOVOU X10, 160(SI)
	MOVOU X11, 176(SI)
	MOVOU X12, 192(SI)
	MOVOU X13, 208(SI)
	MOVOU X14, 224(SI)
	MOVOU X15, 240(SI)
	CMPQ DI, $0x00000100
	LEAQ 256(BP), BP
	LEAQ 256(SI), SI
	JGE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_256through2048
	JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_tail
	// NOTE(review): no visible path executes the next instruction (it follows
	// an unconditional JMP and carries no label) — confirm.
	MOVQ R8, SI

emit_literal_done_match_emit_encodeBlockAsm:
	MOVQ SI, dst_base+0(FP)

emit_literal_skip_match_emit_encodeBlockAsm:
	NOP

match_nolit_loop_encodeBlockAsm:
	// Record the distance AX - BX in 24(SP), skip the 4 bytes on both sides,
	// then measure the rest of the match with BP = 16(SP) - AX as the budget.
	// The first MOVL appears twice in the generated output.
	MOVL AX, BP
	MOVL AX, BP
	SUBL BX, BP
	MOVL BP, 24(SP)
	ADDL $0x04, AX
	ADDL $0x04, BX
	MOVL 16(SP), BP
	SUBL AX, BP
	XORQ DI, DI
	CMPQ BP, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm

matchlen_loopback_match_nolit_encodeBlockAsm:
	// NOTE(review): as in the repeat path, both operands are (CX)(DI*1), so
	// SI is always zero here — confirm which addresses were meant.
	MOVQ (CX)(DI*1), SI
	XORQ (CX)(DI*1), SI
	TESTQ SI, SI
	JZ matchlen_loop_match_nolit_encodeBlockAsm
	BSFQ SI, SI
	SARQ $0x03, SI
	LEAQ (DI)(SI*1), DI
	JMP match_nolit_end_encodeBlockAsm

matchlen_loop_match_nolit_encodeBlockAsm:
	LEAQ -8(BP), BP
	LEAQ 8(DI), DI
	CMPQ BP, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm

matchlen_single_match_nolit_encodeBlockAsm:
	TESTQ BP, BP
	JZ match_nolit_end_encodeBlockAsm

matchlen_single_loopback_match_nolit_encodeBlockAsm:
	MOVB (CX)(DI*1), SI
	CMPB (CX)(DI*1), SI
	JNE match_nolit_end_encodeBlockAsm
	LEAQ 1(DI), DI
	DECQ BP
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm

match_nolit_end_encodeBlockAsm:
	// DI = total length (measured part + 4), BP = distance, SI = output
	// pointer; AX moves past the match. Same encodings as repeat_as_copy,
	// with the length held in DI.
	MOVL 24(SP), BP
	ADDQ $0x04, DI
	MOVQ dst_base+0(FP), SI
	ADDL DI, AX
	CMPL BP, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm
	CMPL DI, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm
	MOVB $0xff, (SI)
	MOVD BP, 1(SI)
	LEAQ -64(DI), DI
	ADDQ $0x05, SI
	CMPL DI, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
	MOVQ DI, R8
	LEAQ -4(DI), DI
	CMPL R8, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	CMPL DI, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
	CMPL DI, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
	CMPL DI, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
	LEAQ -16842747(DI), DI
	MOVW $0x001d, (SI)
	MOVW $0xfffb, 2(SI)
	MOVB $0xff, 4(SI)
	ADDQ $0x05, SI
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy

repeat_five_match_nolit_encodeBlockAsm_emit_copy:
	LEAQ -65536(DI), DI
	MOVQ DI, BP
	MOVW $0x001d, (SI)
	MOVW DI, 2(SI)
	SARQ $0x10, BP
	MOVB BP, 4(SI)
	ADDQ $0x05, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy:
	LEAQ -256(DI), DI
	MOVW $0x0019, (SI)
	MOVW DI, 2(SI)
	ADDQ $0x04, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy:
	LEAQ -4(DI), DI
	MOVW $0x0015, (SI)
	MOVB DI, 2(SI)
	ADDQ $0x03, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy:
	SHLL $0x02, DI
	ORL $0x01, DI
	MOVW DI, (SI)
	ADDQ $0x02, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	XORQ R8, R8
	LEAQ 1(R8)(DI*4), DI
	MOVB BP, 1(SI)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, DI
	MOVB DI, (SI)
	ADDQ $0x02, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

four_bytes_remain_match_nolit_encodeBlockAsm:
	TESTL DI, DI
	JZ match_nolit_emitcopy_end_encodeBlockAsm
	// Only DL is set before DX is used as the LEAQ base (see the note on the
	// repeat_as_copy path).
	MOVB $0x03, DL
	LEAQ -4(DX)(DI*4), DI
	MOVB DI, (SI)
	MOVD BP, 1(SI)
	ADDQ $0x05, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_match_nolit_encodeBlockAsm:
	CMPL DI, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm
	MOVB $0xee, (SI)
	MOVW BP, 1(SI)
	LEAQ -60(DI), DI
	ADDQ $0x03, SI

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
	MOVQ DI, R8
	LEAQ -4(DI), DI
	CMPL R8, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	CMPL DI, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0100ffff
	JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
	LEAQ -16842747(DI), DI
	MOVW $0x001d, (SI)
	MOVW $0xfffb, 2(SI)
	MOVB $0xff, 4(SI)
	ADDQ $0x05, SI
	JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAQ -65536(DI), DI
	MOVQ DI, BP
	MOVW $0x001d, (SI)
	MOVW DI, 2(SI)
	SARQ $0x10, BP
	MOVB BP, 4(SI)
	ADDQ $0x05, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAQ -256(DI), DI
	MOVW $0x0019, (SI)
	MOVW DI, 2(SI)
	ADDQ $0x04, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAQ -4(DI), DI
	MOVW $0x0015, (SI)
	MOVB DI, 2(SI)
	ADDQ $0x03, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, DI
	ORL $0x01, DI
	MOVW DI, (SI)
	ADDQ $0x02, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	XORQ R8, R8
	LEAQ 1(R8)(DI*4), DI
	MOVB BP, 1(SI)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL BP, DI
	MOVB DI, (SI)
	ADDQ $0x02, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_short_match_nolit_encodeBlockAsm:
	CMPL DI, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	CMPL BP, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm
	MOVB $0x01, DL
	LEAQ -16(DX)(DI*4), DI
	MOVB BP, 1(SI)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL BP, DI
	MOVB DI, (SI)
	ADDQ $0x02, SI
	JMP match_nolit_emitcopy_end_encodeBlockAsm

emit_copy_three_match_nolit_encodeBlockAsm:
	MOVB $0x02, DL
	LEAQ -4(DX)(DI*4), DI
	MOVB DI, (SI)
	MOVW BP, 1(SI)
	ADDQ $0x03, SI

match_nolit_emitcopy_end_encodeBlockAsm:
	// Publish the output pointer and mark everything up to AX as emitted.
	// Finish with the remainder once AX reaches 16(SP); return 0 when the
	// output pointer is not below the limit.
	MOVQ SI, dst_base+0(FP)
	MOVL AX, 20(SP)
	CMPL AX, 16(SP)
	JGE emit_remainder_encodeBlockAsm
	CMPQ SI, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm:
	// Hash the bytes at AX-2 (DI) and at AX (R8, from the value shifted
	// right by 16) and store AX-2 and AX in the table.
	MOVQ -2(CX)(AX*1), BP
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ BP, DI
	SHRQ $0x10, BP
	MOVQ BP, R8
	SHLQ $0x10, DI
	IMULQ SI, DI
	SHRQ $0x30, DI
	SHLQ $0x10, R8
	IMULQ SI, R8
	SHRQ $0x30, R8
	// NOTE(review): both table loads into SI are overwritten by the LEAQ
	// before being read, and the CMPL below indexes src with the hash value
	// R8 rather than with a table entry — confirm.
	MOVL 32(SP)(DI*1), SI
	MOVL 32(SP)(R8*1), SI
	LEAQ -2(AX), SI
	MOVL SI, 32(SP)(DI*1)
	MOVL AX, 32(SP)(R8*1)
	CMPL (CX)(R8*1), BP
	JEQ match_nolit_loop_encodeBlockAsm
	INCL AX
	JMP search_loop_encodeBlockAsm

emit_remainder_encodeBlockAsm:
	// Return 0 when output pointer + (src_len - 20(SP)) is not below the
	// limit in 0(SP).
	MOVQ src_len+32(FP), AX
	SUBL 20(SP), AX
	MOVQ dst_base+0(FP), DX
	LEAQ (DX)(AX*1), DX
	CMPQ DX, (SP)
	JL emit_remainder_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm:
	// Emit src[20(SP):src_len] as literals unless that range is empty.
	// From here AX = source pointer, BX = byte count and CX = output pointer
	// (CX no longer holds src_base).
	MOVQ src_len+32(FP), AX
	MOVL 20(SP), DX
	CMPL DX, AX
	JEQ emit_literal_skip_emit_remainder_encodeBlockAsm
	MOVL AX, BX
	MOVL AX, 20(SP)
	LEAQ (CX)(DX*1), AX
	SUBL DX, BX
	MOVQ dst_base+0(FP), CX

	// Same header selection as the other literal emitters, on DX = count - 1.
	MOVQ BX, DX
	SUBL $0x01, DX
	JC emit_literal_done_emit_remainder_encodeBlockAsm
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x01000000
	JLT four_bytes_emit_remainder_encodeBlockAsm
	MOVB $0xfc, (CX)
	MOVL DX, 1(CX)
	ADDQ $0x05, CX
	JMP memmove_emit_remainder_encodeBlockAsm

four_bytes_emit_remainder_encodeBlockAsm:
	MOVQ DX, BP
	SHRL $0x10, BP
	MOVB $0xf8, (CX)
	MOVW DX, 1(CX)
	MOVB BP, 3(CX)
	ADDQ $0x04, CX
	JMP memmove_emit_remainder_encodeBlockAsm

three_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf4, (CX)
	MOVW DX, 1(CX)
	ADDQ $0x03, CX
	JMP memmove_emit_remainder_encodeBlockAsm

two_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf0, (CX)
	MOVB DL, 1(CX)
	ADDQ $0x02, CX
	JMP memmove_emit_remainder_encodeBlockAsm

one_byte_emit_remainder_encodeBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (CX)
	ADDQ $0x01, CX

memmove_emit_remainder_encodeBlockAsm:
	// DX = output pointer after the literal bytes (CX + BX).
	LEAQ (CX)(BX*1), DX
	NOP

	// Copy BX bytes from (AX) to (CX); same size dispatch as above.
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail:
	TESTQ BX, BX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm
	CMPQ BX, $0x02
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
	CMPQ BX, $0x04
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
	CMPQ BX, $0x40
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
	CMPQ BX, $0x80
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128
	CMPQ BX, $0x00000100
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
	MOVB (AX), DL
	MOVB -1(AX)(BX*1), AL
	MOVB DL, (CX)
	MOVB AL, -1(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4:
	MOVL (AX), DX
	MOVL DX, (CX)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
	MOVW (AX), DX
	MOVB 2(AX), AL
	MOVW DX, (CX)
	MOVB AL, 2(CX)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_5through7:
	MOVL (AX), DX
	MOVL -4(AX)(BX*1), AX
	MOVL DX, (CX)
	MOVL AX, -4(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
	MOVQ (AX), DX
	MOVQ DX, (CX)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_9through16:
	MOVQ (AX), DX
	MOVQ -8(AX)(BX*1), AX
	MOVQ DX, (CX)
	MOVQ AX, -8(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	MOVOU (AX), X0
	MOVOU -16(AX)(BX*1), X1
	MOVOU X0, (CX)
	MOVOU X1, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU -32(AX)(BX*1), X2
	MOVOU -16(AX)(BX*1), X3
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, -32(CX)(BX*1)
	MOVOU X3, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_65through128:
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU -64(AX)(BX*1), X12
	MOVOU -48(AX)(BX*1), X13
	MOVOU -32(AX)(BX*1), X14
	MOVOU -16(AX)(BX*1), X15
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, 32(CX)
	MOVOU X3, 48(CX)
	MOVOU X12, -64(CX)(BX*1)
	MOVOU X13, -48(CX)(BX*1)
	MOVOU X14, -32(CX)(BX*1)
	MOVOU X15, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_129through256:
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU 64(AX), X4
	MOVOU 80(AX), X5
	MOVOU 96(AX), X6
	MOVOU 112(AX), X7
	MOVOU -128(AX)(BX*1), X8
	MOVOU -112(AX)(BX*1), X9
	MOVOU -96(AX)(BX*1), X10
	MOVOU -80(AX)(BX*1), X11
	MOVOU -64(AX)(BX*1), X12
	MOVOU -48(AX)(BX*1), X13
	MOVOU -32(AX)(BX*1), X14
	MOVOU -16(AX)(BX*1), X15
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, 32(CX)
	MOVOU X3, 48(CX)
	MOVOU X4, 64(CX)
	MOVOU X5, 80(CX)
	MOVOU X6, 96(CX)
	MOVOU X7, 112(CX)
	MOVOU X8, -128(CX)(BX*1)
	MOVOU X9, -112(CX)(BX*1)
	MOVOU X10, -96(CX)(BX*1)
	MOVOU X11, -80(CX)(BX*1)
	MOVOU X12, -64(CX)(BX*1)
	MOVOU X13, -48(CX)(BX*1)
	MOVOU X14, -32(CX)(BX*1)
	MOVOU X15, -16(CX)(BX*1)
	JMP emit_literal_done_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048:
	LEAQ -256(BX), BX
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU 64(AX), X4
	MOVOU 80(AX), X5
	MOVOU 96(AX), X6
	MOVOU 112(AX), X7
	MOVOU 128(AX), X8
	MOVOU 144(AX), X9
	MOVOU 160(AX), X10
	MOVOU 176(AX), X11
	MOVOU 192(AX), X12
	MOVOU 208(AX), X13
	MOVOU 224(AX), X14
	MOVOU 240(AX), X15
	MOVOU X0, (CX)
	MOVOU X1, 16(CX)
	MOVOU X2, 32(CX)
	MOVOU X3, 48(CX)
	MOVOU X4, 64(CX)
	MOVOU X5, 80(CX)
	MOVOU X6, 96(CX)
	MOVOU X7, 112(CX)
	MOVOU X8, 128(CX)
	MOVOU X9, 144(CX)
	MOVOU X10, 160(CX)
	MOVOU X11, 176(CX)
	MOVOU X12, 192(CX)
	MOVOU X13, 208(CX)
	MOVOU X14, 224(CX)
	MOVOU X15, 240(CX)
	CMPQ BX, $0x00000100
	LEAQ 256(AX), AX
	LEAQ 256(CX), CX
	JGE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_256through2048
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_tail
	// NOTE(review): no visible path executes the next instruction (it follows
	// an unconditional JMP and carries no label) — confirm.
	MOVQ DX, CX

emit_literal_done_emit_remainder_encodeBlockAsm:
	MOVQ CX, dst_base+0(FP)

emit_literal_skip_emit_remainder_encodeBlockAsm:
	// Result = saved entry dst_base minus the current output pointer.
	// NOTE(review): with SUBQ src, dst computing dst -= src this is start
	// minus current, i.e. the negated byte count whenever the pointer has
	// advanced — confirm the intended operand order.
	MOVQ 8(SP), AX
	SUBQ dst_base+0(FP), AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm14B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm14B(SB), $16416-56 MOVQ $0x00000080, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm14B: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm14B MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm14B: MOVQ (CX)(AX*1), BP MOVL AX, BX SUBL 20(SP), BX SHRL $0x05, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), SI CMPL BX, SI JGT emit_remainder_encodeBlockAsm14B MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ BP, DI MOVQ BP, R8 SHRQ $0x08, R8 SHLQ $0x10, DI IMULQ BX, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x32, R8 MOVL 32(SP)(DI*1), BX MOVL 32(SP)(R8*1), SI MOVL AX, 32(SP)(DI*1) LEAL 1(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, DI SUBL 24(SP), DI MOVL 1(CX)(DI*1), R9 MOVQ BP, R8 SHLQ $0x08, R8 CMPL R8, R9 JNE no_repeat_found_encodeBlockAsm14B LEAQ 1(AX), BP MOVL 20(SP), BX TESTL DI, DI JZ repeat_extend_back_end_encodeBlockAsm14B repeat_extend_back_loop_encodeBlockAsm14B: CMPL BP, BX JG repeat_extend_back_end_encodeBlockAsm14B MOVB -1(CX)(DI*1), DL MOVB -1(CX)(BP*1), SI CMPB DL, SI JNE repeat_extend_back_end_encodeBlockAsm14B LEAQ -1(BP), BP DECL DI JZ repeat_extend_back_end_encodeBlockAsm14B JMP repeat_extend_back_loop_encodeBlockAsm14B repeat_extend_back_end_encodeBlockAsm14B: MOVL
20(SP), BX CMPL BX, BP JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14B MOVL BP, SI MOVL BP, 20(SP) LEAQ (CX)(BX*1), DI SUBL BX, SI MOVQ dst_base+0(FP), BX MOVQ SI, R8 SUBL $0x01, R8 JC emit_literal_done_repeat_emit_encodeBlockAsm14B CMPL R8, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm14B CMPL R8, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm14B CMPL R8, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm14B CMPL R8, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm14B MOVB $0xfc, (BX) MOVL R8, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm14B four_bytes_repeat_emit_encodeBlockAsm14B: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (BX) MOVW R8, 1(BX) MOVB R9, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm14B three_bytes_repeat_emit_encodeBlockAsm14B: MOVB $0xf4, (BX) MOVW R8, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm14B two_bytes_repeat_emit_encodeBlockAsm14B: MOVB $0xf0, (BX) MOVB R8, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm14B one_byte_repeat_emit_encodeBlockAsm14B: SHLB $0x02, R8 MOVB R8, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm14B: LEAQ (BX)(SI*1), R8 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail: TESTQ SI, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm14B CMPQ SI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2 CMPQ SI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4 CMPQ SI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8 CMPQ SI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16 CMPQ SI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32 CMPQ SI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64 CMPQ SI, $0x80 JBE 
emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128 CMPQ SI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_1or2: MOVB (DI), R8 MOVB -1(DI)(SI*1), DI MOVB R8, (BX) MOVB DI, -1(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_4: MOVL (DI), R8 MOVL R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_3: MOVW (DI), R8 MOVB 2(DI), DI MOVW R8, (BX) MOVB DI, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_5through7: MOVL (DI), R8 MOVL -4(DI)(SI*1), DI MOVL R8, (BX) MOVL DI, -4(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_8: MOVQ (DI), R8 MOVQ R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_9through16: MOVQ (DI), R8 MOVQ -8(DI)(SI*1), DI MOVQ R8, (BX) MOVQ DI, -8(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(SI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(SI*1), X2 MOVOU -16(DI)(SI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(SI*1) MOVOU X3, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_65through128: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU 
-16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_129through256: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU -128(DI)(SI*1), X8 MOVOU -112(DI)(SI*1), X9 MOVOU -96(DI)(SI*1), X10 MOVOU -80(DI)(SI*1), X11 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(SI*1) MOVOU X9, -112(BX)(SI*1) MOVOU X10, -96(BX)(SI*1) MOVOU X11, -80(BX)(SI*1) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14B emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048: LEAQ -256(SI), SI MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU 128(DI), X8 MOVOU 144(DI), X9 MOVOU 160(DI), X10 MOVOU 176(DI), X11 MOVOU 192(DI), X12 MOVOU 208(DI), X13 MOVOU 224(DI), X14 MOVOU 240(DI), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ SI, $0x00000100 LEAQ 256(DI), DI LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14B_memmove_tail MOVQ R8, BX emit_literal_done_repeat_emit_encodeBlockAsm14B: MOVQ BX, dst_base+0(FP) 
emit_literal_skip_repeat_emit_encodeBlockAsm14B: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ DI, DI CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_repeat_extend BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP repeat_extend_forward_end_encodeBlockAsm14B matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(DI), DI CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm14B matchlen_single_loopback_repeat_extend: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE repeat_extend_forward_end_encodeBlockAsm14B LEAQ 1(DI), DI DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm14B: ADDL DI, AX MOVL AX, BX SUBL BP, BX MOVL 24(SP), BP MOVQ dst_base+0(FP), SI MOVL 20(SP), DI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm14B emit_repeat_again_match_repeat_: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_match_repeat_ CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL BP, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_two_match_repeat_: SHLL $0x02, BX ORL 
$0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_two_offset_match_repeat_: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_as_copy_encodeBlockAsm14B: CMPL BP, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm14B CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14B MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(BX), BX ADDQ $0x05, SI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14B emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, 
(SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B four_bytes_remain_repeat_as_copy_encodeBlockAsm14B: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm14B MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14B two_byte_offset_repeat_as_copy_encodeBlockAsm14B: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(BX), BX ADDQ $0x03, SI emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14B_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_four_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_three_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: LEAQ -4(BX), BX 
MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_two_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B repeat_two_offset_repeat_as_copy_encodeBlockAsm14B_emit_copy_short: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B two_byte_offset_short_repeat_as_copy_encodeBlockAsm14B: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B CMPL BP, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14B MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14B emit_copy_three_repeat_as_copy_encodeBlockAsm14B: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI repeat_end_emit_encodeBlockAsm14B: MOVQ SI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm14B JMP search_loop_encodeBlockAsm14B no_repeat_found_encodeBlockAsm14B: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ BP, DI SHRQ $0x10, DI SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x32, DI CMPL (CX)(BX*1), BP SHRQ $0x08, BP JEQ candidate_match_encodeBlockAsm14B MOVL 32(SP)(DI*1), BX CMPL (CX)(SI*1), BP JEQ candidate2_match_encodeBlockAsm14B LEAQ 2(AX), SI MOVL SI, 32(SP)(DI*1) SHRQ $0x08, BP CMPL (CX)(BX*1), BP JEQ candidate3_match_encodeBlockAsm14B MOVL 28(SP), AX JMP search_loop_encodeBlockAsm14B candidate3_match_encodeBlockAsm14B: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm14B candidate2_match_encodeBlockAsm14B: LEAQ -2(AX), BX MOVL BX, 32(SP)(DI*1) INCL AX MOVL SI, BX candidate_match_encodeBlockAsm14B: MOVL 20(SP), BP TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm14B match_extend_back_loop_encodeBlockAsm14B: CMPL AX, BP JG match_extend_back_end_encodeBlockAsm14B MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), SI 
CMPB DL, SI JNE match_extend_back_end_encodeBlockAsm14B LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm14B JMP match_extend_back_loop_encodeBlockAsm14B match_extend_back_end_encodeBlockAsm14B: MOVL AX, BP SUBL 20(SP), BP LEAQ dst_base+0(FP)(BP*1), BP CMPQ BP, (SP) JL match_dst_size_check_encodeBlockAsm14B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm14B: MOVL BX, BP MOVL 20(SP), SI CMPL SI, BP JEQ emit_literal_skip_match_emit_encodeBlockAsm14B MOVL BP, DI MOVL BP, 20(SP) LEAQ (CX)(SI*1), BP SUBL SI, DI MOVQ dst_base+0(FP), SI MOVQ DI, R8 SUBL $0x01, R8 JC emit_literal_done_match_emit_encodeBlockAsm14B CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm14B CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm14B CMPL R8, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm14B CMPL R8, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm14B MOVB $0xfc, (SI) MOVL R8, 1(SI) ADDQ $0x05, SI JMP memmove_match_emit_encodeBlockAsm14B four_bytes_match_emit_encodeBlockAsm14B: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (SI) MOVW R8, 1(SI) MOVB R9, 3(SI) ADDQ $0x04, SI JMP memmove_match_emit_encodeBlockAsm14B three_bytes_match_emit_encodeBlockAsm14B: MOVB $0xf4, (SI) MOVW R8, 1(SI) ADDQ $0x03, SI JMP memmove_match_emit_encodeBlockAsm14B two_bytes_match_emit_encodeBlockAsm14B: MOVB $0xf0, (SI) MOVB R8, 1(SI) ADDQ $0x02, SI JMP memmove_match_emit_encodeBlockAsm14B one_byte_match_emit_encodeBlockAsm14B: SHLB $0x02, R8 MOVB R8, (SI) ADDQ $0x01, SI memmove_match_emit_encodeBlockAsm14B: LEAQ (SI)(DI*1), R8 NOP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_match_emit_encodeBlockAsm14B CMPQ DI, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7 
JE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_1or2: MOVB (BP), R8 MOVB -1(BP)(DI*1), BP MOVB R8, (SI) MOVB BP, -1(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_4: MOVL (BP), R8 MOVL R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_3: MOVW (BP), R8 MOVB 2(BP), BP MOVW R8, (SI) MOVB BP, 2(SI) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_5through7: MOVL (BP), R8 MOVL -4(BP)(DI*1), BP MOVL R8, (SI) MOVL BP, -4(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_8: MOVQ (BP), R8 MOVQ R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_9through16: MOVQ (BP), R8 MOVQ -8(BP)(DI*1), BP MOVQ R8, (SI) MOVQ BP, -8(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_17through32: MOVOU (BP), X0 MOVOU -16(BP)(DI*1), X1 MOVOU X0, (SI) MOVOU X1, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_33through64: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU -32(BP)(DI*1), X2 MOVOU -16(BP)(DI*1), X3 MOVOU X0, (SI) 
MOVOU X1, 16(SI) MOVOU X2, -32(SI)(DI*1) MOVOU X3, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_65through128: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_129through256: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU -128(BP)(DI*1), X8 MOVOU -112(BP)(DI*1), X9 MOVOU -96(BP)(DI*1), X10 MOVOU -80(BP)(DI*1), X11 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, -128(SI)(DI*1) MOVOU X9, -112(SI)(DI*1) MOVOU X10, -96(SI)(DI*1) MOVOU X11, -80(SI)(DI*1) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14B emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU 128(BP), X8 MOVOU 144(BP), X9 MOVOU 160(BP), X10 MOVOU 176(BP), X11 MOVOU 192(BP), X12 MOVOU 208(BP), X13 MOVOU 224(BP), X14 MOVOU 240(BP), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, 128(SI) MOVOU X9, 144(SI) MOVOU X10, 160(SI) MOVOU X11, 176(SI) MOVOU X12, 192(SI) MOVOU X13, 208(SI) MOVOU 
X14, 224(SI) MOVOU X15, 240(SI) CMPQ DI, $0x00000100 LEAQ 256(BP), BP LEAQ 256(SI), SI JGE emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm14B_memmove_tail MOVQ R8, SI emit_literal_done_match_emit_encodeBlockAsm14B: MOVQ SI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm14B: NOP match_nolit_loop_encodeBlockAsm14B: MOVL AX, BP MOVL AX, BP SUBL BX, BP MOVL BP, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), BP SUBL AX, BP XORQ DI, DI CMPQ BP, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm14B matchlen_loopback_match_nolit_encodeBlockAsm14B: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_match_nolit_encodeBlockAsm14B BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP match_nolit_end_encodeBlockAsm14B matchlen_loop_match_nolit_encodeBlockAsm14B: LEAQ -8(BP), BP LEAQ 8(DI), DI CMPQ BP, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm14B matchlen_single_match_nolit_encodeBlockAsm14B: TESTQ BP, BP JZ match_nolit_end_encodeBlockAsm14B matchlen_single_loopback_match_nolit_encodeBlockAsm14B: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE match_nolit_end_encodeBlockAsm14B LEAQ 1(DI), DI DECQ BP JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14B match_nolit_end_encodeBlockAsm14B: MOVL 24(SP), BP ADDQ $0x04, DI MOVQ dst_base+0(FP), SI ADDL DI, AX CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm14B CMPL DI, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm14B MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(DI), DI ADDQ $0x05, SI CMPL DI, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm14B emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy 
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy repeat_five_match_nolit_encodeBlockAsm14B_emit_copy: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_four_match_nolit_encodeBlockAsm14B_emit_copy: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_three_match_nolit_encodeBlockAsm14B_emit_copy: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_two_match_nolit_encodeBlockAsm14B_emit_copy: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B four_bytes_remain_match_nolit_encodeBlockAsm14B: TESTL DI, DI JZ match_nolit_emitcopy_end_encodeBlockAsm14B MOVB $0x03, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B two_byte_offset_match_nolit_encodeBlockAsm14B: CMPL DI, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm14B MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(DI), DI ADDQ $0x03, SI emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short CMPL R8, $0x0c JGE 
cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm14B_emit_copy_short repeat_five_match_nolit_encodeBlockAsm14B_emit_copy_short: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_four_match_nolit_encodeBlockAsm14B_emit_copy_short: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_three_match_nolit_encodeBlockAsm14B_emit_copy_short: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_two_match_nolit_encodeBlockAsm14B_emit_copy_short: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B repeat_two_offset_match_nolit_encodeBlockAsm14B_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B two_byte_offset_short_match_nolit_encodeBlockAsm14B: CMPL DI, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm14B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm14B MOVB $0x01, DL LEAQ -16(DX)(DI*4), DI MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14B 
emit_copy_three_match_nolit_encodeBlockAsm14B: MOVB $0x02, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI match_nolit_emitcopy_end_encodeBlockAsm14B: MOVQ SI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm14B CMPQ SI, (SP) JL match_nolit_dst_ok_encodeBlockAsm14B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm14B: MOVQ -2(CX)(AX*1), BP MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ BP, DI SHRQ $0x10, BP MOVQ BP, R8 SHLQ $0x10, DI IMULQ SI, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ SI, R8 SHRQ $0x32, R8 MOVL 32(SP)(DI*1), SI MOVL 32(SP)(R8*1), SI LEAQ -2(AX), SI MOVL SI, 32(SP)(DI*1) MOVL AX, 32(SP)(R8*1) CMPL (CX)(R8*1), BP JEQ match_nolit_loop_encodeBlockAsm14B INCL AX JMP search_loop_encodeBlockAsm14B emit_remainder_encodeBlockAsm14B: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm14B MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm14B: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14B MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm14B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm14B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm14B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm14B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm14B MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm14B four_bytes_emit_remainder_encodeBlockAsm14B: MOVQ DX, BP SHRL $0x10, BP MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB BP, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm14B three_bytes_emit_remainder_encodeBlockAsm14B: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm14B 
two_bytes_emit_remainder_encodeBlockAsm14B: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm14B one_byte_emit_remainder_encodeBlockAsm14B: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm14B: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm14B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), AL MOVB DL, (CX) MOVB AL, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_3: MOVW (AX), DX MOVB 2(AX), AL MOVW DX, (CX) MOVB AL, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_5through7: MOVL (AX), DX MOVL 
-4(AX)(BX*1), AX MOVL DX, (CX) MOVL AX, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), AX MOVQ DX, (CX) MOVQ AX, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) 
MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14B emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14B_memmove_tail MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm14B: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm14B: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET
//
// -----------------------------------------------------------------------------------------
// encodeBlockAsm12B -- reviewer documentation (comments only; no instruction was changed).
// The file header marks this file as generated ("DO NOT EDIT"), so any code change
// suggested below belongs in the generator (asm.go), not in this file.
//
// Layout: the text of this file is wrapped at arbitrary points. The routine starts at the
// "func encodeBlockAsm12B" marker right below this comment and ends at the RET that
// precedes the "func encodeBlockAsmAvx" marker. The instructions above this comment are
// the tail of encodeBlockAsm14B.
//
// Go signature: func encodeBlockAsm12B(dst []byte, src []byte) int
// Frame $4128-56: 32 bytes of scalar slots followed by a 4096-byte table at 32(SP).
//
// Prologue
//   zero_loop: 0x20 iterations of eight 16-byte MOVOU stores of X0 (cleared with PXOR)
//   zero 32(SP)..4127(SP).
//
// Stack slots, as written by the code
//   0(SP)   64-bit  dst_base + (src_len - 5 - src_len>>5). Output pointers are compared
//                   against it; when a pointer is not below it, ret is set to 0 and the
//                   routine returns.
//   8(SP)   64-bit  dst_base as passed in.
//   16(SP)  32-bit  src_len - 8. Positions beyond it branch to emit_remainder.
//   20(SP)  32-bit  source offset of the first byte not yet emitted (starts at 0); the
//                   literal emitters copy from src_base + 20(SP).
//   24(SP)  32-bit  last match offset: starts at 1, set to (position - candidate) in
//                   match_nolit_loop, read back as the offset by the copy/repeat emitters.
//   28(SP)  32-bit  next search position, reloaded into AX when no candidate matches.
//   32(SP)          table of 32-bit source positions addressed with the hash values.
//   dst_base+0(FP) is overwritten during the run and serves as the running output pointer.
//
// Main registers in the search loop: AX = current source position, CX = src_base,
// BP = 8 bytes loaded from src_base + AX.
//
// Hash: value << 16, multiplied by 0x0000cf1bbcdcbf9b, then >> 0x34 (top 12 bits kept).
//
// Phases (label prefixes)
//   search_loop         hash the bytes at AX and AX+1, load both table entries, store AX and
//                       AX+1 into the table, then compare 4 bytes at AX+1-24(SP) (repeat check).
//   repeat_extend_back  walk the repeat match backwards, emit pending literals (repeat_emit),
//                       measure forward length, then emit a repeat or copy op.
//   no_repeat_found     compare up to three candidates; candidate_match extends backwards
//                       (match_extend_back), emits pending literals (match_emit) and then
//                       emits copies in match_nolit_loop.
//   emit_remainder      emit src[20(SP):src_len] as literals and compute the return value.
//
// Literal tag bytes written by every *_emit section (n = literal length - 1)
//   n < 0x3c        one byte:  n << 2
//   n < 0x100       0xf0 followed by 1 length byte
//   n < 0x10000     0xf4 followed by 2 length bytes
//   n < 0x1000000   0xf8 followed by 3 length bytes
//   otherwise       0xfc followed by 4 length bytes
// The literal bytes are then copied by an inlined size-dispatched memmove (1or2, 3, 4,
// 5through7, 8, 9through16, ... 256through2048).
//
// NOTE(review): the sequences below look unintended. They are described exactly as written;
// confirm against the generator before relying on this routine.
//   * Return value: the epilogue computes 8(SP) - dst_base+0(FP), i.e. the original dst
//     pointer minus the running output pointer, which is <= 0 once anything was written.
//     Presumably the operands are swapped -- TODO confirm with the Go caller.
//   * matchlen_* loops: "MOVQ (CX)(DI*1), SI / XORQ (CX)(DI*1), SI" and
//     "MOVB (CX)(DI*1), SI / CMPB (CX)(DI*1), SI" read the same address for both operands,
//     so the difference is always zero and the loops run until the length budget is used up.
//   * Literal memmove paths up to 256 bytes jump to emit_literal_done_* without advancing the
//     destination register; the "MOVQ R8, BX" / "MOVQ R8, SI" / "MOVQ DX, CX" that would load
//     the precomputed end pointer follows an unconditional JMP and has no label, so it is
//     unreachable. The pointer stored to dst_base+0(FP) then excludes the bytes just copied.
//   * match_extend_back_end: "LEAQ dst_base+0(FP)(BP*1), BP" forms the address of the argument
//     slot plus BP rather than using the pointer stored in it, before comparing with (SP).
//   * Table accesses use scale 1 ("32(SP)(DI*1)") with a 12-bit index and 4-byte entries, so
//     entries overlap and an access at index 4095 extends 3 bytes past the zeroed 4096-byte area.
//   * emit_literal_skip_repeat_emit: "MOVL AX, BX / SUBL 24(SP), BX" is overwritten by the
//     following "MOVL 16(SP), BX". match_nolit_dst_ok: both table loads into SI are overwritten
//     by "LEAQ -2(AX), SI", and "CMPL (CX)(R8*1), BP" indexes the source with the hash value in R8.
// -----------------------------------------------------------------------------------------
//
// func encodeBlockAsm12B(dst []byte, src []byte) int // Requires: SSE2 TEXT ·encodeBlockAsm12B(SB), $4128-56 MOVQ $0x00000020, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm12B: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm12B MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm12B: MOVQ (CX)(AX*1), BP MOVL AX, BX SUBL 
20(SP), BX SHRL $0x04, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), SI CMPL BX, SI JGT emit_remainder_encodeBlockAsm12B MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ BP, DI MOVQ BP, R8 SHRQ $0x08, R8 SHLQ $0x10, DI IMULQ BX, DI SHRQ $0x34, DI SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x34, R8 MOVL 32(SP)(DI*1), BX MOVL 32(SP)(R8*1), SI MOVL AX, 32(SP)(DI*1) LEAL 1(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, DI SUBL 24(SP), DI MOVL 1(CX)(DI*1), R9 MOVQ BP, R8 SHLQ $0x08, R8 CMPL R8, R9 JNE no_repeat_found_encodeBlockAsm12B LEAQ 1(AX), BP MOVL 20(SP), BX TESTL DI, DI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: CMPL BP, BX JG repeat_extend_back_end_encodeBlockAsm12B MOVB -1(CX)(DI*1), DL MOVB -1(CX)(BP*1), SI CMPB DL, SI JNE repeat_extend_back_end_encodeBlockAsm12B LEAQ -1(BP), BP DECL DI JZ repeat_extend_back_end_encodeBlockAsm12B JMP repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: MOVL 20(SP), BX CMPL BX, BP JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12B MOVL BP, SI MOVL BP, 20(SP) LEAQ (CX)(BX*1), DI SUBL BX, SI MOVQ dst_base+0(FP), BX MOVQ SI, R8 SUBL $0x01, R8 JC emit_literal_done_repeat_emit_encodeBlockAsm12B CMPL R8, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL R8, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B CMPL R8, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm12B CMPL R8, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm12B MOVB $0xfc, (BX) MOVL R8, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm12B four_bytes_repeat_emit_encodeBlockAsm12B: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (BX) MOVW R8, 1(BX) MOVB R9, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf4, (BX) MOVW R8, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf0, (BX) MOVB R8, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm12B 
one_byte_repeat_emit_encodeBlockAsm12B: SHLB $0x02, R8 MOVB R8, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm12B: LEAQ (BX)(SI*1), R8 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail: TESTQ SI, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B CMPQ SI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2 CMPQ SI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4 CMPQ SI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 CMPQ SI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16 CMPQ SI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 CMPQ SI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 CMPQ SI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128 CMPQ SI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (DI), R8 MOVB -1(DI)(SI*1), DI MOVB R8, (BX) MOVB DI, -1(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4: MOVL (DI), R8 MOVL R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3: MOVW (DI), R8 MOVB 2(DI), DI MOVW R8, (BX) MOVB DI, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_5through7: MOVL (DI), R8 MOVL -4(DI)(SI*1), DI MOVL R8, (BX) MOVL DI, -4(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: MOVQ (DI), R8 MOVQ R8, (BX) JMP 
emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_9through16: MOVQ (DI), R8 MOVQ -8(DI)(SI*1), DI MOVQ R8, (BX) MOVQ DI, -8(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(SI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(SI*1), X2 MOVOU -16(DI)(SI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(SI*1) MOVOU X3, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_65through128: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_129through256: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU -128(DI)(SI*1), X8 MOVOU -112(DI)(SI*1), X9 MOVOU -96(DI)(SI*1), X10 MOVOU -80(DI)(SI*1), X11 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(SI*1) MOVOU X9, -112(BX)(SI*1) MOVOU X10, -96(BX)(SI*1) MOVOU X11, -80(BX)(SI*1) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12B 
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(SI), SI MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU 128(DI), X8 MOVOU 144(DI), X9 MOVOU 160(DI), X10 MOVOU 176(DI), X11 MOVOU 192(DI), X12 MOVOU 208(DI), X13 MOVOU 224(DI), X14 MOVOU 240(DI), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ SI, $0x00000100 LEAQ 256(DI), DI LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_tail MOVQ R8, BX emit_literal_done_repeat_emit_encodeBlockAsm12B: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsm12B: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ DI, DI CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_repeat_extend BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(DI), DI CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_single_loopback_repeat_extend: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE repeat_extend_forward_end_encodeBlockAsm12B LEAQ 1(DI), DI DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm12B: ADDL DI, AX MOVL AX, BX SUBL BP, BX MOVL 24(SP), BP MOVQ dst_base+0(FP), SI MOVL 20(SP), DI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12B emit_repeat_again_match_repeat_: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 
JLE repeat_two_match_repeat_ CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL BP, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(BX), BX ADDQ $0x05, SI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy: CMPL BX, $0x00000104 JLT 
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B four_bytes_remain_repeat_as_copy_encodeBlockAsm12B: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm12B MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_repeat_as_copy_encodeBlockAsm12B: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(BX), BX ADDQ $0x03, SI emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL BP, $0x00000800 JLT 
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI 
repeat_end_emit_encodeBlockAsm12B: MOVQ SI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm12B JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ BP, DI SHRQ $0x10, DI SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x34, DI CMPL (CX)(BX*1), BP SHRQ $0x08, BP JEQ candidate_match_encodeBlockAsm12B MOVL 32(SP)(DI*1), BX CMPL (CX)(SI*1), BP JEQ candidate2_match_encodeBlockAsm12B LEAQ 2(AX), SI MOVL SI, 32(SP)(DI*1) SHRQ $0x08, BP CMPL (CX)(BX*1), BP JEQ candidate3_match_encodeBlockAsm12B MOVL 28(SP), AX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: LEAQ -2(AX), BX MOVL BX, 32(SP)(DI*1) INCL AX MOVL SI, BX candidate_match_encodeBlockAsm12B: MOVL 20(SP), BP TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: CMPL AX, BP JG match_extend_back_end_encodeBlockAsm12B MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), SI CMPB DL, SI JNE match_extend_back_end_encodeBlockAsm12B LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: MOVL AX, BP SUBL 20(SP), BP LEAQ dst_base+0(FP)(BP*1), BP CMPQ BP, (SP) JL match_dst_size_check_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12B: MOVL BX, BP MOVL 20(SP), SI CMPL SI, BP JEQ emit_literal_skip_match_emit_encodeBlockAsm12B MOVL BP, DI MOVL BP, 20(SP) LEAQ (CX)(SI*1), BP SUBL SI, DI MOVQ dst_base+0(FP), SI MOVQ DI, R8 SUBL $0x01, R8 JC emit_literal_done_match_emit_encodeBlockAsm12B CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm12B CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12B CMPL R8, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm12B CMPL R8, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm12B MOVB $0xfc, (SI) MOVL R8, 1(SI) ADDQ $0x05, SI JMP 
memmove_match_emit_encodeBlockAsm12B four_bytes_match_emit_encodeBlockAsm12B: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (SI) MOVW R8, 1(SI) MOVB R9, 3(SI) ADDQ $0x04, SI JMP memmove_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf4, (SI) MOVW R8, 1(SI) ADDQ $0x03, SI JMP memmove_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf0, (SI) MOVB R8, 1(SI) ADDQ $0x02, SI JMP memmove_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: SHLB $0x02, R8 MOVB R8, (SI) ADDQ $0x01, SI memmove_match_emit_encodeBlockAsm12B: LEAQ (SI)(DI*1), R8 NOP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B CMPQ DI, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2: MOVB (BP), R8 MOVB -1(BP)(DI*1), BP MOVB R8, (SI) MOVB BP, -1(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4: MOVL (BP), R8 MOVL R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm12B 
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3: MOVW (BP), R8 MOVB 2(BP), BP MOVW R8, (SI) MOVB BP, 2(SI) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_5through7: MOVL (BP), R8 MOVL -4(BP)(DI*1), BP MOVL R8, (SI) MOVL BP, -4(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: MOVQ (BP), R8 MOVQ R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_9through16: MOVQ (BP), R8 MOVQ -8(BP)(DI*1), BP MOVQ R8, (SI) MOVQ BP, -8(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: MOVOU (BP), X0 MOVOU -16(BP)(DI*1), X1 MOVOU X0, (SI) MOVOU X1, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU -32(BP)(DI*1), X2 MOVOU -16(BP)(DI*1), X3 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, -32(SI)(DI*1) MOVOU X3, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_65through128: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_129through256: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU -128(BP)(DI*1), X8 MOVOU -112(BP)(DI*1), X9 MOVOU -96(BP)(DI*1), X10 MOVOU -80(BP)(DI*1), X11 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 
MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, -128(SI)(DI*1) MOVOU X9, -112(SI)(DI*1) MOVOU X10, -96(SI)(DI*1) MOVOU X11, -80(SI)(DI*1) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU 128(BP), X8 MOVOU 144(BP), X9 MOVOU 160(BP), X10 MOVOU 176(BP), X11 MOVOU 192(BP), X12 MOVOU 208(BP), X13 MOVOU 224(BP), X14 MOVOU 240(BP), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, 128(SI) MOVOU X9, 144(SI) MOVOU X10, 160(SI) MOVOU X11, 176(SI) MOVOU X12, 192(SI) MOVOU X13, 208(SI) MOVOU X14, 224(SI) MOVOU X15, 240(SI) CMPQ DI, $0x00000100 LEAQ 256(BP), BP LEAQ 256(SI), SI JGE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_tail MOVQ R8, SI emit_literal_done_match_emit_encodeBlockAsm12B: MOVQ SI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm12B: NOP match_nolit_loop_encodeBlockAsm12B: MOVL AX, BP MOVL AX, BP SUBL BX, BP MOVL BP, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), BP SUBL AX, BP XORQ DI, DI CMPQ BP, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12B matchlen_loopback_match_nolit_encodeBlockAsm12B: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_match_nolit_encodeBlockAsm12B BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP match_nolit_end_encodeBlockAsm12B matchlen_loop_match_nolit_encodeBlockAsm12B: LEAQ -8(BP), BP LEAQ 8(DI), DI CMPQ BP, $0x08 JGE 
matchlen_loopback_match_nolit_encodeBlockAsm12B matchlen_single_match_nolit_encodeBlockAsm12B: TESTQ BP, BP JZ match_nolit_end_encodeBlockAsm12B matchlen_single_loopback_match_nolit_encodeBlockAsm12B: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE match_nolit_end_encodeBlockAsm12B LEAQ 1(DI), DI DECQ BP JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B match_nolit_end_encodeBlockAsm12B: MOVL 24(SP), BP ADDQ $0x04, DI MOVQ dst_base+0(FP), SI ADDL DI, AX CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm12B CMPL DI, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm12B MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(DI), DI ADDQ $0x05, SI CMPL DI, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm12B emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy repeat_five_match_nolit_encodeBlockAsm12B_emit_copy: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_four_match_nolit_encodeBlockAsm12B_emit_copy: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, 
SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B four_bytes_remain_match_nolit_encodeBlockAsm12B: TESTL DI, DI JZ match_nolit_emitcopy_end_encodeBlockAsm12B MOVB $0x03, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL DI, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(DI), DI ADDQ $0x03, SI emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 
2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B two_byte_offset_short_match_nolit_encodeBlockAsm12B: CMPL DI, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12B CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12B MOVB $0x01, DL LEAQ -16(DX)(DI*4), DI MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: MOVB $0x02, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI match_nolit_emitcopy_end_encodeBlockAsm12B: MOVQ SI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm12B CMPQ SI, (SP) JL match_nolit_dst_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: MOVQ -2(CX)(AX*1), BP MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ BP, DI SHRQ $0x10, BP MOVQ BP, R8 SHLQ $0x10, DI IMULQ SI, DI SHRQ $0x34, DI SHLQ $0x10, R8 IMULQ SI, R8 SHRQ $0x34, R8 MOVL 32(SP)(DI*1), SI MOVL 32(SP)(R8*1), SI LEAQ -2(AX), SI MOVL SI, 32(SP)(DI*1) MOVL AX, 32(SP)(R8*1) CMPL (CX)(R8*1), BP JEQ match_nolit_loop_encodeBlockAsm12B INCL AX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm12B MOVQ $0x00000000, ret+48(FP) RET 
emit_remainder_ok_encodeBlockAsm12B: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12B MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm12B CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm12B CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm12B MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm12B four_bytes_emit_remainder_encodeBlockAsm12B: MOVQ DX, BP SHRL $0x10, BP MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB BP, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 CMPQ 
BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), AL MOVB DL, (CX) MOVB AL, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: MOVW (AX), DX MOVB 2(AX), AL MOVW DX, (CX) MOVB AL, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), AX MOVL DX, (CX) MOVL AX, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), AX MOVQ DX, (CX) MOVQ AX, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B 
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE 
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_tail MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm12B: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsmAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsmAvx(SB), $65568-56 MOVQ $0x00000200, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsmAvx: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsmAvx MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsmAvx: MOVQ (CX)(AX*1), BP MOVL AX, BX SUBL 20(SP), BX SHRL $0x06, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), SI CMPL BX, SI JGT emit_remainder_encodeBlockAsmAvx MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ BP, DI MOVQ BP, R8 SHRQ $0x08, R8 SHLQ $0x10, DI IMULQ BX, DI SHRQ $0x30, DI SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x30, R8 MOVL 32(SP)(DI*1), BX MOVL 32(SP)(R8*1), SI MOVL AX, 32(SP)(DI*1) LEAL 1(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, DI SUBL 24(SP), DI MOVL 1(CX)(DI*1), R9 MOVQ BP, R8 SHLQ $0x08, R8 CMPL R8, R9 JNE no_repeat_found_encodeBlockAsmAvx LEAQ 1(AX), BP MOVL 20(SP), BX TESTL DI, DI JZ repeat_extend_back_end_encodeBlockAsmAvx repeat_extend_back_loop_encodeBlockAsmAvx: CMPL BP, BX JG repeat_extend_back_end_encodeBlockAsmAvx MOVB -1(CX)(DI*1), DL MOVB -1(CX)(BP*1), SI CMPB DL, SI JNE repeat_extend_back_end_encodeBlockAsmAvx LEAQ -1(BP), BP DECL DI JZ repeat_extend_back_end_encodeBlockAsmAvx JMP repeat_extend_back_loop_encodeBlockAsmAvx 
repeat_extend_back_end_encodeBlockAsmAvx: MOVL 20(SP), BX CMPL BX, BP JEQ emit_literal_skip_repeat_emit_encodeBlockAsmAvx MOVL BP, SI MOVL BP, 20(SP) LEAQ (CX)(BX*1), DI SUBL BX, SI MOVQ dst_base+0(FP), BX MOVQ SI, R8 SUBL $0x01, R8 JC emit_literal_done_repeat_emit_encodeBlockAsmAvx CMPL R8, $0x3c JLT one_byte_repeat_emit_encodeBlockAsmAvx CMPL R8, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsmAvx CMPL R8, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsmAvx CMPL R8, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsmAvx MOVB $0xfc, (BX) MOVL R8, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsmAvx four_bytes_repeat_emit_encodeBlockAsmAvx: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (BX) MOVW R8, 1(BX) MOVB R9, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsmAvx three_bytes_repeat_emit_encodeBlockAsmAvx: MOVB $0xf4, (BX) MOVW R8, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsmAvx two_bytes_repeat_emit_encodeBlockAsmAvx: MOVB $0xf0, (BX) MOVB R8, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsmAvx one_byte_repeat_emit_encodeBlockAsmAvx: SHLB $0x02, R8 MOVB R8, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsmAvx: LEAQ (BX)(SI*1), R8 NOP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail: TESTQ SI, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsmAvx CMPQ SI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2 CMPQ SI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4 CMPQ SI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8 CMPQ SI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16 CMPQ SI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32 CMPQ SI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64 CMPQ 
SI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128 CMPQ SI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_1or2: MOVB (DI), R8 MOVB -1(DI)(SI*1), R9 MOVB R8, (BX) MOVB R9, -1(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_4: MOVL (DI), R8 MOVL R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_3: MOVW (DI), R8 MOVB 2(DI), R9 MOVW R8, (BX) MOVB R9, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_5through7: MOVL (DI), R8 MOVL -4(DI)(SI*1), R9 MOVL R8, (BX) MOVL R9, -4(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_8: MOVQ (DI), R8 MOVQ R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (DI), R8 MOVQ -8(DI)(SI*1), R9 MOVQ R8, (BX) MOVQ R9, -8(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(SI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(SI*1), X2 MOVOU -16(DI)(SI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(SI*1) MOVOU X3, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), 
X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU -128(DI)(SI*1), X8 MOVOU -112(DI)(SI*1), X9 MOVOU -96(DI)(SI*1), X10 MOVOU -80(DI)(SI*1), X11 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(SI*1) MOVOU X9, -112(BX)(SI*1) MOVOU X10, -96(BX)(SI*1) MOVOU X11, -80(BX)(SI*1) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(SI), SI MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU 128(DI), X8 MOVOU 144(DI), X9 MOVOU 160(DI), X10 MOVOU 176(DI), X11 MOVOU 192(DI), X12 MOVOU 208(DI), X13 MOVOU 224(DI), X14 MOVOU 240(DI), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ SI, $0x00000100 LEAQ 256(DI), DI LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (DI)(SI*1), R9 MOVQ 
BX, R11 MOVOU -128(R9), X5 MOVOU -112(R9), X6 MOVQ $0x00000080, R8 ANDQ $0xffffffe0, BX ADDQ $0x20, BX MOVOU -96(R9), X7 MOVOU -80(R9), X8 MOVQ BX, R10 SUBQ R11, R10 MOVOU -64(R9), X9 MOVOU -48(R9), X10 SUBQ R10, SI MOVOU -32(R9), X11 MOVOU -16(R9), X12 VMOVDQU (DI), Y4 ADDQ R10, DI SUBQ R8, SI emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (DI), Y0 VMOVDQU 32(DI), Y1 VMOVDQU 64(DI), Y2 VMOVDQU 96(DI), Y3 ADDQ R8, DI VMOVDQA Y0, (BX) VMOVDQA Y1, 32(BX) VMOVDQA Y2, 64(BX) VMOVDQA Y3, 96(BX) ADDQ R8, BX SUBQ R8, SI JA emit_lit_memmove_repeat_emit_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ R8, SI ADDQ BX, SI VMOVDQU Y4, (R11) VZEROUPPER MOVOU X5, -128(SI) MOVOU X6, -112(SI) MOVOU X7, -96(SI) MOVOU X8, -80(SI) MOVOU X9, -64(SI) MOVOU X10, -48(SI) MOVOU X11, -32(SI) MOVOU X12, -16(SI) JMP emit_literal_done_repeat_emit_encodeBlockAsmAvx MOVQ R8, BX emit_literal_done_repeat_emit_encodeBlockAsmAvx: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsmAvx: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ DI, DI CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_repeat_extend BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP repeat_extend_forward_end_encodeBlockAsmAvx matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(DI), DI CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsmAvx matchlen_single_loopback_repeat_extend: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE repeat_extend_forward_end_encodeBlockAsmAvx LEAQ 1(DI), DI DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsmAvx: ADDL DI, AX MOVL AX, BX SUBL BP, BX MOVL 24(SP), BP MOVQ dst_base+0(FP), SI MOVL 20(SP), DI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsmAvx emit_repeat_again_match_repeat_: MOVQ BX, DI LEAQ -4(BX), BX 
CMPL DI, $0x08 JLE repeat_two_match_repeat_ CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL BP, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_match_repeat_: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_as_copy_encodeBlockAsmAvx: CMPL BP, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsmAvx CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(BX), BX ADDQ $0x05, SI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy: CMPL BX, $0x00000104 JLT 
repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx four_bytes_remain_repeat_as_copy_encodeBlockAsmAvx: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsmAvx MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsmAvx two_byte_offset_repeat_as_copy_encodeBlockAsmAvx: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(BX), BX ADDQ $0x03, SI emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BP, $0x00000800 JLT 
repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_four_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_three_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx repeat_two_offset_repeat_as_copy_encodeBlockAsmAvx_emit_copy_short: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsmAvx: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx CMPL BP, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsmAvx MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsmAvx emit_copy_three_repeat_as_copy_encodeBlockAsmAvx: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI 
repeat_end_emit_encodeBlockAsmAvx: MOVQ SI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsmAvx JMP search_loop_encodeBlockAsmAvx no_repeat_found_encodeBlockAsmAvx: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ BP, DI SHRQ $0x10, DI SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x30, DI CMPL (CX)(BX*1), BP SHRQ $0x08, BP JEQ candidate_match_encodeBlockAsmAvx MOVL 32(SP)(DI*1), BX CMPL (CX)(SI*1), BP JEQ candidate2_match_encodeBlockAsmAvx LEAQ 2(AX), SI MOVL SI, 32(SP)(DI*1) SHRQ $0x08, BP CMPL (CX)(BX*1), BP JEQ candidate3_match_encodeBlockAsmAvx MOVL 28(SP), AX JMP search_loop_encodeBlockAsmAvx candidate3_match_encodeBlockAsmAvx: ADDL $0x02, AX JMP candidate_match_encodeBlockAsmAvx candidate2_match_encodeBlockAsmAvx: LEAQ -2(AX), BX MOVL BX, 32(SP)(DI*1) INCL AX MOVL SI, BX candidate_match_encodeBlockAsmAvx: MOVL 20(SP), BP TESTL BX, BX JZ match_extend_back_end_encodeBlockAsmAvx match_extend_back_loop_encodeBlockAsmAvx: CMPL AX, BP JG match_extend_back_end_encodeBlockAsmAvx MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), SI CMPB DL, SI JNE match_extend_back_end_encodeBlockAsmAvx LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsmAvx JMP match_extend_back_loop_encodeBlockAsmAvx match_extend_back_end_encodeBlockAsmAvx: MOVL AX, BP SUBL 20(SP), BP LEAQ dst_base+0(FP)(BP*1), BP CMPQ BP, (SP) JL match_dst_size_check_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsmAvx: MOVL BX, BP MOVL 20(SP), SI CMPL SI, BP JEQ emit_literal_skip_match_emit_encodeBlockAsmAvx MOVL BP, DI MOVL BP, 20(SP) LEAQ (CX)(SI*1), BP SUBL SI, DI MOVQ dst_base+0(FP), SI MOVQ DI, R8 SUBL $0x01, R8 JC emit_literal_done_match_emit_encodeBlockAsmAvx CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsmAvx CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsmAvx CMPL R8, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsmAvx CMPL R8, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsmAvx MOVB $0xfc, (SI) MOVL R8, 1(SI) ADDQ $0x05, SI JMP 
memmove_match_emit_encodeBlockAsmAvx four_bytes_match_emit_encodeBlockAsmAvx: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (SI) MOVW R8, 1(SI) MOVB R9, 3(SI) ADDQ $0x04, SI JMP memmove_match_emit_encodeBlockAsmAvx three_bytes_match_emit_encodeBlockAsmAvx: MOVB $0xf4, (SI) MOVW R8, 1(SI) ADDQ $0x03, SI JMP memmove_match_emit_encodeBlockAsmAvx two_bytes_match_emit_encodeBlockAsmAvx: MOVB $0xf0, (SI) MOVB R8, 1(SI) ADDQ $0x02, SI JMP memmove_match_emit_encodeBlockAsmAvx one_byte_match_emit_encodeBlockAsmAvx: SHLB $0x02, R8 MOVB R8, (SI) ADDQ $0x01, SI memmove_match_emit_encodeBlockAsmAvx: LEAQ (SI)(DI*1), R8 NOP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_match_emit_encodeBlockAsmAvx CMPQ DI, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_1or2: MOVB (BP), R8 MOVB -1(BP)(DI*1), R9 MOVB R8, (SI) MOVB R9, -1(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_4: MOVL (BP), R8 MOVL R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx 
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_3: MOVW (BP), R8 MOVB 2(BP), R9 MOVW R8, (SI) MOVB R9, 2(SI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_5through7: MOVL (BP), R8 MOVL -4(BP)(DI*1), R9 MOVL R8, (SI) MOVL R9, -4(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_8: MOVQ (BP), R8 MOVQ R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (BP), R8 MOVQ -8(BP)(DI*1), R9 MOVQ R8, (SI) MOVQ R9, -8(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (BP), X0 MOVOU -16(BP)(DI*1), X1 MOVOU X0, (SI) MOVOU X1, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU -32(BP)(DI*1), X2 MOVOU -16(BP)(DI*1), X3 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, -32(SI)(DI*1) MOVOU X3, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU -128(BP)(DI*1), X8 MOVOU -112(BP)(DI*1), X9 MOVOU -96(BP)(DI*1), X10 MOVOU -80(BP)(DI*1), X11 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 
MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, -128(SI)(DI*1) MOVOU X9, -112(SI)(DI*1) MOVOU X10, -96(SI)(DI*1) MOVOU X11, -80(SI)(DI*1) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsmAvx emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU 128(BP), X8 MOVOU 144(BP), X9 MOVOU 160(BP), X10 MOVOU 176(BP), X11 MOVOU 192(BP), X12 MOVOU 208(BP), X13 MOVOU 224(BP), X14 MOVOU 240(BP), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, 128(SI) MOVOU X9, 144(SI) MOVOU X10, 160(SI) MOVOU X11, 176(SI) MOVOU X12, 192(SI) MOVOU X13, 208(SI) MOVOU X14, 224(SI) MOVOU X15, 240(SI) CMPQ DI, $0x00000100 LEAQ 256(BP), BP LEAQ 256(SI), SI JGE emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (BP)(DI*1), R9 MOVQ SI, R11 MOVOU -128(R9), X5 MOVOU -112(R9), X6 MOVQ $0x00000080, R8 ANDQ $0xffffffe0, SI ADDQ $0x20, SI MOVOU -96(R9), X7 MOVOU -80(R9), X8 MOVQ SI, R10 SUBQ R11, R10 MOVOU -64(R9), X9 MOVOU -48(R9), X10 SUBQ R10, DI MOVOU -32(R9), X11 MOVOU -16(R9), X12 VMOVDQU (BP), Y4 ADDQ R10, BP SUBQ R8, DI emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (BP), Y0 VMOVDQU 32(BP), Y1 VMOVDQU 64(BP), Y2 VMOVDQU 96(BP), Y3 ADDQ R8, BP VMOVDQA Y0, (SI) VMOVDQA Y1, 32(SI) VMOVDQA Y2, 64(SI) VMOVDQA Y3, 96(SI) ADDQ R8, SI SUBQ R8, DI JA 
emit_lit_memmove_match_emit_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ R8, DI ADDQ SI, DI VMOVDQU Y4, (R11) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) JMP emit_literal_done_match_emit_encodeBlockAsmAvx MOVQ R8, SI emit_literal_done_match_emit_encodeBlockAsmAvx: MOVQ SI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsmAvx: NOP match_nolit_loop_encodeBlockAsmAvx: MOVL AX, BP MOVL AX, BP SUBL BX, BP MOVL BP, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), BP SUBL AX, BP XORQ DI, DI CMPQ BP, $0x08 JL matchlen_single_match_nolit_encodeBlockAsmAvx matchlen_loopback_match_nolit_encodeBlockAsmAvx: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_match_nolit_encodeBlockAsmAvx BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP match_nolit_end_encodeBlockAsmAvx matchlen_loop_match_nolit_encodeBlockAsmAvx: LEAQ -8(BP), BP LEAQ 8(DI), DI CMPQ BP, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsmAvx matchlen_single_match_nolit_encodeBlockAsmAvx: TESTQ BP, BP JZ match_nolit_end_encodeBlockAsmAvx matchlen_single_loopback_match_nolit_encodeBlockAsmAvx: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE match_nolit_end_encodeBlockAsmAvx LEAQ 1(DI), DI DECQ BP JNZ matchlen_single_loopback_match_nolit_encodeBlockAsmAvx match_nolit_end_encodeBlockAsmAvx: MOVL 24(SP), BP ADDQ $0x04, DI MOVQ dst_base+0(FP), SI ADDL DI, AX CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsmAvx CMPL DI, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsmAvx MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(DI), DI ADDQ $0x05, SI CMPL DI, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsmAvx emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy CMPL BP, $0x00000800 
JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx four_bytes_remain_match_nolit_encodeBlockAsmAvx: TESTL DI, DI JZ match_nolit_emitcopy_end_encodeBlockAsmAvx MOVB $0x03, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx two_byte_offset_match_nolit_encodeBlockAsmAvx: CMPL DI, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsmAvx MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(DI), DI ADDQ $0x03, SI emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE 
repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsmAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_four_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_three_match_nolit_encodeBlockAsmAvx_emit_copy_short: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_match_nolit_encodeBlockAsmAvx_emit_copy_short: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx repeat_two_offset_match_nolit_encodeBlockAsmAvx_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsmAvx two_byte_offset_short_match_nolit_encodeBlockAsmAvx: CMPL DI, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsmAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsmAvx MOVB $0x01, DL LEAQ -16(DX)(DI*4), DI MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP 
match_nolit_emitcopy_end_encodeBlockAsmAvx emit_copy_three_match_nolit_encodeBlockAsmAvx: MOVB $0x02, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI match_nolit_emitcopy_end_encodeBlockAsmAvx: MOVQ SI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsmAvx CMPQ SI, (SP) JL match_nolit_dst_ok_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsmAvx: MOVQ -2(CX)(AX*1), BP MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ BP, DI SHRQ $0x10, BP MOVQ BP, R8 SHLQ $0x10, DI IMULQ SI, DI SHRQ $0x30, DI SHLQ $0x10, R8 IMULQ SI, R8 SHRQ $0x30, R8 MOVL 32(SP)(DI*1), SI MOVL 32(SP)(R8*1), SI LEAQ -2(AX), SI MOVL SI, 32(SP)(DI*1) MOVL AX, 32(SP)(R8*1) CMPL (CX)(R8*1), BP JEQ match_nolit_loop_encodeBlockAsmAvx INCL AX JMP search_loop_encodeBlockAsmAvx emit_remainder_encodeBlockAsmAvx: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsmAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsmAvx: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsmAvx MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsmAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsmAvx MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsmAvx four_bytes_emit_remainder_encodeBlockAsmAvx: MOVQ DX, BP SHRL $0x10, BP MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB BP, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsmAvx three_bytes_emit_remainder_encodeBlockAsmAvx: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP 
memmove_emit_remainder_encodeBlockAsmAvx two_bytes_emit_remainder_encodeBlockAsmAvx: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsmAvx one_byte_emit_remainder_encodeBlockAsmAvx: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsmAvx: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsmAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), BP MOVB DL, (CX) MOVB BP, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_3: MOVW (AX), DX MOVB 2(AX), BP MOVW DX, (CX) MOVB BP, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx 
emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), BP MOVL DX, (CX) MOVL BP, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), BP MOVQ DX, (CX) MOVQ BP, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) 
MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_avxUnaligned: LEAQ (AX)(BX*1), BP MOVQ CX, DI MOVOU -128(BP), X5 MOVOU -112(BP), X6 MOVQ $0x00000080, DX ANDQ $0xffffffe0, CX ADDQ $0x20, CX MOVOU -96(BP), X7 MOVOU -80(BP), X8 MOVQ CX, SI SUBQ DI, SI MOVOU -64(BP), X9 MOVOU -48(BP), X10 SUBQ SI, BX MOVOU -32(BP), X11 MOVOU -16(BP), X12 VMOVDQU (AX), Y4 ADDQ SI, AX SUBQ DX, BX emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop: VMOVDQU (AX), Y0 VMOVDQU 32(AX), Y1 VMOVDQU 64(AX), Y2 VMOVDQU 96(AX), Y3 ADDQ DX, AX VMOVDQA Y0, (CX) VMOVDQA Y1, 32(CX) VMOVDQA Y2, 64(CX) VMOVDQA Y3, 96(CX) ADDQ DX, CX SUBQ DX, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsmAvx_memmove_gobble_128_loop ADDQ DX, BX ADDQ CX, BX VMOVDQU Y4, (DI) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, 
-112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) JMP emit_literal_done_emit_remainder_encodeBlockAsmAvx MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsmAvx: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsmAvx: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm14BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm14BAvx(SB), $16416-56 MOVQ $0x00000080, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm14BAvx: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm14BAvx MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ (AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm14BAvx: MOVQ (CX)(AX*1), BP MOVL AX, BX SUBL 20(SP), BX SHRL $0x05, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), SI CMPL BX, SI JGT emit_remainder_encodeBlockAsm14BAvx MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ BP, DI MOVQ BP, R8 SHRQ $0x08, R8 SHLQ $0x10, DI IMULQ BX, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x32, R8 MOVL 32(SP)(DI*1), BX MOVL 32(SP)(R8*1), SI MOVL AX, 32(SP)(DI*1) LEAL 1(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, DI SUBL 24(SP), DI MOVL 1(CX)(DI*1), R9 MOVQ BP, R8 SHLQ $0x08, R8 CMPL R8, R9 JNE no_repeat_found_encodeBlockAsm14BAvx LEAQ 1(AX), BP MOVL 20(SP), BX TESTL DI, DI JZ repeat_extend_back_end_encodeBlockAsm14BAvx repeat_extend_back_loop_encodeBlockAsm14BAvx: CMPL BP, BX JG repeat_extend_back_end_encodeBlockAsm14BAvx MOVB -1(CX)(DI*1), DL MOVB -1(CX)(BP*1), SI CMPB DL, SI JNE repeat_extend_back_end_encodeBlockAsm14BAvx LEAQ -1(BP), BP DECL DI JZ repeat_extend_back_end_encodeBlockAsm14BAvx JMP 
repeat_extend_back_loop_encodeBlockAsm14BAvx repeat_extend_back_end_encodeBlockAsm14BAvx: MOVL 20(SP), BX CMPL BX, BP JEQ emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx MOVL BP, SI MOVL BP, 20(SP) LEAQ (CX)(BX*1), DI SUBL BX, SI MOVQ dst_base+0(FP), BX MOVQ SI, R8 SUBL $0x01, R8 JC emit_literal_done_repeat_emit_encodeBlockAsm14BAvx CMPL R8, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm14BAvx CMPL R8, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm14BAvx CMPL R8, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm14BAvx CMPL R8, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm14BAvx MOVB $0xfc, (BX) MOVL R8, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx four_bytes_repeat_emit_encodeBlockAsm14BAvx: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (BX) MOVW R8, 1(BX) MOVB R9, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx three_bytes_repeat_emit_encodeBlockAsm14BAvx: MOVB $0xf4, (BX) MOVW R8, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx two_bytes_repeat_emit_encodeBlockAsm14BAvx: MOVB $0xf0, (BX) MOVB R8, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm14BAvx one_byte_repeat_emit_encodeBlockAsm14BAvx: SHLB $0x02, R8 MOVB R8, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm14BAvx: LEAQ (BX)(SI*1), R8 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail: TESTQ SI, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm14BAvx CMPQ SI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2 CMPQ SI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4 CMPQ SI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8 CMPQ SI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16 CMPQ SI, $0x20 JBE 
emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32 CMPQ SI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64 CMPQ SI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128 CMPQ SI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_1or2: MOVB (DI), R8 MOVB -1(DI)(SI*1), R9 MOVB R8, (BX) MOVB R9, -1(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_4: MOVL (DI), R8 MOVL R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_3: MOVW (DI), R8 MOVB 2(DI), R9 MOVW R8, (BX) MOVB R9, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_5through7: MOVL (DI), R8 MOVL -4(DI)(SI*1), R9 MOVL R8, (BX) MOVL R9, -4(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_8: MOVQ (DI), R8 MOVQ R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_9through16: MOVQ (DI), R8 MOVQ -8(DI)(SI*1), R9 MOVQ R8, (BX) MOVQ R9, -8(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(SI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(SI*1), X2 MOVOU -16(DI)(SI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(SI*1) MOVOU X3, -16(BX)(SI*1) JMP 
emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_65through128: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_129through256: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU -128(DI)(SI*1), X8 MOVOU -112(DI)(SI*1), X9 MOVOU -96(DI)(SI*1), X10 MOVOU -80(DI)(SI*1), X11 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(SI*1) MOVOU X9, -112(BX)(SI*1) MOVOU X10, -96(BX)(SI*1) MOVOU X11, -80(BX)(SI*1) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048: LEAQ -256(SI), SI MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU 128(DI), X8 MOVOU 144(DI), X9 MOVOU 160(DI), X10 MOVOU 176(DI), X11 MOVOU 192(DI), X12 MOVOU 208(DI), X13 MOVOU 224(DI), X14 MOVOU 240(DI), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ SI, 
$0x00000100 LEAQ 256(DI), DI LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned: LEAQ (DI)(SI*1), R9 MOVQ BX, R11 MOVOU -128(R9), X5 MOVOU -112(R9), X6 MOVQ $0x00000080, R8 ANDQ $0xffffffe0, BX ADDQ $0x20, BX MOVOU -96(R9), X7 MOVOU -80(R9), X8 MOVQ BX, R10 SUBQ R11, R10 MOVOU -64(R9), X9 MOVOU -48(R9), X10 SUBQ R10, SI MOVOU -32(R9), X11 MOVOU -16(R9), X12 VMOVDQU (DI), Y4 ADDQ R10, DI SUBQ R8, SI emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop: VMOVDQU (DI), Y0 VMOVDQU 32(DI), Y1 VMOVDQU 64(DI), Y2 VMOVDQU 96(DI), Y3 ADDQ R8, DI VMOVDQA Y0, (BX) VMOVDQA Y1, 32(BX) VMOVDQA Y2, 64(BX) VMOVDQA Y3, 96(BX) ADDQ R8, BX SUBQ R8, SI JA emit_lit_memmove_repeat_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop ADDQ R8, SI ADDQ BX, SI VMOVDQU Y4, (R11) VZEROUPPER MOVOU X5, -128(SI) MOVOU X6, -112(SI) MOVOU X7, -96(SI) MOVOU X8, -80(SI) MOVOU X9, -64(SI) MOVOU X10, -48(SI) MOVOU X11, -32(SI) MOVOU X12, -16(SI) JMP emit_literal_done_repeat_emit_encodeBlockAsm14BAvx MOVQ R8, BX emit_literal_done_repeat_emit_encodeBlockAsm14BAvx: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsm14BAvx: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ DI, DI CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_repeat_extend BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP repeat_extend_forward_end_encodeBlockAsm14BAvx matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(DI), DI CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm14BAvx matchlen_single_loopback_repeat_extend: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE 
repeat_extend_forward_end_encodeBlockAsm14BAvx LEAQ 1(DI), DI DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm14BAvx: ADDL DI, AX MOVL AX, BX SUBL BP, BX MOVL 24(SP), BP MOVQ dst_base+0(FP), SI MOVL 20(SP), DI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm14BAvx emit_repeat_again_match_repeat_: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_match_repeat_ CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL BP, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_offset_match_repeat_: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_as_copy_encodeBlockAsm14BAvx: CMPL BP, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(BX), BX ADDQ $0x05, SI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: 
MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm14BAvx: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm14BAvx MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm14BAvx: CMPL BX, $0x40 JLE 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(BX), BX ADDQ $0x03, SI emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_four_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_three_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm14BAvx_emit_copy_short: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm14BAvx: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx CMPL BP, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm14BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm14BAvx: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI repeat_end_emit_encodeBlockAsm14BAvx: MOVQ SI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm14BAvx JMP search_loop_encodeBlockAsm14BAvx no_repeat_found_encodeBlockAsm14BAvx: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ BP, DI SHRQ $0x10, DI SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x32, DI CMPL (CX)(BX*1), BP SHRQ $0x08, BP JEQ candidate_match_encodeBlockAsm14BAvx MOVL 32(SP)(DI*1), BX CMPL (CX)(SI*1), BP JEQ candidate2_match_encodeBlockAsm14BAvx LEAQ 2(AX), SI MOVL SI, 32(SP)(DI*1) SHRQ $0x08, BP CMPL (CX)(BX*1), BP JEQ candidate3_match_encodeBlockAsm14BAvx MOVL 28(SP), AX JMP search_loop_encodeBlockAsm14BAvx candidate3_match_encodeBlockAsm14BAvx: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm14BAvx candidate2_match_encodeBlockAsm14BAvx: LEAQ -2(AX), BX MOVL BX, 32(SP)(DI*1) INCL AX MOVL SI, BX candidate_match_encodeBlockAsm14BAvx: MOVL 20(SP), BP TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm14BAvx match_extend_back_loop_encodeBlockAsm14BAvx: CMPL AX, BP JG match_extend_back_end_encodeBlockAsm14BAvx MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), SI CMPB DL, SI JNE match_extend_back_end_encodeBlockAsm14BAvx LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm14BAvx JMP match_extend_back_loop_encodeBlockAsm14BAvx match_extend_back_end_encodeBlockAsm14BAvx: MOVL AX, BP SUBL 20(SP), BP LEAQ dst_base+0(FP)(BP*1), BP CMPQ BP, (SP) JL match_dst_size_check_encodeBlockAsm14BAvx MOVQ $0x00000000, ret+48(FP) RET 
match_dst_size_check_encodeBlockAsm14BAvx: MOVL BX, BP MOVL 20(SP), SI CMPL SI, BP JEQ emit_literal_skip_match_emit_encodeBlockAsm14BAvx MOVL BP, DI MOVL BP, 20(SP) LEAQ (CX)(SI*1), BP SUBL SI, DI MOVQ dst_base+0(FP), SI MOVQ DI, R8 SUBL $0x01, R8 JC emit_literal_done_match_emit_encodeBlockAsm14BAvx CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm14BAvx CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm14BAvx CMPL R8, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm14BAvx CMPL R8, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm14BAvx MOVB $0xfc, (SI) MOVL R8, 1(SI) ADDQ $0x05, SI JMP memmove_match_emit_encodeBlockAsm14BAvx four_bytes_match_emit_encodeBlockAsm14BAvx: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (SI) MOVW R8, 1(SI) MOVB R9, 3(SI) ADDQ $0x04, SI JMP memmove_match_emit_encodeBlockAsm14BAvx three_bytes_match_emit_encodeBlockAsm14BAvx: MOVB $0xf4, (SI) MOVW R8, 1(SI) ADDQ $0x03, SI JMP memmove_match_emit_encodeBlockAsm14BAvx two_bytes_match_emit_encodeBlockAsm14BAvx: MOVB $0xf0, (SI) MOVB R8, 1(SI) ADDQ $0x02, SI JMP memmove_match_emit_encodeBlockAsm14BAvx one_byte_match_emit_encodeBlockAsm14BAvx: SHLB $0x02, R8 MOVB R8, (SI) ADDQ $0x01, SI memmove_match_emit_encodeBlockAsm14BAvx: LEAQ (SI)(DI*1), R8 NOP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_match_emit_encodeBlockAsm14BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE 
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_1or2: MOVB (BP), R8 MOVB -1(BP)(DI*1), R9 MOVB R8, (SI) MOVB R9, -1(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_4: MOVL (BP), R8 MOVL R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_3: MOVW (BP), R8 MOVB 2(BP), R9 MOVW R8, (SI) MOVB R9, 2(SI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_5through7: MOVL (BP), R8 MOVL -4(BP)(DI*1), R9 MOVL R8, (SI) MOVL R9, -4(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_8: MOVQ (BP), R8 MOVQ R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_9through16: MOVQ (BP), R8 MOVQ -8(BP)(DI*1), R9 MOVQ R8, (SI) MOVQ R9, -8(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_17through32: MOVOU (BP), X0 MOVOU -16(BP)(DI*1), X1 MOVOU X0, (SI) MOVOU X1, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_33through64: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU -32(BP)(DI*1), X2 MOVOU -16(BP)(DI*1), X3 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, -32(SI)(DI*1) MOVOU X3, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_65through128: MOVOU (BP), X0 MOVOU 
16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_129through256: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU -128(BP)(DI*1), X8 MOVOU -112(BP)(DI*1), X9 MOVOU -96(BP)(DI*1), X10 MOVOU -80(BP)(DI*1), X11 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, -128(SI)(DI*1) MOVOU X9, -112(SI)(DI*1) MOVOU X10, -96(SI)(DI*1) MOVOU X11, -80(SI)(DI*1) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU 128(BP), X8 MOVOU 144(BP), X9 MOVOU 160(BP), X10 MOVOU 176(BP), X11 MOVOU 192(BP), X12 MOVOU 208(BP), X13 MOVOU 224(BP), X14 MOVOU 240(BP), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, 128(SI) MOVOU X9, 144(SI) MOVOU X10, 160(SI) MOVOU X11, 176(SI) MOVOU X12, 192(SI) MOVOU X13, 208(SI) MOVOU X14, 224(SI) MOVOU X15, 240(SI) CMPQ DI, $0x00000100 LEAQ 256(BP), BP LEAQ 256(SI), SI JGE emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_move_256through2048 JMP 
emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_avxUnaligned: LEAQ (BP)(DI*1), R9 MOVQ SI, R11 MOVOU -128(R9), X5 MOVOU -112(R9), X6 MOVQ $0x00000080, R8 ANDQ $0xffffffe0, SI ADDQ $0x20, SI MOVOU -96(R9), X7 MOVOU -80(R9), X8 MOVQ SI, R10 SUBQ R11, R10 MOVOU -64(R9), X9 MOVOU -48(R9), X10 SUBQ R10, DI MOVOU -32(R9), X11 MOVOU -16(R9), X12 VMOVDQU (BP), Y4 ADDQ R10, BP SUBQ R8, DI emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop: VMOVDQU (BP), Y0 VMOVDQU 32(BP), Y1 VMOVDQU 64(BP), Y2 VMOVDQU 96(BP), Y3 ADDQ R8, BP VMOVDQA Y0, (SI) VMOVDQA Y1, 32(SI) VMOVDQA Y2, 64(SI) VMOVDQA Y3, 96(SI) ADDQ R8, SI SUBQ R8, DI JA emit_lit_memmove_match_emit_encodeBlockAsm14BAvx_memmove_gobble_128_loop ADDQ R8, DI ADDQ SI, DI VMOVDQU Y4, (R11) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) JMP emit_literal_done_match_emit_encodeBlockAsm14BAvx MOVQ R8, SI emit_literal_done_match_emit_encodeBlockAsm14BAvx: MOVQ SI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm14BAvx: NOP match_nolit_loop_encodeBlockAsm14BAvx: MOVL AX, BP MOVL AX, BP SUBL BX, BP MOVL BP, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), BP SUBL AX, BP XORQ DI, DI CMPQ BP, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm14BAvx matchlen_loopback_match_nolit_encodeBlockAsm14BAvx: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_match_nolit_encodeBlockAsm14BAvx BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP match_nolit_end_encodeBlockAsm14BAvx matchlen_loop_match_nolit_encodeBlockAsm14BAvx: LEAQ -8(BP), BP LEAQ 8(DI), DI CMPQ BP, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm14BAvx matchlen_single_match_nolit_encodeBlockAsm14BAvx: TESTQ BP, BP JZ match_nolit_end_encodeBlockAsm14BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx: MOVB (CX)(DI*1), SI CMPB 
(CX)(DI*1), SI JNE match_nolit_end_encodeBlockAsm14BAvx LEAQ 1(DI), DI DECQ BP JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm14BAvx match_nolit_end_encodeBlockAsm14BAvx: MOVL 24(SP), BP ADDQ $0x04, DI MOVQ dst_base+0(FP), SI ADDL DI, AX CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm14BAvx CMPL DI, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm14BAvx MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(DI), DI ADDQ $0x05, SI CMPL DI, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm14BAvx emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI 
JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx four_bytes_remain_match_nolit_encodeBlockAsm14BAvx: TESTL DI, DI JZ match_nolit_emitcopy_end_encodeBlockAsm14BAvx MOVB $0x03, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx two_byte_offset_match_nolit_encodeBlockAsm14BAvx: CMPL DI, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(DI), DI ADDQ $0x03, SI emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm14BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_four_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx 
repeat_three_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx repeat_two_offset_match_nolit_encodeBlockAsm14BAvx_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx two_byte_offset_short_match_nolit_encodeBlockAsm14BAvx: CMPL DI, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm14BAvx MOVB $0x01, DL LEAQ -16(DX)(DI*4), DI MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm14BAvx emit_copy_three_match_nolit_encodeBlockAsm14BAvx: MOVB $0x02, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI match_nolit_emitcopy_end_encodeBlockAsm14BAvx: MOVQ SI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm14BAvx CMPQ SI, (SP) JL match_nolit_dst_ok_encodeBlockAsm14BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm14BAvx: MOVQ -2(CX)(AX*1), BP MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ BP, DI SHRQ $0x10, BP MOVQ BP, R8 SHLQ $0x10, DI IMULQ SI, DI SHRQ $0x32, DI SHLQ $0x10, R8 IMULQ SI, R8 SHRQ $0x32, R8 MOVL 32(SP)(DI*1), SI MOVL 32(SP)(R8*1), SI LEAQ -2(AX), SI MOVL SI, 32(SP)(DI*1) MOVL AX, 32(SP)(R8*1) CMPL (CX)(R8*1), BP JEQ match_nolit_loop_encodeBlockAsm14BAvx INCL AX JMP search_loop_encodeBlockAsm14BAvx emit_remainder_encodeBlockAsm14BAvx: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm14BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm14BAvx: 
MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm14BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm14BAvx MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx four_bytes_emit_remainder_encodeBlockAsm14BAvx: MOVQ DX, BP SHRL $0x10, BP MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB BP, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx three_bytes_emit_remainder_encodeBlockAsm14BAvx: MOVB $0xf4, (CX) MOVW DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx two_bytes_emit_remainder_encodeBlockAsm14BAvx: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm14BAvx one_byte_emit_remainder_encodeBlockAsm14BAvx: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm14BAvx: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm14BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE 
emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), BP MOVB DL, (CX) MOVB BP, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_3: MOVW (AX), DX MOVB 2(AX), BP MOVW DX, (CX) MOVB BP, 2(CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), BP MOVL DX, (CX) MOVL BP, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), BP MOVQ DX, (CX) MOVQ BP, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) 
MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU 
X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_avxUnaligned: LEAQ (AX)(BX*1), BP MOVQ CX, DI MOVOU -128(BP), X5 MOVOU -112(BP), X6 MOVQ $0x00000080, DX ANDQ $0xffffffe0, CX ADDQ $0x20, CX MOVOU -96(BP), X7 MOVOU -80(BP), X8 MOVQ CX, SI SUBQ DI, SI MOVOU -64(BP), X9 MOVOU -48(BP), X10 SUBQ SI, BX MOVOU -32(BP), X11 MOVOU -16(BP), X12 VMOVDQU (AX), Y4 ADDQ SI, AX SUBQ DX, BX emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop: VMOVDQU (AX), Y0 VMOVDQU 32(AX), Y1 VMOVDQU 64(AX), Y2 VMOVDQU 96(AX), Y3 ADDQ DX, AX VMOVDQA Y0, (CX) VMOVDQA Y1, 32(CX) VMOVDQA Y2, 64(CX) VMOVDQA Y3, 96(CX) ADDQ DX, CX SUBQ DX, BX JA emit_lit_memmove_emit_remainder_encodeBlockAsm14BAvx_memmove_gobble_128_loop ADDQ DX, BX ADDQ CX, BX VMOVDQU Y4, (DI) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) JMP emit_literal_done_emit_remainder_encodeBlockAsm14BAvx MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm14BAvx: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm14BAvx: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET // func encodeBlockAsm12BAvx(dst []byte, src []byte) int // Requires: AVX, SSE2 TEXT ·encodeBlockAsm12BAvx(SB), $4128-56 MOVQ $0x00000020, AX LEAQ 32(SP), CX PXOR X0, X0 zero_loop_encodeBlockAsm12BAvx: MOVOU X0, (CX) MOVOU X0, 16(CX) MOVOU X0, 32(CX) MOVOU X0, 48(CX) MOVOU X0, 64(CX) MOVOU X0, 80(CX) MOVOU X0, 96(CX) MOVOU X0, 112(CX) ADDQ $0x80, CX DECQ AX JNZ zero_loop_encodeBlockAsm12BAvx MOVL AX, 20(SP) MOVQ src_len+32(FP), AX LEAQ -5(AX), CX LEAQ -8(AX), BX SHRQ $0x05, AX SUBL AX, CX MOVL BX, 16(SP) MOVQ dst_base+0(FP), AX MOVQ AX, 8(SP) LEAQ 
(AX)(CX*1), CX MOVQ CX, (SP) MOVL $0x00000001, AX MOVL AX, 24(SP) MOVQ src_base+24(FP), CX search_loop_encodeBlockAsm12BAvx: MOVQ (CX)(AX*1), BP MOVL AX, BX SUBL 20(SP), BX SHRL $0x04, BX LEAQ 4(AX)(BX*1), BX MOVL 16(SP), SI CMPL BX, SI JGT emit_remainder_encodeBlockAsm12BAvx MOVL BX, 28(SP) MOVQ $0x0000cf1bbcdcbf9b, BX MOVQ BP, DI MOVQ BP, R8 SHRQ $0x08, R8 SHLQ $0x10, DI IMULQ BX, DI SHRQ $0x34, DI SHLQ $0x10, R8 IMULQ BX, R8 SHRQ $0x34, R8 MOVL 32(SP)(DI*1), BX MOVL 32(SP)(R8*1), SI MOVL AX, 32(SP)(DI*1) LEAL 1(AX), DI MOVL DI, 32(SP)(R8*1) MOVL AX, DI SUBL 24(SP), DI MOVL 1(CX)(DI*1), R9 MOVQ BP, R8 SHLQ $0x08, R8 CMPL R8, R9 JNE no_repeat_found_encodeBlockAsm12BAvx LEAQ 1(AX), BP MOVL 20(SP), BX TESTL DI, DI JZ repeat_extend_back_end_encodeBlockAsm12BAvx repeat_extend_back_loop_encodeBlockAsm12BAvx: CMPL BP, BX JG repeat_extend_back_end_encodeBlockAsm12BAvx MOVB -1(CX)(DI*1), DL MOVB -1(CX)(BP*1), SI CMPB DL, SI JNE repeat_extend_back_end_encodeBlockAsm12BAvx LEAQ -1(BP), BP DECL DI JZ repeat_extend_back_end_encodeBlockAsm12BAvx JMP repeat_extend_back_loop_encodeBlockAsm12BAvx repeat_extend_back_end_encodeBlockAsm12BAvx: MOVL 20(SP), BX CMPL BX, BP JEQ emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx MOVL BP, SI MOVL BP, 20(SP) LEAQ (CX)(BX*1), DI SUBL BX, SI MOVQ dst_base+0(FP), BX MOVQ SI, R8 SUBL $0x01, R8 JC emit_literal_done_repeat_emit_encodeBlockAsm12BAvx CMPL R8, $0x3c JLT one_byte_repeat_emit_encodeBlockAsm12BAvx CMPL R8, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12BAvx CMPL R8, $0x00010000 JLT three_bytes_repeat_emit_encodeBlockAsm12BAvx CMPL R8, $0x01000000 JLT four_bytes_repeat_emit_encodeBlockAsm12BAvx MOVB $0xfc, (BX) MOVL R8, 1(BX) ADDQ $0x05, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx four_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (BX) MOVW R8, 1(BX) MOVB R9, 3(BX) ADDQ $0x04, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx three_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVB $0xf4, (BX) MOVW 
R8, 1(BX) ADDQ $0x03, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx two_bytes_repeat_emit_encodeBlockAsm12BAvx: MOVB $0xf0, (BX) MOVB R8, 1(BX) ADDQ $0x02, BX JMP memmove_repeat_emit_encodeBlockAsm12BAvx one_byte_repeat_emit_encodeBlockAsm12BAvx: SHLB $0x02, R8 MOVB R8, (BX) ADDQ $0x01, BX memmove_repeat_emit_encodeBlockAsm12BAvx: LEAQ (BX)(SI*1), R8 NOP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail: TESTQ SI, SI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12BAvx CMPQ SI, $0x02 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ SI, $0x04 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4 CMPQ SI, $0x08 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8 CMPQ SI, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ SI, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ SI, $0x40 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ SI, $0x80 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ SI, $0x00000100 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (DI), R8 MOVB -1(DI)(SI*1), R9 MOVB R8, (BX) MOVB R9, -1(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_4: MOVL (DI), R8 MOVL R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_3: MOVW (DI), R8 MOVB 2(DI), R9 MOVW R8, (BX) MOVB R9, 2(BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx 
emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (DI), R8 MOVL -4(DI)(SI*1), R9 MOVL R8, (BX) MOVL R9, -4(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (DI), R8 MOVQ R8, (BX) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (DI), R8 MOVQ -8(DI)(SI*1), R9 MOVQ R8, (BX) MOVQ R9, -8(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (DI), X0 MOVOU -16(DI)(SI*1), X1 MOVOU X0, (BX) MOVOU X1, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU -32(DI)(SI*1), X2 MOVOU -16(DI)(SI*1), X3 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, -32(BX)(SI*1) MOVOU X3, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU -128(DI)(SI*1), X8 MOVOU -112(DI)(SI*1), X9 MOVOU -96(DI)(SI*1), X10 MOVOU -80(DI)(SI*1), X11 MOVOU -64(DI)(SI*1), X12 MOVOU -48(DI)(SI*1), X13 MOVOU -32(DI)(SI*1), X14 MOVOU -16(DI)(SI*1), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) 
MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, -128(BX)(SI*1) MOVOU X9, -112(BX)(SI*1) MOVOU X10, -96(BX)(SI*1) MOVOU X11, -80(BX)(SI*1) MOVOU X12, -64(BX)(SI*1) MOVOU X13, -48(BX)(SI*1) MOVOU X14, -32(BX)(SI*1) MOVOU X15, -16(BX)(SI*1) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(SI), SI MOVOU (DI), X0 MOVOU 16(DI), X1 MOVOU 32(DI), X2 MOVOU 48(DI), X3 MOVOU 64(DI), X4 MOVOU 80(DI), X5 MOVOU 96(DI), X6 MOVOU 112(DI), X7 MOVOU 128(DI), X8 MOVOU 144(DI), X9 MOVOU 160(DI), X10 MOVOU 176(DI), X11 MOVOU 192(DI), X12 MOVOU 208(DI), X13 MOVOU 224(DI), X14 MOVOU 240(DI), X15 MOVOU X0, (BX) MOVOU X1, 16(BX) MOVOU X2, 32(BX) MOVOU X3, 48(BX) MOVOU X4, 64(BX) MOVOU X5, 80(BX) MOVOU X6, 96(BX) MOVOU X7, 112(BX) MOVOU X8, 128(BX) MOVOU X9, 144(BX) MOVOU X10, 160(BX) MOVOU X11, 176(BX) MOVOU X12, 192(BX) MOVOU X13, 208(BX) MOVOU X14, 224(BX) MOVOU X15, 240(BX) CMPQ SI, $0x00000100 LEAQ 256(DI), DI LEAQ 256(BX), BX JGE emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (DI)(SI*1), R9 MOVQ BX, R11 MOVOU -128(R9), X5 MOVOU -112(R9), X6 MOVQ $0x00000080, R8 ANDQ $0xffffffe0, BX ADDQ $0x20, BX MOVOU -96(R9), X7 MOVOU -80(R9), X8 MOVQ BX, R10 SUBQ R11, R10 MOVOU -64(R9), X9 MOVOU -48(R9), X10 SUBQ R10, SI MOVOU -32(R9), X11 MOVOU -16(R9), X12 VMOVDQU (DI), Y4 ADDQ R10, DI SUBQ R8, SI emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (DI), Y0 VMOVDQU 32(DI), Y1 VMOVDQU 64(DI), Y2 VMOVDQU 96(DI), Y3 ADDQ R8, DI VMOVDQA Y0, (BX) VMOVDQA Y1, 32(BX) VMOVDQA Y2, 64(BX) VMOVDQA Y3, 96(BX) ADDQ R8, BX SUBQ R8, SI JA emit_lit_memmove_repeat_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R8, SI ADDQ BX, SI VMOVDQU Y4, (R11) VZEROUPPER MOVOU X5, -128(SI) 
MOVOU X6, -112(SI) MOVOU X7, -96(SI) MOVOU X8, -80(SI) MOVOU X9, -64(SI) MOVOU X10, -48(SI) MOVOU X11, -32(SI) MOVOU X12, -16(SI) JMP emit_literal_done_repeat_emit_encodeBlockAsm12BAvx MOVQ R8, BX emit_literal_done_repeat_emit_encodeBlockAsm12BAvx: MOVQ BX, dst_base+0(FP) emit_literal_skip_repeat_emit_encodeBlockAsm12BAvx: ADDL $0x05, AX MOVL AX, BX SUBL 24(SP), BX MOVL 16(SP), BX SUBL AX, BX XORQ DI, DI CMPQ BX, $0x08 JL matchlen_single_repeat_extend matchlen_loopback_repeat_extend: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_repeat_extend BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP repeat_extend_forward_end_encodeBlockAsm12BAvx matchlen_loop_repeat_extend: LEAQ -8(BX), BX LEAQ 8(DI), DI CMPQ BX, $0x08 JGE matchlen_loopback_repeat_extend matchlen_single_repeat_extend: TESTQ BX, BX JZ repeat_extend_forward_end_encodeBlockAsm12BAvx matchlen_single_loopback_repeat_extend: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE repeat_extend_forward_end_encodeBlockAsm12BAvx LEAQ 1(DI), DI DECQ BX JNZ matchlen_single_loopback_repeat_extend repeat_extend_forward_end_encodeBlockAsm12BAvx: ADDL DI, AX MOVL AX, BX SUBL BP, BX MOVL 24(SP), BP MOVQ dst_base+0(FP), SI MOVL 20(SP), DI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12BAvx emit_repeat_again_match_repeat_: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_match_repeat_ CMPL DI, $0x0c JGE cant_repeat_two_offset_match_repeat_ CMPL BP, $0x00000800 JLT repeat_two_offset_match_repeat_ cant_repeat_two_offset_match_repeat_: CMPL BX, $0x00000104 JLT repeat_three_match_repeat_ CMPL BX, $0x00010100 JLT repeat_four_match_repeat_ CMPL BX, $0x0100ffff JLT repeat_five_match_repeat_ LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_repeat_ repeat_five_match_repeat_: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12BAvx 
repeat_four_match_repeat_: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_match_repeat_: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_match_repeat_: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_match_repeat_: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_as_copy_encodeBlockAsm12BAvx: CMPL BP, $0x00010000 JL two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx CMPL BX, $0x40 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(BX), BX ADDQ $0x05, SI CMPL BX, $0x04 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12BAvx 
repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx four_bytes_remain_repeat_as_copy_encodeBlockAsm12BAvx: TESTL BX, BX JZ repeat_end_emit_encodeBlockAsm12BAvx MOVB $0x03, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12BAvx two_byte_offset_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BX, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(BX), BX ADDQ $0x03, SI emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: MOVQ BX, DI LEAQ -4(BX), BX CMPL DI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL DI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: CMPL BX, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BX, $0x00010100 JLT repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short CMPL BX, $0x0100ffff JLT repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short LEAQ -16842747(BX), BX MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP 
emit_repeat_again_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -65536(BX), BX MOVQ BX, BP MOVW $0x001d, (SI) MOVW BX, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_four_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -256(BX), BX MOVW $0x0019, (SI) MOVW BX, 2(SI) ADDQ $0x04, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_three_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -4(BX), BX MOVW $0x0015, (SI) MOVB BL, 2(SI) ADDQ $0x03, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: SHLL $0x02, BX ORL $0x01, BX MOVW BX, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx repeat_two_offset_repeat_as_copy_encodeBlockAsm12BAvx_emit_copy_short: XORQ DI, DI LEAQ 1(DI)(BX*4), BX MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx two_byte_offset_short_repeat_as_copy_encodeBlockAsm12BAvx: CMPL BX, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx CMPL BP, $0x00000800 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx MOVB $0x01, DL LEAQ -16(DX)(BX*4), BX MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, BX MOVB BL, (SI) ADDQ $0x02, SI JMP repeat_end_emit_encodeBlockAsm12BAvx emit_copy_three_repeat_as_copy_encodeBlockAsm12BAvx: MOVB $0x02, DL LEAQ -4(DX)(BX*4), BX MOVB BL, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI repeat_end_emit_encodeBlockAsm12BAvx: MOVQ SI, dst_base+0(FP) MOVL 16(SP), BX CMPL AX, BX JGT emit_remainder_encodeBlockAsm12BAvx JMP search_loop_encodeBlockAsm12BAvx no_repeat_found_encodeBlockAsm12BAvx: MOVQ $0x0000cf1bbcdcbf9b, R8 MOVQ BP, DI SHRQ $0x10, DI SHLQ $0x10, DI IMULQ R8, DI SHRQ $0x34, DI CMPL (CX)(BX*1), BP SHRQ $0x08, BP JEQ candidate_match_encodeBlockAsm12BAvx MOVL 32(SP)(DI*1), BX CMPL (CX)(SI*1), BP JEQ candidate2_match_encodeBlockAsm12BAvx LEAQ 
2(AX), SI MOVL SI, 32(SP)(DI*1) SHRQ $0x08, BP CMPL (CX)(BX*1), BP JEQ candidate3_match_encodeBlockAsm12BAvx MOVL 28(SP), AX JMP search_loop_encodeBlockAsm12BAvx candidate3_match_encodeBlockAsm12BAvx: ADDL $0x02, AX JMP candidate_match_encodeBlockAsm12BAvx candidate2_match_encodeBlockAsm12BAvx: LEAQ -2(AX), BX MOVL BX, 32(SP)(DI*1) INCL AX MOVL SI, BX candidate_match_encodeBlockAsm12BAvx: MOVL 20(SP), BP TESTL BX, BX JZ match_extend_back_end_encodeBlockAsm12BAvx match_extend_back_loop_encodeBlockAsm12BAvx: CMPL AX, BP JG match_extend_back_end_encodeBlockAsm12BAvx MOVB -1(CX)(BX*1), DL MOVB -1(CX)(AX*1), SI CMPB DL, SI JNE match_extend_back_end_encodeBlockAsm12BAvx LEAL -1(AX), AX DECL BX JZ match_extend_back_end_encodeBlockAsm12BAvx JMP match_extend_back_loop_encodeBlockAsm12BAvx match_extend_back_end_encodeBlockAsm12BAvx: MOVL AX, BP SUBL 20(SP), BP LEAQ dst_base+0(FP)(BP*1), BP CMPQ BP, (SP) JL match_dst_size_check_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_dst_size_check_encodeBlockAsm12BAvx: MOVL BX, BP MOVL 20(SP), SI CMPL SI, BP JEQ emit_literal_skip_match_emit_encodeBlockAsm12BAvx MOVL BP, DI MOVL BP, 20(SP) LEAQ (CX)(SI*1), BP SUBL SI, DI MOVQ dst_base+0(FP), SI MOVQ DI, R8 SUBL $0x01, R8 JC emit_literal_done_match_emit_encodeBlockAsm12BAvx CMPL R8, $0x3c JLT one_byte_match_emit_encodeBlockAsm12BAvx CMPL R8, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12BAvx CMPL R8, $0x00010000 JLT three_bytes_match_emit_encodeBlockAsm12BAvx CMPL R8, $0x01000000 JLT four_bytes_match_emit_encodeBlockAsm12BAvx MOVB $0xfc, (SI) MOVL R8, 1(SI) ADDQ $0x05, SI JMP memmove_match_emit_encodeBlockAsm12BAvx four_bytes_match_emit_encodeBlockAsm12BAvx: MOVQ R8, R9 SHRL $0x10, R9 MOVB $0xf8, (SI) MOVW R8, 1(SI) MOVB R9, 3(SI) ADDQ $0x04, SI JMP memmove_match_emit_encodeBlockAsm12BAvx three_bytes_match_emit_encodeBlockAsm12BAvx: MOVB $0xf4, (SI) MOVW R8, 1(SI) ADDQ $0x03, SI JMP memmove_match_emit_encodeBlockAsm12BAvx two_bytes_match_emit_encodeBlockAsm12BAvx: 
MOVB $0xf0, (SI) MOVB R8, 1(SI) ADDQ $0x02, SI JMP memmove_match_emit_encodeBlockAsm12BAvx one_byte_match_emit_encodeBlockAsm12BAvx: SHLB $0x02, R8 MOVB R8, (SI) ADDQ $0x01, SI memmove_match_emit_encodeBlockAsm12BAvx: LEAQ (SI)(DI*1), R8 NOP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail: TESTQ DI, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12BAvx CMPQ DI, $0x02 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ DI, $0x04 JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4 CMPQ DI, $0x08 JB emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8 CMPQ DI, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ DI, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ DI, $0x40 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ DI, $0x80 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ DI, $0x00000100 JBE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (BP), R8 MOVB -1(BP)(DI*1), R9 MOVB R8, (SI) MOVB R9, -1(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_4: MOVL (BP), R8 MOVL R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_3: MOVW (BP), R8 MOVB 2(BP), R9 MOVW R8, (SI) MOVB R9, 2(SI) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (BP), R8 MOVL -4(BP)(DI*1), R9 MOVL R8, (SI) MOVL R9, -4(SI)(DI*1) JMP 
emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (BP), R8 MOVQ R8, (SI) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (BP), R8 MOVQ -8(BP)(DI*1), R9 MOVQ R8, (SI) MOVQ R9, -8(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (BP), X0 MOVOU -16(BP)(DI*1), X1 MOVOU X0, (SI) MOVOU X1, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU -32(BP)(DI*1), X2 MOVOU -16(BP)(DI*1), X3 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, -32(SI)(DI*1) MOVOU X3, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU -128(BP)(DI*1), X8 MOVOU -112(BP)(DI*1), X9 MOVOU -96(BP)(DI*1), X10 MOVOU -80(BP)(DI*1), X11 MOVOU -64(BP)(DI*1), X12 MOVOU -48(BP)(DI*1), X13 MOVOU -32(BP)(DI*1), X14 MOVOU -16(BP)(DI*1), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, -128(SI)(DI*1) MOVOU X9, -112(SI)(DI*1) MOVOU X10, -96(SI)(DI*1) MOVOU X11, 
-80(SI)(DI*1) MOVOU X12, -64(SI)(DI*1) MOVOU X13, -48(SI)(DI*1) MOVOU X14, -32(SI)(DI*1) MOVOU X15, -16(SI)(DI*1) JMP emit_literal_done_match_emit_encodeBlockAsm12BAvx emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(DI), DI MOVOU (BP), X0 MOVOU 16(BP), X1 MOVOU 32(BP), X2 MOVOU 48(BP), X3 MOVOU 64(BP), X4 MOVOU 80(BP), X5 MOVOU 96(BP), X6 MOVOU 112(BP), X7 MOVOU 128(BP), X8 MOVOU 144(BP), X9 MOVOU 160(BP), X10 MOVOU 176(BP), X11 MOVOU 192(BP), X12 MOVOU 208(BP), X13 MOVOU 224(BP), X14 MOVOU 240(BP), X15 MOVOU X0, (SI) MOVOU X1, 16(SI) MOVOU X2, 32(SI) MOVOU X3, 48(SI) MOVOU X4, 64(SI) MOVOU X5, 80(SI) MOVOU X6, 96(SI) MOVOU X7, 112(SI) MOVOU X8, 128(SI) MOVOU X9, 144(SI) MOVOU X10, 160(SI) MOVOU X11, 176(SI) MOVOU X12, 192(SI) MOVOU X13, 208(SI) MOVOU X14, 224(SI) MOVOU X15, 240(SI) CMPQ DI, $0x00000100 LEAQ 256(BP), BP LEAQ 256(SI), SI JGE emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (BP)(DI*1), R9 MOVQ SI, R11 MOVOU -128(R9), X5 MOVOU -112(R9), X6 MOVQ $0x00000080, R8 ANDQ $0xffffffe0, SI ADDQ $0x20, SI MOVOU -96(R9), X7 MOVOU -80(R9), X8 MOVQ SI, R10 SUBQ R11, R10 MOVOU -64(R9), X9 MOVOU -48(R9), X10 SUBQ R10, DI MOVOU -32(R9), X11 MOVOU -16(R9), X12 VMOVDQU (BP), Y4 ADDQ R10, BP SUBQ R8, DI emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (BP), Y0 VMOVDQU 32(BP), Y1 VMOVDQU 64(BP), Y2 VMOVDQU 96(BP), Y3 ADDQ R8, BP VMOVDQA Y0, (SI) VMOVDQA Y1, 32(SI) VMOVDQA Y2, 64(SI) VMOVDQA Y3, 96(SI) ADDQ R8, SI SUBQ R8, DI JA emit_lit_memmove_match_emit_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ R8, DI ADDQ SI, DI VMOVDQU Y4, (R11) VZEROUPPER MOVOU X5, -128(DI) MOVOU X6, -112(DI) MOVOU X7, -96(DI) MOVOU X8, -80(DI) MOVOU X9, -64(DI) MOVOU X10, -48(DI) MOVOU X11, -32(DI) MOVOU X12, -16(DI) JMP 
emit_literal_done_match_emit_encodeBlockAsm12BAvx MOVQ R8, SI emit_literal_done_match_emit_encodeBlockAsm12BAvx: MOVQ SI, dst_base+0(FP) emit_literal_skip_match_emit_encodeBlockAsm12BAvx: NOP match_nolit_loop_encodeBlockAsm12BAvx: MOVL AX, BP MOVL AX, BP SUBL BX, BP MOVL BP, 24(SP) ADDL $0x04, AX ADDL $0x04, BX MOVL 16(SP), BP SUBL AX, BP XORQ DI, DI CMPQ BP, $0x08 JL matchlen_single_match_nolit_encodeBlockAsm12BAvx matchlen_loopback_match_nolit_encodeBlockAsm12BAvx: MOVQ (CX)(DI*1), SI XORQ (CX)(DI*1), SI TESTQ SI, SI JZ matchlen_loop_match_nolit_encodeBlockAsm12BAvx BSFQ SI, SI SARQ $0x03, SI LEAQ (DI)(SI*1), DI JMP match_nolit_end_encodeBlockAsm12BAvx matchlen_loop_match_nolit_encodeBlockAsm12BAvx: LEAQ -8(BP), BP LEAQ 8(DI), DI CMPQ BP, $0x08 JGE matchlen_loopback_match_nolit_encodeBlockAsm12BAvx matchlen_single_match_nolit_encodeBlockAsm12BAvx: TESTQ BP, BP JZ match_nolit_end_encodeBlockAsm12BAvx matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx: MOVB (CX)(DI*1), SI CMPB (CX)(DI*1), SI JNE match_nolit_end_encodeBlockAsm12BAvx LEAQ 1(DI), DI DECQ BP JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12BAvx match_nolit_end_encodeBlockAsm12BAvx: MOVL 24(SP), BP ADDQ $0x04, DI MOVQ dst_base+0(FP), SI ADDL DI, AX CMPL BP, $0x00010000 JL two_byte_offset_match_nolit_encodeBlockAsm12BAvx CMPL DI, $0x40 JLE four_bytes_remain_match_nolit_encodeBlockAsm12BAvx MOVB $0xff, (SI) MOVD BP, 1(SI) LEAQ -64(DI), DI ADDQ $0x05, SI CMPL DI, $0x04 JL four_bytes_remain_match_nolit_encodeBlockAsm12BAvx emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL R8, $0x0c JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy: CMPL DI, $0x00000104 JLT 
repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx four_bytes_remain_match_nolit_encodeBlockAsm12BAvx: TESTL DI, DI JZ match_nolit_emitcopy_end_encodeBlockAsm12BAvx MOVB $0x03, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVD BP, 1(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx two_byte_offset_match_nolit_encodeBlockAsm12BAvx: CMPL DI, $0x40 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx MOVB $0xee, (SI) MOVW BP, 1(SI) LEAQ -60(DI), DI ADDQ $0x03, SI emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: MOVQ DI, R8 LEAQ -4(DI), DI CMPL R8, $0x08 JLE repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL R8, $0x0c JGE 
cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL BP, $0x00000800 JLT repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: CMPL DI, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL DI, $0x00010100 JLT repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short CMPL DI, $0x0100ffff JLT repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short LEAQ -16842747(DI), DI MOVW $0x001d, (SI) MOVW $0xfffb, 2(SI) MOVB $0xff, 4(SI) ADDQ $0x05, SI JMP emit_repeat_again_match_nolit_encodeBlockAsm12BAvx_emit_copy_short repeat_five_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -65536(DI), DI MOVQ DI, BP MOVW $0x001d, (SI) MOVW DI, 2(SI) SARQ $0x10, BP MOVB BP, 4(SI) ADDQ $0x05, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_four_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -256(DI), DI MOVW $0x0019, (SI) MOVW DI, 2(SI) ADDQ $0x04, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_three_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: LEAQ -4(DI), DI MOVW $0x0015, (SI) MOVB DI, 2(SI) ADDQ $0x03, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: SHLL $0x02, DI ORL $0x01, DI MOVW DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx repeat_two_offset_match_nolit_encodeBlockAsm12BAvx_emit_copy_short: XORQ R8, R8 LEAQ 1(R8)(DI*4), DI MOVB BP, 1(SI) SARL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP match_nolit_emitcopy_end_encodeBlockAsm12BAvx two_byte_offset_short_match_nolit_encodeBlockAsm12BAvx: CMPL DI, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx CMPL BP, $0x00000800 JGE emit_copy_three_match_nolit_encodeBlockAsm12BAvx MOVB $0x01, DL LEAQ -16(DX)(DI*4), DI MOVB BP, 1(SI) SHRL $0x08, BP SHLL $0x05, BP ORL BP, DI MOVB DI, (SI) ADDQ $0x02, SI JMP 
match_nolit_emitcopy_end_encodeBlockAsm12BAvx emit_copy_three_match_nolit_encodeBlockAsm12BAvx: MOVB $0x02, DL LEAQ -4(DX)(DI*4), DI MOVB DI, (SI) MOVW BP, 1(SI) ADDQ $0x03, SI match_nolit_emitcopy_end_encodeBlockAsm12BAvx: MOVQ SI, dst_base+0(FP) MOVL AX, 20(SP) CMPL AX, 16(SP) JGE emit_remainder_encodeBlockAsm12BAvx CMPQ SI, (SP) JL match_nolit_dst_ok_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET match_nolit_dst_ok_encodeBlockAsm12BAvx: MOVQ -2(CX)(AX*1), BP MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ BP, DI SHRQ $0x10, BP MOVQ BP, R8 SHLQ $0x10, DI IMULQ SI, DI SHRQ $0x34, DI SHLQ $0x10, R8 IMULQ SI, R8 SHRQ $0x34, R8 MOVL 32(SP)(DI*1), SI MOVL 32(SP)(R8*1), SI LEAQ -2(AX), SI MOVL SI, 32(SP)(DI*1) MOVL AX, 32(SP)(R8*1) CMPL (CX)(R8*1), BP JEQ match_nolit_loop_encodeBlockAsm12BAvx INCL AX JMP search_loop_encodeBlockAsm12BAvx emit_remainder_encodeBlockAsm12BAvx: MOVQ src_len+32(FP), AX SUBL 20(SP), AX MOVQ dst_base+0(FP), DX LEAQ (DX)(AX*1), DX CMPQ DX, (SP) JL emit_remainder_ok_encodeBlockAsm12BAvx MOVQ $0x00000000, ret+48(FP) RET emit_remainder_ok_encodeBlockAsm12BAvx: MOVQ src_len+32(FP), AX MOVL 20(SP), DX CMPL DX, AX JEQ emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx MOVL AX, BX MOVL AX, 20(SP) LEAQ (CX)(DX*1), AX SUBL DX, BX MOVQ dst_base+0(FP), CX MOVQ BX, DX SUBL $0x01, DX JC emit_literal_done_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x3c JLT one_byte_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x00010000 JLT three_bytes_emit_remainder_encodeBlockAsm12BAvx CMPL DX, $0x01000000 JLT four_bytes_emit_remainder_encodeBlockAsm12BAvx MOVB $0xfc, (CX) MOVL DX, 1(CX) ADDQ $0x05, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx four_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVQ DX, BP SHRL $0x10, BP MOVB $0xf8, (CX) MOVW DX, 1(CX) MOVB BP, 3(CX) ADDQ $0x04, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx three_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVB $0xf4, (CX) MOVW 
DX, 1(CX) ADDQ $0x03, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx two_bytes_emit_remainder_encodeBlockAsm12BAvx: MOVB $0xf0, (CX) MOVB DL, 1(CX) ADDQ $0x02, CX JMP memmove_emit_remainder_encodeBlockAsm12BAvx one_byte_emit_remainder_encodeBlockAsm12BAvx: SHLB $0x02, DL MOVB DL, (CX) ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12BAvx: LEAQ (CX)(BX*1), DX NOP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail: TESTQ BX, BX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12BAvx CMPQ BX, $0x02 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2 CMPQ BX, $0x04 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4 CMPQ BX, $0x08 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8 CMPQ BX, $0x10 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16 CMPQ BX, $0x20 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32 CMPQ BX, $0x40 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64 CMPQ BX, $0x80 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128 CMPQ BX, $0x00000100 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_1or2: MOVB (AX), DL MOVB -1(AX)(BX*1), BP MOVB DL, (CX) MOVB BP, -1(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_4: MOVL (AX), DX MOVL DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_3: MOVW (AX), DX MOVB 2(AX), BP MOVW DX, (CX) MOVB BP, 2(CX) JMP 
emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_5through7: MOVL (AX), DX MOVL -4(AX)(BX*1), BP MOVL DX, (CX) MOVL BP, -4(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_8: MOVQ (AX), DX MOVQ DX, (CX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_9through16: MOVQ (AX), DX MOVQ -8(AX)(BX*1), BP MOVQ DX, (CX) MOVQ BP, -8(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_17through32: MOVOU (AX), X0 MOVOU -16(AX)(BX*1), X1 MOVOU X0, (CX) MOVOU X1, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_33through64: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU -32(AX)(BX*1), X2 MOVOU -16(AX)(BX*1), X3 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, -32(CX)(BX*1) MOVOU X3, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_65through128: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_129through256: MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU -128(AX)(BX*1), X8 MOVOU -112(AX)(BX*1), X9 MOVOU -96(AX)(BX*1), X10 MOVOU -80(AX)(BX*1), X11 MOVOU -64(AX)(BX*1), X12 MOVOU -48(AX)(BX*1), X13 MOVOU -32(AX)(BX*1), X14 
MOVOU -16(AX)(BX*1), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, -128(CX)(BX*1) MOVOU X9, -112(CX)(BX*1) MOVOU X10, -96(CX)(BX*1) MOVOU X11, -80(CX)(BX*1) MOVOU X12, -64(CX)(BX*1) MOVOU X13, -48(CX)(BX*1) MOVOU X14, -32(CX)(BX*1) MOVOU X15, -16(CX)(BX*1) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048: LEAQ -256(BX), BX MOVOU (AX), X0 MOVOU 16(AX), X1 MOVOU 32(AX), X2 MOVOU 48(AX), X3 MOVOU 64(AX), X4 MOVOU 80(AX), X5 MOVOU 96(AX), X6 MOVOU 112(AX), X7 MOVOU 128(AX), X8 MOVOU 144(AX), X9 MOVOU 160(AX), X10 MOVOU 176(AX), X11 MOVOU 192(AX), X12 MOVOU 208(AX), X13 MOVOU 224(AX), X14 MOVOU 240(AX), X15 MOVOU X0, (CX) MOVOU X1, 16(CX) MOVOU X2, 32(CX) MOVOU X3, 48(CX) MOVOU X4, 64(CX) MOVOU X5, 80(CX) MOVOU X6, 96(CX) MOVOU X7, 112(CX) MOVOU X8, 128(CX) MOVOU X9, 144(CX) MOVOU X10, 160(CX) MOVOU X11, 176(CX) MOVOU X12, 192(CX) MOVOU X13, 208(CX) MOVOU X14, 224(CX) MOVOU X15, 240(CX) CMPQ BX, $0x00000100 LEAQ 256(AX), AX LEAQ 256(CX), CX JGE emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_move_256through2048 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_tail emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_avxUnaligned: LEAQ (AX)(BX*1), BP MOVQ CX, DI MOVOU -128(BP), X5 MOVOU -112(BP), X6 MOVQ $0x00000080, DX ANDQ $0xffffffe0, CX ADDQ $0x20, CX MOVOU -96(BP), X7 MOVOU -80(BP), X8 MOVQ CX, SI SUBQ DI, SI MOVOU -64(BP), X9 MOVOU -48(BP), X10 SUBQ SI, BX MOVOU -32(BP), X11 MOVOU -16(BP), X12 VMOVDQU (AX), Y4 ADDQ SI, AX SUBQ DX, BX emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop: VMOVDQU (AX), Y0 VMOVDQU 32(AX), Y1 VMOVDQU 64(AX), Y2 VMOVDQU 96(AX), Y3 ADDQ DX, AX VMOVDQA Y0, (CX) VMOVDQA Y1, 32(CX) VMOVDQA Y2, 64(CX) VMOVDQA Y3, 96(CX) ADDQ DX, CX SUBQ DX, BX JA 
emit_lit_memmove_emit_remainder_encodeBlockAsm12BAvx_memmove_gobble_128_loop ADDQ DX, BX ADDQ CX, BX VMOVDQU Y4, (DI) VZEROUPPER MOVOU X5, -128(BX) MOVOU X6, -112(BX) MOVOU X7, -96(BX) MOVOU X8, -80(BX) MOVOU X9, -64(BX) MOVOU X10, -48(BX) MOVOU X11, -32(BX) MOVOU X12, -16(BX) JMP emit_literal_done_emit_remainder_encodeBlockAsm12BAvx MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12BAvx: MOVQ CX, dst_base+0(FP) emit_literal_skip_emit_remainder_encodeBlockAsm12BAvx: MOVQ 8(SP), AX SUBQ dst_base+0(FP), AX MOVQ AX, ret+48(FP) RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// Writes a literal element to dst: a 1-5 byte length header followed by a
// copy of lit. Returns the number of bytes written (header + len(lit)), or 0
// when lit is empty. dst_len is never read, so the caller must supply a dst
// that is large enough.
//
// Header, with n = len(lit)-1:
//   n < 60     : one byte, n<<2
//   n < 1<<8   : 0xf0, then n as 1 byte
//   n < 1<<16  : 0xf4, then n as 2 bytes (little-endian)
//   n < 1<<24  : 0xf8, then n as 3 bytes (little-endian)
//   otherwise  : 0xfc, then n as 4 bytes (little-endian)
//
// Registers: AX = dst write pointer, CX = lit read pointer,
// DX = bytes of lit still to copy, BX = return value, BP/SI = scratch.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX
	MOVQ DX, BP

	// BP = n = len(lit)-1; the borrow means len(lit) == 0, nothing to emit.
	SUBL $0x01, BP
	JC emit_literal_end_standalone
	CMPL BP, $0x3c
	JLT one_byte_standalone
	CMPL BP, $0x00000100
	JLT two_bytes_standalone
	CMPL BP, $0x00010000
	JLT three_bytes_standalone
	CMPL BP, $0x01000000
	JLT four_bytes_standalone

	// 5-byte header: 0xfc + 32-bit n.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP memmove_standalone

four_bytes_standalone:
	// 4-byte header: 0xf8 + low 16 bits of n + bits 16-23 of n.
	MOVQ BP, SI
	SHRL $0x10, SI
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB SI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP memmove_standalone

three_bytes_standalone:
	// 3-byte header: 0xf4 + 16-bit n.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP memmove_standalone

two_bytes_standalone:
	// 2-byte header: 0xf0 + 8-bit n.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP memmove_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX), dispatching on size. Every fixed-size
	// case performs all of its loads before its first store, and covers a
	// size range with head and tail moves that may overlap in the middle.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ emit_literal_end_standalone
	CMPQ DX, $0x02
	JBE emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ DX, $0x04
	JB emit_lit_memmove_standalone_memmove_move_3
	JBE emit_lit_memmove_standalone_memmove_move_4
	CMPQ DX, $0x08
	JB emit_lit_memmove_standalone_memmove_move_5through7
	JE emit_lit_memmove_standalone_memmove_move_8
	CMPQ DX, $0x10
	JBE emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ DX, $0x20
	JBE emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ DX, $0x40
	JBE emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ DX, $0x80
	JBE emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ DX, $0x00000100
	JBE emit_lit_memmove_standalone_memmove_move_129through256
	JMP emit_lit_memmove_standalone_memmove_move_256through2048

emit_lit_memmove_standalone_memmove_move_1or2:
	// The second load overwrites the low byte of CX; the source pointer is
	// not used again on this path.
	MOVB (CX), BP
	MOVB -1(CX)(DX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(DX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(DX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_256through2048:
	// Copies 256 bytes per iteration while at least 256 remain afterwards,
	// then re-dispatches whatever is left (< 256) through the tail above.
	LEAQ -256(DX), DX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)

	// LEAQ does not modify flags, so JGE still tests the CMPQ result.
	CMPQ DX, $0x00000100
	LEAQ 256(CX), CX
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_standalone_memmove_move_256through2048
	JMP emit_lit_memmove_standalone_memmove_tail

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitLiteralAvx(dst []byte, lit []byte) int
// Requires: AVX, SSE2
//
// Same header encoding and return value as emitLiteral (header + len(lit)
// bytes written, 0 for an empty lit; dst_len is never read). The difference
// is the copy of literals longer than 256 bytes, which goes through the
// 32-byte AVX loop at emit_lit_memmove_standalone_memmove_avxUnaligned.
//
// Registers: AX = dst write pointer, CX = lit read pointer,
// DX = bytes of lit still to copy, BX = return value, BP/SI = scratch.
TEXT ·emitLiteralAvx(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	MOVQ lit_len+32(FP), DX
	MOVQ DX, BX
	MOVQ DX, BP

	// BP = n = len(lit)-1; the borrow means len(lit) == 0, nothing to emit.
	SUBL $0x01, BP
	JC emit_literal_end_avx_standalone
	CMPL BP, $0x3c
	JLT one_byte_standalone
	CMPL BP, $0x00000100
	JLT two_bytes_standalone
	CMPL BP, $0x00010000
	JLT three_bytes_standalone
	CMPL BP, $0x01000000
	JLT four_bytes_standalone

	// 5-byte header: 0xfc + 32-bit n.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP memmove_standalone

four_bytes_standalone:
	// 4-byte header: 0xf8 + low 16 bits of n + bits 16-23 of n.
	MOVQ BP, SI
	SHRL $0x10, SI
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB SI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP memmove_standalone

three_bytes_standalone:
	// 3-byte header: 0xf4 + 16-bit n.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP memmove_standalone

two_bytes_standalone:
	// 2-byte header: 0xf0 + 8-bit n.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP memmove_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	NOP

	// Copy DX bytes from (CX) to (AX), dispatching on size. Sizes above 256
	// take the AVX path; all smaller sizes use the same fixed-size moves as
	// emitLiteral.
emit_lit_memmove_standalone_memmove_tail:
	TESTQ DX, DX
	JEQ emit_literal_end_avx_standalone
	CMPQ DX, $0x02
	JBE emit_lit_memmove_standalone_memmove_move_1or2
	CMPQ DX, $0x04
	JB emit_lit_memmove_standalone_memmove_move_3
	JBE emit_lit_memmove_standalone_memmove_move_4
	CMPQ DX, $0x08
	JB emit_lit_memmove_standalone_memmove_move_5through7
	JE emit_lit_memmove_standalone_memmove_move_8
	CMPQ DX, $0x10
	JBE emit_lit_memmove_standalone_memmove_move_9through16
	CMPQ DX, $0x20
	JBE emit_lit_memmove_standalone_memmove_move_17through32
	CMPQ DX, $0x40
	JBE emit_lit_memmove_standalone_memmove_move_33through64
	CMPQ DX, $0x80
	JBE emit_lit_memmove_standalone_memmove_move_65through128
	CMPQ DX, $0x00000100
	JBE emit_lit_memmove_standalone_memmove_move_129through256
	JMP emit_lit_memmove_standalone_memmove_avxUnaligned

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(DX*1), SI
	MOVB BP, (AX)
	MOVB SI, -1(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_4:
	MOVL (CX), BP
	MOVL BP, (AX)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), SI
	MOVW BP, (AX)
	MOVB SI, 2(AX)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_5through7:
	MOVL (CX), BP
	MOVL -4(CX)(DX*1), SI
	MOVL BP, (AX)
	MOVL SI, -4(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_8:
	MOVQ (CX), BP
	MOVQ BP, (AX)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_9through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(DX*1), SI
	MOVQ BP, (AX)
	MOVQ SI, -8(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_65through128:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_129through256:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU -128(CX)(DX*1), X8
	MOVOU -112(CX)(DX*1), X9
	MOVOU -96(CX)(DX*1), X10
	MOVOU -80(CX)(DX*1), X11
	MOVOU -64(CX)(DX*1), X12
	MOVOU -48(CX)(DX*1), X13
	MOVOU -32(CX)(DX*1), X14
	MOVOU -16(CX)(DX*1), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, -128(AX)(DX*1)
	MOVOU X9, -112(AX)(DX*1)
	MOVOU X10, -96(AX)(DX*1)
	MOVOU X11, -80(AX)(DX*1)
	MOVOU X12, -64(AX)(DX*1)
	MOVOU X13, -48(AX)(DX*1)
	MOVOU X14, -32(AX)(DX*1)
	MOVOU X15, -16(AX)(DX*1)
	JMP emit_literal_end_avx_standalone

emit_lit_memmove_standalone_memmove_move_256through2048:
	// NOTE(review): nothing in this function branches to this label other
	// than the loop's own JGE; the dispatch above sends sizes over 256 to
	// avxUnaligned instead. Kept exactly as generated.
	LEAQ -256(DX), DX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU 32(CX), X2
	MOVOU 48(CX), X3
	MOVOU 64(CX), X4
	MOVOU 80(CX), X5
	MOVOU 96(CX), X6
	MOVOU 112(CX), X7
	MOVOU 128(CX), X8
	MOVOU 144(CX), X9
	MOVOU 160(CX), X10
	MOVOU 176(CX), X11
	MOVOU 192(CX), X12
	MOVOU 208(CX), X13
	MOVOU 224(CX), X14
	MOVOU 240(CX), X15
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)
	MOVOU X4, 64(AX)
	MOVOU X5, 80(AX)
	MOVOU X6, 96(AX)
	MOVOU X7, 112(AX)
	MOVOU X8, 128(AX)
	MOVOU X9, 144(AX)
	MOVOU X10, 160(AX)
	MOVOU X11, 176(AX)
	MOVOU X12, 192(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 224(AX)
	MOVOU X15, 240(AX)
	CMPQ DX, $0x00000100
	LEAQ 256(CX), CX
	LEAQ 256(AX), AX
	JGE emit_lit_memmove_standalone_memmove_move_256through2048
	JMP emit_lit_memmove_standalone_memmove_tail

emit_lit_memmove_standalone_memmove_avxUnaligned:
	// Only reached with DX > 256.
	//   SI     = one past the end of the source
	//   R8     = original (possibly unaligned) destination
	//   X5-X12 = last 128 bytes of the source, stored after the loop
	//   Y4     = first 32 bytes of the source, stored after the loop
	//   BP     = 128, the number of bytes moved per loop iteration
	LEAQ (CX)(DX*1), SI
	MOVQ AX, R8
	MOVOU -128(SI), X5
	MOVOU -112(SI), X6
	MOVQ $0x00000080, BP

	// Advance AX to the next 32-byte boundary above the original dst so the
	// loop can use aligned stores.
	// NOTE(review): this relies on the 32-bit immediate being sign-extended
	// to 64 bits (i.e. acting as $-32) - confirm against the assembler.
	ANDQ $0xffffffe0, AX
	ADDQ $0x20, AX
	MOVOU -96(SI), X7
	MOVOU -80(SI), X8

	// DI = bytes skipped at the front to reach alignment; the head store of
	// Y4 covers them.
	MOVQ AX, DI
	SUBQ R8, DI
	MOVOU -64(SI), X9
	MOVOU -48(SI), X10
	SUBQ DI, DX
	MOVOU -32(SI), X11
	MOVOU -16(SI), X12
	VMOVDQU (CX), Y4
	ADDQ DI, CX

	// Bias the count by one iteration so the loop stops while the remaining
	// bytes are still covered by the saved 128-byte tail.
	SUBQ BP, DX

emit_lit_memmove_standalone_memmove_gobble_128_loop:
	VMOVDQU (CX), Y0
	VMOVDQU 32(CX), Y1
	VMOVDQU 64(CX), Y2
	VMOVDQU 96(CX), Y3
	ADDQ BP, CX
	VMOVDQA Y0, (AX)
	VMOVDQA Y1, 32(AX)
	VMOVDQA Y2, 64(AX)
	VMOVDQA Y3, 96(AX)
	ADDQ BP, AX
	SUBQ BP, DX
	JA emit_lit_memmove_standalone_memmove_gobble_128_loop

	// Undo the bias, then turn the remaining count into the end-of-dst
	// pointer (DX = AX + remaining).
	ADDQ BP, DX
	ADDQ AX, DX

	// Store the unaligned head and the final 128 bytes saved earlier.
	VMOVDQU Y4, (R8)
	VZEROUPPER
	MOVOU X5, -128(DX)
	MOVOU X6, -112(DX)
	MOVOU X7, -96(DX)
	MOVOU X8, -80(DX)
	MOVOU X9, -64(DX)
	MOVOU X10, -48(DX)
	MOVOU X11, -32(DX)
	MOVOU X12, -16(DX)

emit_literal_end_avx_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// Writes one or more repeat elements covering length bytes to dst and returns
// the number of bytes written. dst_len is never read. offset is only used by
// the 2-byte form that embeds it (length 9-11 with offset < 2048).
//
// Encodings, with L = length-4:
//   length <= 8                  : 2 bytes, L<<2|1 stored as a 16-bit word
//   length 9-11, offset < 2048   : 2 bytes, (offset>>8)<<5 | L<<2 | 1, offset&0xff
//   L < 0x104                    : 3 bytes, 0x15, 0x00, L-4
//   L < 0x10100                  : 4 bytes, 0x19, 0x00, L-256 (16 bit)
//   L < 0x100ffff                : 5 bytes, 0x1d, 0x00, L-65536 (24 bit)
//   otherwise                    : 5 bytes, 0x1d, 0x00, 0xfb, 0xff, 0xff, then
//                                  loop with 16842747 fewer bytes to encode
//
// Registers: AX = dst write pointer, BX = return value, CX = offset,
// DX = L for the current element, BP = length before the subtraction.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

emit_repeat_again_standalone:
	MOVQ DX, BP
	LEAQ -4(DX), DX
	CMPL BP, $0x08
	JLE repeat_two_standalone
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_standalone
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone
	CMPL DX, $0x00010100
	JLT repeat_four_standalone
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone

	// Too long for a single element: emit a 5-byte element with the length
	// field 0xfffffb and go around again with what is left.
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone

repeat_five_standalone:
	// CX (offset) is no longer needed and is reused for bits 16-23.
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_repeat_end

repeat_four_standalone:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_repeat_end

repeat_three_standalone:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_repeat_end

repeat_two_standalone:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_repeat_end

repeat_two_offset_standalone:
	// DX = L*4 + 1, then the high offset bits are merged into bits 5-7.
	XORQ BP, BP
	LEAQ 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// Writes a copy element for (offset, length) to dst, followed by repeat
// elements when length exceeds what one copy element can carry, and returns
// the number of bytes written. dst_len is never read.
//
// Encodings:
//   offset >= 65536, length <= 64 : 5 bytes, (length-1)<<2|3, 32-bit offset
//   offset >= 65536, length > 64  : 0xff + 32-bit offset (64 bytes), then the
//                                   rest as repeats, or as one more 5-byte
//                                   copy when only 1-3 bytes remain
//   offset < 65536, length > 64   : 0xee + 16-bit offset (60 bytes), then the
//                                   rest as repeats
//   length < 12, offset < 2048    : 2 bytes, (offset>>8)<<5 | (length-4)<<2 | 1,
//                                   offset&0xff
//   otherwise                     : 3 bytes, (length-1)<<2|2, 16-bit offset
//
// The repeat sections are inlined copies of the emitRepeat logic.
//
// Registers: AX = dst write pointer, BX = return value, CX = offset,
// DX = remaining length, BP = scratch. Where BP is loaded with MOVB only its
// low byte is defined; that is sufficient because only DL of the LEAQ result
// is stored.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone

	// Length-64 copy with a 4-byte offset.
	// MOVL, not MOVD: the Go assembler treats MOVD as an alias of MOVQ, so
	// MOVD CX, 1(AX) stored 8 bytes and overwrote 4 bytes past this 5-byte
	// element. Only the low 32 bits of the offset belong here.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAQ -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone

emit_repeat_again_standalone_emit_copy:
	MOVQ DX, BP
	LEAQ -4(DX), DX
	CMPL BP, $0x08
	JLE repeat_two_standalone_emit_copy
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ BP, BP
	LEAQ 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

four_bytes_remain_standalone:
	// Nothing left after the length-64 copy: done.
	TESTL DX, DX
	JZ gen_emit_copy_end

	// Tag byte = (length-1)<<2 | 3, computed as 3 - 4 + length*4.
	MOVB $0x03, BP
	LEAQ -4(BP)(DX*4), DX
	MOVB DL, (AX)

	// 4-byte offset; MOVL for the same reason as above (MOVD would store
	// 8 bytes).
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone

	// Length-60 copy with a 2-byte offset, remainder emitted as repeats.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAQ -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

emit_repeat_again_standalone_emit_copy_short:
	MOVQ DX, BP
	LEAQ -4(DX), DX
	CMPL BP, $0x08
	JLE repeat_two_standalone_emit_copy_short
	CMPL BP, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy_short
	LEAQ -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAQ -65536(DX), DX
	MOVQ DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARQ $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAQ -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAQ -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ BP, BP
	LEAQ 1(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone

	// 2-byte copy: low byte of DX = 1 - 16 + length*4 = (length-4)<<2 | 1.
	MOVB $0x01, BP
	LEAQ -16(BP)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

emit_copy_three_standalone:
	// 3-byte copy: low byte of DX = 2 - 4 + length*4 = (length-1)<<2 | 2.
	MOVB $0x02, BP
	LEAQ -4(BP)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// Returns the number of leading bytes that are equal in a and b, comparing at
// most len(a) bytes. b_len is never read, so b must have at least len(a)
// readable bytes.
//
// Registers: AX = a, CX = b, DX = bytes left to compare, BP = match length,
// BX = scratch.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX
	XORQ BP, BP
	CMPQ DX, $0x08
	JL matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at once; a non-zero XOR marks the differing bits.
	MOVQ (AX)(BP*1), BX
	XORQ (CX)(BP*1), BX
	TESTQ BX, BX
	JZ matchlen_loop_standalone

	// Index of the lowest set bit, divided by 8, is the number of equal
	// bytes in this word.
	BSFQ BX, BX
	SARQ $0x03, BX
	LEAQ (BP)(BX*1), BP
	JMP gen_match_len_end

matchlen_loop_standalone:
	LEAQ -8(DX), DX
	LEAQ 8(BP), BP
	CMPQ DX, $0x08
	JGE matchlen_loopback_standalone

matchlen_single_standalone:
	// Fewer than 8 bytes left: finish one byte at a time.
	TESTQ DX, DX
	JZ gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(BP*1), BL
	CMPB (CX)(BP*1), BL
	JNE gen_match_len_end
	LEAQ 1(BP), BP
	DECQ DX
	JNZ matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ BP, ret+48(FP)
	RET